In [None]:
from collections import defaultdict
from poif.tests import get_img, get_temp_path
from poif.utils import FileIterator, get_relative_path
from pathlib import Path
import cv2
from tqdm import tqdm
import pickle
from dataclasses import dataclass

In [None]:
# Root directory of the image dataset; it is searched recursively for '*.jpeg' files below.
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
ds_path = Path('/home/gilles/datasets/pneumonia')

In [None]:
@dataclass
class ImageSize:
    """File sizes of one resized image stored in three different formats.

    In this notebook every field is filled from ``Path.stat().st_size``,
    so the values are byte counts.
    """
    pkl: int  # size of the file written with pickle.dump
    jpg: int  # size of the '.jpg' file written with cv2.imwrite
    png: int  # size of the '.png' file written with cv2.imwrite

# Measure how much disk space the same resized image takes as JPEG, PNG and pickle.
# Maps the target edge length (pixels) to a list of ImageSize, one per readable image.
size_comparison = defaultdict(list)

temp_dir = get_temp_path()
test_img_sizes = [256, 384, 512, 640, 840]

# The scratch files are overwritten for every image, so their paths are loop-invariant.
jpg_file = temp_dir / 'image.jpg'
png_file = temp_dir / 'image.png'
pickle_file = temp_dir / 'image.pkl'

# Build the file list once: avoids re-walking the directory tree for every size
# and lets tqdm show a total instead of a bare counter.
image_paths = sorted(ds_path.rglob('*.jpeg'))

for img_size in test_img_sizes:
    for img in tqdm(image_paths, desc=f'Image size: {img_size}'):
        original_size_img = cv2.imread(str(img))
        if original_size_img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None rather
            # than raising; passing that on to cv2.resize would abort the whole run.
            tqdm.write(f'Skipping unreadable image: {img}')
            continue

        resized = cv2.resize(original_size_img, (img_size, img_size))

        # cv2.imwrite reports failure through its return value. Without this check a
        # failed write would leave the previous image's file in place and its stale
        # size would be recorded for the current image.
        for encoded_file in (jpg_file, png_file):
            if not cv2.imwrite(str(encoded_file), resized):
                raise OSError(f'cv2.imwrite failed for {encoded_file}')

        with open(pickle_file, 'wb') as f:
            pickle.dump(resized, f)

        size_wrapper = ImageSize(
            jpg=jpg_file.stat().st_size,
            png=png_file.stat().st_size,
            pkl=pickle_file.stat().st_size,
        )

        size_comparison[img_size].append(size_wrapper)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Plot, per tested image size, how much larger the pickle and PNG files are than
# the JPEG files on average (ratio of mean file sizes).
fig, ax = plt.subplots()
ax.grid(True)

# Collect one point per image size so that each format is drawn as a single series
# and therefore gets exactly one legend entry.
plotted_sizes = []
pkl_ratios = []
png_ratios = []

for img_size, comparison_tuples in size_comparison.items():
    jpg_values = [comp_tuple.jpg for comp_tuple in comparison_tuples]
    png_values = [comp_tuple.png for comp_tuple in comparison_tuples]
    pkl_values = [comp_tuple.pkl for comp_tuple in comparison_tuples]

    jpg_mean = np.mean(jpg_values)
    png_mean = np.mean(png_values)
    pkl_mean = np.mean(pkl_values)

    # JPEG is the baseline both other formats are compared against.
    pkl_ratio = pkl_mean / jpg_mean
    png_ratio = png_mean / jpg_mean

    plotted_sizes.append(img_size)
    pkl_ratios.append(pkl_ratio)
    png_ratios.append(png_ratio)

# Distinct marker and colour per format; previously both series used the same
# green triangles and could not be told apart.
ax.plot(plotted_sizes, pkl_ratios, linestyle='None', marker='^', color='green', label='pickle / jpg')
ax.plot(plotted_sizes, png_ratios, linestyle='None', marker='o', color='blue', label='png / jpg')

ax.set_xlabel('Image edge length (px)')
ax.set_ylabel('Mean file size relative to JPEG')
ax.set_title('Storage cost of pickle and PNG compared to JPEG')
ax.legend();
