Environment: MacBook Pro with 16 GB RAM and 500 GB SSD storage

Every Image has 1024 * 1024 pixels = 1 048 576 pixels
Each pixel grey scale from 0 to 255 -> 1 Byte
Per image: 1.048576 MB -> 1 MB
About 110.000 images -> 110GB storage needed
Files are stored as PNG, so each image takes only about 440 KB on disk; once it is loaded in Python and decoded to a bitmap, it occupies about 1 MB again.

Unlike many other \ac{DL} research projects that can use up to \todo{zahlen und quelle}, this work has to deal with significant computational limitations. We are working on a MacBook Pro with an M1 CPU, 16 GB of RAM and 500 GB of SSD storage.

\section{Storage}
When downloading

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

# Parameter grid: square image side length from 64 px to 1024 px and dataset
# size from 10,000 to 100,000 images, 100 samples along each axis.
image_sizes = np.linspace(64, 1024, 100)
num_images = np.linspace(10000, 100000, 100)
X, Y = np.meshgrid(image_sizes, num_images)

# Total uncompressed storage in GB.
# bytes = side * side * num_images (no bytes-per-pixel factor is applied,
# i.e. one byte per pixel); dividing by 1e9 converts bytes to GB.
pixels_per_image = X * X
storage_needed = pixels_per_image * Y / 1e9

# Small figure with a single 3D axes.
fig = plt.figure(figsize=(4, 3.5))
ax = fig.add_subplot(111, projection='3d')

# Surface coloured with the blue-white-red colormap, one face per grid cell.
surface_style = dict(cmap=cm.bwr, linewidth=0, rstride=1, cstride=1, shade=True, alpha=1)
surf = ax.plot_surface(X, Y, storage_needed, **surface_style)
surf.set_facecolor((1, 1, 1, 1))  # opaque white RGBA face colour

# Axis labels, tick label sizes and background.
ax.set_xlabel('Image Size (Pixels)', fontsize=11)
ax.set_ylabel('Number of Images', fontsize=11)
ax.set_zlabel('Memory Needed (GB)', fontsize=11)
ax.grid(False)
for axis_name in ('x', 'y', 'z'):
    ax.tick_params(axis=axis_name, labelsize=9)

ax.set_facecolor('white')

# Remove padding, then set the subplot borders by hand so the 3D axes and
# their labels are not clipped.
plt.tight_layout(pad=0)
plt.subplots_adjust(left=-0.1, right=0.95, top=1, bottom=0.15)

# Export the figure as PDF, then display it.
plt.savefig('memory3dplot.pdf', format='pdf', dpi=300, pad_inches=0)

plt.show()



In [None]:
from pympler import asizeof

# Parameter grid: square image side length from 64 px to 1024 px and dataset
# size from 10,000 to 100,000 images, 100 samples along each axis.
image_sizes = np.linspace(64, 1024, 100)
num_images = np.linspace(10000, 100000, 100)
X, Y = np.meshgrid(image_sizes, num_images)

# Measure the in-memory size of ONE image per image size.
# The measured size depends only on the array shape and dtype, so it is taken
# once per image size instead of once per (image size, dataset size) pair;
# this avoids allocating and measuring 10,000 arrays when 100 are enough.
# The array contents do not affect the size, so a zero-filled float32 array
# (4 bytes per pixel) stands in for an actual image.
bytes_per_image = np.empty(len(image_sizes))
for column, size in enumerate(image_sizes):
    side = int(size)  # linspace yields floats; array shapes need ints
    single_image = np.zeros((side, side), dtype=np.float32)
    single_image = single_image.reshape(-1, side, side)  # leading axis of length 1
    bytes_per_image[column] = asizeof.asizeof(single_image)  # size of one image in bytes

# Memory for the whole dataset in GB: per-image bytes (varies along columns,
# like X) times the number of images (varies along rows, like Y).
storage_needed = bytes_per_image[np.newaxis, :] * Y / 1e9

# Small figure with a single 3D axes.
fig = plt.figure(figsize=(4, 3.5))
ax = fig.add_subplot(111, projection='3d')

# Surface coloured with the blue-white-red colormap, one face per grid cell.
surf = ax.plot_surface(X, Y, storage_needed, cmap=cm.bwr, linewidth=0, rstride=1, cstride=1, shade=True, alpha=1)
surf.set_facecolor((1, 1, 1, 1))  # opaque white RGBA face colour

# Axis labels, tick label sizes and background.
ax.set_xlabel('Image Size (Pixels)', fontsize=11)
ax.set_ylabel('Number of Images', fontsize=11)
ax.set_zlabel('Memory Needed (GB)', fontsize=11)
ax.grid(False)
for axis_name in ('x', 'y', 'z'):
    ax.tick_params(axis=axis_name, labelsize=9)

ax.set_facecolor('white')

# Remove padding, then set the subplot borders by hand so the 3D axes and
# their labels are not clipped.
plt.tight_layout(pad=0)
plt.subplots_adjust(left=-0.1, right=0.95, top=1, bottom=0.15)

# Export the figure as PDF, then display it.
plt.savefig('memory3dplot_2.pdf', format='pdf', dpi=300, pad_inches=0)

plt.show()

In [None]:
# AUC per input resolution for each experiment series.
# NOTE(review): the series names ("test", "test2", ...) look like placeholders;
# confirm the values before using this figure.
x_values = [50, 100, 150, 200, 250, 300, 350, 400]
y_values = {"test": [50.0, 51.0, 53.0, 55.0, 56.0, 57.0, 56.0, 53.0],
            "test2": [55.0, 51.0, 53.0, 55.0, 56.0, 59.0, 56.0, 53.0],
            "test3": [57.0, 51.0, 55.0, 55.0, 56.0, 57.0, 56.0, 53.0],
            "test4": [53.0, 51.0, 53.0, 55.0, 52.0, 57.0, 51.0, 53.0]}

# Explicit figure/axes so the plot does not depend on pyplot's current-axes state.
auc_fig, auc_ax = plt.subplots(figsize=(8, 6))

# One line per series, labelled with its dictionary key.
for label, y_value in y_values.items():
    auc_ax.plot(x_values, y_value, label=label)

auc_ax.set_ylabel("Area under curve (AUC)")
auc_ax.set_xlabel("Resolution in pixel x pixel")
auc_ax.grid(color='grey', linestyle='-', linewidth=0.25, alpha=0.5)

# The labels passed to plot() are only displayed once a legend is drawn;
# without it the four lines cannot be told apart.
auc_ax.legend()

plt.show()

In [None]:
# Count per class label, listed in the order the pie wedges are drawn.
# NOTE(review): presumably the number of images per finding in the dataset - confirm.
class_counts = {
    'Atelectasis': 6259,
    'Effusion': 7281,
    'Infiltration': 13869,
    'No Finding': 60361,
}
labels = list(class_counts)
values = list(class_counts.values())

# Alternative ordering (largest class first), kept for reference:
# labels = ['No Finding', 'Infiltration', 'Effusion', 'Atelectasis']
# value = [60361, 13869, 7281, 6259]

def func(pct, allvalues):
    """Format a pie-wedge annotation: absolute count above its percentage.

    Args:
        pct: Share of the wedge in percent (0-100).
        allvalues: All wedge values; their sum is the total that ``pct``
            refers to.

    Returns:
        A two-line string: the absolute count on the first line and the
        percentage, with one decimal place, in parentheses on the second.
    """
    # Round to the nearest integer instead of truncating: the product is a
    # float and can land just below the true count (e.g. 28.999999999999996
    # for 29), which int() alone would turn into a count that is one too low.
    absolute = int(round(pct / 100. * sum(allvalues)))
    return f"{absolute}\n({pct:.1f}%)"

fig, ax = plt.subplots()

# Text style handed to the pie chart: small white text.
pie_text_style = {'fontsize': 10, 'color': 'white', 'alpha': 1}

# Clockwise pie starting at 90 degrees; each wedge is annotated through func()
# with its absolute count and percentage.
wedges, texts, autotexts = ax.pie(
    values,
    labels=labels,
    autopct=lambda pct: func(pct, values),
    startangle=90,
    textprops=pie_text_style,
    labeldistance=1.1,
    pctdistance=0.8,
    counterclock=False,
)

# The class labels are restyled separately: larger and black.
plt.setp(texts, fontsize=14, color='black')

ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle

# Export the figure as PDF with a tight bounding box, then display it.
plt.savefig('dataset_pie.pdf', format='pdf', dpi=300, bbox_inches='tight')

plt.show()