In [8]:
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import os

In [None]:
# Collect the sub-folders of the data root; each one is used as a class label below.
root_folder = r"../../resources"
# Sorted for a reproducible order (os.listdir returns entries in arbitrary order).
# Dot-prefixed entries (.DS_Store, .ipynb_checkpoints, ...) are not class folders.
sub_folder_names = sorted(
    f for f in os.listdir(root_folder)
    if not f.startswith(".") and os.path.isdir(os.path.join(root_folder, f))
)
sub_folder_names

In [None]:
# Build one DataFrame per class folder (image file name + folder name as label),
# then stack them into a single frame.
df_list = []

for sub_folder_name in sub_folder_names:
    subfolder_path = os.path.join(root_folder, sub_folder_name)
    # Keep regular, non-hidden files only: entries such as .DS_Store or nested
    # directories are not images and would make Image.open fail when the
    # listed files are loaded.
    file_names = sorted(
        f for f in os.listdir(subfolder_path)
        if not f.startswith(".") and os.path.isfile(os.path.join(subfolder_path, f))
    )
    df = pd.DataFrame(file_names, columns=['image_id'])
    df['label'] = sub_folder_name
    df_list.append(df)

# ignore_index gives one unique 0..n-1 index instead of repeating each
# folder's own 0-based index in the combined frame.
concat_df = pd.concat(df_list, ignore_index=True)
concat_df

In [None]:
# Check for null values: info() prints each column's non-null count and dtype,
# which can be compared against the total number of entries.
concat_df.info()

In [None]:
# Number of distinct image ids; it matches the row count only when every id is unique.
concat_df['image_id'].nunique(dropna=False)

In [None]:
# Number of images per label.
label_column = concat_df['label']
label_column.value_counts()

In [None]:
# Horizontal bar chart of the number of images per label, sorted by count.
label_counts = concat_df['label'].value_counts().sort_values()
ax = label_counts.plot.barh()
# Title and axis labels so the figure can be read on its own.
ax.set(title="Images per label", xlabel="Number of images", ylabel="Label")
plt.show()

In [None]:
# Show one sample image per class folder in a grid.
n_cols = 3
# Keep the original 2x3 layout, but add rows when there are more than six
# folders so plt.subplot never receives an out-of-range index.
n_rows = max(2, -(-len(sub_folder_names) // n_cols))

plt.figure(figsize=(10,10))

for i, folder in enumerate(sub_folder_names):
    path = os.path.join(root_folder, folder)
    try:
        # Skip hidden entries such as .DS_Store, and sort so the same sample
        # is picked on every run (os.listdir order is arbitrary).
        candidates = sorted(
            f for f in os.listdir(path)
            if not f.startswith(".") and os.path.isfile(os.path.join(path, f))
        )
        if not candidates:
            print(f"Error processing {path}: no files found")
            continue
        sample_path = os.path.join(path, candidates[0])
        # The context manager closes the file once the image has been drawn.
        with Image.open(sample_path) as img:
            plt.subplot(n_rows, n_cols, i + 1)
            plt.imshow(img)
            plt.title(folder)

    except Exception as e:
        # Best-effort: report the folder (always bound here, unlike the image
        # path) and carry on with the remaining folders.
        print(f"Error processing {path}: {e}")

plt.tight_layout()
plt.show()

In [10]:
# Read every image listed in concat_df into memory.
imgs = []

for label, image_id in zip(concat_df['label'], concat_df['image_id']):
    image_path = os.path.join(root_folder, label, image_id)
    with Image.open(image_path) as opened:
        # copy() is taken while the file is still open, so the stored image
        # does not depend on the handle that the with-block closes.
        loaded = opened.copy()
    imgs.append(loaded)


In [None]:
# Distinct image sizes across the loaded images.
sizes = {picture.size for picture in imgs}
sizes

In [None]:
# Tally how many images have each size; keys are the string form of the size.
img_size_count = {}

for picture in imgs:
    size_key = str(picture.size)
    img_size_count[size_key] = img_size_count.get(size_key, 0) + 1

img_size_count

In [None]:
# Resize every loaded image to one common size using the LANCZOS filter.
target_size = (150, 150)

resized_imgs = []
for original in imgs:
    resized_imgs.append(original.resize(target_size, resample=Image.LANCZOS))

# Preview the second resized image.
resized_imgs[1]

In [None]:
# Distinct sizes after resizing.
sizes = {picture.size for picture in resized_imgs}
sizes