In [8]:
# Importing libraries
import os 
import random
import pickle
import torch
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
from collections import Counter, defaultdict

In [3]:
# Folder that holds the dataset images
dataset_path = "../Datasets"

# Tally of how many images share each resolution, keyed by an "H x W" string
size_counter = Counter()

# Inspect every PNG found directly inside the dataset folder
for filename in os.listdir(dataset_path):
    if not filename.endswith(".png"):
        continue

    img_path = os.path.join(dataset_path, filename)
    try:
        with Image.open(img_path) as img:
            # PIL reports size as (width, height); the key is written as H x W
            width, height = img.size
            size_counter[f"{height} x {width}"] += 1
    except Exception as e:
        # Best effort: report the unreadable file and carry on with the rest
        print(f"Failed to open {filename}: {e}")

# Report every resolution, most frequent first
for resolution, n_images in size_counter.most_common():
    print(f"{resolution}: {n_images}")

100 x 94: 26
94 x 103: 20
125 x 142: 20
76 x 78: 20
95 x 110: 18
101 x 115: 18
209 x 228: 18
58 x 65: 16
101 x 101: 16
87 x 82: 16
81 x 76: 16
78 x 95: 16
121 x 134: 16
130 x 136: 16
98 x 97: 14
56 x 56: 14
79 x 77: 14
122 x 116: 14
110 x 116: 14
88 x 88: 12
164 x 182: 12
101 x 92: 12
144 x 165: 12
101 x 113: 12
159 x 163: 12
102 x 109: 12
158 x 179: 12
157 x 168: 12
97 x 98: 12
105 x 112: 12
129 x 140: 12
83 x 93: 12
64 x 67: 12
91 x 99: 10
144 x 154: 10
76 x 84: 10
84 x 88: 10
57 x 52: 10
69 x 77: 10
117 x 119: 10
119 x 117: 10
148 x 153: 10
151 x 165: 10
87 x 95: 10
161 x 171: 10
152 x 167: 10
100 x 93: 10
115 x 124: 10
155 x 169: 10
129 x 147: 10
75 x 69: 10
70 x 72: 10
134 x 141: 10
90 x 89: 10
147 x 178: 8
154 x 171: 8
201 x 204: 8
180 x 207: 8
43 x 44: 8
156 x 165: 8
122 x 141: 8
216 x 229: 8
48 x 50: 8
86 x 92: 8
54 x 53: 8
45 x 42: 8
208 x 235: 8
149 x 146: 8
81 x 82: 8
98 x 113: 8
221 x 244: 8
179 x 203: 8
121 x 126: 8
153 x 164: 8
211 x 211: 8
55 x 57: 8
97 x 107: 8
139 x 15

In [4]:
# Counter for class labels. The label is the part of the filename that
# precedes the first underscore; it is kept as a string key.
label_counter = Counter()

# Loop through files and extract class label from filename
for filename in os.listdir(dataset_path):
    if not filename.endswith(".png"):
        continue

    class_label = filename.split("_")[0]
    try:
        # str.split never raises, so the numeric check has to happen here.
        # Without it, a stray PNG with a non-numeric prefix would only surface
        # as a ValueError inside the sort below and abort the whole report.
        int(class_label)
    except ValueError as e:
        print(f"Skipping {filename}: {e}")
        continue

    label_counter[class_label] += 1

# Print result sorted numerically by class label
for label in sorted(label_counter, key=int):
    print(f"Class {label}: {label_counter[label]}")

Class 0: 514
Class 1: 500
Class 2: 344
Class 3: 268
Class 4: 260
Class 5: 244
Class 6: 220
Class 7: 218
Class 8: 214
Class 9: 202
Class 10: 202
Class 11: 198
Class 12: 184
Class 13: 156
Class 14: 150
Class 15: 140
Class 16: 140
Class 17: 132
Class 18: 130
Class 19: 128
Class 20: 126
Class 21: 118
Class 22: 108


In [9]:
# Set seed for reproducibility
random.seed(42)

# Fraction of each class's images that goes into the training split
TRAIN_FRACTION = 0.8

# Group image paths by class label (the filename prefix before the first "_")
class_to_images = defaultdict(list)

# Collect all images into the dictionary.
# os.listdir returns entries in arbitrary order, so the listing is sorted
# first: otherwise the lists handed to random.shuffle (and the order in which
# classes are visited) could differ between machines or runs, and the fixed
# seed above would not yield the same split.
for filename in sorted(os.listdir(dataset_path)):
    if filename.endswith(".png"):
        class_label = filename.split("_")[0]
        relative_path = os.path.join(dataset_path, filename)
        class_to_images[class_label].append(relative_path)

# Prepare train/test lists of (image_path, class_label) pairs
train_data = []
test_data = []

# Split each class separately so both lists keep every class's share
for class_label, images in class_to_images.items():
    random.shuffle(images)  # in-place shuffle driven by the seeded generator
    split_idx = int(TRAIN_FRACTION * len(images))

    train_images = images[:split_idx]
    test_images = images[split_idx:]

    train_data.extend((img, class_label) for img in train_images)
    test_data.extend((img, class_label) for img in test_images)