In [8]:
import os
from PIL import Image
import numpy as np
from collections import Counter

# Survey of the raw dataset: walks SRC_DIR recursively, records the pixel
# dimensions, aspect ratio and on-disk size of every image, then prints
# summary statistics, the most common resolutions and an orientation split.
SRC_DIR = "data/high_res"
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')  # matched case-insensitively

# Parallel lists: index i in each list describes the same image.
widths = []
heights = []
aspect_ratios = []
file_sizes_mb = []
image_count = 0

for root, dirs, files in os.walk(SRC_DIR):
    for filename in files:
        if not filename.lower().endswith(IMAGE_EXTENSIONS):
            continue

        img_path = os.path.join(root, filename)
        try:
            # The context manager releases the file handle right away instead
            # of leaving thousands of handles open until garbage collection.
            with Image.open(img_path) as img:
                w, h = img.size
            ratio = w / h
            size_mb = os.path.getsize(img_path) / (1024 * 1024)
        except Exception as e:
            # Best-effort scan: report the unreadable file and keep going.
            print(f"Error: {img_path} - {e}")
            continue

        # Append only once every measurement succeeded, so the parallel
        # lists can never drift out of alignment after a partial failure.
        widths.append(w)
        heights.append(h)
        aspect_ratios.append(ratio)
        file_sizes_mb.append(size_mb)
        image_count += 1

if image_count == 0:
    # No exit() here: inside a notebook it shuts the kernel down rather than
    # merely ending the cell, so the report is simply skipped instead.
    print("No images found!")
else:
    print(f"=== Image Dimensions ({image_count} images) ===")
    print(f"Width  - Min: {min(widths)}, Max: {max(widths)}, Avg: {np.mean(widths):.1f}")
    print(f"Height - Min: {min(heights)}, Max: {max(heights)}, Avg: {np.mean(heights):.1f}")
    print(f"\nAspect Ratio - Min: {min(aspect_ratios):.2f}, Max: {max(aspect_ratios):.2f}, Avg: {np.mean(aspect_ratios):.2f}")
    print(f"File Size (MB) - Min: {min(file_sizes_mb):.2f}, Max: {max(file_sizes_mb):.2f}, Avg: {np.mean(file_sizes_mb):.2f}")

    # Top 5 resolutions
    resolutions = Counter(zip(widths, heights)).most_common(5)
    print("\n=== Most Common Resolutions ===")
    for (w, h), count in resolutions:
        print(f"{w}×{h}: {count} images")

    # Orientation, decided on the integer pixel sizes so the "square" bucket
    # does not rely on an exact floating-point comparison of the ratio.
    landscape = sum(w > h for w, h in zip(widths, heights))
    portrait = sum(w < h for w, h in zip(widths, heights))
    square = sum(w == h for w, h in zip(widths, heights))
    print("\n=== Orientation ===")
    print(f"Landscape: {landscape}, Portrait: {portrait}, Square: {square}")

=== Image Dimensions (15421 images) ===
Width  - Min: 384, Max: 4892, Avg: 884.2
Height - Min: 127, Max: 4892, Avg: 757.5

Aspect Ratio - Min: 0.76, Max: 3.38, Avg: 1.16
File Size (MB) - Min: 0.01, Max: 7.44, Avg: 0.39

=== Most Common Resolutions ===
400×400: 8794 images
4020×4892: 97 images
4892×4020: 41 images
3000×2939: 15 images
3000×2945: 15 images

=== Orientation ===
Landscape: 6091, Portrait: 523, Square: 8807


**Changing the files from their various inconsistent, high resolutions to 128×128**

In [None]:
import os
from PIL import Image
from torchvision import transforms

# Down-samples every class folder of the high-resolution dataset to square
# IMG_SIZE×IMG_SIZE RGB images, mirroring the class-subfolder layout in DST_DIR.

# paths
SRC_DIR = r"C:\My Projects\GAN-data-balancing\data\high_res"        # original dataset (class subfolders)
DST_DIR = r"C:\My Projects\GAN-data-balancing\data\low_res"         # resized output
IMG_SIZE = 128                   # target resolution
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')  # matched case-insensitively

os.makedirs(DST_DIR, exist_ok=True)

transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),            # resize shortest side to 128
    transforms.CenterCrop(IMG_SIZE)         # crop to 128×128
])

for class_name in os.listdir(SRC_DIR):

    # Skip universal_test folder
    if class_name == "universal_test":
        continue

    src_class = os.path.join(SRC_DIR, class_name)

    # Stray files directly under SRC_DIR are not class folders; listing them
    # would raise NotADirectoryError and abort the whole conversion.
    if not os.path.isdir(src_class):
        continue

    dst_class = os.path.join(DST_DIR, class_name)
    os.makedirs(dst_class, exist_ok=True)

    for img_name in os.listdir(src_class):
        # Ignore non-image entries (OS metadata files, nested folders, ...)
        # using the same extension filter as the dataset-statistics cell.
        if not img_name.lower().endswith(IMAGE_EXTENSIONS):
            continue

        img_path = os.path.join(src_class, img_name)
        try:
            # Close the source file as soon as the pixels are converted so
            # file handles do not pile up over the whole dataset.
            with Image.open(img_path) as src_img:
                resized = transform(src_img.convert("RGB"))
            resized.save(os.path.join(dst_class, img_name))
        except Exception as e:
            # One unreadable image should not cancel the remaining work;
            # report it and continue with the next file.
            print(f"Error: {img_path} - {e}")

print("Done.")


Done.


In [26]:
# Sanity check: total number of files found (recursively) under the source
# and destination trees.
source_total = sum(len(entry[2]) for entry in os.walk(SRC_DIR))
print(f"Source images: {source_total}")

output_total = sum(len(entry[2]) for entry in os.walk(DST_DIR))
print(f"Output images: {output_total}")

# Note: the universal test images remained unchanged.

Source images: 15421
Output images: 15421
