In [None]:
import shutil

from google.colab import drive

# Attach Google Drive at /content/drive so the project folders used below are
# reachable; force_remount=True requests a fresh mount on every run of the cell.
drive.mount(
    '/content/drive',
    force_remount=True,
)


Mounted at /content/drive


In [None]:
import os
import shutil
from tqdm import tqdm

# --- Per-dataset directories that hold the DR-class images ---
dr_folders = [
    "/content/drive/MyDrive/EECE 490 Project/Aptos/Unbalanced_Split_train/DR",
    "/content/drive/MyDrive/EECE 490 Project/RandSet2/Unbalanced_Split/DR",
    "/content/drive/MyDrive/EECE 490 Project/DDR dataset/Unbalanced_Split/DR",
    "/content/drive/MyDrive/EECE 490 Project/Messidor2/Unbalanced_Split/DR",
    "/content/drive/MyDrive/EECE 490 Project/EyePac/Unbalanced_Split/DR"
]

# --- Single merged destination for every DR image ---
all_dr_dir = "/content/drive/MyDrive/EECE 490 Project/All_DR_Images"
os.makedirs(all_dr_dir, exist_ok=True)

# Names already present in the destination. An incoming file whose name is in
# this set is written under a "_dup<N>" name instead of overwriting.
# NOTE: because this set is seeded from the destination listing, re-running the
# cell after a completed run copies every file again under "_dup" names.
existing = set(os.listdir(all_dr_dir))
conflict_counter = 0  # shared across all folders; bumped on every rename attempt
total_copied = 0

# --- Merge: walk each source folder and copy its regular files ---
for folder in dr_folders:
    progress_label = f"Processing {os.path.basename(folder)}"
    for fname in tqdm(os.listdir(folder), desc=progress_label):
        src = os.path.join(folder, fname)
        if os.path.isfile(src):
            # Keep the original name unless it collides; on collision keep
            # trying "<stem>_dup<counter><suffix>" until a free name is found.
            stem, suffix = os.path.splitext(fname)
            target_name = fname
            while target_name in existing:
                conflict_counter += 1
                target_name = f"{stem}_dup{conflict_counter}{suffix}"

            dst = os.path.join(all_dr_dir, target_name)
            shutil.copy2(src, dst)
            existing.add(target_name)
            total_copied += 1

print(f"\n✅ Done! Total images copied to All_DR_Images: {total_copied}")


Processing DR: 100%|██████████| 258/258 [00:08<00:00, 30.53it/s]
Processing DR: 100%|██████████| 227/227 [00:06<00:00, 33.80it/s]
Processing DR: 100%|██████████| 5485/5485 [03:29<00:00, 26.18it/s]
Processing DR: 100%|██████████| 212/212 [00:05<00:00, 41.69it/s]
Processing DR: 100%|██████████| 6335/6335 [07:04<00:00, 14.94it/s]


✅ Done! Total images copied to All_DR_Images: 12517





In [None]:
import os
import shutil
from tqdm import tqdm

# === Source NoDR folders ===
# The variable names below still say "dr", but in this cell they hold the
# NoDR-class paths.
dr_folders = [
    "/content/drive/MyDrive/EECE 490 Project/Aptos/Unbalanced_Split_train/NoDR",
    "/content/drive/MyDrive/EECE 490 Project/RandSet2/Unbalanced_Split/NoDR",
    "/content/drive/MyDrive/EECE 490 Project/DDR dataset/Unbalanced_Split/NoDR",
    "/content/drive/MyDrive/EECE 490 Project/Messidor2/Unbalanced_Split/NoDR",
    "/content/drive/MyDrive/EECE 490 Project/EyePac/Unbalanced_Split/NoDR"
]

# === Destination folder ===
all_dr_dir = "/content/drive/MyDrive/EECE 490 Project/All_NoDR_Images"
os.makedirs(all_dr_dir, exist_ok=True)

# === Track names to avoid overwriting ===
# Seeded from what is already in the destination, so re-running this cell after
# a completed run copies every file again under a "_dup<N>" name.
existing = set(os.listdir(all_dr_dir))
conflict_counter = 0  # shared across all folders; bumped on every rename attempt
total_copied = 0

# === Copy images from all folders ===
for folder in dr_folders:
    for fname in tqdm(os.listdir(folder), desc=f"Processing {os.path.basename(folder)}"):
        src = os.path.join(folder, fname)
        if not os.path.isfile(src):
            continue  # skip sub-directories and anything that is not a regular file

        # If filename already exists, modify it to avoid overwrite
        name, ext = os.path.splitext(fname)
        target_name = fname
        while target_name in existing:
            conflict_counter += 1
            target_name = f"{name}_dup{conflict_counter}{ext}"

        dst = os.path.join(all_dr_dir, target_name)
        shutil.copy2(src, dst)
        existing.add(target_name)
        total_copied += 1

# Derive the folder name from the destination path so the summary cannot name
# the wrong folder (it previously said "All_DR_Images" for the NoDR copy).
print(f"\n✅ Done! Total images copied to {os.path.basename(all_dr_dir)}: {total_copied}")


Processing NoDR: 100%|██████████| 1317/1317 [01:20<00:00, 16.34it/s]
Processing NoDR: 100%|██████████| 16/16 [00:04<00:00,  3.92it/s]
Processing NoDR: 100%|██████████| 6041/6041 [04:48<00:00, 20.91it/s]
Processing NoDR: 100%|██████████| 150/150 [00:04<00:00, 36.10it/s]
Processing NoDR: 100%|██████████| 17655/17655 [27:11<00:00, 10.82it/s]


✅ Done! Total images copied to All_DR_Images: 25179





In [None]:
import os

# --- Merged class folders whose image counts are reported ---
folders = {
    "DR": "/content/drive/MyDrive/EECE 490 Project/All_DR_Images",
    "NoDR": "/content/drive/MyDrive/EECE 490 Project/All_NoDR_Images"
}

# --- Filename suffixes (compared in lower case) that count as images ---
image_extensions = ('.jpg', '.jpeg', '.png')

# --- Report one image count per class folder ---
for label, path in folders.items():
    if os.path.exists(path):
        # Case-insensitive suffix match; entries with other suffixes are not counted.
        matching = [entry for entry in os.listdir(path) if entry.lower().endswith(image_extensions)]
        count = len(matching)
        print(f"📂 {label} folder contains: {count} images")
    else:
        print(f"⚠️ Folder not found: {path}")


📂 DR folder contains: 11061 images
📂 NoDR folder contains: 25179 images


In [None]:
import os
from PIL import Image
from tqdm import tqdm

# === Input and output folders mapping ===
folder_map = {
    "DR": "/content/drive/MyDrive/EECE 490 Project/All_DR_Images",
    "NoDR": "/content/drive/MyDrive/EECE 490 Project/All_NoDR_Images"
}

# === Output base folder ===
output_base = "/content/drive/MyDrive/EECE 490 Project"

# === Process each folder ===
# Every image is converted to RGB and written as PNG under a sequential name
# (image00000.png, image00001.png, ...) in "All_<label>_Images_Processed".
for label, input_folder in folder_map.items():
    print(f"🔄 Converting and renaming images in: {input_folder}")

    output_folder = os.path.join(output_base, f"All_{label}_Images_Processed")
    os.makedirs(output_folder, exist_ok=True)

    # List and sort valid image files (sorting fixes which index each file gets)
    files = sorted([f for f in os.listdir(input_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))])

    saved = 0    # images actually written
    failed = []  # source names that raised during open/convert/save

    for idx, fname in enumerate(tqdm(files)):
        input_path = os.path.join(input_folder, fname)
        try:
            # The with-block closes the source file; convert() returns a new
            # image object, so `img` stays usable after the source is closed.
            with Image.open(input_path) as src_img:
                img = src_img.convert("RGB")
            outname = f"image{idx:05d}.png"
            outpath = os.path.join(output_folder, outname)
            img.save(outpath, "PNG")
            saved += 1
        except Exception as e:
            # Best effort: report and move on. idx still advances, so a failed
            # file leaves a gap in the output numbering.
            failed.append(fname)
            print(f"⚠️ Failed to process {fname}: {e}")

    # Report what was really written, not how many files were attempted.
    print(f"✅ {saved} images saved to: {output_folder}")
    if failed:
        print(f"⚠️ {len(failed)} images could not be processed in: {input_folder}")

print("🎉 All processing complete!")


🔄 Converting and renaming images in: /content/drive/MyDrive/EECE 490 Project/All_DR_Images


100%|██████████| 11061/11061 [45:14<00:00,  4.07it/s]


✅ 11061 images saved to: /content/drive/MyDrive/EECE 490 Project/All_DR_Images_Processed
🔄 Converting and renaming images in: /content/drive/MyDrive/EECE 490 Project/All_NoDR_Images


100%|██████████| 25179/25179 [1:13:48<00:00,  5.69it/s]


✅ 25179 images saved to: /content/drive/MyDrive/EECE 490 Project/All_NoDR_Images_Processed
🎉 All processing complete!


In [None]:
import os
from PIL import Image

# Spot-check one processed DR image: print its pixel mode and dimensions.
folder = "/content/drive/MyDrive/EECE 490 Project/All_DR_Images_Processed"
# os.listdir() returns entries in arbitrary order; sorting makes the pick reproducible.
sample = sorted(os.listdir(folder))[0]
# Only header attributes are read here, so the file would otherwise stay open;
# the with-block closes it.
with Image.open(os.path.join(folder, sample)) as img:
    # img.mode is the pixel mode (e.g. "RGB"); img.size is (width, height).
    print("✅ Format:", img.mode)
    print("✅ Size:", img.size)


✅ Format: RGB
✅ Size: (512, 512)


In [None]:
import os
from PIL import Image

# Spot-check one processed NoDR image: print its pixel mode and dimensions.
folder = "/content/drive/MyDrive/EECE 490 Project/All_NoDR_Images_Processed"
# os.listdir() returns entries in arbitrary order; sorting makes the pick reproducible.
sample = sorted(os.listdir(folder))[0]
# Only header attributes are read here, so the file would otherwise stay open;
# the with-block closes it.
with Image.open(os.path.join(folder, sample)) as img:
    # img.mode is the pixel mode (e.g. "RGB"); img.size is (width, height).
    print("✅ Format:", img.mode)
    print("✅ Size:", img.size)


✅ Format: RGB
✅ Size: (512, 512)


In [None]:
import os
import random
from PIL import Image
from tqdm import tqdm

# === Input and output folders mapping ===
folder_map = {
    "DR": "/content/drive/MyDrive/EECE 490 Project/All_DR_Images",
    "NoDR": "/content/drive/MyDrive/EECE 490 Project/All_NoDR_Images"
}

# === Output base folder ===
output_base = "/content/drive/MyDrive/EECE 490 Project"

# Fixed seed so the subset drawn from the larger class is the same on every run.
SEED = 42

# === Load and balance ===
image_lists = {}
min_len = float("inf")

# Step 1: Collect files & determine min count
for label, input_folder in folder_map.items():
    files = sorted([f for f in os.listdir(input_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))])
    image_lists[label] = files
    min_len = min(min_len, len(files))

print(f"📊 Balancing dataset to {min_len} images per class")

# Step 2: Process and save balanced set
for label, input_folder in folder_map.items():
    print(f"🔄 Processing {label} images...")

    output_folder = os.path.join(output_base, f"All_{label}_Images_Processed_Balanced")
    os.makedirs(output_folder, exist_ok=True)

    # Draw a seeded random subset instead of truncating the sorted list:
    # taking the first min_len names keeps only whatever sorts first, which
    # selects by filename rather than sampling the class. A fresh Random(SEED)
    # per class keeps each draw independent of iteration order, and re-sorting
    # keeps the index -> file assignment stable. When a class already has
    # exactly min_len files this selects all of them.
    selected_files = sorted(random.Random(SEED).sample(image_lists[label], min_len))

    saved = 0    # images actually written
    failed = []  # source names that raised during open/convert/save

    for idx, fname in enumerate(tqdm(selected_files)):
        input_path = os.path.join(input_folder, fname)
        try:
            # The with-block closes the source file; convert() returns a new
            # image object, so `img` stays usable after the source is closed.
            with Image.open(input_path) as src_img:
                img = src_img.convert("RGB")
            outname = f"image{idx:05d}.png"
            outpath = os.path.join(output_folder, outname)
            img.save(outpath, "PNG")
            saved += 1
        except Exception as e:
            # Best effort: report and move on. A failure leaves this class one
            # image short of min_len, which the summary below makes visible.
            failed.append(fname)
            print(f"⚠️ Failed to process {fname}: {e}")

    # Report what was really written, not how many files were selected.
    print(f"✅ {saved} images saved to: {output_folder}")
    if failed:
        print(f"⚠️ {len(failed)} images could not be processed in: {input_folder}")

print("🎉 All balanced processing complete!")


📊 Balancing dataset to 11061 images per class
🔄 Processing DR images...


100%|██████████| 11061/11061 [32:26<00:00,  5.68it/s]


✅ 11061 images saved to: /content/drive/MyDrive/EECE 490 Project/All_DR_Images_Processed_Balanced
🔄 Processing NoDR images...


100%|██████████| 11061/11061 [28:08<00:00,  6.55it/s]

✅ 11061 images saved to: /content/drive/MyDrive/EECE 490 Project/All_NoDR_Images_Processed_Balanced
🎉 All balanced processing complete!





In [None]:
import os

# Locations of the two balanced class folders
base_dir = "/content/drive/MyDrive/EECE 490 Project"
dr_path = os.path.join(base_dir, "All_DR_Images_Processed_Balanced")
nodr_path = os.path.join(base_dir, "All_NoDR_Images_Processed_Balanced")

# Tally the entries in each folder whose name ends in ".png" (case-insensitive)
dr_count = sum(1 for entry in os.listdir(dr_path) if entry.lower().endswith(".png"))
nodr_count = sum(1 for entry in os.listdir(nodr_path) if entry.lower().endswith(".png"))

print(f"🧮 DR images count:   {dr_count}")
print(f"🧮 NoDR images count: {nodr_count}")

# The classes are balanced exactly when both folders hold the same number of PNGs
print("✅ Folders are balanced!" if dr_count == nodr_count else "⚠️ Folders are NOT balanced.")


🧮 DR images count:   11061
🧮 NoDR images count: 11061
✅ Folders are balanced!
