ODIR Data

In [None]:
#TRAINING SET ANNOTATION PROCESSING
# Read the training annotation workbook, cast the eight class columns to int,
# and collect each row's class values into a single list stored under `labels`.

import pandas as pd

classes = ['N','D','G','C','A','H','M','O']
df = pd.read_excel(r"Training Set\Annotation\training annotation (English).xlsx")
df[classes] = df[classes].astype(int)
df['labels'] = df[classes].to_numpy().tolist()

# Preview: the ID column next to the packed label lists for the first rows.
df[['ID', 'labels']].head()

In [None]:
#ONSITE TEST SET ANNOTATION PROCESSING
# Read the on-site test annotation workbook, cast the eight class columns to
# int, and collect each row's class values into a single list under `labels`.
# NOTE: this rebinds the notebook-wide names `df` and `classes`.

import pandas as pd

classes = ['N','D','G','C','A','H','M','O']
df = pd.read_excel(r"On-site Test Set\Annotation\on-site test annotation (English).xlsx")
df[classes] = df[classes].astype(int)
df['labels'] = df[classes].to_numpy().tolist()

# Preview: the ID column next to the packed label lists for the first rows.
df[['ID', 'labels']].head()

In [None]:
#OFFSITE TEST SET ANNOTATION PROCESSING
# Read the off-site test annotation workbook, cast the eight class columns to
# int, and collect each row's class values into a single list under `labels`.
# NOTE: this rebinds the notebook-wide names `df` and `classes`.

import pandas as pd

classes = ['N','D','G','C','A','H','M','O']
df = pd.read_excel(r"Off-site Test Set\Annotation\off-site test annotation (English).xlsx")
df[classes] = df[classes].astype(int)
df['labels'] = df[classes].to_numpy().tolist()

# Preview: the ID column next to the packed label lists for the first rows.
df[['ID', 'labels']].head()

In [None]:
import cv2
import numpy as np

def apply_clahe(img, clip_limit=2.0, tile_grid_size=(8, 8)):
    """Boost local contrast of an RGB image by applying CLAHE to its lightness.

    The image is converted from RGB to the LAB colour space, CLAHE is applied
    to the L channel only (the a and b channels are passed through unchanged),
    and the merged result is converted back to RGB.

    Args:
        img: Colour image array in RGB channel order. Arrays returned by
            ``cv2.imread`` are in BGR order; convert them first with
            ``cv2.cvtColor(img, cv2.COLOR_BGR2RGB)``, otherwise the lightness
            channel is derived with the red and blue channels swapped.
            (Presumably an 8-bit image - confirm against the caller.)
        clip_limit: Value forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
            Defaults to 2.0, the value that used to be hard-coded.
        tile_grid_size: Value forwarded to ``cv2.createCLAHE`` as
            ``tileGridSize``. Defaults to (8, 8), the value that used to be
            hard-coded.

    Returns:
        The contrast-enhanced image in RGB channel order.

    Raises:
        ValueError: If `img` is None or is not a 3-dimensional array, e.g.
            because an image failed to load upstream.
    """
    if img is None or getattr(img, "ndim", None) != 3:
        raise ValueError("apply_clahe expects a colour image array of shape (H, W, C)")

    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    lightness, a_channel, b_channel = cv2.split(lab)

    # Equalise only the lightness so the colour channels are left untouched.
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    equalized = clahe.apply(lightness)

    merged = cv2.merge((equalized, a_channel, b_channel))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2RGB)

In [None]:
import torchvision.transforms as T

# Per-channel statistics passed to T.Normalize in both pipelines below.
# NOTE(review): these look like the commonly used ImageNet statistics -
# confirm they match the backbone that will consume the images.
mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

# Validation pipeline: deterministic resize, tensor conversion, normalisation.
val_transform = T.Compose([
    T.Resize(size=(224, 224)),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
])

# Training pipeline: the same steps with random flip / rotation / colour
# jitter inserted between the resize and the tensor conversion.
train_transform = T.Compose([
    T.Resize(size=(224, 224)),
    T.RandomHorizontalFlip(p=0.5),
    T.RandomRotation(degrees=15),
    T.ColorJitter(brightness=0.2, contrast=0.2),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
])

In [28]:
#TOTAL NUMBER OF IMAGES IN TRAINING, ON SITE AND OFF SITE TEST SETS

import os

# Extensions treated as images. They are compared against the lower-cased file
# name so that e.g. ".JPG" is counted, matching the other cells of this notebook.
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')


def count_images(folder):
    """Count the image files located directly inside `folder`.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        Number of directory entries whose name ends with one of
        IMAGE_EXTENSIONS, ignoring case. A directory that does not exist is
        counted as 0 and a note is printed, so this summary cell also runs
        before the resize / CLAHE / augmentation cells have created their
        output folders.
    """
    if not os.path.isdir(folder):
        print("directory not found (counted as 0) - ", folder)
        return 0
    return sum(1 for f in os.listdir(folder) if f.lower().endswith(IMAGE_EXTENSIONS))


image_dir_1 = r"Training Set\Images"
total_images_1 = count_images(image_dir_1)
print("total images in training set - ",total_images_1)

image_dir_2 = r"On-site Test Set\Images"
total_images_2 = count_images(image_dir_2)
print("total images in on-site test set - ",total_images_2)

image_dir_3 = r"Off-site Test Set\Images"
total_images_3 = count_images(image_dir_3)
print("total images in off-site test set - ",total_images_3)

image_dir_4 = r"Resized Images"
total_images_4 = count_images(image_dir_4)
print("total images in resized images - ",total_images_4)

image_dir_5 = r"CLAHE_Resized_Images"
total_images_5 = count_images(image_dir_5)
print("total images in CLAHE resized images - ",total_images_5)

image_dir_6 = r"Augmented_CLAHE_Resized_Images"
total_images_6 = count_images(image_dir_6)
print("total images in Augmented CLAHE resized images - ",total_images_6)

total images in training set -  7000
total images in on-site test set -  2000
total images in off-site test set -  1000
total images in resized images -  7000
total images in CLAHE resized images -  7000
total images in Augmented CLAHE resized images -  10500


In [16]:
import os
from PIL import Image
from collections import Counter
from tqdm import tqdm

# --- CONFIGURATION ---
# Folder whose images are measured (the Training Set/Images folder).
INPUT_DIR = r"Training Set\Images"

def analyze_sizes(folder_path):
    """Print a report of the pixel dimensions of the images in `folder_path`.

    Every file directly inside `folder_path` whose name ends in .jpg, .png or
    .jpeg (case-insensitive) is opened with PIL to read its size. The report
    shows the number of images measured, the number of distinct sizes, the
    average width and height (truncated to whole pixels) and the five most
    common sizes.

    Files that cannot be opened are skipped so that one bad file does not
    abort the scan; they are listed at the end of the report instead of being
    dropped silently.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        None. The report is printed.
    """
    file_list = [f for f in os.listdir(folder_path) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]
    print(f"📊 Analyzing {len(file_list)} images...")

    all_sizes = []    # one (width, height) tuple per readable image
    unreadable = []   # (filename, exception) for every file that failed to open

    for filename in tqdm(file_list):
        try:
            with Image.open(os.path.join(folder_path, filename)) as img:
                all_sizes.append(img.size)  # img.size is (width, height)
        except Exception as exc:
            # Best effort: keep scanning, but remember what was skipped and why.
            unreadable.append((filename, exc))

    # 1. Most common sizes (top 5)
    size_counts = Counter(all_sizes)
    most_common = size_counts.most_common(5)

    # 2. Average size (0 x 0 when nothing could be measured)
    if all_sizes:
        avg_w = sum(w for w, _ in all_sizes) / len(all_sizes)
        avg_h = sum(h for _, h in all_sizes) / len(all_sizes)
    else:
        avg_w, avg_h = 0, 0

    print("\n" + "="*40)
    print("       DATASET SIZE REPORT       ")
    print("="*40)
    print(f"Total Images Scanned: {len(all_sizes)}")
    print(f"Unique Sizes Found:   {len(size_counts)}")
    print(f"Average Size:         {int(avg_w)} x {int(avg_h)}")
    print("-" * 40)
    print("TOP 5 MOST COMMON SIZES:")
    for size, count in most_common:
        print(f"  Size: {size}  -> Found {count} times")
    if unreadable:
        # Only printed when something was skipped, so a clean run reports as before.
        print("-" * 40)
        print(f"UNREADABLE FILES SKIPPED: {len(unreadable)}")
        for filename, exc in unreadable:
            print(f"  {filename}: {exc}")
    print("="*40)

# Run the size report on the training images; analyze_sizes prints its report
# and returns nothing, so this cell produces printed output only.
analyze_sizes(INPUT_DIR)

📊 Analyzing 7000 images...


100%|██████████| 7000/7000 [01:34<00:00, 73.86it/s]


       DATASET SIZE REPORT       
Total Images Scanned: 7000
Unique Sizes Found:   72
Average Size:         1655 x 1264
----------------------------------------
TOP 5 MOST COMMON SIZES:
  Size: (512, 512)  -> Found 2873 times
  Size: (2592, 1728)  -> Found 1408 times
  Size: (2048, 1536)  -> Found 352 times
  Size: (1956, 1934)  -> Found 216 times
  Size: (2304, 1728)  -> Found 206 times





In [17]:
import cv2
import numpy as np
import os
from tqdm import tqdm

# --- CONFIGURATION ---
# Input path: folder with the original training images (from the kagglehub download).
INPUT_DIR = r"Training Set\Images"

# Output path: folder that receives the resized, padded copies.
# It is created here if it does not exist yet.
OUTPUT_DIR = r"Resized Images" 
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Side length, in pixels, of the square output images.
TARGET_SIZE = 512

def resize_pad_high_quality(image, target_size, interpolation=None):
    """Scale an image to fit a square canvas and pad the remainder with black.

    The image is resized so that its longer side equals `target_size` while
    the aspect ratio is kept, then centred on a `target_size` x `target_size`
    canvas using constant black borders.

    Args:
        image: Image array whose first two dimensions are (height, width).
        target_size: Side length in pixels of the square output.
        interpolation: OpenCV interpolation flag passed to ``cv2.resize``.
            ``None`` (default) keeps the previous behaviour and uses
            ``cv2.INTER_LANCZOS4``. NOTE(review): OpenCV's documentation
            suggests ``cv2.INTER_AREA`` when shrinking; consider passing it
            for heavily downscaled inputs.

    Returns:
        The resized and padded image of shape (target_size, target_size, ...).

    Raises:
        ValueError: If the input image has a zero-sized height or width.
    """
    target_size = int(target_size)
    height, width = image.shape[:2]
    if min(height, width) <= 0:
        raise ValueError(f"cannot resize an empty image of shape {image.shape}")

    # 1. Scale factor that maps the longer side onto target_size.
    ratio = float(target_size) / max(height, width)

    # Round instead of truncating: floating-point error can put the long side
    # at e.g. 511.99999999999994, which int() would turn into 511. The clamp
    # keeps very elongated images from collapsing to a 0-pixel side (which
    # cv2.resize rejects) and guarantees non-negative padding.
    new_h = min(target_size, max(1, round(height * ratio)))
    new_w = min(target_size, max(1, round(width * ratio)))

    # 2. Resize (cv2.resize takes the size as (width, height)).
    if interpolation is None:
        interpolation = cv2.INTER_LANCZOS4
    resized = cv2.resize(image, (new_w, new_h), interpolation=interpolation)

    # 3. Split the leftover pixels between the two sides; when the amount is
    #    odd, the extra pixel goes to the bottom / right border.
    delta_w = target_size - new_w
    delta_h = target_size - new_h
    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
    left, right = delta_w // 2, delta_w - (delta_w // 2)

    # borderType=BORDER_CONSTANT with value 0 (black)
    return cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])

# --- MAIN EXECUTION ---
# Resize every image in INPUT_DIR to TARGET_SIZE x TARGET_SIZE and write the
# result to OUTPUT_DIR under the same file name.
image_files = [f for f in os.listdir(INPUT_DIR) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]

print(f"🚀 Processing {len(image_files)} images to {TARGET_SIZE}x{TARGET_SIZE}...")
print("☕ This will take a while (approx 5-10 mins due to high resolution)...")

# Files that could not be read, resized or written; reported after the loop so
# that a shortfall in OUTPUT_DIR is not silent.
failed_files = []

for filename in tqdm(image_files):
    try:
        img_path = os.path.join(INPUT_DIR, filename)
        img = cv2.imread(img_path)

        # cv2.imread signals an unreadable file by returning None, not by raising.
        if img is None:
            raise IOError("could not read image")

        # Resize
        final_img = resize_pad_high_quality(img, TARGET_SIZE)

        # Save
        # We use high quality JPEG compression (95) to avoid artifacts.
        # cv2.imwrite reports failure through its return value, so check it.
        save_path = os.path.join(OUTPUT_DIR, filename)
        if not cv2.imwrite(save_path, final_img, [int(cv2.IMWRITE_JPEG_QUALITY), 95]):
            raise IOError(f"could not write {save_path}")

    except Exception as e:
        failed_files.append(filename)
        print(f"Error on {filename}: {e}")

print(f"\n✅ High-Res ({TARGET_SIZE}px) Dataset saved to: {OUTPUT_DIR}")
if failed_files:
    print(f"{len(failed_files)} of {len(image_files)} images could not be processed.")

🚀 Processing 7000 images to 512x512...
☕ This will take a while (approx 5-10 mins due to high resolution)...


100%|██████████| 7000/7000 [03:10<00:00, 36.68it/s]


✅ High-Res (512px) Dataset saved to: Resized Images





In [21]:
import os
from tqdm import tqdm

# Directory for CLAHE-enhanced images
CLAHE_DIR = r"CLAHE_Resized_Images"
os.makedirs(CLAHE_DIR, exist_ok=True)

# Get list of resized images (OUTPUT_DIR is set by the resize cell)
resized_files = [f for f in os.listdir(OUTPUT_DIR) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]

print(f"Applying CLAHE to {len(resized_files)} images...")

# Files that could not be read or written; reported after the loop.
clahe_failures = []

for filename in tqdm(resized_files):
    img_path = os.path.join(OUTPUT_DIR, filename)
    img_bgr = cv2.imread(img_path)

    # cv2.imread returns None for unreadable files.
    if img_bgr is None:
        clahe_failures.append(filename)
        continue

    # cv2.imread yields BGR, but apply_clahe converts with COLOR_RGB2LAB.
    # Convert to RGB first so the lightness channel is computed from the
    # correct channels, then back to BGR because cv2.imwrite expects BGR.
    enhanced_rgb = apply_clahe(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
    enhanced_bgr = cv2.cvtColor(enhanced_rgb, cv2.COLOR_RGB2BGR)

    save_path = os.path.join(CLAHE_DIR, filename)
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(save_path, enhanced_bgr):
        clahe_failures.append(filename)

print(f"CLAHE-enhanced images saved to - {CLAHE_DIR}")
if clahe_failures:
    print(f"{len(clahe_failures)} images could not be read or written: {clahe_failures}")

Applying CLAHE to 7000 images...


100%|██████████| 7000/7000 [01:10<00:00, 99.15it/s]

CLAHE-enhanced images saved to - CLAHE_Resized_Images





In [22]:
import pandas as pd
from torch.utils.data import Dataset
from PIL import Image
import torch

# Reload training set annotation to overwrite previous df
# (earlier cells rebind `df` to the on-site and off-site test annotations).
classes = ['N','D','G','C','A','H','M','O']
df = pd.read_excel(r"Training Set\Annotation\training annotation (English).xlsx")
# Cast the class columns to int, then pack each row's values into one list.
df[classes] = df[classes].astype(int)
df['labels'] = df[classes].to_numpy().tolist()

class ODIRDataset(Dataset):
    """Dataset yielding (image, labels) pairs from an annotation dataframe.

    Each sample corresponds to one dataframe row: the image file named in the
    row's `image_column` is loaded from `image_dir`, and the row's 'labels'
    entry becomes a float32 tensor.

    Args:
        dataframe: Annotation table. Must contain `image_column` (image file
            names) and a 'labels' column (one list of class values per row).
        image_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded RGB PIL image.
        image_column: Name of the column holding the image file name.
            Defaults to 'Left-Fundus', so by default only one image per row is
            used. NOTE(review): the sheet presumably also has a right-eye
            column (7000 images vs. 3500 rows); pass its name here to load
            those images - verify the column name and whether the row labels
            apply to each eye.
    """

    def __init__(self, dataframe, image_dir, transform=None, image_column='Left-Fundus'):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform
        self.image_column = image_column

    def __len__(self):
        """Return the number of annotation rows (one sample per row)."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the sample at positional index `idx`.

        Returns:
            Tuple (image, labels): the (optionally transformed) RGB image and
            a float32 tensor built from the row's 'labels' list.
        """
        row = self.dataframe.iloc[idx]
        img_path = os.path.join(self.image_dir, row[self.image_column])

        # Image.open keeps the file open until the data is loaded and the
        # object is closed; convert() returns a loaded copy, so the handle can
        # be released right away instead of waiting for garbage collection.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        labels = torch.tensor(row['labels'], dtype=torch.float32)
        if self.transform is not None:
            image = self.transform(image)
        return image, labels

# Create training dataset with augmentation and normalization applied via train_transform.
# Images are read from CLAHE_DIR; ODIRDataset loads the file named in each row's
# 'Left-Fundus' column, so the dataset has one sample per annotation row.
train_dataset = ODIRDataset(df, CLAHE_DIR, train_transform)

In [26]:
import os
from torchvision.utils import save_image

# Directory for the augmented images. They are written de-normalized
# (pixel values back in [0, 1]) so that they are viewable as ordinary images.
augmented_dir = r"Augmented_CLAHE_Resized_Images"
os.makedirs(augmented_dir, exist_ok=True)

# Denormalization parameters (inverse of the normalization in transforms),
# shaped (3, 1, 1) so they broadcast over a (3, H, W) image tensor.
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

# Number of augmentations per original image
num_augs = 3

# The random transforms draw from torch's global RNG. Seed it so that
# re-running this cell writes the same augmented files.
AUGMENTATION_SEED = 42
torch.manual_seed(AUGMENTATION_SEED)

print(f"Augmenting {len(train_dataset)} images with {num_augs} variations each...")

for idx in range(len(train_dataset)):
    for aug in range(num_augs):
        # Each access re-runs train_transform, so every call yields a new random variant.
        # The label is not stored; idx is the dataset position of the source row.
        image, _ = train_dataset[idx]

        # Denormalize the image to bring it back to 0-1 range for saving
        image = torch.clamp(image * std + mean, 0, 1)

        # Save the augmented image as "<dataset index>_<variant number>.png"
        save_path = os.path.join(augmented_dir, f"{idx}_{aug}.png")
        save_image(image, save_path)

print(f"Augmented images saved to: {augmented_dir}")
print(f"Total images created: {len(train_dataset) * num_augs}")

Augmenting 3500 images with 3 variations each...
Augmented images saved to: Augmented_CLAHE_Resized_Images
Total images created: 10500
