## Importing all the necessary libraries

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import torch
from PIL import Image
from torchvision import transforms

## Creating Directories

In [None]:
# Inputs: the label CSV and the folder holding the untouched source images
original_csv = "data/aptos2019/train.csv"
original_train_dir = "data/aptos2019/train_images"

# Output: folder that receives the transformed copies.
# exist_ok=True keeps this cell safe to re-run once the folder exists.
preprocessed_dir = "data/aptos2019/train_preprocessed"
os.makedirs(name=preprocessed_dir, exist_ok=True)

## Transforming images in the dataset

In [None]:
# Defining transformation pipeline
#
# The pipeline gets its own name instead of being assigned to `transforms`.
# Rebinding `transforms` would replace the imported torchvision module with
# the Compose object, so re-running this cell would fail with AttributeError
# and any later `transforms.<Something>` lookup would break.
#
# torchvision's random transforms draw from torch's global RNG, so seeding it
# makes the saved images reproducible under Restart Kernel -> Run All.
SEED = 42
torch.manual_seed(SEED)

augmentation_pipeline = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),    # mirror left/right half of the time
    transforms.RandomRotation(degrees=15),     # rotate by a random angle in [-15, 15] degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
])

# Applying transformation and saving images
train_df = pd.read_csv(original_csv)

# Only the id column is needed, so iterate over it directly rather than
# building a full row Series per image with iterrows().
for id_code in train_df["id_code"]:
    file_name = f"{id_code}.png"
    # Image.open keeps the underlying file open until the image is closed;
    # the context manager releases each handle before the next iteration.
    with Image.open(os.path.join(original_train_dir, file_name)) as img:
        # Transform and save while the source image is still open.
        transformed_img = augmentation_pipeline(img)
        transformed_img.save(os.path.join(preprocessed_dir, file_name))

In [4]:
# Short run summary: how many rows were processed and where the output went
summary_lines = [
    "Preprocessing complete",
    f"Number of images: {len(train_df)}",
    f"Transformed images saved to: {preprocessed_dir}",
]
print("\n".join(summary_lines))

Preprocessing complete
Number of images: 3662
Transformed images saved to: data/aptos2019/train_preprocessed


So, we have 3,662 (~3.7k) labeled and preprocessed images. I'll use them to fine-tune and evaluate a model (or to train one from scratch).