In [1]:
import os
import sys
import json
import pickle
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from torch.cuda.amp import GradScaler, autocast
import torch.nn.functional as F

In [3]:
import xgboost as xgb
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                           f1_score, confusion_matrix, classification_report,
                           roc_auc_score, cohen_kappa_score)
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

In [4]:
class Config:
    """Central configuration for the diabetic-retinopathy notebook.

    Instantiating the class has a side effect: the model, feature and
    result directories are created (relative to the working directory)
    if they do not exist yet.
    """

    def __init__(self):
        # Dataset locations
        self.data_dir = Path('Modular Implementation/aptos2019-blindness-detection')
        self.train_csv = self.data_dir / 'train.csv'
        # The dataset class in this notebook joins "<id_code>.<ext>" onto its
        # image_dir, so this has to be the image *folder*, not a CSV file.
        self.train_image = self.data_dir / 'train_images'
        self.test_image = self.data_dir / 'test_images'

        # Output locations
        self.model_dir = Path('savemodels')
        self.model_features = Path('extracted_features')
        self.model_results = Path('results')

        # Create the output directories up front so later saves cannot fail
        # on a missing folder.
        for dir_path in (self.model_dir, self.model_features, self.model_results):
            dir_path.mkdir(parents=True, exist_ok=True)

        # Hyperparameters
        self.batch_size = 32
        self.epochs = 10
        # The training code in this notebook reads `config.num_epochs`;
        # keep both names in sync so either spelling works.
        self.num_epochs = self.epochs
        self.learning_rate = 1e-4
        self.num_classes = 5  # severity grades 0-4
        self.img_size = (512, 512)  # (height, width) the images are resized to

        # Constructors of the supported torchvision backbones, keyed by the
        # model names used throughout the notebook.
        self.pretrained_models = {
            'resnet50': models.resnet50,
            'densenet121': models.densenet121,
            'inceptionV3': models.inception_v3
        }

        # XGBoost parameters
        self.xgb_params = {
            'objective': 'multi:softmax',
            'num_class': self.num_classes,
            'max_depth': 6,
            'learning_rate': 0.1,
            'n_estimators': 100,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'random_state': 42,
            'n_jobs': -1
        }

        self.n_splits = 5
        self.random_seed = 42

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def to_dict(self):
        """Return all public attributes as a plain ``{name: value}`` dict.

        Attributes whose name starts with an underscore are skipped. Values
        are returned as stored (``Path``, ``torch.device``, callables, ...);
        no conversion to JSON-friendly types is attempted.
        """
        return {k: v for k, v in self.__dict__.items() if not k.startswith('_')}

In [6]:
class DataAugmentation:
    """Factories for the torchvision transform pipelines used on the images.

    All pipelines resize to ``img_size`` (default 512x512), convert to a
    tensor and normalise with the mean/std constants below, which are the
    standard ImageNet statistics expected by torchvision's pretrained models.
    """

    DEFAULT_IMG_SIZE = (512, 512)
    NORM_MEAN = [0.485, 0.456, 0.406]
    NORM_STD = [0.229, 0.224, 0.225]

    @staticmethod
    def get_train_transform(img_size=(512, 512)):
        """Return the training pipeline: resize, random augmentations, normalise.

        Args:
            img_size: ``(height, width)`` passed to ``transforms.Resize``.
        """
        return transforms.Compose([
            transforms.Resize(img_size),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.RandomRotation(degrees=30),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
            transforms.ToTensor(),
            transforms.Normalize(mean=DataAugmentation.NORM_MEAN, std=DataAugmentation.NORM_STD)
        ])

    @staticmethod
    def get_val_transform(img_size=(512, 512)):
        """Return the deterministic pipeline: resize, to-tensor, normalise.

        Args:
            img_size: ``(height, width)`` passed to ``transforms.Resize``.
        """
        return transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=DataAugmentation.NORM_MEAN, std=DataAugmentation.NORM_STD)
        ])

    @staticmethod
    def preprocess_image(image_path, img_size=(512, 512)):
        """Load one image file and return it as a single-item batch tensor.

        The image is converted to RGB, run through the validation pipeline
        and given a leading batch dimension via ``unsqueeze(0)``.

        Args:
            image_path: path of the image file to open.
            img_size: ``(height, width)`` passed to ``transforms.Resize``.
        """
        # Single-image inference uses exactly the validation preprocessing,
        # so reuse that pipeline instead of keeping a third copy in sync.
        transform = DataAugmentation.get_val_transform(img_size)
        # The context manager closes the underlying file handle promptly;
        # convert() returns an independent in-memory copy.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        return transform(image).unsqueeze(0)
11        

11

In [8]:
class CustomAptos(Dataset):
    """Dataset that reads images listed in a dataframe.

    The dataframe must have an ``id_code`` column (image file stem) and,
    unless ``is_test`` is set, a ``diagnosis`` column holding the label.

    Args:
        dataframe: table with one row per image.
        image_dir: directory containing ``<id_code>.png|.jpg|.jpeg`` files.
        transform: optional callable applied to the loaded RGB PIL image.
        is_test: when True, items are ``(image, id_code)`` instead of
            ``(image, label)``.
    """

    # File extensions tried, in order, when resolving an id_code to a file.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, dataframe, image_dir, transform = None, is_test = False):
        self.dataframe = dataframe
        self.image_dir = Path(image_dir)
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        """Number of rows (images) in the dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` or, in test mode, ``(image, id_code)``.

        Raises:
            FileNotFoundError: if no file with a supported extension exists
                for the row's ``id_code`` in ``image_dir``.
        """
        # Positional lookup, done once and reused for both columns.
        row = self.dataframe.iloc[idx]
        img_name = row['id_code']

        # First existing candidate wins; None when no extension matches.
        candidates = (self.image_dir / f"{img_name}{ext}" for ext in self.IMAGE_EXTENSIONS)
        image_path = next((path for path in candidates if path.exists()), None)

        if image_path is None:
            raise FileNotFoundError(
                f'image not found for {img_name} in {self.image_dir}'
            )

        # Close the file handle right away; convert() yields an in-memory copy.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)

        if self.is_test:
            return image, img_name

        # Plain Python int so the default collate builds an integer tensor.
        label = int(row['diagnosis'])

        return image, label

In [None]:
class DRModelManager:
    """Builds one pretrained torchvision backbone set up for partial fine-tuning.

    Supported ``model_name`` values are ``'resnet50'``, ``'inceptionV3'`` and
    ``'densenet121'``. For each of them the backbone is loaded with
    pretrained weights, fully frozen, its last block(s) are unfrozen and the
    final classifier is replaced by a small MLP head with
    ``config.num_classes`` outputs. A ``feature_extractor`` (the network
    without that head) is built from the same modules, so it shares weights
    with ``model``.

    Args:
        config: object providing ``device`` and ``num_classes``.
        model_name: one of the supported names above.

    Raises:
        ValueError: if ``model_name`` is not supported.
    """

    def __init__(self, config, model_name):
        self.config = config
        self.model_name = model_name
        self.device = config.device
        self.model = None
        self.feature_extractor = None
        self._initialize_model_finetune()

    def _initialize_model_finetune(self):
        """Initialize pre-trained model with fine-tuning on last blocks only"""
        initializers = {
            'resnet50': self._initialize_resnet50_finetune,
            'inceptionV3': self._initialize_inception_v3_finetune,
            'densenet121': self._initialize_densenet121_finetune,
        }
        if self.model_name not in initializers:
            raise ValueError(f"Model {self.model_name} not supported")

        initializers[self.model_name]()
        self.model.to(self.device)

        # Create feature extractor (all layers except the final classifier)
        self._feature_extractor()

    @staticmethod
    def _build_classifier_head(in_features, num_classes, activation=nn.ReLU):
        """Return the replacement classifier: in -> 512 -> 256 -> num_classes.

        ``activation`` is the module class used after each BatchNorm1d. The
        head is freshly constructed, so all of its parameters are trainable.
        """
        return nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(in_features, 512),
            nn.BatchNorm1d(512),
            activation(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            activation(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(256, num_classes)
        )

    def _initialize_resnet50_finetune(self):
        """Fine-tune ResNet50: train only ``layer4`` and the new head."""
        self.model = models.resnet50(pretrained = True)

        for param in self.model.parameters():
            param.requires_grad = False

        # Unfreezes every parameter of the last stage, including its
        # BatchNorm affine weights.
        for param in self.model.layer4.parameters():
            param.requires_grad = True

        self.model.fc = self._build_classifier_head(
            self.model.fc.in_features, self.config.num_classes, activation=nn.LeakyReLU
        )

        return f'Succefully initaited {self.model_name}'

    def _initialize_inception_v3_finetune(self):
        """Fine-tune InceptionV3: train the late Mixed blocks and the new head.

        Unfrozen: all parameters of ``Mixed_6d``, ``Mixed_6e`` and every
        ``Mixed_7*`` block, plus the BatchNorm parameters of *all*
        ``Mixed_6*`` blocks.
        """
        self.model = models.inception_v3(pretrained=True, aux_logits=False)

        for param in self.model.parameters():
            param.requires_grad = False

        for name, param in self.model.named_parameters():
            if 'Mixed_7' in name or 'Mixed_6e' in name or 'Mixed_6d' in name: # type: ignore
                param.requires_grad = True
            if 'bn' in name or 'BatchNorm' in name:
                if 'Mixed_7' in name or 'Mixed_6' in name:
                    param.requires_grad = True

        # Replace the final FC layer
        self.model.fc = self._build_classifier_head(
            self.model.fc.in_features, self.config.num_classes, activation=nn.ReLU
        )

        return f'Succefully initaited {self.model_name}'

    def _initialize_densenet121_finetune(self):
        """Fine-tune DenseNet121: train transition3, denseblock4, norm5 and the new head."""
        self.model = models.densenet121(pretrained=True)

        # FREEZE ALL LAYERS FIRST
        for param in self.model.parameters():
            param.requires_grad = False

        # Name-based unfreeze; this also covers the BatchNorm parameters
        # that live inside these blocks.
        for name, param in self.model.named_parameters():
            if 'denseblock4' in name or 'norm5' in name or 'transition3' in name:
                param.requires_grad = True

        # Replace the classifier
        self.model.classifier = self._build_classifier_head(
            self.model.classifier.in_features, self.config.num_classes, activation=nn.ReLU
        )

        return f'Succefully initaited {self.model_name}'

    def _feature_extractor(self):
        """Build ``self.feature_extractor`` from the already initialised model.

        The extractor reuses the model's own modules (no copy), so it always
        reflects the current weights.

        Raises:
            ValueError: if ``model_name`` has no extractor recipe.
        """
        if self.model_name == 'resnet50':
            # Every child except the final `fc` head.
            self.feature_extractor = nn.Sequential(*list(self.model.children())[:-1])

        # Must match the name accepted by _initialize_model_finetune
        # ('inceptionV3'), otherwise the extractor would stay None.
        elif self.model_name == 'inceptionV3':
            # NOTE(review): chaining children() bypasses Inception's own
            # forward() (e.g. any input transformation done there) - confirm
            # the extracted features are what downstream code expects.
            self.feature_extractor = nn.Sequential(
                *list(self.model.children())[:-1],
                nn.AdaptiveAvgPool2d((1,1))
            )

        # DenseNet structure is different: features + classifier
        elif self.model_name == 'densenet121':
            self.feature_extractor = nn.Sequential(
                self.model.features,
                nn.ReLU(inplace=True),
                nn.AdaptiveAvgPool2d((1,1))
            )

        else:
            raise ValueError(f"Model {self.model_name} not supported")

        # Each recipe ends in a global pooling step, so the extractor output
        # still carries trailing 1x1 spatial dims; callers flatten as needed.
        self.feature_extractor.to(self.device)

    def print_trainable_parameters(self):
        """Print which layers are trainable - useful for debugging

        Returns:
            ``(trainable_parameter_count, total_parameter_count)``.
        """
        print(f"\n{'='*60}")
        print(f"Trainable parameters for {self.model_name}:")
        print('='*60)

        total_parameter = 0
        trainable_parameter = 0

        for name, param in self.model.named_parameters():
            total_parameter += param.numel()
            if param.requires_grad:
                trainable_parameter += param.numel()
                print(f"✓ TRAINABLE: {name}")
            else:
                print(f"  Frozen: {name}")

        # Guard the percentage against a parameter-free model.
        percentage = 100 * trainable_parameter / total_parameter if total_parameter else 0.0

        print(f"\nTotal parameters: {total_parameter:,}")
        print(f"Trainable parameters: {trainable_parameter:,}")
        print(f"Percentage trainable: {percentage:.2f}%")
        print('='*60)

        return trainable_parameter, total_parameter

    def get_model(self):
        """Return the full classification model."""
        return self.model

    def get_feature_extractor(self):
        """Return the head-less feature extractor."""
        return self.feature_extractor

    def save_model(self, path):
        """Write one checkpoint with weights and fine-tuning metadata to ``path``.

        The checkpoint stores the model state dict, the model name, the
        config as a dict, the names of all currently trainable parameters
        and the feature extractor's state dict.
        """
        trainable_names = [
            name for name, param in self.model.named_parameters() if param.requires_grad
        ]

        # Prefer the config's own serialiser; fall back to its public
        # attributes when the config object does not provide one.
        to_dict = getattr(self.config, 'to_dict', None)
        if callable(to_dict):
            config_dict = to_dict()
        else:
            config_dict = {k: v for k, v in vars(self.config).items() if not k.startswith('_')}

        # Save exactly once, after the full list of names has been collected.
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'model_name': self.model_name,
            'config': config_dict,
            'trainable_layers': trainable_names,
            'feature_extractor_state_dict': self.feature_extractor.state_dict()
        }, path)

    def load_model(self, path):
        """Load a checkpoint written by ``save_model`` and return the model.

        Restores the model weights and, when the checkpoint lists
        ``trainable_layers``, re-applies that exact frozen/trainable split.
        """
        try:
            checkpoint = torch.load(path, map_location= self.device)
        except pickle.UnpicklingError:
            # Newer PyTorch releases default to weights_only=True, which
            # rejects the non-tensor config objects save_model stores.
            # Only retry like this for checkpoints from a trusted source:
            # a full unpickle can execute arbitrary code.
            if hasattr(path, 'seek'):
                path.seek(0)
            checkpoint = torch.load(path, map_location= self.device, weights_only=False)

        self.model.load_state_dict(checkpoint['model_state_dict'])

        if 'trainable_layers' in checkpoint:
            # Set gives O(1) membership tests across all parameters.
            trainable = set(checkpoint['trainable_layers'])
            for name, param in self.model.named_parameters():
                param.requires_grad = name in trainable

        return self.model

        

In [None]:
class DRTrainer:
    """Training engine for the fine-tuned CNNs.

    Builds an AdamW optimizer with two parameter groups (pretrained layers
    at 0.1x the base learning rate, the new ``fc``/``classifier`` head at the
    base rate), a cosine warm-restart scheduler, a class-weighted
    cross-entropy loss and, on CUDA devices, mixed-precision training.

    Args:
        config: object providing ``device``, ``learning_rate``, ``model_dir``,
            the epoch budget as ``num_epochs`` (or ``epochs``) and, ideally,
            ``train_csv`` and ``num_classes``.
        model_manager: object providing ``get_model()``,
            ``print_trainable_parameters()``, ``model_name``,
            ``save_model(path)`` and ``load_model(path)``.

    Raises:
        ValueError: if the model has no trainable parameters.
    """

    # Number of initial epochs with a linearly increasing learning rate.
    WARMUP_EPOCHS = 5
    # Max gradient norm used for clipping.
    MAX_GRAD_NORM = 1.0
    # CSV location used when config.train_csv is absent or does not exist.
    FALLBACK_TRAIN_CSV = 'aptos2019-blindness-detection/train.csv'

    def __init__(self, config, model_manager):
        self.config = config
        self.model_manager = model_manager
        self.model = self.model_manager.get_model()
        self.device = self.config.device
        self.num_epochs = self._resolve_num_epochs(config)

        # Printing the trainable parameter information
        self.model_manager.print_trainable_parameters()

        # Split trainable parameters: the new classifier head gets the full
        # learning rate, fine-tuned pretrained layers a 10x smaller one.
        new_layers = []
        finetune_layers = []
        for name, param in self.model.named_parameters():
            if not param.requires_grad:
                continue
            if 'fc' in name or 'classifier' in name:
                new_layers.append(param)
            else:
                finetune_layers.append(param)

        # Cached once; reused for gradient clipping every step.
        self.trainable_params = finetune_layers + new_layers
        if len(self.trainable_params) == 0:
            raise ValueError("No training parameters found. Check the fine tuning.")

        print(f"\nOptimizing {len(self.trainable_params)} trainable parameter tensors")

        # Skip empty groups; 'name' is an extra key kept by the optimizer and
        # used only for logging.
        param_groups = []
        if finetune_layers:
            param_groups.append(
                {'params': finetune_layers, 'lr': config.learning_rate * 0.1, 'name': 'Fine-tune'}
            )
        if new_layers:
            param_groups.append(
                {'params': new_layers, 'lr': config.learning_rate, 'name': 'New layers'}
            )
        self.optimizer = optim.AdamW(param_groups, lr=config.learning_rate)

        # Cosine annealing that restarts at the initial rate after T_0
        # epochs, with each following cycle T_mult times longer.
        self.scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=5, T_mult=2, eta_min=1e-6
        )

        # Mixed precision only makes sense on CUDA; None disables it.
        self.scaler = GradScaler() if torch.device(self.device).type == 'cuda' else None

        # Loss function with class weights to counter label imbalance
        self.criterion = self._get_weighted_loss()

        self.history = {
            'train_loss': [], 'train_acc': [],
            'val_loss': [], 'val_acc': [],
            'learning_rates': []
        }

    @staticmethod
    def _resolve_num_epochs(config):
        """Return the epoch budget from ``config.num_epochs`` or ``config.epochs``."""
        for attr in ('num_epochs', 'epochs'):
            value = getattr(config, attr, None)
            if value is not None:
                return int(value)
        raise AttributeError("config must define 'num_epochs' (or 'epochs')")

    def _get_weighted_loss(self):
        """Build a cross-entropy loss weighted inversely to class frequency.

        Per-class weight is ``total / (num_classes * count_i)``, then the
        weights are normalised to sum to 1. Counts come from the
        ``diagnosis`` column of the training CSV.
        """
        csv_path = getattr(self.config, 'train_csv', None)
        if csv_path is None or not Path(csv_path).exists():
            csv_path = self.FALLBACK_TRAIN_CSV
        df = pd.read_csv(csv_path)

        counts = df["diagnosis"].value_counts().sort_index()
        num_classes = getattr(self.config, 'num_classes', None)
        if num_classes is not None:
            # Keep one weight per model output even if a class is absent.
            counts = counts.reindex(range(num_classes), fill_value=0)

        # clamp avoids a division by zero for classes with no samples.
        class_counts = torch.tensor(counts.to_numpy(), dtype=torch.float32).clamp(min=1)
        class_weights = class_counts.sum() / (len(class_counts) * class_counts)

        # Normalize weights
        class_weights = class_weights / class_weights.sum()
        # Tensor.to() is not in-place: keep the returned tensor.
        class_weights = class_weights.to(self.device)

        return nn.CrossEntropyLoss(weight=class_weights)

    def train_epoch(self, train_loader, epoch):
        """Run one training epoch.

        Returns:
            ``(mean_batch_loss, accuracy_percent)``; both are also appended
            to ``self.history``.
        """
        # Puts every submodule, BatchNorm included, into training mode.
        self.model.train()

        use_amp = self.scaler is not None
        running_loss = 0.0
        correct = 0
        total = 0
        pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{self.num_epochs}')
        for batch_idx, (inputs, labels) in enumerate(pbar):
            inputs = inputs.to(self.device)
            labels = labels.to(self.device).long()

            # Gradients accumulate by default; clear them every step.
            self.optimizer.zero_grad()

            # Forward pass runs in reduced precision when AMP is enabled.
            with autocast(enabled=use_amp):
                outputs = self.model(inputs)
                if isinstance(outputs, tuple):
                    # Inception in training mode with aux logits enabled
                    # returns (main_logits, aux_logits).
                    outputs, aux_outputs = outputs[0], outputs[1]
                    loss = self.criterion(outputs, labels)
                    if aux_outputs is not None:
                        # Auxiliary head contributes with weight 0.4.
                        loss = loss + 0.4 * self.criterion(aux_outputs, labels)
                else:
                    loss = self.criterion(outputs, labels)

            if use_amp:
                # Scale the loss before backward to avoid FP16 underflow.
                self.scaler.scale(loss).backward()
                # Unscale so clipping sees the true gradient magnitudes.
                self.scaler.unscale_(self.optimizer)
                torch.nn.utils.clip_grad_norm_(self.trainable_params, max_norm=self.MAX_GRAD_NORM)
                self.scaler.step(self.optimizer)
                self.scaler.update()
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.trainable_params, max_norm=self.MAX_GRAD_NORM)
                self.optimizer.step()

            # Statistics: sum of batch losses and count of correct argmax
            # predictions so far in this epoch.
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            pbar.set_postfix({
                'loss': running_loss / (batch_idx + 1),
                'acc': 100. * correct / total
            })

        epoch_loss = running_loss / max(len(train_loader), 1)
        epoch_acc = 100. * correct / total if total else 0.0

        self.history['train_loss'].append(epoch_loss)
        self.history['train_acc'].append(epoch_acc)

        return epoch_loss, epoch_acc

    def validate(self, val_loader):
        """Evaluate the model on ``val_loader`` without gradient tracking.

        Returns:
            ``(mean_batch_loss, accuracy_percent, all_preds, all_labels)``;
            loss and accuracy are also appended to ``self.history``.
        """
        self.model.eval()
        running_loss = 0.0
        correct = 0
        total = 0

        all_preds = []
        all_labels = []

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(self.device)
                labels = labels.to(self.device).long()

                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)

                # Accumulate (not overwrite) so the mean covers all batches.
                running_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        epoch_loss = running_loss / max(len(val_loader), 1)
        epoch_acc = 100 * correct / total if total else 0.0

        self.history['val_loss'].append(epoch_loss)
        self.history['val_acc'].append(epoch_acc)

        return epoch_loss, epoch_acc, all_preds, all_labels

    def save_checkpoint(self, epoch, best_acc, path):
        """Write a resumable training checkpoint to ``path``.

        Stores the finished epoch index, best accuracy so far, model,
        optimizer and scheduler state, the AMP scaler state when AMP is
        active, and the history dict.
        """
        state = {
            'epoch': epoch,
            'best_acc': best_acc,
            'model_name': self.model_manager.model_name,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': self.scheduler.state_dict(),
            'history': self.history,
        }
        if self.scaler is not None:
            state['scaler_state_dict'] = self.scaler.state_dict()
        torch.save(state, path)

    def load_checkpoint(self, path):
        """Restore state written by ``save_checkpoint``.

        Returns:
            ``(start_epoch, best_acc)`` suitable for passing to ``train``.
        """
        checkpoint = torch.load(path, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        if self.scaler is not None and 'scaler_state_dict' in checkpoint:
            self.scaler.load_state_dict(checkpoint['scaler_state_dict'])
        self.history = checkpoint.get('history', self.history)
        # The stored epoch is the last finished one; resume with the next.
        return checkpoint['epoch'] + 1, checkpoint['best_acc']

    def _apply_warmup(self, epoch):
        """Linearly ramp each group's LR up to its own initial value."""
        factor = (epoch + 1) / self.WARMUP_EPOCHS
        for param_group in self.optimizer.param_groups:
            # 'initial_lr' is recorded per group by the scheduler, so the
            # fine-tune / new-layer ratio is preserved during warm-up.
            param_group['lr'] = param_group['initial_lr'] * factor

    def train(self, train_loader, val_loader, start_epoch=0, best_acc=0):
        """Full fine-tuning loop with warm-up, checkpointing and best-model tracking.

        Args:
            train_loader: iterable of ``(inputs, labels)`` training batches.
            val_loader: iterable of ``(inputs, labels)`` validation batches.
            start_epoch: zero-based epoch to start from (for resuming).
            best_acc: best validation accuracy (percent) seen so far.

        Returns:
            The ``history`` dict of per-epoch losses, accuracies and
            learning rates.
        """
        model_name = self.model_manager.model_name
        model_dir = Path(self.config.model_dir)
        checkpoint_path = model_dir / f'{model_name}_finetune_checkpoint.pth'
        best_model_path = model_dir / f"{model_name}_finetune_best.pth"

        print(f"\nStarting fine-tunning for {model_name}")
        print(f"Checkpoint will be saved to: {checkpoint_path}")

        for epoch in range(start_epoch, self.num_epochs):
            # Warm-up phase: override the LR for this epoch only.
            if epoch < self.WARMUP_EPOCHS:
                self._apply_warmup(epoch)

            # Learning rates actually used during this epoch, for logging.
            epoch_lrs = [
                (group.get('name', f'Group {i}'), group['lr'])
                for i, group in enumerate(self.optimizer.param_groups)
            ]

            train_loss, train_acc = self.train_epoch(train_loader, epoch)
            val_loss, val_acc, val_preds, val_labels = self.validate(val_loader)

            # Cosine schedule takes over once the last warm-up epoch is done.
            if epoch >= self.WARMUP_EPOCHS - 1:
                self.scheduler.step()

            self.history['learning_rates'].append(epoch_lrs[0][1])

            # Save the best model
            if val_acc > best_acc:
                best_acc = val_acc
                self.model_manager.save_model(best_model_path)
                print(f"New best model saved with accuracy {best_acc:.2f}%")

            # Checkpoint after the best-accuracy update so a resume sees it.
            self.save_checkpoint(epoch, best_acc, checkpoint_path)

            print(f'\nEpoch {epoch+1}/{self.num_epochs}:')
            print(f'Train loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
            print(f'Val loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
            for group_name, group_lr in epoch_lrs:
                print(f"  {group_name} LR: {group_lr:.6f}")
            print("-" * 60)

        # Load best model for final evaluation (absent if no epoch improved
        # on the incoming best_acc).
        if best_model_path.exists():
            self.model_manager.load_model(best_model_path)
        print(f"\n✓ Fine-tuning completed for {model_name}")
        print(f"✓ Best validation accuracy: {best_acc:.2f}%")
        return self.history
