In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import numpy as np
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms
from PIL import Image
import pandas as pd
import os
from torchvision.transforms import functional as F
import torchvision.models as models
from sklearn.metrics import f1_score, recall_score, accuracy_score

In [2]:
# Read the train and test tables from their CSV files (train first, then test)
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('data/train_df.csv', 'data/test_df.csv')
)

In [3]:
# Define Dataset Class
class FundusDataset(Dataset):
    """Map-style dataset that yields ``(image, label)`` pairs listed in a dataframe.

    Every row must provide a 'filename' column (joined onto ``root_dir``) and a
    'Grouped-Label' column holding one of the keys of ``label_mapping``.
    """

    def __init__(self, dataframe, root_dir, transform=None):
        """
        Args:
            dataframe: pandas DataFrame with 'filename' and 'Grouped-Label' columns.
            root_dir: directory prepended to every filename.
            transform: optional callable applied to the RGB PIL image.
        """
        # DataLoader samplers hand out positions 0..len-1. Renumber the rows on a
        # copy so a filtered/shuffled/split frame (non-contiguous index) cannot
        # raise KeyError or silently return the wrong row; the caller's frame is
        # left untouched.
        self.df = dataframe.reset_index(drop=True)
        self.root_dir = root_dir
        self.transform = transform
        self.label_mapping = {'D': 0, 'O': 1, 'N': 2}

    def __len__(self):
        """Number of rows (samples) in the dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened from ``root_dir/filename``, converted to RGB and
        passed through ``transform`` when one was given. The label is the
        integer class id wrapped in a 0-dim tensor.

        Raises:
            IndexError: if ``idx`` is out of range.
            KeyError: if the row's 'Grouped-Label' is not in ``label_mapping``.
        """
        row = self.df.iloc[idx]  # positional lookup, independent of index labels
        img_name = os.path.join(self.root_dir, row['filename'])
        image = Image.open(img_name).convert('RGB')
        label = torch.tensor(self.label_mapping[row['Grouped-Label']])

        if self.transform:
            image = self.transform(image)

        return image, label

In [11]:
# Data loading with transformations
IMAGE_SIZE = (512, 512)
# Channel statistics that torchvision's ImageNet-pretrained weights expect on input
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]


def _boost_brightness_contrast(img):
    """Apply the fixed 1.2x brightness and 1.2x contrast enhancement to a PIL image.

    This step is deterministic, so it is preprocessing rather than augmentation
    and must be identical for training and evaluation images. A named function
    (instead of a lambda) also keeps the pipeline picklable should the
    DataLoaders ever use worker processes.
    """
    img = F.adjust_brightness(img, brightness_factor=1.2)
    return F.adjust_contrast(img, contrast_factor=1.2)


# Training pipeline: random augmentations, then the shared deterministic steps
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.Lambda(_boost_brightness_contrast),
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Evaluation pipeline: no randomness, but the same enhancement, size and
# normalization as training so both splits share one input distribution
val_transform = transforms.Compose([
    transforms.Lambda(_boost_brightness_contrast),
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

train_dataset = FundusDataset(dataframe=train_df, root_dir='data/preprocessed_images', transform=transform)
# NOTE(review): the validation set is built from test_df. The training cells use
# it for early stopping and hyperparameter selection, so reported scores may be
# optimistic; consider holding out part of train_df instead — TODO confirm intent.
val_dataset = FundusDataset(dataframe=test_df, root_dir='data/preprocessed_images', transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [8]:
# Start from a pre-trained ResNet18 and swap its classifier for a 3-class head
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features  # input width of the original fully connected layer
model.fc = nn.Linear(in_features=num_ftrs, out_features=3)

In [9]:
# Pick the first CUDA device when one is present, otherwise stay on the CPU,
# then place the model's parameters on that device
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model = model.to(device)

In [10]:
# Cross-entropy over the class logits; Adam updates every model parameter
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [11]:
# Early-stopping state: lowest validation loss seen so far, number of epochs
# since it last improved, and how many non-improving epochs are tolerated
best_val_loss, patience_counter, patience_limit = float('inf'), 0, 5

In [None]:
# Baseline run: train every layer of the model for a short demonstration.
# Relies on model, device, criterion, optimizer, the data loaders and the
# early-stopping state defined in the cells above.
NUM_EPOCHS = 2  # single source of truth for the loop bound and the log messages

# Per-epoch metric history
train_f1, train_recall, train_accuracy = [], [], []
val_f1, val_recall, val_accuracy = [], [], []

for epoch in range(NUM_EPOCHS):
    # ---- Training pass ----
    model.train()
    true_labels, pred_labels = [], []

    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest logit
        _, predicted = torch.max(outputs, 1)
        true_labels.extend(labels.cpu().numpy())
        pred_labels.extend(predicted.cpu().numpy())

        print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}")

    # Calculate training metrics
    train_f1.append(f1_score(true_labels, pred_labels, average='weighted'))
    train_recall.append(recall_score(true_labels, pred_labels, average='weighted'))
    train_accuracy.append(accuracy_score(true_labels, pred_labels))

    # ---- Validation pass ----
    model.eval()
    true_labels, pred_labels = [], []
    val_loss, val_samples = 0.0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            # criterion returns the batch mean; weight it by the batch size so a
            # short final batch does not count as much as a full one
            val_loss += loss.item() * labels.size(0)
            val_samples += labels.size(0)

            _, predicted = torch.max(outputs, 1)
            true_labels.extend(labels.cpu().numpy())
            pred_labels.extend(predicted.cpu().numpy())

    val_loss /= max(val_samples, 1)  # per-sample mean; guard against an empty loader
    print(f"Validation Loss: {val_loss:.4f}")

    # Calculate validation metrics
    val_f1.append(f1_score(true_labels, pred_labels, average='weighted'))
    val_recall.append(recall_score(true_labels, pred_labels, average='weighted'))
    val_accuracy.append(accuracy_score(true_labels, pred_labels))

    print(f"Epoch {epoch+1}, Train F1: {train_f1[-1]:.4f}, Train Recall: {train_recall[-1]:.4f}, Train Accuracy: {train_accuracy[-1]:.4f}")
    print(f"Epoch {epoch+1}, Val F1: {val_f1[-1]:.4f}, Val Recall: {val_recall[-1]:.4f}, Val Accuracy: {val_accuracy[-1]:.4f}")

    # Early stopping: stop after patience_limit consecutive epochs without a
    # new best validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1

    if patience_counter >= patience_limit:
        print("Early stopping.")
        break

print("Training complete.")

In [None]:
# Two-phase transfer learning with ResNet18:
#   Phase 1 trains only the new classifier head on a frozen backbone;
#   Phase 2 additionally unfreezes layer4 and fine-tunes it with a smaller
#   learning rate than the head.
# Relies on train_loader / val_loader from the data-loading cell and on the
# imports cell at the top of the notebook.
EPOCHS_PER_PHASE = 15  # upper bound per phase; early stopping may end a phase sooner

# Per-epoch metric history (Phase 2 entries are appended after Phase 1 entries)
train_f1, train_recall, train_accuracy = [], [], []
val_f1, val_recall, val_accuracy = [], [], []

# Load pre-trained ResNet18
model = models.resnet18(pretrained=True)

# Freeze all pre-trained layers
for param in model.parameters():
    param.requires_grad = False

# Replace the classifier with a small trainable head (3 output classes)
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, 3)
)
for param in model.fc.parameters():
    param.requires_grad = True

# Move to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Early stopping: best_val_loss carries over between phases (Phase 2 has to beat
# Phase 1 to count as an improvement); the patience counter restarts per phase.
best_val_loss = float('inf')
patience_limit = 3

for phase in (1, 2):
    # Phase setup runs exactly once per phase, outside the epoch loop, so the
    # optimizer keeps its running statistics for the whole phase.
    if phase == 1:
        optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
    else:
        for param in model.layer4.parameters():
            param.requires_grad = True
        optimizer = optim.Adam([
            {'params': model.layer4.parameters(), 'lr': 1e-4},
            {'params': model.fc.parameters(), 'lr': 1e-3}
        ])
    # Without this reset, a Phase 1 that ended by early stopping would leave the
    # counter at the limit and Phase 2 could stop after a single epoch.
    patience_counter = 0

    for epoch in range(EPOCHS_PER_PHASE):
        # ---- Training pass ----
        model.train()
        true_labels, pred_labels = [], []

        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Predicted class = index of the largest logit
            _, predicted = torch.max(outputs, 1)
            true_labels.extend(labels.cpu().numpy())
            pred_labels.extend(predicted.cpu().numpy())

            print(f"Phase {phase} - Epoch [{epoch+1}/{EPOCHS_PER_PHASE}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}")

        # Metrics for training
        train_f1.append(f1_score(true_labels, pred_labels, average='weighted'))
        train_recall.append(recall_score(true_labels, pred_labels, average='weighted'))
        train_accuracy.append(accuracy_score(true_labels, pred_labels))

        # ---- Validation pass ----
        model.eval()
        true_labels, pred_labels = [], []
        val_loss, val_samples = 0.0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                # criterion returns the batch mean; weight by batch size so a
                # short final batch is not over-represented
                val_loss += loss.item() * labels.size(0)
                val_samples += labels.size(0)

                _, predicted = torch.max(outputs, 1)
                true_labels.extend(labels.cpu().numpy())
                pred_labels.extend(predicted.cpu().numpy())

        val_loss /= max(val_samples, 1)  # per-sample mean; guard against an empty loader
        val_f1.append(f1_score(true_labels, pred_labels, average='weighted'))
        val_recall.append(recall_score(true_labels, pred_labels, average='weighted'))
        val_accuracy.append(accuracy_score(true_labels, pred_labels))

        print(f"Validation Loss: {val_loss:.4f}")
        print(f"Train F1: {train_f1[-1]:.4f}, Train Recall: {train_recall[-1]:.4f}, Train Accuracy: {train_accuracy[-1]:.4f}")
        print(f"Val F1: {val_f1[-1]:.4f}, Val Recall: {val_recall[-1]:.4f}, Val Accuracy: {val_accuracy[-1]:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience_limit:
            print(f"Early stopping in Phase {phase}.")
            break

print("Training complete.")

In [None]:
# Grid search over head learning rate and dropout for the two-phase
# (head-only, then layer4 + head) ResNet18 fine-tuning recipe.
# Relies on train_loader / val_loader from the data-loading cell and on the
# imports cell at the top of the notebook.
from sklearn.model_selection import ParameterGrid

EPOCHS_PER_PHASE = 15  # upper bound per phase; early stopping may end a phase sooner

# Hyperparameter grid
param_grid = {'lr': [1e-3, 5e-4], 'dropout': [0.3, 0.4]}
best_recall = 0
best_params = {}

# These do not depend on the hyperparameters, so build them once
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()
patience_limit = 3

# Grid search
for params in ParameterGrid(param_grid):
    print(f"Training with parameters: {params}")

    # Per-epoch metric history for this configuration (both phases)
    train_f1, train_recall, train_accuracy = [], [], []
    val_f1, val_recall, val_accuracy = [], [], []

    # Fresh model per configuration so no state carries over between runs
    model = models.resnet18(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False

    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_ftrs, 256),
        nn.ReLU(),
        nn.Dropout(params['dropout']),
        nn.Linear(256, 3)
    )

    for param in model.fc.parameters():
        param.requires_grad = True

    model = model.to(device)

    # best_val_loss carries over from Phase 1 to Phase 2 within a configuration;
    # the patience counter restarts at the beginning of every phase.
    best_val_loss = float('inf')

    for phase in (1, 2):
        # Phase setup runs once per phase, outside the epoch loop, so the
        # optimizer keeps its running statistics for the whole phase.
        if phase == 1:
            # Phase 1: train only the new head
            optimizer = optim.Adam(model.fc.parameters(), lr=params['lr'])
        else:
            # Phase 2: unfreeze layer4 and fine-tune it with a smaller step size;
            # the head keeps the learning rate under search
            for param in model.layer4.parameters():
                param.requires_grad = True
            optimizer = optim.Adam([
                {'params': model.layer4.parameters(), 'lr': 1e-4},
                {'params': model.fc.parameters(), 'lr': params['lr']}
            ])
        patience_counter = 0

        for epoch in range(EPOCHS_PER_PHASE):
            # ---- Training pass ----
            model.train()
            true_labels, pred_labels = [], []

            for i, (images, labels) in enumerate(train_loader):
                images, labels = images.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # Predicted class = index of the largest logit
                _, predicted = torch.max(outputs, 1)
                true_labels.extend(labels.cpu().numpy())
                pred_labels.extend(predicted.cpu().numpy())

                print(f"Phase {phase} - Epoch [{epoch+1}/{EPOCHS_PER_PHASE}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}")

            # Metrics for training
            train_f1.append(f1_score(true_labels, pred_labels, average='weighted'))
            train_recall.append(recall_score(true_labels, pred_labels, average='weighted'))
            train_accuracy.append(accuracy_score(true_labels, pred_labels))

            # ---- Validation pass ----
            model.eval()
            true_labels, pred_labels = [], []
            val_loss, val_samples = 0.0, 0
            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    # criterion returns the batch mean; weight by batch size so
                    # a short final batch is not over-represented
                    val_loss += loss.item() * labels.size(0)
                    val_samples += labels.size(0)

                    _, predicted = torch.max(outputs, 1)
                    true_labels.extend(labels.cpu().numpy())
                    pred_labels.extend(predicted.cpu().numpy())

            val_loss /= max(val_samples, 1)  # per-sample mean; guard against an empty loader
            val_f1.append(f1_score(true_labels, pred_labels, average='weighted'))
            val_recall.append(recall_score(true_labels, pred_labels, average='weighted'))
            val_accuracy.append(accuracy_score(true_labels, pred_labels))

            print(f"Validation Loss: {val_loss:.4f}")
            print(f"Train F1: {train_f1[-1]:.4f}, Train Recall: {train_recall[-1]:.4f}, Train Accuracy: {train_accuracy[-1]:.4f}")
            print(f"Val F1: {val_f1[-1]:.4f}, Val Recall: {val_recall[-1]:.4f}, Val Accuracy: {val_accuracy[-1]:.4f}")

            # Model selection looks at every epoch of BOTH phases, so the
            # fine-tuned (Phase 2) results take part in choosing the parameters
            current_val_recall = val_recall[-1]
            if current_val_recall > best_recall:
                best_recall = current_val_recall
                best_params = dict(params)

            print(f"Best recall so far: {best_recall}, Best parameters so far: {best_params}")

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= patience_limit:
                print(f"Early stopping in Phase {phase}.")
                break

    print("Training complete.")

    print(f"Best recall so far: {best_recall}, Best parameters so far: {best_params}")

print(f"Best parameters found: {best_params}")

Training with parameters: {'dropout': 0.3, 'lr': 0.001}
Phase 1 - Epoch [1/2], Step [1/160], Loss: 1.1472
Phase 1 - Epoch [1/2], Step [2/160], Loss: 1.1355
Phase 1 - Epoch [1/2], Step [3/160], Loss: 1.0976
Phase 1 - Epoch [1/2], Step [4/160], Loss: 1.1172
Phase 1 - Epoch [1/2], Step [5/160], Loss: 1.1179
Phase 1 - Epoch [1/2], Step [6/160], Loss: 1.1759
Phase 1 - Epoch [1/2], Step [7/160], Loss: 1.1234
Phase 1 - Epoch [1/2], Step [8/160], Loss: 1.1411
Phase 1 - Epoch [1/2], Step [9/160], Loss: 1.1567
Phase 1 - Epoch [1/2], Step [10/160], Loss: 1.0417
Phase 1 - Epoch [1/2], Step [11/160], Loss: 1.2018
Phase 1 - Epoch [1/2], Step [12/160], Loss: 1.0546
Phase 1 - Epoch [1/2], Step [13/160], Loss: 1.0910
Phase 1 - Epoch [1/2], Step [14/160], Loss: 1.1575
Phase 1 - Epoch [1/2], Step [15/160], Loss: 1.1987
Phase 1 - Epoch [1/2], Step [16/160], Loss: 1.1667
Phase 1 - Epoch [1/2], Step [17/160], Loss: 1.1980
Phase 1 - Epoch [1/2], Step [18/160], Loss: 1.1376
Phase 1 - Epoch [1/2], Step [19/160