In [None]:
%env CUBLAS_WORKSPACE_CONFIG=:4096:8

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, random_split, DataLoader
from torchvision import models, transforms
from PIL import Image
import numpy as np
from tqdm import tqdm
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchinfo import summary
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import glob
from torch.utils.data import DataLoader
import os
import cv2
import torch
from torch.utils.data import Dataset, DataLoader,Subset
from tqdm import tqdm
import json
import random
import math
import re
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,confusion_matrix
import time
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve, auc

In [None]:
def set_seed(seed: int):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic kernels.

    Args:
        seed: Value handed to every random number generator.
    """
    # Same seed for each library's global generator (CPU and all CUDA devices).
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # Deterministic cuDNN kernels and no autotuning (may slow things down).
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    # Only flip the global determinism switch when this torch version has it.
    enable_determinism = getattr(torch, "use_deterministic_algorithms", None)
    if enable_determinism is not None:
        enable_determinism(True)
# Disabled alternative to the %env magic in the first cell:
# os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
# Seeds available for runs (presumably one per repeated experiment — confirm);
# only SEED is applied here.
seed_list = [3,5,11,1344,2506]
SEED = 3
set_seed(SEED)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

# Load Data

In [None]:
# Training table; later cells read its 'label' and 'image_path' columns.
train_df = pd.read_csv('train_df_encoded1.csv')
train_df

In [None]:
# Per-class sample counts (label 1 -> abnormal, label 0 -> normal).
total_abnormal_count = train_df['label'].eq(1).sum()
total_normal_count = train_df['label'].eq(0).sum()
total_samples = len(train_df)

print("Total abnormal images:", total_abnormal_count)
print("Total normal images:", total_normal_count)
print("Total samples:", total_samples)

# Inverse class frequency, rescaled so that the two weights sum to 1.
weight_normal, weight_abnormal = 1 / total_normal_count, 1 / total_abnormal_count
total_inv = weight_normal + weight_abnormal
weight_normal, weight_abnormal = weight_normal / total_inv, weight_abnormal / total_inv

# Weight tensor indexed by class id: [normal, abnormal].
class_weights = torch.tensor(
    [weight_normal, weight_abnormal],
    dtype=torch.float32,
    device=device,
)
print(class_weights)
print('Weight Ratio:',class_weights[1]/class_weights[0])

In [None]:
# Preprocessing pipeline handed to ImageDataset: fixed-size resize, tensor
# conversion, then per-channel normalisation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to fit most CNNs
    transforms.ToTensor(),          # Convert to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet mean/std
                         std=[0.229, 0.224, 0.225])
])

class ImageDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs described by a DataFrame.

    The frame must provide an 'image_path' column (file to open) and a 'label'
    column (integer class id).
    """

    def __init__(self, df, transform=None):
        """Store a re-indexed copy of ``df`` and the optional image transform."""
        # A fresh 0..n-1 index lets `.loc[idx]` be used with positional indices.
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Number of rows (samples) in the frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` as an RGB image plus its label as a long tensor."""
        frame = self.df
        path = frame.loc[idx, 'image_path']
        target = frame.loc[idx, 'label']

        rgb = Image.open(path).convert("RGB")
        # Apply the transform only when one was supplied.
        sample = self.transform(rgb) if self.transform else rgb

        return sample, torch.tensor(target, dtype=torch.long)


# Full training set; the k-fold routine takes its train/validation subsets from it.
train_dataset = ImageDataset(train_df, transform=transform)

# Model

In [None]:
# --- Training for one epoch -----------------------------------
def train_epoch(model, loader, optimizer,criterion, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network producing per-class logits.
        loader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(logits, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(avg_loss, avg_acc, epoch_time, step_times)``: sample-weighted
        mean loss, accuracy in [0, 1], wall-clock seconds for the epoch and a
        list with the wall-clock seconds of each batch.
    """
    model.train()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    step_times = []
    epoch_t0 = time.time()

    for images, labels in tqdm(loader, desc='Train', leave=False):
        step_t0 = time.time()
        images = images.to(device)
        labels = labels.to(device).long()

        optimizer.zero_grad()
        logits = model(images).float()
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by batch size so the epoch mean is per sample.
        batch_n = labels.size(0)
        loss_sum += batch_loss.item() * batch_n
        n_correct += (logits.argmax(dim=1) == labels).sum().item()
        n_seen += batch_n

        step_times.append(time.time() - step_t0)

    avg_loss = loss_sum / n_seen
    avg_acc = n_correct / n_seen
    epoch_time = time.time() - epoch_t0

    return avg_loss, avg_acc, epoch_time, step_times

# --- Validation (no threshold sweep) --------------------------
def validate_epoch(model, loader,criterion, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Network producing per-class logits.
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(logits, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(avg_loss, avg_acc)``: sample-weighted mean loss and accuracy in
        [0, 1], with predictions taken as the arg-max logit.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in tqdm(loader, desc='Val', leave=False):
            images = images.to(device)
            labels = labels.to(device).long()
            logits = model(images).float()

            batch_n = labels.size(0)
            loss_sum += criterion(logits, labels).item() * batch_n
            n_correct += (logits.argmax(dim=1) == labels).sum().item()
            n_seen += batch_n

    return loss_sum / n_seen, n_correct / n_seen


def plot_kfold_metrics(metrics_path, model_name=None, save_dir=None, dpi=500):
    """Save one loss/accuracy figure per fold from a k-fold metrics JSON file.

    The JSON must contain a "per_fold" list whose entries hold the per-epoch
    lists "train_loss", "val_loss", "train_acc" and "val_acc".

    Args:
        metrics_path: Path of the metrics JSON file.
        model_name: Label used in titles and file names. Defaults to the file
            stem with "_training_metrics_all_folds" removed.
        save_dir: Output directory. Defaults to the directory of
            ``metrics_path`` (the current directory for a bare file name).
        dpi: Resolution of the saved PNG files.

    Returns:
        None. Writes ``<model_name>_fold<k>_metrics.png`` files into ``save_dir``.
    """
    with open(metrics_path, "r") as f:
        metrics_data = json.load(f)

    per_fold_data = metrics_data["per_fold"]

    # --- Infer defaults from the metrics file location ---
    if model_name is None:
        stem = os.path.splitext(os.path.basename(metrics_path))[0]
        model_name = stem.replace("_training_metrics_all_folds", "")

    if save_dir is None:
        save_dir = os.path.dirname(metrics_path)
    # dirname() yields '' for a bare file name and os.makedirs('') raises
    # FileNotFoundError, so fall back to the current directory.
    save_dir = save_dir or "."

    os.makedirs(save_dir, exist_ok=True)

    # --- Plot each fold ---
    for fold_idx, fold_metrics in enumerate(per_fold_data, 1):
        train_loss = fold_metrics["train_loss"]
        val_loss = fold_metrics["val_loss"]
        train_acc = fold_metrics["train_acc"]
        val_acc = fold_metrics["val_acc"]
        epochs = range(1, len(train_loss) + 1)

        fig, ax1 = plt.subplots(figsize=(10, 6))

        # Loss on the left axis
        l1 = ax1.plot(epochs, train_loss, label='Train Loss', marker='o', color='tab:red')
        l2 = ax1.plot(epochs, val_loss, label='Validation Loss', marker='o', linestyle='--', color='tab:orange')
        ax1.set_xlabel('Epochs')
        ax1.set_ylabel('Loss', color='tab:red')
        ax1.tick_params(axis='y', labelcolor='tab:red')

        # Accuracy on a twin right axis
        ax2 = ax1.twinx()
        l3 = ax2.plot(epochs, train_acc, label='Train Accuracy', marker='o', color='tab:blue')
        l4 = ax2.plot(epochs, val_acc, label='Validation Accuracy', marker='o', linestyle='--', color='tab:cyan')
        ax2.set_ylabel('Accuracy', color='tab:blue')
        ax2.tick_params(axis='y', labelcolor='tab:blue')

        # One combined legend for both axes, placed above the plot area
        lines = l1 + l2 + l3 + l4
        labels = [line.get_label() for line in lines]
        fig.legend(
            handles=lines,
            labels=labels,
            loc='upper center',
            bbox_to_anchor=(0.5, 1.15),
            ncol=2,
            fontsize='medium',
            frameon=True
        )

        # Title and grid go on the twin axis, which is the axes the previous
        # pyplot-level calls were targeting.
        ax2.set_title(f'{model_name} - Fold {fold_idx}: Loss and Accuracy over Epochs')
        ax2.grid(True)
        # tight_layout recomputes all subplot margins, so no manual
        # subplots_adjust is needed before it.
        fig.tight_layout()

        # bbox_inches='tight' keeps the legend that sits outside the axes
        save_path = os.path.join(save_dir, f"{model_name}_fold{fold_idx}_metrics.png")
        fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
        # Close this specific figure so repeated folds do not accumulate memory
        plt.close(fig)

        print(f"✅ Saved plot: {save_path}")


In [None]:
def get_model_by_name(name: str, num_classes=2, device='cuda'):
    """Build a ResNet-18 classifier whose configuration is encoded in ``name``.

    Recognised substrings of ``name``:
        * "scratch"  - random initialisation (``zero_init_residual=True``)
          instead of ImageNet weights.
        * "mod2"     - head ``Linear(in, 256) -> ReLU -> Dropout -> Linear(256, C)``.
        * "mod1"     - head ``Linear(in, in) -> ReLU -> Dropout -> Linear(in, C)``.
        * "dp(<p>)"  - dropout probability for the mod1/mod2 heads (default 0.5).
        * otherwise  - a single ``Linear(in, C)`` head.

    For pretrained models only ``layer4`` and the new head are trainable.
    Models built from scratch keep every parameter trainable, because freezing
    randomly initialised layers would leave them as untrained feature
    extractors.

    Args:
        name: Configuration string, see above.
        num_classes: Number of output logits.
        device: Device the finished model is moved to.

    Returns:
        The configured ``torch.nn.Module`` on ``device``.
    """
    from_scratch = "scratch" in name

    # --- Pretrained or not ---
    if from_scratch:
        model = models.resnet18(weights=None, zero_init_residual=True)
    else:
        # `pretrained=True` is deprecated in torchvision; IMAGENET1K_V1 is the
        # weight set that flag resolved to.
        model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # --- Classification head ---
    in_features = model.fc.in_features
    dropout_match = re.search(r'dp\(([\d.]+)\)', name)
    dropout_p = float(dropout_match.group(1)) if dropout_match else 0.5

    if "mod2" in name:
        # Mod 2: two-layer MLP with a 256-unit bottleneck and dropout
        model.fc = nn.Sequential(
            nn.Linear(in_features, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_p),
            nn.Linear(256, num_classes)
        )
    elif "mod1" in name:
        # Mod 1: full-width hidden layer with dropout
        model.fc = nn.Sequential(
            nn.Linear(in_features, in_features),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_p),
            nn.Linear(in_features, num_classes)
        )
    else:
        # Default: single linear layer
        model.fc = nn.Linear(in_features, num_classes)

    # --- Fine-tuning policy ---
    if not from_scratch:
        # Freeze the pretrained backbone, then re-enable the last residual
        # stage and the freshly initialised head.
        for param in model.parameters():
            param.requires_grad = False
        for trainable in (model.layer4, model.fc):
            for param in trainable.parameters():
                param.requires_grad = True

    return model.to(device)

## Model Config

In [None]:
# Training schedule
NUM_EPOCHS = 100                # upper bound on epochs per fold
NUM_FOLDS = 5                   # number of cross-validation splits
BATCH_SIZE = 32                 # samples per batch (was assigned twice; one assignment kept)
EARLY_STOPING_PATIENCE = 20     # name spelling kept as-is: other cells reference it

# Optimiser / learning-rate schedule
INITIAL_LR = 0.001
REDUCE_LR_PATIENCE = 10
REDUCE_LR_FACTOR = 0.5

# Per-class loss weights computed from the label distribution
CLASS_WEIGHTS = class_weights

In [None]:
def run_kfold_training(
    model_name_prefix,
    train_dataset = train_dataset,
    get_model_fn = get_model_by_name,
    train_epoch_fn = train_epoch,
    validate_epoch_fn = validate_epoch,
    plot_metrics_fn = plot_kfold_metrics,
    class_weights = CLASS_WEIGHTS,
    device='cuda',
    num_folds=NUM_FOLDS,
    batch_size=BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    patience=EARLY_STOPING_PATIENCE,
    lr=INITIAL_LR,
    weight_decay=1e-5,
    seed=SEED,
    dir_path = os.getcwd(),
    metrics_filename=None
):
    """Train and evaluate one model configuration with k-fold cross-validation.

    For every fold a fresh model is built, trained with Adam and a
    ReduceLROnPlateau schedule driven by the validation loss, and stopped early
    once the validation loss has not improved for ``patience`` epochs. All
    artefacts are written to ``<dir_path>/<model_name_prefix>/``: the best and
    the final checkpoint of each fold, a metrics JSON file, one plot per fold
    and a torchinfo summary of the last fold's model.

    Note that the defaults referring to notebook globals (and ``os.getcwd()``)
    are bound when this cell is executed, not when the function is called.

    Args:
        model_name_prefix: Configuration name; passed to ``get_model_fn`` and
            used for file names. If it contains "weighted", the loss uses
            ``class_weights``.
        train_dataset: Sized, indexable dataset split into folds.
        get_model_fn: Factory called as ``get_model_fn(name, device=device)``.
        train_epoch_fn: Called as ``(model, loader, optimizer, criterion, device)``
            and returning ``(loss, acc, epoch_time, step_times)``.
        validate_epoch_fn: Called as ``(model, loader, criterion, device)`` and
            returning ``(loss, acc)``.
        plot_metrics_fn: Called as ``plot_metrics_fn(metrics_path=...)`` after
            the metrics file has been written.
        class_weights: Tensor of per-class loss weights.
        device: Device used for the model, the batches and the loss weights.
        num_folds: Number of cross-validation splits.
        batch_size: Batch size of both loaders.
        num_epochs: Maximum number of epochs per fold.
        patience: Early-stopping patience in epochs.
        lr: Initial learning rate.
        weight_decay: Adam weight decay.
        seed: Random state of the fold split.
        dir_path: Parent directory of the output folder.
        metrics_filename: Name of the metrics JSON file; defaults to
            ``<model_name_prefix>_training_metrics_all_folds.json``.

    Returns:
        None.
    """
    model_dir = os.path.join(dir_path, model_name_prefix)
    os.makedirs(model_dir, exist_ok=True)

    use_weighted_loss = 'weighted' in model_name_prefix

    kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    # Split plain indices: only the number of samples matters for KFold.
    sample_indices = np.arange(len(train_dataset))
    all_fold_metrics = []
    print('Training',model_name_prefix)
    for fold, (train_idx, val_idx) in enumerate(kf.split(sample_indices), 1):
        print(f"\n=== Fold {fold}/{num_folds} ===")

        # Prepare loaders
        train_loader = DataLoader(Subset(train_dataset, train_idx), batch_size=batch_size, shuffle=True)
        val_loader   = DataLoader(Subset(train_dataset, val_idx), batch_size=batch_size, shuffle=False)

        # Model and optimizer. The model must live on the same device the
        # batches are moved to, so the requested device is passed through.
        model = get_model_fn(model_name_prefix, device=device)
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, weight_decay=weight_decay)
        # Both schedule settings come from the notebook's config constants.
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=REDUCE_LR_FACTOR, patience=REDUCE_LR_PATIENCE
        )
        if use_weighted_loss:
            # Loss weights have to sit on the same device as the logits.
            criterion = nn.CrossEntropyLoss(weight=class_weights.to(device=device, dtype=torch.float32))
            print('Using Weighted Loss')
        else:
            criterion = nn.CrossEntropyLoss()

        # History for this fold
        fold_hist = {
            "train_loss": [], "train_acc": [],
            "val_loss": [],   "val_acc": [],
            "epoch_time_s": [], "avg_step_time_s": [],
            "lr_history": []
        }

        best_loss = float('inf')
        epochs_no_improve = 0

        model_basename = f"{model_name_prefix}_fold{fold}"
        model_path = os.path.join(model_dir, f"{model_basename}.pth")
        best_model_path = os.path.join(model_dir, f"{model_basename}_best.pth")

        for epoch in range(1, num_epochs + 1):
            train_loss, train_acc, epoch_time, step_times = train_epoch_fn(model, train_loader, optimizer, criterion, device)
            val_loss, val_acc = validate_epoch_fn(model, val_loader, criterion, device)
            scheduler.step(val_loss)

            # Record
            fold_hist["train_loss"].append(train_loss)
            fold_hist["train_acc"].append(train_acc)
            fold_hist["val_loss"].append(val_loss)
            fold_hist["val_acc"].append(val_acc)
            fold_hist["epoch_time_s"].append(epoch_time)
            # Guard against an epoch function that reports no step timings.
            fold_hist["avg_step_time_s"].append(sum(step_times) / len(step_times) if step_times else 0.0)
            fold_hist["lr_history"].append(optimizer.param_groups[0]['lr'])

            print(f"Epoch {epoch:02d}: Train Loss={train_loss:.4f}, Acc={train_acc*100:.2f}% | "
                  f"Val Loss={val_loss:.4f}, Acc={val_acc*100:.2f}% | LR={fold_hist['lr_history'][-1]:.6f}")

            if val_loss < best_loss:
                best_loss = val_loss
                torch.save(model.state_dict(), best_model_path)
                epochs_no_improve = 0
                print("  → New best model saved")
            else:
                epochs_no_improve += 1
                if epochs_no_improve >= patience:
                    print(f"Early stopping at epoch {epoch}")
                    break

        # Save final (last-epoch) model for fold
        torch.save(model.state_dict(), model_path)
        all_fold_metrics.append(fold_hist)

    def agg(key):
        """Mean/std across folds, truncated to the shortest fold (early stopping)."""
        arr = [np.array(f[key]) for f in all_fold_metrics]
        min_len = min(len(a) for a in arr)
        stacked = np.stack([a[:min_len] for a in arr], axis=0)
        return stacked.mean(axis=0), stacked.std(axis=0)

    mean_val_loss, std_val_loss = agg("val_loss")

    # Print summary — Last 5 epochs shared by all folds
    print("\n=== Cross‐Val Summary (last 5 epochs) ===")
    num_common_epochs = len(mean_val_loss)  # separate name: do not shadow `num_epochs`
    start_epoch = max(0, num_common_epochs - 5)
    for epoch in range(start_epoch, num_common_epochs):
        print(f"Epoch {epoch + 1:02d}: Val Loss = {mean_val_loss[epoch]:.4f} ± {std_val_loss[epoch]:.4f}")

    # Save to file
    if metrics_filename is None:
        metrics_filename = f"{model_name_prefix}_training_metrics_all_folds.json"
    metrics_path = os.path.join(model_dir, metrics_filename)

    with open(metrics_path, "w") as f:
        json.dump({
            "per_fold": all_fold_metrics,
            "val_loss_mean": mean_val_loss.tolist(),
            "val_loss_std": std_val_loss.tolist()
        }, f, indent=4)

    print(f"\nK-fold cross-validation complete. Metrics written to {metrics_path}.")
    # Use the injected plotting callback rather than a hard-wired function.
    plot_metrics_fn(metrics_path=metrics_path)

    # Architecture summary of the last fold's model, at the training batch size.
    model_summary_str = str(summary(model, input_size=(batch_size, 3, 224, 224)))
    summary_path = os.path.join(model_dir, f"{model_name_prefix}_summary.txt")
    with open(summary_path, "w", encoding="utf-8") as f:
        f.write(model_summary_str)

In [None]:
# Configurations to train. The name doubles as a flag string: other cells look
# for "scratch", "weighted", "mod1"/"mod2" and "dp(<p>)" inside it.
model_list = [
    "ResNet18_scratch",
    "ResNet18_weighted_scratch",
]

In [None]:
# Run the k-fold pipeline once per configuration, using the default settings.
for model_name in model_list:
    run_kfold_training(model_name)