In [None]:
# Step 0: Set Global Seed for Reproducibility
import random
import numpy as np
import torch

def set_seed(seed=42):
    """Seed the random number generators used in this notebook.

    Seeds Python's `random`, NumPy's legacy global generator and PyTorch
    (CPU and all CUDA devices), then switches cuDNN to deterministic mode
    with benchmarking disabled.

    Args:
        seed: Integer seed applied to every generator.
    """
    # The three seeding functions take the same argument; call them in the
    # original order (random -> numpy -> torch).
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    torch.cuda.manual_seed_all(seed)

    # Trade cuDNN autotuning for repeatable kernel selection.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


# Seed once up front so every later cell starts from a known RNG state.
set_seed(42)


In [None]:
# Step 1: Install essential libraries for training, evaluation, and model analysis
!pip install torch torchvision
!pip install timm transformers roboflow pycocotools
!pip install grad-cam torchinfo


In [None]:
# Step 2: Connect to Roboflow and download the COCO-format segmentation dataset
from roboflow import Roboflow
import os
import json

# Read the Roboflow API key from the environment instead of committing it to the
# notebook; fall back to an interactive (non-echoing) prompt when it is not set.
# NOTE(review): the key that was previously embedded here should be revoked.
from getpass import getpass

ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")

rf = Roboflow(api_key=ROBOFLOW_API_KEY)
project = rf.workspace("urban-lake-wastef").project("another_approach_try")
version = project.version(4)
dataset = version.download("coco-segmentation")

# Locations inside the downloaded dataset: the train image folder and the
# COCO annotation file that sits inside it.
DATA_ROOT = dataset.location
IMG_DIR = os.path.join(DATA_ROOT, 'train')
TRAIN_JSON = os.path.join(IMG_DIR, '_annotations.coco.json')

# Output locations: per-class extracted regions, and the train/val/test split
# built from them later. Only the region folder is created here.
REGION_ROOT, SPLIT_ROOT = '/content/extracted_regions', '/content/split_regions'
os.makedirs(REGION_ROOT, exist_ok=True)


In [None]:
# Step 3: Extract foreground objects from COCO masks and save them as masked region images (background zeroed, original image size kept)

from PIL import Image, ImageDraw
import numpy as np

# Load COCO annotation JSON
with open(TRAIN_JSON) as f:
    ann_data = json.load(f)

# Create a mapping from category ID to name
cat_map = {c['id']: c['name'] for c in ann_data['categories']}

# Index the image records by ID once, so each annotation needs a dict lookup
# instead of a linear scan over every image. setdefault keeps the first record
# should an ID ever appear twice.
images_by_id = {}
for img_record in ann_data['images']:
    images_by_id.setdefault(img_record['id'], img_record)

skipped_non_polygon = 0

# Iterate over all annotations; for each one, zero out everything outside its
# polygons and save the result as a PNG under REGION_ROOT/<class name>/.
# The saved image keeps the full frame size; it is masked, not cropped.
for ann in ann_data['annotations']:
    seg = ann['segmentation']
    if not isinstance(seg, list):
        # Only polygon lists can be rasterised below; other encodings
        # (e.g. COCO RLE dicts) are counted and skipped rather than crashing.
        skipped_non_polygon += 1
        continue

    img_info = images_by_id[ann['image_id']]
    width, height = img_info['width'], img_info['height']

    # Draw every polygon of this annotation onto a single canvas; filled
    # pixels get value 1, so overlapping polygons form their union.
    mask_img = Image.new('L', (width, height), 0)
    painter = ImageDraw.Draw(mask_img)
    for poly in seg:
        pts = np.array(poly).reshape(-1, 2)  # flat [x0, y0, x1, y1, ...] -> (N, 2)
        painter.polygon([tuple(p) for p in pts], outline=1, fill=1)
    mask = np.array(mask_img)

    if mask.sum() < 100:  # Skip very small masks (fewer than 100 pixels)
        continue

    # Open the image only for annotations that are kept, and release the file
    # handle as soon as the pixels are decoded.
    img_path = os.path.join(IMG_DIR, img_info['file_name'])
    with Image.open(img_path) as src:
        img = src.convert('RGB')

    # Broadcast the 0/1 mask across the colour channels.
    region = np.array(img) * mask[:, :, None]
    region_img = Image.fromarray(region)

    label = cat_map[ann['category_id']]
    out_dir = os.path.join(REGION_ROOT, label)
    os.makedirs(out_dir, exist_ok=True)
    base = os.path.splitext(img_info['file_name'])[0]
    # The annotation ID keeps file names unique when one image has several objects.
    out_path = os.path.join(out_dir, f"{base}_{ann['id']}.png")
    region_img.save(out_path)

if skipped_non_polygon:
    print(f"Skipped {skipped_non_polygon} annotation(s) without polygon segmentation.")


In [None]:
# Step 4: Split dataset into 60% train, 20% val, 20% test

import shutil
from sklearn.model_selection import train_test_split

# Create split folders
for split in ['train', 'val', 'test']:
    os.makedirs(os.path.join(SPLIT_ROOT, split), exist_ok=True)

# For each class, split its images into train/val/test (60/20/20)
for class_name in sorted(os.listdir(REGION_ROOT)):
    class_path = os.path.join(REGION_ROOT, class_name)
    # Skip stray files and hidden entries (e.g. notebook checkpoint folders).
    if class_name.startswith('.') or not os.path.isdir(class_path):
        continue

    # os.listdir returns entries in arbitrary order; sorting makes the
    # random_state-seeded split identical from run to run.
    files = sorted(os.listdir(class_path))

    if len(files) < 3:
        # Three non-empty parts cannot be made from fewer than three files
        # (train_test_split would raise), so keep everything for training.
        print(f"Class '{class_name}' has only {len(files)} file(s); assigning all to train.")
        train_files, val_files, test_files = files, [], []
    else:
        train_files, temp_files = train_test_split(files, test_size=0.4, random_state=42)
        val_files, test_files = train_test_split(temp_files, test_size=0.5, random_state=42)

    for split, split_files in zip(['train', 'val', 'test'], [train_files, val_files, test_files]):
        # The class folder is created in every split, even when it stays
        # empty, so all three splits list the same set of class folders.
        split_class_dir = os.path.join(SPLIT_ROOT, split, class_name)
        os.makedirs(split_class_dir, exist_ok=True)
        for f in split_files:
            shutil.copy2(os.path.join(class_path, f), os.path.join(split_class_dir, f))


In [None]:
# Step 5: Define a PyTorch Dataset class with augmentation and weighted sampling

from torch.utils.data import Dataset, WeightedRandomSampler
from torchvision import transforms
from collections import defaultdict
from PIL import Image

class WasteRegionDataset(Dataset):
    """Image-folder dataset: one sub-directory of `root_dir` per class.

    Attributes:
        samples: List of `(image_path, class_index)` tuples, ordered by class
            name and then by file name.
        label2id: Mapping from class (folder) name to integer index, assigned
            in sorted folder-name order.
        sample_weights: One weight per sample, `1 / (number of samples in its
            class)`, suitable for `WeightedRandomSampler`.
        transform: Callable applied to each PIL image in `__getitem__`.

    Args:
        root_dir: Directory whose non-hidden sub-directories are the classes.
        transform: Optional transform. When omitted, a training-style pipeline
            with random flips, rotation and colour jitter is used, so pass a
            deterministic transform explicitly for evaluation data.
    """

    def __init__(self, root_dir, transform=None):
        if transform is None:
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.RandomRotation(10),
                transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.03),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
        self.transform = transform
        self.samples = []
        class_counts = defaultdict(int)

        # Only real, non-hidden directories are classes; stray files or
        # checkpoint folders in root_dir must not become labels.
        classes = sorted(
            name for name in os.listdir(root_dir)
            if not name.startswith('.') and os.path.isdir(os.path.join(root_dir, name))
        )
        self.label2id = {c: i for i, c in enumerate(classes)}

        for c in classes:
            class_dir = os.path.join(root_dir, c)
            label = self.label2id[c]
            # Sorted so that sample indices (and therefore seeded sampling)
            # do not depend on filesystem listing order.
            for f in sorted(os.listdir(class_dir)):
                self.samples.append((os.path.join(class_dir, f), label))
                class_counts[label] += 1

        # Inverse class frequency: every class gets the same total weight.
        self.sample_weights = [1.0 / class_counts[label] for _, label in self.samples]

    def __len__(self):
        """Return the number of samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample `idx` as RGB, apply the transform, return `(image, label)`."""
        path, label = self.samples[idx]
        # convert() decodes the pixels, so the file can be closed right after.
        with Image.open(path) as src:
            img = src.convert('RGB')
        img = self.transform(img)
        return img, label


In [None]:
# Step 6: Set device (GPU or CPU) and define paths to the dataset splits
from timm import create_model

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Define the root directories for each data split
TRAIN_DIR = os.path.join(SPLIT_ROOT, 'train')
VAL_DIR   = os.path.join(SPLIT_ROOT, 'val')
TEST_DIR  = os.path.join(SPLIT_ROOT, 'test')

# Determine the number of classes from the class folders in the train set.
# Only non-hidden sub-directories count, so a stray file or a checkpoint
# folder cannot add an extra output unit to the classifier.
num_classes = sum(
    1 for name in os.listdir(TRAIN_DIR)
    if not name.startswith('.') and os.path.isdir(os.path.join(TRAIN_DIR, name))
)


In [None]:
# Step 7: Initialize datasets and DataLoaders with augmentation and weighted sampling for training

# Create dataset instances. The training set uses the dataset's default
# (augmenting) transform.
train_ds = WasteRegionDataset(TRAIN_DIR)

# Validation and test data get deterministic preprocessing only: same resize
# and normalisation as training, but no random flips, rotation or colour
# jitter, so repeated evaluations of the same weights give the same numbers.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
val_ds   = WasteRegionDataset(VAL_DIR, transform=eval_transform)
test_ds  = WasteRegionDataset(TEST_DIR, transform=eval_transform)

from torch.utils.data import DataLoader, WeightedRandomSampler

# Apply WeightedRandomSampler to address class imbalance in training data:
# samples are drawn with replacement, with probability proportional to the
# inverse class frequency stored in train_ds.sample_weights.
train_sampler = WeightedRandomSampler(train_ds.sample_weights, len(train_ds.sample_weights), replacement=True)

# Create DataLoaders (the sampler replaces shuffling for the training loader)
train_loader = DataLoader(train_ds, batch_size=16, sampler=train_sampler)
val_loader   = DataLoader(val_ds, batch_size=16, shuffle=False)
test_loader  = DataLoader(test_ds, batch_size=16, shuffle=False)


In [None]:
# Step 8: Define training function with:
# - Mixup augmentation
# - Label smoothing
# - Early stopping with patience
# - Weighted class loss
# - Saving best model by validation loss

from sklearn.utils.class_weight import compute_class_weight
from transformers import get_cosine_schedule_with_warmup
from torch.cuda.amp import GradScaler
import torch.nn as nn

def train_one_model(model_name, use_mixup=True, label_smooth=0.1, seed=42, patience=10, max_epochs=200):
    """Fine-tune a pretrained timm model with early stopping on validation loss.

    Uses the notebook globals `num_classes`, `device`, `train_ds`,
    `train_loader` and `val_loader`.

    Args:
        model_name: timm architecture name passed to `create_model`.
        use_mixup: If True, each training batch is blended with a shuffled
            copy of itself and the loss is the matching blend of both targets.
        label_smooth: Label-smoothing factor for the cross-entropy loss.
        seed: Seed passed to `set_seed` before the model is created.
        patience: Number of consecutive epochs without a new best validation
            loss after which training stops.
        max_epochs: Upper bound on the number of epochs; also defines the
            length of the learning-rate schedule.

    Returns:
        `(model, history)`. `model` carries the weights of the epoch with the
        lowest validation loss (reloaded from the checkpoint). `history` is a
        dict with per-epoch lists `train_loss`, `val_loss` and `val_acc`.

    Side effects:
        Writes the best weights to `"{model_name}_best.pth"`. The file name
        depends only on `model_name`, so later runs of the same architecture
        overwrite earlier ones.
    """
    set_seed(seed)
    model = create_model(model_name, pretrained=True, num_classes=num_classes).to(device)

    # Compute class weights for loss function ("balanced" = inverse frequency)
    train_labels = [label for _, label in train_ds.samples]
    class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(train_labels), y=train_labels)
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

    # Define loss function, optimizer, scheduler, and mixed precision scaler
    criterion = nn.CrossEntropyLoss(weight=class_weights, label_smoothing=label_smooth)
    optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, weight_decay=1e-3)

    # The schedule is expressed in optimizer steps: three epochs of warm-up,
    # then cosine decay over the full epoch budget. It is therefore advanced
    # once per batch in the loop below.
    steps_per_epoch = len(train_loader)
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=3 * steps_per_epoch,
        num_training_steps=steps_per_epoch * max_epochs
    )
    scaler = GradScaler()

    # Define Mixup helpers
    def mixup_data(x, y, alpha=0.4):
        """Blend the batch with a random permutation of itself."""
        lam = np.random.beta(alpha, alpha)
        index = torch.randperm(x.size(0)).to(x.device)
        mixed_x = lam * x + (1 - lam) * x[index]
        y_a, y_b = y, y[index]
        return mixed_x, y_a, y_b, lam

    def mixup_criterion(criterion, pred, y_a, y_b, lam):
        """Loss for a mixed batch: the same blend applied to both targets."""
        return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

    # Initialize training state
    history = {'train_loss': [], 'val_loss': [], 'val_acc': []}
    best_val_loss = float('inf')
    no_improve_epochs = 0
    checkpoint_path = f"{model_name}_best.pth"

    # Training loop
    for epoch in range(max_epochs):
        model.train()
        total_loss = 0

        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()

            if use_mixup:
                imgs, y_a, y_b, lam = mixup_data(imgs, labels)

            with torch.cuda.amp.autocast():
                outputs = model(imgs)
                if use_mixup:
                    loss = mixup_criterion(criterion, outputs, y_a, y_b, lam)
                else:
                    loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            # One scheduler step per optimizer step, matching the step-based
            # warm-up and decay lengths configured above.
            scheduler.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)
        history['train_loss'].append(avg_train_loss)

        # Validation
        model.eval()
        val_loss = 0
        correct = total = 0
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                val_loss += criterion(outputs, labels).item()
                preds = outputs.argmax(dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

        avg_val_loss = val_loss / len(val_loader)
        val_acc = correct / total
        history['val_loss'].append(avg_val_loss)
        history['val_acc'].append(val_acc)

        print(f"[{model_name}] Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}")

        # Save best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            no_improve_epochs = 0
            torch.save(model.state_dict(), checkpoint_path)
            print("🔸 New best model saved.")
        else:
            no_improve_epochs += 1
            if no_improve_epochs >= patience:
                print("🛑 Early stopping triggered.")
                break

    # Hand back the best weights rather than those of the last epoch, which
    # under early stopping is `patience` epochs past the best one. Skipped when
    # no checkpoint was written (no epoch ran, or the loss never improved).
    if best_val_loss < float('inf'):
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    return model, history


In [None]:
# Step 9: Train multiple models using different settings (mixup, label smoothing, seeds)
# Log validation accuracy to a CSV file and collect training history for visualization

import csv

# Grid of the ablation study: every architecture is trained with each
# combination of mixup, label smoothing and seed.
model_list = ['efficientnet_b0', 'resnet34', 'densenet121', 'convnext_tiny', 'swin_tiny_patch4_window7_224']
seeds = [42, 123, 777]
mixup_options = [True, False]
label_smoothings = [0.1, 0.0]

all_histories = {}  # Store histories to later visualize training curves

with open("ablation_results.csv", "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["Model", "Seed", "Mixup", "Label Smoothing", "Val Accuracy"])

    for model_name in model_list:
        for mixup in mixup_options:
            for smooth in label_smoothings:
                for seed in seeds:
                    run_id = f"{model_name}_mixup{mixup}_smooth{smooth}_seed{seed}"
                    print(f"🔁 Training: {run_id}")

                    model, history = train_one_model(
                        model_name, use_mixup=mixup, label_smooth=smooth, seed=seed
                    )

                    # Report the accuracy of the epoch with the lowest validation
                    # loss, i.e. the epoch whose weights train_one_model saves,
                    # not the last epoch, which under early stopping comes
                    # several epochs after the best one. min() returns the first
                    # minimum, matching the strict "<" used when checkpointing.
                    val_losses = history['val_loss']
                    best_epoch = min(range(len(val_losses)), key=val_losses.__getitem__)
                    final_val_acc = history['val_acc'][best_epoch]
                    writer.writerow([model_name, seed, mixup, smooth, final_val_acc])
                    # Persist each row immediately so finished runs survive an
                    # interruption of this long loop.
                    f.flush()

                    # Save training history to plot later
                    all_histories[run_id] = history


In [None]:
# Step 10.1: Plot training vs validation loss and validation accuracy to assess learning and overfitting

import matplotlib.pyplot as plt

def plot_model_histories(all_histories):
    """
    Draw one two-panel figure per training run.

    Parameters:
    - all_histories (dict): Maps a run name to the history dict returned by
                            train_one_model(), with the per-epoch lists
                            'train_loss', 'val_loss' and 'val_acc'.

    For every run the left panel shows training and validation loss, and the
    right panel shows validation accuracy, both against the epoch number.
    """
    for run_id, hist in all_histories.items():
        n_epochs = len(hist['train_loss'])
        epochs = range(1, n_epochs + 1)

        fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 5))

        # Left panel: train vs. validation loss
        loss_ax.plot(epochs, hist['train_loss'], label='Train Loss', marker='o')
        loss_ax.plot(epochs, hist['val_loss'], label='Val Loss', marker='x')
        loss_ax.set_title(f"{run_id} – Loss Curve")
        loss_ax.set_xlabel("Epoch")
        loss_ax.set_ylabel("Loss")
        loss_ax.legend()
        loss_ax.grid(True)

        # Right panel: validation accuracy
        acc_ax.plot(epochs, hist['val_acc'], label='Val Accuracy', marker='s', color='green')
        acc_ax.set_title(f"{run_id} – Accuracy Curve")
        acc_ax.set_xlabel("Epoch")
        acc_ax.set_ylabel("Accuracy")
        acc_ax.legend()
        acc_ax.grid(True)

        # Leave room at the top for the figure-level title.
        fig.suptitle(f"Training Progress – {run_id}", fontsize=14)
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.show()


In [None]:
# Step 10.2: Generate overfitting plots for all trained models using saved training histories.
# `all_histories` is filled by the Step 9 training loop, so that cell must have run first;
# one figure is shown per entry.
plot_model_histories(all_histories)


In [None]:
# Step 10.3: Read the CSV results and compute the mean and standard deviation of validation accuracy for each model setting
import pandas as pd

# Read back the per-run rows written by the ablation loop
df = pd.read_csv("ablation_results.csv")

# Collapse the seeds: one row per (model, mixup, label smoothing) setting,
# with the mean and standard deviation of validation accuracy
summary = (
    df.groupby(["Model", "Mixup", "Label Smoothing"])["Val Accuracy"]
      .agg(mean='mean', std='std')
      .reset_index()
)

# Show the aggregated table
print("📊 Ablation Results (mean ± std across seeds):")
print(summary)


In [None]:
# Step 11: Evaluate best-performing model on held-out test set
# Includes confusion matrix, classification report, per-class F1, precision, recall, ROC-AUC

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix, roc_auc_score,
    precision_recall_fscore_support
)
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import label_binarize

def evaluate_model_detailed(model, dataloader, device, label2id, show_roc_auc=True):
    """Evaluate a classifier on a dataloader and print/plot detailed metrics.

    Prints a classification report plus accuracy and weighted precision,
    recall and F1, shows raw and row-normalised confusion matrices, and
    optionally prints the ROC-AUC.

    Args:
        model: Classifier returning logits of shape (batch, n_classes).
        dataloader: Yields `(images, labels)` batches.
        device: Device the image batches are moved to before the forward pass.
        label2id: Mapping from class name to integer class index.
        show_roc_auc: If True, also compute and print the ROC-AUC; a failure
            there is reported but does not abort the evaluation.
    """
    model.eval()
    all_preds, all_probs, all_labels = [], [], []

    # Collect predictions and ground truth
    with torch.no_grad():
        for imgs, labels in dataloader:
            imgs = imgs.to(device)
            outputs = model(imgs)
            probs = torch.softmax(outputs, dim=1)
            preds = outputs.argmax(dim=1)

            all_probs.extend(probs.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    all_labels = np.array(all_labels)
    all_preds = np.array(all_preds)
    all_probs = np.array(all_probs)

    # Create label mapping
    id2label = {v: k for k, v in label2id.items()}
    class_ids = sorted(id2label)
    target_names = [id2label[i] for i in class_ids]

    # Print classification metrics. Passing `labels` explicitly keeps the
    # report aligned with target_names even when a class is absent from this
    # split; zero_division=0 reports 0 for such classes without warnings.
    print("🔍 Classification Report:")
    print(classification_report(all_labels, all_preds, labels=class_ids,
                                target_names=target_names, digits=4, zero_division=0))
    print(f"Accuracy:  {accuracy_score(all_labels, all_preds):.4f}")
    print(f"Precision: {precision_score(all_labels, all_preds, average='weighted', zero_division=0):.4f}")
    print(f"Recall:    {recall_score(all_labels, all_preds, average='weighted', zero_division=0):.4f}")
    print(f"F1-Score:  {f1_score(all_labels, all_preds, average='weighted', zero_division=0):.4f}")

    # Plot confusion matrix (raw); fixed `labels` keeps its shape equal to
    # the number of tick labels.
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=target_names, yticklabels=target_names)
    plt.title("Confusion Matrix – Raw")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.tight_layout()
    plt.show()

    # Normalized confusion matrix (each row sums to 1). Rows of classes with
    # no samples stay at 0 instead of becoming NaN through division by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_norm = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm_norm, annot=True, fmt='.2f', cmap='Oranges',
                xticklabels=target_names, yticklabels=target_names)
    plt.title("Confusion Matrix – Normalized")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.tight_layout()
    plt.show()

    # ROC AUC (optional)
    if show_roc_auc:
        try:
            if len(label2id) == 2:
                # label_binarize yields a single column for two classes, which
                # does not match the two probability columns; score the
                # probability of class 1 directly instead.
                auc = roc_auc_score(all_labels, all_probs[:, 1])
            else:
                y_true_bin = label_binarize(all_labels, classes=list(range(len(label2id))))
                auc = roc_auc_score(y_true_bin, all_probs, multi_class='ovr')
            print(f"ROC AUC (Multiclass OVR): {auc:.4f}")
        except Exception as e:
            # Best effort: e.g. undefined when a class has no samples here.
            print(f"⚠️ ROC-AUC computation failed: {e}")

# Load best model (adjust name if needed)
# NOTE(review): the checkpoint name contains only the architecture, and the
# ablation loop trains each architecture several times, so this file holds the
# weights of whichever run of that architecture saved a checkpoint last.
best_model_name = 'efficientnet_b0'  # Replace with best performing model if needed
model = create_model(best_model_name, pretrained=False, num_classes=num_classes).to(device)
# map_location lets a checkpoint written on a GPU runtime load on a CPU-only one.
model.load_state_dict(torch.load(f"{best_model_name}_best.pth", map_location=device))

# Evaluate on test data
evaluate_model_detailed(model, test_loader, device, train_ds.label2id)


In [None]:
# Step 12: Visualize model attention (Grad-CAM) for a given input image and target class

from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

def apply_gradcam(model, image_path, class_idx, target_layer=None):
    """Show a Grad-CAM heatmap for one image and one target class.

    Args:
        model: Classifier to explain; it is switched to eval mode.
        image_path: Path of the image, which is resized to 224x224.
        class_idx: Index of the class whose evidence is visualised.
        target_layer: Optional list of layers to hook. When omitted, the last
            entry of `model.blocks` (its `norm1` if present) or the last entry
            of `model.layer4` is used.

    Raises:
        ValueError: If `target_layer` is omitted and the model has neither a
            `blocks` nor a `layer4` attribute.
    """
    model.eval()

    # Choose last transformer block or final convolutional layer
    if target_layer is None:
        if hasattr(model, 'blocks'):
            last_block = model.blocks[-1]
            # Use the block's norm1 when it has one; otherwise hook the block
            # itself instead of failing on the missing attribute.
            target_layer = [getattr(last_block, 'norm1', last_block)]
        elif hasattr(model, 'layer4'):
            target_layer = [model.layer4[-1]]
        else:
            raise ValueError(
                "Cannot pick a default Grad-CAM layer for this model; "
                "pass target_layer=[<layer>] explicitly."
            )

    # Initialize Grad-CAM
    try:
        cam = GradCAM(model=model, target_layers=target_layer, use_cuda=torch.cuda.is_available())
    except TypeError:
        # Some pytorch-grad-cam releases do not accept the `use_cuda` keyword.
        cam = GradCAM(model=model, target_layers=target_layer)

    # Load and preprocess image
    with Image.open(image_path) as src:
        img = src.convert('RGB').resize((224, 224))
    # Un-normalised copy in [0, 1], used only as the background of the overlay.
    rgb_img = np.asarray(img, dtype=np.float32) / 255.0

    # Put the input on the device the model's weights live on.
    model_device = next(model.parameters()).device
    input_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])(img).unsqueeze(0).to(model_device)

    # Generate heatmap ([0] selects the single image of the batch)
    grayscale_cam = cam(input_tensor=input_tensor, targets=[ClassifierOutputTarget(class_idx)])[0]
    cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)

    # Show the result
    plt.imshow(cam_image)
    plt.title(f"Grad-CAM for class index {class_idx}")
    plt.axis('off')
    plt.show()


In [None]:
# Step 13: Visualize the model's learned feature space using t-SNE dimensionality reduction
from sklearn.manifold import TSNE
import seaborn as sns

def plot_tsne_features(model, dataloader, device, label2id, num_samples=500):
    """Project model features of up to `num_samples` images to 2-D with t-SNE.

    Args:
        model: Feature source. If it has `forward_features`, that output is
            used (pooled through `forward_head(..., pre_logits=True)` when
            available); otherwise the plain model output is used.
        dataloader: Yields `(images, labels)` batches.
        device: Device the image batches are moved to.
        label2id: Mapping from class name to integer class index, used to
            name the points in the legend.
        num_samples: Maximum number of samples embedded and plotted.
    """
    model.eval()
    features = []
    labels = []

    # Extract features and labels
    with torch.no_grad():
        for imgs, lbls in dataloader:
            imgs = imgs.to(device)
            if hasattr(model, 'forward_features'):
                feats = model.forward_features(imgs)
                if hasattr(model, 'forward_head'):
                    # Pool the feature map to one pre-classifier vector per image.
                    feats = model.forward_head(feats, pre_logits=True)
            else:
                feats = model(imgs)
            # t-SNE needs a 2-D (samples, features) array; flatten whatever
            # dimensions remain after the batch axis.
            features.append(feats.flatten(1).float().cpu().numpy())
            labels.extend(lbls.cpu().numpy())
            if len(labels) >= num_samples:
                break

    # Prepare for t-SNE
    features = np.concatenate(features, axis=0)[:num_samples]
    labels = labels[:num_samples]

    # Run t-SNE. Perplexity must be smaller than the number of samples, so
    # the default of 30 is lowered for small inputs.
    perplexity = min(30, max(len(features) - 1, 1))
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    tsne_feats = tsne.fit_transform(features)

    # Map class indices back to names by value, not by dict position.
    id2label = {idx: name for name, idx in label2id.items()}
    label_names = [id2label[int(i)] for i in labels]

    # Plot t-SNE
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x=tsne_feats[:, 0], y=tsne_feats[:, 1], hue=label_names, palette="deep", alpha=0.7)
    plt.title("t-SNE of Learned Features")
    plt.legend(loc='best')
    plt.tight_layout()
    plt.show()


In [None]:
# Step 14: Plot Precision-Recall curve per class to assess class-specific confidence
from sklearn.metrics import precision_recall_curve, average_precision_score
from sklearn.preprocessing import label_binarize

def plot_precision_recall(model, dataloader, device, label2id):
    """Plot one precision-recall curve per class (one-vs-rest).

    Args:
        model: Classifier returning logits of shape (batch, n_classes).
        dataloader: Yields `(images, labels)` batches.
        device: Device the image batches are moved to.
        label2id: Mapping from class name to integer class index; defines the
            number of classes and the legend names.
    """
    model.eval()
    all_labels = []
    all_probs = []

    # Gather probabilities and labels
    with torch.no_grad():
        for imgs, labels in dataloader:
            imgs = imgs.to(device)
            outputs = model(imgs)
            probs = torch.softmax(outputs, dim=1)
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Format data for multi-class PR calculation
    all_labels = np.array(all_labels)
    all_probs = np.array(all_probs)
    n_classes = len(label2id)
    # One-hot encode by comparison against every class index. Unlike
    # label_binarize, this always yields one column per class, including the
    # two-class case, so the per-class indexing below is always valid.
    y_true = (all_labels[:, None] == np.arange(n_classes)[None, :]).astype(int)

    id2label = {idx: name for name, idx in label2id.items()}

    # Plot PR curve for each class
    plt.figure(figsize=(10, 6))
    for i in range(n_classes):
        precision, recall, _ = precision_recall_curve(y_true[:, i], all_probs[:, i])
        ap = average_precision_score(y_true[:, i], all_probs[:, i])
        # Fall back to the generic label if an index has no name.
        class_label = id2label.get(i, f"Class {i}")
        plt.plot(recall, precision, label=f"{class_label} (AP={ap:.2f})")

    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Precision-Recall Curve (per class)")
    plt.legend()
    plt.grid()
    plt.tight_layout()
    plt.show()


In [None]:
# Step 15: Print model architecture summary including parameter count and MACs (FLOPs)
from torchinfo import summary

def print_model_summary(model, input_size=(1, 3, 224, 224)):
    """Print a torchinfo layer-by-layer summary of `model`.

    Args:
        model: Model to summarise; it is switched to eval mode.
        input_size: Shape of the dummy input handed to torchinfo.
    """
    # Import under a private alias: the notebook also binds the global name
    # `summary` to the ablation results DataFrame, and re-running that cell
    # would otherwise make this function call a DataFrame.
    from torchinfo import summary as torchinfo_summary

    model.eval()
    print(torchinfo_summary(model, input_size=input_size, col_names=["input_size", "output_size", "num_params", "mult_adds"]))
