In [1]:
print("Hello World")

Hello World


## ðŸŽ¯ Project Summary

- **Dataset**: HMDB51 Fight (75,900 images, 8 classes)
- **Models**: Sequential CNN, MobileNetV2, VGG-16
- **Best Result**: VGG-16 with 71% test accuracy
- **Hardware**: 8x H100/H200 GPUs
- **Goal**: Reproduce paper results with production-ready code


In [2]:
# ============================================================
# ENVIRONMENT SETUP
# ============================================================
# Run in terminal BEFORE Jupyter:
# conda create -n violence_detection python=3.10 -y
# conda activate violence_detection
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# pip install numpy pandas scikit-learn opencv-python pillow matplotlib seaborn kaggle tensorboard tqdm jupyter

import os
import sys
import json
import random
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Callable
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Vision
import torchvision
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from PIL import Image
import cv2

# Data processing
import numpy as np
import pandas as pd

# Metrics
from sklearn.metrics import (
    accuracy_score, confusion_matrix, classification_report,
    roc_auc_score, top_k_accuracy_score
)

# Visualization
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from tqdm import tqdm

print("="*80)
print("VIOLENCE DETECTION - DEEP LEARNING IMPLEMENTATION")
print("Hsairi et al. (2024)")
print("="*80)
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"Number of GPUs: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    for i in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(i)
        print(f"  GPU {i}: {props.name} - {props.total_memory / 1e9:.1f}GB")

VIOLENCE DETECTION - DEEP LEARNING IMPLEMENTATION
Hsairi et al. (2024)
PyTorch Version: 2.9.1+cu128
CUDA Available: True
Number of GPUs: 8
GPU: NVIDIA H200
  GPU 0: NVIDIA H200 - 150.1GB
  GPU 1: NVIDIA H200 - 150.1GB
  GPU 2: NVIDIA H200 - 150.1GB
  GPU 3: NVIDIA H200 - 150.1GB
  GPU 4: NVIDIA H200 - 150.1GB
  GPU 5: NVIDIA H200 - 150.1GB
  GPU 6: NVIDIA H200 - 150.1GB
  GPU 7: NVIDIA H200 - 150.1GB


## CELL 2: Global Configuration


In [17]:
# ============================================================
# GLOBAL CONFIGURATION - ALL HYPERPARAMETERS
# ============================================================

CONFIG = {
    # Dataset - Updated to match actual folder structure
    'dataset_path': './fight_dataset/actions (2)/actions',
    'kaggle_dataset': 'anbumalar1991/fight-dataset',
    'models_dir': './models',
    'results_dir': './results',
    
    # Image processing
    'image_size': 224,  # VGG-16 standard
    'imagenet_mean': [0.485, 0.456, 0.406],
    'imagenet_std': [0.229, 0.224, 0.225],
    
    # Data split (from paper)
    'train_ratio': 0.72,
    'val_ratio': 0.20,
    'test_ratio': 0.08,
    
    # Classes (8 from HMDB51 Fight) - Updated with correct names
    'classes': {
        0: 'hit', 1: 'kick', 2: 'punch', 3: 'shoot_gun',
        4: 'push', 5: 'ride_horse', 6: 'stand', 7: 'wave'
    },

    'num_classes': 8,
    'violence_classes': [0, 1, 2, 3],
    'non_violence_classes': [4, 5, 6, 7],
    
    # Multi-GPU
    'num_gpus': torch.cuda.device_count(),
    'distributed': torch.cuda.device_count() > 1,
    'backend': 'nccl',
    
    # Training
    'epochs': 50,
    'early_stopping_patience': 10,
    'lr_scheduler_patience': 5,
    'lr_scheduler_factor': 0.5,
    'batch_size_per_gpu': 64,  # Total = 64 * 8 = 512
    
    # S-CNN hyperparameters
    'scnn': {
        'learning_rate': 0.001,
        'dropout_rates': [0.4, 0.4, 0.2],
        'dense_units': [128, 64],
        'conv_filters': [64, 128, 32, 64],
    },
    
    # MobileNetV2 hyperparameters
    'mobilenetv2': {
        'learning_rate': 0.001,
        'dropout_rate': 0.2,
        'weight_decay': 1e-4,  # L2 regularization
        'freeze_backbone': True,
        'unfreeze_at_epoch': 20,
    },
    
    # VGG-16 hyperparameters (CRITICAL!)
    'vgg16': {
        'learning_rate': 0.0001,  # NOT 0.001!
        'fc_units': 500,  # NOT 2048 or 1000!
        'dropout_rate': 0.1,
        'weight_decay': 0.0,
        'freeze_backbone': True,
        'unfreeze_at_epoch': 20,
    },
    
    # Other settings
    'optimizer': 'adam',
    'adam_betas': (0.9, 0.999),
    'loss_function': 'crossentropy',
    'label_smoothing': 0.1,
    'random_seed': 42,
    'deterministic': True,
    'metrics': ['accuracy', 'auc', 'top_k'],
    'top_k_values': [2, 3],
    'save_plots': True,
}

# Create directories
Path(CONFIG['models_dir']).mkdir(exist_ok=True)
Path(CONFIG['results_dir']).mkdir(exist_ok=True)

print(f"Configuration loaded:")
print(f"  - Dataset path: {CONFIG['dataset_path']}")
print(f"  - Image size: {CONFIG['image_size']}x{CONFIG['image_size']}")
print(f"  - Epochs: {CONFIG['epochs']}")
print(f"  - Batch size per GPU: {CONFIG['batch_size_per_gpu']}")
print(f"  - Total batch size (8 GPUs): {CONFIG['batch_size_per_gpu'] * 8}")
print(f"  - VGG-16 FC units: {CONFIG['vgg16']['fc_units']}")
print(f"  - VGG-16 Learning Rate: {CONFIG['vgg16']['learning_rate']}")

Configuration loaded:
  - Dataset path: ./fight_dataset/actions (2)/actions
  - Image size: 224x224
  - Epochs: 50
  - Batch size per GPU: 64
  - Total batch size (8 GPUs): 512
  - VGG-16 FC units: 500
  - VGG-16 Learning Rate: 0.0001


In [13]:
from kaggle.api.kaggle_api_extended import KaggleApi
import kaggle

# REPLACE THESE WITH YOUR ACTUAL KAGGLE CREDENTIALS
os.environ['KAGGLE_USERNAME'] = "Baari" 
os.environ['KAGGLE_KEY'] = "KGAT_df4cc0be6e9be3f8e28c2242065a2fec"

def download_dataset():
    """Download and extract the dataset from Kaggle if not present."""
    dataset_dir = Path(CONFIG['dataset_path'])
    
    # Check if dataset already exists and is not empty
    if dataset_dir.exists() and any(dataset_dir.iterdir()):
        print(f"Dataset already exists at {dataset_dir}")
        return

    print(f"Downloading dataset {CONFIG['kaggle_dataset']}...")
    dataset_dir.mkdir(parents=True, exist_ok=True)
    
    try:
        auth = KaggleApi()
        auth.authenticate()
        
        # Download and unzip
        kaggle.api.dataset_download_files(
            CONFIG['kaggle_dataset'],
            path=dataset_dir,
            unzip=True,
            quiet=False
        )
        print("Dataset downloaded and extracted successfully.")
        
    except ImportError:
        print("Error: 'kaggle' library not found. Please install it via 'pip install kaggle'.")
    except Exception as e:
        print(f"Error downloading dataset: {e}")
        print("Ensure you have placed your 'kaggle.json' API token in ~/.kaggle/")

download_dataset()

Downloading dataset anbumalar1991/fight-dataset...
Dataset URL: https://www.kaggle.com/datasets/anbumalar1991/fight-dataset
Downloading fight-dataset.zip to fight_dataset


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1.80G/1.80G [00:01<00:00, 1.63GB/s]



Dataset downloaded and extracted successfully.


## CELL 3: Reproducibility Setup


In [4]:
def set_seeds(seed: int) -> None:
    """Set random seeds for reproducibility."""
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    
    if CONFIG['deterministic']:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

set_seeds(CONFIG['random_seed'])
print(f"Random seeds set to {CONFIG['random_seed']}")

Random seeds set to 42


## CELL 4: Data Loading & Preprocessing


In [19]:
class FightDataset(Dataset):
    """Custom dataset for HMDB51 Fight images with augmentation."""
    
    def __init__(self, image_paths: List[str], labels: List[int], 
                 transform: Optional[transforms.Compose] = None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
    
    def __len__(self) -> int:
        return len(self.image_paths)
    
    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        
        # Load image
        image = Image.open(img_path).convert('RGB')
        
        # Apply transforms
        if self.transform:
            image = self.transform(image)
        
        return image, label


def load_split_from_folder(base_path: Path, split: str) -> Tuple[List[str], List[int]]:
    """Load images from train or test folder structure.
    
    Args:
        base_path: Root path containing train/test folders
        split: Either 'train' or 'test'
    
    Returns:
        Tuple of (image_paths, labels)
    """
    image_paths = []
    labels = []
    
    class_to_idx = CONFIG['classes']
    idx_to_class = {v: k for k, v in class_to_idx.items()}
    
    split_path = base_path / split
    
    if not split_path.exists():
        raise ValueError(f"Split path does not exist: {split_path}")
    
    for class_name, class_idx in idx_to_class.items():
        class_dir = split_path / class_name
        if class_dir.exists():
            # Support both .jpg and .png files
            for img_file in list(class_dir.glob('*.jpg')) + list(class_dir.glob('*.png')):
                image_paths.append(str(img_file))
                labels.append(class_idx)
    
    return image_paths, labels


def create_data_loaders(dataset_path: str, batch_size: int) -> Tuple[DataLoader, DataLoader, DataLoader]:
    """Create training, validation, and test data loaders from train/test folders.
    
    This version works with pre-split train/test folders and creates a validation
    set by splitting part of the training data.
    """
    
    base_path = Path(dataset_path)
    
    # Load train and test sets
    train_paths, train_labels = load_split_from_folder(base_path, 'train')
    test_paths, test_labels = load_split_from_folder(base_path, 'test')
    
    # Split training set into train and validation
    n_train = len(train_paths)
    # Use 80% of train for actual training, 20% for validation
    val_split_ratio = 0.2
    val_size = int(val_split_ratio * n_train)
    
    # Shuffle training data
    indices = np.arange(n_train)
    np.random.shuffle(indices)
    
    val_indices = indices[:val_size]
    actual_train_indices = indices[val_size:]
    
    # Define transforms
    train_transform = transforms.Compose([
        transforms.Resize((CONFIG['image_size'], CONFIG['image_size'])),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=10),
        transforms.RandomResizedCrop(size=CONFIG['image_size'], scale=(0.8, 1.0)),
        transforms.ToTensor(),
        transforms.Normalize(mean=CONFIG['imagenet_mean'], 
                           std=CONFIG['imagenet_std']),
    ])
    
    val_test_transform = transforms.Compose([
        transforms.Resize((CONFIG['image_size'], CONFIG['image_size'])),
        transforms.ToTensor(),
        transforms.Normalize(mean=CONFIG['imagenet_mean'], 
                           std=CONFIG['imagenet_std']),
    ])
    
    # Create datasets
    train_dataset = FightDataset(
        [train_paths[i] for i in actual_train_indices],
        [train_labels[i] for i in actual_train_indices],
        train_transform
    )
    
    val_dataset = FightDataset(
        [train_paths[i] for i in val_indices],
        [train_labels[i] for i in val_indices],
        val_test_transform
    )
    
    test_dataset = FightDataset(
        test_paths,
        test_labels,
        val_test_transform
    )
    
    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    
    print(f"Dataset split:")
    print(f"  - Training: {len(train_dataset)} images")
    print(f"  - Validation: {len(val_dataset)} images")
    print(f"  - Testing: {len(test_dataset)} images")
    print(f"  - Total: {len(train_dataset) + len(val_dataset) + len(test_dataset)} images")
    
    # Print class distribution
    print(f"\nClass distribution in training set:")
    train_class_counts = defaultdict(int)
    for label in [train_labels[i] for i in actual_train_indices]:
        train_class_counts[CONFIG['classes'][label]] += 1
    for class_name, count in sorted(train_class_counts.items()):
        print(f"  - {class_name}: {count} images")
    
    return train_loader, val_loader, test_loader


# Load dataset
train_loader, val_loader, test_loader = create_data_loaders(
    CONFIG['dataset_path'],
    CONFIG['batch_size_per_gpu']
)

Dataset split:
  - Training: 54439 images
  - Validation: 13609 images
  - Testing: 7807 images
  - Total: 75855 images

Class distribution in training set:
  - hit: 2983 images
  - kick: 4331 images
  - punch: 7001 images
  - push: 7634 images
  - ride_horse: 12454 images
  - shoot_gun: 7254 images
  - stand: 8314 images
  - wave: 4468 images


## CELL 5: Model Architecture - Sequential CNN


In [20]:
class SequentialCNN(nn.Module):
    """
    Sequential CNN for violence detection.
    
    Architecture (from paper):
    - Conv2D(64, 3x3) + ReLU + Conv2D(128, 3x3) + ReLU + MaxPool + Dropout(0.4)
    - Conv2D(32, 3x3) + ReLU + MaxPool + Dropout(0.4)
    - Conv2D(64, 3x3) + ReLU + Dropout(0.2) + MaxPool
    - Flatten + Dense(128) + Dense(64) + Output(8)
    
    Key innovation: Strategic dropout placement prevents overfitting
    Paper result: 63% test accuracy
    """
    
    def __init__(self, num_classes: int = 8):
        super().__init__()
        
        # First block
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(0.4)  # After first maxpool
        
        # Second block
        self.conv2_1 = nn.Conv2d(128, 32, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(0.4)  # After second maxpool
        
        # Third block
        self.conv3_1 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.dropout3 = nn.Dropout(0.2)  # Before third maxpool
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Calculate flattened size (224 -> 56 -> 28 -> 14)
        self.fc1 = nn.Linear(64 * 14 * 14, 128)
        self.dropout_fc = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # First block
        x = F.relu(self.conv1_1(x))
        x = F.relu(self.conv1_2(x))
        x = self.maxpool1(x)
        x = self.dropout1(x)
        
        # Second block
        x = F.relu(self.conv2_1(x))
        x = self.maxpool2(x)
        x = self.dropout2(x)
        
        # Third block
        x = F.relu(self.conv3_1(x))
        x = self.dropout3(x)
        x = self.maxpool3(x)
        
        # Flatten and dense layers
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout_fc(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return x

## CELL 6: Model Architecture - MobileNetV2


In [21]:
class MobileNetV2Classifier(nn.Module):
    """
    MobileNetV2 with custom head for violence detection.
    
    Modifications (from paper):
    - L2 regularization (weight_decay=1e-4)
    - Batch normalization in custom head
    - Dropout(0.2) for regularization
    
    Paper result: 69% test accuracy
    """
    
    def __init__(self, num_classes: int = 8):
        super().__init__()
        
        # Load pretrained MobileNetV2
        self.backbone = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)
        
        # Remove original classification head
        self.backbone.classifier = nn.Sequential()
        
        # Get feature dimension (1280 for MobileNetV2)
        num_features = 1280
        
        # Custom classifier head with batch norm and dropout
        self.classifier = nn.Sequential(
            nn.BatchNorm1d(num_features),
            nn.Dropout(0.2),  # As in paper
            nn.Linear(num_features, num_classes)
        )
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Extract features from backbone
        x = self.backbone.features(x)
        # Global average pooling
        x = nn.functional.adaptive_avg_pool2d(x, 1)
        x = torch.flatten(x, 1)
        # Classification head
        x = self.classifier(x)
        return x

## CELL 7: Model Architecture - VGG-16 (BEST MODEL)


In [22]:
class VGG16Classifier(nn.Module):
    """
    VGG-16 fine-tuned for violence detection (BEST PERFORMER).
    
    CRITICAL HYPERPARAMETERS (from paper experiments):
    - FC layer size: 500 units (NOT 2048 or 1000)
      * 2048 units -> 15% accuracy (SEVERE OVERFITTING)
      * 1000 units -> 69% accuracy
      * 500 units -> 71% TEST ACCURACY âœ“
    
    - Learning rate: 0.0001 (NOT default 0.001)
      * 0.001 -> Model diverges (15% accuracy)
      * 0.0001 -> 71% test accuracy âœ“
    
    - Dropout: 0.1 in head
    
    Paper result: 71% test accuracy (SOTA for this dataset)
    """
    
    def __init__(self, num_classes: int = 8):
        super().__init__()
        
        # Load pretrained VGG-16
        self.backbone = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
        
        # Remove original classifier
        self.backbone.classifier = nn.Sequential()
        
        # Get feature dimension (25088 after flattening)
        num_features = 25088
        
        # Custom classifier head with the critical FC=500
        self.classifier = nn.Sequential(
            nn.Linear(num_features, 500),  # CRITICAL: 500, not 2048!
            nn.ReLU(inplace=True),
            nn.Dropout(0.1),  # As in paper
            nn.Linear(500, num_classes)
        )
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Extract features using VGG backbone
        x = self.backbone.features(x)
        # Flatten
        x = torch.flatten(x, 1)
        # Classification head
        x = self.classifier(x)
        return x

## CELL 8: Training Function


In [23]:
def train_epoch(model: nn.Module, 
                train_loader: DataLoader,
                criterion: nn.Module,
                optimizer: optim.Optimizer,
                device: torch.device,
                model_name: str = "model") -> Tuple[float, float]:
    """Train for one epoch."""
    
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0
    
    progress_bar = tqdm(train_loader, desc=f"[{model_name}] Training")
    
    for images, labels in progress_bar:
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Statistics
        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        progress_bar.set_postfix({'loss': loss.item()})
    
    epoch_loss = total_loss / len(train_loader)
    epoch_acc = 100 * correct / total
    
    return epoch_loss, epoch_acc


def validate(model: nn.Module,
             val_loader: DataLoader,
             criterion: nn.Module,
             device: torch.device,
             model_name: str = "model") -> Tuple[float, float]:
    """Validate model."""
    
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        progress_bar = tqdm(val_loader, desc=f"[{model_name}] Validating")
        
        for images, labels in progress_bar:
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            progress_bar.set_postfix({'loss': loss.item()})
    
    epoch_loss = total_loss / len(val_loader)
    epoch_acc = 100 * correct / total
    
    return epoch_loss, epoch_acc


def train_model(model: nn.Module,
                train_loader: DataLoader,
                val_loader: DataLoader,
                model_name: str,
                hyperparams: Dict,
                num_epochs: int = 50,
                device: torch.device = None) -> Dict:
    """Complete training loop with early stopping."""
    
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    model = model.to(device)
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss(label_smoothing=CONFIG['label_smoothing'])
    optimizer = optim.Adam(
        model.parameters(),
        lr=hyperparams['learning_rate'],
        betas=CONFIG['adam_betas']
    )
    
    # Learning rate scheduler
    scheduler = ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=CONFIG['lr_scheduler_factor'],
        patience=CONFIG['lr_scheduler_patience'],
        verbose=True
    )
    
    # Training history
    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'best_epoch': 0,
        'best_val_acc': 0.0
    }
    
    # Early stopping
    patience_counter = 0
    best_val_acc = 0.0
    
    print(f"\n{'='*80}")
    print(f"Training {model_name}")
    print(f"{'='*80}")
    print(f"Learning Rate: {hyperparams['learning_rate']}")
    print(f"Batch Size: {CONFIG['batch_size_per_gpu']}")
    print(f"Epochs: {num_epochs}\n")
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        
        # Train
        train_loss, train_acc = train_epoch(
            model, train_loader, criterion, optimizer, device, model_name
        )
        
        # Validate
        val_loss, val_acc = validate(
            model, val_loader, criterion, device, model_name
        )
        
        # Store history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
        
        # Scheduler step
        scheduler.step(val_loss)
        
        # Best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            history['best_epoch'] = epoch + 1
            history['best_val_acc'] = val_acc
            patience_counter = 0
            
            # Save checkpoint
            checkpoint_path = f"{CONFIG['models_dir']}/{model_name}_best.pt"
            torch.save(model.state_dict(), checkpoint_path)
            print(f"âœ“ Best model saved at epoch {epoch+1}")
        else:
            patience_counter += 1
        
        # Early stopping
        if patience_counter >= CONFIG['early_stopping_patience']:
            print(f"\nEarly stopping at epoch {epoch+1}")
            break
    
    # Load best model
    best_model_path = f"{CONFIG['models_dir']}/{model_name}_best.pt"
    model.load_state_dict(torch.load(best_model_path))
    
    return history, model

# CELL 9: Evaluation Metrics

In [24]:
def evaluate_model(model: nn.Module,
                   test_loader: DataLoader,
                   device: torch.device,
                   model_name: str) -> Dict:
    """Comprehensive evaluation on test set."""
    
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []
    
    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc=f"Evaluating {model_name}"):
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            probs = torch.softmax(outputs, dim=1)
            
            _, preds = torch.max(outputs, 1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
    
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    all_probs = np.array(all_probs)
    
    # Metrics
    accuracy = accuracy_score(all_labels, all_preds)
    
    # One-vs-rest AUC
    auc_scores = []
    for i in range(CONFIG['num_classes']):
        try:
            auc = roc_auc_score((all_labels == i).astype(int), all_probs[:, i])
            auc_scores.append(auc)
        except:
            pass
    mean_auc = np.mean(auc_scores) if auc_scores else 0.0
    
    # Top-K accuracy
    top_k_acc = {}
    for k in CONFIG['top_k_values']:
        try:
            top_k_acc[f'top_{k}'] = top_k_accuracy_score(all_labels, all_probs, k=k)
        except:
            top_k_acc[f'top_{k}'] = 0.0
    
    # Confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    
    # Classification report
    report = classification_report(all_labels, all_preds, 
                                  target_names=[CONFIG['classes'][i] for i in range(CONFIG['num_classes'])])
    
    metrics = {
        'accuracy': accuracy,
        'mean_auc': mean_auc,
        'auc_per_class': auc_scores,
        'top_k_accuracy': top_k_acc,
        'confusion_matrix': cm,
        'classification_report': report,
        'predictions': all_preds,
        'labels': all_labels,
        'probabilities': all_probs
    }
    
    print(f"\n{model_name} - Test Results:")
    print(f"  Accuracy: {accuracy:.4f}")
    print(f"  Mean AUC: {mean_auc:.4f}")
    for k, v in top_k_acc.items():
        print(f"  {k} Accuracy: {v:.4f}")
    
    return metrics

# CELL 10: Train All Models

In [25]:
# Initialize models
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train Sequential CNN
scnn = SequentialCNN(CONFIG['num_classes'])
scnn_history, scnn_model = train_model(
    scnn,
    train_loader,
    val_loader,
    "Sequential_CNN",
    CONFIG['scnn'],
    num_epochs=CONFIG['epochs'],
    device=device
)

# Evaluate S-CNN
scnn_metrics = evaluate_model(scnn_model, test_loader, device, "Sequential_CNN")

# Train MobileNetV2
mobilenet = MobileNetV2Classifier(CONFIG['num_classes'])
mobilenet_history, mobilenet_model = train_model(
    mobilenet,
    train_loader,
    val_loader,
    "MobileNetV2",
    CONFIG['mobilenetv2'],
    num_epochs=CONFIG['epochs'],
    device=device
)

# Evaluate MobileNetV2
mobilenet_metrics = evaluate_model(mobilenet_model, test_loader, device, "MobileNetV2")

# Train VGG-16 (CRITICAL: Use specific hyperparameters)
vgg16 = VGG16Classifier(CONFIG['num_classes'])
vgg16_history, vgg16_model = train_model(
    vgg16,
    train_loader,
    val_loader,
    "VGG16",
    CONFIG['vgg16'],
    num_epochs=CONFIG['epochs'],
    device=device
)

# Evaluate VGG-16
vgg16_metrics = evaluate_model(vgg16_model, test_loader, device, "VGG-16")

TypeError: ReduceLROnPlateau.__init__() got an unexpected keyword argument 'verbose'