<a href="https://colab.research.google.com/github/Mustaq7777777/DL-Assignment-2/blob/main/partB/dl_assignment2_partb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Statements

In [None]:
import os
import math
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from torch.amp import GradScaler, autocast
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader

Setup and Configuration

In [None]:
# Mount Google Drive for data access
# from google.colab import drive
# drive.mount('/content/drive')

# Authenticate with Weights & Biases. The API key is a secret and must not be
# committed to source control: it is read from the WANDB_API_KEY environment
# variable. When the variable is unset, key=None lets wandb fall back to its
# own credential lookup (cached login / interactive prompt).
wandb.login(key=os.environ.get("WANDB_API_KEY"))

# Define path to dataset
BASE_PATH = '/kaggle/input/nature-12k1/inaturalist_12K'

# Default configuration dictionary with hyperparameters
DEFAULT_CONFIG = {
    "batch_size": 64,                         # Number of samples per batch
    "learning_rate": 1e-4,                    # Learning rate for optimizer
    "augmentation": True,                     # Whether to use data augmentation
}


Utility

In [None]:
# Calculate output dimensions after convolution operation
def calculate_output_dimensions(input_size, kernel_size, stride=1, padding=0):
    """Return the spatial output size of a convolution along one dimension.

    Args:
        input_size: Size of the input along the dimension.
        kernel_size: Size of the convolution kernel along the dimension.
        stride: Step between kernel applications (default 1).
        padding: Padding added to each side of the input (default 0).

    Returns:
        floor((input_size - kernel_size + 2 * padding) / stride) + 1
    """
    padded_span = input_size - kernel_size + 2 * padding
    full_steps = math.floor(padded_span / stride)
    return full_steps + 1

Data Preparation

In [None]:
def get_data_loaders(cfg):
    """
    Prepare data loaders for training, validation and testing.

    The ``train`` folder under BASE_PATH is split 80/20 (stratified by class,
    fixed random seed) into a training and a validation subset. The ``val``
    folder under BASE_PATH is used as the held-out test set.

    Args:
        cfg: Configuration object providing ``img_size``, ``batch_size`` and
            the boolean ``augmentation`` flag.

    Returns:
        Tuple of (train_loader, val_loader, test_loader)
    """
    def build_transform(augmentations=()):
        # Every pipeline shares resize -> tensor -> normalize (standard
        # ImageNet channel mean/std); augmentations, if any, go in between.
        return transforms.Compose([
            transforms.Resize((cfg.img_size, cfg.img_size)),
            *augmentations,
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

    # Deterministic pipeline used for validation and test data
    eval_transforms = build_transform()

    # Training pipeline: augmented only when requested
    if cfg.augmentation:
        train_transforms = build_transform([
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(30),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        ])
    else:
        train_transforms = eval_transforms

    # Load datasets. The train folder is opened twice so that the validation
    # split is NOT passed through the random training augmentations: a Subset
    # inherits the transform of the dataset it wraps. Both views list the same
    # files in the same order, so indices are interchangeable between them.
    train_dir = os.path.join(BASE_PATH, 'train')
    train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
    val_dataset = datasets.ImageFolder(train_dir, transform=eval_transforms)
    test_dataset = datasets.ImageFolder(os.path.join(BASE_PATH, 'val'), transform=eval_transforms)

    # Split training data to create validation set
    train_indices, val_indices = train_test_split(
        list(range(len(train_dataset))),
        test_size=0.2,  # 20% for validation
        stratify=train_dataset.targets,  # Maintain class distribution
        random_state=42  # For reproducibility
    )

    # Create subsets
    train_subset = Subset(train_dataset, train_indices)
    val_subset = Subset(val_dataset, val_indices)

    # Use at most 2 workers to avoid the DataLoader worker-count warning
    loader_kwargs = {
        "batch_size": cfg.batch_size,
        "num_workers": min(2, os.cpu_count() or 1),
        "pin_memory": True,
    }

    # Only the training loader is shuffled
    return (
        DataLoader(train_subset, shuffle=True, **loader_kwargs),
        DataLoader(val_subset, shuffle=False, **loader_kwargs),
        DataLoader(test_dataset, shuffle=False, **loader_kwargs),
    )

Pre-trained Model: ResNet50

In [None]:
def get_model(strategy, num_classes=10):
    """Build an ImageNet-pretrained ResNet-50 prepared for fine-tuning.

    Args:
        strategy: Fine-tuning strategy identifier. Only ``1`` is supported:
            freeze every pretrained parameter and train just a freshly
            initialised final fully-connected layer.
        num_classes: Number of output classes of the new classifier head
            (default 10).

    Returns:
        Tuple ``(model, trainable_params)`` where ``trainable_params`` is an
        iterator over the parameters that should be handed to the optimizer.

    Raises:
        ValueError: If ``strategy`` is not a supported strategy.
    """
    # Validate before loading weights so an unsupported strategy fails fast
    # with a clear message instead of implicitly returning None.
    if strategy != 1:
        raise ValueError(
            f"Unsupported fine-tuning strategy: {strategy!r} (supported: 1)"
        )

    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

    # Strategy 1: freeze all pretrained weights ...
    for param in model.parameters():
        param.requires_grad = False

    # ... then replace the classifier; the new layer is created after the
    # freeze, so its parameters keep requires_grad=True.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model, model.fc.parameters()

Train Function

In [None]:
def _evaluate(model, loader, criterion, device):
    """Return (mean per-batch loss, accuracy in percent) of model over loader."""
    model.eval()  # make the evaluation mode explicit rather than inherited
    loss_sum, correct, total = 0.0, 0, 0

    # No gradient calculation needed for evaluation
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss_sum += criterion(outputs, targets).item()
            correct += (outputs.argmax(1) == targets).sum().item()
            total += targets.size(0)

    return loss_sum / len(loader), 100 * correct / total


def train():
    """Main training function that handles the entire training process.

    Starts a wandb run from DEFAULT_CONFIG, fine-tunes the model returned by
    get_model for a fixed number of epochs, logs train/validation metrics to
    wandb after every epoch, checkpoints the weights with the best validation
    accuracy to ``best_model.pth`` and finally logs and prints the test
    accuracy of that best checkpoint.
    """
    # Initialize wandb with default configuration
    wandb.init(config=DEFAULT_CONFIG, reinit=True)
    cfg = wandb.config

    # Set fixed image size
    cfg.img_size = 224

    # Create run name in the requested format
    wandb.run.name = " batch_size {} augmentation {} learning_rate {}".format(
        cfg.batch_size,
        cfg.augmentation,
        cfg.learning_rate
    )

    # Set device (GPU if available, otherwise CPU)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True  # For faster training

    # Get data loaders
    train_loader, val_loader, test_loader = get_data_loaders(cfg)

    cfg.strategy = 1

    # Initialize model; `params` are the only parameters the optimizer updates
    model, params = get_model(cfg.strategy)
    model = nn.DataParallel(model)
    model = model.to(device)

    optimizer = optim.NAdam(params, lr=cfg.learning_rate)

    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Mixed precision is only enabled on CUDA; on the CPU fallback autocast
    # and the gradient scaler are explicitly disabled (no-ops).
    use_amp = device.type == 'cuda'
    scaler = GradScaler(device.type, enabled=use_amp)

    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint and the final load never hits a missing or stale file.
    best_val_accuracy = float('-inf')
    epochs = 10  # Fixed number of epochs

    # Training loop
    for epoch in range(1, epochs+1):
        # ---------- TRAINING PHASE ----------
        model.train()
        running_loss, correct, total = 0, 0, 0

        # Process batches
        for inputs, targets in tqdm(train_loader, desc=f"Training epoch {epoch}/{epochs}"):
            inputs, targets = inputs.to(device), targets.to(device)

            # Zero gradients
            optimizer.zero_grad()

            # Forward pass with mixed precision
            with autocast(device.type, enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, targets)

            # Backward pass with gradient scaling
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            # Update statistics
            running_loss += loss.item()
            correct += (outputs.argmax(1) == targets).sum().item()
            total += targets.size(0)

        # Calculate epoch metrics
        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total

        # ---------- VALIDATION PHASE ----------
        val_loss, val_accuracy = _evaluate(model, val_loader, criterion, device)

        # Log metrics to wandb
        wandb.log({
            "epoch": epoch,
            "train_accuracy": train_accuracy,
            "train_loss": train_loss,
            "val_accuracy": val_accuracy,
            "val_loss": val_loss
        })

        # Print epoch summary
        print(f"Epoch {epoch}/{epochs}")
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_accuracy:.2f}%")
        print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_accuracy:.2f}%")

        # Save best model
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), "best_model.pth")

    # ---------- TESTING PHASE ----------
    # Load best model for final evaluation
    model.load_state_dict(torch.load("best_model.pth", map_location=device))
    _, test_accuracy = _evaluate(model, test_loader, criterion, device)

    wandb.log({"test_accuracy": test_accuracy})
    print(f"Test Accuracy: {test_accuracy:.2f}%")