Importing required Libraries

In [None]:
import os
import math
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from torch.amp import GradScaler, autocast

Setup and Configuration

In [None]:
# Mount Google Drive for data access.
# NOTE: google.colab is only importable inside a Google Colab runtime, so this
# cell is Colab-specific.
from google.colab import drive
drive.mount('/content/drive')

# Define path to dataset: root folder on the mounted drive. get_data_loaders
# reads its 'train' and 'val' subdirectories.
BASE_PATH = '/content/drive/MyDrive/DL-Assignment2-data/inaturalist_12K'

Utility Functions

In [None]:
# Spatial size of a feature map after a sliding-window (conv / pooling) operation
def calculate_output_dimensions(input_size, kernel_size, stride=1, padding=0):
    """Return the output length along one spatial axis.

    Evaluates floor((input_size - kernel_size + 2 * padding) / stride) + 1.

    Args:
        input_size: Length of the input along the axis.
        kernel_size: Length of the window along the same axis.
        stride: Step between consecutive window positions.
        padding: Padding added to each side of the input.

    Returns:
        Number of window positions, i.e. the output length.
    """
    padded_span = input_size - kernel_size + 2 * padding
    full_steps = math.floor(padded_span / stride)
    return full_steps + 1

Data Preparation (train, validation and test)

In [None]:
def get_data_loaders(cfg):
    """
    Prepare data loaders for training, validation and testing.

    The images under ``BASE_PATH/train`` are split 80/20 (stratified by class,
    fixed random seed) into a training and a validation subset, and
    ``BASE_PATH/val`` is used as the test set. Random augmentation, when
    enabled, is applied to the training subset only; validation and test
    images always go through the deterministic resize/normalize pipeline.

    Args:
        cfg: Configuration object containing data parameters. Reads
            ``img_size`` (images are resized to img_size x img_size),
            ``augmentation`` (truthy to enable training-time augmentation)
            and ``batch_size``.

    Returns:
        Tuple of (train_loader, val_loader, test_loader)
    """
    # Steps shared by every pipeline. Normalisation uses the standard
    # ImageNet per-channel mean and std.
    resize = transforms.Resize((cfg.img_size, cfg.img_size))
    tensor_steps = [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]

    # Random augmentation is inserted between resizing and tensor conversion
    # and is only ever part of the training pipeline.
    augmentation_steps = []
    if cfg.augmentation:
        augmentation_steps = [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(30),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        ]

    train_transforms = transforms.Compose([resize, *augmentation_steps, *tensor_steps])
    eval_transforms = transforms.Compose([resize, *tensor_steps])

    # Load datasets. The training folder is indexed twice: a Subset shares the
    # transform of its parent dataset, so the validation subset needs its own
    # parent that uses the deterministic transforms. Both ImageFolder
    # instances list the same directory in the same sorted order, so an index
    # refers to the same image in either of them.
    train_dir = os.path.join(BASE_PATH, 'train')
    train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
    val_dataset = datasets.ImageFolder(train_dir, transform=eval_transforms)
    test_dataset = datasets.ImageFolder(os.path.join(BASE_PATH, 'val'), transform=eval_transforms)

    # Split training data to create validation set
    indices = list(range(len(train_dataset)))
    train_indices, val_indices = train_test_split(
        indices,
        test_size=0.2,  # 20% for validation
        stratify=train_dataset.targets,  # Maintain class distribution
        random_state=42  # For reproducibility
    )

    # Create subsets (disjoint index sets over the two views of the folder)
    train_subset = Subset(train_dataset, train_indices)
    val_subset = Subset(val_dataset, val_indices)

    # Use at most 2 workers to avoid the DataLoader "too many workers" warning
    num_workers = min(2, os.cpu_count() or 1)

    # Create and return data loaders; only the training loader shuffles
    return (
        DataLoader(train_subset, batch_size=cfg.batch_size, shuffle=True,
                   num_workers=num_workers, pin_memory=True),
        DataLoader(val_subset, batch_size=cfg.batch_size, shuffle=False,
                   num_workers=num_workers, pin_memory=True),
        DataLoader(test_dataset, batch_size=cfg.batch_size, shuffle=False,
                   num_workers=num_workers, pin_memory=True)
    )

Convolutional Neural Network implemented as a class

In [None]:
class CNN(nn.Module):
    """
    Convolutional Neural Network with configurable architecture
    - Variable number of convolutional layers
    - Configurable filter sizes and counts
    - Choice of activation functions
    - Optional batch normalization
    - One or several fully connected hidden layers

    Every convolutional block is Conv2d(padding=1) -> [BatchNorm2d] ->
    activation -> MaxPool2d(2, 2). The resulting feature maps are adaptively
    average-pooled to 6x6, flattened and passed to the classifier, which
    outputs 10 class scores.
    """

    def __init__(self, cfg):
        """
        Build the network from a configuration object.

        Args:
            cfg: Object exposing ``num_filters`` and ``filter_sizes``
                (parallel sequences, one entry per convolutional block; they
                are zipped, so surplus entries of the longer one are ignored),
                ``batch_norm`` (bool), ``activation`` (str),
                ``fc_hidden_sizes`` (int, or sequence of ints for several
                hidden layers) and ``dropout`` (dropout probability).
        """
        super().__init__()
        self.cfg = cfg

        # Convolutional feature extractor, one block per configured layer
        self.conv_blocks = nn.ModuleList()

        in_channels = 3  # RGB input
        for out_channels, kernel_size in zip(cfg.num_filters, cfg.filter_sizes):
            self.conv_blocks.append(
                self._create_conv_block(
                    in_channels,
                    out_channels,
                    kernel_size,
                    use_batchnorm=cfg.batch_norm,
                    activation=cfg.activation
                )
            )
            # The next block consumes what this one produces
            in_channels = out_channels

        # Adaptive pooling ensures fixed size regardless of input dimensions,
        # so the classifier input size depends only on the channel count and
        # no spatial-size bookkeeping is needed here.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((6, 6))

        # Fully connected classification layers
        self.classifier = self._create_classifier(
            in_channels * 6 * 6,  # Flattened feature maps
            cfg.fc_hidden_sizes,
            10,  # Number of classes
            cfg.dropout,
            cfg.batch_norm,
            cfg.activation
        )

    def _create_conv_block(self, in_channels, out_channels, kernel_size,
                           use_batchnorm=True, activation='relu'):
        """
        Create a convolutional block with optional batch normalization.

        Args:
            in_channels: Number of input channels.
            out_channels: Number of filters of the convolution.
            kernel_size: Kernel size passed to ``nn.Conv2d``.
            use_batchnorm: Insert ``nn.BatchNorm2d`` after the convolution.
            activation: Name of the activation function.

        Returns:
            ``nn.Sequential`` of conv -> [batchnorm] -> activation -> 2x2 max-pool.
        """
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size, padding=1),
        ]

        if use_batchnorm:
            layers.append(nn.BatchNorm2d(out_channels))

        layers.append(self._get_activation_function(activation))
        layers.append(nn.MaxPool2d(2, 2))

        return nn.Sequential(*layers)

    def _create_classifier(self, in_features, hidden_size, num_classes,
                           dropout_rate, use_batchnorm, activation):
        """
        Create the classifier part of the network.

        Args:
            in_features: Size of the flattened input vector.
            hidden_size: Width of the hidden layer, or a sequence of widths
                to stack several hidden layers. An empty sequence yields a
                single linear layer from ``in_features`` to ``num_classes``.
            num_classes: Number of output scores.
            dropout_rate: Dropout probability applied after each hidden layer.
            use_batchnorm: Insert ``nn.BatchNorm1d`` after each hidden linear layer.
            activation: Name of the activation function.

        Returns:
            ``nn.Sequential`` of (Linear -> [BatchNorm1d] -> activation ->
            Dropout) per hidden layer, followed by the output Linear layer.
        """
        # Accept both a single width and a sequence of widths. A single width
        # produces exactly the layer layout (and state-dict keys) of a
        # one-hidden-layer classifier.
        try:
            hidden_sizes = list(hidden_size)
        except TypeError:
            hidden_sizes = [hidden_size]

        layers = []
        previous_width = in_features
        for width in hidden_sizes:
            layers.append(nn.Linear(previous_width, width))
            if use_batchnorm:
                layers.append(nn.BatchNorm1d(width))
            layers.append(self._get_activation_function(activation))
            layers.append(nn.Dropout(dropout_rate))
            previous_width = width

        layers.append(nn.Linear(previous_width, num_classes))

        return nn.Sequential(*layers)

    def _get_activation_function(self, name):
        """
        Return a new activation module for the given name.

        Args:
            name: Case-insensitive name; one of 'relu', 'gelu', 'silu',
                'mish', 'elu', 'selu'.

        Returns:
            A freshly constructed activation module. Unknown names fall back
            to ``nn.ReLU`` and emit a warning so the misconfiguration is
            visible instead of silent.
        """
        # Map names to classes so only the requested module is instantiated
        activation_classes = {
            'relu': nn.ReLU,
            'gelu': nn.GELU,
            'silu': nn.SiLU,
            'mish': nn.Mish,
            'elu': nn.ELU,
            'selu': nn.SELU
        }
        activation_cls = activation_classes.get(name.lower())
        if activation_cls is None:
            import warnings  # only needed on the fallback path

            warnings.warn(
                f"Unknown activation {name!r}; falling back to ReLU",
                stacklevel=2,
            )
            activation_cls = nn.ReLU
        return activation_cls()

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x: Input batch; the first convolution expects 3 channels.

        Returns:
            Output of the classifier (10 scores per sample).
        """
        # Pass input through convolutional blocks
        for block in self.conv_blocks:
            x = block(x)

        # Pool to a fixed 6x6 grid and flatten everything but the batch axis
        x = self.adaptive_pool(x)
        x = torch.flatten(x, 1)

        # Classification
        return self.classifier(x)