In [1]:
import numpy as np
import pandas as pd
import cv2
from os import listdir
from os.path import isfile, join
import matplotlib.pyplot as plt
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader,Subset
import os
import gc
import wandb

from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np

In [2]:
class CustomCNN(nn.Module):
    
    def __init__(self,num_filters,hidden_size,filter_size=3,num_classes=10,activation_function='ReLU',n_blocks=5,filter_organisation_factor=2,dropout=0.4,batch_norm=True):
        """
        Custom CNN model for image classification.
        Args:
            num_filters (int): Number of filters in the first convolutional layer.
            size_filter (int): Size of the convolutional filters.
            num_classes (int): Number of output classes.
            activation (str): Activation function to use .
            n_blocks (int): Number of convolutional blocks.
        """
        super(CustomCNN,self).__init__()
        self.conv = nn.ModuleList()
        self.activation = {"ReLU": nn.ReLU(), "GeLU": nn.GELU(), "SiLU": nn.SiLU(), "Mish": nn.Mish()}
        in_channel = 3
        # convolutional blocks
        for i in range(n_blocks):
            in_channel = num_filters*(filter_organisation_factor**(i-1)) if i>0 else 3
            in_channel = int(in_channel)
            numberOfFilters = num_filters*(filter_organisation_factor**(i))
            numberOfFilters = int(numberOfFilters)
            # start of the convolutional block
            # convolution layer
            self.conv.append(nn.Conv2d(in_channels = in_channel, out_channels=numberOfFilters, kernel_size=filter_size, stride=1))
            # batch normalization layer
            if batch_norm:
                self.conv.append(nn.BatchNorm2d(numberOfFilters))
            # activation layer
            self.conv.append(self.activation[activation_function])
            # max pooling layer
            self.conv.append(nn.MaxPool2d(kernel_size=2, stride=2))
        # flattening last layer
        self.conv.append(nn.AdaptiveAvgPool2d(1))
        self.conv.append(nn.Flatten())
        # fully connected layer
        in_channel = num_filters*(filter_organisation_factor**(n_blocks-1))
        in_channel = int(in_channel)    
        self.conv.append(nn.Linear(in_channel, hidden_size))
        self.conv.append(self.activation['GeLU'])
        # dropout layer
        self.conv.append(nn.Dropout(dropout))
        self.conv.append(nn.Linear(hidden_size, num_classes))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for layer in self.conv:
            x = layer(x)
        return x
    
    def train_model(self, 
                   train_loader: DataLoader, 
                   val_loader: DataLoader, 
                   epochs: int, 
                   learning_rate: float, 
                   device: torch.device,
                   test_loader: DataLoader = None,
                   criterion: nn.Module = nn.CrossEntropyLoss(),
                   optimizer_class: optim.Optimizer = optim.Adam):
        """
        Train the model with accuracy evaluation.
        Args:
            train_loader (DataLoader): DataLoader for training data.
            val_loader (DataLoader): DataLoader for validation data.
            epochs (int): Number of training epochs.
            learning_rate (float): Learning rate for optimizer.
            device (torch.device): Device to train on ('cuda' or 'cpu').
            criterion (nn.Module): Loss function.
            optimizer_class (optim.Optimizer): Optimizer class (e.g., Adam, SGD).
        Returns:
            Dict[str, List[float]]: Dictionary containing training/validation losses and accuracies.
        """
        self.to(device)
        optimizer = optimizer_class(self.parameters(), lr=learning_rate)
        
        history = {
            'train_loss': [],
            'train_acc': [],
            'val_loss': [],
            'val_acc': [],
            'test_acc':[]
        }
        
        for epoch in range(epochs):
            self.train()
            running_loss = 0.0
            running_correct = 0
            total_samples = 0
            
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                batch_size = inputs.size(0)
                total_samples += batch_size
                
                optimizer.zero_grad()
                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                
                running_loss += loss.item() * batch_size
                running_correct += (outputs.argmax(1) == labels).sum().item()
 
                del loss
                del inputs
            # Calculate training metrics
            epoch_train_loss = running_loss / total_samples
            epoch_train_acc = running_correct / total_samples
            history['train_loss'].append(epoch_train_loss)
            history['train_acc'].append(epoch_train_acc)
            
            # Validation
            self.eval()
            val_loss = 0.0
            val_correct = 0
            val_total = 0
            test_correct = 0
            test_total = 0
            
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    batch_size = inputs.size(0)
                    val_total += batch_size
                    
                    outputs = self(inputs)
                    loss = criterion(outputs, labels)
                    
                    val_loss += loss.item() * batch_size
                    val_correct += (outputs.argmax(1) == labels).sum().item()
                    del loss
                    del inputs
            if test_loader is not None:
                with torch.no_grad():
                    for inputs, labels in test_loader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        batch_size = inputs.size(0)
                        test_total += batch_size
                        
                        outputs = self(inputs)
                        loss = criterion(outputs, labels)
                        
                        test_correct += (outputs.argmax(1) == labels).sum().item()
                        del loss
                        del inputs
            # Calculate validation metrics
            epoch_val_loss = val_loss / val_total
            epoch_val_acc = val_correct / val_total
            epoch_test_acc = test_correct / test_total if test_loader is not None else None
            history['val_loss'].append(epoch_val_loss)
            history['val_acc'].append(epoch_val_acc)
            history['test_acc'].append(epoch_test_acc)
            
            # Print metrics
            print(f'Epoch {epoch+1}/{epochs}: '
                  f'Train Loss: {epoch_train_loss:.4f} | Train Acc: {epoch_train_acc:.4f} | '
                  f'Val Loss: {epoch_val_loss:.4f} | Val Acc: {epoch_val_acc:.4f}')
        
 
        return history
    
    def predict(self, 
                test_loader: DataLoader, 
                device: torch.device) :
        """
        Make predictions on test data.
        Args:
            test_loader (DataLoader): DataLoader for test data.
            device (torch.device): Device to use for prediction.
        Returns:
            Tuple[torch.Tensor, torch.Tensor]: Predictions and true labels.
        """
        self.eval()
        self.to(device)
        
        all_preds = []
        all_labels = []
        
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(device)
                outputs = self(inputs)
                _, preds = torch.max(outputs, 1)
                
                all_preds.append(preds.cpu())
                all_labels.append(labels.cpu())
        
        return torch.cat(all_preds), torch.cat(all_labels)

def transform_image(dataAugmentation=False):
    if dataAugmentation:
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomRotation(15),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])
        return transform
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    return transform

def data_loader(data_dir, batch_size, dataAugmentation, num_workers=3):
    # Load the full training dataset
    full_dataset = datasets.ImageFolder(root=os.path.join(data_dir, 'train'), transform=transform_image(dataAugmentation=dataAugmentation))
    targets = full_dataset.targets  # class labels for stratification

    # Stratified split: 80% train, 20% val
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    train_idx, val_idx = next(sss.split(full_dataset.samples, targets))

    # Subsets
    train_dataset = Subset(full_dataset, train_idx)
    val_dataset = Subset(full_dataset, val_idx)

    # Override transform for val set (no augmentation)
    val_dataset.dataset.transform = transform_image(dataAugmentation=False)

    # Test dataset
    test_dataset = datasets.ImageFolder(root=os.path.join(data_dir, 'val'), transform=transform_image(dataAugmentation=False))

    # Data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

    return train_loader, val_loader, test_loader


In [3]:
import os
import torch
import wandb
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
from sklearn.model_selection import StratifiedShuffleSplit

# Assuming CustomCNN, stratified_split, and transform_image are already defined

train_loader, val_loader, test_loader = data_loader(data_dir='data', batch_size=32, dataAugmentation=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# model = CustomCNN(num_filters=64,
#                     hidden_size=128,
#                     filter_organisation_factor=2,
#                     dropout=0.4,
#                     batch_norm=True,
#                     n_blocks=5,
#                     activation_function="ReLU").to(device)

# history = model.train_model(train_loader=train_loader,
#                             val_loader=val_loader,
#                             test_loader=test_loader,
#                             epochs=2,
#                             learning_rate=0.001,
#                             device=device)

In [4]:
# sum([p.numel() for p in model.parameters()])*1e-3

In [7]:
def train():
    wandb.init()
    config = wandb.config

    wandb.run.name = (
        f"n_filters_{config.num_filters}_act_{config.activation_function}_"
        f"fof_{config.filter_organisation_factor}_dropout_{config.dropout}_"
        f"bn_{config.batch_norm}_data_aug_{config.data_aug}_hs_{config.hidden_size}_"
        f"n_blocks_{config.n_blocks}_num_epochs_{config.epochs}"
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = CustomCNN(num_filters=config.num_filters,
                      hidden_size=config.hidden_size,
                      filter_organisation_factor=config.filter_organisation_factor,
                      dropout=config.dropout,
                      batch_norm=config.batch_norm,
                      n_blocks=config.n_blocks,
                      activation_function=config.activation_function).to(device)
    
    history = model.train_model(train_loader=train_loader,
                                val_loader=val_loader,
                                test_loader=test_loader,
                                epochs=config.epochs,
                                learning_rate=0.001,
                                device=device)

    # Log metrics to wandb
    for epoch in range(len(history['train_loss'])):
        wandb.log({
            'train_loss': history['train_loss'][epoch],
            'val_loss': history['val_loss'][epoch],
            'train_acc': history['train_acc'][epoch],
            'val_acc': history['val_acc'][epoch],
            'test_acc': history['test_acc'][epoch],
            'epoch': epoch
        })
    torch.cuda.empty_cache()
    gc.collect()
    del model

In [None]:
sweep_config = {
    "method": "bayes",  
    "metric": {"name": "val_acc", "goal": "maximize"},
    "parameters": {
        "num_filters": {"values": [16]},
        "activation_function": {"values": ["GeLU"]},
        "filter_organisation_factor": {"values": [1.5]},
        "dropout": {"values": [0.4]},
        "batch_norm": {"values": [True]},
        "data_aug": {"values": [True]},
        "hidden_size": {"values": [512]},
        "n_blocks": {"values": [5]},
        "epochs": {"values": [15]},
    }
}

project_name = "iNaturalist-CNN-Optimization"

# Create sweep
sweep_id = wandb.sweep(
    sweep_config,
    project="iNaturalist-CNN-Optimization",
)

# Run agent for N trials
# wandb.agent(sweep_id=sweep_id, function=train, count=1)

Create sweep with ID: enhlczy2
Sweep URL: https://wandb.ai/harshtrivs-indian-institute-of-technology-madras/iNaturalist-CNN-Optimization/sweeps/enhlczy2


wandb: Agent Starting Run: 93rj26b8 with config:
wandb: 	activation_function: GeLU
wandb: 	batch_norm: True
wandb: 	data_aug: True
wandb: 	dropout: 0.4
wandb: 	epochs: 15
wandb: 	filter_organisation_factor: 1.5
wandb: 	hidden_size: 512
wandb: 	n_blocks: 5
wandb: 	num_filters: 16


Epoch 1/15: Train Loss: 2.1346 | Train Acc: 0.2213 | Val Loss: 2.1473 | Val Acc: 0.2215
Epoch 2/15: Train Loss: 2.0252 | Train Acc: 0.2690 | Val Loss: 2.0943 | Val Acc: 0.2615
Epoch 3/15: Train Loss: 1.9857 | Train Acc: 0.2830 | Val Loss: 1.9668 | Val Acc: 0.2915
Epoch 4/15: Train Loss: 1.9360 | Train Acc: 0.3078 | Val Loss: 1.9865 | Val Acc: 0.2945
Epoch 5/15: Train Loss: 1.8952 | Train Acc: 0.3173 | Val Loss: 1.9975 | Val Acc: 0.2985
Epoch 6/15: Train Loss: 1.8616 | Train Acc: 0.3345 | Val Loss: 1.9263 | Val Acc: 0.3315
Epoch 7/15: Train Loss: 1.8307 | Train Acc: 0.3482 | Val Loss: 1.8486 | Val Acc: 0.3580
Epoch 8/15: Train Loss: 1.7909 | Train Acc: 0.3588 | Val Loss: 1.8374 | Val Acc: 0.3720
Epoch 9/15: Train Loss: 1.7555 | Train Acc: 0.3754 | Val Loss: 1.8551 | Val Acc: 0.3715
Epoch 10/15: Train Loss: 1.7287 | Train Acc: 0.3898 | Val Loss: 1.7895 | Val Acc: 0.3730
Epoch 11/15: Train Loss: 1.7003 | Train Acc: 0.3973 | Val Loss: 1.7433 | Val Acc: 0.4025
Epoch 12/15: Train Loss: 1.677

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_acc,▁▂▄▃▄▅▆▇▆▇█▇▇██
train_acc,▁▃▃▄▄▅▅▅▆▆▇▇███
train_loss,█▇▆▅▅▅▄▄▃▃▂▂▂▁▁
val_acc,▁▃▄▄▄▅▆▇▇▇█▇▇██
val_loss,█▇▅▅▅▄▃▃▃▂▁▁▁▁▁

0,1
epoch,14.0
test_acc,0.392
train_acc,0.44031
train_loss,1.58716
val_acc,0.401
val_loss,1.73386


Question 2

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from tqdm import tqdm

# ----------------------------
# Configuration
# ----------------------------
BATCH_SIZE = 32
NUM_CLASSES = 10
EPOCHS = 10
LR = 0.001
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ----------------------------
# Load Dataset
# ----------------------------

train_loader, val_loader, test_loader = data_loader(data_dir='data', batch_size=32, dataAugmentation=True)
# ----------------------------
# Load Pretrained Model
# ----------------------------
model = models.resnet50(pretrained=True)

# Strategy: Freeze all layers except final fc
for param in model.parameters():
    param.requires_grad = False

# Replace final layer for 1010-class classification
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, NUM_CLASSES)

model = model.to(DEVICE)

# ----------------------------
# Loss and Optimizer
# ----------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=LR)

# ----------------------------
# Training Loop
# ----------------------------
def train(model, loader, criterion, optimizer):
    model.train()
    running_loss = 0
    for images, labels in tqdm(loader, desc="Training"):
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(loader)


def validate(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validation"):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            output = model(images)
            _, preds = torch.max(output, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total


for epoch in range(EPOCHS):
    train_loss = train(model, train_loader, criterion, optimizer)
    val_acc = validate(model, val_loader)
    test_acc = validate(model, test_loader)
    print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {train_loss:.4f} | Val Accuracy: {val_acc:.4f} | Test Accuracy: {test_acc:.4f}")

# Optional: Save Model
torch.save(model.state_dict(), "resnet50_inaturalist_finetuned.pth")
