In [1]:
import numpy as np
import pandas as pd
import cv2
from os import listdir
from os.path import isfile, join
import matplotlib.pyplot as plt
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader,Subset
import os
import gc
import wandb

from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np

In [2]:
class CustomCNN(nn.Module):
    
    def __init__(self,num_filters,hidden_size,filter_size=3,num_classes=10,activation_function='ReLU',n_blocks=5,filter_organisation_factor=2,dropout=0.4,batch_norm=True):
        """
        Custom CNN model for image classification.
        Args:
            num_filters (int): Number of filters in the first convolutional layer.
            size_filter (int): Size of the convolutional filters.
            num_classes (int): Number of output classes.
            activation (str): Activation function to use .
            n_blocks (int): Number of convolutional blocks.
        """
        super(CustomCNN,self).__init__()
        self.conv = nn.ModuleList()
        self.activation = {"ReLU": nn.ReLU(), "GeLU": nn.GELU(), "SiLU": nn.SiLU(), "Mish": nn.Mish()}
        in_channel = 3
        # convolutional blocks
        for i in range(n_blocks):
            in_channel = num_filters*(filter_organisation_factor**(i-1)) if i>0 else 3
            in_channel = int(in_channel)
            numberOfFilters = num_filters*(filter_organisation_factor**(i))
            numberOfFilters = int(numberOfFilters)
            # start of the convolutional block
            # convolution layer
            self.conv.append(nn.Conv2d(in_channels = in_channel, out_channels=numberOfFilters, kernel_size=filter_size, stride=1))
            # batch normalization layer
            if batch_norm:
                self.conv.append(nn.BatchNorm2d(numberOfFilters))
            # activation layer
            self.conv.append(self.activation[activation_function])
            # max pooling layer
            self.conv.append(nn.MaxPool2d(kernel_size=2, stride=2))
        # flattening last layer
        self.conv.append(nn.AdaptiveAvgPool2d(1))
        self.conv.append(nn.Flatten())
        # fully connected layer
        in_channel = num_filters*(filter_organisation_factor**(n_blocks-1))
        in_channel = int(in_channel)    
        self.conv.append(nn.Linear(in_channel, hidden_size))
        self.conv.append(self.activation['GeLU'])
        # dropout layer
        self.conv.append(nn.Dropout(dropout))
        self.conv.append(nn.Linear(hidden_size, num_classes))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for layer in self.conv:
            x = layer(x)
        return x
    
    def train_model(self, 
                   train_loader: DataLoader, 
                   val_loader: DataLoader, 
                   epochs: int, 
                   learning_rate: float, 
                   device: torch.device,
                   criterion: nn.Module = nn.CrossEntropyLoss(),
                   optimizer_class: optim.Optimizer = optim.Adam):
        """
        Train the model with accuracy evaluation.
        Args:
            train_loader (DataLoader): DataLoader for training data.
            val_loader (DataLoader): DataLoader for validation data.
            epochs (int): Number of training epochs.
            learning_rate (float): Learning rate for optimizer.
            device (torch.device): Device to train on ('cuda' or 'cpu').
            criterion (nn.Module): Loss function.
            optimizer_class (optim.Optimizer): Optimizer class (e.g., Adam, SGD).
        Returns:
            Dict[str, List[float]]: Dictionary containing training/validation losses and accuracies.
        """
        self.to(device)
        optimizer = optimizer_class(self.parameters(), lr=learning_rate)
        
        history = {
            'train_loss': [],
            'train_acc': [],
            'val_loss': [],
            'val_acc': [],
        }
        
        for epoch in range(epochs):
            self.train()
            running_loss = 0.0
            running_correct = 0
            total_samples = 0
            
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                batch_size = inputs.size(0)
                total_samples += batch_size
                
                optimizer.zero_grad()
                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                
                running_loss += loss.item() * batch_size
                running_correct += (outputs.argmax(1) == labels).sum().item()
 
                del loss
                del inputs
            # Calculate training metrics
            epoch_train_loss = running_loss / total_samples
            epoch_train_acc = running_correct / total_samples
            history['train_loss'].append(epoch_train_loss)
            history['train_acc'].append(epoch_train_acc)
            
            # Validation
            self.eval()
            val_loss = 0.0
            val_correct = 0
            val_total = 0
            
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    batch_size = inputs.size(0)
                    val_total += batch_size
                    
                    outputs = self(inputs)
                    loss = criterion(outputs, labels)
                    
                    val_loss += loss.item() * batch_size
                    val_correct += (outputs.argmax(1) == labels).sum().item()
                    del loss
                    del inputs
            # Calculate validation metrics
            epoch_val_loss = val_loss / val_total
            epoch_val_acc = val_correct / val_total
            history['val_loss'].append(epoch_val_loss)
            history['val_acc'].append(epoch_val_acc)
            
            # Print metrics
            print(f'Epoch {epoch+1}/{epochs}: '
                  f'Train Loss: {epoch_train_loss:.4f} | Train Acc: {epoch_train_acc:.4f} | '
                  f'Val Loss: {epoch_val_loss:.4f} | Val Acc: {epoch_val_acc:.4f}')
        
 
        return history
    
    def predict(self, 
                test_loader: DataLoader, 
                device: torch.device) :
        """
        Make predictions on test data.
        Args:
            test_loader (DataLoader): DataLoader for test data.
            device (torch.device): Device to use for prediction.
        Returns:
            Tuple[torch.Tensor, torch.Tensor]: Predictions and true labels.
        """
        self.eval()
        self.to(device)
        
        all_preds = []
        all_labels = []
        
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(device)
                outputs = self(inputs)
                _, preds = torch.max(outputs, 1)
                
                all_preds.append(preds.cpu())
                all_labels.append(labels.cpu())
        
        return torch.cat(all_preds), torch.cat(all_labels)

def transform_image(dataAugmentation=False):
    if dataAugmentation:
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomRotation(15),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],       
                std=[0.229, 0.224, 0.225])
        ])
        return transform
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],       
            std=[0.229, 0.224, 0.225]
        )
    ])
    return transform

def data_loader(data_dir, batch_size, dataAugmentation, num_workers=3):
    # Load the full training dataset
    full_dataset = datasets.ImageFolder(root=os.path.join(data_dir, 'train'), transform=transform_image(dataAugmentation=dataAugmentation))
    targets = full_dataset.targets  # class labels for stratification

    # Stratified split: 80% train, 20% val
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    train_idx, val_idx = next(sss.split(full_dataset.samples, targets))

    # Subsets
    train_dataset = Subset(full_dataset, train_idx)
    val_dataset = Subset(full_dataset, val_idx)

    # Override transform for val set (no augmentation)
    val_dataset.dataset.transform = transform_image(dataAugmentation=False)

    # Test dataset
    test_dataset = datasets.ImageFolder(root=os.path.join(data_dir, 'val'), transform=transform_image(dataAugmentation=False))

    # Data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

    return train_loader, val_loader, test_loader


In [3]:
from partA.model import CustomCNN, data_loader
import torch
import wandb
import gc

def train():
    wandb.init()
    config = wandb.config
    train_loader, val_loader, test_loader = data_loader(data_dir='data', batch_size=32, dataAugmentation=config.data_aug)
    wandb.run.name = (
        f"n_filters_{config.num_filters}_act_{config.activation_function}_"
        f"fof_{config.filter_organisation_factor}_dropout_{config.dropout}_"
        f"bn_{config.batch_norm}_data_aug_{config.data_aug}_hs_{config.hidden_size}_"
        f"n_blocks_{config.n_blocks}_num_epochs_{config.epochs}"
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = CustomCNN(num_filters=config.num_filters,
                      hidden_size=config.hidden_size,
                      filter_organisation_factor=config.filter_organisation_factor,
                      dropout=config.dropout,
                      batch_norm=config.batch_norm,
                      n_blocks=config.n_blocks,
                      activation_function=config.activation_function).to(device)
    
    history = model.train_model(train_loader=train_loader,
                                val_loader=val_loader,
                                epochs=config.epochs,
                                learning_rate=0.001,
                                device=device)

    # Log metrics to wandb
    for epoch in range(len(history['train_loss'])):
        wandb.log({
            'train_loss': history['train_loss'][epoch],
            'val_loss': history['val_loss'][epoch],
            'train_acc': history['train_acc'][epoch],
            'val_acc': history['val_acc'][epoch],
            'epoch': epoch
        })
    torch.cuda.empty_cache()
    gc.collect()
    del model

sweep_config = {
    "method": "bayes",
    "metric": {"name": "val_acc", "goal": "maximize"},
    "parameters": {
        "activation_function": {"values": ["SiLU", "Mish", "GeLU"]},
        "batch_norm": {"values": [True, False]},
        "data_aug": {"values": [True, False]},
        "dropout": {"values": [0.2, 0.3, 0.4]},
        "epochs": {"values": [5, 10, 15]},
        "filter_organisation_factor": {"values": [0.5, 1, 1.5]},
        "hidden_size": {"values": [128, 256]},
        "num_filters": {"values": [16, 32, 64]},
        "n_blocks": {"values": [5]}
    }
}


project_name = "iNaturalist-CNN-Optimization"

# Create sweep
sweep_id = wandb.sweep(
    sweep_config,
    project="iNaturalist-CNN-Optimization",
)

# Run agent for N trials
# wandb.agent(sweep_id=sweep_id, function=train, count=15)

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: 8uhfd32b
Sweep URL: https://wandb.ai/harshtrivs-indian-institute-of-technology-madras/iNaturalist-CNN-Optimization/sweeps/8uhfd32b


In [4]:
import torch
import wandb
import os
import matplotlib.pyplot as plt
from partA.model import CustomCNN, data_loader
import torchvision.datasets as datasets
import numpy as np

train_loader, val_loader, test_loader = data_loader(data_dir='data', batch_size=32, dataAugmentation=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CustomCNN(
                    filter_size=3,
                    num_classes=10,
                    num_filters=64,
                    hidden_size=256,
                    filter_organisation_factor=1.5,
                    dropout=0.4,
                    batch_norm=True,
                    n_blocks=5,
                    activation_function="GeLU").to(device)

# Check if the best model exists
if os.path.exists("partA/best_model.pth"):
    print("Best model found. Skipping training...")
    model.load_state_dict(torch.load("partA/best_model.pth"))
    model.eval()
else:
    print("No best model found. Starting training...")
    history = model.train_model(train_loader=train_loader,
                                val_loader=val_loader,
                                epochs=15,
                                learning_rate=0.001,
                                device=device)
    # Save the best model
    torch.save(model.state_dict(), "partA/best_model.pth")

model.load_state_dict(torch.load("partA/best_model.pth"))
# ---- Get best epoch based on test accuracy ----
predictions,true_labels = model.predict(test_loader, device)
best_accuracy = (predictions == true_labels).sum().item() / len(true_labels)

dataset = datasets.ImageFolder(root=os.path.join('data', 'val'))
class_to_name = {i: name.split('/')[-1] for i, name in enumerate(dataset.classes)}
                               

# ---- Initialize Weights & Biases ----
wandb.init(project="iNaturalist-CNN", name="best_model_test_eval")

# ---- Get 30 sample predictions ----
model.eval()
images, labels, preds = [], [], []

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        predicted = torch.argmax(outputs, dim=1)

        images.extend(inputs.cpu())
        labels.extend(targets.cpu())
        preds.extend(predicted.cpu())

        if len(images) >= 30:
            break

images = images[:30]
labels = labels[:30]
preds = preds[:30]

# ---- Create 10×3 prediction grid ----
wandb_images =[]
for i in range(30):
        # Denormalize image
        img = images[i].cpu().numpy().transpose((1, 2, 0))
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        img = std * img + mean
        img = np.clip(img, 0, 1)
        
        # Create caption with prediction info
        caption = (f"Correct ✅" if labels[i] == preds[i] else f"Wrong ❌") + \
                 f"\nPredicted: {class_to_name[labels[i].item()]}\nActual: {class_to_name[preds[i].item()]}"
        
        # Create wandb.Image with caption
        wandb_images.append(wandb.Image(
            img, 
            caption=caption,
        ))

# ---- Log grid and accuracy to wandb ----
wandb.log({
    "Prediction Grid": wandb_images,
    "Best Test Accuracy": best_accuracy
})

Best model found. Skipping training...


Question 2

In [1]:
from model import data_loader
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from tqdm import tqdm
import wandb

# ----------------------------
# Configuration
# ----------------------------
BATCH_SIZE = 32
NUM_CLASSES = 10
EPOCHS = 15
LR = 0.001
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ----------------------------
# Init wandb
# ----------------------------
wandb.init(project="resnet50-finetune-inaturalist", config={
    "batch_size": BATCH_SIZE,
    "epochs": EPOCHS,
    "lr": LR,
    "architecture": "resnet50",
    "unfrozen_layers": "layer4 + fc",
})

# ----------------------------
# Load Dataset
# ----------------------------
train_loader, val_loader, test_loader = data_loader(
    data_dir='data', batch_size=BATCH_SIZE, dataAugmentation=True
)

# ----------------------------
# Load Pretrained Model
# ----------------------------
model = models.resnet50(pretrained=True)

# Freeze all layers
for param in model.parameters():
    param.requires_grad = False

# Unfreeze last conv block (layer4)
for param in model.layer4.parameters():
    param.requires_grad = True

# Replace final layer
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, NUM_CLASSES)

model = model.to(DEVICE)

# ----------------------------
# Loss, Optimizer, Scheduler
# ----------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()), lr=LR
)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# ----------------------------
# Training & Evaluation
# ----------------------------
def train(model, loader, criterion, optimizer):
    model.train()
    running_loss = 0
    correct = 0
    total = 0
    for images, labels in tqdm(loader, desc="Training"):
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, preds = torch.max(output, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    acc = correct / total
    return running_loss / len(loader), acc


def validate(model, loader, desc="Validation"):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in tqdm(loader, desc=desc):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            output = model(images)
            _, preds = torch.max(output, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total

# ----------------------------
# Main Training Loop
# ----------------------------
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    
    train_loss, train_acc = train(model, train_loader, criterion, optimizer)
    val_acc = validate(model, val_loader, desc="Validation")
    test_acc = validate(model, test_loader, desc="Testing")

    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f} | Test Acc: {test_acc:.4f}")
    
    # Log metrics to wandb
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "train_acc": train_acc,
        "val_acc": val_acc,
        "test_acc": test_acc,
        "lr": scheduler.get_last_lr()[0]
    })

    scheduler.step()

# ----------------------------
# Save model
# ----------------------------
torch.save(model.state_dict(), "resnet50_inaturalist_finetuned.pth")
wandb.save("resnet50_inaturalist_finetuned.pth")
wandb.finish()


wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: harshtrivs (harshtrivs-indian-institute-of-technology-madras) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin





Epoch 1/15


Training: 100%|██████████| 250/250 [01:07<00:00,  3.73it/s]
Validation: 100%|██████████| 63/63 [00:24<00:00,  2.56it/s]
Testing: 100%|██████████| 63/63 [00:23<00:00,  2.74it/s]


Train Loss: 1.1836 | Train Acc: 0.6116 | Val Acc: 0.6945 | Test Acc: 0.6945

Epoch 2/15


Training: 100%|██████████| 250/250 [01:04<00:00,  3.89it/s]
Validation: 100%|██████████| 63/63 [00:23<00:00,  2.66it/s]
Testing: 100%|██████████| 63/63 [00:22<00:00,  2.75it/s]


Train Loss: 0.7509 | Train Acc: 0.7573 | Val Acc: 0.7255 | Test Acc: 0.7255

Epoch 3/15


Training: 100%|██████████| 250/250 [01:03<00:00,  3.91it/s]
Validation: 100%|██████████| 63/63 [00:23<00:00,  2.66it/s]
Testing: 100%|██████████| 63/63 [00:22<00:00,  2.86it/s]


Train Loss: 0.5020 | Train Acc: 0.8392 | Val Acc: 0.7040 | Test Acc: 0.7150

Epoch 4/15


Training: 100%|██████████| 250/250 [01:03<00:00,  3.91it/s]
Validation: 100%|██████████| 63/63 [00:23<00:00,  2.64it/s]
Testing: 100%|██████████| 63/63 [00:22<00:00,  2.84it/s]


Train Loss: 0.3540 | Train Acc: 0.8862 | Val Acc: 0.7190 | Test Acc: 0.7210

Epoch 5/15


Training: 100%|██████████| 250/250 [01:04<00:00,  3.89it/s]
Validation: 100%|██████████| 63/63 [00:23<00:00,  2.69it/s]
Testing: 100%|██████████| 63/63 [00:22<00:00,  2.80it/s]


Train Loss: 0.2502 | Train Acc: 0.9185 | Val Acc: 0.7020 | Test Acc: 0.7215

Epoch 6/15


Training: 100%|██████████| 250/250 [01:04<00:00,  3.90it/s]
Validation: 100%|██████████| 63/63 [00:24<00:00,  2.57it/s]
Testing: 100%|██████████| 63/63 [00:23<00:00,  2.70it/s]


Train Loss: 0.0969 | Train Acc: 0.9704 | Val Acc: 0.7710 | Test Acc: 0.7755

Epoch 7/15


Training: 100%|██████████| 250/250 [01:04<00:00,  3.88it/s]
Validation: 100%|██████████| 63/63 [00:24<00:00,  2.62it/s]
Testing: 100%|██████████| 63/63 [00:23<00:00,  2.66it/s]


Train Loss: 0.0419 | Train Acc: 0.9891 | Val Acc: 0.7770 | Test Acc: 0.7685

Epoch 8/15


Training: 100%|██████████| 250/250 [01:03<00:00,  3.92it/s]
Validation: 100%|██████████| 63/63 [00:24<00:00,  2.56it/s]
Testing: 100%|██████████| 63/63 [00:22<00:00,  2.78it/s]


Train Loss: 0.0274 | Train Acc: 0.9940 | Val Acc: 0.7755 | Test Acc: 0.7750

Epoch 9/15


Training: 100%|██████████| 250/250 [01:03<00:00,  3.92it/s]
Validation: 100%|██████████| 63/63 [00:23<00:00,  2.67it/s]
Testing: 100%|██████████| 63/63 [00:21<00:00,  2.88it/s]


Train Loss: 0.0194 | Train Acc: 0.9955 | Val Acc: 0.7785 | Test Acc: 0.7770

Epoch 10/15


Training: 100%|██████████| 250/250 [01:03<00:00,  3.93it/s]
Validation: 100%|██████████| 63/63 [00:23<00:00,  2.64it/s]
Testing: 100%|██████████| 63/63 [00:22<00:00,  2.75it/s]


Train Loss: 0.0162 | Train Acc: 0.9951 | Val Acc: 0.7775 | Test Acc: 0.7730

Epoch 11/15


Training: 100%|██████████| 250/250 [01:04<00:00,  3.89it/s]
Validation: 100%|██████████| 63/63 [00:25<00:00,  2.47it/s]
Testing: 100%|██████████| 63/63 [00:22<00:00,  2.82it/s]


Train Loss: 0.0110 | Train Acc: 0.9977 | Val Acc: 0.7775 | Test Acc: 0.7760

Epoch 12/15


Training: 100%|██████████| 250/250 [01:04<00:00,  3.90it/s]
Validation: 100%|██████████| 63/63 [00:23<00:00,  2.65it/s]
Testing: 100%|██████████| 63/63 [00:22<00:00,  2.83it/s]


Train Loss: 0.0104 | Train Acc: 0.9981 | Val Acc: 0.7730 | Test Acc: 0.7735

Epoch 13/15


Training: 100%|██████████| 250/250 [01:03<00:00,  3.92it/s]
Validation: 100%|██████████| 63/63 [00:23<00:00,  2.66it/s]
Testing: 100%|██████████| 63/63 [00:22<00:00,  2.83it/s]


Train Loss: 0.0109 | Train Acc: 0.9979 | Val Acc: 0.7810 | Test Acc: 0.7750

Epoch 14/15


Training: 100%|██████████| 250/250 [01:03<00:00,  3.93it/s]
Validation: 100%|██████████| 63/63 [00:23<00:00,  2.72it/s]
Testing: 100%|██████████| 63/63 [00:22<00:00,  2.85it/s]


Train Loss: 0.0087 | Train Acc: 0.9987 | Val Acc: 0.7800 | Test Acc: 0.7785

Epoch 15/15


Training: 100%|██████████| 250/250 [01:04<00:00,  3.90it/s]
Validation: 100%|██████████| 63/63 [00:23<00:00,  2.72it/s]
Testing: 100%|██████████| 63/63 [00:22<00:00,  2.79it/s]


Train Loss: 0.0078 | Train Acc: 0.9986 | Val Acc: 0.7770 | Test Acc: 0.7765


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
lr,█████▂▂▂▂▂▁▁▁▁▁
test_acc,▁▄▃▃▃█▇████████
train_acc,▁▄▅▆▇▇█████████
train_loss,█▅▄▃▂▂▁▁▁▁▁▁▁▁▁
val_acc,▁▄▂▃▂▇█████▇███

0,1
epoch,15.0
lr,1e-05
test_acc,0.7765
train_acc,0.99862
train_loss,0.00777
val_acc,0.777
