In [None]:
import torch
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import os

class DataLoaderHelper:
    """Build train / validation / test ``DataLoader`` objects from image folders.

    The training directory is split 80/20 into train and validation subsets
    (fixed seed 42). Random augmentation, when enabled, is applied to the
    training subset only; validation and test images always go through the
    deterministic resize + normalize pipeline so that their metrics are
    comparable between runs.

    Args:
        train_data_dir: Root of an ``ImageFolder``-style directory used for
            the train/validation split.
        test_data_dir: Root of an ``ImageFolder``-style directory used as the
            held-out test set.
        input_size: Target image size, a tuple like ``(224, 224)``.
        batch_size: Batch size used by all three loaders.
        augmentation: Whether to apply random augmentation to training images.
        num_workers: Number of worker processes per loader (default 2, the
            value that used to be hard-coded).
    """

    def __init__(self, train_data_dir, test_data_dir, input_size, batch_size, augmentation,
                 num_workers=2):
        self.data_dir = train_data_dir
        self.test_dir = test_data_dir
        self.batch_size = batch_size
        self.augmentation = augmentation
        self.input_size = input_size  # tuple like (224, 224)
        self.num_workers = num_workers

        # `transform` keeps its historical name and is the training pipeline;
        # `eval_transform` is the augmentation-free pipeline for val/test.
        self.transform = self.get_transform()
        self.eval_transform = self.get_transform(train=False)
        self.train_data, self.val_data = self.load_train_val_data()
        self.test_data = self.load_test_data()

    def get_transform(self, train=True):
        """Return the preprocessing pipeline.

        Args:
            train: When True (default) and augmentation is enabled, the
                random augmentation pipeline is returned. Otherwise the
                deterministic resize pipeline is returned.

        Returns:
            A ``transforms.Compose`` ending in tensor conversion and
            per-channel normalization.
        """
        if train and self.augmentation:
            steps = [
                transforms.RandomResizedCrop(self.input_size),
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(30),
                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
                transforms.ToTensor(),
            ]
        else:
            steps = [
                transforms.Resize(self.input_size),
                transforms.ToTensor(),
            ]

        # Per-channel normalization (the standard ImageNet mean / std values).
        steps.append(
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        )
        return transforms.Compose(steps)

    def load_train_val_data(self):
        """Split the training directory 80/20 into train and validation subsets.

        Returns:
            ``(train_subset, val_subset)``. The two subsets index disjoint
            samples; the validation subset never sees random augmentation.
        """
        train_view = datasets.ImageFolder(root=self.data_dir, transform=self.transform)
        if self.augmentation:
            # Second view over the same files so validation samples are read
            # through the deterministic pipeline. ImageFolder lists files in
            # sorted order, so indices refer to the same images in both views.
            eval_view = datasets.ImageFolder(root=self.data_dir, transform=self.eval_transform)
        else:
            # Both pipelines are identical; avoid scanning the directory twice.
            eval_view = train_view

        indices = list(range(len(train_view)))
        train_idx, val_idx = train_test_split(indices, test_size=0.2, random_state=42, shuffle=True)

        return Subset(train_view, train_idx), Subset(eval_view, val_idx)

    def load_test_data(self):
        """Load the held-out test directory with the deterministic pipeline."""
        return datasets.ImageFolder(root=self.test_dir, transform=self.eval_transform)

    def get_dataloaders(self):
        """Create the three loaders.

        Returns:
            ``(train_loader, val_loader, test_loader)``. Only the training
            loader shuffles.
        """
        # Pinned host memory only helps (and only avoids a warning) when a
        # CUDA device is actually present.
        shared = dict(batch_size=self.batch_size,
                      num_workers=self.num_workers,
                      pin_memory=torch.cuda.is_available())

        train_loader = DataLoader(self.train_data, shuffle=True, **shared)
        val_loader = DataLoader(self.val_data, shuffle=False, **shared)
        test_loader = DataLoader(self.test_data, shuffle=False, **shared)

        return train_loader, val_loader, test_loader

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FlexibleCNN(nn.Module):
    """Configurable CNN: a stack of conv blocks followed by a fully connected head.

    Each conv block is ``Conv2d -> activation -> MaxPool2d(2) [-> BatchNorm2d]``
    with stride 1 and no padding. The head is a sequence of
    ``Linear -> activation -> Dropout`` layers ending in a plain ``Linear``
    that produces ``num_classes`` logits.

    Args:
        num_filters: List with the number of output channels per conv block.
        filter_sizes: List with the kernel size per conv block. It is zipped
            with ``num_filters``, so the shorter list decides the depth.
        dropout: Dropout probability used after every hidden FC layer.
        activation: One of ``'relu'``, ``'elu'``, ``'selu'``, ``'silu'``,
            ``'gelu'``; any other value selects Mish.
        batch_norm: Whether to append ``BatchNorm2d`` to every conv block.
        input_size: ``(height, width)`` of the 3-channel input images.
        fc_hidden_sizes: List of hidden layer widths for the FC head
            (may be empty).
        num_classes: Number of output logits.
    """

    # Activation name -> layer class. Names not listed here fall back to Mish.
    _ACTIVATIONS = {
        'relu': nn.ReLU,
        'elu': nn.ELU,
        'selu': nn.SELU,
        'silu': nn.SiLU,
        'gelu': nn.GELU,
    }

    def __init__(self, num_filters, filter_sizes,dropout,activation, batch_norm, input_size,fc_hidden_sizes,num_classes):
        super().__init__()

        self.dropout = dropout
        self.activation = activation
        self.batch_norm = batch_norm
        self.num_filters = num_filters  # list
        self.filter_sizes = filter_sizes  # list
        self.input_size = input_size  # tuple
        self.fc_hidden_sizes = fc_hidden_sizes  # list
        self.num_classes = num_classes
        self.flatten = nn.Flatten()

        # Order matters: the FC head needs the conv stack to already exist so
        # that the flattened feature size can be measured.
        self.conv_layers = nn.Sequential(*self.create_conv_layers())
        self.fc_layers = nn.Sequential(*self.create_fc_layers())

    def _make_activation(self):
        """Return a fresh instance of the configured activation layer."""
        return self._ACTIVATIONS.get(self.activation, nn.Mish)()

    def create_conv_layers(self):
        """Build the list of layers that make up the convolutional stack."""
        layers = []
        channels = 3  # RGB input

        for filters, size in zip(self.num_filters, self.filter_sizes):
            layers.append(nn.Conv2d(channels, filters, size, stride=1, padding=0))
            layers.append(self._make_activation())
            layers.append(nn.MaxPool2d(2))

            if self.batch_norm:
                layers.append(nn.BatchNorm2d(filters))

            # The next block consumes what this block produced.
            channels = filters

        return layers

    def create_fc_layers(self):
        """Build the list of layers that make up the fully connected head."""
        features = self.output_conv_layers()
        layers = []

        for hidden_size in self.fc_hidden_sizes:
            layers.append(nn.Linear(features, hidden_size))
            layers.append(self._make_activation())
            layers.append(nn.Dropout(self.dropout))
            features = hidden_size

        # Final output layer (no activation, no batch norm): raw logits.
        layers.append(nn.Linear(features, self.num_classes))

        return layers

    def output_conv_layers(self):
        """Return the flattened feature count produced by the conv stack.

        The size is measured by pushing one all-zeros image through the conv
        stack. The stack is temporarily switched to eval mode so the probe
        does not update BatchNorm running statistics and does not fail when
        the final feature map is 1x1 (BatchNorm rejects a single value per
        channel in training mode). The previous mode is always restored.
        """
        was_training = self.conv_layers.training
        self.conv_layers.eval()
        try:
            with torch.no_grad():
                probe = torch.zeros(1, 3, *self.input_size)
                features = self.conv_layers(probe)
        finally:
            self.conv_layers.train(was_training)
        return features.view(1, -1).size(1)

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to logits ``(N, num_classes)``."""
        x = self.conv_layers(x)
        x = self.flatten(x)
        return self.fc_layers(x)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from datetime import datetime
import os
import argparse
from tqdm import tqdm

class Trainer:
    """Train and validate a classification model with cross-entropy loss.

    Args:
        model: The ``nn.Module`` to train.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        optimizer_name: ``'adam'``, ``'nadam'`` or ``'rmsprop'``
            (case-insensitive).
        learning_rate: Optimizer learning rate.
        num_epochs: Number of epochs run by :meth:`train`.
        weight_decay: Weight decay passed to the optimizer.

    Raises:
        ValueError: If ``optimizer_name`` is not one of the supported names.

    Per-epoch metrics are appended to ``train_loss_history``,
    ``val_loss_history``, ``train_acc_history`` and ``val_acc_history``
    (accuracies are fractions in ``[0, 1]``).
    """

    def __init__(self, model, train_loader, val_loader, optimizer_name, learning_rate, num_epochs,weight_decay):
        # Set device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # With default device_ids, DataParallel uses every visible GPU and is
        # a plain pass-through when there is none, so the same code runs on
        # CPU, one GPU or several GPUs.
        self.model = torch.nn.DataParallel(model).to(self.device)

        self.train_loader = train_loader
        self.val_loader = val_loader
        self.num_epochs = num_epochs
        self.weight_decay = weight_decay
        self.learning_rate = learning_rate
        self.train_loss_history = []
        self.val_loss_history = []
        self.train_acc_history = []
        self.val_acc_history = []

        # Initialize optimizer
        optimizer_classes = {
            'adam': optim.Adam,
            'nadam': optim.NAdam,
            'rmsprop': optim.RMSprop,
        }
        optimizer_cls = optimizer_classes.get(optimizer_name.lower())
        if optimizer_cls is None:
            raise ValueError(f"Unsupported optimizer: {optimizer_name}")
        self.optimizer = optimizer_cls(self.model.parameters(),
                                       lr=self.learning_rate,
                                       weight_decay=self.weight_decay)

        self.criterion = nn.CrossEntropyLoss()

    def _run_epoch(self, loader, training, desc):
        """Run one pass over ``loader``; update weights only when ``training``.

        Returns:
            ``(mean batch loss, accuracy)`` where accuracy is the fraction of
            correctly classified samples.
        """
        self.model.train(training)
        running_loss = 0.0
        correct = 0
        total = 0

        # Gradients are tracked only for the training pass.
        with torch.set_grad_enabled(training):
            for images, labels in tqdm(loader, desc=desc):
                images, labels = images.to(self.device), labels.to(self.device)

                if training:
                    self.optimizer.zero_grad()
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)
                if training:
                    loss.backward()
                    # weight update
                    self.optimizer.step()

                running_loss += loss.item()
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        return running_loss / len(loader), correct / total

    def train_epoch(self):
        """Train for one epoch and return ``(loss, accuracy)``."""
        return self._run_epoch(self.train_loader, training=True, desc="Training")

    def validate(self):
        """Evaluate on the validation loader and return ``(loss, accuracy)``."""
        return self._run_epoch(self.val_loader, training=False, desc="Validating")

    def train(self):
        """Run ``num_epochs`` epochs, recording and printing metrics for each."""
        for epoch in range(self.num_epochs):
            print(f"\nEpoch {epoch+1}/{self.num_epochs}")

            train_loss, train_acc = self.train_epoch()
            val_loss, val_acc = self.validate()

            # Store history for analysis
            self.train_loss_history.append(train_loss)
            self.val_loss_history.append(val_loss)
            self.train_acc_history.append(train_acc)
            self.val_acc_history.append(val_acc)

            print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.4f}")
            print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc*100:.4f}")
            if torch.cuda.is_available():
                # Release cached GPU blocks between epochs.
                torch.cuda.empty_cache()



In [None]:
from google.colab import drive
# Mount Google Drive so the dataset stored under /content/drive is reachable.
drive.mount('/content/drive')

# Show which device is available for training.
print(torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
cuda


In [None]:
# directory='/content/drive/MyDrive/Ass2_dataset/nature_12K/inaturalist_12K/train'
# director2='/content/drive/MyDrive/Ass2_dataset/nature_12K/inaturalist_12K/val'
# input_dim=(224,224)
# num_classes=10

# data_loader = DataLoaderHelper(
#             directory,test_data_dir=director2,
#             input_size=input_dim,
#             batch_size=64,
#             augmentation=True
#         )

In [None]:
import wandb
import torch
import os
# from model import FlexibleCNN
# from data_loader import DataLoaderHelper
# from model_train import Trainer

# Image size fed to the network and number of target classes.
input_dim = (400, 400)
num_classes = 10
train_directory = '/content/drive/MyDrive/Ass2_dataset/nature_12K/inaturalist_12K/train'
test_directory = '/content/drive/MyDrive/Ass2_dataset/nature_12K/inaturalist_12K/val'

# Sweep configuration dictionary for wandb.
# The metric name must match a key passed to wandb.log(); runs log
# 'val_accuracy', so that is what the Bayesian search maximizes.
sweep_configuration = {
    'method': 'bayes',
    'name': 'cnn-hyperparameter-tuning',
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize',
    },
    'parameters': {
        # One entry per conv block (five blocks).
        'num_filters': {
            'values': [
                [64, 128, 256, 512, 1024],
                [32, 32, 32, 32, 32],
                [32, 64, 64, 128, 128],
                [128, 128, 64, 64, 32],
                [32, 64, 128, 256, 512],
            ]
        },
        'filter_sizes': {
            'values': [[3, 3, 3, 3, 3], [5, 5, 5, 5, 5], [5, 3, 5, 3, 5]]
        },
        'weight_decay': {
            'values': [0, 0.0005, 0.5]
        },
        'augmentation': {
            'values': [True, False]
        },
        'dropout': {
            'values': [0, 0.2, 0.4]
        },
        'learning_rate': {
            'values': [1e-3, 1e-4]
        },
        'activation': {
            'values': ['relu', 'elu', 'selu', 'silu', 'gelu', 'mish']
        },
        'optimizer': {
            'values': ['nadam', 'adam', 'rmsprop']
        },
        'batch_norm': {
            'values': [True, False]
        },
        'batch_size': {
            'values': [32, 64]
        },
        # Width of the single hidden FC layer (an int, not a list).
        'fc_hidden_sizes': {
            'values': [128, 256, 512]
        },
    },
}

def train_sweep(config=None):
    """Run one sweep trial: build data loaders and model, train, log metrics.

    Args:
        config: Optional default configuration forwarded to ``wandb.init``.
            Inside a sweep the agent supplies the values, which are read
            back from ``wandb.config``.

    The per-epoch train/validation loss and accuracy (in percent) are logged
    to wandb once training has finished.
    """
    num_epochs = 10  # single source of truth for training length

    with wandb.init(config=config) as run:
        config = wandb.config

        run.name = "optimizer {} activation {} num_filters {} dropout {} filter_sizes {} batch_size {} augmentation {} weight_decay {} batch_norm {} ".format(
            config.optimizer,
            config.activation,
            config.num_filters,
            config.dropout,
            config.filter_sizes,
            config.batch_size,
            config.augmentation,
            config.weight_decay,
            config.batch_norm
          )

        # Initialize data loaders
        data_loader = DataLoaderHelper(
            train_directory, test_data_dir=test_directory,
            input_size=input_dim,
            batch_size=config.batch_size,
            augmentation=config.augmentation
        )
        # The test loader is not used during hyperparameter search.
        train_loader, val_loader, _test_loader = data_loader.get_dataloaders()

        # The sweep provides a single width; the model expects a list.
        hidden_sizes = [config.fc_hidden_sizes]

        # Initialize model
        model = FlexibleCNN(
            num_filters=config.num_filters,
            filter_sizes=config.filter_sizes,
            dropout=config.dropout,
            activation=config.activation,
            batch_norm=config.batch_norm,
            input_size=input_dim,
            fc_hidden_sizes=hidden_sizes,
            num_classes=num_classes
        )

        # Initialize trainer
        trainer = Trainer(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            optimizer_name=config.optimizer,
            learning_rate=config.learning_rate,
            num_epochs=num_epochs,
            weight_decay=config.weight_decay
        )

        # Train the model
        trainer.train()

        # Log one row per recorded epoch. Iterating over the histories keeps
        # the loop in step with however many epochs were actually run.
        history = zip(
            trainer.train_acc_history,
            trainer.train_loss_history,
            trainer.val_acc_history,
            trainer.val_loss_history,
        )
        for epoch, (train_acc, train_loss, val_acc, val_loss) in enumerate(history):
            wandb.log({
                'train_accuracy': train_acc*100,
                'train_loss': train_loss,
                'val_accuracy': val_acc*100,
                'val_loss': val_loss,
                'epoch' : epoch
            })

if __name__ == "__main__":
    sweep_project = "DA6401-Assignment-2"

    # Register the sweep defined by `sweep_configuration` ...
    sweep_id = wandb.sweep(sweep_configuration, project=sweep_project)

    # ... and run the agent against that same sweep. Previously a hard-coded
    # ID of an older sweep was used here, so the configuration above was
    # never the one being searched. To resume an existing sweep instead,
    # skip wandb.sweep() and assign its ID to `sweep_id`.
    wandb.agent(sweep_id, function=train_sweep, project=sweep_project, count=1)

Create sweep with ID: ym7ukbbt
Sweep URL: https://wandb.ai/cs24m042-iit-madras-foundation/DA6401-Assignment-2/sweeps/ym7ukbbt


[34m[1mwandb[0m: Agent Starting Run: odf2o3xd with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	augmentation: False
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	fc_hidden_sizes: 512
[34m[1mwandb[0m: 	filter_sizes: [3, 5, 3, 5, 3]
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: [32, 64, 128, 256, 512]
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005



Epoch 1/10


Training: 100%|██████████| 250/250 [02:34<00:00,  1.62it/s]
Validating: 100%|██████████| 63/63 [00:33<00:00,  1.89it/s]


Train Loss: 2.4845 | Train Acc: 23.0154
Val Loss: 2.2228 | Val Acc: 28.2000

Epoch 2/10


Training: 100%|██████████| 250/250 [02:29<00:00,  1.67it/s]
Validating: 100%|██████████| 63/63 [00:36<00:00,  1.73it/s]


Train Loss: 2.0317 | Train Acc: 33.0041
Val Loss: 2.1635 | Val Acc: 30.4000

Epoch 3/10


Training: 100%|██████████| 250/250 [02:31<00:00,  1.65it/s]
Validating: 100%|██████████| 63/63 [00:34<00:00,  1.82it/s]


Train Loss: 1.4792 | Train Acc: 49.0561
Val Loss: 2.0676 | Val Acc: 33.1500

Epoch 4/10


Training: 100%|██████████| 250/250 [02:39<00:00,  1.57it/s]
Validating: 100%|██████████| 63/63 [00:37<00:00,  1.69it/s]


Train Loss: 0.8979 | Train Acc: 71.3589
Val Loss: 2.1772 | Val Acc: 33.1000

Epoch 5/10


Training: 100%|██████████| 250/250 [02:37<00:00,  1.59it/s]
Validating: 100%|██████████| 63/63 [00:34<00:00,  1.84it/s]


Train Loss: 0.5118 | Train Acc: 85.4232
Val Loss: 2.2239 | Val Acc: 35.3500

Epoch 6/10


Training: 100%|██████████| 250/250 [02:30<00:00,  1.66it/s]
Validating: 100%|██████████| 63/63 [00:33<00:00,  1.90it/s]


Train Loss: 0.2676 | Train Acc: 93.8367
Val Loss: 2.2628 | Val Acc: 34.7000

Epoch 7/10


Training: 100%|██████████| 250/250 [02:30<00:00,  1.66it/s]
Validating: 100%|██████████| 63/63 [00:34<00:00,  1.82it/s]


Train Loss: 0.1655 | Train Acc: 96.8371
Val Loss: 2.3669 | Val Acc: 35.0500

Epoch 8/10


Training: 100%|██████████| 250/250 [02:30<00:00,  1.66it/s]
Validating: 100%|██████████| 63/63 [00:33<00:00,  1.89it/s]


Train Loss: 0.0922 | Train Acc: 98.6748
Val Loss: 2.3983 | Val Acc: 34.8500

Epoch 9/10


Training: 100%|██████████| 250/250 [02:31<00:00,  1.65it/s]
Validating: 100%|██████████| 63/63 [00:33<00:00,  1.86it/s]


Train Loss: 0.0596 | Train Acc: 99.3749
Val Loss: 2.4196 | Val Acc: 35.8000

Epoch 10/10


Training: 100%|██████████| 250/250 [02:32<00:00,  1.64it/s]
Validating: 100%|██████████| 63/63 [00:34<00:00,  1.81it/s]

Train Loss: 0.0447 | Train Acc: 99.5124
Val Loss: 2.4866 | Val Acc: 34.5500





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▅▇▇████
train_loss,█▇▅▃▂▂▁▁▁▁
val_accuracy,▁▃▆▆█▇▇▇█▇
val_loss,▄▃▁▃▄▄▆▇▇█

0,1
epoch,9.0
train_accuracy,99.51244
train_loss,0.04469
val_accuracy,34.55
val_loss,2.48664


In [None]:
# Free memory between experiments.
import gc
# Collect unreachable Python objects first so that any GPU tensors they hold
# are released ...
gc.collect()
# ... then hand PyTorch's cached, now-unused CUDA blocks back to the driver.
torch.cuda.empty_cache()