## IMPORT STATEMENTS

In [1]:
# Import Statements
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import wandb

## SET UP FOR DEVICE AGNOSTIC CODE

In [2]:
# Check if GPU is available (the bare expression is displayed as the cell output)
torch.cuda.is_available()

True

In [3]:
# Setup device agnostic code: use the first CUDA GPU when one is available,
# otherwise fall back to the CPU. `device` is read as a global by later cells.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device

'cuda:0'

## DATA LOADING AND PREPROCESSING

In [4]:
# Mount the google drive (RUN ONLY IN COLAB)
# `google.colab` is imported here rather than in the top import cell because
# it only exists inside a Colab runtime; the data paths below live on the mount.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [5]:
# Set up data directory path
# Both paths point inside the Google Drive mount created above.
TRAIN_DATA_DIR = "/content/drive/My Drive/Data/inaturalist_12K/train"
# NOTE(review): the test split is read from the dataset's `val` folder —
# presumably the dataset ships its held-out images under that name; confirm.
TEST_DATA_DIR = "/content/drive/My Drive/Data/inaturalist_12K/val"

In [21]:
def data_transformations(image_size=(224, 224), data_augment=False):
  """Build the torchvision preprocessing pipeline applied to every image.

  The pipeline always resizes to ``image_size``, converts the image to a
  tensor and normalizes each channel. When ``data_augment`` is true, random
  augmentations are inserted between the resize and the tensor conversion.

  The augmentations run *before* ``ToTensor``/``Normalize`` on purpose: the
  colour transforms expect image values in the regular [0, 1] / uint8 range,
  and applying them to already-normalized tensors (which contain negative
  values) clamps and distorts the data.

  Args:
    image_size: Target (height, width) handed to ``transforms.Resize``.
    data_augment: Whether to include the random augmentation transforms.

  Returns:
    A ``transforms.Compose`` holding the full pipeline.
  """

  # Random augmentations (only used for training data); each one fires rarely
  augmentations = []
  if data_augment:
      augmentations = [
          transforms.RandomHorizontalFlip(0.05),
          transforms.RandomVerticalFlip(0.05),
          transforms.RandomRotation(degrees=20),
          transforms.RandomApply([transforms.ColorJitter(0.2, 0.2, 0.2, 0.2)], p=0.05),
          transforms.RandomApply([transforms.GaussianBlur(3)], p=0.05)
      ]

  # Base transformations: resize -> (optional augmentation) -> tensor -> normalize
  transformations = [
      transforms.Resize(image_size),
      *augmentations,
      transforms.ToTensor(),
      transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]) # NOTE:- I am planning to use EfficientNetV2 so using the same values as used for that network
  ]

  return transforms.Compose(transformations)


def get_train_and_val_dataloaders(train_data_dir, image_size=(224, 224), data_augment=False, valset_size=0.2, batch_size=16):
  """Create stratified train/validation dataloaders from one image folder.

  The folder is split with a fixed seed so that each class keeps the same
  proportion in both subsets. Augmentation (when requested) is applied to the
  training subset only; validation images always go through the deterministic
  resize/normalize pipeline so validation metrics are comparable across
  epochs and runs.

  Args:
    train_data_dir: Root directory laid out as one sub-folder per class.
    image_size: Target (height, width) for every image.
    data_augment: Whether to randomly augment the *training* images.
    valset_size: Fraction (or absolute count) of samples held out for
      validation, forwarded to ``train_test_split`` as ``test_size``.
    batch_size: Batch size used by both dataloaders.

  Returns:
    A ``(trainloader, valloader, classnames)`` tuple.
  """

  train_transformer = data_transformations(image_size, data_augment)

  # Load the total_train dataset
  total_trainset = torchvision.datasets.ImageFolder(root = train_data_dir, transform=train_transformer)
  classnames = total_trainset.classes

  # The validation subset needs its own dataset object: a Subset reuses the
  # transform of the dataset it wraps, so sharing `total_trainset` would
  # augment validation images too. Both ImageFolder instances are built from
  # the same directory, so the split indices below refer to the same files in
  # either of them. Without augmentation the transforms are identical, so the
  # second directory scan is skipped.
  if data_augment:
    eval_transformer = data_transformations(image_size, False)
    total_valset = torchvision.datasets.ImageFolder(root = train_data_dir, transform=eval_transformer)
  else:
    total_valset = total_trainset

  # Split the total_train data into train data and val data
  labels = [label for _, label in total_trainset.samples]

  train_indices, val_indices = train_test_split(
                                  range(len(total_trainset)),
                                  test_size=valset_size,
                                  stratify=labels,
                                  random_state=42
                                  )

  trainset = torch.utils.data.Subset(total_trainset, train_indices)
  valset = torch.utils.data.Subset(total_valset, val_indices)

  # Create the dataloaders (only the training data is shuffled)
  trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                            shuffle=True, num_workers=2)

  valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size,
                                          shuffle=False, num_workers=2)

  return trainloader, valloader, classnames



def get_test_dataloader(test_data_dir, image_size=(224, 224), batch_size=8):
  """Return a non-shuffled dataloader over the images in ``test_data_dir``.

  Test images never receive augmentation; they only go through the base
  pipeline built by ``data_transformations``.

  Args:
    test_data_dir: Root directory laid out as one sub-folder per class.
    image_size: Target (height, width) for every image.
    batch_size: Number of images per batch.

  Returns:
    A ``torch.utils.data.DataLoader`` over the test images.
  """
  eval_transform = data_transformations(image_size, data_augment=False)

  # Read the test images from disk, one class per sub-folder
  test_images = torchvision.datasets.ImageFolder(root=test_data_dir, transform=eval_transform)

  return torch.utils.data.DataLoader(
      test_images,
      batch_size=batch_size,
      shuffle=False,
      num_workers=2,
  )

In [22]:
# Input resolution (height, width) shared by the dataloaders and the baseline model
image_size = (224, 224)

In [23]:
# Baseline loaders with the function defaults: no augmentation, 20% validation split, batch size 16
trainloader, valloader, classnames = get_train_and_val_dataloaders(TRAIN_DATA_DIR, image_size=image_size)

## MODEL CLASS

In [24]:
class CNNModel(nn.Module):
    """Five-block convolutional classifier with one hidden dense layer.

    Each block is ``Conv2d -> (BatchNorm2d) -> activation -> (Dropout2d) ->
    MaxPool2d(2, 2)``. The convolutions use no padding and stride 1, so every
    block shrinks the feature map to ``(size - kernel + 1) // 2``. The result
    is flattened and passed through ``Linear -> (BatchNorm1d) -> activation ->
    (Dropout) -> Linear``.

    Submodules are registered as ``conv1..conv5``, ``batchnorm1..5``,
    ``activation1..5``, ``dropout1..5``, ``maxpool1..5``, ``flatten``,
    ``fc_layer``, ``batchnorm_fc``, ``act_fc``, ``drop_fc`` and ``out``.

    Args:
        image_size: ``(height, width)`` of the input images; used to compute
            the input width of the dense layer.
        in_channels: Number of channels in the input images.
        num_classes: Number of output logits.
        num_filters: Output channels for each of the five conv blocks
            (only read, never mutated).
        kernel_size: Square kernel size for each of the five conv blocks
            (only read, never mutated).
        activation_fn: Zero-argument callable returning an activation module,
            e.g. ``nn.ReLU``; a fresh instance is created per layer.
        fc_layer_size: Width of the hidden dense layer.
        batchnorm: Whether to insert batch normalization after every conv
            layer and after the hidden dense layer.
        dropout: Dropout probability; ``0`` disables dropout entirely.

    Raises:
        ValueError: If the feature map shrinks to zero (or below) before all
            five blocks have been applied, i.e. the image is too small for
            the chosen kernel sizes.
    """

    # Number of convolutional blocks in the network
    _NUM_BLOCKS = 5

    def __init__(self, image_size, in_channels=3, num_classes=10,
                 num_filters=[64, 64, 64, 64, 64], kernel_size=[3, 3, 3, 3, 3],
                 activation_fn=nn.ReLU, fc_layer_size=2048,
                 batchnorm=False, dropout=0.0):

        super().__init__()

        h, w = image_size
        block_in_channels = in_channels

        for index in range(self._NUM_BLOCKS):
            block = index + 1
            block_out_channels = num_filters[index]
            k = kernel_size[index]

            # Track the spatial size: unpadded conv, then 2x2 max-pool
            h, w = (h - k + 1) // 2, (w - k + 1) // 2
            if h <= 0 or w <= 0:
                # Without this check two negative sizes would multiply to a
                # positive in_features and silently build an unusable model.
                raise ValueError(
                    f"image_size {tuple(image_size)} is too small for kernel_size "
                    f"{list(kernel_size)}: feature map collapses in block {block}"
                )

            # setattr keeps the public submodule names (conv1, batchnorm1, ...)
            # and their registration order.
            setattr(self, f"conv{block}",
                    nn.Conv2d(in_channels=block_in_channels, out_channels=block_out_channels, kernel_size=k))
            setattr(self, f"batchnorm{block}",
                    nn.BatchNorm2d(block_out_channels) if batchnorm else nn.Identity())
            setattr(self, f"activation{block}", activation_fn())
            setattr(self, f"dropout{block}",
                    nn.Dropout2d(dropout) if dropout != 0 else nn.Identity())
            setattr(self, f"maxpool{block}", nn.MaxPool2d(2, 2))

            block_in_channels = block_out_channels

        # Flattening layer
        self.flatten = nn.Flatten()

        # Fully connected layer; its input is the flattened output of block 5
        self.fc_layer = nn.Linear(in_features=block_in_channels * h * w, out_features=fc_layer_size)
        self.batchnorm_fc = nn.BatchNorm1d(fc_layer_size) if batchnorm else nn.Identity()
        self.act_fc = activation_fn()
        self.drop_fc = nn.Dropout(dropout) if dropout != 0 else nn.Identity()

        # Output layer (raw logits; no softmax)
        self.out = nn.Linear(in_features=fc_layer_size, out_features=num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Input batch; expected to match the ``in_channels`` and
                ``image_size`` given at construction.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalized scores.
        """
        # Convolutional blocks 1..5
        for block in range(1, self._NUM_BLOCKS + 1):
            x = getattr(self, f"conv{block}")(x)
            x = getattr(self, f"batchnorm{block}")(x)
            x = getattr(self, f"activation{block}")(x)
            x = getattr(self, f"dropout{block}")(x)
            x = getattr(self, f"maxpool{block}")(x)

        # Flatten
        x = self.flatten(x)

        # Fully connected layers
        x = self.fc_layer(x)
        x = self.batchnorm_fc(x)
        x = self.act_fc(x)
        x = self.drop_fc(x)

        # Output layer
        return self.out(x)

In [25]:
# Baseline model with the default hyperparameters, moved to the selected device.
# `model.to(device)` returns the module, so its layer summary is shown as the cell output.
model = CNNModel(image_size)
model.to(device)

CNNModel(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm1): Identity()
  (activation1): ReLU()
  (dropout1): Identity()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm2): Identity()
  (activation2): ReLU()
  (dropout2): Identity()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm3): Identity()
  (activation3): ReLU()
  (dropout3): Identity()
  (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm4): Identity()
  (activation4): ReLU()
  (dropout4): Identity()
  (maxpool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv5): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm5): Identity()

## TRAINING AND EVALUATION

In [26]:
import torch
from tqdm import tqdm
import time

def train_model(model, trainloader, valloader, criterion, optimizer, device, epochs=10):
    """Train ``model`` and evaluate it on the validation set after every epoch.

    Loss values are sample-weighted averages over the epoch; accuracies are
    percentages in the range 0-100. A one-line summary is printed per epoch.

    Args:
        model: Network to optimize (already placed on ``device``).
        trainloader: Iterable of ``(inputs, labels)`` training batches.
        valloader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer bound to the model's parameters.
        device: Device the batches are moved to.
        epochs: Number of passes over the training data.

    Returns:
        ``(train_losses, train_accuracies, val_losses, val_accuracies)``,
        four lists with one entry per epoch.
    """
    train_loss_history, train_acc_history = [], []
    val_loss_history, val_acc_history = [], []

    for epoch in range(epochs):

        # ---------------- training pass ----------------
        model.train()
        loss_sum, n_correct, n_seen = 0, 0, 0

        for batch in tqdm(trainloader):
            images = batch[0].to(device)
            targets = batch[1].to(device)

            optimizer.zero_grad()
            logits = model(images)
            batch_loss = criterion(logits, targets)
            batch_loss.backward()
            optimizer.step()

            # Weight the mean batch loss by the batch size so the epoch
            # average is exact even when the last batch is smaller.
            n_in_batch = images.size(0)
            loss_sum += batch_loss.item() * n_in_batch
            n_seen += n_in_batch
            n_correct += (targets == logits.argmax(dim=1)).sum().item()

        train_epoch_loss = loss_sum / n_seen
        train_epoch_accuracy = 100 * n_correct / n_seen
        train_loss_history.append(train_epoch_loss)
        train_acc_history.append(train_epoch_accuracy)

        # ---------------- validation pass ----------------
        model.eval()
        loss_sum, n_correct, n_seen = 0, 0, 0

        with torch.no_grad():
            for batch in tqdm(valloader):
                images = batch[0].to(device)
                targets = batch[1].to(device)

                logits = model(images)
                batch_loss = criterion(logits, targets)

                n_in_batch = images.size(0)
                loss_sum += batch_loss.item() * n_in_batch
                n_seen += n_in_batch
                n_correct += (targets == logits.argmax(dim=1)).sum().item()

        val_epoch_loss = loss_sum / n_seen
        val_epoch_accuracy = 100 * n_correct / n_seen
        val_loss_history.append(val_epoch_loss)
        val_acc_history.append(val_epoch_accuracy)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_epoch_loss:.3f}, Train Acc: {train_epoch_accuracy:.2f}%, Val Loss: {val_epoch_loss:.3f}, Val Acc: {val_epoch_accuracy:.2f}%")

    return train_loss_history, train_acc_history, val_loss_history, val_acc_history

In [27]:
def evaluate_model(model, testloader, criterion, device):
  """Compute and print the average loss and accuracy of ``model`` on a dataset.

  Args:
    model: Network to evaluate (already placed on ``device``).
    testloader: Iterable of ``(inputs, labels)`` batches.
    criterion: Loss callable taking ``(outputs, labels)``.
    device: Device the batches are moved to.

  Returns:
    ``(test_loss, test_accuracy)``: the sample-weighted mean loss and the
    accuracy as a percentage.
  """
  # Evaluation mode: disables dropout and uses running batch-norm statistics
  model.eval()

  loss_sum, n_correct, n_seen = 0, 0, 0

  # No gradients are needed while scoring
  with torch.no_grad():
    for batch in tqdm(testloader):
      images = batch[0].to(device)
      targets = batch[1].to(device)

      logits = model(images)
      batch_loss = criterion(logits, targets)

      n_in_batch = images.size(0)
      loss_sum += batch_loss.item() * n_in_batch
      n_seen += n_in_batch
      n_correct += (targets == logits.argmax(dim=1)).sum().item()

  test_loss = loss_sum / n_seen
  test_accuracy = (n_correct / n_seen) * 100

  print(f"Test Loss: {test_loss:.3f}, Test Acc: {test_accuracy:.2f}%")

  return test_loss, test_accuracy


In [28]:
# Multi-class classification loss operating on the raw logits returned by the model
criterion = nn.CrossEntropyLoss()
# NOTE(review): the recorded run below stays at loss ~2.303 (= ln 10, the loss of
# a uniform guess over 10 classes) and ~10% accuracy for all ten epochs, i.e. the
# model is not learning. Presumably lr=0.005 is too high for Adam on this
# un-normalized network — TODO confirm by retrying with a smaller rate (e.g. 1e-3 or 1e-4).
optimizer = optim.Adam(model.parameters(), lr=0.005)

In [29]:
# Train the baseline for 10 epochs; the returned per-epoch history tuple
# (train losses, train accuracies, val losses, val accuracies) is displayed as the cell output.
train_model(model, trainloader, valloader, criterion, optimizer, device, 10)

100%|██████████| 501/501 [01:57<00:00,  4.25it/s]
100%|██████████| 126/126 [00:26<00:00,  4.81it/s]


Epoch 1/10, Train Loss: 2.311, Train Acc: 9.48%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:48<00:00,  4.61it/s]
100%|██████████| 126/126 [00:25<00:00,  4.95it/s]


Epoch 2/10, Train Loss: 2.304, Train Acc: 9.78%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:48<00:00,  4.63it/s]
100%|██████████| 126/126 [00:26<00:00,  4.83it/s]


Epoch 3/10, Train Loss: 2.304, Train Acc: 9.12%, Val Loss: 2.303, Val Acc: 10.09%


100%|██████████| 501/501 [01:47<00:00,  4.65it/s]
100%|██████████| 126/126 [00:25<00:00,  4.86it/s]


Epoch 4/10, Train Loss: 2.304, Train Acc: 9.64%, Val Loss: 2.303, Val Acc: 10.09%


100%|██████████| 501/501 [01:47<00:00,  4.67it/s]
100%|██████████| 126/126 [00:25<00:00,  4.86it/s]


Epoch 5/10, Train Loss: 2.304, Train Acc: 9.76%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:47<00:00,  4.64it/s]
100%|██████████| 126/126 [00:25<00:00,  4.88it/s]


Epoch 6/10, Train Loss: 2.304, Train Acc: 9.57%, Val Loss: 2.303, Val Acc: 10.09%


100%|██████████| 501/501 [01:47<00:00,  4.65it/s]
100%|██████████| 126/126 [00:25<00:00,  4.93it/s]


Epoch 7/10, Train Loss: 2.304, Train Acc: 10.02%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:45<00:00,  4.73it/s]
100%|██████████| 126/126 [00:26<00:00,  4.81it/s]


Epoch 8/10, Train Loss: 2.304, Train Acc: 9.29%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:47<00:00,  4.66it/s]
100%|██████████| 126/126 [00:26<00:00,  4.83it/s]


Epoch 9/10, Train Loss: 2.304, Train Acc: 9.64%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:47<00:00,  4.67it/s]
100%|██████████| 126/126 [00:26<00:00,  4.83it/s]

Epoch 10/10, Train Loss: 2.304, Train Acc: 9.04%, Val Loss: 2.303, Val Acc: 9.99%





([2.3105565779750536,
  2.3038416414477663,
  2.3039181630313026,
  2.3038147626863013,
  2.3038577751728586,
  2.3038967442518468,
  2.3037263104059744,
  2.3039017121493446,
  2.3040772961462377,
  2.3041392147555935],
 [9.48038970771921,
  9.780164876342743,
  9.118161378965775,
  9.642767924056958,
  9.755183612290782,
  9.567824131901075,
  10.017486884836373,
  9.293030227329503,
  9.642767924056958,
  9.043217586809893],
 [2.302867354927482,
  2.3029153780503706,
  2.302903371138292,
  2.3031853519595944,
  2.302911263483983,
  2.3029766730614356,
  2.302807408732015,
  2.3028145369949873,
  2.302782053714032,
  2.3029784717998067],
 [9.99000999000999,
  9.99000999000999,
  10.08991008991009,
  10.08991008991009,
  9.99000999000999,
  10.08991008991009,
  9.99000999000999,
  9.99000999000999,
  9.99000999000999,
  9.99000999000999])

## WANDB HYPERPARAMETER SWEEP

In [None]:
def get_activation_function(name):
  """Map a sweep activation name to the corresponding ``torch.nn`` module class.

  Args:
    name: One of ``"leaky_relu"``, ``"parametric_relu"``, ``"gelu"``,
      ``"silu"`` or ``"mish"``. Any other value (including ``"relu"``)
      selects ``nn.ReLU``.

  Returns:
    The activation *class* (not an instance), ready to be called with no
    arguments.
  """
  activation_classes = {
      "leaky_relu": nn.LeakyReLU,
      "parametric_relu": nn.PReLU,
      "gelu": nn.GELU,
      "silu": nn.SiLU,
      "mish": nn.Mish,
  }
  # Unlisted names fall back to plain ReLU
  return activation_classes.get(name, nn.ReLU)

def get_num_filters(name):
  """Map a sweep preset name to the per-block filter counts of the five conv layers.

  Args:
    name: Preset identifier such as ``"equal32"`` or ``"doubling16"``.

  Returns:
    A new list of five ints. Unrecognized names yield
    ``[100, 80, 50, 80, 100]``.
  """
  filter_presets = {
      "equal16": (16, 16, 16, 16, 16),
      "equal32": (32, 32, 32, 32, 32),
      "equal64": (64, 64, 64, 64, 64),
      "doubling16": (16, 32, 64, 128, 256),
      "doubling32": (32, 64, 128, 256, 512),
      "halving256": (256, 128, 64, 32, 16),
  }
  fallback = (100, 80, 50, 80, 100)
  # Copy into a list so every call hands back an independent object
  return list(filter_presets.get(name, fallback))

def get_kernel_size(name):
  """Map a sweep preset name to the kernel sizes of the five conv layers.

  Args:
    name: ``"constant5"``, ``"constant7"``, ``"decreasing"`` or
      ``"increasing"``. Any other value (including ``"constant3"``) selects
      3x3 kernels everywhere.

  Returns:
    A new list of five ints, one kernel size per conv block.
  """
  kernel_presets = {
      "constant5": (5, 5, 5, 5, 5),
      "constant7": (7, 7, 7, 7, 7),
      "decreasing": (11, 7, 5, 3, 1),
      "increasing": (1, 3, 5, 7, 11),
  }
  # Copy into a list so every call hands back an independent object
  return list(kernel_presets.get(name, (3, 3, 3, 3, 3)))



In [None]:
def sweep_hyperparameters(config=None):
  """Run one W&B sweep trial: build the data and model from the sampled config, train, and log metrics.

  Intended to be passed to ``wandb.agent``. The hyperparameters are read from
  ``wandb.config`` after ``wandb.init``. Relies on the notebook-level globals
  ``TRAIN_DATA_DIR`` and ``device``.

  Scalar metrics are logged once per epoch, so ``val_accuracy`` — the metric
  the sweep maximizes — ends up as the accuracy of the *final* epoch.

  Args:
    config: Optional default configuration forwarded to ``wandb.init``;
      the sweep agent supplies the actual values.
  """

  with wandb.init(config=config):

    config = wandb.config
    wandb.run.name = f"activation_{str(config.activation)}_filters_{str(config.num_filters)}_lr_{config.learning_rate}_kernel_{config.kernel_size}_fc_size_{config.fc_layer_size}"

    # Log in my details
    wandb.config.update({"NAME": "SIDDHANT BARANWAL", "ROLL NO.": "DA24M021"})

    image_size = (224, 224)

    # Uses the notebook-level TRAIN_DATA_DIR instead of repeating the path here
    trainloader, valloader, _ = get_train_and_val_dataloaders(
        TRAIN_DATA_DIR,
        image_size=image_size,
        data_augment=config.data_augment,
        valset_size=0.2,
        batch_size=config.batch_size,
    )

    model = CNNModel(image_size, num_filters=get_num_filters(config.num_filters), kernel_size=get_kernel_size(config.kernel_size),
                     activation_fn=get_activation_function(config.activation), batchnorm=config.batch_norm, dropout=config.dropout,
                     fc_layer_size=config.fc_layer_size)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    train_losses, train_accuracies, val_losses, val_accuracies = train_model(model, trainloader, valloader, criterion, optimizer, device, epochs=10)

    # Log the full histories under their original keys first, so the scalar
    # per-epoch values logged afterwards are the most recent ones.
    wandb.log({
        "train_losses": train_losses,
        "train_accuracies": train_accuracies,
        "val_losses": val_losses,
        "val_accuracies": val_accuracies,
    })

    # One scalar log call per epoch gives proper learning curves, and the
    # last "val_accuracy" logged (final epoch) is what the sweep optimizes.
    epoch_metrics = zip(train_losses, train_accuracies, val_losses, val_accuracies)
    for epoch_number, (train_loss, train_accuracy, val_loss, val_accuracy) in enumerate(epoch_metrics, start=1):
      wandb.log({
          "epoch": epoch_number,
          "train_loss": train_loss,
          "train_accuracy": train_accuracy,
          "val_loss": val_loss,
          "val_accuracy": val_accuracy,
      })




In [None]:
# Search space for the W&B hyperparameter sweep.
# Bayesian optimization maximizes the "val_accuracy" value logged by
# sweep_hyperparameters; every parameter is a categorical choice.
sweep_config = {
    "method" : "bayes",
    "metric" : {"name": "val_accuracy", "goal": "maximize"},
    "parameters" : {
        "data_augment" : {"values" : [True, False]},
        "batch_norm" : {"values" : [True, False]},
        "dropout" : {"values" : [0.0, 0.2, 0.4]},
        "learning_rate" : {"values" : [0.01, 0.001, 0.0005, 0.0001]},
        # "relu" has no explicit branch in get_activation_function; it is
        # served by that function's nn.ReLU fallback.
        "activation" : {"values" : ["relu", "leaky_relu", "parametric_relu",
                                    "gelu", "silu", "mish"]},
        # Names are resolved to per-block filter counts by get_num_filters
        "num_filters" : {"values" : ["equal16", "equal32", "equal64", "doubling16", "doubling32", "halving256"]},
        # Names are resolved by get_kernel_size; "constant3" is served by its
        # [3, 3, 3, 3, 3] fallback.
        "kernel_size" : {"values" : ["constant3", "constant5", "constant7", "decreasing", "increasing"]},
        "fc_layer_size" : {"values": [2048, 1024, 512]},
        "batch_size": {"values": [8, 16, 32]}
    }
}

# Register the sweep, then run up to 50 trials in this process
sweep_id = wandb.sweep(sweep_config, project = "da24m021_da6401_assignment2")
wandb.agent(sweep_id, function = sweep_hyperparameters, count = 50)