In [12]:
# Standard library imports
import math
import argparse
from torch.utils.data import DataLoader, Subset

# Third-party library imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from tqdm import tqdm

# Torchvision imports
import torchvision
from torchvision import datasets
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms

In [2]:
!pip install --upgrade wandb
import wandb
# import socket
# socket.setdefaulttimeout(30)
wandb.login(key='1d2423ec9b728fe6cc1e2c0b9a2af0e67a45183c')


Collecting wandb
  Downloading wandb-0.19.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Downloading wandb-0.19.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.9/20.9 MB[0m [31m68.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: wandb
  Attempting uninstall: wandb
    Found existing installation: wandb 0.19.6
    Uninstalling wandb-0.19.6:
      Successfully uninstalled wandb-0.19.6
Successfully installed wandb-0.19.9


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mcs24m047[0m ([33mcs24m047-iitm-ac-in[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
# path in kaggle for the datasets
train_directory='/kaggle/input/dataset2/inaturalist_12K/train'
test_directory='/kaggle/input/dataset2/inaturalist_12K/val'

In [5]:
class CNN(nn.Module):
    def __init__(self, no_of_input_channels=3, no_of_classes=10, no_of_filters=[32,32,32,32,32], size_of_filter=[3,3,3,3,3],
                 no_of_neurons=128, activation_function='sigmoid',dropout_probability=0.0, batch_normalization='no'):
        super(CNN, self).__init__()
        self.activation_function_name = activation_function
        self.batch_normalization = batch_normalization
        
        width = height = 256.0 # Initialize width and height for feature map calculations
        
        # Create convolutional, batch norm, and pooling layers dynamically
        for i in range(len(no_of_filters)):
            # Conv layer
            conv_layer = nn.Conv2d(in_channels=no_of_input_channels if i == 0 else no_of_filters[i-1],
                                   out_channels=no_of_filters[i],kernel_size=size_of_filter[i],stride=1)
            setattr(self, f'conv_layer{i+1}', conv_layer)

            width = height = (width - size_of_filter[i]) + 1  # Update feature map dimensions after convolution
            # Batch norm layer
            if batch_normalization == 'yes':
                batch_norm = nn.BatchNorm2d(no_of_filters[i])
                setattr(self, f'batch_norm{i+1}', batch_norm) 
            # Pooling layer
            pool_layer = nn.MaxPool2d(kernel_size=size_of_filter[i], stride=2)
            setattr(self, f'pool_layer{i+1}', pool_layer)
            width = height = math.floor((width - size_of_filter[i]) / 2) + 1 # Update feature map dimensions after pooling
        # Fully connected layers
        self.dropout = nn.Dropout(p=dropout_probability)
        self.full_connected1 = nn.Linear(no_of_filters[-1] * int(width) * int(height), no_of_neurons) 
        if batch_normalization == 'yes':
            self.batch_norm6 = nn.BatchNorm1d(no_of_neurons) 
        self.full_connected2 = nn.Linear(no_of_neurons, no_of_classes)

    def forward(self, x):
      # Set activation function
      if(self.activation_function_name == 'relu'):
            activation_function = F.relu
      elif(self.activation_function_name == 'gelu'):
          activation_function = F.gelu
      elif(self.activation_function_name == 'silu'):
          activation_function = F.silu
      else:
            activation_function = F.mish

      # Process through 5 convolutional blocks
      for i in range(1, 6):
          conv_layer = getattr(self, f'conv_layer{i}')
          if self.batch_normalization == 'yes':
              batch_norm = getattr(self, f'batch_norm{i}')
              x = activation_function(batch_norm(conv_layer(x)))
          else:
              x = activation_function(conv_layer(x)) 
          pool_layer = getattr(self, f'pool_layer{i}')
          x = pool_layer(x)
      # Flatten the output
      x = x.reshape(x.shape[0], -1)
      # First fully connected layer
      if self.batch_normalization == 'yes':
          x = activation_function(self.batch_norm6(self.full_connected1(x)))
      else:
          x = activation_function(self.full_connected1(x))
      x = self.dropout(x)
      x = self.full_connected2(x)
      return x

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [7]:
transform_basic = transforms.Compose([
    transforms.Resize((256,256)), # resized to a threshold value so that all images have same shape and size
    transforms.ToTensor(),
    transforms.Normalize((0.5,),(0.5,))]) # normalized for better accuracy.

train_dataset = datasets.ImageFolder(root=train_directory,transform=transform_basic) # train_data loading
training_dataset,validation_dataset = torch.utils.data.random_split(train_dataset,[8000,1999]) #splitting the data into 80%(training) and 20%(validation) The overall data size is 9999

transform_augmented = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(10),      # Randomly rotate the image by a maximum of 10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust brightness, contrast, saturation, and hue
    transforms.RandomResizedCrop(256),  # Randomly crop and resize the image to 256x256
    transforms.ToTensor(),              # Convert the image to a PyTorch tensor
    transforms.Normalize((0.5,),(0.5,))  # Normalize the image
]) # for augumenting the training data
train_dataset2 = datasets.ImageFolder(root=train_directory,transform=transform_augmented)
training_dataset_aug,validation_dataset_aug = torch.utils.data.random_split(train_dataset2,[8000,1999]) #  #splitting the data into 80%(training) and 20%(validation) The overall data size is 9999

test_dataset = datasets.ImageFolder(root=test_directory,transform=transform_basic); # test data loading.

In [8]:
def data_loader_creator(augmentation_flag,batch_size): # function to return the data loaders depending on augumentation.
    if(augmentation_flag == 'no'):
        train_loader = torch.utils.data.DataLoader(training_dataset,batch_size =batch_size,shuffle = True,num_workers=2,pin_memory=True)
        val_loader = torch.utils.data.DataLoader(validation_dataset,batch_size =batch_size,shuffle = True,num_workers=2,pin_memory=True)
        return train_loader,val_loader
    else:
        train_loader_aug = torch.utils.data.DataLoader(training_dataset_aug,batch_size =batch_size,shuffle = True,num_workers=4,pin_memory=True)
        val_loader_aug = torch.utils.data.DataLoader(validation_dataset_aug,batch_size =batch_size,shuffle = True,num_workers=4,pin_memory=True)
        return train_loader_aug,val_loader_aug

In [9]:
def Accuracy_calculator(loader, model, criterion, batch_size):
    """Computes model accuracy and average loss on a given dataset loader"""
    correct_predictions = 0
    total_samples = 0
    accumulated_loss = 0.0
    model.eval()  # Set model to evaluation mode
    
    with torch.no_grad():  # Disable gradient computation
        for inputs, targets in loader:
            # Move data to the appropriate device
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            # Update metrics
            accumulated_loss += loss.item() * batch_size
            _, predicted = outputs.max(1)
            correct_predictions += (predicted == targets).sum().item()
            total_samples += predicted.size(0)
    model.train()  # Restore model to training mode
    accuracy = (correct_predictions / total_samples) * 100
    average_loss = accumulated_loss / total_samples
    return accuracy, average_loss

In [10]:
def train_the_model(no_of_neurons, no_of_filters, size_of_filter, activation_function_name, optimizer_name, batch_size,
                   dropout_probability, no_of_epochs, learning_rate, batch_normalization, augmentation_flag):
    no_of_input_channels = 3
    no_of_classes = 10

    train_loader, val_loader = data_loader_creator(augmentation_flag, batch_size)  # getting dataloaders
    # Uncomment the below line for test data loader
    # test_loader = torch.utils.data.DataLoader(test_data,batch_size=batch_size,shuffle=True,num_workers=2,pin_memory=True)
    
    model = CNN(no_of_input_channels, no_of_classes, no_of_filters, size_of_filter, no_of_neurons,
                activation_function_name, dropout_probability, batch_normalization).to(device)
    # model=nn.DataParallel(model)
    # model=model.to(device)

    if optimizer_name == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    else:
        optimizer = optim.NAdam(model.parameters(), lr=learning_rate)  # optimzers selection
    criterion = nn.CrossEntropyLoss()  # since it is classification problem corss entropy loss is used.

    for epoch in range(no_of_epochs):
        for batchId, (input_images, target_classes) in enumerate(tqdm(train_loader)):
            input_images = input_images.to(device=device)
            target_classes = target_classes.to(device=device)
            # forward
            scores = model(input_images)  # give the last layer pre-activation values.
            loss = criterion(scores, target_classes)  # gets the overll cross entropy loss for each batch
            
            optimizer.zero_grad()  # gradients are made to zero for each batch.
            loss.backward()  # calculaing the gradients
            optimizer.step()  # updates the parameters
        
        training_accuracy, training_loss = Accuracy_calculator(train_loader, model, criterion, batch_size)
        validation_accuracy, validation_loss = Accuracy_calculator(val_loader, model, criterion, batch_size)
        
        # Uncomment the below lines for test data evaluation
        # test_accuracy, test_loss = Accuracy_calculator(test_loader, model, criterion, batch_size)
        # print(f"test_accuracy:{test_accuracy:.4f},test_loss:{test_loss:.4f}")
        # wandb.log({'test_accuracy': test_accuracy})
        # wandb.log({'test_loss': test_loss})
        
        print(f"training_accuracy:{training_accuracy:.4f},training_loss:{training_loss:.4f}")
        print(f"validation_accuracy:{validation_accuracy:.4f},validation_loss:{validation_loss:.4f}")
        wandb.log({'training_accuracy': training_accuracy})  # plotting the data in wandb
        wandb.log({'training_loss': training_loss})
        wandb.log({'validation_accuracy': validation_accuracy})
        wandb.log({'validation_loss': validation_loss})

In [13]:
def parse_arguments():
    parser = argparse.ArgumentParser(description='Training_Parameters')

    parser.add_argument('-wp', '--wandb_project', type=str, default='DA6401_Assignment_2',
                      help='Project name used to track experiments in Weights & Biases dashboard')
    
    parser.add_argument('-n', '--no_of_neurons', type=int, default=128, 
                      choices=[128, 256, 512], help='Number of neurons in dense layer')
    
    parser.add_argument('-nF', '--no_of_filters', type=str, default='32,64,128,256,512',
                      help='Number of filters per layer as comma-separated values')
    
    parser.add_argument('-sF', '--size_of_filter', type=str, default='3,3,3,3,3',
                      help='Filter sizes per layer as comma-separated values')

    parser.add_argument('-aF', '--activation_function_name', type=str, default='gelu', 
                      choices=['relu','gelu','silu','mish'], help='Activation function type')
    
    parser.add_argument('-opt', '--optimizer_name', type=str, default='nadam', 
                      choices=['adam','nadam'], help='Optimizer type')

    parser.add_argument('-bS', '--batch_size', type=int, default=32, 
                      choices=[32, 64, 128], help='Batch size for training')

    parser.add_argument('-d', '--dropout_probability', type=float, default=0.4, 
                      choices=[0, 0.2, 0.4], help='Dropout probability')

    parser.add_argument('-nE', '--no_of_epochs', type=int, default=10, 
                      choices=[5, 10], help='Number of training epochs')

    parser.add_argument('-lR', '--learning_rate', type=float, default=0.001, 
                      choices=[1e-3, 1e-4], help='Learning rate')

    parser.add_argument('-bN', '--batch_normalization', type=str, default='yes', 
                      choices=['yes','no'], help='Whether to use batch normalization')

    parser.add_argument('-ag', '--augmentation_flag', type=str, default='no', 
                      choices=['yes','no'], help='Whether to use data augmentation')

    return parser.parse_args()

args = parse_arguments()
args.no_of_filters = [int(x) for x in args.no_of_filters.split(',')]
args.size_of_filter = [int(x) for x in args.size_of_filter.split(',')]
wandb.init(project=args.wandb_project)

wandb.run.name = (
    f"No_of_neurons: {args.no_of_neurons}, "
    f"No_of_filters: {args.no_of_filters}, "
    f"Size_of_filter: {args.size_of_filter}, "
    f"Activation_function: {args.activation_function_name}, "
    f"Optimizer: {args.optimizer_name}, "
    f"Batch_size: {args.batch_size}, "
    f"Dropout: {args.dropout_probability}, "
    f"No_of_epochs: {args.no_of_epochs}, "
    f"Learning_Rate: {args.learning_rate}, "
    f"Batch_normalization: {args.batch_normalization}, "
    f"Augmentation_flag: {args.augmentation_flag}"
)

train_the_model(
    args.no_of_neurons,
    args.no_of_filters,
    args.size_of_filter,
    args.activation_function_name,
    args.optimizer_name,
    args.batch_size,
    args.dropout_probability,
    args.no_of_epochs,
    args.learning_rate,
    args.batch_normalization,
    args.augmentation_flag
)

usage: colab_kernel_launcher.py [-h] [-wp WANDB_PROJECT] [-n {128,256,512}] [-nF NO_OF_FILTERS]
                                [-sF SIZE_OF_FILTER] [-aF {relu,gelu,silu,mish}]
                                [-opt {adam,nadam}] [-bS {32,64,128}] [-d {0,0.2,0.4}]
                                [-nE {5,10}] [-lR {0.001,0.0001}] [-bN {yes,no}] [-ag {yes,no}]
colab_kernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-c62b995f-2e89-4c08-9440-5175305cd488.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [11]:
# Sweep config for wandb plotting
# wandb.init(project ='DA6401_Assignment_2')
sweep_config = {
    'name'  : "final_code_5",
    'method': 'bayes',
    'metric': {
      'name': 'validation_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'no_of_neurons': {
            'values': [128, 256, 512]
        },
        'no_of_filters': {
            'values': [[64,128,256,512, 1024], [32,32,32,32,32],[32,64,64,128,128],[128,128,64,64,32],[32,64,128,256,512]]
        },
        'size_of_filter': {
            'values': [[3,3,3,3,3], [5,5,5,5,5], [5,3,5,3,5]]
        },
        'activation_function_name': {
            'values': ['relu','gelu','silu','mish']
        },
        'optimizer_name': {
            'values': ['nadam', 'adam']
        },
        'batch_size': {
            'values': [32, 64,128]
        },
        'dropout_probability': {
            'values': [0, 0.2, 0.4]
        },
        'no_of_epochs': {
            'values': [5,10]
        },
        'learning_rate': {
            'values': [1e-3, 1e-4]
        },
        'batch_normalization': {
            'values': ['yes','no']
        },
        'augmentation_flag': {
            'values': ['yes','no']
        }
    }
}

In [None]:
def run_experiment():
    try:
        run = wandb.init()  # No config argument here
        cfg = run.config
        run.name = (
            f"No_of_neurons: {cfg.no_of_neurons}, "
            f"No_of_filters: {cfg.no_of_filters}, "
            f"Size_of_filter: {cfg.size_of_filter}, "
            f"Activation_function: {cfg.activation_function_name}, "
            f"Optimizer: {cfg.optimizer_name}, "
            f"Batch_size: {cfg.batch_size}, "
            f"Dropout: {cfg.dropout_probability}, "
            f"No_of_epochs: {cfg.no_of_epochs}, "
            f"Learning_Rate: {cfg.learning_rate}, "
            f"Batch_normalization: {cfg.batch_normalization}, "
            f"Augmentation_flag: {cfg.augmentation_flag}"
        )
        train_the_model(
            cfg.no_of_neurons,
            cfg.no_of_filters,
            cfg.size_of_filter,
            cfg.activation_function_name,
            cfg.optimizer_name,
            cfg.batch_size,
            cfg.dropout_probability,
            cfg.no_of_epochs,
            cfg.learning_rate,
            cfg.batch_normalization,
            cfg.augmentation_flag
        )
    except Exception as e:
        print(f"Error during training: {e}")
        if wandb.run:
            wandb.finish(exit_code=1)
        raise
    finally:
        if wandb.run:
            wandb.finish
if __name__=="__main__":
    sweep_id = wandb.sweep(sweep_config, project="DA6401_Assignment_2")
    wandb.agent(sweep_id, run_experiment ,  count=5)

Create sweep with ID: 8lnmtrxl
Sweep URL: https://wandb.ai/cs24m047-iitm-ac-in/DA6401_Assignment_2/sweeps/8lnmtrxl


[34m[1mwandb[0m: Agent Starting Run: fonaha9k with config:
[34m[1mwandb[0m: 	activation_function_name: gelu
[34m[1mwandb[0m: 	augmentation_flag: no
[34m[1mwandb[0m: 	batch_normalization: no
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout_probability: 0.4
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	no_of_epochs: 5
[34m[1mwandb[0m: 	no_of_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	no_of_neurons: 256
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	size_of_filter: [5, 5, 5, 5, 5]


100%|██████████| 250/250 [00:49<00:00,  5.08it/s]


training_accuracy:20.0625,training_loss:2.1623
validation_accuracy:18.7094,validation_loss:2.1833


100%|██████████| 250/250 [00:52<00:00,  4.78it/s]


training_accuracy:23.8375,training_loss:2.1113
validation_accuracy:23.9620,validation_loss:2.1240


100%|██████████| 250/250 [00:51<00:00,  4.84it/s]


training_accuracy:28.1500,training_loss:2.0108
validation_accuracy:27.2636,validation_loss:2.0513


100%|██████████| 250/250 [00:53<00:00,  4.68it/s]


training_accuracy:28.2125,training_loss:2.0204
validation_accuracy:26.9135,validation_loss:2.0669


100%|██████████| 250/250 [00:52<00:00,  4.78it/s]


training_accuracy:30.8375,training_loss:1.9233
validation_accuracy:30.6153,validation_loss:1.9639


0,1
training_accuracy,▁▃▆▆█
training_loss,█▇▄▄▁
validation_accuracy,▁▄▆▆█
validation_loss,█▆▄▄▁

0,1
training_accuracy,30.8375
training_loss,1.92327
validation_accuracy,30.61531
validation_loss,1.96393


[34m[1mwandb[0m: Agent Starting Run: mrjqiy7s with config:
[34m[1mwandb[0m: 	activation_function_name: relu
[34m[1mwandb[0m: 	augmentation_flag: yes
[34m[1mwandb[0m: 	batch_normalization: yes
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_probability: 0.2
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	no_of_epochs: 10
[34m[1mwandb[0m: 	no_of_filters: [128, 128, 64, 64, 32]
[34m[1mwandb[0m: 	no_of_neurons: 512
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	size_of_filter: [3, 3, 3, 3, 3]


 78%|███████▊  | 97/125 [02:24<00:41,  1.48s/it]