In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every
# file found beneath it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    input_paths = [os.path.join(dirname, filename) for filename in filenames]
    for input_path in input_paths:
        print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
!pip install wandb



In [3]:
import os

import wandb

# Never hard-code the API key in the notebook: anyone who can read the file
# (or its saved output) can use it. Read it from the WANDB_API_KEY environment
# variable instead (e.g. populated from a Kaggle secret). When the variable is
# unset, key=None lets wandb fall back to its own credential lookup / prompt.
# NOTE: any key that was ever committed in a notebook should be revoked.
wandb.login(key=os.environ.get('WANDB_API_KEY'))

[34m[1mwandb[0m: Currently logged in as: [33mdibakar[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [4]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

def get_transform(data_augmentation):
    """Build the image preprocessing pipeline.

    Args:
        data_augmentation: Truthy to prepend random augmentations (random
            resized crop to 224, horizontal flip, affine jitter); falsy for a
            plain resize to 224x224.

    Returns:
        A ``transforms.Compose`` that ends with tensor conversion and
        per-channel normalization in both modes.
    """
    # Steps shared by both pipelines: convert to tensor, then normalize.
    common_tail = [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]

    if data_augmentation:
        head = [
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10),
        ]
    else:
        # Regular transformations
        head = [transforms.Resize((224, 224))]

    return transforms.Compose(head + common_tail)

def load_dataset(train_folder, test_folder, transform):
    """Create the training and test ``ImageFolder`` datasets.

    Args:
        train_folder: Root directory of the training images.
        test_folder: Root directory of the test images.
        transform: Either a data-augmentation flag (passed on to
            ``get_transform``) or an already-built callable transform that is
            applied to the training images as-is.

    Returns:
        Tuple ``(train_dataset, test_dataset)``. The test dataset always uses
        the non-augmented pipeline, ``get_transform(False)``.
    """
    # A ready-made pipeline must not be fed back into get_transform(): the
    # pipeline object is truthy, so it would be read as
    # data_augmentation=True and augmentation could never be switched off.
    if callable(transform):
        train_transform = transform
    else:
        train_transform = get_transform(transform)

    train_dataset = datasets.ImageFolder(train_folder, transform=train_transform)
    test_dataset = datasets.ImageFolder(test_folder, transform=get_transform(False))
    return train_dataset, test_dataset

def train(model, criterion, optimizer, train_loader, val_loader, num_epochs, log_every=240):
    """Train ``model`` and log training/validation metrics via ``wandb.log``.

    Args:
        model: Network to optimize; batches are moved to the device of its
            parameters before the forward pass.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.
        log_every: Log the mean training loss every ``log_every`` batches
            (default 240).

    Raises:
        ValueError: If ``log_every`` is smaller than 1.

    Logged keys: ``Epoch`` and ``Step`` are fractions of the total epochs and
    of the batches in the current epoch; ``Loss`` is the mean loss over the
    last ``log_every`` batches; ``Val_Loss`` is the mean per-batch validation
    loss and ``val_accuracy`` a percentage. Validation metrics are skipped
    for an epoch in which ``val_loader`` yields no samples.
    """
    if log_every < 1:
        raise ValueError("log_every must be at least 1")

    # Follow the model's device instead of assuming "CUDA available" means
    # the model was actually moved to the GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if (i + 1) % log_every == 0:
                wandb.log({'Epoch': (epoch + 1)/num_epochs, 'Step': (i + 1)/(len(train_loader)), 'Loss': running_loss / log_every})
                running_loss = 0.0

        # Validation loop
        model.eval()  # Set the model to evaluation mode

        val_loss = 0.0
        val_batches = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                val_loss += criterion(outputs, labels).item()
                val_batches += 1

                # The predicted class is the index of the largest score.
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)

        # Nothing to average when the validation loader is empty; dividing
        # would raise ZeroDivisionError.
        if total == 0:
            continue

        val_loss /= val_batches
        val_accuracy = correct / total * 100

        wandb.log({'Val_Loss': val_loss, 'val_accuracy': val_accuracy})
        


class CNN(nn.Module):
    """Configurable convolutional image classifier.

    Each convolution block applies, in order: ``Conv2d`` -> activation ->
    2x2 max-pool -> optional ``BatchNorm2d``. The blocks are followed by
    ``Dropout2d``, one hidden dense layer with ReLU, and a linear output
    layer that returns raw class scores.

    Args:
        hparams: Mapping (indexed with ``[]``) providing ``num_conv_layers``,
            ``num_filters``, ``kernel_size``, ``num_neurons_dense``,
            ``filter_organization`` ('same', 'double' or 'halve'),
            ``batch_normalization``, ``dropout_prob`` and ``conv_activation``
            ('relu', 'gelu' or 'mish').
        num_classes: Number of output scores (default 10).
        input_size: Height/width of the square input images (default 224).
        in_channels: Number of input image channels (default 3).

    Raises:
        ValueError: For an unknown filter organization or activation, fewer
            than one convolution layer, a layer left with no filters, or an
            input too small to survive all the pooling steps.
    """

    _ACTIVATIONS = ('relu', 'gelu', 'mish')

    def __init__(self, hparams, num_classes=10, input_size=224, in_channels=3):
        super(CNN, self).__init__()
        self.hparams = hparams
        num_conv_layers = hparams['num_conv_layers']
        num_filters = hparams['num_filters']
        kernel_size = hparams['kernel_size']  # Single integer value
        num_neurons_dense = hparams['num_neurons_dense']
        filter_organization = hparams['filter_organization']
        batch_normalization = hparams['batch_normalization']
        dropout_prob = hparams['dropout_prob']
        conv_activation = hparams['conv_activation']

        # Fail at construction time rather than on the first forward pass.
        if conv_activation not in self._ACTIVATIONS:
            raise ValueError("Invalid convolutional activation function")
        if num_conv_layers < 1:
            raise ValueError("num_conv_layers must be at least 1")
        # Looked up by name so only the selected function has to exist in F.
        self._activation = getattr(F, conv_activation)

        self.conv_layers = nn.ModuleList()
        self.num_conv_layers = num_conv_layers

        # For odd kernels this keeps the spatial size unchanged; for even
        # kernels each convolution shrinks the map by one pixel, which the
        # size bookkeeping below accounts for.
        padding = (kernel_size - 1) // 2

        feature_size = input_size
        for i in range(num_conv_layers):
            # Number of filters for this layer based on filter_organization
            if filter_organization == 'same':
                out_channels = num_filters
            elif filter_organization == 'double':
                out_channels = num_filters * (2 ** i)
            elif filter_organization == 'halve':
                out_channels = num_filters // (2 ** i)
            else:
                raise ValueError("Invalid filter organization")
            if out_channels < 1:
                raise ValueError("filter_organization leaves a convolution layer without filters")

            self.conv_layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding))

            if batch_normalization:
                self.conv_layers.append(nn.BatchNorm2d(out_channels))

            # Stride-1 convolution followed by the 2x2 max-pool in forward().
            feature_size = (feature_size + 2 * padding - kernel_size + 1) // 2
            in_channels = out_channels

        if feature_size < 1:
            raise ValueError("input_size is too small for the requested number of convolution layers")

        # Dropout applied to the feature maps after the last convolution block
        self.dropout_conv = nn.Dropout2d(p=dropout_prob)

        # Dense layer on the flattened feature maps
        self.dense = nn.Linear(out_channels * feature_size ** 2, num_neurons_dense)

        # Output layer
        self.output = nn.Linear(num_neurons_dense, num_classes)

    def forward(self, x):
        """Return raw class scores for a batch of images ``x``."""
        for layer in self.conv_layers:
            x = layer(x)
            # Activation and pooling follow each convolution; a BatchNorm
            # entry in the list is applied on its own.
            if isinstance(layer, nn.Conv2d):
                x = F.max_pool2d(self._activation(x), 2)

        # The dropout layer was previously constructed but never applied, so
        # dropout_prob had no effect on the network.
        x = self.dropout_conv(x)

        x = torch.flatten(x, 1)
        x = F.relu(self.dense(x))

        # Output layer (raw scores)
        return self.output(x)

# Candidate values for each hyperparameter explored by the sweep.
_search_space = {
    'num_conv_layers': [5],
    'num_filters': [32, 64, 128],
    'kernel_size': [3, 5],
    'num_neurons_dense': [50, 100, 128],
    'filter_organization': ['same', 'double', 'halve'],
    'data_augmentation': [False, True],
    'batch_normalization': [False, True],
    'dropout_prob': [0.0, 0.2, 0.3],
    'conv_activation': ['relu', 'gelu', 'mish'],
    'max_epoch': [5, 10],
}

# Sweep configuration: Bayesian search maximizing the logged 'val_accuracy'.
sweep_config = {
    'method': 'bayes',
    'name': 'sweep inaturalist part',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        param: {'values': choices} for param, choices in _search_space.items()
    },
}

# Define the sweep: register sweep_config under the given W&B project and keep
# the returned id, which wandb.agent() needs to launch runs for this sweep.
sweep_id = wandb.sweep(sweep_config, project='CS6910_Assignment_2_partA')

def main():
    """Run one sweep trial.

    Reads the hyperparameters from ``wandb.config``, builds the data loaders
    (stratified 80/20 train/validation split of the training folder), creates
    the CNN and trains it for ``max_epoch`` epochs. Metrics are logged by
    ``train``.
    """
    with wandb.init() as run:
        hparams = wandb.config

        run_name = (
            f"-num_fil_{hparams['num_filters']}"
            f"-k_{hparams['kernel_size']}"
            f"-neuron_dense_{hparams['num_neurons_dense']}"
            f"-af_conv_{hparams['conv_activation']}"
            f"-fil_org_{hparams['filter_organization']}"
            f"-data_aug_{hparams['data_augmentation']}"
            f"-batch_norm_{hparams['batch_normalization']}"
            f"-drop_p_{hparams['dropout_prob']}"
        )
        run.name = run_name

        train_folder = '/kaggle/input/dataset/inaturalist_12K/train'
        test_folder = '/kaggle/input/dataset/inaturalist_12K/val'

        # Pass the augmentation *flag*: load_dataset() builds the transform
        # itself. Handing it a ready-made transform made the flag always
        # evaluate as true, so augmentation could never be disabled.
        train_dataset, _ = load_dataset(train_folder, test_folder, hparams['data_augmentation'])

        # Validation images come from the same folder but must not be
        # randomly augmented, so read them through a second dataset that uses
        # the plain pipeline.
        val_dataset = datasets.ImageFolder(train_folder, transform=get_transform(False))

        # Define batch size for DataLoader
        batch_size = 32

        # Stratified split of the training indices. The fixed random_state
        # gives every sweep run the same validation set, so val_accuracy is
        # comparable across runs.
        train_indices, val_indices = train_test_split(
            list(range(len(train_dataset))),
            test_size=0.2,
            shuffle=True,
            stratify=train_dataset.targets,
            random_state=42,
        )
        train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
        val_subset = torch.utils.data.Subset(val_dataset, val_indices)

        # Create DataLoader for train and validation datasets
        train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

        print("Number of samples in train set after splitting:", len(train_indices))
        print("Number of samples in validation set after splitting:", len(val_indices))

        # Peek at a single batch to report the input geometry.
        sample_images, _ = next(iter(train_loader))
        _, in_channels, height, width = sample_images.shape
        num_classes = len(train_dataset.classes)
        input_size = height, width

        print("In channels:", in_channels)
        print("Number of classes:", num_classes)
        print("Input size (height, width):", input_size)

        # Create the model
        model = CNN(hparams)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        # checking if GPU is available
        if torch.cuda.is_available():
            model = model.cuda()
            criterion = criterion.cuda()
        train(model, criterion, optimizer, train_loader, val_loader, hparams['max_epoch'])

# Run the sweep: the agent calls main() once per trial with a config drawn
# from the sweep; count=50 caps the number of trials this agent launches.
wandb.agent(sweep_id, function=main, count=50)
# Finish the run (each trial's run is already closed by the `with wandb.init()`
# block in main(), so this closes any run still active in this session).
wandb.finish()
    

Create sweep with ID: hc2gewm6
Sweep URL: https://wandb.ai/dibakar/CS6910_Assignment_2_partA/sweeps/hc2gewm6


[34m[1mwandb[0m: Agent Starting Run: shgck3xp with config:
[34m[1mwandb[0m: 	batch_normalization: True
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	max_epoch: 5
[34m[1mwandb[0m: 	num_conv_layers: 5
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_neurons_dense: 64


Number of samples in train set after splitting: 7999
Number of samples in validation set after splitting: 2000
In channels: 3
Number of classes: 10
Input size (height, width): (224, 224)


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Epoch,▁█
Loss,█▁
Step,▁▁
Val_Loss,▁
val_accuracy,▁

0,1
Epoch,0.4
Loss,2.30362
Step,0.96
Val_Loss,2.30365
val_accuracy,9.9
