<a href="https://colab.research.google.com/github/Sai-sakunthala/Assignment2/blob/main/Assignment_2_partA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install PyTorch Lightning, which this notebook imports as `pl`.
# NOTE(review): `wandb` is imported by this notebook too but is not installed
# here; presumably the runtime already provides it -- confirm.
!pip install pytorch-lightning

In [None]:
import os
import torch
from torch import nn
import torch.nn.functional as functional
from torch.utils.data import DataLoader, random_split, Subset
import pytorch_lightning as pl
from torchvision import transforms, datasets
from collections import defaultdict
import random
from pytorch_lightning.loggers import WandbLogger
import wandb

In [None]:
# Quietly (-q) extract the nature_12K archive into the inaturalist_data folder.
# NOTE(review): train() reads images from
# "/content/inaturalist_data/inaturalist_12K/train"; confirm that the
# extraction destination below matches the path used at training time.
!unzip -q "C:\Users\sai sakunthala\Desktop\deep learning\A2\nature_12K.zip" -d 'C:\Users\sai sakunthala\Desktop\deep learning\A2\inaturalist_data'

In [None]:
class CNN(pl.LightningModule):
    """Configurable convolutional image classifier.

    The network is a stack of ``num_conv_layers`` blocks, each made of
    ``Conv2d -> [BatchNorm2d] -> activation -> [Dropout] -> MaxPool2d(2)``,
    followed by a dense head ``LazyLinear -> [BatchNorm1d] -> activation ->
    [Dropout] -> Linear``.

    Args:
        initial_in_channels: Number of channels of the input images.
        num_classes: Size of the output logit vector.
        num_conv_layers: Number of convolutional blocks.
        num_filters: Number of filters in the first convolutional block.
        kernel_size: Convolution kernel size; padding is ``kernel_size // 2``.
        activation_fn: Activation *class* (not instance), e.g. ``nn.ReLU``;
            it is instantiated once per block and once for the dense head.
        dense_neurons: Width of the hidden dense layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        use_batchnorm: Whether to insert batch normalisation after every
            convolution and after the first dense layer.
        dropout_rate: Dropout probability; dropout layers are only created
            when this is greater than zero.
        filter_organization: ``'same'`` keeps the filter count constant,
            ``'double'`` doubles it after every block, ``'half'`` halves it
            after every block (never going below 4).
        data_augmentation: Not used by the model itself; it is only recorded
            in ``self.hparams`` through ``save_hyperparameters``.
    """

    def __init__(self, initial_in_channels=3, num_classes=10, num_conv_layers=5, num_filters=32, kernel_size=3, activation_fn=nn.ReLU,
                 dense_neurons=256, learning_rate=1e-3, use_batchnorm=False, dropout_rate=0.3, filter_organization='same', data_augmentation = False):

        super().__init__()
        self.save_hyperparameters()

        # Dropout is only meaningful for a strictly positive probability.
        use_dropout = dropout_rate > 0

        layers_list = []
        input_channels = initial_in_channels
        current_filters = num_filters

        for _ in range(num_conv_layers):
            output_channels = current_filters
            layers_list.append(nn.Conv2d(input_channels, output_channels, kernel_size=kernel_size, padding=kernel_size // 2))
            if use_batchnorm:
                layers_list.append(nn.BatchNorm2d(output_channels))
            layers_list.append(activation_fn())
            if use_dropout:
                layers_list.append(nn.Dropout(dropout_rate))
            layers_list.append(nn.MaxPool2d(kernel_size=2, stride=2))
            input_channels = output_channels
            # Choose the filter count of the next block.
            if filter_organization == 'double':
                current_filters *= 2
            elif filter_organization == 'half':
                current_filters = max(4, current_filters // 2)

        self.convolution_block = nn.Sequential(*layers_list)
        # LazyLinear infers its input size from the first batch it sees, so
        # the flattened feature size does not have to be computed here.
        self.fc1 = nn.LazyLinear(dense_neurons)
        self.bn_fc1 = nn.BatchNorm1d(dense_neurons) if use_batchnorm else None
        self.activation_dense = activation_fn()
        self.dropout_fc1 = nn.Dropout(dropout_rate) if use_dropout else None
        self.fc2 = nn.Linear(dense_neurons, num_classes)
        self.learning_rate = learning_rate

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.convolution_block(x)
        # Flatten everything except the batch dimension for the dense head.
        x = torch.flatten(x, start_dim=1)
        x = self.fc1(x)
        if self.bn_fc1 is not None:
            x = self.bn_fc1(x)
        x = self.activation_dense(x)
        if self.dropout_fc1 is not None:
            x = self.dropout_fc1(x)
        x = self.fc2(x)
        return x

    def training_step(self, batch, batch_idx):
        """Compute and log cross-entropy loss and accuracy; return the loss."""
        x, y = batch
        y_hat = self(x)
        loss = functional.cross_entropy(y_hat, y)
        acc = (y_hat.argmax(dim=1) == y).float().mean()
        self.log("train_loss", loss, prog_bar=True)
        self.log("train_acc", acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log cross-entropy loss and accuracy on a validation batch."""
        x, y = batch
        y_hat = self(x)
        loss = functional.cross_entropy(y_hat, y)
        acc = (y_hat.argmax(dim=1) == y).float().mean()
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", acc, prog_bar=True)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters using ``learning_rate``."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

In [None]:
# Weights & Biases sweep definition: Bayesian search that maximises the logged
# `val_acc` metric. Every hyperparameter is a discrete list of candidate
# values, wrapped below into the {'values': [...]} form.
sweep_config = dict(
    method='bayes',
    metric=dict(name='val_acc', goal='maximize'),
    parameters={
        hyperparameter: {'values': choices}
        for hyperparameter, choices in (
            ('num_filters', [32, 64]),
            ('activation_fn', ['ReLU', 'SiLU', 'GELU', 'Mish']),
            ('filter_organization', ['same', 'double', 'half']),
            ('use_batchnorm', [True, False]),
            ('dropout_rate', [0, 0.2, 0.3]),
            ('dense_neurons', [128, 256, 512]),
            ('learning_rate', [1e-3]),
            ('batch_size', [64]),
            ('data_augmentation', [True, False]),
            ('kernel_size', [3, 5]),
        )
    },
)

In [None]:
def get_activation(name):
    """Map an activation name to its ``torch.nn`` module class.

    Supported names are "ReLU", "GELU", "SiLU" and "Mish". The class itself
    is returned (not an instance). Any other name raises ``KeyError``.
    """
    registry = dict(
        ReLU=nn.ReLU,
        GELU=nn.GELU,
        SiLU=nn.SiLU,
        Mish=nn.Mish,
    )
    return registry[name]


def _build_transforms(augment):
    """Return ``(train_transform, eval_transform)`` producing normalised 128x128 tensors."""
    tensor_steps = [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    eval_transform = transforms.Compose([transforms.Resize((128, 128)), *tensor_steps])
    if not augment:
        return eval_transform, eval_transform

    # RandomResizedCrop(128) already yields 128x128 images, so no separate
    # Resize step is needed in the augmented pipeline.
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(128, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        *tensor_steps,
    ])
    return train_transform, eval_transform


def _stratified_split(samples, train_fraction=0.8):
    """Split sample indices per class into shuffled train and validation index lists."""
    class_to_indices = defaultdict(list)
    for idx, (_, label) in enumerate(samples):
        class_to_indices[label].append(idx)

    train_indices = []
    val_indices = []
    for indices in class_to_indices.values():
        random.shuffle(indices)
        split = int(train_fraction * len(indices))
        train_indices.extend(indices[:split])
        val_indices.extend(indices[split:])

    random.shuffle(train_indices)
    return train_indices, val_indices


def train(config=None):
    """Run one training job configured by a Weights & Biases run/sweep.

    Reads the hyperparameters from ``wandb.config``, builds an 80/20
    per-class train/validation split of the image folder, trains a ``CNN``
    for 5 epochs on one GPU and logs metrics and checkpoints to W&B.

    Args:
        config: Optional configuration forwarded to ``wandb.init``; when the
            function is launched by a sweep agent the values come from the
            sweep instead.
    """
    # The `with` block finishes the run on exit, including when training
    # raises, so no explicit wandb.finish() call is needed here.
    with wandb.init(config=config) as run:
        # Fixed seeds keep the train/validation split identical across runs.
        random.seed(42)
        torch.manual_seed(42)

        config = wandb.config
        run.name = f"{config.activation_fn}_f{config.num_filters}_k{config.kernel_size}_{config.filter_organization}_bn{int(config.use_batchnorm)}_r{config.dropout_rate}_fc{config.dense_neurons}_aug{int(config.data_augmentation)}"
        run.save()

        wandb_logger = WandbLogger(project="cnn-sweep", log_model='all')

        train_transform, eval_transform = _build_transforms(config.get("data_augmentation", False))

        data_dir = "/content/inaturalist_data/inaturalist_12K/train"

        # Two views of the same folder: random augmentation must only affect
        # training images, while validation images always use the
        # deterministic resize + normalise pipeline. Both views are built from
        # the same root, so indices computed on one are reused for the other.
        train_source = datasets.ImageFolder(root=data_dir, transform=train_transform)
        val_source = datasets.ImageFolder(root=data_dir, transform=eval_transform)

        num_classes = len(train_source.classes)

        train_indices, val_indices = _stratified_split(train_source.samples)

        train_dataset = Subset(train_source, train_indices)
        val_dataset = Subset(val_source, val_indices)

        train_loader = DataLoader(train_dataset, config.batch_size, shuffle=True, num_workers=2, pin_memory=True)
        val_loader = DataLoader(val_dataset, config.batch_size, shuffle=False, num_workers=2, pin_memory=True)

        model = CNN(
            initial_in_channels=3,
            num_classes=num_classes,
            num_conv_layers=5,
            num_filters=config.num_filters,
            kernel_size=config.kernel_size,
            activation_fn=get_activation(config.activation_fn),
            dense_neurons=config.dense_neurons,
            learning_rate=config.learning_rate,
            use_batchnorm=config.use_batchnorm,
            dropout_rate=config.dropout_rate,
            filter_organization=config.filter_organization,
            data_augmentation=config.data_augmentation
        )

        # Keep only the checkpoint with the lowest validation loss.
        callbacks = [
            pl.callbacks.ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=1)
        ]

        trainer = pl.Trainer(
            max_epochs=5,
            precision=16,
            logger=wandb_logger,
            accelerator="gpu",   # Ensure it uses GPU
            devices=1,
            callbacks=callbacks,
            gradient_clip_val=0.5
        )
        trainer.fit(model, train_loader, val_loader)

In [None]:
# Register the sweep in the "cnn-sweep-2" project, then have an agent execute
# up to 60 runs of train() with hyperparameters chosen by the sweep.
sweep_id = wandb.sweep(sweep=sweep_config, project="cnn-sweep-2")
wandb.agent(sweep_id, train, count=60)