    DA6401 - Assignment 02 (P1)
        This notebook contains the source code written for this assignment which will be later transfered to a python scripy on successful passage of testing and checking.

In [1]:
# Importing the necessary libraries #
import torch
import numpy as np
import torch.nn as nn
import torchvision.transforms.functional as F
import lightning as L
from typing import List
from lightning.pytorch import Trainer
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import precision_score
from torchmetrics import Accuracy

In [2]:
torch.cuda.empty_cache()
torch.set_float32_matmul_precision("medium")

In [3]:
import wandb
from lightning.pytorch.loggers import WandbLogger
wandb.login()
# Initializing wandb logger #
wandb_logger = WandbLogger(
    entity="A2_DA6401_DL",
    project="Lightning_CNN",       
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mae21b105[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
wandb.login(key="5ef7c4bbfa350a2ffd3c198cb9289f544e3a0910")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/joel/.netrc


True

In [5]:
# Function to give the activation function #
def return_activation_function(activation : str = "ReLU"):
    possible_activations = ["ReLU", "Mish", "GELU", "SELU", "SiLU", "LeakyReLU" ]
    # Assertion to be made for the activations possible #
    assert activation in possible_activations, f"activation not in {possible_activations}"

    if activation == "ReLU":
        return nn.ReLU()
    elif activation == "GELU":
        return nn.GELU()
    elif activation == "SiLU":
        return nn.SiLU()
    elif activation == "SELU":
        return nn.SELU()
    elif activation == "Mish":
        return nn.Mish()
    else:
        return nn.LeakyReLU()

In [6]:
class CNN_(nn.Module):
    def __init__(self, config = None):
        super().__init__()
        # Configuration to build the CNN #
        self.config = config
        
        # Some assertions to be made #
        assert config["no_of_conv_blocks"]==len(config["no_of_filters"]), "The filter number do not match with number of conv layers"
        assert config["no_of_conv_blocks"]==len(config["filter_sizes"]), "The filter sizes do not match with number of conv layers"
        assert config["no_of_conv_blocks"]==len(config["conv_strides"]), "The strides do not match with number of conv layers"
        assert config["no_of_conv_blocks"]==len(config["conv_padding"]), "The padding do not match with number of conv layers"
        assert config["no_of_conv_blocks"]==len(config["max_pooling_stride"]), "The max pooling stride do not match with number of conv layers"

        # building the convolution blocks #
        conv_blocks = []
        for block_no in range(config["no_of_conv_blocks"]):
            # Getting the hyper-parameters from the config #
            if block_no == 0:
                in_channels = config["input_channels"]
            else:
                in_channels = config["no_of_filters"][block_no-1]
            out_channels = config["no_of_filters"][block_no]
            filter_size = config["filter_sizes"][block_no]
            stride = config["conv_strides"][block_no]
            padding = config["conv_padding"][block_no]
            if padding == None:
                padding = int((filter_size - 1)/2) if filter_size > 1 else 0
            # Defining the block to add to conv_blocks #
            block_add = nn.Sequential(
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=filter_size, stride=stride, padding=padding),
                nn.BatchNorm2d(num_features=out_channels) if config["batch_norm_conv"] else nn.Identity(),
                return_activation_function(activation=config["conv_activation"][block_no]),
                nn.MaxPool2d(kernel_size=config["max_pooling_kernel_size"][block_no],stride=config["max_pooling_stride"][block_no]) if config["max_pooling_stride"][block_no] != None else nn.Identity(),
                nn.Dropout(config["dropout_conv"]) if config["dropout_conv"]>0 else nn.Identity(),
            )
            # Appending the blocks to the total #
            conv_blocks.append(block_add)

        # Converting the list to a sequential module #
        self.conv_blocks = nn.Sequential(*conv_blocks)

        # Calculating the size of the output #
        dummy_in = torch.randn(size=(1, config["input_channels"],config["input_size"][0], config["input_size"][1]))
        dummy_out = self.conv_blocks(dummy_in).flatten()
        flat_size = len(dummy_out)

        # building the fc blocks #
        fc_blocks = []
        for block_no in range(config["no_of_fc_layers"]):
            if block_no == 0:
                in_channels = flat_size
            else:
                in_channels = config["fc_neurons"][block_no-1]
            out_channels = config["fc_neurons"][block_no]
            block_add = nn.Sequential(
                nn.Linear(in_features=in_channels, out_features=out_channels),
                nn.BatchNorm1d(out_channels) if config["batch_norm_fc"] else nn.Identity(),
                return_activation_function(activation=config["fc_activations"][block_no]),
                nn.Dropout(config["dropout_fc"]) if config["dropout_fc"]>0 else nn.Identity(),
            )
            # Appending to the fc final
            fc_blocks.append(block_add)

        # converting the list to a sequential module #
        self.fc_layers = nn.Sequential(*fc_blocks)

        # Output layer #
        self.output_layer = nn.Sequential(
            nn.Linear(in_features=config["fc_neurons"][-1], out_features=config["num_classes"]),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = self.conv_blocks(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        x = self.output_layer(x)
        return x

In [7]:
class Lightning_CNN(L.LightningModule):
    def __init__(self, config):
        super().__init__()
        self.save_hyperparameters()

        # Define the model
        self.model = CNN_(config=config)

        # Defining the loss and optimizers
        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr = config["learning_rate"])

        # Defining the metrics
        self.acc_metric = Accuracy(task="multiclass", num_classes=config["num_classes"], average="weighted")

    def forward(self, x):
        return self.model(x)
    
    def training_step(self, batch, batch_idx):
        input_, target_ = batch
        output_ = self(input_)
        # Finding the loss to backprop #
        loss = self.loss_fn(output_, target_)
        # Logging the metrics #
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True, sync_dist=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        input_, target_ = batch
        output_ = self(input_)
        # Finding the loss to backprop #
        loss = self.loss_fn(output_, target_)

        output_pred = torch.argmax(output_, dim=1) 
        acc = self.acc_metric(output_pred, target_)
        # Logging the metrics #
        self.log("val_loss", loss, prog_bar=True, logger=True, sync_dist=True)
        self.log("val_acc", acc, prog_bar=True, logger=True, sync_dist=True)
        return loss
    
    def test_step(self, batch, batch_idx):
        input_, target_ = batch
        output_ = self(input_)
        # Finding the loss to backprop #
        loss = self.loss_fn(output_, target_)
        
        output_pred = torch.argmax(output_, dim=1) 
        acc = self.acc_metric(output_pred, target_)
        # Logging the metrics #
        self.log("test_loss", loss, prog_bar=True, logger=True, sync_dist=True)
        self.log("test_acc", acc, prog_bar=True, logger=True, sync_dist=True)
        return loss
    
    def configure_optimizers(self):
        return self.optimizer


In [8]:
config = {
    "no_of_conv_blocks" : 5,
    "input_size" : (256, 256),
    "input_channels" : 3,
    "num_classes" : 10,
    "no_of_filters" : [128, 128, 128, 256, 256],
    "conv_activation" : ["GELU"]*5,
    "filter_sizes" : [5, 5, 5, 3, 3], # Filter sizes has to be odd number
    "conv_strides" : [1, 1, 1, 1, 1],
    "conv_padding" : [None, None, None, None, None], # Use None if you want no reduction in size of image (stride = 1)
    "max_pooling_kernel_size" : [5, 5, 3, 2, 2],
    "max_pooling_stride" : [3, 3, 3, 2, 2], # Use None if you dont want a max pooling between layers
    "batch_norm_conv" : True,
    "dropout_conv" : 0.1, # if dont need use 0
    "no_of_fc_layers" : 1, # Ignore the output layer
    "fc_activations" : ["Mish"], 
    "fc_neurons" : [512],
    "batch_norm_fc" : True,
    "dropout_fc" : 0.2, # if dont need use 0
    "learning_rate" : 1e-4, 
}

In [9]:
# class to orient
class OrientReshape:
    def __init__(self, size = (256, 256)):
        self.size = size
    
    def __call__(self, img):
        # rotate the image to landscape if potrait #
        if img.height > img.width:
            img = img.rotate(90, expand = True)
        # Reshape to target dimension #
        img = F.resize(img, size = self.size)

        return img

In [10]:
# Data augementation and transforms
data_transforms = {
    "orient_" : transforms.Compose([
        OrientReshape(size=(256, 256)),
        transforms.ToTensor()
    ]),
    "train_" : transforms.Compose([
        transforms.RandomHorizontalFlip(p = 0.2),
        transforms.RandomVerticalFlip(p = 0.2),
        transforms.RandomRotation(degrees=15),
        transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.3),
        transforms.GaussianBlur(kernel_size=3),
        transforms.ToTensor(),
        transforms.RandomErasing(p = 0.2, scale=(0.02, 0.075)),
    ])
}

In [11]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import os
import numpy as np

# Path to your dataset
data_dir = os.path.join(os.path.abspath(""), "nature_12K/inaturalist_12K/train/")  # Replace with your path

# Create full dataset
full_dataset = datasets.ImageFolder(root=data_dir, transform=data_transforms["orient_"])

# Get labels for stratified split
labels = [sample[1] for sample in full_dataset.samples]

In [12]:
# Stratified split #
from sklearn.model_selection import train_test_split

train_indices, val_indices = train_test_split(
    np.arange(len(labels)),
    test_size=0.2,
    stratify=labels,
    random_state=42
)


# Create subsets
train_dataset = Subset(full_dataset, train_indices)
val_dataset = Subset(full_dataset, val_indices)
train_dataset.dataset.transform = data_transforms['train_']
val_dataset.dataset.transform = data_transforms["orient_"] 

batch_size = 32
num_workers = 2 # Adaptive number of workers

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    #pin_memory=True,
    drop_last=True  # Helps with batch norm stability
)

val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    #pin_memory=True
)

In [13]:
# Call backs
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
early_stopping = EarlyStopping('val_acc', patience=10, mode="max")
checkpoint_callback = ModelCheckpoint(
    monitor="val_acc",
    dirpath="checkpoints/",     # Folder to save
    filename="best-checkpoint_2", # File name
    save_top_k=1,
    mode="max"                  # Save only when val_loss is minimized
)

model = Lightning_CNN(config=config)
trainer = Trainer(max_epochs=50, precision=16, accelerator="auto", logger=wandb_logger, callbacks=[early_stopping, checkpoint_callback])

trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

/home/joel/Pytorch_CUDA/virt_env/lib/python3.12/site-packages/lightning/fabric/connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


/home/joel/Pytorch_CUDA/virt_env/lib/python3.12/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/joel/Pytorch_CUDA/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type               | Params | Mode 
----------------------------------------------------------
0 | model      | CNN_               | 2.2 M  | train
1 | loss_fn    | CrossEntropyLoss   | 0      | train
2 | acc_metric | MulticlassAccuracy | 0      | train
----------------------------------------------------------
2.2 M     Trainable params
0         Non-trainable params
2.2 M     Total params
8.989     Total estimated model params size (MB)
43        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [14]:
wandb.finish()

0,1
epoch,▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
train_loss_epoch,█▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
train_loss_step,███▇█▆▇▆▄▅▅▄▄▄▅▄▄▃▃▄▃▄▄▂▅▂▂▂▁▂▂▃▂▂▃▂▃▃▂▃
trainer/global_step,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
val_acc,▁▃▃▄▅▅▆▆▆▆▆▇▇▇▆▇▇▇▇█▆▇█▆▇▇█▇▇▇▇▇▇
val_loss,█▆▆▅▄▄▃▃▃▃▃▂▂▂▃▂▂▂▂▁▃▂▁▂▂▂▁▂▂▂▂▂▂

0,1
epoch,32.0
train_loss_epoch,1.66075
train_loss_step,1.79858
trainer/global_step,8216.0
val_acc,0.429
val_loss,2.03048


In [15]:
# Testing
# Path to your dataset
data_dir = os.path.join(os.path.abspath(""), "nature_12K/inaturalist_12K/val/")  # Replace with your path

# Create full dataset
test_dataset = datasets.ImageFolder(root=data_dir, transform=data_transforms["orient_"])

test_dataset.transform = data_transforms["orient_"] 

batch_size = 32
num_workers = 2 # Adaptive number of workers

test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    #pin_memory=True
)

In [16]:
# testing 
best_model_path = checkpoint_callback.best_model_path
model = Lightning_CNN.load_from_checkpoint(best_model_path)

In [17]:
# Disable wandb or other loggers during inference to avoid usage errors
trainer = Trainer(logger=False)

# Run prediction
predictions = trainer.test(model=model, dataloaders=test_loader)

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

/home/joel/Pytorch_CUDA/virt_env/lib/python3.12/site-packages/lightning/pytorch/core/module.py:512: You called `self.log('test_loss', ..., logger=True)` but have no logger configured. You can enable one by doing `Trainer(logger=ALogger(...))`
/home/joel/Pytorch_CUDA/virt_env/lib/python3.12/site-packages/lightning/pytorch/core/module.py:512: You called `self.log('test_acc', ..., logger=True)` but have no logger configured. You can enable one by doing `Trainer(logger=ALogger(...))`
