# Imports

In [25]:
import torch
import torch.nn as nn
import torchvision
import wandb
import os
from PIL import Image
import random
from tqdm import tqdm
from collections import Counter
from torchsummary import summary
from torchvision import datasets, transforms
from torch.utils.data import Subset, DataLoader
from sklearn.model_selection import StratifiedShuffleSplit
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Functions

## CNN Model Architecture

In [3]:
class CNN(nn.Module):
    '''
    Configurable convolutional image classifier.

    The network is ``num_conv`` blocks of
    Conv2d -> (optional BatchNorm2d) -> activation -> MaxPool2d(2, 2),
    followed by global average pooling and a dense head
    Linear -> activation -> (optional BatchNorm1d) -> Dropout -> Linear.
    '''

    # Supported activation names (matched case-insensitively) and their modules.
    _ACTIVATIONS = {
        'relu': nn.ReLU,
        'gelu': nn.GELU,
        'silu': nn.SiLU,
        'mish': nn.Mish,
    }

    def __init__(self, num_filters=32, size_filters=3, activation_func='relu', filter_org=1, num_dense=128, batch_normalisation=False, dropout_rate=0.2, input_channels=3, num_classes=10, num_conv=5):
        '''
        num_filters: Number of filters in the first conv layer --> 32,64,etc
        size_filters: Size of each filter (=F) --> 5,10,etc
        activation_func: Activation used after every conv layer and in the dense head --> 'relu', 'gelu', 'silu', 'mish' (case-insensitive)
        filter_org: Ratio of number of filters in i+1th layer to number of filters in ith layer --> 1,0.5,2,etc
        num_dense: Number of neurons in dense layer --> 128
        batch_normalisation: Whether or not to apply batch normalisation after convolution layers (and after the dense activation) --> True, False
        dropout_rate: Fraction of neurons to randomly drop (=p) --> 0.2 to 0.5
        input_channels: number of channels in input layer --> 3 (RGB)
        num_classes: Number of output classes --> 10
        num_conv: number of Conv-activation-maxpool blocks in the CNN model --> given:5

        Raises ValueError if activation_func is not one of the supported names.
        '''
        super().__init__()

        # Fail here with a clear message instead of storing None in the layer
        # list and crashing later inside forward().
        activation_key = str(activation_func).lower()
        if activation_key not in self._ACTIVATIONS:
            raise ValueError(
                'Unsupported activation_func {!r}; expected one of {}'.format(
                    activation_func, sorted(self._ACTIVATIONS)))
        make_activation = self._ACTIVATIONS[activation_key]

        # Truthiness keeps True/False working and guarantees that fc_layers
        # is always defined, whatever flag value the caller passes.
        use_batch_norm = bool(batch_normalisation)

        self.layers = nn.ModuleList()
        in_channels = input_channels
        for layer in range(num_conv):
            # Filter count scales geometrically with depth; keep at least one
            # channel so a fractional filter_org cannot truncate it to zero.
            out_channels = max(1, int(num_filters * (filter_org ** layer)))
            self.layers.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=size_filters, padding=size_filters // 2))
            if use_batch_norm:
                self.layers.append(nn.BatchNorm2d(out_channels))
            self.layers.append(make_activation())
            self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            in_channels = out_channels

        # Global average pooling reduces every feature map to a single value,
        # so the dense head only depends on the last conv layer's channel count.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))

        head = [nn.Linear(in_channels, num_dense), make_activation()]
        if use_batch_norm:
            head.append(nn.BatchNorm1d(num_dense))
        head.append(nn.Dropout(p=dropout_rate))
        head.append(nn.Linear(num_dense, num_classes))
        self.fc_layers = nn.Sequential(*head)

    def forward(self, x):
        '''Apply the conv blocks, pool to 1x1, flatten and return the dense head's output.'''
        for layer in self.layers:
            x = layer(x)
        x = self.adaptive_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x

## Loading Data

In [4]:
def get_dataloaders(dir='/kaggle/input/nature-12k/inaturalist_12K/train',augment='No',split=0.2,batch_size=64):
    '''
    Build stratified train/validation DataLoaders from one ImageFolder directory.

    dir: root folder laid out as one sub-folder per class
    augment: 'Yes' to apply random augmentation to the training subset, 'No' to use the plain resize/normalise pipeline
    split: fraction of the samples held out for validation
    batch_size: batch size used by both loaders

    Returns (train_loader, val_loader). The validation subset never uses augmentation.
    Raises ValueError if augment is neither 'Yes' nor 'No'.
    '''
    # Validate before touching the filesystem; previously any other value left
    # train_transforms unbound and failed later with an UnboundLocalError.
    if augment not in ('Yes', 'No'):
        raise ValueError("augment must be 'Yes' or 'No', got {!r}".format(augment))

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    val_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])
    if augment == 'Yes':
        train_transforms = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.ColorJitter(brightness=0.2, contrast=0.2,
                                   saturation=0.2, hue=0.1),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transforms = val_transforms

    # Two views of the same folder so each subset gets its own transform.
    train_dataset = datasets.ImageFolder(root=dir, transform=train_transforms)
    val_dataset = datasets.ImageFolder(root=dir, transform=val_transforms)

    # Reuse the targets already collected above instead of scanning the
    # directory a third time just to obtain the labels.
    labels = val_dataset.targets

    # Fixed random_state keeps the split identical across runs; only the labels
    # matter for stratification, so a dummy feature array is passed.
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=split, random_state=42)
    train_idx, val_idx = next(splitter.split(torch.zeros(len(labels)), labels))

    train_dataset = Subset(train_dataset, train_idx)
    val_dataset = Subset(val_dataset, val_idx)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    return train_loader,val_loader

def test_dataloader(dir='/kaggle/input/nature-12k/inaturalist_12K/val',batch_size=32):
    '''
    Return a DataLoader over the ImageFolder at `dir`.

    Images are resized to 224x224, converted to tensors and normalised;
    no augmentation is applied.
    '''
    resize = transforms.Resize((224, 224))
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    pipeline = transforms.Compose([resize, to_tensor, normalize])

    return DataLoader(
        datasets.ImageFolder(root=dir, transform=pipeline),
        batch_size=batch_size,
        num_workers=2,
    )

## Validating correctness of data split

In [5]:
def get_class_counts(dataloader):
    '''
    Print the number of samples per class in a DataLoader's dataset and return the total.

    Works for loaders built on a Subset (anything exposing `.indices` and a
    wrapped `.dataset` with `.targets`) as well as loaders built directly on a
    dataset that exposes `.targets`.
    '''
    dataset = dataloader.dataset
    indices = getattr(dataset, 'indices', None)
    if indices is None:
        # Plain dataset: every target belongs to the loader.
        targets = list(dataset.targets)
    else:
        # Subset: only the selected indices of the wrapped dataset count.
        targets = [dataset.dataset.targets[i] for i in indices]

    counts = Counter(targets)
    for cls, count in sorted(counts.items()):
        print(f'Class{cls}: {count} samples')
    total = sum(counts.values())
    print(f'total samples={total}')
    return total

## Optimizer

In [6]:
def get_optimizer(optim,lr,model):
    '''
    Create the optimizer selected by name for `model`'s parameters.

    optim: 'sgd' (plain SGD), 'momentum' (SGD with momentum 0.9) or 'adam' (Adam with weight decay 0.005)
    lr: learning rate
    model: module whose parameters are optimised

    Raises ValueError for any other name instead of silently returning None.
    '''
    if optim == 'sgd':
        return torch.optim.SGD(model.parameters(), lr, weight_decay=0, momentum=0)
    if optim == 'momentum':
        return torch.optim.SGD(model.parameters(), lr, weight_decay=0, momentum=0.9)
    if optim == 'adam':
        return torch.optim.Adam(model.parameters(), lr, weight_decay=0.005)
    raise ValueError("Unknown optimizer {!r}; expected 'sgd', 'momentum' or 'adam'".format(optim))

# Best Model

In [11]:
# Hyperparameters of the best configuration found.
# Validation Accuracy = 40.4%

# -- architecture --
num_filters=64
size_filters=3
filter_org=1
num_dense=256
activation_func= 'relu'
batch_normalisation=True
dropout_rate=0.20426922413644705

# -- optimisation / data --
optimizer='momentum'
learning_rate=0.0032148335356218384
batch_size=32
data_augmentation='No'

model = CNN(
    num_filters=num_filters,
    size_filters=size_filters,
    activation_func=activation_func,
    filter_org=filter_org,
    num_dense=num_dense,
    batch_normalisation=batch_normalisation,
    dropout_rate=dropout_rate,
    input_channels=3,
    num_classes=10,
    num_conv=5,
)
# Print the layer-by-layer summary on the CPU before the model is moved to `device`.
summary(model, input_size=(3, 224, 224), device='cpu')

# Wrap the model for data-parallel execution when more than one GPU is visible.
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print("Using", gpu_count, "GPUs")
    model = nn.DataParallel(model)

model.to(device)
criterion = nn.CrossEntropyLoss()
# From here on `optimizer` is the optimizer object, no longer its name.
optimizer = get_optimizer(optimizer, learning_rate, model=model)
train_loader, val_loader = get_dataloaders(
    '/kaggle/input/nature-12k/inaturalist_12K/train',
    data_augmentation,
    0.2,
    batch_size,
)

# Sanity-check the stratified split by printing per-class counts for both subsets.
print('\nNo. of samples in each class in training data:')
train_count = get_class_counts(train_loader)
print('\nNo. of samples in each class in Validation data:')
val_count = get_class_counts(val_loader)

val_percentage = val_count*100/(train_count+val_count)
print('\nPercentage of train data kept aside for validation={:.2f}%'.format(val_percentage))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
       BatchNorm2d-2         [-1, 64, 224, 224]             128
              ReLU-3         [-1, 64, 224, 224]               0
         MaxPool2d-4         [-1, 64, 112, 112]               0
            Conv2d-5         [-1, 64, 112, 112]          36,928
       BatchNorm2d-6         [-1, 64, 112, 112]             128
              ReLU-7         [-1, 64, 112, 112]               0
         MaxPool2d-8           [-1, 64, 56, 56]               0
            Conv2d-9           [-1, 64, 56, 56]          36,928
      BatchNorm2d-10           [-1, 64, 56, 56]             128
             ReLU-11           [-1, 64, 56, 56]               0
        MaxPool2d-12           [-1, 64, 28, 28]               0
           Conv2d-13           [-1, 64, 28, 28]          36,928
      BatchNorm2d-14           [-1, 64,

In [13]:
def _count_correct(model, loader, device):
    '''
    Run `model` in eval mode over `loader` without gradients.

    Returns (correct, total): the number of samples whose arg-max prediction
    equals the label, and the number of samples seen.
    '''
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct, total


# Train
num_epochs=20
print("training...")
for epoch in range(num_epochs):
    model.train()
    total_loss=0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False):
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()
    # Mean of the per-batch losses for this epoch.
    avg_loss=total_loss/len(train_loader)
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, avg_loss))

    # Accuracy on the training data (model in eval mode)
    correct, total = _count_correct(model, train_loader, device)
    accuracy = 100 * correct / total
    print('Train Accuracy: {:.2f}%'.format(accuracy))

    # Accuracy on the held-out validation data
    correct, total = _count_correct(model, val_loader, device)
    accuracy = 100 * correct / total
    print('Validation Accuracy: {:.2f}%'.format(accuracy))

test_loader=test_dataloader(batch_size=32)
# Test: evaluated once, after the final epoch
correct, total = _count_correct(model, test_loader, device)
accuracy = 100 * correct / total
print('Test Accuracy: {:.2f}%'.format(accuracy))

training...


                                                             

Epoch [1/20], Loss: 2.1974




Train Accuracy: 24.05%
Validation Accuracy: 23.25%


                                                             

Epoch [2/20], Loss: 2.1068




Train Accuracy: 26.10%
Validation Accuracy: 27.35%


                                                             

Epoch [3/20], Loss: 2.0623




Train Accuracy: 28.32%
Validation Accuracy: 27.45%


                                                             

Epoch [4/20], Loss: 2.0136




Train Accuracy: 30.12%
Validation Accuracy: 29.00%


                                                             

Epoch [5/20], Loss: 1.9854




Train Accuracy: 31.58%
Validation Accuracy: 30.90%


                                                             

Epoch [6/20], Loss: 1.9635




Train Accuracy: 30.57%
Validation Accuracy: 28.80%


                                                             

Epoch [7/20], Loss: 1.9274




Train Accuracy: 32.57%
Validation Accuracy: 32.00%


                                                             

Epoch [8/20], Loss: 1.9089




Train Accuracy: 35.53%
Validation Accuracy: 33.75%


                                                             

Epoch [9/20], Loss: 1.8993




Train Accuracy: 31.35%
Validation Accuracy: 28.90%


                                                              

Epoch [10/20], Loss: 1.8642




Train Accuracy: 37.54%
Validation Accuracy: 34.55%


                                                              

Epoch [11/20], Loss: 1.8557




Train Accuracy: 34.05%
Validation Accuracy: 33.40%


                                                              

Epoch [12/20], Loss: 1.8317




Train Accuracy: 35.20%
Validation Accuracy: 33.70%


                                                              

Epoch [13/20], Loss: 1.8185




Train Accuracy: 36.49%
Validation Accuracy: 34.85%


                                                              

Epoch [14/20], Loss: 1.8070




Train Accuracy: 39.63%
Validation Accuracy: 38.05%


                                                              

Epoch [15/20], Loss: 1.7972




Train Accuracy: 36.89%
Validation Accuracy: 34.70%


                                                              

Epoch [16/20], Loss: 1.7719




Train Accuracy: 39.43%
Validation Accuracy: 36.20%


                                                              

Epoch [17/20], Loss: 1.7653




Train Accuracy: 38.90%
Validation Accuracy: 35.05%


                                                              

Epoch [18/20], Loss: 1.7439




Train Accuracy: 42.04%
Validation Accuracy: 37.30%


                                                              

Epoch [19/20], Loss: 1.7400




Train Accuracy: 43.37%
Validation Accuracy: 40.00%


                                                              

Epoch [20/20], Loss: 1.7178




Train Accuracy: 42.37%
Validation Accuracy: 36.75%
Test Accuracy: 36.40%


## Grid

In [30]:
def get_images(dir="/kaggle/input/nature-12k/inaturalist_12K/val"):
    '''
    Pick one random image per class folder under `dir`.

    Returns a dict mapping class folder name -> image tensor, where each image
    is converted to RGB, resized to 224x224 and normalised. Entries of `dir`
    that are not directories, and class folders containing no files, are skipped.
    '''
    all_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    class_images = {}
    # os.listdir order is arbitrary; sorting makes the selection reproducible
    # when the caller seeds `random`.
    for class_name in sorted(os.listdir(dir)):
        class_path = os.path.join(dir, class_name)
        # Stray files in the root (e.g. hidden metadata files) are not classes.
        if not os.path.isdir(class_path):
            continue
        images = sorted(
            f for f in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, f))
        )
        # random.choice raises IndexError on an empty list.
        if not images:
            continue
        img_path = os.path.join(class_path, random.choice(images))
        # The context manager closes the underlying file once the image has
        # been converted, instead of leaving the handle open.
        with Image.open(img_path) as img:
            img_tensor = all_transforms(img.convert("RGB"))
        class_images[class_name] = img_tensor
    return class_images

In [19]:
# Never commit the API key to the notebook: read it from the WANDB_API_KEY
# environment variable instead. When the variable is unset, key=None lets wandb
# use its own credential lookup.
# NOTE: the key that was previously hard-coded here is exposed and should be revoked.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mishita49[0m ([33mishita49-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [32]:
# Log one randomly chosen test image per class to a wandb table together with
# its true and predicted label.

# De-normalisation constants (the inverse of the Normalize transform applied
# when loading the images); built once instead of on every iteration.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3,1,1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3,1,1)

# Using the run as a context manager finishes it even if inference or logging
# raises, so a failed cell does not leave the run open.
with wandb.init(project="DA6401_Assign2") as run:
    table = wandb.Table(columns=["Test Image", "True Label", "Predicted Label"])
    class_images = get_images()
    model.eval()
    # Class names in the index order used by ImageFolder, to decode predictions.
    labels = datasets.ImageFolder('/kaggle/input/nature-12k/inaturalist_12K/val').classes

    with torch.no_grad():
        for class_name, img_tensor in class_images.items():
            image = img_tensor.unsqueeze(0).to(device)
            outputs = model(image)
            _, predicted = torch.max(outputs, 1)
            predicted_class = labels[predicted.item()]

            # Undo the normalisation so the logged image has values in [0, 1].
            img_disp = img_tensor.detach().cpu() * norm_std + norm_mean
            img_disp = img_disp.clamp(0, 1)
            table.add_data(wandb.Image(img_disp),class_name, predicted_class)

    run.log({"10x3_grid": table})