In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split

In [2]:
import torch.backends.cudnn as cudnn

# Turn on cuDNN benchmark mode for the convolution layers used below.
cudnn.benchmark = True

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


Using device: cuda


In [3]:

# Preprocessing only (no augmentation is applied): resize every image to the
# 256x256 input the CNN is built for, convert it to a tensor, and rescale each
# channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


# Load dataset: expects "<dataset_path>/train" and "<dataset_path>/val",
# each containing one sub-folder per class (ImageFolder layout).
dataset_path = "inaturalist_12K/"
train_data = datasets.ImageFolder(root=f"{dataset_path}/train", transform=transform)
test_data = datasets.ImageFolder(root=f"{dataset_path}/val", transform=transform)

# Hold out 20% of the training images for validation. The split is driven by a
# dedicated, seeded generator so the same images land in the validation set on
# every run (otherwise validation numbers are not comparable across runs).
SPLIT_SEED = 42
train_size = int(0.8 * len(train_data))
val_size = len(train_data) - train_size
train_subset, val_subset = random_split(
    train_data,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Define DataLoaders
batch_size = 64

# Loading data onto RAM
class PreloadDataset(torch.utils.data.Dataset):
    """In-memory copy of another dataset.

    Every ``(img, label)`` pair of ``dataset`` is fetched once in ``__init__``
    and stored in a list, so later accesses return the stored pair instead of
    going back to the wrapped dataset.

    Args:
        dataset: Source of ``(img, label)`` pairs. Map-style datasets (objects
            with ``__len__`` and ``__getitem__``) are read by index; anything
            else is consumed by plain iteration.
    """

    def __init__(self, dataset):
        # A map-style dataset only promises __len__/__getitem__; iterating it
        # directly would depend on __getitem__ raising IndexError past the end,
        # which is not part of that contract. Index explicitly instead.
        is_map_style = (
            not isinstance(dataset, torch.utils.data.IterableDataset)
            and hasattr(dataset, "__len__")
            and hasattr(dataset, "__getitem__")
        )
        if is_map_style:
            items = (dataset[index] for index in range(len(dataset)))
        else:
            items = iter(dataset)
        self.data = [(img, label) for img, label in items]

    def __len__(self):
        """Return the number of cached samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the cached ``(img, label)`` pair at position ``idx``."""
        return self.data[idx]

# Replace each split with its in-memory copy (train, then validation, then test).
train_subset, val_subset, test_data = [
    PreloadDataset(split) for split in (train_subset, val_subset, test_data)
]

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
eval_loader_kwargs = dict(batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_subset, **eval_loader_kwargs)
test_loader = DataLoader(test_data, **eval_loader_kwargs)

# Report how many samples ended up in each split.
print(f"Training set size: {len(train_subset)}")
print(f"Validation set size: {len(val_subset)}")
print(f"Test set size: {len(test_data)}")

Training set size: 7999
Validation set size: 2000
Test set size: 2000


In [4]:

class CNN(nn.Module):
    """Configurable convolutional image classifier.

    The feature extractor is a stack of ``num_conv_layer`` blocks, each
    ``Conv2d -> [BatchNorm2d] -> activation -> MaxPool2d(2)``. It is followed
    by a fully connected head: ``Flatten -> Linear -> Tanh``, then
    ``num_dense_layers - 1`` extra ``Linear -> Tanh`` pairs, ``Dropout`` and a
    final ``Linear`` layer producing ``num_classes`` logits.

    Args:
        num_conv_layer: Number of convolutional blocks.
        num_conv_filter: Output channels of the first convolution.
        filter_org: Multiplier applied to the channel count after every block
            (the result is truncated to an int).
        batch_norm: Insert ``BatchNorm2d`` after each convolution when True.
        dropout: Dropout probability applied before the output layer.
        kernel_size: Square convolution kernel size; padding is
            ``(kernel_size - 1) // 2``.
        activation_func: One of ``'relu'``, ``'gelu'``, ``'silu'``, ``'mish'``,
            ``'tanh'``.
        num_dense_layers: Number of hidden dense layers (at least one is
            always created).
        dense_neurons: Width of every hidden dense layer.
        in_channels: Number of channels of the input images.
        num_classes: Number of output logits.
        input_size: Height/width of the (square) input images.

    Raises:
        ValueError: If ``activation_func`` is not a supported name, or if the
            feature map would shrink below 1x1 for the given configuration.
    """

    # Supported activation names mapped to the module class that implements them.
    _ACTIVATIONS = {
        'relu': nn.ReLU,
        'gelu': nn.GELU,
        'silu': nn.SiLU,
        'mish': nn.Mish,
        'tanh': nn.Tanh,
    }

    def __init__(self,
                num_conv_layer=5,
                num_conv_filter=32,
                filter_org=2,
                batch_norm=False,
                dropout=0.25,
                kernel_size=3,
                activation_func='relu',
                num_dense_layers=1,
                dense_neurons=512,
                in_channels=3,
                num_classes=10,
                input_size=256):
        super().__init__()

        # Fail loudly on a misspelled activation instead of silently building
        # a network without any non-linearity in the conv blocks.
        if activation_func not in self._ACTIVATIONS:
            raise ValueError(
                f"Unsupported activation_func {activation_func!r}; "
                f"expected one of {sorted(self._ACTIVATIONS)}"
            )
        activation_cls = self._ACTIVATIONS[activation_func]

        self.cnn_blocks = nn.Sequential()
        self.conv_output_dim = None

        padding = (kernel_size - 1) // 2
        channels = in_channels      # channels entering the next block
        spatial = input_size        # feature-map side length entering the next block
        output_channel = num_conv_filter

        for _ in range(num_conv_layer):
            layers = [nn.Conv2d(channels, output_channel, kernel_size, padding=padding)]
            if batch_norm:
                layers.append(nn.BatchNorm2d(output_channel))
            layers.append(activation_cls())
            layers.append(nn.MaxPool2d((2, 2)))
            self.cnn_blocks.append(nn.Sequential(*layers))

            # Stride-1 convolution followed by a 2x2 max-pool (floor division).
            # Tracking this per layer keeps the dense input size correct for
            # even kernel sizes, where the padding above is not size-preserving.
            spatial = (spatial + 2 * padding - kernel_size + 1) // 2
            if spatial < 1:
                raise ValueError(
                    f"input_size={input_size} is too small for "
                    f"{num_conv_layer} conv blocks with kernel_size={kernel_size}"
                )

            channels = output_channel
            output_channel = int(output_channel * filter_org)

        # Number of features handed to the first dense layer.
        self.conv_output_dim = spatial * spatial * channels

        self.dense_layer = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.conv_output_dim, dense_neurons),
            nn.Tanh(),
            *[nn.Sequential(nn.Linear(dense_neurons, dense_neurons), nn.Tanh()) for _ in range(num_dense_layers - 1)],
            nn.Dropout(dropout),
            nn.Linear(dense_neurons, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.cnn_blocks(x)
        # dense_layer starts with nn.Flatten, so no manual reshape is needed.
        return self.dense_layer(x)
        


In [7]:
import copy

# Build the model with the best hyperparameters found by the sweep
model = CNN(num_conv_layer=5, num_conv_filter=32, filter_org=1, batch_norm=True,
            dropout=0.2,kernel_size=3, activation_func='gelu',
            num_dense_layers=2, dense_neurons=256).to(device)
print(model)

criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

print(f"Model is on: {next(model.parameters()).device}")

# Training loop
epochs = 10
best_val_loss = float('inf')
best_state = None  # weights from the epoch with the lowest validation loss so far

for epoch in range(epochs):
    print(f"\nEpoch [{epoch+1}/{epochs}]")

    # Training Phase
    model.train()
    train_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_accuracy = 100 * correct / total
    avg_train_loss = train_loss / len(train_loader)  # mean of per-batch losses

    # Validation Phase (eval mode, no gradient tracking)
    model.eval()
    val_loss, correct, total = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_accuracy = 100 * correct / total
    avg_val_loss = val_loss / len(val_loader)  # mean of per-batch losses

    print(f"Train Loss: {avg_train_loss:.4f} | Train Acc: {train_accuracy:.2f}% | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_accuracy:.2f}%")

    # Keep a snapshot of the best weights. state_dict() returns references to
    # the live tensors, so it has to be deep-copied or later optimizer steps
    # would overwrite the snapshot.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        best_state = copy.deepcopy(model.state_dict())

# Leave `model` holding the best-validation weights rather than whatever the
# last epoch produced, so later evaluation uses the best model.
if best_state is not None:
    model.load_state_dict(best_state)
    print(f"\nRestored weights from the best epoch (Val Loss: {best_val_loss:.4f})")


CNN(
  (cnn_blocks): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU(approximate='none')
      (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    )
    (1): Sequential(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU(approximate='none')
      (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    )
    (2): Sequential(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU(approximate='none')
      (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=

In [8]:
# Evaluate the current `model` on the test loader (eval mode, gradients disabled).
model.eval()
test_loss, correct, total = 0.0, 0, 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)
        test_loss += loss.item()

        # Predicted class = index of the largest logit in each row.
        predicted = torch.max(outputs, 1).indices
        total += labels.size(0)
        correct += int((predicted == labels).sum())

# Accuracy as a percentage; loss as the mean of the per-batch losses.
test_accuracy = 100 * correct / total
avg_test_loss = test_loss / len(test_loader)

print(f"Test Loss: {avg_test_loss:.4f} | Test Acc: {test_accuracy:.2f}%")

Test Loss: 1.8921 | Test Acc: 38.50%


In [None]:
import math
import matplotlib.pyplot as plt
import numpy as np
import random

# Reload the test images as an ImageFolder: the preloaded copy only stores
# (image, label) pairs, while the class names are needed for the plot titles.
test_data = datasets.ImageFolder(root=f"{dataset_path}/val", transform=transform)
class_names = test_data.classes

# Map label index -> class name.
idx_to_class = {v: k for k, v in test_data.class_to_idx.items()}

# Up to 30 random samples from the test set (fewer if the set is smaller,
# since random.sample raises when asked for more items than exist).
num_images = min(30, len(test_data))
samples = random.sample(range(len(test_data)), num_images)

# Derive the grid from the sample count so the two cannot drift apart:
# 3 columns, as many rows as needed, 2.5 inches of height per row.
cols = 3
rows = max(1, math.ceil(num_images / cols))
fig = plt.figure(figsize=(8, 2.5 * rows))

model.eval()
with torch.no_grad():
    for i, idx in enumerate(samples):
        image, label = test_data[idx]
        image_input = image.unsqueeze(0).to(device)  # add the batch dimension
        output = model(image_input)
        pred = torch.argmax(output, 1).item()

        # CHW tensor -> HWC array, then undo Normalize(mean=0.5, std=0.5).
        image_np = image.permute(1, 2, 0).cpu().numpy()
        image_np = np.clip(image_np * 0.5 + 0.5, 0.0, 1.0)  # keep imshow input within [0, 1]

        ax = fig.add_subplot(rows, cols, i + 1)
        ax.imshow(image_np)
        ax.axis("off")
        ax.set_title(f"True: {idx_to_class[label]}\nPred: {idx_to_class[pred]}", fontsize=8)

fig.tight_layout()
fig.savefig("predictions_grid.png", dpi=300, bbox_inches='tight')
plt.show()
