In [8]:
!pip install wandb



In [7]:
import os
import numpy as np
import zipfile
import requests
from tqdm import tqdm
import torch
import torch.nn as nn
# import torch.nn.functional as F
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split, DataLoader
from sklearn.model_selection import train_test_split
from torch import optim
from pathlib import Path
import json
import wandb

In [9]:
dataset_url = "https://storage.googleapis.com/wandb_datasets/nature_12K.zip"
dataset_zip_path = "/kaggle/working/nature_12K.zip"
dataset_dir = "nature_12K"

In [10]:
if not os.path.exists(dataset_dir):
    if not os.path.exists(dataset_zip_path):
        print("Downloading iNaturalist-12K...")
        response = requests.get(dataset_url, stream=True)
        total_size = int(response.headers.get('content-length', 0))
        with open(dataset_zip_path, 'wb') as f, tqdm(
            desc=dataset_zip_path,
            total=total_size,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in response.iter_content(chunk_size=1024):
                size = f.write(data)
                bar.update(size)

Downloading iNaturalist-12K...


/kaggle/working/nature_12K.zip: 100%|██████████| 3.55G/3.55G [01:30<00:00, 42.3MiB/s] 


In [11]:
if not os.path.exists(dataset_dir):
    print("Extracting dataset...")
    with zipfile.ZipFile(dataset_zip_path, 'r') as zip_ref:
        zip_ref.extractall(".")

Extracting dataset...


In [12]:
def prepare_datasets(data_dir, val_split=0.2, batch_size=32, image_size=(224, 224)):
    data_dir = Path(data_dir)

    # Define transforms (customize as needed)
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),  # Converts to [0, 1] and CxHxW
        transforms.Normalize(mean=[0.5]*3, std=[0.5]*3),  # Normalize RGB
    ])

    # Load training and testing datasets
    full_train_dataset = ImageFolder(root=data_dir / "train", transform=transform)
    test_dataset = ImageFolder(root=data_dir / "val", transform=transform)

    # Create validation split from training set
    val_size = int(val_split * len(full_train_dataset))
    train_size = len(full_train_dataset) - val_size

    train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

    # Extract X and Y by iterating over DataLoader batches if needed
    train_loader = DataLoader(train_dataset, batch_size=len(train_dataset), shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=len(val_dataset), shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

    # Convert to X, Y tensors
    def extract_XY(loader):
        for X, Y in loader:
            return X, Y

    X_train, Y_train = extract_XY(train_loader)
    X_val, Y_val = extract_XY(val_loader)
    X_test, Y_test = extract_XY(test_loader)

    return X_train, Y_train, X_val, Y_val, X_test, Y_test

In [31]:
# dataset_dir = "/kaggle/working/inaturalist_12K"

In [13]:
X_train, Y_train, X_val, Y_val, X_test, Y_test = prepare_datasets(
    data_dir='/kaggle/working/inaturalist_12K', val_split=0.2, batch_size=64, image_size=(224, 224)
)

KeyboardInterrupt: 

In [None]:
print(X_train.shape)

# Part A
### Question 1

Build a small CNN model consisting of 5 convolution layers. Each convolution layer would be followed by an activation and a max-pooling layer.

After 5 such conv-activation-maxpool blocks, you should have one dense layer followed by the output layer containing 10 neurons. The input layer should be compatible with the images in the iNaturalist dataset dataset.
The code should be flexible such that the number of filters, size of filters, and activation function of the convolution layers and dense layers can be changed. You should also be able to change the number of neurons in the dense layer.

In [14]:
api_key = "7040d84a3ed65a967eb3389dd6fe774b418576ed" 
wandb.login(key=api_key)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mda24m004[0m ([33mda24m004-iitmaana[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [15]:
class FlexibleCNN(nn.Module):
    def __init__(self, 
                 num_filters=32,  # number of filters in each conv layer
                 filter_size=3,   # size of filters (k x k)
                 activation='relu',  # activation function
                 dense_neurons=512,  # number of neurons in dense layer
                 input_channels=3,   # RGB images
                 num_classes=10,    # number of output classes
                 use_batch_norm=True,  # whether to use batch normalization
                 dropout_rate=0.2):  # dropout rate
        super(FlexibleCNN, self).__init__()
        
        # Store parameters for calculations
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.dense_neurons = dense_neurons
        self.use_batch_norm = use_batch_norm
        self.dropout_rate = dropout_rate
        
        # Choose activation function
        if activation.lower() == 'relu':
            self.activation = nn.ReLU()
        elif activation.lower() == 'leakyrelu':
            self.activation = nn.LeakyReLU()
        elif activation.lower() == 'gelu':
            self.activation = nn.GELU()
        elif activation.lower() == 'silu':
            self.activation = nn.SiLU()
        elif activation.lower() == 'mish':
            self.activation = nn.Mish()
        else:
            raise ValueError(f"Unsupported activation function: {activation}")
        
        # Create 5 conv-activation-maxpool blocks
        self.conv_blocks = nn.ModuleList()
        in_channels = input_channels
        
        for _ in range(5):
            block = []
            # Conv layer
            block.append(nn.Conv2d(in_channels, num_filters, filter_size, padding=filter_size//2))
            
            # Batch normalization if enabled
            if use_batch_norm:
                block.append(nn.BatchNorm2d(num_filters))
            
            # Activation
            block.append(self.activation)
            
            # Max pooling
            block.append(nn.MaxPool2d(2, 2))
            
            # Dropout after pooling
            block.append(nn.Dropout2d(dropout_rate))
            
            self.conv_blocks.extend(block)
            in_channels = num_filters
        
        # Calculate the size of the flattened features after conv blocks
        # Assuming input size of 224x224 (standard for iNaturalist)
        self.flattened_size = num_filters * (224 // (2**5)) * (224 // (2**5))
        
        # Dense layers
        self.dense = nn.Sequential(
            nn.Linear(self.flattened_size, dense_neurons),
            self.activation,
            nn.Dropout(dropout_rate),  # Dropout before final layer
            nn.Linear(dense_neurons, num_classes)
        )
    
    def forward(self, x):
        for block in self.conv_blocks:
            x = block(x)
        x = x.view(x.size(0), -1)
        x = self.dense(x)
        return x
    
    def get_computations(self):
        """Calculate total number of computations"""
        # Computations in conv layers
        conv_computations = 0
        input_size = 224
        in_channels = 3
        
        for i in range(5):
            # Each conv layer
            conv_computations += (input_size * input_size * in_channels * 
                                self.num_filters * self.filter_size * self.filter_size)
            # Each maxpool reduces size by 2
            input_size = input_size // 2
            in_channels = self.num_filters
        
        # Computations in dense layers
        dense_computations = (self.flattened_size * self.dense_neurons +  # first dense layer
                            self.dense_neurons * 10)  # output layer
        
        return conv_computations + dense_computations
    
    def get_parameters(self):
        """Calculate total number of parameters"""
        # Parameters in conv layers
        conv_params = 0
        in_channels = 3
        
        for _ in range(5):
            # Each conv layer has (filter_size * filter_size * in_channels + 1) * num_filters parameters
            conv_params += (self.filter_size * self.filter_size * in_channels + 1) * self.num_filters
            # Batch norm parameters if enabled
            if self.use_batch_norm:
                conv_params += 2 * self.num_filters  # gamma and beta for each channel
            in_channels = self.num_filters
        
        # Parameters in dense layers
        dense_params = (self.flattened_size * self.dense_neurons + self.dense_neurons +  # first dense layer
                       self.dense_neurons * 10 + 10)  # output layer
        
        return conv_params + dense_params

In [16]:
def train(config=None):
    # Initialize wandb
    with wandb.init(config=config):
        config = wandb.config
        
        # Set random seed for reproducibility
        torch.manual_seed(config.seed)
        np.random.seed(config.seed)
        
        # Data augmentation and normalization
        train_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.ColorJitter(brightness=0.2, contrast=0.2),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                              std=[0.229, 0.224, 0.225])
        ])
        
        val_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                              std=[0.229, 0.224, 0.225])
        ])
        
        # Load dataset
        train_dataset = datasets.ImageFolder(
            root='/kaggle/working/inaturalist_12K/train',
            transform=train_transform
        )
        
        # Split into train and validation
        train_size = int(0.8 * len(train_dataset))
        val_size = len(train_dataset) - train_size
        
        # Use stratified split to maintain class balance
        train_indices, val_indices = train_test_split(
            list(range(len(train_dataset))),
            test_size=0.2,
            stratify=train_dataset.targets,
            random_state=config.seed
        )
        
        train_subset = torch.utils.data.Subset(train_dataset, train_indices)
        val_subset = torch.utils.data.Subset(train_dataset, val_indices)
        
        train_loader = DataLoader(
            train_subset,
            batch_size=config.batch_size,
            shuffle=True,
            num_workers=4
        )
        
        val_loader = DataLoader(
            val_subset,
            batch_size=config.batch_size,
            shuffle=False,
            num_workers=4
        )
        
        # Initialize model
        model = FlexibleCNN(
            num_filters=config.num_filters,
            filter_size=config.filter_size,
            activation=config.activation,
            dense_neurons=config.dense_neurons,
            use_batch_norm=config.use_batch_norm,
            dropout_rate=config.dropout_rate
        )
        
        # Move model to GPU if available
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
        
        # Loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
        
        # Training loop
        best_val_acc = 0
        best_config = None
        
        for epoch in range(config.epochs):
            # Training phase
            model.train()
            train_loss = 0
            train_correct = 0
            train_total = 0
            
            for batch_idx, (inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(device), targets.to(device)
                
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                
                train_loss += loss.item()
                _, predicted = outputs.max(1)
                train_total += targets.size(0)
                train_correct += predicted.eq(targets).sum().item()
                
                if batch_idx % 100 == 0:
                    print(f'Epoch: {epoch}, Batch: {batch_idx}, Loss: {loss.item():.4f}')
            
            train_acc = 100. * train_correct / train_total
            
            # Validation phase
            model.eval()
            val_loss = 0
            val_correct = 0
            val_total = 0
            
            with torch.no_grad():
                for inputs, targets in val_loader:
                    inputs, targets = inputs.to(device), targets.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)
                    
                    val_loss += loss.item()
                    _, predicted = outputs.max(1)
                    val_total += targets.size(0)
                    val_correct += predicted.eq(targets).sum().item()
            
            val_acc = 100. * val_correct / val_total
            
            # Log metrics to wandb
            wandb.log({
                "epoch": epoch,
                "train_loss": train_loss / len(train_loader),
                "train_acc": train_acc,
                "val_loss": val_loss / len(val_loader),
                "val_acc": val_acc
            })
            
            # Save best model
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(model.state_dict(), 'best_model.pth')
            
            print(f'Epoch: {epoch}, Train Loss: {train_loss/len(train_loader):.4f}, '
                  f'Train Acc: {train_acc:.2f}%, Val Loss: {val_loss/len(val_loader):.4f}, '
                  f'Val Acc: {val_acc:.2f}%')
            

In [17]:

# Define sweep configuration
sweep_config = {
    'method': 'bayes',  # Use Bayesian optimization
    'metric': {
        'name': 'val_acc',
        'goal': 'maximize'
    },
    'parameters': {
        'num_filters': {
            'values': [32, 64, 128]
        },
        'filter_size': {
            'values': [3, 5]
        },
        'activation': {
            'values': ['relu', 'gelu', 'silu', 'mish']
        },
        'dense_neurons': {
            'values': [256, 512, 1024]
        },
        'learning_rate': {
            'min': 1e-4,
            'max': 1e-2
        },
        'batch_size': {
            'values': [32, 64, 128]
        },
        'use_batch_norm': {
            'values': [True, False]
        },
        'dropout_rate': {
            'values': [0.2, 0.3, 0.4]
        },
        'epochs': {
            'value': 5
        },
        'seed': {
            'value': 42
        }
    }
}

# Initialize sweep
sweep_id = wandb.sweep(sweep_config, project="inaturalist-cnn-sweep")

# Run sweep
wandb.agent(sweep_id, train, count=50)

Create sweep with ID: ohumz2ah
Sweep URL: https://wandb.ai/da24m004-iitmaana/inaturalist-cnn-sweep/sweeps/ohumz2ah


[34m[1mwandb[0m: Agent Starting Run: n6orkbv3 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.0011060816138786856
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_batch_norm: False
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Epoch: 0, Batch: 0, Loss: 2.2873
Epoch: 0, Batch: 100, Loss: 2.3042
Epoch: 0, Batch: 200, Loss: 2.4302
Epoch: 0, Train Loss: 2.2917, Train Acc: 11.80%, Val Loss: 2.2303, Val Acc: 15.95%
Epoch: 1, Batch: 0, Loss: 2.2839
Epoch: 1, Batch: 100, Loss: 2.2084
Epoch: 1, Batch: 200, Loss: 2.3020
Epoch: 1, Train Loss: 2.2387, Train Acc: 16.71%, Val Loss: 2.1943, Val Acc: 20.50%
Epoch: 2, Batch: 0, Loss: 2.0512
Epoch: 2, Batch: 100, Loss: 2.2482
Epoch: 2, Batch: 200, Loss: 2.3688
Epoch: 2, Train Loss: 2.1968, Train Acc: 18.85%, Val Loss: 2.1451, Val Acc: 22.55%
Epoch: 3, Batch: 0, Loss: 2.1231
Epoch: 3, Batch: 100, Loss: 2.0208
Epoch: 3, Batch: 200, Loss: 2.1466
Epoch: 3, Train Loss: 2.1715, Train Acc: 20.63%, Val Loss: 2.1297, Val Acc: 21.25%
Epoch: 4, Batch: 0, Loss: 2.0204
Epoch: 4, Batch: 100, Loss: 2.1920
Epoch: 4, Batch: 200, Loss: 2.0479
Epoch: 4, Train Loss: 2.1495, Train Acc: 21.23%, Val Loss: 2.1020, Val Acc: 23.50%


0,1
epoch,▁▃▅▆█
train_acc,▁▅▆██
train_loss,█▅▃▂▁
val_acc,▁▅▇▆█
val_loss,█▆▃▃▁

0,1
epoch,4.0
train_acc,21.22765
train_loss,2.14946
val_acc,23.5
val_loss,2.10196


[34m[1mwandb[0m: Agent Starting Run: 6y4m9n28 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.0064928089332368264
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3722
Epoch: 0, Batch: 100, Loss: 2.3137
Epoch: 0, Train Loss: 2.5559, Train Acc: 10.44%, Val Loss: 2.2965, Val Acc: 10.20%
Epoch: 1, Batch: 0, Loss: 2.3852
Epoch: 1, Batch: 100, Loss: 2.3203
Epoch: 1, Train Loss: 2.3200, Train Acc: 9.94%, Val Loss: 2.3025, Val Acc: 10.05%
Epoch: 2, Batch: 0, Loss: 2.3032
Epoch: 2, Batch: 100, Loss: 2.3041
Epoch: 2, Train Loss: 2.3079, Train Acc: 10.08%, Val Loss: 2.3012, Val Acc: 12.15%
Epoch: 3, Batch: 0, Loss: 2.2972
Epoch: 3, Batch: 100, Loss: 2.4603
Epoch: 3, Train Loss: 2.3114, Train Acc: 9.68%, Val Loss: 2.3027, Val Acc: 10.00%
Epoch: 4, Batch: 0, Loss: 2.3068
Epoch: 4, Batch: 100, Loss: 2.3064
Epoch: 4, Train Loss: 2.3134, Train Acc: 10.11%, Val Loss: 2.3027, Val Acc: 10.00%


0,1
epoch,▁▃▅▆█
train_acc,█▃▅▁▅
train_loss,█▁▁▁▁
val_acc,▂▁█▁▁
val_loss,▁█▆██

0,1
epoch,4.0
train_acc,10.11376
train_loss,2.31337
val_acc,10.0
val_loss,2.30273


[34m[1mwandb[0m: Agent Starting Run: qg12fchy with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.006884890630348681
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_batch_norm: False
[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [39]:
# /kaggle/working/inaturalist_12K/train