Question 1 (5 Marks)



In [4]:
!pip list
!pip install wandb

Package                  Version
------------------------ -----------
annotated-types          0.7.0
asttokens                3.0.0
certifi                  2025.1.31
charset-normalizer       3.4.1
click                    8.1.8
comm                     0.2.2
contourpy                1.3.1
cycler                   0.12.1
debugpy                  1.8.14
decorator                5.2.1
docker-pycreds           0.4.0
exceptiongroup           1.2.2
executing                2.2.0
filelock                 3.18.0
fonttools                4.57.0
fsspec                   2025.3.2
gitdb                    4.0.12
GitPython                3.1.44
idna                     3.10
ipykernel                6.29.5
ipython                  8.35.0
jedi                     0.19.2
Jinja2                   3.1.6
jupyter_client           8.6.3
jupyter_core             5.7.2
kiwisolver               1.4.8
MarkupSafe               3.0.2
matplotlib               3.10.1
matplotlib-inline        0.1.7
mpmath         

In [12]:
import os
import shutil
import random
from tqdm import tqdm

def split_dataset(
    src_dir,
    dest_train_dir='train',
    dest_val_dir='val',
    train_ratio=0.8,
    seed=42
):
    random.seed(seed)

    # Create destination directories if they don't exist
    os.makedirs(dest_train_dir, exist_ok=True)
    os.makedirs(dest_val_dir, exist_ok=True)

    classes = os.listdir(src_dir)
    print(f"Found {len(classes)} class folders: {classes}")

    for cls in tqdm(classes, desc="Processing classes"):
        src_cls_path = os.path.join(src_dir, cls)
        if not os.path.isdir(src_cls_path):
            continue

        all_images = [img for img in os.listdir(src_cls_path) if img.lower().endswith(('.png', '.jpg', '.jpeg'))]
        random.shuffle(all_images)

        total_images = len(all_images)
        train_count = int(train_ratio * total_images)
        val_count = total_images - train_count
        print(train_count)
        print(val_count)

        train_images = all_images[:train_count]
        val_images = all_images[train_count:]

        train_cls_path = os.path.join(dest_train_dir, cls)
        val_cls_path = os.path.join(dest_val_dir, cls)

        os.makedirs(train_cls_path, exist_ok=True)
        os.makedirs(val_cls_path, exist_ok=True)

        for img in train_images:
            shutil.copy(os.path.join(src_cls_path, img), os.path.join(train_cls_path, img))

        for img in val_images:
            shutil.copy(os.path.join(src_cls_path, img), os.path.join(val_cls_path, img))

    print("Dataset is splitted into train of 80% and validation of 20%")

# Usage
split_dataset(src_dir="inaturalist_12K/train")


Found 11 class folders: ['.DS_Store', 'Aves', 'Insecta', 'Animalia', 'Mammalia', 'Plantae', 'Fungi', 'Amphibia', 'Arachnida', 'Mollusca', 'Reptilia']


Processing classes:   0%|          | 0/11 [00:00<?, ?it/s]

800
200


Processing classes:  18%|█▊        | 2/11 [00:00<00:03,  2.76it/s]

800
200


Processing classes:  27%|██▋       | 3/11 [00:01<00:04,  1.79it/s]

800
200


Processing classes:  36%|███▋      | 4/11 [00:02<00:05,  1.33it/s]

800
200


Processing classes:  45%|████▌     | 5/11 [00:03<00:04,  1.28it/s]

800
200


Processing classes:  55%|█████▍    | 6/11 [00:04<00:04,  1.09it/s]

799
200


Processing classes:  64%|██████▎   | 7/11 [00:05<00:03,  1.01it/s]

800
200


Processing classes:  73%|███████▎  | 8/11 [00:06<00:02,  1.01it/s]

800
200


Processing classes:  82%|████████▏ | 9/11 [00:07<00:01,  1.06it/s]

800
200


Processing classes:  91%|█████████ | 10/11 [00:08<00:01,  1.00s/it]

800
200


Processing classes: 100%|██████████| 11/11 [00:09<00:00,  1.11it/s]

Dataset is splitted into train of 80% and validation of 20%





Question 1 (5 Marks)
Build a small CNN model consisting of 5 convolution layers. Each convolution layer would be followed by an activation and a max-pooling layer.

After 5 such conv-activation-maxpool blocks, you should have one dense layer followed by the output layer containing 10 neurons (1 for each of the 10 classes). The input layer should be compatible with the images in the iNaturalist dataset dataset.

The code should be flexible such that the number of filters, size of filters, and activation function of the convolution layers and dense layers can be changed. You should also be able to change the number of neurons in the dense layer.

What is the total number of computations done by your network? (assume mmm filters in each layer of size k×kk\times kk×k and nnn neurons in the dense layer)
What is the total number of parameters in your network? (assume mmm filters in each layer of size k×kk\times kk×k and nnn neurons in the dense layer)

In [None]:
import torch
import torch.nn as nn

class SmallCNN(nn.Module):
    def __init__(self, config):
        super(SmallCNN, self).__init__()

        in_channels = 3  # RGB
        conv_layers = []

        for i in range(5):
            conv_layers.append(nn.Conv2d(
                in_channels=in_channels,
                out_channels=config['conv_filters'][i],
                kernel_size=config['kernel_sizes'][i],
                padding=1
            ))
            conv_layers.append(config['conv_activation']())
            conv_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            in_channels = config['conv_filters'][i]

        self.conv_block = nn.Sequential(*conv_layers)

        dummy_input = torch.zeros(1, 3, *config['image_size'])
        dummy_output = self.conv_block(dummy_input)
        flat_size = dummy_output.view(1, -1).shape[1]

        self.fc1 = nn.Linear(flat_size, config['dense_neurons'])
        self.act_dense = config['dense_activation']()
        self.output = nn.Linear(config['dense_neurons'], config['num_classes'])

    def forward(self, x):
        x = self.conv_block(x)
        x = torch.flatten(x, 1)
        x = self.act_dense(self.fc1(x))
        return self.output(x)
    
config = {
    'conv_filters': [32, 64, 128, 256, 256],
    'kernel_sizes': [3, 3, 3, 3, 3],
    'conv_activation': nn.ReLU,
    'dense_activation': nn.ReLU,
    'dense_neurons': 512,
    'num_classes': 10,
    'image_size': (128, 128)
}

model = SmallCNN(config)


Layer 1: ConvBlock1 | In: 3 | Out: 32 | Kernel: 3 | Activation: ReLU | Pooling: MaxPool2d(2x2)

Layer 2: ConvBlock2 | In: 32 | Out: 64 | Kernel: 3 | Activation: ReLU | Pooling: MaxPool2d(2x2)

Layer 3: ConvBlock3 | In: 64 | Out: 128 | Kernel: 3 | Activation: ReLU | Pooling: MaxPool2d(2x2)

Layer 4: ConvBlock4 | In: 128 | Out: 256 | Kernel: 3 | Activation: ReLU | Pooling: MaxPool2d(2x2)

Layer 5: ConvBlock5 | In: 256 | Out: 256 | Kernel: 3 | Activation: ReLU | Pooling: MaxPool2d(2x2)

Dense Layer: 512 neurons | Activation: ReLU

Output Layer: 10 neurons for 10 classes


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class TrainAndPredict:
    def __init__(self, model, device, class_names, lr=0.001):
        self.model = model.to(device)
        self.device = device
        self.class_names = class_names
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)

    def train(self, train_loader, val_loader, epochs=10):
        for epoch in range(epochs):
            self.model.train()
            total_loss = 0

            for images, labels in train_loader:
                images, labels = images.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()

                total_loss += loss.item()

            val_acc = self.validate(val_loader)
            print(f"Epoch {epoch+1}/{epochs} | Loss: {total_loss:.4f} | Val Acc: {val_acc:.2f}%")

    def validate(self, val_loader):
        self.model.eval()
        correct, total = 0, 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = self.model(images)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

        return 100 * correct / total

    def predict(self, image_tensor):
        self.model.eval()
        image_tensor = image_tensor.to(self.device).unsqueeze(0)  # Add batch dimension

        with torch.no_grad():
            outputs = self.model(image_tensor)
            _, pred = torch.max(outputs, 1)
        
        return self.class_names[pred.item()]


In [None]:
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader


# Configuration
config = {
    'conv_filters': [32, 64, 128, 256, 256],
    'kernel_sizes': [3, 3, 3, 3, 3],
    'conv_activation': nn.ReLU,
    'dense_activation': nn.ReLU,
    'dense_neurons': 512,
    'num_classes': 10,
    'image_size': (128, 128)
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load data
transform = transforms.Compose([
    transforms.Resize(config['image_size']),
    transforms.ToTensor()
])

train_dataset = datasets.ImageFolder('train', transform=transform)
val_dataset = datasets.ImageFolder('val', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# Initialize
model = SmallCNN(config)
trainer = TrainAndPredict(model, device, train_dataset.classes)

# Train
trainer.train(train_loader, val_loader, epochs=10)


Epoch 1/10 | Loss: 287.6734 | Val Acc: 13.20%
Epoch 2/10 | Loss: 277.8231 | Val Acc: 20.45%
Epoch 3/10 | Loss: 263.9331 | Val Acc: 26.15%
Epoch 4/10 | Loss: 254.9120 | Val Acc: 27.15%
Epoch 5/10 | Loss: 245.4232 | Val Acc: 28.55%
Epoch 6/10 | Loss: 238.8246 | Val Acc: 28.50%
Epoch 7/10 | Loss: 229.0875 | Val Acc: 32.45%
Epoch 8/10 | Loss: 218.4348 | Val Acc: 31.50%
Epoch 9/10 | Loss: 207.5130 | Val Acc: 30.30%
Epoch 10/10 | Loss: 190.5046 | Val Acc: 31.15%


In [None]:
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader


# Configuration
config = {
    'conv_filters': [32, 64, 128, 256, 256],
    'kernel_sizes': [3, 3, 3, 3, 3],
    'conv_activation': nn.ReLU,
    'dense_activation': nn.ReLU,
    'dense_neurons': 512,
    'num_classes': 10,
    'image_size': (700, 700)
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


train_transform = transforms.Compose([
    transforms.RandomResizedCrop(config['image_size'], scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),
    transforms.RandomGrayscale(p=0.1),
    transforms.GaussianBlur(kernel_size=3),
    transforms.ToTensor(),
])


# For validation, keep only resizing and tensor conversion (no augmentation)
val_transform = transforms.Compose([
    transforms.Resize(config['image_size']),
    transforms.ToTensor(),
])


train_dataset = datasets.ImageFolder('train', transform=train_transform)
val_dataset = datasets.ImageFolder('val', transform=val_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Initialize
model = SmallCNN(config)
trainer = TrainerPredictor(model, device, train_dataset.classes)

# Train
trainer.train(train_loader, val_loader, epochs=10)

Epoch 1/10 | Loss: 293.8017 | Val Acc: 15.38%
Epoch 2/10 | Loss: 286.5302 | Val Acc: 20.03%
Epoch 3/10 | Loss: 280.2457 | Val Acc: 25.00%
Epoch 4/10 | Loss: 275.7233 | Val Acc: 26.27%
Epoch 5/10 | Loss: 269.8967 | Val Acc: 25.52%
Epoch 6/10 | Loss: 266.1672 | Val Acc: 26.03%
Epoch 7/10 | Loss: 266.0421 | Val Acc: 27.86%
Epoch 8/10 | Loss: 263.0377 | Val Acc: 30.49%
Epoch 9/10 | Loss: 261.2026 | Val Acc: 31.52%
Epoch 10/10 | Loss: 258.7496 | Val Acc: 31.52%


In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets
import wandb
import torch
import torch.nn as nn
import torch.optim as optim



activations = {
    'relu': nn.ReLU(),
    'tanh': nn.Tanh(),
    'sigmoid': nn.Sigmoid(),
    'leaky_relu': nn.LeakyReLU(),
    'mish':nn.Mish(),
    'gelu':nn.GELU(),
    'silu':nn.SiLU(),
    'relu6':nn.ReLU6()
}

optimizer_dict = {
    'adam': optim.Adam,
    'adamw': optim.AdamW,
    'sgd': optim.SGD
}


def generate_filters(base_m, strategy):
            if strategy == 'same':
                return [base_m] * 5
            elif strategy == 'double':
                return [base_m * (2 ** i) for i in range(5)]
            elif strategy == 'half':
                return [max(1, base_m // (2 ** i)) for i in range(5)]
            else:
                raise ValueError(f"Unknown strategy: {strategy}")

class CNN(nn.Module):
    def __init__(self, config):
        super(CNN, self).__init__()
        
        in_channels = config['input_dimension'][0]
        base_m = config['conv_filters']
        strategy = config['filter_org']
        conv_filters = generate_filters(base_m, strategy)
        kernel_sizes = config['kernel_sizes']
        stride = config['stride']
        padding = config['padding']
        pool = config['max_pooling_size']
        dropout = config['dropout_rate']
        use_bn = config['use_batchnorm']
        dropout_org = config['dropout_organisation']

        conv_layers = []
        for i in range(5):  # 5 conv layers
            out_channels = conv_filters[i]
            conv_layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=kernel_sizes[i], stride=stride, padding=padding))
            if use_bn:
                conv_layers.append(nn.BatchNorm2d(out_channels))
            if dropout_org == 'before_relu':
                conv_layers.append(nn.Dropout2d(dropout))
            conv_layers.append(activations[config['conv_activation']])
            if dropout_org == 'after_relu':
                conv_layers.append(nn.Dropout2d(dropout))
            conv_layers.append(nn.MaxPool2d(kernel_size=pool))
            in_channels = out_channels

        self.conv = nn.Sequential(*conv_layers)

        # Estimate flattened size
        with torch.no_grad():
            dummy_input = torch.zeros((1, *config['input_dimension']))
            dummy_output = self.conv(dummy_input)
            flattened_size = dummy_output.view(1, -1).shape[1]

        # Fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(flattened_size, config['dense_neurons']),
            activations[config['dense_activation']],
            nn.Dropout(dropout),
            nn.Linear(config['dense_neurons'], config['num_classes'])
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)


class TrainAndPredict:
    def __init__(self, model, device, class_names, optimizer=None, lr=0.001, weight_decay=0.0):
        self.model = model.to(device)
        self.device = device
        self.class_names = class_names
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optimizer_dict[optimizer](self.model.parameters(), lr=lr, weight_decay=weight_decay)

    def train(self, train_loader, val_loader, epochs=10, save_path='best_model.pth'):
        best_val_acc = 0.0
        best_epoch = 0

        for epoch in range(epochs):
            self.model.train()
            total_loss = 0
            correct, total = 0, 0

            for images, labels in train_loader:
                images, labels = images.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()

                total_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

            train_loss = total_loss / len(train_loader)
            train_acc = 100 * correct / total
            val_acc = self.validate(val_loader)

            print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")


            # Save best model
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                best_epoch = epoch + 1
                torch.save(self.model.state_dict(), save_path)

                artifact = wandb.Artifact('best-model', type='model')
                artifact.add_file(save_path)
                wandb.log_artifact(artifact)

            # Log to Weights & Biases
            wandb.log({
                'epoch': epoch + 1,
                'train_loss': train_loss,
                'train_acc': train_acc,
                'val_acc': val_acc
            })

        print(f"\nBest model saved from Epoch {best_epoch} with Val Acc: {best_val_acc:.2f}%")

    def validate(self, val_loader):
        self.model.eval()
        correct, total = 0, 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = self.model(images)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

        return 100 * correct / total

    def predict(self, image_tensor):
        self.model.eval()
        image_tensor = image_tensor.to(self.device).unsqueeze(0)  # Add batch dimension

        with torch.no_grad():
            outputs = self.model(image_tensor)
            _, pred = torch.max(outputs, 1)
        
        return self.class_names[pred.item()]



def train_sweep(config=None):
    with wandb.init(config=config):
        config = wandb.config
        # print(config.conv_filters)
        wandb.run.name = f"filter_{config.filter_size}/dn_{config.n_neurons}/opt_{config.optimizer}/aug_{config.use_augmentation}"

        # Build dynamic config from sweep values
        dynamic_config = {
            'input_dimension': (3, 224, 224),
            'conv_filters': config.conv_filters,
            'kernel_sizes': [config.filter_size] * 5,
            'stride': config.stride,
            'filter_org': config.filter_org,
            'padding': config.padding,
            'max_pooling_size': config.max_pooling_size,
            'dropout_rate': config.dropout_rate,
            'use_batchnorm': config.use_batchnorm,
            'factor': config.factor,
            'dropout_organisation': 'after_relu',
            'dense_neurons': config.n_neurons,
            'num_classes': config.n_classes,
            'optimizer': config.optimizer,
            'conv_activation': config.conv_activation,
            'dense_activation': config.dense_activation,
            'image_size': (224, 224),
            
        }
        
        if config['filter_org'] == 'half' and config['conv_filters'] < 32:
            print("Skipping config: unsafe filter_org with too few filters")
            return
        if config['stride'] > 1 and config['max_pooling_size'] > 1 and config['filter_size'] >= 7:
            print("Skipping config: stride/pool too aggressive with large filter")
            return

        # Define your model
        model = CNN(dynamic_config)

        # Dataloaders
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(dynamic_config['image_size'], scale=(0.5, 1.0)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=15),
            transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),
            transforms.RandomGrayscale(p=0.1),
            transforms.GaussianBlur(kernel_size=3),
            transforms.ToTensor(),
        ]) if config.use_augmentation else transforms.Compose([
            transforms.Resize(dynamic_config['image_size']),
            transforms.ToTensor(),
        ])

        val_transform = transforms.Compose([
            transforms.Resize(dynamic_config['image_size']),
            transforms.ToTensor(),
        ])
        
        
        

        device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
        torch.cuda.set_device(device)
        train_dataset = datasets.ImageFolder('train', transform=train_transform)
        val_dataset = datasets.ImageFolder('val', transform=val_transform)
        train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True,num_workers=4, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=True,num_workers=4, pin_memory=True)

        trainer = TrainAndPredict(model, device, train_dataset.classes,optimizer=config.optimizer,lr=config.learning_rate)

        # Train and log
        trainer.train(train_loader, val_loader, epochs=config.epochs)
        

sweep_config = {
    'method': 'bayes',
    'name': 'Custom CNN',
    'metric': {'name': "val_accuracy", 'goal': 'maximize'},
    'parameters': {
        'conv_filters': {'values': [32, 64, 128]},
        'filter_org': {
            'values': ['same', 'double', 'half']
        },
        'filter_size': {'values': [1,3,7,11]},
        'stride': {'values': [1,2]},
        'padding': {'values': [1,2]},
        'max_pooling_size': {'value': 2},
        'n_neurons': {'values': [64, 128, 256, 512, 1024]},
        'n_classes': {'value': 10},
        'conv_activation': {
            'values': ['relu', 'gelu', 'silu', 'mish', 'relu6','leaky_relu']
        },
        'dense_activation': {
            'values': ['relu', 'gelu', 'silu', 'mish', 'relu6','leaky_relu']
        },
        'dropout_rate': {'values': [0.2, 0.3, 0.4, 0.5]},
        'use_batchnorm': {'values': [True, False]},
        'factor': {'values': [0.5, 1, 2, 3]},
        'learning_rate': {'values': [0.001,0.0001]},
        'batch_size': {'values': [16,32,64]},
        'optimizer': {'values': ['adam', 'adamw','sgd']},
        'epochs': {'values': [5,10,15]},
        'use_augmentation': {'values': [True, False]},
        'dropout_organisation': {'values': ['after_relu','before_relu']},  # simplified for now
    },
}

sweep_id = wandb.sweep(sweep_config, project="iNaturalist_CNN")
wandb.agent(sweep_id, function=train_sweep, count=10)



Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7f0e54566d40>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 7f0e90c99ff0, raw_cell="import torch
import torch.nn as nn
import torchvis.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell://ssh-remote%2B10.24.6.107/mnt/e_disk/ch24s016/da6401_assignment2/main.ipynb#X15sdnNjb2RlLXJlbW90ZQ%3D%3D>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe



Create sweep with ID: di20w0lf
Sweep URL: https://wandb.ai/ch24s016-iitm/iNaturalist_CNN/sweeps/di20w0lf


Exception in thread Thread-40 (_run_job):
Traceback (most recent call last):
  File "/mnt/e_disk/ch24s016/da6401_assignment2/venv/lib/python3.10/site-packages/wandb/agents/pyagent.py", line 300, in _run_job
    wandb.teardown()
  File "/mnt/c_disk/hemnath/.pyenv/versions/3.10.11/lib/python3.10/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/mnt/e_disk/ch24s016/da6401_assignment2/venv/lib/python3.10/site-packages/wandb/sdk/wandb_setup.py", line 404, in teardown
    orig_singleton._teardown(exit_code=exit_code)
  File "/mnt/e_disk/ch24s016/da6401_assignment2/venv/lib/python3.10/site-packages/wandb/sdk/wandb_setup.py", line 249, in _teardown
    internal_exit_code = self._connection.teardown(exit_code or 0)
  File "/mnt/e_disk/ch24s016/da6401_assignment2/venv/lib/python3.10/site-packages/wandb/sdk/lib/service_connection.py", line 228, in teardown
    self._client.send_server_request(
  File "/mnt/e_disk/ch24s016/da6401_assignment2/venv/lib/python3.10/site-package

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7f0e54566d40>> (for post_run_cell), with arguments args (<ExecutionResult object at 7f0e721f8730, execution_count=4 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 7f0e90c99ff0, raw_cell="import torch
import torch.nn as nn
import torchvis.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell://ssh-remote%2B10.24.6.107/mnt/e_disk/ch24s016/da6401_assignment2/main.ipynb#X15sdnNjb2RlLXJlbW90ZQ%3D%3D> result=None>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets
import wandb
import torch
import torch.nn as nn
import torch.optim as optim



activations = {
    'relu': nn.ReLU(),
    'tanh': nn.Tanh(),
    'sigmoid': nn.Sigmoid(),
    'leaky_relu': nn.LeakyReLU(),
    'mish':nn.Mish(),
    'gelu':nn.GELU(),
    'silu':nn.SiLU(),
    'relu6':nn.ReLU6()
}

optimizer_dict = {
    'adam': optim.Adam,
    'adamw': optim.AdamW,
    'sgd': optim.SGD
}


def generate_filters(base_m, strategy):
            if strategy == 'same':
                return [base_m] * 5
            elif strategy == 'double':
                return [base_m * (2 ** i) for i in range(5)]
            elif strategy == 'half':
                return [max(1, base_m // (2 ** i)) for i in range(5)]
            else:
                raise ValueError(f"Unknown strategy: {strategy}")

class CNN(nn.Module):
    def __init__(self, config):
        super(CNN, self).__init__()
        
        in_channels = config['input_dimension'][0]
        base_m = config['conv_filters']
        strategy = config['filter_org']
        conv_filters = generate_filters(base_m, strategy)
        kernel_sizes = config['kernel_sizes']
        stride = config['stride']
        padding = config['padding']
        pool = config['max_pooling_size']
        dropout = config['dropout_rate']
        use_bn = config['use_batchnorm']
        dropout_org = config['dropout_organisation']

        conv_layers = []
        for i in range(5):  # 5 conv layers
            out_channels = conv_filters[i]
            conv_layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=kernel_sizes[i], stride=stride, padding=padding))
            if use_bn:
                conv_layers.append(nn.BatchNorm2d(out_channels))
            if dropout_org == 'before_relu':
                conv_layers.append(nn.Dropout2d(dropout))
            conv_layers.append(activations[config['conv_activation']])
            if dropout_org == 'after_relu':
                conv_layers.append(nn.Dropout2d(dropout))
            conv_layers.append(nn.MaxPool2d(kernel_size=pool))
            in_channels = out_channels

        self.conv = nn.Sequential(*conv_layers)

        # Estimate flattened size
        with torch.no_grad():
            dummy_input = torch.zeros((1, *config['input_dimension']))
            dummy_output = self.conv(dummy_input)
            flattened_size = dummy_output.view(1, -1).shape[1]

        # Fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(flattened_size, config['dense_neurons']),
            activations[config['dense_activation']],
            nn.Dropout(dropout),
            nn.Linear(config['dense_neurons'], config['num_classes'])
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)


class TrainAndPredict:
    def __init__(self, model, device, class_names, optimizer=None, lr=0.001, weight_decay=0.0):
        self.model = model.to(device)
        self.device = device
        self.class_names = class_names
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optimizer_dict[optimizer](self.model.parameters(), lr=lr, weight_decay=weight_decay)

    def train(self, train_loader, val_loader, epochs=10, save_path='best_model.pth'):
        best_val_acc = 0.0
        best_epoch = 0

        for epoch in range(epochs):
            self.model.train()
            total_loss = 0
            correct, total = 0, 0

            for images, labels in train_loader:
                images, labels = images.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()

                total_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

            train_loss = total_loss / len(train_loader)
            train_acc = 100 * correct / total
            val_acc = self.validate(val_loader)

            print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")


            # Save best model
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                best_epoch = epoch + 1
                torch.save(self.model.state_dict(), save_path)

                artifact = wandb.Artifact('best-model', type='model')
                artifact.add_file(save_path)
                wandb.log_artifact(artifact)

            # Log to Weights & Biases
            wandb.log({
                'epoch': epoch + 1,
                'train_loss': train_loss,
                'train_acc': train_acc,
                'val_acc': val_acc
            })

        print(f"\nBest model saved from Epoch {best_epoch} with Val Acc: {best_val_acc:.2f}%")

    def validate(self, val_loader):
        self.model.eval()
        correct, total = 0, 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = self.model(images)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

        return 100 * correct / total

    def predict(self, image_tensor):
        self.model.eval()
        image_tensor = image_tensor.to(self.device).unsqueeze(0)  # Add batch dimension

        with torch.no_grad():
            outputs = self.model(image_tensor)
            _, pred = torch.max(outputs, 1)
        
        return self.class_names[pred.item()]



def train_sweep(config=None):
    with wandb.init(config=config):
        config = wandb.config
        # print(config.conv_filters)
        wandb.run.name = f"filter_{config.filter_size}/dn_{config.n_neurons}/opt_{config.optimizer}/aug_{config.use_augmentation}"

        # Build dynamic config from sweep values
        dynamic_config = {
            'input_dimension': (3, 224, 224),
            'conv_filters': config.conv_filters,
            'kernel_sizes': [config.filter_size] * 5,
            'stride': config.stride,
            'filter_org': config.filter_org,
            'padding': config.padding,
            'max_pooling_size': config.max_pooling_size,
            'dropout_rate': config.dropout_rate,
            'use_batchnorm': config.use_batchnorm,
            'factor': config.factor,
            'dropout_organisation': 'after_relu',
            'dense_neurons': config.n_neurons,
            'num_classes': config.n_classes,
            'optimizer': config.optimizer,
            'conv_activation': config.conv_activation,
            'dense_activation': config.dense_activation,
            'image_size': (224, 224),
            
        }
        
        if config['filter_org'] == 'half' and config['conv_filters'] < 32:
            print("Skipping config: unsafe filter_org with too few filters")
            return
        if config['stride'] > 1 and config['max_pooling_size'] > 1 and config['filter_size'] >= 7:
            print("Skipping config: stride/pool too aggressive with large filter")
            return

        # Define your model
        model = CNN(dynamic_config)

        # Dataloaders
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(dynamic_config['image_size'], scale=(0.5, 1.0)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=15),
            transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),
            transforms.RandomGrayscale(p=0.1),
            transforms.GaussianBlur(kernel_size=3),
            transforms.ToTensor(),
        ]) if config.use_augmentation else transforms.Compose([
            transforms.Resize(dynamic_config['image_size']),
            transforms.ToTensor(),
        ])

        val_transform = transforms.Compose([
            transforms.Resize(dynamic_config['image_size']),
            transforms.ToTensor(),
        ])
        
        
        

        device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
        torch.cuda.set_device(device)
        train_dataset = datasets.ImageFolder('train', transform=train_transform)
        val_dataset = datasets.ImageFolder('val', transform=val_transform)
        train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True,num_workers=4, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=True,num_workers=4, pin_memory=True)

        trainer = TrainAndPredict(model, device, train_dataset.classes,optimizer=config.optimizer,lr=config.learning_rate)

        # Train and log
        trainer.train(train_loader, val_loader, epochs=config.epochs)
        

sweep_config = {
    'method': 'bayes',
    'name': 'Custom CNN',
    'metric': {'name': "val_accuracy", 'goal': 'maximize'},
    'parameters': {
        'conv_filters': {'values': [32, 64, 128]},
        'filter_org': {
            'values': ['same', 'double', 'half']
        },
        'filter_size': {'values': [1,3,7,11]},
        'stride': {'values': [1,2]},
        'padding': {'values': [1,2]},
        'max_pooling_size': {'value': 2},
        'n_neurons': {'values': [64, 128, 256, 512, 1024]},
        'n_classes': {'value': 10},
        'conv_activation': {
            'values': ['relu', 'gelu', 'silu', 'mish', 'relu6','leaky_relu']
        },
        'dense_activation': {
            'values': ['relu', 'gelu', 'silu', 'mish', 'relu6','leaky_relu']
        },
        'dropout_rate': {'values': [0.2, 0.3, 0.4, 0.5]},
        'use_batchnorm': {'values': [True, False]},
        'factor': {'values': [0.5, 1, 2, 3]},
        'learning_rate': {'values': [0.001,0.0001]},
        'batch_size': {'values': [16,32,64]},
        'optimizer': {'values': ['adam', 'adamw','sgd']},
        'epochs': {'values': [5,10,15]},
        'use_augmentation': {'values': [True, False]},
        'dropout_organisation': {'values': ['after_relu','before_relu']},  # simplified for now
    },
}

sweep_id = wandb.sweep(sweep_config, project="iNaturalist_CNN")
wandb.agent(sweep_id, function=train_sweep, count=10)

