In [1]:
# !pip install wandb
import wandb
wandb.login(key='12c0b23d6865ce943b48c8ea1451c9b2d3aedf60')

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [2]:
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision.transforms import transforms
from torchvision.transforms import RandomRotation, RandomHorizontalFlip, RandomVerticalFlip, ColorJitter
from torch.nn import BatchNorm2d, Dropout
import numpy as np

# Data transformations with data augmentation
transform = transforms.Compose([
    RandomRotation(degrees=15),  # Random rotations up to 15 degrees
    RandomHorizontalFlip(),      # Random horizontal flips
    RandomVerticalFlip(),        # Random vertical flips
    ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),  # Random color jitter
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Path to iNaturalist dataset directory
data_dir = '/kaggle/input/naturalist-data/nature_12K/inaturalist_12K/train'

# Load the iNaturalist dataset and apply transformations
train_dataset = ImageFolder(root=data_dir, transform=transform)

# Define the indices
dataset_size = len(train_dataset)
indices = list(range(dataset_size))
split = int(np.floor(0.2 * dataset_size))  # 20% for validation

# Shuffle indices
np.random.seed(42)
np.random.shuffle(indices)

# Split the indices into training and validation sets
train_indices, val_indices = indices[split:], indices[:split]

# DataLoader instances for training and validation sets
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(train_dataset, batch_size=64, sampler=train_sampler)
val_loader = DataLoader(train_dataset, batch_size=64, sampler=val_sampler)

from torch.utils.data.sampler import SubsetRandomSampler

# Define the path to the iNaturalist dataset directory
test_data_dir = '/kaggle/input/naturalist-data/nature_12K/inaturalist_12K/val'

# Define transformations with data augmentation
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # ImageNet normalization
])

# Load the iNaturalist dataset using ImageFolder and apply transformations
test_dataset = ImageFolder(root=test_data_dir, transform=transform)

test_loader = DataLoader(test_dataset, batch_size=64)

# print
for images, labels in test_loader:
    print("Test batch shapes:", images.shape, labels.shape)
    break

for images, labels in train_loader:
    print("Training batch shapes:", images.shape, labels.shape)
    break

for images, labels in val_loader:
    print("Validation batch shapes:", images.shape, labels.shape)
    break

Test batch shapes: torch.Size([64, 3, 224, 224]) torch.Size([64])
Training batch shapes: torch.Size([64, 3, 224, 224]) torch.Size([64])
Validation batch shapes: torch.Size([64, 3, 224, 224]) torch.Size([64])


In [3]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)


Using device: cuda


In [4]:
import torch.nn as nn
import torch.nn.functional as F

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# print('Using device:', device)

# Convolution nueral Network
class CNN(nn.Module):
    def __init__(self, num_classes=10, out_channels=[32, 64, 128, 256, 512], filter_sizes=[3, 3, 3, 3, 3],
                 stride=1, padding=1, pool_size=(2, 2), fullyconnected_size=128, activations=['relu', 'relu', 'relu', 'relu', 'relu'], dropout_rate=0.2, batch_norm=True, data_augmentation=False):
        super(CNN, self).__init__()

        # Store arguments as class attributes
        self.num_classes = num_classes
        self.out_channels = out_channels
        self.filter_sizes = filter_sizes
        self.stride = stride
        self.padding = padding
        self.pool_size = pool_size
        self.fullyconnected_size = fullyconnected_size
        self.activations = activations
        self.dropout_rate = dropout_rate
        self.batch_norm = batch_norm
        self.data_augmentation = data_augmentation

        # Define the convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=self.out_channels[0], kernel_size=self.filter_sizes[0],
                               stride=self.stride, padding=self.padding)
        self.conv2 = nn.Conv2d(in_channels=self.out_channels[0], out_channels=self.out_channels[1],
                               kernel_size=self.filter_sizes[1], stride=self.stride, padding=self.padding)
        self.conv3 = nn.Conv2d(in_channels=self.out_channels[1], out_channels=self.out_channels[2],
                               kernel_size=self.filter_sizes[2], stride=self.stride, padding=self.padding)
        self.conv4 = nn.Conv2d(in_channels=self.out_channels[2], out_channels=self.out_channels[3],
                               kernel_size=self.filter_sizes[3], stride=self.stride, padding=self.padding)
        self.conv5 = nn.Conv2d(in_channels=self.out_channels[3], out_channels=self.out_channels[4],
                               kernel_size=self.filter_sizes[4], stride=self.stride, padding=self.padding)

        # Define batch normalization layers
        if self.batch_norm:
            self.bn1 = nn.BatchNorm2d(self.out_channels[0])
            self.bn2 = nn.BatchNorm2d(self.out_channels[1])
            self.bn3 = nn.BatchNorm2d(self.out_channels[2])
            self.bn4 = nn.BatchNorm2d(self.out_channels[3])
            self.bn5 = nn.BatchNorm2d(self.out_channels[4])

        # Dropout layers
        self.dropout = nn.Dropout2d(p=self.dropout_rate)

        # Calculate the input size for the fully connected layer
        self._calculate_fc_input_size()

        # Define the fully connected layer
        self.fc1 = nn.Linear(self.fc_input_size, self.fullyconnected_size)

        # Define the output layer
        self.fc2 = nn.Linear(self.fullyconnected_size, self.num_classes)

    def forward(self, x):
        # Convolutional layers with specified activations, batch normalization, dropout, and max pooling
        x = getattr(F, self.activations[0])(self.conv1(x))
        if self.batch_norm:
            x = getattr(F, self.activations[0])(self.bn1(x))
        if self.data_augmentation:
            x = F.dropout(x, p=0.2, training=self.training)
        x = F.max_pool2d(x, kernel_size=self.pool_size, stride=self.pool_size)
        x = getattr(F, self.activations[1])(self.conv2(x))
        if self.batch_norm:
            x = getattr(F, self.activations[1])(self.bn2(x))
        if self.data_augmentation:
            x = F.dropout(x, p=0.2, training=self.training)
        x = F.max_pool2d(x, kernel_size=self.pool_size, stride=self.pool_size)
        x = getattr(F, self.activations[2])(self.conv3(x))
        if self.batch_norm:
            x = getattr(F, self.activations[2])(self.bn3(x))
        if self.data_augmentation:
            x = F.dropout(x, p=0.2, training=self.training)
        x = F.max_pool2d(x, kernel_size=self.pool_size, stride=self.pool_size)
        x = getattr(F, self.activations[3])(self.conv4(x))
        if self.batch_norm:
            x = getattr(F, self.activations[3])(self.bn4(x))
        if self.data_augmentation:
            x = F.dropout(x, p=0.2, training=self.training)
        x = F.max_pool2d(x, kernel_size=self.pool_size, stride=self.pool_size)
        x = getattr(F, self.activations[4])(self.conv5(x))
        if self.batch_norm:
            x = getattr(F, self.activations[4])(self.bn5(x))
        if self.data_augmentation:
            x = F.dropout(x, p=0.2, training=self.training)
        x = F.max_pool2d(x, kernel_size=self.pool_size, stride=self.pool_size)

        # Flatten the output for the fully connected layer
        x = x.view(-1, self.fc1.in_features)

        # Fully connected layer with ReLU activation
        x = F.relu(self.fc1(x))

        # Output layer
        x = self.fc2(x)

        return x

    def _calculate_fc_input_size(self):
        # Dummy input to calculate the input size for the fully connected layer
        input_tensor = torch.randn(1, 3, 224, 224)
        output = self._forward_features(input_tensor)
        self.fc_input_size = output.view(-1).size(0)

    def _forward_features(self, x):
        x = F.max_pool2d(getattr(F, self.activations[0])(self.conv1(x)), kernel_size=self.pool_size, stride=self.pool_size)
        x = F.max_pool2d(getattr(F, self.activations[1])(self.conv2(x)), kernel_size=self.pool_size, stride=self.pool_size)
        x = F.max_pool2d(getattr(F, self.activations[2])(self.conv3(x)), kernel_size=self.pool_size, stride=self.pool_size)
        x = F.max_pool2d(getattr(F, self.activations[3])(self.conv4(x)), kernel_size=self.pool_size, stride=self.pool_size)
        x = F.max_pool2d(getattr(F, self.activations[4])(self.conv5(x)), kernel_size=self.pool_size, stride=self.pool_size)
        return x

# Create an instance of the CNN model with different configurations
model = CNN(num_classes=10, out_channels=[32, 64, 128, 256, 512], filter_sizes=[3, 3, 3, 3, 3],
            stride=1, padding=1, pool_size=(2, 2), fullyconnected_size=128,
            activations=['relu', 'relu', 'relu', 'relu', 'relu'], dropout_rate=0.2, batch_norm=True, data_augmentation=False).to(device)

# Print the model architecture
print(model)
# model.to()


CNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn5): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout2d(p=0.2, inplace=False)
  (fc1): Linear(in_features=25088, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10,

In [6]:
# # Define the batch size
# batch_size = 32

# # Initialize the model
# model = CNN().to(device)

# # Define the optimizer
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# # Define the loss function
# criterion = nn.CrossEntropyLoss()

# # Define the number of epochs
# num_epochs = 1

# # Training loop
# for epoch in range(num_epochs):
#     # Set the model to training mode
#     model.train()
#     train_loss = 0.0
#     correct_train = 0
#     total_train = 0

#     for images, labels in train_loader:
#         # Move images and labels to device (e.g., GPU)
#         images, labels = images.to(device), labels.to(device)

#         # Zero the parameter gradients
#         optimizer.zero_grad()

#         # Forward pass
#         outputs = model(images)

#         # Calculate loss
#         loss = criterion(outputs, labels)
#         train_loss += loss.item()

#         # Backward pass and optimize
#         loss.backward()
#         optimizer.step()

#         # Calculate training accuracy
#         _, predicted = torch.max(outputs.data, 1)
#         total_train += labels.size(0)
#         correct_train += (predicted == labels).sum().item()

#     # Calculate average training loss and accuracy
#     train_loss /= len(train_loader)
#     train_accuracy = 100 * correct_train / total_train

#     # Set the model to evaluation mode
#     model.eval()
#     val_loss = 0.0
#     correct_val = 0
#     total_val = 0

#     # Disable gradient calculation
#     with torch.no_grad():
#         for images, labels in val_loader:
#             # Move images and labels to device
#             images, labels = images.to(device), labels.to(device)

#             # Forward pass
#             outputs = model(images)

#             # Calculate loss
#             loss = criterion(outputs, labels)
#             val_loss += loss.item()

#             # Calculate validation accuracy
#             _, predicted = torch.max(outputs.data, 1)
#             total_val += labels.size(0)
#             correct_val += (predicted == labels).sum().item()

#     # Calculate average validation loss and accuracy
#     val_loss /= len(val_loader)
#     val_accuracy = 100 * correct_val / total_val

#     # Print training and validation statistics
#     print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')
    


In [5]:
from tqdm import tqdm

def train_and_evaluate(model, train_loader, val_loader, criterion, optimizer, num_epochs=4):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     device = 'cuda'
#     model.to(device)
    for epoch in range(num_epochs):
        model.train()  # Set model to train mode
        running_loss = 0.0
        train_loss = 0.0
        correct = 0
        total = 0
        for ind, (images, labels) in enumerate(tqdm(train_loader)):
            images, labels = images.to(device), labels.to(device)  # Move data to GPU
            optimizer.zero_grad()  # Zero the parameter gradients
            outputs = model(images)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss
            train_loss = train_loss + loss
            loss.backward()  # Backward pass
            optimizer.step()  # Optimize
            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        train_loss = train_loss / len(train_loader.dataset)
        train_accuracy = correct / total

        # Validation loop
        model.eval()  # Set model to evaluation mode
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)  # Move data to GPU
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * images.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_loss = val_loss / len(val_loader.dataset)
        val_accuracy = correct / total

        # Print epoch statistics
        print(f'Epoch {epoch + 1}/{num_epochs}: Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}')

        # Log to WandB
        wandb.log({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_accuracy': train_accuracy,
            'val_loss': val_loss,
            'val_accuracy': val_accuracy
        })


In [6]:
# Define the parameters for hyperparameter tuning
sweep_config = {
    'method': 'bayes', 
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'filter_sizes': {
            'values': [[3, 3, 3, 3, 3], [4, 4, 4, 4, 4], [5, 5, 5, 5, 5]]
        },
        'activation': {
            'values': [['leaky_relu', 'leaky_relu', 'leaky_relu', 'leaky_relu', 'leaky_relu'], ['relu', 'relu', 'relu', 'relu','relu'], ['relu', 'gelu', 'silu', 'mish','relu'],['gelu', 'mish', 'gelu', 'relu','gelu']]
        },
        'num_dense': {
            'values': [128, 256]
        },
        'batch_norm': {
            'values': [True, False]
        },
        'filter_organization': {
            'values': [[64, 128, 256, 512, 1024], [32, 32, 32, 32, 32], [32, 64, 64, 64, 128], [32, 64, 128, 256, 512]]
        },
        'dropout_rate': {
            'values': [0.2, 0.3]  
        },
        'data_augmentation': {
            'values': [True, False] 
        }
    }
}

sweep_id = wandb.sweep(sweep = sweep_config, project = 'Assignment_2')

Create sweep with ID: khdffvoq
Sweep URL: https://wandb.ai/lokendrakumar/Assignment_2/sweeps/khdffvoq


In [None]:
# Now, you can modify your main function to call train_and_evaluate:
def main():
    # Initialize a new wandb run
    with wandb.init() as run:
        # Construct run name based on hyperparameters
        run_name = f"{wandb.config.activation}-{wandb.config.filter_organization}-dropout-{wandb.config.dropout_rate}-batch_norm-{wandb.config.batch_norm}-data_augmentation-{wandb.config.data_augmentation}"
        wandb.run.name = run_name

        # Model object creation
        model = CNN(num_classes=10,
                    out_channels=wandb.config.filter_organization,
                    filter_sizes=wandb.config.filter_sizes,
                    activations=wandb.config.activation,
                    fullyconnected_size=wandb.config.num_dense,
                    dropout_rate=wandb.config.dropout_rate,
                    data_augmentation=wandb.config.data_augmentation)
        model.to(device)

#         # Define data transformations with data augmentation
#         transform = transforms.Compose([
#             RandomRotation(degrees=15),  # Random rotations up to 15 degrees
#             RandomHorizontalFlip(),      # Random horizontal flips
#             RandomVerticalFlip(),        # Random vertical flips
#             ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),  # Random color jitter
#             transforms.Resize((224, 224)),
#             transforms.ToTensor(),
#             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        
#         ])
        # Define data transformations
        transform_list = [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]

        # Conditionally add data augmentation transformations
        if wandb.config.data_augmentation:
            transform_list = [
                RandomRotation(degrees=15),  
                RandomHorizontalFlip(),   
                RandomVerticalFlip(),        
                ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Random color jitter
            ] + transform_list

        transform = transforms.Compose(transform_list)
        
        criterion = nn.CrossEntropyLoss()
        

        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        train_and_evaluate(model, train_loader, val_loader, criterion, optimizer)

# Run the sweep to perform 5 experiments
wandb.agent(sweep_id, function=main, count=20)

[34m[1mwandb[0m: Agent Starting Run: bszpsaup with config:
[34m[1mwandb[0m: 	activation: ['leaky_relu', 'leaky_relu', 'leaky_relu', 'leaky_relu', 'leaky_relu']
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	filter_organization: [32, 64, 128, 256, 512]
[34m[1mwandb[0m: 	filter_sizes: [4, 4, 4, 4, 4]
[34m[1mwandb[0m: 	num_dense: 128
[34m[1mwandb[0m: Currently logged in as: [33mma23m008[0m ([33mlokendrakumar[0m). Use [1m`wandb login --relogin`[0m to force relogin


100%|██████████| 125/125 [10:30<00:00,  5.04s/it]


Epoch 1/4: Train Loss: 0.0313, Train Acc: 0.1754, Val Loss: 0.4413, Val Acc: 0.1731


100%|██████████| 125/125 [08:41<00:00,  4.17s/it]


Epoch 2/4: Train Loss: 0.0271, Train Acc: 0.2092, Val Loss: 0.4376, Val Acc: 0.2191


100%|██████████| 125/125 [08:39<00:00,  4.15s/it]


Epoch 3/4: Train Loss: 0.0266, Train Acc: 0.2314, Val Loss: 0.4358, Val Acc: 0.2206


100%|██████████| 125/125 [08:36<00:00,  4.14s/it]


Epoch 4/4: Train Loss: 0.0262, Train Acc: 0.2430, Val Loss: 0.4242, Val Acc: 0.2486


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▆█
train_accuracy,▁▅▇█
train_loss,█▂▂▁
val_accuracy,▁▅▅█
val_loss,█▆▆▁

0,1
epoch,4.0
train_accuracy,0.243
train_loss,0.02618
val_accuracy,0.24862
val_loss,0.42425


[34m[1mwandb[0m: Agent Starting Run: rojhhf80 with config:
[34m[1mwandb[0m: 	activation: ['relu', 'relu', 'relu', 'relu', 'relu']
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	filter_sizes: [4, 4, 4, 4, 4]
[34m[1mwandb[0m: 	num_dense: 256


100%|██████████| 125/125 [08:19<00:00,  3.99s/it]


Epoch 1/4: Train Loss: 0.0272, Train Acc: 0.2046, Val Loss: 0.4149, Val Acc: 0.2656


100%|██████████| 125/125 [08:16<00:00,  3.98s/it]


Epoch 2/4: Train Loss: 0.0256, Train Acc: 0.2596, Val Loss: 0.4122, Val Acc: 0.2576


100%|██████████| 125/125 [08:13<00:00,  3.95s/it]


Epoch 3/4: Train Loss: 0.0250, Train Acc: 0.2801, Val Loss: 0.3938, Val Acc: 0.3057


100%|██████████| 125/125 [08:18<00:00,  3.99s/it]


Epoch 4/4: Train Loss: 0.0245, Train Acc: 0.3036, Val Loss: 0.3994, Val Acc: 0.2931


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▆█
train_accuracy,▁▅▆█
train_loss,█▄▂▁
val_accuracy,▂▁█▆
val_loss,█▇▁▃

0,1
epoch,4.0
train_accuracy,0.30362
train_loss,0.02451
val_accuracy,0.29315
val_loss,0.39938


[34m[1mwandb[0m: Agent Starting Run: 87rngbot with config:
[34m[1mwandb[0m: 	activation: ['relu', 'relu', 'relu', 'relu', 'relu']
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	filter_organization: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	filter_sizes: [5, 5, 5, 5, 5]
[34m[1mwandb[0m: 	num_dense: 128


100%|██████████| 125/125 [08:22<00:00,  4.02s/it]


Epoch 1/4: Train Loss: 0.0276, Train Acc: 0.1903, Val Loss: 0.4345, Val Acc: 0.2226


100%|██████████| 125/125 [08:16<00:00,  3.98s/it]


Epoch 2/4: Train Loss: 0.0260, Train Acc: 0.2544, Val Loss: 0.4208, Val Acc: 0.2541


100%|██████████| 125/125 [08:20<00:00,  4.00s/it]


Epoch 3/4: Train Loss: 0.0252, Train Acc: 0.2764, Val Loss: 0.4035, Val Acc: 0.2931


100%|██████████| 125/125 [08:19<00:00,  4.00s/it]


Epoch 4/4: Train Loss: 0.0247, Train Acc: 0.2920, Val Loss: 0.4001, Val Acc: 0.2826


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▆█
train_accuracy,▁▅▇█
train_loss,█▄▂▁
val_accuracy,▁▄█▇
val_loss,█▅▂▁

0,1
epoch,4.0
train_accuracy,0.292
train_loss,0.02475
val_accuracy,0.28264
val_loss,0.40008


[34m[1mwandb[0m: Agent Starting Run: 5fp3x4e8 with config:
[34m[1mwandb[0m: 	activation: ['relu', 'relu', 'relu', 'relu', 'relu']
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	filter_organization: [32, 64, 128, 256, 512]
[34m[1mwandb[0m: 	filter_sizes: [4, 4, 4, 4, 4]
[34m[1mwandb[0m: 	num_dense: 128


100%|██████████| 125/125 [08:34<00:00,  4.12s/it]


Epoch 1/4: Train Loss: 0.0317, Train Acc: 0.1731, Val Loss: 0.4462, Val Acc: 0.1816


100%|██████████| 125/125 [08:34<00:00,  4.11s/it]


Epoch 2/4: Train Loss: 0.0270, Train Acc: 0.2166, Val Loss: 0.4472, Val Acc: 0.1736


100%|██████████| 125/125 [08:28<00:00,  4.07s/it]


Epoch 3/4: Train Loss: 0.0266, Train Acc: 0.2289, Val Loss: 0.4438, Val Acc: 0.2056


 65%|██████▍   | 81/125 [05:30<03:00,  4.10s/it]

## Test Model

In [7]:
from tqdm import tqdm

def train_and_test(model, train_loader, test_loader, criterion, optimizer, num_epochs=8):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     device = 'cuda'
#     model.to(device)
    for epoch in range(num_epochs):
        model.train()  # Set model to train mode
        running_loss = 0.0
        train_loss = 0.0
        correct = 0
        total = 0
        for ind, (images, labels) in enumerate(tqdm(train_loader)):
            images, labels = images.to(device), labels.to(device)  # Move data to GPU
            optimizer.zero_grad()  # Zero the parameter gradients
            outputs = model(images)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss
            train_loss = train_loss + loss
            loss.backward()  # Backward pass
            optimizer.step()  # Optimize
            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        train_loss = train_loss / len(train_loader.dataset)
        train_accuracy = correct / total

        # Validation loop
        model.eval()  # Set model to evaluation mode
        test_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)  # Move data to GPU
                outputs = model(images)
                loss = criterion(outputs, labels)
                test_loss += loss.item() * images.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        test_loss = test_loss / len(test_loader.dataset)
        test_accuracy = correct / total

        # Print epoch statistics
        print(f'Epoch {epoch + 1}/{num_epochs}: Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, test Loss: {test_loss:.4f}, test Acc: {test_accuracy:.4f}')

        # Log to WandB
        wandb.log({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_accuracy': train_accuracy,
            'test_loss': test_loss,
            'test_accuracy': test_accuracy
        })


In [8]:
# Define the parameters for hyperparameter tuning
best_sweep_config = {
    'method': 'bayes', 
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'filter_sizes': {
            'values': [[3, 3, 3, 3, 3]]
        },
        'activation': {
            'values': [['relu', 'gelu', 'silu', 'mish','relu']]
        },
        'num_dense': {
            'values': [256]
        },
        'batch_norm': {
            'values': [False]
        },
        'filter_organization': {
            'values': [[32, 32, 32, 32, 32]]
        },
        'dropout_rate': {
            'values': [0.3]  
        },
        'data_augmentation': {
            'values': [False] 
        }
    }
}

sweep_id = wandb.sweep(sweep = best_sweep_config, project = 'Assignment_2')

Create sweep with ID: pgutaejz
Sweep URL: https://wandb.ai/lokendrakumar/Assignment_2/sweeps/pgutaejz


In [None]:
# Now, you can modify your main function to call train_and_test:
def main():
    # Initialize a new wandb run
    with wandb.init() as run:
        # Construct run name based on hyperparameters
        run_name = f"{wandb.config.activation}-{wandb.config.filter_organization}-dropout-{wandb.config.dropout_rate}-batch_norm-{wandb.config.batch_norm}-data_augmentation-{wandb.config.data_augmentation}"
        wandb.run.name = run_name

        # Model object creation
        model = CNN(num_classes=10,
                    out_channels=wandb.config.filter_organization,
                    filter_sizes=wandb.config.filter_sizes,
                    activations=wandb.config.activation,
                    fullyconnected_size=wandb.config.num_dense,
                    dropout_rate=wandb.config.dropout_rate,
                    data_augmentation=wandb.config.data_augmentation)
        model.to(device)
        
        # Define data transformations
        transform_list = [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]

        # Conditionally add data augmentation transformations
        if wandb.config.data_augmentation:
            transform_list = [
                RandomRotation(degrees=15),  # Random rotations up to 15 degrees
                RandomHorizontalFlip(),      # Random horizontal flips
                RandomVerticalFlip(),        # Random vertical flips
                ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),  # Random color jitter
            ] + transform_list

        transform = transforms.Compose(transform_list)
        
        criterion = nn.CrossEntropyLoss()
        

        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
            
        # Train and evaluate the model
        train_and_test(model, train_loader, test_loader, criterion, optimizer)

# Run the sweep to perform 5 experiments
wandb.agent(sweep_id, function=main, count=2)

In [9]:
best_model = CNN(num_classes=10, out_channels=[32, 32, 32, 32, 32], filter_sizes=[3, 3, 3, 3, 3],
            stride=1, padding=1, pool_size=(2, 2), fullyconnected_size=256,
            activations=['relu', 'gelu', 'silu', 'mish','relu'], dropout_rate=0.3, batch_norm=False, data_augmentation=False).to(device)

In [10]:
import matplotlib.pyplot as plt
import numpy as np

# Initialize WandB
wandb.init(project='Assignment_2')

# Function to display a grid of images
def display_images(images, predictions, num_cols=3):
    num_images = len(images)
    num_rows = int(np.ceil(num_images / num_cols))
    
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(10, 3*num_rows))
    
    for i, (image, prediction) in enumerate(zip(images, predictions)):
        ax = axes[i // num_cols, i % num_cols]
        ax.imshow(image.permute(1, 2, 0))
        ax.set_title(f'Prediction: {prediction}')
        ax.axis('off')
    
    # Hide any empty subplots
    for j in range(num_images, num_rows*num_cols):
        axes[j // num_cols, j % num_cols].axis('off')
    
    plt.tight_layout()
    
    # Log to WandB
    wandb.log({"sample_predictions": plt})
    plt.close()

# Function to get sample images and predictions
def get_sample_images_and_predictions(best_model, test_loader, num_samples_per_class=3, num_classes=10):
    best_model.eval()
    images, predictions = [], []
    samples_per_class = {cls: 0 for cls in range(num_classes)}  # Keep track of samples per class
    with torch.no_grad():
        for i, (image, label) in enumerate(test_loader):
            for img, lbl in zip(image, label):
                cls = lbl.item()
                if samples_per_class[cls] < num_samples_per_class:
                    output = best_model(img.unsqueeze(0).to(device))
                    _, prediction = torch.max(output, 1)
                    predictions.append(prediction.item())
                    images.append(img)
                    samples_per_class[cls] += 1
                if all(val == num_samples_per_class for val in samples_per_class.values()):
                    break  # Stop when we have enough samples for each class
            if all(val == num_samples_per_class for val in samples_per_class.values()):
                break  # Stop when we have enough samples for each class
    return images, predictions

# Get sample images and predictions for 3 images per class
sample_images, sample_predictions = get_sample_images_and_predictions(best_model, test_loader, num_samples_per_class=3)

# Display the grid
display_images(sample_images, sample_predictions, num_cols=3)


[34m[1mwandb[0m: Currently logged in as: [33mma23m008[0m ([33mlokendrakumar[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [12]:
import matplotlib.pyplot as plt

def visualize_filters(model, test_loader):
    model.eval()
    with torch.no_grad():
        # Get a random image from the test set
        for images, _ in test_loader:
            image = images[0].unsqueeze(0).to(device)
            break
        
        # Get the activations of the first convolutional layer
        activations = model._forward_features(image)
        
        # Visualize filters in the first layer
        num_filters = activations.shape[1]
        num_cols = 8
        num_rows = 8  # Ensure 8 rows
        
        # Initialize W&B run
        wandb.init(project="Assignment_2")
        
        fig, axes = plt.subplots(num_rows, num_cols, figsize=(30, 30))
        
        for i in range(num_rows * num_cols):
            row = i // num_cols
            col = i % num_cols
            if i < num_filters:
                filter_image = activations[0, i].cpu().numpy()
                axes[row, col].imshow(filter_image, cmap='gray')
            else:
                axes[row, col].axis('off')
        
        plt.tight_layout()
        
        # Log the figure to wandb
        wandb.log({"filters_visualization": plt})
        plt.close()

# Visualize filters in the first layer
visualize_filters(model, test_loader)


VBox(children=(Label(value='0.103 MB of 0.103 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))