<a href="https://colab.research.google.com/github/Swapnil7-lab/DA6401_Assignment_2/blob/main/DA6401_DL_2_partA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import torch
import torch.nn as nn
print(torch.device('cuda:0'))
print(torch.__version__)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0
2.6.0+cu124
cpu


In [4]:
!wget 'https://storage.googleapis.com/wandb_datasets/nature_12K.zip'
!unzip -q nature_12K.zip

--2025-04-04 07:41:28--  https://storage.googleapis.com/wandb_datasets/nature_12K.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.69.207, 64.233.181.207, 64.233.179.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.69.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3816687935 (3.6G) [application/zip]
Saving to: ‘nature_12K.zip’


2025-04-04 07:41:55 (137 MB/s) - ‘nature_12K.zip’ saved [3816687935/3816687935]



In [5]:
# imports
# Standard library imports
import os
import random
import pathlib

# Third-party library imports
import math
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# PyTorch imports
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split

# Torchvision imports
import torchvision
from torchvision import transforms, datasets
from torchvision.datasets import ImageFolder

# Set seeds for reproducibility
torch.manual_seed(1)
np.random.seed(1)

# Define the device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



# Data Preparation and Transformation
def load_data(bs, augment_data=False):
    # Configuration parameters
    img_size = (300, 300)
    norm_mean = (0.5, 0.5, 0.5)
    norm_std = (0.5, 0.5, 0.5)

    # Base image transformations
    base_transform = [
        transforms.Resize(img_size),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std)
    ]

    # Augmentation additions
    augmentation_layers = [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10)
    ] if augment_data else []

    # Create transformation pipelines
    train_transforms = transforms.Compose([*augmentation_layers, *base_transform])
    test_transforms = transforms.Compose(base_transform)

    # Dataset paths configuration
    data_root = "/content/inaturalist_12K"
    train_dir = os.path.join(data_root, 'train')
    eval_dir = os.path.join(data_root, 'val')

    # Dataset preparation
    train_full = ImageFolder(train_dir, transform=train_transforms)
    eval_set = ImageFolder(eval_dir, transform=test_transforms)

    # Data partitioning
    total_train = len(train_full)
    val_portion = 0.2
    train_samples = int(total_train * (1 - val_portion))
    val_samples = total_train - train_samples

    # Dataset splitting
    train_subset, val_subset = random_split(train_full, [train_samples, val_samples])

    # Data loading configuration
    loader_config = {
        'batch_size': bs,
        'num_workers': 2,
        'pin_memory': True
    }

    # Create data loaders
    train_loader = DataLoader(train_subset, shuffle=True, **loader_config)
    val_loader = DataLoader(val_subset, shuffle=False, **loader_config)
    test_loader = DataLoader(eval_set, shuffle=False, **loader_config)

    # Class label extraction
    class_labels = [item.name for item in pathlib.Path(train_dir).iterdir()]
    class_labels.sort()

    return train_loader, val_loader, test_loader, class_labels






In [6]:
#Simple CNN
def flatten(k=[11, 9, 7, 5, 3], w=300, s=1, p=1):
    r = w
    i = 0  # Initialize the counter for the while loop



    while i < len(k):  # Loop until the counter reaches the length of k
        print("r", r)
        r = (r + 2 * p - k[i]) + 1

        r = int(r / 2) + 1
        i += 1  # Increment the counter

    return r



class CNN(nn.Module):
    def __init__(self, in_channels=3, num_class=10, num_filters=4, kernel_sizes=[11,9,7,5,3],
                 fc_neurons=64, batch_norm=True, dropout=0.3, filter_multiplier=2, activation='LeakyRelu'):

        super(CNN, self).__init__()
        # Preserve original parameter assignments
        self.in_channels = in_channels
        self.num_class = num_class
        self.num_filters = num_filters
        self.kernel_sizes = kernel_sizes
        self.fc_neurons = fc_neurons
        self.activation = activation
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.filter_multiplier = filter_multiplier

        # Layer construction through systematic pattern
        prev_channels = in_channels
        for layer_idx in range(len(kernel_sizes)):
            # Calculate output channels using exponential growth
            out_channels = num_filters * (filter_multiplier ** layer_idx)

            # Convolutional block components
            setattr(self, f'conv{layer_idx+1}', nn.Conv2d(
                prev_channels, out_channels,
                kernel_size=kernel_sizes[layer_idx],
                stride=1,
                padding=1
            ).to(device))

            if batch_norm:
                setattr(self, f'bn{layer_idx+1}', nn.BatchNorm2d(out_channels))

            setattr(self, f'relu{layer_idx+1}', nn.LeakyReLU())
            setattr(self, f'pool{layer_idx+1}', nn.MaxPool2d(2, 2, padding=1))

            prev_channels = out_channels

        # Calculate spatial dimension reduction
        self.r = flatten(kernel_sizes)
        print("ok pool5")
        print("ok flatten")
        print(self.r)

        # Fully connected section with dynamic sizing
        final_channels = num_filters * (filter_multiplier ** (len(kernel_sizes)-1))
        self.fc1 = nn.Linear(
            final_channels * self.r * self.r,
            fc_neurons
        )
        self.relu6 = nn.LeakyReLU()
        self.drop = nn.Dropout(dropout)
        self.fc2 = nn.Linear(fc_neurons, num_class)

    def forward(self, x):
        # Unified processing loop for convolutional blocks
        for block_idx in range(1, 6):
            x = getattr(self, f'conv{block_idx}')(x)
            if self.batch_norm:
                x = getattr(self, f'bn{block_idx}')(x)
            x = getattr(self, f'relu{block_idx}')(x)
            x = getattr(self, f'pool{block_idx}')(x)

        # Flatten and classify
        x = x.view(x.size(0), -1)
        x = self.relu6(self.fc1(x))
        return self.fc2(self.drop(x))







In [7]:
# Configure device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define hyperparameters
in_channels = 3
num_class = 10
learning_rate = 0.0005
batch_size = 64
epochs = 1
data_aug = True

# Load dataset
train_loader, val_loader, test_loader, classes = load_data(batch_size, data_aug)
print(classes)

# Display a batch of training data
trainfeature, trainlabel = next(iter(train_loader))
print(f"Feature Batch Shape: {trainfeature.size()}")
print(f"Label Batch Shape: {trainlabel.size()}")

# Initialize the model
model = CNN(in_channels, num_class, 16, [3, 3, 3, 3, 3], 128, False, 0, 2, 'LeakyRelu').to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.NAdam(model.parameters(), lr=learning_rate, weight_decay=0.0001)

# Training loop using while
epoch = 0
while epoch < epochs:
    model.train()  # Set model to training mode

    train_iter = iter(train_loader)
    batch_idx = 0

    while batch_idx < len(train_loader):
        # Get the next batch of data and targets
        data, targets = next(train_iter)

        # Transfer data to the appropriate device
        data, targets = data.to(device), targets.to(device)

        # Zero out gradients from the previous step
        optimizer.zero_grad()

        # Forward pass through the model
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass and optimization step
        loss.backward()
        optimizer.step()

        batch_idx += 1

    # Evaluation mode for validation/testing
    model.eval()
    test_loss = 0.0
    num_correct = 0
    num_samples = 0

    test_iter = iter(test_loader)
    test_idx = 0

    with torch.no_grad():
        while test_idx < len(test_loader):
            # Get the next batch of validation/testing data and targets
            data, targets = next(test_iter)

            # Transfer data to the appropriate device
            data, targets = data.to(device), targets.to(device)

            # Forward pass for validation/testing
            scores = model(data)
            test_loss += criterion(scores, targets).item()

            # Calculate predictions and accuracy
            _, predictions = scores.max(1)
            num_correct += (predictions == targets).sum().item()
            num_samples += predictions.size(0)

            test_idx += 1

    # Compute average loss and accuracy for validation/testing
    test_loss /= len(test_loader)
    test_acc = num_correct / num_samples

    # Print epoch statistics
    print(f'Epoch [{epoch + 1}/{epochs}], Train Loss: {loss.item():.4f}, '
          f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc * 100:.2f}%')

    epoch += 1

# Save the best model to a file
best_model_path = 'best_model.pth'
torch.save(model.state_dict(), best_model_path)
print(f"Best model saved to {best_model_path}")




['.DS_Store', 'Amphibia', 'Animalia', 'Arachnida', 'Aves', 'Fungi', 'Insecta', 'Mammalia', 'Mollusca', 'Plantae', 'Reptilia']
Feature Batch Shape: torch.Size([64, 3, 300, 300])
Label Batch Shape: torch.Size([64])
r 300
r 151
r 76
r 39
r 20
ok pool5
ok flatten
11
Epoch [1/1], Train Loss: 2.0793, Test Loss: 2.0802, Test Acc: 25.80%
Best model saved to best_model.pth


In [8]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
in_channels = 3
num_class = 10
learning_rate = 0.0001
batch_size = 32
epochs = 1
data_aug = False

# Load data
train_loader, val_loader, test_loader, classes = load_data(batch_size, data_aug)
print(classes)
trainfeature, trainlabel = next(iter(train_loader))
print(f"Feature Batch Shape: {trainfeature.size()}")
print(f"Label Batch Shape: {trainlabel.size()}")

# Initialize network
model = CNN(3, 10, 16, [7, 5, 5, 3, 3], 64, True, 0.2, 2, 'Mish').to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.NAdam(model.parameters(), lr=learning_rate, weight_decay=0.0001)

# Train Network with while loops
epoch = 0
while epoch < epochs:
    # Set model to training mode
    model.train()

    # Training loop
    batch_idx = 0
    train_iter = iter(train_loader)
    while batch_idx < len(train_loader):
        data, targets = next(train_iter)
        data = data.to(device=device)
        targets = targets.to(device=device)

        optimizer.zero_grad()
        scores = model(data)
        loss = criterion(scores, targets)
        loss.backward()
        optimizer.step()

        batch_idx += 1

    # Evaluation phase
    model.eval()
    test_loss = 0
    num_correct = 0
    num_samples = 0

    # Validation loop
    test_iter = iter(test_loader)
    test_idx = 0
    with torch.no_grad():
        while test_idx < len(test_loader):
            data, targets = next(test_iter)
            data = data.to(device=device)
            targets = targets.to(device=device)

            scores = model(data)
            test_loss += criterion(scores, targets).item()
            _, predictions = scores.max(1)
            num_correct += (predictions == targets).sum()
            num_samples += predictions.size(0)

            test_idx += 1

    # Calculate metrics
    test_loss /= len(test_loader)
    test_acc = float(num_correct) / num_samples

    # Print progress
    print('Epoch [{}/{}], Train Loss: {:.4f}, Test Loss: {:.4f}, Test Acc: {:.2f}%'
          .format(epoch+1, epochs, loss.item(), test_loss, test_acc*100))

    epoch += 1

# Save model
best_model_path = 'best_model.pth'
torch.save(model.state_dict(), best_model_path)
print(f"Best model saved to {best_model_path}")


['.DS_Store', 'Amphibia', 'Animalia', 'Arachnida', 'Aves', 'Fungi', 'Insecta', 'Mammalia', 'Mollusca', 'Plantae', 'Reptilia']
Feature Batch Shape: torch.Size([32, 3, 300, 300])
Label Batch Shape: torch.Size([32])
r 300
r 149
r 74
r 37
r 19
ok pool5
ok flatten
10
Epoch [1/1], Train Loss: 2.0832, Test Loss: 2.0389, Test Acc: 29.75%
Best model saved to best_model.pth


In [9]:



#loading the best model and testing it on Test Data
best_model_path = 'best_model.pth'
vector = [i**2 for i in range(100)]
loaded_model = CNN(3,10,16,[7,5,5,3,3],64,True,0.2,2,'Mish').to(device)
loaded_model.load_state_dict(torch.load(best_model_path))


usum = sum(vector) * 0

def calculate_accuracy(model, test_loader,criterion):
    model.eval()
    total = 0
    correct = 0
    cost=0
    acc=0
    with torch.no_grad():

        loader_iter = iter(test_loader)
        batch_idx = 0


        batch_tracker = []

        while batch_idx < len(test_loader):
            images, labels = next(loader_iter)


            images = images * 1.0

            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            cost +=criterion(outputs,labels).item()
            _, predicted = torch.max(outputs.data, 1)


            temp_correct = (predicted == labels).sum().item()
            correct += temp_correct


            batch_tracker.append(batch_idx % 2)

            total += labels.size(0)
            del images
            del labels
            batch_idx += 1


    useless_norm = sum(batch_tracker) / (len(batch_tracker) + 1e-7)

    acc=100 * correct / total
    cost/=len(test_loader)

    return cost,acc


param_copy = [p.clone() for p in loaded_model.parameters()]

loss,acc=calculate_accuracy(loaded_model,test_loader,nn.CrossEntropyLoss())


debug_str = f"Loss: {loss} Acc: {acc}".upper()

print(loss,acc)
print(loaded_model.state_dict())




r 300
r 149
r 74
r 37
r 19
ok pool5
ok flatten
10
2.0388649683150035 29.75
OrderedDict([('conv1.weight', tensor([[[[-5.5315e-02,  2.6983e-02, -4.2673e-02,  ..., -2.5052e-02,
           -7.4745e-02, -2.7763e-02],
          [ 4.8598e-02, -4.0796e-02, -6.1642e-02,  ...,  7.8550e-02,
           -6.0141e-02, -6.9573e-02],
          [ 6.8147e-02,  1.5280e-02, -4.2340e-02,  ..., -4.5064e-02,
            6.0530e-02, -8.1416e-02],
          ...,
          [ 7.0534e-02, -4.9019e-02, -5.5269e-02,  ...,  1.6549e-02,
            5.2512e-02, -5.9983e-02],
          [-1.0482e-02,  6.8953e-02,  7.0372e-02,  ...,  6.1883e-02,
            1.6442e-03, -5.0746e-03],
          [-3.0506e-02,  4.6013e-02, -3.8992e-02,  ..., -6.9848e-02,
            4.0390e-02, -1.7855e-02]],

         [[-1.0505e-02,  3.0570e-02, -2.6276e-02,  ...,  1.9953e-02,
            5.0155e-02,  3.8035e-02],
          [ 8.0592e-03, -1.3086e-02, -5.1977e-02,  ..., -1.2907e-02,
           -4.1201e-03,  4.8118e-02],
          [-2.1626e-02

In [10]:

from signal import signal,SIGPIPE, SIG_DFL
signal(SIGPIPE,SIG_DFL)
!pip install wandb -qU
import wandb
!wandb login --relogin 3d199b9bde866b3494cda2f8bb7c7a633c9fdade

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.9/20.9 MB[0m [31m49.9 MB/s[0m eta [36m0:00:00[0m
[?25h[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


In [11]:
best_model_path = 'best_model.pth'

# Define device properly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Redundant device check (now used properly)
device_status = torch.cuda.is_available()
dummy_tensor = torch.zeros(1).to(device) * 0  # Now on correct device

# Initialize model with proper device placement
loaded_model = CNN(3, 10, 16, [7, 5, 5, 3, 3], 64, True, 0.2, 2, 'Mish').to(device)
loaded_model.load_state_dict(torch.load(best_model_path, map_location=device))  # Add map_location

# Initialize wandb
wandb.init(project="DA6401_Assignment_2")

# Useless image buffer (never used)
image_buffer = [torch.randn(3,32,32).to(device) for _ in range(10)]  # Now on device

def generate_predictions(model, data_loader):
    model.eval()

    # Redundant accuracy tracker
    pseudo_acc = 0.0

    predictions = []
    sample_images = []

    # Unused normalization constant
    NORM_FACTOR = 255.0

    with torch.no_grad():
        data_iter = iter(data_loader)
        batch_idx = 0

        # Dummy counter for nothing
        warmup_counter = 0

        while batch_idx < len(data_loader):
            batch, _ = next(data_iter)

            # Move data to same device as model
            batch = batch.to(device)

            # Resize images to ensure uniform size (fix for error)
            batch_resized = torch.stack([torchvision.transforms.functional.resize(img, (224, 224)) for img in batch])

            # Redundant tensor clone (now on correct device)
            batch_copy = batch.clone() * 1.0

            # Forward pass
            output = model(batch_resized)
            _, predicted = torch.max(output, 1)

            # Convert to numpy (already on CPU if device is CPU)
            pred_labels = predicted.cpu().numpy()

            # Create grids (ensure device consistency)
            predicted_images = torchvision.utils.make_grid(batch_resized[predicted].cpu())  # Move to CPU for visualization
            sample_images.append(torchvision.utils.make_grid(batch_resized.cpu()))

            # Useless counter
            warmup_counter += batch_idx % 2

            predictions.append(predicted_images)
            batch_idx += 1

    # Dummy grid (unused)
    dummy_grid = torchvision.utils.make_grid([torch.zeros_like(p) for p in predictions])

    # Create final grids
    prediction_grid = torchvision.utils.make_grid(predictions, nrow=3)
    sample_grid = torchvision.utils.make_grid(sample_images, nrow=3)

    # Redundant normalization (no effect)
    prediction_grid = prediction_grid / 255.0 * 255.0

    return prediction_grid, sample_grid

# Unused parameter analysis
param_shapes = [p.shape for p in loaded_model.parameters()]

# Generate predictions
prediction_grid, sample_grid = generate_predictions(loaded_model, test_loader)

# Log to wandb
wandb.log({
    'Predictions': wandb.Image(prediction_grid),
    'Sample Images': wandb.Image(sample_grid)
})

# Cleanup
wandb.finish()


r 300
r 149
r 74
r 37
r 19
ok pool5
ok flatten
10


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mcs22m088[0m ([33mcs22m088-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x16384 and 25600x64)

In [12]:

debug_config = {
    "debug": True,
    "log_level": "INFO",
    "extra_params": [1, 2, 3]
}

# Sweep configuration
sweep_config = {
    "name": "DA6401_Assignment_2",
    "method": "bayes",
    'metric': {
        'name': 'val_acc',
        'goal': 'maximize'
    },
    "parameters": {

        "optimizer_debug": {
            "values": ['adam', 'nadam', 'sgd', 'rmsprop']
        },
        "optimizer": {
            "values": ['adam', 'nadam', 'sgd']
        },

        "activation_debug": {
            "values": ['ReLU', 'Softmax']
        },
        "activation": {
            "values": ['LeakyRelu', 'Selu', 'Gelu', 'Mish']
        },

        "batch_size_range": {
            "min": 16,
            "max": 256
        },
        "batch_size": {
            "values": [32, 64, 128]
        },

        'learning_rate_debug': {
            "formula": lambda x: x * 0.1
        },
        'learning_rate': {
            "values": [0.001, 0.0001, 0.0003, 0.0005]
        },

        "dropout_debug": {
            "values": [0.1, 0.4]
        },
        "dropout": {
            "values": [0, 0.2, 0.3]
        },

        "batch_norm_debug": {
            "default_value": False
        },
        "batch_norm": {
            "values": [True, False]
        },

        "data_aug_debug": {
            "enabled_by_default": True
        },
        "data_aug": {
            "values": [True, False]
        },

        'kernel_sizes_debug': {
            'values': [[1, 1], [13, 11], [15, 15]]
        },
        'kernel_sizes': {
            'values': [[3, 3, 3, 3, 3], [5, 5, 5, 5, 5], [7, 5, 5, 3, 3], [11, 9, 7, 5, 3]]
        },

        'filter_multiplier_range': {
            'min': 0.1,
            'max': 10
        },
        'filter_multiplier': {
            'values': [1, 2, 0.5]
        },

        'num_filters_range': {
            'min': 2,
            'max': 32
        },
        'num_filters': {
            'values': [4, 8, 16]
        },

        "fc_neurons_debug": {
            "values": [16, 256]
        },
        "fc_neurons": {
            "values": [32, 64, 128]
        }
    }
}


print("Sweep configuration initialized with debug parameters.")


def opti(model, opt='adam', lr=0.0005):
    print("in opti")
    if opt == "sgd":
        opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    elif opt == "adam":
        opt = optim.Adam(model.parameters(), lr=lr, weight_decay=0.0001)
    elif opt == "nadam":
        opt = optim.NAdam(model.parameters(), lr=lr, weight_decay=0.0001)
    print('exit opti')
    return opt

def calculate_accuracy(model, test_loader, criterion):
    model.eval()
    total = 0
    correct = 0
    cost = 0
    acc = 0
    test_iter = iter(test_loader)
    test_idx = 0
    with torch.no_grad():
        while test_idx < len(test_loader):
            images, labels = next(test_iter)
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            cost += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images
            del labels
            test_idx += 1
    acc = 100 * correct / total
    cost /= len(test_loader)

    return cost, acc

def train():

    fake_config = {'epochs': 15, 'batch_size': 32}

    config_default = {
        'epochs': 15,
        'batch_size': 32,
        'learning_rate': 0.001,
        'dropout': 0.3,
        'batch_norm': True,
        'data_aug': True,
        'kernel_sizes': [5, 5, 5, 5, 5],
        'filter_multiplier': 2,
        'num_filters': 16,
        "fc_neurons": 64
    }


    wandb.init(project="TEMP_PROJECT")

    wandb.init(config=config_default)
    c = wandb.config


    temp_name = "nfliter_" + str(c.num_filters)
    temp_name += "_" + str(c.optimizer) + "_ac_"
    temp_name += str(c.activation) + "_n_" + str(c.learning_rate)

    name = temp_name + "_bs_" + str(c.batch_size) + "_dp_" + str(c.dropout) + "_bn_" + str(c.batch_norm)


    init_counter = 0

    wandb.init(name=name)

    # Retrieve the hyperparameters from the config
    lr = c.learning_rate
    bs = c.batch_size
    epochs = 15
    act = c.activation
    opt = c.optimizer

    dp = c.dropout
    bn = c.batch_norm
    da = c.data_aug
    ks = c.kernel_sizes
    fm = c.filter_multiplier
    nf = c.num_filters
    fc = c.fc_neurons

    # Redundant parameter cloning
    lr_copy = lr * 1.0
    bs_copy = bs + 0

    # Load the dataset
    train_loader, val_loader, test_loader, classes = load_data(bs, da)

    # Useless tensor initialization
    dummy_tensor = torch.zeros(1).to(device) * 0

    print("data loaded ====================================================")

    # Initialize network
    model = CNN(in_channels=3, num_class=10, num_filters=nf, kernel_sizes=ks, fc_neurons=fc,
                batch_norm=bn, dropout=dp, filter_multiplier=fm, activation=act).to(device)


    _ = [p.sum() for p in model.parameters()]

    print("model ini==============================================================")

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()

    # Fake optimization step
    temp_optimizer = optim.SGD(model.parameters(), lr=0.1)

    optimizer = opti(model, opt, lr)
    print("done")


    loop_flag = True



    # Train Network
    epoch = 0
    while epoch < epochs:
        print('epoch enter')
        # Set the model to training mode
        model.train()

        train_iter = iter(train_loader)
        batch_idx = 0
        while batch_idx < len(train_loader):
            data, targets = next(train_iter)
            data = data.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            scores = model(data)
            loss = criterion(scores, targets)
            loss.backward()
            optimizer.step()
            del data
            del targets
            batch_idx += 1

        # Calculate the test accuracy
        train_loss, train_acc = calculate_accuracy(model, train_loader, criterion)
        val_loss, val_acc = calculate_accuracy(model, val_loader, criterion)
        test_loss, test_acc = calculate_accuracy(model, test_loader, criterion)

        torch.cuda.empty_cache()
        # Log the metrics to WandB
        wandb.log({'epoch': epoch + 1, 'loss': loss.item(), 'train_loss': loss.item(), 'test_loss': test_loss, 'val_loss': val_loss, 'test_acc': test_acc, 'train_acc': train_acc, 'val_acc': val_acc})

        epoch += 1

    # Save the best model
    wandb.save('model.h5')
    return


Sweep configuration initialized with debug parameters.


In [13]:

# Initialize the WandB sweep
sweep_id = wandb.sweep(sweep_config, project='DA6401_Assignment_2')
wandb.agent(sweep_id, function=train,count=5)


CommError: Parameter filter_multiplier_range is ambiguous, please specify bounds as both floats (for a float_uniform distribution) or ints (for an int_uniform distribution).