In [1]:
import torch
import os
import wandb
import numpy as np
import torch
import random
import torch.nn as nn
from PIL import Image
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

In [2]:
wandb.login()

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: da24m008 (da24m008-iit-madras) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


True

## Question 1

In [3]:
class CNNModel(nn.Module):
    def __init__(
        self,
        input_channels=3,
        filters_per_layer=[32, 64, 128, 256, 512],
        kernel_size=3,
        pool_sizes=2,
        conv_activation='relu',
        dense_units=256,
        dense_activation='relu',
        num_classes=10,
        dropout_rate=0.5,
        use_batch_norm=True
    ):
        
        super().__init__()
        
        self.kernel_size = kernel_size
        self.pool_sizes = pool_sizes
        self.conv_activation = conv_activation
        self.dense_activation = dense_activation

        # Initializing Convolutional, Batch Norm, and pooling layers
        self.conv_layers = nn.ModuleList()
        self.batch_norm_layers = nn.ModuleList()
        self.pool_layers = nn.ModuleList()
        
        in_channels = input_channels
        
        # Create 5 convolutional blocks
        for filters in filters_per_layer:
            # Convolutional layer
            self.conv_layers.append(
                nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=filters,
                    kernel_size=kernel_size,
                    padding="same",
                )
            )
            
            # Batch normalization layer
            if use_batch_norm:
                self.batch_norm_layers.append(nn.BatchNorm2d(filters))
            else:
                self.batch_norm_layers.append(None)
            
            # Max pooling layer
            self.pool_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            
            # Update in_channels for next layer
            in_channels = filters
        
        # Calculate the output size after conv layers
        # Assuming input is 224x224, after 5 max-pooling layers it will be 7x7
        conv_output_size = 7 * 7 * filters_per_layer[-1]
        
        # First flatten the image to pass it to the dense layer
        self.flatten = nn.Flatten()

        # Dense layer
        self.fc1 = nn.Linear(conv_output_size, dense_units)
        self.fc_bn = nn.BatchNorm1d(dense_units) if use_batch_norm else None
        self.dropout1 = nn.Dropout(dropout_rate)
        
        # Output layer
        self.fc2 = nn.Linear(dense_units, num_classes)
        self.dropout2 = nn.Dropout(dropout_rate)
    
    def activation_func(self, activation, x):
        """Apply the selected activation function"""
        if activation.lower() == 'relu':
            return F.relu(x)
        elif activation.lower() == 'gelu':
            return F.gelu(x)
        elif activation.lower() == 'silu' or activation.lower() == 'swish':
            return F.silu(x)
        elif activation.lower() == 'mish':
            return x * torch.tanh(F.softplus(x))
            return F.sigmoid(x)
        elif activation.lower() == 'leakyrelu':
            return F.leaky_relu(x, negative_slope=0.01)
        else:
            # Default to ReLU
            return F.relu(x)
    
    def forward(self, x):
        """Forward pass through the network"""
        # Convolutional blocks
        for i, (conv, bn, pool) in enumerate(zip(self.conv_layers, self.batch_norm_layers, self.pool_layers)):
            x = conv(x)
            if bn is not None:
                x = bn(x)
            x = self.activation_func(self.conv_activation, x)
            x = pool(x)
        
        # Flatten
        x = self.flatten(x)
        # x = torch.flatten(x, 1)
        
        # Dense layer
        x = self.fc1(x)
        if self.fc_bn is not None:
            x = self.fc_bn(x)
        x = self.activation_func(self.dense_activation, x)
        x = self.dropout1(x)
        
        # Output layer
        x = self.fc2(x)
        
        return x


## Question 2

In [None]:
# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Data paths
DATASET_PATH = r"/kaggle/input/inatural-12k/inaturalist_12K"  # Update with your path
TRAIN_DIR = os.path.join(DATASET_PATH, "train")
TEST_DIR = os.path.join(DATASET_PATH, "val")

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

In [None]:
# Hyperparameter sweep configuration
sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'base_filters': {'values': [32, 64, None]},
        'conv_activation': {'values': ['relu', 'gelu', 'silu', 'mish']},
        'dense_activation': {'values':['relu', 'gelu', 'silu', 'leakyrelu']},
        'filter_organization': {'values': ['same', 'doubling', 'halving']},
        'data_augmentation': {'values': [True, False]},
        'use_batch_norm': {'values': [True, False]},
        'dropout_rate': {'values': [0.2, 0.3, 0.5]},
        'dense_neurons': {'values': [128, 256, 512]},
        'learning_rate': {'values': [0.0001, 0.001]},
        'epochs': {'value': 10},
        'batch_size': {'value': 32},
        'image_size': {'value': 224},
        'validation_split': {'value': 0.2}
    }
}


In [4]:
class iNaturalistDataset(Dataset):
    """Custom Dataset for iNaturalist images."""
    
    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Directory with class subdirectories.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.root_dir = root_dir
        self.transform = transform
        
        # Get class directories and create class-to-idx mapping
        self.classes = sorted([d for d in os.listdir(root_dir) 
                              if os.path.isdir(os.path.join(root_dir, d))])
        self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}
        
        # Get all image paths and corresponding labels
        self.image_paths = []
        self.labels = []
        
        for class_name in self.classes:
            class_dir = os.path.join(root_dir, class_name)
            for img_name in os.listdir(class_dir):
                if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                    self.image_paths.append(os.path.join(class_dir, img_name))
                    self.labels.append(self.class_to_idx[class_name])
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label


In [None]:
def load_and_split_data(config):
    """
    Load data and split into train and validation sets,
    ensuring equal class representation in validation set.
    """
    # Base transforms
    base_transform = transforms.Compose([
        transforms.Resize((config.image_size, config.image_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    # Data augmentation transform
    augment_transform = transforms.Compose([
        transforms.Resize((config.image_size, config.image_size)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(20),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    # Choose transform based on config
    train_transform = augment_transform if config.data_augmentation else base_transform
    
    # Load dataset
    full_dataset = iNaturalistDataset(root_dir=TRAIN_DIR, transform=train_transform)
    val_dataset = iNaturalistDataset(root_dir=TRAIN_DIR, transform=base_transform)
    test_datase = iNaturalistDataset(root_dir=TEST_DIR, transform=base_transform)
    
    # Get class counts for stratified split
    class_counts = {}
    for label in full_dataset.labels:
        if label not in class_counts:
            class_counts[label] = 0
        class_counts[label] += 1
    
    # Create stratified split
    train_indices = []
    val_indices = []
    
    for class_idx in range(len(full_dataset.classes)):
        # Get indices for this class
        class_indices = [i for i, label in enumerate(full_dataset.labels) if label == class_idx]
        np.random.shuffle(class_indices)
        
        # Split indices
        val_count = int(len(class_indices) * config.validation_split)
        val_indices.extend(class_indices[:val_count])
        train_indices.extend(class_indices[val_count:])
    
    # Create subset datasets
    train_dataset = Subset(full_dataset, train_indices)
    val_dataset = Subset(val_dataset, val_indices)
    
    print(f"Total training samples: {len(train_dataset)}")
    print(f"Total validation samples: {len(val_dataset)}")
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset, 
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True
    )
    
    test_loader = DataLoader(
        test_datase,
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True
    )
    
    return train_loader, val_loader, test_loader, len(full_dataset.classes)


In [None]:
def train_model(config):
    """Train the model with current hyperparameter configuration."""
    # Load and split data
    train_loader, val_loader, test_loader, num_classes = load_and_split_data(config)
    
    # Create model based on hyperparameters
    if config.filter_organization == 'same' and config.base_filters:
        filters = [config.base_filters] * 5
    elif config.filter_organization == 'doubling' and config.base_filters:
        filters = [config.base_filters * (2**i) if config.base_filters * (2**i) < 512 else 512 for i in range(5)]
    elif config.filter_organization == 'halving' and config.base_filters:
        filters = [config.base_filters * (2**(4-i)) if config.base_filters * (2**(4-i)) < 512 else 512 for i in range(5)]
    else:
        filters = [32, 64, 128, 256, 512]  # Default
    
    model = CNNModel(
        input_channels=3,
        num_classes=num_classes,
        filters_per_layer=filters,
        kernel_size=3,
        conv_activation=config.conv_activation,
        dense_units=config.dense_neurons,
        dense_activation = config.dense_activation,
        dropout_rate=config.dropout_rate,
        use_batch_norm=config.use_batch_norm
    )
    
    # Move model to device
    model = model.to(device)
    
    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    
    # Initialize WandB for tracking
    wandb.watch(model, criterion, log="all", log_freq=100)
    
    # Training loop
    best_val_acc = 0.0
    
    for epoch in range(config.epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            # Statistics
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
        
        train_acc = correct / total
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()
        
        val_acc = val_correct / val_total
        
        # Log metrics to wandb
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": running_loss / len(train_loader),
            "train_accuracy": train_acc,
            "val_loss": val_loss / len(val_loader),
            "val_accuracy": val_acc
        })
        
        print(f'Epoch: {epoch + 1}, Val Loss: {val_loss / len(val_loader):.3f}, Val Acc: {100 * val_acc:.2f}%')
        
        # Save the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # Save the model
            torch.save(model.state_dict(), f"best_model_{wandb.run.id}.pth")
            # Log the model to wandb
            artifact = wandb.Artifact('model', type='model')
            artifact.add_file(f"best_model_{wandb.run.id}.pth")
            wandb.log_artifact(artifact)
    
    model.eval()
    test_correct = 0
    test_total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            test_total += labels.size(0)
            test_correct += predicted.eq(labels).sum().item()
    
    test_acc = test_correct / test_total
    print(f'Test Accuracy: {100 * test_acc:.2f}%')
    wandb.log({"test_accuracy": test_acc})
    
    # Save final model
    torch.save(model.state_dict(), f"final_model_{wandb.run.id}.pth")

    # Log the model to wandb
    artifact = wandb.Artifact('model', type='model')
    artifact.add_file(f"final_model_{wandb.run.id}.pth")
    wandb.log_artifact(artifact)


In [None]:
def sweep_train():
    """Configure and run hyperparameter sweep."""
    # Initialize wandb
    wandb.init()

    # Configuration parameters
    config = wandb.config

    # Set run name based on hyperparameters
    run_name = f'bf_{config.base_filters}_fo_{config.filter_organization}_dn_{config.dense_neurons}_ca_{config.conv_activation}_da_{config.dense_activation}_v5'
    wandb.run.name = run_name

    # Call training function with current hyperparameters
    train_model(config)

Using device: cuda


In [None]:
# Initialize sweep
sweep_id = wandb.sweep(sweep_config, project="DA6401-A2-V4")

# Start the sweep
wandb.agent(sweep_id, sweep_train, count=40)

Create sweep with ID: owlu7slq
Sweep URL: https://wandb.ai/da24m008-iit-madras/DA6401-A2-V4/sweeps/owlu7slq


[34m[1mwandb[0m: Agent Starting Run: 1r5jogya with config:
[34m[1mwandb[0m: 	base_filters: None
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_activation: silu
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: halving
[34m[1mwandb[0m: 	image_size: 224
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	use_batch_norm: False
[34m[1mwandb[0m: 	validation_split: 0.2
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Tracking run with wandb version 0.19.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250418_121928-1r5jogya[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33ml

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.113, Val Acc: 25.41%
Epoch: 2, Val Loss: 2.091, Val Acc: 27.16%
Epoch: 3, Val Loss: 1.982, Val Acc: 29.86%
Epoch: 4, Val Loss: 1.945, Val Acc: 30.62%
Epoch: 5, Val Loss: 1.945, Val Acc: 32.37%
Epoch: 6, Val Loss: 2.034, Val Acc: 32.02%
Epoch: 7, Val Loss: 2.459, Val Acc: 29.06%
Epoch: 8, Val Loss: 2.943, Val Acc: 29.46%
Epoch: 9, Val Loss: 3.723, Val Acc: 30.27%
Epoch: 10, Val Loss: 4.955, Val Acc: 29.06%
Test Accuracy: 32.70%


[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▂▂▃▄▅▇██
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▃▅▆██▅▅▆▅
[34m[1mwandb[0m:       val_loss ▁▁▁▁▁▁▂▃▅█
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.327
[34m[1mwandb[0m: train_accuracy 0.9205
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.29065
[34m[1mwandb[0m:       val_loss 4.95543
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_None_fo_halving_dn_512_ca_silu_da_silu_v5[0m at: [34m[4mhttps://wandb.ai/da

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.068, Val Acc: 26.76%
Epoch: 2, Val Loss: 1.978, Val Acc: 30.92%
Epoch: 3, Val Loss: 1.904, Val Acc: 33.32%
Epoch: 4, Val Loss: 1.906, Val Acc: 32.72%
Epoch: 5, Val Loss: 1.878, Val Acc: 35.97%
Epoch: 6, Val Loss: 1.827, Val Acc: 36.27%
Epoch: 7, Val Loss: 1.812, Val Acc: 37.17%
Epoch: 8, Val Loss: 1.884, Val Acc: 36.47%
Epoch: 9, Val Loss: 1.854, Val Acc: 37.02%
Epoch: 10, Val Loss: 1.939, Val Acc: 36.97%
Test Accuracy: 36.65%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading wandb-summary.json; uploading config.yaml; uploading history steps 9-10, summary, console lines 11-12
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▃▄▄▅▅▆▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▄▅▅▇▇████
[34m[1mwandb[0m:       val_loss █▆▄▄▃▁▁▃▂▄
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.3665
[34m[1mwandb[0m: train_accuracy 0.55688
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.36968
[34m[1mwandb[0m:       val_loss 1.93855
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 1.962, Val Acc: 31.27%
Epoch: 2, Val Loss: 1.901, Val Acc: 32.47%
Epoch: 3, Val Loss: 1.912, Val Acc: 32.12%
Epoch: 4, Val Loss: 1.802, Val Acc: 36.17%
Epoch: 5, Val Loss: 1.788, Val Acc: 37.57%
Epoch: 6, Val Loss: 1.798, Val Acc: 37.52%
Epoch: 7, Val Loss: 1.765, Val Acc: 38.77%
Epoch: 8, Val Loss: 1.812, Val Acc: 38.92%
Epoch: 9, Val Loss: 1.930, Val Acc: 37.12%
Epoch: 10, Val Loss: 1.941, Val Acc: 36.82%
Test Accuracy: 37.25%


[34m[1mwandb[0m: uploading artifact model; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▃▃▄▅▅▆▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▂▅▇▇██▆▆
[34m[1mwandb[0m:       val_loss █▆▆▂▂▂▁▃▇▇
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.3725
[34m[1mwandb[0m: train_accuracy 0.62062
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.36818
[34m[1mwandb[0m:       val_loss 1.9414
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_None_fo_halving_dn_256_ca_mish_da_silu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.006, Val Acc: 28.21%
Epoch: 2, Val Loss: 1.982, Val Acc: 29.66%
Epoch: 3, Val Loss: 1.873, Val Acc: 34.02%
Epoch: 4, Val Loss: 1.849, Val Acc: 34.67%
Epoch: 5, Val Loss: 1.809, Val Acc: 36.77%
Epoch: 6, Val Loss: 1.791, Val Acc: 37.27%
Epoch: 7, Val Loss: 1.839, Val Acc: 34.02%
Epoch: 8, Val Loss: 1.860, Val Acc: 34.32%
Epoch: 9, Val Loss: 1.754, Val Acc: 38.07%
Epoch: 10, Val Loss: 1.708, Val Acc: 40.62%
Test Accuracy: 40.45%


[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading output.log; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▅▆▆▇▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▄▅▆▆▄▄▇█
[34m[1mwandb[0m:       val_loss █▇▅▄▃▃▄▅▂▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.4045
[34m[1mwandb[0m: train_accuracy 0.42087
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.4062
[34m[1mwandb[0m:       val_loss 1.70814
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_None_fo_same_dn_128_ca_gelu_da_leakyrelu_v5[0m at: [3

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 1.965, Val Acc: 31.57%
Epoch: 2, Val Loss: 1.873, Val Acc: 34.27%
Epoch: 3, Val Loss: 1.883, Val Acc: 33.57%
Epoch: 4, Val Loss: 1.826, Val Acc: 35.87%
Epoch: 5, Val Loss: 1.878, Val Acc: 35.32%
Epoch: 6, Val Loss: 1.839, Val Acc: 38.07%
Epoch: 7, Val Loss: 1.888, Val Acc: 34.62%
Epoch: 8, Val Loss: 1.882, Val Acc: 37.92%
Epoch: 9, Val Loss: 1.973, Val Acc: 37.02%
Epoch: 10, Val Loss: 2.007, Val Acc: 37.67%
Test Accuracy: 39.65%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▃▄▄▅▆▆▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▄▃▆▅█▄█▇█
[34m[1mwandb[0m:       val_loss ▆▃▃▁▃▂▃▃▇█
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.3965
[34m[1mwandb[0m: train_accuracy 0.6405
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.37669
[34m[1mwandb[0m:       val_loss 2.00684
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_same_dn_128_ca_silu_da_leakyrelu_v5[0m at: [34m

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.091, Val Acc: 28.26%
Epoch: 2, Val Loss: 1.880, Val Acc: 34.22%
Epoch: 3, Val Loss: 1.813, Val Acc: 37.62%
Epoch: 4, Val Loss: 1.791, Val Acc: 36.67%
Epoch: 5, Val Loss: 1.755, Val Acc: 37.17%
Epoch: 6, Val Loss: 1.803, Val Acc: 37.97%
Epoch: 7, Val Loss: 1.751, Val Acc: 39.17%
Epoch: 8, Val Loss: 1.788, Val Acc: 38.12%
Epoch: 9, Val Loss: 1.800, Val Acc: 39.57%
Epoch: 10, Val Loss: 1.837, Val Acc: 39.42%
Test Accuracy: 39.45%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▃▄▄▅▆▆▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▅▇▆▇▇█▇██
[34m[1mwandb[0m:       val_loss █▄▂▂▁▂▁▂▂▃
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.3945
[34m[1mwandb[0m: train_accuracy 0.57675
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.3942
[34m[1mwandb[0m:       val_loss 1.83675
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_same_dn_256_ca_gelu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.020, Val Acc: 28.06%
Epoch: 2, Val Loss: 1.962, Val Acc: 30.17%
Epoch: 3, Val Loss: 2.003, Val Acc: 29.16%
Epoch: 4, Val Loss: 1.907, Val Acc: 31.92%
Epoch: 5, Val Loss: 1.799, Val Acc: 36.27%
Epoch: 6, Val Loss: 1.806, Val Acc: 35.82%
Epoch: 7, Val Loss: 1.778, Val Acc: 37.02%
Epoch: 8, Val Loss: 1.861, Val Acc: 33.77%
Epoch: 9, Val Loss: 1.894, Val Acc: 34.37%
Epoch: 10, Val Loss: 1.749, Val Acc: 39.02%
Test Accuracy: 42.20%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▆▆▇▇██
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▂▃▆▆▇▅▅█
[34m[1mwandb[0m:       val_loss █▆█▅▂▂▂▄▅▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.422
[34m[1mwandb[0m: train_accuracy 0.41837
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.3902
[34m[1mwandb[0m:       val_loss 1.74865
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_64_fo_doubling_dn_256_ca_relu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m0

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.029, Val Acc: 28.71%
Epoch: 2, Val Loss: 2.001, Val Acc: 28.36%
Epoch: 3, Val Loss: 1.954, Val Acc: 30.87%
Epoch: 4, Val Loss: 1.923, Val Acc: 32.62%
Epoch: 5, Val Loss: 1.855, Val Acc: 35.52%
Epoch: 6, Val Loss: 1.807, Val Acc: 35.17%
Epoch: 7, Val Loss: 1.870, Val Acc: 34.17%
Epoch: 8, Val Loss: 1.782, Val Acc: 37.92%
Epoch: 9, Val Loss: 1.849, Val Acc: 36.72%
Epoch: 10, Val Loss: 1.763, Val Acc: 38.52%
Test Accuracy: 42.15%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▆▆▇▇▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▁▃▄▆▆▅█▇█
[34m[1mwandb[0m:       val_loss █▇▆▅▃▂▄▁▃▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.4215
[34m[1mwandb[0m: train_accuracy 0.42575
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.38519
[34m[1mwandb[0m:       val_loss 1.76261
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_64_fo_doubling_dn_128_ca_gelu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/da24

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.076, Val Acc: 25.26%
Epoch: 2, Val Loss: 1.964, Val Acc: 28.21%
Epoch: 3, Val Loss: 1.910, Val Acc: 32.17%
Epoch: 4, Val Loss: 1.870, Val Acc: 33.82%
Epoch: 5, Val Loss: 1.819, Val Acc: 36.07%
Epoch: 6, Val Loss: 1.842, Val Acc: 35.27%
Epoch: 7, Val Loss: 1.765, Val Acc: 39.27%
Epoch: 8, Val Loss: 1.798, Val Acc: 36.82%
Epoch: 9, Val Loss: 1.719, Val Acc: 39.42%
Epoch: 10, Val Loss: 1.748, Val Acc: 38.07%
Test Accuracy: 38.10%


[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▄▄▅▆▆▇███
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▄▅▆▆█▇█▇
[34m[1mwandb[0m:       val_loss █▆▅▄▃▃▂▃▁▂
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.381
[34m[1mwandb[0m: train_accuracy 0.37375
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.38069
[34m[1mwandb[0m:       val_loss 1.74845
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_doubling_dn_128_ca_gelu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.186, Val Acc: 20.71%
Epoch: 2, Val Loss: 2.109, Val Acc: 22.81%
Epoch: 3, Val Loss: 2.046, Val Acc: 24.51%
Epoch: 4, Val Loss: 2.018, Val Acc: 27.56%
Epoch: 5, Val Loss: 1.946, Val Acc: 31.27%
Epoch: 6, Val Loss: 1.896, Val Acc: 32.62%
Epoch: 7, Val Loss: 1.921, Val Acc: 33.32%
Epoch: 8, Val Loss: 1.925, Val Acc: 34.92%
Epoch: 9, Val Loss: 1.956, Val Acc: 35.17%
Epoch: 10, Val Loss: 1.898, Val Acc: 35.22%
Test Accuracy: 35.45%


[34m[1mwandb[0m: uploading artifact model; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▃▄▅▅▆▇▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▃▄▆▇▇███
[34m[1mwandb[0m:       val_loss █▆▅▄▂▁▂▂▂▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.3545
[34m[1mwandb[0m: train_accuracy 0.44175
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.35218
[34m[1mwandb[0m:       val_loss 1.89764
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_64_fo_same_dn_128_ca_relu_da_silu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4/ru

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.312, Val Acc: 11.81%
Epoch: 2, Val Loss: 2.300, Val Acc: 12.96%
Epoch: 3, Val Loss: 2.255, Val Acc: 13.86%
Epoch: 4, Val Loss: 2.245, Val Acc: 13.11%
Epoch: 5, Val Loss: 2.166, Val Acc: 21.66%
Epoch: 6, Val Loss: 2.186, Val Acc: 19.56%
Epoch: 7, Val Loss: 2.148, Val Acc: 22.71%
Epoch: 8, Val Loss: 2.171, Val Acc: 20.36%
Epoch: 9, Val Loss: 2.123, Val Acc: 22.46%
Epoch: 10, Val Loss: 2.080, Val Acc: 27.16%
Test Accuracy: 26.45%


[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▂▁▂▃▄▆▆▇█▇
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▂▂▅▅▆▅▆█
[34m[1mwandb[0m:       val_loss ██▆▆▄▄▃▄▂▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.2645
[34m[1mwandb[0m: train_accuracy 0.21388
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.27164
[34m[1mwandb[0m:       val_loss 2.07956
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_64_fo_doubling_dn_128_ca_silu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.062, Val Acc: 24.51%
Epoch: 2, Val Loss: 1.984, Val Acc: 28.81%
Epoch: 3, Val Loss: 1.928, Val Acc: 31.77%
Epoch: 4, Val Loss: 1.900, Val Acc: 32.97%
Epoch: 5, Val Loss: 1.855, Val Acc: 36.17%
Epoch: 6, Val Loss: 1.818, Val Acc: 36.92%
Epoch: 7, Val Loss: 1.813, Val Acc: 37.12%
Epoch: 8, Val Loss: 1.845, Val Acc: 37.57%
Epoch: 9, Val Loss: 1.826, Val Acc: 37.62%
Epoch: 10, Val Loss: 1.928, Val Acc: 37.47%
Test Accuracy: 37.40%


[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▃▄▄▅▅▆▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▃▅▆▇█████
[34m[1mwandb[0m:       val_loss █▆▄▃▂▁▁▂▁▄
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.374
[34m[1mwandb[0m: train_accuracy 0.56037
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.37469
[34m[1mwandb[0m:       val_loss 1.92816
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_doubling_dn_256_ca_relu_da_silu_v5[0m at: [34m[4mhttps://wandb.ai/da

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.177, Val Acc: 19.31%
Epoch: 2, Val Loss: 2.095, Val Acc: 23.21%
Epoch: 3, Val Loss: 2.104, Val Acc: 23.01%
Epoch: 4, Val Loss: 2.034, Val Acc: 27.56%
Epoch: 5, Val Loss: 1.981, Val Acc: 28.91%
Epoch: 6, Val Loss: 1.916, Val Acc: 33.07%
Epoch: 7, Val Loss: 1.922, Val Acc: 30.97%
Epoch: 8, Val Loss: 1.910, Val Acc: 33.37%
Epoch: 9, Val Loss: 1.861, Val Acc: 34.47%
Epoch: 10, Val Loss: 1.866, Val Acc: 34.37%
Test Accuracy: 35.30%


[34m[1mwandb[0m: uploading artifact model; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▅▆▇▇██
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▃▃▅▅▇▆▇██
[34m[1mwandb[0m:       val_loss █▆▆▅▄▂▂▂▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.353
[34m[1mwandb[0m: train_accuracy 0.35513
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.34367
[34m[1mwandb[0m:       val_loss 1.86627
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_None_fo_same_dn_128_ca_relu_da_gelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4/r

Total training samples: 8000
Total validation samples: 1999


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_halving_dn_512_ca_silu_da_silu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4/runs/xbu9mzwk[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20250418_142606-xbu9mzwk/logs[0m
Run xbu9mzwk errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_19/1168500401.py", line 326, in sweep_train
    train_model(config)
  File "/tmp/ipykernel_19/1168500401.py", line 238, in train_model
    loss.backward()
  File "/usr/local/lib/python3.11/dist-packages/torch/_tensor.py", line 581, in backward
    t

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.114, Val Acc: 24.76%
Epoch: 2, Val Loss: 2.036, Val Acc: 26.86%
Epoch: 3, Val Loss: 2.026, Val Acc: 29.06%
Epoch: 4, Val Loss: 1.977, Val Acc: 31.27%
Epoch: 5, Val Loss: 1.962, Val Acc: 31.37%
Epoch: 6, Val Loss: 1.962, Val Acc: 31.02%
Epoch: 7, Val Loss: 1.909, Val Acc: 34.17%
Epoch: 8, Val Loss: 1.892, Val Acc: 34.27%
Epoch: 9, Val Loss: 1.866, Val Acc: 35.42%
Epoch: 10, Val Loss: 1.903, Val Acc: 34.12%
Test Accuracy: 33.90%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▄▄▅▆▆▇▇▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▄▅▅▅▇▇█▇
[34m[1mwandb[0m:       val_loss █▆▆▄▄▄▂▂▁▂
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.339
[34m[1mwandb[0m: train_accuracy 0.32775
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.34117
[34m[1mwandb[0m:       val_loss 1.90274
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_None_fo_doubling_dn_128_ca_relu_da_relu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 1.863, Val Acc: 33.12%
Epoch: 2, Val Loss: 1.848, Val Acc: 34.92%
Epoch: 3, Val Loss: 1.840, Val Acc: 36.97%
Epoch: 4, Val Loss: 2.034, Val Acc: 31.22%
Epoch: 5, Val Loss: 2.213, Val Acc: 31.57%
Epoch: 6, Val Loss: 2.254, Val Acc: 33.47%
Epoch: 7, Val Loss: 2.167, Val Acc: 34.92%
Epoch: 8, Val Loss: 2.168, Val Acc: 34.27%
Epoch: 9, Val Loss: 2.238, Val Acc: 34.57%
Epoch: 10, Val Loss: 2.167, Val Acc: 36.12%
Test Accuracy: 35.60%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▃▅▇█████
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▃▆█▁▁▄▆▅▅▇
[34m[1mwandb[0m:       val_loss ▁▁▁▄▇█▇▇█▇
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.356
[34m[1mwandb[0m: train_accuracy 0.99938
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.36118
[34m[1mwandb[0m:       val_loss 2.16743
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_None_fo_doubling_dn_512_ca_relu_da_silu_v5[0m at: [34

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.055, Val Acc: 27.46%
Epoch: 2, Val Loss: 1.988, Val Acc: 29.91%
Epoch: 3, Val Loss: 1.940, Val Acc: 33.47%
Epoch: 4, Val Loss: 1.904, Val Acc: 34.47%
Epoch: 5, Val Loss: 1.848, Val Acc: 36.17%
Epoch: 6, Val Loss: 1.906, Val Acc: 36.02%
Epoch: 7, Val Loss: 2.156, Val Acc: 34.72%
Epoch: 8, Val Loss: 2.255, Val Acc: 35.12%
Epoch: 9, Val Loss: 2.764, Val Acc: 35.62%
Epoch: 10, Val Loss: 2.895, Val Acc: 35.92%
Test Accuracy: 37.05%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▂▃▃▄▅▆▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▃▆▇██▇▇██
[34m[1mwandb[0m:       val_loss ▂▂▂▁▁▁▃▄▇█
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.3705
[34m[1mwandb[0m: train_accuracy 0.91475
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.35918
[34m[1mwandb[0m:       val_loss 2.89451
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_64_fo_same_dn_512_ca_gelu_da_gelu_v5[0m at: [34m[4m

Total training samples: 8000
Total validation samples: 1999


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 🚀 View run [33mbf_64_fo_halving_dn_256_ca_mish_da_silu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4/runs/0qnozmz8[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20250418_145408-0qnozmz8/logs[0m
Run 0qnozmz8 errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_19/1168500401.py", line 326, in sweep_train
    train_model(config)
  File "/tmp/ipykernel_19/1168500401.py", line 234, in train_model
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/m

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.087, Val Acc: 24.36%
Epoch: 2, Val Loss: 2.061, Val Acc: 25.06%
Epoch: 3, Val Loss: 1.921, Val Acc: 29.41%
Epoch: 4, Val Loss: 1.869, Val Acc: 33.47%
Epoch: 5, Val Loss: 1.847, Val Acc: 35.12%
Epoch: 6, Val Loss: 1.872, Val Acc: 33.17%
Epoch: 7, Val Loss: 1.794, Val Acc: 37.42%
Epoch: 8, Val Loss: 1.770, Val Acc: 37.82%
Epoch: 9, Val Loss: 1.771, Val Acc: 37.02%
Epoch: 10, Val Loss: 1.771, Val Acc: 36.82%
Test Accuracy: 38.50%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▆▆▇▇▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▁▄▆▇▆███▇
[34m[1mwandb[0m:       val_loss █▇▄▃▃▃▂▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.385
[34m[1mwandb[0m: train_accuracy 0.38338
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.36818
[34m[1mwandb[0m:       val_loss 1.77139
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_64_fo_same_dn_256_ca_gelu_da_leakyrelu_v5[0m at: [34m

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.019, Val Acc: 27.31%
Epoch: 2, Val Loss: 1.917, Val Acc: 31.57%
Epoch: 3, Val Loss: 1.925, Val Acc: 32.37%
Epoch: 4, Val Loss: 1.813, Val Acc: 33.47%
Epoch: 5, Val Loss: 1.792, Val Acc: 37.87%
Epoch: 6, Val Loss: 1.793, Val Acc: 37.62%
Epoch: 7, Val Loss: 1.795, Val Acc: 37.52%
Epoch: 8, Val Loss: 1.724, Val Acc: 39.42%
Epoch: 9, Val Loss: 1.698, Val Acc: 40.12%
Epoch: 10, Val Loss: 1.702, Val Acc: 40.12%
Test Accuracy: 41.15%


[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▆▆▇▇██
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▃▄▄▇▇▇███
[34m[1mwandb[0m:       val_loss █▆▆▄▃▃▃▂▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.4115
[34m[1mwandb[0m: train_accuracy 0.40025
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.4012
[34m[1mwandb[0m:       val_loss 1.70208
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_same_dn_128_ca_gelu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/d

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.079, Val Acc: 25.06%
Epoch: 2, Val Loss: 2.107, Val Acc: 27.01%
Epoch: 3, Val Loss: 1.942, Val Acc: 29.71%
Epoch: 4, Val Loss: 2.069, Val Acc: 27.96%
Epoch: 5, Val Loss: 1.936, Val Acc: 30.87%
Epoch: 6, Val Loss: 1.869, Val Acc: 35.37%
Epoch: 7, Val Loss: 1.789, Val Acc: 36.37%
Epoch: 8, Val Loss: 1.785, Val Acc: 36.27%
Epoch: 9, Val Loss: 1.762, Val Acc: 38.27%
Epoch: 10, Val Loss: 1.792, Val Acc: 38.57%
Test Accuracy: 40.30%


[34m[1mwandb[0m: uploading artifact model; updating run config
[34m[1mwandb[0m: uploading artifact model; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▆▆▆▇██
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▃▃▄▆▇▇██
[34m[1mwandb[0m:       val_loss ▇█▅▇▅▃▂▁▁▂
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.403
[34m[1mwandb[0m: train_accuracy 0.41825
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.38569
[34m[1mwandb[0m:       val_loss 1.79247
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_64_fo_doubling_dn_256_ca_relu_da_leakyrel

Total training samples: 8000
Total validation samples: 1999


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_halving_dn_256_ca_gelu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4/runs/171n7452[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20250418_152748-171n7452/logs[0m
Run 171n7452 errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_19/1168500401.py", line 326, in sweep_train
    train_model(config)
  File "/tmp/ipykernel_19/1168500401.py", line 234, in train_model
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.191, Val Acc: 19.31%
Epoch: 2, Val Loss: 2.127, Val Acc: 22.41%
Epoch: 3, Val Loss: 2.049, Val Acc: 26.46%
Epoch: 4, Val Loss: 1.981, Val Acc: 29.11%
Epoch: 5, Val Loss: 1.982, Val Acc: 28.96%
Epoch: 6, Val Loss: 1.952, Val Acc: 29.81%
Epoch: 7, Val Loss: 1.917, Val Acc: 32.32%
Epoch: 8, Val Loss: 1.883, Val Acc: 33.67%
Epoch: 9, Val Loss: 1.877, Val Acc: 34.62%
Epoch: 10, Val Loss: 1.862, Val Acc: 34.87%
Test Accuracy: 34.90%


[34m[1mwandb[0m: uploading artifact model; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▆▆▇▇██
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▄▅▅▆▇▇██
[34m[1mwandb[0m:       val_loss █▇▅▄▄▃▂▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.349
[34m[1mwandb[0m: train_accuracy 0.336
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.34867
[34m[1mwandb[0m:       val_loss 1.86213
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_doubling_dn_128_ca_relu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.077, Val Acc: 27.36%
Epoch: 2, Val Loss: 1.980, Val Acc: 30.02%
Epoch: 3, Val Loss: 1.953, Val Acc: 32.12%
Epoch: 4, Val Loss: 1.944, Val Acc: 32.72%
Epoch: 5, Val Loss: 1.827, Val Acc: 37.12%
Epoch: 6, Val Loss: 1.805, Val Acc: 36.97%
Epoch: 7, Val Loss: 1.857, Val Acc: 35.87%
Epoch: 8, Val Loss: 1.768, Val Acc: 39.87%
Epoch: 9, Val Loss: 1.797, Val Acc: 39.12%
Epoch: 10, Val Loss: 1.868, Val Acc: 36.22%
Test Accuracy: 38.10%


[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▆▆▆▇██
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▄▄▆▆▆██▆
[34m[1mwandb[0m:       val_loss █▆▅▅▂▂▃▁▂▃
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.381
[34m[1mwandb[0m: train_accuracy 0.42525
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.36218
[34m[1mwandb[0m:       val_loss 1.86834
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_same_dn_256_ca_gelu_da_gelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m0

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 1.889, Val Acc: 32.87%
Epoch: 2, Val Loss: 1.820, Val Acc: 37.17%
Epoch: 3, Val Loss: 1.818, Val Acc: 36.97%
Epoch: 4, Val Loss: 1.774, Val Acc: 39.87%
Epoch: 5, Val Loss: 1.879, Val Acc: 35.97%
Epoch: 6, Val Loss: 1.938, Val Acc: 34.92%
Epoch: 7, Val Loss: 1.949, Val Acc: 35.77%
Epoch: 8, Val Loss: 2.035, Val Acc: 35.12%
Epoch: 9, Val Loss: 2.050, Val Acc: 35.82%
Epoch: 10, Val Loss: 2.136, Val Acc: 34.32%
Test Accuracy: 36.20%


[34m[1mwandb[0m: uploading artifact model; updating run config; uploading output.log; uploading wandb-summary.json
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▃▅▇▇████
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▅▅█▄▃▄▃▄▂
[34m[1mwandb[0m:       val_loss ▃▂▂▁▃▄▄▆▆█
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.362
[34m[1mwandb[0m: train_accuracy 0.9995
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.34317
[34m[1mwandb[0m:       val_loss 2.13584
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_None_fo_doubling_dn_256_ca_silu_da_leakyrelu_v5[0m at: [

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.039, Val Acc: 28.06%
Epoch: 2, Val Loss: 1.953, Val Acc: 31.12%
Epoch: 3, Val Loss: 1.875, Val Acc: 33.27%
Epoch: 4, Val Loss: 1.860, Val Acc: 32.17%
Epoch: 5, Val Loss: 1.783, Val Acc: 36.92%
Epoch: 6, Val Loss: 1.816, Val Acc: 36.07%
Epoch: 7, Val Loss: 1.734, Val Acc: 38.92%
Epoch: 8, Val Loss: 1.708, Val Acc: 40.62%
Epoch: 9, Val Loss: 1.716, Val Acc: 39.72%
Epoch: 10, Val Loss: 1.903, Val Acc: 35.47%
Test Accuracy: 36.60%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▅▆▆▇██
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▃▄▃▆▅▇█▇▅
[34m[1mwandb[0m:       val_loss █▆▅▄▃▃▂▁▁▅
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.366
[34m[1mwandb[0m: train_accuracy 0.41037
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.35468
[34m[1mwandb[0m:       val_loss 1.90318
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_None_fo_same_dn_256_ca_silu_da_relu_v5[0m at: [34m[4

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.143, Val Acc: 22.16%
Epoch: 2, Val Loss: 2.038, Val Acc: 26.76%
Epoch: 3, Val Loss: 1.982, Val Acc: 30.07%
Epoch: 4, Val Loss: 2.003, Val Acc: 28.26%
Epoch: 5, Val Loss: 1.967, Val Acc: 31.67%
Epoch: 6, Val Loss: 1.898, Val Acc: 34.17%
Epoch: 7, Val Loss: 1.871, Val Acc: 35.02%
Epoch: 8, Val Loss: 1.845, Val Acc: 35.47%
Epoch: 9, Val Loss: 1.826, Val Acc: 36.92%
Epoch: 10, Val Loss: 1.806, Val Acc: 37.02%
Test Accuracy: 35.90%


[34m[1mwandb[0m: uploading artifact model; updating run config
[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading history steps 9-10, summary, console lines 11-12
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▆▆▇▇██
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▃▅▄▅▇▇▇██
[34m[1mwandb[0m:       val_loss █▆▅▅▄▃▂▂▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.359
[34m[1mwandb[0m: train_accuracy 0.33813
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.37019
[34m[1mwandb[0m:       val_loss 1.80552
[34m[1mwandb[0m: 
[34m[1mwand

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.231, Val Acc: 18.16%
Epoch: 2, Val Loss: 2.187, Val Acc: 19.46%
Epoch: 3, Val Loss: 2.168, Val Acc: 20.61%
Epoch: 4, Val Loss: 2.151, Val Acc: 22.06%
Epoch: 5, Val Loss: 2.120, Val Acc: 22.96%
Epoch: 6, Val Loss: 2.092, Val Acc: 23.61%
Epoch: 7, Val Loss: 2.042, Val Acc: 26.61%
Epoch: 8, Val Loss: 2.001, Val Acc: 29.06%
Epoch: 9, Val Loss: 2.010, Val Acc: 28.41%
Epoch: 10, Val Loss: 1.969, Val Acc: 30.47%
Test Accuracy: 31.05%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m: uploading artifact model; uploading history steps 9-10, summary, console lines 11-12
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▄▄▆▆▇█▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▂▃▄▄▆▇▇█
[34m[1mwandb[0m:       val_loss █▇▆▆▅▄▃▂▂▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.3105
[34m[1mwandb[0m: train_accuracy 0.256
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.30465
[34m[1m

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.331, Val Acc: 23.16%
Epoch: 2, Val Loss: 1.950, Val Acc: 31.62%
Epoch: 3, Val Loss: 1.864, Val Acc: 33.67%
Epoch: 4, Val Loss: 1.889, Val Acc: 32.72%
Epoch: 5, Val Loss: 1.898, Val Acc: 34.42%
Epoch: 6, Val Loss: 1.973, Val Acc: 32.17%
Epoch: 7, Val Loss: 1.775, Val Acc: 37.62%
Epoch: 8, Val Loss: 1.802, Val Acc: 38.77%
Epoch: 9, Val Loss: 1.731, Val Acc: 40.32%
Epoch: 10, Val Loss: 1.863, Val Acc: 36.92%
Test Accuracy: 38.20%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▆▆▇▇▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▄▅▅▆▅▇▇█▇
[34m[1mwandb[0m:       val_loss █▄▃▃▃▄▂▂▁▃
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.382
[34m[1mwandb[0m: train_accuracy 0.4185
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.36918
[34m[1mwandb[0m:       val_loss 1.86326
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_64_fo_doubling_dn_512_ca_relu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m0

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.357, Val Acc: 11.61%
Epoch: 2, Val Loss: 2.295, Val Acc: 11.31%
Epoch: 3, Val Loss: 2.282, Val Acc: 15.01%
Epoch: 4, Val Loss: 2.228, Val Acc: 16.71%
Epoch: 5, Val Loss: 2.215, Val Acc: 18.31%
Epoch: 6, Val Loss: 2.157, Val Acc: 21.61%
Epoch: 7, Val Loss: 2.301, Val Acc: 11.26%
Epoch: 8, Val Loss: 2.256, Val Acc: 16.11%
Epoch: 9, Val Loss: 2.154, Val Acc: 21.51%
Epoch: 10, Val Loss: 2.160, Val Acc: 22.41%
Test Accuracy: 23.15%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model; uploading history steps 9-10, summary, console lines 11-12
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▃▁▁▄▅▆▃▃▆█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▁▃▄▅▇▁▄▇█
[34m[1mwandb[0m:       val_loss █▆▅▄▃▁▆▅▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.2315
[34m[1mwandb[0m: train_accuracy 0.19637
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.22411
[34m[1mwandb[0m:       val_loss 2.16003
[34m[1m

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 1.963, Val Acc: 30.47%
Epoch: 2, Val Loss: 1.966, Val Acc: 30.62%
Epoch: 3, Val Loss: 1.894, Val Acc: 34.52%
Epoch: 4, Val Loss: 1.837, Val Acc: 34.87%
Epoch: 5, Val Loss: 1.851, Val Acc: 37.67%
Epoch: 6, Val Loss: 1.980, Val Acc: 34.22%
Epoch: 7, Val Loss: 1.760, Val Acc: 39.67%
Epoch: 8, Val Loss: 1.811, Val Acc: 40.32%
Epoch: 9, Val Loss: 1.940, Val Acc: 37.62%
Epoch: 10, Val Loss: 1.856, Val Acc: 39.52%
Test Accuracy: 40.65%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▃▃▄▄▅▆▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▁▄▄▆▄██▆▇
[34m[1mwandb[0m:       val_loss ▇█▅▃▄█▁▃▇▄
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.4065
[34m[1mwandb[0m: train_accuracy 0.64062
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.3952
[34m[1mwandb[0m:       val_loss 1.85573
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_64_fo_doubling_dn_128_ca_gelu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.030, Val Acc: 27.41%
Epoch: 2, Val Loss: 1.961, Val Acc: 30.37%
Epoch: 3, Val Loss: 2.020, Val Acc: 28.06%
Epoch: 4, Val Loss: 1.926, Val Acc: 31.37%
Epoch: 5, Val Loss: 1.905, Val Acc: 31.87%
Epoch: 6, Val Loss: 1.859, Val Acc: 34.17%
Epoch: 7, Val Loss: 1.862, Val Acc: 34.52%
Epoch: 8, Val Loss: 1.796, Val Acc: 36.82%
Epoch: 9, Val Loss: 1.781, Val Acc: 38.47%
Epoch: 10, Val Loss: 1.737, Val Acc: 39.32%
Test Accuracy: 40.75%


[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m: uploading artifact model; uploading history steps 9-10, summary, console lines 11-12
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▅▅▆▇██
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▃▁▃▄▅▅▇▇█
[34m[1mwandb[0m:       val_loss █▆█▆▅▄▄▂▂▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.4075
[34m[1mwandb[0m: train_accuracy 0.3955
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.3932
[34m[1mwandb[0m:       val_loss 1.73712
[34m[1mwandb[0m: 
[34m[1mwan

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 1.977, Val Acc: 30.87%
Epoch: 2, Val Loss: 1.879, Val Acc: 34.12%
Epoch: 3, Val Loss: 1.823, Val Acc: 36.77%
Epoch: 4, Val Loss: 1.844, Val Acc: 35.77%
Epoch: 5, Val Loss: 1.811, Val Acc: 36.92%
Epoch: 6, Val Loss: 1.844, Val Acc: 38.47%
Epoch: 7, Val Loss: 1.792, Val Acc: 38.57%
Epoch: 8, Val Loss: 1.831, Val Acc: 38.32%
Epoch: 9, Val Loss: 1.944, Val Acc: 37.52%
Epoch: 10, Val Loss: 2.162, Val Acc: 35.82%
Test Accuracy: 37.60%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model; uploading history steps 9-10, summary, console lines 11-12
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▂▃▃▄▅▆▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▄▆▅▆███▇▆
[34m[1mwandb[0m:       val_loss ▅▃▂▂▁▂▁▂▄█
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.376
[34m[1mwandb[0m: train_accuracy 0.71788
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.35818
[34m[1mwandb[0m:       val_loss 2.16193
[34m[1mwandb[0m: 
[34m[1mwandb[0m:

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 1.997, Val Acc: 29.36%
Epoch: 2, Val Loss: 1.928, Val Acc: 31.12%
Epoch: 3, Val Loss: 1.815, Val Acc: 35.77%
Epoch: 4, Val Loss: 1.828, Val Acc: 35.02%
Epoch: 5, Val Loss: 1.757, Val Acc: 36.52%
Epoch: 6, Val Loss: 1.765, Val Acc: 38.02%
Epoch: 7, Val Loss: 1.769, Val Acc: 37.22%
Epoch: 8, Val Loss: 1.845, Val Acc: 38.42%
Epoch: 9, Val Loss: 1.778, Val Acc: 40.32%
Epoch: 10, Val Loss: 1.879, Val Acc: 38.67%
Test Accuracy: 41.05%


[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▃▃▄▅▅▆▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▅▅▆▇▆▇█▇
[34m[1mwandb[0m:       val_loss █▆▃▃▁▁▁▄▂▅
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.4105
[34m[1mwandb[0m: train_accuracy 0.62575
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.38669
[34m[1mwandb[0m:       val_loss 1.87928
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_doubling_dn_256_ca_gelu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.090, Val Acc: 26.46%
Epoch: 2, Val Loss: 2.050, Val Acc: 25.56%
Epoch: 3, Val Loss: 2.018, Val Acc: 27.06%
Epoch: 4, Val Loss: 1.992, Val Acc: 27.86%
Epoch: 5, Val Loss: 1.927, Val Acc: 31.92%
Epoch: 6, Val Loss: 1.943, Val Acc: 30.02%
Epoch: 7, Val Loss: 1.894, Val Acc: 32.52%
Epoch: 8, Val Loss: 1.835, Val Acc: 36.37%
Epoch: 9, Val Loss: 1.830, Val Acc: 33.67%
Epoch: 10, Val Loss: 1.836, Val Acc: 36.37%
Test Accuracy: 38.80%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▅▆▇▇██
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▂▁▂▂▅▄▆█▆█
[34m[1mwandb[0m:       val_loss █▇▆▅▄▄▃▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.388
[34m[1mwandb[0m: train_accuracy 0.39125
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.36368
[34m[1mwandb[0m:       val_loss 1.83637
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_64_fo_same_dn_256_ca_silu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-

Total training samples: 8000
Total validation samples: 1999


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_halving_dn_512_ca_relu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4/runs/spt0mh2a[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20250418_173933-spt0mh2a/logs[0m
Run spt0mh2a errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_19/1168500401.py", line 326, in sweep_train
    train_model(config)
  File "/tmp/ipykernel_19/1168500401.py", line 234, in train_model
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.157, Val Acc: 20.71%
Epoch: 2, Val Loss: 2.044, Val Acc: 27.81%
Epoch: 3, Val Loss: 1.999, Val Acc: 28.56%
Epoch: 4, Val Loss: 1.911, Val Acc: 32.72%
Epoch: 5, Val Loss: 1.870, Val Acc: 34.82%
Epoch: 6, Val Loss: 1.909, Val Acc: 31.37%
Epoch: 7, Val Loss: 1.840, Val Acc: 34.17%
Epoch: 8, Val Loss: 1.847, Val Acc: 34.17%
Epoch: 9, Val Loss: 1.792, Val Acc: 37.07%
Epoch: 10, Val Loss: 1.796, Val Acc: 36.47%
Test Accuracy: 37.40%


[34m[1mwandb[0m: uploading artifact model; uploading output.log; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▃▄▅▆▆▇▇▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▄▄▆▇▆▇▇██
[34m[1mwandb[0m:       val_loss █▆▅▃▃▃▂▂▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.374
[34m[1mwandb[0m: train_accuracy 0.3525
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.36468
[34m[1mwandb[0m:       val_loss 1.79603
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_same_dn_128_ca_gelu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-i

Total training samples: 8000
Total validation samples: 1999


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_halving_dn_256_ca_relu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4/runs/4twbwd1v[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/da24m008-iit-madras/DA6401-A2-V4[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20250418_175032-4twbwd1v/logs[0m
Run 4twbwd1v errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_19/1168500401.py", line 326, in sweep_train
    train_model(config)
  File "/tmp/ipykernel_19/1168500401.py", line 234, in train_model
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.110, Val Acc: 23.76%
Epoch: 2, Val Loss: 2.058, Val Acc: 24.71%
Epoch: 3, Val Loss: 2.008, Val Acc: 28.41%
Epoch: 4, Val Loss: 1.959, Val Acc: 30.32%
Epoch: 5, Val Loss: 1.940, Val Acc: 30.72%
Epoch: 6, Val Loss: 1.985, Val Acc: 31.12%
Epoch: 7, Val Loss: 1.955, Val Acc: 31.62%
Epoch: 8, Val Loss: 1.991, Val Acc: 32.17%
Epoch: 9, Val Loss: 2.081, Val Acc: 30.77%
Epoch: 10, Val Loss: 2.323, Val Acc: 30.52%
Test Accuracy: 34.05%


[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy ▁▂▃▃▄▄▅▆▇█
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy ▁▂▅▆▇▇██▇▇
[34m[1mwandb[0m:       val_loss ▄▃▂▁▁▂▁▂▄█
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.3405
[34m[1mwandb[0m: train_accuracy 0.57137
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.30515
[34m[1mwandb[0m:       val_loss 2.32348
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mbf_32_fo_same_dn_256_ca_relu_da_leakyrelu_v5[0m at: [34m[4mhttps://wandb.ai/

Total training samples: 8000
Total validation samples: 1999
Epoch: 1, Val Loss: 2.277, Val Acc: 14.61%
Epoch: 2, Val Loss: 2.303, Val Acc: 10.01%
Epoch: 3, Val Loss: 2.303, Val Acc: 10.01%
Epoch: 4, Val Loss: 2.303, Val Acc: 10.01%
Epoch: 5, Val Loss: 2.303, Val Acc: 10.01%
Epoch: 6, Val Loss: 2.303, Val Acc: 10.01%
Epoch: 7, Val Loss: 2.303, Val Acc: 10.01%
Epoch: 8, Val Loss: 2.303, Val Acc: 10.01%
Epoch: 9, Val Loss: 2.303, Val Acc: 9.95%
Epoch: 10, Val Loss: 2.303, Val Acc: 10.01%
Test Accuracy: 10.00%


[34m[1mwandb[0m: uploading artifact model; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m: uploading artifact model; uploading history steps 9-10, summary, console lines 11-12
[34m[1mwandb[0m: uploading artifact model
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:          epoch ▁▂▃▃▄▅▆▆▇█
[34m[1mwandb[0m:  test_accuracy ▁
[34m[1mwandb[0m: train_accuracy █▇▂▃▂▂▂▁▂▂
[34m[1mwandb[0m:     train_loss ▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:   val_accuracy █▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:       val_loss ▁█████████
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:          epoch 10
[34m[1mwandb[0m:  test_accuracy 0.1
[34m[1mwandb[0m: train_accuracy 0.09362
[34m[1mwandb[0m:     train_loss 0
[34m[1mwandb[0m:   val_accuracy 0.10005
[34m[1mwandb[0m:       val_lo

## Question 4

In [5]:
# Path to the test data
DATASET_PATH = r"E:\IITM\2nd sem\inaturalist_12K"  # Update with your path
TEST_DIR = os.path.join(DATASET_PATH, "val")

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Image size
IMAGE_SIZE = 224


Using device: cpu


In [6]:
def get_best_run_from_sweep(sweep_id, entity, project):
    api = wandb.Api()
    
    sweep = api.sweep(f"{entity}/{project}/{sweep_id}")
    
    best_val_acc = -1
    best_run = None
    
    for run in sweep.runs:
        val_acc = run.summary.get("val_accuracy")
        if val_acc is not None and val_acc > best_val_acc:
            best_val_acc = val_acc
            best_run = run
    
    if best_run is None:
        raise ValueError("No runs with 'val_accuracy' found in the sweep")
    
    print(f"Best run: {best_run.name}, val_accuracy: {best_val_acc:.4f}")
    return best_run


In [None]:
def download_best_model(run, artifact_name="model", output_dir="downloaded_model"):
    """
    Download the best model file from wandb.
    
    Args:
        best_run: The wandb run object for the best run
    
    Returns:
        model_path: Local path to the downloaded model
        config: Configuration of the best model
    """
    api = wandb.Api()

    # List all artifacts with this name in the project
    artifact_versions = api.artifacts(name=f"{run.project}/{artifact_name}", type_name='model')
    output_dir = os.path.join(os.getcwd(), output_dir)
    for artifact in artifact_versions:
        # Match the artifact to the run that created it
        if artifact.logged_by and artifact.logged_by().id == run.id:
            print(f"Found artifact version: {artifact.version} from run: {run.name}")
            artifact_dir = artifact.download(root=output_dir)
            for file_name in os.listdir(artifact_dir):
                print(file_name)
                if file_name.startswith("final_model") and file_name.endswith(".pth"):
                    model_path = os.path.join(os.getcwd(), artifact_dir, file_name)
                    print(f"Downloaded model file: {model_path}")
    
    print(f"Downloaded model file: {model_path}")
    
    # Get the model configuration from the run
    config = {
        'base_filters': run.config.get('base_filters', 32),
        'dense_activation': run.config.get('dense_activation', 'relu'),
        'filter_organization': run.config.get('filter_organization', 'doubling'),
        'dense_neurons': run.config.get('dense_neurons', 512),
        'dropout_rate': run.config.get('dropout_rate', 0.3),
        'use_batch_norm': run.config.get('use_batch_norm', True),
        'conv_activation': run.config.get('conv_activation', 'mish')
    }
    
    return model_path, config


In [8]:
def load_test_data():
    """Load the test dataset"""
    # Define transforms for test data
    test_transform = transforms.Compose([
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    # Load test dataset
    test_dataset = iNaturalistDataset(root_dir=TEST_DIR, transform=test_transform)
    
    # Create data loader
    test_loader = DataLoader(
        test_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=4,
        pin_memory=True
    )
    
    return test_loader, test_dataset


In [9]:
def evaluate_model(sweep_id='uf2dfd5t', entity='da24m008-iit-madras', project='DA6401-A2-V4'):
    """Evaluate the best model on the test set"""
    # Initialize wandb
    wandb.init(project=project, job_type="evaluation")
    
    try:
        # Get the best run and download its model
        best_run = get_best_run_from_sweep(
            sweep_id=sweep_id,
            entity=entity,
            project=project
        )

        model_path, best_config = download_best_model(best_run)
        
        # Log the best run information
        wandb.log({"best_run_id": best_run.id, "best_run_name": best_run.name})
        
        # Load test data
        test_loader, test_dataset = load_test_data()
        
        # Get class names and count
        class_names = test_dataset.classes
        num_classes = len(class_names)
        print(f"Number of classes: {num_classes}")
        print(f"Class names: {class_names}")
        
        # Update config with the correct number of classes
        best_config['num_classes'] = num_classes
        
        # Create model with the best configuration
        if best_config['filter_organization'] == 'same':
            filters = [best_config['base_filters']] * 5
        elif best_config['filter_organization'] == 'doubling':
            filters = [best_config['base_filters'] * (2**i) for i in range(5)]
        elif best_config['filter_organization'] == 'halving':
            filters = [best_config['base_filters'] * (2**(4-i)) for i in range(5)]
        else:
            filters = [32, 64, 128, 256, 512]  # Default
        
        model = CNNModel(
            input_channels=3,
            num_classes=num_classes,
            filters_per_layer=filters,
            kernel_size=3,
            conv_activation=best_config['conv_activation'],
            dense_units=best_config['dense_neurons'],
            dropout_rate=best_config['dropout_rate'],
            use_batch_norm=best_config['use_batch_norm'],
            dense_activation=best_config['dense_activation']
        )
        
        # Load the best model weights
        model.load_state_dict(torch.load(model_path, map_location=device))
        model = model.to(device)
        model.eval()  # Set to evaluation mode
        
        # Evaluate model
        correct = 0
        total = 0
        all_labels = []
        all_predictions = []
        
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = outputs.max(1)
                
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()
                
                # Store for confusion matrix
                all_labels.extend(labels.cpu().numpy())
                all_predictions.extend(predicted.cpu().numpy())
        
        test_accuracy = correct / total
        print(f"Test accuracy: {test_accuracy:.4f}")
        
        # Log to wandb
        wandb.log({
            "best_model_test_accuracy": test_accuracy
        })
        
        return model, test_loader, test_dataset, all_labels, all_predictions, class_names
    
    except Exception as e:
        print(f"Error during evaluation: {e}")
        raise e


In [10]:
def create_prediction_grid(model, test_dataset, class_names):
    """Create a 10x3 grid of test images with predictions"""
    # Set model to evaluation mode
    model.eval()
    
    # Define transform for visualization
    vis_transform = transforms.Compose([
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
    ])
    
    # Sample indices for the grid
    num_samples = min(30, len(test_dataset))  # 10x3 grid needs 30 images
    indices = np.random.choice(len(test_dataset), num_samples, replace=False)
    
    # Create figure for the grid
    plt.figure(figsize=(15, 25))
    
    # Create lists to store images and captions for wandb
    wandb_images = []
    
    with torch.no_grad():
        for i, idx in enumerate(indices):
            # Get image and label
            original_image, label = test_dataset[idx]
            
            # For visualization, we need the unnormalized image
            img_path = test_dataset.image_paths[idx]
            vis_image = Image.open(img_path).convert('RGB')
            vis_tensor = vis_transform(vis_image)
            
            # Move to device and add batch dimension
            input_tensor = original_image.unsqueeze(0).to(device)
            
            # Get prediction
            output = model(input_tensor)
            _, prediction = output.max(1)
            prediction = prediction.item()
            
            # Plot
            plt.subplot(10, 3, i+1)
            # Convert tensor to numpy for plotting
            img_array = vis_tensor.permute(1, 2, 0).numpy()
            plt.imshow(img_array)
            
            true_class = class_names[label]
            pred_class = class_names[prediction]
            
            if label == prediction:
                color = 'green'
                caption = f"True: {true_class} | Pred: {pred_class} ✓"
            else:
                color = 'red'
                caption = f"True: {true_class} | Pred: {pred_class} ✗"
            
            plt.title(caption, color=color)
            plt.axis('off')
            
            # Add to wandb images list
            wandb_images.append(wandb.Image(img_array, caption=caption))
    
    plt.tight_layout()
    
    # Save the figure
    plt.savefig('prediction_grid.png')
    
    # Log the figure to wandb
    wandb.log({"prediction_grid": wandb.Image('prediction_grid.png')})
    
    # Also log the individual images with captions
    wandb.log({"test_predictions": wandb_images})


In [11]:
def create_confusion_matrix(all_labels, all_predictions, class_names):
    """Create and log a confusion matrix"""
    # Create confusion matrix
    cm = confusion_matrix(all_labels, all_predictions)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    
    plt.figure(figsize=(12, 10))
    disp.plot(cmap=plt.cm.Blues)
    plt.xticks(rotation=90)
    plt.tight_layout()
    
    # Save the figure
    plt.savefig('confusion_matrix.png')
    
    # Log the figure to wandb
    wandb.log({"confusion_matrix": wandb.Image('confusion_matrix.png')})
    
    # Also log a summary of class-wise accuracies
    class_accuracy = cm.diagonal() / cm.sum(axis=1)
    for i, (class_name, accuracy) in enumerate(zip(class_names, class_accuracy)):
        wandb.log({f"class_accuracy/{class_name}": accuracy})


In [12]:
def generate_classification_report(all_labels, all_predictions, class_names):
    """Generate and log classification report"""    
    # Generate report
    report = classification_report(all_labels, all_predictions, 
                                  target_names=class_names, 
                                  output_dict=True)
    
    # Log to wandb
    for class_name in class_names:
        if class_name in report:
            wandb.log({
                f"metrics/{class_name}/precision": report[class_name]['precision'],
                f"metrics/{class_name}/recall": report[class_name]['recall'],
                f"metrics/{class_name}/f1-score": report[class_name]['f1-score']
            })
    
    # Log overall metrics
    wandb.log({
        "metrics/accuracy": report['accuracy'],
        "metrics/macro_avg_precision": report['macro avg']['precision'],
        "metrics/macro_avg_recall": report['macro avg']['recall'],
        "metrics/macro_avg_f1": report['macro avg']['f1-score'],
        "metrics/weighted_avg_precision": report['weighted avg']['precision'],
        "metrics/weighted_avg_recall": report['weighted avg']['recall'],
        "metrics/weighted_avg_f1": report['weighted avg']['f1-score']
    })


In [None]:
# Evaluate model
model, test_loader, test_dataset, all_labels, all_predictions, class_names = evaluate_model(
    sweep_id='uf2dfd5t',
    entity='da24m008-iit-madras',
    project='DA6401-A2-V4'
)

Best run: bf_32_fo_doubling_dn_512_ca_mish_da_gelu_v5, val_accuracy: 0.4287
Found artifact version: v69 from run: bf_32_fo_doubling_dn_512_ca_mish_da_gelu_v5


wandb: Downloading large artifact model:v69, 55.04MB. 1 files... 
wandb:   1 of 1 files downloaded.  
Done. 0:0:3.0


best_model_73jfitab.pth
final_model_73jfitab.pth
Downloaded model file: e:\IITM\2nd sem\DL\partA\downloaded_model\final_model_73jfitab.pth
Found artifact version: v68 from run: bf_32_fo_doubling_dn_512_ca_mish_da_gelu_v5


wandb: Downloading large artifact model:v68, 55.04MB. 1 files... 
wandb:   1 of 1 files downloaded.  
Done. 0:0:3.3


best_model_73jfitab.pth
final_model_73jfitab.pth
Downloaded model file: e:\IITM\2nd sem\DL\partA\downloaded_model\final_model_73jfitab.pth
Found artifact version: v67 from run: bf_32_fo_doubling_dn_512_ca_mish_da_gelu_v5


wandb: Downloading large artifact model:v67, 55.04MB. 1 files... 
wandb:   1 of 1 files downloaded.  
Done. 0:0:1.0


best_model_73jfitab.pth
final_model_73jfitab.pth
Downloaded model file: e:\IITM\2nd sem\DL\partA\downloaded_model\final_model_73jfitab.pth
Found artifact version: v66 from run: bf_32_fo_doubling_dn_512_ca_mish_da_gelu_v5


wandb: Downloading large artifact model:v66, 55.04MB. 1 files... 
wandb:   1 of 1 files downloaded.  
Done. 0:0:1.5


best_model_73jfitab.pth
final_model_73jfitab.pth
Downloaded model file: e:\IITM\2nd sem\DL\partA\downloaded_model\final_model_73jfitab.pth
Found artifact version: v65 from run: bf_32_fo_doubling_dn_512_ca_mish_da_gelu_v5


wandb: Downloading large artifact model:v65, 55.04MB. 1 files... 
wandb:   1 of 1 files downloaded.  
Done. 0:0:0.9


best_model_73jfitab.pth
final_model_73jfitab.pth
Downloaded model file: e:\IITM\2nd sem\DL\partA\downloaded_model\final_model_73jfitab.pth
Found artifact version: v64 from run: bf_32_fo_doubling_dn_512_ca_mish_da_gelu_v5


wandb: Downloading large artifact model:v64, 55.04MB. 1 files... 
wandb:   1 of 1 files downloaded.  
Done. 0:0:1.0


best_model_73jfitab.pth
final_model_73jfitab.pth
Downloaded model file: e:\IITM\2nd sem\DL\partA\downloaded_model\final_model_73jfitab.pth
Found artifact version: v63 from run: bf_32_fo_doubling_dn_512_ca_mish_da_gelu_v5


wandb: Downloading large artifact model:v63, 55.04MB. 1 files... 
wandb:   1 of 1 files downloaded.  
Done. 0:0:0.9


best_model_73jfitab.pth
final_model_73jfitab.pth
Downloaded model file: e:\IITM\2nd sem\DL\partA\downloaded_model\final_model_73jfitab.pth
Downloaded model file: e:\IITM\2nd sem\DL\partA\downloaded_model\final_model_73jfitab.pth
Number of classes: 10
Class names: ['Amphibia', 'Animalia', 'Arachnida', 'Aves', 'Fungi', 'Insecta', 'Mammalia', 'Mollusca', 'Plantae', 'Reptilia']


In [None]:
# Create prediction grid
create_prediction_grid(model, test_dataset, class_names)

In [None]:
# Create confusion matrix
create_confusion_matrix(all_labels, all_predictions, class_names)

In [None]:
# Generate classification report
generate_classification_report(all_labels, all_predictions, class_names)