## Base

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import pytorch_lightning as pl
import wandb
from pytorch_lightning.loggers import WandbLogger

torch.set_float32_matmul_precision('medium')
if torch.cuda.is_available():
    print(f"GPU is available: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is not available.")

GPU is available: NVIDIA GeForce RTX 3060 Laptop GPU


In [7]:
class myCNN(pl.LightningModule):
    def __init__(self, input_channels=1, input_size=28, num_classes=10, filter_counts=[32], filter_sizes=[5], 
                 padding_sizes=[1], pooling_sizes=[2], pooling_strides=[2], pooling_paddings=[0], 
                 conv_activation='relu', learning_rate=0.001):
        super().__init__()
        self.save_hyperparameters()
        
        assert len(filter_counts) == len(filter_sizes) == len(padding_sizes) == \
            len(pooling_sizes) == len(pooling_strides) == len(pooling_paddings), \
            "All layer parameter lists must have the same length"
                
        self.conv_activation = {
            'relu': F.relu,
            'sigmoid': torch.sigmoid,
            'tanh': torch.tanh
        }.get(conv_activation.lower(), F.relu)
        
        self.convs = nn.ModuleList()
        self.pools = nn.ModuleList()
        
        current_channels = input_channels
        current_size = input_size
        
                
        for i, (filter_count, filter_size, padding_size, pooling_size, pooling_stride, pooling_padding) in \
            enumerate(zip(filter_counts, filter_sizes, padding_sizes, pooling_sizes, pooling_strides, pooling_paddings)):
            
            conv = nn.Conv2d(in_channels=input_channels,
                out_channels=filter_count,
                kernel_size=filter_size,
                stride=1,
                padding=padding_size
            )
            self.convs.append(conv)
            
            current_size = ( current_size + 2 * padding_size - filter_size ) // 1 + 1
            
            pool = nn.MaxPool2d(kernel_size=pooling_size, 
                stride=pooling_stride, 
                padding=pooling_padding
            )
            self.pools.append(pool)
            
            current_size = ( current_size + 2 * pooling_padding - pooling_size) // pooling_stride + 1
            
            current_channels = filter_count
            
            
        self.flat_size =  current_size * current_size * current_channels
        self.fc = nn.Linear(self.flat_size, num_classes)
        
        self.learning_rate = learning_rate
    
    def forward(self, x):
        
        for conv, pool in zip(self.convs, self.pools):
            x = self.conv_activation(conv(x))
            x = pool(x)
            
        x = x.view(-1, self.flat_size)
        x = self.fc(x)
        
        return x
        
    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()
        
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', acc, prog_bar=True)
        
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()
        
        # Log metrics
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        
        return loss
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()
        
        # Log metrics
        self.log('test_loss', loss, prog_bar=True)
        self.log('test_acc', acc, prog_bar=True)
        
        return loss
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)
    

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Download and load the MNIST dataset
train_dataset = datasets.MNIST('data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('data', train=False, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8, pin_memory=True)
val_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=8, pin_memory=True)

In [4]:
model = myCNN().to('cuda')

wandb_logger = WandbLogger(project="cnn-pytorch-lightning", log_model=True)

trainer = pl.Trainer(
        max_epochs=1,
        accelerator="gpu",  # Uses GPU if available
        logger=wandb_logger,
        log_every_n_steps=500,
        precision=16
    )

c:\Users\DELL\.conda\envs\DL\lib\site-packages\lightning_fabric\connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [5]:
trainer.fit(model, train_loader, val_loader)

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: bullseye2608 (bullseye2608-indian-institute-of-technology-madras) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params | Mode 
---------------------------------------------
0 | convs | ModuleList | 832    | train
1 | pools | ModuleList | 0      | train
2 | fc    | Linear     | 54.1 K | train
---------------------------------------------
54.9 K    Trainable params
0         Non-trainable params
54.9 K    Total params
0.220     Total estimated model params size (MB)
5         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\DELL\.conda\envs\DL\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
c:\Users\DELL\.conda\envs\DL\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


In [6]:
trainer.test(dataloaders=val_loader)
wandb.finish()

Restoring states from the checkpoint path at .\cnn-pytorch-lightning\bhwun64m\checkpoints\epoch=0-step=938.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at .\cnn-pytorch-lightning\bhwun64m\checkpoints\epoch=0-step=938.ckpt
c:\Users\DELL\.conda\envs\DL\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁█
test_acc,▁
test_loss,▁
train_acc,▁
train_loss,▁
trainer/global_step,▁██
val_acc,▁
val_loss,▁

0,1
epoch,1.0
test_acc,0.9802
test_loss,0.06546
train_acc,0.98438
train_loss,0.05502
trainer/global_step,938.0
val_acc,0.9802
val_loss,0.06546


## Main


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
import os
import argparse
from typing import List, Tuple, Callable, Union, Type
from IPython.display import clear_output

class FlexibleCNN(pl.LightningModule):
    def __init__(
        self,
        input_channels: int = 3,
        num_classes: int = 10,
        conv_filters: List[int] = [32, 64, 128, 256, 512],
        kernel_sizes: Union[int, List[int]] = 3,
        conv_activation: Union[str, Type[nn.Module]] = "relu",
        dense_neurons: int = 512,
        dense_activation: Union[str, Type[nn.Module]] = "relu",
        pooling_size: Union[int, List[int]] = 2,
        learning_rate: float = 0.001,
        use_batch_norm: bool = False,
        dropout_rate: float = 0.0
    ):
        """
        Flexible CNN model with 5 conv-activation-maxpool blocks
        
        Args:
            input_channels: Number of input image channels (3 for RGB)
            num_classes: Number of output classes
            conv_filters: List of filter counts for each conv layer
            kernel_sizes: Kernel size for conv layers (int or list)
            conv_activation: Activation function for conv layers
            dense_neurons: Number of neurons in the dense layer
            dense_activation: Activation function for dense layer
            pooling_size: Max pooling size (int or list)
            learning_rate: Learning rate for optimizer
            use_batch_norm: Whether to use batch normalization
            dropout_rate: Dropout rate (if use_dropout is True)
            use_dropout: Whether to use dropout
        """
        super().__init__()
        self.save_hyperparameters()
        
        # Convert activation strings to functions
        self.conv_activation = self._get_activation(conv_activation)
        self.dense_activation = self._get_activation(dense_activation)
        
        # Convert single value to lists if needed
        if isinstance(kernel_sizes, int):
            kernel_sizes = [kernel_sizes] * 5
        if isinstance(pooling_size, int):
            pooling_size = [pooling_size] * 5
            
        self.use_dropout = False if dropout_rate == 0.0 else True

        # Create 5 convolution blocks
        self.conv_blocks = nn.ModuleList()
        in_channels = input_channels
        
        for i in range(5):
            layers = []
            
            # Convolution layer
            layers.append(nn.Conv2d(in_channels, conv_filters[i], kernel_size=kernel_sizes[i], padding=kernel_sizes[i]//2))
            
            # Optional batch normalization (before activation)
            if use_batch_norm:
                layers.append(nn.BatchNorm2d(conv_filters[i]))
            
            # Activation
            layers.append(self._get_activation_layer(self.conv_activation))
            
            # Optional dropout after activation but before pooling
            if self.use_dropout:
                layers.append(nn.Dropout2d(dropout_rate))
            
            # Max pooling
            layers.append(nn.MaxPool2d(kernel_size=pooling_size[i], stride=pooling_size[i]))
            
            self.conv_blocks.append(nn.Sequential(*layers))
            in_channels = conv_filters[i]
        
        # Calculate the size of the flattened features
        # Assuming input image size is 224x224 (common for iNaturalist)
        # Each pooling with size 2 reduces dimensions by half
        final_size = 224 // (2 ** 5)  # After 5 pooling layers
        self.flat_size = final_size * final_size * conv_filters[-1]
        
        # Dense layer
        self.fc1 = nn.Linear(self.flat_size, dense_neurons)
        
        # Optional batch normalization for dense layer
        self.use_batch_norm = use_batch_norm
        if use_batch_norm:
            self.bn_fc = nn.BatchNorm1d(dense_neurons)
            
        # Dense layer dropout (applied after activation)
        self.dropout_rate = dropout_rate
        if self.use_dropout:
            self.dropout = nn.Dropout(dropout_rate)
        
        # Output layer
        self.fc2 = nn.Linear(dense_neurons, num_classes)
        
        self.learning_rate = learning_rate
    
    def _get_activation(self, activation):
        """Convert activation name to function or return the provided activation"""
        if isinstance(activation, str):
            activation = activation.lower()
            if activation == 'relu':
                return F.relu
            elif activation == 'leaky_relu':
                return F.leaky_relu
            elif activation == 'elu':
                return F.elu
            elif activation == 'tanh':
                return F.tanh
            elif activation == 'sigmoid':
                return F.sigmoid
            else:
                raise ValueError(f"Unsupported activation: {activation}")
        return activation
    
    def _get_activation_layer(self, activation_fn):
        """Convert activation function to layer"""
        if activation_fn == F.relu:
            return nn.ReLU()
        elif activation_fn == F.leaky_relu:
            return nn.LeakyReLU()
        elif activation_fn == F.elu:
            return nn.ELU()
        elif activation_fn == F.tanh:
            return nn.Tanh()
        elif activation_fn == F.sigmoid:
            return nn.Sigmoid()
        else:
            # For custom activations, use a Lambda layer
            return nn.Identity()  # Placeholder, will use functional activation in forward
    
    def forward(self, x):
        # Apply all conv blocks
        for block in self.conv_blocks:
            x = block(x)
        
        # Flatten
        x = x.view(x.size(0), -1)
        
        # Apply dense layer
        x = self.fc1(x)
        
        # Apply batch norm if enabled
        if self.use_batch_norm:
            x = self.bn_fc(x)
        
        # Apply activation
        x = self.dense_activation(x)
        
        # Apply dropout if enabled
        if self.use_dropout:
            x = self.dropout(x)
        
        # Output layer
        x = self.fc2(x)
        
        return x
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()
        
        # Log metrics
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True)
        
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()
        
        # Log metrics
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        
        return loss
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()
        
        # Log metrics
        self.log('test_loss', loss)
        self.log('test_acc', acc)
        
        return loss
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)


class iNaturalistDataModule(pl.LightningDataModule):
    def __init__(
        self, 
        data_dir: str,
        batch_size: int = 32,
        num_workers: int = 4,
        image_size: int = 224,
        use_data_augmentation: bool = False,
        seed=42,
    ):
        super().__init__()
        self.train_data_dir = os.path.join(data_dir, 'train')
        self.test_data_dir = os.path.join(data_dir, 'val')
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.image_size = image_size
        self.use_data_augmentation = use_data_augmentation
        self.seed = seed
        
    def setup(self, stage=None):
        # Basic transformations always applied
        basic_transforms = [
            transforms.Resize((self.image_size, self.image_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]
        
        # Additional augmentations when enabled
        augmentation_transforms = []
        if self.use_data_augmentation:
            augmentation_transforms = [
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(15),
                # transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
                # transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
                transforms.RandomResizedCrop(self.image_size, scale=(0.8, 1.0))
            ]
        
        # Combine transformations
        train_transforms = transforms.Compose(augmentation_transforms + basic_transforms)
        val_transforms = transforms.Compose(basic_transforms)
        
        # Load dataset
        full_dataset = ImageFolder(root=self.train_data_dir)
        
        # Split dataset
        dataset_size = len(full_dataset)
        train_size = int(0.8 * dataset_size)
        val_size = dataset_size - train_size
        
        # Use random split with generator for reproducibility
        generator = torch.Generator().manual_seed(self.seed)
        self.train_dataset, self.val_dataset = random_split(
            full_dataset, [train_size, val_size], generator=generator
        )
        
        self.train_dataset.dataset = ImageFolder(root=self.train_data_dir, transform=train_transforms) 
        self.val_dataset.dataset = ImageFolder(root=self.train_data_dir, transform=val_transforms) 
        
        # # Apply transformations
        # self.train_dataset = TransformedSubset(self.train_dataset, train_transforms)
        # self.val_dataset = TransformedSubset(self.val_dataset, val_transforms)
        
        self.test_dataset = ImageFolder(root=self.test_data_dir, transform=val_transforms)
        
    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            pin_memory=True,
            persistent_workers=True
        )
    
    def val_dataloader(self):
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            pin_memory=True,
            persistent_workers=True
        )
    
    def test_dataloader(self):
        print('uhuh')
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            pin_memory=True,
            persistent_workers=True
        )


In [2]:
data_directory = r'C:\Users\DELL\Desktop\Coding\Python\DL\Assignment 2\da6401_assignment2\data\inaturalist_12K'
data_module = iNaturalistDataModule(
        data_dir=data_directory,
        batch_size=64,
        use_data_augmentation=True
    )
data_module.setup()

model = FlexibleCNN(
    input_channels=3,
    num_classes=10,
    conv_filters=[32,64,128,256,512],
    kernel_sizes=3,
    conv_activation='relu',
    dense_neurons=512,
    dense_activation='relu',
    learning_rate=0.0002,
    use_batch_norm=True,
    dropout_rate=0.2
).to('cuda')

wandb_logger = WandbLogger(project="cnn-pytorch-lightning", log_model=True)

trainer = pl.Trainer(
    max_epochs=20,
    accelerator='gpu',
    logger=wandb_logger,
    precision=16
)


c:\Users\DELL\.conda\envs\DL\lib\site-packages\lightning_fabric\connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(model, data_module)

You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: bullseye2608 (bullseye2608-indian-institute-of-technology-madras) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type        | Params | Mode 
----------------------------------------------------
0 | conv_blocks | ModuleList  | 1.6 M  | train
1 | fc1         | Linear      | 12.8 M | train
2 | bn_fc       | BatchNorm1d | 1.0 K  | train
3 | dropout     | Dropout     | 0      | train
4 | fc2         | Linear      | 5.1 K  | train
----------------------------------------------------
14.4 M    Trainable params
0         Non-trainable params
14.4 M    Total params
57.689    Total estimated model params size (MB)
35        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

In [None]:
trainer.test(model, data_module)
wandb.finish()

## Sweep

In [2]:
import os
import wandb
import subprocess
from pathlib import Path

# Define the sweep configuration
sweep_config = {
    'method': 'bayes',  # Bayesian optimization
    'metric': {
        'name': 'val_acc',
        'goal': 'maximize'
    },
    'parameters': {
        'learning_rate': {
            'distribution': 'log_uniform_values',
            'min': 1e-4,
            'max': 1e-2
        },
        'batch_size': {
            'values': [32, 64, 128]
        },
        'conv_filters_pattern': {
            'values': [
                '32,64,128,256,512',     # Standard doubling
                '64,64,64,64,64',         # Same filters
                '128,96,64,48,32',        # Decreasing filters
                '32,64,128,64,32'         # Diamond pattern
            ]
        },
        'activation': {
            'values': ['relu', 'leaky_relu', 'elu']
        },
        'dense_neurons': {
            'values': [128, 256, 512]
        },
        'use_batch_norm': {
            'values': [True, False]
        },
        'dropout_rate': {
            'values': [0.0, 0.2, 0.3, 0.5]
        },
        'use_data_augmentation': {
            'values': [True, False]
        }
    }
}

In [5]:
sweep_id = wandb.sweep(sweep_config, project="inaturalist-cnn")

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: 5ulq9d3c
Sweep URL: https://wandb.ai/bullseye2608-indian-institute-of-technology-madras/inaturalist-cnn/sweeps/5ulq9d3c


In [3]:
def train():
    # Initialize a new wandb run
    run = wandb.init()
    
    # Get hyperparameters for this run
    config = wandb.config
    
    # Parse conv_filters to list
    conv_filters = [int(f) for f in config.conv_filters_pattern.split(',')]
    
    # The data path
    data_dir = r'C:\Users\DELL\Desktop\Coding\Python\DL\Assignment 2\da6401_assignment2\data\inaturalist_12K'
    
    # Initialize data module (only once per sweep)
    data_module = iNaturalistDataModule(
        data_dir=data_dir,
        batch_size=config.batch_size,
        use_data_augmentation=config.use_data_augmentation
    )
    data_module.setup()
    
    # Initialize model with sweep parameters
    model = FlexibleCNN(
        input_channels=3,
        num_classes=10,
        conv_filters=conv_filters,
        kernel_sizes=3,  # Fixed for simplicity
        conv_activation=config.activation,
        dense_neurons=config.dense_neurons,
        dense_activation=config.activation,
        learning_rate=config.learning_rate,
        use_batch_norm=config.use_batch_norm,
        dropout_rate=config.dropout_rate,
    )
    
    # Setup callbacks
    checkpoint_callback = ModelCheckpoint(
        monitor='val_loss',
        filename='{epoch}-{val_loss:.2f}',
        save_top_k=1,
        mode='min'
    )
    
    # Setup wandb logger (already initialized by wandb.init())
    logger = WandbLogger()
    
    # Initialize trainer
    trainer = pl.Trainer(
        max_epochs=10,  # Fixed for all sweeps
        accelerator='auto',
        callbacks=[checkpoint_callback],
        logger=logger
    )
    
    # Train the model
    trainer.fit(model, data_module)
    
    # Optionally test the model to get final metrics
    test_results = trainer.test(model, datamodule=data_module)
    
    # Log additional metrics if needed
    for k, v in test_results[0].items():
        wandb.log({k: v})
    
    # No need to call wandb.finish() as sweep agent manages the run lifecycle


In [None]:
cont_sweep_id = 'bullseye2608-indian-institute-of-technology-madras/inaturalist-cnn/kn08nm99'
wandb.agent(cont_sweep_id, train, count=50)

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Agent Starting Run: f0o4maek with config:
wandb: 	activation: relu
wandb: 	batch_size: 32
wandb: 	conv_filters_pattern: 64,64,64,64,64
wandb: 	dense_neurons: 128
wandb: 	dropout_rate: 0
wandb: 	learning_rate: 0.0023721222172821163
wandb: 	use_batch_norm: False
wandb: 	use_data_augmentation: True
wandb: Currently logged in as: bullseye2608 (bullseye2608-indian-institute-of-technology-madras) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
c:\Users\DELL\.conda\envs\DL\lib\site-packages\pytorch_lightning\loggers\wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type       | Params | Mode 
---------------------------------------------------
0 | conv_blocks | ModuleList | 149 K  | train
1 | fc1         | Linear     | 401 K  | train


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇█
test_acc,▁▁
test_loss,▁▁
train_acc_epoch,▁▃▅▆▇▇▇███
train_acc_step,▂▂▁▂▅▆▅▃▅▂▄▄▃▅▂▅▆▆▅▅▅▅▂▃▃▃▅█▆▄▅▇▇▄▅▂▆▇▅▄
train_loss_epoch,█▇▅▅▃▃▂▂▁▁
train_loss_step,▇█▆▆▇▆▅▄▅▄▅▅▃▃▄▂▂▄▂▃▃▆▄▃▂▄▁▃▆▂▂▁▅▃▃▅▄▂▃▆
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
val_acc,▁▃▅▅▅▆▇▇██
val_loss,█▇▅▅▄▃▃▄▁▁

0,1
epoch,10.0
test_acc,0.257
test_loss,2.07718
train_acc_epoch,0.26116
train_acc_step,0.19355
train_loss_epoch,2.05794
train_loss_step,2.27768
trainer/global_step,2500.0
val_acc,0.2645
val_loss,2.03626


wandb: Agent Starting Run: m31ifv59 with config:
wandb: 	activation: elu
wandb: 	batch_size: 128
wandb: 	conv_filters_pattern: 64,64,64,64,64
wandb: 	dense_neurons: 256
wandb: 	dropout_rate: 0.5
wandb: 	learning_rate: 0.0008754906964621385
wandb: 	use_batch_norm: True
wandb: 	use_data_augmentation: True


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type        | Params | Mode 
----------------------------------------------------
0 | conv_blocks | ModuleList  | 150 K  | train
1 | fc1         | Linear      | 803 K  | train
2 | bn_fc       | BatchNorm1d | 512    | train
3 | dropout     | Dropout     | 0      | train
4 | fc2         | Linear      | 2.6 K  | train
----------------------------------------------------
956 K     Trainable params
0         Non-trainable params
956 K     Total params
3.825     Total estimated model params size (MB)
35        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

wandb: Network error (ConnectionError), entering retry loop.


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█
test_acc,▁▁
test_loss,▁▁
train_acc_epoch,▁▄▅▆▇▇████
train_acc_step,▄▆▅▁▇▇▆█▇▆██
train_loss_epoch,█▄▃▃▂▂▂▁▁▁
train_loss_step,█▆▄█▄▄▇▅▆▂▅▁
trainer/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇████
val_acc,▁▄▄▅▆▄▆▆▆█
val_loss,█▅▄▃▃▂▃▂▂▁

0,1
epoch,10.0
test_acc,0.3165
test_loss,1.95846
train_acc_epoch,0.25453
train_acc_step,0.26562
train_loss_epoch,2.07681
train_loss_step,1.9621
trainer/global_step,630.0
val_acc,0.3285
val_loss,1.95259


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: aou4haqz with config:
wandb: 	activation: leaky_relu
wandb: 	batch_size: 128
wandb: 	conv_filters_pattern: 32,64,128,64,32
wandb: 	dense_neurons: 128
wandb: 	dropout_rate: 0.5
wandb: 	learning_rate: 0.0003756332509562994
wandb: 	use_batch_norm: False
wandb: 	use_data_augmentation: True


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type       | Params | Mode 
---------------------------------------------------
0 | conv_blocks | ModuleList | 185 K  | train
1 | fc1         | Linear     | 200 K  | train
2 | dropout     | Dropout    | 0      | train
3 | fc2         | Linear     | 1.3 K  | train
---------------------------------------------------
387 K     Trainable params
0         Non-trainable params
387 K     Total params
1.551     Total estimated model params size (MB)
29        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█
test_acc,▁▁
test_loss,▁▁
train_acc_epoch,▁▂▂▃▅▅▆▇▇█
train_acc_step,▃▁▃▄▄█▄▆▇▆▅█
train_loss_epoch,██▇▆▅▄▃▂▂▁
train_loss_step,██▇▅▄▅▆▃▁▃▂▂
trainer/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇████
val_acc,▁▂▂▅▆▆▇▇▇█
val_loss,██▇▅▄▃▃▃▂▁

0,1
epoch,10.0
test_acc,0.2085
test_loss,2.20571
train_acc_epoch,0.18715
train_acc_step,0.20312
train_loss_epoch,2.22128
train_loss_step,2.21384
trainer/global_step,630.0
val_acc,0.197
val_loss,2.20979


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: rnrszvox with config:
wandb: 	activation: elu
wandb: 	batch_size: 64
wandb: 	conv_filters_pattern: 32,64,128,64,32
wandb: 	dense_neurons: 256
wandb: 	dropout_rate: 0.5
wandb: 	learning_rate: 0.00018049040676405349
wandb: 	use_batch_norm: False
wandb: 	use_data_augmentation: True


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type       | Params | Mode 
---------------------------------------------------
0 | conv_blocks | ModuleList | 185 K  | train
1 | fc1         | Linear     | 401 K  | train
2 | dropout     | Dropout    | 0      | train
3 | fc2         | Linear     | 2.6 K  | train
---------------------------------------------------
589 K     Trainable params
0         Non-trainable params
589 K     Total params
2.359     Total estimated model params size (MB)
29        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇█████
test_acc,▁▁
test_loss,▁▁
train_acc_epoch,▁▃▅▆▇▇▇███
train_acc_step,▃▁▃▃▁▇▁▂▇▄▄▇█▅▄▅▆▄▅▇▅▆▅▆▅
train_loss_epoch,█▆▅▄▃▂▂▁▁▁
train_loss_step,█▇▇▇▅▅█▇▃▅▇▇▃▆▇▅▃▅▆▆▆▁▅▁▃
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇████
val_acc,▁▄▅▅▆▆▇▇▇█
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
epoch,10.0
test_acc,0.307
test_loss,2.00529
train_acc_epoch,0.24316
train_acc_step,0.20635
train_loss_epoch,2.11525
train_loss_step,2.04176
trainer/global_step,1250.0
val_acc,0.306
val_loss,2.00273


wandb: Agent Starting Run: oofm7r64 with config:
wandb: 	activation: leaky_relu
wandb: 	batch_size: 64
wandb: 	conv_filters_pattern: 32,64,128,64,32
wandb: 	dense_neurons: 256
wandb: 	dropout_rate: 0
wandb: 	learning_rate: 0.00028260182631515915
wandb: 	use_batch_norm: True
wandb: 	use_data_augmentation: False


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type        | Params | Mode 
----------------------------------------------------
0 | conv_blocks | ModuleList  | 186 K  | train
1 | fc1         | Linear      | 401 K  | train
2 | bn_fc       | BatchNorm1d | 512    | train
3 | fc2         | Linear      | 2.6 K  | train
----------------------------------------------------
590 K     Trainable params
0         Non-trainable params
590 K     Total params
2.364     Total estimated model params size (MB)
29        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇█████
test_acc,▁▁
test_loss,▁▁
train_acc_epoch,▁▂▃▃▄▅▆▇▇█
train_acc_step,▁▁▂▃▁▃▃▃▃▃▄▄▅▆▄▅▆▇▆▆▇▆███
train_loss_epoch,█▇▆▆▅▄▃▂▂▁
train_loss_step,██▇▇▇▆▆▆▆▆▅▅▅▄▅▄▃▂▃▃▂▂▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇████
val_acc,▁▇█▇▇▅▄▆▅▅
val_loss,▃▁▁▂▂▄▄▅▇█

0,1
epoch,10.0
test_acc,0.3665
test_loss,2.18232
train_acc_epoch,0.92749
train_acc_step,0.95238
train_loss_epoch,0.35942
train_loss_step,0.36553
trainer/global_step,1250.0
val_acc,0.3475
val_loss,2.27758


wandb: Agent Starting Run: mcyjktvo with config:
wandb: 	activation: elu
wandb: 	batch_size: 128
wandb: 	conv_filters_pattern: 64,64,64,64,64
wandb: 	dense_neurons: 512
wandb: 	dropout_rate: 0.5
wandb: 	learning_rate: 0.003172587524322519
wandb: 	use_batch_norm: True
wandb: 	use_data_augmentation: False


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type        | Params | Mode 
----------------------------------------------------
0 | conv_blocks | ModuleList  | 150 K  | train
1 | fc1         | Linear      | 1.6 M  | train
2 | bn_fc       | BatchNorm1d | 1.0 K  | train
3 | dropout     | Dropout     | 0      | train
4 | fc2         | Linear      | 5.1 K  | train
----------------------------------------------------
1.8 M     Trainable params
0         Non-trainable params
1.8 M     Total params
7.050     Total estimated model params size (MB)
35        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]