In [1]:
# %pip installs into the environment of the running kernel; the version is pinned
# to the one this notebook was executed with so re-runs stay reproducible.
%pip install -q pytorch-lightning==2.5.1

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.5.1-py3-none-any.whl.metadata (20 kB)
Collecting torchmetrics>=0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.1.0->pytorch-lightning)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.1.0->pytorch-lightning)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.1.0->pytorch-lightning)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.1.0->pytorch-lightning)
  Downloadi

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.datasets import INaturalist
import pytorch_lightning as pl
import wandb
import matplotlib.pyplot as plt
import numpy as np
import random

In [3]:
# %pip installs into the environment of the running kernel; the version is pinned
# to the one this notebook was executed with so re-runs stay reproducible.
%pip install -q split-folders==0.5.1

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl.metadata (6.2 kB)
Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [4]:
import splitfolders

In [5]:
import os

import wandb

# Never commit an API key to the notebook. The key is read from the WANDB_API_KEY
# environment variable; when that is unset, key=None lets wandb fall back to its
# own credential lookup / interactive prompt.
# NOTE(review): a key was previously hard-coded in this cell and must be treated
# as leaked -- revoke it in the wandb account settings.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mcs24m035[0m ([33mcs24m035-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
# Pin every random number generator used by the notebook to the same seed (2)
# so that repeated runs draw the same random numbers.
random.seed(a=2)           # Python's built-in random module
np.random.seed(seed=2)     # NumPy's global generator
torch.manual_seed(seed=2)  # PyTorch operations

In [7]:
def set_device():
    """Return the torch.device to compute on: CUDA when available, otherwise the CPU."""
    backend = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.device(backend)


# Resolve the device once; later cells read this notebook-level `device` variable
device = set_device()
print("Currently Using :: ", device)

Currently Using ::  cuda


In [8]:
# Colab-only step: mount Google Drive at /content/drive so the dataset stored
# under /content/drive/MyDrive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
import os

# Adjust the paths to your environment.
data_path = '/content/drive/MyDrive/nature_12k/inaturalist_12K/train'  # images to be split
# Destination of the split. This is a relative path, while the data loaders read
# /content/train_val/..., so the two agree only when the working directory is /content.
output_path = "train_val"

# Randomly copy the images into an 80% train / 20% validation split; the fixed
# `seed` keeps the split reproducible.
# The copy takes several minutes, so it is skipped when a split is already present,
# which makes re-running this cell cheap and idempotent.
# NOTE: delete `output_path` to force a fresh split (e.g. after an interrupted copy).
if not os.path.isdir(os.path.join(output_path, "val")):
    splitfolders.ratio(input=data_path, output=output_path, seed=42, ratio=(0.8, 0.2))

Copying files: 9999 files [04:34, 36.38 files/s] 


In [18]:
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

def configure_loaders(augment_data, batch_size=64, num_workers=4):
    """Build the train / validation / test DataLoaders over the image folders.

    Training images are randomly cropped and resized to 224x224 and, optionally,
    augmented; validation and test images are resized to 256 and center-cropped
    to 224. All three pipelines end with the same tensor conversion and
    channel-wise normalisation.

    Args:
        augment_data: Enables random horizontal flips and rotations (up to 30
            degrees) on the training images when it is ``True`` or the string
            ``"True"`` (compared case-insensitively). Anything else disables them.
        batch_size: Number of samples per batch for all three loaders.
        num_workers: Number of worker processes per loader.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles its samples.
    """
    input_size = 224
    norm_mean = [0.485, 0.456, 0.406]  # ImageNet channel means
    norm_std = [0.229, 0.224, 0.225]   # ImageNet channel standard deviations

    use_augmentation = str(augment_data).lower() == "true"

    # Training pipeline: crop -> (optional augmentation on the PIL image) -> tensor -> normalise
    train_steps = [transforms.RandomResizedCrop(input_size, scale=(0.08, 1.0))]
    if use_augmentation:
        train_steps.extend([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=30),
        ])
    train_steps.extend([
        transforms.ToTensor(),
        # Normalisation is applied unconditionally so the training inputs match the
        # validation/test preprocessing whether or not augmentation is enabled.
        transforms.Normalize(norm_mean, norm_std),
    ])
    train_pipe = transforms.Compose(train_steps)

    # Deterministic evaluation pipeline shared by the validation and test sets
    eval_pipe = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    data_paths = {
        'train': '/content/train_val/train',
        'validation': '/content/train_val/val',
        'test': '/content/drive/MyDrive/nature_12k/inaturalist_12K/val',
    }

    train_ds = ImageFolder(data_paths['train'], train_pipe)
    val_ds = ImageFolder(data_paths['validation'], eval_pipe)
    test_ds = ImageFolder(data_paths['test'], eval_pipe)

    loader_params = {
        'batch_size': batch_size,
        'num_workers': num_workers,
        'pin_memory': True,
        # DataLoader rejects persistent workers when there are no worker processes
        'persistent_workers': num_workers > 0,
    }

    train_loader = DataLoader(train_ds, shuffle=True, **loader_params)
    val_loader = DataLoader(val_ds, shuffle=False, **loader_params)
    test_loader = DataLoader(test_ds, shuffle=False, **loader_params)
    return train_loader, val_loader, test_loader

In [19]:
class CNN(pl.LightningModule):
    """Configurable stack of conv blocks followed by one dense layer and a classifier.

    Every conv block is ``Conv2d -> (optional BatchNorm2d) -> activation -> 2x2 MaxPool``.
    The flattened feature map then passes through
    ``Dropout -> Linear -> dense activation -> Linear``.

    Args:
        num_filters: Output channels of each conv layer, one entry per layer.
        filter_sizes: Square kernel size of each conv layer, one entry per layer.
        activations: Activation module placed after every conv layer (the same
            instance is reused in every block).
        dense_activation: Activation module applied after the dense layer.
        num_neurons_dense: Width of the dense layer.
        dropout_rate: Dropout probability applied to the flattened conv features.
        batch_norm: Adds a BatchNorm2d after each conv layer when it is ``True``
            or the string ``"True"`` (compared case-insensitively).
        in_channels: Number of channels of the input images.
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``num_filters`` and ``filter_sizes`` differ in length.
    """

    def __init__(self, num_filters, filter_sizes, activations,dense_activation, num_neurons_dense , dropout_rate,batch_norm,in_channels=3, num_classes=10):
        super().__init__()
        if len(num_filters) != len(filter_sizes):
            # The conv stack is built from num_filters while the dense input size is
            # derived from filter_sizes, so the two must describe the same layers.
            raise ValueError(
                f"num_filters has {len(num_filters)} entries but filter_sizes has {len(filter_sizes)}"
            )

        self.num_filters = num_filters
        self.filter_sizes = filter_sizes
        self.activations = activations
        self.num_neurons_dense = num_neurons_dense
        self.dropout_rate = dropout_rate
        self.dense_activation = dense_activation
        self.batch_norm=batch_norm

        # Accept both a real bool and the "True"/"False" strings used by the sweep config
        use_batch_norm = str(batch_norm).lower() == "true"

        # Convolutional layers
        self.conv_layers = nn.ModuleList()
        prev_filters = in_channels
        for out_filters, kernel in zip(num_filters, filter_sizes):
            self.conv_layers.append(nn.Conv2d(prev_filters, out_channels=out_filters, kernel_size=kernel))
            if use_batch_norm:
                self.conv_layers.append(nn.BatchNorm2d(out_filters, eps=0.001))
            self.conv_layers.append(self.activations)
            self.conv_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            prev_filters = out_filters

        # Side length of the square feature map that reaches the dense layer
        self.linear_in_features=getInFeaturs(filter_sizes)

        self.dense = nn.Linear(in_features=num_filters[-1] * self.linear_in_features * self.linear_in_features, out_features=num_neurons_dense)
        # Not used by forward(). Declared as BatchNorm1d because the dense output is
        # 2-D (batch, features), which BatchNorm2d would reject if it were ever enabled.
        self.bn=nn.BatchNorm1d(num_neurons_dense, eps=0.001)
        self.dropout = nn.Dropout(dropout_rate)
        self.output = nn.Linear(in_features=num_neurons_dense, out_features=num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        for layer in self.conv_layers:
            x = layer(x)

        # dense layers: flatten everything except the batch dimension first
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.dense(x)
        x = self.dense_activation(x)
        return self.output(x)



In [20]:
def getInFeaturs(filter_sizes, input_size=224):
    """Return the side length of the square feature map left after the conv stack.

    Each entry of ``filter_sizes`` is treated as one block made of an unpadded,
    stride-1 convolution with a ``k x k`` kernel followed by a 2x2 max-pool with
    stride 2.

    Args:
        filter_sizes: Kernel size of every conv layer, in order.
        input_size: Side length of the (square) input image.

    Returns:
        Side length of the final feature map as an int.

    Raises:
        ValueError: If ``filter_sizes`` is empty or the feature map shrinks
            below 1 pixel at some layer.
    """
    if len(filter_sizes) == 0:
        raise ValueError("filter_sizes must contain at least one kernel size")

    side = input_size
    for depth, kernel in enumerate(filter_sizes, start=1):
        side = side - kernel + 1    # valid convolution, stride 1
        side = (side - 2) // 2 + 1  # 2x2 max-pool, stride 2
        if side < 1:
            raise ValueError(
                f"feature map vanishes at conv layer {depth}: kernel sizes {list(filter_sizes)} "
                f"are too large for a {input_size}x{input_size} input"
            )
    return side


In [21]:
def set_num_of_filters(num_of_filters,filter_organisation,filter_sizes):
    """Return the number of filters for every conv layer.

    The first layer gets ``num_of_filters``; each following layer is derived
    from the previous one according to ``filter_organisation``. The result has
    one entry per element of ``filter_sizes`` (and always at least one entry).

    Args:
        num_of_filters: Number of filters in the first layer.
        filter_organisation: ``"double"`` (twice the previous layer), ``"same"``
            (unchanged) or ``"half"`` (integer half, never below 1).
        filter_sizes: Kernel sizes of the conv layers; only its length is used.

    Raises:
        ValueError: If ``filter_organisation`` is not one of the three options.
    """
    next_layer_rules = {
        "double": lambda previous: previous * 2,
        "same": lambda previous: previous,
        "half": lambda previous: max(1, previous // 2),
    }
    if filter_organisation not in next_layer_rules:
        # Returning a single-entry list here would silently build a network whose
        # depth disagrees with filter_sizes, so fail loudly instead.
        raise ValueError(
            f"unknown filter_organisation {filter_organisation!r}; "
            f"expected one of {sorted(next_layer_rules)}"
        )
    next_layer = next_layer_rules[filter_organisation]

    num_filters = [num_of_filters]
    for _ in range(len(filter_sizes) - 1):
        num_filters.append(next_layer(num_filters[-1]))
    return num_filters

In [22]:
def return_activation_fun(activation):
    """Create a new activation module for the given name.

    Args:
        activation: One of ``"ReLU"``, ``"GELU"``, ``"SiLU"``, ``"Mish"`` or
            ``"LeakyReLU"``.

    Returns:
        A freshly constructed ``torch.nn`` activation module (default arguments).

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    activation_classes = {
        "ReLU": nn.ReLU,
        "GELU": nn.GELU,
        "SiLU": nn.SiLU,
        "Mish": nn.Mish,
        "LeakyReLU": nn.LeakyReLU,
    }
    try:
        activation_cls = activation_classes[activation]
    except (KeyError, TypeError):
        # Fail here rather than handing None to the model builder
        raise ValueError(
            f"unknown activation {activation!r}; expected one of {sorted(activation_classes)}"
        ) from None
    return activation_cls()


In [23]:
# Hand-set configuration for a single trial run; edit the values below to try another setup
filter_sizes = [5] * 5  # k x k kernel size for each of the 5 conv layers
print(filter_sizes)
data_augmentation = "True"  # data augmentation flag

# The conv stack and the dense layer each get their own activation instance
activation_for_cov = "GELU"
activation_for_dense = "GELU"
conv_activations = return_activation_fun(activation_for_cov)
dense_activation = return_activation_fun(activation_for_dense)

dropout_rate = 0
batch_norm = "True"
num_neurons_dense = 512

# Filters per layer, derived from the first layer's count and the organisation rule
num_of_filters = 32  # number of filters in the 1st layer
filter_organisation = "double"  # double , same , half
num_filters = set_num_of_filters(num_of_filters, filter_organisation, filter_sizes)
print(num_filters)

epochs = 20

model = CNN(
    num_filters=num_filters,
    filter_sizes=filter_sizes,
    activations=conv_activations,
    dense_activation=dense_activation,
    num_neurons_dense=num_neurons_dense,
    dropout_rate=dropout_rate,
    batch_norm=batch_norm,
)
model = model.to(device)
print(model)

train_loader, val_loader, test_loader = configure_loaders(data_augmentation)

[5, 5, 5, 5, 5]
[32, 64, 128, 256, 512]
CNN(
  (activations): GELU(approximate='none')
  (dense_activation): GELU(approximate='none')
  (conv_layers): ModuleList(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU(approximate='none')
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (5): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (6): GELU(approximate='none')
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
    (9): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (10): GELU(approximate='none')
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_si

In [24]:
def train(epochs, model, train_loader, val_loader, print_on, lr=0.0001):
    """Train ``model`` with Adam and cross-entropy, validating after every epoch.

    Args:
        epochs: Number of passes over ``train_loader``.
        model: Network to optimise; batches are moved to the device its
            parameters live on.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        print_on: When equal to ``"wandb"`` the per-epoch metrics are also sent
            to ``wandb.log``; they are always printed.
        lr: Learning rate of the Adam optimiser.

    Returns:
        Validation accuracy (in percent) of the last epoch, or 0 when
        ``epochs`` is 0.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08)

    # Follow the model's own device instead of depending on a notebook-level global
    run_device = next(model.parameters()).device

    n_total_steps_train = len(train_loader)
    n_total_steps_val = len(val_loader)
    acc = 0
    for epoch in range(epochs):
        model.train()  # training phase
        n_correct_train = 0
        n_samples_train = 0
        loss_train = 0.0
        for i, (images_train, labels_train) in enumerate(train_loader):
            images_train = images_train.to(run_device)
            labels_train = labels_train.to(run_device)

            # Forward pass
            outputs = model(images_train)
            loss = criterion(outputs, labels_train)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()

            predicted_train = outputs.argmax(dim=1)
            n_samples_train += labels_train.size(0)
            n_correct_train += (predicted_train == labels_train).sum().item()

            if (i+1) % 25 == 0:
                print (f'Epoch [{epoch+1}/{epochs}], Step [{i+1}/{n_total_steps_train}]')
        print('Finished Training')
        # max(..., 1) keeps an empty loader from raising ZeroDivisionError
        train_acc = 100.0 * n_correct_train / max(n_samples_train, 1)
        loss_train = loss_train / max(n_total_steps_train, 1)  # mean loss per batch
        print (f'Epoch {epoch+1}, Training_Accuracy: {train_acc} , Training_Loss: {loss_train}')

        model.eval()  # evaluation phase
        n_correct = 0
        n_samples = 0
        loss_val = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(run_device)
                labels = labels.to(run_device)
                outputs = model(images)
                loss_val += criterion(outputs, labels).item()

                predicted = outputs.argmax(dim=1)
                n_samples += labels.size(0)
                n_correct += (predicted == labels).sum().item()  # number of correct labels

        acc = 100.0 * n_correct / max(n_samples, 1)
        loss_val = loss_val / max(n_total_steps_val, 1)  # mean loss per batch
        print (f'Epoch {epoch+1}, Validation_Accuracy: {acc} , Validation_Loss: {loss_val}')

        if print_on == "wandb":
            wandb.log({'Epoch': epoch+1,'Training_Loss' : loss_train,'Training_Accuracy' : train_acc,'Validation_Loss' : loss_val,'Validation_Accuracy':acc})

    return acc


In [None]:
# Short smoke run: three epochs, metrics printed to stdout rather than sent to wandb
epochs = 3
print_on = "print"
train(epochs, model, train_loader, val_loader, print_on)

Epoch [1/3], Step [25/125]
Epoch [1/3], Step [50/125]
Epoch [1/3], Step [75/125]
Epoch [1/3], Step [100/125]
Epoch [1/3], Step [125/125]
Finished Training
Epoch 1, Training_Accuracy: 22.415301912739093 , Training_Loss: 2.128661873817444
Epoch 1, Validation_Accuracy: 29.3 , Validation_Loss: 2.021551191806793
Epoch [2/3], Step [25/125]
Epoch [2/3], Step [50/125]
Epoch [2/3], Step [75/125]
Epoch [2/3], Step [100/125]
Epoch [2/3], Step [125/125]
Finished Training
Epoch 2, Training_Accuracy: 27.478434804350545 , Training_Loss: 2.03357061290741
Epoch 2, Validation_Accuracy: 28.8 , Validation_Loss: 2.014002475887537
Epoch [3/3], Step [25/125]
Epoch [3/3], Step [50/125]
Epoch [3/3], Step [75/125]
Epoch [3/3], Step [100/125]
Epoch [3/3], Step [125/125]
Finished Training
Epoch 3, Training_Accuracy: 28.891111388923616 , Training_Loss: 1.9898448677062988


In [None]:
  # Sweep definition: Bayesian search that maximises the metric named 'Val_Accuracy'
  sweep_config = {
      'name': 'part_A_question2',
      'method': 'bayes',
      'metric': {'goal': 'maximize', 'name': 'Val_Accuracy'},
      # Every hyper-parameter is drawn from a discrete list of candidate values
      'parameters': {
          'num_filters': {'values': [16, 32]},
          'filter_organisation': {'values': ['same', 'double', 'half']},
          'dropout_rate': {'values': [0.2, 0.5, 0]},
          'filter_size': {'values': [3, 5]},
          'num_neurons_dense': {'values': [128, 512]},
          'activation': {'values': ['ReLU', 'GELU', 'LeakyReLU']},
          'data_augmentation': {'values': ['True', 'False']},
          'batch_norm': {'values': ['True', 'False']},
          'epochs': {'values': [10, 20]},
      },
  }

In [None]:
def train_sweep():
    """Run one sweep trial: build the model from the sweep config, train it, log the result.

    Reads the hyper-parameters chosen for this trial from the wandb run config,
    renames the run after them, trains a CNN with five conv layers and logs the
    last epoch's validation accuracy as ``Val_Accuracy``. Relies on the
    notebook-level ``device`` variable for model placement.
    """
    init_sweep = wandb.init(project="CS6910_Assignment2", name="part_A_question2")
    sweep_params = init_sweep.config

    # Encode the trial's hyper-parameters in the run name so runs are distinguishable in the UI
    wandb.run.name = (
        f"_nf_{sweep_params.num_filters}"
        f"_fo_{sweep_params.filter_organisation}"
        f"_dr_{sweep_params.dropout_rate}"
        f"_neu_{sweep_params.num_neurons_dense}"
        f"_act_{sweep_params.activation}"
        f"_aug_{sweep_params.data_augmentation}"
        f"_norm_{sweep_params.batch_norm}"
        f"_ep_{sweep_params.epochs}"
    )

    filter_sizes = [sweep_params.filter_size] * 5  # same k x k kernel for all 5 layers
    print(filter_sizes)
    data_augmentation = sweep_params.data_augmentation  # data augmentation flag

    # The conv stack and the dense layer use the same activation type, as separate instances
    conv_activations = return_activation_fun(sweep_params.activation)
    dense_activation = return_activation_fun(sweep_params.activation)
    dropout_rate = sweep_params.dropout_rate
    batch_norm = sweep_params.batch_norm
    num_neurons_dense = sweep_params.num_neurons_dense

    # Number of filters per layer, from the first layer's count and the organisation rule
    num_filters = set_num_of_filters(
        sweep_params.num_filters, sweep_params.filter_organisation, filter_sizes
    )
    print(num_filters)

    epochs = sweep_params.epochs

    model = CNN(num_filters,
                filter_sizes,
                conv_activations,
                dense_activation,
                num_neurons_dense,
                dropout_rate,
                batch_norm
                ).to(device)
    print(model)
    # The loader factory defined in this notebook is configure_loaders; the name
    # dataset_loaders is not defined anywhere in it and fails on a fresh kernel.
    train_loader, val_loader, test_loader = configure_loaders(data_augmentation)

    print_on = "wandb"

    val_accuracy = train(epochs, model, train_loader, val_loader, print_on)
    wandb.log({"Val_Accuracy": val_accuracy})


In [None]:
# Register the sweep, let one agent execute up to 50 trials of train_sweep,
# then close the active wandb run
sweep_id = wandb.sweep(sweep=sweep_config, project='CS6910_Assignment2')
wandb.agent(sweep_id=sweep_id, function=train_sweep, count=50)
wandb.finish()

Create sweep with ID: fjduncsb
Sweep URL: https://wandb.ai/cs24m035-indian-institute-of-technology-madras/CS6910_Assignment2/sweeps/fjduncsb


[34m[1mwandb[0m: Agent Starting Run: 2oa75y1g with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout_rate: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organisation: double
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	num_filters: 16
[34m[1mwandb[0m: 	num_neurons_dense: 512


[3, 3, 3, 3, 3]
[16, 32, 64, 128, 256]
CNN(
  (activations): ReLU()
  (dense_activation): ReLU()
  (conv_layers): ModuleList(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense): Linear(in_features=6400, out_features=512, bias=True)
  (



Epoch [1/10], Step [25/125]
Epoch [1/10], Step [50/125]
Epoch [1/10], Step [75/125]
Epoch [1/10], Step [100/125]
Epoch [1/10], Step [125/125]
Finished Training
Epoch 1, Training_Accuracy: 14.451806475809477 , Training_Loss: 2.262642890930176
Epoch 1, Validation_Accuracy: 19.2 , Validation_Loss: 2.1701004467904568
Epoch [2/10], Step [25/125]
Epoch [2/10], Step [50/125]
Epoch [2/10], Step [75/125]
Epoch [2/10], Step [100/125]
Epoch [2/10], Step [125/125]
Finished Training
Epoch 2, Training_Accuracy: 21.065133141642704 , Training_Loss: 2.1575339708328247
Epoch 2, Validation_Accuracy: 22.25 , Validation_Loss: 2.130561485886574
Epoch [3/10], Step [25/125]
Epoch [3/10], Step [50/125]
Epoch [3/10], Step [75/125]
Epoch [3/10], Step [100/125]
Epoch [3/10], Step [125/125]
Finished Training
Epoch 3, Training_Accuracy: 24.12801600200025 , Training_Loss: 2.09853351688385
Epoch 3, Validation_Accuracy: 26.25 , Validation_Loss: 2.061238080263138
Epoch [4/10], Step [25/125]
Epoch [4/10], Step [50/125]


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Training_Accuracy,▁▄▅▆▆▇▇▇██
Training_Loss,█▆▄▄▃▂▂▂▁▁
Val_Accuracy,▁
Validation_Accuracy,▁▂▄▄▅▆▆▇▇█
Validation_Loss,█▇▅▅▄▃▂▂▂▁

0,1
Epoch,10.0
Training_Accuracy,30.46631
Training_Loss,1.95839
Val_Accuracy,34.15
Validation_Accuracy,34.15
Validation_Loss,1.89477


[34m[1mwandb[0m: Agent Starting Run: ohwzdl5u with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout_rate: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organisation: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_neurons_dense: 512


[3, 3, 3, 3, 3]
[32, 32, 32, 32, 32]
CNN(
  (activations): GELU(approximate='none')
  (dense_activation): GELU(approximate='none')
  (conv_layers): ModuleList(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU(approximate='none')
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
    (5): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (6): GELU(approximate='none')
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
    (9): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (10): GELU(approximate='none')
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(32, 32, kernel_size=(3, 

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Training_Accuracy,▁▄▄▆▆▆▇▇██
Training_Loss,█▆▄▄▃▃▂▂▁▁
Val_Accuracy,▁
Validation_Accuracy,▁▄▅▆▆▆▆▇██
Validation_Loss,█▆▄▃▂▂▃▂▁▁

0,1
Epoch,10.0
Training_Accuracy,35.29191
Training_Loss,1.83403
Val_Accuracy,36.3
Validation_Accuracy,36.3
Validation_Loss,1.80882


[34m[1mwandb[0m: Agent Starting Run: 4iys2sjd with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dropout_rate: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organisation: double
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	num_filters: 16
[34m[1mwandb[0m: 	num_neurons_dense: 512


[3, 3, 3, 3, 3]
[16, 32, 64, 128, 256]
CNN(
  (activations): LeakyReLU(negative_slope=0.01)
  (dense_activation): LeakyReLU(negative_slope=0.01)
  (conv_layers): ModuleList(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
    (1): LeakyReLU(negative_slope=0.01)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
    (4): LeakyReLU(negative_slope=0.01)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (7): LeakyReLU(negative_slope=0.01)
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (10): LeakyReLU(negative_slope=0.01)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
    (13): LeakyReLU(negative_s

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Training_Accuracy,▁▃▅▆▆▇▇▇██
Training_Loss,█▆▄▄▃▂▂▂▁▁
Val_Accuracy,▁
Validation_Accuracy,▁▃▄▆▆▇██▇█
Validation_Loss,█▆▅▄▃▃▂▂▂▁

0,1
Epoch,10.0
Training_Accuracy,29.75372
Training_Loss,1.97553
Val_Accuracy,32.65
Validation_Accuracy,32.65
Validation_Loss,1.90276


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8x1obddy with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout_rate: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organisation: double
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_neurons_dense: 512


[5, 5, 5, 5, 5]
[32, 64, 128, 256, 512]
CNN(
  (activations): GELU(approximate='none')
  (dense_activation): GELU(approximate='none')
  (conv_layers): ModuleList(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU(approximate='none')
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (5): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (6): GELU(approximate='none')
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
    (9): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (10): GELU(approximate='none')
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_si

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Training_Accuracy,▁▃▄▅▅▆▆▇▇█
Training_Loss,█▆▅▄▄▃▂▂▁▁
Val_Accuracy,▁
Validation_Accuracy,▁▂▅▃▃▆▆▆▇█
Validation_Loss,█▇▄▆▇▂▂▄▁▁

0,1
Epoch,10.0
Training_Accuracy,38.56732
Training_Loss,1.7542
Val_Accuracy,37.95
Validation_Accuracy,37.95
Validation_Loss,1.77429


[34m[1mwandb[0m: Agent Starting Run: 9rq30i3o with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout_rate: 0.5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organisation: half
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_neurons_dense: 512


[5, 5, 5, 5, 5]
[32, 16, 8, 4, 2]
CNN(
  (activations): GELU(approximate='none')
  (dense_activation): GELU(approximate='none')
  (conv_layers): ModuleList(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): GELU(approximate='none')
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): GELU(approximate='none')
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(16, 8, kernel_size=(5, 5), stride=(1, 1))
    (7): GELU(approximate='none')
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(8, 4, kernel_size=(5, 5), stride=(1, 1))
    (10): GELU(approximate='none')
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(4, 2, kernel_size=(5, 5), stride=(1, 1))
    (13): GELU(approximate='none')
    (14): MaxPool2d(kernel_size=2, stride=2,

0,1
Epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
Training_Accuracy,▁▄▆▆▆▆▇▇▆▇▇▇▇▇▇▇████
Training_Loss,█▆▄▄▃▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁
Val_Accuracy,▁
Validation_Accuracy,▁▅▆▅▆▆▅▆▆▆▅▆▆▇▇▆▇█▇▇
Validation_Loss,█▄▄▄▃▃▃▃▃▃▂▃▂▂▂▂▁▁▁▁

0,1
Epoch,20.0
Training_Accuracy,20.41505
Training_Loss,2.151
Val_Accuracy,20.9
Validation_Accuracy,20.9
Validation_Loss,2.13399


[34m[1mwandb[0m: Agent Starting Run: o00ghr5u with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organisation: double
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_neurons_dense: 512


[5, 5, 5, 5, 5]
[32, 64, 128, 256, 512]
CNN(
  (activations): GELU(approximate='none')
  (dense_activation): GELU(approximate='none')
  (conv_layers): ModuleList(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): GELU(approximate='none')
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (4): GELU(approximate='none')
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
    (7): GELU(approximate='none')
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(128, 256, kernel_size=(5, 5), stride=(1, 1))
    (10): GELU(approximate='none')
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(256, 512, kernel_size=(5, 5), stride=(1, 1))
    (13): GELU(approximate='none')
    (14): MaxPool2d(kernel_s

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Training_Accuracy,▁▃▅▅▆▆▇███
Training_Loss,█▆▅▄▄▃▂▂▁▁
Val_Accuracy,▁
Validation_Accuracy,▁▂▄▅▆▇▇▇██
Validation_Loss,█▆▅▄▃▂▂▂▁▁

0,1
Epoch,10.0
Training_Accuracy,31.31641
Training_Loss,1.93318
Val_Accuracy,33.2
Validation_Accuracy,33.2
Validation_Loss,1.885


[34m[1mwandb[0m: Agent Starting Run: v1mrr3ir with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organisation: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_neurons_dense: 512


[5, 5, 5, 5, 5]
[32, 32, 32, 32, 32]
CNN(
  (activations): LeakyReLU(negative_slope=0.01)
  (dense_activation): LeakyReLU(negative_slope=0.01)
  (conv_layers): ModuleList(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (5): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (6): LeakyReLU(negative_slope=0.01)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (9): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.01)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): C

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Training_Accuracy,▁▃▅▆▆▇▇▇██
Training_Loss,█▆▅▄▃▃▂▂▁▁
Val_Accuracy,▁
Validation_Accuracy,▁▂▃▄▅▅▆▇▅█
Validation_Loss,█▆▅▄▄▄▂▂▄▁

0,1
Epoch,10.0
Training_Accuracy,31.59145
Training_Loss,1.92414
Val_Accuracy,34.35
Validation_Accuracy,34.35
Validation_Loss,1.85744


[34m[1mwandb[0m: Agent Starting Run: dqfow70j with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organisation: double
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_neurons_dense: 512


[3, 3, 3, 3, 3]
[32, 64, 128, 256, 512]
CNN(
  (activations): LeakyReLU(negative_slope=0.01)
  (dense_activation): LeakyReLU(negative_slope=0.01)
  (conv_layers): ModuleList(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): LeakyReLU(negative_slope=0.01)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (4): LeakyReLU(negative_slope=0.01)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (7): LeakyReLU(negative_slope=0.01)
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
    (10): LeakyReLU(negative_slope=0.01)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
    (13): LeakyReLU(negativ

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Training_Accuracy,▁▄▅▅▆▇▇▇██
Training_Loss,█▆▅▄▃▃▂▂▁▁
Val_Accuracy,▁
Validation_Accuracy,▁▃▄▅▆▆▆▆▇█
Validation_Loss,█▆▅▄▃▃▂▂▁▁

0,1
Epoch,10.0
Training_Accuracy,33.75422
Training_Loss,1.87146
Val_Accuracy,35.65
Validation_Accuracy,35.65
Validation_Loss,1.84219


[34m[1mwandb[0m: Agent Starting Run: j44dqu4z with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organisation: double
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_neurons_dense: 512


[5, 5, 5, 5, 5]
[32, 64, 128, 256, 512]
CNN(
  (activations): GELU(approximate='none')
  (dense_activation): GELU(approximate='none')
  (conv_layers): ModuleList(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU(approximate='none')
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (5): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (6): GELU(approximate='none')
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
    (9): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (10): GELU(approximate='none')
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_si