## A. IMPORT STATEMENTS

In [1]:
# Import Statements
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import wandb
import random

## B. SET UP FOR DEVICE AGNOSTIC CODE

In [2]:
# Check if GPU is available
torch.cuda.is_available()

True

In [3]:
# Setup device agnostic code
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device

'cuda:0'

## C. DATA LOADING AND PREPROCESSING STEPS
### NOTE:- I have unzipped the inaturalist_12K dataset and uploaded to the Google Drive for easier access.

In [4]:
# Mount the google drive (RUN ONLY IN COLAB)
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [5]:
# Set up data directory path
TRAIN_DATA_DIR = "/content/drive/My Drive/Data/inaturalist_12K/train"
TEST_DATA_DIR = "/content/drive/My Drive/Data/inaturalist_12K/val"

In [None]:
def data_transformations(image_size=(224, 224), data_augment=False):

  """This function returns data transformations for the images data."""

  # Define transformations to be applied (Base Transformations)
  transformations = [
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]) # NOTE:- I am planning to use EfficientNetV2 for part B of the assignment so using the same values as used for that network
  ]

  # If Augmentation is needed, add them to transform list
  if data_augment:
    transformations += [
      transforms.RandomHorizontalFlip(0.05),
      transforms.RandomVerticalFlip(0.05),
      transforms.RandomRotation(degrees=20),
      transforms.RandomApply([transforms.ColorJitter(0.2, 0.2, 0.2, 0.2)], p=0.05),
      transforms.RandomApply([transforms.GaussianBlur(3)], p=0.05)
    ]

  transformer = transforms.Compose(transformations)

  return transformer


def get_train_and_val_dataloaders(train_data_dir, image_size=(224, 224), data_augment=False, valset_size=0.2, batch_size=16):

  """This function returns the dataloader for trainset and validation set and classnames"""

  transformer = data_transformations(image_size, data_augment)

  # Dowload the total_train dataset
  total_trainset = torchvision.datasets.ImageFolder(root = train_data_dir, transform=transformer)

  # Get the classnames
  classnames = total_trainset.classes

  # Split the total_train data into train data and val data
  labels = [label for _, label in total_trainset.samples]

  if valset_size != 0:
    train_indices, val_indices = train_test_split(
                                    range(len(total_trainset)),
                                    test_size=valset_size,
                                    stratify=labels,
                                    random_state=42
                                    )
  else:
    train_indices = range(len(total_trainset))
    val_indices = []

  # Create the trainset and valset
  trainset = torch.utils.data.Subset(total_trainset, train_indices)
  valset = torch.utils.data.Subset(total_trainset, val_indices)

  # Create the dataloaders
  trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                            shuffle=True, num_workers=2)

  valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size,
                                          shuffle=False, num_workers=2)

  return trainloader, valloader, classnames



def get_test_dataloader(test_data_dir, image_size=(224, 224), batch_size=16):

  """This function returns the test dataloader"""

  transformer = data_transformations(image_size, False)

  #Download the test data
  testset = torchvision.datasets.ImageFolder(root=test_data_dir, transform=transformer)



  testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                          shuffle=False, num_workers=2)

  return testloader

## D. MODEL CLASS
The model defined below is a CNN Model class with the help of PyTorch nn.Module.
Model Specifications:-
1. 5 Convolution Blocks
2. 1 Fully connected layer
3. 1 output layer
4. Number of filters, kernel sizes, fully connected layer size, etc. are modular.

In [8]:
class CNNModel(nn.Module):
  def __init__(self, image_size, in_channels=3, num_classes=10,
                num_filters=[64, 64, 64, 64, 64], kernel_size=[3, 3, 3, 3, 3],
                activation_fn=nn.ReLU, fc_layer_size=2048,
                batchnorm=False, dropout=0.0):

    super().__init__()

    h, w = image_size

    # Block 1
    self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=num_filters[0], kernel_size=kernel_size[0])
    h, w = h - kernel_size[0] + 1, w - kernel_size[0] + 1
    self.batchnorm1 = nn.BatchNorm2d(num_filters[0]) if batchnorm else nn.Identity()
    self.activation1 = activation_fn()
    self.dropout1 = nn.Dropout2d(dropout) if dropout!=0 else nn.Identity()
    self.maxpool1 = nn.MaxPool2d(2, 2)
    h, w = h//2, w//2

    # Block 2
    self.conv2 = nn.Conv2d(in_channels=num_filters[0], out_channels=num_filters[1], kernel_size=kernel_size[1])
    h, w = h - kernel_size[1] + 1, w - kernel_size[1] + 1
    self.batchnorm2 = nn.BatchNorm2d(num_filters[1]) if batchnorm else nn.Identity()
    self.activation2 = activation_fn()
    self.dropout2 = nn.Dropout2d(dropout) if dropout!=0 else nn.Identity()
    self.maxpool2 = nn.MaxPool2d(2, 2)
    h, w = h//2, w//2

    # Block 3
    self.conv3 = nn.Conv2d(in_channels=num_filters[1], out_channels=num_filters[2], kernel_size=kernel_size[2])
    h, w = h - kernel_size[2] + 1, w - kernel_size[2] + 1
    self.batchnorm3 = nn.BatchNorm2d(num_filters[2]) if batchnorm else nn.Identity()
    self.activation3 = activation_fn()
    self.dropout3 = nn.Dropout2d(dropout) if dropout!=0 else nn.Identity()
    self.maxpool3 = nn.MaxPool2d(2, 2)
    h, w = h//2, w//2

    # Block 4
    self.conv4 = nn.Conv2d(in_channels=num_filters[2], out_channels=num_filters[3], kernel_size=kernel_size[3])
    h, w = h - kernel_size[3] + 1, w - kernel_size[3] + 1
    self.batchnorm4 = nn.BatchNorm2d(num_filters[3]) if batchnorm else nn.Identity()
    self.activation4 = activation_fn()
    self.dropout4 = nn.Dropout2d(dropout) if dropout!=0 else nn.Identity()
    self.maxpool4 = nn.MaxPool2d(2, 2)
    h, w = h//2, w//2

    # Block 5
    self.conv5 = nn.Conv2d(in_channels=num_filters[3], out_channels=num_filters[4], kernel_size=kernel_size[4])
    h, w = h - kernel_size[4] + 1, w - kernel_size[4] + 1
    self.batchnorm5 = nn.BatchNorm2d(num_filters[4]) if batchnorm else nn.Identity()
    self.activation5 = activation_fn()
    self.dropout5 = nn.Dropout2d(dropout) if dropout!=0 else nn.Identity()
    self.maxpool5 = nn.MaxPool2d(2, 2)
    h, w = h//2, w//2

    # Flattening layer
    self.flatten = nn.Flatten()

    # Fully connected layer
    self.fc_layer = nn.Linear(in_features=num_filters[4] * h * w, out_features=fc_layer_size)
    self.batchnorm_fc = nn.BatchNorm1d(fc_layer_size) if batchnorm else nn.Identity()
    self.act_fc = activation_fn()
    self.drop_fc = nn.Dropout(dropout) if dropout!=0 else nn.Identity()

    # Output layer
    self.out = nn.Linear(in_features=fc_layer_size, out_features=num_classes)

  def forward(self, x):
    # Block 1
    x = self.conv1(x)
    x = self.batchnorm1(x)
    x = self.activation1(x)
    x = self.dropout1(x)
    x = self.maxpool1(x)

    # Block 2
    x = self.conv2(x)
    x = self.batchnorm2(x)
    x = self.activation2(x)
    x = self.dropout2(x)
    x = self.maxpool2(x)

    # Block 3
    x = self.conv3(x)
    x = self.batchnorm3(x)
    x = self.activation3(x)
    x = self.dropout3(x)
    x = self.maxpool3(x)

    # Block 4
    x = self.conv4(x)
    x = self.batchnorm4(x)
    x = self.activation4(x)
    x = self.dropout4(x)
    x = self.maxpool4(x)

    # Block 5
    x = self.conv5(x)
    x = self.batchnorm5(x)
    x = self.activation5(x)
    x = self.dropout5(x)
    x = self.maxpool5(x)

    # Flatten
    x = self.flatten(x)

    # Fully connected layers
    x = self.fc_layer(x)
    x = self.batchnorm_fc(x)
    x = self.act_fc(x)
    x = self.drop_fc(x)

    # Output layer
    x = self.out(x)

    return x

## E. MODEL TRAINING
The "train_model" method defined below trains the model on the trainloader and then evaluate on valloader for the specified number of epochs.

In [9]:
def train_model(model, trainloader, valloader, criterion, optimizer, device, epochs=10):

  """This function trains and evaluates the model for the specified number of epochs."""

  # Metrics to keep track of
  train_epoch_losses = []
  train_epoch_accuracies = []
  val_epoch_losses = []
  val_epoch_accuracies = []

  for epoch in range(epochs):

    # Set the model in train mode
    model.train()

    # Epoch Metrics
    train_running_loss = 0
    train_correct = 0
    train_total = 0

    for data in tqdm(trainloader):

      inputs, labels = data[0].to(device), data[1].to(device)

      optimizer.zero_grad()

      outputs = model(inputs)

      loss = criterion(outputs, labels)

      loss.backward()
      optimizer.step()

      # Update metrics
      train_running_loss += loss.item() * inputs.size(0)
      train_total += inputs.size(0)
      train_correct += torch.sum(labels == torch.argmax(outputs, dim=1)).item()

    train_epoch_loss = train_running_loss / train_total
    train_epoch_accuracy = 100 * train_correct / train_total

    train_epoch_losses.append(train_epoch_loss)
    train_epoch_accuracies.append(train_epoch_accuracy)

    # Set the model to evaluation mode
    model.eval()

    # Epoch Metrics
    val_running_loss = 0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
      for data in tqdm(valloader):

        inputs, labels = data[0].to(device), data[1].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Update Metrics
        val_running_loss += loss.item() * inputs.size(0)
        val_total += inputs.size(0)
        val_correct += torch.sum(labels == torch.argmax(outputs, dim=1)).item()

    val_epoch_loss = val_running_loss / val_total
    val_epoch_accuracy = 100 * val_correct / val_total

    val_epoch_losses.append(val_epoch_loss)
    val_epoch_accuracies.append(val_epoch_accuracy)

    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_epoch_loss:.3f}, Train Acc: {train_epoch_accuracy:.2f}%, Val Loss: {val_epoch_loss:.3f}, Val Acc: {val_epoch_accuracy:.2f}%")

  return train_epoch_losses, train_epoch_accuracies, val_epoch_losses, val_epoch_accuracies

## F. TRIAL RUN

In [7]:
# Image dimension
image_size = (224, 224)

In [None]:
# Get the trainloader, valloader and classnames
trainloader, valloader, classnames = get_train_and_val_dataloaders(TRAIN_DATA_DIR, image_size=image_size)

In [None]:
# Define a basic model and move to the device being used
model = CNNModel(image_size)
model.to(device)

CNNModel(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm1): Identity()
  (activation1): ReLU()
  (dropout1): Identity()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm2): Identity()
  (activation2): ReLU()
  (dropout2): Identity()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm3): Identity()
  (activation3): ReLU()
  (dropout3): Identity()
  (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm4): Identity()
  (activation4): ReLU()
  (dropout4): Identity()
  (maxpool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv5): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm5): Identity()

In [None]:
# Define the criterion and optimizer to be used in training
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)

In [None]:
# Train the model
train_model(model, trainloader, valloader, criterion, optimizer, device, 10)

100%|██████████| 501/501 [06:49<00:00,  1.22it/s]
100%|██████████| 126/126 [05:55<00:00,  2.82s/it]


Epoch 1/10, Train Loss: 2.304, Train Acc: 9.49%, Val Loss: 2.303, Val Acc: 10.09%


100%|██████████| 501/501 [01:46<00:00,  4.68it/s]
100%|██████████| 126/126 [00:25<00:00,  4.93it/s]


Epoch 2/10, Train Loss: 2.304, Train Acc: 9.49%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:45<00:00,  4.77it/s]
100%|██████████| 126/126 [00:23<00:00,  5.27it/s]


Epoch 3/10, Train Loss: 2.304, Train Acc: 9.33%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:46<00:00,  4.71it/s]
100%|██████████| 126/126 [00:25<00:00,  4.92it/s]


Epoch 4/10, Train Loss: 2.304, Train Acc: 9.42%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:45<00:00,  4.75it/s]
100%|██████████| 126/126 [00:25<00:00,  4.97it/s]


Epoch 5/10, Train Loss: 2.304, Train Acc: 9.38%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:45<00:00,  4.73it/s]
100%|██████████| 126/126 [00:24<00:00,  5.14it/s]


Epoch 6/10, Train Loss: 2.304, Train Acc: 9.31%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:45<00:00,  4.76it/s]
100%|██████████| 126/126 [00:25<00:00,  5.00it/s]


Epoch 7/10, Train Loss: 2.304, Train Acc: 10.04%, Val Loss: 2.303, Val Acc: 10.09%


100%|██████████| 501/501 [01:45<00:00,  4.74it/s]
100%|██████████| 126/126 [00:24<00:00,  5.08it/s]


Epoch 8/10, Train Loss: 2.304, Train Acc: 9.77%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:44<00:00,  4.81it/s]
100%|██████████| 126/126 [00:25<00:00,  5.02it/s]


Epoch 9/10, Train Loss: 2.304, Train Acc: 9.27%, Val Loss: 2.303, Val Acc: 9.99%


100%|██████████| 501/501 [01:44<00:00,  4.79it/s]
100%|██████████| 126/126 [00:26<00:00,  4.79it/s]

Epoch 10/10, Train Loss: 2.304, Train Acc: 9.87%, Val Loss: 2.303, Val Acc: 9.99%





([2.3038672651018826,
  2.3038257285710606,
  2.3040455661652657,
  2.3039399892605457,
  2.303906606252033,
  2.3039464572355683,
  2.30354961865311,
  2.3036965939690703,
  2.3040666415219064,
  2.3037933534960016],
 [9.492880339745192,
  9.492880339745192,
  9.330502123407445,
  9.417936547589308,
  9.380464651511366,
  9.305520859355484,
  10.042468148888334,
  9.767674244316762,
  9.268048963277542,
  9.867599300524606],
 [2.303045203754833,
  2.302984376530071,
  2.302938708058604,
  2.3029538615719303,
  2.302663381759461,
  2.303031295924992,
  2.303070899847147,
  2.3033623700137142,
  2.302898993144383,
  2.3032914591835927],
 [10.08991008991009,
  9.99000999000999,
  9.99000999000999,
  9.99000999000999,
  9.99000999000999,
  9.99000999000999,
  10.08991008991009,
  9.99000999000999,
  9.99000999000999,
  9.99000999000999])

### **OBSERVATIONS:-** For the trial run the model didn't perform well, giving the accuracy of ~10% for a train class classification, which is as good as random guessing.

## G. SOME HELPER FUNCTIONS

In [1]:
def get_activation_function(name):

  """This function returns the activation function based on the name passed"""

  if name == "leaky_relu":
    return nn.LeakyReLU
  elif name == "parametric_relu":
    return nn.PReLU
  elif name == "gelu":
    return nn.GELU
  elif name == "silu":
    return nn.SiLU
  elif name == "mish":
    return nn.Mish
  return nn.ReLU



def get_num_filters(name):

  """This function is a helper function for the hyper parameter sweep.
     As we need different filter sizes for the model to perform the sweep."""

  if name == "equal16":
    return [16, 16, 16, 16, 16]
  elif name == "equal32":
    return [32, 32, 32, 32, 32]
  elif name == "equal64":
    return [64, 64, 64, 64, 64]
  elif name == "doubling16":
    return [16, 32, 64, 128, 256]
  elif name == "doubling32":
    return [32, 64, 128, 256, 512]
  elif name == "halving256":
    return [256, 128, 64, 32, 16]
  else:
    return [100, 80, 50, 80, 100]



def get_kernel_size(name):

  """This function is a helper function for the hyper parameter sweep.
     As we need different kernel sizes for the model to perform the sweep."""

  if name == "constant5":
    return [5, 5, 5, 5, 5]
  elif name == "constant7":
    return [7, 7, 7, 7, 7]
  elif name == "decreasing":
    return [5, 5, 3, 3, 1]
  elif name == "increasing":
    return [1, 3, 3, 5, 5]
  return [3, 3, 3, 3, 3]



## H. WANDB HYPERPARAMETER SWEEP

In [None]:
def sweep_hyperparameters(config=None):

  with wandb.init(config=config):

    config = wandb.config
    wandb.run.name = f"activation_{str(config.activation)}_filters_{str(config.num_filters)}_lr_{config.learning_rate}_kernel_{config.kernel_size}_fc_size_{config.fc_layer_size}"

    # Log in my details
    wandb.config.update({"NAME": "SIDDHANT BARANWAL", "ROLL NO.": "DA24M021"})

    image_size=(224, 224)

    TRAIN_DATA_DIR = "/content/drive/My Drive/Data/inaturalist_12K/train"
    trainloader, valloader, classnames = get_train_and_val_dataloaders(TRAIN_DATA_DIR, image_size=image_size, data_augment=config.data_augment, valset_size=0.2, batch_size=config.batch_size)


    model = CNNModel(image_size, num_filters=get_num_filters(config.num_filters), kernel_size=get_kernel_size(config.kernel_size),
                     activation_fn=get_activation_function(config.activation), batchnorm=config.batch_norm, dropout=config.dropout,
                     fc_layer_size=config.fc_layer_size)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    train_losses, train_accuracies, val_losses, val_accuracies = train_model(model, trainloader, valloader, criterion, optimizer, device, epochs=10)

    # Log the evaluation metrics

    for epoch in range(len(train_losses)):
      wandb.log({
          "epoch": epoch,
          "train_loss": train_losses[epoch],
          "train_accuracy": train_accuracies[epoch],
          "validation_loss": val_losses[epoch],
          "validation_accuracy": val_accuracies[epoch]
      })

    wandb.log({
        "val_accuracy": val_accuracies[-1]
    })

In [None]:
sweep_config = {
    "method" : "bayes",
    "metric" : {"name": "val_accuracy", "goal": "maximize"},
    "parameters" : {
        "data_augment" : {"values" : [True, False]},
        "batch_norm" : {"values" : [True, False]},
        "dropout" : {"values" : [0.0, 0.2, 0.4]},
        "learning_rate" : {"values" : [0.01, 0.001, 0.0005, 0.0001]},
        "activation" : {"values" : ["relu", "leaky_relu", "parametric_relu",
                                    "gelu", "silu", "mish"]},
        "num_filters" : {"values" : ["equal16", "equal32", "equal64", "doubling16", "doubling32", "halving256"]},
        "kernel_size" : {"values" : ["constant3", "constant5", "constant7", "decreasing", "increasing"]},
        "fc_layer_size" : {"values": [2048, 1024, 512]},
        "batch_size": {"values": [8, 16, 32]}
    }
}

sweep_id = wandb.sweep(sweep_config, project = "da24m021_da6401_assignment2")
wandb.agent(sweep_id, function = sweep_hyperparameters, count = 50)

Create sweep with ID: k2txx1xm
Sweep URL: https://wandb.ai/da24m021-indian-institute-of-technology-madras/da24m021_da6401_assignment2/sweeps/k2txx1xm


[34m[1mwandb[0m: Agent Starting Run: cerulhgd with config:
[34m[1mwandb[0m: 	activation: leaky_relu
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	fc_layer_size: 512
[34m[1mwandb[0m: 	kernel_size: constant5
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_filters: halving256


100%|██████████| 1001/1001 [02:21<00:00,  7.08it/s]
100%|██████████| 251/251 [00:27<00:00,  8.99it/s]


Epoch 1/10, Train Loss: 107038537.954, Train Acc: 10.30%, Val Loss: 2349336.981, Val Acc: 10.69%


100%|██████████| 1001/1001 [02:15<00:00,  7.41it/s]
100%|██████████| 251/251 [00:28<00:00,  8.85it/s]


Epoch 2/10, Train Loss: 47263346.942, Train Acc: 9.36%, Val Loss: 2651705.043, Val Acc: 10.04%


100%|██████████| 1001/1001 [02:13<00:00,  7.52it/s]
100%|██████████| 251/251 [00:27<00:00,  8.99it/s]


Epoch 3/10, Train Loss: 10797584.519, Train Acc: 10.17%, Val Loss: 2360685.813, Val Acc: 9.94%


100%|██████████| 1001/1001 [02:14<00:00,  7.42it/s]
100%|██████████| 251/251 [00:28<00:00,  8.90it/s]


Epoch 4/10, Train Loss: 5648114.842, Train Acc: 10.68%, Val Loss: 2271509.495, Val Acc: 9.99%


100%|██████████| 1001/1001 [02:13<00:00,  7.51it/s]
100%|██████████| 251/251 [00:29<00:00,  8.37it/s]


Epoch 5/10, Train Loss: 4120590.666, Train Acc: 9.53%, Val Loss: 1424813.879, Val Acc: 9.84%


100%|██████████| 1001/1001 [02:13<00:00,  7.50it/s]
100%|██████████| 251/251 [00:28<00:00,  8.89it/s]


Epoch 6/10, Train Loss: 10002602.419, Train Acc: 9.91%, Val Loss: 2228866.012, Val Acc: 10.19%


100%|██████████| 1001/1001 [02:14<00:00,  7.42it/s]
100%|██████████| 251/251 [00:28<00:00,  8.91it/s]


Epoch 7/10, Train Loss: 4329779.164, Train Acc: 10.34%, Val Loss: 1531243.208, Val Acc: 10.49%


100%|██████████| 1001/1001 [02:14<00:00,  7.44it/s]
100%|██████████| 251/251 [00:28<00:00,  8.96it/s]


Epoch 8/10, Train Loss: 14684807.763, Train Acc: 10.13%, Val Loss: 2647371.340, Val Acc: 10.44%


100%|██████████| 1001/1001 [02:14<00:00,  7.44it/s]
100%|██████████| 251/251 [00:28<00:00,  8.92it/s]


Epoch 9/10, Train Loss: 5983211.872, Train Acc: 10.32%, Val Loss: 2676980.992, Val Acc: 10.04%


100%|██████████| 1001/1001 [02:14<00:00,  7.44it/s]
100%|██████████| 251/251 [00:28<00:00,  8.94it/s]

Epoch 10/10, Train Loss: 4200109.973, Train Acc: 10.03%, Val Loss: 1622379.387, Val Acc: 10.04%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▆▁▅█▂▄▆▅▆▅
train_loss,█▄▁▁▁▁▁▂▁▁
val_accuracy,▁
validation_accuracy,█▃▂▂▁▄▆▆▃▃
validation_loss,▆█▆▆▁▅▂██▂

0,1
epoch,9.0
train_accuracy,10.02998
train_loss,4200109.9728
val_accuracy,10.03996
validation_accuracy,10.03996
validation_loss,1622379.38711


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xpuhnbdk with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: decreasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: halving256


100%|██████████| 1001/1001 [02:25<00:00,  6.88it/s]
100%|██████████| 251/251 [00:28<00:00,  8.88it/s]


Epoch 1/10, Train Loss: 2.226, Train Acc: 18.82%, Val Loss: 2.057, Val Acc: 27.17%


100%|██████████| 1001/1001 [02:21<00:00,  7.06it/s]
100%|██████████| 251/251 [00:27<00:00,  8.98it/s]


Epoch 2/10, Train Loss: 2.120, Train Acc: 23.26%, Val Loss: 1.982, Val Acc: 29.72%


100%|██████████| 1001/1001 [02:21<00:00,  7.06it/s]
100%|██████████| 251/251 [00:28<00:00,  8.79it/s]


Epoch 3/10, Train Loss: 2.072, Train Acc: 26.90%, Val Loss: 1.981, Val Acc: 29.62%


100%|██████████| 1001/1001 [02:21<00:00,  7.10it/s]
100%|██████████| 251/251 [00:28<00:00,  8.89it/s]


Epoch 4/10, Train Loss: 2.051, Train Acc: 26.70%, Val Loss: 1.945, Val Acc: 31.72%


100%|██████████| 1001/1001 [02:23<00:00,  6.95it/s]
100%|██████████| 251/251 [00:27<00:00,  9.09it/s]


Epoch 5/10, Train Loss: 2.032, Train Acc: 27.87%, Val Loss: 1.911, Val Acc: 31.97%


100%|██████████| 1001/1001 [02:22<00:00,  7.03it/s]
100%|██████████| 251/251 [00:28<00:00,  8.92it/s]


Epoch 6/10, Train Loss: 2.014, Train Acc: 28.65%, Val Loss: 1.920, Val Acc: 33.22%


100%|██████████| 1001/1001 [02:20<00:00,  7.13it/s]
100%|██████████| 251/251 [00:27<00:00,  8.98it/s]


Epoch 7/10, Train Loss: 1.981, Train Acc: 29.54%, Val Loss: 1.897, Val Acc: 33.97%


100%|██████████| 1001/1001 [02:21<00:00,  7.06it/s]
100%|██████████| 251/251 [00:28<00:00,  8.90it/s]


Epoch 8/10, Train Loss: 1.969, Train Acc: 29.99%, Val Loss: 1.892, Val Acc: 32.92%


100%|██████████| 1001/1001 [02:22<00:00,  7.00it/s]
100%|██████████| 251/251 [00:28<00:00,  8.87it/s]


Epoch 9/10, Train Loss: 1.969, Train Acc: 29.72%, Val Loss: 1.869, Val Acc: 35.31%


100%|██████████| 1001/1001 [02:19<00:00,  7.19it/s]
100%|██████████| 251/251 [00:28<00:00,  8.90it/s]

Epoch 10/10, Train Loss: 1.945, Train Acc: 31.59%, Val Loss: 1.881, Val Acc: 33.77%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▅▆▆▇▇▇█
train_loss,█▅▄▄▃▃▂▂▂▁
val_accuracy,▁
validation_accuracy,▁▃▃▅▅▆▇▆█▇
validation_loss,█▅▅▄▃▃▂▂▁▁

0,1
epoch,9.0
train_accuracy,31.58881
train_loss,1.9455
val_accuracy,33.76623
validation_accuracy,33.76623
validation_loss,1.88088


[34m[1mwandb[0m: Agent Starting Run: l7bvact0 with config:
[34m[1mwandb[0m: 	activation: parametric_relu
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: decreasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: equal16


100%|██████████| 501/501 [01:49<00:00,  4.57it/s]
100%|██████████| 126/126 [00:25<00:00,  4.93it/s]


Epoch 1/10, Train Loss: 2.236, Train Acc: 16.75%, Val Loss: 2.126, Val Acc: 22.78%


100%|██████████| 501/501 [01:42<00:00,  4.90it/s]
100%|██████████| 126/126 [00:25<00:00,  4.92it/s]


Epoch 2/10, Train Loss: 2.092, Train Acc: 23.98%, Val Loss: 2.046, Val Acc: 28.22%


100%|██████████| 501/501 [01:44<00:00,  4.80it/s]
100%|██████████| 126/126 [00:25<00:00,  4.90it/s]


Epoch 3/10, Train Loss: 2.012, Train Acc: 28.17%, Val Loss: 1.980, Val Acc: 29.02%


100%|██████████| 501/501 [01:42<00:00,  4.91it/s]
100%|██████████| 126/126 [00:25<00:00,  4.96it/s]


Epoch 4/10, Train Loss: 1.951, Train Acc: 30.38%, Val Loss: 1.958, Val Acc: 30.32%


100%|██████████| 501/501 [01:44<00:00,  4.80it/s]
100%|██████████| 126/126 [00:25<00:00,  4.92it/s]


Epoch 5/10, Train Loss: 1.908, Train Acc: 32.50%, Val Loss: 1.952, Val Acc: 31.12%


100%|██████████| 501/501 [01:41<00:00,  4.92it/s]
100%|██████████| 126/126 [00:25<00:00,  4.88it/s]


Epoch 6/10, Train Loss: 1.876, Train Acc: 33.42%, Val Loss: 1.932, Val Acc: 30.52%


100%|██████████| 501/501 [01:44<00:00,  4.81it/s]
100%|██████████| 126/126 [00:25<00:00,  4.96it/s]


Epoch 7/10, Train Loss: 1.846, Train Acc: 34.61%, Val Loss: 1.912, Val Acc: 31.97%


100%|██████████| 501/501 [01:42<00:00,  4.91it/s]
100%|██████████| 126/126 [00:26<00:00,  4.79it/s]


Epoch 8/10, Train Loss: 1.816, Train Acc: 36.07%, Val Loss: 1.920, Val Acc: 31.87%


100%|██████████| 501/501 [01:43<00:00,  4.83it/s]
100%|██████████| 126/126 [00:23<00:00,  5.34it/s]


Epoch 9/10, Train Loss: 1.784, Train Acc: 36.73%, Val Loss: 1.917, Val Acc: 31.62%


100%|██████████| 501/501 [01:43<00:00,  4.86it/s]
100%|██████████| 126/126 [00:26<00:00,  4.70it/s]

Epoch 10/10, Train Loss: 1.756, Train Acc: 38.15%, Val Loss: 1.915, Val Acc: 32.87%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁
validation_accuracy,▁▅▅▆▇▆▇▇▇█
validation_loss,█▅▃▃▂▂▁▁▁▁

0,1
epoch,9.0
train_accuracy,38.14639
train_loss,1.75614
val_accuracy,32.86713
validation_accuracy,32.86713
validation_loss,1.91479


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: abtwupgs with config:
[34m[1mwandb[0m: 	activation: leaky_relu
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: increasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: equal16


100%|██████████| 251/251 [01:46<00:00,  2.35it/s]
100%|██████████| 63/63 [00:25<00:00,  2.51it/s]


Epoch 1/10, Train Loss: 2.309, Train Acc: 13.51%, Val Loss: 2.252, Val Acc: 12.94%


100%|██████████| 251/251 [01:42<00:00,  2.45it/s]
100%|██████████| 63/63 [00:25<00:00,  2.45it/s]


Epoch 2/10, Train Loss: 2.247, Train Acc: 16.89%, Val Loss: 2.197, Val Acc: 17.93%


100%|██████████| 251/251 [01:42<00:00,  2.46it/s]
100%|██████████| 63/63 [00:25<00:00,  2.50it/s]


Epoch 3/10, Train Loss: 2.212, Train Acc: 18.36%, Val Loss: 2.173, Val Acc: 18.83%


100%|██████████| 251/251 [01:42<00:00,  2.44it/s]
100%|██████████| 63/63 [00:25<00:00,  2.49it/s]


Epoch 4/10, Train Loss: 2.185, Train Acc: 19.80%, Val Loss: 2.145, Val Acc: 20.98%


100%|██████████| 251/251 [01:40<00:00,  2.49it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


Epoch 5/10, Train Loss: 2.173, Train Acc: 20.27%, Val Loss: 2.115, Val Acc: 22.18%


100%|██████████| 251/251 [01:43<00:00,  2.42it/s]
100%|██████████| 63/63 [00:23<00:00,  2.67it/s]


Epoch 6/10, Train Loss: 2.166, Train Acc: 20.32%, Val Loss: 2.120, Val Acc: 23.28%


100%|██████████| 251/251 [01:40<00:00,  2.51it/s]
100%|██████████| 63/63 [00:24<00:00,  2.52it/s]


Epoch 7/10, Train Loss: 2.158, Train Acc: 21.22%, Val Loss: 2.112, Val Acc: 23.23%


100%|██████████| 251/251 [01:42<00:00,  2.46it/s]
100%|██████████| 63/63 [00:25<00:00,  2.47it/s]


Epoch 8/10, Train Loss: 2.145, Train Acc: 22.01%, Val Loss: 2.095, Val Acc: 24.48%


100%|██████████| 251/251 [01:40<00:00,  2.50it/s]
100%|██████████| 63/63 [00:24<00:00,  2.60it/s]


Epoch 9/10, Train Loss: 2.126, Train Acc: 22.48%, Val Loss: 2.104, Val Acc: 24.13%


100%|██████████| 251/251 [01:41<00:00,  2.48it/s]
100%|██████████| 63/63 [00:24<00:00,  2.54it/s]

Epoch 10/10, Train Loss: 2.116, Train Acc: 23.52%, Val Loss: 2.064, Val Acc: 26.67%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▆▇▇█
train_loss,█▆▄▄▃▃▃▂▁▁
val_accuracy,▁
validation_accuracy,▁▄▄▅▆▆▆▇▇█
validation_loss,█▆▅▄▃▃▃▂▂▁

0,1
epoch,9.0
train_accuracy,23.51986
train_loss,2.11574
val_accuracy,26.67333
validation_accuracy,26.67333
validation_loss,2.06434


[34m[1mwandb[0m: Agent Starting Run: 9u4a9al4 with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 1024
[34m[1mwandb[0m: 	kernel_size: increasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: doubling32


100%|██████████| 501/501 [01:52<00:00,  4.45it/s]
100%|██████████| 126/126 [00:24<00:00,  5.07it/s]


Epoch 1/10, Train Loss: 2.153, Train Acc: 20.91%, Val Loss: 2.037, Val Acc: 25.62%


100%|██████████| 501/501 [01:47<00:00,  4.67it/s]
100%|██████████| 126/126 [00:25<00:00,  4.88it/s]


Epoch 2/10, Train Loss: 1.994, Train Acc: 28.23%, Val Loss: 1.965, Val Acc: 31.77%


100%|██████████| 501/501 [01:45<00:00,  4.75it/s]
100%|██████████| 126/126 [00:27<00:00,  4.53it/s]


Epoch 3/10, Train Loss: 1.927, Train Acc: 31.89%, Val Loss: 1.917, Val Acc: 34.27%


100%|██████████| 501/501 [01:45<00:00,  4.74it/s]
100%|██████████| 126/126 [00:26<00:00,  4.83it/s]


Epoch 4/10, Train Loss: 1.860, Train Acc: 34.36%, Val Loss: 1.884, Val Acc: 35.01%


100%|██████████| 501/501 [01:47<00:00,  4.65it/s]
100%|██████████| 126/126 [00:25<00:00,  4.94it/s]


Epoch 5/10, Train Loss: 1.798, Train Acc: 36.93%, Val Loss: 1.892, Val Acc: 33.52%


100%|██████████| 501/501 [01:47<00:00,  4.67it/s]
100%|██████████| 126/126 [00:23<00:00,  5.26it/s]


Epoch 6/10, Train Loss: 1.707, Train Acc: 40.17%, Val Loss: 1.883, Val Acc: 34.42%


100%|██████████| 501/501 [01:48<00:00,  4.61it/s]
100%|██████████| 126/126 [00:25<00:00,  4.86it/s]


Epoch 7/10, Train Loss: 1.590, Train Acc: 43.92%, Val Loss: 1.926, Val Acc: 35.26%


100%|██████████| 501/501 [01:45<00:00,  4.77it/s]
100%|██████████| 126/126 [00:25<00:00,  4.87it/s]


Epoch 8/10, Train Loss: 1.422, Train Acc: 49.79%, Val Loss: 2.083, Val Acc: 34.82%


100%|██████████| 501/501 [01:47<00:00,  4.66it/s]
100%|██████████| 126/126 [00:25<00:00,  4.87it/s]


Epoch 9/10, Train Loss: 1.197, Train Acc: 58.36%, Val Loss: 2.098, Val Acc: 35.41%


100%|██████████| 501/501 [01:44<00:00,  4.78it/s]
100%|██████████| 126/126 [00:27<00:00,  4.59it/s]

Epoch 10/10, Train Loss: 0.913, Train Acc: 68.62%, Val Loss: 2.385, Val Acc: 33.52%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▃▃▄▄▅▆█
train_loss,█▇▇▆▆▅▅▄▃▁
val_accuracy,▁
validation_accuracy,▁▅▇█▇▇███▇
validation_loss,▃▂▁▁▁▁▂▄▄█

0,1
epoch,9.0
train_accuracy,68.62353
train_loss,0.91266
val_accuracy,33.51648
validation_accuracy,33.51648
validation_loss,2.38495


[34m[1mwandb[0m: Agent Starting Run: l8cbhw5y with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augment: True
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	fc_layer_size: 1024
[34m[1mwandb[0m: 	kernel_size: increasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: doubling16


100%|██████████| 251/251 [02:11<00:00,  1.91it/s]
100%|██████████| 63/63 [00:31<00:00,  2.03it/s]


Epoch 1/10, Train Loss: 2.267, Train Acc: 17.85%, Val Loss: 2.180, Val Acc: 20.13%


100%|██████████| 251/251 [02:09<00:00,  1.93it/s]
100%|██████████| 63/63 [00:31<00:00,  2.01it/s]


Epoch 2/10, Train Loss: 2.166, Train Acc: 21.71%, Val Loss: 2.122, Val Acc: 23.63%


100%|██████████| 251/251 [02:09<00:00,  1.94it/s]
100%|██████████| 63/63 [00:31<00:00,  2.01it/s]


Epoch 3/10, Train Loss: 2.121, Train Acc: 23.17%, Val Loss: 2.083, Val Acc: 24.53%


100%|██████████| 251/251 [02:05<00:00,  2.00it/s]
100%|██████████| 63/63 [00:30<00:00,  2.04it/s]


Epoch 4/10, Train Loss: 2.104, Train Acc: 24.33%, Val Loss: 2.070, Val Acc: 25.07%


100%|██████████| 251/251 [02:09<00:00,  1.94it/s]
100%|██████████| 63/63 [00:30<00:00,  2.06it/s]


Epoch 5/10, Train Loss: 2.080, Train Acc: 24.92%, Val Loss: 2.023, Val Acc: 27.02%


100%|██████████| 251/251 [02:09<00:00,  1.94it/s]
100%|██████████| 63/63 [00:31<00:00,  2.03it/s]


Epoch 6/10, Train Loss: 2.047, Train Acc: 26.89%, Val Loss: 2.031, Val Acc: 27.87%


100%|██████████| 251/251 [02:07<00:00,  1.97it/s]
100%|██████████| 63/63 [00:33<00:00,  1.88it/s]


Epoch 7/10, Train Loss: 2.031, Train Acc: 27.04%, Val Loss: 1.995, Val Acc: 28.42%


100%|██████████| 251/251 [02:06<00:00,  1.99it/s]
100%|██████████| 63/63 [00:31<00:00,  2.02it/s]


Epoch 8/10, Train Loss: 2.006, Train Acc: 28.23%, Val Loss: 1.998, Val Acc: 28.22%


100%|██████████| 251/251 [02:07<00:00,  1.96it/s]
100%|██████████| 63/63 [00:30<00:00,  2.05it/s]


Epoch 9/10, Train Loss: 1.989, Train Acc: 29.00%, Val Loss: 1.974, Val Acc: 30.32%


100%|██████████| 251/251 [02:08<00:00,  1.95it/s]
100%|██████████| 63/63 [00:30<00:00,  2.04it/s]

Epoch 10/10, Train Loss: 1.950, Train Acc: 30.58%, Val Loss: 1.942, Val Acc: 30.72%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▅▆▆▇▇█
train_loss,█▆▅▄▄▃▃▂▂▁
val_accuracy,▁
validation_accuracy,▁▃▄▄▆▆▆▆██
validation_loss,█▆▅▅▃▄▃▃▂▁

0,1
epoch,9.0
train_accuracy,30.57707
train_loss,1.95039
val_accuracy,30.71928
validation_accuracy,30.71928
validation_loss,1.9416


[34m[1mwandb[0m: Agent Starting Run: 1vhdz6j8 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	data_augment: True
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: increasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: doubling16


100%|██████████| 1001/1001 [02:18<00:00,  7.25it/s]
100%|██████████| 251/251 [00:34<00:00,  7.37it/s]


Epoch 1/10, Train Loss: 2.231, Train Acc: 17.64%, Val Loss: 2.170, Val Acc: 19.93%


100%|██████████| 1001/1001 [02:13<00:00,  7.50it/s]
100%|██████████| 251/251 [00:31<00:00,  7.90it/s]


Epoch 2/10, Train Loss: 2.145, Train Acc: 22.33%, Val Loss: 2.091, Val Acc: 27.67%


100%|██████████| 1001/1001 [02:14<00:00,  7.46it/s]
100%|██████████| 251/251 [00:31<00:00,  7.98it/s]


Epoch 3/10, Train Loss: 2.118, Train Acc: 23.67%, Val Loss: 2.080, Val Acc: 26.37%


100%|██████████| 1001/1001 [02:15<00:00,  7.38it/s]
100%|██████████| 251/251 [00:31<00:00,  7.92it/s]


Epoch 4/10, Train Loss: 2.092, Train Acc: 25.07%, Val Loss: 2.050, Val Acc: 26.92%


100%|██████████| 1001/1001 [02:13<00:00,  7.51it/s]
100%|██████████| 251/251 [00:31<00:00,  7.98it/s]


Epoch 5/10, Train Loss: 2.065, Train Acc: 25.78%, Val Loss: 1.999, Val Acc: 30.17%


100%|██████████| 1001/1001 [02:11<00:00,  7.59it/s]
100%|██████████| 251/251 [00:33<00:00,  7.58it/s]


Epoch 6/10, Train Loss: 2.056, Train Acc: 26.61%, Val Loss: 2.012, Val Acc: 28.87%


100%|██████████| 1001/1001 [02:14<00:00,  7.42it/s]
100%|██████████| 251/251 [00:31<00:00,  7.94it/s]


Epoch 7/10, Train Loss: 2.039, Train Acc: 27.65%, Val Loss: 1.996, Val Acc: 27.82%


100%|██████████| 1001/1001 [02:14<00:00,  7.45it/s]
100%|██████████| 251/251 [00:31<00:00,  7.94it/s]


Epoch 8/10, Train Loss: 2.020, Train Acc: 27.37%, Val Loss: 1.999, Val Acc: 29.12%


100%|██████████| 1001/1001 [02:15<00:00,  7.37it/s]
100%|██████████| 251/251 [00:31<00:00,  7.99it/s]


Epoch 9/10, Train Loss: 1.995, Train Acc: 28.74%, Val Loss: 1.955, Val Acc: 31.22%


100%|██████████| 1001/1001 [02:11<00:00,  7.59it/s]
100%|██████████| 251/251 [00:33<00:00,  7.46it/s]

Epoch 10/10, Train Loss: 1.970, Train Acc: 29.55%, Val Loss: 1.942, Val Acc: 31.27%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▅▆▆▇▇██
train_loss,█▆▅▄▄▃▃▂▂▁
val_accuracy,▁
validation_accuracy,▁▆▅▅▇▇▆▇██
validation_loss,█▆▅▄▃▃▃▃▁▁

0,1
epoch,9.0
train_accuracy,29.55284
train_loss,1.97031
val_accuracy,31.26873
validation_accuracy,31.26873
validation_loss,1.94177


[34m[1mwandb[0m: Agent Starting Run: rkd1hnpr with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	data_augment: True
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: decreasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: equal32


100%|██████████| 501/501 [02:15<00:00,  3.70it/s]
100%|██████████| 126/126 [00:31<00:00,  4.05it/s]


Epoch 1/10, Train Loss: 2.102, Train Acc: 24.88%, Val Loss: 2.025, Val Acc: 26.22%


100%|██████████| 501/501 [02:11<00:00,  3.80it/s]
100%|██████████| 126/126 [00:32<00:00,  3.88it/s]


Epoch 2/10, Train Loss: 1.954, Train Acc: 31.49%, Val Loss: 1.971, Val Acc: 29.82%


100%|██████████| 501/501 [02:11<00:00,  3.80it/s]
100%|██████████| 126/126 [00:31<00:00,  4.03it/s]


Epoch 3/10, Train Loss: 1.878, Train Acc: 34.40%, Val Loss: 1.917, Val Acc: 32.57%


100%|██████████| 501/501 [02:12<00:00,  3.79it/s]
100%|██████████| 126/126 [00:31<00:00,  4.02it/s]


Epoch 4/10, Train Loss: 1.825, Train Acc: 36.10%, Val Loss: 1.905, Val Acc: 34.17%


100%|██████████| 501/501 [02:13<00:00,  3.77it/s]
100%|██████████| 126/126 [00:30<00:00,  4.08it/s]


Epoch 5/10, Train Loss: 1.778, Train Acc: 37.58%, Val Loss: 1.886, Val Acc: 36.11%


100%|██████████| 501/501 [02:10<00:00,  3.84it/s]
100%|██████████| 126/126 [00:33<00:00,  3.76it/s]


Epoch 6/10, Train Loss: 1.742, Train Acc: 38.52%, Val Loss: 1.877, Val Acc: 34.62%


100%|██████████| 501/501 [02:08<00:00,  3.88it/s]
100%|██████████| 126/126 [00:32<00:00,  3.84it/s]


Epoch 7/10, Train Loss: 1.693, Train Acc: 40.84%, Val Loss: 1.851, Val Acc: 35.76%


100%|██████████| 501/501 [02:14<00:00,  3.71it/s]
100%|██████████| 126/126 [00:31<00:00,  4.00it/s]


Epoch 8/10, Train Loss: 1.657, Train Acc: 42.29%, Val Loss: 1.876, Val Acc: 35.61%


100%|██████████| 501/501 [02:12<00:00,  3.78it/s]
100%|██████████| 126/126 [00:32<00:00,  3.83it/s]


Epoch 9/10, Train Loss: 1.616, Train Acc: 44.03%, Val Loss: 1.858, Val Acc: 36.36%


100%|██████████| 501/501 [02:12<00:00,  3.79it/s]
100%|██████████| 126/126 [00:31<00:00,  4.02it/s]

Epoch 10/10, Train Loss: 1.593, Train Acc: 43.99%, Val Loss: 1.844, Val Acc: 36.71%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▄▃▂▂▁▁
val_accuracy,▁
validation_accuracy,▁▃▅▆█▇▇▇██
validation_loss,█▆▄▃▃▂▁▂▂▁

0,1
epoch,9.0
train_accuracy,43.99201
train_loss,1.59254
val_accuracy,36.71329
validation_accuracy,36.71329
validation_loss,1.84438


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bkdy8d1w with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: increasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: equal64


100%|██████████| 251/251 [01:51<00:00,  2.26it/s]
100%|██████████| 63/63 [00:24<00:00,  2.56it/s]


Epoch 1/10, Train Loss: 2.175, Train Acc: 19.56%, Val Loss: 2.082, Val Acc: 24.08%


100%|██████████| 251/251 [01:44<00:00,  2.40it/s]
100%|██████████| 63/63 [00:26<00:00,  2.39it/s]


Epoch 2/10, Train Loss: 2.050, Train Acc: 26.03%, Val Loss: 2.029, Val Acc: 26.62%


100%|██████████| 251/251 [01:44<00:00,  2.40it/s]
100%|██████████| 63/63 [00:24<00:00,  2.60it/s]


Epoch 3/10, Train Loss: 1.982, Train Acc: 29.18%, Val Loss: 1.970, Val Acc: 30.97%


100%|██████████| 251/251 [01:47<00:00,  2.33it/s]
100%|██████████| 63/63 [00:24<00:00,  2.62it/s]


Epoch 4/10, Train Loss: 1.927, Train Acc: 31.91%, Val Loss: 1.936, Val Acc: 31.62%


100%|██████████| 251/251 [01:44<00:00,  2.40it/s]
100%|██████████| 63/63 [00:25<00:00,  2.46it/s]


Epoch 5/10, Train Loss: 1.885, Train Acc: 33.46%, Val Loss: 1.926, Val Acc: 32.12%


100%|██████████| 251/251 [01:43<00:00,  2.42it/s]
100%|██████████| 63/63 [00:24<00:00,  2.53it/s]


Epoch 6/10, Train Loss: 1.844, Train Acc: 34.75%, Val Loss: 1.906, Val Acc: 32.87%


100%|██████████| 251/251 [01:47<00:00,  2.33it/s]
100%|██████████| 63/63 [00:24<00:00,  2.62it/s]


Epoch 7/10, Train Loss: 1.800, Train Acc: 36.62%, Val Loss: 1.880, Val Acc: 34.17%


100%|██████████| 251/251 [01:46<00:00,  2.35it/s]
100%|██████████| 63/63 [00:23<00:00,  2.66it/s]


Epoch 8/10, Train Loss: 1.757, Train Acc: 38.21%, Val Loss: 1.886, Val Acc: 33.37%


100%|██████████| 251/251 [01:45<00:00,  2.37it/s]
100%|██████████| 63/63 [00:23<00:00,  2.65it/s]


Epoch 9/10, Train Loss: 1.715, Train Acc: 40.44%, Val Loss: 1.884, Val Acc: 34.02%


100%|██████████| 251/251 [01:44<00:00,  2.39it/s]
100%|██████████| 63/63 [00:25<00:00,  2.52it/s]

Epoch 10/10, Train Loss: 1.647, Train Acc: 42.49%, Val Loss: 1.929, Val Acc: 35.16%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▅▆▆▇▇█
train_loss,█▆▅▅▄▄▃▂▂▁
val_accuracy,▁
validation_accuracy,▁▃▅▆▆▇▇▇▇█
validation_loss,█▆▄▃▃▂▁▁▁▃

0,1
epoch,9.0
train_accuracy,42.49313
train_loss,1.64702
val_accuracy,35.16484
validation_accuracy,35.16484
validation_loss,1.92854


[34m[1mwandb[0m: Agent Starting Run: bw5494y3 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 1024
[34m[1mwandb[0m: 	kernel_size: increasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: equal32


100%|██████████| 251/251 [01:48<00:00,  2.31it/s]
100%|██████████| 63/63 [00:25<00:00,  2.48it/s]


Epoch 1/10, Train Loss: 2.071, Train Acc: 24.98%, Val Loss: 1.956, Val Acc: 30.87%


100%|██████████| 251/251 [01:42<00:00,  2.45it/s]
100%|██████████| 63/63 [00:25<00:00,  2.46it/s]


Epoch 2/10, Train Loss: 1.880, Train Acc: 33.51%, Val Loss: 1.921, Val Acc: 32.12%


100%|██████████| 251/251 [01:44<00:00,  2.41it/s]
100%|██████████| 63/63 [00:25<00:00,  2.48it/s]


Epoch 3/10, Train Loss: 1.782, Train Acc: 37.72%, Val Loss: 1.901, Val Acc: 34.37%


100%|██████████| 251/251 [01:43<00:00,  2.43it/s]
100%|██████████| 63/63 [00:25<00:00,  2.50it/s]


Epoch 4/10, Train Loss: 1.697, Train Acc: 41.08%, Val Loss: 1.884, Val Acc: 34.27%


100%|██████████| 251/251 [01:42<00:00,  2.46it/s]
100%|██████████| 63/63 [00:27<00:00,  2.31it/s]


Epoch 5/10, Train Loss: 1.598, Train Acc: 45.27%, Val Loss: 1.889, Val Acc: 33.27%


100%|██████████| 251/251 [01:42<00:00,  2.45it/s]
100%|██████████| 63/63 [00:24<00:00,  2.54it/s]


Epoch 6/10, Train Loss: 1.511, Train Acc: 48.68%, Val Loss: 1.883, Val Acc: 34.17%


100%|██████████| 251/251 [01:43<00:00,  2.42it/s]
100%|██████████| 63/63 [00:23<00:00,  2.65it/s]


Epoch 7/10, Train Loss: 1.400, Train Acc: 53.46%, Val Loss: 1.928, Val Acc: 35.11%


100%|██████████| 251/251 [01:44<00:00,  2.41it/s]
100%|██████████| 63/63 [00:25<00:00,  2.52it/s]


Epoch 8/10, Train Loss: 1.296, Train Acc: 57.72%, Val Loss: 1.941, Val Acc: 34.42%


100%|██████████| 251/251 [01:41<00:00,  2.47it/s]
100%|██████████| 63/63 [00:25<00:00,  2.51it/s]


Epoch 9/10, Train Loss: 1.180, Train Acc: 61.14%, Val Loss: 2.030, Val Acc: 32.42%


100%|██████████| 251/251 [01:44<00:00,  2.41it/s]
100%|██████████| 63/63 [00:25<00:00,  2.47it/s]

Epoch 10/10, Train Loss: 1.071, Train Acc: 66.25%, Val Loss: 2.069, Val Acc: 31.87%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▄▄▅▆▇▇█
train_loss,█▇▆▅▅▄▃▃▂▁
val_accuracy,▁
validation_accuracy,▁▃▇▇▅▆█▇▄▃
validation_loss,▄▂▂▁▁▁▃▃▇█

0,1
epoch,9.0
train_accuracy,66.25031
train_loss,1.07097
val_accuracy,31.86813
validation_accuracy,31.86813
validation_loss,2.06859


[34m[1mwandb[0m: Agent Starting Run: ok79fpgb with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: decreasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: doubling32


100%|██████████| 501/501 [01:54<00:00,  4.39it/s]
100%|██████████| 126/126 [00:25<00:00,  4.96it/s]


Epoch 1/10, Train Loss: 2.088, Train Acc: 27.05%, Val Loss: 1.965, Val Acc: 31.22%


100%|██████████| 501/501 [01:47<00:00,  4.65it/s]
100%|██████████| 126/126 [00:27<00:00,  4.64it/s]


Epoch 2/10, Train Loss: 1.845, Train Acc: 35.54%, Val Loss: 1.901, Val Acc: 33.77%


100%|██████████| 501/501 [01:47<00:00,  4.67it/s]
100%|██████████| 126/126 [00:26<00:00,  4.81it/s]


Epoch 3/10, Train Loss: 1.634, Train Acc: 43.43%, Val Loss: 2.038, Val Acc: 31.47%


100%|██████████| 501/501 [01:48<00:00,  4.62it/s]
100%|██████████| 126/126 [00:25<00:00,  4.88it/s]


Epoch 4/10, Train Loss: 1.315, Train Acc: 54.47%, Val Loss: 2.038, Val Acc: 35.06%


100%|██████████| 501/501 [01:48<00:00,  4.60it/s]
100%|██████████| 126/126 [00:25<00:00,  5.03it/s]


Epoch 5/10, Train Loss: 0.798, Train Acc: 73.87%, Val Loss: 2.296, Val Acc: 34.07%


100%|██████████| 501/501 [01:47<00:00,  4.66it/s]
100%|██████████| 126/126 [00:27<00:00,  4.65it/s]


Epoch 6/10, Train Loss: 0.432, Train Acc: 86.32%, Val Loss: 2.567, Val Acc: 32.62%


100%|██████████| 501/501 [01:46<00:00,  4.70it/s]
100%|██████████| 126/126 [00:26<00:00,  4.81it/s]


Epoch 7/10, Train Loss: 0.256, Train Acc: 92.34%, Val Loss: 2.942, Val Acc: 30.37%


100%|██████████| 501/501 [01:48<00:00,  4.63it/s]
100%|██████████| 126/126 [00:25<00:00,  4.85it/s]


Epoch 8/10, Train Loss: 0.176, Train Acc: 94.69%, Val Loss: 3.024, Val Acc: 32.17%


100%|██████████| 501/501 [01:47<00:00,  4.66it/s]
100%|██████████| 126/126 [00:26<00:00,  4.82it/s]


Epoch 9/10, Train Loss: 0.133, Train Acc: 96.44%, Val Loss: 3.056, Val Acc: 32.37%


100%|██████████| 501/501 [01:47<00:00,  4.68it/s]
100%|██████████| 126/126 [00:27<00:00,  4.62it/s]

Epoch 10/10, Train Loss: 0.144, Train Acc: 95.89%, Val Loss: 3.353, Val Acc: 29.47%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▄▆▇████
train_loss,█▇▆▅▃▂▁▁▁▁
val_accuracy,▁
validation_accuracy,▃▆▃█▇▅▂▄▅▁
validation_loss,▁▁▂▂▃▄▆▆▇█

0,1
epoch,9.0
train_accuracy,95.89058
train_loss,0.14419
val_accuracy,29.47053
validation_accuracy,29.47053
validation_loss,3.35302


[34m[1mwandb[0m: Agent Starting Run: e0p549g0 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: decreasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: equal64


100%|██████████| 251/251 [01:50<00:00,  2.26it/s]
100%|██████████| 63/63 [00:25<00:00,  2.45it/s]


Epoch 1/10, Train Loss: 2.159, Train Acc: 20.63%, Val Loss: 2.052, Val Acc: 25.67%


100%|██████████| 251/251 [01:45<00:00,  2.37it/s]
100%|██████████| 63/63 [00:25<00:00,  2.46it/s]


Epoch 2/10, Train Loss: 2.003, Train Acc: 28.44%, Val Loss: 1.961, Val Acc: 29.77%


100%|██████████| 251/251 [01:46<00:00,  2.36it/s]
100%|██████████| 63/63 [00:25<00:00,  2.45it/s]


Epoch 3/10, Train Loss: 1.917, Train Acc: 32.24%, Val Loss: 1.930, Val Acc: 31.12%


100%|██████████| 251/251 [01:46<00:00,  2.35it/s]
100%|██████████| 63/63 [00:25<00:00,  2.43it/s]


Epoch 4/10, Train Loss: 1.851, Train Acc: 33.97%, Val Loss: 1.891, Val Acc: 34.37%


100%|██████████| 251/251 [01:47<00:00,  2.33it/s]
100%|██████████| 63/63 [00:26<00:00,  2.41it/s]


Epoch 5/10, Train Loss: 1.805, Train Acc: 36.27%, Val Loss: 1.881, Val Acc: 34.22%


100%|██████████| 251/251 [01:47<00:00,  2.33it/s]
100%|██████████| 63/63 [00:25<00:00,  2.44it/s]


Epoch 6/10, Train Loss: 1.756, Train Acc: 38.50%, Val Loss: 1.836, Val Acc: 36.11%


100%|██████████| 251/251 [01:47<00:00,  2.34it/s]
100%|██████████| 63/63 [00:25<00:00,  2.46it/s]


Epoch 7/10, Train Loss: 1.707, Train Acc: 39.86%, Val Loss: 1.836, Val Acc: 36.31%


100%|██████████| 251/251 [01:46<00:00,  2.36it/s]
100%|██████████| 63/63 [00:25<00:00,  2.45it/s]


Epoch 8/10, Train Loss: 1.643, Train Acc: 41.87%, Val Loss: 1.862, Val Acc: 35.71%


100%|██████████| 251/251 [01:44<00:00,  2.40it/s]
100%|██████████| 63/63 [00:25<00:00,  2.47it/s]


Epoch 9/10, Train Loss: 1.585, Train Acc: 44.29%, Val Loss: 1.831, Val Acc: 36.66%


100%|██████████| 251/251 [01:46<00:00,  2.36it/s]
100%|██████████| 63/63 [00:25<00:00,  2.45it/s]

Epoch 10/10, Train Loss: 1.500, Train Acc: 47.16%, Val Loss: 1.847, Val Acc: 36.11%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▅▆▆▇▇█
train_loss,█▆▅▅▄▄▃▃▂▁
val_accuracy,▁
validation_accuracy,▁▄▄▇▆██▇██
validation_loss,█▅▄▃▃▁▁▂▁▁

0,1
epoch,9.0
train_accuracy,47.16463
train_loss,1.50012
val_accuracy,36.11389
validation_accuracy,36.11389
validation_loss,1.84692


[34m[1mwandb[0m: Agent Starting Run: u4qycdjq with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augment: True
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: constant7
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: equal64


100%|██████████| 251/251 [02:20<00:00,  1.79it/s]
100%|██████████| 63/63 [00:32<00:00,  1.96it/s]


Epoch 1/10, Train Loss: 2.161, Train Acc: 20.48%, Val Loss: 2.092, Val Acc: 22.43%


100%|██████████| 251/251 [02:16<00:00,  1.84it/s]
100%|██████████| 63/63 [00:31<00:00,  2.03it/s]


Epoch 2/10, Train Loss: 2.047, Train Acc: 25.52%, Val Loss: 2.030, Val Acc: 27.47%


100%|██████████| 251/251 [02:15<00:00,  1.86it/s]
100%|██████████| 63/63 [00:31<00:00,  1.99it/s]


Epoch 3/10, Train Loss: 1.980, Train Acc: 29.24%, Val Loss: 1.957, Val Acc: 30.72%


100%|██████████| 251/251 [02:16<00:00,  1.84it/s]
100%|██████████| 63/63 [00:31<00:00,  2.01it/s]


Epoch 4/10, Train Loss: 1.915, Train Acc: 31.96%, Val Loss: 1.947, Val Acc: 29.82%


100%|██████████| 251/251 [02:17<00:00,  1.83it/s]
100%|██████████| 63/63 [00:33<00:00,  1.91it/s]


Epoch 5/10, Train Loss: 1.865, Train Acc: 33.52%, Val Loss: 1.924, Val Acc: 32.27%


100%|██████████| 251/251 [02:15<00:00,  1.85it/s]
100%|██████████| 63/63 [00:31<00:00,  2.01it/s]


Epoch 6/10, Train Loss: 1.807, Train Acc: 36.00%, Val Loss: 1.895, Val Acc: 33.62%


100%|██████████| 251/251 [02:15<00:00,  1.85it/s]
100%|██████████| 63/63 [00:31<00:00,  2.00it/s]


Epoch 7/10, Train Loss: 1.761, Train Acc: 38.12%, Val Loss: 1.878, Val Acc: 34.52%


100%|██████████| 251/251 [02:14<00:00,  1.86it/s]
100%|██████████| 63/63 [00:31<00:00,  1.98it/s]


Epoch 8/10, Train Loss: 1.707, Train Acc: 40.77%, Val Loss: 1.906, Val Acc: 32.87%


100%|██████████| 251/251 [02:15<00:00,  1.85it/s]
100%|██████████| 63/63 [00:32<00:00,  1.96it/s]


Epoch 9/10, Train Loss: 1.641, Train Acc: 43.24%, Val Loss: 1.955, Val Acc: 33.12%


100%|██████████| 251/251 [02:16<00:00,  1.84it/s]
100%|██████████| 63/63 [00:32<00:00,  1.93it/s]

Epoch 10/10, Train Loss: 1.609, Train Acc: 43.88%, Val Loss: 1.895, Val Acc: 33.77%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▄▅▆▆▇██
train_loss,█▇▆▅▄▄▃▂▁▁
val_accuracy,▁
validation_accuracy,▁▄▆▅▇▇█▇▇█
validation_loss,█▆▄▃▃▂▁▂▄▂

0,1
epoch,9.0
train_accuracy,43.87959
train_loss,1.60939
val_accuracy,33.76623
validation_accuracy,33.76623
validation_loss,1.89494


[34m[1mwandb[0m: Agent Starting Run: l09acwk2 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augment: True
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: decreasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: equal16


100%|██████████| 251/251 [02:12<00:00,  1.89it/s]
100%|██████████| 63/63 [00:32<00:00,  1.93it/s]


Epoch 1/10, Train Loss: 2.279, Train Acc: 13.45%, Val Loss: 2.235, Val Acc: 16.13%


100%|██████████| 251/251 [02:09<00:00,  1.94it/s]
100%|██████████| 63/63 [00:32<00:00,  1.96it/s]


Epoch 2/10, Train Loss: 2.189, Train Acc: 20.10%, Val Loss: 2.140, Val Acc: 22.43%


100%|██████████| 251/251 [02:09<00:00,  1.94it/s]
100%|██████████| 63/63 [00:32<00:00,  1.96it/s]


Epoch 3/10, Train Loss: 2.107, Train Acc: 24.28%, Val Loss: 2.081, Val Acc: 25.27%


100%|██████████| 251/251 [02:09<00:00,  1.94it/s]
100%|██████████| 63/63 [00:31<00:00,  1.99it/s]


Epoch 4/10, Train Loss: 2.061, Train Acc: 26.41%, Val Loss: 2.032, Val Acc: 26.92%


100%|██████████| 251/251 [02:07<00:00,  1.97it/s]
100%|██████████| 63/63 [00:33<00:00,  1.86it/s]


Epoch 5/10, Train Loss: 2.023, Train Acc: 27.94%, Val Loss: 2.026, Val Acc: 27.82%


100%|██████████| 251/251 [02:07<00:00,  1.97it/s]
100%|██████████| 63/63 [00:34<00:00,  1.84it/s]


Epoch 6/10, Train Loss: 1.987, Train Acc: 28.57%, Val Loss: 2.011, Val Acc: 27.82%


100%|██████████| 251/251 [02:08<00:00,  1.95it/s]
100%|██████████| 63/63 [00:31<00:00,  2.00it/s]


Epoch 7/10, Train Loss: 1.974, Train Acc: 29.89%, Val Loss: 1.985, Val Acc: 29.17%


100%|██████████| 251/251 [02:09<00:00,  1.94it/s]
100%|██████████| 63/63 [00:31<00:00,  2.01it/s]


Epoch 8/10, Train Loss: 1.956, Train Acc: 30.11%, Val Loss: 1.967, Val Acc: 29.52%


100%|██████████| 251/251 [02:06<00:00,  1.98it/s]
100%|██████████| 63/63 [00:31<00:00,  1.99it/s]


Epoch 9/10, Train Loss: 1.932, Train Acc: 31.71%, Val Loss: 1.971, Val Acc: 30.77%


100%|██████████| 251/251 [02:07<00:00,  1.97it/s]
100%|██████████| 63/63 [00:31<00:00,  2.03it/s]

Epoch 10/10, Train Loss: 1.922, Train Acc: 32.28%, Val Loss: 1.950, Val Acc: 31.62%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▆▇▇▇██
train_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁
validation_accuracy,▁▄▅▆▆▆▇▇██
validation_loss,█▆▄▃▃▃▂▁▂▁

0,1
epoch,9.0
train_accuracy,32.27579
train_loss,1.9222
val_accuracy,31.61838
validation_accuracy,31.61838
validation_loss,1.94951


[34m[1mwandb[0m: Agent Starting Run: uif87mlh with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	data_augment: True
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 1024
[34m[1mwandb[0m: 	kernel_size: increasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: equal16


100%|██████████| 1001/1001 [02:17<00:00,  7.29it/s]
100%|██████████| 251/251 [00:32<00:00,  7.72it/s]


Epoch 1/10, Train Loss: 2.250, Train Acc: 15.76%, Val Loss: 2.170, Val Acc: 19.88%


100%|██████████| 1001/1001 [02:12<00:00,  7.55it/s]
100%|██████████| 251/251 [00:33<00:00,  7.54it/s]


Epoch 2/10, Train Loss: 2.139, Train Acc: 21.48%, Val Loss: 2.112, Val Acc: 22.73%


100%|██████████| 1001/1001 [02:12<00:00,  7.53it/s]
100%|██████████| 251/251 [00:32<00:00,  7.71it/s]


Epoch 3/10, Train Loss: 2.091, Train Acc: 23.91%, Val Loss: 2.068, Val Acc: 24.53%


100%|██████████| 1001/1001 [02:12<00:00,  7.54it/s]
100%|██████████| 251/251 [00:31<00:00,  7.89it/s]


Epoch 4/10, Train Loss: 2.053, Train Acc: 25.58%, Val Loss: 2.049, Val Acc: 25.22%


100%|██████████| 1001/1001 [02:12<00:00,  7.53it/s]
100%|██████████| 251/251 [00:31<00:00,  7.91it/s]


Epoch 5/10, Train Loss: 2.026, Train Acc: 26.57%, Val Loss: 2.026, Val Acc: 27.32%


100%|██████████| 1001/1001 [02:12<00:00,  7.58it/s]
100%|██████████| 251/251 [00:31<00:00,  7.98it/s]


Epoch 6/10, Train Loss: 2.010, Train Acc: 27.29%, Val Loss: 2.005, Val Acc: 28.57%


100%|██████████| 1001/1001 [02:10<00:00,  7.65it/s]
100%|██████████| 251/251 [00:34<00:00,  7.27it/s]


Epoch 7/10, Train Loss: 1.984, Train Acc: 29.32%, Val Loss: 2.017, Val Acc: 27.82%


100%|██████████| 1001/1001 [02:10<00:00,  7.68it/s]
100%|██████████| 251/251 [00:35<00:00,  7.16it/s]


Epoch 8/10, Train Loss: 1.961, Train Acc: 29.47%, Val Loss: 1.979, Val Acc: 29.42%


100%|██████████| 1001/1001 [02:10<00:00,  7.70it/s]
100%|██████████| 251/251 [00:31<00:00,  7.96it/s]


Epoch 9/10, Train Loss: 1.950, Train Acc: 30.39%, Val Loss: 1.987, Val Acc: 28.92%


100%|██████████| 1001/1001 [02:12<00:00,  7.55it/s]
100%|██████████| 251/251 [00:31<00:00,  7.89it/s]

Epoch 10/10, Train Loss: 1.935, Train Acc: 31.15%, Val Loss: 1.958, Val Acc: 31.27%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▅▆▆▇▇██
train_loss,█▆▄▄▃▃▂▂▁▁
val_accuracy,▁
validation_accuracy,▁▃▄▄▆▆▆▇▇█
validation_loss,█▆▅▄▃▃▃▂▂▁

0,1
epoch,9.0
train_accuracy,31.15164
train_loss,1.9353
val_accuracy,31.26873
validation_accuracy,31.26873
validation_loss,1.95756


[34m[1mwandb[0m: Agent Starting Run: q6hii2jb with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	data_augment: True
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: constant7
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: equal16


100%|██████████| 501/501 [02:16<00:00,  3.68it/s]
100%|██████████| 126/126 [00:31<00:00,  4.01it/s]


Epoch 1/10, Train Loss: 2.225, Train Acc: 16.33%, Val Loss: 2.178, Val Acc: 18.58%


100%|██████████| 501/501 [02:10<00:00,  3.83it/s]
100%|██████████| 126/126 [00:31<00:00,  3.99it/s]


Epoch 2/10, Train Loss: 2.130, Train Acc: 21.36%, Val Loss: 2.111, Val Acc: 21.98%


100%|██████████| 501/501 [02:10<00:00,  3.83it/s]
100%|██████████| 126/126 [00:31<00:00,  4.04it/s]


Epoch 3/10, Train Loss: 2.073, Train Acc: 24.17%, Val Loss: 2.068, Val Acc: 25.52%


100%|██████████| 501/501 [02:11<00:00,  3.82it/s]
100%|██████████| 126/126 [00:32<00:00,  3.89it/s]


Epoch 4/10, Train Loss: 2.033, Train Acc: 25.89%, Val Loss: 2.030, Val Acc: 27.32%


100%|██████████| 501/501 [02:10<00:00,  3.84it/s]
100%|██████████| 126/126 [00:32<00:00,  3.91it/s]


Epoch 5/10, Train Loss: 2.003, Train Acc: 27.49%, Val Loss: 2.019, Val Acc: 27.72%


100%|██████████| 501/501 [02:08<00:00,  3.89it/s]
100%|██████████| 126/126 [00:34<00:00,  3.68it/s]


Epoch 6/10, Train Loss: 1.981, Train Acc: 28.69%, Val Loss: 1.995, Val Acc: 28.67%


100%|██████████| 501/501 [02:09<00:00,  3.87it/s]
100%|██████████| 126/126 [00:33<00:00,  3.81it/s]


Epoch 7/10, Train Loss: 1.957, Train Acc: 29.99%, Val Loss: 1.982, Val Acc: 30.12%


100%|██████████| 501/501 [02:08<00:00,  3.89it/s]
100%|██████████| 126/126 [00:31<00:00,  4.02it/s]


Epoch 8/10, Train Loss: 1.941, Train Acc: 30.41%, Val Loss: 1.956, Val Acc: 30.92%


100%|██████████| 501/501 [02:10<00:00,  3.85it/s]
100%|██████████| 126/126 [00:31<00:00,  4.02it/s]


Epoch 9/10, Train Loss: 1.919, Train Acc: 31.73%, Val Loss: 1.987, Val Acc: 29.92%


100%|██████████| 501/501 [02:10<00:00,  3.85it/s]
100%|██████████| 126/126 [00:32<00:00,  3.94it/s]

Epoch 10/10, Train Loss: 1.901, Train Acc: 32.41%, Val Loss: 1.975, Val Acc: 30.47%





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁
validation_accuracy,▁▃▅▆▆▇██▇█
validation_loss,█▆▅▃▃▂▂▁▂▂

0,1
epoch,9.0
train_accuracy,32.41319
train_loss,1.90146
val_accuracy,30.46953
validation_accuracy,30.46953
validation_loss,1.97538


[34m[1mwandb[0m: Agent Starting Run: kfnecoz5 with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augment: False
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	fc_layer_size: 2048
[34m[1mwandb[0m: 	kernel_size: increasing
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: doubling16


100%|██████████| 251/251 [01:49<00:00,  2.30it/s]
 78%|███████▊  | 49/63 [00:20<00:05,  2.55it/s]

## **NOTE :-** The hyperparameter sweep was set for 50 experiments. But due to colab rate limits, the sweep got disconnected after 16 runs. But we got to see some satisfactory results within these 16 runs.




## The best validation accuracy I was able to get by hyperparameter tuning is 36.713.

Best hyperparameters among these are:-

| Rank | Activation | Batch Norm | Batch Size | Data Augment | Dropout | FC Layer Size | Kernel Size  | Learning Rate | Num Filters | Train Acc | Val Acc | Train Loss | Val Loss |
|------|------------|------------|------------|--------------|---------|---------------|--------------|---------------|-------------|-----------|---------|------------|---------|
| 1st  | Mish       | True       | 16         | True         | 0       | 2048          | Decreasing   | 0.0001        | Equal32     | 43.99%    | 36.71%  | 1.59254    | 1.84438 |
| 2nd  | Mish       | False      | 32         | False        | 0       | 2048          | Decreasing   | 0.0001        | Equal64     | 47.16%    | 36.11%  | 1.50012    | 1.84692 |
| 3rd  | Mish       | False      | 32         | False        | 0       | 2048          | Increasing   | 0.0001        | Equal64     | 36.62%    | 34.17%  | 1.80000    | 1.88000 |


## Observation:-
In one of the run I was also able to attain 95.89% accuracy on the train dataset with only 29.47% accuracy on validation set signifying considerable overfitting.

## I. BEST MODEL TRAINING AND EVALUATION

In [18]:
# Image resize size
image_size = (224, 224)


# Get the trainloader with complete train dataset (No validation set)
trainloader, _, classnames = get_train_and_val_dataloaders(TRAIN_DATA_DIR, image_size=image_size, valset_size=0, data_augment=True, batch_size=16)


# Define the model and move to the device
best_model = CNNModel(image_size, num_filters=get_num_filters("equal32"), kernel_size=get_kernel_size("decreasing"),
                     activation_fn=get_activation_function("mish"), batchnorm=True, dropout=0, fc_layer_size=2048)
best_model.to(device)

# Define the criterion and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(best_model.parameters(), lr=0.0001)


epochs = 10

# Training Loops
for epoch in range(epochs):

  # Set the model in train mode
  best_model.train()

  # Metrics to keep track of
  running_loss = 0
  correct = 0
  total = 0

  for data in tqdm(trainloader):

    inputs, labels = data[0].to(device), data[1].to(device)

    optimizer.zero_grad()

    outputs = best_model(inputs)

    loss = criterion(outputs, labels)

    loss.backward()
    optimizer.step()

    # Metric update
    running_loss += loss.item() * inputs.size(0)
    total += inputs.size(0)
    correct += torch.sum(labels == torch.argmax(outputs, dim=1)).item()

  train_epoch_loss = running_loss / total
  train_epoch_accuracy = 100 * correct / total

  print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_epoch_loss:.3f}, Train Acc: {train_epoch_accuracy:.2f}%")

100%|██████████| 626/626 [33:50<00:00,  3.24s/it]


Epoch 1/10, Train Loss: 2.102, Train Acc: 25.41%


100%|██████████| 626/626 [02:41<00:00,  3.87it/s]


Epoch 2/10, Train Loss: 1.953, Train Acc: 31.23%


100%|██████████| 626/626 [02:39<00:00,  3.92it/s]


Epoch 3/10, Train Loss: 1.875, Train Acc: 33.98%


100%|██████████| 626/626 [02:37<00:00,  3.98it/s]


Epoch 4/10, Train Loss: 1.831, Train Acc: 35.46%


100%|██████████| 626/626 [02:39<00:00,  3.93it/s]


Epoch 5/10, Train Loss: 1.770, Train Acc: 38.20%


100%|██████████| 626/626 [02:39<00:00,  3.92it/s]


Epoch 6/10, Train Loss: 1.739, Train Acc: 39.61%


100%|██████████| 626/626 [02:38<00:00,  3.95it/s]


Epoch 7/10, Train Loss: 1.712, Train Acc: 40.48%


100%|██████████| 626/626 [02:36<00:00,  3.99it/s]


Epoch 8/10, Train Loss: 1.665, Train Acc: 42.26%


100%|██████████| 626/626 [02:37<00:00,  3.98it/s]


Epoch 9/10, Train Loss: 1.647, Train Acc: 42.84%


100%|██████████| 626/626 [02:36<00:00,  4.00it/s]

Epoch 10/10, Train Loss: 1.608, Train Acc: 44.47%





In [28]:
# Best model evaluation

# Load the test dataset
testloader = get_test_dataloader(TEST_DATA_DIR, image_size=image_size, batch_size=16)

# Set the model in evaluation mode
best_model.eval()

# Values to keep track of
running_loss = 0
correct = 0
total = 0

with torch.no_grad():
  for data in tqdm(testloader):

    inputs, labels = data[0].to(device), data[1].to(device)
    outputs = best_model(inputs)
    loss = criterion(outputs, labels)

    running_loss += loss.item() * inputs.size(0)
    total += inputs.size(0)
    correct += torch.sum(labels == torch.argmax(outputs, dim=1)).item()

test_loss = running_loss / total
test_accuracy = (correct / total) * 100

print(f"\nTest Loss: {test_loss:.3f}, Test Acc: {test_accuracy:.2f}%")

100%|██████████| 125/125 [00:24<00:00,  5.10it/s]


Test Loss: 1.756, Test Acc: 40.50%





## Metrics of the best model :-
## Test Loss of best model :- 1.756
## Test Accuracy of best model :- 40.50%

## J. SAVE THE MODEL FOR FURTHER USE

In [24]:
model_path = "/content/drive/MyDrive/Data/models/da6401_2_best_model.pth"
torch.save(best_model, model_path)

In [26]:
best_model = torch.load(model_path, weights_only=False)

## K. Create the Prediction Table

In [33]:
# Get 10 random samples from test dataset
num_samples = 10
indices = random.sample(range(len(testloader.dataset)), num_samples)
sample_images = [testloader.dataset[i] for i in indices]

images = []
labels = []
predictions = []

for data in sample_images:
  input, label = data[0].to(device), data[1]
  input = input.unsqueeze(0)

  output = best_model(input)
  prediction = output.argmax(dim=1).item()

  # Undo image transformations
  image = input.squeeze(0).cpu().numpy()
  image = image.transpose(1, 2, 0)
  image = image * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
  image = np.clip(image, 0, 1)

  images.append(image)
  labels.append(classnames[label])
  predictions.append(classnames[prediction])

wandb.init(project = "da24m021_da6401_assignment2", name="prediction_visualization")
columns = ["Image", "True Label", "Predicted Label"]
prediction_table = wandb.Table(columns=columns)

for i in range(num_samples):
  img = wandb.Image(images[i])

  true_label = labels[i]
  pred_label = predictions[i]
  prediction_table.add_data(
      img,
      true_label,
      pred_label
  )

# Log the table to wandb
wandb.log({"Prediction Samples": prediction_table})