In [8]:
!pip install wandb



In [1]:
import os
import numpy as np
import zipfile
import requests
from tqdm import tqdm
import torch
import torch.nn as nn
# import torch.nn.functional as F
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split, DataLoader
from sklearn.model_selection import train_test_split
from torch import optim
from pathlib import Path
import json
import wandb

In [2]:
dataset_url = "https://storage.googleapis.com/wandb_datasets/nature_12K.zip"
dataset_zip_path = "/kaggle/working/nature_12K.zip"
dataset_dir = "nature_12K"

In [3]:
if not os.path.exists(dataset_dir):
    if not os.path.exists(dataset_zip_path):
        print("Downloading iNaturalist-12K...")
        response = requests.get(dataset_url, stream=True)
        total_size = int(response.headers.get('content-length', 0))
        with open(dataset_zip_path, 'wb') as f, tqdm(
            desc=dataset_zip_path,
            total=total_size,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in response.iter_content(chunk_size=1024):
                size = f.write(data)
                bar.update(size)

Downloading iNaturalist-12K...


/kaggle/working/nature_12K.zip: 100%|██████████| 3.55G/3.55G [00:38<00:00, 100MiB/s] 


In [4]:
if not os.path.exists(dataset_dir):
    print("Extracting dataset...")
    with zipfile.ZipFile(dataset_zip_path, 'r') as zip_ref:
        zip_ref.extractall(".")

Extracting dataset...


In [5]:
def prepare_datasets(data_dir, val_split=0.2, batch_size=32, image_size=(224, 224)):
    data_dir = Path(data_dir)

    # Define transforms (customize as needed)
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),  # Converts to [0, 1] and CxHxW
        transforms.Normalize(mean=[0.5]*3, std=[0.5]*3),  # Normalize RGB
    ])

    # Load training and testing datasets
    full_train_dataset = ImageFolder(root=data_dir / "train", transform=transform)
    test_dataset = ImageFolder(root=data_dir / "val", transform=transform)

    # Create validation split from training set
    val_size = int(val_split * len(full_train_dataset))
    train_size = len(full_train_dataset) - val_size

    train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

    # Extract X and Y by iterating over DataLoader batches if needed
    train_loader = DataLoader(train_dataset, batch_size=len(train_dataset), shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=len(val_dataset), shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

    # Convert to X, Y tensors
    def extract_XY(loader):
        for X, Y in loader:
            return X, Y

    X_train, Y_train = extract_XY(train_loader)
    X_val, Y_val = extract_XY(val_loader)
    X_test, Y_test = extract_XY(test_loader)

    return X_train, Y_train, X_val, Y_val, X_test, Y_test

In [6]:
# dataset_dir = "/kaggle/working/inaturalist_12K"

In [7]:
# X_train, Y_train, X_val, Y_val, X_test, Y_test = prepare_datasets(
#     data_dir='/kaggle/working/inaturalist_12K', val_split=0.2, batch_size=64, image_size=(224, 224)
# )

In [8]:
# print(X_train.shape)

# Part A
### Question 1

Build a small CNN model consisting of 5 convolution layers. Each convolution layer would be followed by an activation and a max-pooling layer.

After 5 such conv-activation-maxpool blocks, you should have one dense layer followed by the output layer containing 10 neurons. The input layer should be compatible with the images in the iNaturalist dataset dataset.
The code should be flexible such that the number of filters, size of filters, and activation function of the convolution layers and dense layers can be changed. You should also be able to change the number of neurons in the dense layer.

In [7]:
api_key = "7040d84a3ed65a967eb3389dd6fe774b418576ed" 
wandb.login(key=api_key)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mda24m004[0m ([33mda24m004-iitmaana[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [8]:
class FlexibleCNN(nn.Module):
    def __init__(self,
                 num_filters=32,  # initial number of filters
                 filter_size=3,   # size of filters (k x k)
                 activation='relu',  # activation function
                 dense_neurons=512,  # number of neurons in dense layer
                 input_channels=3,   # RGB images
                 num_classes=10,    # number of output classes
                 use_batch_norm=True,  # whether to use batch normalization
                 dropout_rate=0.2,   # dropout rate
                 filter_strategy='same'):  # filter organization strategy
        super(FlexibleCNN, self).__init__()
        
        # Store parameters for calculations
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.dense_neurons = dense_neurons
        self.use_batch_norm = use_batch_norm
        self.dropout_rate = dropout_rate
        self.filter_strategy = filter_strategy
        self.input_size = 224  # Initial image size
        
        # Choose activation function
        if activation.lower() == 'relu':
            self.activation = nn.ReLU()
        elif activation.lower() == 'leakyrelu':
            self.activation = nn.LeakyReLU()
        elif activation.lower() == 'gelu':
            self.activation = nn.GELU()
        elif activation.lower() == 'silu':
            self.activation = nn.SiLU()
        elif activation.lower() == 'mish':
            self.activation = nn.Mish()
        else:
            raise ValueError(f"Unsupported activation function: {activation}")
        

        # Define filters based on strategy
        filter_counts = []
        if filter_strategy == 'same':
            filter_counts = [num_filters] * 5
        elif filter_strategy == 'double':
            for i in range(5):
                filter_counts.append(num_filters * (2**i))
        elif filter_strategy == 'half':
            for i in range(5):
                filter_counts.append(max(num_filters // (2**i), 8))
        elif filter_strategy == 'pyramid':
            # Increase then decrease like a pyramid
            filter_counts = [num_filters, num_filters*2, num_filters*4, num_filters*2, num_filters]
        else:
            filter_counts = [num_filters] * 5  # Default to same
            
        # Create 5 conv-activation-maxpool blocks
        self.conv_blocks = nn.ModuleList()
        in_channels = input_channels
        
        for i in range(5):
            block = []
            # Conv layer
            block.append(nn.Conv2d(in_channels, filter_counts[i], filter_size, padding=filter_size//2))

            # Batch normalization if enabled
            if use_batch_norm:
                block.append(nn.BatchNorm2d(filter_counts[i]))

            # Activation
            block.append(self.activation)

            # Max pooling
            block.append(nn.MaxPool2d(2, 2))

            # Dropout after pooling
            block.append(nn.Dropout2d(dropout_rate))

            self.conv_blocks.append(nn.Sequential(*block))
            in_channels = filter_counts[i]
            self.input_size //= 2  # Each max pool reduces size by half
        
        # Calculate the size of the flattened features after conv blocks
        # Assuming input size of 224x224 (standard for iNaturalist)
        self.final_filters = filter_counts[-1]
        self.flattened_size = self.final_filters * (self.input_size * self.input_size)
        
        # Dense layers
        self.dense = nn.Sequential(
            nn.Linear(self.flattened_size, dense_neurons),
            self.activation,
            nn.Dropout(dropout_rate),  # Dropout before final layer
            nn.Linear(dense_neurons, num_classes)
        )
    
    def forward(self, x):
        for block in self.conv_blocks:
            x = block(x)
        x = x.view(x.size(0), -1)
        x = self.dense(x)
        return x
    
    def get_computations(self):
        """Calculate total number of computations"""
        # Computations in conv layers
        # conv_computations = 0
        # input_size = 224
        # in_channels = 3
        
        # for i in range(5):
        #     # Each conv layer
        #     conv_computations += (input_size * input_size * in_channels * 
        #                         self.num_filters * self.filter_size * self.filter_size)
        #     # Each maxpool reduces size by 2
        #     input_size = input_size // 2
        #     in_channels = self.num_filters
        
        # # Computations in dense layers
        # dense_computations = (self.flattened_size * self.dense_neurons +  # first dense layer
        #                     self.dense_neurons * 10)  # output layer
        
        return sum(param.numel() for param in self.parameters()) * 100
    
    def get_parameters(self):
        """Calculate total number of parameters"""
        # Parameters in conv layers
        # conv_params = 0
        # in_channels = 3
        
        # for _ in range(5):
        #     # Each conv layer has (filter_size * filter_size * in_channels + 1) * num_filters parameters
        #     conv_params += (self.filter_size * self.filter_size * in_channels + 1) * self.num_filters
        #     # Batch norm parameters if enabled
        #     if self.use_batch_norm:
        #         conv_params += 2 * self.num_filters  # gamma and beta for each channel
        #     in_channels = self.num_filters
        
        # # Parameters in dense layers
        # dense_params = (self.flattened_size * self.dense_neurons + self.dense_neurons +  # first dense layer
        #                self.dense_neurons * 10 + 10)  # output layer
        
        return sum(p.numel() for p in self.parameters())

In [9]:
def train(config=None):
    # Initialize wandb
    with wandb.init(config=config):
        config = wandb.config
        
        # Initialize best_val_acc at the beginning of the function
        best_val_acc = 0.0

        # Set random seed for reproducibility
        torch.manual_seed(config.seed)
        np.random.seed(config.seed)

        # Data augmentation and normalization based on config
        if config.use_augmentation:
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(10),
                transforms.ColorJitter(brightness=0.2, contrast=0.2),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])
            ])
        else:
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])
            ])

        val_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
        ])
        
        # Load dataset
        train_dataset = datasets.ImageFolder(
            root='/kaggle/working/inaturalist_12K/train',  # Update to match your dataset path
            transform=train_transform
        )
        
        # Split into train and validation
        train_indices, val_indices = train_test_split(
            list(range(len(train_dataset))),
            test_size=0.2,
            stratify=train_dataset.targets,
            random_state=config.seed
        )
        
        train_subset = torch.utils.data.Subset(train_dataset, train_indices)
        val_subset = torch.utils.data.Subset(train_dataset, val_indices)
        
        train_loader = DataLoader(
            train_subset,
            batch_size=config.batch_size,
            shuffle=True,
            num_workers=4
        )
        
        val_loader = DataLoader(
            val_subset,
            batch_size=config.batch_size,
            shuffle=False,
            num_workers=4
        )
        
        # Initialize model with filter strategy
        model = FlexibleCNN(
            num_filters=config.num_filters,
            filter_size=config.filter_size,
            activation=config.activation,
            dense_neurons=config.dense_neurons,
            use_batch_norm=config.use_batch_norm,
            dropout_rate=config.dropout_rate,
            filter_strategy=config.filter_strategy
        )
        
        # Move model to GPU if available
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
        
        # Loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
        
        # Update run name to include new parameters
        run_name = f"nf_{config.num_filters}_fs_{config.filter_size}_strat_{config.filter_strategy}_act_{config.activation}_dn_{config.dense_neurons}_lr_{config.learning_rate}_bs_{config.batch_size}_bn_{config.use_batch_norm}_dr_{config.dropout_rate}_aug_{config.use_augmentation}"
        
        # Set the run name
        wandb.run.name = run_name
        
        # The rest of the training code remains the same
        for epoch in range(config.epochs):
            # Training phase
            model.train()
            train_loss = 0
            train_correct = 0
            train_total = 0

            for batch_idx, (inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(device), targets.to(device)

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = outputs.max(1)
                train_total += targets.size(0)
                train_correct += predicted.eq(targets).sum().item()

                if batch_idx % 100 == 0:
                    print(f'Epoch: {epoch}, Batch: {batch_idx}, Loss: {loss.item():.4f}')

            train_acc = 100. * train_correct / train_total

            # Validation phase
            model.eval()
            val_loss = 0
            val_correct = 0
            val_total = 0

            with torch.no_grad():
                for inputs, targets in val_loader:
                    inputs, targets = inputs.to(device), targets.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)

                    val_loss += loss.item()
                    _, predicted = outputs.max(1)
                    val_total += targets.size(0)
                    val_correct += predicted.eq(targets).sum().item()

            val_acc = 100. * val_correct / val_total

            # Log metrics to wandb
            wandb.log({
                "epoch": epoch,
                "train_loss": train_loss / len(train_loader),
                "train_acc": train_acc,
                "val_loss": val_loss / len(val_loader),
                "val_acc": val_acc
            })

            # Save best model
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(model.state_dict(), 'best_model.pth')
                print(f"Best model saved with val_acc: {val_acc:.2f}% at epoch {epoch}")

            print(f'Epoch: {epoch}, Train Loss: {train_loss/len(train_loader):.4f}, '
                  f'Train Acc: {train_acc:.2f}%, Val Loss: {val_loss/len(val_loader):.4f}, '
                  f'Val Acc: {val_acc:.2f}%')

In [10]:

# Define sweep configuration
sweep_config = {
    'method': 'bayes',  # Use Bayesian optimization
    'metric': {
        'name': 'val_acc',
        'goal': 'maximize'
    },
    'parameters': {
        'num_filters': {
            'values': [16, 32, 64]
        },
        'filter_size': {
            'values': [3]
        },
        'filter_strategy': {
            'values': ['same', 'double', 'half', 'pyramid']
        },
        'activation': {
            'values': ['relu', 'gelu', 'silu', 'mish']
        },
        'dense_neurons': {
            'values': [128, 256]
        },
        'learning_rate': {
            'min': 1e-4,
            'max': 1e-2
        },
        'batch_size': {
            'values': [32, 64, 128]
        },
        'use_batch_norm': {
            'values': [True, False]
        },
        'dropout_rate': {
            'values': [0.2, 0.3, 0.4]
        },
        'use_augmentation': {
            'values': [True, False]
        },
        'epochs': {
            'value': 7
        },
        'seed': {
            'value': 42
        }
    }
}

# Initialize sweep
sweep_id = wandb.sweep(sweep_config, project="inaturalist-cnn-sweep_sample_02")

# Run sweep
wandb.agent(sweep_id, train, count=35)

Create sweep with ID: 1i5lqabw
Sweep URL: https://wandb.ai/da24m004-iitmaana/inaturalist-cnn-sweep_sample_02/sweeps/1i5lqabw


[34m[1mwandb[0m: Agent Starting Run: u3n6k8ap with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: double
[34m[1mwandb[0m: 	learning_rate: 0.00872506802859034
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Epoch: 0, Batch: 0, Loss: 2.3072
Best model saved with val_acc: 10.00% at epoch 0
Epoch: 0, Train Loss: 501.0558, Train Acc: 10.13%, Val Loss: 2.3042, Val Acc: 10.00%
Epoch: 1, Batch: 0, Loss: 2.3107
Epoch: 1, Train Loss: 2.4239, Train Acc: 9.89%, Val Loss: 2.3028, Val Acc: 10.00%
Epoch: 2, Batch: 0, Loss: 2.3028
Epoch: 2, Train Loss: 2.4012, Train Acc: 9.11%, Val Loss: 2.3026, Val Acc: 10.00%
Epoch: 3, Batch: 0, Loss: 2.3030
Epoch: 3, Train Loss: 2.3030, Train Acc: 9.86%, Val Loss: 2.3026, Val Acc: 10.00%
Epoch: 4, Batch: 0, Loss: 2.3025
Epoch: 4, Train Loss: 2.3034, Train Acc: 9.21%, Val Loss: 2.3026, Val Acc: 10.00%
Epoch: 5, Batch: 0, Loss: 2.3033
Epoch: 5, Train Loss: 2.3031, Train Acc: 9.56%, Val Loss: 2.3026, Val Acc: 10.00%
Epoch: 6, Batch: 0, Loss: 2.3039
Epoch: 6, Train Loss: 2.3927, Train Acc: 9.55%, Val Loss: 2.3026, Val Acc: 10.00%


0,1
epoch,▁▂▃▅▆▇█
train_acc,█▆▁▆▂▄▄
train_loss,█▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁
val_loss,█▂▁▁▁▁▁

0,1
epoch,6.0
train_acc,9.55119
train_loss,2.39273
val_acc,10.0
val_loss,2.30263


[34m[1mwandb[0m: Agent Starting Run: fj9mpy23 with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: same
[34m[1mwandb[0m: 	learning_rate: 0.0051511926357889135
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3028
Best model saved with val_acc: 10.00% at epoch 0
Epoch: 0, Train Loss: 2.3620, Train Acc: 10.48%, Val Loss: 2.3030, Val Acc: 10.00%
Epoch: 1, Batch: 0, Loss: 2.3217
Epoch: 1, Train Loss: 2.3258, Train Acc: 9.63%, Val Loss: 2.3026, Val Acc: 10.00%
Epoch: 2, Batch: 0, Loss: 2.3118
Epoch: 2, Train Loss: 2.3123, Train Acc: 9.43%, Val Loss: 2.3026, Val Acc: 10.00%
Epoch: 3, Batch: 0, Loss: 2.3031
Epoch: 3, Train Loss: 2.3094, Train Acc: 9.13%, Val Loss: 2.3026, Val Acc: 10.00%
Epoch: 4, Batch: 0, Loss: 2.3025
Epoch: 4, Train Loss: 2.3073, Train Acc: 9.00%, Val Loss: 2.3026, Val Acc: 10.00%
Epoch: 5, Batch: 0, Loss: 2.3029
Epoch: 5, Train Loss: 2.3537, Train Acc: 9.30%, Val Loss: 2.3026, Val Acc: 10.00%
Epoch: 6, Batch: 0, Loss: 2.3023
Epoch: 6, Train Loss: 2.3435, Train Acc: 9.01%, Val Loss: 2.3026, Val Acc: 10.00%


0,1
epoch,▁▂▃▅▆▇█
train_acc,█▄▃▂▁▂▁
train_loss,█▃▂▁▁▇▆
val_acc,▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁

0,1
epoch,6.0
train_acc,9.01363
train_loss,2.34349
val_acc,10.0
val_loss,2.30262


[34m[1mwandb[0m: Agent Starting Run: uhigy0jh with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: same
[34m[1mwandb[0m: 	learning_rate: 0.009662759213098051
[34m[1mwandb[0m: 	num_filters: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3972
Epoch: 0, Batch: 100, Loss: 2.2514
Best model saved with val_acc: 12.50% at epoch 0
Epoch: 0, Train Loss: 2.3611, Train Acc: 10.88%, Val Loss: 2.3023, Val Acc: 12.50%
Epoch: 1, Batch: 0, Loss: 2.3078
Epoch: 1, Batch: 100, Loss: 2.3354
Best model saved with val_acc: 13.90% at epoch 1
Epoch: 1, Train Loss: 2.3065, Train Acc: 11.70%, Val Loss: 2.2950, Val Acc: 13.90%
Epoch: 2, Batch: 0, Loss: 2.2962
Epoch: 2, Batch: 100, Loss: 2.3169
Best model saved with val_acc: 15.05% at epoch 2
Epoch: 2, Train Loss: 2.3014, Train Acc: 12.04%, Val Loss: 2.2830, Val Acc: 15.05%
Epoch: 3, Batch: 0, Loss: 2.3147
Epoch: 3, Batch: 100, Loss: 2.2678
Epoch: 3, Train Loss: 2.2934, Train Acc: 13.49%, Val Loss: 2.2728, Val Acc: 14.35%
Epoch: 4, Batch: 0, Loss: 2.3034
Epoch: 4, Batch: 100, Loss: 2.3130
Epoch: 4, Train Loss: 2.2967, Train Acc: 12.14%, Val Loss: 2.2756, Val Acc: 12.95%
Epoch: 5, Batch: 0, Loss: 2.2805
Epoch: 5, Batch: 100, Loss: 2.3345
Epoch: 5, Train Loss: 2.2933, 

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▃▄▇▄▇█
train_loss,█▄▃▃▃▃▁
val_acc,▁▅█▆▂▅█
val_loss,█▇▅▃▄▂▁

0,1
epoch,6.0
train_acc,13.83923
train_loss,2.27285
val_acc,15.0
val_loss,2.25991


[34m[1mwandb[0m: Agent Starting Run: 4nb4vilx with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: same
[34m[1mwandb[0m: 	learning_rate: 0.0020653459307336266
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3977
Epoch: 0, Batch: 100, Loss: 2.2922
Best model saved with val_acc: 19.45% at epoch 0
Epoch: 0, Train Loss: 2.3142, Train Acc: 12.93%, Val Loss: 2.2281, Val Acc: 19.45%
Epoch: 1, Batch: 0, Loss: 2.3584
Epoch: 1, Batch: 100, Loss: 2.2570
Best model saved with val_acc: 20.85% at epoch 1
Epoch: 1, Train Loss: 2.2539, Train Acc: 15.89%, Val Loss: 2.1754, Val Acc: 20.85%
Epoch: 2, Batch: 0, Loss: 2.2895
Epoch: 2, Batch: 100, Loss: 2.2286
Best model saved with val_acc: 21.85% at epoch 2
Epoch: 2, Train Loss: 2.2411, Train Acc: 16.38%, Val Loss: 2.1803, Val Acc: 21.85%
Epoch: 3, Batch: 0, Loss: 2.1318
Epoch: 3, Batch: 100, Loss: 2.3320
Best model saved with val_acc: 22.10% at epoch 3
Epoch: 3, Train Loss: 2.2232, Train Acc: 17.60%, Val Loss: 2.1588, Val Acc: 22.10%
Epoch: 4, Batch: 0, Loss: 2.1579
Epoch: 4, Batch: 100, Loss: 2.2545
Best model saved with val_acc: 22.40% at epoch 4
Epoch: 4, Train Loss: 2.2205, Train Acc: 17.95%, Val Loss: 2.1417, Val Acc: 22.40%


0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▅▆▇▇█
train_loss,█▄▃▂▂▂▁
val_acc,▁▄▇▇█▃▇
val_loss,█▄▄▂▁▁▁

0,1
epoch,6.0
train_acc,19.2399
train_loss,2.20312
val_acc,22.15
val_loss,2.14564


[34m[1mwandb[0m: Agent Starting Run: jyby0yzk with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: half
[34m[1mwandb[0m: 	learning_rate: 0.009163090889529085
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3082
Epoch: 0, Batch: 100, Loss: 2.3176
Best model saved with val_acc: 10.00% at epoch 0
Epoch: 0, Train Loss: 2.3203, Train Acc: 10.28%, Val Loss: 2.3065, Val Acc: 10.00%
Epoch: 1, Batch: 0, Loss: 2.3085
Epoch: 1, Batch: 100, Loss: 2.3054
Epoch: 1, Train Loss: 2.3181, Train Acc: 10.43%, Val Loss: 2.3057, Val Acc: 10.00%
Epoch: 2, Batch: 0, Loss: 2.3227
Epoch: 2, Batch: 100, Loss: 2.3392
Epoch: 2, Train Loss: 2.3202, Train Acc: 9.98%, Val Loss: 2.3056, Val Acc: 10.00%
Epoch: 3, Batch: 0, Loss: 2.3065
Epoch: 3, Batch: 100, Loss: 2.3165
Epoch: 3, Train Loss: 2.3117, Train Acc: 9.59%, Val Loss: 2.3061, Val Acc: 10.00%
Epoch: 4, Batch: 0, Loss: 2.2894
Epoch: 4, Batch: 100, Loss: 2.3134
Epoch: 4, Train Loss: 2.3129, Train Acc: 9.75%, Val Loss: 2.3047, Val Acc: 10.00%
Epoch: 5, Batch: 0, Loss: 2.2919
Epoch: 5, Batch: 100, Loss: 2.3104
Epoch: 5, Train Loss: 2.3076, Train Acc: 10.01%, Val Loss: 2.3064, Val Acc: 10.00%
Epoch: 6, Batch: 0, Loss: 2.3099
Epoch: 6, Batch

0,1
epoch,▁▂▃▅▆▇█
train_acc,▇█▄▁▂▅▁
train_loss,█▇█▃▄▁▂
val_acc,▁▁▁▁▁▁▁
val_loss,█▅▅▇▁█▇

0,1
epoch,6.0
train_acc,9.6387
train_loss,2.30951
val_acc,10.0
val_loss,2.30628


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
502 response executing GraphQL.

<html><head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<title>502 Server Error</title>
</head>
<body text=#000000 bgcolor=#ffffff>
<h1>Error: Server Error</h1>
<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>
<h2></h2>
</body></html>

[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 37g4e8fu with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: half
[34m[1mwandb[0m: 	learning_rate: 0.0024056124969460474
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3190
Epoch: 0, Batch: 100, Loss: 2.2497
Best model saved with val_acc: 21.35% at epoch 0
Epoch: 0, Train Loss: 2.2799, Train Acc: 14.11%, Val Loss: 2.2062, Val Acc: 21.35%
Epoch: 1, Batch: 0, Loss: 2.2968
Epoch: 1, Batch: 100, Loss: 2.2356
Best model saved with val_acc: 23.35% at epoch 1
Epoch: 1, Train Loss: 2.2198, Train Acc: 18.31%, Val Loss: 2.1415, Val Acc: 23.35%
Epoch: 2, Batch: 0, Loss: 2.2577
Epoch: 2, Batch: 100, Loss: 2.1332
Best model saved with val_acc: 25.95% at epoch 2
Epoch: 2, Train Loss: 2.1803, Train Acc: 19.91%, Val Loss: 2.0919, Val Acc: 25.95%
Epoch: 3, Batch: 0, Loss: 2.2268
Epoch: 3, Batch: 100, Loss: 2.1516
Epoch: 3, Train Loss: 2.1549, Train Acc: 21.24%, Val Loss: 2.0862, Val Acc: 25.05%
Epoch: 4, Batch: 0, Loss: 2.0897
Epoch: 4, Batch: 100, Loss: 2.2604
Epoch: 4, Train Loss: 2.1402, Train Acc: 21.48%, Val Loss: 2.0725, Val Acc: 24.85%
Epoch: 5, Batch: 0, Loss: 2.0665
Epoch: 5, Batch: 100, Loss: 2.1733
Best model saved with val_acc:

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▆▇▇██
train_loss,█▅▄▂▂▁▁
val_acc,▁▄█▆▆█▇
val_loss,█▅▃▂▂▁▁

0,1
epoch,6.0
train_acc,22.74034
train_loss,2.12403
val_acc,25.45
val_loss,2.05703


[34m[1mwandb[0m: Agent Starting Run: 76heklgk with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: half
[34m[1mwandb[0m: 	learning_rate: 0.0020575698366611596
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3677
Epoch: 0, Batch: 100, Loss: 2.3092
Best model saved with val_acc: 17.10% at epoch 0
Epoch: 0, Train Loss: 2.3069, Train Acc: 11.55%, Val Loss: 2.2561, Val Acc: 17.10%
Epoch: 1, Batch: 0, Loss: 2.3017
Epoch: 1, Batch: 100, Loss: 2.2235
Best model saved with val_acc: 17.80% at epoch 1
Epoch: 1, Train Loss: 2.2526, Train Acc: 15.18%, Val Loss: 2.2118, Val Acc: 17.80%
Epoch: 2, Batch: 0, Loss: 2.1572
Epoch: 2, Batch: 100, Loss: 2.2243
Best model saved with val_acc: 19.85% at epoch 2
Epoch: 2, Train Loss: 2.2290, Train Acc: 15.34%, Val Loss: 2.1726, Val Acc: 19.85%
Epoch: 3, Batch: 0, Loss: 2.2136
Epoch: 3, Batch: 100, Loss: 2.2645
Best model saved with val_acc: 20.35% at epoch 3
Epoch: 3, Train Loss: 2.2143, Train Acc: 17.40%, Val Loss: 2.1440, Val Acc: 20.35%
Epoch: 4, Batch: 0, Loss: 2.2061
Epoch: 4, Batch: 100, Loss: 2.1873
Best model saved with val_acc: 21.50% at epoch 4
Epoch: 4, Train Loss: 2.2088, Train Acc: 17.51%, Val Loss: 2.1361, Val Acc: 21.50%


0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▅▇▇██
train_loss,█▅▄▃▂▂▁
val_acc,▁▂▅▆▇▆█
val_loss,█▆▄▂▂▂▁

0,1
epoch,6.0
train_acc,18.87736
train_loss,2.18234
val_acc,22.15
val_loss,2.12467


[34m[1mwandb[0m: Agent Starting Run: 78saxd3g with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0001935335594487083
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.4052
Epoch: 0, Batch: 100, Loss: 2.2425
Best model saved with val_acc: 17.95% at epoch 0
Epoch: 0, Train Loss: 2.2532, Train Acc: 16.35%, Val Loss: 2.1702, Val Acc: 17.95%
Epoch: 1, Batch: 0, Loss: 2.1722
Epoch: 1, Batch: 100, Loss: 2.0035
Best model saved with val_acc: 23.90% at epoch 1
Epoch: 1, Train Loss: 2.1458, Train Acc: 21.85%, Val Loss: 2.0891, Val Acc: 23.90%
Epoch: 2, Batch: 0, Loss: 2.0711
Epoch: 2, Batch: 100, Loss: 2.1011
Best model saved with val_acc: 26.60% at epoch 2
Epoch: 2, Train Loss: 2.0958, Train Acc: 24.60%, Val Loss: 2.0429, Val Acc: 26.60%
Epoch: 3, Batch: 0, Loss: 2.0441
Epoch: 3, Batch: 100, Loss: 1.8964
Best model saved with val_acc: 31.55% at epoch 3
Epoch: 3, Train Loss: 2.0500, Train Acc: 26.95%, Val Loss: 1.9754, Val Acc: 31.55%
Epoch: 4, Batch: 0, Loss: 1.9476
Epoch: 4, Batch: 100, Loss: 1.9692
Epoch: 4, Train Loss: 2.0343, Train Acc: 27.60%, Val Loss: 1.9808, Val Acc: 29.50%
Epoch: 5, Batch: 0, Loss: 2.0446
Epoch: 5, Batch:

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▆▇▇██
train_loss,█▅▄▃▂▁▁
val_acc,▁▄▅█▇██
val_loss,█▆▄▂▂▁▁

0,1
epoch,6.0
train_acc,29.15364
train_loss,1.98811
val_acc,32.4
val_loss,1.93073


[34m[1mwandb[0m: Agent Starting Run: wasniyut with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.000415123094272017
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: True


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113535544442736, max=1.0…

Epoch: 0, Batch: 0, Loss: 2.3720
Best model saved with val_acc: 14.80% at epoch 0
Epoch: 0, Train Loss: 2.2948, Train Acc: 13.65%, Val Loss: 2.2796, Val Acc: 14.80%
Epoch: 1, Batch: 0, Loss: 2.2404
Best model saved with val_acc: 20.65% at epoch 1
Epoch: 1, Train Loss: 2.2181, Train Acc: 17.51%, Val Loss: 2.1532, Val Acc: 20.65%
Epoch: 2, Batch: 0, Loss: 2.1991
Best model saved with val_acc: 23.50% at epoch 2
Epoch: 2, Train Loss: 2.1733, Train Acc: 20.75%, Val Loss: 2.1013, Val Acc: 23.50%
Epoch: 3, Batch: 0, Loss: 2.1195
Best model saved with val_acc: 27.70% at epoch 3
Epoch: 3, Train Loss: 2.1496, Train Acc: 21.74%, Val Loss: 2.0489, Val Acc: 27.70%
Epoch: 4, Batch: 0, Loss: 2.0920
Epoch: 4, Train Loss: 2.1214, Train Acc: 23.24%, Val Loss: 2.0187, Val Acc: 27.35%
Epoch: 5, Batch: 0, Loss: 2.0180
Best model saved with val_acc: 29.05% at epoch 5
Epoch: 5, Train Loss: 2.0872, Train Acc: 24.52%, Val Loss: 1.9968, Val Acc: 29.05%
Epoch: 6, Batch: 0, Loss: 2.1296
Best model saved with val_

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▃▅▆▇▇█
train_loss,█▆▄▃▃▂▁
val_acc,▁▄▅▇▇▇█
val_loss,█▅▄▂▂▁▁

0,1
epoch,6.0
train_acc,25.71571
train_loss,2.07037
val_acc,30.35
val_loss,1.99


[34m[1mwandb[0m: Agent Starting Run: c4gqww45 with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0003179050247620776
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3047
Epoch: 0, Batch: 100, Loss: 2.2164
Epoch: 0, Batch: 200, Loss: 2.2619
Best model saved with val_acc: 24.25% at epoch 0
Epoch: 0, Train Loss: 2.2173, Train Acc: 17.14%, Val Loss: 2.0970, Val Acc: 24.25%
Epoch: 1, Batch: 0, Loss: 2.0726
Epoch: 1, Batch: 100, Loss: 2.0466
Epoch: 1, Batch: 200, Loss: 2.0141
Best model saved with val_acc: 27.75% at epoch 1
Epoch: 1, Train Loss: 2.1044, Train Acc: 24.00%, Val Loss: 2.0355, Val Acc: 27.75%
Epoch: 2, Batch: 0, Loss: 1.9897
Epoch: 2, Batch: 100, Loss: 1.9094
Epoch: 2, Batch: 200, Loss: 1.9807
Best model saved with val_acc: 30.30% at epoch 2
Epoch: 2, Train Loss: 2.0499, Train Acc: 26.54%, Val Loss: 1.9730, Val Acc: 30.30%
Epoch: 3, Batch: 0, Loss: 2.1041
Epoch: 3, Batch: 100, Loss: 1.9895
Epoch: 3, Batch: 200, Loss: 1.9898
Best model saved with val_acc: 33.15% at epoch 3
Epoch: 3, Train Loss: 2.0257, Train Acc: 27.70%, Val Loss: 1.9144, Val Acc: 33.15%
Epoch: 4, Batch: 0, Loss: 1.9981
Epoch: 4, Batch: 100, Loss:

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▅▆▆▇██
train_loss,█▅▄▃▂▂▁
val_acc,▁▃▅▇▆▇█
val_loss,█▆▄▂▃▂▁

0,1
epoch,6.0
train_acc,30.77885
train_loss,1.95325
val_acc,34.85
val_loss,1.86499


[34m[1mwandb[0m: Agent Starting Run: lnvhpte6 with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.000540725749897866
[34m[1mwandb[0m: 	num_filters: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.2964
Epoch: 0, Batch: 100, Loss: 2.2439
Best model saved with val_acc: 19.50% at epoch 0
Epoch: 0, Train Loss: 2.2723, Train Acc: 14.65%, Val Loss: 2.1912, Val Acc: 19.50%
Epoch: 1, Batch: 0, Loss: 2.2565
Epoch: 1, Batch: 100, Loss: 2.1434
Best model saved with val_acc: 22.35% at epoch 1
Epoch: 1, Train Loss: 2.2216, Train Acc: 18.18%, Val Loss: 2.1441, Val Acc: 22.35%
Epoch: 2, Batch: 0, Loss: 2.0599
Epoch: 2, Batch: 100, Loss: 2.2690
Best model saved with val_acc: 26.30% at epoch 2
Epoch: 2, Train Loss: 2.1692, Train Acc: 21.48%, Val Loss: 2.0943, Val Acc: 26.30%
Epoch: 3, Batch: 0, Loss: 2.0764
Epoch: 3, Batch: 100, Loss: 2.1478
Best model saved with val_acc: 27.50% at epoch 3
Epoch: 3, Train Loss: 2.1305, Train Acc: 22.89%, Val Loss: 2.0325, Val Acc: 27.50%
Epoch: 4, Batch: 0, Loss: 2.1314
Epoch: 4, Batch: 100, Loss: 2.1234
Best model saved with val_acc: 28.25% at epoch 4
Epoch: 4, Train Loss: 2.1084, Train Acc: 24.32%, Val Loss: 2.0215, Val Acc: 28.25%


0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▆▇███
train_loss,█▆▄▃▂▁▁
val_acc,▁▃▆▇▇▇█
val_loss,█▆▅▂▂▂▁

0,1
epoch,6.0
train_acc,24.49056
train_loss,2.08779
val_acc,29.3
val_loss,1.99048


[34m[1mwandb[0m: Agent Starting Run: 9zrmnjjh with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.000547782510991508
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.4172
Epoch: 0, Batch: 100, Loss: 2.2795
Best model saved with val_acc: 21.25% at epoch 0
Epoch: 0, Train Loss: 2.2741, Train Acc: 14.65%, Val Loss: 2.1555, Val Acc: 21.25%
Epoch: 1, Batch: 0, Loss: 2.2304
Epoch: 1, Batch: 100, Loss: 2.1540
Best model saved with val_acc: 26.10% at epoch 1
Epoch: 1, Train Loss: 2.1987, Train Acc: 19.70%, Val Loss: 2.0825, Val Acc: 26.10%
Epoch: 2, Batch: 0, Loss: 2.1191
Epoch: 2, Batch: 100, Loss: 2.1153
Epoch: 2, Train Loss: 2.1509, Train Acc: 21.83%, Val Loss: 2.0637, Val Acc: 26.00%
Epoch: 3, Batch: 0, Loss: 2.1936
Epoch: 3, Batch: 100, Loss: 2.0150
Best model saved with val_acc: 27.35% at epoch 3
Epoch: 3, Train Loss: 2.1236, Train Acc: 23.24%, Val Loss: 2.0325, Val Acc: 27.35%
Epoch: 4, Batch: 0, Loss: 2.0837
Epoch: 4, Batch: 100, Loss: 1.9603
Best model saved with val_acc: 30.25% at epoch 4
Epoch: 4, Train Loss: 2.1182, Train Acc: 23.88%, Val Loss: 2.0052, Val Acc: 30.25%
Epoch: 5, Batch: 0, Loss: 2.0724
Epoch: 5, Batch:

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▅▆▇███
train_loss,█▅▃▂▂▁▁
val_acc,▁▅▅▆███
val_loss,█▅▄▃▂▁▁

0,1
epoch,6.0
train_acc,24.32804
train_loss,2.09404
val_acc,30.65
val_loss,1.9836


[34m[1mwandb[0m: Agent Starting Run: dwgj8hxw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0003576653337401191
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3040
Epoch: 0, Batch: 100, Loss: 2.2218
Best model saved with val_acc: 17.70% at epoch 0
Epoch: 0, Train Loss: 2.2830, Train Acc: 12.80%, Val Loss: 2.2398, Val Acc: 17.70%
Epoch: 1, Batch: 0, Loss: 2.2179
Epoch: 1, Batch: 100, Loss: 2.1372
Best model saved with val_acc: 21.10% at epoch 1
Epoch: 1, Train Loss: 2.2255, Train Acc: 17.75%, Val Loss: 2.1559, Val Acc: 21.10%
Epoch: 2, Batch: 0, Loss: 2.1862
Epoch: 2, Batch: 100, Loss: 2.2044
Best model saved with val_acc: 22.95% at epoch 2
Epoch: 2, Train Loss: 2.1757, Train Acc: 21.18%, Val Loss: 2.1381, Val Acc: 22.95%
Epoch: 3, Batch: 0, Loss: 2.1133
Epoch: 3, Batch: 100, Loss: 2.1104
Best model saved with val_acc: 24.60% at epoch 3
Epoch: 3, Train Loss: 2.1531, Train Acc: 21.82%, Val Loss: 2.1047, Val Acc: 24.60%
Epoch: 4, Batch: 0, Loss: 2.1007
Epoch: 4, Batch: 100, Loss: 2.1867
Best model saved with val_acc: 28.05% at epoch 4
Epoch: 4, Train Loss: 2.1297, Train Acc: 23.50%, Val Loss: 2.0613, Val Acc: 28.05%


0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▆▆▇▇█
train_loss,█▆▄▄▃▂▁
val_acc,▁▃▄▅▇██
val_loss,█▅▅▄▂▁▂

0,1
epoch,6.0
train_acc,25.65321
train_loss,2.07589
val_acc,29.35
val_loss,2.05335


[34m[1mwandb[0m: Agent Starting Run: 7dct1rh0 with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0003944508525427043
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3795
Epoch: 0, Batch: 100, Loss: 2.2342
Epoch: 0, Batch: 200, Loss: 2.2028
Best model saved with val_acc: 25.20% at epoch 0
Epoch: 0, Train Loss: 2.1961, Train Acc: 19.84%, Val Loss: 2.0735, Val Acc: 25.20%
Epoch: 1, Batch: 0, Loss: 2.1695
Epoch: 1, Batch: 100, Loss: 2.3121
Epoch: 1, Batch: 200, Loss: 1.9369
Best model saved with val_acc: 27.90% at epoch 1
Epoch: 1, Train Loss: 2.1021, Train Acc: 24.62%, Val Loss: 2.0254, Val Acc: 27.90%
Epoch: 2, Batch: 0, Loss: 1.9765
Epoch: 2, Batch: 100, Loss: 1.9885
Epoch: 2, Batch: 200, Loss: 2.0058
Best model saved with val_acc: 31.70% at epoch 2
Epoch: 2, Train Loss: 2.0657, Train Acc: 26.22%, Val Loss: 1.9642, Val Acc: 31.70%
Epoch: 3, Batch: 0, Loss: 2.3008
Epoch: 3, Batch: 100, Loss: 1.9121
Epoch: 3, Batch: 200, Loss: 2.0325
Best model saved with val_acc: 32.30% at epoch 3
Epoch: 3, Train Loss: 2.0314, Train Acc: 27.75%, Val Loss: 1.9198, Val Acc: 32.30%
Epoch: 4, Batch: 0, Loss: 2.0354
Epoch: 4, Batch: 100, Loss:

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▅▆▇▇█
train_loss,█▅▄▃▂▂▁
val_acc,▁▃▆▇▇██
val_loss,█▆▄▂▂▂▁

0,1
epoch,6.0
train_acc,30.10376
train_loss,1.97713
val_acc,33.5
val_loss,1.88322


[34m[1mwandb[0m: Agent Starting Run: sms56swp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.00011652242317466724
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.4553
Epoch: 0, Batch: 100, Loss: 2.3330
Epoch: 0, Batch: 200, Loss: 2.2476
Best model saved with val_acc: 15.10% at epoch 0
Epoch: 0, Train Loss: 2.2811, Train Acc: 14.76%, Val Loss: 2.2601, Val Acc: 15.10%
Epoch: 1, Batch: 0, Loss: 2.2145
Epoch: 1, Batch: 100, Loss: 2.2840
Epoch: 1, Batch: 200, Loss: 2.1135
Best model saved with val_acc: 21.50% at epoch 1
Epoch: 1, Train Loss: 2.1983, Train Acc: 18.85%, Val Loss: 2.1567, Val Acc: 21.50%
Epoch: 2, Batch: 0, Loss: 2.1123
Epoch: 2, Batch: 100, Loss: 2.0768
Epoch: 2, Batch: 200, Loss: 2.0273
Best model saved with val_acc: 23.55% at epoch 2
Epoch: 2, Train Loss: 2.1435, Train Acc: 22.64%, Val Loss: 2.1223, Val Acc: 23.55%
Epoch: 3, Batch: 0, Loss: 2.2108
Epoch: 3, Batch: 100, Loss: 1.9774
Epoch: 3, Batch: 200, Loss: 2.1211
Best model saved with val_acc: 27.65% at epoch 3
Epoch: 3, Train Loss: 2.1093, Train Acc: 24.10%, Val Loss: 2.0677, Val Acc: 27.65%
Epoch: 4, Batch: 0, Loss: 2.0440
Epoch: 4, Batch: 100, Loss:

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▃▅▆▇██
train_loss,█▆▄▃▂▂▁
val_acc,▁▄▅▇▇▇█
val_loss,█▅▅▃▂▂▁

0,1
epoch,6.0
train_acc,27.27841
train_loss,2.04111
val_acc,30.6
val_loss,1.9825


[34m[1mwandb[0m: Agent Starting Run: p1b3oujp with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0011644275411365728
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3138
Epoch: 0, Batch: 100, Loss: 2.2832
Epoch: 0, Batch: 200, Loss: 2.4203
Best model saved with val_acc: 22.25% at epoch 0
Epoch: 0, Train Loss: 2.2717, Train Acc: 16.08%, Val Loss: 2.1277, Val Acc: 22.25%
Epoch: 1, Batch: 0, Loss: 1.9751
Epoch: 1, Batch: 100, Loss: 2.1432
Epoch: 1, Batch: 200, Loss: 2.1092
Best model saved with val_acc: 22.55% at epoch 1
Epoch: 1, Train Loss: 2.1801, Train Acc: 19.73%, Val Loss: 2.1148, Val Acc: 22.55%
Epoch: 2, Batch: 0, Loss: 2.3828
Epoch: 2, Batch: 100, Loss: 2.2838
Epoch: 2, Batch: 200, Loss: 2.2121
Best model saved with val_acc: 24.50% at epoch 2
Epoch: 2, Train Loss: 2.1589, Train Acc: 21.25%, Val Loss: 2.0802, Val Acc: 24.50%
Epoch: 3, Batch: 0, Loss: 2.1912
Epoch: 3, Batch: 100, Loss: 2.0222
Epoch: 3, Batch: 200, Loss: 2.0306
Best model saved with val_acc: 25.85% at epoch 3
Epoch: 3, Train Loss: 2.1339, Train Acc: 21.80%, Val Loss: 2.0632, Val Acc: 25.85%
Epoch: 4, Batch: 0, Loss: 2.1368
Epoch: 4, Batch: 100, Loss:

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▅▆▇▇██
train_loss,█▄▃▂▂▁▁
val_acc,▁▁▄▆█▇█
val_loss,█▇▅▄▁▃▁

0,1
epoch,6.0
train_acc,23.02788
train_loss,2.11348
val_acc,27.7
val_loss,2.0244


[34m[1mwandb[0m: Agent Starting Run: 4a98qm1z with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0005790630042391598
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3960
Epoch: 0, Batch: 100, Loss: 2.2611
Epoch: 0, Batch: 200, Loss: 2.3191
Best model saved with val_acc: 24.35% at epoch 0
Epoch: 0, Train Loss: 2.2313, Train Acc: 18.36%, Val Loss: 2.0847, Val Acc: 24.35%
Epoch: 1, Batch: 0, Loss: 2.1047
Epoch: 1, Batch: 100, Loss: 2.1855
Epoch: 1, Batch: 200, Loss: 2.0246
Best model saved with val_acc: 27.00% at epoch 1
Epoch: 1, Train Loss: 2.1324, Train Acc: 22.45%, Val Loss: 2.0296, Val Acc: 27.00%
Epoch: 2, Batch: 0, Loss: 2.0345
Epoch: 2, Batch: 100, Loss: 1.9158
Epoch: 2, Batch: 200, Loss: 2.1851
Best model saved with val_acc: 30.10% at epoch 2
Epoch: 2, Train Loss: 2.0827, Train Acc: 24.87%, Val Loss: 1.9684, Val Acc: 30.10%
Epoch: 3, Batch: 0, Loss: 2.0852
Epoch: 3, Batch: 100, Loss: 2.0648
Epoch: 3, Batch: 200, Loss: 2.0482
Best model saved with val_acc: 31.45% at epoch 3
Epoch: 3, Train Loss: 2.0474, Train Acc: 26.24%, Val Loss: 1.9314, Val Acc: 31.45%
Epoch: 4, Batch: 0, Loss: 1.9973
Epoch: 4, Batch: 100, Loss:

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▅▆▇▇█
train_loss,█▅▃▂▂▁▁
val_acc,▁▄▇█▇██
val_loss,█▆▃▁▂▁▁

0,1
epoch,6.0
train_acc,29.04113
train_loss,2.00252
val_acc,31.25
val_loss,1.92052


[34m[1mwandb[0m: Agent Starting Run: ux338ka9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0003618316726504598
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3467
Epoch: 0, Batch: 100, Loss: 2.2387
Epoch: 0, Batch: 200, Loss: 2.1880
Best model saved with val_acc: 26.15% at epoch 0
Epoch: 0, Train Loss: 2.1816, Train Acc: 20.72%, Val Loss: 2.0528, Val Acc: 26.15%
Epoch: 1, Batch: 0, Loss: 2.1937
Epoch: 1, Batch: 100, Loss: 2.1786
Epoch: 1, Batch: 200, Loss: 1.8811
Best model saved with val_acc: 29.65% at epoch 1
Epoch: 1, Train Loss: 2.0712, Train Acc: 25.74%, Val Loss: 1.9915, Val Acc: 29.65%
Epoch: 2, Batch: 0, Loss: 2.0131
Epoch: 2, Batch: 100, Loss: 2.0003
Epoch: 2, Batch: 200, Loss: 1.9669
Best model saved with val_acc: 30.35% at epoch 2
Epoch: 2, Train Loss: 2.0238, Train Acc: 27.95%, Val Loss: 1.9656, Val Acc: 30.35%
Epoch: 3, Batch: 0, Loss: 2.2144
Epoch: 3, Batch: 100, Loss: 1.8521
Epoch: 3, Batch: 200, Loss: 1.9524
Best model saved with val_acc: 33.35% at epoch 3
Epoch: 3, Train Loss: 1.9936, Train Acc: 28.77%, Val Loss: 1.9073, Val Acc: 33.35%
Epoch: 4, Batch: 0, Loss: 1.8959
Epoch: 4, Batch: 100, Loss:

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▆▆▇██
train_loss,█▅▃▂▂▁▁
val_acc,▁▄▄▆▆▆█
val_loss,█▆▅▃▃▂▁

0,1
epoch,6.0
train_acc,31.12889
train_loss,1.94625
val_acc,35.5
val_loss,1.86053


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 71c1gyfv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0004797096605158063
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3047
Epoch: 0, Batch: 100, Loss: 2.3090
Epoch: 0, Batch: 200, Loss: 2.2901
Best model saved with val_acc: 19.15% at epoch 0
Epoch: 0, Train Loss: 2.2599, Train Acc: 14.66%, Val Loss: 2.1782, Val Acc: 19.15%
Epoch: 1, Batch: 0, Loss: 2.1981
Epoch: 1, Batch: 100, Loss: 2.1351
Epoch: 1, Batch: 200, Loss: 2.0340
Best model saved with val_acc: 25.50% at epoch 1
Epoch: 1, Train Loss: 2.1675, Train Acc: 21.52%, Val Loss: 2.0913, Val Acc: 25.50%
Epoch: 2, Batch: 0, Loss: 2.1147
Epoch: 2, Batch: 100, Loss: 2.1655
Epoch: 2, Batch: 200, Loss: 2.1594
Epoch: 2, Train Loss: 2.1174, Train Acc: 23.90%, Val Loss: 2.0770, Val Acc: 24.55%
Epoch: 3, Batch: 0, Loss: 2.2194
Epoch: 3, Batch: 100, Loss: 2.0463
Epoch: 3, Batch: 200, Loss: 2.0599
Best model saved with val_acc: 27.85% at epoch 3
Epoch: 3, Train Loss: 2.0795, Train Acc: 24.83%, Val Loss: 2.0252, Val Acc: 27.85%
Epoch: 4, Batch: 0, Loss: 1.9600
Epoch: 4, Batch: 100, Loss: 2.3034
Epoch: 4, Batch: 200, Loss: 2.0655
Best m

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▅▆▇██
train_loss,█▆▄▃▂▂▁
val_acc,▁▄▄▅▆▆█
val_loss,█▅▅▃▂▁▁

0,1
epoch,6.0
train_acc,29.14114
train_loss,1.98622
val_acc,33.25
val_loss,1.96475


[34m[1mwandb[0m: Agent Starting Run: d8cl37fh with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0015389086945764765
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3010
Best model saved with val_acc: 25.20% at epoch 0
Epoch: 0, Train Loss: 2.2718, Train Acc: 15.71%, Val Loss: 2.1201, Val Acc: 25.20%
Epoch: 1, Batch: 0, Loss: 2.2472
Epoch: 1, Train Loss: 2.2173, Train Acc: 18.44%, Val Loss: 2.1285, Val Acc: 22.65%
Epoch: 2, Batch: 0, Loss: 2.2097
Epoch: 2, Train Loss: 2.2240, Train Acc: 18.00%, Val Loss: 2.1815, Val Acc: 21.95%
Epoch: 3, Batch: 0, Loss: 2.2121
Epoch: 3, Train Loss: 2.2167, Train Acc: 18.31%, Val Loss: 2.1261, Val Acc: 24.20%
Epoch: 4, Batch: 0, Loss: 2.2572
Epoch: 4, Train Loss: 2.2174, Train Acc: 19.03%, Val Loss: 2.1443, Val Acc: 22.30%
Epoch: 5, Batch: 0, Loss: 2.2043
Epoch: 5, Train Loss: 2.2240, Train Acc: 17.81%, Val Loss: 2.1540, Val Acc: 22.95%
Epoch: 6, Batch: 0, Loss: 2.2276
Best model saved with val_acc: 26.00% at epoch 6
Epoch: 6, Train Loss: 2.2128, Train Acc: 18.48%, Val Loss: 2.1363, Val Acc: 26.00%


0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▇▆▆█▅▇
train_loss,█▂▂▁▂▂▁
val_acc,▇▂▁▅▂▃█
val_loss,▁▂█▂▄▅▃

0,1
epoch,6.0
train_acc,18.47731
train_loss,2.21283
val_acc,26.0
val_loss,2.13625


[34m[1mwandb[0m: Agent Starting Run: rm9zk8g1 with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: half
[34m[1mwandb[0m: 	learning_rate: 0.002505504703378612
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.2997
Epoch: 0, Batch: 100, Loss: 2.2463
Epoch: 0, Batch: 200, Loss: 2.2604
Best model saved with val_acc: 20.60% at epoch 0
Epoch: 0, Train Loss: 2.2638, Train Acc: 14.80%, Val Loss: 2.1499, Val Acc: 20.60%
Epoch: 1, Batch: 0, Loss: 2.2982
Epoch: 1, Batch: 100, Loss: 2.2721
Epoch: 1, Batch: 200, Loss: 2.2017
Best model saved with val_acc: 20.90% at epoch 1
Epoch: 1, Train Loss: 2.2345, Train Acc: 16.65%, Val Loss: 2.1649, Val Acc: 20.90%
Epoch: 2, Batch: 0, Loss: 2.3070
Epoch: 2, Batch: 100, Loss: 2.1785
Epoch: 2, Batch: 200, Loss: 2.2420
Epoch: 2, Train Loss: 2.2628, Train Acc: 15.58%, Val Loss: 2.2200, Val Acc: 17.45%
Epoch: 3, Batch: 0, Loss: 2.2738
Epoch: 3, Batch: 100, Loss: 2.0872
Epoch: 3, Batch: 200, Loss: 2.1544
Epoch: 3, Train Loss: 2.2322, Train Acc: 16.68%, Val Loss: 2.1796, Val Acc: 19.10%
Epoch: 4, Batch: 0, Loss: 2.2026
Epoch: 4, Batch: 100, Loss: 2.1747
Epoch: 4, Batch: 200, Loss: 2.0989
Epoch: 4, Train Loss: 2.2210, Train Acc: 17.84%, Val Lo

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▅▃▅▇██
train_loss,█▃█▃▁▁▁
val_acc,▇█▁▄▄▄▃
val_loss,▁▂█▄▃▅▅

0,1
epoch,6.0
train_acc,18.08976
train_loss,2.22057
val_acc,18.3
val_loss,2.18553


[34m[1mwandb[0m: Agent Starting Run: yycg7dkf with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.002637347674455608
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3067
Epoch: 0, Batch: 100, Loss: 2.5057
Best model saved with val_acc: 15.80% at epoch 0
Epoch: 0, Train Loss: 2.3348, Train Acc: 12.96%, Val Loss: 2.2647, Val Acc: 15.80%
Epoch: 1, Batch: 0, Loss: 2.3127
Epoch: 1, Batch: 100, Loss: 2.3343
Epoch: 1, Train Loss: 2.3295, Train Acc: 12.00%, Val Loss: 2.2902, Val Acc: 13.25%
Epoch: 2, Batch: 0, Loss: 2.3329
Epoch: 2, Batch: 100, Loss: 2.3300
Epoch: 2, Train Loss: 2.3318, Train Acc: 11.31%, Val Loss: 2.2995, Val Acc: 11.10%
Epoch: 3, Batch: 0, Loss: 2.2908
Epoch: 3, Batch: 100, Loss: 2.2729
Epoch: 3, Train Loss: 2.3146, Train Acc: 11.30%, Val Loss: 2.3007, Val Acc: 11.55%
Epoch: 4, Batch: 0, Loss: 2.3512
Epoch: 4, Batch: 100, Loss: 2.5556
Epoch: 4, Train Loss: 2.3198, Train Acc: 10.93%, Val Loss: 2.3008, Val Acc: 10.45%
Epoch: 5, Batch: 0, Loss: 2.3377
Epoch: 5, Batch: 100, Loss: 2.2812
Epoch: 5, Train Loss: 2.3035, Train Acc: 10.25%, Val Loss: 2.2858, Val Acc: 12.25%
Epoch: 6, Batch: 0, Loss: 2.3066
Epoch: 6, Ba

0,1
epoch,▁▂▃▅▆▇█
train_acc,█▆▄▄▃▁▃
train_loss,█▇▇▃▅▁▁
val_acc,█▅▂▃▁▃▁
val_loss,▁▆███▅█

0,1
epoch,6.0
train_acc,10.96387
train_loss,2.3048
val_acc,10.35
val_loss,2.29866


[34m[1mwandb[0m: Agent Starting Run: lwebjcn5 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0019293718931601585
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3071
Best model saved with val_acc: 16.75% at epoch 0
Epoch: 0, Train Loss: 2.2920, Train Acc: 11.53%, Val Loss: 2.2431, Val Acc: 16.75%
Epoch: 1, Batch: 0, Loss: 2.2175
Best model saved with val_acc: 20.20% at epoch 1
Epoch: 1, Train Loss: 2.2412, Train Acc: 16.20%, Val Loss: 2.2096, Val Acc: 20.20%
Epoch: 2, Batch: 0, Loss: 2.1706
Epoch: 2, Train Loss: 2.2216, Train Acc: 17.66%, Val Loss: 2.1900, Val Acc: 18.55%
Epoch: 3, Batch: 0, Loss: 2.2814
Epoch: 3, Train Loss: 2.2071, Train Acc: 17.46%, Val Loss: 2.1680, Val Acc: 19.05%
Epoch: 4, Batch: 0, Loss: 2.1341
Epoch: 4, Train Loss: 2.1962, Train Acc: 19.13%, Val Loss: 2.1584, Val Acc: 18.95%
Epoch: 5, Batch: 0, Loss: 2.1934
Best model saved with val_acc: 22.45% at epoch 5
Epoch: 5, Train Loss: 2.1921, Train Acc: 19.09%, Val Loss: 2.1438, Val Acc: 22.45%
Epoch: 6, Batch: 0, Loss: 2.2468
Epoch: 6, Train Loss: 2.1627, Train Acc: 21.23%, Val Loss: 2.1339, Val Acc: 21.80%


0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▅▅▆▆█
train_loss,█▅▄▃▃▃▁
val_acc,▁▅▃▄▄█▇
val_loss,█▆▅▃▃▂▁

0,1
epoch,6.0
train_acc,21.22765
train_loss,2.16272
val_acc,21.8
val_loss,2.13393


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: uxy03es0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.00017384593224232403
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3616
Best model saved with val_acc: 13.80% at epoch 0
Epoch: 0, Train Loss: 2.2896, Train Acc: 13.14%, Val Loss: 2.2586, Val Acc: 13.80%
Epoch: 1, Batch: 0, Loss: 2.2751
Best model saved with val_acc: 20.15% at epoch 1
Epoch: 1, Train Loss: 2.2295, Train Acc: 17.81%, Val Loss: 2.1969, Val Acc: 20.15%
Epoch: 2, Batch: 0, Loss: 2.2175
Best model saved with val_acc: 22.60% at epoch 2
Epoch: 2, Train Loss: 2.1837, Train Acc: 20.07%, Val Loss: 2.1459, Val Acc: 22.60%
Epoch: 3, Batch: 0, Loss: 2.1007
Best model saved with val_acc: 25.80% at epoch 3
Epoch: 3, Train Loss: 2.1500, Train Acc: 22.35%, Val Loss: 2.0970, Val Acc: 25.80%
Epoch: 4, Batch: 0, Loss: 2.0857
Best model saved with val_acc: 27.40% at epoch 4
Epoch: 4, Train Loss: 2.1132, Train Acc: 23.97%, Val Loss: 2.0607, Val Acc: 27.40%
Epoch: 5, Batch: 0, Loss: 2.0534
Best model saved with val_acc: 28.85% at epoch 5
Epoch: 5, Train Loss: 2.0844, Train Acc: 26.44%, Val Loss: 2.0336, Val Acc: 28.85%
Epoch: 6, 

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▃▅▆▇██
train_loss,█▆▅▄▃▂▁
val_acc,▁▄▅▆▇██
val_loss,█▆▅▄▃▂▁

0,1
epoch,6.0
train_acc,26.81585
train_loss,2.05992
val_acc,29.95
val_loss,2.0016


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xshuxiek with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0004111039551410106
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3044
Epoch: 0, Batch: 100, Loss: 2.1692
Epoch: 0, Batch: 200, Loss: 2.2443
Best model saved with val_acc: 23.30% at epoch 0
Epoch: 0, Train Loss: 2.2222, Train Acc: 17.95%, Val Loss: 2.1135, Val Acc: 23.30%
Epoch: 1, Batch: 0, Loss: 2.0672
Epoch: 1, Batch: 100, Loss: 2.0759
Epoch: 1, Batch: 200, Loss: 2.1379
Best model saved with val_acc: 27.75% at epoch 1
Epoch: 1, Train Loss: 2.1353, Train Acc: 22.35%, Val Loss: 2.0339, Val Acc: 27.75%
Epoch: 2, Batch: 0, Loss: 1.8851
Epoch: 2, Batch: 100, Loss: 2.0188
Epoch: 2, Batch: 200, Loss: 2.0767
Best model saved with val_acc: 29.60% at epoch 2
Epoch: 2, Train Loss: 2.0923, Train Acc: 24.98%, Val Loss: 1.9922, Val Acc: 29.60%
Epoch: 3, Batch: 0, Loss: 2.1582
Epoch: 3, Batch: 100, Loss: 1.9693
Epoch: 3, Batch: 200, Loss: 2.0551
Epoch: 3, Train Loss: 2.0711, Train Acc: 25.35%, Val Loss: 1.9702, Val Acc: 29.10%
Epoch: 4, Batch: 0, Loss: 2.1026
Epoch: 4, Batch: 100, Loss: 2.1649
Epoch: 4, Batch: 200, Loss: 2.1838
Best m

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▆▆▇▇█
train_loss,█▅▃▂▂▁▁
val_acc,▁▄▅▅▇▇█
val_loss,█▅▄▃▁▂▁

0,1
epoch,6.0
train_acc,27.90349
train_loss,2.03404
val_acc,33.65
val_loss,1.92084


[34m[1mwandb[0m: Agent Starting Run: apcznqu1 with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0003287583115126017
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3010
Best model saved with val_acc: 22.30% at epoch 0
Epoch: 0, Train Loss: 2.2662, Train Acc: 15.03%, Val Loss: 2.1671, Val Acc: 22.30%
Epoch: 1, Batch: 0, Loss: 2.1961
Best model saved with val_acc: 24.05% at epoch 1
Epoch: 1, Train Loss: 2.1543, Train Acc: 21.42%, Val Loss: 2.1043, Val Acc: 24.05%
Epoch: 2, Batch: 0, Loss: 2.1485
Best model saved with val_acc: 28.90% at epoch 2
Epoch: 2, Train Loss: 2.1099, Train Acc: 24.22%, Val Loss: 2.0425, Val Acc: 28.90%
Epoch: 3, Batch: 0, Loss: 2.1125
Epoch: 3, Train Loss: 2.0810, Train Acc: 24.97%, Val Loss: 2.0226, Val Acc: 27.20%
Epoch: 4, Batch: 0, Loss: 2.0777
Best model saved with val_acc: 30.75% at epoch 4
Epoch: 4, Train Loss: 2.0578, Train Acc: 26.04%, Val Loss: 1.9828, Val Acc: 30.75%
Epoch: 5, Batch: 0, Loss: 1.9254
Best model saved with val_acc: 32.10% at epoch 5
Epoch: 5, Train Loss: 2.0356, Train Acc: 27.49%, Val Loss: 1.9499, Val Acc: 32.10%
Epoch: 6, Batch: 0, Loss: 1.8991
Epoch: 6, Train Loss: 2.01

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▆▆▇▇█
train_loss,█▅▄▃▂▂▁
val_acc,▁▂▆▅▇██
val_loss,█▆▄▄▃▂▁

0,1
epoch,6.0
train_acc,28.59107
train_loss,2.01618
val_acc,31.75
val_loss,1.93046


[34m[1mwandb[0m: Agent Starting Run: ydt3kr11 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.001745313418679571
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3537
Epoch: 0, Batch: 100, Loss: 2.2087
Best model saved with val_acc: 20.65% at epoch 0
Epoch: 0, Train Loss: 2.2913, Train Acc: 13.24%, Val Loss: 2.1700, Val Acc: 20.65%
Epoch: 1, Batch: 0, Loss: 2.2943
Epoch: 1, Batch: 100, Loss: 2.2045
Best model saved with val_acc: 22.75% at epoch 1
Epoch: 1, Train Loss: 2.2174, Train Acc: 18.49%, Val Loss: 2.1298, Val Acc: 22.75%
Epoch: 2, Batch: 0, Loss: 2.1612
Epoch: 2, Batch: 100, Loss: 2.2257
Best model saved with val_acc: 23.10% at epoch 2
Epoch: 2, Train Loss: 2.1752, Train Acc: 19.85%, Val Loss: 2.1141, Val Acc: 23.10%
Epoch: 3, Batch: 0, Loss: 2.0243
Epoch: 3, Batch: 100, Loss: 2.0846
Best model saved with val_acc: 23.90% at epoch 3
Epoch: 3, Train Loss: 2.1722, Train Acc: 20.19%, Val Loss: 2.1182, Val Acc: 23.90%
Epoch: 4, Batch: 0, Loss: 2.1920
Epoch: 4, Batch: 100, Loss: 2.1495
Best model saved with val_acc: 24.80% at epoch 4
Epoch: 4, Train Loss: 2.1495, Train Acc: 21.53%, Val Loss: 2.0821, Val Acc: 24.80%


0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▅▆▆▇▇█
train_loss,█▅▃▃▂▂▁
val_acc,▁▃▃▄▅█▇
val_loss,█▆▅▅▄▂▁

0,1
epoch,6.0
train_acc,23.01538
train_loss,2.12288
val_acc,26.85
val_loss,2.02749


[34m[1mwandb[0m: Agent Starting Run: zjjqa7fw with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.002438231739509331
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3710
Epoch: 0, Batch: 100, Loss: 2.3541
Best model saved with val_acc: 19.40% at epoch 0
Epoch: 0, Train Loss: 2.3142, Train Acc: 15.13%, Val Loss: 2.1794, Val Acc: 19.40%
Epoch: 1, Batch: 0, Loss: 2.1446
Epoch: 1, Batch: 100, Loss: 2.1567
Best model saved with val_acc: 22.85% at epoch 1
Epoch: 1, Train Loss: 2.2193, Train Acc: 17.86%, Val Loss: 2.1283, Val Acc: 22.85%
Epoch: 2, Batch: 0, Loss: 2.1570
Epoch: 2, Batch: 100, Loss: 2.1834
Epoch: 2, Train Loss: 2.1987, Train Acc: 18.74%, Val Loss: 2.1297, Val Acc: 20.85%
Epoch: 3, Batch: 0, Loss: 2.1779
Epoch: 3, Batch: 100, Loss: 2.0662
Best model saved with val_acc: 23.10% at epoch 3
Epoch: 3, Train Loss: 2.1798, Train Acc: 20.15%, Val Loss: 2.0963, Val Acc: 23.10%
Epoch: 4, Batch: 0, Loss: 2.0151
Epoch: 4, Batch: 100, Loss: 2.0345
Best model saved with val_acc: 23.65% at epoch 4
Epoch: 4, Train Loss: 2.1555, Train Acc: 20.90%, Val Loss: 2.1014, Val Acc: 23.65%
Epoch: 5, Batch: 0, Loss: 2.1074
Epoch: 5, Batch:

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▅▆▇▇█
train_loss,█▄▃▂▁▂▁
val_acc,▁▅▃▅▆▆█
val_loss,█▅▅▃▃▁▂

0,1
epoch,6.0
train_acc,21.91524
train_loss,2.1448
val_acc,25.3
val_loss,2.08313


[34m[1mwandb[0m: Agent Starting Run: 6bp1lmu0 with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0006938302390063593
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.4108
Epoch: 0, Batch: 100, Loss: 2.2287
Best model saved with val_acc: 23.50% at epoch 0
Epoch: 0, Train Loss: 2.2256, Train Acc: 18.40%, Val Loss: 2.0906, Val Acc: 23.50%
Epoch: 1, Batch: 0, Loss: 2.2832
Epoch: 1, Batch: 100, Loss: 2.0023
Best model saved with val_acc: 29.75% at epoch 1
Epoch: 1, Train Loss: 2.1201, Train Acc: 23.79%, Val Loss: 2.0092, Val Acc: 29.75%
Epoch: 2, Batch: 0, Loss: 2.1466
Epoch: 2, Batch: 100, Loss: 2.0602
Epoch: 2, Train Loss: 2.0726, Train Acc: 25.62%, Val Loss: 1.9656, Val Acc: 29.30%
Epoch: 3, Batch: 0, Loss: 2.0806
Epoch: 3, Batch: 100, Loss: 2.0300
Best model saved with val_acc: 31.05% at epoch 3
Epoch: 3, Train Loss: 2.0451, Train Acc: 27.37%, Val Loss: 1.9532, Val Acc: 31.05%
Epoch: 4, Batch: 0, Loss: 1.9807
Epoch: 4, Batch: 100, Loss: 1.9773
Best model saved with val_acc: 32.30% at epoch 4
Epoch: 4, Train Loss: 2.0140, Train Acc: 28.48%, Val Loss: 1.9275, Val Acc: 32.30%
Epoch: 5, Batch: 0, Loss: 1.8940
Epoch: 5, Batch:

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▅▇▇▇█
train_loss,█▅▄▃▂▂▁
val_acc,▁▅▅▆▇▇█
val_loss,█▅▄▃▂▂▁

0,1
epoch,6.0
train_acc,29.6287
train_loss,1.97084
val_acc,33.9
val_loss,1.88736


[34m[1mwandb[0m: Agent Starting Run: jua4zql4 with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: half
[34m[1mwandb[0m: 	learning_rate: 0.0004107494392142384
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3259
Epoch: 0, Batch: 100, Loss: 2.2440
Best model saved with val_acc: 24.90% at epoch 0
Epoch: 0, Train Loss: 2.2349, Train Acc: 17.39%, Val Loss: 2.1146, Val Acc: 24.90%
Epoch: 1, Batch: 0, Loss: 2.1762
Epoch: 1, Batch: 100, Loss: 2.1203
Best model saved with val_acc: 28.10% at epoch 1
Epoch: 1, Train Loss: 2.1426, Train Acc: 22.39%, Val Loss: 2.0465, Val Acc: 28.10%
Epoch: 2, Batch: 0, Loss: 2.0891
Epoch: 2, Batch: 100, Loss: 2.0600
Best model saved with val_acc: 28.65% at epoch 2
Epoch: 2, Train Loss: 2.1051, Train Acc: 24.00%, Val Loss: 2.0103, Val Acc: 28.65%
Epoch: 3, Batch: 0, Loss: 2.0775
Epoch: 3, Batch: 100, Loss: 2.0972
Best model saved with val_acc: 30.20% at epoch 3
Epoch: 3, Train Loss: 2.0654, Train Acc: 25.52%, Val Loss: 1.9846, Val Acc: 30.20%
Epoch: 4, Batch: 0, Loss: 2.0142
Epoch: 4, Batch: 100, Loss: 1.9666
Best model saved with val_acc: 31.75% at epoch 4
Epoch: 4, Train Loss: 2.0470, Train Acc: 26.30%, Val Loss: 1.9655, Val Acc: 31.75%


0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▆▇▇██
train_loss,█▅▄▃▂▁▁
val_acc,▁▄▄▆▇█▇
val_loss,█▅▄▃▂▂▁

0,1
epoch,6.0
train_acc,27.60345
train_loss,2.01449
val_acc,32.0
val_loss,1.93137


[34m[1mwandb[0m: Agent Starting Run: wufjmgi0 with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.002274580687609866
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3059
Best model saved with val_acc: 22.30% at epoch 0
Epoch: 0, Train Loss: 2.2634, Train Acc: 16.38%, Val Loss: 2.1521, Val Acc: 22.30%
Epoch: 1, Batch: 0, Loss: 2.2060
Epoch: 1, Train Loss: 2.2259, Train Acc: 17.94%, Val Loss: 2.1602, Val Acc: 19.60%
Epoch: 2, Batch: 0, Loss: 2.1649
Epoch: 2, Train Loss: 2.2278, Train Acc: 18.36%, Val Loss: 2.1757, Val Acc: 17.00%
Epoch: 3, Batch: 0, Loss: 2.3278
Epoch: 3, Train Loss: 2.2409, Train Acc: 17.43%, Val Loss: 2.1809, Val Acc: 21.25%
Epoch: 4, Batch: 0, Loss: 2.2311
Epoch: 4, Train Loss: 2.2483, Train Acc: 16.98%, Val Loss: 2.2133, Val Acc: 17.05%
Epoch: 5, Batch: 0, Loss: 2.2012
Epoch: 5, Train Loss: 2.2517, Train Acc: 16.50%, Val Loss: 2.1707, Val Acc: 21.75%
Epoch: 6, Batch: 0, Loss: 2.2329
Epoch: 6, Train Loss: 2.2780, Train Acc: 14.64%, Val Loss: 2.2121, Val Acc: 20.05%


0,1
epoch,▁▂▃▅▆▇█
train_acc,▄▇█▆▅▄▁
train_loss,▆▁▁▃▄▄█
val_acc,█▄▁▇▁▇▅
val_loss,▁▂▄▄█▃█

0,1
epoch,6.0
train_acc,14.63933
train_loss,2.27797
val_acc,20.05
val_loss,2.21208


[34m[1mwandb[0m: Agent Starting Run: 47lk559x with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0007561220293841636
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3291
Best model saved with val_acc: 24.05% at epoch 0
Epoch: 0, Train Loss: 2.2197, Train Acc: 18.29%, Val Loss: 2.0605, Val Acc: 24.05%
Epoch: 1, Batch: 0, Loss: 2.1071
Best model saved with val_acc: 29.25% at epoch 1
Epoch: 1, Train Loss: 2.0918, Train Acc: 24.03%, Val Loss: 1.9630, Val Acc: 29.25%
Epoch: 2, Batch: 0, Loss: 2.0614
Best model saved with val_acc: 31.10% at epoch 2
Epoch: 2, Train Loss: 2.0516, Train Acc: 26.99%, Val Loss: 1.9466, Val Acc: 31.10%
Epoch: 3, Batch: 0, Loss: 2.1447
Best model saved with val_acc: 31.85% at epoch 3
Epoch: 3, Train Loss: 2.0130, Train Acc: 27.90%, Val Loss: 1.9268, Val Acc: 31.85%
Epoch: 4, Batch: 0, Loss: 1.9711
Best model saved with val_acc: 32.05% at epoch 4
Epoch: 4, Train Loss: 2.0009, Train Acc: 28.19%, Val Loss: 1.9114, Val Acc: 32.05%
Epoch: 5, Batch: 0, Loss: 2.0323
Epoch: 5, Train Loss: 1.9750, Train Acc: 30.12%, Val Loss: 1.9043, Val Acc: 31.95%
Epoch: 6, Batch: 0, Loss: 2.0049
Best model saved with val_

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▆▆▇██
train_loss,█▅▄▃▂▂▁
val_acc,▁▅▆▆▇▆█
val_loss,█▄▄▃▃▂▁

0,1
epoch,6.0
train_acc,30.60383
train_loss,1.9545
val_acc,34.2
val_loss,1.87006


[34m[1mwandb[0m: Agent Starting Run: h7m1knbl with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0019562158924477896
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: False


Epoch: 0, Batch: 0, Loss: 2.3061
Epoch: 0, Batch: 100, Loss: 2.4539
Best model saved with val_acc: 21.70% at epoch 0
Epoch: 0, Train Loss: 2.2536, Train Acc: 17.39%, Val Loss: 2.1550, Val Acc: 21.70%
Epoch: 1, Batch: 0, Loss: 2.2631
Epoch: 1, Batch: 100, Loss: 2.2933
Best model saved with val_acc: 22.30% at epoch 1
Epoch: 1, Train Loss: 2.2157, Train Acc: 19.54%, Val Loss: 2.1268, Val Acc: 22.30%
Epoch: 2, Batch: 0, Loss: 2.2035
Epoch: 2, Batch: 100, Loss: 2.3125
Best model saved with val_acc: 22.95% at epoch 2
Epoch: 2, Train Loss: 2.2194, Train Acc: 18.55%, Val Loss: 2.1055, Val Acc: 22.95%
Epoch: 3, Batch: 0, Loss: 2.1846
Epoch: 3, Batch: 100, Loss: 2.1758
Epoch: 3, Train Loss: 2.2063, Train Acc: 19.89%, Val Loss: 2.1398, Val Acc: 22.55%
Epoch: 4, Batch: 0, Loss: 2.0907
Epoch: 4, Batch: 100, Loss: 2.1460
Epoch: 4, Train Loss: 2.2179, Train Acc: 18.46%, Val Loss: 2.1874, Val Acc: 19.95%
Epoch: 5, Batch: 0, Loss: 2.2458
Epoch: 5, Batch: 100, Loss: 2.3067
Epoch: 5, Train Loss: 2.2452, 

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▇▄█▄▁▁
train_loss,█▂▃▁▃▇▄
val_acc,▅▆█▇▁▅▄
val_loss,▅▃▁▄▇█▅

0,1
epoch,6.0
train_acc,17.45218
train_loss,2.22552
val_acc,21.25
val_loss,2.16055


[34m[1mwandb[0m: Agent Starting Run: sxg7ag0b with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0015506284929182035
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.3291
Best model saved with val_acc: 20.80% at epoch 0
Epoch: 0, Train Loss: 2.2849, Train Acc: 15.68%, Val Loss: 2.1177, Val Acc: 20.80%
Epoch: 1, Batch: 0, Loss: 2.1651
Best model saved with val_acc: 26.50% at epoch 1
Epoch: 1, Train Loss: 2.1421, Train Acc: 22.24%, Val Loss: 2.0375, Val Acc: 26.50%
Epoch: 2, Batch: 0, Loss: 2.1477
Best model saved with val_acc: 29.10% at epoch 2
Epoch: 2, Train Loss: 2.0989, Train Acc: 24.63%, Val Loss: 2.0074, Val Acc: 29.10%
Epoch: 3, Batch: 0, Loss: 2.1392
Best model saved with val_acc: 29.70% at epoch 3
Epoch: 3, Train Loss: 2.0720, Train Acc: 25.09%, Val Loss: 1.9959, Val Acc: 29.70%
Epoch: 4, Batch: 0, Loss: 2.0770
Epoch: 4, Train Loss: 2.0525, Train Acc: 25.45%, Val Loss: 1.9769, Val Acc: 28.45%
Epoch: 5, Batch: 0, Loss: 2.0473
Best model saved with val_acc: 31.90% at epoch 5
Epoch: 5, Train Loss: 2.0329, Train Acc: 26.90%, Val Loss: 1.9571, Val Acc: 31.90%
Epoch: 6, Batch: 0, Loss: 2.0264
Epoch: 6, Train Loss: 2.02

0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▅▆▇▇██
train_loss,█▄▃▂▂▁▁
val_acc,▁▅▆▇▆██
val_loss,█▅▄▃▃▂▁

0,1
epoch,6.0
train_acc,27.62845
train_loss,2.02062
val_acc,31.6
val_loss,1.93309


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: n7p7ofzb with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filter_strategy: pyramid
[34m[1mwandb[0m: 	learning_rate: 0.0007171894644397339
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batch_norm: True


Epoch: 0, Batch: 0, Loss: 2.4030
Epoch: 0, Batch: 100, Loss: 2.2033
Best model saved with val_acc: 27.90% at epoch 0
Epoch: 0, Train Loss: 2.1808, Train Acc: 20.18%, Val Loss: 2.0256, Val Acc: 27.90%
Epoch: 1, Batch: 0, Loss: 2.0797
Epoch: 1, Batch: 100, Loss: 2.0455
Best model saved with val_acc: 32.85% at epoch 1
Epoch: 1, Train Loss: 2.0538, Train Acc: 26.85%, Val Loss: 1.9547, Val Acc: 32.85%
Epoch: 2, Batch: 0, Loss: 2.1150
Epoch: 2, Batch: 100, Loss: 1.9760
Best model saved with val_acc: 32.90% at epoch 2
Epoch: 2, Train Loss: 1.9949, Train Acc: 29.42%, Val Loss: 1.9189, Val Acc: 32.90%
Epoch: 3, Batch: 0, Loss: 2.0282
Epoch: 3, Batch: 100, Loss: 1.9793
Best model saved with val_acc: 33.70% at epoch 3
Epoch: 3, Train Loss: 1.9603, Train Acc: 29.95%, Val Loss: 1.9149, Val Acc: 33.70%
Epoch: 4, Batch: 0, Loss: 2.0002
Epoch: 4, Batch: 100, Loss: 1.9148
Best model saved with val_acc: 34.00% at epoch 4
Epoch: 4, Train Loss: 1.9237, Train Acc: 31.63%, Val Loss: 1.8731, Val Acc: 34.00%


0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▄▆▆▇▇█
train_loss,█▅▄▃▂▂▁
val_acc,▁▅▅▆▆▇█
val_loss,█▅▄▄▃▂▁

0,1
epoch,6.0
train_acc,34.30429
train_loss,1.86596
val_acc,36.7
val_loss,1.83095


In [14]:
def train_best_model(sweep_id, entity, project, epochs=100):
    """
    Load the best configuration from a W&B sweep and train a model for the specified epochs
    """
    # Get the best run from the sweep
    api = wandb.Api()
    sweep = api.sweep(f"{entity}/{project}/{sweep_id}")
    
    # Find the best run based on validation accuracy
    best_run = None
    best_val_acc = 0
    
    for run in sweep.runs:
        if run.summary.get("val_acc", 0) > best_val_acc:
            best_val_acc = run.summary.get("val_acc", 0)
            best_run = run
    
    if best_run is None:
        print("No valid runs found in the sweep")
        return
    
    # Get the best configuration
    best_config = best_run.config
    print(f"Best run: {best_run.name}")
    print(f"Best validation accuracy: {best_val_acc:.2f}%")
    print(f"Best configuration: {best_config}")
    
    # Add epochs to the config
    best_config["epochs"] = epochs
    
    # Train a new model with the best configuration
    with wandb.init(project=wandb.run.project, config=best_config, name=f"best_model_100_epochs"):
        config = wandb.config
        
        # Set random seed for reproducibility
        torch.manual_seed(config.seed if "seed" in config else 42)
        np.random.seed(config.seed if "seed" in config else 42)
        
        # Data transforms
        if config.use_augmentation if "use_augmentation" in config else True:
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(10),
                transforms.ColorJitter(brightness=0.2, contrast=0.2),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])
            ])
        else:
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])
            ])

        val_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
        ])
        
        # Load dataset
        train_dataset = datasets.ImageFolder(
            root='/kaggle/working/inaturalist_12K/train',
            transform=train_transform
        )
        
        # Split into train and validation
        train_indices, val_indices = train_test_split(
            list(range(len(train_dataset))),
            test_size=0.2,
            stratify=train_dataset.targets,
            random_state=config.seed if "seed" in config else 42
        )
        
        train_subset = torch.utils.data.Subset(train_dataset, train_indices)
        val_subset = torch.utils.data.Subset(train_dataset, val_indices)
        
        train_loader = DataLoader(
            train_subset,
            batch_size=config.batch_size,
            shuffle=True,
            num_workers=4
        )
        
        val_loader = DataLoader(
            val_subset,
            batch_size=config.batch_size,
            shuffle=False,
            num_workers=4
        )
        
        # Initialize model
        model = FlexibleCNN(
            num_filters=config.num_filters,
            filter_size=config.filter_size,
            activation=config.activation if "activation" in config else "relu",
            dense_neurons=config.dense_neurons,
            use_batch_norm=config.use_batch_norm if "use_batch_norm" in config else True,
            dropout_rate=config.dropout_rate if "dropout_rate" in config else 0.2,
            filter_strategy=config.filter_strategy if "filter_strategy" in config else "same"
        )
        
        # Move model to GPU if available
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
        
        # Loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
        
        # Training loop
        best_val_acc = 0
        
        for epoch in range(config.epochs):
            # Training phase
            model.train()
            train_loss = 0
            train_correct = 0
            train_total = 0
            
            for batch_idx, (inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(device), targets.to(device)
                
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                
                train_loss += loss.item()
                _, predicted = outputs.max(1)
                train_total += targets.size(0)
                train_correct += predicted.eq(targets).sum().item()
                
                if batch_idx % 100 == 0:
                    print(f'Epoch: {epoch}, Batch: {batch_idx}, Loss: {loss.item():.4f}')
            
            train_acc = 100. * train_correct / train_total
            
            # Validation phase
            model.eval()
            val_loss = 0
            val_correct = 0
            val_total = 0
            
            with torch.no_grad():
                for inputs, targets in val_loader:
                    inputs, targets = inputs.to(device), targets.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)
                    
                    val_loss += loss.item()
                    _, predicted = outputs.max(1)
                    val_total += targets.size(0)
                    val_correct += predicted.eq(targets).sum().item()
            
            val_acc = 100. * val_correct / val_total
            
            # Log metrics to wandb
            wandb.log({
                "epoch": epoch,
                "train_loss": train_loss / len(train_loader),
                "train_acc": train_acc,
                "val_loss": val_loss / len(val_loader),
                "val_acc": val_acc
            })
            
            # Save best model
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(model.state_dict(), 'best_model_100_epochs.pth')
                print(f"Best model saved with val_acc: {val_acc:.2f}% at epoch {epoch}")
            
            print(f'Epoch: {epoch}, Train Loss: {train_loss/len(train_loader):.4f}, '
                  f'Train Acc: {train_acc:.2f}%, Val Loss: {val_loss/len(val_loader):.4f}, '
                  f'Val Acc: {val_acc:.2f}%')
        
        # Load test dataset and evaluate
        test_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
        ])
        
        test_dataset = datasets.ImageFolder(
            root='/kaggle/working/inaturalist_12K/val',
            transform=test_transform
        )
        
        test_loader = DataLoader(
            test_dataset,
            batch_size=config.batch_size,
            shuffle=False,
            num_workers=4
        )
        
        # Load best model weights
        model.load_state_dict(torch.load('best_model_100_epochs.pth'))
        model.eval()
        
        # Evaluate on test set
        correct = 0
        total = 0
        
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        test_accuracy = 100 * correct / total
        print(f'Test Accuracy on {total} test images: {test_accuracy:.2f}%')
        wandb.log({"test_accuracy": test_accuracy})
        
        # Plot class predictions
        fig = plot_class_predictions(model, test_loader, test_dataset)
        wandb.log({"Class Predictions": wandb.Image(fig)})
        plt.close(fig)
        
        return model, test_accuracy

In [None]:
# def train_best_model(sweep_id, entity, project, epochs=100)

In [15]:
import matplotlib.pyplot as plt
def plot_class_predictions(model, test_loader, test_dataset, num_classes=10, samples_per_class=3):
    """
    Plot a grid of images for each class with their predictions
    """
    # Get class names
    class_names = test_dataset.classes
    
    # Dictionary to store samples for each class
    class_samples = {i: [] for i in range(num_classes)}
    
    # Collect samples for each class
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()
    
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.cpu().numpy()
            
            # Get model predictions
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)
            predictions = predictions.cpu().numpy()
            
            # Store samples for each class
            for i, (image, label, pred) in enumerate(zip(images, labels, predictions)):
                if len(class_samples[label]) < samples_per_class:
                    class_samples[label].append((image.cpu(), pred))
                
            # Check if we have enough samples for each class
            if all(len(samples) >= samples_per_class for samples in class_samples.values()):
                break
    
    # Create a grid of images
    fig, axes = plt.subplots(num_classes, samples_per_class, figsize=(12, 30))
    
    for class_idx in range(num_classes):
        for sample_idx in range(samples_per_class):
            if sample_idx < len(class_samples[class_idx]):
                img, pred = class_samples[class_idx][sample_idx]
                
                # Convert the image for display
                img = img.numpy()
                img = np.transpose(img, (1, 2, 0))  # CxHxW → HxWxC
                img = img * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])  # Unnormalize
                img = np.clip(img, 0, 1)
                
                # Display the image
                axes[class_idx, sample_idx].imshow(img)
                axes[class_idx, sample_idx].set_title(f"True: {class_names[class_idx]}\nPred: {class_names[pred]}", 
                                                  fontsize=8,
                                                  color="green" if class_idx == pred else "red")
                axes[class_idx, sample_idx].axis('off')
            else:
                axes[class_idx, sample_idx].axis('off')
    
    plt.tight_layout()
    return fig

In [17]:
wandb.init(project="inaturalist-cnn-sweep_sample_02") 
model,_ = train_best_model(sweep_id, "da24m004-iitmaana", "inaturalist-cnn-sweep_sample_02", epochs=100)

Best run: nf_32_fs_3_strat_pyramid_act_gelu_dn_256_lr_0.0007171894644397339_bs_64_bn_True_dr_0.2_aug_False
Best validation accuracy: 36.70%
Best configuration: {'seed': 42, 'epochs': 7, 'activation': 'gelu', 'batch_size': 64, 'filter_size': 3, 'num_filters': 32, 'dropout_rate': 0.2, 'dense_neurons': 256, 'learning_rate': 0.0007171894644397339, 'use_batch_norm': True, 'filter_strategy': 'pyramid', 'use_augmentation': False}


Epoch: 0, Batch: 0, Loss: 2.4030
Epoch: 0, Batch: 100, Loss: 2.2099
Best model saved with val_acc: 27.85% at epoch 0
Epoch: 0, Train Loss: 2.1812, Train Acc: 20.14%, Val Loss: 2.0284, Val Acc: 27.85%
Epoch: 1, Batch: 0, Loss: 2.0783
Epoch: 1, Batch: 100, Loss: 2.0327
Best model saved with val_acc: 32.25% at epoch 1
Epoch: 1, Train Loss: 2.0536, Train Acc: 27.02%, Val Loss: 1.9499, Val Acc: 32.25%
Epoch: 2, Batch: 0, Loss: 2.1040
Epoch: 2, Batch: 100, Loss: 1.9826
Best model saved with val_acc: 32.90% at epoch 2
Epoch: 2, Train Loss: 1.9933, Train Acc: 29.30%, Val Loss: 1.9175, Val Acc: 32.90%
Epoch: 3, Batch: 0, Loss: 2.0119
Epoch: 3, Batch: 100, Loss: 1.9856
Best model saved with val_acc: 34.05% at epoch 3
Epoch: 3, Train Loss: 1.9593, Train Acc: 30.19%, Val Loss: 1.9164, Val Acc: 34.05%
Epoch: 4, Batch: 0, Loss: 1.9966
Epoch: 4, Batch: 100, Loss: 1.9228
Epoch: 4, Train Loss: 1.9253, Train Acc: 31.44%, Val Loss: 1.8782, Val Acc: 33.90%
Epoch: 5, Batch: 0, Loss: 1.7905
Epoch: 5, Batch:

  model.load_state_dict(torch.load('best_model_100_epochs.pth'))


Test Accuracy on 2000 test images: 35.35%


0,1
epoch,▁▂▃▅▆▇█
test_accuracy,▁
train_acc,▁▄▆▆▇▇█
train_loss,█▅▄▃▂▂▁
val_acc,▁▅▅▆▆▇█
val_loss,█▅▄▄▃▂▁

0,1
epoch,6.0
test_accuracy,35.35
train_acc,34.24178
train_loss,1.8678
val_acc,36.55
val_loss,1.82708


In [28]:
# Define transform
# test_transform = transforms.Compose([
#     transforms.Resize((224, 224)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], 
#                          std=[0.229, 0.224, 0.225])
# ])

# # Load test dataset
# test_dataset = datasets.ImageFolder(
#     root='/kaggle/working/inaturalist_12K/val',
#     transform=test_transform
# )

# test_loader = DataLoader(
#     test_dataset,
#     batch_size=config['batch_size'],  # Make sure `config` exists
#     shuffle=True,
#     num_workers=4
# )

In [None]:
# /kaggle/working/inaturalist_12K/train