# Q&A
* how to determine data split between train, validation and test sets? Since we cannot use the test set for any observation, is it even possible?  
* is there conv3d, to use for the viedo data?
* maxpool vs avgpool?
* why big filters do not improve the performance by much?
* how to manage memory correctly? because i think that my current code does generate some trash, that is noe beeing removes (as from time to time i use all available memory, but it appears to be rather random)

# Setup

## Libraries

In [1]:
%matplotlib inline

In [2]:
# !pip install matplotlib torch torchvision numpy pandas scikit-learn wandb

Collecting torch
  Downloading torch-2.6.0-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.21.0-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting wandb
  Downloading wandb-0.19.9-py3-none-win_amd64.whl.metadata (10 kB)
Collecting sympy==1.13.1 (from torch)
  Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Using cached docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting sentry-sdk>=2.0.0 (from wandb)
  Downloading sentry_sdk-2.26.1-py2.py3-none-any.whl.metadata (10 kB)
Collecting setproctitle (from wandb)
  Using cached setproctitle-1.3.5-cp312-cp312-win_amd64.whl.metadata (10 kB)
Downloading torch-2.6.0-cp312-cp312-win_amd64.whl (204.1 MB)
   ---------------------------------------- 0.0/204.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/204.1 MB 991.0 kB/s eta 0:03:26
   ---------------------------------------- 0.1/204.1 MB 656.4 kB/s eta

In [3]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output
from tqdm.auto import tqdm

import torch
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch import nn

import wandb

from io import StringIO
import sys

## Config

In [4]:
batch_size = 64

In [5]:
_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {_device}")

Using device: cpu


In [6]:
data_augmentation = True

### Helpers


In [7]:
# https://stackoverflow.com/questions/16571150/how-to-capture-stdout-output-from-a-python-function-call

class Capturing(list):
    def __enter__(self):
        self._stdout = sys.stdout
        sys.stdout = self._stringio = StringIO()
        return self
    def __exit__(self, *args):
        self.extend(self._stringio.getvalue().splitlines())
        del self._stringio    # free up some memory
        sys.stdout = self._stdout


## Import data
**About data:** The dataset consists of 102 flower categories, and each class has between 40 and 258 images. The images have large scale, pose, and light variations. In addition, there are categories that have large variations within the category and several very similar categories.  
The default split of the dataset is 1020, 1020 and 6149 images for training, validation and test sets respectively.
If you can handle the bigger training dataset, you can experiment by taking up to 80% of the test set for training.

In [8]:
class InMemDataLoader(object):
    """
    A data loader that keeps all data in CPU or GPU memory.
    """

    __initialized = False

    def __init__(
        self,
        dataset,
        batch_size=1,
        shuffle=False,
        sampler=None,
        batch_sampler=None,
        drop_last=False,
    ):
        """A torch dataloader that fetches data from memory."""
        batches = []
        for i in tqdm(range(len(dataset))):
            batch = [torch.tensor(t) for t in dataset[i]]
            batches.append(batch)
        tensors = [torch.stack(ts) for ts in zip(*batches)]
        dataset = torch.utils.data.TensorDataset(*tensors)
        self.dataset = dataset
        self.batch_size = batch_size
        self.drop_last = drop_last

        if batch_sampler is not None:
            if batch_size > 1 or shuffle or sampler is not None or drop_last:
                raise ValueError(
                    "batch_sampler option is mutually exclusive "
                    "with batch_size, shuffle, sampler, and "
                    "drop_last"
                )
            self.batch_size = None
            self.drop_last = None

        if sampler is not None and shuffle:
            raise ValueError("sampler option is mutually exclusive with " "shuffle")

        if batch_sampler is None:
            if sampler is None:
                if shuffle:
                    sampler = torch.utils.data.RandomSampler(dataset)
                else:
                    sampler = torch.utils.data.SequentialSampler(dataset)
            batch_sampler = torch.utils.data.BatchSampler(
                sampler, batch_size, drop_last
            )

        self.sampler = sampler
        self.batch_sampler = batch_sampler
        self.__initialized = True

    def __setattr__(self, attr, val):
        if self.__initialized and attr in ("batch_size", "sampler", "drop_last"):
            raise ValueError(
                "{} attribute should not be set after {} is "
                "initialized".format(attr, self.__class__.__name__)
            )

        super(InMemDataLoader, self).__setattr__(attr, val)

    def __iter__(self):
        for batch_indices in self.batch_sampler:
            yield self.dataset[batch_indices]

    def __len__(self):
        return len(self.batch_sampler)

    def to(self, device):
        self.dataset.tensors = tuple(t.to(device) for t in self.dataset.tensors)
        return self

In [9]:
def load_flowers(
    batch_size=64,
    test_train_valid_percent=(0.1, 0.8, 0.1),
    train_transform=None,
    eval_transform=None,
    Loader=torch.utils.data.DataLoader,
):
    """
    Load the flowers dataset with the given batch size and transformation.
    The dataset is split into train, validation, and test sets according to the specified percentages.
    The data is loaded using the specified loader class.
    """

    if train_transform is None:
        train_transform = transforms.Compose([
            transforms.Resize((224, 224)),

            transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
            transforms.RandomRotation(15),
            transforms.RandomHorizontalFlip(),
            transforms.RandomAdjustSharpness(sharpness_factor=2),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),

            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
    if eval_transform is None:
        eval_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])


    test_percent, train_percent, valid_percent = test_train_valid_percent

    # TRAIN
    train = torchvision.datasets.Flowers102(
        root='./data', split='train', download=True,
        transform=train_transform if data_augmentation else eval_transform
    )
    train = torch.utils.data.Subset(train, range(int(len(train) * train_percent)))

    # TEST
    test = torchvision.datasets.Flowers102(
        root='./data', split='test', download=True, transform=eval_transform
    )
    test = torch.utils.data.Subset(test, range(int(len(test) * test_percent)))

    # VALID
    valid = torchvision.datasets.Flowers102(
        root='./data', split='val', download=True, transform=eval_transform
    )
    valid = torch.utils.data.Subset(valid, range(int(len(valid) * valid_percent)))

    data_loaders = {
        'train': Loader(train, batch_size=batch_size, shuffle=True),
        'valid': Loader(valid, batch_size=batch_size, shuffle=True),
        'test': Loader(test, batch_size=batch_size, shuffle=True),
    }

    return data_loaders


# Solution

## Task 1
* Your task is to implement a convolutional neural network from scratch using PyTorch.
* Your CNN should consist of convolutional layers (Conv2D), pooling layers (MaxPooling2D), activation layers (e.g., ReLU), and fully connected layers (if needed).

### Import data

In [None]:
data_loaders = load_flowers(batch_size, (1, 1, 1), Loader=InMemDataLoader)

### Model class

In [None]:
class Model1(nn.Module):
    def __init__(self, *args, **kwargs):
        super(Model1, self).__init__()
        self.layers = nn.Sequential(*args, **kwargs)

    def forward(self, x):
        x = self.layers(x)
        return x

In [None]:
def evaluate(model, data_loader):
    loss = 0
    correct = 0
    loss_fn = nn.CrossEntropyLoss(
        reduction='sum',
    )

    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(_device), labels.to(_device)

            outputs = model(inputs)
            loss += loss_fn(outputs, labels).item()
            pred = outputs.argmax(
                dim=1, keepdim=True
            )
            correct += (
                pred.eq(labels.view_as(pred)).sum().item()
            )

    loss /= len(data_loader.dataset)
    accuracy = correct / len(data_loader.dataset)
    return loss, accuracy

In [None]:
def train_step(model, optimizer, loss_fn):
    for batch_idx, (inputs, labels) in enumerate(data_loaders['train']):
        inputs, labels = inputs.to(_device), labels.to(_device)

        optimizer.zero_grad()   # Zero gradients
        logits = model(inputs)   # Forward pass
        loss = loss_fn(logits, labels)  # Compute loss
        loss.backward() # Backward pass
        optimizer.step()    # Update weights

        wandb.log({
            "loss": loss.item(),
            "batch_idx": batch_idx,
        })

In [None]:
def init_weights(model):
    for layer in model.modules():
        if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
            nn.init.kaiming_uniform_(layer.weight, nonlinearity='relu')
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)
        elif isinstance(layer, nn.BatchNorm2d):
            nn.init.ones_(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

### Model creation

In [None]:
# version 1 of model - too simple
model_type = "v1"

model = Model1(
    nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),

    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Dropout(p=0.25),

    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),

    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Dropout(p=0.25),

    nn.Flatten(),
    nn.Linear(128 * 56 * 56, 512),
    nn.ReLU(),

    nn.Dropout(p=0.5),
    nn.Linear(512, 102),
)

In [None]:
# version 2
model_type = "v2"

model = Model1(
    nn.Conv2d(3, 128, kernel_size=11, stride=4, padding=2),
    nn.ReLU(),

    nn.MaxPool2d(kernel_size=5, stride=2),
    nn.Dropout(p=0.25),

    nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),

    nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Dropout(p=0.25),

    nn.Conv2d(256, 256, kernel_size=3),
    nn.ReLU(),

    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Dropout(p=0.25),

    nn.Flatten(),
    nn.Linear(6400, 512),
    nn.ReLU(),

    nn.Dropout(p=0.5),
    nn.Linear(512, 102),
)

In [None]:
# stacked 3x3 convs
model_type = "v3_stack3x3"

model = Model1(
    nn.Conv2d(3, 64, kernel_size=5, stride=3, padding=2),
    nn.BatchNorm2d(64),
    nn.ReLU(),

    nn.Conv2d(64, 128, kernel_size=3),
    nn.BatchNorm2d(128),
    nn.ReLU(),

    nn.MaxPool2d(kernel_size=3, stride=3),
    nn.Dropout(p=0.25),

    nn.Conv2d(128, 256, kernel_size=3, stride=2),
    nn.BatchNorm2d(256),
    nn.ReLU(),

    nn.Conv2d(256, 256, kernel_size=3),
    nn.BatchNorm2d(256),
    nn.ReLU(),

    nn.MaxPool2d(kernel_size=3, stride=3),
    nn.Dropout(p=0.25),

    nn.Flatten(),
    nn.Linear(2304, 1024),
    nn.ReLU(),

    nn.Linear(1024, 512),
    nn.ReLU(),

    nn.Dropout(p=0.5),
    nn.Linear(512, 102),
)

In [None]:
# version 3 = alexnet
model_type = "alexnet"

model = Model1(
    nn.Conv2d(3, 96, kernel_size=11, stride=4),
    nn.BatchNorm2d(96),
    nn.ReLU(),

    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Dropout(p=0.25),

    nn.Conv2d(96, 256, kernel_size=5, padding=2),
    nn.BatchNorm2d(256),
    nn.ReLU(),

    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Dropout(p=0.25),

    nn.Conv2d(256, 384, kernel_size=3, padding=1),
    nn.BatchNorm2d(384),
    nn.ReLU(),
    nn.Dropout(p=0.25),

    nn.Conv2d(384, 384, kernel_size=3, padding=1),
    nn.BatchNorm2d(384),
    nn.ReLU(),
    nn.Dropout(p=0.25),

    nn.Conv2d(384, 256, kernel_size=3, padding=1),
    nn.BatchNorm2d(256),
    nn.ReLU(),

    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Dropout(p=0.25),


    nn.Flatten(),
    nn.Linear(6400, 4096),
    nn.ReLU(),

    nn.Linear(4096, 4096),
    nn.ReLU(),

    nn.Dropout(p=0.5),
    nn.Linear(4096, 102),
    # nn.Softmax(dim=1),
)

### Training loop

In [None]:
model.to(_device)

epochs = 100

learning_rate = 0.0001
momentum = 0.9
weight_decay = 0.0005
betas = (0.9, 0.999)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    betas=betas,
    weight_decay=weight_decay,
)
# optimizer = torch.optim.SGD(
#     model.parameters(),
#     lr=learning_rate,
#     momentum=momentum,
#     weight_decay=weight_decay,
# )

loss_fn = nn.CrossEntropyLoss()

init_weights(model)

run = wandb.init(
    entity = "fejowo5522-",
    project= "NN_list3_OxFlow",
    config = {
        "task": 1,
        "batch_size": batch_size,
        "epochs": epochs,
        "optimizer": "SGD",
        "learning_rate": learning_rate,
        # "momentum": momentum,
        "betas": betas,
        "weight_decay": weight_decay,
        "loss_fn": "cross_entropy",
        "model": model_type,
        "data_augmentation": data_augmentation,
    }
)
run.name = "Task1_" + str(int(time.time()))

model.train()
for epoch in tqdm(range(epochs), desc="Training", leave=False):
    train_step(model, optimizer, loss_fn)

    for loader, split in [
        (data_loaders['train'], 'train'),
        (data_loaders['valid'], 'valid'),
    ]:
        loss, accuracy = evaluate(model, loader)
        wandb.log({
            "epoch": epoch,
            f"{split}_loss": loss,
            f"{split}_accuracy": accuracy,
        })

model.eval()
loss, accuracy = evaluate(model, data_loaders['test'])
wandb.log({
    "test_loss": loss,
    "test_accuracy": accuracy,
})
print(
    "Test set: Average loss: {:8.6f}, Accuracy: ({:4.1f}%)".format(
        loss,
        100.0 * accuracy,
    )
)

run.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:


Abort: 

## Task 2
* Train your CNN on different training set sized (10%, 20%, 50%, 80%, 100%) and evaluate the performance on the validation set and test set.
    * Report the accuracy and loss on the validation set and test set for each training set size.
* Train your CNN on the full training set plus 20%, 50% and 80% of the test set and evaluate the performance on the validation set and the remaining test set.
    * Report the accuracy and loss on the validation set and remaining test set for each training set size.
* Compare the performance of your CNN on the different training set sizes and analyze the results.


In [None]:
training_sizes = [0.1, 0.2, 0.5, 0.8, 1, 1.2, 1.5, 1.8]
# training_sizes = [1.2, 1.5, 1.8]

In [None]:
def _train_step(model, optimizer, loss_fn, data_loader, max_batch_percent=1, reverse_loop=False):
    data_iter = iter(data_loader)

    if reverse_loop:
        data_iter = reversed(list(data_iter))

    for batch_idx, (inputs, labels) in enumerate(data_iter):
        if batch_idx >= max_batch_percent * len(data_loader):
            break

        inputs, labels = inputs.to(_device), labels.to(_device)

        optimizer.zero_grad()   # Zero gradients
        logits = model(inputs)   # Forward pass
        loss = loss_fn(logits, labels)  # Compute loss
        loss.backward() # Backward pass
        optimizer.step()    # Update weights

        wandb.log({
            "loss": loss.item(),
            "batch_idx": batch_idx,
        })


def train_step(model, optimizer, loss_fn, max_batch_percent=1):
    _train_step(model, optimizer, loss_fn, data_loaders['train'], max_batch_percent)

    if max_batch_percent > 1:
        _train_step(model, optimizer, loss_fn, data_loaders['test'], max_batch_percent - 1, True)

In [None]:
def evaluate(model, data_loader, max_batch_percent=1):
    loss = 0
    correct = 0
    loss_fn = nn.CrossEntropyLoss(
        reduction='sum',
    )

    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(data_loader):


            inputs, labels = inputs.to(_device), labels.to(_device)

            outputs = model(inputs)
            loss += loss_fn(outputs, labels).item()
            pred = outputs.argmax(
                dim=1, keepdim=True
            )
            correct += (
                pred.eq(labels.view_as(pred)).sum().item()
            )

    loss /= len(data_loader.dataset)
    accuracy = correct / len(data_loader.dataset)
    return loss, accuracy

In [None]:
model.to(_device)

epochs = 5

learning_rate = 0.0001
momentum = 0.9
weight_decay = 0.0005
betas = (0.9, 0.999)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    betas=betas,
    weight_decay=weight_decay,
)
# optimizer = torch.optim.SGD(
#     model.parameters(),
#     lr=learning_rate,
#     momentum=momentum,
#     weight_decay=weight_decay,
# )

loss_fn = nn.CrossEntropyLoss()

for training_size in training_sizes:
    print(f"====>   Training size: {training_size}")

    init_weights(model)

    run = wandb.init(
        entity = "fejowo5522-",
        project= "NN_list3_OxFlow",
        config = {
            "task": 2,
            "batch_size": batch_size,
            "epochs": epochs,
            "optimizer": "SGD",
            "learning_rate": learning_rate,
            # "momentum": momentum,
            "betas": betas,
            "weight_decay": weight_decay,
            "loss_fn": "cross_entropy",
            "model": model_type,
            "training_size": training_size,
            "data_augmentation": data_augmentation,
        }
    )
    run.name = "Task2_" + str(int(time.time()))

    model.train()

    for epoch in tqdm(range(epochs), desc="Training", leave=False):
        train_step(model, optimizer, loss_fn, training_size)

        for loader, split in [
            (data_loaders['train'], 'train'),
            (data_loaders['valid'], 'valid'),
        ]:
            loss, accuracy = evaluate(model, loader)
            wandb.log({
                "epoch": epoch,
                f"{split}_loss": loss,
                f"{split}_accuracy": accuracy,
            })

    model.eval()
    loss, accuracy = evaluate(model, data_loaders['test'], 1 if training_size < 1 else 2 - training_size)
    wandb.log({
        "test_loss": loss,
        "test_accuracy": accuracy,
    })
    print(
        "Test set: Average loss: {:8.6f}, Accuracy: ({:4.1f}%)".format(
            loss,
            100.0 * accuracy,
        )
    )

    run.finish()

====>   Training size: 0.1


0,1
batch_idx,▄▇▃▁▂▄▅▅█▃▅▄▁▂▃▂▄▃▇▂█▄▇▁▂▇▃▄▅▆▄▆▂▂▃▆▃▄▆▅
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▆█▅▅▅███▄
valid_accuracy,▁▁▁▁▁▁▁▁▁▁
valid_loss,▁▁▁▂▃▃▄▅▇█

0,1
batch_idx,15.0
epoch,9.0
loss,4.62315
train_accuracy,0.0098
train_loss,4.623
valid_accuracy,0.0098
valid_loss,4.625


Training:   0%|          | 0/5 [00:00<?, ?it/s]

Test set: Average loss: 4.619931, Accuracy: ( 1.0%)


0,1
batch_idx,▁█▁█▁█▁█▁█
epoch,▁▁▃▃▅▅▆▆██
loss,▁██▆▅▃▂▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▅▂▆█▁
train_loss,█▅▂▁▁
valid_accuracy,▂▃█▁▂
valid_loss,█▅▂▁▁

0,1
batch_idx,1.0
epoch,4.0
loss,4.73263
test_accuracy,0.00992
test_loss,4.61993
train_accuracy,0.0098
train_loss,4.62428
valid_accuracy,0.00784
valid_loss,4.65063


====>   Training size: 0.2


Training:   0%|          | 0/5 [00:00<?, ?it/s]

Test set: Average loss: 4.624298, Accuracy: ( 1.0%)


0,1
batch_idx,▁▃▆█▁▃▆█▁▃▆█▁▃▆█▁▃▆█
epoch,▁▁▃▃▅▅▆▆██
loss,▂▇██▆▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,█▁▅▇▂
train_loss,█▁▁▁▁
valid_accuracy,▁▃█▆█
valid_loss,█▁▁▁▁

0,1
batch_idx,3.0
epoch,4.0
loss,4.61903
test_accuracy,0.01025
test_loss,4.6243
train_accuracy,0.00784
train_loss,4.62643
valid_accuracy,0.01078
valid_loss,4.61738


====>   Training size: 0.5


Training:   0%|          | 0/5 [00:00<?, ?it/s]

Test set: Average loss: 4.624965, Accuracy: ( 1.0%)


0,1
batch_idx,▁▂▃▄▅▆▇█▁▂▃▄▅▆▇█▁▂▃▄▅▆▇█▁▂▃▄▅▆▇█▁▂▃▄▅▆▇█
epoch,▁▁▃▃▅▅▆▆██
loss,▃▆▇█▅▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▁████
train_loss,█▁▁▁▁
valid_accuracy,▁▁▁▁▁
valid_loss,█▁▁▁▁

0,1
batch_idx,7.0
epoch,4.0
loss,4.62499
test_accuracy,0.01008
test_loss,4.62496
train_accuracy,0.0098
train_loss,4.62482
valid_accuracy,0.0098
valid_loss,4.62497


====>   Training size: 0.8


Training:   0%|          | 0/5 [00:00<?, ?it/s]

Test set: Average loss: 4.624758, Accuracy: ( 0.3%)


0,1
batch_idx,▁▂▂▃▃▅▆▇▇█▂▄▅▇█▂▂▃▅▅▆▇█▂▂▃▄▅▆▆▇█▁▂▂▃▅▆▇█
epoch,▁▁▃▃▅▅▆▆██
loss,▂▆██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆█▅▅
train_loss,▁▁█▂▂
valid_accuracy,▁▁▁▁▁
valid_loss,█▂▁▃▂

0,1
batch_idx,12.0
epoch,4.0
loss,4.62132
test_accuracy,0.00293
test_loss,4.62476
train_accuracy,0.0098
train_loss,4.62426
valid_accuracy,0.0098
valid_loss,4.62487


====>   Training size: 1


Training:   0%|          | 0/5 [00:00<?, ?it/s]

Test set: Average loss: 4.622943, Accuracy: ( 1.7%)


0,1
batch_idx,▂▃▃▄▄▅▆▆▇█▁▂▂▃▄▅▆▆▇█▁▃▄▅▆█▂▃▄▄▅▆▆▇▁▂▃▄▅█
epoch,▁▁▃▃▅▅▆▆██
loss,▇█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,█▃▆▁▁
train_loss,▅█▁▆█
valid_accuracy,▆▁█▁▁
valid_loss,▄█▁▆▇

0,1
batch_idx,15.0
epoch,4.0
loss,4.62503
test_accuracy,0.01659
test_loss,4.62294
train_accuracy,0.0098
train_loss,4.6249
valid_accuracy,0.0098
valid_loss,4.62497


====>   Training size: 1.2


Training:   0%|          | 0/5 [00:00<?, ?it/s]

Test set: Average loss: 4.609618, Accuracy: ( 4.7%)


0,1
batch_idx,▂▄▆▂▄▅▅▆▇█▄▄▅▁▄▅▇█▂▂██▂▂▄▇▁▃▄▅▇▇█▄▅▅▅▅▇▇
epoch,▁▁▃▃▅▅▆▆██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▁█▃▃▆
train_loss,█▃▂▁▁
valid_accuracy,▁▁▁▁█
valid_loss,█▆▄▄▁

0,1
batch_idx,19.0
epoch,4.0
loss,4.6181
test_accuracy,0.04732
test_loss,4.60962
train_accuracy,0.01078
train_loss,4.62315
valid_accuracy,0.01275
valid_loss,4.62048


====>   Training size: 1.5


Training:   0%|          | 0/5 [00:00<?, ?it/s]

Test set: Average loss: 4.618347, Accuracy: ( 3.8%)


0,1
batch_idx,▁▂▃▂▂▄▆▆▂▁▂▂▅▅▆█▃▄▄▄▇▃▃▁▁▆▇██▁▃▃▂▂▃▄▅▅▆█
epoch,▁▁▃▃▅▅▆▆██
loss,█▅▃▄▃▃▁▄▃▃▃▃▄▃▃▃▃▃▅▄▃▃▃▃▃▃▃▄▃▃▃▂▃▄▃▄▃▂▃▂
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁
train_loss,▁▄▇██
valid_accuracy,▁▁▁▁▁
valid_loss,▁▂▃▅█

0,1
batch_idx,48.0
epoch,4.0
loss,4.61874
test_accuracy,0.03773
test_loss,4.61835
train_accuracy,0.0098
train_loss,4.62502
valid_accuracy,0.0098
valid_loss,4.62502


====>   Training size: 1.8


Training:   0%|          | 0/5 [00:00<?, ?it/s]

Test set: Average loss: 4.614853, Accuracy: ( 3.9%)


0,1
batch_idx,▁▂▁▂▃█▁▂▂▂▄▆▇▇█▂▂▂▁▂▅▅▅▇▇▂▂▃▃▅▂▂▃▄▄▅▆▇▇█
epoch,▁▁▃▃▅▅▆▆██
loss,▅█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▁▁▁
train_loss,▁▁▆▇█
valid_accuracy,▁▁▁▁▁
valid_loss,▁▂▃▅█

0,1
batch_idx,77.0
epoch,4.0
loss,4.61319
test_accuracy,0.03887
test_loss,4.61485
train_accuracy,0.0098
train_loss,4.62508
valid_accuracy,0.0098
valid_loss,4.62508


## Task 3
* Implement a baseline AlexNet model using PyTorch.
* Training AlexNet may take a long time, so try to use GPU acceleration if available.


### Model class

In [51]:
class AlexNetBaseline(nn.Module):
    def __init__(self, num_classes=102):
        super(AlexNetBaseline, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    def init_weights(self):
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
                nn.init.kaiming_uniform_(layer.weight, nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.ones_(layer.weight)
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)

    def evaluate(self, data_loader):
        loss = 0
        correct = 0
        loss_fn = nn.CrossEntropyLoss(
            reduction='sum',
        )

        with torch.no_grad():
            for inputs, labels in data_loader:
                inputs, labels = inputs.to(_device), labels.to(_device)

                outputs = self(inputs)
                loss += loss_fn(outputs, labels).item()
                pred = outputs.argmax(
                    dim=1, keepdim=True
                )
                correct += (
                    pred.eq(labels.view_as(pred)).sum().item()
                )

        loss /= len(data_loader.dataset)
        accuracy = correct / len(data_loader.dataset)
        return loss, accuracy

In [52]:
def train_step(model, optimizer, loss_fn, data_loader):
    data_iter = iter(data_loader)

    for batch_idx, (inputs, labels) in enumerate(data_iter):
        inputs, labels = inputs.to(_device), labels.to(_device)

        optimizer.zero_grad()   # Zero gradients
        logits = model(inputs)   # Forward pass
        loss = loss_fn(logits, labels)  # Compute loss
        loss.backward() # Backward pass
        optimizer.step()    # Update weights

        wandb.log({
            "loss": loss.item(),
            "batch_idx": batch_idx,
        })

### Train model

In [None]:
batch_size = 64
data_loaders = load_flowers(batch_size, (1, 1, 1), transform=None, Loader=torch.utils.data.DataLoader)

In [20]:
# initialize
alexnet_model = AlexNetBaseline(num_classes=102).to(_device)
print(alexnet_model)

AlexNetBaseline(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout

In [22]:
# parameters
epochs = 100
learning_rate = 0.001
weight_decay = 0.0005

# optimizer and loss
optimizer = torch.optim.Adam(alexnet_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
loss_fn = nn.CrossEntropyLoss()

alexnet_model.init_weights()

run = wandb.init(
    entity = "fejowo5522-",
    project= "NN_list3_OxFlow",
    config = {
        "task": 3,
        "batch_size": batch_size,
        "epochs": epochs,
        "optimizer": "Adam",
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "loss_fn": "cross_entropy",
        "model": 'alexnet',
        "data_augmentation": data_augmentation,
    }
)
run.name = "Task3_" + str(int(time.time()))


alexnet_model.train()
for epoch in tqdm(range(epochs), desc="Training", leave=False):
    train_step(alexnet_model, optimizer, loss_fn, data_loaders['train'])

    for loader, split in [
        (data_loaders['train'], 'train'),
        (data_loaders['valid'], 'valid'),
    ]:
        loss, accuracy = alexnet_model.evaluate(loader)
        wandb.log({
            "epoch": epoch,
            f"{split}_loss": loss,
            f"{split}_accuracy": accuracy,
        })

alexnet_model.eval()
loss, accuracy = alexnet_model.evaluate(data_loaders['test'])
wandb.log({
    "test_loss": loss,
    "test_accuracy": accuracy,
})
print(
    "Test set: Average loss: {:8.6f}, Accuracy: ({:4.1f}%)".format(
        loss,
        100.0 * accuracy,
    )
)

run.finish()

Training:   0%|          | 0/100 [00:00<?, ?it/s]

Test set: Average loss: 4.625029, Accuracy: ( 0.7%)


0,1
batch_idx,▆▁▃▅▄▇▇█▇█▃▄▁▄▅▁▆▁▆█▃▃▇▇▃▇▂▅█▆▄▅▁▃▃▂█▅▄█
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇█
loss,█▁▇▄▃▃▂▄▄▃▂▄▃▄▃▂▃▃▃▃▃▃▂▂▃▃▃▃▄▂▃▄▃▃▂▃▄▂▃▂
test_accuracy,▁
test_loss,▁
train_accuracy,█▅▁▅█▄▅▅▅▅▅▅▅▅▄▅▅▅▅▅▅▅▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
train_loss,█▆▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid_accuracy,█▇▇▆▇▁█▆▆▇█▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
valid_loss,█▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
batch_idx,15.0
epoch,99.0
loss,4.62572
test_accuracy,0.00748
test_loss,4.62503
train_accuracy,0.0098
train_loss,4.62497
valid_accuracy,0.0098
valid_loss,4.62497


## Task 4
* Input normalization: experiment with different input normalization techniques (e.g., mean subtraction, standardization) and analyze their impact on the model's performance.


In [None]:
def train_step(model, optimizer, loss_fn, data_loader):
    data_iter = iter(data_loader)

    for batch_idx, (inputs, labels) in enumerate(data_iter):
        inputs, labels = inputs.to(_device), labels.to(_device)

        optimizer.zero_grad()   # Zero gradients
        logits = model(inputs)   # Forward pass
        loss = loss_fn(logits, labels)  # Compute loss
        loss.backward() # Backward pass
        optimizer.step()    # Update weights

        wandb.log({
            "loss": loss.item(),
            "batch_idx": batch_idx,
        })

In [65]:
def test_data_loaders(
    normalization_method,
    data_loaders,
    model,
    epochs=10,
    learning_rate=0.001,
    weight_decay = 0.0005,
):
    # optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    loss_fn = nn.CrossEntropyLoss()

    model.init_weights()

    run = wandb.init(
        entity = "fejowo5522-",
        project= "NN_list3_OxFlow",
        config = {
            "task": 4,
            "batch_size": batch_size,
            "epochs": epochs,
            "optimizer": "Adam",
            "learning_rate": learning_rate,
            "weight_decay": weight_decay,
            "loss_fn": "cross_entropy",
            "model": 'alexnet',
            "normalization": normalization_method,
            "data_augmentation": data_augmentation,
        }
    )
    run.name = "Task4_" + str(int(time.time()))


    model.train()
    for epoch in tqdm(range(epochs), desc="Training", leave=False):
        train_step(model, optimizer, loss_fn, data_loaders['train'])

        for loader, split in [
            (data_loaders['train'], 'train'),
            (data_loaders['valid'], 'valid'),
        ]:
            loss, accuracy = model.evaluate(loader)
            wandb.log({
                "epoch": epoch,
                f"{split}_loss": loss,
                f"{split}_accuracy": accuracy,
            })

    model.eval()
    loss, accuracy = model.evaluate(data_loaders['test'])
    wandb.log({
        "test_loss": loss,
        "test_accuracy": accuracy,
    })
    print(
        "Test set: Average loss: {:8.6f}, Accuracy: ({:4.1f}%)".format(
            loss,
            100.0 * accuracy,
        )
    )

    run.finish()

In [66]:
normalization_transforms = {
    'mean_subtraction': transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[1, 1, 1]),
    'standardization': transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
}

for name, normalization in normalization_transforms.items():
    data_loaders = load_flowers(
        batch_size,
        (1,1,1),
        train_transform=transforms.Compose([
            transforms.Resize((224, 224)),

            transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
            transforms.RandomRotation(15),
            transforms.RandomHorizontalFlip(),
            transforms.RandomAdjustSharpness(sharpness_factor=2),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),

            transforms.ToTensor(),
            normalization,
        ]),
        eval_transform=transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            normalization,
        ]),
        Loader=torch.utils.data.DataLoader
    )


    model = AlexNetBaseline(num_classes=102).to(_device)

    test_data_loaders(name, data_loaders, model)



Training:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.135406, Accuracy: ( 3.7%)


0,1
batch_idx,▂▃▄▄▁▄▅█▇█▂▃▃▄▅▂▃▅██▄▄▅▂▂▄▅▇█▁▃▆█▅▇█▂▃▄█
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,██████████▇▇▅▆▇█▇▇▆▇▇▅▆▅▅▅▆▅▆▆▅▅▄▃▃▂▃▁▁▃
test_accuracy,▁
test_loss,▁
train_accuracy,▁▁▂▃▃▃▄▆▆█
train_loss,██▇▇▇▆▆▄▂▁
valid_accuracy,▁▂▁▃▃▅▅█▇█
valid_loss,██▇▇▇▆▅▄▂▁

0,1
batch_idx,15.0
epoch,9.0
loss,4.24219
test_accuracy,0.03692
test_loss,4.13541
train_accuracy,0.0451
train_loss,4.06377
valid_accuracy,0.03529
valid_loss,4.1377


Training:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.626478, Accuracy: ( 1.1%)


0,1
batch_idx,▁▂▃▄█▅▂▆▇█▁▁▂▆▆▁▂▃▆▇▂▂▆█▁▅▇█▁▄▁▁▂▅▆▁▂▂▄█
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▆▆▆█▆▆▆▆▆▁
train_loss,██▄▃▃▂▁▂▁▂
valid_accuracy,▅█▅▅▅▇▂▂▅▁
valid_loss,▆█▃▃▃▂▂▂▁▁

0,1
batch_idx,15.0
epoch,9.0
loss,4.62759
test_accuracy,0.01073
test_loss,4.62648
train_accuracy,0.00784
train_loss,4.62503
valid_accuracy,0.00686
valid_loss,4.625


## Zad 5
* Experiment with different hyperparameters such as learning rate, batch size, number of epochs, and optimizer choice (e.g., SGD, Adam).

### Testing function

In [None]:
def random_search_tuner(
    param_ranges,
    model_name,
    model_class,
    optimizer_name='Adam',
    loss_fn_name='cross_entropy',
    trials=10,
    epochs=10,
):
    optimizer = {
        'Adam': torch.optim.Adam,
        'SGD': torch.optim.SGD,
    }[optimizer_name]

    loss_fn = {
        'cross_entropy': nn.CrossEntropyLoss,
        'mse': nn.MSELoss,
    }[loss_fn_name]
    loss_fn = loss_fn()

    for trial in tqdm(range(trials), desc="Trial", leave=False, position=0):
        # randomly sample hyperparameters
        variables = {
            'learning_rate': 0,
            'weight_decay': 0,
            'momentum': 0,
            'batch_size': 0,
        }

        for name, variable in variables.items():
            if name in param_ranges:
                variables[name] = np.random.uniform(param_ranges[name][0], param_ranges[name][1])

        # init model
        model = model_class().to(_device)
        model.init_weights()

        # create optimizer
        if(optimizer_name == 'SGD'):
            _optimizer = optimizer(
                model.parameters(),
                lr=variables['learning_rate'],
                weight_decay=variables['weight_decay'],
                momentum=variables['momentum'],
            )
        elif(optimizer_name == 'Adam'):
            _optimizer = optimizer(
                model.parameters(),
                lr=variables['learning_rate'],
                weight_decay=variables['weight_decay'],
            )

        # create data loaders
        data_percent = (1,1,1)
        data_loaders = load_flowers(
            batch_size,
            data_percent,
        )

        # init wandb
        with Capturing() as output:
            run = wandb.init(
                entity = "fejowo5522-",
                project= "NN_list3_OxFlow",
                config = {
                    "task": 5,
                    "batch_size": variables['batch_size'],
                    "epochs": epochs,
                    "optimizer": optimizer_name,
                    "learning_rate": variables['learning_rate'],
                    "momentum": variables['momentum'],
                    "weight_decay": variables['weight_decay'],
                    "loss_fn": loss_fn_name,
                    "model": model_name,
                    "data_percent": data_percent,
                    "data_augmentation": data_augmentation,
                }
            )
            run.name = "Task5_" + str(int(time.time()))

        # train model
        model.train()
        for epoch in tqdm(range(epochs), desc="Epoch", leave=False, position=1):
            train_step(model, _optimizer, loss_fn, data_loaders['train'])

            for loader, split in [
                (data_loaders['train'], 'train'),
                (data_loaders['valid'], 'valid'),
            ]:
                loss, accuracy = model.evaluate(loader)
                with Capturing() as output:
                    wandb.log({
                        "epoch": epoch,
                        f"{split}_loss": loss,
                        f"{split}_accuracy": accuracy,
                    })

        model.eval()
        loss, accuracy = model.evaluate(data_loaders['test'])
        with Capturing() as output:
            wandb.log({
                "test_loss": loss,
                "test_accuracy": accuracy,
            })
        print(
            "Test set: Average loss: {:8.6f}, Accuracy: ({:4.1f}%)".format(
                loss,
                100.0 * accuracy,
            )
        )

        run.finish()


### Model class

In [None]:
def train_step(model, optimizer, loss_fn, data_loader):
    data_iter = iter(data_loader)

    for batch_idx, (inputs, labels) in enumerate(data_iter):
        inputs, labels = inputs.to(_device), labels.to(_device)

        optimizer.zero_grad()   # Zero gradients
        logits = model(inputs)   # Forward pass
        loss = loss_fn(logits, labels)  # Compute loss
        loss.backward() # Backward pass
        optimizer.step()    # Update weights

        with Capturing() as output:
            wandb.log({
                "loss": loss.item(),
                "batch_idx": batch_idx,
            })

In [12]:
class AlexNetBaseline(nn.Module):
    def __init__(self, num_classes=102):
        super(AlexNetBaseline, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    def init_weights(self):
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
                nn.init.kaiming_uniform_(layer.weight, nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.ones_(layer.weight)
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)

    def evaluate(self, data_loader):
        loss = 0
        correct = 0
        loss_fn = nn.CrossEntropyLoss(
            reduction='sum',
        )

        with torch.no_grad():
            for inputs, labels in data_loader:
                inputs, labels = inputs.to(_device), labels.to(_device)

                outputs = self(inputs)
                loss += loss_fn(outputs, labels).item()
                pred = outputs.argmax(
                    dim=1, keepdim=True
                )
                correct += (
                    pred.eq(labels.view_as(pred)).sum().item()
                )

        loss /= len(data_loader.dataset)
        accuracy = correct / len(data_loader.dataset)
        return loss, accuracy

### Perform test on parameters

In [13]:
random_search_tuner(
    param_ranges={
        'learning_rate': (1e-5, 1e-1),
        'weight_decay': (0.0, 0.1),
    },
    model_name = 'AlexNetBaseline',
    model_class = AlexNetBaseline,
    optimizer_name='Adam',
    loss_fn_name='cross_entropy',
    trials=10,
    epochs=10,
)

Trial:   0%|          | 0/10 [00:00<?, ?it/s]

100%|██████████| 345M/345M [08:54<00:00, 645kB/s] 
100%|██████████| 502/502 [00:00<00:00, 503kB/s]
100%|██████████| 15.0k/15.0k [00:00<00:00, 14.7MB/s]
wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: fejowo5522 (fejowo5522-) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin
  Expected `list[str]` but got `tuple` - serialized value may not be as expected
  Expected `list[str]` but got `tuple` - serialized value may not be as expected
  return self.__pydantic_serializer__.to_python(


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.738259, Accuracy: ( 0.3%)


0,1
batch_idx,▁▂▃▇▁▄▆▆▂▃▅▅▇▁▁▅▆█▁▃▆█▃▅█▃▆██▄█▁▂▃▆▂▃▅▆▇
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,▁▁▁▄█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▅▃▄▆▅▂▁█▅▂
train_loss,▂█▁▁▁▁▁▁▁▁
valid_accuracy,▅▄▂▁▅▂▃▅▃█
valid_loss,▃█▁▁▁▁▁▁▁▁

0,1
batch_idx,15.0
epoch,9.0
loss,5.04685
test_accuracy,0.00342
test_loss,4.73826
train_accuracy,0.00784
train_loss,4.84152
valid_accuracy,0.01863
valid_loss,4.84499


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.624626, Accuracy: ( 1.1%)


0,1
batch_idx,▂▄▅▆▇▁▃▄▅█▁▂▄▅▆██▁▂▄▂▃▃▅▇▃▅█▁▃▆▆▃▅▆▁▂▆▆█
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,█▆▃▂▁▂▂▁▁▂▂▁▁▁▂▂▁▁▂▂▁▃▁▁▂▁▂▂▂▁▂▁▂▂▁▁▁▂▂▂
test_accuracy,▁
test_loss,▁
train_accuracy,██▁█▅█████
train_loss,█▄▂▂▁▂▁▁▁▁
valid_accuracy,█▁▁▁▁▁▁▁▁▁
valid_loss,█▃▂▂▁▁▁▁▁▁

0,1
batch_idx,15.0
epoch,9.0
loss,4.6268
test_accuracy,0.0109
test_loss,4.62463
train_accuracy,0.0098
train_loss,4.62498
valid_accuracy,0.0098
valid_loss,4.62498


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.638284, Accuracy: ( 0.3%)


0,1
batch_idx,▁▂▃▆▇▂▅▇█▁▅▆▆▇▇▁▂▃▄▆▂▃▅▆▁▆▇▃▁▆▃▄▅▆▇▂▅▆▇█
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▆▁▁▆█▅▅▅▅▄
train_loss,▁▁▁█▁▁▁▁▁▁
valid_accuracy,█▁▂▂▄▄▄▄▄▅
valid_loss,▁▁▁█▁▁▁▁▁▁

0,1
batch_idx,15.0
epoch,9.0
loss,4.72786
test_accuracy,0.00325
test_loss,4.63828
train_accuracy,0.00882
train_loss,167.39055
valid_accuracy,0.01078
valid_loss,12.29674


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.620449, Accuracy: ( 1.5%)


0,1
batch_idx,▁▁▄▅▆▁▃▃▄▇▁▃▅█▁▆▆▄▄▇▁▆▇▃▄▄▄▅██▂▃▄▆▇▁▁▂▄▆
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,█▂▅▂▂▂▃▁▂▃
train_loss,█▅▁▃▁▂▁▁▁▁
valid_accuracy,█▁▂▂▂▂▂▂▁▂
valid_loss,▇█▁▃▂▁▃▁▁▁

0,1
batch_idx,15.0
epoch,9.0
loss,4.64503
test_accuracy,0.01529
test_loss,4.62045
train_accuracy,0.01078
train_loss,28374220.17156
valid_accuracy,0.0098
valid_loss,1766336.06983


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.650732, Accuracy: ( 0.5%)


0,1
batch_idx,▃▃▄▄▂▅▅▆▇█▃▃▄▆▁▅▆▁▃▅▆▂▄▇█▃▄▅▁▂▅▇██▁▂▃▅▆▅
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▄▁▅▆█▄▅▇▆▅
train_loss,▁▃█▁▁▁▁▁▁▁
valid_accuracy,▄▁▄▃▁█▆▂▄▅
valid_loss,▁▂█▁▁▁▁▁▁▁

0,1
batch_idx,15.0
epoch,9.0
loss,4.93619
test_accuracy,0.0052
test_loss,4.65073
train_accuracy,0.00882
train_loss,4.76384
valid_accuracy,0.01176
valid_loss,4.77389


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.626359, Accuracy: ( 1.1%)


0,1
batch_idx,▄▁▂▃▅▃▄▅▆▇█▁▂▅▁▅▆▆▂▄▅▇█▁▃▅▆▆▁▁▅▆▇▇▂▄▅▇▁▇
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,▁▂▆▁▂█▂▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁
test_accuracy,▁
test_loss,▁
train_accuracy,█▆▁▆▆▆▂▆▆▆
train_loss,█▁▂▁▁▁▆▁▁▁
valid_accuracy,▆▃▁▃▃▃█▃▃▃
valid_loss,█▁▂▁▁▁▄▁▁▁

0,1
batch_idx,15.0
epoch,9.0
loss,4.64122
test_accuracy,0.01057
test_loss,4.62636
train_accuracy,0.0098
train_loss,253445.44634
valid_accuracy,0.0098
valid_loss,405988.32136


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.631606, Accuracy: ( 1.0%)


0,1
batch_idx,▄▅▆▂▁▅▅▇▁▁▃▆▇▇▁▄▅▆▂▃▇▂▃▃▄▆▇█▄▇▂▃▃▄▅▇▇▂▃▆
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,▁▁▁▁▁▁▁▁▁▁▁▇█▄▂▁▁▂▂▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄█▃▃▆▂▃▄▃
train_loss,▁▃█▁▁▁▁▁▁▁
valid_accuracy,█▄▄▄█▁▅▇▄▅
valid_loss,▁▃█▁▁▁▁▁▁▁

0,1
batch_idx,15.0
epoch,9.0
loss,4.82262
test_accuracy,0.01008
test_loss,4.63161
train_accuracy,0.0098
train_loss,4.76433
valid_accuracy,0.0098
valid_loss,4.76057


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.625014, Accuracy: ( 1.8%)


0,1
batch_idx,▃▃▄▅▆█▁██▂▄▄▅▆▁▆█▁▂▂▆█▅▁▁▆▁▂▄▄▆▆█▂▅▂▃▄▅▇
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▃▁▃▃▃▃█▃▃▃
train_loss,█▂▁▁▁▂▁▁▁▁
valid_accuracy,▂█▂▂▂▂▁▄▂▄
valid_loss,█▂▁▁▁▃▃▁▁▁

0,1
batch_idx,15.0
epoch,9.0
loss,4.62637
test_accuracy,0.01805
test_loss,4.62501
train_accuracy,0.0098
train_loss,4.62498
valid_accuracy,0.01176
valid_loss,4.62497


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.633497, Accuracy: ( 0.8%)


0,1
batch_idx,▁▁▃▆▆▁▂▄▆▇▂▄▇▁▁▄▅▇█▂▇▃▅▆▇▄▅▅▂▄▅▇█▁▂▅▅▆▂█
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,█▃▁▃▃▃▃▃▃▃
train_loss,▁█▁▁▂▁▂▆▄▂
valid_accuracy,▁██▆▃▃▆▆▆▁
valid_loss,▁█▁▁▁▁▁▂▂▁

0,1
batch_idx,15.0
epoch,9.0
loss,114208216.0
test_accuracy,0.00829
test_loss,4.6335
train_accuracy,0.0098
train_loss,40278611.12793
valid_accuracy,0.00784
valid_loss,20886095.23477


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Test set: Average loss: 4.627984, Accuracy: ( 0.3%)


0,1
batch_idx,▁▂▃▃▄▆▇▇▁▂▅▆▁▄█▃▄██▁▄▁▂▃▅▇▁▂▄▅▁▃▄▅▆▇▂▄▆█
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
loss,▁▁█▁▇▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▅▁█▅▅▅▅▅▅▅
train_loss,█▁▁▁▁▁▁▁▁▁
valid_accuracy,▁▁▁▁▁▁▁▁▁▁
valid_loss,█▁▁▁▁▁▁▁▁▁

0,1
batch_idx,15.0
epoch,9.0
loss,4.64007
test_accuracy,0.00342
test_loss,4.62798
train_accuracy,0.0098
train_loss,4.62564
valid_accuracy,0.0098
valid_loss,5925190.59398


In [14]:
random_search_tuner(
    param_ranges={
        'learning_rate': (1e-5, 1e-1),
        'weight_decay': (0.0, 0.1),
        'momentum': (0.0, 0.01),
    },
    model_name = 'AlexNetBaseline',
    model_class = AlexNetBaseline,
    optimizer_name='SGD',
    loss_fn_name='cross_entropy',
    trials=10,
    epochs=20,
)

Trial:   0%|          | 0/10 [00:00<?, ?it/s]

  Expected `list[str]` but got `tuple` - serialized value may not be as expected
  Expected `list[str]` but got `tuple` - serialized value may not be as expected
  return self.__pydantic_serializer__.to_python(


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Test set: Average loss:      nan, Accuracy: ( 0.3%)


0,1
batch_idx,▃█▁▄▆▁▃▄▅▁▄▆▇▁▆▇▁▂▄▅▅▆▆▁▂▆▁▄▃█▃▄█▁▂▄▅▆▇▄
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
loss,▁
test_accuracy,▁
train_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
batch_idx,15.0
epoch,19.0
loss,
test_accuracy,0.00325
test_loss,
train_accuracy,0.0098
train_loss,
valid_accuracy,0.0098
valid_loss,


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Test set: Average loss: 4.326445, Accuracy: ( 5.2%)


0,1
batch_idx,▄▅▆█▁▄▅▁▁▆▃▇▇▃▅▁▅▃█▁▁▃▇█▃▁▃▄▅▆▄▅▁▃▃▃▁▃▅█
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
loss,█▄▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▂▁▁▁▁▁▂▁▁▁▁▂▃▁▃▅▅▅▆█
train_loss,███▇▇▇▇▆▅▅▅▄▄▄▄▃▂▂▁▁
valid_accuracy,▁▁▁▂▂▂▁▂▁▂▂▃▃▂▃▅▆▆▅█
valid_loss,██▇▇▇▇▆▆▅▅▄▄▄▄▃▃▃▂▁▁

0,1
batch_idx,15.0
epoch,19.0
loss,4.35585
test_accuracy,0.05188
test_loss,4.32645
train_accuracy,0.06961
train_loss,4.22778
valid_accuracy,0.05882
valid_loss,4.29353


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Test set: Average loss: 4.228149, Accuracy: ( 3.3%)


0,1
batch_idx,▁▅▆█▁▇▇▃▄▅▆█▂▆▇▆██▃▇▅▆██▂▄▇▁▃▄▇█▇▁▄▁▁▂▂▄
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
loss,█▆▅▄▅▅▅▆▅▅▅▅▅▆▅▅▄▄▄▄▄▃▄▄▄▄▃▄▄▃▄▄▃▁▂▂▂▂▂▃
test_accuracy,▁
test_loss,▁
train_accuracy,▂▃▃▁▃▃▃▄▃▂▅▅▃▆▆█▆▇██
train_loss,█▇▆▆▆▆▆▅▅▅▄▄▃▃▃▂▂▁▁▁
valid_accuracy,▁▃▃▂▃▃▂▄▄▆▄▄▁▅▄▇▅▄█▇
valid_loss,█▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▂▁▁▁

0,1
batch_idx,15.0
epoch,19.0
loss,4.34716
test_accuracy,0.03253
test_loss,4.22815
train_accuracy,0.03725
train_loss,4.30909
valid_accuracy,0.03137
valid_loss,4.34737


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Test set: Average loss: 4.216880, Accuracy: ( 5.2%)


0,1
batch_idx,▁▁▂▃▄▅▃▄▁▂▁▂▇█▂▇▂▆▆▂▃▃▄▇█▂▃▆▇█▆▁▇▄▇▇▂█▅█
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
loss,█▆▅▅▅▅▅▅▄▅▅▄▄▄▄▄▄▄▄▃▄▄▅▃▄▃▃▃▃▄▄▃▄▂▂▂▃▂▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▂▁▁▂▂▂▁▂▃▂▃▃▃▃▄▅██▅█
train_loss,██▇▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▂▁
valid_accuracy,▁▁▂▁▁▁▂▁▁▃▂▁▃▃▂▅▅▄▅█
valid_loss,██▇▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▁

0,1
batch_idx,15.0
epoch,19.0
loss,4.23701
test_accuracy,0.05172
test_loss,4.21688
train_accuracy,0.05196
train_loss,4.14619
valid_accuracy,0.06667
valid_loss,4.1848


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

KeyboardInterrupt: 

## Zad 6
* Modify your CNN architecture to include batch normalization and dropout layers.
* Experiment with different dropout rates and analyze their impact on the model's performance.


### Model class

In [13]:
class AlexNetBaseline(nn.Module):
    def __init__(self, dropout_p=0.5, num_classes=102):
        super(AlexNetBaseline, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.Dropout(p=dropout_p),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.Dropout(p=dropout_p),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.Dropout(p=dropout_p),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.Dropout(p=dropout_p),
            nn.ReLU(inplace=True),

            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.Dropout(p=dropout_p),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout_p),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(p=dropout_p),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    def init_weights(self):
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
                nn.init.kaiming_uniform_(layer.weight, nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.ones_(layer.weight)
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)

    def evaluate(self, data_loader):
        loss = 0
        correct = 0
        loss_fn = nn.CrossEntropyLoss(
            reduction='sum',
        )

        with torch.no_grad():
            for inputs, labels in data_loader:
                inputs, labels = inputs.to(_device), labels.to(_device)

                outputs = self(inputs)
                loss += loss_fn(outputs, labels).item()
                pred = outputs.argmax(
                    dim=1, keepdim=True
                )
                correct += (
                    pred.eq(labels.view_as(pred)).sum().item()
                )

        loss /= len(data_loader.dataset)
        accuracy = correct / len(data_loader.dataset)
        return loss, accuracy

### Training functions

In [14]:
def train_step(model, optimizer, loss_fn, data_loader):
    data_iter = iter(data_loader)

    for batch_idx, (inputs, labels) in enumerate(data_iter):
        inputs, labels = inputs.to(_device), labels.to(_device)

        optimizer.zero_grad()   # Zero gradients
        logits = model(inputs)   # Forward pass
        loss = loss_fn(logits, labels)  # Compute loss
        loss.backward() # Backward pass
        optimizer.step()    # Update weights

        wandb.log({
            "loss": loss.item(),
            "batch_idx": batch_idx,
        })

In [15]:
def test_dropout(
    dropout,
    model_class,
    epochs=10,
    learning_rate=0.001,
    weight_decay = 0.0005,
):
    model = model_class(dropout)
    model.init_weights()

    # optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    loss_fn = nn.CrossEntropyLoss()

    with Capturing() as output:
        run = wandb.init(
            entity = "fejowo5522-",
            project= "NN_list3_OxFlow",
            config = {
                "task": 6,
                "batch_size": batch_size,
                "epochs": epochs,
                "data_augmentation": data_augmentation,
                "optimizer": "Adam",
                "loss_fn": "cross_entropy",
                "learning_rate": learning_rate,
                "weight_decay": weight_decay,
                "model": 'alexnet',
                "dropout": dropout,
            }
        )
        run.name = "Task6_" + str(int(time.time()))


    model.train()
    for epoch in tqdm(range(epochs), desc="Training", leave=False, position=1):
        train_step(model, optimizer, loss_fn, data_loaders['train'])

        for loader, split in [
            (data_loaders['train'], 'train'),
            (data_loaders['valid'], 'valid'),
        ]:
            loss, accuracy = model.evaluate(loader)
            wandb.log({
                "epoch": epoch,
                f"{split}_loss": loss,
                f"{split}_accuracy": accuracy,
            })

    model.eval()
    loss, accuracy = model.evaluate(data_loaders['test'])
    wandb.log({
        "test_loss": loss,
        "test_accuracy": accuracy,
    })
    print(
        "Test set: Average loss: {:8.6f}, Accuracy: ({:4.1f}%)".format(
            loss,
            100.0 * accuracy,
        )
    )

    run.finish()

### Test dropouts

In [None]:
for drop in tqdm(range(0, 100, 10), position=0):
    test_dropout(
        drop/100,
        AlexNetBaseline,
        epochs=10,
        learning_rate=0.001,
        weight_decay = 0.0005,
    )

0,1
batch_idx,▁
loss,▁

0,1
batch_idx,0.0
loss,5.8069


Training:   0%|          | 0/10 [00:00<?, ?it/s]

## Zad 7
* Implement data augmentation techniques such as random rotations, shifts, flips, and zooms on the training dataset.
* Train your CNN with augmented data and compare the performance with the baseline model trained on the original data.


implemented already in `load_flowers` data loader and global `data_augmentation` variable or custom `transfrom`'s

## Zad 8
* Implement residual connections in your CNN architecture; see the [ResNet paper](https://arxiv.org/abs/1512.03385) for more details.
* Implement inception modules in your CNN architecture; see the [GoogLeNet paper](https://arxiv.org/abs/1409.4842) for more details.
            

### Model classes

In [None]:
class ResNet(nn.Module):
    def __init__(self, num_classes=102):
        super(ResNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.layer2 = self._make_layer(64, 128, 2)
        self.layer3 = self._make_layer(128, 256, 2)
        self.layer4 = self._make_layer(256, 512, 2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, in_channels, out_channels, blocks):
        downsample = None
        if in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        layers = [ResidualBlock(in_channels, out_channels, downsample=downsample)]
        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avg_pool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

In [None]:
class InceptionNet(nn.Module):
    def __init__(self, num_classes=102):
        super(InceptionNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.inception1 = InceptionModule(64, 32, 32, 64, 16, 32, 32)
        self.inception2 = InceptionModule(160, 64, 64, 128, 32, 64, 64)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(320, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.inception1(x)
        x = self.inception2(x)
        x = self.avg_pool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

### Train model

In [None]:
def train_model(model, data_loaders, num_epochs=10, learning_rate=0.001):
    model.to(_device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in data_loaders['train']:
            inputs, labels = inputs.to(_device), labels.to(_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

        model.eval()
        train_loss, train_acc = evaluate(model, data_loaders['train'])
        valid_loss, valid_acc = evaluate(model, data_loaders['valid'])
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}")

    test_loss, test_acc = evaluate(model, data_loaders['test'])
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

In [None]:
resnet_model = ResNet(num_classes=102)
train_model(resnet_model, data_loaders, num_epochs=10, learning_rate=0.001)

In [None]:
inception_model = InceptionNet(num_classes=102)
train_model(inception_model, data_loaders, num_epochs=10, learning_rate=0.001)