### Imports

In [1]:
import torch
print(f'PyTorch CUDA is available? {torch.cuda.is_available()}')
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

PyTorch CUDA is available? True


### Fix randomness for reproducibility

In [2]:
def fix_random(seed: int) -> None:
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU plus every CUDA device), then switches cuDNN to deterministic mode.

    Args:
        seed: the seed to use.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # `manual_seed_all` seeds every GPU rather than only the current one; it is
    # safe to call (and silently ignored) when CUDA is not available.
    torch.cuda.manual_seed_all(seed)

    # Turn off the cuDNN autotuner and restrict cuDNN to deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

fix_random(42)

## Available devices

In [3]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

Device: cuda


### Hyperparameters

In [4]:
# Single configuration dictionary read by the cells below.
cfg = dict(
    # Data pipeline
    n_classes=43,        # number of output classes of the classifiers
    std_size=260,        # side (px) every image is resized to
    rand_crop_size=150,  # side (px) of the random resized crop used for augmentation
    batch_size=64,       # batch size shared by all data loaders

    # Training setup
    epochs=30,
    lr=1e-3,
    wd=1e-4,             # presumably the weight decay; confirm against the optimizer cell
)

## Access dataloader

In [None]:
# Download the GroceryStoreDataset repository into the working directory; the dataset class reads its files from GroceryStoreDataset/dataset.
!git clone https://github.com/marcusklasson/GroceryStoreDataset.git

In [5]:
from pathlib import Path
from PIL import Image
from torch import Tensor
from torch.utils.data import Dataset
from typing import List, Tuple

In [6]:
class GroceryStoreDataset(Dataset):
    """Image-classification dataset backed by the GroceryStoreDataset split files.

    Every non-empty line of ``<root>/<split>.txt`` must have the form
    ``path, fine-grained class, coarse-grained class``. The coarse-grained
    class (third field) is used as the label.

    Args:
        split: name of the split file to read, without the ``.txt`` suffix
            (the notebook uses ``"train"``, ``"val"`` and ``"test"``).
        transform: optional callable applied to the PIL image before it is
            returned by ``__getitem__``.
    """

    def __init__(self, split: str, transform=None) -> None:
        super().__init__()

        self.root = Path("GroceryStoreDataset/dataset")
        self.split = split
        self.paths, self.labels = self.read_file()

        self.transform = transform

    def __len__(self) -> int:
        """Return the number of samples in the split."""
        return len(self.labels)

    def __getitem__(self, idx) -> Tuple[Tensor, int]:
        """Load sample ``idx`` and return ``(image, label)``.

        The image is returned as a 3-channel RGB PIL image, or as whatever
        ``transform`` produces from it when a transform was given.
        """
        # PIL opens files lazily; converting inside the `with` block forces the
        # pixel data to be read and lets the file handle be closed right away.
        # `convert("RGB")` also guarantees three channels for every image mode.
        with Image.open(self.root / self.paths[idx]) as raw:
            img = raw.convert("RGB")
        label = self.labels[idx]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    def read_file(self) -> Tuple[List[str], List[int]]:
        """Parse the split file into parallel lists of image paths and labels.

        Returns:
            ``(paths, labels)`` where ``paths[i]`` is relative to ``self.root``
            and ``labels[i]`` is the integer coarse-grained class.

        Raises:
            ValueError: if a non-empty line does not have exactly three
                ``", "``-separated fields or its label is not an integer.
        """
        paths: List[str] = []
        labels: List[int] = []

        with open(self.root / f"{self.split}.txt") as f:
            for line in f:
                record = line.rstrip("\r\n")
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                if not record.strip():
                    continue
                # path, fine-grained class, coarse-grained class
                path, _, label = record.split(", ")
                paths.append(path)
                labels.append(int(label))

        return paths, labels

    def get_num_classes(self) -> int:
        """Return ``max(label) + 1``, or 0 when the split has no samples."""
        return max(self.labels, default=-1) + 1

In [7]:
from torchvision import transforms as T, datasets

In [8]:
## Reminder: set the size back to 260x260, or to 224x224 (for ResNet)

# Target (height, width) pairs, both taken from the shared configuration.
_std_hw = (cfg['std_size'], cfg['std_size'])
_crop_hw = (cfg['rand_crop_size'], cfg['rand_crop_size'])

# Plain pipeline: resize to the standard size, then convert to a tensor.
tsfms_std = T.Compose([T.Resize(size=_std_hw), T.ToTensor()])

# Augmenting pipeline: random horizontal flip and random resized crop on the
# PIL image, conversion to a tensor, then a resize back to the standard size.
tsfms_increasing = T.Compose([
    T.RandomHorizontalFlip(),
    T.RandomResizedCrop(size=_crop_hw, scale=(0.7, 0.9)),
    T.ToTensor(),
    T.Resize(size=_std_hw),
])

# One dataset per split; the training split also gets an augmented twin.
train_dset = GroceryStoreDataset(split="train", transform=tsfms_std)
increased_train_dst = GroceryStoreDataset(split="train", transform=tsfms_increasing)
val_dset = GroceryStoreDataset(split="val", transform=tsfms_std)
test_dset = GroceryStoreDataset(split="test", transform=tsfms_std)

n_classes = cfg['n_classes']
# len() of the first image tensor, i.e. the size of its leading dimension.
input_dim = len(train_dset[0][0])

# Augmented training samples followed by the unmodified ones.
increased_dataset = torch.utils.data.ConcatDataset([increased_train_dst, train_dset])
print(len(increased_dataset))

5280


In [9]:
from torch.utils.data import DataLoader


In [10]:
# Sample counts of the training and validation splits, one per line.
print(len(train_dset), len(val_dset), sep="\n")

2640
296


In [11]:
# Every loader uses the batch size from the configuration.
_batch_size = cfg['batch_size']

# Only the training loader shuffles; validation and test keep the file order.
# NOTE(review): `increased_dataset` (the augmented concatenation built above)
# is not used here; confirm that training on the plain `train_dset` is intended.
train_dl = DataLoader(train_dset, batch_size=_batch_size, shuffle=True)
val_dl = DataLoader(val_dset, batch_size=_batch_size)
test_dl = DataLoader(test_dset, batch_size=_batch_size)

In [12]:
# Grab one batch from the training loader via next(iter(...)).
# That loader shuffles, so re-running this cell yields a different batch.
# To walk the whole dataset use "for batch in data_loader: ..." instead.
first_batch = next(iter(train_dl))
data_inputs, data_labels = first_batch

# Each output has shape [batch_size, d_1, ..., d_N], where d_1, ..., d_N are
# the dimensions of a single item returned by the dataset class.
print(f"Data inputs: {data_inputs.shape}\n{data_inputs}")
print(f"\nData labels: {data_labels.shape}\n{data_labels}")

Data inputs: torch.Size([64, 3, 260, 260])
tensor([[[[0.1529, 0.1176, 0.0941,  ..., 0.1216, 0.1020, 0.0627],
          [0.1451, 0.1176, 0.1020,  ..., 0.1216, 0.1059, 0.0627],
          [0.1529, 0.1255, 0.1137,  ..., 0.1255, 0.1059, 0.0667],
          ...,
          [0.2510, 0.2510, 0.2471,  ..., 0.5490, 0.5490, 0.5490],
          [0.2510, 0.2510, 0.2471,  ..., 0.5529, 0.5529, 0.5529],
          [0.2510, 0.2510, 0.2471,  ..., 0.5608, 0.5608, 0.5608]],

         [[0.1255, 0.0941, 0.0784,  ..., 0.0902, 0.0706, 0.0392],
          [0.1137, 0.0902, 0.0824,  ..., 0.0902, 0.0745, 0.0392],
          [0.1216, 0.0980, 0.0902,  ..., 0.0941, 0.0784, 0.0431],
          ...,
          [0.1412, 0.1412, 0.1373,  ..., 0.4039, 0.4039, 0.4039],
          [0.1412, 0.1412, 0.1373,  ..., 0.4078, 0.4078, 0.4078],
          [0.1412, 0.1412, 0.1373,  ..., 0.4157, 0.4157, 0.4157]],

         [[0.0627, 0.0353, 0.0275,  ..., 0.0196, 0.0431, 0.0275],
          [0.0549, 0.0353, 0.0314,  ..., 0.0157, 0.0431, 0.0275],

## Prepare network

### Complex model

In [None]:
## Best Performance on Validation set: 0.23
class SimpleClassifier(torch.nn.Module):
    """Nine-convolution CNN that returns per-sample class probabilities.

    Layout: one strided 5x5 convolution, then four stages of
    (conv -> ReLU -> conv -> ReLU -> 2x2 max-pool -> batch-norm) with 32, 64,
    128 and 256 channels, followed by two fully connected layers and a softmax
    over the class dimension.

    Args:
        input_dim: number of channels of the input images.
        hidden_dim: not used by this model; kept for signature compatibility.
        n_classes: number of output classes.
        n_hidden_layers: not used by this model; kept for signature compatibility.
    """

    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        self.relu = torch.nn.ReLU()
        # Normalise over the class axis (dim=1). dim=0 would normalise each
        # class score across the samples of the batch instead.
        # NOTE(review): if the training loss is CrossEntropyLoss, which expects
        # raw logits, this softmax should be dropped; confirm against the
        # training loop.
        self.softmax = torch.nn.Softmax(dim=1)

        self.first = torch.nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=5, stride=4, padding='valid')

        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding='same')
        self.conv3 = torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding='same')
        self.pool1 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm1 = torch.nn.BatchNorm2d(num_features=32)

        self.conv4 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding='same')
        self.conv5 = torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, padding='same')
        self.pool2 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm2 = torch.nn.BatchNorm2d(num_features=64)

        self.conv6 = torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, padding='same')
        self.conv7 = torch.nn.Conv2d(in_channels=128, out_channels=128, kernel_size=5, padding='same')
        self.pool3 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm3 = torch.nn.BatchNorm2d(num_features=128)

        self.conv8 = torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, padding='same')
        self.conv9 = torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=5, padding='same')
        self.pool4 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm4 = torch.nn.BatchNorm2d(num_features=256)

        self.flatten1 = torch.nn.Flatten()
        # 6400 = 256 channels * 5 * 5, i.e. the head expects a 5x5 final map.
        # NOTE(review): a 260x260 input (cfg['std_size']) appears to give a 4x4
        # map (4096 features); confirm the intended input resolution.
        self.fc1 = torch.nn.Linear(6400, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        """Map a batch of images ``(N, input_dim, H, W)`` to ``(N, n_classes)`` probabilities."""
        x = self.relu(self.first(x))

        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        x = self.batchnorm1(self.pool1(x))

        x = self.relu(self.conv4(x))
        x = self.relu(self.conv5(x))
        x = self.batchnorm2(self.pool2(x))

        x = self.relu(self.conv6(x))
        x = self.relu(self.conv7(x))
        x = self.batchnorm3(self.pool3(x))

        x = self.relu(self.conv8(x))
        x = self.relu(self.conv9(x))
        x = self.batchnorm4(self.pool4(x))

        x = self.flatten1(x)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)

        return self.softmax(x)

### Model7

In [13]:

class ResNet(torch.nn.Module):
    """Residual wrapper: ``forward(x)`` returns ``module(x) + x``.

    The wrapped module must therefore return a tensor that can be added to
    its own input.
    """

    def __init__(self, module):
        super().__init__()
        self.module = module

    def forward(self, inputs):
        return self.module(inputs) + inputs


# Best performance on validation set: 0.567 with lr=3e-4, wd=3e-5
class SimpleClassifier7(torch.nn.Module):
    """Small residual CNN that returns raw class scores (no softmax).

    Layout: stem (7x7 conv, stride 2, max-pool) -> strided conv block ->
    two residual blocks -> conv block (doubles the channels) -> two residual
    blocks -> adaptive average pool to 3x3 -> two-layer classifier head.

    Args:
        input_dim: number of channels of the input images.
        hidden_dim: number of channels produced by the stem; it is doubled by
            each of the two following conv blocks.
        n_classes: number of output classes.
        n_hidden_layers: not used by this model; kept for signature compatibility.
    """

    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        self.stem = self.create_conv_block(in_dim=input_dim, out_dim=hidden_dim, ker_size=7, stride=2, version='max-pool')

        self.conv_block1 = self.create_conv_block(in_dim=hidden_dim, out_dim=hidden_dim*2, ker_size=3, stride=2, version='batch-norm')
        hidden_dim *= 2

        self.residual_block1 = self._create_residual_block(hidden_dim)
        self.residual_block2 = self._create_residual_block(hidden_dim)

        self.conv_block2 = self.create_conv_block(in_dim=hidden_dim, out_dim=hidden_dim*2, ker_size=3, version='batch-norm')
        hidden_dim *= 2

        self.residual_block3 = self._create_residual_block(hidden_dim)
        self.residual_block4 = self._create_residual_block(hidden_dim)

        # Pooling to a fixed 3x3 map makes the head independent of the input
        # resolution.
        final_output_h_w = 3
        self.adaptive_avg = torch.nn.AdaptiveAvgPool2d(output_size=(final_output_h_w, final_output_h_w))

        self.fc_classifier = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(hidden_dim * final_output_h_w * final_output_h_w, 120),
            torch.nn.SiLU(),
            torch.nn.Dropout(p=0.2),
            torch.nn.Linear(120, n_classes)
        )

    def _create_residual_block(self, dim):
        """Build ``SiLU(x + BN(Conv(BN(SiLU(Conv(x))))))`` with ``dim`` channels throughout."""
        # Sub-modules are created in the same order as the hand-written blocks
        # this helper replaces, so parameter names and seeded initialisation
        # stay the same.
        branch = torch.nn.Sequential(
            self.create_conv_block(in_dim=dim, out_dim=dim, ker_size=3, version='std'),
            torch.nn.BatchNorm2d(dim),
            torch.nn.Conv2d(dim, dim, kernel_size=3, padding='same'),
            torch.nn.BatchNorm2d(dim),
        )
        return torch.nn.Sequential(ResNet(branch), torch.nn.SiLU())

    def create_conv_block(self, in_dim, out_dim, ker_size, stride=1, padding='same', version='std'):
        """Build a convolutional block.

        Args:
            in_dim: input channels.
            out_dim: output channels.
            ker_size: convolution kernel size.
            stride: convolution stride; any value above 1 forces ``padding='valid'``.
            padding: convolution padding, used only when ``stride`` is 1.
            version: ``'std'`` (conv -> SiLU), ``'max-pool'`` (adds a 2x2
                max-pool) or ``'batch-norm'`` (adds the max-pool and a
                batch-norm).

        Returns:
            A ``torch.nn.Sequential`` holding the requested layers.

        Raises:
            ValueError: if ``version`` is not one of the three supported names.
        """
        if version not in ('std', 'max-pool', 'batch-norm'):
            raise ValueError(
                f"Unknown conv block version {version!r}; expected 'std', 'max-pool' or 'batch-norm'"
            )

        # Conv2d does not support padding='same' together with a stride above 1.
        if stride > 1:
            padding = 'valid'

        layers = [
            torch.nn.Conv2d(in_channels=in_dim, out_channels=out_dim, kernel_size=ker_size, stride=stride, padding=padding),
            torch.nn.SiLU(),
        ]
        if version in ('max-pool', 'batch-norm'):
            layers.append(torch.nn.MaxPool2d(2, 2))
        if version == 'batch-norm':
            layers.append(torch.nn.BatchNorm2d(num_features=out_dim))
        return torch.nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of images ``(N, input_dim, H, W)`` to ``(N, n_classes)`` raw scores."""
        x = self.stem(x)
        x = self.conv_block1(x)

        x = self.residual_block1(x)
        x = self.residual_block2(x)

        x = self.conv_block2(x)

        x = self.residual_block3(x)
        x = self.residual_block4(x)

        x = self.adaptive_avg(x)

        x = self.fc_classifier(x)

        return x

### Model6

In [42]:

class ResNet(torch.nn.Module):
    """Skip-connection wrapper: the wrapped module's output is added to its input."""

    def __init__(self, module):
        super().__init__()
        self.module = module

    def forward(self, inputs):
        branch_out = self.module(inputs)
        return branch_out + inputs


# Best performance on validation set: 0.57 with lr=3e-4, wd=3e-5
class SimpleClassifier6(torch.nn.Module):
    """CNN with two residual blocks that returns raw class scores (no softmax).

    Channel widths are fixed (64 -> 128 -> 256 -> 512). The final feature map
    is adaptively average-pooled to 3x3, so the first fully connected layer
    always receives 512 * 3 * 3 = 4608 features.

    Args:
        input_dim: number of channels of the input images.
        hidden_dim: not used by this model; kept for signature compatibility.
        n_classes: number of output classes.
        n_hidden_layers: not used by this model; kept for signature compatibility.
    """

    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        def residual(channels):
            # conv -> SiLU -> BN -> conv -> BN inside the skip connection,
            # followed by a SiLU applied to the sum.
            body = torch.nn.Sequential(
                torch.nn.Conv2d(channels, channels, kernel_size=3, padding='same'),
                torch.nn.SiLU(),
                torch.nn.BatchNorm2d(channels),
                torch.nn.Conv2d(channels, channels, kernel_size=3, padding='same'),
                torch.nn.BatchNorm2d(channels),
            )
            return torch.nn.Sequential(ResNet(body), torch.nn.SiLU())

        # Shared, parameter-free layers.
        self.silu = torch.nn.SiLU()
        self.dropout = torch.nn.Dropout(p=0.2)

        # Stage 1: 64 then 128 channels.
        self.conv1 = torch.nn.Conv2d(in_channels=input_dim, out_channels=64, kernel_size=7, stride=4, padding='valid')
        self.pool1 = torch.nn.MaxPool2d(3, 2)
        self.conv2 = torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, padding='same')
        self.pool2 = torch.nn.MaxPool2d(3, 2)
        self.batchnorm1 = torch.nn.BatchNorm2d(num_features=128)
        self.residual_block1 = residual(128)

        # Stage 2: 256 channels.
        self.conv3 = torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding='same')
        self.pool3 = torch.nn.MaxPool2d(3, 2)
        self.conv4 = torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding='same')
        self.batchnorm2 = torch.nn.BatchNorm2d(num_features=256)
        self.residual_block2 = residual(256)

        # Stage 3: 512 channels.
        self.conv5 = torch.nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding='same')
        self.pool4 = torch.nn.MaxPool2d(3, 2)
        self.batchnorm3 = torch.nn.BatchNorm2d(num_features=512)

        # Head: fixed 3x3 map -> 4608 features -> 120 -> n_classes.
        self.adaptive_avg = torch.nn.AdaptiveAvgPool2d(output_size=(3, 3))
        self.flatten1 = torch.nn.Flatten()
        self.fc1 = torch.nn.Linear(4608, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        """Map a batch of images ``(N, input_dim, H, W)`` to ``(N, n_classes)`` raw scores."""
        # Stage 1
        x = self.pool1(self.silu(self.conv1(x)))
        x = self.pool2(self.silu(self.conv2(x)))
        x = self.batchnorm1(x)
        x = self.residual_block1(x)

        # Stage 2 (pooling happens after the second convolution)
        x = self.silu(self.conv3(x))
        x = self.pool3(self.silu(self.conv4(x)))
        x = self.batchnorm2(x)
        x = self.residual_block2(x)

        # Stage 3
        x = self.pool4(self.silu(self.conv5(x)))
        x = self.batchnorm3(x)

        # Head
        x = self.flatten1(self.adaptive_avg(x))
        x = self.dropout(self.silu(self.fc1(x)))
        return self.fc2(x)

### Prova

In [None]:
class ComplexCNN(torch.nn.Module):
    """Four-stage CNN that returns per-sample class probabilities.

    Every stage is (conv -> batch-norm -> ReLU) twice, followed by a 2x2
    max-pool. The first stage uses strided 5x5 convolutions on 3-channel
    input; the remaining stages use 3x3 convolutions with 128, 256 and 512
    channels. The head is Linear -> ReLU -> Dropout -> Linear -> Softmax.

    Args:
        n_classes: number of output classes.
    """

    def __init__(self, n_classes):
        super().__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(3, 64, kernel_size=5, stride=2, padding=1),
            torch.nn.BatchNorm2d(64),
            torch.nn.ReLU(),
            torch.nn.Conv2d(64, 64, kernel_size=5, stride=2, padding=1),
            torch.nn.BatchNorm2d(64),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = self._conv_stage(64, 128)
        self.layer3 = self._conv_stage(128, 256)
        self.layer4 = self._conv_stage(256, 512)
        self.flatten = torch.nn.Flatten()
        # 12800 = 512 channels * 5 * 5, i.e. the head expects a 5x5 final map.
        # NOTE(review): a 260x260 input (cfg['std_size']) appears to give a 4x4
        # map (8192 features); confirm the intended input resolution.
        self.fc1 = torch.nn.Linear(12800, 96)
        self.dropout = torch.nn.Dropout(0.5)
        self.fc2 = torch.nn.Linear(96, n_classes)
        # Explicit class axis: the implicit-dim form is deprecated and warns.
        # NOTE(review): if the training loss is CrossEntropyLoss, which expects
        # raw logits, this softmax should be dropped; confirm against the
        # training loop.
        self.softmax = torch.nn.Softmax(dim=1)

        self.relu = torch.nn.ReLU()

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build (3x3 conv -> BN -> ReLU) twice, followed by a 2x2 max-pool."""
        return torch.nn.Sequential(
            torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(out_channels),
            torch.nn.ReLU(),
            torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(out_channels),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to ``(N, n_classes)`` probabilities."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

### Last Model

In [None]:
# Best Performance on Validation set: 0.5101351351351351 con step 3e-4
class SimpleClassifier5(torch.nn.Module):
    """Four-stage CNN that returns per-sample log-probabilities.

    Every stage is conv -> SiLU -> max-pool -> batch-norm. The channel width
    starts at 96 and doubles at each later stage (96, 192, 384, 768). The
    final map is average-pooled to 1x1 and fed to a two-layer head that ends
    in a LogSoftmax over the class dimension.

    Args:
        input_dim: number of channels of the input images.
        hidden_dim: not used by this model; kept for signature compatibility.
        n_classes: number of output classes.
        n_hidden_layers: not used by this model; kept for signature compatibility.
    """

    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        dim = 96
        self.conv_layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=input_dim, out_channels=dim, kernel_size=11, stride=4, padding='valid'),
            torch.nn.SiLU(),
            torch.nn.MaxPool2d(3, 2),
            torch.nn.BatchNorm2d(num_features=dim)
        )

        self.conv_layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=dim, out_channels=dim*2, kernel_size=3, stride=2, padding='valid'),
            torch.nn.SiLU(),
            torch.nn.MaxPool2d(2, 2),
            torch.nn.BatchNorm2d(dim*2)
        )
        dim *= 2

        self.conv_layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=dim, out_channels=dim*2, kernel_size=5, padding='same'),
            torch.nn.SiLU(),
            torch.nn.MaxPool2d(2, 2),
            torch.nn.BatchNorm2d(dim*2)
        )
        dim *= 2

        self.conv_layer4 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=dim, out_channels=dim*2, kernel_size=5, padding='same'),
            torch.nn.SiLU(),
            torch.nn.MaxPool2d(2, 2),
            torch.nn.BatchNorm2d(dim*2)
        )
        dim *= 2

        # A fifth stage used to be built here but forward() never called it,
        # so it only allocated unused parameters; it has been removed.

        # The head previously used a hard-coded Linear(6400, 100). The
        # flattened size is always a multiple of the 768 output channels, and
        # 6400 is not, so that layer could not match any input. Pooling to 1x1
        # fixes the feature count at `dim`; for a 260x260 input the map is
        # already 1x1, so the pooling is then a no-op.
        self.adaptive_avg = torch.nn.AdaptiveAvgPool2d(output_size=(1, 1))

        self.fc_classifier = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(dim, 100),
            torch.nn.SiLU(),
            torch.nn.Dropout(p=0.05),
            torch.nn.Linear(100, n_classes),
            # Normalise over the class axis (dim=1). dim=0 would normalise each
            # class score across the samples of the batch instead.
            torch.nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        """Map a batch of images ``(N, input_dim, H, W)`` to ``(N, n_classes)`` log-probabilities."""
        x = self.conv_layer1(x)
        x = self.conv_layer2(x)
        x = self.conv_layer3(x)
        x = self.conv_layer4(x)

        x = self.adaptive_avg(x)
        x = self.fc_classifier(x)

        return x

### Model1

Model1: prova (migliore) con 2 blocchi (Conv -> RELU -> MaxPool x2) intervallati da un BatchNorm layer.

In [None]:
# Best Performance on Validation set: 0.310
class SimpleClassifier1(torch.nn.Module):
    """Four-convolution CNN that returns per-sample class probabilities.

    Layout: two groups of (conv -> ReLU -> max-pool) twice, each group followed
    by a batch-norm layer, then two fully connected layers and a softmax over
    the class dimension.

    Args:
        input_dim: number of channels of the input images.
        hidden_dim: not used by this model; kept for signature compatibility.
        n_classes: number of output classes.
        n_hidden_layers: not used by this model; kept for signature compatibility.
    """

    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        self.relu = torch.nn.ReLU()
        # Explicit class axis: the implicit-dim form is deprecated and warns.
        # NOTE(review): if the training loss is CrossEntropyLoss, which expects
        # raw logits, this softmax should be dropped; confirm against the
        # training loop.
        self.softmax = torch.nn.Softmax(dim=1)

        self.conv1 = torch.nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=5)
        self.pool1 = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        self.pool2 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm1 = torch.nn.BatchNorm2d(num_features=32)

        self.conv3 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.pool3 = torch.nn.MaxPool2d(2, 2)
        self.conv4 = torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5)
        self.pool4 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm2 = torch.nn.BatchNorm2d(num_features=128)

        self.flatten1 = torch.nn.Flatten()
        # 8192 = 128 channels * 8 * 8, which is what a 200x200 input produces.
        # NOTE(review): this does not appear to match the 260x260 size set in
        # cfg['std_size']; confirm the intended input resolution.
        self.fc1 = torch.nn.Linear(8192, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        """Map a batch of images ``(N, input_dim, H, W)`` to ``(N, n_classes)`` probabilities."""
        x = self.pool1(self.relu(self.conv1(x)))
        x = self.pool2(self.relu(self.conv2(x)))
        x = self.batchnorm1(x)

        x = self.pool3(self.relu(self.conv3(x)))
        x = self.pool4(self.relu(self.conv4(x)))
        x = self.batchnorm2(x)

        x = self.flatten1(x)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)

        return self.softmax(x)

### Model2

Model2: prova con blocco Convolutional Layer -> RELU -> MaxPooling Layer, ripetuto due volte. Stride usato unicamente per il MaxPooling layers.

In [None]:
# Best Performance on Validation set: 0.24225352112676057
class SimpleClassifier2(torch.nn.Module):
    """Two-convolution CNN that returns per-sample class probabilities.

    Layout: (conv -> ReLU -> max-pool) twice, then two fully connected layers
    and a softmax over the class dimension. Only the max-pool layers use a
    stride above 1.

    Args:
        input_dim: number of channels of the input images.
        hidden_dim: not used by this model; kept for signature compatibility.
        n_classes: number of output classes.
        n_hidden_layers: not used by this model; kept for signature compatibility.
    """

    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        self.relu = torch.nn.ReLU()
        # Explicit class axis: the implicit-dim form is deprecated and warns.
        # NOTE(review): if the training loss is CrossEntropyLoss, which expects
        # raw logits, this softmax should be dropped; confirm against the
        # training loop.
        self.softmax = torch.nn.Softmax(dim=1)

        self.conv1 = torch.nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=5)
        self.pool1 = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        self.pool2 = torch.nn.MaxPool2d(2, 2)

        self.flatten1 = torch.nn.Flatten()
        # 70688 = 32 channels * 47 * 47, which is what a 200x200 input produces.
        # NOTE(review): this does not appear to match the 260x260 size set in
        # cfg['std_size']; confirm the intended input resolution.
        self.fc1 = torch.nn.Linear(70688, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        """Map a batch of images ``(N, input_dim, H, W)`` to ``(N, n_classes)`` probabilities."""
        x = self.pool1(self.relu(self.conv1(x)))
        x = self.pool2(self.relu(self.conv2(x)))

        x = self.flatten1(x)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)

        return self.softmax(x)

### Model3

Model3:
prova con un solo Convolutional Layer, a cui aumento kernel_size e stride per ridurne la dimensionalità e di conseguenza il numero di parametri. 

In [None]:
# Best Performance on Validation set: 0.20321931589537223
class SimpleClassifier3(torch.nn.Module):
    """Single-convolution CNN that returns per-sample class probabilities.

    Layout: one 11x11 convolution with stride 6 -> ReLU, then two fully
    connected layers and a softmax over the class dimension.

    Args:
        input_dim: number of channels of the input images.
        hidden_dim: not used by this model; kept for signature compatibility.
        n_classes: number of output classes.
        n_hidden_layers: not used by this model; kept for signature compatibility.
    """

    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        self.relu = torch.nn.ReLU()
        # Explicit class axis: the implicit-dim form is deprecated and warns.
        # NOTE(review): if the training loss is CrossEntropyLoss, which expects
        # raw logits, this softmax should be dropped; confirm against the
        # training loop.
        self.softmax = torch.nn.Softmax(dim=1)

        self.conv1 = torch.nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=11, stride=6)

        self.flatten1 = torch.nn.Flatten()
        # 16384 = 16 channels * 32 * 32, which is what a 200x200 input produces.
        # NOTE(review): this does not appear to match the 260x260 size set in
        # cfg['std_size']; confirm the intended input resolution.
        self.fc1 = torch.nn.Linear(16384, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        """Map a batch of images ``(N, input_dim, H, W)`` to ``(N, n_classes)`` probabilities."""
        x = self.relu(self.conv1(x))

        x = self.flatten1(x)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)

        return self.softmax(x)

### Model4

Model4: prova con un convolutional Layer e un pooling Layer.

In [None]:
# Best Performance on Validation set: 
class SimpleClassifier4(torch.nn.Module):
    """CNN with one strided convolution, one max-pooling layer and a small head.

    Architecture: Conv2d(kernel 7, stride 4, 16 filters) -> ReLU ->
    MaxPool2d(2, 2) -> Flatten -> Linear(9216, 120) -> ReLU ->
    Linear(120, n_classes).

    ``fc1`` expects exactly 9216 flattened features (16 channels x 24 x 24),
    which is what the convolution plus pooling produce for e.g. 200x200
    inputs; other input sizes require changing that constant.

    Args:
        input_dim: number of channels of the input images.
        hidden_dim: not used by this architecture; accepted so that the
            classifiers in this notebook can be built with the same call.
        n_classes: number of output classes (size of the last layer).
        n_hidden_layers: not used by this architecture.
    """

    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        # Initialize the modules we need to build the network
        self.relu = torch.nn.ReLU()
        # Not applied in forward(); kept to turn logits into probabilities
        # at inference time. dim=1 is the class axis of the (batch, classes)
        # output and makes explicit what Softmax() used to infer implicitly.
        self.softmax = torch.nn.Softmax(dim=1)

        self.conv1 = torch.nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=7, stride=4)
        self.pool1 = torch.nn.MaxPool2d(2, 2)

        self.flatten1 = torch.nn.Flatten()
        self.fc1 = torch.nn.Linear(9216, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        """Return the class logits for the image batch ``x``.

        Args:
            x: input batch; presumably (batch, input_dim, height, width)
                with a spatial size for which convolution and pooling yield
                9216 features per sample.

        Returns:
            Tensor of raw logits with one row per sample and ``n_classes``
            columns.
        """
        x = self.conv1(x)
        x = self.relu(x)

        x = self.pool1(x)

        x = self.flatten1(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)

        # Return logits, not probabilities: F.cross_entropy (used by the
        # training loop) applies log-softmax itself, so a softmax here would
        # be applied twice and squash the gradients.
        return x

### Chiamata Modelli

In [14]:
# Channel sizes handed to the network constructor.
input_channels, hidden_channels = 3, 64

# Build the classifier; the number of classes comes from the shared config.
model7 = SimpleClassifier7(
    n_classes=cfg['n_classes'],
    hidden_dim=hidden_channels,
    input_dim=input_channels,
)

# Print every learnable tensor with its shape as a quick look at the architecture.
for name, params in model7.named_parameters():
    print(f"{name}: {params.shape}")

stem.0.weight: torch.Size([64, 3, 7, 7])
stem.0.bias: torch.Size([64])
conv_block1.0.weight: torch.Size([128, 64, 3, 3])
conv_block1.0.bias: torch.Size([128])
conv_block1.3.weight: torch.Size([128])
conv_block1.3.bias: torch.Size([128])
residual_block1.0.module.0.0.weight: torch.Size([128, 128, 3, 3])
residual_block1.0.module.0.0.bias: torch.Size([128])
residual_block1.0.module.1.weight: torch.Size([128])
residual_block1.0.module.1.bias: torch.Size([128])
residual_block1.0.module.2.weight: torch.Size([128, 128, 3, 3])
residual_block1.0.module.2.bias: torch.Size([128])
residual_block1.0.module.3.weight: torch.Size([128])
residual_block1.0.module.3.bias: torch.Size([128])
residual_block2.0.module.0.0.weight: torch.Size([128, 128, 3, 3])
residual_block2.0.module.0.0.bias: torch.Size([128])
residual_block2.0.module.1.weight: torch.Size([128])
residual_block2.0.module.1.bias: torch.Size([128])
residual_block2.0.module.2.weight: torch.Size([128, 128, 3, 3])
residual_block2.0.module.2.bias: t

## Push the model to the device (CUDA) 

In [75]:
# Push model to device. Has to be done only once.
# `.to()` is the last expression of the cell, so the notebook also displays
# the module structure it returns.
model7.to(device)

SimpleClassifier7(
  (stem): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=valid)
    (1): SiLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block1): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=valid)
    (1): SiLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (residual_block1): Sequential(
    (0): ResNet(
      (module): Sequential(
        (0): Sequential(
          (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
          (1): SiLU()
        )
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
        (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 

## Optimizer

In [77]:
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import OneCycleLR, ReduceLROnPlateau

In [78]:
# Training length taken from the shared config.
num_epochs = cfg['epochs']
# Total number of optimizer steps; within this cell it is referenced only by
# the commented-out OneCycleLR alternative below.
num_steps = len(train_dl) * num_epochs

# AdamW with the configured learning rate and weight decay.
# Alternative kept for reference:
# optimizer = Adam(model7.parameters(), lr=initial_lr)
initial_lr = cfg['lr']
optimizer = AdamW(model7.parameters(), weight_decay=cfg['wd'], lr=initial_lr)

# Multiply the learning rate by 0.6 (never going below 1e-5) when the
# monitored quantity stops decreasing for more than 2 epochs.
# Alternative kept for reference:
# lr_scheduler = OneCycleLR(optimizer, initial_lr, total_steps=num_steps)
lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.6, min_lr=1e-5)

## Training the network

In [20]:
from tqdm.notebook import tqdm
import torch.nn.functional as F

In [79]:
def ncorrect(scores, y):
    """Count the predictions that match the targets.

    Args:
        scores: tensor of per-class scores; the predicted class of each
            entry is the index of the maximum along the last dimension.
        y: tensor of target class indices, compared element-wise with the
            predicted classes.

    Returns:
        A 0-dim tensor holding the number of matching predictions.
    """
    predicted_classes = scores.argmax(dim=-1)
    hits = predicted_classes == y
    return hits.sum()

def accuracy(scores, y):
    """Fraction of predictions that match the targets.

    Args:
        scores: tensor of per-class scores, forwarded to ``ncorrect``.
        y: tensor of target class indices; its first dimension is used as
            the number of samples.

    Returns:
        A 0-dim floating-point tensor: correct predictions divided by
        ``y.shape[0]``.
    """
    n_samples = y.shape[0]
    return ncorrect(scores, y).true_divide(n_samples)

def train_loop(model, train_dl, epochs, opt, scheduler, val_dl=None, verbose=False,
               checkpoint_path="best_model_1.pth"):
    """Train ``model`` and keep the weights of its best epoch.

    Every epoch performs one optimisation pass over ``train_dl`` using the
    cross-entropy loss and, when ``val_dl`` is given, an evaluation pass over
    it. Whenever the validation accuracy improves (or after every epoch when
    there is no validation set) a snapshot of the weights is kept in memory
    and written to ``checkpoint_path``.

    Relies on the notebook-level names ``device``, ``tqdm``, ``F`` and
    ``ncorrect``.

    Args:
        model: network mapping a batch of images to per-class logits.
        train_dl: iterable of training batches; item 0 of each batch holds
            the images and item 1 the labels.
        epochs: number of epochs to run.
        opt: optimizer updating the parameters of ``model``.
        scheduler: learning-rate scheduler stepped once per epoch, or
            ``None``. A ``ReduceLROnPlateau`` receives the validation loss
            (the training loss when ``val_dl`` is ``None``); any other
            scheduler is stepped without arguments.
        val_dl: optional iterable of validation batches, same layout as
            ``train_dl``.
        verbose: when true, print the metrics of every epoch and a final
            summary.
        checkpoint_path: file the best weights are saved to.

    Returns:
        Tuple ``(best_val_acc, best_params, best_epoch)``. ``best_params`` is
        an independent copy of the state dict taken at ``best_epoch``; it is
        an empty list and ``best_epoch`` is ``-1`` if no snapshot was taken.
        Without a validation set ``best_val_acc`` is 0 and the snapshot is
        the one of the last epoch.
    """
    import copy  # local import so the cell does not depend on another import cell

    best_val_acc = 0
    best_params = []
    best_epoch = -1

    for e in tqdm(range(epochs)):
        # ---- Training pass ----
        model.train()
        train_loss = 0
        train_samples = 0
        train_acc = 0
        for train_data in train_dl:
            imgs = train_data[0].to(device)
            labels = train_data[1].to(device)

            opt.zero_grad()  # clear the gradients of the previous step

            scores = model(imgs)
            loss = F.cross_entropy(scores, labels)
            loss.backward()  # fill the gradients
            opt.step()       # update the weights

            # Weight the batch-mean loss by the batch size so that the epoch
            # average stays correct when the last batch is smaller.
            train_loss += loss.item() * imgs.shape[0]
            train_samples += imgs.shape[0]
            train_acc += ncorrect(scores, labels).item()

        train_acc /= train_samples
        train_loss /= train_samples

        # ---- Validation pass ----
        model.eval()
        val_loss = 0
        val_samples = 0
        val_acc = 0
        if val_dl is not None:
            with torch.no_grad():
                for val_data in val_dl:
                    imgs = val_data[0].to(device)
                    labels = val_data[1].to(device)
                    val_scores = model(imgs)

                    val_loss += F.cross_entropy(val_scores, labels).item() * imgs.shape[0]
                    val_samples += imgs.shape[0]
                    val_acc += ncorrect(val_scores, labels).item()

            val_acc /= val_samples
            val_loss /= val_samples

        # ---- Snapshot of the best weights ----
        if val_dl is None or val_acc > best_val_acc:
            best_val_acc = val_acc if val_dl is not None else 0
            # state_dict() returns references to the live tensors; without a
            # deep copy the "best" parameters would silently follow the
            # weights of the later epochs.
            best_params = copy.deepcopy(model.state_dict())
            torch.save(best_params, checkpoint_path)
            best_epoch = e

        # ---- Learning-rate schedule ----
        if scheduler is not None:
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                # Without a validation set val_loss is a constant 0, which
                # would look like a plateau: monitor the training loss then.
                scheduler.step(val_loss if val_dl is not None else train_loss)
            else:
                # Other schedulers take no metric; passing the loss would be
                # interpreted as an epoch index.
                scheduler.step()

        if verbose:
            print(f"Epoch {e}: train loss {train_loss:.3f} - train acc {train_acc:.3f}" + ("" if val_dl is None else f" - valid loss {val_loss:.3f} - valid acc {val_acc:.3f}"))

    if verbose and val_dl is not None:
        print(f"Best epoch {best_epoch}, best acc {best_val_acc}")

    return best_val_acc, best_params, best_epoch

In [82]:
best_val_acc, best_params, best_epoch = train_loop(
    model7,
    train_dl,
    cfg['epochs'],
    optimizer,
    lr_scheduler,
    val_dl,
    verbose=True
)

  0%|          | 0/30 [00:00<?, ?it/s]

Epoch 0: train loss 2.167 - train acc 0.369 - valid loss 2.862 - valid acc 0.220
Epoch 1: train loss 1.551 - train acc 0.511 - valid loss 2.729 - valid acc 0.307
Epoch 2: train loss 1.178 - train acc 0.628 - valid loss 2.386 - valid acc 0.382
Epoch 3: train loss 0.900 - train acc 0.701 - valid loss 3.222 - valid acc 0.264
Epoch 4: train loss 0.780 - train acc 0.740 - valid loss 2.669 - valid acc 0.399
Epoch 5: train loss 0.646 - train acc 0.791 - valid loss 3.275 - valid acc 0.314
Epoch 6: train loss 0.467 - train acc 0.850 - valid loss 2.855 - valid acc 0.338
Epoch 7: train loss 0.356 - train acc 0.887 - valid loss 2.318 - valid acc 0.422
Epoch 8: train loss 0.269 - train acc 0.918 - valid loss 3.143 - valid acc 0.382
Epoch 9: train loss 0.233 - train acc 0.931 - valid loss 2.140 - valid acc 0.507
Epoch 10: train loss 0.204 - train acc 0.935 - valid loss 3.373 - valid acc 0.355
Epoch 11: train loss 0.207 - train acc 0.931 - valid loss 3.288 - valid acc 0.389
Epoch 12: train loss 0.163

In [15]:
from skorch import NeuralNetClassifier

In [16]:
# Wrap the network in a scikit-learn compatible estimator so that it can be
# tuned with sklearn's model-selection tools.
# NOTE(review): `model7` is passed as an already-built instance, so the fits
# presumably start from its current weights -- confirm this is intended;
# passing the class plus `module__*` arguments would give a fresh
# initialisation per fit.
model = NeuralNetClassifier(
    module=model7,
    criterion=torch.nn.CrossEntropyLoss,
    optimizer=torch.optim.AdamW,
    # Same values as the manual training loop: read them from the shared
    # config instead of duplicating the literals.
    max_epochs=cfg['epochs'],
    batch_size=cfg['batch_size'],
    # skorch trains on the CPU unless told otherwise; use the device
    # selected at the top of the notebook.
    device=device,
    verbose=False
)

In [17]:
from skorch.helper import SliceDataset

In [18]:
# Sliceable views over the training dataset, used as X and y by the grid
# search: idx=0 selects the first item of every sample, idx=1 the second.
# NOTE(review): presumably the (image, label) pairs returned by
# GroceryStoreDataset.__getitem__ -- confirm the type of train_dset.
d_loader_slice_X = SliceDataset(train_dset, idx=0)
d_loader_slice_y = SliceDataset(train_dset, idx=1)

In [19]:
from sklearn.model_selection import GridSearchCV

In [20]:
# define the grid search parameters
# The `optimizer__` prefix is skorch's convention for forwarding a value to
# the optimizer constructor. 3 learning rates x 2 weight decays give 6
# candidates.
param_grid = {
    'optimizer__lr': [1e-3, 3e-4, 1e-4],
    'optimizer__weight_decay': [1e-4, 1e-5],
}
# Every candidate is evaluated with 3-fold cross-validation, one job at a time.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=3)
grid_result = grid.fit(d_loader_slice_X, d_loader_slice_y)

# Report the best mean cross-validated score and the parameters that achieved it.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

In [80]:
# Release the cached GPU memory that PyTorch's allocator holds but no longer uses.
torch.cuda.empty_cache()