### Imports

In [1]:
import torch
print(f'PyTorch CUDA is available? {torch.cuda.is_available()}')
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

PyTorch CUDA is available? True


### Fix randomness => reproducibility

In [2]:
def fix_random(seed: int) -> None:
    """Fix all the possible sources of randomness.

    Args:
        seed: the seed to use.
    """
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

fix_random(42)

## Devices availables

In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Device: {device}")

Device: cuda


## Access dataloader

In [None]:
!git clone https://github.com/marcusklasson/GroceryStoreDataset.git

In [4]:
from pathlib import Path
from PIL import Image
from torch import Tensor
from torch.utils.data import Dataset
from typing import List, Tuple

In [5]:
class GroceryStoreDataset(Dataset):

    def __init__(self, split: str, transform=None) -> None:
        super().__init__()

        self.root = Path("GroceryStoreDataset/dataset")
        self.split = split
        self.paths, self.labels = self.read_file()

        self.transform = transform

    def __len__(self) -> int:
        return len(self.labels)

    def __getitem__(self, idx) -> Tuple[Tensor, int]:
        img = Image.open(self.root / self.paths[idx])
        label = self.labels[idx]

        if self.transform:
            img = self.transform(img)

        return img, label

    def read_file(self) -> Tuple[List[str], List[int]]:
        paths = []
        labels = []

        with open(self.root / f"{self.split}.txt") as f:
            for line in f:
                # path, fine-grained class, coarse-grained class
                path, _, label = line.replace("\n", "").split(", ")
                paths.append(path), labels.append(int(label))

        return paths, labels

    def get_num_classes(self) -> int:
        return max(self.labels) + 1

In [6]:
from torchvision import transforms as T, datasets

In [7]:
tsfms_std = T.Compose([
    T.ToTensor(),
    T.Resize(size=(348, 348)),
    # T.Lambda(lambda x: x.flatten()),
])

tsfms_increasing = T.Compose([
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Resize(size=(348, 348)),
    # T.Lambda(lambda x: x.flatten()),
])

train_dset = GroceryStoreDataset(
    split="train",
    transform=tsfms_std,
)
increased_train_dst = GroceryStoreDataset(
    split="train",
    transform=tsfms_increasing,
)
val_dset = GroceryStoreDataset(
    split="val",
    transform=tsfms_std,
)
test_dset = GroceryStoreDataset(
    split="test",
    transform=tsfms_std,
)
n_classes = 43
input_dim = len(train_dset[0][0])

increased_dataset = torch.utils.data.ConcatDataset([increased_train_dst,train_dset])
print(len(increased_dataset))

5280


In [8]:
print(len(train_dset))
print(len(increased_train_dst))

2640
2640


In [None]:
# _, ax = plt.subplots(1, 4)

# for i, idx in enumerate([0, 10, 20, 30]):
#     img, label = test_dset[idx]
#     # Obtain dimension for each axis (consider them already resized to 400x400)
#     dim = np.int32(np.ceil(np.sqrt(img.shape[0]/3)))

#     ax[i].imshow(img.reshape(3, dim, dim).permute(1, 2, 0))
#     ax[i].set_title(label)
#     ax[i].axis("off")

In [9]:
from torch.utils.data import DataLoader

In [10]:
batch_size = 16

train_dl = DataLoader(
    train_dset,
    batch_size=batch_size,
    shuffle=True
)
val_dl = DataLoader(
    val_dset,
    batch_size=batch_size
)
test_dl = DataLoader(
    test_dset,
    batch_size=batch_size
)

In [11]:
# next(iter(...)) catches the first batch of the data loader
# If shuffle is True, this will return a different batch every time we run this cell
# For iterating over the whole dataset, we can simple use "for batch in data_loader: ..."
data_inputs, data_labels = next(iter(train_dl))

# The shape of the outputs are [batch_size, d_1,...,d_N] where d_1,...,d_N are the
# dimensions of the data point returned from the dataset class
print(f"Data inputs: {data_inputs.shape}\n{data_inputs}")
print(f"\nData labels: {data_labels.shape}\n{data_labels}")

Data inputs: torch.Size([16, 3, 348, 348])
tensor([[[[0.1686, 0.1294, 0.1098,  ..., 0.1137, 0.0902, 0.0510],
          [0.1608, 0.1255, 0.1098,  ..., 0.1137, 0.0902, 0.0510],
          [0.1569, 0.1216, 0.1176,  ..., 0.1176, 0.0902, 0.0510],
          ...,
          [0.2510, 0.2510, 0.2510,  ..., 0.5529, 0.5529, 0.5529],
          [0.2510, 0.2510, 0.2510,  ..., 0.5569, 0.5569, 0.5569],
          [0.2510, 0.2510, 0.2510,  ..., 0.5608, 0.5608, 0.5608]],

         [[0.1412, 0.1020, 0.0863,  ..., 0.0784, 0.0588, 0.0314],
          [0.1255, 0.0980, 0.0863,  ..., 0.0784, 0.0588, 0.0314],
          [0.1216, 0.0941, 0.0863,  ..., 0.0863, 0.0588, 0.0314],
          ...,
          [0.1412, 0.1412, 0.1412,  ..., 0.4078, 0.4078, 0.4078],
          [0.1412, 0.1412, 0.1412,  ..., 0.4118, 0.4118, 0.4118],
          [0.1412, 0.1412, 0.1412,  ..., 0.4157, 0.4157, 0.4157]],

         [[0.0784, 0.0392, 0.0314,  ..., 0.0431, 0.0471, 0.0196],
          [0.0667, 0.0353, 0.0314,  ..., 0.0431, 0.0471, 0.0196],

## Prepare network

### complex Model 

In [12]:
## Best Performance on Validation set: 0.23
class SimpleClassifier(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        # Initialize the modules we need to build the network
        # self.first = torch.nn.Linear(input_dim, hidden_dim)
        # self.activation = torch.nn.ReLU()
        # self.last = torch.nn.Linear(hidden_dim, n_classes)

        # self.hidden_layers = torch.nn.ModuleList([
        #     torch.nn.Linear(hidden_dim, hidden_dim) for i in range(n_hidden_layers)
        # ])
        self.relu = torch.nn.ReLU()
        self.softmax = torch.nn.Softmax()

        self.first = torch.nn.Conv2d(in_channels=input_dim, out_channels=8, kernel_size=5, stride=2)

        self.conv2 = torch.nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5)
        self.conv3 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        self.pool1 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm1 = torch.nn.BatchNorm2d(num_features=32)

        self.conv4 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.conv5 = torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5)
        self.pool2 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm2 = torch.nn.BatchNorm2d(num_features=128)

        self.conv6 = torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5)
        self.conv7 = torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=5)
        self.pool3 = torch.nn.MaxPool2d(2, 2)

        self.flatten1 = torch.nn.Flatten()
        # self.fc1 = torch.nn.Linear((hidden_dim*16) * 1 * 1, 120)
        self.fc1 = torch.nn.Linear(6400, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        x = self.first(x)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.relu(x)
        x = self.conv3(x)
        x = self.relu(x)
        x = self.pool1(x)

        # x = self.batchnorm1(x)

        x = self.conv4(x)
        x = self.relu(x)
        x = self.conv5(x)
        x = self.relu(x)
        x = self.pool2(x)

        # x = self.batchnorm2(x)

        x = self.conv6(x)
        x = self.relu(x)
        x = self.conv7(x)
        x = self.relu(x)
        x = self.pool3(x)
        
        x = self.flatten1(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)

        x = self.softmax(x)

        return x

In [44]:
# Best Performance on Validation set: 0.5101351351351351 con step 3e-4
class SimpleClassifier6(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        dim = hidden_dim
        # self.conv_layer1 = torch.nn.Sequential(
        #     torch.nn.Conv2d(in_channels=input_dim, out_channels=dim, kernel_size=5, stride=2, padding='valid'),

        #     torch.nn.BatchNorm2d(num_features=dim),

        #     torch.nn.ReLU(),
        #     torch.nn.MaxPool2d(2, 2),

        # )

        # self.conv_layer2 = torch.nn.Sequential(
        #     torch.nn.Conv2d(in_channels=dim, out_channels=dim*2, kernel_size=3, padding='same'),

        #     torch.nn.BatchNorm2d(dim*2),

        #     torch.nn.ReLU(),
        #     torch.nn.MaxPool2d(2, 2),
        # )
        # dim = dim*2

        # self.conv_layer3 = torch.nn.Sequential(
        #     torch.nn.Conv2d(in_channels=dim, out_channels=dim*2, kernel_size=3, padding='same'),

        #     torch.nn.BatchNorm2d(dim*2),

        #     torch.nn.ReLU(),
        #     torch.nn.MaxPool2d(2, 2),

        #     # torch.nn.Dropout2d(p=0.2),
        # )
        # dim *= 2

        # self.conv_layer4 = torch.nn.Sequential(
        #     torch.nn.Conv2d(in_channels=dim, out_channels=dim*2, kernel_size=3, padding='same'),

        #     torch.nn.BatchNorm2d(dim*2),

        #     torch.nn.ReLU(),
        #     torch.nn.MaxPool2d(2, 2),
        # )
        # dim *= 2

        # self.conv_layer5 = torch.nn.Sequential(
        #     torch.nn.Conv2d(in_channels=dim, out_channels=dim*2, kernel_size=3, padding='same'),
        #     torch.nn.BatchNorm2d(dim*2),

        #     torch.nn.ReLU(),
        #     torch.nn.MaxPool2d(2, 2),
        # )
        # dim *= 2

        # Initialize the modules we need to build the network

        # self.fc_classifier = torch.nn.Sequential(
        #     torch.nn.Flatten(),
        #     torch.nn.Linear(28224, 100),
        #     torch.nn.ReLU(),
        #     # torch.nn.Linear(200, 100),
        #     # torch.nn.SiLU(),
        #     torch.nn.Dropout(p=0.5),
        #     torch.nn.Linear(100, n_classes),
        #     torch.nn.Softmax()
        # )
        self.elu = torch.nn.ELU()
        self.softmax = torch.nn.Softmax()

        self.conv1 = torch.nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=5, padding='valid')
        self.pool1 = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding='valid')
        self.pool2 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm1 = torch.nn.BatchNorm2d(num_features=32)

        self.conv3 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding='valid')
        self.pool3 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm2 = torch.nn.BatchNorm2d(num_features=64)

        self.conv4 = torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, padding='valid')
        self.pool4 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm3 = torch.nn.BatchNorm2d(num_features=128)
        
        self.conv5 = torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, padding='valid')
        self.pool5 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm4 = torch.nn.BatchNorm2d(num_features=256)

        self.conv6 = torch.nn.Conv2d(in_channels=256, out_channels=512, kernel_size=5, padding='valid')
        self.pool6 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm5 = torch.nn.BatchNorm2d(num_features=512)

        self.flatten1 = torch.nn.Flatten()
        self.fc1 = torch.nn.Linear(512, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        x = self.conv1(x)
        x = self.elu(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.elu(x)
        x = self.pool2(x)

        x = self.batchnorm1(x)

        x = self.conv3(x)
        x = self.elu(x)
        x = self.pool3(x)

        x = self.batchnorm2(x)

        x = self.conv4(x)
        x = self.elu(x)
        x = self.pool4(x)

        x = self.batchnorm3(x)

        x = self.conv5(x)
        x = self.elu(x)
        x = self.pool5(x)

        x = self.batchnorm4(x)

        x = self.conv6(x)
        x = self.elu(x)
        x = self.pool6(x)
        
        x = self.batchnorm5(x)

        x = self.flatten1(x)
        x = self.fc1(x)
        x = self.elu(x)
        x = self.fc2(x)
        x = self.softmax(x)

        # x = self.conv_layer1(x)
        # x = self.conv_layer2(x)
        # x = self.conv_layer3(x)
        # x = self.conv_layer4(x)
        # x = self.conv_layer5(x)
        
        # x = self.fc_classifier(x)

        return x

### Prova

In [33]:
class ComplexCNN(torch.nn.Module):
    def __init__(self, n_classes):
        super(ComplexCNN, self).__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(3, 64, kernel_size=5, stride=2, padding=1),
            torch.nn.BatchNorm2d(64),
            torch.nn.ReLU(),
            torch.nn.Conv2d(64, 64, kernel_size=5, stride=2, padding=1),
            torch.nn.BatchNorm2d(64),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(128),
            torch.nn.ReLU(),
            torch.nn.Conv2d(128, 128, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(128),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(128, 256, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(256),
            torch.nn.ReLU(),
            torch.nn.Conv2d(256, 256, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(256),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer4 = torch.nn.Sequential(
            torch.nn.Conv2d(256, 512, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(512),
            torch.nn.ReLU(),
            torch.nn.Conv2d(512, 512, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(512),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.flatten = torch.nn.Flatten()
        self.fc1 = torch.nn.Linear(12800, 96)
        # self.fc1 = torch.nn.Linear(512*2*2, 96)
        self.dropout = torch.nn.Dropout(0.5)
        self.fc2 = torch.nn.Linear(96, n_classes)
        self.softmax = torch.nn.Softmax()

        self.relu = torch.nn.ReLU()
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

### Last Model

In [None]:
# Best Performance on Validation set: 0.5101351351351351 con step 3e-4
class SimpleClassifier5(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        dim = hidden_dim
        self.conv_layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=input_dim, out_channels=dim, kernel_size=5, stride=2, padding='valid'),

            torch.nn.BatchNorm2d(num_features=dim),

            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2),

        )

        self.conv_layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=dim, out_channels=dim*2, kernel_size=3, padding='same'),

            torch.nn.BatchNorm2d(dim*2),

            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2),
        )
        dim = dim*2

        self.conv_layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=dim, out_channels=dim*2, kernel_size=3, padding='same'),

            torch.nn.BatchNorm2d(dim*2),

            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2),

            # torch.nn.Dropout2d(p=0.2),
        )
        dim *= 2

        self.conv_layer4 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=dim, out_channels=dim*2, kernel_size=3, padding='same'),

            torch.nn.BatchNorm2d(dim*2),

            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2),
        )
        dim *= 2

        self.conv_layer5 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=dim, out_channels=dim*2, kernel_size=3, padding='same'),
            torch.nn.BatchNorm2d(dim*2),

            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2),
        )
        dim *= 2

        # Initialize the modules we need to build the network

        self.fc_classifier = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(28224, 100),
            torch.nn.ReLU(),
            # torch.nn.Linear(200, 100),
            # torch.nn.SiLU(),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(100, n_classes),
            torch.nn.Softmax()
        )
        # self.elu = torch.nn.ELU()
        # self.softmax = torch.nn.Softmax()

        # self.conv1 = torch.nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=5, padding='valid')
        # self.pool1 = torch.nn.MaxPool2d(2, 2)
        # self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding='valid')
        # self.pool2 = torch.nn.MaxPool2d(2, 2)

        # self.batchnorm1 = torch.nn.BatchNorm2d(num_features=32)

        # self.conv3 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding='valid')
        # self.pool3 = torch.nn.MaxPool2d(2, 2)
        # self.conv4 = torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, padding='valid')
        # self.pool4 = torch.nn.MaxPool2d(2, 2)

        # self.batchnorm2 = torch.nn.BatchNorm2d(num_features=128)

        # self.conv5 = torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, padding='valid')
        # self.pool5 = torch.nn.MaxPool2d(2, 2)
        # self.conv6 = torch.nn.Conv2d(in_channels=256, out_channels=512, kernel_size=5, padding='valid')
        # self.pool6 = torch.nn.MaxPool2d(2, 2)

        # self.batchnorm3 = torch.nn.BatchNorm2d(num_features=512)
        
        

        # self.batchnorm4 = torch.nn.BatchNorm2d(num_features=64)

        

        # self.batchnorm5 = torch.nn.BatchNorm2d(num_features=32)

        # self.flatten1 = torch.nn.Flatten()
        # self.fc1 = torch.nn.Linear(1152, 120)
        # self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        # x = self.conv1(x)
        # x = self.elu(x)
        # x = self.pool1(x)
        # x = self.conv2(x)
        # x = self.elu(x)
        # x = self.pool2(x)

        # x = self.batchnorm1(x)

        # x = self.conv3(x)
        # x = self.elu(x)
        # x = self.pool3(x)
        # x = self.conv4(x)
        # x = self.elu(x)
        # x = self.pool4(x)

        # x = self.batchnorm2(x)

        # x = self.conv5(x)
        # x = self.elu(x)
        # x = self.pool5(x)
        # x = self.conv6(x)
        # x = self.elu(x)
        # x = self.pool6(x)
        
        # x = self.batchnorm3(x)
        x = self.conv_layer1(x)
        x = self.conv_layer2(x)
        x = self.conv_layer3(x)
        x = self.conv_layer4(x)
        x = self.conv_layer5(x)
        
        x = self.fc_classifier(x)

        return x

### Model1

Model1: prova (migliore) con 2 blocchi (Conv -> RELU -> MaxPool x2) intervallati da un BatchNorm layer.

In [None]:
# Best Performance on Validation set: 0.310
class SimpleClassifier1(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        # Initialize the modules we need to build the network
        # self.first = torch.nn.Linear(input_dim, hidden_dim)
        # self.activation = torch.nn.ReLU()
        # self.last = torch.nn.Linear(hidden_dim, n_classes)

        # self.hidden_layers = torch.nn.ModuleList([
        #     torch.nn.Linear(hidden_dim, hidden_dim) for i in range(n_hidden_layers)
        # ])
        self.relu = torch.nn.ReLU()
        self.softmax = torch.nn.Softmax()

        self.conv1 = torch.nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=5)
        self.pool1 = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        self.pool2 = torch.nn.MaxPool2d(2, 2)


        self.batchnorm1 = torch.nn.BatchNorm2d(num_features=32)

        self.conv3 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.pool3 = torch.nn.MaxPool2d(2, 2)
        self.conv4 = torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5)
        self.pool4 = torch.nn.MaxPool2d(2, 2)

        self.batchnorm2 = torch.nn.BatchNorm2d(num_features=128)

        self.flatten1 = torch.nn.Flatten()
        self.fc1 = torch.nn.Linear(8192, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool2(x)

        x = self.batchnorm1(x)

        x = self.conv3(x)
        x = self.relu(x)
        x = self.pool3(x)
        x = self.conv4(x)
        x = self.relu(x)
        x = self.pool4(x)

        x = self.batchnorm2(x)
        
        x = self.flatten1(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)

        x = self.softmax(x)

        return x

### Model2

Model2: prova con blocco Convolutional Layer -> RELU -> MaxPooling Layer, ripetuto due volte. Stride usato unicamente per il MaxPooling layers.

In [None]:
# Best Performance on Validation set: 0.24225352112676057
class SimpleClassifier2(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        # Initialize the modules we need to build the network
        # self.first = torch.nn.Linear(input_dim, hidden_dim)
        # self.activation = torch.nn.ReLU()
        # self.last = torch.nn.Linear(hidden_dim, n_classes)

        # self.hidden_layers = torch.nn.ModuleList([
        #     torch.nn.Linear(hidden_dim, hidden_dim) for i in range(n_hidden_layers)
        # ])
        self.relu = torch.nn.ReLU()
        self.softmax = torch.nn.Softmax()

        self.conv1 = torch.nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=5)
        self.pool1 = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        self.pool2 = torch.nn.MaxPool2d(2, 2)


        self.flatten1 = torch.nn.Flatten()
        self.fc1 = torch.nn.Linear(70688, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool2(x)
        
        x = self.flatten1(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)

        x = self.softmax(x)

        return x

### Model3

Model3:
prova con un solo Convolutional Layer, a cui aumento kernel_size e stride per ridurne la dimensionalità e di conseguenza il numero di parametri. 

In [None]:
# Best Performance on Validation set: 0.20321931589537223
class SimpleClassifier3(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        # Initialize the modules we need to build the network
        self.relu = torch.nn.ReLU()
        self.softmax = torch.nn.Softmax()

        self.conv1 = torch.nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=11, stride=6)


        self.flatten1 = torch.nn.Flatten()
        self.fc1 = torch.nn.Linear(16384, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        x = self.conv1(x)
        x = self.relu(x)
        
        x = self.flatten1(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)

        x = self.softmax(x)

        return x

### Model4

Model4: prova con un convolutional Layer e un pooling Layer.

In [None]:
# Best Performance on Validation set: 
class SimpleClassifier4(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, n_classes, n_hidden_layers=0):
        super().__init__()

        # Initialize the modules we need to build the network
        self.relu = torch.nn.ReLU()
        self.softmax = torch.nn.Softmax()

        self.conv1 = torch.nn.Conv2d(in_channels=input_dim, out_channels=16, kernel_size=7, stride=4)
        self.pool1 = torch.nn.MaxPool2d(2, 2)

        self.flatten1 = torch.nn.Flatten()
        self.fc1 = torch.nn.Linear(9216, 120)
        self.fc2 = torch.nn.Linear(120, n_classes)

    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        x = self.conv1(x)
        x = self.relu(x)
        
        x = self.pool1(x)
        
        x = self.flatten1(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)

        x = self.softmax(x)

        return x

### Chiamata Modelli

In [45]:
# hidden_dim = 128

# model = SimpleClassifier(
#     3,
#     16,
#     n_classes,
# )

# model_best = SimpleClassifier1(
#     3,
#     16,
#     n_classes
# )

# model2 = SimpleClassifier2(
#     3,
#     16,
#     n_classes
# )

# model3 = SimpleClassifier3(
#     3,
#     16,
#     n_classes
# )

# model4 = SimpleClassifier4(
#     3,
#     16,
#     n_classes
# )

# model_complex = ComplexCNN(
#     n_classes
# )

# model5 = SimpleClassifier4(
#     3,
#     16,
#     n_classes
# )

model6 = SimpleClassifier6(
    3,
    16,
    n_classes
)

# model.forward(torch.zeros(size=(3, 200, 200)))

# model = SimpleClassifier(
#     input_dim,
#     hidden_dim,
#     n_classes,
#     n_hidden_layers=1
# )
for name, params in model6.named_parameters():
    print(f"{name}: {params.shape}")

conv1.weight: torch.Size([16, 3, 5, 5])
conv1.bias: torch.Size([16])
conv2.weight: torch.Size([32, 16, 5, 5])
conv2.bias: torch.Size([32])
batchnorm1.weight: torch.Size([32])
batchnorm1.bias: torch.Size([32])
conv3.weight: torch.Size([64, 32, 5, 5])
conv3.bias: torch.Size([64])
batchnorm2.weight: torch.Size([64])
batchnorm2.bias: torch.Size([64])
conv4.weight: torch.Size([128, 64, 5, 5])
conv4.bias: torch.Size([128])
batchnorm3.weight: torch.Size([128])
batchnorm3.bias: torch.Size([128])
conv5.weight: torch.Size([256, 128, 5, 5])
conv5.bias: torch.Size([256])
batchnorm4.weight: torch.Size([256])
batchnorm4.bias: torch.Size([256])
conv6.weight: torch.Size([512, 256, 5, 5])
conv6.bias: torch.Size([512])
batchnorm5.weight: torch.Size([512])
batchnorm5.bias: torch.Size([512])
fc1.weight: torch.Size([120, 512])
fc1.bias: torch.Size([120])
fc2.weight: torch.Size([43, 120])
fc2.bias: torch.Size([43])


## Push the model to the device (CUDA) 

In [46]:
# Push model to device. Has to be done only once.
model6.to(device)

SimpleClassifier6(
  (elu): ELU(alpha=1.0)
  (softmax): Softmax(dim=None)
  (conv1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=valid)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=valid)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (batchnorm1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=valid)
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (batchnorm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=valid)
  (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (batchnorm3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_s

## Optimizer

In [16]:
from torch.optim import Adam

In [47]:
optimizer = Adam(model6.parameters(), lr=3e-4)

## Training the network

In [18]:

from tqdm.notebook import tqdm
import torch.nn.functional as F

In [42]:
def ncorrect(scores, y):
    y_hat = torch.argmax(scores, -1)
    return (y_hat == y).sum()

def accuracy(scores, y):
    correct = ncorrect(scores, y)
    return correct.true_divide(y.shape[0])

def train_loop(model, train_dl, epochs, opt, val_dl=None, verbose=False):
    best_val_acc = 0
    best_params = []
    best_epoch = -1

    for e in tqdm(range(epochs)):
    # for e in range(epochs):
        model.train()
        # Train
        train_loss = 0
        train_samples = 0
        train_acc = 0
        # running_loss = 0
        # correct = 0
        # total = 0
        for train_data in train_dl:
            imgs = train_data[0].to(device)
            labels = train_data[1].to(device)

            opt.zero_grad()  # clear

            # print(f'imgs = {imgs.shape}')
            scores = model(imgs)
            # print(f'scores= {scores.shape}')
            
            loss = F.cross_entropy(scores, labels)
            loss.backward()  # fill
            opt.step()       # use


            # running_loss += loss.item()
            # _, predicted = torch.max(scores.data, 1)
            # total += labels.size(0)
            # correct += (predicted == labels).sum().item()

            train_loss += loss.item() * imgs.shape[0]
            train_samples += imgs.shape[0]
            train_acc += ncorrect(scores, labels).item()

            
        # train_loss = running_loss / len(train_dl)
        # train_acc = 100 * correct / total
        train_acc /= train_samples
        train_loss /= train_samples

        # Validation
        model.eval()
        with torch.no_grad():
            val_loss = 0
            val_samples = 0
            val_acc = 0
            # correct = 0
            # total = 0
            if val_dl is not None:
                for val_data in val_dl:
                    imgs = val_data[0].to(device)
                    labels = val_data[1].to(device)
                    val_scores = model(imgs)
                    val_loss += F.cross_entropy(val_scores, labels).item() * imgs.shape[0]

                    val_samples += imgs.shape[0]
                    val_acc += ncorrect(val_scores, labels).item()

                    # val_scores = model(imgs)
                    # _, predicted = torch.max(val_scores.data, 1)
                    # total += labels.size(0)
                    # correct += (predicted == labels).sum().item()

                val_acc /= val_samples
                val_loss /= val_samples
                # val_acc = 100 * correct / total

            if val_dl is None or val_acc > best_val_acc:
                best_val_acc = val_acc if val_dl is not None else 0
                best_params = model.state_dict()
                torch.save(best_params, "best_model.pth")
                best_epoch = e

        if verbose and e % 5 == 0:
            print(f"Epoch {e}: train loss {train_loss:.3f} - train acc {train_acc:.3f}" + ("" if val_dl is None else f" - valid loss {val_loss:.3f} - valid acc {val_acc:.3f}"))
            # print(f"Epoch {e}: train loss {train_loss:.3f} - train acc {train_acc:.3f}" + ("" if val_dl is None else f" - valid loss - valid acc {val_acc:.3f}"))

    if verbose and val_dl is not None:
        print(f"Best epoch {best_epoch}, best acc {best_val_acc}")

    return best_val_acc, best_params, best_epoch

In [48]:
epochs = 30

best_val_acc, best_params, best_epoch = train_loop(
    model6,
    train_dl,
    epochs,
    optimizer,
    val_dl,
    verbose=True
)

  0%|          | 0/30 [00:00<?, ?it/s]

  return self._call_impl(*args, **kwargs)


Epoch 0: train loss 3.622 - train acc 0.191 - valid loss 3.665 - valid acc 0.139
Epoch 5: train loss 3.446 - train acc 0.358 - valid loss 3.570 - valid acc 0.220
Epoch 10: train loss 3.370 - train acc 0.433 - valid loss 3.502 - valid acc 0.297
Epoch 15: train loss 3.330 - train acc 0.475 - valid loss 3.508 - valid acc 0.287
Epoch 20: train loss 3.281 - train acc 0.523 - valid loss 3.440 - valid acc 0.358


KeyboardInterrupt: 

In [26]:
torch.cuda.empty_cache()