In [None]:
import abc
import time
import typing
import warnings

import tqdm
import torch
import wandb
import random
import pandas
import torchscan
import torchvision
import sklearn.metrics
import matplotlib.pyplot as plt
import torch.utils.data as torchdata

# Pick the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(device)

# Seed shared by every experiment so that runs are comparable.
RANDOM_STATE = 42


def set_random_seed(seed):
    """Seed Python's `random`, PyTorch (CPU and current CUDA device) and force deterministic cuDNN."""
    torch.backends.cudnn.deterministic = True
    for seeder in (random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)


def fix_random():
    """Reset every random generator to the notebook-wide RANDOM_STATE."""
    return set_random_seed(RANDOM_STATE)


fix_random()

In [None]:
# Log in to Weights & Biases before any run is started; anonymous = "allow" is passed
# so that the notebook can presumably run without a pre-configured account — TODO confirm.
wandb.login(anonymous = "allow")

# 1. Train the CNN based classifier

## Load the dataset


In [None]:
fix_random()

# Per-channel statistics used to standardise the inputs.
# They stay tensors because the denormalisation cell negates and inverts them.
channel_mean = torch.tensor([ 0.491, 0.482, 0.447 ])
channel_std = torch.tensor([ 0.247, 0.244, 0.262 ])

# Input pipeline: PIL image -> float tensor -> per-channel standardisation
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean = channel_mean, std = channel_std),
])

# Download (if needed) and open both official CIFAR-10 splits
full_train_set = torchvision.datasets.CIFAR10('cifar-10', train = True, download = True, transform = transforms)
test_set = torchvision.datasets.CIFAR10('cifar-10', train = False, download = True, transform = transforms)

# Hold out a fixed number of training samples for validation
val_size = 5000
train_set, val_set = torchdata.random_split(full_train_set, [ len(full_train_set) - val_size, val_size ])

# Human-readable class names, indexed by class id
labels = test_set.classes

# Sanity check: split sizes and class names
print(len(train_set), len(val_set), len(test_set))
print(labels)

In [None]:
# Denormalization
# Inverse of the input normalisation, applied in two steps:
# multiply by std (Normalize with std = 1 / std), then add the mean back (Normalize with mean = -mean).
normalize_step = transforms.transforms[1]
denormalize = torchvision.transforms.Compose([
    torchvision.transforms.Normalize(mean = [ 0., 0., 0. ], std = 1 / normalize_step.std),
    torchvision.transforms.Normalize(mean = -normalize_step.mean, std = [ 1., 1., 1. ]),
])

# Show the first 10 samples of each split: one grid row per split (train, val, test)
plt.rcParams["figure.figsize"] = (15, 5)
for row, dataset in enumerate((train_set, val_set, test_set)):
    for i in range(10):
        image, class_num = dataset[i]
        plt.subplot(3, 10, 10 * row + i + 1)
        plt.axis('off')
        # imshow expects channels last, the tensors are channels first
        plt.imshow(denormalize(image).permute(1, 2, 0))
        plt.title('{} ({})'.format(labels[class_num], str(class_num)))

## Function for accuracy checking

In [None]:
class BaseClassifier(abc.ABC):
    """Abstract classifier interface with a shared evaluation routine.

    Subclasses implement ``fit`` and ``predict``; ``calc_metrics`` runs ``predict``
    over a whole dataset and summarises the result with scikit-learn metrics.
    """

    @abc.abstractmethod
    def fit(self, dataset: torchdata.Dataset):
        """Train the classifier on ``dataset``."""
        raise NotImplementedError

    @abc.abstractmethod
    def predict(self, images: torch.Tensor) -> typing.Tuple[torch.Tensor, torch.Tensor]:
        """Classify a batch of images.

        Returns:
            A ``(predictions, scores)`` pair. ``calc_metrics`` uses ``predictions`` as
            per-sample class ids and ``scores`` as a 2-D per-class score matrix.
        """
        raise NotImplementedError

    def calc_metrics(self, dataset: torchdata.Dataset) -> dict:
        """Evaluate the classifier on ``dataset`` and return a dict of quality metrics.

        The dataset is traversed once, in order, in batches of 512. The number of
        classes is taken from the width of the score matrix returned by ``predict``.

        Args:
            dataset: Dataset yielding ``(image, label)`` pairs.

        Returns:
            Dict with accuracy, top-k accuracy for k = 2..9, one-vs-one AUC-ROC and
            macro-averaged precision / recall / F1. The ``''`` entry is an empty
            placeholder kept unchanged for existing consumers of this dict.

        Raises:
            ValueError: If ``dataset`` yields no batches.
        """
        true_batches, score_batches, prediction_batches = [ ], [ ], [ ]
        loader = torchdata.DataLoader(dataset, batch_size = 512, shuffle = False)
        for images, labels in loader:
            predictions, scores = self.predict(images)
            true_batches.append(labels.detach().cpu())
            score_batches.append(scores.detach().cpu())
            prediction_batches.append(predictions.detach().cpu())

        if not true_batches:
            raise ValueError("calc_metrics() requires a non-empty dataset")

        # Concatenate once at the end: calling torch.cat inside the loop re-copies
        # everything accumulated so far on every batch.
        all_true = torch.cat(true_batches).numpy()
        all_predictions = torch.cat(prediction_batches).numpy()
        all_scores = torch.cat(score_batches)
        # Widen reduced-precision scores to at least float32 before handing them to NumPy.
        all_scores = all_scores.to(torch.promote_types(all_scores.dtype, torch.float32)).numpy()

        result = {
            '': '',
            'Accuracy': sklearn.metrics.accuracy_score(all_true, all_predictions),
        }
        for k in range(2, 10):
            result['TOP-{} Accuracy'.format(k)] = sklearn.metrics.top_k_accuracy_score(all_true, all_scores, k = k)
        result['AUC-ROC'] = sklearn.metrics.roc_auc_score(all_true, all_scores, multi_class = 'ovo')
        result['Precision'] = sklearn.metrics.precision_score(all_true, all_predictions, average = 'macro')
        result['Recall'] = sklearn.metrics.recall_score(all_true, all_predictions, average = 'macro')
        result['F1-score'] = sklearn.metrics.f1_score(all_true, all_predictions, average = 'macro')
        return result

## Implement CNN class for CIFAR10

In [None]:
class Classifier(BaseClassifier):
    """Trainable wrapper around a ``torch.nn.Module`` with Weights & Biases logging.

    ``fit`` trains with cross-entropy loss, validates after every epoch, logs the
    metrics to W&B and records the best epoch; ``summary`` shows the results of
    every experiment run so far together with a torchscan model summary.
    """

    # Class-level list shared by all instances: fit() appends the best-epoch metrics
    # of each run here and summary() renders the whole list as one table.
    results = [ ]

    def __init__(
            self,
            name: str,
            model: torch.nn.Module,
            batch_size: int = 256,
            device: torch.device = device,
            optimizer: typing.Optional[torch.optim.Optimizer] = None,
            scheduler: typing.Optional[torch.optim.lr_scheduler.LRScheduler] = None,
        ):
        """Store the configuration and move ``model`` to ``device``.

        Args:
            name: Run name used for W&B and for the results table.
            model: Network to train; it is moved to ``device``.
            batch_size: Batch size of the training loader.
            device: Device on which the model runs.
            optimizer: Optimizer to use; AdamW with default settings when omitted.
            scheduler: Optional LR scheduler, stepped once per epoch.
        """
        self.name = name
        self.history = [ ]
        self.device = device
        self.input_shape = None
        self.scheduler = scheduler
        self.batch_size = batch_size
        self.model = model.to(self.device)
        # NOTE(review): for models with lazy layers (e.g. LazyLinear) this default optimizer
        # is created before fit() performs the dry run that materialises them; PyTorch's
        # lazy-module docs suggest attaching optimizers after the dry run — confirm.
        self.optimizer = optimizer or torch.optim.AdamW(self.model.parameters())


    def train(self, images: torch.Tensor, labels: torch.Tensor) -> float:
        """Run one optimisation step on a single batch and return its loss value."""
        self.model.train() # Enter train mode
        self.optimizer.zero_grad() # Zero gradients
        output = self.model(images.to(self.device)) # Get predictions
        loss = torch.nn.functional.cross_entropy(output, labels.to(self.device)) # Calculate loss
        loss.backward() # Calculate gradients
        self.optimizer.step() # Update weights
        return loss.item()

    def train_epoch(self, loader: torchdata.DataLoader) -> float:
        """Train on every batch of ``loader`` once and return the mean batch loss."""
        sum_loss = 0
        for images, labels in loader:
            sum_loss += self.train(images, labels) # Train on one batch
        if self.scheduler is not None:
            self.scheduler.step()
        return sum_loss / len(loader) # Return average loss to avoid random-dependent graph

    def fit(self, train_set: torchdata.Dataset, val_set: torchdata.Dataset, n_epochs: int = 25):
        """Train for ``n_epochs`` epochs, validating and logging after each one.

        Args:
            train_set: Dataset used for optimisation (reshuffled every epoch).
            val_set: Dataset on which ``calc_metrics`` is evaluated after every epoch.
            n_epochs: Number of passes over ``train_set``.

        Returns:
            ``self``, so that calls can be chained.
        """
        if self.input_shape is None:
            self.predict(train_set[0][0].unsqueeze(0)) # Initialize lazy layers and input shape
        loader = torchdata.DataLoader(train_set, batch_size = self.batch_size, shuffle = True)
        wandb.init(project = "CV-HW-4", name = self.name, anonymous = "allow")
        try:
            wandb.watch(self.model, log = "all")
            for epoch in tqdm.trange(n_epochs):
                # Train
                train_start = time.perf_counter()
                loss = self.train_epoch(loader)
                train_time = time.perf_counter() - train_start

                # Validate
                val_start = time.perf_counter()
                metrics = self.calc_metrics(val_set)
                val_time = time.perf_counter() - val_start

                # Upload metrics ('Epoch' is added only after logging, so it is not sent to W&B)
                metrics['Validation time'] = val_time
                metrics['Train time'] = train_time
                metrics['Loss'] = loss
                wandb.log(metrics)
                metrics['Epoch'] = epoch + 1
                self.history.append(metrics)
        finally:
            # Always close the run, even if training failed or was interrupted
            wandb.finish()

        # Store best metrics (nothing to store if no epoch has been run)
        if self.history:
            best_metrics = max(self.history, key = lambda item: item['Accuracy'])
            Classifier.results.append({ **best_metrics, 'Name': self.name })

        return self


    def predict(self, images: torch.Tensor) -> typing.Tuple[torch.Tensor, torch.Tensor]:
        """Classify a batch of images in evaluation mode without tracking gradients.

        The shape of a single input is remembered on the first call for ``summary``.

        Returns:
            ``(predictions, scores)``: the argmax class ids and the softmax
            probabilities over dim 1; both stay on the model's device.
        """
        if self.input_shape is None:
            self.input_shape = images[0].shape # Lazily initialize input shape
        self.model.eval() # Enter evaluation mode
        with torch.no_grad():
            outputs = self.model(images.to(self.device)) # Get outputs
            scores = torch.softmax(outputs, dim = 1) # Make probabilities
            predictions = torch.argmax(scores, dim = 1) # Calculate predictions
        return predictions, scores


    def summary(self):
        """Display the table of all recorded results and a torchscan summary of this model."""
        # Silence warnings only while rendering, instead of changing the global
        # filter for the rest of the session.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            if Classifier.results: # set_index("Name") would fail on an empty table
                display(pandas.DataFrame(Classifier.results).set_index("Name"))
            torchscan.summary(self.model.eval(), self.input_shape, receptive_field = True)

### Разный размер ядра
Для эксперимента будем использовать модель, содержащую один свёрточный слой

In [None]:
def test_kernel_size(kernel_size):
    """Train and summarise a one-convolution CNN using the given kernel size and no padding."""
    fix_random()
    network = torch.nn.Sequential(
        # Convolutional part
        torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = kernel_size),
        torch.nn.GELU(),
        # Classification head
        torch.nn.Flatten(),
        torch.nn.Dropout(0.5),
        torch.nn.LazyLinear(1024),
        torch.nn.GELU(),
        torch.nn.Dropout(0.5),
        torch.nn.Linear(1024, 10),
    )
    run_name = 'kernel_size = {}'.format(kernel_size)
    Classifier(run_name, network).fit(train_set, val_set).summary()

In [None]:
# Kernel-size experiment: kernel size 3, no padding.
test_kernel_size(3)

In [None]:
# Kernel-size experiment: kernel size 5, no padding.
test_kernel_size(5)

In [None]:
# Kernel-size experiment: kernel size 7, no padding.
test_kernel_size(7)

In [None]:
# Kernel-size experiment: kernel size 11, no padding.
test_kernel_size(11)

In [None]:
# Kernel-size experiment: kernel size 17, no padding.
test_kernel_size(17)

Видно, что при размере ядра 5 и 7 качество выше, чем при размере ядра 3. Тем не менее дальнейшее увеличение размера ядра ведёт к уменьшению качества. \
Заметим, что используемый алгоритм свёртки уменьшает размер картинки. Кажется, это может объяснять наблюдаемое поведение: при использовании большого размера ядра качество уменьшается из-за слишком маленького размера картинки после выполнения свёртки.

### Padding
Проведём аналогичный эксперимент, но с использованием padding = kernel_size // 2 (целочисленное деление). Уменьшения размера картинки при свёртках при этом происходить не будет.

In [None]:
def test_kernel_size_with_padding(kernel_size):
    """Train and summarise a one-convolution CNN with padding of kernel_size // 2."""
    fix_random()
    padding = kernel_size // 2
    network = torch.nn.Sequential(
        # Convolutional part
        torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = kernel_size, padding = padding),
        torch.nn.GELU(),
        # Classification head
        torch.nn.Flatten(),
        torch.nn.Dropout(0.5),
        torch.nn.LazyLinear(1024),
        torch.nn.GELU(),
        torch.nn.Dropout(0.5),
        torch.nn.Linear(1024, 10),
    )
    run_name = 'kernel_size = {}, padding = {}'.format(kernel_size, padding)
    Classifier(run_name, network).fit(train_set, val_set).summary()

In [None]:
# Padded kernel-size experiment: kernel size 3, padding 3 // 2 = 1.
test_kernel_size_with_padding(3)

In [None]:
# Padded kernel-size experiment: kernel size 5, padding 5 // 2 = 2.
test_kernel_size_with_padding(5)

In [None]:
# Padded kernel-size experiment: kernel size 7, padding 7 // 2 = 3.
test_kernel_size_with_padding(7)

In [None]:
# Padded kernel-size experiment: kernel size 11, padding 11 // 2 = 5.
test_kernel_size_with_padding(11)

In [None]:
# Padded kernel-size experiment: kernel size 17, padding 17 // 2 = 8.
test_kernel_size_with_padding(17)

Действительно, использование отступа привело к повышению качества. Тем не менее большие свёртки всё равно показывают качество хуже, чем маленькие. 

### Различный тип паддинга

In [None]:
def test_padding_mode(padding_mode):
    """Train and summarise a one-convolution CNN (kernel 7, padding 3) with the given padding mode."""
    fix_random()
    network = torch.nn.Sequential(
        # Convolutional part
        torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3, padding_mode = padding_mode),
        torch.nn.GELU(),
        # Classification head
        torch.nn.Flatten(),
        torch.nn.Dropout(0.5),
        torch.nn.LazyLinear(1024),
        torch.nn.GELU(),
        torch.nn.Dropout(0.5),
        torch.nn.Linear(1024, 10),
    )
    run_name = 'padding_mode = {}'.format(padding_mode)
    Classifier(run_name, network).fit(train_set, val_set).summary()

In [None]:
# Padding-mode experiment: 'zeros' padding.
test_padding_mode('zeros')

In [None]:
# Padding-mode experiment: 'reflect' padding.
test_padding_mode('reflect')

In [None]:
# Padding-mode experiment: 'replicate' padding.
test_padding_mode('replicate')

In [None]:
# Padding-mode experiment: 'circular' padding.
test_padding_mode('circular')

### Количество фильтров

In [None]:
def test_out_channels(out_channels):
    """Train and summarise a one-convolution CNN (kernel 7, padding 3) with the given number of filters."""
    fix_random()
    network = torch.nn.Sequential(
        # Convolutional part
        torch.nn.Conv2d(in_channels = 3, out_channels = out_channels, kernel_size = 7, padding = 3),
        torch.nn.GELU(),
        # Classification head
        torch.nn.Flatten(),
        torch.nn.Dropout(0.5),
        torch.nn.LazyLinear(1024),
        torch.nn.GELU(),
        torch.nn.Dropout(0.5),
        torch.nn.Linear(1024, 10),
    )
    run_name = 'out_channels = {}'.format(out_channels)
    Classifier(run_name, network).fit(train_set, val_set).summary()

In [None]:
# Filter-count experiment: 8 output channels.
test_out_channels(8)

In [None]:
# Filter-count experiment: 16 output channels.
test_out_channels(16)

In [None]:
# Filter-count experiment: 32 output channels.
test_out_channels(32)

In [None]:
# Filter-count experiment: 64 output channels.
test_out_channels(64)

### Количество свёрточных слоёв
Для эксперимента будем использовать слои с kernel_size = 7 и padding = 3 (именно эти значения заданы в коде ниже), так как такая конфигурация хорошо показала себя в предыдущих экспериментах.

In [None]:
def test_conv_amount(amount):
    """Train and summarise a CNN with `amount` stacked 7x7 convolutions (padding 3, 16 filters each)."""
    fix_random()
    conv_stack = [ ]
    in_channels = 3 # Only the first convolution sees the RGB input
    for _ in range(amount):
        conv_stack += [
            torch.nn.Conv2d(in_channels = in_channels, out_channels = 16, kernel_size = 7, padding = 3),
            torch.nn.GELU(),
        ]
        in_channels = 16
    network = torch.nn.Sequential(
        *conv_stack,
        # Classification head
        torch.nn.Flatten(),
        torch.nn.Dropout(0.5),
        torch.nn.LazyLinear(1024),
        torch.nn.GELU(),
        torch.nn.Dropout(0.5),
        torch.nn.Linear(1024, 10),
    )
    run_name = '{} convolutions'.format(amount)
    Classifier(run_name, network).fit(train_set, val_set).summary()

In [None]:
# Depth experiment: 1 convolutional layer.
test_conv_amount(1)

In [None]:
# Depth experiment: 2 convolutional layers.
test_conv_amount(2)

In [None]:
# Depth experiment: 3 convolutional layers.
test_conv_amount(3)

In [None]:
# Depth experiment: 4 convolutional layers.
test_conv_amount(4)

Видно, что с увеличением количества свёрточных слоёв качество увеличивается, но сильнее проявляется и переобучение. Можно предположить, что оптимально использовать 10 свёрточных слоёв размера 7, чтобы поле восприятия последнего слоя равнялось всей картинке. Тем не менее при этом модель будет обучаться очень долго, и сильно проявится переобучение.

### Pooling
Воспользуемся пулингом как альтернативным способом быстрее повысить поле восприятия. Будем использовать 3 свёрточных слоя.

#### Один пулинг

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1, followed by the only max-pooling
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 2
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    # Convolution 3
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('1 Pooling (1)', network)
model.fit(train_set, val_set).summary()

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    # Convolution 2, followed by the only max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 3
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('1 Pooling (2)', network)
model.fit(train_set, val_set).summary()

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    # Convolution 2
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    # Convolution 3, followed by the only max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('1 Pooling (3)', network)
model.fit(train_set, val_set).summary()

#### Два пулинга

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1 + max-pooling
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 2 + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 3
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('2 Poolings (1-2)', network)
model.fit(train_set, val_set).summary()

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1 + max-pooling
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 2
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    # Convolution 3 + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('2 Poolings (1-3)', network)
model.fit(train_set, val_set).summary()

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    # Convolution 2 + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 3 + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('2 Poolings (2-3)', network)
model.fit(train_set, val_set).summary()

#### Три пулинга

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1 + max-pooling
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 2 + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 3 + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('3 Poolings', network)
model.fit(train_set, val_set).summary()

Лучшее качество показывают модели с одним пулинг-слоем посередине и с двумя пулинг-слоями в конце. Интересно, что модели, которые после свёрток "видят" всю картинку, показали не лучшее качество.

#### Average pooling

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    # Convolution 2 + average pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.AvgPool2d(2, 2),
    # Convolution 3 + average pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.GELU(),
    torch.nn.AvgPool2d(2, 2),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('Average pooling', network)
model.fit(train_set, val_set).summary()

### Нормализация

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1 with batch normalisation
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.BatchNorm2d(16),
    torch.nn.GELU(),
    # Convolution 2 with batch normalisation + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.BatchNorm2d(16),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 3 with batch normalisation + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.BatchNorm2d(16),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head, normalised as well
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.BatchNorm1d(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('BatchNorm', network)
model.fit(train_set, val_set).summary()

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1 with instance normalisation
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.InstanceNorm2d(16),
    torch.nn.GELU(),
    # Convolution 2 with instance normalisation + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.InstanceNorm2d(16),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 3 with instance normalisation + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.InstanceNorm2d(16),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    # NOTE(review): the input here is 2-D (batch, 1024); recent PyTorch versions appear to treat
    # that as an unbatched (channels, length) tensor, i.e. each sample is normalised over its
    # 1024 features — confirm this is the intended behaviour.
    torch.nn.InstanceNorm1d(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('InstanceNorm', network)
model.fit(train_set, val_set).summary()

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1 with layer normalisation over the full 16x32x32 feature map
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.LayerNorm((16, 32, 32)),
    torch.nn.GELU(),
    # Convolution 2 with layer normalisation + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.LayerNorm((16, 32, 32)),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 3 (16x16 maps after the first pooling) with layer normalisation + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.LayerNorm((16, 16, 16)),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head, normalised as well
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.LayerNorm(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('LayerNorm', network)
model.fit(train_set, val_set).summary()

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1 with local response normalisation (size 2)
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.LocalResponseNorm(2),
    torch.nn.GELU(),
    # Convolution 2 with local response normalisation + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.LocalResponseNorm(2),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 3 with local response normalisation + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.LocalResponseNorm(2),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head (uses batch normalisation)
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.BatchNorm1d(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('LocalResponseNorm(2)', network)
model.fit(train_set, val_set).summary()

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1 with local response normalisation (size 8)
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.LocalResponseNorm(8),
    torch.nn.GELU(),
    # Convolution 2 with local response normalisation + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.LocalResponseNorm(8),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 3 with local response normalisation + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.LocalResponseNorm(8),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head (uses batch normalisation)
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.BatchNorm1d(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('LocalResponseNorm(8)', network)
model.fit(train_set, val_set).summary()

### Другие эксперименты

Кажется, целесообразно делать первые свёртки меньшего размера, а последние - большего.

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1: 5x5 kernel
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 5, padding = 2),
    torch.nn.BatchNorm2d(16),
    torch.nn.GELU(),
    # Convolution 2: 7x7 kernel + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 7, padding = 3),
    torch.nn.BatchNorm2d(16),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 3: 9x9 kernel + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 16, kernel_size = 9, padding = 4),
    torch.nn.BatchNorm2d(16),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.BatchNorm1d(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('Idea - 1', network)
model.fit(train_set, val_set).summary()

Действительно, стало немного лучше. \
Попробуем изменить количество фильтров в промежуточных слоях.

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1: 5x5 kernel, 16 filters
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 5, padding = 2),
    torch.nn.BatchNorm2d(16),
    torch.nn.GELU(),
    # Convolution 2: 7x7 kernel, 32 filters + max-pooling
    torch.nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 7, padding = 3),
    torch.nn.BatchNorm2d(32),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 3: 9x9 kernel, 32 filters + max-pooling
    torch.nn.Conv2d(in_channels = 32, out_channels = 32, kernel_size = 9, padding = 4),
    torch.nn.BatchNorm2d(32),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.BatchNorm1d(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
model = Classifier('Idea - 2', network)
model.fit(train_set, val_set).summary()

Стало значительно лучше. \
Попробуем добавить ещё один слой, чтобы увеличить поле восприятия до всей картинки.

In [None]:
fix_random()
network = torch.nn.Sequential(
    # Convolution 1: 3x3 kernel, 16 filters
    torch.nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 3, padding = 1),
    torch.nn.BatchNorm2d(16),
    torch.nn.GELU(),
    # Convolution 2: 5x5 kernel, 32 filters
    torch.nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 5, padding = 2),
    torch.nn.BatchNorm2d(32),
    torch.nn.GELU(),
    # Convolution 3: 7x7 kernel, 64 filters + max-pooling
    torch.nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 7, padding = 3),
    torch.nn.BatchNorm2d(64),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Convolution 4: 9x9 kernel, 64 filters + max-pooling
    torch.nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 9, padding = 4),
    torch.nn.BatchNorm2d(64),
    torch.nn.GELU(),
    torch.nn.MaxPool2d(2, 2),
    # Classification head
    torch.nn.Flatten(),
    torch.nn.Dropout(0.5),
    torch.nn.LazyLinear(1024),
    torch.nn.BatchNorm1d(1024),
    torch.nn.GELU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 10),
)
# `model` is evaluated on the test set by the final cell of the notebook
model = Classifier('Idea - 3', network)
model.fit(train_set, val_set).summary()

## Train the model




### Validate results on test dataset

You must get accuracy above 0.65

In [None]:
# Evaluate the most recently assigned `model` (the last experiment cell above) on the held-out test split.
model.calc_metrics(test_set)

# Place for brief conclusion:

....
