<a href="https://colab.research.google.com/github/Tako-San/ml-course/blob/main/t3/task1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Домашнее задание 2-1, Хайдари Фарид, Б01-008


## Подключение библиотек, написание тренера и логгера

In [41]:
# !pip3 install matplotlib
# !pip3 install scikit-image
# !pip3 install scikit-learn
# !pip3 install tqdm
# !pip3 install torch torchvision torchaudio
# !pip3 install tensorboard
# !pip3 install torchvision
# !pip3 install ipywidgets --user

In [42]:
from copy import deepcopy

import matplotlib.pyplot as plt
from matplotlib.image import imread
from mpl_toolkits import mplot3d
from matplotlib import gridspec
from PIL import Image
import io
from urllib.request import urlopen
# from lime import lime_image
from skimage.segmentation import mark_boundaries

from tqdm.notebook import tqdm
import numpy as np
import requests
import torch

from sklearn.metrics import classification_report
from torch.utils.tensorboard import SummaryWriter

from torchvision import datasets, transforms

In [43]:
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): a blanket "ignore" filter also hides deprecation and numerical
# warnings raised by the libraries used below — consider narrowing it.
warnings.filterwarnings("ignore")

# Disabled Colab-specific setup (presumably needed for widget rendering there — TODO confirm).
# from google.colab import output
# output.enable_custom_widget_manager()

In [44]:
# Side length, in pixels, of the square single-channel input images.
PIC_SIZE = 28
# Training objective and optimizer class shared by every experiment below.
LOSS_FUNCTION = torch.nn.CrossEntropyLoss()
OPTIMIZER = torch.optim.Adam

# Seed the torch RNG so that weight initialisation and batch shuffling — and
# therefore the losses reported below — are reproducible on "Restart & Run All".
SEED = 42
torch.manual_seed(SEED)

# NOTE: despite the `MNIST_` prefix these are the Fashion-MNIST train/test splits.
MNIST_train = datasets.FashionMNIST(root='data', train=True, download=True,
                                    transform=transforms.ToTensor())

MNIST_test = datasets.FashionMNIST(root='data', train=False, download=True,
                                   transform=transforms.ToTensor())

# Use the GPU when one is available; the trailing expression displays the choice.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE

device(type='cuda')

In [45]:
def train_on_batch(model, x_batch, y_batch, optimizer, loss_function):
    """Run a single optimisation step on one mini-batch.

    Args:
        model: ``torch.nn.Module`` being trained; switched to train mode.
        x_batch: Input tensor for the batch.
        y_batch: Target tensor for the batch.
        optimizer: Optimizer instance whose ``step()`` updates the model.
        loss_function: Callable ``(output, target) -> scalar loss tensor``.

    Returns:
        The batch loss as a Python float.
    """
    # Send the batch to the device the model actually lives on, so that the
    # two can never disagree; parameter-free modules fall back to the
    # notebook-wide DEVICE.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    model.train()
    model.zero_grad()

    output = model(x_batch.to(device))

    loss = loss_function(output, y_batch.to(device))
    loss.backward()

    optimizer.step()
    # .item() already copies the scalar to the host; no explicit .cpu() needed.
    return loss.item()

In [46]:
def train_epoch(train_generator, model, loss_function, optimizer, callback = None):
    """Train ``model`` for one pass over ``train_generator``.

    Args:
        train_generator: Iterable yielding ``(x_batch, y_batch)`` pairs.
        model: Model to train.
        loss_function: Loss passed through to ``train_on_batch``.
        optimizer: Optimizer instance passed through to ``train_on_batch``.
        callback: Optional callable ``callback(model, batch_loss)`` invoked
            after every batch.

    Returns:
        The sample-weighted mean of the batch losses, or ``nan`` when the
        generator yielded no samples (instead of raising ZeroDivisionError).
    """
    epoch_loss = 0
    total = 0
    for batch_of_x, batch_of_y in train_generator:
        # train_on_batch moves the tensors to the target device itself, so
        # they are passed through untouched rather than transferred twice.
        batch_loss = train_on_batch(model, batch_of_x, batch_of_y, optimizer, loss_function)

        if callback is not None:
            callback(model, batch_loss)

        # Weight by batch length: the last batch may be smaller than the rest.
        batch_len = len(batch_of_x)
        epoch_loss += batch_loss * batch_len
        total += batch_len

    if total == 0:
        return float("nan")
    return epoch_loss / total

In [47]:
def trainer(count_of_epoch,
            batch_size,
            dataset,
            model,
            loss_function,
            optimizer,
            lr = 0.001,
            callback = None):
    """Train ``model`` on ``dataset`` for ``count_of_epoch`` epochs.

    Args:
        count_of_epoch: Number of passes over the dataset.
        batch_size: Mini-batch size used by the data loader.
        dataset: Sized dataset to draw shuffled batches from.
        model: Model whose parameters are optimised.
        loss_function: Loss forwarded to ``train_epoch``.
        optimizer: Optimizer *class*; instantiated here as
            ``optimizer(model.parameters(), lr=lr)``.
        lr: Learning rate for the optimizer.
        callback: Optional per-batch hook forwarded to ``train_epoch``.

    Progress is shown with two tqdm bars (epochs and batches); nothing is
    returned.
    """
    optima = optimizer(model.parameters(), lr=lr)

    epoch_bar = tqdm(range(count_of_epoch), desc='epoch')
    epoch_bar.set_postfix({'train epoch loss': np.nan})
    for _ in epoch_bar:
        # A fresh shuffled loader for every epoch.
        loader = torch.utils.data.DataLoader(dataset=dataset,
                                             batch_size=batch_size,
                                             shuffle=True)
        # Ceil division: a trailing partial batch counts as one more step.
        full_batches, remainder = divmod(len(dataset), batch_size)
        batch_bar = tqdm(loader, leave=False,
                         total=full_batches + (remainder > 0))

        mean_loss = train_epoch(train_generator=batch_bar,
                                model=model,
                                loss_function=loss_function,
                                optimizer=optima,
                                callback=callback)

        epoch_bar.set_postfix({'train epoch loss': mean_loss})

In [48]:
class callback():
    """Per-batch TensorBoard logger with periodic evaluation on a held-out set.

    Every call logs the training loss under ``LOSS/train``. Every
    ``delimeter``-th call additionally evaluates the model on ``dataset`` and
    logs the mean loss (``LOSS/test``) and a classification report
    (``REPORT/test``). The model graph is written once, on the first
    evaluation.
    """

    def __init__(self, writer, dataset, loss_function, delimeter = 100, batch_size=64):
        """Store the logging configuration.

        Args:
            writer: Object exposing ``add_scalar``, ``add_text`` and
                ``add_graph`` (a TensorBoard ``SummaryWriter``).
            dataset: Dataset of ``(x, y)`` samples used for evaluation.
            loss_function: Callable ``(output, target) -> scalar loss tensor``.
            delimeter: Evaluate every ``delimeter`` training steps.
            batch_size: Batch size used during evaluation.
        """
        self.step = 0
        self.writer = writer
        self.delimeter = delimeter
        self.loss_function = loss_function
        self.batch_size = batch_size

        self.dataset = dataset
        # The architecture does not change during training, so tracing the
        # graph once is enough; re-tracing on every evaluation is wasted work.
        self._graph_logged = False

    @staticmethod
    def _device_of(model):
        """Return the model's ``device`` attribute, else its first parameter's device."""
        device = getattr(model, 'device', None)
        if device is None:
            first_param = next(model.parameters(), None)
            device = first_param.device if first_param is not None else torch.device('cpu')
        return device

    def _evaluate(self, model, device):
        """Return ``(mean loss, true labels, predicted labels)`` over ``self.dataset``."""
        batch_generator = torch.utils.data.DataLoader(dataset = self.dataset,
                                                      batch_size=self.batch_size)

        pred = []
        real = []
        test_loss = 0
        for x_batch, y_batch in batch_generator:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            output = model(x_batch)

            # Weight by batch length so the final partial batch is averaged correctly.
            test_loss += self.loss_function(output, y_batch).item()*len(x_batch)

            pred.extend(torch.argmax(output, dim=-1).cpu().numpy().tolist())
            real.extend(y_batch.cpu().numpy().tolist())

        return test_loss / len(self.dataset), real, pred

    def forward(self, model, loss):
        """Log one training step and, every ``delimeter`` steps, evaluate the model.

        Args:
            model: Model currently being trained.
            loss: Training loss of the batch that was just processed.
        """
        self.step += 1
        self.writer.add_scalar('LOSS/train', loss, self.step)

        if self.step % self.delimeter != 0:
            return

        device = self._device_of(model)

        # Evaluate in eval mode and without autograd: otherwise dropout stays
        # active, BatchNorm running statistics are updated from test data, and
        # a gradient graph is built for every batch. The previous mode is
        # restored afterwards so training continues unaffected.
        was_training = model.training
        model.eval()
        try:
            with torch.no_grad():
                if not self._graph_logged:
                    sample = self.dataset[0][0].view(1, 1, PIC_SIZE, PIC_SIZE).to(device)
                    self.writer.add_graph(model, sample)
                    self._graph_logged = True

                test_loss, real, pred = self._evaluate(model, device)
        finally:
            model.train(was_training)

        self.writer.add_scalar('LOSS/test', test_loss, self.step)

        self.writer.add_text('REPORT/test', str(classification_report(real, pred)), self.step)

    def __call__(self, model, loss):
        """Make the instance usable as a plain ``callback(model, loss)`` function."""
        return self.forward(model, loss)


In [49]:
def get_loss(md, dataset=None, loss_function=None, batch_size=64):
    """Return the mean loss of ``md`` over a dataset, computed in eval mode.

    Args:
        md: Model to evaluate.
        dataset: Dataset of ``(x, y)`` samples; defaults to ``MNIST_test``.
        loss_function: Callable ``(output, target) -> scalar loss tensor``;
            defaults to ``LOSS_FUNCTION``.
        batch_size: Evaluation batch size.

    Returns:
        Sample-weighted mean loss as a Python float.
    """
    # Defaults are resolved at call time so the notebook-level globals are
    # only needed when the caller does not override them.
    if dataset is None:
        dataset = MNIST_test
    if loss_function is None:
        loss_function = LOSS_FUNCTION

    # Evaluate on whatever device the model is on; parameter-free modules
    # fall back to the notebook-wide DEVICE.
    first_param = next(md.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    batch_generator = torch.utils.data.DataLoader(dataset = dataset,
                                                  batch_size=batch_size)

    # The training loop leaves the model in train mode. Without eval() dropout
    # would stay active and BatchNorm would use (and update) batch statistics,
    # so the reported loss would be neither correct nor deterministic.
    was_training = md.training
    md.eval()
    test_loss = 0
    try:
        with torch.no_grad():
            for x_batch, y_batch in batch_generator:
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)

                output = md(x_batch)

                # Weight by batch length: the last batch may be smaller.
                test_loss += loss_function(output, y_batch).item()*len(x_batch)
    finally:
        # Restore the caller's mode.
        md.train(was_training)

    test_loss /= len(dataset)
    return test_loss

## Модель

Модель написана так, чтобы можно было легко менять параметры сети. В конструкторе можно задать количество и размер слоёв, размеры ядер свёртки и пулинга, вероятность dropout, а также включить или отключить batch normalization.

In [50]:
from math import floor

def conv_output_size(input_size, kernel_size):
    """Side length produced by ``input_size - kernel_size + 1``.

    This is the output size of a convolution with stride 1 and no padding.
    """
    shrink = input_size - kernel_size
    return shrink + 1

def pool_output_size(input_size, kernel_size):
    """Side length after pooling: ``floor((input_size - kernel_size) / kernel_size + 1)``.

    This corresponds to a pooling window whose stride equals its kernel size.
    """
    extra_windows = (input_size - kernel_size) / kernel_size
    return floor(extra_windows + 1)

class CNN(torch.nn.Module):
    """Configurable convolutional classifier for single-channel square images.

    The body is a stack of ``conv -> [batch norm] -> ReLU -> [dropout] ->
    max-pool`` stages, one per entry of ``channels``. The tail flattens the
    feature map and applies three linear layers (120, 84, 10 outputs) with
    ReLU between them.
    """

    @property
    def device(self):
        """Device of the first parameter, or ``None`` if the module has no parameters."""
        first_param = next(self.parameters(), None)
        return None if first_param is None else first_param.device

    def __init__(self, channels=(6, 16), conv_kernel_size=5, pool_kernel_size=2,
                 batch_norm=False, dropout=None, input_size=28):
        """Build the network.

        Args:
            channels: Output channel count of each convolutional stage; its
                length is the number of stages.
            conv_kernel_size: Kernel size of every convolution.
            pool_kernel_size: Kernel size of every max-pooling layer.
            batch_norm: Insert ``BatchNorm2d`` after each convolution.
            dropout: Dropout probability applied after each ReLU, or ``None``
                to omit dropout.
            input_size: Side length of the square input image.

        Raises:
            ValueError: If the feature map shrinks below 1x1 for the given
                configuration (too many stages or too large kernels).
        """
        super().__init__()

        self.layers = torch.nn.Sequential()

        cur_size = input_size
        ch = 1  # the network expects single-channel input

        for i, new_ch in enumerate(channels):
            label = str(i + 1)

            # Track the spatial size so the first linear layer can be sized,
            # and fail here with a clear message instead of deep inside
            # forward() when the configuration does not fit the input.
            conv_size = conv_output_size(cur_size, conv_kernel_size)
            pooled_size = pool_output_size(conv_size, pool_kernel_size)
            if conv_size < 1 or pooled_size < 1:
                raise ValueError(
                    f"stage {label}: a {cur_size}x{cur_size} feature map is too small for "
                    f"conv_kernel_size={conv_kernel_size} and pool_kernel_size={pool_kernel_size}")

            self.layers.add_module('body_conv_' + label,
                                   torch.nn.Conv2d(ch, new_ch, kernel_size = conv_kernel_size))
            if batch_norm:
                self.layers.add_module('body_batch_norm_' + label, torch.nn.BatchNorm2d(new_ch))
            self.layers.add_module('body_relu_' + label, torch.nn.ReLU())
            if dropout is not None:
                self.layers.add_module('body_dropout_' + label, torch.nn.Dropout(p = dropout))
            self.layers.add_module('body_pool_' + label,
                                   torch.nn.MaxPool2d(kernel_size = pool_kernel_size))

            cur_size = pooled_size
            ch = new_ch

        self.layers.add_module('tail_flatten', torch.nn.Flatten())
        self.layers.add_module('tail_linear_1', torch.nn.Linear(ch * cur_size * cur_size, 120))
        self.layers.add_module('tail_relu_1', torch.nn.ReLU())
        self.layers.add_module('tail_linear_2', torch.nn.Linear(120, 84))
        self.layers.add_module('tail_relu_2', torch.nn.ReLU())
        self.layers.add_module('tail_linear_3', torch.nn.Linear(84, 10))

    def forward(self, input):
        """Apply the layer stack to ``input`` and return the 10 class scores per sample."""
        return self.layers(input)

## Пример использования модели

За стандартную возьмем модель со следующими параметрами:
* channels=[6, 16]
* conv_kernel_size=5
* pool_kernel_size=2
* batch_norm=False
* dropout=None

In [51]:
# Training objective and optimizer class used for the experiments below.
OPTIMIZER = torch.optim.Adam
LOSS_FUNCTION = torch.nn.CrossEntropyLoss()

# Baseline network with the default hyperparameters; Module.to() returns the
# module itself, so the trailing expression still displays its layer layout.
model = CNN().to(DEVICE)
model

CNN(
  (layers): Sequential(
    (body_conv_1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (body_relu_1): ReLU()
    (body_pool_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (body_conv_2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (body_relu_2): ReLU()
    (body_pool_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (tail_flatten): Flatten(start_dim=1, end_dim=-1)
    (tail_linear_1): Linear(in_features=256, out_features=120, bias=True)
    (tail_relu_1): ReLU()
    (tail_linear_2): Linear(in_features=120, out_features=84, bias=True)
    (tail_relu_2): ReLU()
    (tail_linear_3): Linear(in_features=84, out_features=10, bias=True)
  )
)

Результаты необученной стандартной модели

In [52]:
# Test loss of the freshly constructed model, before any training.
loss = get_loss(model)
print(f"initial loss: {loss}")

initial loss: 2.3058507751464843


Результаты обученной стандартной модели

In [53]:
!rm -r experiment

In [None]:
model = CNN()
model.to(DEVICE)

# Leaving the `with` block closes the writer, which flushes pending events to
# disk and releases the file handle even if training is interrupted.
with SummaryWriter(log_dir = 'experiment/default/0') as writer:
    call = callback(writer, MNIST_test, LOSS_FUNCTION, delimeter = 10)

    trainer(count_of_epoch = 1,
            batch_size = 64,
            dataset = MNIST_train,
            model = model,
            loss_function = LOSS_FUNCTION,
            optimizer = OPTIMIZER,
            lr = 0.001,
            callback = call)

loss = get_loss(model)
print(f"trained loss: {loss}")

epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

## Вариация различных параметров сети

Варьируем следующие параметры относительно стандартной модели:
* количество слоев
* размер ядра свертки
* размер ядра пулинга
* значение дропа
* использование нормализации

### Количество слоев

Сравниваем только 1 и 2 слоя, т.к. при добавлении 3-го слоя не хватает пространственного размера тензора: после двух блоков «свёртка 5×5 + пулинг 2×2» карта признаков уменьшается 28 → 24 → 12 → 8 → 4, и третью свёртку 5×5 к ней применить уже нельзя. По результатам измерений модель с 1 слоем показала лучший результат и на train, и на test. Не совсем понятно, почему так вышло. Возможно, дело в том, что при меньшем числе слоёв на вход линейного слоя подаётся вектор большей размерности, а это, в свою очередь, даёт большую гибкость при классификации.

In [None]:
for channels in ([6], [6, 16]):
    model = CNN(channels=channels)
    model.to(DEVICE)

    # One TensorBoard run per network depth; leaving the `with` block closes
    # the writer so its events are flushed before the next run starts.
    with SummaryWriter(log_dir = 'experiment/layers/' + str(len(channels))) as writer:
        call = callback(writer, MNIST_test, LOSS_FUNCTION, delimeter = 10)

        trainer(count_of_epoch = 1,
                batch_size = 64,
                dataset = MNIST_train,
                model = model,
                loss_function = LOSS_FUNCTION,
                optimizer = OPTIMIZER,
                lr = 0.001,
                callback = call)

    loss = get_loss(model)

    print(f"channels = {channels}, "
          f"loss: {loss}")

### Размер ядра свертки

Свёртка с ядром размера 3 показала себя лучше. Подозреваю, что причина та же, что и в случае с количеством слоёв.

In [None]:
for conv_kernel_size in (3, 5):
    model = CNN(conv_kernel_size=conv_kernel_size)
    model.to(DEVICE)

    # One TensorBoard run per kernel size; leaving the `with` block closes
    # the writer so its events are flushed before the next run starts.
    with SummaryWriter(log_dir = 'experiment/conv_kernel_size/' + str(conv_kernel_size)) as writer:
        call = callback(writer, MNIST_test, LOSS_FUNCTION, delimeter = 10)

        trainer(count_of_epoch = 1,
                batch_size = 64,
                dataset = MNIST_train,
                model = model,
                loss_function = LOSS_FUNCTION,
                optimizer = OPTIMIZER,
                lr = 0.001,
                callback = call)

    loss = get_loss(model)

    print(f"conv_kernel_size = {conv_kernel_size}, "
          f"loss: {loss}")

### Размер ядра пулинга

Пулинг с ядром размера 1 (по сути, его отсутствие) показал себя лучше. Подозреваю, что причина та же, что и в случае с количеством слоёв.

In [None]:
for pool_kernel_size in (1, 2):
    model = CNN(pool_kernel_size=pool_kernel_size)
    model.to(DEVICE)

    # One TensorBoard run per pooling size; leaving the `with` block closes
    # the writer so its events are flushed before the next run starts.
    with SummaryWriter(log_dir = 'experiment/pool_kernel_size/' + str(pool_kernel_size)) as writer:
        call = callback(writer, MNIST_test, LOSS_FUNCTION, delimeter = 10)

        trainer(count_of_epoch = 1,
                batch_size = 64,
                dataset = MNIST_train,
                model = model,
                loss_function = LOSS_FUNCTION,
                optimizer = OPTIMIZER,
                lr = 0.001,
                callback = call)

    loss = get_loss(model)

    print(f"pool_kernel_size = {pool_kernel_size}, "
          f"loss: {loss}")

### Dropout

Dropout равный 0.3 показал себя наилучшим образом

In [None]:
for dropout in (None, 0.1, 0.3, 0.5):
    model = CNN(dropout=dropout)
    model.to(DEVICE)

    # One TensorBoard run per dropout setting; leaving the `with` block closes
    # the writer so its events are flushed before the next run starts.
    with SummaryWriter(log_dir = 'experiment/dropout/' + str(dropout)) as writer:
        call = callback(writer, MNIST_test, LOSS_FUNCTION, delimeter = 10)

        trainer(count_of_epoch = 1,
                batch_size = 64,
                dataset = MNIST_train,
                model = model,
                loss_function = LOSS_FUNCTION,
                optimizer = OPTIMIZER,
                lr = 0.001,
                callback = call)

    loss = get_loss(model)

    print(f"dropout = {dropout}, "
          f"loss: {loss}")

### Batch normalization

Использование batch normalization улучшило результат.

In [None]:
for batch_norm in (True, False):
    model = CNN(batch_norm=batch_norm)
    model.to(DEVICE)

    # One TensorBoard run per setting; leaving the `with` block closes the
    # writer so its events are flushed before the next run starts.
    with SummaryWriter(log_dir = 'experiment/batch_norm/' + str(batch_norm)) as writer:
        call = callback(writer, MNIST_test, LOSS_FUNCTION, delimeter = 10)

        trainer(count_of_epoch = 1,
                batch_size = 64,
                dataset = MNIST_train,
                model = model,
                loss_function = LOSS_FUNCTION,
                optimizer = OPTIMIZER,
                lr = 0.001,
                callback = call)

    loss = get_loss(model)

    print(f"batch_norm = {batch_norm}, "
          f"loss: {loss}")

In [None]:
# Load the TensorBoard notebook extension and open it on the log directory
# that the experiment cells above write to.
%load_ext tensorboard
%tensorboard --logdir experiment/