# Богданов Александр Иванович, Б05-003

## Анализ модели CNN

In [274]:
from tqdm.notebook import tqdm
import numpy as np
import warnings
warnings.filterwarnings("ignore")

import torch
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import classification_report
from torchvision import datasets, transforms

In [275]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# The helper functions below read this module-level name.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

### Вспомогательные функции

In [276]:
def train_on_batch(model, x_batch, y_batch, optimizer, loss_function):
    """Run one optimisation step on a single mini-batch.

    Puts the model into training mode, clears its gradients, performs the
    forward and backward passes and applies one optimizer update.

    Args:
        model: network being trained.
        x_batch: input tensor; moved to the module-level ``device``.
        y_batch: target tensor; moved to the module-level ``device``.
        optimizer: optimizer whose ``step()`` applies the update.
        loss_function: callable ``(output, target) -> scalar loss tensor``.

    Returns:
        The loss of this batch as a Python float.
    """
    model.train()
    model.zero_grad()

    inputs = x_batch.to(device)
    targets = y_batch.to(device)

    batch_loss = loss_function(model(inputs), targets)
    batch_loss.backward()
    optimizer.step()

    return batch_loss.cpu().item()

In [277]:
def train_epoch(train_generator, model, loss_function, optimizer, callback=None):
    """Train the model for one pass over ``train_generator``.

    Args:
        train_generator: iterable yielding ``(batch_of_x, batch_of_y)`` pairs.
        model: network being trained.
        loss_function: callable ``(output, target) -> scalar loss tensor``.
        optimizer: optimizer instance used for the updates.
        callback: optional callable invoked as ``callback(model, batch_loss)``
            after every batch.

    Returns:
        The sample-weighted mean loss over the epoch, or ``nan`` when the
        generator yielded no samples (instead of raising ZeroDivisionError).
    """
    weighted_loss_sum = 0.0
    samples_seen = 0
    for batch_of_x, batch_of_y in train_generator:
        # train_on_batch moves the tensors to the target device itself,
        # so no extra transfer is needed here.
        batch_loss = train_on_batch(model, batch_of_x, batch_of_y, optimizer, loss_function)

        if callback is not None:
            callback(model, batch_loss)

        # Weight by batch size so a smaller last batch does not skew the mean.
        batch_len = len(batch_of_x)
        weighted_loss_sum += batch_loss * batch_len
        samples_seen += batch_len

    if samples_seen == 0:
        return float('nan')
    return weighted_loss_sum / samples_seen

In [278]:
def trainer(count_of_epoch,
            batch_size,
            dataset,
            model,
            loss_function,
            optimizer,
            lr=0.001,
            callback=None):
    """Train ``model`` on ``dataset`` for ``count_of_epoch`` epochs.

    Args:
        count_of_epoch: number of passes over the dataset.
        batch_size: mini-batch size for the shuffled ``DataLoader``.
        dataset: dataset to train on.
        model: network being trained.
        loss_function: callable ``(output, target) -> scalar loss tensor``.
        optimizer: optimizer *class* (not an instance); it is instantiated
            here as ``optimizer(model.parameters(), lr=lr)``.
        lr: learning rate passed to the optimizer.
        callback: optional per-batch callable forwarded to ``train_epoch``.
    """
    optima = optimizer(model.parameters(), lr=lr)

    epoch_bar = tqdm(range(count_of_epoch), desc='epoch')
    epoch_bar.set_postfix({'train epoch loss': np.nan})
    for _ in epoch_bar:
        loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)
        # Ceiling division: number of batches including a partial last one.
        batches_in_epoch = -(-len(dataset) // batch_size)
        batch_bar = tqdm(loader, leave=False, total=batches_in_epoch)

        mean_loss = train_epoch(batch_bar, model, loss_function, optima, callback)

        epoch_bar.set_postfix({'train epoch loss': mean_loss})

In [279]:
def check(batch_size, dataset, model, loss_function):
    """Evaluate ``model`` on ``dataset`` and print/return the mean loss.

    The model is switched to evaluation mode and the forward passes run
    under ``torch.no_grad()`` so that no autograd graph is built.

    Args:
        batch_size: mini-batch size used for evaluation.
        dataset: dataset yielding ``(x, y)`` samples.
        model: network to evaluate.
        loss_function: callable ``(output, target) -> scalar loss tensor``.

    Returns:
        The sample-weighted mean loss over the whole dataset.
    """
    model.eval()

    batch_generator = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size)

    test_loss = 0
    with torch.no_grad():
        for x_batch, y_batch in batch_generator:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            output = model(x_batch)

            # Weight by batch size so a smaller last batch is averaged correctly.
            test_loss += loss_function(output, y_batch).cpu().item()*len(x_batch)

    test_loss /= len(dataset)

    print('loss: {}'.format(test_loss))
    return test_loss

In [323]:
class callback():
    """Per-batch training callback that logs to a TensorBoard writer.

    On every call it logs the training loss. Every ``delimeter`` calls it
    also logs the model graph, the mean loss on ``dataset`` and a
    classification report for ``dataset``.

    Args:
        writer: object providing ``add_scalar``, ``add_graph`` and ``add_text``.
        dataset: dataset used for the periodic evaluation.
        loss_function: callable ``(output, target) -> scalar loss tensor``.
        delimeter: evaluation period, in callback invocations.
        batch_size: mini-batch size used for the periodic evaluation.
    """

    def __init__(self, writer, dataset, loss_function, delimeter = 100, batch_size=64):
        self.step = 0
        self.writer = writer
        self.delimeter = delimeter
        self.loss_function = loss_function
        self.batch_size = batch_size

        self.dataset = dataset

    def forward(self, model, loss):
        """Log ``loss`` and, every ``delimeter`` steps, evaluate ``model``.

        The model is left in evaluation mode when this method returns.
        """
        model.eval()
        self.step += 1
        self.writer.add_scalar('LOSS/train', loss, self.step)

        if self.step % self.delimeter != 0:
            return

        # The graph is traced with the first sample reshaped to a single
        # 1x28x28 image, so the dataset must provide samples of that size.
        self.writer.add_graph(model, self.dataset[0][0].view(1,1,28,28).to(model.device))

        batch_generator = torch.utils.data.DataLoader(dataset = self.dataset,
                                                      batch_size=self.batch_size)

        pred = []
        real = []
        test_loss = 0
        # Evaluation only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            for x_batch, y_batch in batch_generator:
                x_batch = x_batch.to(model.device)
                y_batch = y_batch.to(model.device)

                output = model(x_batch)

                test_loss += self.loss_function(output, y_batch).cpu().item()*len(x_batch)

                pred.extend(torch.argmax(output, dim=-1).cpu().numpy().tolist())
                real.extend(y_batch.cpu().numpy().tolist())

        test_loss /= len(self.dataset)

        self.writer.add_scalar('LOSS/test', test_loss, self.step)
        self.writer.add_text('REPORT/test', str(classification_report(real, pred)), self.step)

    def __call__(self, model, loss):
        return self.forward(model, loss)

## Модели

###### 0. Обычная модель

In [281]:
class basic_CNN(torch.nn.Module):
    """Baseline CNN: two unpadded 3x3 convolutions and two linear layers.

    The first linear layer takes 9216 features (16 channels of 24x24 maps),
    which matches a 1x28x28 input. The output has 10 values per sample.
    """

    def __init__(self):
        super().__init__()

        named_layers = [
            ('conv1', torch.nn.Conv2d(1, 6, kernel_size=3)),
            ('relu1', torch.nn.ReLU()),
            ('conv2', torch.nn.Conv2d(6, 16, kernel_size=3)),
            ('relu2', torch.nn.ReLU()),
            ('flatten', torch.nn.Flatten()),
            ('linear1', torch.nn.Linear(9216, 120)),
            ('relu3', torch.nn.ReLU()),
            ('linear2', torch.nn.Linear(120, 10)),
        ]
        self.layers = torch.nn.Sequential()
        for layer_name, layer in named_layers:
            self.layers.add_module(layer_name, layer)

    @property
    def device(self):
        """Device of the first parameter, or ``None`` if there are none."""
        first_param = next(self.parameters(), None)
        return None if first_param is None else first_param.device

    def forward(self, input):
        """Apply the layer stack to ``input``."""
        return self.layers(input)

###### 1. Увеличенное ядро

In [298]:
class kernel_CNN(torch.nn.Module):
    """Variant of the baseline CNN with 5x5 convolution kernels.

    The first linear layer takes 6400 features (16 channels of 20x20 maps),
    which matches a 1x28x28 input. The output has 10 values per sample.
    """

    def __init__(self):
        super().__init__()

        named_layers = [
            ('conv1', torch.nn.Conv2d(1, 6, kernel_size=5)),
            ('relu1', torch.nn.ReLU()),
            ('conv2', torch.nn.Conv2d(6, 16, kernel_size=5)),
            ('relu2', torch.nn.ReLU()),
            ('flatten', torch.nn.Flatten()),
            ('linear1', torch.nn.Linear(6400, 120)),
            ('relu3', torch.nn.ReLU()),
            ('linear2', torch.nn.Linear(120, 10)),
        ]
        self.layers = torch.nn.Sequential()
        for layer_name, layer in named_layers:
            self.layers.add_module(layer_name, layer)

    @property
    def device(self):
        """Device of the first parameter, or ``None`` if there are none."""
        first_param = next(self.parameters(), None)
        return None if first_param is None else first_param.device

    def forward(self, input):
        """Apply the layer stack to ``input``."""
        return self.layers(input)

###### 2. Увеличенное число слоев

In [283]:
class layers_CNN(torch.nn.Module):
    """Variant of the baseline CNN with one extra hidden linear layer (84 units).

    The first linear layer takes 9216 features (16 channels of 24x24 maps),
    which matches a 1x28x28 input. The output has 10 values per sample.
    """

    def __init__(self):
        super().__init__()

        named_layers = [
            ('conv1', torch.nn.Conv2d(1, 6, kernel_size=3)),
            ('relu1', torch.nn.ReLU()),
            ('conv2', torch.nn.Conv2d(6, 16, kernel_size=3)),
            ('relu2', torch.nn.ReLU()),
            ('flatten', torch.nn.Flatten()),
            ('linear1', torch.nn.Linear(9216, 120)),
            ('relu3', torch.nn.ReLU()),
            ('linear2', torch.nn.Linear(120, 84)),
            ('relu4', torch.nn.ReLU()),
            ('linear3', torch.nn.Linear(84, 10)),
        ]
        self.layers = torch.nn.Sequential()
        for layer_name, layer in named_layers:
            self.layers.add_module(layer_name, layer)

    @property
    def device(self):
        """Device of the first parameter, or ``None`` if there are none."""
        first_param = next(self.parameters(), None)
        return None if first_param is None else first_param.device

    def forward(self, input):
        """Apply the layer stack to ``input``."""
        return self.layers(input)

###### 3. Добавление пулинга

In [284]:
class pooling_CNN(torch.nn.Module):
    """Variant of the baseline CNN with 2x2 max pooling after each convolution.

    The first linear layer takes 400 features (16 channels of 5x5 maps),
    which matches a 1x28x28 input. The output has 10 values per sample.
    """

    def __init__(self):
        super().__init__()

        named_layers = [
            ('conv1', torch.nn.Conv2d(1, 6, kernel_size=3)),
            ('relu1', torch.nn.ReLU()),
            ('pool1', torch.nn.MaxPool2d(kernel_size=2)),
            ('conv2', torch.nn.Conv2d(6, 16, kernel_size=3)),
            ('relu2', torch.nn.ReLU()),
            ('pool2', torch.nn.MaxPool2d(kernel_size=2)),
            ('flatten', torch.nn.Flatten()),
            ('linear1', torch.nn.Linear(400, 120)),
            ('relu3', torch.nn.ReLU()),
            ('linear2', torch.nn.Linear(120, 10)),
        ]
        self.layers = torch.nn.Sequential()
        for layer_name, layer in named_layers:
            self.layers.add_module(layer_name, layer)

    @property
    def device(self):
        """Device of the first parameter, or ``None`` if there are none."""
        first_param = next(self.parameters(), None)
        return None if first_param is None else first_param.device

    def forward(self, input):
        """Apply the layer stack to ``input``."""
        return self.layers(input)

###### 4. Добавление BatchNorm

In [285]:
class batchnorm_CNN(torch.nn.Module):
    """Variant of the baseline CNN with batch normalisation.

    ``BatchNorm2d`` follows the second convolution and ``BatchNorm1d``
    follows the first linear layer, each placed before the ReLU. The first
    linear layer takes 9216 features, which matches a 1x28x28 input.
    """

    def __init__(self):
        super().__init__()

        named_layers = [
            ('conv1', torch.nn.Conv2d(1, 6, kernel_size=3)),
            ('relu1', torch.nn.ReLU()),
            ('conv2', torch.nn.Conv2d(6, 16, kernel_size=3)),
            ('batchnorm1', torch.nn.BatchNorm2d(16)),
            ('relu2', torch.nn.ReLU()),
            ('flatten', torch.nn.Flatten()),
            ('linear1', torch.nn.Linear(9216, 120)),
            ('batchnorm2', torch.nn.BatchNorm1d(120)),
            ('relu3', torch.nn.ReLU()),
            ('linear2', torch.nn.Linear(120, 10)),
        ]
        self.layers = torch.nn.Sequential()
        for layer_name, layer in named_layers:
            self.layers.add_module(layer_name, layer)

    @property
    def device(self):
        """Device of the first parameter, or ``None`` if there are none."""
        first_param = next(self.parameters(), None)
        return None if first_param is None else first_param.device

    def forward(self, input):
        """Apply the layer stack to ``input``."""
        return self.layers(input)

###### 5. Добавление Dropout

In [286]:
class dropout_CNN(torch.nn.Module):
    """Variant of the baseline CNN with dropout (p=0.2) after the first linear layer.

    The first linear layer takes 9216 features (16 channels of 24x24 maps),
    which matches a 1x28x28 input. The output has 10 values per sample.
    """

    def __init__(self):
        super().__init__()

        named_layers = [
            ('conv1', torch.nn.Conv2d(1, 6, kernel_size=3)),
            ('relu1', torch.nn.ReLU()),
            ('conv2', torch.nn.Conv2d(6, 16, kernel_size=3)),
            ('relu2', torch.nn.ReLU()),
            ('flatten', torch.nn.Flatten()),
            ('linear1', torch.nn.Linear(9216, 120)),
            ('dropout', torch.nn.Dropout(p=0.2)),
            ('relu3', torch.nn.ReLU()),
            ('linear2', torch.nn.Linear(120, 10)),
        ]
        self.layers = torch.nn.Sequential()
        for layer_name, layer in named_layers:
            self.layers.add_module(layer_name, layer)

    @property
    def device(self):
        """Device of the first parameter, or ``None`` if there are none."""
        first_param = next(self.parameters(), None)
        return None if first_param is None else first_param.device

    def forward(self, input):
        """Apply the layer stack to ``input``."""
        return self.layers(input)

## Подключим tensorboard

In [287]:
%load_ext tensorboard
%tensorboard --logdir tensorboard_1/

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


## Скачаем данные

In [288]:
# FashionMNIST train and test splits stored under ./FashionMNIST
# (download=True); images are converted to tensors with ToTensor().
FashionMNIST_train = datasets.FashionMNIST('./FashionMNIST', train=True, download=True, transform=transforms.ToTensor())
FashionMNIST_test = datasets.FashionMNIST('./FashionMNIST', train=False, download=True, transform=transforms.ToTensor())

## Обучение

In [289]:
# Shared by all experiments: cross-entropy loss and the Adam optimizer class
# (the class itself is passed; trainer() instantiates it per model).
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam

###### 0. Обычная модель

In [290]:
# TensorBoard run for the baseline model; the callback evaluates on the
# test split every 10 training batches.
writer0 = SummaryWriter(log_dir = 'tensorboard_1/basic_CNN')
call0 = callback(writer0, FashionMNIST_test, loss_function, delimeter = 10)

In [291]:
# Instantiate the baseline model and move it to the selected device.
model0 = basic_CNN()
model0.to(device)

basic_CNN(
  (layers): Sequential(
    (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
    (relu1): ReLU()
    (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
    (relu2): ReLU()
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear1): Linear(in_features=9216, out_features=120, bias=True)
    (relu3): ReLU()
    (linear2): Linear(in_features=120, out_features=10, bias=True)
  )
)

In [292]:
# Test loss of the untrained model, as a reference point.
check(64, FashionMNIST_test, model0, loss_function)

loss: 2.3047721282958986


2.3047721282958986

In [293]:
# Train the baseline model for 5 epochs on the training split, logging via call0.
trainer(count_of_epoch = 5,
        batch_size = 64,
        dataset = FashionMNIST_train,
        model = model0,
        loss_function = loss_function,
        optimizer = optimizer,
        lr = 0.001,
        callback = call0)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

In [294]:
# Test loss after training.
check(64, FashionMNIST_test, model0, loss_function)

loss: 0.2631594335079193


0.2631594335079193

###### 1. Увеличенное ядро

In [295]:
# TensorBoard run for the larger-kernel model; the callback evaluates on the
# test split every 10 training batches.
writer1 = SummaryWriter(log_dir = 'tensorboard_1/kernel_CNN')
call1 = callback(writer1, FashionMNIST_test, loss_function, delimeter = 10)

In [299]:
# Instantiate the larger-kernel model and move it to the selected device.
model1 = kernel_CNN()
model1.to(device)

kernel_CNN(
  (layers): Sequential(
    (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (relu1): ReLU()
    (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (relu2): ReLU()
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear1): Linear(in_features=6400, out_features=120, bias=True)
    (relu3): ReLU()
    (linear2): Linear(in_features=120, out_features=10, bias=True)
  )
)

In [300]:
# Test loss of the untrained model, as a reference point.
check(64, FashionMNIST_test, model1, loss_function)

loss: 2.303904319000244


2.303904319000244

In [301]:
# Train the larger-kernel model for 5 epochs on the training split, logging via call1.
trainer(count_of_epoch = 5,
        batch_size = 64,
        dataset = FashionMNIST_train,
        model = model1,
        loss_function = loss_function,
        optimizer = optimizer,
        lr = 0.001,
        callback = call1)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

In [302]:
# Test loss after training.
check(64, FashionMNIST_test, model1, loss_function)

loss: 0.28337887840271


0.28337887840271

###### 2. Увеличенное число слоев

In [303]:
# TensorBoard run for the deeper model; the callback evaluates on the
# test split every 10 training batches.
writer2 = SummaryWriter(log_dir = 'tensorboard_1/layers_CNN')
call2 = callback(writer2, FashionMNIST_test, loss_function, delimeter = 10)

In [304]:
# Instantiate the deeper model and move it to the selected device.
model2 = layers_CNN()
model2.to(device)

layers_CNN(
  (layers): Sequential(
    (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
    (relu1): ReLU()
    (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
    (relu2): ReLU()
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear1): Linear(in_features=9216, out_features=120, bias=True)
    (relu3): ReLU()
    (linear2): Linear(in_features=120, out_features=84, bias=True)
    (relu4): ReLU()
    (linear3): Linear(in_features=84, out_features=10, bias=True)
  )
)

In [305]:
# Test loss of the untrained model, as a reference point.
check(64, FashionMNIST_test, model2, loss_function)

loss: 2.306398455810547


2.306398455810547

In [306]:
# Train the deeper model for 5 epochs on the training split, logging via call2.
trainer(count_of_epoch = 5,
        batch_size = 64,
        dataset = FashionMNIST_train,
        model = model2,
        loss_function = loss_function,
        optimizer = optimizer,
        lr = 0.001,
        callback = call2)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

In [307]:
# Test loss after training.
check(64, FashionMNIST_test, model2, loss_function)

loss: 0.2737258492231369


0.2737258492231369

###### 3. Добавление пулинга

In [308]:
# TensorBoard run for the pooling model; the callback evaluates on the
# test split every 10 training batches.
writer3 = SummaryWriter(log_dir = 'tensorboard_1/pooling_CNN')
call3 = callback(writer3, FashionMNIST_test, loss_function, delimeter = 10)

In [309]:
# Instantiate the pooling model and move it to the selected device.
model3 = pooling_CNN()
model3.to(device)

pooling_CNN(
  (layers): Sequential(
    (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
    (relu1): ReLU()
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
    (relu2): ReLU()
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear1): Linear(in_features=400, out_features=120, bias=True)
    (relu3): ReLU()
    (linear2): Linear(in_features=120, out_features=10, bias=True)
  )
)

In [310]:
# Test loss of the untrained model, as a reference point.
check(64, FashionMNIST_test, model3, loss_function)

loss: 2.304301448440552


2.304301448440552

In [311]:
# Train the pooling model for 5 epochs on the training split, logging via call3.
trainer(count_of_epoch = 5,
        batch_size = 64,
        dataset = FashionMNIST_train,
        model = model3,
        loss_function = loss_function,
        optimizer = optimizer,
        lr = 0.001,
        callback = call3)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

In [312]:
# Test loss after training.
check(64, FashionMNIST_test, model3, loss_function)

loss: 0.34046540837287903


0.34046540837287903

###### 4. Добавление BatchNorm

In [313]:
# TensorBoard run for the BatchNorm model; the callback evaluates on the
# test split every 10 training batches.
writer4 = SummaryWriter(log_dir = 'tensorboard_1/batchnorm_CNN')
call4 = callback(writer4, FashionMNIST_test, loss_function, delimeter = 10)

In [314]:
# Instantiate the BatchNorm model and move it to the selected device.
model4 = batchnorm_CNN()
model4.to(device)

batchnorm_CNN(
  (layers): Sequential(
    (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
    (relu1): ReLU()
    (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
    (batchnorm1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU()
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear1): Linear(in_features=9216, out_features=120, bias=True)
    (batchnorm2): BatchNorm1d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu3): ReLU()
    (linear2): Linear(in_features=120, out_features=10, bias=True)
  )
)

In [315]:
# Test loss of the untrained model, as a reference point.
check(64, FashionMNIST_test, model4, loss_function)

loss: 2.4264142349243163


2.4264142349243163

In [316]:
# Train the BatchNorm model for 5 epochs on the training split, logging via call4.
trainer(count_of_epoch = 5,
        batch_size = 64,
        dataset = FashionMNIST_train,
        model = model4,
        loss_function = loss_function,
        optimizer = optimizer,
        lr = 0.001,
        callback = call4)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

In [317]:
# Test loss after training.
check(64, FashionMNIST_test, model4, loss_function)

loss: 0.3019942291021347


0.3019942291021347

###### 5. Добавление Dropout

In [318]:
# TensorBoard run for the Dropout model; the callback evaluates on the
# test split every 10 training batches.
writer5 = SummaryWriter(log_dir = 'tensorboard_1/dropout_CNN')
call5 = callback(writer5, FashionMNIST_test, loss_function, delimeter = 10)

In [319]:
# Instantiate the Dropout model and move it to the selected device.
model5 = dropout_CNN()
model5.to(device)

dropout_CNN(
  (layers): Sequential(
    (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
    (relu1): ReLU()
    (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
    (relu2): ReLU()
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear1): Linear(in_features=9216, out_features=120, bias=True)
    (dropout): Dropout(p=0.2, inplace=False)
    (relu3): ReLU()
    (linear2): Linear(in_features=120, out_features=10, bias=True)
  )
)

In [320]:
# Test loss of the untrained model, as a reference point.
check(64, FashionMNIST_test, model5, loss_function)

loss: 2.3047156017303467


2.3047156017303467

In [321]:
# Train the Dropout model for 5 epochs on the training split, logging via call5.
trainer(count_of_epoch = 5,
        batch_size = 64,
        dataset = FashionMNIST_train,
        model = model5,
        loss_function = loss_function,
        optimizer = optimizer,
        lr = 0.001,
        callback = call5)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

In [322]:
# Test loss after training.
check(64, FashionMNIST_test, model5, loss_function)

loss: 0.29450145016908647


0.29450145016908647

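Были проведены эксперименты с модификациями базовой модели.

Как видно из графиков, модели с пулингом и с dropout показали себя хуже остальных; базовая модель, модель с увеличенным ядром и модель с увеличенным числом слоёв показали примерно одинаковые результаты, а модель с batchnorm показала лучший результат, но при этом переобучилась. Для справки, итоговые значения функции потерь на тестовой выборке после 5 эпох: базовая модель — 0.263, увеличенное ядро — 0.283, увеличенное число слоёв — 0.274, пулинг — 0.340, batchnorm — 0.302, dropout — 0.295.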