In [35]:
from copy import deepcopy

import matplotlib.pyplot as plt
from matplotlib.image import imread
from mpl_toolkits import mplot3d
from matplotlib import gridspec
from PIL import Image
import io
from urllib.request import urlopen
from lime import lime_image
from skimage.segmentation import mark_boundaries

from tqdm.notebook import tqdm
import numpy as np
import requests
import torch

from sklearn.metrics import classification_report
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms

In [36]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

%load_ext tensorboard
%tensorboard --logdir experiment/

In [37]:
class callback():
    def __init__(self, writer, dataset, loss_function, delimeter = 100, batch_size=64, exp_number = 0):
        self.step = 0
        self.writer = writer
        self.delimeter = delimeter
        self.loss_function = loss_function
        self.batch_size = batch_size
        self.exp_number = exp_number

        self.dataset = dataset

    def forward(self, model, loss):
        self.step += 1
        self.writer.add_scalar(f'LOSS/train{self.exp_number}', loss, self.step)
        
        if self.step % self.delimeter == 0 or self.step == 1949 or self.step == 10:
            
            self.writer.add_graph(model, self.dataset[0][0].view(1,1,28,28).to(model.device))
            
            batch_generator = torch.utils.data.DataLoader(dataset = self.dataset, 
                                                          batch_size=self.batch_size)
            
            pred = []
            real = []
            test_loss = 0
            for it, (x_batch, y_batch) in enumerate(batch_generator):
                x_batch = x_batch.to(model.device)
                y_batch = y_batch.to(model.device)

                output = model(x_batch)

                test_loss += self.loss_function(output, y_batch).cpu().item()*len(x_batch)

                pred.extend(torch.argmax(output, dim=-1).cpu().numpy().tolist())
                real.extend(y_batch.cpu().numpy().tolist())
            
            test_loss /= len(self.dataset)
            
            self.writer.add_scalar(f'LOSS/test{self.exp_number}', test_loss, self.step)
            self.writer.add_text(f'REPORT/test{self.exp_number}', str(classification_report(real, pred)), self.step)

        # if self.step == 1949 or self.step == 10:
        #     x = x_batch[-10:]
        #     y = model.layers.conv1(x.to(model.device))
        #     z = model.layers.conv2(model.layers.pool1(model.layers.relu1(y)))

        #     fig = plt.figure(figsize=(30, 15))
        #     gs = gridspec.GridSpec(10, 23)
        #     ax = np.empty([10, 23], dtype=object)
        #     for i in range(10):
        #         for j in range(23):
        #             ax[i][j] = fig.add_subplot(gs[i, j])
        #     for i in range(len(x)):
        #         ax[i][0].imshow(x.cpu().data[i, 0].numpy())
        #         ax[i][0].axis("off")
        #         for j in range(1, 7):
        #             ax[i][j].imshow(y.cpu().data[i, j-1].numpy())
        #             ax[i][j].axis("off")

        #         for j in range(7, 23):
        #             ax[i][j].imshow(z.cpu().data[i, j-7].numpy())
        #             ax[i][j].axis("off")
        #     self.writer.add_figure('CNN/convs', fig, self.step)
          
    def __call__(self, model, loss):
        return self.forward(model, loss)


In [38]:
def train_on_batch(model, x_batch, y_batch, optimizer, loss_function):
    model.train()
    model.zero_grad()
    
    output = model(x_batch.to(device))
    loss = loss_function(output, y_batch.to(device))
    loss.backward()

    optimizer.step()
    return loss.cpu().item()

def train_epoch(train_generator, model, loss_function, optimizer, callback = None):
    epoch_loss = 0
    total = 0
    for it, (batch_of_x, batch_of_y) in enumerate(train_generator):
        batch_loss = train_on_batch(model, batch_of_x.to(device), batch_of_y.to(device), optimizer, loss_function)
        
        if callback is not None:
            callback(model, batch_loss)
            
        epoch_loss += batch_loss * len(batch_of_x)
        total += len(batch_of_x)
    
    return epoch_loss/total


def trainer(count_of_epoch, 
            batch_size, 
            dataset,
            model, 
            loss_function,
            optimizer,
            lr = 0.001,
            callback = None):

    optima = optimizer(model.parameters(), lr=lr)
    
    iterations = tqdm(range(count_of_epoch), desc='epoch')
    iterations.set_postfix({'train epoch loss': np.nan})
    for it in iterations:
        batch_generator = tqdm(
            torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True), 
            leave=False, total=len(dataset)//batch_size+(len(dataset)%batch_size> 0))
        
        epoch_loss = train_epoch(train_generator=batch_generator, 
                    model=model, 
                    loss_function=loss_function, 
                    optimizer=optima, 
                    callback=callback)
        
        iterations.set_postfix({'train epoch loss': epoch_loss})


In [39]:
train = datasets.EMNIST('./emnist', split='letters', train=True, download=True, transform=transforms.ToTensor())
test = datasets.EMNIST('./emnist', split='letters', train=False, download=True, transform=transforms.ToTensor())

In [40]:
def check_quality(model, test, loss_function):
    batch_generator = torch.utils.data.DataLoader(dataset = test, 
                                                batch_size=64)
    pred = []
    real = []
    test_loss = 0
    for it, (x_batch, y_batch) in enumerate(batch_generator):
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        output = model(x_batch)    
        
        test_loss += loss_function(output, y_batch).cpu().item() * len(x_batch)
        pred.extend(torch.argmax(output, dim=-1).cpu().numpy().tolist())
        real.extend(y_batch.cpu().numpy().tolist())
        # print("predict:", pred)
        # print("real:", real)
    test_loss /= len(test)
    loss = test_loss
    percent = 100 * len(list(filter(lambda pair: pair[0] == pair[1], zip(pred, real)))) / len(pred)
    return loss, percent


In [41]:
def experiment(ModelClass : torch.nn.Module, exp_num, count_of_epoch = 1):
    model = ModelClass()
    model.to(device)   
    
    loss_function = torch.nn.CrossEntropyLoss()
    def m_loss_function(x, y):
        y -= 1
        return loss_function(x, y)
    
    optimizer = torch.optim.Adam
    
    writer = SummaryWriter(log_dir = 'experiment')
    call = callback(writer, test, m_loss_function, delimeter = 100, exp_number=exp_num)

    quality = check_quality(model, test, m_loss_function)
    print(f"quality before training: loss={quality[0]}, success percent={quality[1]}")
    
    trainer(count_of_epoch = count_of_epoch,
        batch_size = 64,
        dataset = train,
        model = model, ### Качество аппроксимации до обучения
        loss_function = m_loss_function,
        optimizer = optimizer,
        lr = 0.001,
        callback = call)
    
    quality = check_quality(model, test, m_loss_function)
    print(f"quality after training: loss={quality[0]}, success percent={quality[1]}")

# Базовый вариант

In [42]:
class CNN(torch.nn.Module):
    @property
    def device(self):
        for p in self.parameters():
            return p.device
        
    def __init__(self):
        super(CNN, self).__init__()
        
        self.layers = torch.nn.Sequential()
        self.layers.add_module('conv1', torch.nn.Conv2d(1, 1*6, kernel_size = 5))
        self.layers.add_module('relu1', torch.nn.ReLU())
        self.layers.add_module('pool1', torch.nn.MaxPool2d(kernel_size = 2))
        self.layers.add_module('conv2', torch.nn.Conv2d(1*6, 1*16, kernel_size = 5))
        self.layers.add_module('relu2', torch.nn.ReLU())
        self.layers.add_module('pool2', torch.nn.MaxPool2d(kernel_size = 2))
        self.layers.add_module('flatten', torch.nn.Flatten())
        self.layers.add_module('linear1', torch.nn.Linear(16*4*4, 120))
        self.layers.add_module('relu3', torch.nn.ReLU())
        self.layers.add_module('linear2', torch.nn.Linear(120, 84))
        self.layers.add_module('relu4', torch.nn.ReLU())
        self.layers.add_module('linear3', torch.nn.Linear(84, 26))

    def forward(self, input):
        return self.layers(input)
    
experiment(CNN, 0)

quality before training: loss=3.2605246250446025, success percent=3.8461538461538463


epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


quality after training: loss=0.3670731298854718, success percent=88.1826923076923


# Оставляем меньше слоев

In [43]:
class CNN1(torch.nn.Module):
    @property
    def device(self):
        for p in self.parameters():
            return p.device
        
    def __init__(self):
        super(CNN1, self).__init__()
        
        self.layers = torch.nn.Sequential()
        self.layers.add_module('conv1', torch.nn.Conv2d(1, 1*6, kernel_size = 5))
        self.layers.add_module('relu1', torch.nn.ReLU())
        self.layers.add_module('flatten', torch.nn.Flatten())
        self.layers.add_module('linear1', torch.nn.Linear(6*24*24, 120))
        self.layers.add_module('relu3', torch.nn.ReLU())
        self.layers.add_module('linear2', torch.nn.Linear(120, 84))
        self.layers.add_module('relu4', torch.nn.ReLU())
        self.layers.add_module('linear3', torch.nn.Linear(84, 26))

    def forward(self, input):
        return self.layers(input)
    
experiment(CNN1, 1)

quality before training: loss=3.2602492178403413, success percent=3.625


epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


quality after training: loss=0.38733663987654904, success percent=87.77884615384616


# Качество упало 
Добавим больше слоев

In [44]:
class CNN2(torch.nn.Module):
    @property
    def device(self):
        for p in self.parameters():
            return p.device
        
    def __init__(self):
        super(CNN2, self).__init__()
        
        self.layers = torch.nn.Sequential()
        self.layers.add_module('conv1', torch.nn.Conv2d(1, 1 *  6, kernel_size = 5))
        self.layers.add_module('relu1', torch.nn.ReLU())
        self.layers.add_module('conv2', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        self.layers.add_module('relu2', torch.nn.ReLU())
        self.layers.add_module('conv3', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        self.layers.add_module('relu3', torch.nn.ReLU())
        self.layers.add_module('conv4', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        self.layers.add_module('relu4', torch.nn.ReLU())
        self.layers.add_module('conv5', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        self.layers.add_module('relu5', torch.nn.ReLU())
        self.layers.add_module('conv6', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        self.layers.add_module('relu6', torch.nn.ReLU())
        self.layers.add_module('flatten', torch.nn.Flatten())
        self.layers.add_module('linear1', torch.nn.Linear(6 * 4 * 4, 120))
        self.layers.add_module('relu7', torch.nn.ReLU())
        self.layers.add_module('linear2', torch.nn.Linear(120, 84))
        self.layers.add_module('relu8', torch.nn.ReLU())
        self.layers.add_module('linear3', torch.nn.Linear(84, 26))

    def forward(self, input):
        return self.layers(input)
    
experiment(CNN2, 2, count_of_epoch=10)

quality before training: loss=3.2601001761509822, success percent=3.8461538461538463


epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/1950 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

quality after training: loss=0.371203347410147, success percent=88.34134615384616


# качество меньше на 10% c одной эпохой и выше на 1% с 10ю эпохами

Теперь оставим один relu в конце

In [45]:
class CNN3(torch.nn.Module):
    @property
    def device(self):
        for p in self.parameters():
            return p.device
        
    def __init__(self):
        super(CNN3, self).__init__()
        
        self.layers = torch.nn.Sequential()
        self.layers.add_module('conv1', torch.nn.Conv2d(1, 1 *  6, kernel_size = 5))
        # self.layers.add_module('relu1', torch.nn.ReLU())
        self.layers.add_module('conv2', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        # self.layers.add_module('relu2', torch.nn.ReLU())
        self.layers.add_module('conv3', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        # self.layers.add_module('relu3', torch.nn.ReLU())
        self.layers.add_module('conv4', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        # self.layers.add_module('relu4', torch.nn.ReLU())
        self.layers.add_module('conv5', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        # self.layers.add_module('relu5', torch.nn.ReLU())
        self.layers.add_module('conv6', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        self.layers.add_module('relu6', torch.nn.ReLU())
        self.layers.add_module('flatten', torch.nn.Flatten())
        self.layers.add_module('linear1', torch.nn.Linear(6 * 4 * 4, 120))
        self.layers.add_module('relu7', torch.nn.ReLU())
        self.layers.add_module('linear2', torch.nn.Linear(120, 84))
        self.layers.add_module('relu8', torch.nn.ReLU())
        self.layers.add_module('linear3', torch.nn.Linear(84, 26))

    def forward(self, input):
        return self.layers(input)
    
experiment(CNN3, 3)

quality before training: loss=3.259963056857769, success percent=3.8461538461538463


epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


quality after training: loss=0.8229907399874468, success percent=74.73076923076923


# Качество ещё ниже
Увеличим kernel size

In [46]:
class CNN4(torch.nn.Module):
    @property
    def device(self):
        for p in self.parameters():
            return p.device
        
    def __init__(self):
        super(CNN4, self).__init__()
        
        self.layers = torch.nn.Sequential()
        self.layers.add_module('conv1', torch.nn.Conv2d(1, 1 *  6, kernel_size = 10))
        self.layers.add_module('relu1', torch.nn.ReLU())
        self.layers.add_module('conv2', torch.nn.Conv2d(6, 1 * 16, kernel_size = 10))
        self.layers.add_module('relu2', torch.nn.ReLU())
        self.layers.add_module('pool1', torch.nn.MaxPool2d(kernel_size = 2))
        # self.layers.add_module('conv3', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        # self.layers.add_module('relu3', torch.nn.ReLU())
        # self.layers.add_module('conv4', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        # self.layers.add_module('relu4', torch.nn.ReLU())
        # self.layers.add_module('conv5', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        # self.layers.add_module('relu5', torch.nn.ReLU())
        # self.layers.add_module('conv6', torch.nn.Conv2d(6, 1 * 6, kernel_size = 5))
        # self.layers.add_module('relu6', torch.nn.ReLU())
        self.layers.add_module('flatten', torch.nn.Flatten())
        self.layers.add_module('linear1', torch.nn.Linear(16 * 5 * 5, 120))
        self.layers.add_module('relu7', torch.nn.ReLU())
        self.layers.add_module('linear2', torch.nn.Linear(120, 84))
        self.layers.add_module('relu8', torch.nn.ReLU())
        self.layers.add_module('linear3', torch.nn.Linear(84, 26))

    def forward(self, input):
        return self.layers(input)
    
experiment(CNN4, 4)

quality before training: loss=3.261124090781579, success percent=3.8461538461538463


epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


quality after training: loss=0.385198742950765, success percent=87.60576923076923


# Качество ниже, чем у базового варианта

Увеличим пуллинг. Сначала уменьшим страйд

In [47]:
class CNN5(torch.nn.Module):
    @property
    def device(self):
        for p in self.parameters():
            return p.device
        
    def __init__(self):
        super(CNN5, self).__init__()
        
        self.layers = torch.nn.Sequential()
        self.layers.add_module('conv1', torch.nn.Conv2d(1, 1*6, kernel_size = 5))
        self.layers.add_module('relu1', torch.nn.ReLU())
        self.layers.add_module('pool1', torch.nn.MaxPool2d(kernel_size = 2, stride=1))
        self.layers.add_module('conv2', torch.nn.Conv2d(1*6, 1*16, kernel_size = 5))
        self.layers.add_module('relu2', torch.nn.ReLU())
        self.layers.add_module('pool2', torch.nn.MaxPool2d(kernel_size = 2, stride=1))
        self.layers.add_module('flatten', torch.nn.Flatten())
        self.layers.add_module('linear1', torch.nn.Linear(16*18*18, 120))
        self.layers.add_module('relu3', torch.nn.ReLU())
        self.layers.add_module('linear2', torch.nn.Linear(120, 84))
        self.layers.add_module('relu4', torch.nn.ReLU())
        self.layers.add_module('linear3', torch.nn.Linear(84, 26))

    def forward(self, input):
        return self.layers(input)
    
experiment(CNN5, 5)

quality before training: loss=3.2596484184265138, success percent=3.451923076923077


epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


quality after training: loss=0.339270992072729, success percent=89.1923076923077


# Качество выросло. Не надо сжимать пуллингом данные так быстро
Теперь увеличим ядро пуллинга

In [48]:
class CNN6(torch.nn.Module):
    @property
    def device(self):
        for p in self.parameters():
            return p.device
        
    def __init__(self):
        super(CNN6, self).__init__()
        
        self.layers = torch.nn.Sequential()
        self.layers.add_module('conv1', torch.nn.Conv2d(1, 1*6, kernel_size = 5))
        self.layers.add_module('relu1', torch.nn.ReLU())
        self.layers.add_module('pool1', torch.nn.MaxPool2d(kernel_size = 5, stride=1))
        self.layers.add_module('conv2', torch.nn.Conv2d(1*6, 1*16, kernel_size = 5))
        self.layers.add_module('relu2', torch.nn.ReLU())
        self.layers.add_module('pool2', torch.nn.MaxPool2d(kernel_size = 5, stride=1))
        self.layers.add_module('flatten', torch.nn.Flatten())
        self.layers.add_module('linear1', torch.nn.Linear(16*12*12, 120))
        self.layers.add_module('relu3', torch.nn.ReLU())
        self.layers.add_module('linear2', torch.nn.Linear(120, 84))
        self.layers.add_module('relu4', torch.nn.ReLU())
        self.layers.add_module('linear3', torch.nn.Linear(84, 26))

    def forward(self, input):
        return self.layers(input)
    
experiment(CNN6, 6)

quality before training: loss=3.2600202178955078, success percent=3.8365384615384617


epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


quality after training: loss=0.4131858579814434, success percent=86.76442307692308


# С увеличением пуллинга качество упало

Реструктурируем сеть для уменьшения сжатия после разворачивания 3d в 1d

In [49]:
class CNN7(torch.nn.Module):
    @property
    def device(self):
        for p in self.parameters():
            return p.device
        
    def __init__(self):
        super(CNN7, self).__init__()
        
        self.layers = torch.nn.Sequential()
        self.layers.add_module('conv1', torch.nn.Conv2d(1, 1*6, kernel_size = 5))
        self.layers.add_module('relu1', torch.nn.ReLU())
        self.layers.add_module('pool1', torch.nn.MaxPool2d(kernel_size = 2, stride=1))
        self.layers.add_module('conv2', torch.nn.Conv2d(1*6, 1*16, kernel_size = 5))
        self.layers.add_module('relu2', torch.nn.ReLU())
        self.layers.add_module('pool2', torch.nn.MaxPool2d(kernel_size = 2, stride=1))
        self.layers.add_module('flatten', torch.nn.Flatten())
        self.layers.add_module('linear1', torch.nn.Linear(16*18*18, 1024))
        self.layers.add_module('relu3', torch.nn.ReLU())
        self.layers.add_module('linear2', torch.nn.Linear(1024, 256))
        self.layers.add_module('relu4', torch.nn.ReLU())
        self.layers.add_module('linear3', torch.nn.Linear(256, 26))

    def forward(self, input):
        return self.layers(input)
    
experiment(CNN7, 7)

quality before training: loss=3.258965319119967, success percent=3.730769230769231


epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


quality after training: loss=0.26302440037807595, success percent=91.66346153846153


Качество выросло на ~3%
# Добавим batchnorm

In [50]:
class CNN8(torch.nn.Module):
    @property
    def device(self):
        for p in self.parameters():
            return p.device
        
    def __init__(self):
        super(CNN8, self).__init__()
        
        self.layers = torch.nn.Sequential()
        self.layers.add_module('conv1', torch.nn.Conv2d(1, 1*6, kernel_size = 5))
        self.layers.add_module('bnorm1', torch.nn.BatchNorm2d(6))
        self.layers.add_module('relu1', torch.nn.ReLU())
        self.layers.add_module('pool1', torch.nn.MaxPool2d(kernel_size = 2, stride=1))
        self.layers.add_module('conv2', torch.nn.Conv2d(1*6, 1*16, kernel_size = 5))
        self.layers.add_module('bnorm2', torch.nn.BatchNorm2d(16))
        self.layers.add_module('relu2', torch.nn.ReLU())
        self.layers.add_module('pool2', torch.nn.MaxPool2d(kernel_size = 2, stride=1))
        self.layers.add_module('flatten', torch.nn.Flatten())
        self.layers.add_module('linear1', torch.nn.Linear(16*18*18, 1024))
        self.layers.add_module('relu3', torch.nn.ReLU())
        self.layers.add_module('linear2', torch.nn.Linear(1024, 256))
        self.layers.add_module('relu4', torch.nn.ReLU())
        self.layers.add_module('linear3', torch.nn.Linear(256, 26))

    def forward(self, input):
        return self.layers(input)
    
experiment(CNN8, 8)

quality before training: loss=3.2635724639892576, success percent=3.581730769230769


epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

quality after training: loss=0.5400554994436411, success percent=83.6875


Скорость не возросла, хотя ожидалось обратное. Видимо вся скорость теряется на толстых линейных слоях. Качество на 10% ниже. Падение качества свидетельствует о том, что нейросеть успевает хорошо обучиться за одну эпоху и так


Теперь добавим dropout 10%

In [51]:
class CNN9(torch.nn.Module):
    @property
    def device(self):
        for p in self.parameters():
            return p.device
        
    def __init__(self):
        super(CNN9, self).__init__()
        
        p = 0.1
        self.layers = torch.nn.Sequential()
        self.layers.add_module('conv1', torch.nn.Conv2d(1, 1*6, kernel_size = 5))
        self.layers.add_module('relu1', torch.nn.ReLU())
        self.layers.add_module('drop', torch.nn.Dropout(p=p))
        self.layers.add_module('pool1', torch.nn.MaxPool2d(kernel_size = 2, stride=1))
        self.layers.add_module('drop1', torch.nn.Dropout(p=p))
        self.layers.add_module('conv2', torch.nn.Conv2d(1*6, 1*16, kernel_size = 5))
        self.layers.add_module('relu2', torch.nn.ReLU())
        self.layers.add_module('drop1', torch.nn.Dropout(p=p))
        self.layers.add_module('pool2', torch.nn.MaxPool2d(kernel_size = 2, stride=1))
        self.layers.add_module('flatten', torch.nn.Flatten())
        self.layers.add_module('drop1', torch.nn.Dropout(p=p))
        self.layers.add_module('linear1', torch.nn.Linear(16*18*18, 1024))
        self.layers.add_module('relu3', torch.nn.ReLU())
        self.layers.add_module('drop1', torch.nn.Dropout(p=p))
        self.layers.add_module('linear2', torch.nn.Linear(1024, 256))
        self.layers.add_module('relu4', torch.nn.ReLU())
        self.layers.add_module('drop1', torch.nn.Dropout(p=p))
        self.layers.add_module('linear3', torch.nn.Linear(256, 26))

    def forward(self, input):
        return self.layers(input)
    
experiment(CNN9, 9)

quality before training: loss=3.2590960671351508, success percent=3.3125


epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


quality after training: loss=0.2545776669867337, success percent=91.49038461538461


Качество немного упало.
А если dropout 20%?

In [52]:
class CNN10(torch.nn.Module):
    @property
    def device(self):
        for p in self.parameters():
            return p.device
        
    def __init__(self):
        super(CNN10, self).__init__()
        
        p = 0.2
        self.layers = torch.nn.Sequential()
        self.layers.add_module('conv1', torch.nn.Conv2d(1, 1*6, kernel_size = 5))
        self.layers.add_module('relu1', torch.nn.ReLU())
        self.layers.add_module('drop', torch.nn.Dropout(p=p))
        self.layers.add_module('pool1', torch.nn.MaxPool2d(kernel_size = 2, stride=1))
        self.layers.add_module('drop1', torch.nn.Dropout(p=p))
        self.layers.add_module('conv2', torch.nn.Conv2d(1*6, 1*16, kernel_size = 5))
        self.layers.add_module('relu2', torch.nn.ReLU())
        self.layers.add_module('drop1', torch.nn.Dropout(p=p))
        self.layers.add_module('pool2', torch.nn.MaxPool2d(kernel_size = 2, stride=1))
        self.layers.add_module('flatten', torch.nn.Flatten())
        self.layers.add_module('drop1', torch.nn.Dropout(p=p))
        self.layers.add_module('linear1', torch.nn.Linear(16*18*18, 1024))
        self.layers.add_module('relu3', torch.nn.ReLU())
        self.layers.add_module('drop1', torch.nn.Dropout(p=p))
        self.layers.add_module('linear2', torch.nn.Linear(1024, 256))
        self.layers.add_module('relu4', torch.nn.ReLU())
        self.layers.add_module('drop1', torch.nn.Dropout(p=p))
        self.layers.add_module('linear3', torch.nn.Linear(256, 26))

    def forward(self, input):
        return self.layers(input)
    
experiment(CNN10, 10)

quality before training: loss=3.2596936240563026, success percent=3.1538461538461537


epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1950 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


quality after training: loss=0.2547457879093977, success percent=91.53365384615384


## Выводы.

1. C увеличением числа слоев быстро возрастает необходимое количество эпох на обучение.
2. BatchNorm улучшения сходимости не дал
3. Dropout тоже
4. Характерный размер ключевых объектов на картинке - 3-5 пикселей. При поиске паттернов большего размера качество падает. При таких параметрах ядер pooling- и convolution- слоев результаты наилучшие