# Домашнее задание "Сверточные сети"

Задание 1:

Используя свёрточные слои и подобрав архитектуру сети, получите на Fashion MNIST точность (accuracy) не ниже 89,5%.

In [1]:
import torch
from torch import nn

In [2]:
def corr2d(X, K):
    """Compute the 2-D cross-correlation of ``X`` with kernel ``K``.

    The kernel slides over ``X`` with stride 1 and no padding, so the result
    has shape ``(X.shape[0] - h + 1, X.shape[1] - w + 1)`` where ``(h, w)``
    is the kernel shape.  The output is allocated with ``torch.zeros``.
    """
    k_rows, k_cols = K.shape
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = torch.zeros((out_rows, out_cols))
    for r in range(out_rows):
        for c in range(out_cols):
            # Window of X currently covered by the kernel.
            window = X[r: r + k_rows, c: c + k_cols]
            Y[r, c] = (window * K).sum()
    return Y

In [3]:
def pool2d(X, pool_size, mode='max'):
    """Apply 2-D pooling with stride 1 and no padding to a 2-D tensor.

    Args:
        X: 2-D tensor to pool.
        pool_size: ``(height, width)`` of the pooling window.
        mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.

    Returns:
        A tensor of shape ``(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)``
        allocated with ``torch.zeros``.

    Raises:
        ValueError: if ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    if mode not in ('max', 'avg'):
        # An unknown mode used to fall through both branches and silently
        # return an all-zero tensor; fail loudly instead.
        raise ValueError(f"mode must be 'max' or 'avg', got {mode!r}")
    p_h, p_w = pool_size
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i: i + p_h, j: j + p_w]
            Y[i, j] = window.max() if mode == 'max' else window.mean()
    return Y

In [4]:
import torchvision as tv
import time
import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
# Number of samples per mini-batch.
BATCH_SIZE = 256

In [6]:
# Fashion-MNIST train/test splits, downloaded into the current directory when
# missing; ToTensor() turns every image into a tensor.
train_dataset = tv.datasets.FashionMNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.FashionMNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)
# Reshuffle the training set every epoch so the optimizer does not see the
# same mini-batches in the same order each time; evaluation order is fixed.
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [9]:
# Two conv/pool stages followed by a two-layer classifier head.
# Shape comments assume a 1x28x28 input image.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),  # -> 6x28x28
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # -> 6x14x14
    nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5),  # -> 12x10x10
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # -> 12x5x5
    nn.Flatten(),  # -> 300
    nn.Linear(in_features=300, out_features=50),
    nn.ReLU(),
    nn.Linear(in_features=50, out_features=10),  # 10 output scores
)

In [10]:
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` classified correctly.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches, where ``y`` holds the
            true class indices for the samples in ``X``.
        net: callable mapping a batch ``X`` to per-class scores; the predicted
            class is the argmax along axis 1.  May be an ``nn.Module`` or any
            other callable.

    Returns:
        Number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no samples.
    """
    # Put modules into evaluation mode for the duration of the pass and
    # restore training mode afterwards; plain callables are used as-is.
    was_training = isinstance(net, nn.Module) and net.training
    if was_training:
        net.eval()
    correct, n = 0, 0
    try:
        # No gradients are needed here, so skip building the autograd graph.
        with torch.no_grad():
            for X, y in data_iter:
                correct += (net(X).argmax(axis=1) == y).sum().item()
                n += y.shape[0]
    finally:
        if was_training:
            net.train()
    return correct / n

In [11]:
def train(net, train_iter, test_iter, trainer, num_epochs):
    """Train ``net`` and print statistics after every epoch.

    Args:
        net: model called on each batch ``X`` to produce class scores.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches passed to
            ``evaluate_accuracy`` at the end of each epoch.
        trainer: optimizer; ``zero_grad()`` and ``step()`` are called once
            per batch.
        num_epochs: number of passes over ``train_iter``.

    The cross-entropy loss is summed over each batch (``reduction='sum'``);
    the printed loss and training accuracy are divided by the number of
    samples seen in the epoch.
    """
    criterion = nn.CrossEntropyLoss(reduction='sum')
    for epoch in range(num_epochs):
        loss_total = 0.0
        correct_total = 0.0
        seen = 0
        started_at = time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            scores = net(X)
            batch_loss = criterion(scores, y)
            batch_loss.backward()
            trainer.step()
            loss_total += batch_loss.item()
            correct_total += (scores.argmax(axis=1) == y).sum().item()
            seen += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        # Elapsed time includes the test-set evaluation above.
        elapsed = time.time() - started_at
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, loss_total / seen, correct_total / seen,
                 test_acc, elapsed))

In [12]:
# First run: plain SGD for 5 epochs.
lr = 0.001
num_epochs = 5
trainer = torch.optim.SGD(model.parameters(), lr=lr)
train(net=model, train_iter=train_iter, test_iter=test_iter,
      trainer=trainer, num_epochs=num_epochs)

epoch 1, loss 0.9982, train acc 0.628, test acc 0.775, time 13.5 sec
epoch 2, loss 0.4889, train acc 0.813, test acc 0.844, time 13.2 sec
epoch 3, loss 0.4022, train acc 0.851, test acc 0.860, time 13.2 sec
epoch 4, loss 0.3633, train acc 0.866, test acc 0.870, time 13.3 sec
epoch 5, loss 0.3407, train acc 0.874, test acc 0.872, time 13.2 sec


In [14]:
# Adam for 5 more epochs.
# NOTE(review): as shown, `model` is not re-created before this cell, so this
# run continues training the existing weights rather than starting fresh.
lr = 0.001
num_epochs = 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(net=model, train_iter=train_iter, test_iter=test_iter,
      trainer=trainer, num_epochs=num_epochs)

epoch 1, loss 0.2251, train acc 0.915, test acc 0.893, time 13.3 sec
epoch 2, loss 0.2150, train acc 0.919, test acc 0.895, time 13.1 sec
epoch 3, loss 0.2098, train acc 0.921, test acc 0.894, time 13.2 sec
epoch 4, loss 0.2055, train acc 0.923, test acc 0.895, time 13.2 sec
epoch 5, loss 0.2017, train acc 0.924, test acc 0.896, time 13.3 sec


В принципе, мы достигли цели, но так неинтересно. Едем дальше...

In [15]:
# RMSprop for 5 more epochs.
# NOTE(review): as shown, `model` is not re-created before this cell, so this
# run continues training the existing weights rather than starting fresh.
lr = 0.001
num_epochs = 5
trainer = torch.optim.RMSprop(model.parameters(), lr=lr)
train(net=model, train_iter=train_iter, test_iter=test_iter,
      trainer=trainer, num_epochs=num_epochs)

epoch 1, loss 0.2031, train acc 0.923, test acc 0.897, time 13.3 sec
epoch 2, loss 0.1944, train acc 0.926, test acc 0.897, time 13.2 sec
epoch 3, loss 0.1915, train acc 0.928, test acc 0.897, time 13.2 sec
epoch 4, loss 0.1889, train acc 0.929, test acc 0.899, time 13.2 sec
epoch 5, loss 0.1865, train acc 0.929, test acc 0.898, time 13.6 sec


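Оставим RMSprop в качестве оптимизатора. (Замечание: сравнение оптимизаторов выше не вполне корректно — модель не пересоздавалась между запусками, поэтому Adam и RMSprop дообучали уже обученную сеть, а не стартовали с нуля.) Поиграем с моделью.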

In [24]:
# Wider variant: 16 channels in the second conv layer and a 150-unit hidden layer.
# Shape comments assume a 1x28x28 input image.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),  # -> 6x28x28
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # -> 6x14x14
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),  # -> 16x10x10
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # -> 16x5x5
    nn.Flatten(),  # -> 400
    nn.Linear(in_features=400, out_features=150),
    nn.ReLU(),
    nn.Linear(in_features=150, out_features=10),  # 10 output scores
)

In [25]:
# Train the model with RMSprop for 5 epochs.
lr = 0.001
num_epochs = 5
trainer = torch.optim.RMSprop(model.parameters(), lr=lr)
train(net=model, train_iter=train_iter, test_iter=test_iter,
      trainer=trainer, num_epochs=num_epochs)

epoch 1, loss 0.7149, train acc 0.735, test acc 0.802, time 13.8 sec
epoch 2, loss 0.4640, train acc 0.830, test acc 0.840, time 14.0 sec
epoch 3, loss 0.4069, train acc 0.852, test acc 0.855, time 13.8 sec
epoch 4, loss 0.3706, train acc 0.865, test acc 0.867, time 14.1 sec
epoch 5, loss 0.3432, train acc 0.876, test acc 0.872, time 13.9 sec


In [69]:
# Deeper variant: three padded conv/pool stages and a three-layer classifier head.
# Shape comments assume a 1x28x28 input image.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),  # -> 6x28x28
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # -> 6x14x14
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, padding=2),  # -> 16x14x14
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # -> 16x7x7
    nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding=2),  # -> 32x7x7
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # -> 32x3x3
    nn.Flatten(),  # -> 288
    nn.Linear(in_features=288, out_features=100),
    nn.ReLU(),
    nn.Linear(in_features=100, out_features=50),
    nn.ReLU(),
    nn.Linear(in_features=50, out_features=10),  # 10 output scores
)

In [70]:
# Train the model with RMSprop for 5 epochs.
lr = 0.001
num_epochs = 5
trainer = torch.optim.RMSprop(model.parameters(), lr=lr)
train(net=model, train_iter=train_iter, test_iter=test_iter,
      trainer=trainer, num_epochs=num_epochs)

epoch 1, loss 0.9236, train acc 0.647, test acc 0.754, time 16.4 sec
epoch 2, loss 0.5724, train acc 0.782, test acc 0.798, time 16.5 sec
epoch 3, loss 0.4826, train acc 0.819, test acc 0.826, time 16.7 sec
epoch 4, loss 0.4218, train acc 0.842, test acc 0.848, time 16.7 sec
epoch 5, loss 0.3748, train acc 0.861, test acc 0.863, time 16.5 sec


In [73]:
# Narrower variant: 10 channels in the second conv layer.
# Shape comments assume a 1x28x28 input image.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),  # -> 6x28x28
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # -> 6x14x14
    nn.Conv2d(in_channels=6, out_channels=10, kernel_size=5),  # -> 10x10x10
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # -> 10x5x5
    nn.Flatten(),  # -> 250
    nn.Linear(in_features=250, out_features=50),
    nn.ReLU(),
    nn.Linear(in_features=50, out_features=10),  # 10 output scores
)

In [74]:
# Train the model with RMSprop for 5 epochs.
lr = 0.001
num_epochs = 5
trainer = torch.optim.RMSprop(model.parameters(), lr=lr)
train(net=model, train_iter=train_iter, test_iter=test_iter,
      trainer=trainer, num_epochs=num_epochs)

epoch 1, loss 0.7508, train acc 0.718, test acc 0.777, time 13.3 sec
epoch 2, loss 0.5022, train acc 0.814, test acc 0.824, time 13.3 sec
epoch 3, loss 0.4397, train acc 0.841, test acc 0.843, time 13.0 sec
epoch 4, loss 0.4043, train acc 0.853, test acc 0.851, time 13.5 sec
epoch 5, loss 0.3802, train acc 0.861, test acc 0.859, time 13.2 sec


In [100]:
# Single conv/pool stage followed by a four-layer classifier head.
# Shape comments assume a 1x28x28 input image.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),  # -> 6x28x28
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # -> 6x14x14
    nn.Flatten(),  # -> 1176
    nn.Linear(in_features=1176, out_features=250),
    nn.ReLU(),
    nn.Linear(in_features=250, out_features=100),
    nn.ReLU(),
    nn.Linear(in_features=100, out_features=50),
    nn.ReLU(),
    nn.Linear(in_features=50, out_features=10),  # 10 output scores
)

In [101]:
# Train the model with RMSprop for 5 epochs.
lr = 0.001
num_epochs = 5
trainer = torch.optim.RMSprop(model.parameters(), lr=lr)
train(net=model, train_iter=train_iter, test_iter=test_iter,
      trainer=trainer, num_epochs=num_epochs)

epoch 1, loss 0.6990, train acc 0.734, test acc 0.833, time 13.2 sec
epoch 2, loss 0.4060, train acc 0.852, test acc 0.863, time 13.2 sec
epoch 3, loss 0.3424, train acc 0.875, test acc 0.874, time 13.2 sec
epoch 4, loss 0.3045, train acc 0.888, test acc 0.882, time 13.1 sec
epoch 5, loss 0.2765, train acc 0.898, test acc 0.884, time 13.4 sec


In [102]:
# Single conv/pool stage with 10 channels and a wider four-layer classifier head.
# Shape comments assume a 1x28x28 input image.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5, padding=2),  # -> 10x28x28
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),  # -> 10x14x14
    nn.Flatten(),  # -> 1960
    nn.Linear(in_features=1960, out_features=500),
    nn.ReLU(),
    nn.Linear(in_features=500, out_features=250),
    nn.ReLU(),
    nn.Linear(in_features=250, out_features=50),
    nn.ReLU(),
    nn.Linear(in_features=50, out_features=10),  # 10 output scores
)

In [103]:
# Train the model with RMSprop for 5 epochs.
lr = 0.001
num_epochs = 5
trainer = torch.optim.RMSprop(model.parameters(), lr=lr)
train(net=model, train_iter=train_iter, test_iter=test_iter,
      trainer=trainer, num_epochs=num_epochs)

epoch 1, loss 0.5991, train acc 0.775, test acc 0.862, time 18.7 sec
epoch 2, loss 0.3319, train acc 0.878, test acc 0.881, time 19.0 sec
epoch 3, loss 0.2782, train acc 0.898, test acc 0.892, time 18.5 sec
epoch 4, loss 0.2427, train acc 0.911, test acc 0.899, time 19.4 sec
epoch 5, loss 0.2131, train acc 0.923, test acc 0.901, time 18.5 sec


Вышли за пределы 90% по обеим выборкам: 92,3% на обучающей и 90,1% на тестовой.