### Задание
Постройте модель, используя архитектуру со свёрточными слоями, для классификации Fashion MNIST. Итоговое качество (accuracy) должно быть не ниже 89,5.

Инструкция по выполнению задания. 

1. Скачайте тренировочную и тестовою часть датасета Fashion MNIST
2. Постройте архитектуру модели, используя свёрточные слои, слои регуляризации и один финальный полносвязный слой
3. Обучите модель до необходимого качества. Если сеть не обучается до необходимого качества, попробуйте поменять архитектуру сети, варьируя параметры свёрток, количество каналов, количество свёрточных слоёв, слои регуляризации, тип оптимайзера и т. д.

In [126]:
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import numpy as np
import pandas as pd
import torchvision as tv
import torchvision.transforms as transforms
import time
from torchsummary import summary

In [2]:
BATCH_SIZE=128
train_dataset = tv.datasets.FashionMNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.FashionMNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [89]:
def train_model():
    loss = torch.nn.CrossEntropyLoss()
    for ep in range(num_epochs):
        train_iters, train_passed  = 0, 0
        train_loss, train_acc = 0., 0.
        start=time.time()
        
        model.train()
        for X, y in train:
            trainer.zero_grad()
            y_pred = model(X)
            l = loss(y_pred, y)
            l.backward()
            trainer.step()
            train_loss += l.item()
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)
        
        test_iters, test_passed  = 0, 0
        test_loss, test_acc = 0., 0.
        model.eval()
        for X, y in test:
            y_pred = model(X)
            l = loss(y_pred, y)
            test_loss += l.item()
            test_acc += (y_pred.argmax(dim=1) == y).sum().item()
            test_iters += 1
            test_passed += len(X)
            
        print("ep: {}, taked: {:.3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}".format(
            ep, time.time() - start, round(train_loss / train_iters, 3), round(train_acc / train_passed, 4),
            round(test_loss / test_iters, 3), test_acc / test_passed)
        )

In [150]:
model = torch.nn.Sequential(
    torch.nn.Conv2d(1, 6, kernel_size=3, padding=1),
    torch.nn.MaxPool2d(2),
    torch.nn.Conv2d(6, 32, kernel_size=3, padding=1),
    torch.nn.MaxPool2d(2, stride=2),
    torch.nn.Conv2d(32, 64, kernel_size=5, padding=1),
    torch.nn.AvgPool2d(2, stride=2),
    torch.nn.Flatten(),
    torch.nn.Linear(256, 64),
    torch.nn.ReLU(),
    torch.nn.BatchNorm1d(64),
    torch.nn.Linear(64, 10)
)
summary(model, input_size=(1, 24, 24), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 24, 24]              60
         MaxPool2d-2            [-1, 6, 12, 12]               0
            Conv2d-3           [-1, 32, 12, 12]           1,760
         MaxPool2d-4             [-1, 32, 6, 6]               0
            Conv2d-5             [-1, 64, 4, 4]          51,264
         AvgPool2d-6             [-1, 64, 2, 2]               0
           Flatten-7                  [-1, 256]               0
            Linear-8                   [-1, 64]          16,448
              ReLU-9                   [-1, 64]               0
      BatchNorm1d-10                   [-1, 64]             128
           Linear-11                   [-1, 10]             650
Total params: 70,310
Trainable params: 70,310
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/ba

In [163]:
trainer = torch.optim.ASGD(model.parameters(), lr=.01)
num_epochs = 10

In [164]:
train_model()

ep: 0, taked: 15.500, train_loss: 0.284, train_acc: 0.8988, test_loss: 0.321, test_acc: 0.8848
ep: 1, taked: 15.226, train_loss: 0.271, train_acc: 0.9031, test_loss: 0.314, test_acc: 0.8864
ep: 2, taked: 15.760, train_loss: 0.265, train_acc: 0.9053, test_loss: 0.312, test_acc: 0.8876
ep: 3, taked: 15.050, train_loss: 0.262, train_acc: 0.9066, test_loss: 0.309, test_acc: 0.8887
ep: 4, taked: 15.711, train_loss: 0.259, train_acc: 0.908, test_loss: 0.307, test_acc: 0.8908
ep: 5, taked: 15.355, train_loss: 0.256, train_acc: 0.9087, test_loss: 0.306, test_acc: 0.8917
ep: 6, taked: 15.819, train_loss: 0.255, train_acc: 0.9093, test_loss: 0.304, test_acc: 0.8927
ep: 7, taked: 15.133, train_loss: 0.253, train_acc: 0.9099, test_loss: 0.303, test_acc: 0.893
ep: 8, taked: 15.758, train_loss: 0.251, train_acc: 0.9105, test_loss: 0.302, test_acc: 0.8931
ep: 9, taked: 15.396, train_loss: 0.25, train_acc: 0.911, test_loss: 0.302, test_acc: 0.8932
