In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

%matplotlib inline
import seaborn as sns
sns.set_style("darkgrid")

import warnings
warnings.filterwarnings("ignore")

import torch
from torch.utils.data import TensorDataset, DataLoader
import random

In [2]:
import torchvision as tv
import torch.nn as nn
import torch.nn.functional as F
import time

# Получаем данные

In [3]:
# Load Fashion-MNIST and wrap both splits in mini-batch loaders.
BATCH_SIZE = 256
SEED = 42  # fixed seed so repeated runs of the notebook are comparable

# Seed torch's global RNG, which later random operations (e.g. layer
# initialisation and dropout) draw from.
torch.manual_seed(SEED)

train_dataset = tv.datasets.FashionMNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.FashionMNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)

# Training batches are reshuffled every epoch: DataLoader defaults to
# shuffle=False, which would feed the optimiser the same batches in the same
# order on every pass. A dedicated seeded generator keeps the shuffle order
# reproducible.
train = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=torch.Generator().manual_seed(SEED),
)
# Evaluation order does not affect the metrics, so the test split stays sequential.
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [4]:
# Shape of the raw training image tensor (sanity check after loading).
train_dataset.data.shape

torch.Size([60000, 28, 28])

In [5]:
# Shape of the raw test image tensor (sanity check after loading).
test_dataset.data.shape

torch.Size([10000, 28, 28])

In [6]:
# Distinct label values present in the training targets.
set(train_dataset.targets.numpy())

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}

# Подбираем архитектуру

In [7]:
def train_model():
    """Train and evaluate the current global model, printing metrics per epoch.

    The function takes no arguments; it reads these module-level names, which
    must be defined before it is called: ``model``, ``loss``, ``trainer``,
    ``num_epochs`` and the batch iterables ``train`` and ``test``.

    For each epoch it runs one optimisation pass over ``train`` and one
    evaluation pass over ``test``, then prints one line with the elapsed time
    (training plus evaluation), the mean loss per batch and the accuracy per
    sample for both splits.

    Returns:
        None. Results are only printed; ``model`` is updated in place by
        ``trainer``.
    """
    for ep in range(num_epochs):
        train_iters, train_passed = 0, 0
        train_loss, train_acc = 0., 0.
        start = time.time()

        # Optimisation pass: train mode enables layers such as Dropout.
        model.train()
        for X, y in train:
            trainer.zero_grad()
            y_pred = model(X)
            l = loss(y_pred, y)
            l.backward()
            trainer.step()
            train_loss += l.item()
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)

        test_iters, test_passed = 0, 0
        test_loss, test_acc = 0., 0.
        # Evaluation pass: eval mode switches Dropout off, and no_grad stops
        # autograd from recording a graph that is never back-propagated,
        # which saves memory and time on every test batch.
        model.eval()
        with torch.no_grad():
            for X, y in test:
                y_pred = model(X)
                l = loss(y_pred, y)
                test_loss += l.item()
                test_acc += (y_pred.argmax(dim=1) == y).sum().item()
                test_iters += 1
                test_passed += len(X)

        # Loss is averaged over batches, accuracy over individual samples.
        print("ep: {}, taked: {:.3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}".format(
            ep, time.time() - start, round(train_loss / train_iters,5), round(train_acc / train_passed,5),
            round(test_loss / test_iters,5), round(test_acc / test_passed,5))
        )

In [8]:
# Layer sizes shared by every architecture below.
in_n = 28 * 28  # number of input features once a 28x28 image is flattened
out_n = 10      # number of output logits, one per label value

### Базовая

In [34]:
# Baseline network: flatten the image, then two ReLU hidden layers
# (512 and 128 units) followed by a linear output layer.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=in_n, out_features=512),
    nn.ReLU(),
    nn.Linear(in_features=512, out_features=128),
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=out_n),
)
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=128, bias=True)
  (4): ReLU()
  (5): Linear(in_features=128, out_features=10, bias=True)
)

In [35]:
# Training setup for the baseline network: 50 epochs, cross-entropy loss,
# Adam with a learning rate of 0.01.
num_epochs = 50
loss = nn.CrossEntropyLoss()
trainer = torch.optim.Adam(params=model.parameters(), lr=1e-2)

In [36]:
# Run the training/evaluation loop for the baseline model configured above.
train_model()

ep: 0, taked: 9.275, train_loss: 0.57253, train_acc: 0.79198, test_loss: 0.42299, test_acc: 0.8431
ep: 1, taked: 9.172, train_loss: 0.38478, train_acc: 0.85907, test_loss: 0.39612, test_acc: 0.8553
ep: 2, taked: 9.191, train_loss: 0.35127, train_acc: 0.87105, test_loss: 0.37073, test_acc: 0.8659
ep: 3, taked: 9.246, train_loss: 0.33069, train_acc: 0.87663, test_loss: 0.3724, test_acc: 0.8664
ep: 4, taked: 9.239, train_loss: 0.31377, train_acc: 0.88293, test_loss: 0.38656, test_acc: 0.8639
ep: 5, taked: 9.255, train_loss: 0.30801, train_acc: 0.8868, test_loss: 0.39395, test_acc: 0.8614
ep: 6, taked: 9.218, train_loss: 0.29933, train_acc: 0.8886, test_loss: 0.41387, test_acc: 0.8625
ep: 7, taked: 9.281, train_loss: 0.2906, train_acc: 0.8921, test_loss: 0.37779, test_acc: 0.8693
ep: 8, taked: 9.242, train_loss: 0.28328, train_acc: 0.89338, test_loss: 0.4014, test_acc: 0.8674
ep: 9, taked: 9.240, train_loss: 0.27676, train_acc: 0.8964, test_loss: 0.37143, test_acc: 0.8718
ep: 10, taked: 9.

### Подбираем более удачную

In [64]:
# Wider variant: a 1024-unit first hidden layer with dropout (p=0.5),
# then 384- and 256-unit ReLU layers before the linear output layer.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=in_n, out_features=1024),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=1024, out_features=384),
    nn.ReLU(),
    nn.Linear(in_features=384, out_features=256),
    nn.ReLU(),
    nn.Linear(in_features=256, out_features=out_n),
)
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=1024, bias=True)
  (2): ReLU()
  (3): Dropout(p=0.5, inplace=False)
  (4): Linear(in_features=1024, out_features=384, bias=True)
  (5): ReLU()
  (6): Linear(in_features=384, out_features=256, bias=True)
  (7): ReLU()
  (8): Linear(in_features=256, out_features=10, bias=True)
)

In [65]:
# Training setup for the wider network: 50 epochs, cross-entropy loss,
# Adam with a learning rate of 0.001.
num_epochs = 50
loss = nn.CrossEntropyLoss()
trainer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [66]:
# Run the training/evaluation loop for the wider model configured above.
train_model()

ep: 0, taked: 10.844, train_loss: 0.61744, train_acc: 0.7737, test_loss: 0.43491, test_acc: 0.8411
ep: 1, taked: 10.611, train_loss: 0.42139, train_acc: 0.84552, test_loss: 0.40212, test_acc: 0.8515
ep: 2, taked: 10.622, train_loss: 0.38524, train_acc: 0.85865, test_loss: 0.38647, test_acc: 0.8607
ep: 3, taked: 10.637, train_loss: 0.3642, train_acc: 0.86577, test_loss: 0.37917, test_acc: 0.8632
ep: 4, taked: 10.679, train_loss: 0.34623, train_acc: 0.87148, test_loss: 0.36235, test_acc: 0.8718
ep: 5, taked: 10.668, train_loss: 0.33596, train_acc: 0.87558, test_loss: 0.3714, test_acc: 0.8678
ep: 6, taked: 10.678, train_loss: 0.32474, train_acc: 0.87953, test_loss: 0.35782, test_acc: 0.87
ep: 7, taked: 10.633, train_loss: 0.3163, train_acc: 0.88227, test_loss: 0.34164, test_acc: 0.8767
ep: 8, taked: 10.631, train_loss: 0.30737, train_acc: 0.88575, test_loss: 0.33987, test_acc: 0.8787
ep: 9, taked: 10.638, train_loss: 0.30218, train_acc: 0.8861, test_loss: 0.33943, test_acc: 0.879
ep: 10, 

In [108]:
# Deeper variant: four hidden layers (1024, 512, 512, 512 units), each followed
# by ReLU and dropout with p=0.5, then a linear output layer.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(in_n, 1024),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(1024, 512),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(512, 512),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(512, 512),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(512, out_n)
)
# A bare `model` expression in the middle of a cell displays nothing (only the
# last expression of a cell is rendered), so print the architecture explicitly.
print(model)

# Training setup: 50 epochs, cross-entropy loss, Adam with a learning rate of 0.001.
loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 50

train_model()

ep: 0, taked: 12.457, train_loss: 0.76822, train_acc: 0.70563, test_loss: 0.46077, test_acc: 0.8282
ep: 1, taked: 12.459, train_loss: 0.47878, train_acc: 0.82898, test_loss: 0.41312, test_acc: 0.8492
ep: 2, taked: 12.385, train_loss: 0.43757, train_acc: 0.84268, test_loss: 0.4025, test_acc: 0.853
ep: 3, taked: 12.484, train_loss: 0.41736, train_acc: 0.8519, test_loss: 0.39951, test_acc: 0.8586
ep: 4, taked: 12.434, train_loss: 0.39692, train_acc: 0.85658, test_loss: 0.37584, test_acc: 0.8634
ep: 5, taked: 12.434, train_loss: 0.38612, train_acc: 0.86085, test_loss: 0.36253, test_acc: 0.8701
ep: 6, taked: 12.456, train_loss: 0.37625, train_acc: 0.86433, test_loss: 0.35842, test_acc: 0.8676
ep: 7, taked: 12.441, train_loss: 0.37045, train_acc: 0.86775, test_loss: 0.35426, test_acc: 0.8714
ep: 8, taked: 12.475, train_loss: 0.36381, train_acc: 0.86885, test_loss: 0.35653, test_acc: 0.873
ep: 9, taked: 12.453, train_loss: 0.35678, train_acc: 0.87143, test_loss: 0.34126, test_acc: 0.8767
ep: 

no batch test result: 0.8727
