## Подключаем необходимые библиотеки

In [1]:
import pandas as pd
import numpy as numpy
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split 
import torch.nn as nn
import torch.optim as optim
import torchvision as tv
import time


In [11]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Загружаем датасет

In [5]:
# Build the training split first and the test split second. Each dataset gets
# its own ToTensor transform, and download=True asks torchvision to fetch the
# files into the current directory.
train_data, test_data = (
    tv.datasets.FashionMNIST(
        '.',
        train=is_train_split,
        transform=tv.transforms.ToTensor(),
        download=True,
    )
    for is_train_split in (True, False)
)

In [6]:
# Number of samples per mini-batch, shared by the train and test data loaders.
BATCH_SIZE = 256

In [7]:
# Reshuffle the training set at the start of every epoch so that SGD does not
# see the exact same sequence of mini-batches each time.
train = torch.utils.data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# The test set is only used for evaluation, so it is iterated in its stored order.
test = torch.utils.data.DataLoader(test_data, batch_size=BATCH_SIZE)

# Создаем базовую модель

In [8]:
# Baseline classifier: flatten the input, one hidden layer of 256 ReLU units,
# then a linear layer producing 10 output logits.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=784, out_features=256),
    nn.ReLU(),
    nn.Linear(in_features=256, out_features=10),
)

In [17]:
# Move the model to the target device BEFORE the optimizer is constructed, so
# the optimizer is built over the parameters that will actually be trained.
model.to(device)

loss = torch.nn.CrossEntropyLoss()
# The optimizer must keep references to the model's own parameters. Swapping
# them for `param.to(device)` results is harmful whenever the model lives on a
# different device: the copies are non-leaf tensors that never receive
# gradients, so `trainer.step()` would silently leave the model unchanged.
trainer = optim.SGD(model.parameters(), lr=.01)
num_epochs = 20

In [18]:
# Transfer the model to the selected device; the trailing expression makes the
# notebook display the layer summary.
model = model.to(device)
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [22]:
def train_model():
    """Train and evaluate the notebook-level ``model`` for ``num_epochs`` epochs.

    The function takes no arguments and reads these globals: ``model``,
    ``trainer`` (the optimizer), ``loss`` (the criterion), ``train`` and
    ``test`` (iterables yielding ``(X, y)`` batches), ``device`` and
    ``num_epochs``.

    After each epoch it prints one line with the epoch index, the wall-clock
    time spent on the training and evaluation passes together, and, for both
    splits, the mean per-batch loss and the fraction of correctly classified
    samples. Nothing is returned.
    """
    for ep in range(num_epochs):
        train_iters, train_passed = 0, 0
        train_loss, train_acc = 0., 0.
        start = time.time()

        # Training pass: enable train-mode layers and update the weights.
        model.train()
        for X, y in train:
            X, y = X.to(device), y.to(device)
            trainer.zero_grad()
            y_pred = model(X)
            l = loss(y_pred, y)
            l.backward()
            trainer.step()
            train_loss += l.item()
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)

        test_iters, test_passed = 0, 0
        test_loss, test_acc = 0., 0.
        # Evaluation pass: switch layers to eval mode and disable autograd so
        # no computation graph is built for the test batches.
        model.eval()
        with torch.no_grad():
            for X, y in test:
                X, y = X.to(device), y.to(device)
                y_pred = model(X)
                l = loss(y_pred, y)
                test_loss += l.item()
                test_acc += (y_pred.argmax(dim=1) == y).sum().item()
                test_iters += 1
                test_passed += len(X)

        print("ep: {}, taked: {:.3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}".format(
            ep, time.time() - start, train_loss / train_iters, train_acc / train_passed,
            test_loss / test_iters, test_acc / test_passed)
        )

In [23]:
# Run the training loop for the baseline model; train_model() prints one
# metrics line per epoch.
train_model()

ep: 0, taked: 5.077, train_loss: 0.8753253520803249, train_acc: 0.7043666666666667, test_loss: 0.8275793567299843, test_acc: 0.7075
ep: 1, taked: 5.026, train_loss: 0.7777534170353666, train_acc: 0.7346333333333334, test_loss: 0.7547829955816269, test_acc: 0.732
ep: 2, taked: 5.013, train_loss: 0.7171938667906091, train_acc: 0.7553666666666666, test_loss: 0.704866062104702, test_acc: 0.7494
ep: 3, taked: 4.975, train_loss: 0.6735299871322956, train_acc: 0.7710333333333333, test_loss: 0.6675939321517944, test_acc: 0.7644
ep: 4, taked: 4.916, train_loss: 0.6399475944803116, train_acc: 0.7842166666666667, test_loss: 0.6386566169559955, test_acc: 0.7787
ep: 5, taked: 4.922, train_loss: 0.6132898897566694, train_acc: 0.7944666666666667, test_loss: 0.6156390599906445, test_acc: 0.7849
ep: 6, taked: 4.952, train_loss: 0.5916393143065433, train_acc: 0.8020166666666667, test_loss: 0.5969382591545582, test_acc: 0.792
ep: 7, taked: 4.976, train_loss: 0.5737484343508457, train_acc: 0.80795, test_l

Как мы видим, модель недостаточно сложна и показывает низкие метрики, поэтому усложним её.

In [37]:
# Deeper MLP: three hidden layers (1024 -> 512 -> 256), each followed by batch
# normalisation and ReLU, with dropout (p=0.5) after the first and third hidden
# layers, and a final linear layer producing 10 output logits.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=784, out_features=1024),
    nn.BatchNorm1d(num_features=1024),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=1024, out_features=512),
    nn.BatchNorm1d(num_features=512),
    nn.ReLU(),
    nn.Linear(in_features=512, out_features=256),
    nn.BatchNorm1d(num_features=256),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=256, out_features=10),
)
# Move the model to the selected device; as the last expression this also
# displays the layer summary.
model.to(device)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=1024, bias=True)
  (2): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): ReLU()
  (4): Dropout(p=0.5, inplace=False)
  (5): Linear(in_features=1024, out_features=512, bias=True)
  (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): ReLU()
  (8): Linear(in_features=512, out_features=256, bias=True)
  (9): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU()
  (11): Dropout(p=0.5, inplace=False)
  (12): Linear(in_features=256, out_features=10, bias=True)
)

In [38]:
loss = torch.nn.CrossEntropyLoss()
# `model` was already moved to `device` in the cell that created it, so the
# optimizer is built directly over the model's own parameters. They are not
# replaced with `param.to(device)` results: that is a no-op when the devices
# match, and otherwise hands the optimizer non-leaf copies that never receive
# gradients.
trainer = optim.Adam(model.parameters(), lr=.01)
num_epochs = 20

In [39]:
# Re-run the same training loop, now with the deeper model and the Adam
# optimizer defined above.
train_model()

ep: 0, taked: 5.286, train_loss: 0.5533636567440439, train_acc: 0.7988666666666666, test_loss: 0.4371093150228262, test_acc: 0.8432
ep: 1, taked: 5.297, train_loss: 0.42300676272270527, train_acc: 0.8464666666666667, test_loss: 0.4028959147632122, test_acc: 0.8487
ep: 2, taked: 5.267, train_loss: 0.38343917613333844, train_acc: 0.86145, test_loss: 0.3703386586159468, test_acc: 0.8616
ep: 3, taked: 5.261, train_loss: 0.3597409171626923, train_acc: 0.86945, test_loss: 0.34804477663710714, test_acc: 0.8708
ep: 4, taked: 5.277, train_loss: 0.3394501585275569, train_acc: 0.8754166666666666, test_loss: 0.35189390340819954, test_acc: 0.867
ep: 5, taked: 5.260, train_loss: 0.3249080073326192, train_acc: 0.8810166666666667, test_loss: 0.3322750698775053, test_acc: 0.8782
ep: 6, taked: 5.245, train_loss: 0.31347244648223227, train_acc: 0.88575, test_loss: 0.3339101274497807, test_acc: 0.8794
ep: 7, taked: 5.276, train_loss: 0.30197164169017304, train_acc: 0.8892, test_loss: 0.32457662280648947, 

Как мы видим, модель показала неплохую точность в 89 %