# Multilayer Perceptron (MLP)

In [None]:
import time

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd

import torch
import torchvision as tv

: 

: 

In [2]:
# Mini-batch size shared by the training and test DataLoaders.
BATCH_SIZE = 256

In [3]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda


## Data import

In [4]:
# Download MNIST into the current directory (if it is not there yet) and
# convert every image to a tensor via ToTensor().
train_dataset = tv.datasets.MNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)

# Reshuffle the training set at every epoch so the optimizer does not see the
# same batches in the same order each time. The test loader keeps a fixed
# order: evaluation metrics are sums over all samples, so order is irrelevant.
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [5]:
# Inspect the shape of the first element (the image tensor) of the first training sample.
train_dataset[0][0].shape

torch.Size([1, 28, 28])

## Model SGD

In [6]:
# Two-layer perceptron: flatten the input, one hidden layer with ReLU,
# then a linear layer producing 10 output scores.
model = torch.nn.Sequential(
    torch.nn.Flatten(),                                   # collapse all non-batch dims
    torch.nn.Linear(in_features=784, out_features=256),   # hidden layer
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=10),    # output layer
)

In [7]:
# Move the model's parameters to the selected device.
model = model.to(device)
# Bare expression: display the architecture as the cell output.
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [8]:
# Training configuration for the SGD run.
num_epochs = 10
# Criterion applied to the model's raw outputs and the integer class labels.
loss = torch.nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [9]:
def training_loop():
    """Train the global ``model`` for ``num_epochs`` epochs and print metrics.

    The function takes no arguments; it reads these notebook-level names at
    call time: ``model``, ``loss`` (the criterion), ``optimizer``,
    ``num_epochs``, ``device`` and the ``train`` / ``test`` data loaders.
    Rebinding any of them before calling changes what is trained.

    Each epoch performs one optimisation pass over ``train`` followed by one
    evaluation pass over ``test``, then prints a single line with:

    * the wall-clock time of the whole epoch (training plus evaluation),
    * the mean of the per-batch losses for each split,
    * the fraction of correctly classified samples for each split
      (for the training split this is accumulated while the weights are
      still being updated).

    Returns:
        None. The model parameters are updated in place through
        ``optimizer.step()``; metrics are only printed.
    """
    for epoch in range(num_epochs):
        train_iters, train_passed = 0, 0
        train_loss, train_acc = 0., 0.
        start = time.time()

        model.train()
        for X, y in train:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            y_pred = model(X)
            batch_loss = loss(y_pred, y)
            batch_loss.backward()
            optimizer.step()
            train_loss += batch_loss.item()
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)

        test_iters, test_passed = 0, 0
        test_loss, test_acc = 0., 0.
        model.eval()
        # No backward pass happens during evaluation, so disable autograd:
        # otherwise a computation graph is built and kept for every test batch.
        with torch.no_grad():
            for X, y in test:
                X, y = X.to(device), y.to(device)
                y_pred = model(X)
                batch_loss = loss(y_pred, y)
                test_loss += batch_loss.item()
                test_acc += (y_pred.argmax(dim=1) == y).sum().item()
                test_iters += 1
                test_passed += len(X)

        print("epoch: {}, taked: {: .3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}".format(
            epoch, time.time() - start, train_loss / train_iters, train_acc / train_passed,
            test_loss / test_iters, test_acc / test_passed
        ))

In [10]:
# Run the training/evaluation loop with the SGD optimizer configured above.
training_loop()

epoch: 0, taked:  12.587, train_loss: 2.0250510347650406, train_acc: 0.5749166666666666, test_loss: 1.655296754837036, test_acc: 0.7509
epoch: 1, taked:  11.361, train_loss: 1.3091280589712426, train_acc: 0.7773833333333333, test_loss: 0.9888455182313919, test_acc: 0.8138
epoch: 2, taked:  11.351, train_loss: 0.8507620442420878, train_acc: 0.8268833333333333, test_loss: 0.7022985890507698, test_acc: 0.8494
epoch: 3, taked:  11.214, train_loss: 0.6546166160005205, train_acc: 0.8517166666666667, test_loss: 0.5710688237100839, test_acc: 0.8674
epoch: 4, taked:  11.226, train_loss: 0.5555998230868198, train_acc: 0.8656166666666667, test_loss: 0.4977589726448059, test_acc: 0.8782
epoch: 5, taked:  11.370, train_loss: 0.49663475720172234, train_acc: 0.8753, test_loss: 0.4513070423156023, test_acc: 0.8853
epoch: 6, taked:  11.465, train_loss: 0.4576267775068892, train_acc: 0.8817, test_loss: 0.41937654111534356, test_acc: 0.8908
epoch: 7, taked:  11.286, train_loss: 0.42988147684868344, train

In [11]:
def check_cuda_availability() -> bool:
    """Report whether PyTorch can use CUDA.

    Queries ``torch.cuda.is_available()``, prints the answer and returns it.

    Returns:
        bool: the value reported by ``torch.cuda.is_available()``.
    """
    is_cuda_available: bool = torch.cuda.is_available()
    print(f"CUDA is available: {is_cuda_available}")
    return is_cuda_available

# Call the helper: it prints the status, and the returned bool is shown as the cell result.
check_cuda_availability()

CUDA is available: True


True

## Model Adam

In [12]:
# Fresh, re-initialised copy of the same architecture for the Adam run,
# so the comparison does not start from the SGD-trained weights.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(in_features=784, out_features=256),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=10),
)

# nn.Module.to() moves the parameters in place and returns the module itself,
# so the result does not have to be re-assigned here.
model.to(device)
# Bare expression: display the architecture as the cell output.
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [13]:
# Training configuration for the Adam run (same epoch count and learning rate
# as the SGD run; only the optimizer class differs).
num_epochs = 10
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [14]:
# Re-run the same loop; it reads the current globals, i.e. the new model and the Adam optimizer.
training_loop()

epoch: 0, taked:  11.431, train_loss: 0.25067175838382955, train_acc: 0.9234333333333333, test_loss: 0.18760781958699227, test_acc: 0.9394
epoch: 1, taked:  11.298, train_loss: 0.10720564884708282, train_acc: 0.9672166666666666, test_loss: 0.13223123312345705, test_acc: 0.9596
epoch: 2, taked:  11.388, train_loss: 0.0742048234010036, train_acc: 0.9767333333333333, test_loss: 0.1423712159623392, test_acc: 0.9599
epoch: 3, taked:  11.315, train_loss: 0.05924570959735107, train_acc: 0.9812166666666666, test_loss: 0.09349114798096707, test_acc: 0.9747
epoch: 4, taked:  11.442, train_loss: 0.050371240187396714, train_acc: 0.9838333333333333, test_loss: 0.12244425698190753, test_acc: 0.9674
epoch: 5, taked:  11.371, train_loss: 0.046684251247132394, train_acc: 0.9856333333333334, test_loss: 0.1343655483527982, test_acc: 0.9672
epoch: 6, taked:  11.262, train_loss: 0.04903338018784973, train_acc: 0.9849833333333333, test_loss: 0.161489720175814, test_acc: 0.9687
epoch: 7, taked:  11.484, trai