# Multilayer Perceptron (MLP)

In [1]:
import time

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd

import torch
import torchvision as tv

In [2]:
# Mini-batch size passed to both the training and the test DataLoader.
BATCH_SIZE = 256

In [3]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


## Data import

In [4]:
# MNIST train/test splits, downloaded into the current directory when missing;
# ToTensor converts each image into a float tensor.
train_dataset = tv.datasets.MNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)
# Reshuffle the training samples every epoch so the optimizer does not see the
# same mini-batches in the same order each time. The test loader keeps a fixed
# order because the evaluation metrics do not depend on it.
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

100%|██████████| 9.91M/9.91M [00:01<00:00, 8.76MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 242kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 2.22MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 809kB/s]


In [5]:
# Shape of the first training image tensor (shown as the cell output).
train_dataset[0][0].shape

torch.Size([1, 28, 28])

## Model SGD

In [6]:
# Two-layer perceptron: flatten each image to 784 features, project to a
# 256-unit hidden layer with ReLU, then to 10 output logits.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(in_features=784, out_features=256),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=10),
)

In [7]:
# Move the model to the selected device; the bare `model` expression on the
# last line displays the architecture as the cell output.
model = model.to(device)
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [8]:
# Training setup for the SGD run: epoch count, cross-entropy objective and a
# plain SGD optimizer over the current model's parameters.
num_epochs = 10
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [10]:
def training_loop():
    """Train the global ``model`` for ``num_epochs`` epochs, evaluating after each.

    Reads the notebook-level globals ``model``, ``loss``, ``optimizer``,
    ``train``, ``test``, ``device`` and ``num_epochs``; the model's parameters
    are updated in place through ``optimizer``.

    For every epoch one line is printed with the elapsed wall time (training
    plus evaluation), the mean per-batch loss and the per-sample accuracy on
    both the training and the test loader.

    Returns:
        None
    """
    for epoch in range(num_epochs):
        train_iters, train_passed = 0, 0
        train_loss, train_acc = 0., 0.
        start = time.time()

        model.train()
        for X, y in train:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            y_pred = model(X)
            l = loss(y_pred, y)
            l.backward()
            optimizer.step()
            train_loss += l.item()
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)

        test_iters, test_passed = 0, 0
        test_loss, test_acc = 0., 0.
        model.eval()
        # Evaluation needs no gradients: disabling autograd avoids building
        # the graph and saves memory and time.
        with torch.no_grad():
            for X, y in test:
                # Batches must live on the same device as the model; without
                # this the forward pass fails as soon as the model is on CUDA.
                X, y = X.to(device), y.to(device)
                y_pred = model(X)
                l = loss(y_pred, y)
                test_loss += l.item()
                test_acc += (y_pred.argmax(dim=1) == y).sum().item()
                test_iters += 1
                test_passed += len(X)

        print("epoch: {}, taked: {: .3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}".format(
            epoch, time.time() - start, train_loss / train_iters, train_acc / train_passed,
            test_loss / test_iters, test_acc / test_passed
        ))

In [11]:
# Train and evaluate the model with the SGD optimizer configured above.
training_loop()

epoch: 0, taked:  14.608, train_loss: 2.0583167187711027, train_acc: 0.57945, test_loss: 1.704962858557701, test_acc: 0.7706
epoch: 1, taked:  14.235, train_loss: 1.3533118737504837, train_acc: 0.7825166666666666, test_loss: 1.0136128336191177, test_acc: 0.8219
epoch: 2, taked:  15.338, train_loss: 0.8695311865908034, train_acc: 0.8290333333333333, test_loss: 0.7107312135398388, test_acc: 0.849
epoch: 3, taked:  14.713, train_loss: 0.6636266221391394, train_acc: 0.8519666666666666, test_loss: 0.5755699399858714, test_acc: 0.8661
epoch: 4, taked:  14.822, train_loss: 0.5616882188523069, train_acc: 0.8644, test_loss: 0.5010872758924961, test_acc: 0.8784
epoch: 5, taked:  14.754, train_loss: 0.5014460054483819, train_acc: 0.87325, test_loss: 0.4541131529957056, test_acc: 0.8851
epoch: 6, taked:  14.555, train_loss: 0.46167622601732294, train_acc: 0.8800833333333333, test_loss: 0.42185917403548956, test_acc: 0.8894
epoch: 7, taked:  14.059, train_loss: 0.4334077287861641, train_acc: 0.8849

In [12]:
def check_cuda_availability() -> bool:
    """Report on stdout whether PyTorch can use CUDA.

    Returns:
        bool: The value of ``torch.cuda.is_available()``.
    """
    available = torch.cuda.is_available()
    print(f"CUDA is available: {available}")
    return available

check_cuda_availability()

CUDA is available: False


False

## Model Adam

In [13]:
# Fresh, re-initialised copy of the same two-layer perceptron for the Adam run.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(in_features=784, out_features=256),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=10),
)

# nn.Module.to moves the parameters in place and returns the module itself.
model = model.to(device)
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [14]:
# Training setup for the Adam run: same epoch count and objective as before,
# with Adam optimizing the freshly created model's parameters.
num_epochs = 10
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [15]:
# Train and evaluate the re-created model with the Adam optimizer configured above.
training_loop()

epoch: 0, taked:  14.802, train_loss: 0.25221620179871296, train_acc: 0.9235166666666667, test_loss: 0.16526733628707008, test_acc: 0.9479
epoch: 1, taked:  15.749, train_loss: 0.10758770020837162, train_acc: 0.9667833333333333, test_loss: 0.09858492225175723, test_acc: 0.9695
epoch: 2, taked:  15.055, train_loss: 0.07284427674467418, train_acc: 0.9779333333333333, test_loss: 0.11496030239068204, test_acc: 0.9651
epoch: 3, taked:  14.570, train_loss: 0.058275163567506096, train_acc: 0.9807666666666667, test_loss: 0.0971086934368941, test_acc: 0.9721
epoch: 4, taked:  14.450, train_loss: 0.0462046498178871, train_acc: 0.98515, test_loss: 0.10718096337441238, test_acc: 0.972
epoch: 5, taked:  14.425, train_loss: 0.046425450868171424, train_acc: 0.9850666666666666, test_loss: 0.12465038219061171, test_acc: 0.9698
epoch: 6, taked:  14.830, train_loss: 0.05147317980213011, train_acc: 0.9845666666666667, test_loss: 0.13341975299058503, test_acc: 0.9727
epoch: 7, taked:  15.057, train_loss: 0