# Multilayer Perceptron (MLP)

In [3]:
import time

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd

import torch
import torchvision as tv

In [1]:
# Mini-batch size used when building the training and test DataLoaders.
BATCH_SIZE = 256

# Seed PyTorch's global RNG so that random weight initialisation (and any
# other torch-level randomness) is repeatable on a fresh "Restart & Run All".
SEED = 42
torch.manual_seed(SEED);

## Data import

In [4]:
# Download MNIST into ../data/raw (if it is not already there) and convert
# every image to a tensor with ToTensor().
train_dataset = tv.datasets.MNIST('../data/raw', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.MNIST('../data/raw', train=False, transform=tv.transforms.ToTensor(), download=True)

# Reshuffle the training set at the start of every epoch so mini-batch
# optimisation does not see the samples in the same fixed order each time.
# Evaluation metrics do not depend on batch order, so the test loader stays
# unshuffled.
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

100%|██████████| 9.91M/9.91M [00:01<00:00, 7.64MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 174kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 1.71MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 1.01MB/s]


In [10]:
# Inspect the tensor dimensions of the first training image.
train_dataset[0][0].size()

torch.Size([1, 28, 28])

## Model SGD

In [11]:
# Two-layer perceptron: flatten each 1x28x28 image into 784 features, map
# them to a 256-unit hidden layer with ReLU, then produce 10 output scores.
mlp_layers = [
    torch.nn.Flatten(),
    torch.nn.Linear(in_features=784, out_features=256),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=10),
]
model = torch.nn.Sequential(*mlp_layers)

In [12]:
# Display the layer summary to confirm the architecture.
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [13]:
# Training configuration for the plain-SGD run: epoch budget, loss function
# and the optimizer bound to the current model's parameters.
num_epochs = 10
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2)

In [18]:
def training_loop():
    """Train the notebook-level ``model`` and print per-epoch metrics.

    Reads the global names ``model``, ``optimizer``, ``loss``, ``train``,
    ``test`` and ``num_epochs``. For each epoch it runs one optimisation pass
    over ``train`` followed by one evaluation pass over ``test`` and prints:

    * the wall-clock time of the epoch (training plus evaluation),
    * the loss averaged over batches, and
    * the accuracy averaged over samples,

    for both the training and the test data.

    Returns:
        None. ``model`` and ``optimizer`` are updated in place.
    """
    for epoch in range(num_epochs):
        train_iters, train_passed = 0, 0
        train_loss, train_acc = 0., 0.
        start = time.time()

        model.train()
        for X, y in train:
            optimizer.zero_grad()
            y_pred = model(X)
            batch_loss = loss(y_pred, y)
            batch_loss.backward()
            optimizer.step()
            train_loss += batch_loss.item()
            # Count correct predictions; normalised by sample count below.
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)

        test_iters, test_passed = 0, 0
        test_loss, test_acc = 0., 0.
        model.eval()
        # Evaluation never calls backward(), so disable autograd to avoid
        # building a computation graph for every test batch.
        with torch.no_grad():
            for X, y in test:
                y_pred = model(X)
                batch_loss = loss(y_pred, y)
                test_loss += batch_loss.item()
                test_acc += (y_pred.argmax(dim=1) == y).sum().item()
                test_iters += 1
                test_passed += len(X)

        print("epoch: {}, taked: {: .3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}".format(
            epoch, time.time() - start, train_loss / train_iters, train_acc / train_passed,
            test_loss / test_iters, test_acc / test_passed
        ))

In [19]:
# Run the SGD experiment; one metrics line is printed per epoch.
training_loop()

epoch: 0, taked:  15.143, train_loss: 2.032381616754735, train_acc: 0.537, test_loss: 1.671196061372757, test_acc: 0.7222
epoch: 1, taked:  14.827, train_loss: 1.3280625462532043, train_acc: 0.7632333333333333, test_loss: 1.0040306136012078, test_acc: 0.8143
epoch: 2, taked:  13.778, train_loss: 0.8653073512493296, train_acc: 0.8232166666666667, test_loss: 0.7120120383799076, test_acc: 0.8497
epoch: 3, taked:  13.844, train_loss: 0.6646339738622625, train_acc: 0.8489, test_loss: 0.5776242233812809, test_acc: 0.8652
epoch: 4, taked:  13.740, train_loss: 0.5630122444731124, train_acc: 0.8642833333333333, test_loss: 0.5025085937231779, test_acc: 0.8766
epoch: 5, taked:  14.395, train_loss: 0.5023965143142862, train_acc: 0.8740333333333333, test_loss: 0.4548795722424984, test_acc: 0.8849
epoch: 6, taked:  13.860, train_loss: 0.4622502591381682, train_acc: 0.8806833333333334, test_loss: 0.4221287749707699, test_acc: 0.8899
epoch: 7, taked:  13.920, train_loss: 0.43370834519254403, train_acc

## Model Adam

In [20]:
# Build a freshly initialised copy of the same architecture so the Adam run
# does not continue from the weights already trained with SGD.
model = torch.nn.Sequential(*(
    torch.nn.Flatten(),
    torch.nn.Linear(in_features=784, out_features=256),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=10),
))

model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [21]:
# Training configuration for the Adam run: epoch budget, loss function and
# the optimizer bound to the newly created model's parameters.
num_epochs = 10
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)

In [22]:
# Run the Adam experiment; one metrics line is printed per epoch.
training_loop()

epoch: 0, taked:  16.998, train_loss: 0.25414203849244626, train_acc: 0.9224166666666667, test_loss: 0.14438453275943175, test_acc: 0.9546
epoch: 1, taked:  16.079, train_loss: 0.10558548719879794, train_acc: 0.9676833333333333, test_loss: 0.12441704960074276, test_acc: 0.9609
epoch: 2, taked:  16.139, train_loss: 0.0722567948037164, train_acc: 0.9768666666666667, test_loss: 0.11258277373271994, test_acc: 0.9658
epoch: 3, taked:  16.917, train_loss: 0.06127306509148726, train_acc: 0.9803833333333334, test_loss: 0.12030076518367423, test_acc: 0.9655
epoch: 4, taked:  16.370, train_loss: 0.053686418640179596, train_acc: 0.9823333333333333, test_loss: 0.12014913427410648, test_acc: 0.969
epoch: 5, taked:  17.046, train_loss: 0.04400648227001124, train_acc: 0.98575, test_loss: 0.12504899735395156, test_acc: 0.9693
epoch: 6, taked:  16.326, train_loss: 0.04856869768192793, train_acc: 0.9850833333333333, test_loss: 0.13063380663179486, test_acc: 0.9699
epoch: 7, taked:  16.549, train_loss: 0