# MLP: MultiLayer Perceptron

## Build From Scratch

In [1]:
import torch
import numpy as np
import sys
import renyan_utils as ry

In [2]:
# Mini-batch size; it is passed both to the data loader here and to the
# training call further down.
batch_size = 256
# ry.load_data_fashion_mnist is a project helper from renyan_utils (not shown
# here); presumably it returns train/test batch iterators - confirm there.
train_iter, test_iter = ry.load_data_fashion_mnist(batch_size)

In [3]:
# Layer widths of the MLP: input features -> hidden units -> output classes.
# (784 presumably is the flattened 28 x 28 Fashion-MNIST image - confirm
# against the data loader.)
num_inputs = 784
num_hiddens = 256
num_outputs = 10

In [4]:
# Hidden layer: weights sampled with numpy from a normal distribution
# (mean 0, std 0.01) and converted to float32; biases start at zero.
W1 = torch.from_numpy(np.random.normal(0, 0.01, (num_inputs, num_hiddens))).float()
b1 = torch.zeros(num_hiddens, dtype=torch.float32)
# Output layer, initialised the same way.
W2 = torch.from_numpy(np.random.normal(0, 0.01, (num_hiddens, num_outputs))).float()
b2 = torch.zeros(num_outputs, dtype=torch.float32)

In [5]:
# Turn on gradient tracking for every learnable tensor. requires_grad_ works
# in place and returns the tensor itself, so params holds W1, b1, W2, b2.
params = [tensor.requires_grad_(True) for tensor in (W1, b1, W2, b2)]

In [6]:
def relu(X):
    """Element-wise ReLU: every entry of X is replaced by max(entry, 0).

    The zero operand is written as the float literal 0.0 on purpose:
    torch.tensor(0) has dtype torch.int64, whereas torch.tensor(0.0) has
    dtype torch.float32.
    """
    zero = torch.tensor(0.0)
    return torch.max(X, zero)

In [7]:
# Model
def net(X):
    """Forward pass of the one-hidden-layer MLP.

    X is reshaped to (-1, num_inputs), sent through the hidden layer
    (W1, b1) followed by relu, and then through the output layer (W2, b2).
    The raw output scores are returned; no softmax is applied here.
    """
    flat = X.view(-1, num_inputs)
    hidden = relu(flat @ W1 + b1)
    return hidden @ W2 + b2

In [8]:
# Loss
# CrossEntropyLoss is fed the raw scores returned by net (net applies no
# softmax itself); with its default reduction it averages over the batch.
loss = torch.nn.CrossEntropyLoss()

In [9]:
# Train the from-scratch model with the project training helper.
num_epochs = 5
# NOTE(review): lr = 100 is unusually large for SGD. Presumably the manual
# update inside ry.train_fashion_mnist divides the gradient by batch_size even
# though CrossEntropyLoss already averages over the batch, which would make
# the effective step about 100 / 256 - confirm in renyan_utils.
lr = 100
ry.train_fashion_mnist(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 0.0031, train acc 0.710, test acc 0.796
epoch 2, loss 0.0019, train acc 0.824, test acc 0.822
epoch 3, loss 0.0017, train acc 0.846, test acc 0.840
epoch 4, loss 0.0016, train acc 0.856, test acc 0.763
epoch 5, loss 0.0015, train acc 0.864, test acc 0.831


## Simple Version

In [2]:
import torch
from torch import nn
from torch.nn import init
import numpy as np
import renyan_utils as ry

In [12]:
# Build a 2 x 2 integer tensor and prepend a leading axis of size 1.
a = torch.tensor([[1, 2], [3, 4]]).unsqueeze(0)
# Last expression of the cell: the notebook displays the resulting shape.
a.shape

torch.Size([1, 2, 2])

In [13]:
# Sanity-check ry.FlattenLayer (project helper from renyan_utils, not shown
# here) on the (1, 2, 2) tensor `a` built in the previous cell; the recorded
# output shows the result has shape (1, 4).
fl = ry.FlattenLayer()
fl(a).shape

torch.Size([1, 4])

In [18]:
# Layer widths: input features -> hidden units -> output classes.
num_inputs = 784
num_hiddens = 256
num_outputs = 10

# The MLP expressed with nn modules: flatten, Linear, ReLU, Linear.
net = nn.Sequential(
    ry.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)

# Re-initialise every parameter in place from a normal distribution
# (mean 0, std 0.01).
# NOTE(review): net.parameters() yields the biases too, so they are also drawn
# from this distribution, whereas the from-scratch version starts b1/b2 at
# zero - confirm the difference is intended.
for tensor in net.parameters():
    init.normal_(tensor, mean=0, std=0.01)

In [20]:
# Hyper-parameters for the nn-based run.
batch_size = 256
num_epochs = 5

# Data iterators come from the project helper in renyan_utils (not shown here).
train_iter, test_iter = ry.load_data_fashion_mnist(batch_size)

# Cross-entropy on the raw scores of net, optimised with plain SGD.
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.5)

# The params / lr slots are passed as None because an optimizer is supplied;
# presumably the helper then uses the optimizer for the update step - confirm
# in renyan_utils.
ry.train_fashion_mnist(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

epoch 1, loss 0.0031, train acc 0.699, test acc 0.814
epoch 2, loss 0.0019, train acc 0.818, test acc 0.776
epoch 3, loss 0.0016, train acc 0.844, test acc 0.811
epoch 4, loss 0.0015, train acc 0.856, test acc 0.830
epoch 5, loss 0.0014, train acc 0.866, test acc 0.795
