# AI VIET NAM
## Multilayer Perceptron
**Nguyen Quoc Thai**

## 1. MLP

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchsummary import summary

**Model**

In [None]:
# Two chained 1 -> 1 linear layers followed by a sigmoid.
model = nn.Sequential(
    *[nn.Linear(in_features=1, out_features=1) for _ in range(2)],
    nn.Sigmoid(),
)

In [None]:
print(model)

Sequential(
  (0): Linear(in_features=1, out_features=1, bias=True)
  (1): Linear(in_features=1, out_features=1, bias=True)
  (2): Sigmoid()
)


In [None]:
# torchsummary defaults to device="cuda" and feeds a CUDA tensor whenever a GPU
# is visible; this model is never moved off the CPU, so pin the device.
summary(model, (1, 1), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1, 1]               2
            Linear-2                 [-1, 1, 1]               2
           Sigmoid-3                 [-1, 1, 1]               0
Total params: 4
Trainable params: 4
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [None]:
# Show the learnable tensors of each sub-module, one state_dict per line.
print(*(module.state_dict() for module in model.children()), sep="\n")

OrderedDict([('weight', tensor([[-0.8000]])), ('bias', tensor([-0.6401]))])
OrderedDict([('weight', tensor([[-0.1822]])), ('bias', tensor([0.8672]))])
OrderedDict()


In [None]:
# Two chained 2 -> 2 linear layers followed by a sigmoid.
model = nn.Sequential(
    *[nn.Linear(in_features=2, out_features=2) for _ in range(2)],
    nn.Sigmoid(),
)

In [None]:
print(model)

Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
  (2): Sigmoid()
)


In [None]:
# torchsummary defaults to device="cuda" and feeds a CUDA tensor whenever a GPU
# is visible; this model is never moved off the CPU, so pin the device.
summary(model, (1, 2), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1, 2]               6
            Linear-2                 [-1, 1, 2]               6
           Sigmoid-3                 [-1, 1, 2]               0
Total params: 12
Trainable params: 12
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [None]:
# Walk the sub-modules in order and print each one's parameters.
for module in model.children():
    params = module.state_dict()
    print(params)

OrderedDict([('weight', tensor([[0.6592, 0.5536],
        [0.0871, 0.0487]])), ('bias', tensor([ 0.1435, -0.4404]))])
OrderedDict([('weight', tensor([[-0.3774,  0.5080],
        [ 0.3373,  0.5463]])), ('bias', tensor([ 0.6397, -0.4851]))])
OrderedDict()


**Sample**

In [None]:
# One training sample: a two-feature input and its float target.
x, y = torch.tensor([1.0, 2.0]), torch.tensor([0.0])

In [None]:
x, y

(tensor([1., 2.]), tensor([0.]))

### BCELoss

In [None]:
# 2 -> 2 -> 1 network with a sigmoid on the single output.
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=2),
    nn.Linear(in_features=2, out_features=1),
    nn.Sigmoid(),
)

In [None]:
print(model)

Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=1, bias=True)
  (2): Sigmoid()
)


In [None]:
# torchsummary defaults to device="cuda" and feeds a CUDA tensor whenever a GPU
# is visible; this model is never moved off the CPU, so pin the device.
summary(model, (1, 2), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1, 2]               6
            Linear-2                 [-1, 1, 1]               3
           Sigmoid-3                 [-1, 1, 1]               0
Total params: 9
Trainable params: 9
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [None]:
# Show the randomly initialised parameters of each sub-module, one per line.
print(*(module.state_dict() for module in model.children()), sep="\n")

OrderedDict([('weight', tensor([[0.1796, 0.1219],
        [0.6184, 0.0992]])), ('bias', tensor([-0.4525, -0.6271]))])
OrderedDict([('weight', tensor([[-0.0168,  0.1894]])), ('bias', tensor([-0.1033]))])
OrderedDict()


In [None]:
# Overwrite every weight and bias with 0.1 so the forward pass can be checked
# by hand. The fill happens in place, outside of autograd tracking.
with torch.no_grad():
    for param in model.parameters():
        param.fill_(0.1)

In [None]:
# Confirm that every parameter now holds the constant value.
for module in model.children():
    params = module.state_dict()
    print(params)

OrderedDict([('weight', tensor([[0.1000, 0.1000],
        [0.1000, 0.1000]])), ('bias', tensor([0.1000, 0.1000]))])
OrderedDict([('weight', tensor([[0.1000, 0.1000]])), ('bias', tensor([0.1000]))])
OrderedDict()


In [None]:
y_pred = model(x)
y_pred

tensor([0.5449], grad_fn=<SigmoidBackward0>)

**Activation**

In [None]:
import torch.nn as nn

# Sigmoid applied element-wise to a positive and a negative value.
act = nn.Sigmoid()
# Named `inputs` so the builtin `input()` is not shadowed for the rest of the session.
inputs = torch.tensor([0.18, -0.18])
act(inputs)

tensor([0.5449, 0.4551])

In [None]:
# 2 -> 2 -> 1 network with a sigmoid after the hidden layer and on the output.
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=2),
    nn.Sigmoid(),
    nn.Linear(in_features=2, out_features=1),
    nn.Sigmoid(),
)

In [None]:
# torchsummary defaults to device="cuda" and feeds a CUDA tensor whenever a GPU
# is visible; this model is never moved off the CPU, so pin the device.
summary(model, (1, 2), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1, 2]               6
           Sigmoid-2                 [-1, 1, 2]               0
            Linear-3                 [-1, 1, 1]               3
           Sigmoid-4                 [-1, 1, 1]               0
Total params: 9
Trainable params: 9
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [None]:
import torch.nn as nn

# Tanh applied element-wise to a positive and a negative value.
act = nn.Tanh()
# Named `inputs` so the builtin `input()` is not shadowed for the rest of the session.
inputs = torch.tensor([0.18, -0.18])
act(inputs)

tensor([ 0.1781, -0.1781])

In [None]:
# 2 -> 2 -> 1 network with a tanh hidden activation and a sigmoid output.
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=2),
    nn.Tanh(),
    nn.Linear(in_features=2, out_features=1),
    nn.Sigmoid(),
)

In [None]:
# torchsummary defaults to device="cuda" and feeds a CUDA tensor whenever a GPU
# is visible; this model is never moved off the CPU, so pin the device.
summary(model, (1, 2), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1, 2]               6
              Tanh-2                 [-1, 1, 2]               0
            Linear-3                 [-1, 1, 1]               3
           Sigmoid-4                 [-1, 1, 1]               0
Total params: 9
Trainable params: 9
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [None]:
import torch.nn as nn

# ReLU applied element-wise to a positive and a negative value.
act = nn.ReLU()
# Named `inputs` so the builtin `input()` is not shadowed for the rest of the session.
inputs = torch.tensor([0.18, -0.18])
act(inputs)

tensor([0.1800, 0.0000])

In [None]:
# 2 -> 2 -> 1 network with a ReLU hidden activation and a sigmoid output.
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=2),
    nn.ReLU(),
    nn.Linear(in_features=2, out_features=1),
    nn.Sigmoid(),
)

In [None]:
# torchsummary defaults to device="cuda" and feeds a CUDA tensor whenever a GPU
# is visible; this model is never moved off the CPU, so pin the device.
summary(model, (1, 2), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1, 2]               6
              ReLU-2                 [-1, 1, 2]               0
            Linear-3                 [-1, 1, 1]               3
           Sigmoid-4                 [-1, 1, 1]               0
Total params: 9
Trainable params: 9
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


**Loss**

In [None]:
import torch.nn as nn
loss_fn = nn.BCELoss()

In [None]:
y_pred

tensor([0.5449], grad_fn=<SigmoidBackward0>)

In [None]:
y

tensor([0.])

In [None]:
# `model` has been rebound by the activation demos above, so the `y_pred`
# computed earlier belongs to a network the optimizer created below never sees:
# backward() would fill gradients on the old parameters and optimizer.step()
# would leave the current `model` untouched. Re-run the forward pass on the
# current `model` first, using the same constant 0.1 initialisation as before
# so the numbers stay hand-checkable.
for p in model.parameters():
    nn.init.constant_(p, 0.1)
y_pred = model(x)
loss = loss_fn(y_pred, y)
loss

tensor(0.7872, grad_fn=<BinaryCrossEntropyBackward0>)

In [None]:
# Parameters before the optimisation step, one state_dict per sub-module.
print(*(module.state_dict() for module in model.children()), sep="\n")

OrderedDict([('weight', tensor([[0.1000, 0.1000],
        [0.1000, 0.1000]])), ('bias', tensor([0.1000, 0.1000]))])
OrderedDict()
OrderedDict([('weight', tensor([[0.1000, 0.1000],
        [0.1000, 0.1000]])), ('bias', tensor([0.1000, 0.1000]))])


In [None]:
# Plain SGD over all parameters of the current `model`.
learning_rate = 0.1
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)

In [None]:
loss.backward()

In [None]:
optimizer.step()

In [None]:
# Parameters after the optimisation step, for comparison with the values above.
for module in model.children():
    params = module.state_dict()
    print(params)

OrderedDict([('weight', tensor([[0.0946, 0.0891],
        [0.0946, 0.0891]])), ('bias', tensor([0.0946, 0.0946]))])
OrderedDict([('weight', tensor([[0.0782, 0.0782]])), ('bias', tensor([0.0455]))])
OrderedDict()


### CrossEntropyLoss

In [None]:
# Same two-feature input; the target is now a 0-dim integer class index.
x, y = torch.tensor([1.0, 2.0]), torch.tensor(0)

In [None]:
# 2 -> 2 -> 2 network that emits raw scores (no final activation).
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=2),
    nn.ReLU(),
    nn.Linear(in_features=2, out_features=2),
)

In [None]:
print(model)

Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): ReLU()
  (2): Linear(in_features=2, out_features=2, bias=True)
)


In [None]:
# torchsummary defaults to device="cuda" and feeds a CUDA tensor whenever a GPU
# is visible; this model is never moved off the CPU, so pin the device.
summary(model, (1, 2), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1, 2]               6
              ReLU-2                 [-1, 1, 2]               0
            Linear-3                 [-1, 1, 2]               6
Total params: 12
Trainable params: 12
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [None]:
y_pred = model(x)
y_pred

tensor([0.0580, 0.4275], grad_fn=<AddBackward0>)

In [None]:
import torch.nn as nn
loss_fn = nn.CrossEntropyLoss()

In [None]:
y = torch.tensor(0)
y

tensor(0)

In [None]:
y_pred

tensor([0.0580, 0.4275], grad_fn=<AddBackward0>)

In [None]:
loss_fn(y_pred, y)

tensor(0.8949, grad_fn=<NllLossBackward0>)

## 2. Classification

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [None]:
data = load_iris()

In [None]:
data.data.shape

(150, 4)

In [None]:
data.target.shape

(150,)

In [None]:
# Hold out 30% of the samples for evaluation.
# random_state pins the shuffle so the split is the same on every run;
# stratify keeps the class proportions of the full dataset in both parts.
X_train, X_test, Y_train, Y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.3,
    random_state=42,
    stratify=data.target,
)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
scaler = StandardScaler()

In [None]:
scaler.fit(X_train)

In [None]:
# Apply the already-fitted scaler to both splits.
X_train, X_test = (scaler.transform(split) for split in (X_train, X_test))

In [None]:
X_train

array([[ 0.71501041,  0.01132393,  1.04876467,  0.87015288],
       [-0.86170883,  1.67594191, -1.12907069, -1.21821403],
       [-1.10428102, -0.22647864, -1.24075456, -1.21821403],
       [ 2.29172964, -0.22647864,  1.38381627,  1.52276754],
       [ 1.07886869, -0.22647864,  0.88123888,  1.52276754],
       [-0.86170883,  0.72473164, -1.18491262, -1.21821403],
       [-0.98299492, -1.89109662, -0.17975784, -0.17403058],
       [-0.86170883,  1.67594191, -0.96154489, -0.95716817],
       [-0.61913664,  1.43813934, -1.18491262, -1.21821403],
       [-1.22556711, -0.22647864, -1.24075456, -1.0876911 ],
       [ 0.10857993, -0.22647864,  0.32281955,  0.47858408],
       [-0.98299492,  1.20033677, -1.24075456, -1.21821403],
       [-0.74042273,  0.96253421, -1.18491262, -1.21821403],
       [-1.4681393 ,  0.72473164, -1.24075456, -1.0876911 ],
       [-1.34685321,  0.2491265 , -1.29659649, -1.21821403],
       [ 0.71501041, -0.46428121,  0.37866148,  0.21753822],
       [-0.74042273,  2.

In [None]:
X_test

array([[ 0.9575826 , -0.22647864,  0.43450342,  0.34806115],
       [ 1.20015479,  0.2491265 ,  1.2721324 ,  1.52276754],
       [ 0.59372431,  0.48692907,  0.60202921,  0.60910702],
       [ 0.35115212, -0.70208377,  0.21113569,  0.21753822],
       [-0.25527835, -0.22647864,  0.49034535,  0.47858408],
       [-0.13399226, -0.46428121,  0.32281955,  0.21753822],
       [-0.13399226, -0.70208377,  0.49034535,  0.21753822],
       [ 2.53430183,  1.67594191,  1.55134207,  1.13119874],
       [ 1.68529917,  0.2491265 ,  1.32797434,  0.87015288],
       [-0.37656445,  2.62715218, -1.24075456, -1.21821403],
       [ 0.22986603, -2.12889918,  0.76955501,  0.47858408],
       [-1.83199759, -0.22647864, -1.40828035, -1.34873696],
       [-0.25527835, -0.70208377,  0.71371308,  1.13119874],
       [-1.22556711,  0.72473164, -0.96154489, -1.21821403],
       [ 0.59372431, -0.46428121,  1.10460661,  0.87015288],
       [-0.86170883,  0.48692907, -1.07322876, -0.82664524],
       [ 2.29172964, -1.

In [None]:
# Features become float32 tensors. Labels are pinned to int64 because
# CrossEntropyLoss expects class-index targets of that dtype, while NumPy's
# default integer width is platform dependent (it can be 32-bit).
X_train = torch.tensor(X_train, dtype=torch.float32)
Y_train = torch.tensor(Y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
Y_test = torch.tensor(Y_test, dtype=torch.long)

In [None]:
X_train

tensor([[ 0.7150,  0.0113,  1.0488,  0.8702],
        [-0.8617,  1.6759, -1.1291, -1.2182],
        [-1.1043, -0.2265, -1.2408, -1.2182],
        [ 2.2917, -0.2265,  1.3838,  1.5228],
        [ 1.0789, -0.2265,  0.8812,  1.5228],
        [-0.8617,  0.7247, -1.1849, -1.2182],
        [-0.9830, -1.8911, -0.1798, -0.1740],
        [-0.8617,  1.6759, -0.9615, -0.9572],
        [-0.6191,  1.4381, -1.1849, -1.2182],
        [-1.2256, -0.2265, -1.2408, -1.0877],
        [ 0.1086, -0.2265,  0.3228,  0.4786],
        [-0.9830,  1.2003, -1.2408, -1.2182],
        [-0.7404,  0.9625, -1.1849, -1.2182],
        [-1.4681,  0.7247, -1.2408, -1.0877],
        [-1.3469,  0.2491, -1.2966, -1.2182],
        [ 0.7150, -0.4643,  0.3787,  0.2175],
        [-0.7404,  2.3893, -1.1849, -1.3487],
        [-0.8617,  1.4381, -1.1849, -0.9572],
        [ 0.1086,  0.2491,  0.6579,  0.8702],
        [-0.0127, -0.7021,  0.8254,  1.6533],
        [ 0.8363, -0.2265,  0.8812,  1.1312],
        [-1.2256, -0.2265, -1.2408

In [None]:
Y_train

tensor([2, 0, 0, 2, 2, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 2, 2, 0, 2, 0,
        0, 1, 0, 0, 2, 1, 1, 0, 1, 2, 1, 2, 1, 0, 2, 0, 2, 1, 2, 2, 0, 0, 1, 0,
        1, 1, 0, 2, 0, 2, 1, 2, 1, 2, 2, 2, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 2, 2,
        2, 1, 1, 0, 2, 0, 0, 0, 1, 1, 1, 0, 2, 2, 1, 2, 1, 1, 1, 2, 0, 1, 1, 2,
        1, 1, 0, 1, 2, 2, 1, 2, 2])

In [None]:
# 4 input features -> 8 hidden units (ReLU) -> 3 raw class scores.
model_classifier = nn.Sequential(
    nn.Linear(in_features=4, out_features=8),
    nn.ReLU(),
    nn.Linear(in_features=8, out_features=3),
)

In [None]:
model_classifier

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=3, bias=True)
)

In [None]:
# torchsummary defaults to device="cuda" and feeds a CUDA tensor whenever a GPU
# is visible; this model is never moved off the CPU, so pin the device.
summary(model_classifier, (1, 4), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1, 8]              40
              ReLU-2                 [-1, 1, 8]               0
            Linear-3                 [-1, 1, 3]              27
Total params: 67
Trainable params: 67
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [None]:
X_train[0]

tensor([0.7150, 0.0113, 1.0488, 0.8702])

In [None]:
y_pred = model_classifier(X_train[0])
y_pred

tensor([ 0.0074, -0.2822,  0.1523], grad_fn=<AddBackward0>)

In [None]:
loss_fn = nn.CrossEntropyLoss()

In [None]:
loss = loss_fn(y_pred, Y_train[0])
loss

tensor(0.9214, grad_fn=<NllLossBackward0>)

In [None]:
loss.item()

0.9213845729827881

In [None]:
# Plain SGD over all classifier parameters.
learning_rate = 0.01
optimizer = optim.SGD(params=model_classifier.parameters(), lr=learning_rate)

In [None]:
# Per-sample SGD: one parameter update for every training example.
# `losses` collects the mean training loss of each epoch.
num_epochs = 20
losses = []
for epoch in range(num_epochs):
    sample_losses = []
    for features, label in zip(X_train, Y_train):
        # Clear gradients left over from the previous sample before the new pass.
        optimizer.zero_grad()

        logits = model_classifier(features)
        sample_loss = loss_fn(logits, label)
        sample_loss.backward()
        optimizer.step()

        sample_losses.append(sample_loss.item())
    losses.append(sum(sample_losses) / len(sample_losses))

In [None]:
losses

[1.0220808364096141,
 0.7892919631231399,
 0.5656777904147193,
 0.43206991709413983,
 0.3591597341001034,
 0.31344637402466363,
 0.2815234812508736,
 0.25728054582363086,
 0.23809761630726003,
 0.22223558177107147,
 0.2087183050567373,
 0.19690465535746798,
 0.1863620154160474,
 0.17690506201636577,
 0.1683366762330046,
 0.16056279294702802,
 0.15347826486963423,
 0.14700075439288326,
 0.14108285806917895,
 0.13565213933332068]

In [None]:
# Inference only: score the whole test set in one call with autograd tracking
# disabled. Y_pred holds the raw model outputs at this point.
with torch.no_grad():
    Y_pred = model_classifier(X_test)

In [None]:
# Collapse each row of scores to the index of its largest entry.
Y_pred = Y_pred.argmax(dim=1)

In [None]:
Y_pred

tensor([1, 2, 1, 1, 1, 1, 1, 2, 2, 0, 1, 0, 2, 0, 2, 0, 2, 1, 2, 2, 1, 0, 2, 2,
        2, 0, 1, 1, 2, 0, 1, 2, 0, 0, 1, 0, 2, 1, 2, 1, 0, 1, 1, 2, 2])

In [None]:
# Accuracy = fraction of matching labels. The mean is computed as a single
# tensor operation instead of iterating the tensor with Python's builtin sum().
(Y_pred == Y_test).float().mean()

tensor(0.9778)