# Multilayer Perceptron

In [1]:
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

## XOR training with single layer

In [2]:
x = torch.FloatTensor([[0,0],
                       [1,0],
                       [0,1],
                       [1,1]]).to(device)
y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

linear = torch.nn.Linear(2, 1, bias = True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear, sigmoid).to(device)

In [3]:
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)

In [4]:
for epoch in range(10001):
    optimizer.zero_grad()
    hypothesis = model(x)
    
    cost = criterion(hypothesis, y)
    cost.backward()
    optimizer.step()
    
    if epoch % 1000 == 0:
        print(epoch, cost.item())

0 0.7273974418640137
1000 0.6931471824645996
2000 0.6931471824645996
3000 0.6931471824645996
4000 0.6931471824645996
5000 0.6931471824645996
6000 0.6931471824645996
7000 0.6931471824645996
8000 0.6931471824645996
9000 0.6931471824645996
10000 0.6931471824645996


In [5]:
# prediction
with torch.no_grad():
    predicted = (model(x) > 0.5).float()
    accuracy = (predicted == y).float().mean()
    print('\nHypotheis: ', hypothesis.detach().cpu().numpy(), '\nCorrect: ', accuracy.item())


Hypotheis:  [[0.5]
 [0.5]
 [0.5]
 [0.5]] 
Correct:  0.5


 ## Backpropagation with low level implemtation

In [6]:
# nn Layers
w1 = torch.zeros((2, 2), requires_grad=True).to(device)
b1 = torch.zeros(2, requires_grad=True).to(device)
w2 = torch.zeros((2, 1), requires_grad=True).to(device)
b2 = torch.zeros(1, requires_grad=True).to(device)

In [9]:
def sigmoid(x):
    # sigmoid function
    return 1. / (1. + torch.exp(-x))

def sigmoid_prime(x):
    # derivative of the sigmoid function
    return sigmoid(x) * (1 - sigmoid(x))

In [10]:
learning_rate = 0.1
for epoch in range(10001):
    # forward
    l1 = torch.add(torch.matmul(x, w1), b1)
    a1 = sigmoid(l1)
    l2 = torch.add(torch.matmul(a1, w2), b2)
    y_pred = sigmoid(l2)
    
    cost = -torch.mean(y * torch.log(y_pred) + (1 - y) * torch.log(1 - y_pred))
    
    # back propagation
    # loss derivateive
    dy_pred = (y_pred - y)  / (y_pred - (1. - y_pred) + 1e-7)
    
    # layer2
    dl2 = dy_pred * sigmoid_prime(l2)
    db2 = dl2
    dw2 = torch.matmul(torch.transpose(a1, 0, 1), db2)
    
    # layer1
    da1 = torch.matmul(db2,torch.transpose(w2, 0, 1))
    dl1 = da1 * sigmoid_prime(l1)
    db1 = dl1
    dw1 = torch.matmul(torch.transpose(x, 0, 1), db1)
    
    # weight update
    w1 = w1 - learning_rate * dw1
    b1 = b1 - learning_rate * torch.mean(db1, 0)
    w2 = w2 - learning_rate * dw2
    b2 = b2 - learning_rate * torch.mean(db2, 0)
    
    if epoch % 1000 == 0:
        print(epoch, cost.item())

0 0.6931471824645996
1000 0.6931471824645996
2000 0.6931471824645996
3000 0.6931471824645996
4000 0.6931471824645996
5000 0.6931471824645996
6000 0.6931471824645996
7000 0.6931471824645996
8000 0.6931471824645996
9000 0.6931471824645996
10000 0.6931471824645996


## XOR training with multilayer 

In [11]:
linear1 = torch.nn.Linear(2, 2, bias = True)
linear2 = torch.nn.Linear(2, 1, bias = True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)

In [12]:
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

In [13]:
for epoch in range(100001):
    optimizer.zero_grad()
    hypothesis = model(x)
    
    cost = criterion(hypothesis, y)
    cost.backward()
    optimizer.step()
    if epoch % 10000 == 0:
        print(epoch, cost.item())

0 0.700675368309021
10000 0.36864450573921204
20000 0.3513495624065399
30000 0.3491658568382263
40000 0.34833723306655884
50000 0.3479047417640686
60000 0.3476399779319763
70000 0.3474620282649994
80000 0.3473343849182129
90000 0.3472381830215454
100000 0.34716323018074036


In [14]:
# prediction
with torch.no_grad():
    predicted = (model(x) > 0.5).float()
    accuracy = (predicted == y).float().mean()
    print('\nHypotheis: ', hypothesis.detach().cpu().numpy(), "\nCorrect: ", accuracy.item())


Hypotheis:  [[5.959064e-04]
 [9.993474e-01]
 [4.997455e-01]
 [5.003001e-01]] 
Correct:  0.5
