In [1]:
## MultiLayer Perception
### Back propagation

In [2]:
import torch

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
X = torch.FloatTensor([[0, 0], 
                       [0, 1], 
                       [1, 0], 
                       [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

In [5]:
# nn layers
w1 = torch.Tensor(2, 2).to(device)
b1 = torch.Tensor(2).to(device)
w2 = torch.Tensor(2, 1).to(device)
b2 = torch.Tensor(1).to(device)

In [6]:
def sigmoid(x):
    # sigmoid function
    return 1.0 / (1.0 + torch.exp(-x))
    # return torch.div(torch.tensor(1), torch.add(torch.tensor(1.0), torch.exp(-x)))

In [7]:
def sigmoid_prime(x):
    # derivative of the sigmoid function
    return sigmoid(x) * (1 - sigmoid(x))

In [8]:
learning_rate = 1
for step in range(10001):
    # forward
    l1 = torch.add(torch.matmul(X, w1), b1)
    a1 = sigmoid(l1)
    l2 = torch.add(torch.matmul(a1, w2), b2)
    Y_pred = sigmoid(l2)
    
    # Cost_Cross Entropy
    cost = -torch.mean(Y * torch.log(Y_pred) + (1 - Y) * torch.log(1 - Y_pred))
    
    # Back prop (chain rule)
    # Loss derivative!
    d_Y_pred = (Y_pred - Y) / (Y_pred * (1.0 - Y_pred) + 1e-7)
    
    # Layer 2
    d_l2 = d_Y_pred * sigmoid_prime(l2)
    d_b2 = d_l2
    d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_b2)
    
    # Layer 1
    d_a1 = torch.matmul(d_b2, torch.transpose(w2, 0, 1))
    d_l1 = d_a1 * sigmoid_prime(l1)
    d_b1 = d_l1
    d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_b1)
    
    # Weight update
    w1 = w1 - learning_rate * d_w1
    b1 = b1 - learning_rate * torch.mean(d_b1, 0)
    w2 = w2 - learning_rate * d_w2
    b2 = b2 - learning_rate * torch.mean(d_b2, 0)
    
    if step % 100 == 0:
        print(step, cost.item())


0 0.6931471824645996
100 0.3873674273490906
200 0.3557840585708618
300 0.3515129089355469
400 0.3499065339565277
500 0.34907492995262146
600 0.3485695719718933
700 0.3482309579849243
800 0.34798890352249146
900 0.34780746698379517
1000 0.34766656160354614
1100 0.3475539982318878
1200 0.34746208786964417
1300 0.3473857045173645
1400 0.3473212420940399
1500 0.3472661077976227
1600 0.34721824526786804
1700 0.3471767008304596
1800 0.34714022278785706
1900 0.3471076488494873
2000 0.34707850217819214
2100 0.3470524251461029
2200 0.34702885150909424
2300 0.3470073342323303
2400 0.34698793292045593
2500 0.3469700217247009
2600 0.3469536602497101
2700 0.3469385802745819
2800 0.34692472219467163
2900 0.3469116687774658
3000 0.34689947962760925
3100 0.34688833355903625
3200 0.34687793254852295
3300 0.346868097782135
3400 0.3468588888645172
3500 0.3468502163887024
3600 0.34684211015701294
3700 0.3468343913555145
3800 0.34682708978652954
3900 0.34682005643844604
4000 0.3468136787414551
4100 0.34680

In [9]:
### Xor-nn

In [10]:
#nn Layers
linear1 = torch.nn.Linear(2,2, bias = True)
linear2 = torch.nn.Linear(2,1, bias = True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)

#define cost/loss & optimizer
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr = 1)
epoch = 10000
for step in range(epoch + 1):
    optimizer.zero_grad()
    hypothesis = model(X)
    
    #cos/loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if step % 100 == 0:
        print(step, cost.item())

0 0.6936068534851074
100 0.6889212727546692
200 0.6634229421615601
300 0.5668157339096069
400 0.2764393091201782
500 0.08859294652938843
600 0.04870596528053284
700 0.033078014850616455
800 0.02489047311246395
900 0.019889725372195244
1000 0.01653207466006279
1100 0.01412778440862894
1200 0.012324322015047073
1300 0.010923009365797043
1400 0.009803682565689087
1500 0.008889637887477875
1600 0.008129517547786236
1700 0.007487570401281118
1800 0.006938502192497253
1900 0.006463602650910616
2000 0.0060488334856927395
2100 0.0056835962459445
2200 0.005359520670026541
2300 0.005070063751190901
2400 0.00480996910482645
2500 0.004575020633637905
2600 0.004361758008599281
2700 0.004167293664067984
2800 0.003989296965301037
2900 0.003825799096375704
3000 0.0036750275176018476
3100 0.0035356010776013136
3200 0.0034062727354466915
3300 0.003286037826910615
3400 0.0031739205587655306
3500 0.00306911114603281
3600 0.0029709942173212767
3700 0.0028789250645786524
3800 0.00279231951572001
3900 0.0027

In [11]:
### xor-nn-wide-deep

In [12]:
#nn Layers
linear1 = torch.nn.Linear(2,10, bias = True)
linear2 = torch.nn.Linear(10,10, bias = True)
linear3 = torch.nn.Linear(10,10, bias = True)
linear4 = torch.nn.Linear(10,1, bias = True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid, linear3, sigmoid, linear4, sigmoid).to(device)

#define cost/loss & optimizer
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr = 1)
epoch = 10000
for step in range(epoch + 1):
    optimizer.zero_grad()
    hypothesis = model(X)
    
    #cos/loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if step % 100 == 0:
        print(step, cost.item())

0 0.6946343779563904
100 0.6931463479995728
200 0.6931443810462952
300 0.6931424736976624
400 0.6931405067443848
500 0.6931384801864624
600 0.6931363940238953
700 0.6931343078613281
800 0.6931320428848267
900 0.6931296586990356
1000 0.6931271553039551
1100 0.693124532699585
1200 0.6931216716766357
1300 0.6931185126304626
1400 0.6931151151657104
1500 0.6931114196777344
1600 0.6931072473526001
1700 0.6931027173995972
1800 0.6930976510047913
1900 0.6930917501449585
2000 0.6930851340293884
2100 0.6930775046348572
2200 0.6930686235427856
2300 0.6930582523345947
2400 0.6930457353591919
2500 0.6930309534072876
2600 0.6930128335952759
2700 0.6929904222488403
2800 0.6929621696472168
2900 0.692925751209259
3000 0.6928777098655701
3100 0.6928122043609619
3200 0.6927199363708496
3300 0.6925836801528931
3400 0.6923707723617554
3500 0.692011833190918
3600 0.6913390755653381
3700 0.6898604035377502
3800 0.6856058239936829
3900 0.6649138927459717
4000 0.489219069480896
4100 0.040626104921102524
4200 0