### Disclaimer

After watching Andrej Kaparthy's videos on neural networks (https://www.youtube.com/@AndrejKarpathy), I was encouraged to play with PyTorch myself. I might have copied some code parts from him.

In [1]:
import torch

Experimental data: AND

In [2]:
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]]).float()
Y = torch.tensor([[0], [0], [0], [1]]).float()

print(f'{X.shape=}')
print(f'{Y.shape=}')

X.shape=torch.Size([4, 2])
Y.shape=torch.Size([4, 1])


Initialize two neural network layers

In [3]:
# layer 1
w1 = torch.randn((2, 8)).float()
w1.requires_grad = True # for efficiency reasons, gradients must be activated manually for required parameters

b1 = torch.randn((8,)).float()
b1.requires_grad = True

print(f'{w1=}')
print(f'{b1=}')

# layer 2
w2 = torch.randn((8, 1)).float()
w2.requires_grad = True

b2 = torch.randn((1,)).float()
b2.requires_grad = True

print(f'{w2=}')
print(f'{b2=}')

parameters = [w1, b1, w2, b2]

w1=tensor([[ 0.0707, -0.8957, -0.2866, -1.6088, -0.2377, -0.1686, -0.5770, -0.4522],
        [ 1.3622, -0.2389, -0.6008,  0.8820,  0.5197,  0.8727,  0.8527,  2.1509]],
       requires_grad=True)
b1=tensor([ 0.0498, -2.4201,  0.6269, -0.5184, -1.0003,  0.0284,  0.3820, -1.3297],
       requires_grad=True)
w2=tensor([[-0.6461],
        [-0.5591],
        [-2.4527],
        [ 0.9148],
        [-0.0180],
        [-1.7615],
        [ 0.6973],
        [-0.2065]], requires_grad=True)
b2=tensor([0.7021], requires_grad=True)


Train the network

In [4]:
n = 10000

for _ in range(n):
    # forward pass
    l1 = torch.tanh(X @ w1 + b1) # layer 1
    logits = torch.tanh(l1 @ w2 + b2) # layer 2

    # loss
    loss = sum((logits - Y)**2)
    print(loss.item())

    # backward pass
    for p in parameters: # torch requires you to reset parameter gradients. Can be set to 0, but None is more efficient
        p.grad = None

    loss.backward()

    #update parameters
    for p in parameters:
        p.data += -0.01 * p.grad

1.5925706624984741
0.7584408521652222
0.7047849893569946
0.6623420715332031
0.6274709701538086
0.5981963276863098
0.5731309652328491
0.551295816898346
0.5319917798042297
0.5147112607955933
0.4990764260292053
0.4848003387451172
0.4716591536998749
0.4594762921333313
0.44810837507247925
0.4374401271343231
0.4273760914802551
0.41783756017684937
0.4087602496147156
0.40008991956710815
0.39178216457366943
0.38379934430122375
0.37611085176467896
0.36868977546691895
0.36151427030563354
0.3545656204223633
0.3478279113769531
0.3412870764732361
0.3349315822124481
0.3287505805492401
0.32273516058921814
0.31687724590301514
0.3111696243286133
0.3056057393550873
0.3001798987388611
0.29488641023635864
0.28972065448760986
0.2846778631210327
0.2797536849975586
0.2749442458152771
0.2702457308769226
0.2656550407409668
0.2611684203147888
0.2567827105522156
0.2524949908256531
0.24830256402492523
0.24420249462127686
0.24019227921962738
0.23626884818077087
0.23243068158626556
0.22867467999458313
0.224999159574

Evaluate

In [5]:
torch.tanh(torch.tanh(X @ w1 + b1) @ w2 + b2)

tensor([[-2.4843e-04],
        [ 3.0571e-04],
        [ 3.3522e-04],
        [ 9.8652e-01]], grad_fn=<TanhBackward0>)