# Back Propagation

## import libraries

In [1]:
import torch
from random import randint

## NN

activation function

In [2]:
def act(z: torch.Tensor) -> torch.Tensor:
  """Apply the hyperbolic-tangent activation element-wise.

  Args:
    z: Tensor of pre-activation values (``torch.tanh`` does not accept
      plain Python floats).

  Returns:
    A tensor with the same shape as ``z`` holding ``tanh(z)``.
  """
  return torch.tanh(z)

derivative of the activation function

In [3]:
def df(z: torch.Tensor) -> torch.Tensor:
  """Return the derivative of the activation function evaluated at ``z``.

  Uses the identity ``d/dz tanh(z) = 1 - tanh(z)**2``, so this is only
  valid while ``act`` is the hyperbolic tangent.

  Args:
    z: Tensor of pre-activation values.

  Returns:
    A tensor with the same shape as ``z`` holding the element-wise
    derivative.
  """
  s = act(z)
  return 1 - s * s

forward

In [4]:
def go_forward(x: torch.Tensor, w1: torch.Tensor, w2: torch.Tensor) -> tuple:
  """Run one sample through the two-layer network.

  The layer sizes are taken from the weight shapes: the last column of
  ``w1`` and the last element of ``w2`` are treated as biases, everything
  before them as connection weights.

  Args:
    x: 1-D input vector with ``w1.shape[1] - 1`` elements.
    w1: Hidden-layer parameters, one row per hidden neuron
      (input weights followed by the bias).
    w2: Output-neuron parameters, 1-D with ``w1.shape[0] + 1`` elements
      (hidden weights followed by the bias).

  Returns:
    A tuple ``(y, z1, s1, z2)``: the network output, the hidden-layer
    pre-activations, the hidden-layer activations, and the output
    neuron's pre-activation.
  """
  # Hidden layer: weighted sum of the inputs plus the bias column.
  z1 = torch.mv(w1[:, :-1], x) + w1[:, -1]
  s1 = act(z1)

  # Output neuron: weighted sum of the hidden activations plus its bias.
  z2 = torch.dot(w2[:-1], s1) + w2[-1]
  y = act(z2)
  return y, z1, s1, z2

seed and initial weights

In [5]:
# Seed torch's global RNG so the initial weights below are the same on
# every run.
torch.manual_seed(1)

# torch.rand samples uniformly from [0, 1); subtracting 0.5 centres the
# initial parameters around zero.
# w1: 2 rows x 4 columns, w2: 3 elements.
w1 = torch.rand(8).reshape(2, 4).sub(0.5)
w2 = torch.rand(3).sub(0.5)

samples

In [6]:
# Training inputs: all eight combinations of three values in {-1, +1},
# one sample per row.
X_train = torch.tensor(
  [
    [-1, -1, -1],
    [-1, -1, 1],
    [-1, 1, -1],
    [1, -1, -1],
    [1, 1, -1],
    [1, -1, 1],
    [-1, 1, 1],
    [1, 1, 1],
  ],
  dtype=torch.float32,
)

# Target output for each row of X_train, in the same order.
y_train = torch.tensor(
  [-1, 1, -1, 1, -1, 1, -1, -1],
  dtype=torch.float32,
)

parameters

In [7]:
# Step size applied to every weight update in the training loop.
ln = 0.05
# Number of training iterations (one randomly chosen sample each).
N = 5000
# Number of training samples.
total = y_train.shape[0]

## Back Propagation

In [8]:
# Online (one sample per step) gradient-descent training with
# back-propagation.
#
# All sample indices are drawn up front from torch's RNG so the run is
# reproducible under torch.manual_seed; Python's `random` module has its own
# state and is not affected by that seed.
sample_order = torch.randint(0, total, (N,)).tolist()

for k in sample_order:
  x = X_train[k]
  # `out` is the output neuron's pre-activation (z2 in go_forward).
  y, z1, s1, out = go_forward(x, w1, w2)

  e = y - y_train[k]                  # error of the network output
  delta = e * df(out)                 # local gradient of the output neuron
  # Local gradients of the hidden neurons. This must be computed before
  # w2 is updated so it uses the weights from the forward pass.
  delta2 = w2[:-1] * delta * df(z1)

  # Output neuron: hidden-to-output weights, then the bias (last element).
  w2[:-1] -= ln * delta * s1
  w2[-1] -= ln * delta

  # Hidden layer: row i of the outer product is (ln * delta2[i]) * x, i.e.
  # the update for hidden neuron i; the last column holds the biases.
  w1[:, :-1] -= torch.outer(ln * delta2, x)
  w1[:, -1] -= ln * delta2

## Testing

In [9]:
# Feed every training sample through the trained network and print the
# prediction (3 decimals) next to its target value.
for x, d in zip(X_train, y_train):
  y, z1, s1, z2 = go_forward(x=x, w1=w1, w2=w2)
  print(f'output {y:.3f} -> {d}')

output -0.960 -> -1.0
output 0.966 -> 1.0
output -0.999 -> -1.0
output 0.966 -> 1.0
output -0.998 -> -1.0
output 0.995 -> 1.0
output -0.998 -> -1.0
output -0.969 -> -1.0


In [10]:
# Notebook cell result: the hidden-layer and output-neuron parameters
# after training.
(w1, w2)

(tensor([[ 0.4476, -0.9389,  0.4359, -0.2581],
         [-0.7970,  1.6001, -0.8061,  0.6937]]),
 tensor([ 1.2324, -2.2670, -0.3373]))