# Lecture 6 notebook
## CS152 September 23, 2018  Neil Rhodes

In [1]:
import math

Given $f(x_1, x_2, w_1, w_2, b) = \frac{1}{1 + e^{-(w_1x_1 + w_2x_2 + b)}}$, we want to find $\frac{\partial f}{\partial w_1}$.

We create an evaluation graph showing the various operations.  We name each node with a single letter:

![Evaluation Graph](images/evaluation_graph.png)

In [4]:
import math

def a(x1, w1):
    """Node a of the evaluation graph: the weighted first input, x1 * w1."""
    weighted_input = x1 * w1
    return weighted_input

def c(x2, w2):
    """Node c of the evaluation graph: the weighted second input, x2 * w2."""
    weighted_input = x2 * w2
    return weighted_input

def d(a, b, c):
    """Node d of the evaluation graph: the sum a + b + c.

    a and c are the two weighted inputs and b is the bias term.
    """
    total = a + b + c
    return total

def e(d):
    """Node e of the evaluation graph: the negation of d."""
    negated = -d
    return negated

def h(e):
    """Node h of the evaluation graph: the natural exponential of e."""
    exponential = math.exp(e)
    return exponential

def i(h):
    """Node i of the evaluation graph: h shifted up by one (h + 1)."""
    shifted = h + 1
    return shifted

def j(i):
    """Node j of the evaluation graph: the reciprocal 1 / i.

    Raises ZeroDivisionError when i is zero.
    """
    reciprocal = 1 / i
    return reciprocal


## Numerical Differentiation
Here, we calculate the value of $\frac{\partial f}{\partial w_1}$ via numerical differentiation:

In [5]:
# Sample inputs (x1, x2), weights (w1, w2) and bias (b) for the worked example.
x1 = 3.0
w1 = 1.0
x2 = -2.0
w2 = 2.0
b = 2.0

# Forward pass through the node functions. Arguments to d are given in its
# declared order (a, b, c) so each value lands on the parameter of the same name.
f = j(i(h(e(d(a(x1, w1), b, c(x2, w2))))))
print(f'f: {f:.2f}')

# Forward-difference estimate: nudge w1 by epsilon, re-evaluate f, and divide
# the change in f by epsilon.
epsilon = .000001
fepsilon = j(i(h(e(d(a(x1, w1 + epsilon), b, c(x2, w2))))))
df_dw1 = (fepsilon - f) / epsilon
print(f'df/dw1: {df_dw1:.2f}')


f: 0.73
df/dw1: 0.59


We calculate the output value of each node:

In [7]:
# Evaluate every node of the graph once, keeping each intermediate output so
# it can be reused when computing derivatives.
a_out = a(x1, w1)
c_out = c(x2, w2)
# d is declared as d(a, b, c): pass the bias second and c's output third.
d_out = d(a_out, b, c_out)
e_out = e(d_out)
h_out = h(e_out)
i_out = i(h_out)
j_out = j(i_out)
# f is the output of the final node j.
f = j_out
print(f"a: {a_out:0.2f}")
print(f"c: {c_out:0.2f}")
print(f"d: {d_out:0.2f}")
print(f"e: {e_out:0.2f}")
print(f"h: {h_out:0.2f}")
print(f"i: {i_out:0.2f}")
print(f"f: {f:0.2f}")

a: 3.00
c: -4.00
d: 1.00
e: -1.00
h: 0.37
i: 1.37
f: 0.73


## Backpropagation
Now, we'll do backpropagation:

In [9]:
def da_dw1(w1):
    """Local derivative of node a = x1 * w1 with respect to w1.

    The result does not depend on w1; it is the value of the module-level
    x1 at call time (x1 is read as a global, not passed in).
    """
    # a(x1, w1) = x1 * w1  =>  da/dw1 = x1
    return x1

def dc_dw2(w2):
    """Local derivative of node c = x2 * w2 with respect to w2.

    The result does not depend on w2; it is the value of the module-level
    x2 at call time (x2 is read as a global, not passed in).
    """
    # c(x2, w2) = x2 * w2  =>  dc/dw2 = x2
    return x2

def dd_da(a):
    """Local derivative of node d = a + b + c with respect to a.

    Always 1; the argument is accepted only for a uniform call shape.
    """
    # d(a, b, c) = a + b + c  =>  dd/da = 1
    return 1

def dd_db(b):
    """Local derivative of node d = a + b + c with respect to b.

    Always 1; the argument is accepted only for a uniform call shape.
    """
    # d(a, b, c) = a + b + c  =>  dd/db = 1
    return 1

def dd_dc(c):
    """Local derivative of node d = a + b + c with respect to c.

    Always 1; the argument is accepted only for a uniform call shape.
    """
    # d(a, b, c) = a + b + c  =>  dd/dc = 1
    return 1

def de_dd(d):
    """Local derivative of node e = -d with respect to d.

    Always -1; the argument is accepted only for a uniform call shape.
    """
    # e(d) = -d  =>  de/dd = -1
    return -1

def dh_de(e):
    """Local derivative of node h = exp(e) with respect to e.

    The exponential is its own derivative, so this returns exp(e).
    """
    # h(e) = math.exp(e)  =>  dh/de = math.exp(e)
    slope = math.exp(e)
    return slope

def di_dh(h):
    """Local derivative of node i = h + 1 with respect to h.

    Always 1; the argument is accepted only for a uniform call shape.
    """
    # i(h) = h + 1  =>  di/dh = 1
    return 1

def dj_di(i):
    """Local derivative of node j = 1 / i with respect to i.

    Returns -1 / i^2. Raises ZeroDivisionError when i is zero.
    """
    # j(i) = 1/i  =>  dj/di = -1/i^2
    i_squared = i * i
    return -1 / i_squared

def df_dj(j):
    """Derivative of the output f with respect to node j.

    f is simply j, so this is always 1; it seeds the backward pass.
    """
    # f(j) = j  =>  df/dj = 1
    return 1

# Backward pass: walk from the output towards the inputs, multiplying each
# node's local derivative by the derivative already accumulated downstream
# (the chain rule).
df_di_out = df_dj(j_out) * dj_di(i_out)
df_dh_out = df_di_out * di_dh(h_out)
df_de_out = df_dh_out * dh_de(e_out)
df_dd_out = df_de_out * de_dd(d_out)
# a, b and c all feed directly into d, so each one chains from df/dd.
df_dc_out = df_dd_out * dd_dc(c_out)
df_da_out = df_dd_out * dd_da(a_out)
df_db_out = df_dd_out * dd_db(b)
# The weights sit one step further back, behind nodes a and c.
df_dw1_out = df_da_out * da_dw1(w1)
df_dw2_out = df_dc_out * dc_dw2(w2)
print(f'df/di: {df_di_out:.2f}')
print(f'df/dh: {df_dh_out:.2f}')
print(f'df/de: {df_de_out:.2f}')
print(f'df/dd: {df_dd_out:.2f}')
print(f'df/dc: {df_dc_out:.2f}')
print(f'df/da: {df_da_out:.2f}')
print(f'df/dw1: {df_dw1_out:.2f}')
print(f'df/dw2: {df_dw2_out:.2f}')
print(f'df/db: {df_db_out:.2f}')

df/di: -0.53
df/dh: -0.53
df/de: -0.20
df/dd: 0.20
df/dc: 0.20
df/da: 0.20
df/dw1: 0.59
df/dw2: -0.39
df/db: 0.20


## Automatic Differentiation using PyTorch
Let's look at how to differentiate using PyTorch

In [11]:
import torch

from torch.autograd import Variable
from torch import Tensor

def f(x1, x2, w1, w2, b):
    """Logistic function of the weighted sum x1*w1 + x2*w2 + b.

    The exponential is taken with torch.exp, so the weighted sum must be
    a torch tensor; the result is 1 / (1 + exp(-weighted_sum)).
    """
    pre_activation = x1*w1 + x2*w2 + b
    return 1/(1+torch.exp(-pre_activation))

# Inputs are plain values; weights and bias are marked requires_grad=True so
# autograd records the operations applied to them.
# In PyTorch >= 0.4 the Variable wrapper is unnecessary. Use the torch.tensor
# factory there: the Tensor(...) constructor does not accept requires_grad.
x1 = Variable(Tensor([3.0])) # in pyTorch ≥0.4: x1 = torch.tensor([3.0])
w1 = Variable(Tensor([1.0]), requires_grad=True) # in pyTorch ≥0.4: w1 = torch.tensor([1.0], requires_grad=True)
x2 = Variable(Tensor([-2.0]))
w2 = Variable(Tensor([2.0]), requires_grad=True)
b = Variable(Tensor([2.0]), requires_grad=True)

result = f(x1, x2, w1, w2, b)
print("f(...) = ", result)

# backward() fills in .grad on every tensor created with requires_grad=True.
result.backward()
print(f'df/dw1: {w1.grad}')
print(f'df/dw2: {w2.grad}')
print(f'df/db: {b.grad}')

f(...) =  Variable containing:
 0.7311
[torch.FloatTensor of size 1]

df/dw1: Variable containing:
 0.5898
[torch.FloatTensor of size 1]

df/dw2: Variable containing:
-0.3932
[torch.FloatTensor of size 1]

df/db: Variable containing:
 0.1966
[torch.FloatTensor of size 1]



Or, using matrix multiplication:

In [46]:
import torch

from torch.autograd import Variable
from torch import Tensor

def sigmoid_layer(w, x, b):
    """Apply sigmoid to the affine map w^T x + b.

    The product is computed with torch.mm, so w.t() and x must be 2-D
    tensors with compatible inner dimensions.
    """
    pre_activation = torch.mm(w.t(), x) + b
    return sigmoid(pre_activation)

def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)) of a torch tensor."""
    denominator = 1+torch.exp(-x)
    return 1/denominator

def relu_layer(w, x, b):
    """Apply relu to the affine map w^T x + b.

    The product is computed with torch.mm, so w.t() and x must be 2-D
    tensors with compatible inner dimensions.
    """
    pre_activation = torch.mm(w.t(), x) + b
    return relu(pre_activation)

def relu(x):
    """Element-wise rectifier: values below zero are clamped to zero."""
    rectified = x.clamp(min=0)
    return rectified


# Same example in matrix form: inputs and weights are column vectors, so
# w^T x is a 1x1 matrix holding the weighted sum.
x = Variable(Tensor([[3.0], [-2.0]])) # shape: (2, 1)
w = Variable(Tensor([[1.0], [2.0]]), requires_grad=True) # shape: (2, 1)
b = Variable(Tensor([2.0]), requires_grad=True)

# sigmoid_layer is declared as (w, x, b): weights first, then inputs.
a1 = sigmoid_layer(w, x, b)
print("f(...) = ", a1)

# a1 holds a single element, so backward() needs no explicit gradient argument.
a1.backward()
print(f'df/dw: {w.grad}')
print(f'df/db: {b.grad}')

f(...) =  Variable containing:
 0.7311
[torch.FloatTensor of size 1x1]

df/dw: Variable containing:
 0.5898
-0.3932
[torch.FloatTensor of size 2x1]

df/db: Variable containing:
 0.1966
[torch.FloatTensor of size 1]

