# Backpropagation (BP) examples


In [1]:
# one layer NN with sigmoid activation function

# forward model: f(w,b,x) = 1/(1+e^(-s)), where s = xw + b, x = [x1,x2], w = [w1, w2]^T

import numpy as np

# Weights as a column vector, shape (2, 1).
w = np.array([2.0, -3.0]).reshape(2,1)
# One input sample as a row vector, shape (1, 2).
x = np.array([[-1.0, -2.0]])
# Scalar bias.
b = -3.0
# print(...) with a single argument produces the same output on Python 2 and 3.
print(w)
print(x)

[[ 2.]
 [-3.]]
[[-1. -2.]]


In [2]:
# forward model
# s = xw + b is a (1, 1) array because x is (1, 2) and w is (2, 1).
s = np.dot(x,w) + b
# Sigmoid activation, applied element-wise; f keeps the (1, 1) shape.
f = 1.0/(1+np.exp(-s))
# .item() extracts the single Python float explicitly instead of relying on
# implicit conversion of a size-1 array by '%f'. The parenthesised
# single-argument print works on both Python 2 and Python 3.
print('s = %f, output = %f\n' % (s.item(), f.item()))

s = 1.000000, output = 0.731059



In [3]:
# gradient
# Derivative of the sigmoid output with respect to s: f * (1 - f), shape (1, 1).
ds = (1-f)*f
# Chain rule through s = xw + b:
#   d f / d w = x^T * ds   -> shape (2, 1), same as w
#   d f / d b = ds
#   d f / d x = ds * w^T   -> shape (1, 2), same as x
dw = x.T*ds
db = ds
dx = ds*w.T
# Format into one string so the output is identical on Python 2 and Python 3
# (a parenthesised multi-argument print would show a tuple on Python 2).
print('dx =  %s' % (dx,))
print('dw =  %s' % (dw,))
# Pull out plain scalars explicitly rather than formatting size-1 arrays with '%f'.
print('ds = %f, dw1 = %f, dw2 =%f, db = %f' % (ds.item(), dw[0, 0], dw[1, 0], db.item()))

dx =  [[ 0.39322387 -0.5898358 ]]
dw =  [[-0.19661193]
 [-0.39322387]]
ds = 0.196612, dw1 = -0.196612, dw2 =-0.393224, db = 0.196612


In [4]:
# Matrix-Matrix multiply gradient.
# forward pass D = XW
W = np.random.randn(4, 5)
X = np.random.randn(3, 4)
D = X.dot(W)  # shape (3, 5)

# now suppose we had the gradient on D from above in the circuit
dD = np.random.randn(*D.shape) # same shape as D
# Backward pass for D = XW: each gradient has the shape of the variable it belongs to.
dW = np.dot(X.T,dD) # (4, 3) x (3, 5) -> (4, 5), same shape as W
dX = np.dot(dD,W.T) # (3, 5) x (5, 4) -> (3, 4), same shape as X

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(dW)
print(dX)

[[-0.48171715  2.84821806  0.24285057 -6.27512405  5.06894987]
 [-2.57008759  0.51174963 -0.37654453 -3.36144826 -1.34673997]
 [ 3.07343703 -0.71446201  0.50429241  3.62572389  1.2242635 ]
 [ 1.68606468  0.25722821  0.32233205  0.91745328  2.00765354]]
[[ 1.04657255 -3.29524025  2.10600548 -0.45698212]
 [ 2.89798805  1.84153971 -2.33948406 -0.92571491]
 [-1.12429899 -3.99640742  0.45254807  0.89376836]]


In [5]:
# check gradient array

def eval_numerical_gradient_array(f, x, df, h=1e-5):
  """
  Evaluate a numeric gradient for a function that accepts a numpy
  array and returns a numpy array.

  Uses a centered finite difference: each element of x is perturbed by +h
  and -h in turn, and the resulting change in f(x) is contracted with the
  upstream gradient df.

  Inputs:
  - f: callable taking x and returning a numpy array.
  - x: numpy array to differentiate with respect to. It is modified in place
    while the function runs and restored to its original values afterwards,
    even if f raises.
  - df: upstream gradient; it is multiplied element-wise with the change in
    f(x), so it must be broadcastable against f's output.
  - h: finite-difference step size.

  Returns:
  - grad: array with the same shape and dtype as x; grad[ix] is
    sum((f(x + h*e_ix) - f(x - h*e_ix)) * df) / (2 * h).
  """
  grad = np.zeros_like(x)
  it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
  while not it.finished:
    ix = it.multi_index

    oldval = x[ix]
    try:
      x[ix] = oldval + h
      pos = f(x).copy()  # copy: f may return a view or reuse a buffer
      x[ix] = oldval - h
      neg = f(x).copy()
    finally:
      # Always undo the perturbation so the caller's array is never left
      # modified, including when f raises part-way through.
      x[ix] = oldval

    grad[ix] = np.sum((pos - neg) * df) / (2 * h)
    it.iternext()
  return grad

def fw(w):
    """Forward pass as a function of the weights only: returns X.dot(w) for the module-level X."""
    product = X.dot(w)
    return product

def fx(x):
    """Forward pass as a function of the input only: returns x.dot(W) for the module-level W."""
    product = x.dot(W)
    return product

def rel_error(x, y):
  """
  Return the largest element-wise relative error between x and y.

  Each element's error is |x - y| / max(1e-8, |x| + |y|); the 1e-8 floor
  keeps the division defined when both entries are zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

# Numerical gradients of sum(D * dD) with respect to W and X.
# NOTE: this rebinds dw and dx, which held the sigmoid-example gradients earlier.
dw = eval_numerical_gradient_array(fw, W, dD, h=1e-5)
dx = eval_numerical_gradient_array(fx, X, dD, h=1e-5)

# Compare the numerical gradients against the analytic ones (dW, dX).
re_w = rel_error(dw,dW)
re_x = rel_error(dx,dX)

# Format into one string so the output is identical on Python 2 and Python 3.
print('%s %s' % (re_x, re_w))

1.29455366246e-10 3.10206900594e-11
