# Notebook \#1 - Backpropagation Playground

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

## [PART 1] Forward Pass: Single-Layer Neural Net 

### P10 - Common activation functions:

In [None]:
# Activation functions:

def sig(z):
    """Logistic sigmoid 1 / (1 + e^(-z)), applied element-wise.

    `z` may be a scalar or a numpy array; the result has the same shape.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def tanh(z):
    """Hyperbolic tangent activation, applied element-wise.

    `z` may be a scalar or a numpy array; the result has the same shape.

    Delegates to ``np.tanh`` instead of evaluating
    (1 - e^(-2z)) / (1 + e^(-2z)) by hand: for large negative `z` the
    exponential overflows to inf and the hand-written ratio becomes
    -inf / inf = nan, whereas the true value saturates at -1.
    """
    return np.tanh(z)

def relu(z):
    """Rectified linear unit: element-wise maximum of `z` and 0."""
    floor = 0
    return np.maximum(z, floor)

# Derivatives of the activation functions:

def d_sig(z):
    """Derivative of the sigmoid at `z`: sig(z) * (1 - sig(z))."""
    s = sig(z)          # evaluate the sigmoid once and reuse it
    return (1 - s) * s

def d_tanh(z):
    """Derivative of tanh at `z`: 1 - tanh(z)^2."""
    t = tanh(z)
    return 1 - t**2

def d_relu(z):
    """Derivative of ReLU: 0 where z < 0, and 1 elsewhere (including z == 0).

    Negative entries are kept as-is by the `where` and then clipped up to 0;
    every other entry is replaced by 1, which the clip leaves untouched.
    """
    raw = np.where(z < 0, z, 1)
    return np.clip(raw, 0, None)

### P11 - Computing activations:

In [None]:
x = np.random.randn(2)*2                        # input vector (2 random features)

# FORWARD PASS =======================
# f = sigmoid(dot)
# with dot = w.x

w = np.array([[2, -3], [1, 2], [0.5, -0.1]])   # 3 neurons (weights), one row per neuron
dot = np.dot(w, x)                              # one pre-activation per neuron
act = sig(dot)                                  # element-wise sigmoid of the pre-activations

# ------------------------------------
# Single-argument print(...) with str.format produces the same text under
# Python 2 and Python 3 (the Python 2 print statement is a SyntaxError on 3).
print("INPUTS : x = {}".format(x))
print("\nNEURAL NET : weights:\n{}".format(w))
print("\nDOT PRODUCT: {}".format(dot))
print("\nACTIVATIONS (SIGMOID): {}".format(act))


### P12 - Plotting the activation functions:

In [None]:
# Plotting:
# Sample points -10.0, -9.9, ..., 9.9 held in an array so that every
# activation (and its derivative) is evaluated in a single vectorized call.
zs = np.arange(200) / 10.0 - 10

# `dot` and `act` are the pre-activations / activations computed in the
# forward-pass cell; they are drawn as red dots on top of each curve.

# LaTeX labels are raw (or explicitly escaped) strings: "\s" is not a valid
# escape sequence and triggers a warning on Python 3.
plt.plot(zs, sig(zs), label=r"${\sigma}(x)$")
plt.plot(zs, d_sig(zs), label=r"$d{\sigma}(x)$")
plt.plot(dot, act, 'ro', label="neurons outputs")
plt.grid(True)
plt.title("Sigmoid Function $\\sigma$(x)\nNeurons Activations")
plt.legend()
plt.xlabel("x")
plt.ylabel("activations")

plt.figure()
plt.plot(zs, tanh(zs), label="tanh(x)")
plt.plot(zs, d_tanh(zs), '--', label="d_tanh(x)")
plt.plot(dot, tanh(dot), 'ro', label="neurons outputs")
plt.title("tanh Function")
plt.grid(True)
plt.legend()
plt.xlabel("x")
plt.ylabel("activations")

plt.figure()
plt.plot(zs, relu(zs), label="relu(x)")
plt.plot(zs, d_relu(zs), '--', label="d_relu(x)")
plt.plot(dot, relu(dot), 'ro', label="neurons outputs")
plt.title("relu Function")
plt.grid(True)
plt.legend()
plt.xlabel("x")
plt.ylabel("activations")


plt.show()

## [PART 2] Backpropagation : An Example

### Exercise 1:

Compute the gradient of the following function using *Automatic Differentiation*: 

$f(x, y) = \frac{x + {\sigma}(y)}{{\sigma}(x) + (x + y)^2} $ 

In [None]:
def f(x,y):
    """Evaluate f(x, y) = (x + sig(y)) / (sig(x) + (x + y)^2).

    The quotient is computed as numerator * (1 / denominator), mirroring the
    elementary-operation graph used by the gradient functions.
    """
    numerator = x + sig(y)
    total = x + y
    denominator = sig(x) + total**2
    return numerator * (1 / denominator)

In [None]:
# Evaluate f at the point (x, y) = (3, -4); being the last expression of the
# cell, its value is shown as the cell output.
f(3,-4)

In [None]:
def gradf(x,y):
    """
    Gradient of f(x, y) = (x + sig(y)) / (sig(x) + (x + y)^2).

    Runs the forward pass one elementary operation at a time, then combines
    the local derivatives of each operation with the chain rule.

    Returns the list [df/dx, df/dy].
    """

    # ---- forward sweep ------------------------------------------------
    s_y = sig(y)
    s_x = sig(x)
    total = x + y
    squared = total**2
    top = x + s_y                      # numerator
    bottom = s_x + squared             # denominator
    recip = 1 / bottom
    value = top * recip                # f(x, y) itself; not needed for the gradient

    # ---- local derivatives that are not trivially 1 --------------------
    d_recip_d_bottom = -1 / bottom**2  # d(1/u)/du
    d_squared_d_total = 2 * total      # d(u^2)/du

    # ---- chain rule ----------------------------------------------------
    # Sensitivity of f to the denominator: (df/d recip) * (d recip/d bottom),
    # where df/d recip = top and df/d top = recip.
    through_bottom = top * d_recip_d_bottom

    # x reaches f directly through the numerator, and through the denominator
    # via both sig(x) and (x + y)^2.
    dx = recip + through_bottom * (d_sig(x) + d_squared_d_total)
    # y reaches f through sig(y) in the numerator and (x + y)^2 in the denominator.
    dy = recip * d_sig(y) + through_bottom * d_squared_d_total
    return [dx, dy]

In [None]:
def gradf_v2(x,y):
    """
    Gradient of f(x, y) = (x + sig(y)) / (sig(x) + (x + y)^2), reverse-mode style.

    Unlike `gradf`, every intermediate here is already df/d(node): the
    upstream gradient is pushed backwards through the graph one operation at
    a time, and contributions reaching x or y by several paths are summed.

    Returns the list [df/dx, df/dy].
    """

    # ---- forward sweep ------------------------------------------------
    s_x, s_y = sig(x), sig(y)
    total = x + y
    squared = total**2
    top = x + s_y                      # numerator
    bottom = s_x + squared             # denominator
    recip = 1 / bottom
    out = top * recip                  # f(x, y) itself; not needed for the gradient

    # ---- reverse sweep (seed gradient df/df = 1.0) ----------------------
    g_top = 1.0 * recip                        # product rule, numerator side
    g_recip = 1.0 * top                        # product rule, reciprocal side
    g_bottom = g_recip * -1 / (bottom**2)      # through 1/u
    g_total = g_bottom * 2 * total             # through u^2 (a sum passes gradients unchanged)

    # x receives: numerator path, then the (x + y) path, then the sig(x) path.
    g_x = g_top + g_total
    g_x += g_bottom * d_sig(x)

    # y receives: the (x + y) path, then the sig(y) path.
    g_y = g_total + g_top * d_sig(y)

    return [g_x, g_y]

In [None]:
# Testing - Compute the gradient of f(x,y) at (3,-4):
# print(...) with a single argument behaves identically on Python 2 and 3.
print(gradf(3,-4))
print(gradf_v2(3,-4))

# Both implementations differentiate the same graph, so they must agree
# (up to floating-point rounding).
np.testing.assert_allclose(gradf(3,-4), gradf_v2(3,-4))

In [None]:
# Slices of f(x, y) along x (x from -10.0 to 19.9) for a few fixed values of y.
zs = [(-10+i/10.0) for i in range(300)]

slices = [(-4, 'y= -4'), (-3, 'y= -3'), (-2, 'y= -2'), (-1, 'y= -1'), (0, 'y=  0')]
for y_fixed, curve_label in slices:
    plt.plot(zs, [f(i, y_fixed) for i in zs], label=curve_label)

plt.title("$f(x,y)$ for few y values")
plt.xlabel("x")
plt.ylabel("$f(x,y)$")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
#fig = plt.figure()
#ax = Axes3D(fig)
# TODO: plot f(x, y) as a 3D surface using the Axes3D object above

In [None]:
# One random slice of f along x, together with its partial derivative df/dx.
y = np.random.randn()
zs = [(-10+i/10.0) for i in range(300)]

f_curve = [f(i, y) for i in zs]
dfdx_curve = [gradf(i, y)[0] for i in zs]      # first gradient component = df/dx

plt.figure()
plt.plot(zs, f_curve, label="y= {:.2f} ".format(y))
plt.plot(zs, dfdx_curve, '--', label="derivative")
plt.plot(3, f(3, y), 'or')                     # mark the point x = 3 on the curve
plt.title("$f(x,y)$ for one y values")
plt.xlabel("x")
plt.ylabel("$f(x,y)$")
plt.grid(True)
plt.legend()
plt.show()

### Exercise 2: 
Backprop of the Single-Layer Neural Net 

In [None]:
def grad_sigmoid(W, x):
    """Forward and backward pass of a single sigmoid layer f = sig(W.x).

    Returns the list [f, dW]: `f` holds the activations (one per row of W)
    and `dW[i, j]` is the derivative of activation i with respect to W[i, j].
    """
    # forward: the sigmoid decomposed into elementary operations
    pre_act = W.dot(x)
    decay = np.exp(-pre_act)
    denom = decay + 1
    out = 1 / denom

    # backprop, one elementary operation at a time:
    d_denom = -1 / denom**2          # df/d denom        (f = 1 / denom)
    d_decay = 1 * d_denom            # df/d decay        (denom = decay + 1)
    d_pre = -decay * d_decay         # df/d pre_act      (decay = exp(-pre_act))

    # d_pre is the sigmoid derivative at pre_act, i.e. d_sig(pre_act).

    # pre_act[i] = sum_j W[i, j] * x[j], so dW[i, j] = d_pre[i] * x[j]:
    # the outer product of d_pre and x.
    dW = np.outer(d_pre, x)

    return [out, dW]

In [None]:

W = np.array([[2, -3], [1, 2], [0.5, -0.1]])               # 3 neurons, 2 inputs each
x = np.random.randn(2)*2                                   # random input vector

acts, grads = grad_sigmoid(W, x)
# Single-argument print(...) with str.format gives the same text on
# Python 2 and Python 3 (the print statement is a SyntaxError on 3).
print("ACTIVATIONS: {}".format(acts))
print("\nGRADS:\n{}".format(grads))

In [None]:
# Single-argument print(...) with str.format gives the same text on
# Python 2 and Python 3 (the print statement is a SyntaxError on 3).
print("Activations: {}".format(acts))
print("Update:")

for i in range(20):
    # update (gradient ascent):
    # shows that the updates make all the activations increase towards 1.
    W = W + 1.0 * grads                     # step of size 1.0 along the gradient
    acts, grads = grad_sigmoid(W, x)        # re-evaluate at the updated weights
    print("step {:3d} : {}".format(i+1, acts))