In [1]:
from tools.numerical_gradient import *
from models.layers import *
import numpy as np

# Let's try out some simple functions for numerical_gradient. #

We know the linear equation y = 3x should always return 3. Let's check it:

In [2]:
def linear(x, slope=3):
    return slope*x

slope = numerical_gradient_check_scalar(linear, 5)
print slope

2.99999999989


In [3]:
def multi_quadratic(x):
    return x[0]**2 + x[1]
arr = np.array([2,2], float)

slope = numerical_gradient_check_multivar(multi_quadratic, arr)
print slope

[ 3.9999999   0.99999997]


In [4]:
def multi_cubic_field(x):
    return np.array([x[0]**3 + x[1]**2, x[0]*2 + x[1]/12])
arr = np.array([3,4], dtype=np.float32)

def matrix_mult(x, b = np.array([[3,5],[2,1]])):
    return x.dot(b)
                
vector_field = numerical_gradient_check_multivar(multi_cubic_field, arr)
print vector_field

arr = np.array([[3,4],[1,2]], dtype=np.float32)
vector_field = numerical_gradient_check_multivar(matrix_mult, arr)
print vector_field

[ 29.03938293   8.09431076]
[[ 8.01086426  3.0040741 ]
 [ 8.01086426  3.0040741 ]]


In [2]:
def affine_transform(w, x, b):
    return x.dot(w) + b

x = np.array([[1,2]], float) # 1 x 2
w = np.array([[3,2,1],[1,2,5]], float) # 2 x 3
b = np.array([[1,5,7]], float) # 1 x 3

fw = lambda w: affine_transform(w,x,b)
fx = lambda x: affine_transform(w,x,b)
fb = lambda b: affine_transform(w,x,b)

vector_field = numerical_gradient_check_multivar(fw, w)
print "fw : ", vector_field
vector_field = numerical_gradient_check_multivar(fx, x)
print "fx : ", vector_field
vector_field = numerical_gradient_check_multivar(fb, b)
print "fb : ", vector_field

fw :  [[ 0.99999997  0.99999997  0.99999997]
 [ 1.99999995  1.99999995  1.99999995]]
fx :  [[ 5.99999985  7.9999998 ]]
fb :  [[ 0.99999997  0.99999997  0.99999997]]


# word_embedding_forward/backward #

In [2]:
# Looks good to me
ans = np.array([[[3, 4, 7, 1],
                [3, 4, 7, 1],
                [1, 5, 9, 4]],

               [[1, 5, 9, 4],
                [1, 5, 9, 4],
                [1, 5, 9, 4]],

               [[4, 3, 2, 5],
                [3, 4, 7, 1],
                [4, 3, 2, 5]]])

x = np.array([[1,1,0], [0,0,0], [2,1,2]], int)
words = np.array([[1,5,9,4],[3,4,7,1],[4,3,2,5]])
arr = word_embedding_forward(words, x)

assert np.array_equal(ans, arr)
print arr.shape, "\n", arr

(3, 3, 4) 
[[[3 4 7 1]
  [3 4 7 1]
  [1 5 9 4]]

 [[1 5 9 4]
  [1 5 9 4]
  [1 5 9 4]]

 [[4 3 2 5]
  [3 4 7 1]
  [4 3 2 5]]]


In [3]:
dout = np.array([[[1,2,0,1],[3,2,9,1],[1,2,1,1]],
                 [[3,9,2,4],[1,9,9,0],[2,0,1,6]],
                 [[1,0,1,0],[0,1,0,5],[3,0,0,1]]])

arr = word_embedding_backward(dout, words.shape, x)
ans = np.array([[  7.,  20.,  13.,  11.],
               [  4.,   5.,   9.,   7.],
               [  4.,   0.,   1.,   1.]])

assert np.array_equal(ans, arr)
print arr.shape, "\n", arr

(3, 4) 
[[  7.  20.  13.  11.]
 [  4.   5.   9.   7.]
 [  4.   0.   1.   1.]]


# Tanh Vanilla RNN Layer #

In [2]:
# Forward
"""
prev_h = (N,H)
x = (N, V)
W_hh = (H,H)
W_xh = (V,H)
b = (H,)
"""
prev_h = np.random.random((3,5)) # N = 3, H = 5
x = np.random.random((3,4)) # N = 3, V = 4
W_hh = np.random.random((5,5)) # H = 5
W_xh = np.random.random((4,5)) # V = 4, H = 5
b = np.random.random((5,)) # H = 5

res = rnn_step_forward(prev_h, W_hh, x, W_xh, b) # N = 3, H = 5
print res.shape

(3, 5)


In [3]:
# Backward
fprev_h = lambda prev_h: rnn_step_forward(prev_h, W_hh, x, W_xh, b)
fW_hh = lambda W_hh: rnn_step_forward(prev_h, W_hh, x, W_xh, b)
fx = lambda x: rnn_step_forward(prev_h, W_hh, x, W_xh, b)
fW_xh = lambda W_xh: rnn_step_forward(prev_h, W_hh, x, W_xh, b)
fb = lambda b: rnn_step_forward(prev_h, W_hh, x, W_xh, b)

dprev_h_num = numerical_gradient_check_multivar(fprev_h, prev_h)
dW_hh_num = numerical_gradient_check_multivar(fW_hh, W_hh)
dx_num = numerical_gradient_check_multivar(fx, x)
dW_xh_num = numerical_gradient_check_multivar(fW_xh, W_xh)
db_num = numerical_gradient_check_multivar(fb, b)

dW_hh, dW_xh, dprev_h, dx, db = rnn_step_backward(prev_h, W_hh, x, W_xh, b, np.ones_like(res))
print "dprev_h : ", norm_loss(dprev_h, dprev_h_num)
print "dW_hh : ", norm_loss(dW_hh, dW_hh_num)
print "dx : ", norm_loss(dx, dx_num)
print "dW_xh : ", norm_loss(dW_xh, dW_xh_num)
print "db : ", norm_loss(db, db_num)

(5, 5) (4, 5) (3, 5) (3, 4) (5,)
dprev_h :  1.26251613167e-08
dW_hh :  1.26274485055e-08
dx :  1.26389902778e-08
dW_xh :  1.26152884459e-08
db :  1.25845483581e-08


# Affine Layer #

In [2]:
# Forward
"""
h = (N,H)
W_hy = (H,D)
b = (D,)
"""
h = np.random.random((3,5)) # N = 3, H = 5
W_hy = np.random.random((5,7)) # H = 5, D = 7
b = np.random.random((7,)) # D = 7

res = affine_forward(h, W_hy, b) # N = 3, D = 7
print res.shape

(3, 7)


In [5]:
fh = lambda h: affine_forward(h, W_hy, b)
fW_hy = lambda W_hy: affine_forward(h, W_hy, b)
fb = lambda b: affine_forward(h, W_hy, b)

dh_num = numerical_gradient_check_multivar(fh, h)
dW_hy_num = numerical_gradient_check_multivar(fW_hy, W_hy)
db_num = numerical_gradient_check_multivar(fb, b)

dh, dW_hy, db = affine_backward(h, W_hy, b, np.ones_like(res))

print "dx : ", norm_loss(dh, dh_num)
print "dW_xh : ", norm_loss(dW_hy, dW_hy_num)
print "db : ", norm_loss(db, db_num)

dx :  1.26312490673e-08
dW_xh :  1.26310252027e-08
db :  1.26310626518e-08
