In [1]:
import numpy as np
import random

from q1_softmax import softmax
from q2_sigmoid import sigmoid, sigmoid_grad
from q2_gradcheck import gradcheck_naive



In [2]:
# Toy problem: N random examples for a small two-layer network.
N = 20
dimensions = [10, 5, 10]   # [input size, hidden size, output size]
data = np.random.randn(N, dimensions[0])   # each row will be a datum

# One-hot labels: exactly one randomly chosen class is set per row.
labels = np.zeros((N, dimensions[2]))
for i in range(N):  # range (not xrange) so the cell runs on Python 2 and 3
    labels[i, random.randint(0, dimensions[2]-1)] = 1

# Flat parameter vector sized for both layers' weights plus one bias per unit:
# (Dx + 1) * H entries for the first layer, (H + 1) * Dy for the second.
params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (
    dimensions[1] + 1) * dimensions[2], )


In [3]:
# Layer sizes: input, hidden, output.
Dx, H, Dy = dimensions[:3]

# params is laid out as [W1 | b1 | W2 | b2]; walk it with a running offset,
# advancing by the size of each piece just carved out.
ofs = 0
W1 = params[ofs:ofs + Dx * H].reshape(Dx, H)
ofs += W1.size
b1 = params[ofs:ofs + H].reshape(1, H)
ofs += b1.size
W2 = params[ofs:ofs + H * Dy].reshape(H, Dy)
ofs += W2.size
b2 = params[ofs:ofs + Dy].reshape(1, Dy)

In [4]:
# Forward pass for the whole batch at once; each row is one example.
# b1 and b2 are row vectors, so they broadcast across the rows of the products.
all_u = data.dot(W1) + b1          # hidden pre-activations, one row per datum
all_h = sigmoid(all_u)             # hidden activations
all_theta = all_h.dot(W2) + b2     # output scores, one row per datum

# softmax is still applied one row at a time (its handling of 2-D input is
# defined in q1_softmax, not here); ravel guards against a (1, Dy) return.
all_y_hat = np.array([np.ravel(softmax(theta)) for theta in all_theta])

# Cross-entropy of each example against its label row, then the batch mean.
all_costs = -np.sum(labels * np.log(all_y_hat), axis=1)
cost = np.mean(all_costs)

In [5]:
def get_grad_W2(h, y, y_hat):
    """Return the W2 gradient contributed by a single example.

    Entry [i, j] of the result is (y_hat[j] - y[j]) * h[i], i.e. the outer
    product of the hidden activations with the prediction error.

    Args:
        h: 1-D sequence of hidden-layer activations for one example.
        y: 1-D target vector, same length as y_hat.
        y_hat: 1-D predicted vector.

    Returns:
        numpy.ndarray of shape (len(h), len(y_hat)).
    """
    # np.outer replaces the element-by-element double loop and takes the
    # result shape from the arguments instead of the notebook-global W2.
    error = np.asarray(y_hat) - np.asarray(y)
    return np.outer(h, error)

# Stack the per-example W2 gradients along a new leading axis.
gradW2 = np.array([
    get_grad_W2(*example)
    for example in zip(all_h, labels, all_y_hat)
])


In [6]:
# Sanity check: expect (N, H, Dy), i.e. one H x Dy gradient matrix per example.
gradW2.shape

(20, 5, 10)

In [7]:
# Inspect the gradient matrix of the first example.
gradW2[0]

array([[  1.76101956e-03,   3.34548858e-04,   7.89876637e-04,
         -1.14581288e-02,   5.60821253e-04,   7.36155587e-04,
          1.96166028e-05,   5.48964151e-03,   1.42793208e-04,
          1.62365554e-03],
       [  1.46365034e-01,   2.78056281e-02,   6.56496518e-02,
         -9.52328665e-01,   4.66119875e-02,   6.11846910e-02,
          1.63041048e-03,   4.56264987e-01,   1.18680867e-02,
          1.34948188e-01],
       [  8.93798771e-03,   1.69798998e-03,   4.00898878e-03,
         -5.81552960e-02,   2.84642690e-03,   3.73632964e-03,
          9.95633203e-05,   2.78624663e-02,   7.24741489e-04,
          8.24080182e-03],
       [  1.53589321e-02,   2.91780586e-03,   6.88899880e-03,
         -9.99333712e-02,   4.89126625e-03,   6.42046456e-03,
          1.71088429e-04,   4.78785319e-02,   1.24538718e-03,
          1.41608962e-02],
       [  1.51294014e-01,   2.87420086e-02,   6.78604652e-02,
         -9.84399222e-01,   4.81816898e-02,   6.32451426e-02,
          1.68531607e-03

In [8]:
# Inspect the gradient matrix of the example at index 10.
gradW2[10]

array([[ 0.06290139,  0.00951399,  0.00199478, -0.11634053,  0.0046517 ,
         0.000902  ,  0.00101453,  0.01267011,  0.00461083,  0.0180812 ],
       [ 0.19997587,  0.03024685,  0.00634178, -0.36986938,  0.01478867,
         0.00286762,  0.0032254 ,  0.04028077,  0.01465873,  0.05748369],
       [ 0.4502583 ,  0.06810269,  0.01427892, -0.83278426,  0.03329763,
         0.00645664,  0.00726219,  0.09069469,  0.03300505,  0.12942817],
       [ 0.44766793,  0.06771089,  0.01419677, -0.82799319,  0.03310606,
         0.00641949,  0.00722041,  0.09017292,  0.03281517,  0.12868356],
       [ 0.45694521,  0.0691141 ,  0.01449098, -0.84515218,  0.03379214,
         0.00655253,  0.00737004,  0.09204162,  0.03349521,  0.13135034]])