In [258]:
import numpy as np
class Network(object):
    """Small fully connected network with one sigmoid hidden layer.

    The model has no bias terms and computes
    ``yHat = sigmoid(dot(sigmoid(dot(X, W1)), W2))``.
    Both weight matrices are drawn from a standard normal distribution when
    the object is constructed.

    The historical method name ``foward`` (sic) is kept for existing callers;
    ``forward`` is the same operation under the correct spelling.
    """

    def __init__(self, inputLayerSize=2, hiddenLayerSize=3, outputLayerSize=1):
        """Create the network and randomly initialise its weights.

        Args:
            inputLayerSize: number of input features per sample.
            hiddenLayerSize: number of hidden units.
            outputLayerSize: number of outputs per sample.
        """
        self.inputLayerSize = inputLayerSize
        self.outputLayerSize = outputLayerSize
        self.hiddenLayerSize = hiddenLayerSize
        self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)

    def forward(self, hours):
        """Run a forward pass and cache the intermediates needed by ``backward``.

        Args:
            hours: input array with one row per sample and
                ``inputLayerSize`` columns.

        Returns:
            The prediction ``yHat`` (also stored on ``self.yHat``).
        """
        self.hours = hours
        self.z2 = np.dot(hours, self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        self.yHat = self.sigmoid(self.z3)
        return self.yHat

    def foward(self, hours):
        """Misspelled legacy name for ``forward``; kept so old callers work."""
        return self.forward(hours)

    def backward(self, known, L=None):
        """Back-propagate the half squared error through the network.

        Must be called after ``forward``: it reads the cached ``hours``,
        ``z2``, ``a2``, ``z3`` and ``yHat``.

        Args:
            known: target values, same shape as ``self.yHat``.
            L: unused; accepted only so existing two-argument calls keep
                working.

        Returns:
            ``(dL_dW2, dL_dW1)`` - note the order: the W2 gradient comes
            first, whereas ``getParams`` / ``computeNumericalGrad`` list W1
            first.  Both arrays are also stored on ``self``.
        """
        # d/dyHat of 0.5 * (yHat - known)**2
        dL_dyHat = self.yHat - known
        dL_dz3 = dL_dyHat * self.dsigmoid(self.z3)
        self.dL_dW2 = np.dot(self.a2.T, dL_dz3)
        # Push the error back through W2 into the hidden activations.
        dL_da2 = np.dot(dL_dz3, self.W2.T)
        dL_dz2 = dL_da2 * self.dsigmoid(self.z2)
        self.dL_dW1 = np.dot(self.hours.T, dL_dz2)
        return self.dL_dW2, self.dL_dW1

    def sigmoid(self, z):
        """Element-wise logistic function ``1 / (1 + exp(-z))``."""
        return 1/(1+np.exp(-z))

    def dsigmoid(self, z):
        """Element-wise derivative of ``sigmoid`` with respect to ``z``.

        Uses the identity ``sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))``,
        which is always non-negative and avoids the ``inf / inf`` that the
        explicit ``exp(-z) / (1 + exp(-z))**2`` form produces when
        ``exp(-z)`` overflows.
        """
        s = self.sigmoid(z)
        return s * (1 - s)

    def dmatrix(self, z, W):
        """Tile ``W.T`` across the rows of ``z`` by broadcasting.

        Returns ``ones(z.shape[0], z.shape[1]) * W.T``; the two shapes must
        be broadcast-compatible (e.g. ``z`` of shape ``(n, 1)`` and ``W`` of
        shape ``(h, 1)`` give an ``(n, h)`` result whose rows are ``W.T``).
        """
        # np.ones takes the shape as a single tuple; a second positional
        # argument would be interpreted as the dtype.
        ans = np.ones((z.shape[0], z.shape[1]))
        return ans * W.T

    def getParams(self):
        """Return all weights as one flat vector: ``W1`` first, then ``W2``."""
        return np.concatenate((self.W1.ravel(), self.W2.ravel()))

    def setParams(self, params):
        """Load ``W1`` and ``W2`` from a flat vector laid out as in ``getParams``."""
        W1_start = 0
        W1_end = self.inputLayerSize * self.hiddenLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end], (self.inputLayerSize, self.hiddenLayerSize))
        self.W2 = np.reshape(params[W1_end:], (self.hiddenLayerSize, self.outputLayerSize))

    def computeNumericalGrad(self, X, y, e=.0001):
        """Estimate the gradient of the summed cost by central differences.

        Each weight is perturbed by ``+e`` and ``-e`` in turn and the change
        in total cost is divided by ``2 * e``.

        Args:
            X: input samples, as accepted by ``forward``.
            y: target values.
            e: perturbation size.

        Returns:
            Flat gradient vector in ``getParams`` order (``W1`` then ``W2``).
        """
        paramsInitial = self.getParams()
        perturb = np.zeros(paramsInitial.shape)
        numgrad = np.zeros(paramsInitial.shape)
        try:
            for i in range(len(paramsInitial)):
                perturb[i] = e
                self.setParams(paramsInitial + perturb)
                loss1 = self.cost(self.forward(X), y)
                self.setParams(paramsInitial - perturb)
                loss2 = self.cost(self.forward(X), y)
                numgrad[i] = np.sum((loss1 - loss2)/(2*e))
                perturb[i] = 0
        finally:
            # Always put the original weights back, even if a pass failed.
            self.setParams(paramsInitial)
        # The cached activations still belong to the last perturbed pass;
        # recompute them so a later backward() sees the restored weights.
        self.forward(X)
        return numgrad

    def cost(self, y, yHat):
        """Element-wise half squared error ``0.5 * (yHat - y)**2``.

        The expression is symmetric, so it does not matter which argument
        holds the prediction and which the target.
        """
        return .5 * np.square(yHat - y)

In [259]:
# Build a randomly initialised network and run a single forward pass.
x = Network()
# One row per sample, two input features per row.
hours = np.array(([5, 1], [6, 30], [7, 2]))
# Known target for each sample, as a column vector.
scores = np.array(([.3], [.1], [.85]))
L = x.foward(hours)
# Function-call form prints the same thing on Python 2 and also runs on Python 3.
print(L)

[[ 0.49229881]
 [ 0.73828107]
 [ 0.49108675]]


In [260]:
# Rebinds L from the predictions to the per-sample cost, so this cell is not
# idempotent: re-run the forward-pass cell before running it a second time.
L = x.cost(L, scores)
# backward() returns the analytic gradients as (dL_dW2, dL_dW1).
print(x.backward(scores, L))

(array([[-0.00703768],
       [ 0.06285392],
       [-0.04197113]]), array([[ -1.96495897e-01,   1.56575937e-01,   6.39921354e-04],
       [ -9.71323497e-01,   7.66641663e-01,   4.94924199e-04]]))


In [261]:
# Gradient check: the flat result lists the W1 entries first, then W2, so it
# should match the backward() output above once that is reordered.
print(x.computeNumericalGrad(hours, scores))

[ -1.96495895e-01   1.56575940e-01   6.39921538e-04  -9.71323195e-01
   7.66642004e-01   4.94924206e-04  -7.03767636e-03   6.28539194e-02
  -4.19711337e-02]
