In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# training Data features (Hours(sleep), Hours(study))
X = np.array([[3,5],[5,1],[7,2]])
# training Outputs (scores on test)
y = np.array([[73],[82],[91]])

In [3]:
# Normalize the values or scale the values(feature scalling) 
# X = X/max(X), y = y/max(y)

column_wise_max_X_values = np.max(X,axis=0)
X = X/column_wise_max_X_values

# highest score is 100
y = y/100

In [4]:
# class neural network
class NeuralNet:
    
    def __init__(self, inputLayersize=2, outputlayerSize=1, hiddenLayerSize=3):
        # set network hyparameters
        self.inputLayersize = inputLayersize
        self.outputlayerSize = outputlayerSize
        self.hiddenLayerSize = hiddenLayerSize
        
        # weights parameters
        self.W1 = np.random.randn(inputLayersize,hiddenLayerSize)
        self.W2 = np.random.randn(hiddenLayerSize,outputlayerSize)
        
    def sigmoid(self, z):
        return 1/(1 + np.exp(-z))
    
    def sigmoidPrime(self, z):
        return np.exp(-z)/((1+np.exp(-z))**2)
        
    def forward(self,X):
        # propagate inputs through network
        self.z2 = np.dot(X,self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        yHat = self.sigmoid(self.z3)
        return yHat
        
    def costFunction(self, X, y):
        self.yHat = self.forward(X)
        J = 0.5 * sum((y-self.yHat)**2)
        return J
    
    def costFunctionPrime(self, X, y):
        self.yHat = self.forward(X)
        delta3 = np.multiply(-(y - self.yHat), self.sigmoidPrime(self.z3))
        dJdW2 = np.dot(self.a2.T, delta3)
        
        delta2 = np.dot(delta3,self.W2.T) * self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(X.T,delta2)
        
        return dJdW1, dJdW2
    
    def getParameters(self):
        # transform to 1 D array and then concatenate
        params = np.concatenate((self.W1.ravel(),self.W2.ravel()))
        return params
    
    def setParameters(self, params):
        w1_start = 0
        w1_end = self.inputLayersize * self.hiddenLayerSize
        self.W1 = np.reshape(params[w1_start:w1_end], (self.inputLayersize, self.hiddenLayerSize))
        w2_end = w1_end + (self.hiddenLayerSize * self.outputlayerSize)
        self.W2 = np.reshape(params[w1_end:w2_end],(self.hiddenLayerSize, self.outputlayerSize))
        
    def computeGradients(self, X, Y):
        dJdW1, dJdW2 = self.costFunctionPrime(X,y)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))
    
def computrNumericalGradient(NN, X, y):
    initialParams = NN.getParameters()
    numgrad = np.zeros(initialParams.shape)
    perturb = np.zeros(initialParams.shape)
    
    epsilon = 1e-4
    
    for p in range(len(initialParams)):
        perturb[p] = epsilon
        NN.setParameters(initialParams+perturb)
        loss2 = NN.costFunction(X,y)
        
        NN.setParameters(initialParams-perturb)
        loss1 = NN.costFunction(X,y)
        
        numgrad[p] = (loss2-loss1) / (2*epsilon)
        
        perturb[p] = 0
        
    NN.setParameters(initialParams) # return to original
    return numgrad
    

In [5]:
NN = NeuralNet()

In [6]:
numgrad = computrNumericalGradient(NN, X, y)

In [7]:
numgrad

array([ 0.01238686,  0.02234721,  0.05983316,  0.00801623,  0.00951703,
        0.03212099, -0.17963786, -0.21439812, -0.20731077])

In [8]:
grad = NN.computeGradients(X,y)

In [9]:
grad

array([ 0.01238686,  0.02234721,  0.05983316,  0.00801623,  0.00951703,
        0.03212099, -0.17963786, -0.21439812, -0.20731077])

In [10]:
norm(grad - numgrad) / norm(grad + numgrad)

NameError: name 'norm' is not defined