In [82]:
%matplotlib inline
%pylab inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Populating the interactive namespace from numpy and matplotlib


In [83]:
# Training inputs: one row per student, columns are [hours of sleep, hours of study].
X = np.array([[3, 5], [5, 1], [10, 2]], dtype=float)
# Training targets: the test score each student obtained (one column).
y = np.array([[75], [82], [93]], dtype=float)
print(y)

[[75.]
 [82.]
 [93.]]


In [84]:
# Inputs are measured in hours and targets in score points, so rescale both
# into the 0-1 range: every input column is divided by its own maximum and
# the scores by the highest attainable score.
X = X / X.max(axis=0)
y = y / 100  # Max test score is 100
print(y)

[[0.75]
 [0.82]
 [0.93]]


In [85]:
class Neural_Network(object):
    """Feed-forward network with one hidden layer and sigmoid activations.

    This first version only supports the forward pass.

    Parameters
    ----------
    inputLayerSize : int, optional
        Number of input features (columns of X). Defaults to 2.
    outputLayerSize : int, optional
        Number of output units. Defaults to 1.
    hiddenLayerSize : int, optional
        Number of hidden units. Defaults to 3.
    """

    def __init__(self, inputLayerSize=2, outputLayerSize=1, hiddenLayerSize=3):
        #Define Hyperparameters
        self.inputLayerSize = inputLayerSize
        self.outputLayerSize = outputLayerSize
        self.hiddenLayerSize = hiddenLayerSize

        #Weights (parameters), drawn from a standard normal distribution
        self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)

    def forward(self, X):
        """Propagate inputs through the network and return the prediction yHat.

        X has one row per example and inputLayerSize columns. The intermediate
        values z2, a2 and z3 are stored on the instance.
        """
        self.z2 = np.dot(X, self.W1)        # weighted input of the hidden layer
        self.a2 = self.sigmoid(self.z2)     # hidden layer activation
        self.z3 = np.dot(self.a2, self.W2)  # weighted input of the output layer
        yHat = self.sigmoid(self.z3)
        return yHat

    def sigmoid(self, z):
        """Apply the sigmoid 1/(1+exp(-z)) to a scalar, vector, or matrix."""
        # exp(-log(1 + exp(-z))) is the same function, but logaddexp avoids the
        # overflow that np.exp(-z) hits for large negative z.
        return np.exp(-np.logaddexp(0.0, -z))

In [86]:
# Instantiate the network defined above (random initial weights) and run a
# single forward pass on the normalized inputs; yHat is the untrained prediction.
NN = Neural_Network()
yHat = NN.forward(X) # any array with two columns can be passed; the training inputs are reused here
print(yHat)

[[0.40557568]
 [0.43074286]
 [0.41214989]]


In [87]:
# Print the normalized target scores y for comparison with the predictions yHat
print(y) 

[[0.75]
 [0.82]
 [0.93]]


In [88]:
class Neural_Network(object):
    """Feed-forward network with one hidden layer, sigmoid activations and
    analytic gradients of a squared-error cost.

    Parameters
    ----------
    inputLayerSize : int, optional
        Number of input features (columns of X). Defaults to 2.
    outputLayerSize : int, optional
        Number of output units (columns of y). Defaults to 1.
    hiddenLayerSize : int, optional
        Number of hidden units. Defaults to 3.
    """

    def __init__(self, inputLayerSize=2, outputLayerSize=1, hiddenLayerSize=3):
        #Define Hyperparameters
        self.inputLayerSize = inputLayerSize
        self.outputLayerSize = outputLayerSize
        self.hiddenLayerSize = hiddenLayerSize

        #Weights (parameters), drawn from a standard normal distribution
        self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)

    def forward(self, X):
        """Propagate inputs through the network and return the prediction yHat.

        The intermediate values z2, a2 and z3 are stored on the instance
        because costFunctionPrime reuses them.
        """
        self.z2 = np.dot(X, self.W1)        # hidden input
        self.a2 = self.sigmoid(self.z2)     # activation function
        self.z3 = np.dot(self.a2, self.W2)  # final input
        yHat = self.sigmoid(self.z3)        # final output
        return yHat

    def sigmoid(self, z):
        """Apply the sigmoid 1/(1+exp(-z)) to a scalar, vector, or matrix."""
        # exp(-log(1 + exp(-z))) is the same function, but logaddexp avoids the
        # overflow that np.exp(-z) hits for large negative z.
        return np.exp(-np.logaddexp(0.0, -z))

    def sigmoidPrime(self, z):
        """Gradient of the sigmoid with respect to z."""
        # sigmoid'(z) = sigmoid(z) * sigmoid(-z). The textbook form
        # exp(-z)/(1+exp(-z))**2 evaluates to inf/inf = NaN for large negative z.
        return self.sigmoid(z) * self.sigmoid(-z)

    def costFunction(self, X, y):
        """Return the scalar cost 0.5 * sum((y - yHat)**2) for the stored weights.

        The prediction is cached in self.yHat.
        """
        self.yHat = self.forward(X)
        # np.sum is used explicitly: the builtin sum would only add up the rows
        # and return an array instead of a scalar.
        J = 0.5 * np.sum((y - self.yHat) ** 2)
        return J

    def costFunctionPrime(self, X, y):
        """Return (dJdW1, dJdW2), the derivatives of the cost with respect to
        W1 and W2 for the given X and y. Each has the shape of its weight matrix.
        """
        self.yHat = self.forward(X)

        # Error signal at the output layer
        delta3 = np.multiply(-(y - self.yHat), self.sigmoidPrime(self.z3))
        dJdW2 = np.dot(self.a2.T, delta3)

        # Error signal propagated back to the hidden layer
        delta2 = np.dot(delta3, self.W2.T) * self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)

        return dJdW1, dJdW2

    #Helper Functions for interacting with other classes:
    def getParams(self):
        """Return W1 and W2 unrolled into a single 1-D vector (W1 first)."""
        params = np.concatenate((self.W1.ravel(), self.W2.ravel()))
        return params

    def setParams(self, params):
        """Set W1 and W2 from a single parameter vector laid out as in getParams."""
        W1_start = 0
        W1_end = self.hiddenLayerSize * self.inputLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end],
                             (self.inputLayerSize, self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize * self.outputLayerSize
        self.W2 = np.reshape(params[W1_end:W2_end],
                             (self.hiddenLayerSize, self.outputLayerSize))

    def computeGradients(self, X, y):
        """Return the analytic gradients as one vector, in getParams order."""
        dJdW1, dJdW2 = self.costFunctionPrime(X, y)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))

In [89]:
# Fresh network with random weights: evaluate the cost on the training data
NN = Neural_Network()
j = NN.costFunction(X,y)
print(j)

0.29403047179539493


In [90]:
# Derivatives of the cost with respect to W1 and W2 at the current weights
dJdW1, dJdW2 = NN.costFunctionPrime(X,y)

In [91]:
# Derivative of the cost with respect to the input-to-hidden weights W1
dJdW1

array([[ 0.05412599,  0.02120826, -0.06846175],
       [ 0.04215179,  0.01652598, -0.05683516]])

In [92]:
# Derivative of the cost with respect to the hidden-to-output weights W2
dJdW2

array([[-0.18979768],
       [-0.11653671],
       [-0.06032531]])

In [93]:
def computeNumericalGradient(N, X, y, e=1e-4):
    """Estimate the gradient of N's cost by central finite differences.

    Parameters
    ----------
    N : object
        Network exposing getParams(), setParams(params) and costFunction(X, y).
    X, y :
        Passed through unchanged to N.costFunction.
    e : float, optional
        Size of the perturbation applied to each parameter. Defaults to 1e-4.

    Returns
    -------
    numpy.ndarray
        One entry per parameter, in the order returned by N.getParams().

    N's parameters are restored to their initial values before returning,
    including when a cost evaluation raises.
    """
    paramsInitial = N.getParams()
    numgrad = np.zeros(paramsInitial.shape)
    perturb = np.zeros(paramsInitial.shape)

    try:
        for p in range(len(paramsInitial)):
            #Set perturbation vector: only parameter p is moved
            perturb[p] = e

            N.setParams(paramsInitial + perturb)
            loss2 = N.costFunction(X, y)

            N.setParams(paramsInitial - perturb)
            loss1 = N.costFunction(X, y)

            #Compute Numerical Gradient (central difference)
            numgrad[p] = (loss2 - loss1) / (2 * e)

            #Return the value we changed to zero:
            perturb[p] = 0
    finally:
        #Return Params to original value, even if a cost evaluation failed:
        N.setParams(paramsInitial)

    return numgrad

In [94]:
# New network with fresh random weights for the gradient check below
NN = Neural_Network()

In [95]:
# Finite-difference estimate of the gradient, one entry per parameter in
# getParams order (W1 unrolled, then W2)
numgrad = computeNumericalGradient(NN, X, y)
numgrad

array([ 0.00762474, -0.0118671 , -0.01415772,  0.00456391, -0.00571287,
       -0.0084741 , -0.07920447, -0.10715502, -0.03439713])

In [96]:
# Analytic gradient from costFunctionPrime, unrolled in the same order as numgrad
grad = NN.computeGradients(X,y)
grad

array([ 0.00762474, -0.0118671 , -0.01415772,  0.00456391, -0.00571287,
       -0.0084741 , -0.07920447, -0.10715502, -0.03439713])

In [97]:
# Relative difference between the analytic and the numerical gradient; a value
# close to zero means the two agree. np.linalg.norm is called explicitly so the
# cell does not depend on names injected by %pylab.
np.linalg.norm(grad - numgrad) / np.linalg.norm(grad + numgrad)

2.690491522606029e-10

In [98]:
from scipy import optimize

In [99]:
class trainer(object):
    """Fit a network's parameters with scipy.optimize.minimize (BFGS).

    The network N must provide getParams(), setParams(params),
    costFunction(X, y) and computeGradients(X, y).
    """

    def __init__(self, N):
        #Make Local reference to network:
        self.N = N

    def callbackF(self, params):
        """Optimizer callback: load params into the network and record the cost."""
        self.N.setParams(params)
        self.J.append(self.N.costFunction(self.X, self.y))

    def costFunctionWrapper(self, params, X, y):
        """Return (cost, gradient) at params, the form minimize expects with jac=True."""
        self.N.setParams(params)
        cost = self.N.costFunction(X, y)
        grad = self.N.computeGradients(X, y)

        return cost, grad

    def train(self, X, y, maxiter=200, disp=True):
        """Minimize the network's cost on (X, y), starting from its current parameters.

        Parameters
        ----------
        X, y :
            Training data, forwarded to the network's cost and gradient methods.
        maxiter : int, optional
            Maximum number of optimizer iterations. Defaults to 200.
        disp : bool, optional
            Whether the optimizer prints its convergence summary. Defaults to True.

        After the call the network holds the optimized parameters, self.J holds
        the cost recorded by the callback, and self.optimizationResults holds
        the object returned by minimize.
        """
        #Make an internal variable for the callback function:
        self.X = X
        self.y = y

        #Make empty list to store costs:
        self.J = []

        params0 = self.N.getParams()

        options = {'maxiter': maxiter, 'disp': disp}
        _res = optimize.minimize(self.costFunctionWrapper, params0, jac=True,
                                 method='BFGS', args=(X, y), options=options,
                                 callback=self.callbackF)

        self.N.setParams(_res.x)
        self.optimizationResults = _res

In [100]:
# New network with fresh random weights, to be trained below
NN = Neural_Network()

In [101]:
# Wrap the network in a trainer; the trainer keeps a reference to NN and updates it in place
T = trainer(NN)

In [102]:
# Run BFGS on the training data; NN holds the optimized weights afterwards
T.train(X,y)

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 85
         Function evaluations: 95
         Gradient evaluations: 95


In [103]:
# Gradients of the cost with respect to W1 and W2, evaluated at the trained weights
NN.costFunctionPrime(X,y)

(array([[ 2.93848261e-06, -4.64398136e-06,  5.09709216e-08],
        [ 8.89626607e-06, -6.88655696e-06,  1.07177322e-07]]),
 array([[8.17662744e-06],
        [2.21733822e-06],
        [8.96496980e-06]]))

In [104]:
#Test network for various combinations of sleep/study:
# np.linspace / np.meshgrid are called explicitly so this cell does not depend
# on names injected by %pylab.
hoursSleep = np.linspace(0, 10, 100)
hoursStudy = np.linspace(0, 5, 100)

#Normalize data (same way training data was normalized):
# 10 and 5 are the column maxima of the raw training inputs.
hoursSleepNorm = hoursSleep/10.
hoursStudyNorm = hoursStudy/5.

#Create 2-d versions of input for plotting
a, b = np.meshgrid(hoursSleepNorm, hoursStudyNorm)

#Join into a single input matrix, one row per grid point: [sleep, study]
allInputs = np.column_stack((a.ravel(), b.ravel()))

In [105]:
# Predict a score for every row of allInputs with the trained network
allOutputs = NN.forward(allInputs)

In [106]:
# Show the predictions, one row per grid point
print(allOutputs)

[[0.20504499]
 [0.21799845]
 [0.23150316]
 ...
 [0.96524544]
 [0.96543228]
 [0.96561185]]
