In [1]:
import numpy as np

# Training set: each row of X is (hours sleeping, hours studying) for one
# student; the matching row of y is that student's test score.
X = np.array([[3, 5], [5, 1], [10, 2]], dtype=float)
y = np.array([[75], [82], [93]], dtype=float)

# Rescale: divide every input column by its own maximum, and the scores by
# the maximum attainable test score of 100.
X = X / X.max(axis=0)
y = y / 100

class Neural_Network(object):
    """Feed-forward network with one hidden layer and sigmoid activations.

    The network maps inputs of shape (n_samples, inputLayerSize) to outputs
    of shape (n_samples, outputLayerSize) and is scored with the summed
    squared error ``0.5 * sum((y - yHat)**2)``.

    Parameters
    ----------
    inputLayerSize, hiddenLayerSize, outputLayerSize : int, optional
        Number of units per layer. The defaults (2, 3, 1) reproduce the
        original hard-coded architecture.
    seed : int or None, optional
        If given, the weights are drawn from a private
        ``np.random.RandomState(seed)``. If None (default), they are drawn
        from NumPy's global generator, so a prior ``np.random.seed(...)``
        call still controls them.

    Attributes
    ----------
    W1 : ndarray, shape (inputLayerSize, hiddenLayerSize)
    W2 : ndarray, shape (hiddenLayerSize, outputLayerSize)
    z2, a2, z3 : ndarray
        Intermediate values cached by the most recent ``forward`` call.
    yHat : ndarray
        Prediction cached by the most recent ``costFunction`` or
        ``costFunctionPrime`` call.
    """

    def __init__(self, inputLayerSize=2, hiddenLayerSize=3,
                 outputLayerSize=1, seed=None):
        # Hyperparameters
        self.inputLayerSize = inputLayerSize
        self.outputLayerSize = outputLayerSize
        self.hiddenLayerSize = hiddenLayerSize

        # Without a seed keep using the global generator (original behaviour).
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Weights (parameters), drawn from a standard normal distribution
        self.W1 = rng.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = rng.randn(self.hiddenLayerSize, self.outputLayerSize)

    def forward(self, X):
        """Propagate inputs through the network and return the prediction.

        Caches ``z2``, ``a2`` and ``z3`` on the instance; the gradient
        computation in ``costFunctionPrime`` reads them back.
        """
        self.z2 = np.dot(X, self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        yHat = self.sigmoid(self.z3)
        return yHat

    def sigmoid(self, z):
        """Apply the logistic function 1/(1+exp(-z)) element-wise.

        Accepts a scalar, vector or matrix. ``exp`` is only ever evaluated on
        a non-positive argument, so large-magnitude inputs cannot overflow.
        """
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))
        # z >= 0: 1/(1+exp(-z));  z < 0: exp(z)/(1+exp(z)) -- the same value.
        return np.where(z >= 0, 1.0, e) / (1.0 + e)

    def sigmoidPrime(self, z):
        """Return the derivative of the sigmoid evaluated at ``z``.

        Uses the identity s'(z) = s(z) * (1 - s(z)). The direct form
        exp(-z)/(1+exp(-z))**2 evaluates to inf/inf = nan for large
        negative ``z``.
        """
        s = self.sigmoid(z)
        return s * (1.0 - s)

    def costFunction(self, X, y):
        """Compute the cost for the given X, y using the stored weights.

        Returns ``0.5 * sum((y - yHat)**2)`` summed over the first (sample)
        axis, so a column-vector ``y`` yields an array of shape (1,).
        Stores the prediction in ``self.yHat``.
        """
        self.yHat = self.forward(X)
        J = 0.5 * np.sum((y - self.yHat) ** 2, axis=0)
        return J

    def costFunctionPrime(self, X, y):
        """Compute the derivative of the cost with respect to W1 and W2.

        Returns
        -------
        (dJdW1, dJdW2) : tuple of ndarray
            Gradients with the same shapes as ``W1`` and ``W2``.
        """
        self.yHat = self.forward(X)

        # Error signal at the output layer.
        delta3 = np.multiply(-(y - self.yHat), self.sigmoidPrime(self.z3))
        dJdW2 = np.dot(self.a2.T, delta3)

        # Back-propagate the error signal through W2 to the hidden layer.
        delta2 = np.dot(delta3, self.W2.T) * self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)

        return dJdW1, dJdW2

In [2]:
# Seed NumPy's global generator before the weights are drawn, so the random
# initialisation -- and every number computed from it in the cells below --
# is the same on each Restart & Run All.
np.random.seed(0)
NN = Neural_Network()

# Forward pass with the untrained weights; displayed as the cell output.
yHat = NN.forward(X)
yHat

array([[ 0.40994068],
       [ 0.54349324],
       [ 0.49417668]])

In [3]:
# Cost at the current weights, plus the gradient of that cost with respect
# to each weight matrix. The W1 gradient is displayed as the cell output.
cost1 = NN.costFunction(X, y)
dJdW1, dJdW2 = NN.costFunctionPrime(X, y)
dJdW1

array([[ 0.01834334, -0.06635173,  0.00969303],
       [ 0.01146464, -0.04920917,  0.00765315]])

In [4]:
# Display the gradient of the cost with respect to W2 (same shape as NN.W2).
dJdW2

array([[-0.2065903 ],
       [-0.09264389],
       [-0.16503568]])

In [5]:
# Step *along* the gradient (adding it moves uphill on the cost surface), then
# re-evaluate the cost for comparison with cost1.
# Note: this reassigns NN.W1 / NN.W2 from their current values, so running the
# cell again applies the step a second time.
scalar = 3  # step size; the descent cell below reuses it
NN.W1 = NN.W1 + scalar * dJdW1
NN.W2 = NN.W2 + scalar * dJdW2
cost2 = NN.costFunction(X, y)

In [6]:
# Compare the cost before (cost1) and after (cost2) the step along the gradient.
print(cost1, cost2)

[ 0.19101915] [ 0.4953993]


In [7]:
# Recompute the gradient at the current weights and step *against* it
# (gradient descent), then re-evaluate the cost for comparison with cost2.
# Like the uphill step, this updates NN.W1 / NN.W2 from their current values,
# so re-running the cell takes another step.
dJdW1, dJdW2 = NN.costFunctionPrime(X, y)
NN.W1 = NN.W1 - scalar * dJdW1
NN.W2 = NN.W2 - scalar * dJdW2
cost3 = NN.costFunction(X, y)

In [11]:
# Compare the cost before (cost2) and after (cost3) the step against the gradient.
print(cost2, cost3)

[ 0.34368533] [ 0.11078466]
