In [53]:
import numpy as np
from scipy import optimize
import matplotlib.pyplot as plt

In [54]:
X = np.array(([3,5],[5,1],[10,2]),dtype=float)
Y = np.array(([75],[82],[93]),dtype = float)
X = X/np.max(X,axis=0)
Y = Y/100

In [75]:
class Neural_Network(object):
    def __init__(self):
        self.inputLayerSize  = 2
        self.hiddenLayerSize = 3
        self.outputLayerSize = 1
        self.w1              = np.random.randn(self.inputLayerSize,self.hiddenLayerSize)
        self.w2              = np.random.randn(self.hiddenLayerSize,self.outputLayerSize)
        
    def forward(self,X):
        self.z2   = np.dot(X,self.w1)
        self.a2   = self.sigmoid(self.z2)
        self.z3   = np.dot(self.a2,self.w2)
        yHat      = self.sigmoid(self.z3)
        return yHat
    
    
    
    
    def cost(self,x,y):
        return (0.5)*sum((y-self.forward(x))**2)
    
    
    
    
    def costPrime(self,x,y):
        self.yHat = self.forward(x)
        
        self.delta3 = np.multiply(-(y-self.yHat),self.sigmoidPrime(self.z3))
        djdw2     = np.dot(self.a2.T,self.delta3)
        
        self.delta2 = np.multiply(self.delta3,self.w2.T)*self.sigmoidPrime(self.z3)
        djdw1  = np.dot(x.T,self.delta2)
        
        return djdw1,djdw2
        
        
        
        
    def sigmoid(self,z):
        return 1/(1+np.exp(-z))
    
    
    
    
    def sigmoidPrime(self,z):
        return (np.exp(-z))/((1+np.exp(-z))**2)
    
    
    
    def getParams(self):
        #Get W1 and W2 unrolled into vector:
        params = np.concatenate((self.w1.ravel(), self.w2.ravel()))
        return params
    
    def setParams(self, params):
        #Set W1 and W2 using single paramater vector.
        w1_start = 0
        w1_end = self.hiddenLayerSize * self.inputLayerSize
        self.w1 = np.reshape(params[w1_start:w1_end], (self.inputLayerSize , self.hiddenLayerSize))
        w2_end = w1_end + self.hiddenLayerSize*self.outputLayerSize
        self.Ww = np.reshape(params[w1_end:w2_end], (self.hiddenLayerSize, self.outputLayerSize))
        
    def computeGradients(self, X, y):
        dJdw1, dJdw2 = self.costPrime(X, y)
        return np.concatenate((dJdw1.ravel(), dJdw2.ravel()))

In [82]:

class trainer(object):
    def __init__(self, N):
        #Make Local reference to network:
        self.N = N
        
    def callbackF(self, params):
        self.N.setParams(params)
        self.J.append(self.N.cost(self.X, self.y))   
        
    def costFunctionWrapper(self, params, X, y):
        self.N.setParams(params)
        cost = self.N.cost(X, y)
        grad = self.N.computeGradients(X,y)
        
        return cost, grad
        
    def train(self, X, y):
        #Make an internal variable for the callback function:
        self.X = X
        self.y = y

        #Make empty list to store costs:
        self.J = []
        
        params0 = self.N.getParams()

        options = {'maxiter': 200, 'disp' : True}
        _res = optimize.minimize(self.costFunctionWrapper, params0, jac=True, method='BFGS', \
                                 args=(X, y), options=options, callback=self.callbackF)

        self.N.setParams(_res.x)
        self.optimizationResults = _res

In [83]:
NN = Neural_Network()
T  = trainer(NN)

In [84]:
T.train(X,Y)

         Current function value: 0.174900
         Iterations: 3
         Function evaluations: 124
         Gradient evaluations: 112


In [86]:
NN.forward(X)

array([[0.5],
       [0.5],
       [0.5]])

In [87]:
Y

array([[0.75],
       [0.82],
       [0.93]])