In [2]:
import numpy as np
class Network(object):
    """Two-layer sigmoid network (2 inputs -> 3 hidden units -> 1 output, no biases).

    The loss is a summed half squared error plus an L2 penalty on both weight
    matrices, scaled by ``self.Lambda``. ``forward`` caches its intermediate
    values on the instance; ``backward`` reads that cache, so ``forward`` must
    have been called on the same inputs first.
    """

    def __init__(self):
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3
        # Standard-normal weights scaled by 1/sqrt(fan-in) of each layer.
        self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize) / np.sqrt(self.inputLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize) / np.sqrt(self.hiddenLayerSize)
        # L2 regularisation strength used by cost() and backward().
        self.Lambda = .0001

    def forward(self, hours):
        """Propagate ``hours`` through the network and return the predictions.

        Stores the inputs and every intermediate (``z2``, ``a2``, ``z3``,
        ``yHat``) on ``self`` for later use by ``backward``.
        """
        self.hours = hours
        self.z2 = np.dot(hours, self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        self.yHat = self.sigmoid(self.z3)
        return self.yHat

    def backward(self, known, L=None):
        """Back-propagate the loss gradient using the values cached by ``forward``.

        Parameters:
            known: target values, broadcast-compatible with ``self.yHat``.
            L: unused; kept (now optional) so existing ``backward(y, loss)``
               calls continue to work.

        Returns:
            ``(dL_dW1, dL_dW2)``, shaped like ``W1`` and ``W2``. Both are also
            stored on the instance.
        """
        dL_dyHat = self.yHat - known
        dL_dz3 = dL_dyHat * self.dsigmoid(self.z3)
        self.dL_dW2 = np.dot(self.a2.T, dL_dz3) + self.Lambda * self.W2
        dL_da2 = np.dot(dL_dz3, self.W2.T)
        dL_dz2 = dL_da2 * self.dsigmoid(self.z2)
        self.dL_dW1 = np.dot(self.hours.T, dL_dz2) + self.Lambda * self.W1
        return self.dL_dW1, self.dL_dW2

    def sigmoid(self, z):
        """Element-wise logistic function 1 / (1 + exp(-z))."""
        return 1/(1+np.exp(-z))

    def dsigmoid(self, z):
        """Element-wise derivative of ``sigmoid`` with respect to ``z``.

        Computed as s * (1 - s) with s = sigmoid(z). This is non-negative and
        avoids the inf/inf of the exp(-z) / (1 + exp(-z))**2 form when z is
        very negative.
        """
        s = self.sigmoid(z)
        return s * (1 - s)

    def dmatrix(self, z, W):
        """Return ones shaped like the first two dimensions of ``z``, times ``W.T``.

        The product relies on NumPy broadcasting between that array and ``W.T``.
        NOTE(review): nothing in this class calls this helper; its intended
        use is unclear -- confirm before relying on it.
        """
        # np.ones takes the shape as a single tuple; a second positional
        # argument would be interpreted as the dtype.
        ans = np.ones((z.shape[0], z.shape[1]))
        return ans * W.T

    def getParams(self):
        """Return ``W1`` and ``W2`` flattened and concatenated into one 1-D vector."""
        return np.concatenate((self.W1.ravel(), self.W2.ravel()))

    def setParams(self, params):
        """Load ``W1`` and ``W2`` from a flat vector laid out as ``getParams`` returns it."""
        W1_start = 0
        W1_end = self.inputLayerSize * self.hiddenLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end], (self.inputLayerSize, self.hiddenLayerSize))
        self.W2 = np.reshape(params[W1_end:], (self.hiddenLayerSize, self.outputLayerSize))

    def computeNumericalGrad(self, X, y):
        """Estimate the cost gradient by central differences.

        Each parameter is perturbed by +/- e in turn and the cost re-evaluated.
        The result is a flat vector in ``getParams`` order, for comparison
        with ``computeGradients``. The original weights are restored before
        returning.
        """
        paramsInitial = self.getParams()
        perturb = np.zeros(paramsInitial.shape)
        numgrad = np.zeros(paramsInitial.shape)
        e = .0001
        for i in range(len(paramsInitial)):
            perturb[i] = e
            self.setParams(paramsInitial + perturb)
            loss1 = self.cost(self.forward(X), y)
            self.setParams(paramsInitial - perturb)
            loss2 = self.cost(self.forward(X), y)
            numgrad[i] = (loss1 - loss2) / (2*e)
            perturb[i] = 0
        self.setParams(paramsInitial)
        # The last forward pass above used perturbed weights; refresh the cached
        # activations so a following backward() sees the restored weights.
        self.forward(X)
        return numgrad

    def cost(self, y, yHat):
        """Summed half squared error between ``y`` and ``yHat`` plus the L2 penalty.

        The error term is symmetric in its two arguments, so the order in which
        predictions and targets are passed does not affect the result.
        """
        return np.sum(.5 * np.square(yHat - y)) + (self.Lambda/2)*(np.sum(self.W1**2) + np.sum(self.W2**2))

    def computeGradients(self, X, y):
        """Run ``forward`` then ``backward`` on ``(X, y)``; return the gradient as a flat vector in ``getParams`` order."""
        self.forward(X)
        dJdW1, dJdW2 = self.backward(y)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))

In [3]:
# Seed NumPy's global RNG so Network()'s random initial weights are the same on every run.
np.random.seed(0)
net = Network()
hours = np.array(([3,5], [5,1], [10,2], [6,1.5]))
scores = np.array(([.75], [.82], [.93], [.70]))
# Subtract each column's mean so the inputs are centred on zero.
hours -= np.mean(hours, axis=0)
L = net.forward(hours)
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(L)

[[ 0.36425323]
 [ 0.46209686]
 [ 0.68468144]
 [ 0.54003424]]


In [4]:
# Keep the cost under its own name instead of rebinding L, so re-running this
# cell does not feed a previous cost value back into net.cost.
loss = net.cost(L, scores)
print(net.backward(scores, loss))

(array([[ 0.02180643,  0.00118815, -0.01077804],
       [ 0.03372359, -0.00403026, -0.01521841]]), array([[-0.10531911],
       [-0.13258084],
       [-0.1470845 ]]))


In [5]:
# Finite-difference gradient, printed for comparison with the backward() result.
print(net.computeNumericalGrad(hours, scores))

[ 0.02180643  0.00118815 -0.01077804  0.03372359 -0.00403026 -0.01521841
 -0.10531911 -0.13258084 -0.1470845 ]


In [6]:
from scipy import optimize

In [7]:
class trainer(object):
    """Fit a network's parameters with SciPy's BFGS optimiser.

    The wrapped object ``NN`` must provide ``getParams``, ``setParams``,
    ``forward``, ``cost`` and ``computeGradients``.
    """

    def __init__(self, NN):
        self.NN = NN

    def costWrapper(self, params, X, y):
        """Objective for ``optimize.minimize``: load ``params`` and return ``(cost, gradient)``."""
        self.NN.setParams(params)
        cost = self.NN.cost(self.NN.forward(X), y)
        grad = self.NN.computeGradients(X, y)
        return cost, grad

    def callBack(self, params):
        """Per-iteration callback: append the cost at ``params`` to ``self.J``."""
        self.NN.setParams(params)
        self.J.append(self.NN.cost(self.NN.forward(self.X), self.y))

    def train(self, X, y):
        """Minimise the network's cost on ``(X, y)``, starting from its current parameters.

        After the call:
            self.X, self.y     -- the training data passed in
            self.J             -- cost recorded by ``callBack`` at each iteration
            self.optimization  -- the result object returned by ``optimize.minimize``
        The network is left holding the optimiser's final parameters.
        """
        self.X = X
        self.y = y
        self.J = []
        params = self.NN.getParams()
        options = {'maxiter': 200, 'disp': True}
        # jac=True tells minimize that costWrapper returns (cost, gradient).
        magic = optimize.minimize(self.costWrapper, params, jac=True, method='BFGS',
                                  args=(X, y), options=options, callback=self.callBack)
        # Load the solution into the network explicitly. Otherwise the network
        # would be left with whichever parameters were evaluated last. self.X
        # keeps the training inputs rather than being overwritten with magic.x.
        self.NN.setParams(magic.x)
        self.optimization = magic

In [8]:
training = trainer(net)
training.train(hours, scores)
# Show the trained network's predictions next to the targets.
print(net.forward(hours))
print(scores)

Optimization terminated successfully.
         Current function value: 0.003103
         Iterations: 165
         Function evaluations: 177
         Gradient evaluations: 177
[[ 0.74978596]
 [ 0.79379575]
 [ 0.92138955]
 [ 0.72372216]]
[[ 0.75]
 [ 0.82]
 [ 0.93]
 [ 0.7 ]]
