In [1]:
import numpy as np
class Network(object):
    """Small fully connected network: 2 inputs -> 3 hidden units -> 1 output.

    Both layers use the ReLU activation (``relu`` / ``relu_prime``). The cost
    is half the summed squared error plus an L2 penalty on both weight
    matrices scaled by ``Lambda``. The other activation helpers (sigmoid,
    tanh, ReLU/dReLU) are not used by ``forward``/``backward``; they are kept
    so alternative activations can be swapped in.
    """

    def __init__(self, seed=None):
        """Create the network with non-negative random weights.

        Parameters
        ----------
        seed : int or None, optional
            When given, weights are drawn from a private
            ``np.random.RandomState(seed)`` so the initialisation is
            reproducible. When None (the default) the global numpy random
            state is used, exactly as before.
        """
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3
        # Both the np.random module and a RandomState instance expose randn().
        rng = np.random if seed is None else np.random.RandomState(seed)
        # Absolute values: every initial weight is >= 0.
        self.W1 = np.absolute(rng.randn(self.inputLayerSize, self.hiddenLayerSize))
        self.W2 = np.absolute(rng.randn(self.hiddenLayerSize, self.outputLayerSize))
        # L2 regularisation strength used by cost() and backward().
        self.Lambda = .0001

    def forward(self, hours):
        """Run a forward pass and return the prediction ``yHat``.

        ``hours`` is multiplied by ``W1``, so it needs ``inputLayerSize``
        columns. The input and all intermediate values (``z2``, ``a2``,
        ``z3``, ``yHat``) are cached on the instance for ``backward``.
        """
        self.hours = hours
        self.z2 = np.dot(hours, self.W1)
        self.a2 = self.relu(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        self.yHat = self.relu(self.z3)
        return self.yHat

    def backward(self, known, L):
        """Back-propagate and return ``(dL_dW1, dL_dW2)``.

        Uses the values cached by the most recent ``forward`` call, so
        ``forward`` must have been run on the matching input first.

        Parameters
        ----------
        known : array
            Target values, same shape as ``yHat``.
        L : float
            Unused; accepted only so existing callers keep working.
        """
        dL_dyHat = -(known - self.yHat)
        # relu_prime is already a 0/1 mask, so no further transformation is needed.
        dy_dz3 = self.relu_prime(self.z3)
        dL_dz3 = dy_dz3 * dL_dyHat
        # The Lambda * W terms are the gradients of the L2 penalty in cost().
        self.dL_dW2 = np.dot(self.a2.T, dL_dz3) + self.Lambda*self.W2
        dL_da2 = np.dot(dL_dz3, self.W2.T)
        da2_dz2 = self.relu_prime(self.z2)
        dL_dz2 = da2_dz2 * dL_da2
        self.dL_dW1 = np.dot(self.hours.T, dL_dz2) + self.Lambda*self.W1
        return self.dL_dW1, self.dL_dW2

    def sigmoid(self, z):
        """Logistic function 1 / (1 + exp(-z)), applied element-wise."""
        return 1/(1+np.exp(-z))

    def dsigmoid(self, z):
        """Derivative of ``sigmoid``: sigmoid(z) * (1 - sigmoid(z)).

        This equals exp(-z) / (1 + exp(-z))**2 and is always positive.
        """
        s = self.sigmoid(z)
        return s * (1 - s)

    def tanh(self, z):
        """Hyperbolic tangent, applied element-wise."""
        return np.tanh(z)

    def dtanh(self, z):
        """Derivative of ``tanh``: 1 - tanh(z)**2."""
        return 1 - np.square(self.tanh(z))

    def ReLU(self, z):
        """Element-wise max(0, z), implemented with ``np.maximum``."""
        return np.maximum(np.array([0]), z)

    def dReLU(self, z):
        """Element-wise ReLU derivative: 1 where z > 0, otherwise 0."""
        return np.where(z > np.array([0]), np.ones(z.shape), np.zeros(z.shape))

    def relu(self, input_z):
        """ReLU used by ``forward``: keeps positive entries, zeroes the rest."""
        return np.where(input_z > np.zeros(input_z.shape), input_z, np.zeros(input_z.shape))

    def relu_prime(self, input_z):
        """ReLU derivative used by ``backward``: 1 where input_z > 0, otherwise 0."""
        return np.where(input_z > np.zeros(input_z.shape), np.ones(input_z.shape), np.zeros(input_z.shape))

    def dmatrix(self, z, W):
        """Return ``W.T`` broadcast against a matrix of ones shaped like 2-D ``z``."""
        # np.ones takes the shape as a single tuple; a second positional
        # argument would be interpreted as the dtype.
        ans = np.ones((z.shape[0], z.shape[1]))
        return ans * W.T

    def getParams(self):
        """Return W1 and W2 flattened into one 1-D vector (W1 first)."""
        return np.concatenate((self.W1.ravel(), self.W2.ravel()))

    def setParams(self, params):
        """Load W1 and W2 from a flat vector laid out as in ``getParams``."""
        W1_start = 0
        W1_end = self.inputLayerSize * self.hiddenLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end], (self.inputLayerSize, self.hiddenLayerSize))
        self.W2 = np.reshape(params[W1_end:], (self.hiddenLayerSize, self.outputLayerSize))

    def computeNumericalGrad(self, X, y):
        """Estimate the cost gradient by central finite differences.

        Each parameter is perturbed by +/- 1e-4 in turn. The original
        parameters are restored before returning. The result has the same
        layout as ``getParams``.
        """
        paramsInitial = self.getParams()
        perturb = np.zeros(paramsInitial.shape)
        numgrad = np.zeros(paramsInitial.shape)
        e = .0001
        for i in range(len(paramsInitial)):
            perturb[i] = e
            self.setParams(paramsInitial + perturb)
            loss1 = self.cost(self.forward(X), y)
            self.setParams(paramsInitial - perturb)
            loss2 = self.cost(self.forward(X), y)
            numgrad[i] = (loss1 - loss2)/(2*e)
            perturb[i] = 0
        self.setParams(paramsInitial)
        return numgrad

    def cost(self, y, yHat):
        """Half the summed squared error plus the L2 weight penalty.

        The error term is symmetric in ``y`` and ``yHat``, so callers may
        pass the two in either order.
        """
        return np.sum(.5 * np.square(yHat - y)) + (self.Lambda/2)*(np.sum(self.W1**2) + np.sum(self.W2**2))

    def costFunction(self, X, y):
        """Run ``forward`` on ``X`` and return the regularised cost against ``y``."""
        return self.cost(y, self.forward(X))

    def computeGradients(self, X, y):
        """Return the analytic gradient as a flat vector laid out as in ``getParams``."""
        dJdW1, dJdW2 = self.backward(y, self.cost(y, self.forward(X)))
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))

In [2]:
# Five training samples with two input features each, and one target per sample.
# The 1.5 entry makes `hours` a float array; `scores` stays integer-typed.
hours = np.array([[3, 5], [5, 1], [10, 2], [6, 1.5], [7, 2]])
scores = np.array([[75], [82], [93], [70], [74]])

# Randomly initialised network that the following cells inspect and train.
net = Network()

In [3]:
# costFunction runs the forward pass, which caches the activations that
# backward() reads. backward() accepts the cost value but does not use it.
L = net.costFunction(hours, scores)
# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print(net.backward(scores, L))

(array([[-1056.40880365, -1347.84049649,  -880.24866148],
       [ -377.39441327,  -481.50644094,  -314.46236817]]), array([[-3142.1770565 ],
       [-2998.44734308],
       [-3723.64892292]]))


In [4]:
# Finite-difference gradient, flattened as [W1..., W2...]; it should agree
# with the analytic gradients printed by the previous cell.
print(net.computeNumericalGrad(hours, scores))

[-1056.40880367 -1347.84049649  -880.24866149  -377.39441326  -481.50644094
  -314.46236817 -3142.1770565  -2998.44734307 -3723.64892291]


In [5]:
from scipy import optimize

In [6]:
class trainer(object):
    """Fits a network's parameters with scipy's BFGS optimiser.

    The wrapped object ``NN`` must provide ``getParams``, ``setParams``,
    ``costFunction``, ``computeGradients``, ``forward`` and ``cost``.
    """

    def __init__(self, NN):
        """Keep a reference to the network whose parameters will be optimised."""
        self.NN = NN

    def costWrapper(self, params, X, y):
        """Objective for ``optimize.minimize`` with ``jac=True``.

        Loads ``params`` into the network and returns ``(cost, gradient)``.
        """
        self.NN.setParams(params)
        cost = self.NN.costFunction(X, y)
        grad = self.NN.computeGradients(X, y)
        return cost, grad

    def callBackF(self, params):
        """Per-iteration callback: record the cost at ``params`` in ``self.J``."""
        self.NN.setParams(params)
        self.J.append(self.NN.cost(self.NN.forward(self.X), self.y))

    def train(self, X, y):
        """Minimise the network cost on ``(X, y)`` starting from its current parameters.

        After the call:
        - ``self.NN`` holds the optimised parameters,
        - ``self.J`` holds the cost recorded after each optimiser iteration,
        - ``self.optimization`` holds the result object returned by scipy.
        """
        # Stored on the instance because callBackF only receives the parameter vector.
        self.X = X
        self.y = y
        self.J = []
        params0 = self.NN.getParams()
        options = {'maxiter': 10000, 'disp': True}
        # callback must be passed explicitly; without it callBackF is never
        # invoked and the cost history J stays empty.
        magic = optimize.minimize(self.costWrapper, params0, jac=True, method='BFGS',
                                  args=(X, y), options=options,
                                  callback=self.callBackF)
        self.NN.setParams(magic.x)
        self.optimization = magic

In [7]:
# Wrap the network in the BFGS trainer, then fit it to the training data.
# train() writes the optimised parameters back into `net`.
training = trainer(NN=net)
training.train(X=hours, y=scores)

9115.30571132 [-1056.40880365 -1347.84049649  -880.24866148  -377.39441327  -481.50644094
  -314.46236817 -3142.1770565  -2998.44734308 -3723.64892292]
[ 1.07525755  1.40585693  1.07722505  1.656449    0.51758248  2.51446316
  0.56045426  0.71506688  0.46699641]
9115.30571132 [-1056.40880365 -1347.84049649  -880.24866148  -377.39441327  -481.50644094
  -314.46236817 -3142.1770565  -2998.44734308 -3723.64892292]
[ 1.07525755  1.40585693  1.07722505  1.656449    0.51758248  2.51446316
  0.56045426  0.71506688  0.46699641]
3685.26956165 [-1238.30513867 -1387.79292759 -1241.97474829  -441.26832583  -494.53823398
  -442.5759008  -2131.08781289 -2107.50986177 -2445.238042  ]
[ 1.25088007  1.62992847  1.22356186  1.71918888  0.59763045  2.56674091
  1.082825    1.2135433   1.08603385]
5444.87750756 [ 2211.26641118  2715.50162169  1961.13339902   649.68013861   797.82658167
   576.19002528  5466.08641551  5356.8375148   6380.50905507]
[ 3.03475126  3.30168898  3.37092265  2.35298172  1.1909523

In [20]:
# Predictions of the trained network, followed by the targets for comparison.
# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print(net.forward(hours))
print(scores)

[[  74.44604733]
 [  52.89092489]
 [ 105.78184977]
 [  66.38125707]
 [  79.87158926]]
[[75]
 [82]
 [93]
 [70]
 [74]]
