In [None]:
import numpy as np

class Neural_Net(object):
    '''
    Fully connected network with two hidden layers, trained with
    mini-batch gradient descent on a mean-squared-error cost.

    Every layer, including the output layer, uses a ReLU activation, so
    predictions are always non-negative.

    Parameters:
        input_size    -- number of input features
        h1_size       -- number of units in the first hidden layer
        h2_size       -- number of units in the second hidden layer
        output_size   -- number of output units
        lr            -- learning rate used for every parameter update
        hidden_layers -- stored on the instance only; the architecture is
                         fixed at two hidden layers regardless of this value
    '''
    def __init__(self, input_size, h1_size, h2_size, output_size = 1, lr = .025, hidden_layers = 2):

        # Define network parameters
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layers = hidden_layers
        self.h1_size = h1_size
        self.h2_size = h2_size
        self.lr = lr

        # Initialize weights
        # scaled by .01 to be close to 0
        self.W1 = np.random.randn(self.input_size, self.h1_size) * .01
        self.W2 = np.random.randn(self.h1_size, self.h2_size) * .01
        self.W3 = np.random.randn(self.h2_size, self.output_size) * .01

        # Initialize biases as row vectors so they broadcast over the batch
        self.b1 = np.zeros(self.h1_size).reshape(1, -1)
        self.b2 = np.zeros(self.h2_size).reshape(1, -1)
        self.b3 = np.zeros(self.output_size).reshape(1, -1)

    def forward(self, x):
        '''
        Performs a forward pass through the network.
        This can be used to predict values.

        The hidden activations are cached on the instance (self.z1,
        self.z2) because back_prop reads them afterwards.

        Returns the output activations, one row per row of x.
        '''
        z1_dot = np.dot(x, self.W1) + self.b1 # x -> h1
        self.z1 = self.ReLU(z1_dot) # f(x -> h1)

        z2_dot = np.dot(self.z1, self.W2) + self.b2 # h1 -> h2
        self.z2 = self.ReLU(z2_dot) # f(h1 -> h2)

        z3_dot = np.dot(self.z2, self.W3) + self.b3 # h2 -> output
        pred = self.ReLU(z3_dot) # f(h2 -> output)

        # return our output prediction
        return pred

    def ReLU(self, x):
        '''
        Our activation function: element-wise max(x, 0).
        '''
        return np.maximum(x, 0)

    def dReLU(self, x):
        '''
        Derivative of our activation function: 1 where x > 0, else 0.
        '''
        return np.where(x > 0, 1, 0)

    def get_cost(self, m, y, pred):
        '''
        Returns the cost of a forward pass: the sum of squared errors
        divided by m (the number of samples in the batch).

        y and pred are squeezed first so that a (m, 1) prediction and a
        (m,) target are compared element-wise instead of broadcasting.
        '''
        yhat = np.squeeze(pred)
        Y = np.squeeze(y)
        cost = np.sum((yhat - Y)**2)
        cost = cost / m
        return cost

    def get_cost_deriv(self, m, y, pred):
        '''
        Returns the derivative of our cost with respect to the predictions,
        2 * (pred - y) / m, with singleton dimensions squeezed away.
        Use this for back-propagation.
        '''
        yhat = np.squeeze(pred)
        Y = np.squeeze(y)
        dCost = 2 * (yhat - Y) / m
        return dCost

    def back_prop(self, x, y, pred):
        '''
        Our backward pass through the network.

        Must be called right after forward(x) with the prediction it
        returned, since the cached activations self.z1 and self.z2 are
        used here. Applies one gradient-descent step to all weights and
        biases in place.

        Returns the cost of the batch, measured before the update.
        '''
        m = x.shape[0] # get length of input

        cost = self.get_cost(m, y, pred) # cost of the current predictions

        # The cost derivative already carries the 1/m factor, so none of the
        # deltas below are divided by m again. Restoring pred's 2-D shape also
        # keeps a batch of a single sample working (squeeze makes it 0-d).
        dCost = np.reshape(self.get_cost_deriv(m, y, pred), pred.shape)

        # ReLU'(z) can be read off the activations: f(z) > 0 exactly when z > 0.
        # All deltas are computed BEFORE any weight is touched so that the
        # error is propagated through the weights used in the forward pass.
        pred_delta = dCost * self.dReLU(pred) # delta for h2 -> output
        z2_delta = np.dot(pred_delta, self.W3.T) * self.dReLU(self.z2) # delta for h1 -> h2
        z1_delta = np.dot(z2_delta, self.W2.T) * self.dReLU(self.z1) # delta for x -> h1

        # Weight gradients; bias gradients are per-unit sums over the batch
        # (the averaging over samples is already contained in dCost).
        self.W3 -= self.lr * np.dot(self.z2.T, pred_delta) # update weight 3
        self.b3 -= self.lr * np.sum(pred_delta, axis=0, keepdims=True) # update bias 3

        self.W2 -= self.lr * np.dot(self.z1.T, z2_delta) # update weight 2
        self.b2 -= self.lr * np.sum(z2_delta, axis=0, keepdims=True) # update bias 2

        self.W1 -= self.lr * np.dot(x.T, z1_delta) # update weight 1
        self.b1 -= self.lr * np.sum(z1_delta, axis=0, keepdims=True) # update bias 1

        return cost # returns our cost so we can analyze it

    def train_step(self, x, y, batch_size = 32):
        '''
        Goes through a single training step (one pass over x in
        mini-batches of at most batch_size samples).

        Returns the average of the per-batch costs, or 0.0 when x
        contains no samples. Raises ValueError if batch_size < 1.
        '''
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        total_cost = 0.0 # running sum of mini-batch costs
        num_batches = 0 # batches actually processed (the last one may be partial)

        # cycle through training data in mini-batches
        for i in range(0, x.shape[0], batch_size):
            batch_train_x = x[i:i+batch_size]
            batch_train_y = y[i:i+batch_size]

            # perform forward pass
            pred = self.forward(batch_train_x)

            # perform backward pass
            # sum cost for each mini-batch pass
            total_cost += self.back_prop(batch_train_x, batch_train_y, pred)
            num_batches += 1

        if num_batches == 0:
            return 0.0 # nothing to train on

        # divide sum by the real number of batches, this gives us avg cost of mini-batches per epoch
        return total_cost / num_batches