# Backpropagation

Train a neural network with one hidden layer of 2 cells, one output layer of one cell, with MSE as error function, to model the admission situation regarding gre, gpa, and rank.

In [1]:
import numpy as np
from data_prep import features, targets, features_test, targets_test
np.random.seed(21)

In [3]:
features.head()

Unnamed: 0,gre,gpa,rank_1,rank_2,rank_3,rank_4
106,0.972155,0.446965,1,0,0,0
9,0.972155,1.392922,0,1,0,0
61,-0.239793,-0.183673,0,0,0,1
224,1.837832,-1.287291,0,1,0,0
37,-0.586063,-1.287291,0,0,1,0


In [4]:
targets.head()

106    1
9      0
61     0
224    0
37     0
Name: admit, dtype: int64

In [5]:
features_test.head()

Unnamed: 0,gre,gpa,rank_1,rank_2,rank_3,rank_4
48,-1.278605,-2.390908,0,0,0,1
50,0.452749,1.235263,0,0,1,0
80,0.972155,-1.287291,0,0,0,1
84,-0.759199,0.552071,0,0,1,0
98,0.972155,-1.339844,0,1,0,0


In [6]:
targets_test.head()

48    0
50    0
80    0
84    1
98    0
Name: admit, dtype: int64

In [3]:

def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))

# Hyperparameters
n_hidden = 2  # number of hidden units
epochs = 900
learnrate = 0.005
n_records, n_features = features.shape
last_loss = None

# Initialize weights
weights_input_hidden = np.random.normal(scale=1 / n_features ** .5,
                                        size=(n_features, n_hidden))
weights_hidden_output = np.random.normal(scale=1 / n_features ** .5,
                                         size=n_hidden)
for e in range(epochs):
    # initialize the updated derivation to be zeros
    del_w_input_hidden = np.zeros(weights_input_hidden.shape)
    del_w_hidden_output = np.zeros(weights_hidden_output.shape)
    for x, y in zip(features.values, targets):
        ## Forward pass ##
        # hidden_input has shape (n_hidden,)
        hidden_input = np.dot(x, weights_input_hidden)
        hidden_output = sigmoid(hidden_input)
        # weights_hidden_output has shape (n_hidden,)
        # output is a scalar
        output = sigmoid(np.dot(hidden_output, weights_hidden_output))
        
        ## Backward pass ##
        # compute the output layer's contribution to the error, which is
        # the derivative of the square error w.r.t the output of the output
        error = y - output
        #the derivative of the square error w.r.t the input of the cell of the output layer 
        # which is the error * the derivative of the output w.r.t the input of the output
        output_error_term = error * output * (1 - output)
        
       
        ## propagate errors to hidden layer
        # TODO: Calculate the hidden layer's contribution to the error
        # which is the derivative of the error to the output of the hidden layer
        # has shape (n_hidden,)
        hidden_error = output_error_term * weights_hidden_output
        
        # TODO: Calculate the error term for the hidden layer
        # which is the derivative of the error to the input of the hidden layer
        # which is the hidden_error * the derivative of the hidden output w.r.t the hidden input
        hidden_error_term = hidden_error * hidden_output * (1-hidden_output)
        
        # the change of the weights hidden to output is the derivative of the cost function to the weights
        # which is the output_error_term * the derivative of the input to output w.r.t the weights hidden to output
        del_w_hidden_output += output_error_term * hidden_output
        # del_w_input_hidden is the derivative of the error w.r.t the weights_input_hidden
        # which is the hidden_error_term * its derivative to the weights_input_hidden which is the inputs
        # hidden_error_term has shape (n_hidden,), each of its ele should times the input vector
        # thus we need to reshape the input of shape (n_features,) into 2d array (n_features, 1) by x[:,None]
        # the * operation between (m,) and (n,1) will times each ele in (m,) by (n,1), and we get (n,m)
        del_w_input_hidden += hidden_error_term * x[:,None]
    
    # TODO: Update weights  (don't forget to division by n_records or number of samples)
    weights_input_hidden += learnrate * (del_w_input_hidden/n_records)
    weights_hidden_output += learnrate * (del_w_hidden_output/n_records)
    
    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        hidden_output = sigmoid(np.dot(x, weights_input_hidden))
        out = sigmoid(np.dot(hidden_output,
                             weights_hidden_output))
        loss = np.mean((out - targets) ** 2)
        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss

# Calculate accuracy on test data
hidden = sigmoid(np.dot(features_test, weights_input_hidden))
out = sigmoid(np.dot(hidden, weights_hidden_output))
predictions = out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))

Train loss:  0.251357252426
Train loss:  0.249965407188
Train loss:  0.248620052189
Train loss:  0.247319932172
Train loss:  0.246063804656
Train loss:  0.244850441793
Train loss:  0.243678632019
Train loss:  0.242547181518
Train loss:  0.241454915502
Train loss:  0.240400679325
Prediction accuracy: 0.725
