## Import statements

In [30]:
import numpy as np
import pandas as pd

# Data Prep

In [31]:
admissions = pd.read_csv('binary.csv')

# Make dummy variables for rank
data = pd.concat([admissions, pd.get_dummies(admissions['rank'], prefix='rank')], axis=1)
data = data.drop('rank', axis=1)

# Standarize features
for field in ['gre', 'gpa']:
    mean, std = data[field].mean(), data[field].std()
    data.loc[:,field] = (data[field]-mean)/std
    
# Split off random 10% of the data for testing
np.random.seed(21)
sample = np.random.choice(data.index, size=int(len(data)*0.9), replace=False)
data, test_data = data.ix[sample], data.drop(sample)

# Split into features and targets
features, targets = data.drop('admit', axis=1), data['admit']
features_test, targets_test = test_data.drop('admit', axis=1), test_data['admit']

## Meat and potatoes?

In [48]:
# initialize random number generator
np.random.seed(21)


# activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# hyperparameters
n_hidden = 2 # number of hidden units
epochs = 1200
learnrate = 0.005

n_records, n_features = features.shape
last_loss = None

# initialize weights
weights_input_hidden = np.random.normal(scale=1 / n_features ** .5,
                                       size=(n_features, n_hidden))
weights_hidden_output = np.random.normal(scale=1 / n_features ** 0.5, 
                                        size=(n_hidden))

for e in range(epochs):
    del_w_input_hidden = np.zeros(weights_input_hidden.shape)
    del_w_hidden_output = np.zeros(weights_hidden_output.shape)
    for x, y in zip(features.values, targets):
        ## Forward Pass ##
        hidden_input = np.dot(x, weights_input_hidden)
        hidden_output = sigmoid(hidden_input)
        
        '''
        OF COURSE this is going to include the sigmoid function!! That's the
        ACTIVATION function!
        '''
        output = sigmoid(np.dot(weights_hidden_output, hidden_output))
        
        ## Backward Pass ##
        error = y - output
        
        output_error_term = error * output * (1 - output) 
        
        ## Propagate errors to hidden layer
        
        '''
        In dealing with the hidden error term here the hidden error has been broken down
        into two parts, unlike the last exercise where it was completed in one line. So
        it appears that we are scaling the output_error_term to the weights_hidden_output.
        Meaning....this portion of the error is due to the hidden output weights... therefor 
        hidden error
        '''
        hidden_error = np.dot(output_error_term, weights_hidden_output)
        hidden_error_term =  hidden_error * hidden_output * (1 - hidden_output)
        
        '''
        Can't include learnrate yet because this dataset is being iterated through, unlike
        the last example. The learnrate is applied to the change in weights after the learning
        takes place in this loop.
        '''
       
        del_w_hidden_output += output_error_term * hidden_output
        del_w_input_hidden += hidden_error_term * x[:, None]
        
    weights_input_hidden += learnrate * del_w_input_hidden / n_records # why do we devide by number of records?
    weights_hidden_output += learnrate * del_w_hidden_output / n_records
    
    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        hidden_output = sigmoid(np.dot(x, weights_input_hidden))
        out = sigmoid(np.dot(hidden_output,
                             weights_hidden_output))
        loss = np.mean((out - targets) ** 2)

        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss

        

Train loss:  0.251357252426
Train loss:  0.249511852169
Train loss:  0.247748356382
Train loss:  0.246063804656
Train loss:  0.244455280857
Train loss:  0.242919920243
Train loss:  0.241454915502
Train loss:  0.240057521796
Train loss:  0.238725060915
Train loss:  0.237454924603


## Calculate accuracy on test data

In [49]:
hidden = sigmoid(np.dot(features_test, weights_input_hidden))
out = sigmoid(np.dot(hidden, weights_hidden_output))
predictions = out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accurary: {:.3f}".format(accuracy))

Prediction accurary: 0.750
