In [1]:
!wget http://www.ats.ucla.edu/stat/data/binary.csv

--2019-05-24 15:03:04--  http://www.ats.ucla.edu/stat/data/binary.csv
Resolving www.ats.ucla.edu (www.ats.ucla.edu)... 128.97.141.26
Connecting to www.ats.ucla.edu (www.ats.ucla.edu)|128.97.141.26|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://stats.idre.ucla.edu/ [following]
--2019-05-24 15:03:05--  http://stats.idre.ucla.edu/
Resolving stats.idre.ucla.edu (stats.idre.ucla.edu)... 128.97.141.21
Connecting to stats.idre.ucla.edu (stats.idre.ucla.edu)|128.97.141.21|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://stats.idre.ucla.edu/ [following]
--2019-05-24 15:03:05--  https://stats.idre.ucla.edu/
Connecting to stats.idre.ucla.edu (stats.idre.ucla.edu)|128.97.141.21|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘binary.csv’

binary.csv              [ <=>                ]  23.24K  --.-KB/s    in 0.1s    

2019-05-24 15:03:06 (1

In [0]:
import numpy as np
import pandas as pd

In [0]:
admissions = pd.read_csv('binary.csv')

In [9]:
# Make dummy variables for rank
data = pd.concat([admissions, pd.get_dummies(admissions['rank'], prefix='rank')], axis = 1)
data.drop('rank', axis=1)

Unnamed: 0,admit,gre,gpa,rank_1,rank_2,rank_3,rank_4
0,0,380,3.61,0,0,1,0
1,1,660,3.67,0,0,1,0
2,1,800,4.00,1,0,0,0
3,1,640,3.19,0,0,0,1
4,0,520,2.93,0,0,0,1
5,1,760,3.00,0,1,0,0
6,1,560,2.98,1,0,0,0
7,0,400,3.08,0,1,0,0
8,1,540,3.39,0,0,1,0
9,0,700,3.92,0,1,0,0


In [20]:
# Standarize features
for field in ['gre','gpa']:
    mean, std = data[field].mean(), data[field].std()
    data.loc[:,field] = (data[field]-mean)/std
    
data

Unnamed: 0,admit,gre,gpa,rank,rank_1,rank_2,rank_3,rank_4
280,0,0.564820,1.419385,2,0,1,0,0
53,1,0.737815,-0.274800,2,0,1,0,0
147,0,-0.300153,-1.690834,3,0,0,1,0
312,0,0.564820,0.989517,3,0,0,1,0
199,0,-0.127158,0.989517,4,0,0,0,1
42,1,0.045836,-0.578236,2,0,1,0,0
18,0,1.775783,0.938944,2,0,1,0,0
129,0,-1.165126,-0.578236,4,0,0,0,1
296,0,-0.300153,-0.552949,1,1,0,0,0
389,0,0.391825,0.332072,2,0,1,0,0


In [0]:
# Split off random 10% of the data for testing
np.random.seed(42)
sample = np.random.choice(data.index, size=int(len(data)*0.9), replace = False)
data, test_data = data.loc[sample], data.drop(sample)

In [0]:
# Split into features and targets
features, targets = data.drop('admit', axis=1), data['admit']
features_test, targets_test = test_data.drop('admit', axis=1), test_data['admit']

In [0]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [0]:
# Using same seed to make debugging easier
np.random.seed(42)

In [0]:
n_records, n_features = features.shape
last_loss = None

In [0]:
# Initializing the weights
weights = np.random.normal(scale=1 / n_features**.5, size=n_features)

In [0]:
# Neural Network Hyperparameters
epochs = 1000
learnrate = 0.5

In [34]:
# Training and testing
for e in range(epochs):
    
    del_w = np.zeros(weights.shape)
    for x, y in zip(features.values, targets):
        # Looping through all records, x is the input, y is the target
        
        # Calculating the output, instead of calculating h before, calculating with output
        output = sigmoid(np.dot(x, weights))
        
        # Calculating the error
        error = y - output
        
        # Calculating the error term
        error_term = error * output * (1 - output)
        
        # The gradient descent step
        del_w += error_term * x
        
    # Updating the weights (Remember why I am dividing it with n_records)
    weights += learnrate * del_w / n_records
    
    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        out = sigmoid(np.dot(features, weights))
        loss = np.mean((out - targets) ** 2)
        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss
        
# Calculate accuracy on test data
tes_out = sigmoid(np.dot(features_test, weights))
predictions = tes_out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))

Train loss:  0.3104933253472141
Train loss:  0.19381128166408063
Train loss:  0.19266471023230788
Train loss:  0.1921130871300722
Train loss:  0.19183447551969718
Train loss:  0.19168874466228691
Train loss:  0.19160971919242084
Train loss:  0.19156546761051615
Train loss:  0.19154002063432127
Train loss:  0.19152507377427136
Prediction accuracy: 0.741
