<a href="https://colab.research.google.com/github/MartynBonham-Yang/purple-octopus/blob/master/2020_11_11_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
import numpy as np 
import pandas as pd 

In [35]:
# Add mount Drive code here ?

In [36]:
admissions = pd.read_csv('/content/drive/My Drive/Document.csv')

In [37]:
#Make dummy variables for ranks
data = pd.concat([admissions, pd.get_dummies(admissions['rank'], prefix = 'rank')], axis = 1)
data = data.drop('rank', axis = 1)

In [38]:
#Standardise features
for t in ['gre','gpa']:
  mean, st_dev = data[t].mean(), data[t].std()
  data.loc[:,t] = (data[t] - mean)/st_dev

In [39]:
#Testing set
np.random.seed(20122102)
test = np.random.choice(data.index, size = int(len(data)*0.9), replace = False)
data, test_data = data.loc[test], data.drop(test)

In [40]:
#Features & prediction targets
X, Y = data.drop('admit', axis = 1), data['admit']
X_test, Y_test = test_data.drop('admit', axis = 1), test_data['admit']

In [41]:
#Define sigmoid activation function
#Remember sigmoid' = sigmoid(1-sigmoid)
def sigmoid(x):
  return 1/(1+np.exp(-x))

In [42]:
#Define shape variables
n_records,n_features = X.shape
last_loss = None

In [43]:
#Initialise weights
weights = np.random.normal(scale=1/n_features**0.5, size = n_features)


In [44]:
#Define neural network hyperparameters
epochs = 1000
learnrate = 0.15

In [45]:
#Spin up model
for e in range(epochs):
  del_w = np.zeros(weights.shape)   #Start with del_weights as 0

  for x, y in zip(X.values, Y):
    #Loop through all records. x is the input and y is the target for that x
    
    output = sigmoid(np.dot(x, weights)) #Define the output as activation function applied to sum of x multiplied by weights

    error = y - output #Error of this output

    error_term = error * output * (1-output) #The error term is defined as error*sigmoid'(x)

    del_w += error_term * x #The change in weights is updated for this sample

  
  #Update the weights using the learnrate and average del_w
  weights += (learnrate * del_w) / n_features

  #Mean squared error on training set (checks every tenth epoch): 
  if e % (epochs/10) == 0:
    out = sigmoid(np.dot(X, weights))
    l = np.mean((out-Y) ** 2)
    if last_loss and last_loss < l: #Checking if last_loss has been updated and if so if it is lower than current loss
      print('Training loss: ', l, ' WARNING: LOSS INCREASING')
    else:
      print('Training loss: ', l)
    last_loss = l


Training loss:  0.22708528170284212
Training loss:  0.19354219177739257
Training loss:  0.1935394130121607
Training loss:  0.19353940671673842
Training loss:  0.1935394067015391
Training loss:  0.1935394067015024
Training loss:  0.19353940670150227
Training loss:  0.1935394067015024


In [46]:
#Check accuracy on test data
test_out = sigmoid(np.dot(X_test, weights))
preds = test_out > 0.5
accuracy = np.mean(preds == Y_test)
print('Prediction accuracy: {:.3f}'.format(accuracy))

Prediction accuracy: 0.725
