In [1476]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [1477]:
def normalize(data,m):#normalize the data
    avgs=data.mean(axis=0) #calculates the means of each feature (each column)
    averages=avgs.reshape((2352,1))
    for k in range(m):   #do normalisation by looping through raw_data and subtracting the mean of each feature from each data point
        for h in range(2352):
            data[k][h]=data[k][h]-averages[h][0]
    return data
    #return (data - data.min())/ (data.max() - data.min())

In [1478]:
def pca(data, n): #perform PCA on data to reduce dimensionality (change the n_components / features to see how it affects the results)
    transposed=data.transpose() #to calculate covariance matrix, find transpose
    intermediate=np.dot(transposed,data)
    cov_matrix=(1/(2000-1))*intermediate #covariance matrix
    # cov_matrix = np.cov(data.T) #covariance matrix
    eigenvalues,eigenvectors = np.linalg.eig(cov_matrix) #eigenvalues and eigenvectors for covariance matrix
    variation_matrix = []

    for i in eigenvalues:
        variation_matrix.append(i/np.sum(eigenvalues)*100) #get the percentage of variation for each eigenvalue
    
    cumulative_variation = np.cumsum(variation_matrix) #cumulative variation -> get components that contribute most to the data
    # print(cumulative_variation) #-> to see how many components are needed to explain the data (10 components are enough)


    projected_data = np.dot(data,eigenvectors[:,:n]) #projected data

    # U, Sigma, Vh = np.linalg.svd(data, full_matrices=False, compute_uv=True)
    # projected_data = np.dot(U, np.diag(Sigma))
    return projected_data

In [1479]:
data = np.loadtxt("inputs.txt")
labels = np.loadtxt("labels.txt")

In [1480]:
m, n = data.shape #m is the number of data-points /samples, n is the number of features
n = 750 #number of components to be used -> PCA

In [1481]:
#code to shuffle 2 arrays, and keep corresponding elements
randomize = np.arange(len(labels)) 
np.random.shuffle(randomize) #creates a randomized sequence to be used as an index for the two arrays to shuffle them (https://www.delftstack.com/howto/numpy/python-numpy-shuffle-two-arrays/)

data = data[randomize]
labels = labels[randomize]
data=normalize(data,m)
data = pca(data, n)

#This is to avoid dimension issues with np.dot 
#Also...I took small samples of the date to test the ann
training_data = data[:1200].T
Y_training = labels[:1200]
X_training = training_data[0:n]

validation_data = data[1200:1600].T #note, includes start index, excludes end index
Y_validation = labels[1200:1600]
X_validation = validation_data[0:n]

testing_data = data[1600:].T
Y_testing = labels[1600:]
X_testing = testing_data[0:n]

print(data.shape)

(2000, 750)


In [1482]:
def sigmoid(Z):
    return 1/(1 + np.exp(-Z))

In [1483]:
def sigmoid_prime(Z):
    return Z * (1 - Z)

In [1484]:
def softmax(Z):
    return np.exp(Z)/np.sum(np.exp(Z), axis=0)

In [1485]:
def rand_params(): #generate a random set of weights and biases for the neural network between -1 and 1
    w1= np.random.rand(5, n) - 1 # n is the number of features
    b1 = np.random.rand(5, 1) - 1
    w2 = np.random.rand(5, 5) - 1
    b2 = np.random.rand(5, 1) - 1
    w3 = np.random.rand(10, 5) - 1
    b3 = np.random.rand(10, 1) - 1
    return w1, b1, w2, b2, w3, b3

In [1486]:
def forward_prop(X, w1, b1, w2, b2, w3, b3): #forward propagation
    Z1 = np.dot(w1, X) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(w2, A1) + b2
    A2 = sigmoid(Z2)
    Z3 = np.dot(w3, A2) + b3
    A3 = softmax(Z3)
    return A1, A2, A3

In [1487]:
def one_hot_encode(labels): #encode labels as one-hot vectors
    labels = labels.astype(int)
    encoded_labels = np.zeros((labels.size, 10))
    for i in range(labels.size):
        encoded_labels[i][labels[i]] = 1
    return encoded_labels.T

In [1488]:
print(one_hot_encode(Y_training))
print(Y_training)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 1. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[6. 8. 6. ... 2. 6. 1.]


In [1489]:
def regularize(w1, w2, w3): #regularization (lambda = 0.99)
    w1 = w1 * 0.99
    w2 = w2 * 0.99
    w3 = w3 * 0.99

In [1490]:
#Parameters:
#w1, b1, w2, b2, w3, b3: weights and biases
#X: training data
#Y: training labels
#A1, A2, A3: activation functions

#Returns:
#dw1, db1, dw2, db2, dw3, db3: deltas for weights and biases
def backprop(X, Y, A1, A2, A3, w1, b1, w2, b2, w3, b3): #backpropagation
    Y = one_hot_encode(Y)
    dZ3 = A3 - Y
    dW3 = np.dot(dZ3, A2.T)
    db3 = np.sum(dZ3, axis=1, keepdims=True)
    dA2 = np.dot(w3.T, dZ3)
    dZ2 = dA2 * sigmoid_prime(A2)
    dW2 = np.dot(dZ2, A1.T)
    db2 = np.sum(dZ2, axis=1, keepdims=True)
    dA1 = np.dot(w2.T, dZ2)
    dZ1 = dA1 * sigmoid_prime(A1)
    dW1 = np.dot(dZ1, X.T)
    db1 = np.sum(dZ1, axis=1, keepdims=True)
    return dW1, db1, dW2, db2, dW3, db3

In [1491]:
#Parameters:
#dW1, db1, dW2, db2, dW3, db3: deltas for weights and biases

#Returns:   
#w1, b1, w2, b2, w3, b3: updated weights and biases
def update_params(w1, b1, w2, b2, w3, b3, dW1, db1, dW2, db2, dW3, db3, learning_rate): #update parameters
    w1 = w1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    w2 = w2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    w3 = w3 - learning_rate * dW3
    b3 = b3 - learning_rate * db3
    regularize(w1, w2, w3) #regularize weights
    return w1, b1, w2, b2, w3, b3

In [1492]:
def get_predictions(A3): 
    return np.argmax(A3,0)

def get_accuracy(predictions, Y):
    accuracy = np.sum(predictions == Y) / Y.size
    return accuracy * 100

In [1493]:
def gradient_descent(X, Y, epochs, learning_rate): #gradient descent -> learn weights and biases
    w1, b1, w2, b2, w3, b3 = rand_params()
    for i in range(epochs):
        A1, A2, A3 = forward_prop(X, w1, b1, w2, b2, w3, b3)
        dW1, db1, dW2, db2, dW3, db3 = backprop(X, Y, A1, A2, A3, w1, b1, w2, b2, w3, b3)
        w1, b1, w2, b2, w3, b3 = update_params(w1, b1, w2, b2, w3, b3, dW1, db1, dW2, db2, dW3, db3, learning_rate)

    return w1, b1, w2, b2, w3, b3

In [1494]:
epochs = 2000
alpha = 0.01
#learnt set of weights and biases (this is basically what we are submitting)
w1, b1, w2, b2, w3, b3 = gradient_descent(X_training, Y_training, epochs, alpha)

training_accuracy = get_accuracy(get_predictions(forward_prop(X_training, w1, b1, w2, b2, w3, b3)[2]), Y_training)
print("Epochs: ",epochs,"|","Training Accuracy: ", training_accuracy,"%", "|", "Learning Rate: ", alpha)

Epochs:  2000 | Training Accuracy:  96.75 % | Learning Rate:  0.01


In [1495]:
print("Validation Accuracy: ", get_accuracy(get_predictions(forward_prop(X_validation, w1, b1, w2, b2, w3, b3)[2]), Y_validation), "%")

Validation Accuracy:  59.5 %


In [1496]:
print("Testing Accuracy: ", get_accuracy(get_predictions(forward_prop(X_testing, w1, b1, w2, b2, w3, b3)[2]), Y_testing), "%")

Testing Accuracy:  64.5 %
