In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.decomposition as skd 
#have a look at the documentation for skd here 
#"https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html"

In [None]:
def normalize(data):
    """Standardize data to zero mean and unit standard deviation (z-score).

    Note that this does NOT rescale values into the [0, 1] range: the result
    is ``(data - mean) / std`` using the mean and standard deviation that
    ``np.mean`` / ``np.std`` compute for ``data``.

    Parameters:
        data: array-like of numbers.

    Returns:
        The standardized data. If the standard deviation is a scalar zero
        (all values identical), the centered data (all zeros) is returned
        instead of dividing by zero and producing NaNs.
    """
    centered = data - np.mean(data)
    spread = np.std(data)
    # Constant input: every centered value is already 0, and dividing by a
    # zero spread would only turn those zeros into NaN.
    if np.ndim(spread) == 0 and spread == 0:
        return centered
    return centered / spread

In [None]:
def pca(data, n_components=2):
    """Reduce the dimensionality of ``data`` with scikit-learn's PCA.

    A PCA model with ``n_components`` components is fitted on ``data`` and the
    same ``data`` is then projected with it. Change ``n_components`` to see
    how it affects the results.

    Parameters:
        data: the samples handed to ``skd.PCA.fit`` / ``transform``.
        n_components: number of components to keep (default 2).

    Returns:
        The transformed data returned by ``PCA.transform``.
    """
    # A distinct local name avoids shadowing this function inside its own body.
    reducer = skd.PCA(n_components=n_components)
    return reducer.fit(data).transform(data)

In [None]:
raw_data = np.loadtxt("inputs.txt")
raw_data_labels = np.loadtxt("labels.txt")

# Every input row needs exactly one label; with mismatched lengths the shared
# permutation below would either fail or silently drop samples.
assert len(raw_data) == len(raw_data_labels), "inputs.txt and labels.txt have different lengths"

# Shuffle both arrays with ONE shared permutation so each row keeps its label
# (https://www.delftstack.com/howto/numpy/python-numpy-shuffle-two-arrays/).
# The permutation is seeded so the train/validation/test split is identical
# after every Restart Kernel -> Run All.
SPLIT_SEED = 42
randomize = np.random.default_rng(SPLIT_SEED).permutation(len(raw_data_labels))

raw_data = raw_data[randomize]
raw_data_labels = raw_data_labels[randomize]

# Split sizes (rows of the shuffled data): training, validation, testing.
N_TRAINING = 1200
N_VALIDATION = 400
N_TESTING = 400

# Slice boundaries; a slice includes its start index and excludes its end index.
validation_start = N_TRAINING
testing_start = validation_start + N_VALIDATION
testing_end = testing_start + N_TESTING

training_data = raw_data[:validation_start]
training_data_labels = raw_data_labels[:validation_start]

validation_data = raw_data[validation_start:testing_start]
validation_data_labels = raw_data_labels[validation_start:testing_start]

testing_data = raw_data[testing_start:testing_end]
testing_data_labels = raw_data_labels[testing_start:testing_end]

print(validation_data_labels.shape)

In [None]:
def sigmoid(Z):
    """Logistic sigmoid, 1 / (1 + exp(-Z)), applied element-wise.

    Parameters:
        Z: a number or NumPy array of pre-activation values.

    Returns:
        Values of the same shape as ``Z``.
    """
    exp_neg = np.exp(-Z)
    return 1 / (1 + exp_neg)

In [None]:
def sigmoid_prime(Z):
    """Sigmoid derivative expressed in terms of the sigmoid's OUTPUT.

    Returns ``Z * (1 - Z)``. This equals the derivative of the sigmoid only
    when ``Z`` is already a sigmoid activation (not the pre-activation value);
    update_params calls it with the activations A1 and A2.

    Parameters:
        Z: a number or NumPy array of sigmoid activations.

    Returns:
        ``Z * (1 - Z)``, element-wise.
    """
    complement = 1 - Z
    return Z * complement

In [None]:
def softmax(Z):
    """Softmax along axis 0 (each column becomes a probability distribution).

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps ``np.exp`` from overflowing
    to ``inf`` (which would make the division produce NaN) for large inputs.

    Parameters:
        Z: NumPy array of scores; normalization runs over axis 0.

    Returns:
        Array of the same shape as ``Z`` whose entries along axis 0 sum to 1.
    """
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)  # computed once and reused for the sum
    return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)

In [None]:
def rand_params(n_inputs=2352, n_hidden=1000, n_outputs=10):
    """Generate random weights and biases for the three-layer network.

    Every value is drawn uniformly from [-1, 1).

    Parameters:
        n_inputs: number of input features (default 2352).
        n_hidden: number of units in each of the two hidden layers (default 1000).
        n_outputs: number of output units / classes (default 10).

    Returns:
        Tuple ``(w1, b1, w2, b2, w3, b3)`` with shapes
        ``(n_hidden, n_inputs)``, ``(n_hidden, 1)``, ``(n_hidden, n_hidden)``,
        ``(n_hidden, 1)``, ``(n_outputs, n_hidden)``, ``(n_outputs, 1)``.
    """
    def _uniform(rows, cols):
        # np.random.rand(...) - 1 would only cover [-1, 0); sample the full range.
        return np.random.uniform(-1.0, 1.0, size=(rows, cols))

    w1 = _uniform(n_hidden, n_inputs)
    b1 = _uniform(n_hidden, 1)
    w2 = _uniform(n_hidden, n_hidden)
    b2 = _uniform(n_hidden, 1)
    w3 = _uniform(n_outputs, n_hidden)
    b3 = _uniform(n_outputs, 1)
    # The parameters must be returned: callers unpack all six values.
    return w1, b1, w2, b2, w3, b3

In [None]:
def forward_prop(X, w1, b1, w2, b2, w3, b3):
    """Forward pass through the three-layer network.

    Two sigmoid hidden layers are followed by a softmax output layer. Each
    layer computes ``np.dot(w, previous) + b``, so the first axis of ``X``
    must match the second axis of ``w1``.

    Parameters:
        X: input data.
        w1, b1, w2, b2, w3, b3: weights and biases of layers 1 to 3.

    Returns:
        Tuple ``(A1, A2, A3)``: the activations of the first hidden layer, the
        second hidden layer and the softmax output. The pre-activation values
        are not returned.
    """
    hidden_first = sigmoid(np.dot(w1, X) + b1)
    hidden_second = sigmoid(np.dot(w2, hidden_first) + b2)
    output = softmax(np.dot(w3, hidden_second) + b3)
    return hidden_first, hidden_second, output

In [None]:
def one_hot_encode(labels, num_classes=10):
    """Encode class labels as one-hot row vectors.

    Parameters:
        labels: array-like of class indices. Float-valued labels (such as
            those produced by ``np.loadtxt``) are accepted and cast to int.
        num_classes: width of each one-hot vector. Defaults to 10, matching
            the 10 output units created by rand_params.

    Returns:
        Float array of shape ``(number of labels, num_classes)`` with a single
        1 per row, in the column given by that row's label.

    Raises:
        IndexError: if a label lies outside ``[0, num_classes)``.
    """
    class_ids = np.asarray(labels).astype(int).ravel()
    if class_ids.size and (class_ids.min() < 0 or class_ids.max() >= num_classes):
        # Negative indices would otherwise wrap around silently.
        raise IndexError(
            "labels must lie in [0, %d), got values between %d and %d"
            % (num_classes, class_ids.min(), class_ids.max())
        )
    encoded_labels = np.zeros((class_ids.size, num_classes))
    # One vectorised assignment: row i gets a 1 in column class_ids[i].
    encoded_labels[np.arange(class_ids.size), class_ids] = 1
    return encoded_labels

In [None]:
# Parameters:
# (w1, b1, w2, b2, w3, b3) = weights and biases for each layer
# (A1, A2, A3) = activations of each layer
# X = input data
# Y = labels
# lr = learning rate for gradient descent (aka alpha)

# Refer to "https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65" and
# "https://drive.google.com/file/d/1NVle5nlr0m2OHDiSTReSv3TCzMJGu27s/view?usp=sharing"
# It calculates the error of the output layer's activations and then backpropagates that error to the previous layers.
def update_params(w1, b1, w2, b2, w3, b3, A1, A2, A3, X, Y, lr):
    """Run one backpropagation / gradient-descent step.

    The output error ``A3 - Y`` is propagated backwards through the two
    sigmoid layers. Weights are first shrunk by ``regularize`` and then moved
    against their gradients. Gradients are summed over the samples (they are
    not divided by the number of samples).

    Parameters:
        w1, b1, w2, b2, w3, b3: current weights and biases of layers 1 to 3.
        A1, A2, A3: activations of layers 1 to 3 for ``X``.
        X: input data; it is used as ``np.dot(dZ1, X.T)``, so samples lie
            along its second axis.
        Y: target values with exactly the same shape as ``A3``.
        lr: learning rate (step size).

    Returns:
        The updated ``(w1, b1, w2, b2, w3, b3)``.

    Raises:
        ValueError: if ``Y`` and ``A3`` differ in shape.
    """
    # Without this check a 1-D label vector would be broadcast against A3 and
    # training would silently proceed on meaningless errors.
    if np.shape(Y) != np.shape(A3):
        raise ValueError(
            "Y must have the same shape as A3 %s, got %s"
            % (np.shape(A3), np.shape(Y))
        )

    dZ3 = A3 - Y  # error at the output layer
    dW3 = np.dot(dZ3, A2.T)  # gradient for the output-layer weights
    db3 = np.sum(dZ3, axis=1, keepdims=True)  # gradient for the output-layer biases
    dZ2 = np.dot(w3.T, dZ3) * sigmoid_prime(A2)  # error at the second hidden layer
    dW2 = np.dot(dZ2, A1.T)  # gradient for the second hidden layer's weights
    db2 = np.sum(dZ2, axis=1, keepdims=True)  # gradient for the second hidden layer's biases
    dZ1 = np.dot(w2.T, dZ2) * sigmoid_prime(A1)  # error at the first hidden layer
    dW1 = np.dot(dZ1, X.T)  # gradient for the first hidden layer's weights
    db1 = np.sum(dZ1, axis=1, keepdims=True)  # gradient for the first hidden layer's biases

    # Shrink the weights (not the biases) before taking the gradient step.
    w1, w2, w3 = regularize(w1, w2, w3)
    w1 = w1 - lr * dW1
    b1 = b1 - lr * db1
    w2 = w2 - lr * dW2
    b2 = b2 - lr * db2
    w3 = w3 - lr * dW3
    b3 = b3 - lr * db3
    return w1, b1, w2, b2, w3, b3

In [None]:
def regularize(w1, w2, w3, decay=0.99):
    """Shrink the three weight matrices by a constant multiplicative factor.

    Parameters:
        w1, w2, w3: weight arrays of layers 1 to 3.
        decay: factor each weight is multiplied by (default 0.99).

    Returns:
        Tuple of the three scaled arrays; the inputs are not modified.
    """
    return w1 * decay, w2 * decay, w3 * decay

In [None]:
def gradient_descent(iterations=10000, lr=0.01):
    """Learn the network parameters with full-batch gradient descent.

    Training uses the module-level ``training_data`` and
    ``training_data_labels``.

    Parameters:
        iterations: number of gradient-descent steps (default 10000).
        lr: learning rate passed to update_params (default 0.01).

    Returns:
        The learnt ``(w1, b1, w2, b2, w3, b3)``.
    """
    w1, b1, w2, b2, w3, b3 = rand_params()

    # training_data holds one sample per row, but forward_prop/update_params
    # compute np.dot(w1, X) and therefore need one sample per column.
    X = np.asarray(training_data).T

    # The output error is A3 - Y, so Y must be one-hot with the same
    # (classes, samples) layout as A3 rather than a flat vector of class ids.
    class_ids = np.asarray(training_data_labels).astype(int).ravel()
    Y = np.zeros((b3.shape[0], class_ids.size))
    Y[class_ids, np.arange(class_ids.size)] = 1

    for _ in range(iterations):
        A1, A2, A3 = forward_prop(X, w1, b1, w2, b2, w3, b3)
        w1, b1, w2, b2, w3, b3 = update_params(w1, b1, w2, b2, w3, b3, A1, A2, A3, X, Y, lr)
    return w1, b1, w2, b2, w3, b3

In [None]:
# Train the network: gradient_descent() returns the learnt weights and biases
# of the three layers, which the prediction cells further down reuse.
w1, b1, w2, b2, w3, b3 = gradient_descent()

In [None]:
def get_prediction(X, w1, b1, w2, b2, w3, b3):
    """Predict a class index for the inputs in ``X``.

    Runs forward_prop and takes the argmax of the output activations along
    axis 0, i.e. the index of the largest output for each position along the
    remaining axis.

    Parameters:
        X: input data in the layout forward_prop expects.
        w1, b1, w2, b2, w3, b3: network weights and biases.

    Returns:
        The result of ``np.argmax`` over axis 0 of the output activations.
    """
    _, _, output_activations = forward_prop(X, w1, b1, w2, b2, w3, b3)
    predicted_classes = np.argmax(output_activations, axis=0)
    return predicted_classes

In [None]:
def test_prediction(X, Y, w1, b1, w2, b2, w3, b3):
    """Compute the accuracy of the network's predictions on ``X``.

    Parameters:
        X: input data, passed on to get_prediction.
        Y: expected class labels, compared element-wise with the predictions.
        w1, b1, w2, b2, w3, b3: network weights and biases.

    Returns:
        The mean of ``prediction == Y``, i.e. the fraction of matching labels.
    """
    is_correct = get_prediction(X, w1, b1, w2, b2, w3, b3) == Y
    return np.mean(is_correct)

In [None]:
# validation_data holds one sample per row (it is a row slice of the loaded
# inputs), while forward_prop computes np.dot(w1, X) and needs one sample per
# column, so the data is transposed before it is handed to the network.
prediction = get_prediction(validation_data.T, w1, b1, w2, b2, w3, b3)
accuracy = test_prediction(validation_data.T, validation_data_labels, w1, b1, w2, b2, w3, b3)

In [None]:
def classify(X, w1, b1, w2, b2, w3, b3):
    """Classify data using learnt parameters (intended for stdin input).

    Performs the same computation as get_prediction: a forward pass followed
    by an argmax over axis 0 of the output activations.

    Parameters:
        X: input data in the layout forward_prop expects.
        w1, b1, w2, b2, w3, b3: learnt network weights and biases.

    Returns:
        The predicted class indices.
    """
    return get_prediction(X, w1, b1, w2, b2, w3, b3)