# 0. Importing packages

In [92]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Relative path to the training CSV. A forward slash is used as the separator
# because it is accepted on Windows, macOS and Linux alike, whereas a backslash
# is only treated as a separator on Windows.
path = 'digit-recognizer/train.csv'

# Reading the CSV into a pandas DataFrame
data = pd.read_csv(path)


Loaded the data into a pandas dataframe.

In [93]:
# Previewing the first five rows: a label column followed by the pixel columns
data.head()


Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Converting the data from a dataframe to a numpy array.

In [94]:
# Rebinding `data` from the DataFrame to a NumPy array holding the same values
data = np.array(data)


A model is overfitted when it makes accurate predictions on the data it was trained on but fails to generalise to new data, which is the data it is actually supposed to be accurate on. To detect overfitting, we set aside a portion of the training data as a dev (validation) set that the model never trains on.

Shuffling the data before we split the data into dev and training data. Note, `np.random.shuffle()` permutes the sequence in place.

In [95]:
# Seeding NumPy's global generator so that the shuffle, and therefore the
# dev/train split derived from it, is identical on every Restart & Run All
np.random.seed(42)

# Shuffling the rows (images) in place
np.random.shuffle(data)


In [96]:
# Inspecting the shuffled array; the first column holds the labels
data


array([[5, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [7, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [8, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [97]:
# Storing the dimensions of the data array
m, n = data.shape

# m - number of rows (one per image); n - number of columns (1 label + the pixel values)
m, n


(42000, 785)

Splitting the data into a dev set and a training set. The dev set is used only for validation, so we set aside just the first 1000 (shuffled) images for it.

In [98]:
# The first 1000 rows become the dev set; after transposing, each column is one image
data_dev = data[:1000].transpose()

# Row 0 of the transposed block holds the labels, the remaining rows hold the pixels
Y_dev, X_dev = data_dev[0], data_dev[1:]


In [99]:
# Every row after the first 1000 goes to the training set; transposed so each column is one image
data_train = data[1000:m].transpose()

# Row 0 holds the labels, the remaining rows hold the pixels
Y_train, X_train = data_train[0], data_train[1:]


Printing details of all arrays implemented so far.

In [100]:
# Reporting the shape of every array created by the split, one per line
print(
    'Printing dimensions of all existing arrays:\n'
    '(i) X - pixels\n'
    f'X_dev: {X_dev.shape}\n'
    f'X_train: {X_train.shape}\n'
    '\n'
    '(ii) Y - labels\n'
    f'Y_dev: {Y_dev.shape}\n'
    f'Y_train: {Y_train.shape}')


Printing dimensions of all existing arrays:
(i) X - pixels
X_dev: (784, 1000)
X_train: (784, 41000)

(ii) Y - labels
Y_dev: (1000,)
Y_train: (41000,)


Defining a function to initialise the neural network with random weights and biases. We use `rand()` to obtain random values in `[0, 1)` and then subtract 0.5 from them so that the values lie in `[-0.5, 0.5)`.

In [101]:
def init_params(n_inputs=None, n_hidden=10, n_outputs=10):
    """Create random starting weights and biases for a two-layer network.

    Every value is drawn uniformly from [-0.5, 0.5) by shifting the output of
    `np.random.rand()`, which lies in [0, 1).

    Parameters
    ----------
    n_inputs : int, optional
        Number of input features. When omitted, it falls back to `n - 1`,
        where `n` is the notebook-level column count of the data array
        (1 label + the pixel values), exactly as before.
    n_hidden : int, default 10
        Number of nodes in the hidden layer.
    n_outputs : int, default 10
        Number of nodes in the output layer.

    Returns
    -------
    W1 : ndarray of shape (n_hidden, n_inputs)
    b1 : ndarray of shape (n_hidden, 1)
    W2 : ndarray of shape (n_outputs, n_hidden)
    b2 : ndarray of shape (n_outputs, 1)
    """
    if n_inputs is None:
        # Preserve the original behaviour: one input per pixel column
        n_inputs = n - 1

    # One weight per (hidden node, input feature) pair, plus one bias per hidden node
    W1 = np.random.rand(n_hidden, n_inputs) - 0.5
    b1 = np.random.rand(n_hidden, 1) - 0.5

    # One weight per (output node, hidden node) pair, plus one bias per output node
    W2 = np.random.rand(n_outputs, n_hidden) - 0.5
    b2 = np.random.rand(n_outputs, 1) - 0.5

    return W1, b1, W2, b2


Function implementing the ReLU (rectified linear unit) activation function.

In [102]:
def ReLU(Z):
    """Apply the rectified linear unit element-wise: negatives become 0, the rest pass through."""
    # np.maximum broadcasts the scalar 0 against every entry of Z
    rectified = np.maximum(0, Z)
    return rectified


Function implementing the softmax activation function

In [103]:
def softmax(Z):
    """Convert scores into probabilities, independently for each column.

    Parameters
    ----------
    Z : ndarray
        Scores laid out as (classes, samples), i.e. one column per sample.
        A 1-D array is treated as the scores of a single sample.

    Returns
    -------
    ndarray
        Array of the same shape as `Z` in which every column sums to 1.
    """
    # Subtracting each column's maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing on large scores
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_Z = np.exp(shifted)

    # Normalising along axis 0 so that each sample (column) gets its own
    # probability distribution instead of sharing one across the whole batch
    A = exp_Z / np.sum(exp_Z, axis=0, keepdims=True)

    # Returning the probabilities
    return A


Defining a function to implement forward propagation through the neural net.

In [104]:
def forward_propagation(W1, b1, W2, b2, X):
    """Run one forward pass through the two-layer network.

    Parameters
    ----------
    W1, b1 : ndarray
        Weights and biases of the hidden layer.
    W2, b2 : ndarray
        Weights and biases of the output layer.
    X : ndarray
        Input data; it must be compatible with `W1.dot(X)`, so it is
        presumably laid out as (features, samples) like `X_train`.

    Returns
    -------
    Z1 : ndarray
        Hidden-layer values before activation.
    A1 : ndarray
        Hidden-layer values after ReLU.
    Z2 : ndarray
        Output-layer values before activation.
    A2 : ndarray
        Output-layer values after softmax.
    """
    # Pre-activation values of the hidden layer
    Z1 = W1.dot(X) + b1

    # Activating Z1
    A1 = ReLU(Z1)

    # Pre-activation values of the output layer
    Z2 = W2.dot(A1) + b2

    # Since this is the output layer, we apply softmax
    A2 = softmax(Z2)

    # Returning the intermediate values as well as the prediction; without
    # this return the function would always yield None
    return Z1, A1, Z2, A2
    

Function to implement one-hot encoding of Y. This is to represent the target classes as an array instead of a label.

In [145]:
def one_hot_encode(Y, num_classes=None):
    """One-hot encode a 1-D array of labels, one column per label.

    Parameters
    ----------
    Y : array-like
        Labels to encode.
    num_classes : int, optional
        When given, the result always has `num_classes` rows, where row `k`
        marks label `k`, even if some labels do not occur in `Y`. Labels
        outside `0 .. num_classes - 1` produce an all-zero column. When
        omitted, there is one row per distinct label present in `Y`, in
        ascending order (the original behaviour).

    Returns
    -------
    ndarray of dtype uint8
        Matrix of shape (number of classes, len(Y)) with a single 1 per column.
    """
    labels = np.asarray(Y).ravel()

    # Choosing which label each row stands for
    if num_classes is None:
        classes = np.unique(labels)
    else:
        classes = np.arange(num_classes)

    # Comparing every class (as a column vector) against every label (as a
    # row vector) yields the encoding with images already along the columns
    one_hot_encoded_array = (classes[:, np.newaxis] == labels[np.newaxis, :]).astype(np.uint8)

    return one_hot_encoded_array


Test to illustrate the working of `one_hot_encode(Y)`.

In [140]:
# Taking the first 20 training labels as a small sample to encode
test = Y_train[:20]
test


array([3, 6, 7, 2, 6, 9, 7, 7, 0, 4, 6, 5, 6, 6, 2, 3, 0, 1, 8, 0],
      dtype=int64)

In [148]:
# One-hot encoding the sample with pandas; transposed so each column is one label
df = pd.get_dummies(test).T

# Converting to a NumPy array; as the last expression of the cell, this is what gets displayed
ls = np.array(df)
ls


array([[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
      dtype=uint8)

Comparing it to our function.

In [146]:
# Should display the same matrix as the manual pandas encoding above
one_hot_encode(test)

array([[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
      dtype=uint8)

Function to back propagate through the neural network to calculate the gradients of the loss with respect to the weights and biases.

In [None]:
def back_propagation():
    