In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 

data = pd.read_csv("C:/Users/sovta/Downloads/archive/mnist_train.csv")
data.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [2]:
data = np.array(data)
m, n = data.shape
np.random.shuffle(data) # shuffle before splitting into dev and training sets

data_dev = data[0:1000].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n]
X_dev = X_dev / 255.

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.
_,m_train = X_train.shape


In [3]:
def init_params():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

def ReLU(Z):
    return np.maximum(Z, 0)

def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A
    
def forward_prop(W1, b1, W2, b2, X):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2

def ReLU_deriv(Z):
    return Z > 0

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1)
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1    
    W2 = W2 - alpha * dW2  
    b2 = b2 - alpha * db2    
    return W1, b1, W2, b2
    


In [12]:
def get_predictions(A2):
    return np.argmax(A2, 0)


def get_accuracy(predictions, Y):
    return np.sum(predictions == Y)/Y.size
    
    
def gradient_descent(X, Y, alpha, iterations):
    W1, b1, W2, b2 = init_params()
    for _ in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha) 
        if _ % 50 == 0:
            print(f'Iteration: {_}')
            print(f'Accuracy: {get_accuracy(get_predictions(A2), Y)}')
    return W1, b1, W2, b2

In [13]:
W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 0.10,  500)

Iteration: 0
Accuracy: 0.09059322033898305
Iteration: 50
Accuracy: 0.4206949152542373
Iteration: 100
Accuracy: 0.621
Iteration: 150
Accuracy: 0.7041016949152542
Iteration: 200
Accuracy: 0.7478135593220339
Iteration: 250
Accuracy: 0.774542372881356
Iteration: 300
Accuracy: 0.7940508474576271
Iteration: 350
Accuracy: 0.810677966101695
Iteration: 400
Accuracy: 0.8239322033898305
Iteration: 450
Accuracy: 0.833728813559322


In [14]:
def make_predictions(X, W1, b1, W2, b2):
    _, _, _, A2 = forward_prop(W1, b1, W2, b2, X)
    predictions = get_predictions(A2)
    return predictions



In [15]:
dev_predictions = make_predictions(X_dev, W1, b1, W2, b2)
get_accuracy(dev_predictions, Y_dev)

0.835

In [16]:
W_1, b_1, W_2, b_2 = gradient_descent(X_train, Y_train, 0.20,  500)

Iteration: 0
Accuracy: 0.10938983050847458
Iteration: 50
Accuracy: 0.525542372881356
Iteration: 100
Accuracy: 0.7128135593220339
Iteration: 150
Accuracy: 0.7376440677966102
Iteration: 200
Accuracy: 0.7977796610169492
Iteration: 250
Accuracy: 0.8196101694915254
Iteration: 300
Accuracy: 0.8358983050847457
Iteration: 350
Accuracy: 0.8494745762711864
Iteration: 400
Accuracy: 0.858406779661017
Iteration: 450
Accuracy: 0.8653389830508474


In [9]:
dev_predictions_1 = make_predictions(X_dev, W_1, b_1, W_2, b_2)
get_accuracy(dev_predictions_1, Y_dev)

[8 2 7 1 2 4 4 3 8 1 4 8 2 1 8 3 0 9 3 1 6 1 8 1 9 2 0 1 6 6 8 2 3 7 4 6 1
 7 7 7 1 0 1 0 4 4 3 2 6 3 3 6 9 9 3 1 7 0 3 8 4 9 4 2 8 8 1 2 9 7 9 8 6 8
 8 6 8 3 2 7 0 7 7 4 1 7 5 9 8 7 0 7 8 7 8 4 2 4 1 8 3 5 2 3 7 3 6 4 7 5 9
 6 0 0 9 5 6 7 3 8 0 8 7 0 5 5 8 7 1 6 3 3 6 0 1 2 3 1 9 9 0 5 8 5 4 8 9 0
 3 2 2 8 3 3 1 1 1 5 1 7 5 0 1 4 1 8 7 7 9 1 5 0 4 8 4 0 2 5 6 7 4 0 7 8 3
 2 1 3 6 4 4 4 3 8 3 8 4 7 6 8 4 7 9 4 3 7 7 2 5 4 6 7 7 4 6 2 0 6 1 8 1 6
 6 7 7 6 8 4 6 9 3 8 2 2 5 1 0 0 8 3 7 0 8 3 4 8 3 4 1 0 8 6 5 7 3 9 7 9 6
 9 5 7 1 7 1 5 0 1 1 8 4 8 4 1 2 8 3 3 6 4 5 3 9 4 6 4 1 9 2 5 1 7 3 7 1 2
 1 8 8 6 2 8 7 1 0 4 6 1 1 3 8 2 6 6 1 1 8 8 7 8 5 9 4 5 3 1 8 3 3 5 4 9 1
 7 2 2 4 2 6 2 4 9 0 7 3 8 7 0 7 4 4 2 1 7 8 8 8 3 7 1 3 4 1 2 1 8 5 9 2 2
 0 3 4 0 8 1 7 4 0 9 6 0 0 9 1 0 3 6 5 9 4 9 0 6 2 9 4 4 0 0 0 3 9 7 4 5 4
 1 3 4 8 5 1 9 7 9 2 4 2 9 3 3 9 4 2 1 6 2 9 8 9 5 1 3 2 3 2 9 0 1 9 0 4 7
 6 5 0 2 9 6 8 6 0 8 9 8 2 1 0 5 5 5 0 1 9 0 0 4 3 2 4 4 8 9 3 3 4 5 9 9 4
 2 2 2 8 0 2 1 7 6 4 2 0 

0.866