In [24]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [6]:
data_train = pd.read_csv('C:/Users/PMLS/Desktop/Projects/Mnist_Code_Ann/dataset/mnist_train.csv')
data_test = pd.read_csv("C:/Users/PMLS/Desktop/Projects/Mnist_Code_Ann/dataset/mnist_test.csv")

In [7]:
data = np.array(data_train)
m, n = data.shape

data_train = data[:].T

Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255. # Normalization
print('Training Shape:',X_train.shape, Y_train.shape)

data = np.array(data_test).T
Y_test = data[0]
X_test = data[1:]
X_test = X_test / 255.
print('Testing Shape:',X_test.shape, Y_test.shape)

Training Shape: (784, 60000) (60000,)
Testing Shape: (784, 10000) (10000,)


In [114]:
# Initalize Parameters
def init_param():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1)   - 0.5
    W2 = np.random.rand(10, 10)  - 0.5
    b2 = np.random.rand(10, 1)   - 0.5
    return W1, b1, W2, b2

def ReLU(X):
    return np.maximum(0, X)

def Softmax(X):
    return np.exp(X) / sum(np.exp(X))

def forward_prop(W1, b1, W2, b2, X):
    H1  = W1.dot(X) + b1
    A1 = ReLU(H1)
    O1  = W2.dot(A1) + b2
    A2 = Softmax(O1)
    return H1, A1, O1, A2

def derv_ReLU(X):
    return X > 0

def hot_encoding(Y): 
    y = np.zeros((Y.size, Y.max()+1))
    y[np.arange(Y.size), Y] = 1
    return y.T

def backward_prop(H1, A1, O1, A2, X, W2, Y):
    m = Y.size
    y_one_hot = hot_encoding(Y)
    dO1 = A2 - y_one_hot
    dW2 = (1/m) * dO1.dot(A1.T)
    db2 = (1/m) * np.sum(dO1)
    dH1 = W2.T.dot(dO1) * derv_ReLU(H1)
    dW1 = (1/m) * dH1.dot(X.T)
    db1 = (1/m) * np.sum(dH1)
    return dW1, db1, dW2, db2

def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, beta):
    W1 = W1 - beta * dW1
    b1 = b1 - beta * db1
    W2 = W2 - beta * dW2
    b2 = b2 - beta * db2
    return W1, b1, W2, b2

def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(pred, Y):
    print(pred, Y)
    return np.sum(pred == Y) / Y.size

def grad_descent(X, Y, epochs, beta):
    W1, b1, W2, b2 = init_param()
    
    for i in range(0, epochs):
        H1, A1, O1, A2     = forward_prop(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backward_prop(H1, A1, O1, A2, X, W2, Y)
        W1, b1, W2, b2     = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, beta)
        if (i % 10 == 0):
            print("Epoch:   ",i)
            print("Accuracy:", get_accuracy(get_predictions(A2), Y))
        
    return W1, b1, W2, b2

def make_predictions(X, W1, b1, W2, b2):
    _, _, _, A2 = forward_prop(W1, b1, W2, b2, X)
    predictions = get_predictions(A2)
    return predictions

def test_prediction(W1, b1, W2, b2):
    _,n = X_test.shape
    preds = make_predictions(X_test, W1, b1, W2, b2)
    print("Accuray", sum(preds == Y_test) / n)

def live_prediction(X, W1, b1, W2, b2):
    preds = make_predictions(X, W1, b1, W2, b2)
    print(preds)

In [126]:
# Training

W1, b1, W2, b2 = grad_descent(X_train, Y_train, 500, 0.2)
s_W1 = W1
s_b1 = b1
s_W2 = W2
s_b2 = b2

Epoch:    0
[8 2 2 ... 8 8 2] [5 0 4 ... 5 6 8]
Accuracy: 0.1145
Epoch:    10
[3 0 2 ... 3 8 8] [5 0 4 ... 5 6 8]
Accuracy: 0.23163333333333333
Epoch:    20
[3 0 2 ... 3 6 8] [5 0 4 ... 5 6 8]
Accuracy: 0.35181666666666667
Epoch:    30
[5 0 5 ... 3 6 8] [5 0 4 ... 5 6 8]
Accuracy: 0.4562833333333333
Epoch:    40
[5 0 5 ... 3 6 8] [5 0 4 ... 5 6 8]
Accuracy: 0.52895
Epoch:    50
[3 0 5 ... 3 6 8] [5 0 4 ... 5 6 8]
Accuracy: 0.5801166666666666
Epoch:    60
[3 0 5 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy: 0.6227333333333334
Epoch:    70
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy: 0.6566166666666666
Epoch:    80
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy: 0.6826333333333333
Epoch:    90
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy: 0.7043833333333334
Epoch:    100
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy: 0.7226
Epoch:    110
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy: 0.7372
Epoch:    120
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy: 0.7507
Epoch:    130
[3 0 4 ... 5 6 8] [5 0 4 ..

In [127]:
# Prediction

test_prediction(s_W1, s_b1, s_W2, s_b2)

Accuray 0.8799


In [131]:
# Live Predict

img = mpimg.imread('C:/Users/PMLS/Desktop/Projects/Mnist_Code_Ann/img7.png')
print("Before:",img.shape)
img.resize((28,28))
print("After:",img.shape)
array_img = np.array(img)
array_img = array_img.reshape((784,1))
array_img = array_img / 255.0

live_prediction(array_img, s_W1, s_b1, s_W2, s_b2)

Before: (355, 291, 4)
After: (28, 28)
[5]
