In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
from keras.datasets import mnist

In [None]:
# Load MNIST via keras; the result unpacks into (images, labels) pairs for the
# training split and the test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
# Show the container type of the training images (displayed as the cell output).
type(x_train)

numpy.ndarray

In [None]:
# Peek at the training labels (displayed as the cell output).
y_train

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [None]:
# Flatten every image into a single row (one row per sample) and divide by 255
# (presumably mapping 0-255 pixel intensities into [0, 1]).
# NOTE: both names are reassigned from themselves, so re-running this cell
# divides by 255 a second time -- re-run the data-loading cell first.
x_train = x_train.reshape(x_train.shape[0], -1) / 255.0
x_test = x_test.reshape(x_test.shape[0], -1) / 255.0

In [None]:
# Sanity check: print the shapes of the flattened train and test matrices
# (expected to be samples x features after the reshape above).
print("x_train:", x_train.shape)
print("x_test:", x_test.shape)

x_train: (60000, 784)
x_test: (10000, 784)


In [None]:
#### MY NOTES ####
# our parameters are:
# w1: weights connecting the input layer to the hidden layer
# w2: weights connecting the hidden layer to the output layer
# b1 : bias of the hidden layer
#  b2: bias of the output layer
# z1, z2 are the pre-activations (weighted sums plus bias); a1, a2 are the activations (outputs of the activation functions)
# The bias terms allow the network to shift the activation function horizontally
# We have 2 activation functions: ReLU is often used in the hidden layers, while softmax is used in the output one (for multi-class classification)

In [None]:
def parameters(n_inputs=784, n_hidden=10, n_outputs=10):
    """Randomly initialise the weights and biases of the two-layer network.

    Every entry is drawn with ``np.random.rand`` and shifted by -0.5, i.e.
    uniformly from [-0.5, 0.5). With the default sizes the draws (and their
    order) are exactly those of the original hard-coded 784-10-10 network.

    Parameters
    ----------
    n_inputs : int, default 784
        Number of input features (columns of the data matrix).
    n_hidden : int, default 10
        Number of hidden units.
    n_outputs : int, default 10
        Number of output classes.

    Returns
    -------
    tuple
        ``(w1, b1, w2, b2)`` with shapes ``(n_hidden, n_inputs)``,
        ``(n_hidden, 1)``, ``(n_outputs, n_hidden)`` and ``(n_outputs, 1)``.
    """
    w1 = np.random.rand(n_hidden, n_inputs) - 0.5
    b1 = np.random.rand(n_hidden, 1) - 0.5
    w2 = np.random.rand(n_outputs, n_hidden) - 0.5
    b2 = np.random.rand(n_outputs, 1) - 0.5
    return w1, b1, w2, b2

# Rectified linear unit (ReLU) activation.
def RELU(z):
    """Apply ReLU element-wise: keep positive entries, replace the rest with 0."""
    rectified = np.maximum(0, z)
    return rectified

def softMax(z):
    """Column-wise softmax, normalising along axis 0.

    The maximum of every column is subtracted before exponentiating.
    Softmax is invariant to such a shift, so the result is unchanged, but
    ``np.exp`` can no longer overflow to ``inf`` (which previously produced
    ``nan`` probabilities for large logits).

    Parameters
    ----------
    z : array_like
        Logits with classes along the first axis (e.g. ``(classes, samples)``);
        a 1-D vector of logits is also accepted.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``z`` whose entries sum to 1 along axis 0.
    """
    z = np.asarray(z)
    # Shift so the largest logit in each column is 0 -> exp() stays in (0, 1].
    shifted = z - np.max(z, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)


In [None]:
def forwardPropagation(w1, b1, w2, b2, X):
    """Run one forward pass through the two-layer network.

    ``X`` is transposed before the first product, so it is expected with one
    sample per row; every returned array has one column per sample.

    Returns
    -------
    tuple
        ``(z1, a1, z2, a2)``: hidden pre-activation, hidden activation (RELU),
        output pre-activation, and output activation (softMax).
    """
    hidden_pre = np.dot(w1, X.T) + b1
    hidden_act = RELU(hidden_pre)
    output_pre = np.dot(w2, hidden_act) + b2
    output_act = softMax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_act


def ReLU_deriv(z):
    """Derivative of ReLU as a boolean mask: True where ``z`` is strictly positive."""
    is_active = 0 < z
    return is_active


def oneHot(Y, num_classes=None):
    """One-hot encode integer labels, returning one column per sample.

    Parameters
    ----------
    Y : array_like of int
        1-D vector of class labels.
    num_classes : int, optional
        Number of rows (classes) in the encoding. When omitted it is inferred
        as ``max(Y) + 1``, which is only correct if the highest class actually
        occurs in ``Y``; pass it explicitly for mini-batches or subsets.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_classes, Y.size)`` with a single 1 per
        column.
    """
    Y = np.asarray(Y)
    if num_classes is None:
        # int() avoids wrap-around when Y has a narrow dtype such as uint8.
        num_classes = int(Y.max()) + 1
    one_hot_Y = np.zeros((Y.size, num_classes))
    one_hot_Y[np.arange(Y.size), Y] = 1
    return one_hot_Y.T


# Number of training examples (size of x_train's first axis), kept as a
# module-level name.
m = x_train.shape[0]

def backwardPropagation(z1, a1, z2, a2, w1, w2, X, Y):
    """Compute the parameter gradients for one batch.

    ``dZ2 = a2 - one_hot(Y)`` is the output-layer error for softmax outputs
    (the loss itself is implicit in this notebook; this is the usual
    cross-entropy gradient). It is propagated back through ``w2`` and the
    ReLU mask to obtain the hidden-layer error.

    Two details matter for correctness:

    * the batch size ``m`` is taken from ``Y`` itself rather than from a
      module-level variable, so the 1/m scaling is right for any batch;
    * bias gradients are summed over the sample axis only
      (``axis=1, keepdims=True``), giving one gradient per unit with the same
      shape as the bias, instead of a single scalar shared by all units.

    Parameters
    ----------
    z1, a1, z2, a2 : numpy.ndarray
        Values returned by the forward pass (one column per sample).
        ``z2`` is accepted for symmetry but not used.
    w1, w2 : numpy.ndarray
        Current weights; only ``w2`` is used.
    X : numpy.ndarray
        Input batch with one sample per row.
    Y : numpy.ndarray
        Integer labels for the batch.

    Returns
    -------
    tuple
        ``(dW1, db1, dW2, db2)``; ``db1``/``db2`` are column vectors.
    """
    m = Y.size  # batch size, local so it cannot go stale
    one_hot_Y = oneHot(Y)
    dZ2 = a2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(a1.T)
    db2 = 1 / m * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = w2.T.dot(dZ2) * ReLU_deriv(z1)
    dW1 = 1 / m * dZ1.dot(X)
    db1 = 1 / m * np.sum(dZ1, axis=1, keepdims=True)
    return dW1, db1, dW2, db2


In [None]:
def updateParams(w1, b1, w2, b2, dW1, db1, dW2, db2, alpha):
    """Take one gradient-descent step: each parameter minus ``alpha`` times its gradient.

    Returns the updated ``(w1, b1, w2, b2)`` as a tuple; inputs are not
    modified in place.
    """
    current = (w1, b1, w2, b2)
    gradients = (dW1, db1, dW2, db2)
    return tuple(param - alpha * grad for param, grad in zip(current, gradients))

def get_predictions(a2):
    """Return, for every column of ``a2``, the row index of its largest entry."""
    predicted_classes = np.argmax(a2, axis=0)
    return predicted_classes

def get_accuracy(predictions, Y):
    """Fraction of entries where ``predictions`` equals ``Y`` (a value in [0, 1])."""
    matches = predictions == Y
    n_correct = np.sum(matches)
    return n_correct / Y.size


In [None]:
def gradient_descent(X, Y, alpha, epochs, seed=None):
    """Train the two-layer network with full-batch gradient descent.

    Parameters are freshly initialised by ``parameters()`` on every call, so
    calling this function again restarts training from scratch.

    Parameters
    ----------
    X : numpy.ndarray
        Training inputs, one sample per row.
    Y : numpy.ndarray
        Integer training labels.
    alpha : float
        Learning rate.
    epochs : int
        Number of full-batch update steps.
    seed : int, optional
        When given, NumPy's global random generator is seeded with it before
        the parameters are initialised, making the run reproducible. When
        omitted the global random state is left untouched.

    Returns
    -------
    tuple
        The trained ``(w1, b1, w2, b2)``.

    Notes
    -----
    The accuracy printed every 10 steps is computed from the forward pass
    performed *before* that step's parameter update.
    """
    if seed is not None:
        np.random.seed(seed)
    w1, b1, w2, b2 = parameters()
    for i in range(epochs):
        z1, a1, z2, a2 = forwardPropagation(w1, b1, w2, b2, X)
        dW1, db1, dW2, db2 = backwardPropagation(z1, a1, z2, a2, w1, w2, X, Y)
        w1, b1, w2, b2 = updateParams(w1, b1, w2, b2, dW1, db1, dW2, db2, alpha)
        if i % 10 == 0:
            print("epoch number: ", i)
            predictions = get_predictions(a2)
            print("Accuracy:", get_accuracy(predictions, Y) * 100)
    return w1, b1, w2, b2


In [None]:
# Train on the full training set with learning rate 0.10 for 500 update steps.
w1, b1, w2, b2 = gradient_descent(x_train, y_train, 0.10, 500)

epoch number:  0
Accuracy: 7.253333333333334
epoch number:  10
Accuracy: 22.236666666666665
epoch number:  20
Accuracy: 32.37
epoch number:  30
Accuracy: 37.361666666666665
epoch number:  40
Accuracy: 42.943333333333335
epoch number:  50
Accuracy: 48.26166666666667
epoch number:  60
Accuracy: 52.410000000000004
epoch number:  70
Accuracy: 55.81333333333334
epoch number:  80
Accuracy: 58.48
epoch number:  90
Accuracy: 60.97
epoch number:  100
Accuracy: 63.18333333333334
epoch number:  110
Accuracy: 65.25166666666667
epoch number:  120
Accuracy: 67.08666666666666
epoch number:  130
Accuracy: 68.78999999999999
epoch number:  140
Accuracy: 70.30499999999999
epoch number:  150
Accuracy: 71.63166666666667
epoch number:  160
Accuracy: 72.88166666666666
epoch number:  170
Accuracy: 73.99
epoch number:  180
Accuracy: 75.05
epoch number:  190
Accuracy: 75.92833333333333
epoch number:  200
Accuracy: 76.68166666666667
epoch number:  210
Accuracy: 77.35833333333333
epoch number:  220
Accuracy: 77.9

In [None]:
# Train again for 600 update steps. gradient_descent re-initialises the
# parameters itself, so this starts from scratch and overwrites the weights
# produced by the previous training cell.
w1, b1, w2, b2 = gradient_descent(x_train, y_train, 0.10, 600)

Iteration:  0
Accuracy: 10.106666666666667
Iteration:  10
Accuracy: 21.141666666666666
Iteration:  20
Accuracy: 26.866666666666667
Iteration:  30
Accuracy: 34.125
Iteration:  40
Accuracy: 43.031666666666666
Iteration:  50
Accuracy: 49.7
Iteration:  60
Accuracy: 54.779999999999994
Iteration:  70
Accuracy: 58.70833333333333
Iteration:  80
Accuracy: 61.809999999999995
Iteration:  90
Accuracy: 64.345
Iteration:  100
Accuracy: 66.53833333333333
Iteration:  110
Accuracy: 68.33500000000001
Iteration:  120
Accuracy: 69.94500000000001
Iteration:  130
Accuracy: 71.28333333333333
Iteration:  140
Accuracy: 72.54666666666667
Iteration:  150
Accuracy: 73.54833333333333
Iteration:  160
Accuracy: 74.45166666666667
Iteration:  170
Accuracy: 75.28833333333334
Iteration:  180
Accuracy: 76.06166666666667
Iteration:  190
Accuracy: 76.76833333333335
Iteration:  200
Accuracy: 77.4
Iteration:  210
Accuracy: 78.02
Iteration:  220
Accuracy: 78.515
Iteration:  230
Accuracy: 79.01166666666667
Iteration:  240
Accu

In [None]:
def test(x_test, y_test, w1, b1, w2, b2):
    """Evaluate the given parameters on a labelled data set.

    Runs a forward pass on ``x_test``, converts the output activations to
    class predictions, and compares them with ``y_test``.

    Returns
    -------
    tuple
        ``(accuracy, confusion_matrix)`` where accuracy is a fraction and the
        confusion matrix comes from ``confusion_matrix(y_test, predictions)``.
    """
    *_, output_act = forwardPropagation(w1, b1, w2, b2, x_test)
    predicted = get_predictions(output_act)
    return get_accuracy(predicted, y_test), confusion_matrix(y_test, predicted)

# Evaluate on the held-out test split using the w1, b1, w2, b2 currently in
# scope from the training cells. (A retraining call used to sit here; it named
# X_train / Y_train, which do not match the lowercase x_train / y_train used
# in the cells above.)
acc, confusion_mat = test(x_test, y_test, w1, b1, w2, b2)
print("Test Accuracy:", acc*100, '%')
print("Confusion Matrix:")
print(confusion_mat)

Test Accuracy: 85.39999999999999 %
Confusion Matrix:
[[ 935    0    7    7    1   18    7    1    3    1]
 [   0 1090    4    6    1    3    4    1   25    1]
 [  15   18  858   34   16    2   23    9   49    8]
 [   5    3   34  856    1   42    1   25   35    8]
 [   4    1   10    1  843    1   21    0   18   83]
 [  27    5   13   55    8  661   26   11   77    9]
 [  14    3   19    5   18   17  873    0    8    1]
 [   4   18   24   13    7    2    0  865    7   88]
 [  10   19   17   44   23   54   18    9  750   30]
 [   8    2    2   16   77   20    2   58   15  809]]
