In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt

In [2]:
from sklearn.datasets import fetch_openml

mnist = fetch_openml('mnist_784')

X = mnist.data.astype('float32')/255.0
y = mnist.target.astype('int')

In [3]:
X = pd.DataFrame(X)
X['Label'] = y
X

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784,Label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
69996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
69997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
69998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5


In [4]:
data = np.array(X)
m, n = data.shape
np.random.shuffle(data)

data_test = data[0:10000].T
X_test = data_test[0:n-1]
y_test = data_test[n-1]

data_train = data[10000:m].T
X_train = data_train[0:n-1]
y_train = data_train[n-1]

In [5]:
y_train = y_train.astype(int)
y_train

array([1, 0, 7, ..., 8, 3, 1])

In [6]:
def init_param():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

In [7]:
def ReLU(x):
    return np.maximum(0, x)

def softMax(x):
    exp_x = np.exp(x)
    return exp_x/sum(np.exp(x))

In [8]:
def fwd_propogation(X_train, W1, b1, W2, b2):
    # First Layer
    Z1 = W1.dot(X_train) + b1
    A1 = ReLU(Z1)

    # Second Layer
    Z2 = W2.dot(A1) + b2
    A2 = softMax(Z2)
    return Z1, A1, Z2, A2

In [9]:
def one_hot(y_train):
    one_hot = np.zeros((y_train.size, (y_train.max()) + 1))
    one_hot[np.arange(y_train.size), y_train] = 1
    one_hot = one_hot.T
    return one_hot

In [10]:
def derivative_ReLU(Z):
    return Z > 0

In [11]:
def back_Propogation(Z1, A1, Z2, A2, W2, X_train, y_train):
    Y = one_hot(y_train)

    #Back Propogation to Hidden Layer(1st Layer) -> 0 based indexing of layers.
    dz2 = A2 - Y
    dw2 = (1.0/Y.size)*dz2.dot(A1.T)
    db2 = (1.0/Y.size)*np.sum(dz2)

    #Back Propogation to Input Layer(0th Layer) -> 0 based indexing of layers.
    dz1 = W2.T.dot(dz2)*derivative_ReLU(Z1)
    dw1 = (1.0/Y.size)*dz1.dot(X_train.T)
    db1 = (1.0/Y.size)*np.sum(dz1)

    return dw1, db1, dw2, db2

In [12]:
def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y)/y.size

In [13]:
def update_params(W1, b1, W2, b2, dw1, db1, dw2, db2, alpha):
    W1 = W1 - alpha * dw1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dw2
    b2 = b2 - alpha * db2
    return W1, b1, W2, b2

In [14]:
def gradient_descent(X, y, iterations, alpha):
    W1, b1, W2, b2 = init_param()
    for i in range(0, iterations):
        Z1, A1, Z2, A2 = fwd_propogation(X, W1, b1, W2, b2)
        dw1, db1, dw2, db2 = back_Propogation(Z1, A1, Z2, A2, W2, X, y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dw1, db1, dw2, db2, alpha)
        if i % 10 == 0:
            print(f"Iteration: {i}")
            print(f"Accuracy: {get_accuracy(get_predictions(A2), y)*100.0}")
    return W1, b1, W2, b2

In [15]:
W1, b1, W2, b2 = gradient_descent(X_train, y_train, 100, 0.10)

Iteration: 0
[4 2 3 ... 4 4 4] [1 0 7 ... 8 3 1]
Accuracy: 7.994285714285715
Iteration: 10
[4 2 3 ... 4 4 6] [1 0 7 ... 8 3 1]
Accuracy: 8.45142857142857
Iteration: 20
[3 2 2 ... 4 4 6] [1 0 7 ... 8 3 1]
Accuracy: 8.688571428571429
Iteration: 30
[3 2 2 ... 4 4 6] [1 0 7 ... 8 3 1]
Accuracy: 9.09
Iteration: 40
[3 2 2 ... 4 4 6] [1 0 7 ... 8 3 1]
Accuracy: 9.610000000000001
Iteration: 50
[3 2 2 ... 4 4 6] [1 0 7 ... 8 3 1]
Accuracy: 10.221428571428572
Iteration: 60
[3 2 2 ... 5 5 8] [1 0 7 ... 8 3 1]
Accuracy: 11.08
Iteration: 70
[3 2 2 ... 5 5 8] [1 0 7 ... 8 3 1]
Accuracy: 12.252857142857144
Iteration: 80
[3 2 2 ... 5 5 5] [1 0 7 ... 8 3 1]
Accuracy: 13.595714285714285
Iteration: 90
[3 2 2 ... 5 5 5] [1 0 7 ... 8 3 1]
Accuracy: 14.827142857142858


In [16]:
def make_predictions(X, W1, b1, W2, b2):
    _, _, _, A2 = fwd_propogation(W1, b1, W2, b2, X)
    predictions = get_predictions(A2)
    return predictions

def test_prediction(index, W1, b1, W2, b2):
    # Extract the current image and reshape to (784, 1) for a single image
    current_image = X_train[:, index, None]
    
    # Make prediction for the current image
    prediction = make_predictions(current_image, W1, b1, W2, b2)
    label = y_train[index]
    print("Prediction: ", prediction)
    print("Label: ", label)
    
    # Reshape the image to (28, 28) for visualization and multiply by 255 for proper display
    current_image = current_image.reshape((28, 28)) * 255
    current_image = np.clip(current_image, 0, 255)  # Ensure values are in the range [0, 255]
    
    # Plot the image
    plt.gray()
    plt.imshow(current_image, interpolation='nearest')
    plt.show()
