# 1. Importing the libraries and MNIST Dataset

In [108]:
import numpy as np
from keras.datasets import mnist

data = mnist.load_data()
data

((array([[[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         ...,
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0

In [109]:
type(data[0])

tuple

# 2. Data Preprocessing

In [110]:
# Split the data into train and test sets
(X_train, y_train), (X_test, y_test) = data

# print the shape of all the data sets with description
print("X_train shape: ", X_train.shape)
print("y_train shape: ", y_train.shape)
print("X_test shape: ", X_test.shape)
print("y_test shape: ", y_test.shape)

X_train shape:  (60000, 28, 28)
y_train shape:  (60000,)
X_test shape:  (10000, 28, 28)
y_test shape:  (10000,)


In [111]:
# Reshape the data
X_train = X_train.reshape((X_train.shape[0], 28*28)).astype('float32')
X_test = X_test.reshape((X_test.shape[0], 28*28)).astype('float32')

In [112]:
# Normalize the pixel values from a scale of 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255

# 3. Build the model without Deep Learning Libraries (Traditional)

In [113]:
# Define the model
# Topology: 784 -> 256 -> 10
# Activation Function: Sigmoid -> Softmax
# Loss Function: Categorical Crossentropy
# Optimizer: Stochastic Gradient Descent


def softmax(x):
    exp_shifted = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_shifted / np.sum(exp_shifted, axis=1, keepdims=True)

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def cross_entropy_loss(y_true, y_pred):
    L_sum = np.sum(np.multiply(y_true, np.log(y_pred)))
    m = y_true.shape[0]
    L = -(1/m) * L_sum
    return L

def softmax_cross_entropy_loss(y_true, y_pred):
    loss = -np.sum(y_true * np.log(y_pred + 1e-15))
    return loss

def BenchmarkModel(X_train, y_train, X_test, y_test):
    # Initialize the weights and biases
    input_dim = X_train.shape[1]
    hidden_dim = 256
    output_dim = 10
    lr = 0.1
    epochs = 3

    W1 = np.random.randn(input_dim, hidden_dim)
    b1 = np.zeros((1, hidden_dim))
    W2 = np.random.randn(hidden_dim, output_dim)
    b2 = np.zeros((1, output_dim))

    # Training the model
    for i in range(epochs):
        # Forward Propagation
        z1 = np.dot(X_train, W1) + b1
        a1 = sigmoid(z1)
        z2 = np.dot(a1, W2) + b2
        a2 = softmax(z2)

        # Loss Calculation
        loss = softmax_cross_entropy_loss(y_train, a2)
        print("Epoch ", i, " Loss: ", loss)

        # Backward Propagation
        dl_a2 = a2 - y_train
        dl_z2 = np.dot(dl_a2, W2.T)
        dl_W2 = np.dot(a1.T, dl_a2)
        dl_b2 = np.sum(dl_a2, axis=0, keepdims=True)
        dl_a1 = np.dot(dl_a2, W2.T)
        dl_z1 = dl_a1 * sigmoid_derivative(a1)
        dl_W1 = np.dot(X_train.T, dl_z1)
        dl_b1 = np.sum(dl_z1, axis=0, keepdims=True)

        # Update the weights and biases
        W1 = W1 - lr * dl_W1
        b1 = b1 - lr * dl_b1
        W2 = W2 - lr * dl_W2
        b2 = b2 - lr * dl_b2

    # Testing the model
    z1 = np.dot(X_test, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = softmax(z2)

    # Calculate the accuracy
    predictions = np.argmax(a2, axis=1)
    labels = np.argmax(y_test, axis=1)
    accuracy = np.mean(predictions == labels)
    print("Accuracy: ", accuracy)

    return W1, b1, W2, b2

def evaluate(X_test, y_test, W1, b1, W2, b2):
    # Testing the model
    z1 = np.dot(X_test, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = softmax(z2)

    # Calculate the accuracy, precision, recall, and f1 score
    predictions = np.argmax(a2, axis=1)
    labels = np.argmax(y_test, axis=1)
    accuracy = np.mean(predictions == labels)
    precision = np.sum(predictions & labels) / np.sum(predictions)
    recall = np.sum(predictions & labels) / np.sum(labels)
    f1 = 2 * precision * recall / (precision + recall)
    
    return accuracy, precision, recall, f1

def one_hot_encoding(y):
    n_values = np.max(y) + 1
    return np.eye(n_values)[y]


    
    

# 4. Train the Model

In [114]:
# One hot encoding the labels
y_train = one_hot_encoding(y_train)
y_test = one_hot_encoding(y_test)  # Ensure y_test is also one-hot encoded

# Train the model
W1, b1, W2, b2 = BenchmarkModel(X_train, y_train, X_test, y_test)

Epoch  0  Loss:  1031011.2059609923
Epoch  1  Loss:  1839466.1532401552
Epoch  2  Loss:  1855941.1495805276
Accuracy:  0.101


# 5. Evaluate the Model

In [115]:
# Evaluate the model
accuracy, precision, recall, f1 = evaluate(X_test, y_test, W1, b1, W2, b2)

print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1)

Accuracy:  0.101
Precision:  0.43766666666666665
Recall:  0.2954944411936805
F1 Score:  0.3527957653760378
