# **Neural Network From Scratch**
NN consists of:

1-Layers: an input layer, one (binary classification) or more (multiclass classification) hidden layers, and an output layer.

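2-Nodes: the number of input nodes equals the number of features in MNIST (one per pixel of the flattened image). The hidden-layer size is variable (a hyperparameter), and the output layer has 10 nodes (one per digit class).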


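Here: this is multiclass classification, so the plan is to use the "ReLU" activation function for the hidden layer and softmax for the output layer. (Note: the implementation below uses the sigmoid activation in both layers.)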


In [3]:
import numpy as np
import pandas as pd
from tensorflow.keras.datasets import mnist
from sklearn.metrics import accuracy_score, confusion_matrix

In [5]:
# Load the MNIST train/test split (downloaded on first use).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into one feature row per sample and divide by 255.0.
X_train, X_test = (
    images.reshape(len(images), -1) / 255.0 for images in (X_train, X_test)
)

# One-hot encode the labels; the transpose puts classes on rows and samples
# on columns, matching the (units, samples) layout of the network activations.
Y_train, Y_test = (
    pd.get_dummies(labels).values.T for labels in (y_train, y_test)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [7]:
# NN hyperparameters
# Layer widths: one input node per feature column of X_train, a single hidden
# layer of 64 nodes, and 10 output nodes.
input_size = X_train.shape[1]
hidden_size, output_size = 64, 10

# Gradient-descent settings: step size and number of passes over the data.
learning_rate, num_epochs = 0.1, 2_000

In [8]:
#parameters
# A seeded generator makes the initial weights (and therefore the whole run)
# reproducible under Restart Kernel -> Run All.
rng = np.random.default_rng(42)

# Small random weights (standard normal scaled by 0.01) break the symmetry
# between units; biases start at zero.
W1 = rng.standard_normal((hidden_size, input_size)) * 0.01
b1 = np.zeros((hidden_size, 1))
W2 = rng.standard_normal((output_size, hidden_size)) * 0.01
b2 = np.zeros((output_size, 1))

In [9]:
# Activation function (sigmoid)
def sigmoid(Z):
    """Element-wise logistic sigmoid, ``1 / (1 + exp(-Z))``.

    The value is computed as ``exp(-log(1 + exp(-Z)))`` using
    ``np.logaddexp(0, -Z)`` for the inner logarithm. This is mathematically
    the same function, but it never evaluates ``exp`` on a large positive
    number, so very negative inputs no longer trigger an overflow warning.

    Parameters
    ----------
    Z : scalar or numpy array
        Pre-activation values.

    Returns
    -------
    Same shape as ``Z``, with every value in the interval [0, 1].
    """
    # log(1 + exp(-Z)) evaluated stably, then negated and exponentiated.
    return np.exp(-np.logaddexp(0, -Z))

In [12]:
def forward_propagation(X, W1, b1, W2, b2):
    """Run one forward pass through the two-layer network.

    ``X`` is transposed before being multiplied by ``W1``, so it is expected
    with samples on rows and features on columns. Both layers apply the
    ``sigmoid`` activation.

    Returns a dict with the pre-activations (``"Z1"``, ``"Z2"``) and the
    activations (``"A1"``, ``"A2"``) of the hidden and output layers, each
    laid out with units on rows and samples on columns.
    """
    # Hidden layer: affine transform followed by sigmoid.
    hidden_pre = np.dot(W1, X.T) + b1
    hidden_act = sigmoid(hidden_pre)

    # Output layer: same pattern applied to the hidden activations.
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    return dict(Z1=hidden_pre, A1=hidden_act, Z2=output_pre, A2=output_act)

In [13]:


def backward_propagation(X, Y, cache, W1, W2):
    """Compute the parameter gradients for one backward pass.

    Parameters
    ----------
    X : array, samples on rows and features on columns.
    Y : array of targets, laid out like ``cache["A2"]``.
    cache : dict holding the forward-pass activations ``"A1"`` and ``"A2"``.
    W1 : accepted for signature symmetry; not used in the computation.
    W2 : output-layer weights, used to push the error back to the hidden layer.

    Returns
    -------
    dict with keys ``"dW1"``, ``"db1"``, ``"dW2"``, ``"db2"``, each averaged
    over the ``X.shape[0]`` samples.
    """
    n_samples = X.shape[0]
    scale = 1 / n_samples
    hidden_act, output_act = cache["A1"], cache["A2"]

    # Output-layer error signal.
    delta_out = output_act - Y
    # Hidden-layer error: propagate through W2, then multiply by the sigmoid
    # derivative written in terms of its output, A1 * (1 - A1).
    delta_hidden = np.dot(W2.T, delta_out) * (hidden_act * (1 - hidden_act))

    return dict(
        dW1=scale * np.dot(delta_hidden, X),
        db1=scale * np.sum(delta_hidden, axis=1, keepdims=True),
        dW2=scale * np.dot(delta_out, hidden_act.T),
        db2=scale * np.sum(delta_out, axis=1, keepdims=True),
    )


In [14]:
def train_model(X_train, Y_train, W1, b1, W2, b2, num_epochs, learning_rate):
    """Train the two-layer network with full-batch gradient descent.

    Each epoch runs ``forward_propagation`` and ``backward_propagation`` on
    the whole training set and takes one gradient step of size
    ``learning_rate`` on every parameter.

    The incoming parameter arrays are copied first, so the caller's ``W1``,
    ``b1``, ``W2`` and ``b2`` are not modified; use the returned values.
    This keeps repeated calls from the same initial weights reproducible.

    Returns
    -------
    tuple ``(W1, b1, W2, b2)`` holding the trained parameters.
    """
    # Copy so the in-place updates below never touch the caller's arrays.
    W1, b1, W2, b2 = (np.copy(param) for param in (W1, b1, W2, b2))

    for _ in range(num_epochs):
        cache = forward_propagation(X_train, W1, b1, W2, b2)

        gradients = backward_propagation(X_train, Y_train, cache, W1, W2)

        # Update weights
        W1 -= learning_rate * gradients["dW1"]
        b1 -= learning_rate * gradients["db1"]
        W2 -= learning_rate * gradients["dW2"]
        b2 -= learning_rate * gradients["db2"]

    return W1, b1, W2, b2


In [20]:


# Train
# forward_propagation / backward_propagation take the weights as explicit
# arguments, so training goes through train_model, which passes them along
# and returns the updated parameters.
W1, b1, W2, b2 = train_model(
    X_train, Y_train, W1, b1, W2, b2, num_epochs, learning_rate
)

# Test
cache_test = forward_propagation(X_test, W1, b1, W2, b2)
predictions = cache_test["A2"]
# Classes are on axis 0, so argmax over that axis gives one label per sample.
y_pred = np.argmax(predictions, axis=0)
y_true = np.argmax(Y_test, axis=0)

accuracy = accuracy_score(y_true, y_pred)
conf_matrix = confusion_matrix(y_true, y_pred)
print("Accuracy:", accuracy * 100, "%")
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 90.67 %
Confusion Matrix:
[[ 963    0    1    2    1    3    7    1    2    0]
 [   0 1105    1    4    1    1    4    1   18    0]
 [  13    6  907   17   16    1   17   12   39    4]
 [   3    1   23  907    1   26    4   19   21    5]
 [   1    5    4    0  906    1   11    2    5   47]
 [  20    3    6   51   15  731   16    9   31   10]
 [  20    3    7    1   11   17  895    1    3    0]
 [   6   20   32    2   11    0    0  929    3   25]
 [  10   11    9   26   11   25   16   10  841   15]
 [  13    8    5   12   46   10    1   25    6  883]]
