# Ground-up Neural Network with Numpy

[This article on GeeksofGeeks](https://www.geeksforgeeks.org/implementation-of-neural-network-from-scratch-using-numpy/) seemed very interesting, so I decided to implement the concept in a different style. Still, the 'restriction' of using only numpy is kept.

In [32]:
import numpy as np
import matplotlib.pyplot as plt

In [33]:
def softmax(x, derivative=False):
    if derivative:
        return x * (1 - x)
    exps = np.exp(x - np.max(x, axis=0))
    return exps / np.sum(exps, axis=0)

def tanh(x, derivative=False):
    return 1 - x**2 if derivative else np.tanh(x)

def relu(x, derivative=False):
    return (1 if x > 0 else 0) if derivative else np.max(0, x)

def sigmoid(x, derivative=False):
    sigma = 1 / (1 + np.exp(-x))
    return sigma * (1 - sigma) if derivative else sigma

In [34]:
class Layer:
    def __init__(self, input_size, output_size, activation):
        self.weights = np.random.randn(output_size, input_size) * np.sqrt(2 / input_size) # Xavier initialization
        self.bias = np.zeros((output_size, 1))
        self.activation = activation
        self.input = None
        self.output = None
        self.gradient = None

    def forward(self, input_data):
        self.input = input_data
        self.output = self.activation(self.weights @ self.input + self.bias)
        return self.output

    def backward(self, output_gradient):
        self.gradient = output_gradient * self.activation(self.output, derivative=True)
        input_gradient = self.weights.T @ self.gradient
        self.weights -= self.gradient @ self.input.T
        self.bias -= self.gradient
        return input_gradient

In [35]:
from sklearn.datasets import load_digits
from sklearn.preprocessing import OneHotEncoder

In [36]:
digits = load_digits()
X = digits.data
y = digits.target.reshape(-1, 1)

encoder = OneHotEncoder(sparse=False)
y_onehot = encoder.fit_transform(y)



In [37]:
input_size    = X.shape[1]
hidden_size_1 = int(X.shape[1] / 2)
hidden_size_2 = int(X.shape[1] / 4)
output_size   = y_onehot.shape[1]

mlp = [
    Layer(input_size, hidden_size_1, tanh),
    Layer(hidden_size_1, hidden_size_2, tanh),
    Layer(hidden_size_2, output_size, sigmoid)
]

In [38]:
# Train the network
for epoch in range(10000):
    for i in range(X.shape[0]):
        x = X[i].reshape(-1, 1)
        y_ = y_onehot[i].reshape(-1, 1)

        # Forward pass
        for layer in mlp:
            x = layer.forward(x)

        # Backward pass
        error = y_ - x
        for layer in reversed(mlp):
            error = layer.backward(error)

    if epoch % 1000 == 0:
        print(f"Epoch {epoch} - Error: {np.abs(np.mean(np.sum(error, axis=1)))}")


  sigma = 1 / (1 + np.exp(-x))


Epoch 0 - Error: 1.774617420371047e-19
Epoch 1000 - Error: 4.46406004680558e-23
Epoch 2000 - Error: 2.191797886021828e-23
Epoch 3000 - Error: 1.5455646200522454e-23


KeyboardInterrupt: 

In [40]:
# Test the network
for i in range(X.shape[0]):
    x = X[i].reshape(-1, 1)
    y_ = y_onehot[i].reshape(-1, 1)

    # Forward pass
    for layer in mlp:
        x = layer.forward(x)

    print("Actu: ", y_.T, "\nPred: ", x.T, "\n")

Actu:  [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] 
Pred:  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

Actu:  [[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]] 
Pred:  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

Actu:  [[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]] 
Pred:  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

Actu:  [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]] 
Pred:  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

Actu:  [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]] 
Pred:  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

Actu:  [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]] 
Pred:  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

Actu:  [[0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]] 
Pred:  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

Actu:  [[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]] 
Pred:  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

Actu:  [[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]] 
Pred:  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

Actu:  [[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]] 
Pred:  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

Actu:  [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] 
Pred:  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

Actu:  [[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]] 
Pred:  [[1. 1. 1. 1. 1.