### Building a Neural Network From Scratch.
Here, I am building a neural network from scratch using only NumPy (Keras is imported solely to load the dataset). It's an interesting challenge, so why not? :-)

In [35]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.datasets import mnist

Let's begin with an introductory dataset: the MNIST handwritten digit dataset. It's the "hello world" of computer vision!

Preprocessing and train-test splitting are already done.

In [36]:
# The Keras MNIST loader returns two (images, labels) pairs:
# the training pair first, then the test pair.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [37]:
# Sanity check: the training split must hold 60,000 images of 28x28 pixels,
# with exactly one label per image.
assert x_train.shape == (60000, 28, 28)
assert y_train.shape == (60000,)

In [38]:
# Sanity check: the test split must hold 10,000 images of 28x28 pixels,
# with exactly one label per image.
assert x_test.shape == (10000, 28, 28)
assert y_test.shape == (10000,)

In [39]:
def onehot_encode(y, num_classes=10):
    """Return a one-hot vector for the integer class label ``y``.

    Args:
        y: Class label in ``range(num_classes)``; for MNIST this is the
            digit 0-9. NumPy integer scalars are accepted.
        num_classes: Length of the returned vector (default 10).

    Returns:
        1-D float array of length ``num_classes`` that is 1.0 at index ``y``
        and 0.0 everywhere else.

    Raises:
        ValueError: If ``y`` is outside ``range(num_classes)``.
    """
    # Convert to a plain int so unsigned NumPy scalars cannot wrap around.
    label = int(y)
    if not 0 <= label < num_classes:
        raise ValueError(
            f"label {label} is outside the valid range 0..{num_classes - 1}"
        )

    output = np.zeros(num_classes)
    # Index by the label itself: class k owns slot k. Indexing with y - 1
    # would shift every class by one and send label 0 to the last slot.
    output[label] = 1
    return output

In [40]:
# One-hot encode the integer labels, overwriting y_train / y_test.
# The ndim guards keep this cell idempotent: on a re-run the labels are
# already 2-D one-hot rows and must not be fed through onehot_encode again.
if y_train.ndim == 1:
    y_train = np.array([onehot_encode(y) for y in y_train])
if y_test.ndim == 1:
    y_test = np.array([onehot_encode(y) for y in y_test])

In [41]:
def init_params(n_inputs=784, n_hidden=10, n_outputs=10, seed=None):
    """Draw standard-normal weights and biases for a two-layer network.

    Args:
        n_inputs: Number of input features (default 784, a flattened
            28x28 image).
        n_hidden: Number of hidden units (default 10).
        n_outputs: Number of output units (default 10).
        seed: Optional integer seed. When given, the parameters are drawn
            from a private ``np.random.RandomState(seed)`` so repeated calls
            return identical values. When ``None`` the global ``np.random``
            state is used, exactly as before.

    Returns:
        Tuple ``(w1, b1, w2, b2)`` with shapes ``(n_inputs, n_hidden)``,
        ``(n_hidden,)``, ``(n_hidden, n_outputs)`` and ``(n_outputs,)``.
    """
    # Both the np.random module and a RandomState instance expose randn().
    source = np.random if seed is None else np.random.RandomState(seed)

    # Draw order (w1, b1, w2, b2) is kept so a globally seeded run yields
    # the same numbers as the original implementation.
    w1 = source.randn(n_inputs, n_hidden)
    b1 = source.randn(n_hidden)

    w2 = source.randn(n_hidden, n_outputs)
    b2 = source.randn(n_outputs)

    return w1, b1, w2, b2

In [42]:
def sftmax(x, axis=None):
    """Numerically stable softmax.

    Args:
        x: Array-like of logits.
        axis: Axis to normalise over. The default ``None`` normalises over
            every element of ``x`` (the original behaviour); pass ``-1`` to
            get one distribution per row of a batch.

    Returns:
        Float array with the same shape as ``x`` whose entries are positive
        and sum to 1 along ``axis``. An empty input yields an empty array.
    """
    x = np.asarray(x)
    # Integer inputs must become floats before the subtraction below,
    # otherwise unsigned dtypes would wrap around instead of going negative.
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(np.float64)
    if x.size == 0:
        return x.copy()

    # Softmax is invariant to adding a constant to every logit, so shifting
    # by the maximum keeps np.exp from overflowing without changing the result.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)
    
def deriv_sftmax(x):
    """Return ``x * (1 - x)`` elementwise.

    ``x`` is treated as an activation value s (not a pre-activation); the
    result s * (1 - s) is the diagonal term of the softmax Jacobian.
    """
    complement = 1 - x
    return x * complement

def deriv_tanh(x):
    """Return ``1 - x**2`` elementwise.

    ``x`` is treated as a tanh output t (not the pre-activation), since
    d/dz tanh(z) = 1 - tanh(z)**2.
    """
    squared = x ** 2
    return 1 - squared

In [50]:
def forwardprop(x, w1, b1, w2, b2):
    """Run one sample through the tanh hidden layer and the softmax output layer.

    Args:
        x: Input array; it is flattened to 1-D before use.
        w1, b1: Hidden-layer weights and bias.
        w2, b2: Output-layer weights and bias.

    Returns:
        Tuple ``(z1, a1, z2, output)``: hidden pre-activation, hidden tanh
        activation, output pre-activation, and the softmax of ``z2``.
    """
    flat_pixels = x.ravel()

    # Hidden layer: affine map followed by tanh.
    hidden_pre = flat_pixels.dot(w1) + b1
    hidden_act = np.tanh(hidden_pre)

    # Output layer: affine map followed by softmax.
    output_pre = hidden_act.dot(w2) + b2
    probabilities = sftmax(output_pre)

    return hidden_pre, hidden_act, output_pre, probabilities

In [None]:
# Draw a fresh set of parameters, then push every training image through
# the network once. No parameters are updated in this cell.
w1, b1, w2, b2 = init_params()
output_array = [forwardprop(x, w1, b1, w2, b2) for x in x_train]

# forwardprop returns (z1, a1, z2, output), so index 3 is the softmax output.
# error[k] is the one-hot target minus the prediction for sample k.
error = [y_train[idx] - result[3] for idx, result in enumerate(output_array)]

In [49]:
def backprop(x, a1, output, error, w1, b1, w2, b2, alpha=0.001):
    """Apply one gradient step for a single sample, updating parameters in place.

    Args:
        x: Input sample; flattened to 1-D, so a 2-D image is accepted.
        a1: Hidden-layer tanh activation, shape ``(n_hidden,)``.
        output: Softmax output, shape ``(n_outputs,)``.
        error: Target minus ``output``, shape ``(n_outputs,)``.
        w1: Hidden weights, shape ``(n_inputs, n_hidden)``; modified in place.
        b1: Hidden bias, shape ``(n_hidden,)``; modified in place.
        w2: Output weights, shape ``(n_hidden, n_outputs)``; modified in place.
        b2: Output bias, shape ``(n_outputs,)``; modified in place.
        alpha: Learning rate (default 0.001).

    Returns:
        None. The float arrays ``w1``, ``b1``, ``w2`` and ``b2`` are updated
        with ``+=``.
    """
    inputs = np.asarray(x, dtype=float).ravel()

    # Output-layer delta, using the elementwise softmax term
    # output * (1 - output), i.e. the diagonal of the softmax Jacobian.
    error_gradient = error * output * (1 - output)

    # Hidden-layer delta. It must be computed BEFORE w2 is changed, it sums
    # over the output axis only (one value per hidden unit, not one scalar),
    # and it uses the tanh derivative 1 - a1**2 because a1 is a tanh output.
    hidden_error_gradient = (1 - a1 ** 2) * np.dot(w2, error_gradient)

    # Each weight connects one source unit to one target unit, so its update
    # is the outer product (source activation) x (target delta).
    w2 += alpha * np.outer(a1, error_gradient)
    b2 += alpha * error_gradient

    w1 += alpha * np.outer(inputs, hidden_error_gradient)
    b1 += alpha * hidden_error_gradient