# 📌 Building a Neural Network from Scratch using only Numpy

In [16]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import os
import zipfile

In [3]:
# Folder that holds the dataset archive, and a listing of its current entries.
directory = 'dataset/'
files = os.listdir(path=directory)

In [4]:
# Unpack the training CSV archive into the dataset folder.
# The handle is called `archive` (not `zip`) so the built-in zip() is not
# shadowed for the rest of the notebook, and the extraction target reuses
# `directory` instead of repeating the 'dataset/' literal.
with zipfile.ZipFile(directory + 'train.csv.zip', 'r') as archive:
    archive.extractall(directory)

In [5]:
# Loading the csv using pandas

In [None]:
# Show the first rows of `data` as the cell output.
# NOTE(review): assumes `data` is the pandas DataFrame loaded in the previous
# (still empty) cell — confirm once the loading code is written.
data.head()

In [8]:
# Turn the loaded table into a NumPy array and shuffle its rows in place
# (np.random.shuffle permutes along the first axis only).
data = np.array(data)
np.random.shuffle(data)
# m = number of rows, n = number of columns of the array.
m, n = data.shape

In [9]:
# Create train and test split and preprocess the images

In [11]:
# Shape of the first column of X_train; the recorded output is (784,).
X_train[:, 0].shape

(784,)

In [None]:
# Plot an example image

### Some information
Our NN will have a simple two-layer architecture:
- the input layer $A^{[0]}$ will have 784 units corresponding to the 784 pixels in each 28x28x1 input image;
- the hidden layer $A^{[1]}$ will have 10 units with ReLU activation;
- the output layer $A^{[2]}$ will have 10 units corresponding to the 10 digit classes with softmax activation.


To obtain a NN with good prediction accuracy, we have to implement the 3 main phases that make up the training algorithm:
- **Forward propagation**
- **Backward propagation**
- **Parameter updates**

### Parameters initialization

In [14]:
def init_params():
    """Draw random starting weights and biases for the two layers.

    Every array is sampled from a standard normal distribution and then
    scaled by ``sqrt(1 / k)`` with k = 784 for ``W1`` and k = 10 for the
    other three arrays.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes (10, 784), (10, 1),
        (10, 10) and (10, 1).
    """
    n_pixels, n_units = 784, 10

    def scaled_normal(shape, k):
        # Standard-normal sample shrunk by sqrt(1 / k).
        return np.random.normal(size=shape) * np.sqrt(1. / k)

    # The draw order (W1, b1, W2, b2) is kept so seeded runs are reproducible.
    W1 = scaled_normal((n_units, n_pixels), n_pixels)
    b1 = scaled_normal((n_units, 1), n_units)
    W2 = scaled_normal((n_units, n_units), n_units)
    b2 = scaled_normal((n_units, 1), n_units)
    return W1, b1, W2, b2

### Utility Functions

In [19]:
def ReLU(Z):
    """Apply the rectified linear unit element-wise.

    Args:
        Z: Array of pre-activation values (any shape).

    Returns:
        Array with the same shape as ``Z`` in which every negative entry
        is replaced by 0 and every other entry is left unchanged.
    """
    return np.maximum(Z, 0)


def ReLU_derivative(Z):
    """Return the element-wise derivative of ReLU evaluated at ``Z``.

    Args:
        Z: Array of pre-activation values (any shape).

    Returns:
        Float array with the same shape as ``Z``: 1.0 where ``Z > 0`` and
        0.0 elsewhere (the derivative at exactly 0 is taken to be 0).
    """
    return (np.asarray(Z) > 0).astype(float)


def softmax(Z):
    """Compute the softmax of each column of ``Z``.

    Args:
        Z: Array of scores with the classes along axis 0 (one sample per
            column).

    Returns:
        Float array with the same shape as ``Z`` whose entries are
        positive and sum to 1 along axis 0.
    """
    scores = np.asarray(Z, dtype=float)
    # Subtracting the per-column maximum leaves the result unchanged but
    # keeps np.exp from overflowing on large scores.
    shifted = scores - scores.max(axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / exp_scores.sum(axis=0, keepdims=True)


def one_hot(Y):
    """One-hot encode an array of integer labels.

    Args:
        Y: Integer label array; the number of classes is taken to be
            ``Y.max() + 1``.

    Returns:
        Float array of shape (classes, Y.size) with a single 1 per
        column, placed in the row given by that sample's label.
    """
    n_samples = Y.size
    n_classes = Y.max() + 1
    encoded = np.zeros((n_samples, n_classes))
    sample_rows = np.arange(n_samples)
    encoded[sample_rows, Y] = 1
    # Transpose so that each column (not each row) describes one sample.
    return encoded.T

### Training Phase

**Forward propagation**

$$Z^{[1]} = W^{[1]} X + b^{[1]}$$
$$A^{[1]} = g_{\text{ReLU}}(Z^{[1]})$$
$$Z^{[2]} = W^{[2]} A^{[1]} + b^{[2]}$$
$$A^{[2]} = g_{\text{softmax}}(Z^{[2]})$$

**Backward propagation**

$$dZ^{[2]} = A^{[2]} - Y$$
$$dW^{[2]} = \frac{1}{m} dZ^{[2]} A^{[1]T}$$
$$db^{[2]} = \frac{1}{m} \sum {dZ^{[2]}}$$
$$dZ^{[1]} = W^{[2]T} dZ^{[2]} \odot g^{[1]\prime} (Z^{[1]})$$
$$dW^{[1]} = \frac{1}{m} dZ^{[1]} A^{[0]T}$$
$$db^{[1]} = \frac{1}{m} \sum {dZ^{[1]}}$$

**Parameter updates**

$$W^{[2]} := W^{[2]} - \alpha dW^{[2]}$$
$$b^{[2]} := b^{[2]} - \alpha db^{[2]}$$
$$W^{[1]} := W^{[1]} - \alpha dW^{[1]}$$
$$b^{[1]} := b^{[1]} - \alpha db^{[1]}$$

In [10]:
def forward_propagation(W1, b1, W2, b2, X):
    """Run the forward pass of the two-layer network.

    Implements
        Z1 = W1 X + b1,   A1 = ReLU(Z1),
        Z2 = W2 A1 + b2,  A2 = softmax(Z2)
    using the ``ReLU`` and ``softmax`` helpers defined in this file.

    Args:
        W1: Weights of the hidden layer.
        b1: Bias of the hidden layer (column vector, broadcast over samples).
        W2: Weights of the output layer.
        b2: Bias of the output layer (column vector, broadcast over samples).
        X: Input matrix with one sample per column.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)`` of pre-activations and activations of
        both layers.
    """
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2



def backward_propagation(Z1, A1, Z2, A2, W1, W2, X, Y):
    """Compute the gradients of the loss w.r.t. all parameters.

    Implements
        dZ2 = A2 - one_hot(Y)
        dW2 = dZ2 A1^T / m,          db2 = sum(dZ2) / m
        dZ1 = W2^T dZ2 * ReLU'(Z1)
        dW1 = dZ1 X^T / m,           db1 = sum(dZ1) / m
    using the ``one_hot`` and ``ReLU_derivative`` helpers defined in this
    file.

    Args:
        Z1: Hidden-layer pre-activations from the forward pass.
        A1: Hidden-layer activations from the forward pass.
        Z2: Output-layer pre-activations (unused; kept for the signature).
        A2: Output-layer activations (class probabilities).
        W1: Hidden-layer weights (unused; kept for the signature).
        W2: Output-layer weights.
        X: Input matrix with one sample per column.
        Y: Integer labels, one per sample.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``, in the order expected by
        ``update_params``.
    """
    m = X.shape[1]  # number of samples (columns of X)

    dZ2 = A2 - one_hot(Y)
    dW2 = dZ2.dot(A1.T) / m
    # Sum over samples only, keeping a column vector like the bias itself.
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    dZ1 = W2.T.dot(dZ2) * ReLU_derivative(Z1)
    dW1 = dZ1.dot(X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    return dW1, db1, dW2, db2


def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Take one gradient-descent step on every parameter.

    Each parameter ``p`` with gradient ``dp`` becomes ``p - alpha * dp``.
    New arrays are returned; the arguments are not modified in place.

    Args:
        W1, b1, W2, b2: Current weights and biases.
        dW1, db1, dW2, db2: Gradients matching the parameters above.
        alpha: Learning rate.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with the updated parameters.
    """
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1

    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2

    return W1, b1, W2, b2


def get_predictions(A2):
    """Pick the most probable class for every sample.

    Args:
        A2: Output activations with the classes along axis 0 and one
            sample per column.

    Returns:
        1-D integer array holding, for each column, the row index of its
        largest entry.
    """
    return np.argmax(A2, axis=0)


def get_accuracy(predictions, Y):
    """Return the fraction of predictions that match the labels.

    Args:
        predictions: Array of predicted class indices.
        Y: Array of true class indices with the same shape.

    Returns:
        Accuracy as a float between 0 and 1.
    """
    return float(np.mean(predictions == Y))


def gradient_descent(X, Y, alpha, iterations):
    """Train the network with full-batch gradient descent.

    Starts from ``init_params()`` and repeats, ``iterations`` times, the
    three phases: forward propagation, backward propagation and parameter
    update (all using the functions defined in this file).

    Args:
        X: Input matrix with one sample per column.
        Y: Integer labels, one per sample.
        alpha: Learning rate.
        iterations: Number of update steps to perform.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with the trained parameters; this is the
        ``NN`` object that ``make_predictions`` indexes as ``NN[0..3]``.
    """
    W1, b1, W2, b2 = init_params()
    for _ in range(iterations):
        Z1, A1, Z2, A2 = forward_propagation(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backward_propagation(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
    return W1, b1, W2, b2


**Shapes**

- Forward propagation
    - $A^{[0]} = X$: 784 x m
    - $Z^{[1]} \sim A^{[1]}$: 10 x m
    - $W^{[1]}$: 10 x 784 (as $W^{[1]} A^{[0]} \sim Z^{[1]}$)
    - $b^{[1]}$: 10 x 1
    - $Z^{[2]} \sim A^{[2]}$: 10 x m
    - $W^{[2]}$: 10 x 10 (as $W^{[2]} A^{[1]} \sim Z^{[2]}$)
    - $b^{[2]}$: 10 x 1

- Backpropagation
    - $dZ^{[2]}$: 10 x m ($\sim A^{[2]}$)
    - $dW^{[2]}$: 10 x 10
    - $db^{[2]}$: 10 x 1
    - $dZ^{[1]}$: 10 x m ($\sim A^{[1]}$)
    - $dW^{[1]}$: 10 x 784
    - $db^{[1]}$: 10 x 1

## Build the NN and make prediction

In [None]:
# NN =

### Make some predictions

In [12]:
def make_predictions(X, NN):
    """Predict a class for every column of ``X`` with a trained network.

    Args:
        X: Input matrix with one sample per column.
        NN: Indexable holding the parameters as ``NN[0]`` = W1,
            ``NN[1]`` = b1, ``NN[2]`` = W2, ``NN[3]`` = b2.

    Returns:
        Whatever ``get_predictions`` returns for the network output A2.
    """
    W1, b1, W2, b2 = NN[0], NN[1], NN[2], NN[3]
    # Only the output activations of the forward pass are needed here.
    _, _, _, A2 = forward_propagation(W1, b1, W2, b2, X)
    return get_predictions(A2)

def test_prediction(index, NN):
    """Print prediction and label for one training sample and plot it.

    Reads the notebook-level ``X_train`` and ``Y_train``.

    Args:
        index: Column of ``X_train`` (and entry of ``Y_train``) to inspect.
        NN: Trained parameters, as accepted by ``make_predictions``.
    """
    # Keep the sample as a single-column matrix so it can be fed to the net.
    sample = X_train[:, index, None]
    prediction = make_predictions(sample, NN)
    label = Y_train[index]
    print("Prediction: ", prediction)
    print("Label: ", label)

    # NOTE(review): the * 255 presumably undoes a [0, 1] scaling applied
    # during preprocessing — confirm against the preprocessing cell.
    pixels = sample.reshape((28, 28)) * 255
    plt.gray()
    plt.imshow(pixels, interpolation='nearest')
    plt.show()

In [None]:
# Print prediction and label, and plot the image, for training samples 0-3.
test_prediction(0, NN)
test_prediction(1, NN)
test_prediction(2, NN)
test_prediction(3, NN)

### Accuracy on Test set

In [None]:
# Predict on the held-out test set, then show the accuracy as the cell
# output (the bare expression on the last line is what the notebook displays).
dev_predictions = make_predictions(X_test, NN)
get_accuracy(dev_predictions, Y_test)

## Homework
- Analyze the training phase
    - plot the accuracy
    - plot the loss function