# Building a Neural Network From Scratch

In [1]:
import numpy as np
import gzip
import pickle
from fastai import datasets
import torch

Let's try to build a neural network from scratch. Roughly speaking, we need to do the following:

 - [x] Get the dataset 
 - [x] Implement matrix multiplication
 - [x] Implement ReLU
 - [x] Implement Softmax 
 - [x] Calculate Loss 
 - [ ] Back prop

## Get the Dataset

In [2]:
# Source of the pickled MNIST arrays.
MNIST_URL = 'http://deeplearning.net/data/mnist/mnist.pkl'

# Fetch the archive; `path` is the file that gzip.open reads below.
path = datasets.download_data(MNIST_URL, ext='.gz')

# NOTE(security): pickle.load can execute arbitrary code while unpickling,
# so only run this cell against a download you trust.
with gzip.open(path, 'rb') as f:
    # Keep the training and validation splits; the third element is discarded
    # (presumably the test split -- confirm against the dataset docs).
    (x_train, y_train), (x_valid, y_valid), _ = pickle.load(f, encoding='latin-1')

In [3]:
# Report how many examples each split contains.
for label, split in (("Training set: ", x_train), ("Validation set: ", x_valid)):
    print(label, len(split))

Training set:  50000
Validation set:  10000


In [4]:
# Our first input: slicing (rather than indexing) keeps it as a one-row batch,
# so the result is still 2-D.
x1 = x_train[0:1]
x1.shape

(1, 784)

In [5]:
# Our first set of weights and biases: one weight row per input value
# (784, matching x1's second dimension) and one column per output class (10).
n_inputs, n_classes = 784, 10
weights = np.random.rand(n_inputs, n_classes)  # uniform samples in [0, 1)
biases = np.zeros(n_classes)

## Implement Matrix Multiplication

In [7]:
def matrix_multiply(a, b):
    """Multiply two 2-D arrays using explicit Python loops.

    Prints both operand shapes (as ``rows x columns``) and then checks
    that the inner dimensions agree.

    Args:
        a: 2-D array of shape (n, k).
        b: 2-D array of shape (k, m).

    Returns:
        A float array of shape (n, m) where entry (i, j) is the dot
        product of row i of ``a`` with column j of ``b``.

    Raises:
        AssertionError: if ``a`` has a different number of columns than
            ``b`` has rows.
    """
    n_rows, inner_a = a.shape
    inner_b, n_cols = b.shape

    # Show the shapes before validating them, so a mismatch is visible.
    print(n_rows, inner_a, sep='x')
    print(inner_b, n_cols, sep='x')
    assert inner_a == inner_b

    product = np.zeros((n_rows, n_cols))

    # b.T iterates over the columns of b, so each (row, col) pair is one dot product.
    for i, row in enumerate(a):
        for j, col in enumerate(b.T):
            product[i, j] = (row * col).sum()

    return product

# Worked example: a 3x3 matrix times a 3x2 matrix gives a 3x2 result.
a = np.array([
    [1, 2, 1],
    [0, 1, 0],
    [2, 3, 4],
])
b = np.array([
    [2, 5],
    [6, 7],
    [1, 8],
])
matrix_multiply(a, b)

3x3
3x2


array([[15., 27.],
       [ 6.,  7.],
       [26., 63.]])

## Implement ReLU

In [8]:
def relu(x):
    """Apply the rectified linear unit element-wise: ``max(x, 0)``.

    Values below zero become 0; everything else is returned unchanged.

    ``np.maximum`` is used instead of multiplying by the mask ``(x > 0)``
    because the mask form computes ``False * x``, which yields ``nan`` for
    ``-inf`` and ``-0.0`` for negative floats. NaN inputs still propagate.

    Args:
        x: a NumPy array (or scalar) of numbers.

    Returns:
        An array of the same shape as ``x`` with negatives clamped to 0.
    """
    return np.maximum(x, 0)

# Example with a few negative entries; they should come back as zeros.
a = np.array([
    [1, -2, -1],
    [0, 1, 0],
    [2, 3, 4],
])
relu(a)

array([[1, 0, 0],
       [0, 1, 0],
       [2, 3, 4]])

## Implement Softmax

Next we have to implement a `softmax` function to map from the output of our network to a set of probabilities for each class. Softmax is defined as:

\begin{align}
\sigma(\mathbf{z})_j = \frac{e^{z_j}}{\sum_{k=1}^K e^{z_k}}
\end{align}

In [155]:
def softmax(x):
    """Convert each row of raw scores into probabilities that sum to 1.

    Args:
        x: 2-D array of shape (batch, classes) holding raw scores.

    Returns:
        A float array of the same shape; each row is
        ``exp(x_row) / sum(exp(x_row))``.

    Raises:
        AssertionError: if ``x`` is not 2-D.
    """
    assert len(x.shape) == 2 #We're only going to operate on batches

    # With zero classes there is nothing to normalise (and no row maximum to take).
    if x.shape[1] == 0:
        return np.exp(x)

    # Subtracting the row maximum leaves the result mathematically unchanged
    # but keeps np.exp from overflowing to inf (which would give inf/inf = nan).
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)

# Try softmax on a random 3x5 batch of scores; each row of the result should sum to 1.
probabilities = softmax(np.random.rand(3,5))

In [156]:
# Display the softmax output computed for the random batch.
probabilities

array([[0.175308, 0.229286, 0.29765 , 0.171439, 0.126316],
       [0.144358, 0.156612, 0.331245, 0.155997, 0.211788],
       [0.14033 , 0.23684 , 0.14718 , 0.213848, 0.261802]])

## Calculate Loss

Now we need to calculate loss on a given mini-batch of examples. Cross entropy loss is a fairly standard way of calculating loss for multi-class classification problems. It is defined as:

\begin{align}
H(p,q) = -\sum_{x} p(x)\,\log q(x)
\end{align}

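Where the sum runs over the classes `x`, and:
 - `p(x)` is `1` for the correct class and `0` otherwise
 - `q(x)` is the probability our network has assigned to class `x`

Because `p(x)` is one-hot, the sum collapses to `-log q(correct class)` for a single example. The loss for a mini-batch is the mean of that value over its examples.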

In [147]:
# Random stand-in for network outputs: a 3x5 array of raw scores drawn
# uniformly from [0, 1) -- these are not probabilities yet.
fake_preds = np.random.rand(3,5)
print(fake_preds)

[[0.521736 0.541294 0.21993  0.0036   0.828725]
 [0.25187  0.89769  0.233337 0.263168 0.71167 ]
 [0.02052  0.155222 0.326832 0.979635 0.427007]]


In [148]:
# Made-up integer class labels, one per row of fake_preds.
# An explicit np.int64 is used because the np.long alias was removed in
# NumPy 1.24 and its width is platform-dependent where it does exist.
fake_y = np.array([0, 1, 1], dtype=np.int64)
fake_y

array([0, 1, 1])

In [154]:
# Reference value from PyTorch's built-in cross-entropy loss, used to check
# the hand-written implementation against.
loss = torch.nn.CrossEntropyLoss()
pred_tensor = torch.tensor(fake_preds)
target_tensor = torch.tensor(fake_y, dtype=torch.long)
results = loss(pred_tensor, target_tensor)
results.numpy()

array(1.556455)

In [153]:
def my_loss(y_pred, y_target):
    """Mean cross-entropy loss of raw class scores against integer labels.

    For each item the loss is ``-log(softmax(scores)[correct_class])``; the
    function returns the average over the batch.

    The log-probabilities are computed directly with the log-sum-exp trick
    instead of taking ``log(softmax(...))``. That avoids ``exp`` overflowing
    to ``inf`` and ``log(0)`` when scores are far apart, either of which
    would turn the loss into ``inf`` or ``nan``.

    Args:
        y_pred: 2-D array of shape (items, classes) with raw scores.
        y_target: sequence of integer class indices, one per item.

    Returns:
        The mean negative log-probability assigned to the correct classes.

    Raises:
        AssertionError: if ``y_pred`` is not 2-D.
        IndexError: if a label is out of range or ``y_target`` is too short.
        ZeroDivisionError: if the batch contains no items.
    """
    assert len(y_pred.shape) == 2  # batches only: (items, classes)
    n_items = y_pred.shape[0]

    # log softmax(x) = (x - max) - log(sum(exp(x - max))), computed row-wise.
    shifted = y_pred - np.max(y_pred, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    # Pick out the log-probability of each item's correct class and add them up.
    total_log_prob = sum(log_probs[i][y_target[i]] for i in range(n_items))

    return -(total_log_prob) / n_items

# Run the hand-written loss on the same fake inputs; the value should match
# the torch.nn.CrossEntropyLoss result computed from them.
my_loss(fake_preds, fake_y)

1.5564550739075456