# Machine Learning - Neural Network

This section aims to implement the feedforward NN with backpropagation to a dataset of 5000 images. Both regularized and unregularized versions of the nueral network cost function and gradient computation using the backpropagation algorithm. 

First step to include all libraries:

In [30]:
#include all libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat
%matplotlib inline

## Visualizing the data

First step is to load and present the data:

In [31]:
#Load and show the data
data = loadmat('../../data/ex3data1.mat')
print (type(data), data)

<class 'dict'> {'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011', '__version__': '1.0', '__globals__': [], 'X': array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]]), 'y': array([[10],
       [10],
       [10],
       ...,
       [ 9],
       [ 9],
       [ 9]], dtype=uint8)}


"data" is a dictionary with two labels 'X' and 'Y'. 
Next step is to assign X and Y arrays (input and output):

In [32]:
X = data['X']
y = data['y']

print (type(X), type(y))
print (X.shape, y.shape)

<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(5000, 400) (5000, 1)


To better perform the classification problem, y labels need to be transformed using one-hot encoding into matrix of binary values (0, 1). More explanasion on one-hot encoding can be found here:
https://hackernoon.com/what-is-one-hot-encoding-why-and-when-do-you-have-to-use-it-e3c6186d008f

In [33]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False, categories='auto')
y_onehot = enc.fit_transform(y)

print (type(y_onehot), y_onehot.shape)

<class 'numpy.ndarray'> (5000, 10)


In [34]:
#Test one label:
print (y[1000], y_onehot[1000,:])

[2] [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]


## Feed Forward Cost Function

In [37]:
def random_weight_generator(size, offset):
    w = np.random.rand(size) * 2 * offset - offset
    return w

def act_fn(z):
    return 1 / (1 + np.exp(-z))

def grad_act_fn(z):
    return np.multiply(act_fn(z), (1 - act_fn(z)))

    
def feedforwardprop(x, w1, w2):
    # Add bias to the first element of X and called in a1 matrix (new input matrix)
    a1 = np.insert(x, 0, values=np.ones(1), axis=1)
    
    z2 = a1 * w1.T
    a2 = np.insert(act_fn(z2), 0, values=np.ones(1), axis=1)
    
    z3 = a2 * w2.T
    h = act_fn(z3)
        
    return a1, z2, a2, z3, h
    
def costfn(y, h, w1, w2, n_inputs, learning_rate, iregularization):
    y = np.matrix(y)
    m = n_inputs
        
    # calculate the cost
    J = 0
    for i in range(m):
        first_log = np.multiply(-y[i,:], np.log(h[i,:]))
        second_log = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_log - second_log)
    
    J = J / m
    
    if iregularization == 1:
        J += ((learning_rate /2 / m) * (np.sum(w1[:,:]**2) + np.sum(w2[:,:]**2)))
    
    return J    

## Model Representation

For every NN, the number of layers and neourns in each layers should be fixed first. For this example, we have one input layer, one output layer and one or more hidden layers:

In [38]:
#Input Parameters:
n_samples= X.shape[0]
n_inputs = X.shape[1]
n_outputs= y_onehot.shape[1]
n_hidden_layers = 1
n_neurons= 25
learning_rate = 1
iregularization = 0

#cost = costfn(y, h, w1, w2, n_inputs, learning_rate, iregularization)

## Back Propagation

In [47]:
def backpropg(X, y, n_neurons, learning_rate, iregularization):
    m = X.shape[0]    #number of samples
    n = X.shape[1]    #number of inputs
    X = np.matrix(X)
    y = np.matrix(y)
    
    #generate a random weight matrix (-offset, +offset) and then reshape
    offset = np.sqrt(6)/np.sqrt(n_neurons + n_inputs)
    w1 = random_weight_generator(n_neurons * (n_inputs + 1), offset)
    w1 = np.matrix(np.reshape(w1[:n_neurons * (n_inputs + 1)], (n_neurons, n_inputs + 1)))   #(25, 401)

    offset = np.sqrt(6)/np.sqrt(n_neurons + n_outputs)
    w2 = random_weight_generator(n_outputs * (n_neurons + 1), offset)
    w2 = np.matrix(np.reshape(w2[:n_neurons * (n_inputs + 1)], (n_outputs, n_neurons + 1)))  #(10, 26)
    
    # initializations of Δs 
    Del1 = np.zeros(w1.shape)   #(25, 401)
    Del2 = np.zeros(w2.shape)   #(10, 26)

    for i in range(2):
        #set the input value to last sample
        x = X[i, :]

        # run the feed-forward fn
        a1, z2, a2, z3, h = feedforwardprop(x, w1, w2)  # a1(1, 401) z2(1, 25) a2(1, 26) z3(1, 10) h(1, 10)
        
        del3 = h - y[i, :]   # (1, 10)
        
        z2p = np.insert(z2, 0, values=np.ones(1), axis=1)       # (26, 1)
        
        del2 = np.multiply((w2.T * del3.T).T, grad_act_fn(z2p)) # (1, 26)
        del2 = np.delete(del2, 0, None)                         # (1, 25)
                
        Del1 = Del1 + np.multiply(del2.T, a1) # (25, 401)
        Del2 = Del2 + np.multiply(del3.T, a2) # (10, 26)

    dJdw1 = Del1 / m  # (25, 401)
    dJdw2 = Del2 / m  # (10, 26)
    
    dJdw = np.concatenate((np.ravel(dJdw1), np.ravel(dJdw2)))
    
    return 
        
        
    
backpropg(X, y_onehot, n_neurons, learning_rate, iregularization)

(25, 401)
