In [1]:
'''

Author: Luca Pedrelli
l.pedrelli@deeplearningitalia.com
lucapedrelli@gmail.com

Exercise: A possible implementation of an MLP from scratch

NB: This file was created exclusively for educational purposes

'''

import warnings
# Silence every Python warning so library notices do not clutter the notebook output.
warnings.filterwarnings("ignore")

import os
# Hide all CUDA devices. This is set before keras is imported, presumably so the
# backend runs on the CPU only -- TODO confirm this is still the intended setup.
os.environ["CUDA_VISIBLE_DEVICES"]="-1"

from matplotlib import pylab

import keras
import numpy as np

# Seed NumPy's global random generator so the random weight initialisation is repeatable.
np.random.seed(7)

def MSE(X,Y):
    """Mean Squared Error: the average of the element-wise squared differences of X and Y."""
    squared_diff = np.square(X - Y)
    return np.mean(squared_diff)

Using TensorFlow backend.


In [None]:
# load dataset
from keras.datasets import mnist
# load_data() yields the training split and the test split, each unpacked here
# as an (images, labels) pair.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
# Display the first training image.
pylab.imshow(x_train[0])

In [None]:
# Display the second training image.
pylab.imshow(x_train[1])

In [None]:
# Flatten every image into a single row vector (one row per sample).
# Using -1 for the second dimension keeps this cell idempotent: re-running it on
# the already flattened arrays is a no-op instead of failing on shape[2].
x_train = x_train.reshape((x_train.shape[0], -1))
x_test = x_test.reshape((x_test.shape[0], -1))

In [None]:
# Show the shapes of both flattened splits (train first, then test).
x_train.shape, x_test.shape

In [None]:
# Stack the train and test splits into one data set and cast to float32.
# After the transpose, inputs holds one sample per column.
inputs = np.concatenate((x_train, x_test)).T.astype(np.float32)
targets = np.concatenate((y_train, y_test)).T.astype(np.float32)

In [None]:
# Show the shapes of the merged input and target arrays.
inputs.shape, targets.shape

In [None]:
# One-hot encode the labels (the number of classes is inferred from the data),
# then transpose so that each column is the target vector of one sample.
# NOTE: this overwrites `targets`, so the cell must be run only once per kernel.
targets = keras.utils.to_categorical(targets).T

In [None]:
# Column indexes of the samples used for training, validation and test.
TR_indexes = range(50)
VL_indexes = range(50, 60)
TS_indexes = range(60, 70)
# Training and validation samples together, as one plain list.
Full_TR_indexes = [*TR_indexes, *VL_indexes]
# Network input/output sizes are taken from the row counts of the data arrays.
Ninputs, Noutputs = inputs.shape[0], targets.shape[0]


In [None]:
# Show the shapes again now that targets has been converted with to_categorical.
inputs.shape, targets.shape

In [None]:
# Create network: every weight is drawn uniformly from [-1,1]
Nlayers = 2
Neurons = 100

W = []
layers = range(Nlayers)
for l in layers:
    # The first layer reads the raw inputs, every later layer reads Neurons hidden units.
    fan_in = Ninputs if l == 0 else Neurons
    # The last layer produces the network outputs, every earlier layer has Neurons units.
    fan_out = Noutputs if l == Nlayers-1 else Neurons
    # One row per unit; the extra (last) column holds the bias weight.
    W.append(np.random.uniform(-1,1, (fan_out, fan_in+1)))

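$O^{(l)} = \mathbf{tanh}(W^{(l)} O^{(l-1)})$ for the hidden layers $l = 1, \dots, L-1$, where $O^{(0)}$ is the network input and the bias is absorbed into $W^{(l)}$ as its last column <br>
$O^{(L)} = W^{(L)} O^{(L-1)}$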

In [None]:
# Compute layers outputs
def forward_pass(W, inputs):
    """Propagate `inputs` through the network and return every layer's output.

    W      : list of weight matrices, one per layer. In each matrix the last
             column is the bias and the remaining columns multiply the layer input.
    inputs : array fed to the first layer (multiplied from the left by W[0][:, :-1]).

    Returns a list with one array per layer, in layer order. All layers except
    the last apply tanh; the last layer is left linear.
    """
    activations = []
    signal = inputs
    last_index = len(W) - 1

    for index, weights in enumerate(W):
        # Bias as a column vector so it broadcasts across the sample columns.
        bias = np.expand_dims(weights[:, -1], axis=1)
        pre_activation = weights[:, :-1].dot(signal) + bias
        if index == last_index:
            signal = pre_activation
        else:
            signal = np.tanh(pre_activation)
        activations.append(signal)

    return activations

In [None]:
# Compute a gradient descent pass on a sample input
def backward_pass(W, sample_input, single_target):
    """Back-propagate the error of one sample and return the weight updates.

    The network is the one evaluated by `forward_pass`: tanh hidden layers
    followed by a linear output layer, where the last column of every weight
    matrix is the bias (i.e. the weight of a constant input equal to 1).

    Parameters
    ----------
    W : list of 2-D arrays
        Weight matrices, one per layer, one row per unit, bias in the last column.
    sample_input : 2-D array
        Input sample stored as a column; only column 0 is read.
    single_target : 2-D array
        Desired output stored as a column; only column 0 is read.

    Returns
    -------
    list of 2-D arrays
        One update matrix per layer with the same shape as the matching W[l],
        ordered from the LAST layer to the FIRST (dWs[0] belongs to W[-1]).
        Each matrix is the descent direction of the squared error
        (it is built from `target - output`), so the caller ADDS `lr * dW`.
    """
    outputs = forward_pass(W, sample_input)

    # Use the length of the W that was passed in, not a notebook-level constant,
    # so the function works for any network handed to it.
    last_layer = len(W) - 1

    dWs = []
    delta = None  # delta of the layer processed in the previous iteration (layer l+1)

    for l in reversed(range(len(W))):
        if l == last_layer:
            # Linear output layer: the delta is simply the output error.
            delta = single_target[:, 0] - outputs[l][:, 0]
        else:
            # Propagate the next layer's deltas back through its weights
            # (bias column excluded) and multiply by tanh'(a) = 1 - tanh(a)^2.
            back_signal = W[l+1][:, :-1].T.dot(delta)
            delta = back_signal * (1 - outputs[l][:, 0]**2)

        # The input seen by layer l: the raw sample for the first layer,
        # otherwise the output of the previous layer.
        layer_input = sample_input[:, 0] if l == 0 else outputs[l-1][:, 0]

        dW = np.zeros(W[l].shape)
        dW[:, :-1] = np.outer(delta, layer_input)
        # The bias is fed by a constant 1, so its update is the delta itself
        # (not the current bias weight).
        dW[:, -1] = delta

        dWs.append(dW)

    return dWs


In [None]:
# learning rate
lr = 0.0001
epochs = 20
errors = []

for epoch in range(epochs):
    print(epoch)
    # Online training: update the weights after every single sample.
    for p in Full_TR_indexes:
        column = slice(p, p+1)  # keeps the sample as a 2-D column
        dWs = backward_pass(W, inputs[:, column], targets[:, column])
        # backward_pass returns the updates from the last layer to the first,
        # so pair them with the layer indexes in descending order.
        for layer_index, dW in zip(reversed(range(len(W))), dWs):
            W[layer_index] = W[layer_index] + lr*dW

    # Record the error on the training + validation samples after each epoch.
    outputs = forward_pass(W, inputs)[-1]
    epoch_error = MSE(outputs[:, Full_TR_indexes], targets[:, Full_TR_indexes])
    errors.append(epoch_error)

In [None]:
# Plot Learning Curve: the MSE recorded at the end of every epoch.
pylab.plot(range(len(errors)),errors)
# Label the figure so it can be understood on its own.
pylab.xlabel("epoch")
pylab.ylabel("MSE (training + validation samples)")
pylab.title("Learning curve");

In [None]:
# Display the MSE values recorded after each epoch.
errors