In [1]:
import numpy as np
import pandas as pd
from scipy.io import loadmat
import os

In [2]:
# Build the file path with os.path.join so it works on every OS. The previous
# literal '\ex3data1.mat' relied on a Windows-only separator and on '\e' being
# left alone as an (invalid) escape sequence.
path = os.path.join(os.getcwd(), 'ex3data1.mat')
# loadmat returns a dict mapping MATLAB variable names to NumPy arrays.
data = loadmat(path)

In [3]:
# Echo the dictionary returned by loadmat so its keys and array previews appear as the cell output.
data

{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}

In [4]:
# Pull the two arrays stored under the 'X' and 'y' keys of the loaded MAT dictionary.
training_inputs, training_outputs = data['X'], data['y']

In [5]:
# Display both arrays together (as a tuple) to eyeball their contents.
training_inputs, training_outputs

(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8))

In [6]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), applied element-wise to scalars or arrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [None]:
def forward_propagation(inputs , num_inputs , num_hidden , num_outputs , weights=None):
    """Run one forward pass through a fully connected sigmoid network.

    Parameters
    ----------
    inputs : ndarray
        Activations of the input layer; multiplied with ``weights[0]`` via ``np.dot``.
    num_inputs : int
        Number of units in the input layer.
    num_hidden : sequence of int
        Sizes of the hidden layers, in order.
    num_outputs : int
        Number of units in the output layer.
    weights : list of ndarray, optional
        One matrix per pair of consecutive layers, with shape
        ``(layers[i], layers[i+1])``. When the list is empty (or ``None``),
        random matrices are created and appended to it IN PLACE, so a caller
        that passes an empty list gets that same list filled.

    Returns
    -------
    activations_list : list of ndarray
        The input followed by the activation of every subsequent layer.
    weights : list of ndarray
        The weight matrices that were used (the same list object that was passed in).
    layers : list of int
        Layer sizes ``[num_inputs, *num_hidden, num_outputs]``.
    """
    if weights is None:
        weights = []

    layers = [num_inputs] + list(num_hidden) + [num_outputs]

    # Create random weights only on the first call. The original code used
    # `continue` here, which is a SyntaxError outside of a loop.
    if len(weights) == 0:
        for i in range(len(layers) - 1):
            # np.random.rand samples uniformly from [0, 1)
            weights.append(np.random.rand(layers[i] , layers[i+1]))

    # Start from the input values and keep every layer's activations,
    # because back-propagation needs them later.
    activations = inputs
    activations_list = [activations]

    # Each matrix maps the activations of one layer to the next layer.
    for w in weights:
        next_inputs = np.dot(activations , w)
        activations = sigmoid(next_inputs)
        activations_list.append(activations)

    return activations_list , weights , layers

In [8]:
def sigmoid_derivative(x):
    """Return x * (1 - x).

    This equals the derivative of the logistic function when ``x`` is already
    a sigmoid output (an activation), not the pre-activation value.
    """
    complement = 1 - x
    return x * complement

In [26]:
# Back-propagation for a single training example
def back_propagation(target , activations_list ,weights ,  layers):
    """Compute the weight gradients for one example by back-propagating the output error.

    Parameters
    ----------
    target : ndarray or scalar
        Expected network output; subtracted from ``activations_list[-1]``.
    activations_list : list of ndarray
        Per-layer activations as returned by the forward pass (input first).
        Each entry is reshaped to a column, so 1-D vectors are expected.
    weights : list of ndarray
        Weight matrices; ``weights[i]`` connects layer ``i`` to layer ``i + 1``.
    layers : list of int
        Layer sizes; only its length is used, to know how many weight
        matrices there are.

    Returns
    -------
    derivatives : list of ndarray
        ``derivatives[i]`` is the gradient of the squared error with respect
        to ``weights[i]`` (error is taken as ``output - target``).
    error : ndarray
        The error signal after the last step, i.e. propagated back to the
        INPUT layer. It is not the output-layer error.
    """
    # Error at the output layer (prediction minus target)
    error = activations_list[-1] - target

    # One gradient slot per weight matrix; every slot is filled in the loop below.
    num_weight_layers = len(layers) - 1
    derivatives = [None] * num_weight_layers

    # Walk the network from the last weight matrix to the first (right --> left)
    for i in reversed(range(num_weight_layers)):

        # Activations produced by weights[i]
        last_activations = activations_list[i+1]

        # Scale the error by the slope of the sigmoid at those activations.
        # The activations are already sigmoid outputs, so the slope is
        # a * (1 - a); applying sigmoid() again here was a bug.
        delta = error * sigmoid_derivative(last_activations)

        # Row vector (1, n_out) so the product below is an outer product
        delta_reshaped = delta.reshape(delta.shape[0] , -1).T

        # Activations that fed into weights[i], as a column vector (n_in, 1)
        current_activations = activations_list[i]
        current_reshaped_activations = current_activations.reshape(current_activations.shape[0] , -1)

        # Gradient for weights[i], shape (n_in, n_out)
        derivatives[i] = np.dot(current_reshaped_activations , delta_reshaped)

        # Push the error one layer back for the next iteration
        error = np.dot(delta , weights[i].T)

    return derivatives , error
        
    

In [34]:
def gradient_descent(weights , derivatives, learning_rate):
    """Take one gradient-descent step on every weight matrix.

    Parameters
    ----------
    weights : list of ndarray
        Weight matrices. Each array is updated IN PLACE.
    derivatives : list of ndarray
        Gradient for each weight matrix, same order and shapes as ``weights``.
    learning_rate : float
        Step size.

    Returns
    -------
    list of ndarray
        A new list holding the same (now updated) array objects.
    """
    new_weights = []
    for index, weight in enumerate(weights):
        derivative = derivatives[index]
        # Move AGAINST the gradient. The gradients in this notebook are built
        # from error = output - target, so adding them (as before) climbed
        # the error surface instead of descending it.
        weight -= learning_rate * derivative
        new_weights.append(weight)

    return new_weights

In [None]:
def train(inputs , targets , learning_rate , iterations=100):
    """Train a sigmoid network with one 25-unit hidden layer and one output unit.

    The weights are updated after every single training example.

    Parameters
    ----------
    inputs : ndarray
        Training examples, one per row; ``inputs[j]`` is fed to the network.
    targets : ndarray
        Expected outputs; ``targets[j]`` belongs to ``inputs[j]``.
    learning_rate : float
        Step size passed to ``gradient_descent``.
    iterations : int, optional
        Number of passes over the training set (default 100, the value that
        used to be hard-coded).

    Returns
    -------
    weights : list of ndarray
        The trained weight matrices.
    errors : list of float
        Mean squared output error of each pass, suitable for plotting.

    Notes
    -----
    NOTE(review): the network ends in a single sigmoid unit, whose output lies
    in (0, 1), while the labels shown earlier in this notebook are values such
    as 9 and 10. Consider scaling or one-hot encoding the targets.
    """
    errors = []
    weights = []
    # One pass over the whole training set per iteration
    for i in range(iterations):
        sample_errors = []
        for j in range(len(inputs)):
            training_input = inputs[j]
            target = targets[j]
            # Forward pass (creates the random weights on the very first call)
            activations_list , weights , layers = forward_propagation(training_input , training_input.shape[0] , [25] , 1 , weights)
            # The second return value is the error propagated back to the
            # input layer; it is not a training metric, so it is ignored here.
            derivatives , _ = back_propagation(target , activations_list ,weights ,  layers)
            # Rebind the updated weights (the original assigned to a misspelled
            # name, `wieghts`, and only worked because the update is in place).
            weights = gradient_descent(weights , derivatives , learning_rate)
            # Track the squared error at the OUTPUT layer for this example
            output_error = activations_list[-1] - target
            sample_errors.append(float(np.sum(output_error ** 2)))

        # One summary line per pass instead of printing the whole growing
        # error list after every example.
        if sample_errors:
            errors.append(float(np.mean(sample_errors)))
            print('iteration : ' , i , ' mean squared error : ' , errors[-1])

    print('training complet successfully')
    return weights , errors
            
            

In [None]:
# Start training on the arrays loaded above with a learning rate of 0.01.
train(training_inputs , training_outputs , 0.01)