### IMPORTING LIBRARIES

In [1]:
import numpy as np
import pandas as pd
from keras.datasets import fashion_mnist
import matplotlib.pyplot as plt
import random
import math
from sklearn.model_selection import train_test_split

### DATA PREPROCESSING

In [2]:
# Load Fashion-MNIST; (x1, y1) and (x2, y2) are the two splits returned by Keras.
(x1, y1), (x2, y2) = fashion_mnist.load_data()

# Scale pixel intensities by 1/255.
x1, x2 = x1 / 255.0, x2 / 255.0

# Hold out 10% of (x1, y1); random_state is fixed so the split is reproducible.
x_train1, x_test2, y_train1, y_test2 = train_test_split(
    x1,
    y1,
    test_size=0.1,
    random_state=0,
)

In [3]:
# Flatten every sample into one column so the arrays are laid out as
# (features, samples). Sample counts are read from the arrays themselves
# instead of being hard-coded, so changing test_size does not break the cell.
n_train_samples = x_train1.shape[0]
n_test_samples = x_test2.shape[0]

x_train = x_train1.reshape(n_train_samples, -1).T
y1_train = y_train1.reshape(n_train_samples, 1).T
x_test  = x_test2.reshape(n_test_samples, -1).T
y1_test  = y_test2.reshape(n_test_samples, 1).T
print("without one-hot encoding")
print("shape of x_train :",x_train.shape)
print("shape of y_train :",y1_train.shape)
print("shape of x_test  :",x_test.shape)
print("shape of y_test  :",y1_test.shape)

without one-hot encoding
shape of x_train : (784, 54000)
shape of y_train : (1, 54000)
shape of x_test  : (784, 6000)
shape of y_test  : (1, 6000)


In [4]:
#one hot encoding

def one_hot_encode(labels, num_classes=10):
    '''Convert a (1, samples) row of integer labels into a (num_classes, samples) one-hot matrix.

    Parameters
    ----------
    labels : numpy array
        integer class labels in (1, samples) format

    num_classes : int
        number of rows (classes) in the encoded output

    Returns
    -------
    numpy array
        float matrix with a 1 at [label, sample] and 0 elsewhere; a label
        outside 0..num_classes-1 produces an all-zero column
    '''
    # Broadcasting a (num_classes, 1) column of class ids against the (1, samples)
    # label row compares every class with every sample in one vectorised step.
    class_ids = np.arange(num_classes).reshape(-1, 1)
    return (class_ids == labels).astype(float)

y_train = one_hot_encode(y1_train)
y_test = one_hot_encode(y1_test)


### DEFINING ACTIVATION FUNCTIONS AND LOSS FUNCTIONS

In [5]:
# Defining Activation function for neural network

def sigmoid_function(x):
    '''Logistic sigmoid 1 / (1 + exp(-x)), evaluated without floating-point overflow.

    Parameters
    ----------
    x : scalar or numpy array
        pre-activation values

    Returns
    -------
    scalar or numpy array
        element-wise sigmoid of x, same shape as x
    '''
    # exp(-|x|) never exceeds 1, so it cannot overflow however large |x| is
    # (the naive exp(-x) overflows for large negative x).
    decay = np.exp(-np.abs(x))
    # x >= 0 : 1 / (1 + e^-x)        x < 0 : e^x / (1 + e^x)   (algebraically identical)
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves arrays untouched.
    return result[()]

def sigmoid_derivative(x):
    '''Derivative of the sigmoid at pre-activation x: s(x) * (1 - s(x)).'''
    activated = sigmoid_function(x)
    return activated * (1 - activated)

def tanh_function(x):
    '''Element-wise hyperbolic tangent of x.'''
    activated = np.tanh(x)
    return activated

def tanh_derivative(x):
    '''Derivative of tanh at pre-activation x: 1 - tanh(x)^2.'''
    activated = np.tanh(x)
    return 1 - activated ** 2

def ReLu(x):
    '''Rectified linear unit: element-wise max(0, x).'''
    rectified = np.maximum(0, x)
    return rectified

def ReLu_derivative(x):
    '''Derivative of ReLU: 1 where x is strictly positive, 0 elsewhere (including x == 0).'''
    is_positive = x > 0
    # Multiplying the boolean mask by 1 converts it to integers.
    return 1 * is_positive

def softmax_function(x):
    '''Column-wise softmax of x.

    The maximum along axis 0 is subtracted before exponentiating so that the
    largest exponent is exp(0); the normalised result is unchanged by the shift.
    '''
    column_max = np.max(x, axis=0, keepdims=True)
    numerators = np.exp(x - column_max)
    denominators = np.sum(numerators, axis=0, keepdims=True)
    return numerators / denominators

def softmax_derivative(x):
    '''Element-wise s * (1 - s), where s = softmax_function(x).

    This is only the diagonal term of the softmax Jacobian; cross-class terms
    are not included.
    '''
    probabilities = softmax_function(x)
    return probabilities * (1 - probabilities)

def cost_function(al,y,Batch_size,loss,lamb,parameters):
    '''Compute the L2-regularised loss of a batch of predictions.

    Parameters
    ----------
    al : numpy array
        predicted outputs in (classes, batch_size) format

    y : numpy array
        targets with the same shape as al; a single row selects the binary
        cross-entropy formula, several rows the multiclass one

    Batch_size : int
        number of samples the loss is averaged over

    loss : String
        loss to compute ('cross_entropy','mse')

    lamb : float
        L2 regularisation strength

    parameters : Dictionary
        weights and biases keyed 'W1','b1','W2','b2',...; only the weights are penalised

    Returns
    -------
    cost : numpy array
        0-d array holding the data loss plus (lamb / (2 * Batch_size)) * sum of squared weights

    Raises
    ------
    ValueError
        if loss is not one of the supported names
    '''
    # Reject unknown loss names up front; otherwise `cost` would never be
    # assigned and the function would fail later with an UnboundLocalError.
    if loss not in ('cross_entropy', 'mse'):
        raise ValueError("loss must be 'cross_entropy' or 'mse', got " + repr(loss))

    al = np.clip(al, 1e-9, 1 - 1e-9)                                                    # Clip to avoid taking the log of 0 or 1
    if loss == 'cross_entropy':
        if y.shape[0] == 1:                                                             # binary classification
            cost = (1/Batch_size) * (-np.dot(y,np.log(al).T) - np.dot(1-y, np.log(1-al).T))
        else:                                                                           # multiclass-classification
            cost = -(1/Batch_size) * np.sum(y * np.log(al))
    else:                                                                               # mean squared error
        cost = (1/2) * np.sum((y-al)**2)/Batch_size

    # The dictionary holds one W and one b per layer, so it has len // 2 layers.
    num_layers = len(parameters)//2
    squared_weights = sum(np.sum(parameters["W"+str(i)]**2) for i in range(1, num_layers + 1))
    cost = cost + (lamb/(2*Batch_size))*squared_weights
    return np.squeeze(cost)


### INITIALIZING PARAMETERS

In [6]:
def init_parameters(layers , init_mode):  
    ''' Function to initialize weights, biases and previous updates of Neural_Network
    
    Parameters
    ----------
    layers : List
        list of numbers of neurons per layer specifying layer dimensions in the format [#inp_features,#num_neurons in layer1,#num_neurons in layer2,......,#out_layer]
    
    init_mode : String
        initialization mode ('Random_normal','Random_uniform','Xavier')
    
    Returns
    -------
    Parameters : Dictionary
         contains weights 'W<l>' of shape (layers[l], layers[l-1]) and zero biases 'b<l>' of shape (layers[l], 1)
    
    Previous_Updates : Dictionary   
         zero arrays with the same keys and shapes as Parameters, used for different purposes for different optimizers
    
    Raises
    ------
    ValueError
         if init_mode is not one of the supported modes
    
    Notes
    -----
    The global NumPy random generator is re-seeded with 42 on every call, so
    repeated calls with the same arguments return identical weights.
    '''
    # Fail fast on a misspelt mode; otherwise the returned dictionary would hold
    # biases but no weights and only break later inside forward propagation.
    if init_mode not in ('Random_normal', 'Random_uniform', 'Xavier'):
        raise ValueError("init_mode must be 'Random_normal', 'Random_uniform' or 'Xavier', got " + repr(init_mode))

    np.random.seed(42)
    Parameters = {}
    Previous_Updates = {}
    L = len(layers)           #no.of layers, counting the input layer
    
    for l in range(1, L):     #every layer except the input layer owns a weight matrix and a bias
        fan_out, fan_in = layers[l], layers[l-1]

        if init_mode == 'Random_normal':
            Parameters['W'+str(l)] = np.random.randn(fan_out, fan_in)
            
        elif init_mode == 'Random_uniform':
            Parameters['W'+str(l)] = np.random.rand(fan_out, fan_in)
            
        else:                 #'Xavier': standard normal scaled by sqrt(2 / (fan_in + fan_out))
            Parameters['W'+str(l)] = np.random.randn(fan_out, fan_in)*np.sqrt(2/(fan_out+fan_in))
            
        Parameters['b'+str(l)] = np.zeros((fan_out,1))
        
        Previous_Updates['W'+str(l)] = np.zeros((fan_out, fan_in))
        Previous_Updates['b'+str(l)] = np.zeros((fan_out, 1))
        
    return Parameters,Previous_Updates


### FORWARD PROPAGATION

In [7]:
def Forward_Propagation(x, Parameters, activation_function):
    '''Function to forward propagate a minibatch of data once through the NN

    Parameters
    -----------
    x: numpy array
        data in (features,batch_size) format

    Parameters: Dictionary
        Weights(W) and biases(b) of the Neural Network, keyed 'W1','b1','W2','b2',...

    activation_function: String
        activation used by every hidden layer ('tanh','ReLu','sigmoid'); the output
        layer always uses sigmoid when it has a single row and softmax otherwise

    Returns
    --------
    output: numpy array
        post-activation of the last layer after one pass
    forward_prop: Dictionary
        cache holding the input as 'h0', every pre-activation as 'a<l>' and every
        post-activation as 'h<l>' for l = 1..L

    Raises
    ------
    ValueError
        if the network has hidden layers and activation_function is not supported
    '''
    hidden_activations = {
        'tanh': tanh_function,
        'ReLu': ReLu,
        'sigmoid': sigmoid_function,
    }

    # One W and one b per layer, so the dictionary describes len // 2 layers.
    L = len(Parameters) // 2

    # Only networks with hidden layers use activation_function. Rejecting an
    # unknown name here avoids a confusing KeyError on a missing 'h<l>' entry.
    if L > 1 and activation_function not in hidden_activations:
        raise ValueError("activation_function must be 'tanh', 'ReLu' or 'sigmoid', got " + repr(activation_function))

    #first activation layer will be input layer itself
    forward_prop = {'h0': x}

    # hidden layers: a_l = W_l . h_(l-1) + b_l ,  h_l = g(a_l)
    for l in range(1, L):
        pre_activation = np.dot(Parameters['W' + str(l)],forward_prop['h' + str(l-1)]) + Parameters['b' + str(l)]
        forward_prop['a' + str(l)] = pre_activation
        forward_prop['h' + str(l)] = hidden_activations[activation_function](pre_activation)

    # output layer pre-activation
    output_pre_activation = np.matmul(Parameters['W' + str(L)],forward_prop['h' + str(L-1)]) + Parameters['b' + str(L)]
    forward_prop['a' + str(L)] = output_pre_activation

    # sigmoid or softmax functions are used for output layer
    if output_pre_activation.shape[0] == 1:  #if it is a binary output then sigmoid function
        forward_prop['h' + str(L)] = sigmoid_function(output_pre_activation)
    else :
        forward_prop['h' + str(L)] = softmax_function(output_pre_activation) #if it is multiclass classification then it is softmax function

    return forward_prop['h' + str(L)], forward_prop

### FEED_FORWARD FUNCTION

In [29]:
def feed_forward(x,y,layers,init_mode,loss,activation_function):
    '''Initialise a fresh network and run one forward pass over x.

    `y` and `loss` are accepted but not used by this function. Returns only the
    output-layer activations; the initial optimizer state and the forward cache
    are discarded.
    '''
    fresh_parameters, _ = init_parameters(layers , init_mode)
    output, _ = Forward_Propagation(x, fresh_parameters, activation_function)
    return output

In [49]:
# Hidden-layer widths, in order from the input side to the output side.
num_neuron = [95,90]

# Layer sizes: input features, then each hidden layer, then the output classes.
# The original cell indexed an undefined name `num`, which only worked thanks to
# leftover kernel state; the list is now built directly from `num_neuron`.
layers = [x_train.shape[0], *num_neuron, y_train.shape[0]]

for j in range(len(num_neuron)):
    print('neurons in hidden layers'+str(j+1) ,num_neuron[j])

y_pred = feed_forward(x_train,y_train,layers,init_mode='Xavier',loss='mse',activation_function='sigmoid')
print('predicted output :',y_pred[:,0]) #for one data point

neurons in hidden layers1 95
neurons in hidden layers2 90
predicted output : [0.03896708 0.08704719 0.30094989 0.03719906 0.10418865 0.13478975
 0.21114578 0.01945331 0.02087356 0.04538574]
