In [1]:
import os
import pandas as pd
import numpy as np
import random
# from scipy.special import expit as activation_function
from scipy.stats import truncnorm


In [12]:
# Directory that holds the data files; change it here to relocate the dataset.
path = "."

# Build the CSV location from `path` (previously `path` was defined but never used).
filename = os.path.join(path, "fashion-mnist_train.csv")
# "NA" and "?" are parsed as missing values.
df = pd.read_csv(filename, na_values=["NA", "?"])

In [13]:
# Quick sanity check: (number of rows, number of columns) of the loaded frame.
df.shape

(60000, 785)

In [14]:
# Display the frame; the notebook renders a truncated preview of its rows and columns.
df

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,1,0,0,0,0,0,0,0,0,0,...,73,0,0,0,0,0,0,0,0,0
59997,8,0,0,0,0,0,0,0,0,0,...,160,162,163,135,94,0,0,0,0,0
59998,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
# Sigmoid activation function (forward pass)
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation values; converted to a float ndarray.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` holding values in [0, 1].

    Notes
    -----
    Works on whole arrays at once (no ``np.vectorize`` Python loop), accepts
    empty input, and never exponentiates a positive number, so large-magnitude
    inputs cannot overflow.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) is always in (0, 1], so neither branch below can overflow.
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically identical.
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))

In [16]:
# Sigmoid activation function (backward pass)
def d_sigmoid(x):
    """Derivative of the sigmoid, expressed in terms of the sigmoid's OUTPUT.

    Parameters
    ----------
    x : scalar or array-like
        Values that are already sigmoid activations, i.e. ``x = sigmoid(z)``.
        Do not pass the pre-activation ``z`` here.

    Returns
    -------
    numpy.ndarray
        ``x * (1 - x)`` computed element-wise, same shape as ``x``.
    """
    # Plain array arithmetic: no np.vectorize loop, and empty input is fine.
    x = np.asarray(x, dtype=float)
    return x * (1.0 - x)

In [17]:
# ReLU activation function (forward pass)
def relu(x):
    """Element-wise rectified linear unit, ``max(0, x)``.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation values.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Negative entries replaced by 0; the input's numeric dtype is kept.

    Notes
    -----
    ``np.maximum`` is used instead of ``np.vectorize(max)``: the vectorized
    version inferred the output dtype from its first result, so an input
    starting with a negative float produced an integer array and truncated
    every later value (e.g. 2.7 became 2).
    """
    return np.maximum(0, x)

In [18]:
# ReLU activation function (backward pass)
def d_relu(x):
    """Element-wise derivative of ReLU.

    Parameters
    ----------
    x : scalar or array-like
        Values at which to evaluate the derivative.

    Returns
    -------
    numpy.ndarray
        Integer array with 1 where ``x > 0`` and 0 elsewhere, same shape as ``x``.

    Notes
    -----
    ReLU is not differentiable at 0; the value 0 is used there. The earlier
    implementation returned ``None`` for ``x == 0``, which broke as soon as an
    input contained an exact zero.
    """
    return (np.asarray(x) > 0).astype(int)

In [19]:
def softmax(x, axis=0):
    """Softmax: exponentiate and normalise so the entries along ``axis`` sum to 1.

    Parameters
    ----------
    x : scalar or array-like
        Scores (logits). A scalar is treated as a one-element vector.
    axis : int, optional
        Axis along which to normalise. The default 0 suits 1-D score vectors
        and column vectors of shape (n_classes, 1).

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as ``x`` (at least 1-D) whose entries
        along ``axis`` are non-negative and sum to 1. Empty input is returned
        as an empty array.
    """
    x = np.atleast_1d(np.asarray(x, dtype=float))
    if x.size == 0:
        return x.copy()
    # Subtracting the maximum leaves the result unchanged but keeps exp() from overflowing.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [21]:
class NeuralNetwork:
    """Fully connected network with one hidden layer, trained sample by sample
    with gradient descent and node dropout.

    Weight matrices
    ---------------
    wih : ndarray, shape (no_of_hidden_nodes, no_of_in_nodes)
        Weights from the input layer to the hidden layer.
    who : ndarray, shape (no_of_out_nodes, no_of_hidden_nodes)
        Weights from the hidden layer to the output layer.

    Notes
    -----
    ``no_of_hidden_layers`` is stored but not used yet: exactly one hidden
    layer is built. TODO: support several hidden layers (a ``whh`` matrix per
    additional layer).
    """

    def __init__(self,
                 no_of_in_nodes,
                 no_of_out_nodes,
                 no_of_hidden_nodes,
                 learning_rate,
                 no_of_hidden_layers,
                 active_input_percentage,
                 active_hidden_percentage,
                 activation_function=None,
                 activation_derivative=None):
        """Store the hyper-parameters and draw the initial weights.

        Parameters
        ----------
        no_of_in_nodes, no_of_out_nodes, no_of_hidden_nodes : int
            Layer sizes.
        learning_rate : float
            Step size applied to every weight update.
        no_of_hidden_layers : int
            Stored only; see the class notes.
        active_input_percentage, active_hidden_percentage : float
            Fraction, in (0, 1], of input / hidden nodes kept active during
            each dropout round.
        activation_function, activation_derivative : callable, optional
            Element-wise activation and its derivative. The derivative is
            called with the activation's OUTPUT (as in ``a * (1 - a)`` for the
            sigmoid). Both default to a built-in sigmoid; supply both or
            neither.

        Raises
        ------
        ValueError
            If a percentage is outside (0, 1] or only one of the two
            activation callables is given.
        """
        if (activation_function is None) != (activation_derivative is None):
            raise ValueError("activation_function and activation_derivative "
                             "must be supplied together")

        self.no_of_in_nodes = no_of_in_nodes
        self.no_of_out_nodes = no_of_out_nodes
        self.no_of_hidden_nodes = no_of_hidden_nodes
        self.learning_rate = learning_rate
        self.no_of_hidden_layers = no_of_hidden_layers
        self.active_input_percentage = self._check_percentage(
            active_input_percentage, "active_input_percentage")
        # Previously this was assigned from active_input_percentage by mistake.
        self.active_hidden_percentage = self._check_percentage(
            active_hidden_percentage, "active_hidden_percentage")

        if activation_function is None:
            self.activation_function = self._sigmoid
            self.activation_derivative = self._d_sigmoid
        else:
            self.activation_function = activation_function
            self.activation_derivative = activation_derivative

        self.create_weight_matrices()

    @staticmethod
    def _check_percentage(value, name):
        """Return ``value`` if it lies in (0, 1], otherwise raise ValueError."""
        if not 0 < value <= 1:
            raise ValueError(f"{name} must be in the interval (0, 1], got {value!r}")
        return value

    @staticmethod
    def _sigmoid(x):
        """Numerically stable element-wise logistic function (default activation)."""
        x = np.asarray(x, dtype=float)
        z = np.exp(-np.abs(x))  # always in (0, 1], so no overflow
        return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))

    @staticmethod
    def _d_sigmoid(a):
        """Sigmoid derivative expressed through the sigmoid output ``a``."""
        return a * (1.0 - a)

    @staticmethod
    def truncated_normal(mean, sd, low, upp):
        """Return a frozen scipy truncated-normal distribution.

        ``truncnorm`` expects its bounds in units of standard deviations
        around ``loc``, hence the conversion of ``low`` / ``upp`` below.
        """
        return truncnorm((low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd)

    def create_weight_matrices(self):
        """Initialise ``wih`` and ``who`` with truncated-normal random values.

        Each matrix is drawn from a standard normal truncated to
        [-1/sqrt(fan_in), +1/sqrt(fan_in)], where fan_in is the number of
        nodes feeding the layer.
        """
        # Weights connecting input and hidden nodes.
        rad = 1 / np.sqrt(self.no_of_in_nodes)
        dist = self.truncated_normal(mean=0, sd=1, low=-rad, upp=rad)
        self.wih = dist.rvs(size=(self.no_of_hidden_nodes, self.no_of_in_nodes))

        # Weights connecting hidden and output nodes.
        rad = 1 / np.sqrt(self.no_of_hidden_nodes)
        dist = self.truncated_normal(mean=0, sd=1, low=-rad, upp=rad)
        self.who = dist.rvs(size=(self.no_of_out_nodes, self.no_of_hidden_nodes))

    def train_single(self, input_vector, target_vector):
        """Run one forward/backward pass for a single sample and update the weights.

        ``input_vector`` and ``target_vector`` can be tuple, list or ndarray;
        both are turned into column vectors. The input length must match the
        current number of columns of ``wih`` (which is reduced while dropout
        is active).
        """
        # Column vectors: shape (n, 1).
        input_vector = np.array(input_vector, ndmin=2).T
        target_vector = np.array(target_vector, ndmin=2).T

        # Forward pass.
        hidden_output = self.activation_function(np.dot(self.wih, input_vector))
        network_output = self.activation_function(np.dot(self.who, hidden_output))

        # Backward pass: error signal of each layer times the activation slope.
        output_errors = target_vector - network_output
        output_delta = output_errors * self.activation_derivative(network_output)
        # Uses who BEFORE it is updated.
        hidden_errors = np.dot(self.who.T, output_delta)
        hidden_delta = hidden_errors * self.activation_derivative(hidden_output)

        # Weight updates.
        self.who += self.learning_rate * np.dot(output_delta, hidden_output.T)
        self.wih += self.learning_rate * np.dot(hidden_delta, input_vector.T)

    def train(self, data_array,
              labels_one_hot_array,
              epochs=1,
              active_input_percentage=None,
              active_hidden_percentage=None,
              no_of_dropout_tests=10):
        """Train on all samples, drawing a fresh dropout mask per partition.

        Parameters
        ----------
        data_array : array-like, shape (n_samples, no_of_in_nodes)
        labels_one_hot_array : sequence of n_samples target vectors
        epochs : int
            Number of passes over the data.
        active_input_percentage, active_hidden_percentage : float, optional
            When given, replace the values passed to the constructor (they are
            stored on the instance so that ``run`` scales consistently). When
            omitted, the constructor values are used.
        no_of_dropout_tests : int
            Number of partitions per epoch; each one gets its own dropout mask.

        Raises
        ------
        ValueError
            If ``no_of_dropout_tests`` is smaller than 1, the number of labels
            differs from the number of samples, or a percentage is outside (0, 1].
        """
        if no_of_dropout_tests < 1:
            raise ValueError("no_of_dropout_tests must be at least 1")
        if active_input_percentage is not None:
            self.active_input_percentage = self._check_percentage(
                active_input_percentage, "active_input_percentage")
        if active_hidden_percentage is not None:
            self.active_hidden_percentage = self._check_percentage(
                active_hidden_percentage, "active_hidden_percentage")

        data_array = np.asarray(data_array)
        n_samples = len(data_array)
        if len(labels_one_hot_array) != n_samples:
            raise ValueError("data_array and labels_one_hot_array differ in length")
        if n_samples == 0:
            return

        # At least 1, so range() below never receives a zero step.
        partition_length = max(1, n_samples // no_of_dropout_tests)

        for epoch in range(epochs):
            print("epoch: ", epoch + 1)
            for start in range(0, n_samples, partition_length):
                # The last partition may be shorter; never index past the data.
                stop = min(start + partition_length, n_samples)
                active_in_indices, active_hidden_indices = \
                    self.dropout_weight_matrices()
                try:
                    for idx in range(start, stop):
                        self.train_single(data_array[idx][active_in_indices],
                                          labels_one_hot_array[idx])
                finally:
                    # Always restore the full-size matrices, even if a sample fails.
                    self.weight_matrices_reset(active_in_indices,
                                               active_hidden_indices)

    def dropout_weight_matrices(self):
        """Shrink ``wih`` / ``who`` to a random subset of active nodes.

        The full matrices and node counts are saved in the ``*_orig``
        attributes so that ``weight_matrices_reset`` can restore them.

        Returns
        -------
        (list of int, list of int)
            Sorted indices of the active input nodes and active hidden nodes.
        """
        # Keep the full-size state for the later reset.
        self.wih_orig = self.wih.copy()
        self.who_orig = self.who.copy()
        self.no_of_in_nodes_orig = self.no_of_in_nodes
        self.no_of_hidden_nodes_orig = self.no_of_hidden_nodes

        # Keep at least one node per layer so the matrices never become empty.
        active_input_nodes = max(
            1, int(self.no_of_in_nodes * self.active_input_percentage))
        active_input_indices = sorted(
            random.sample(range(self.no_of_in_nodes), active_input_nodes))
        active_hidden_nodes = max(
            1, int(self.no_of_hidden_nodes * self.active_hidden_percentage))
        active_hidden_indices = sorted(
            random.sample(range(self.no_of_hidden_nodes), active_hidden_nodes))

        # Rows = active hidden nodes, columns = active input nodes.
        self.wih = self.wih[np.ix_(active_hidden_indices, active_input_indices)]
        self.who = self.who[:, active_hidden_indices]

        self.no_of_hidden_nodes = active_hidden_nodes
        self.no_of_in_nodes = active_input_nodes
        return active_input_indices, active_hidden_indices

    def weight_matrices_reset(self,
                              active_input_indices,
                              active_hidden_indices):
        """Write the trained sub-matrices back into the full-size matrices.

        ``self.wih`` and ``self.who`` hold the updated values of the active
        nodes only; they are copied into the saved originals at the positions
        given by the index lists, and the full node counts are restored.
        """
        self.wih_orig[np.ix_(active_hidden_indices, active_input_indices)] = self.wih
        self.who_orig[:, active_hidden_indices] = self.who
        # dropout_weight_matrices takes fresh copies, so no copy is needed here.
        self.wih = self.wih_orig
        self.who = self.who_orig
        self.no_of_in_nodes = self.no_of_in_nodes_orig
        self.no_of_hidden_nodes = self.no_of_hidden_nodes_orig

    def run(self, input_vector):
        """Forward-propagate one sample through the full network.

        ``input_vector`` can be tuple, list or ndarray.

        Returns
        -------
        numpy.ndarray, shape (1, no_of_out_nodes)
            Output-layer activations.

        Notes
        -----
        All nodes are active at inference time, so each layer's incoming
        signal is scaled by the fraction of nodes that was active during
        training (the keep probability) to keep its expected magnitude
        unchanged.
        """
        input_vector = np.array(input_vector, ndmin=2).T
        hidden_output = self.activation_function(
            np.dot(self.wih, input_vector) * self.active_input_percentage)
        network_output = self.activation_function(
            np.dot(self.who, hidden_output) * self.active_hidden_percentage)
        return network_output.T