In [1]:
import numpy as np
import random
from scipy.special import expit as activation_function # the logistic function
from scipy.stats import truncnorm

def truncated_normal(mean=0, sd=1, low=0, upp=10): # default values of params
    return truncnorm(
        (low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd)

# this class creates a NeuralNetwork
class NeuralNetwork: 
    def __init__(self, 
                 no_of_in_nodes, 
                 no_of_out_nodes,  
                 no_of_hidden_nodes, 
                 learning_rate, 
                 bias=None # default value of bias
                ):  
        # initialize the neural network
        self.no_of_in_nodes = no_of_in_nodes
        self.no_of_out_nodes = no_of_out_nodes       
        self.no_of_hidden_nodes = no_of_hidden_nodes          
        self.learning_rate = learning_rate 
        self.bias = bias
        self.create_weight_matrices() # call method to create a weight matrix
        
        
    def create_weight_matrices(self):
        X = truncated_normal(mean=2, sd=1, low=-0.5, upp=0.5)
        # print(X)
        
        bias_node = 1 if self.bias else 0 # if bias exists, the value of bias node is 1

        # construct the matrix btw input layer and hidden layer
        n = (self.no_of_in_nodes + bias_node) * self.no_of_hidden_nodes # the total entries of the matrix btw input and hidden 
        X = truncated_normal(mean=2, sd=1, low=-0.5, upp=0.5)
        self.wih = X.rvs(n).reshape((self.no_of_hidden_nodes, 
                                                   self.no_of_in_nodes + bias_node)) # rvs()?

        # construct the matrix btw hidden layer and output layer
        n = (self.no_of_hidden_nodes + bias_node) * self.no_of_out_nodes
        X = truncated_normal(mean=2, sd=1, low=-0.5, upp=0.5)
        self.who = X.rvs(n).reshape((self.no_of_out_nodes, 
                                                    (self.no_of_hidden_nodes + bias_node))) # rvs()?
        

    def dropout_weight_matrices(self,
                                active_input_percentage=0.70,
                                active_hidden_percentage=0.70):
        # restore wih array, if it had been used for dropout
        self.wih_orig = self.wih.copy()
        self.no_of_in_nodes_orig = self.no_of_in_nodes
        self.no_of_hidden_nodes_orig = self.no_of_hidden_nodes
        self.who_orig = self.who.copy() # restore who array
           
        # randomly dropout nodes in input layer 
        active_input_nodes = int(self.no_of_in_nodes * active_input_percentage) # number of active input nodes
        active_input_indices = sorted(random.sample(range(0, self.no_of_in_nodes), # random choose the active nodes
                                      active_input_nodes))
        
        # randomly dropout nodes in hidden layer
        active_hidden_nodes = int(self.no_of_hidden_nodes * active_hidden_percentage)
        active_hidden_indices = sorted(random.sample(range(0, self.no_of_hidden_nodes), 
                                       active_hidden_nodes))
        
        # construct the dropout matrix wih
        # ignore an input node is equivalent to delete a column of wih
        # ignore a hidden node is equivalent to delete a row of wih 
        self.wih = self.wih[:, active_input_indices][active_hidden_indices]  
        
        # construct the dropout matrix who
        # ignore a hidden node is equivalent to delete a column of who
        self.who = self.who[:, active_hidden_indices]
        
        # update the number of nodes in input layer and hidden layer after dropout
        self.no_of_in_nodes = active_input_nodes
        self.no_of_hidden_nodes = active_hidden_nodes
        
        return active_input_indices, active_hidden_indices
    
    
    # ?????
    def weight_matrices_reset(self, 
                              active_input_indices, 
                              active_hidden_indices):
        
        """
        self.wih and self.who contain the newly adapted values from the active nodes.
        We have to reconstruct the original weight matrices by assigning the new values 
        from the active nodes
        """
        temp = self.wih_orig.copy()[:,active_input_indices]
        temp[active_hidden_indices] = self.wih
        self.wih_orig[:, active_input_indices] = temp
        self.wih = self.wih_orig.copy()

        self.who_orig[:, active_hidden_indices] = self.who
        self.who = self.who_orig.copy()
        
        self.no_of_in_nodes = self.no_of_in_nodes_orig
        self.no_of_hidden_nodes = self.no_of_hidden_nodes_orig
 
           
    def train_single(self, input_vector, target_vector):
        """ 
        input_vector and target_vector can be tuple, list or ndarray
        """
 
        if self.bias:
            # add a bias node to end of input layer
            input_vector = np.concatenate( (input_vector, [self.bias]) )

        input_vector = np.array(input_vector, ndmin=2).T # the minimum demision is 2 # transpose
        target_vector = np.array(target_vector, ndmin=2).T # target vector contains the correct rsults

        output_vector1 = np.dot(self.wih, input_vector) # get the dot product of input vector and wih
        output_vector_hidden = activation_function(output_vector1) # get nodes in hidden layer using logistic function
        
        if self.bias:
            # add a bias node to end of hidden layer
            output_vector_hidden = np.concatenate( (output_vector_hidden, [[self.bias]]) )
        
        output_vector2 = np.dot(self.who, output_vector_hidden) # get the dot product of hidden vector and who
        output_vector_network = activation_function(output_vector2) # get nodes in output layer using logistic function
        
        # calculate the error of output layer
        output_errors = target_vector - output_vector_network # compute the error
        
        # update the weights between hidden layer and output layer:
        tmp = output_errors * output_vector_network * (1.0 - output_vector_network) # ????? 
        tmp = self.learning_rate  * np.dot(tmp, output_vector_hidden.T) # ?????
        self.who += tmp 

        # calculate error of hidden layer:
        hidden_errors = np.dot(self.who.T, output_errors) # ?????
        
        # update the weights:
        tmp = hidden_errors * output_vector_hidden * (1.0 - output_vector_hidden) # ?????
        if self.bias:
            x = np.dot(tmp, input_vector.T)[:-1,:] 
        else:
            x = np.dot(tmp, input_vector.T)
        self.wih += self.learning_rate * x # ?????

                   
    def train(self, data_array, 
              labels_one_hot_array, 
              epochs=1,
              active_input_percentage=0.70,
              active_hidden_percentage=0.70,
              no_of_dropout_tests = 10):

        partition_length = int(len(data_array) / no_of_dropout_tests) # divide training data into 10 folds
        
        for epoch in range(epochs):
            print("epoch: ", epoch)
            for start in range(0, len(data_array), partition_length): # partition_length is the step size 
                active_in_indices, active_hidden_indices = \
                           self.dropout_weight_matrices(active_input_percentage,
                                                        active_hidden_percentage)
                
                for i in range(start, start + partition_length): # do training for each fold
                    self.train_single(data_array[i][active_in_indices], 
                                     labels_one_hot_array[i]) 
                    
                self.weight_matrices_reset(active_in_indices, active_hidden_indices) # reconstruct the original weight matrix
        
    
    # obtain the training result (output verctor)
    def run(self, input_vector):
        # input_vector can be tuple, list or ndarray
        
        if self.bias:
            # adding bias node to the end of the input_vector
            input_vector = np.concatenate( (input_vector, [self.bias]) )
        input_vector = np.array(input_vector, ndmin=2).T

        output_vector = np.dot(self.wih, input_vector)
        output_vector = activation_function(output_vector)
        
        if self.bias:
            output_vector = np.concatenate( (output_vector, [[self.bias]]) )
            

        output_vector = np.dot(self.who, output_vector)
        output_vector = activation_function(output_vector)
    
        return output_vector
    
    # count the numer of correct & wrong classifications
    def evaluate(self, data, labels):
        corrects, wrongs = 0, 0
        for i in range(len(data)):
            res = self.run(data[i])
            res_max = res.argmax()
            if res_max == labels[i]:
                corrects += 1
            else:
                wrongs += 1
        return corrects, wrongs

In [15]:
# read in the data from csv file
image_size = 28 # width and length
no_of_different_labels = 10 #  i.e. 0, 1, 2, 3, ..., 9
image_pixels = image_size * image_size

# get data of the training set
train_data = np.loadtxt("mnist_train.csv", 
                        delimiter=",")

# get data of the test set
test_data = np.loadtxt("mnist_test.csv", 
                       delimiter=",") 

#train_data = train_data[0:10000, :]
#test_data = test_data[0:1000, :]

In [16]:
# We will map these values into an interval from [0.01, 1] by multiplying each pixel by 0.99 / 255 
# and adding 0.01 to the result. This way, we avoid 0 values as inputs.
fac = 0.99 / 255
train_imgs = np.asfarray(train_data[:, 1:]) * fac + 0.01
test_imgs = np.asfarray(test_data[:, 1:]) * fac + 0.0


train_labels = np.asfarray(train_data[:, :1]) # the correct classification of each image in training set
test_labels = np.asfarray(test_data[:, :1]) # the correct classification of each image in test set

In [12]:
# transfer the labelled images into one-hot representation
lr = np.arange(no_of_different_labels)

# transform labels into one hot representation
train_labels_one_hot = (lr==train_labels).astype(np.float)
test_labels_one_hot = (lr==test_labels).astype(np.float)

# we don't want zeroes and ones in the labels neither:
train_labels_one_hot[train_labels_one_hot==0] = 0.01
train_labels_one_hot[train_labels_one_hot==1] = 0.99
test_labels_one_hot[test_labels_one_hot==0] = 0.01
test_labels_one_hot[test_labels_one_hot==1] = 0.99

In [13]:
parts = 10
partition_length = int(len(train_imgs) / parts)
print(partition_length)

start = 0
for start in range(0, len(train_imgs), partition_length):
    print(start, start + partition_length)

1000
0 1000
1000 2000
2000 3000
3000 4000
4000 5000
5000 6000
6000 7000
7000 8000
8000 9000
9000 10000


In [14]:
epochs = 3

simple_network = NeuralNetwork(no_of_in_nodes = image_pixels, 
                               no_of_out_nodes = 10, 
                               no_of_hidden_nodes = 1000,
                               learning_rate = 0.1)
    
simple_network.train(train_imgs, 
                     train_labels_one_hot, 
                     active_input_percentage=1,
                     active_hidden_percentage=1,
                     no_of_dropout_tests = 100,
                     epochs=epochs)

corrects, wrongs = simple_network.evaluate(train_imgs, train_labels)
print("accruracy train: ", corrects / ( corrects + wrongs))
corrects, wrongs = simple_network.evaluate(test_imgs, test_labels)
print("accruracy: test", corrects / ( corrects + wrongs))

epoch:  0
epoch:  1
epoch:  2
accruracy train:  0.1127
accruracy: test 0.126
