In [5]:
import numpy as np 
import math 
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt 
from sklearn.preprocessing import OneHotEncoder

In [6]:
class Node:
    """One unit of a feed-forward network graph.

    A node owns the weights of its outgoing edges (``downstream_weights``) and
    remembers, for every incoming edge, which slot of the parent's
    ``downstream_weights`` carries that edge's weight (``upstream_idxs``).
    """

    def __init__(self, id, is_output_node=False):
        # Identity and role.
        self.id = id
        self.is_output_node = is_output_node
        self.actual_output = None  # relevant only when the node is an output node

        # State filled in while an example is propagated.
        self.value = None
        self.net = None
        self.delta = None

        # Graph connectivity; every container starts as an empty numpy array.
        self.upstream_nodes = np.array([])
        self.upstream_idxs = np.array([])  # slot of this node in each parent's downstream list
        self.downstream_nodes = np.array([])
        self.downstream_weights = np.array([])

    def add_downstream_node(self, node):
        """Attach ``node`` as a child and return its slot in ``downstream_nodes``.

        The new edge gets a random initial weight drawn from [-0.5, 0.5).
        """
        slot = len(self.downstream_nodes)
        self.downstream_nodes = np.append(self.downstream_nodes, node)
        initial_weight = np.random.rand() - 0.5
        self.downstream_weights = np.append(self.downstream_weights, initial_weight)
        return slot

    def add_upstream_node(self, node, idx):
        """Record ``node`` as a parent; ``idx`` is this node's slot in the parent's downstream list."""
        self.upstream_idxs = np.append(self.upstream_idxs, idx)
        self.upstream_nodes = np.append(self.upstream_nodes, node)

    def compute_net(self):
        """Compute, store in ``self.net`` and return the weighted sum of the parents' values."""
        total = 0
        for position, parent in enumerate(self.upstream_nodes):
            # upstream_idxs is a float array, hence the int() cast before indexing.
            weight_slot = int(self.upstream_idxs[position])
            total += parent.value * parent.downstream_weights[weight_slot]
        self.net = total
        return total

    def __repr__(self):
        """Short label: ``Node: <id>`` or ``Output Node: <id>``."""
        prefix = "Output Node: " if self.is_output_node else "Node: "
        return prefix + str(self.id)

In [7]:
class LogisticNode(Node):
    """Node with a logistic (sigmoid) activation.

    compute_delta assumes the network is trained with a cross-entropy loss and
    that logistic nodes are never placed in the output layer.
    """

    def activation_function(self, x):
        """Return the logistic sigmoid of ``x`` without overflowing.

        ``math.exp`` raises OverflowError when its result is too large, which
        the naive ``1 / (1 + exp(-x))`` hits for large negative ``x``. The
        exponent is therefore always evaluated on a non-positive argument.
        """
        if x >= 0:
            return 1 / (1 + math.exp(-x))
        exp_x = math.exp(x)
        return exp_x / (1 + exp_x)

    def compute_value(self):
        """Compute the weighted input and store its sigmoid in ``self.value``."""
        net = self.compute_net()
        self.value = self.activation_function(net)

    def compute_delta(self, actual_output=None):
        """Back-propagate the children's deltas into ``self.delta``.

        ``actual_output`` is accepted for interface parity with SoftmaxNode
        and is not used here. Requires ``self.value`` to be computed first.
        """
        # Identity check: `!= None` would misbehave if value were ever an array.
        assert self.value is not None  # value should be computed before delta
        if self.is_output_node:
            # No need: logistic nodes are not used in the output layer for
            # multiclass classification, so delta is left untouched.
            return
        backflow = 0
        for child, weight in zip(self.downstream_nodes, self.downstream_weights):
            backflow += child.delta * weight
        # value * (1 - value) is the derivative of the sigmoid at this node.
        self.delta = (self.value * (1 - self.value)) * backflow
    

In [8]:
class SoftmaxNode(Node):
    """Node whose value is the softmax of its net input over a whole output layer."""

    def __init__(self, id, is_output_node=False):
        super().__init__(id, is_output_node)
        self.j = None  # class index represented by this output node
        self.output_layer = None

    def set_output_layer(self, output_layer):
        """Remember the layer this node belongs to and its own position ``j`` in it.

        ``j`` is left unchanged when the node is not found in ``output_layer``.
        """
        self.output_layer = output_layer
        for position, candidate in enumerate(output_layer):
            if candidate is self:
                self.j = position
                break

    def compute_output_value(self):
        """Store softmax(net) in ``self.value``.

        Assumes ``net`` has already been computed for every node in the output
        layer. The largest net is subtracted before exponentiating, which leaves
        the softmax unchanged mathematically but keeps ``exp`` from overflowing.
        """
        nets = np.array([node.net for node in self.output_layer], dtype=float)
        shift = nets.max()
        normaliser = np.sum(np.exp(nets - shift))
        self.value = math.exp(self.net - shift) / normaliser

    def compute_delta(self, actual_output=None):
        """Compute ``self.delta`` for the current example.

        For an output node, ``actual_output`` is the true class index (it is
        cast with ``int``) and delta is ``target - value`` with a one-hot
        target. For a non-output node the children's deltas are
        back-propagated using ``value * (1 - value)`` as the local derivative.
        """
        # Identity check: `!= None` would misbehave if value were ever an array.
        assert self.value is not None  # value should be computed before delta
        if self.is_output_node:
            target = 1 if self.j == int(actual_output) else 0
            self.delta = target - self.value
            return
        backflow = 0
        for child, weight in zip(self.downstream_nodes, self.downstream_weights):
            backflow += child.delta * weight
        self.delta = (self.value * (1 - self.value)) * backflow

In [9]:
class Neural_Network:
    """Fully connected feed-forward classifier assembled from node objects.

    The input and hidden layers are built from LogisticNode and the output
    layer from SoftmaxNode. Every weight is stored on the source node of its
    edge (``downstream_weights``). ``make_network`` must be called before any
    propagation, training or prediction method.
    """

    def __init__(self, minibatch_size, no_of_features, hidden_layers, no_of_classes):
        """Store the architecture; the graph itself is created by make_network().

        minibatch_size -- number of examples per weight update
        no_of_features -- number of input nodes
        hidden_layers  -- iterable holding the node count of each hidden layer
        no_of_classes  -- number of softmax output nodes
        """
        self.M = minibatch_size
        self.n = no_of_features
        self.hidden_layers = hidden_layers
        self.K = no_of_classes
        self.training_data = None  # should be already processed
        self.training_labels = None  # should be already processed

    @staticmethod
    def _new_layer(size, make_node, first_id):
        """Return an object array of ``size`` nodes with ids first_id, first_id + 1, ..."""
        layer = np.empty(size, dtype=object)
        for offset in range(size):
            layer[offset] = make_node(first_id + offset)
        return layer

    def make_network(self):
        """Create every layer, connect consecutive layers fully and store them in ``self.network``."""
        next_id = 1
        network = []

        network.append(self._new_layer(self.n, lambda node_id: LogisticNode(node_id, False), next_id))
        next_id += self.n

        for size in self.hidden_layers:
            network.append(self._new_layer(size, LogisticNode, next_id))
            next_id += size

        output_layer = self._new_layer(
            self.K, lambda node_id: SoftmaxNode(node_id, is_output_node=True), next_id
        )
        for node in output_layer:
            node.set_output_layer(output_layer)
        network.append(output_layer)

        # Insert the edges: fully connected between each pair of consecutive layers.
        for upper, lower in zip(network[:-1], network[1:]):
            for node in upper:
                for next_node in lower:
                    next_node.add_upstream_node(node, node.add_downstream_node(next_node))

        print(network)
        self.network = network
        print("network created")

    def forward_propagation(self, eg):
        """Propagate one example ``eg`` (one value per input node) through the network."""
        for feature, node in zip(eg, self.network[0]):
            node.value = feature

        for layer in self.network[1:-1]:
            for node in layer:
                node.compute_value()

        # Softmax needs the net of every output node before any value is computed.
        output_layer = self.network[-1]
        for node in output_layer:
            node.compute_net()
        for node in output_layer:
            node.compute_output_value()

    def back_propagation(self, actual_output):
        """Compute deltas for the output and hidden layers.

        ``actual_output`` is the true class index of the example that was last
        forward-propagated. Input nodes are skipped: they have no incoming
        weights, so their delta is never read by weight_update.
        """
        for node in self.network[-1]:
            node.compute_delta(actual_output)
        for i in range(len(self.network) - 2, 0, -1):
            for node in self.network[i]:
                node.compute_delta()

    def propagate(self, eg, label):
        """Run a forward pass on ``eg`` followed by a backward pass for ``label``."""
        self.forward_propagation(eg)
        self.back_propagation(label)

    def get_weight_diff(self, w1, w2):
        """Return the sum of absolute element-wise differences between ``w1`` and ``w2``."""
        return sum(abs(w1[i] - w2[i]) for i in range(len(w1)))

    def weight_update(self, delta_list, value_list, learning_rate):
        """Apply one mini-batch step to every weight.

        ``delta_list[k][layer][node]`` and ``value_list[k][layer][node]`` hold
        the delta and value of each node for the k-th example of the batch.
        Each weight moves by ``learning_rate`` times the batch mean of
        ``delta(child) * value(parent)``. The mean is taken over the actual
        number of examples supplied, so a shorter final batch is averaged
        correctly, and the learning rate is applied exactly once.
        """
        batch_size = len(delta_list)
        if batch_size == 0:
            return  # nothing to learn from
        step = learning_rate / batch_size

        for i in range(len(self.network) - 1):
            for l, node in enumerate(self.network[i]):
                for j in range(len(node.downstream_nodes)):
                    accumulated = 0
                    for k in range(batch_size):
                        accumulated += delta_list[k][i + 1][j] * value_list[k][i][l]
                    # Deltas already carry the descent direction (target - output
                    # at the output layer), hence the addition.
                    node.downstream_weights[j] += step * accumulated

    def train(self, training_data, training_labels, learning_rate, epochs=400):
        """Train with mini-batch updates for ``epochs`` passes over the data.

        The data is shuffled once up front; the shuffled copies are stored in
        ``self.training_data`` / ``self.training_labels`` and are the ones
        iterated over. ``training_labels`` holds class indices.
        """
        self.training_data, self.training_labels = self.shuffle(training_data, training_labels)
        data, labels = self.training_data, self.training_labels

        for epoch in range(1, epochs + 1):
            for start in range(0, len(data), self.M):
                minibatch_data = data[start:start + self.M]
                minibatch_labels = labels[start:start + self.M]
                delta_list, value_list = [], []
                for eg, label in zip(minibatch_data, minibatch_labels):
                    self.propagate(eg, label)
                    # Snapshot per-node state: the next example overwrites it.
                    delta_list.append([[node.delta for node in layer] for layer in self.network])
                    value_list.append([[node.value for node in layer] for layer in self.network])
                self.weight_update(delta_list, value_list, learning_rate)

            print(f"epoch {epoch} done")

        print("training done")

    def predict(self, eg, answer):
        """Return ``(predicted_class, loss)`` for one example.

        ``loss`` is the negative log of the value of the output node whose
        class equals ``answer``, and stays 0 when no output node matches.
        ``predicted_class`` is None only if no output value exceeds -1.
        """
        self.forward_propagation(eg)
        output_layer = self.network[-1]

        best_value = -1
        prediction = None
        for node in output_layer:
            if node.value > best_value:
                best_value = node.value
                prediction = node.j

        loss = 0
        for node in output_layer:
            if node.j == answer:
                loss -= math.log(node.value)
        return prediction, loss

    def compute_accuracy_and_loss(self, test_data, test_labels):
        """Return ``(accuracy, total_loss)`` over a labelled data set and print the counts."""
        correct = 0
        total_loss = 0
        for eg, label in zip(test_data, test_labels):
            prediction, loss = self.predict(eg, label)
            if prediction == label:
                correct += 1
            total_loss += loss
        print("Correct: ", correct)
        print("Total: ", len(test_data))
        return (correct / len(test_data), total_loss)

    def shuffle(self, training_data, training_labels):
        """Return data and labels reordered by one shared random permutation.

        The inputs are not modified; array-likes are converted with np.asarray.
        """
        training_data = np.asarray(training_data)
        training_labels = np.asarray(training_labels)
        idxs = np.arange(len(training_data))
        np.random.shuffle(idxs)  # shuffles in place and returns None
        print("shuffled idx is ", idxs)
        return (training_data[idxs], training_labels[idxs])

    def __repr__(self):
        """Multi-line dump of every node: its connections, value and delta."""
        parts = []
        for layer in self.network:
            for node in layer:
                parts.append(str(node) + " ")
                parts.append("\n")
                parts.append("downstream nodes: " + str(node.downstream_nodes) + "\n")
                parts.append("upstream idxs: " + str(node.upstream_idxs) + "\n")
                parts.append("upstream nodes: " + str(node.upstream_nodes) + "\n")
                parts.append("value: " + str(node.value) + "\n")
                parts.append("delta: " + str(node.delta) + "\n")
                if node.is_output_node:
                    parts.append("output class is " + str(node.j) + "\n")
                parts.append("-----------------")
                parts.append("\n")
        return "".join(parts)
            
    
    


In [10]:
# Smoke test: build a tiny network with minibatch size 10, 3 input features,
# one hidden layer of 3 nodes and 3 output classes.
my_nn = Neural_Network(10, 3, [3], 3) 
# make_network() prints the created layers followed by "network created".
my_nn.make_network()
# print(my_nn)

[array([Node: 1, Node: 2, Node: 3], dtype=object), array([Node: 4, Node: 5, Node: 6], dtype=object), array([Output Node: 7, Output Node: 8, Output Node: 9], dtype=object)]
network created


In [11]:
# Push one dummy 3-feature example through the small network, then
# back-propagate treating class index 1 as the true label.
my_nn.forward_propagation(np.array([1, 2, 3])) 
my_nn.back_propagation(1) 

In [12]:
def get_input(input_path, output_path):
    """Load a feature array and a label array from ``.npy`` files.

    Both arrays are cast to float. Features are then rescaled with
    ``2 * (0.5 - x / 255)``, which maps 0 to 1 and 255 to -1.
    Returns the tuple ``(features, labels)``.
    """
    features = np.load(input_path)
    labels = np.load(output_path)

    features = features.astype('float')
    labels = labels.astype('float')

    # normalize the features
    features = 2*(0.5 - features/255)
    return features, labels
# Sanity check for Neural_Network.shuffle: rows of x and entries of y should be
# reordered by the same permutation (row [1,2,3] stays paired with label 1, etc.).
x = np.array([[1,2,3], [4,5,6], [7,8,9]])
y = np.array([1,2,3])
# Note: this rebinds the notebook-level names x and y to the shuffled copies.
x, y = my_nn.shuffle(x,y)
print(x,y)

shuffled idx is  [2 1 0]
[[7 8 9]
 [4 5 6]
 [1 2 3]] [3 2 1]


In [23]:
# Load the train and test splits; get_input casts to float and rescales the features.
X_train, Y_train = get_input("../data/part b/x_train.npy", "../data/part b/y_train.npy") 
X_test, Y_test = get_input("../data/part b/x_test.npy", "../data/part b/y_test.npy" )
print(X_train.shape) 
print(Y_train.shape)
# Shift every label down by one. Presumably the stored labels are 1-based and
# this turns them into 0-based class indices -- TODO confirm against the data.
# Note: re-running this cell's subtraction without reloading would shift them again.
Y_train = Y_train - 1 
Y_test = Y_test - 1 

# One-hot encode the labels; the encoder is fitted on the training labels only.
# expand_dims turns the 1-D label vector into a single-column 2-D array.
label_encoder = OneHotEncoder(sparse_output = False)
label_encoder.fit(np.expand_dims(Y_train, axis = -1))

y_train_onehot = label_encoder.transform(np.expand_dims(Y_train, axis = -1))
y_test_onehot = label_encoder.transform(np.expand_dims(Y_test, axis = -1))

# The one-hot arrays are only printed here; the training cells visible in this
# notebook pass the integer-valued Y_train / Y_test instead.
print(y_train_onehot) 
print(y_test_onehot) 
# print(X_train)
# print(Y_train) 
# print(Y_test)

(10000, 1024)
(10000,)
[[1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 ...
 [0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0.]]
[[0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0.]
 ...
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]]


In [14]:
# Build the full-size network: minibatch size 32, 1024 input features,
# one hidden layer of 10 nodes, 5 output classes.
nn = Neural_Network(32, 1024, [10], 5) 
nn.make_network()
# print(nn)
# Train with learning rate 0.01. The recorded output of this cell ends in a
# KeyboardInterrupt, i.e. the saved run was stopped manually.
nn.train(X_train, Y_train, 0.01)

[array([Node: 1, Node: 2, Node: 3, ..., Node: 1022, Node: 1023, Node: 1024],
      dtype=object), array([Node: 1025, Node: 1026, Node: 1027, Node: 1028, Node: 1029,
       Node: 1030, Node: 1031, Node: 1032, Node: 1033, Node: 1034],
      dtype=object), array([Output Node: 1035, Output Node: 1036, Output Node: 1037,
       Output Node: 1038, Output Node: 1039], dtype=object)]
network created
shuffled idx is  [ 929 8879 3661 ... 9319 6200 8260]


KeyboardInterrupt: 

In [27]:

# Reference baseline: scikit-learn MLP with a logistic hidden layer, plain SGD
# (no momentum), constant learning rate, batch size 32 and no shuffling.
# NOTE(review): this uses 5 hidden units and learning rate 0.03, whereas the
# hand-written network above is built with 10 hidden units and trained with
# learning rate 0.01 -- align them if a like-for-like comparison is intended.
clf = MLPClassifier([5], activation="logistic", solver="sgd", batch_size=32, learning_rate="constant" , learning_rate_init=0.03,
                    shuffle=False,  momentum=0 , max_iter=1000, verbose=True)
clf.fit(X_train, Y_train)
test_accuracy = clf.score(X_test, Y_test)
train_accuracy = clf.score(X_train, Y_train)
print(f"test accuracy is {test_accuracy}")
# Label fixed: this line reports the training accuracy, not the test accuracy.
print(f"train accuracy is {train_accuracy}")

Iteration 1, loss = 1.59247996
Iteration 2, loss = 1.48101393
Iteration 3, loss = 1.30370545
Iteration 4, loss = 1.17780136
Iteration 5, loss = 1.10203257
Iteration 6, loss = 1.04988275
Iteration 7, loss = 1.00934160
Iteration 8, loss = 0.97588285
Iteration 9, loss = 0.94766919
Iteration 10, loss = 0.92369537
Iteration 11, loss = 0.90321915
Iteration 12, loss = 0.88562936
Iteration 13, loss = 0.87042135
Iteration 14, loss = 0.85718538
Iteration 15, loss = 0.84559192
Iteration 16, loss = 0.83537623
Iteration 17, loss = 0.82632452
Iteration 18, loss = 0.81826292
Iteration 19, loss = 0.81104888
Iteration 20, loss = 0.80456471
Iteration 21, loss = 0.79871259
Iteration 22, loss = 0.79341075
Iteration 23, loss = 0.78859037
Iteration 24, loss = 0.78419325
Iteration 25, loss = 0.78016987
Iteration 26, loss = 0.77647781
Iteration 27, loss = 0.77308058
Iteration 28, loss = 0.76994657
Iteration 29, loss = 0.76704826
Iteration 30, loss = 0.76436160
Iteration 31, loss = 0.76186545
Iteration 32, los

In [None]:
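# NOTE: archived (commented-out) copy of Neural_Network.weight_update, kept for reference only.
# It multiplies the accumulated update by learning_rate twice; do not restore it as-is.
# def weight_update(self, delta_list, value_list, learning_rate):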


        # for i in range(len(self.network) - 1):
        #     for l  in range(len(self.network[i])):
        #         node = self.network[i][l] 
        #         for j in range(len(node.downstream_nodes)):
        #             # node.downstream_weights[j] += (learning_rate * delta_list[i][j] * value_list[i][j])/self.M 
        #             update = 0 
        #             for k in range(len(delta_list)):
        #                 # assert(value_list[k][i][l] != 0)
        #                 # assert(delta_list[k][i + 1][j] != 0) 
                        
        #                 update += (delta_list[k][i + 1][j] * value_list[k][i][l])
                    
        #             update *= learning_rate/self.M
        #             update *= learning_rate
        #             node.downstream_weights[j] += update 
        #             # assert(update != 0)
        # # print("len of delta list was ", len(delta_list))
        # # print("weight update done")

In [None]:
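# NOTE: archived (commented-out) earlier body of Neural_Network.train, kept for reference only.
# This version loops forever (`while (True)`), iterates the unshuffled `training_data`, and
# reads the notebook globals X_train / Y_train when reporting accuracy.
# self.training_data, self.training_labels =  self.shuffle(training_data, training_labels) 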
#         prev_weights =[]
#         weights = []
#         for layer in self.network:
#             for node in layer:
#                 for i in range(len(node.downstream_weights)):
#                             weights.append(node.downstream_weights[i])
          
#         while (True):
           
#             for i in range(0, len(training_data), self.M):
#                 minibatch_data = training_data[i : i + self.M] 
#                 minibatch_labels = training_labels[i : i + self.M]
#                 delta_list = []
#                 value_list = []
#                 for eg, label in zip(minibatch_data, minibatch_labels):
                    
#                         self.propagate(eg, label) 
#                         network_delta, network_values = [],  [] 
#                         for layer in self.network:
#                             layer_deltas, layer_values = [],  []
#                             for node in layer:
#                                 # layer_deltas = np.append(layer_deltas, node.delta) 
#                                 layer_deltas.append(node.delta)
#                                 # layer_values = np.append(layer_values, node.value)
#                                 layer_values.append(node.value)  
#                             # print("shape of layer_deltas is ", layer_deltas.shape)
#                             # network_delta = np.append(network_delta, layer_deltas)
#                             network_delta.append(layer_deltas)
#                             # network_values = np.append(network_values, layer_values) 
#                             network_values.append(layer_values)
#                         # print("shape of network_delta is ", network_delta.shape)
#                         # input() 
#                         # delta_list = np.append(delta_list, network_delta)
#                         delta_list.append(network_delta)
#                         # value_list = np.append(value_list, network_values)
#                         value_list.append(network_values)
#                 # print("shape of delta_list is ", delta_list.shape)
#                 # print("shape of value_list is ", value_list.shape) 
#                 # prev_weights = weights 
#                 # weights = [] 
#                 self.weight_update(delta_list, value_list, learning_rate)  
#                 # for layer in self.network:
#                     # for node in layer:
#                         # for i in range(len(node.downstream_weights)):
#                             # weights.append(node.downstream_weights[i])
#                 # diff = self.get_weight_diff(weights, prev_weights)
#                 # print("weights updated by ", diff)
                
#             computed_accuracy, loss = self.compute_accuracy_and_loss(X_train, Y_train) 
#             # loss = self.compute_loss(training_data, training_labels)
#             print("accuracy is ", computed_accuracy)
#             print("loss is ", loss)
           
#         print("training done")