# Training

In [1]:
import numpy as np

In [2]:
def one_hot_encoded(y, classes = 0):
    values = y
    onehot_encoder = OneHotEncoder(sparse=False, categories='auto')
    integer_encoded = values.reshape(len(values), 1)
    one_hot_encoded = onehot_encoder.fit_transform(integer_encoded)
    return one_hot_encoded

In [3]:
class Model:
    
    # Constructor
    def __init__(self):
        self.x = []
        self.y = []
        self.num_layers = 0
        self.layers_info = {}
        self.layers = []
        self.net_sum = []
        self.weights = []
        self.biases = []
        self.jacobian_weights = []
        self.jacobian_biases = []
    
    # Add a fully connected layer
    def add_dense_layer(self, size, input_size = 0, activation = 'linear'):
        self.num_layers += 1
        self.layers_info['Layer_'+str(self.num_layers)] = {
            'size': size,
            'activation': activation
        }
        self.layers.append(np.ones(size))
        self.net_sum.append(np.ones(size))
        if input_size != 0:
            self.weights.append(np.random.randn(size, input_size) * np.sqrt(2/(size+input_size)))
        else:
            input_size = self.layers_info['Layer_'+str(self.num_layers-1)]['size']
            self.weights.append(np.random.randn(size, input_size) * np.sqrt(2/(size+input_size)))
        self.biases.append(np.random.randn(size) * np.sqrt(1/size))
        
    def mse(self, true, pred):
        return ((true-pred)**2)/2
        
    # Selection of Loss Function and Optimization function
    def set_parameters(self, lr = 0.01, loss = 'mse'):
        if loss == 'mse':
            self.loss = loss
            self.loss_function = self.mse
        self.lr = lr
    
    # Sigmoid
    def sigmoid(self, x):
        return 1 / (1 + (np.e ** -x))
    
    # ReLU - Rectified Linear Unit
    def relu(self, x):
        return max(0, x)
    
    # Softmax
    def softmax(self, x):
        exp = np.exp(x)
        return np.true_divide(exp, sum(exp)).transpose()
    
    def activation(self, net_sum, activation):
        if activation == 'sigmoid':
            activation_function = self.sigmoid
            output_vector = np.array([activation_function(i) for i in net_sum]).transpose()
        elif activation == 'relu':
            activation_function = self.relu
            output_vector = np.array([activation_function(i) for i in net_sum]).transpose()
        elif activation == 'softmax':
            activation_function = self.softmax
            output_vector = activation_function(net_sum)
        return output_vector
    
    # Forward Propogation
    def neural_network_output(self, record):
        input_vector = record.transpose()
        output_vector = None
        for i in range(len(self.layers)):
            # y = aW + b
            self.net_sum[i] = np.matmul(self.weights[i], input_vector) + self.biases[i].transpose()
            self.layers[i] = self.activation(self.net_sum[i], self.layers_info['Layer_'+str(i+1)]['activation'])
            input_vector = self.layers[i]
        output_vector = self.layers[len(self.layers)-1]
        return output_vector
    
    # For output layer
    # delta = dE/dnet_sum_output = dE/doutput * doutput/dnet_sum_output
    # dE/doutput
    def output_loss_derivative(self, true, pred):
        if self.loss == 'mse':
            return (true - pred)
    
    # doutput/dnet_sum_output
    def activation_derivative(self, output_vector_i, net_sum_i, activation):
        if activation == 'relu':
            return np.array([1 if i>0 else 0 for i in net_sum_i])
        elif activation == 'sigmoid':
            return output_vector_i * (1 - output_vector_i)
        elif activation == 'softmax':
            return output_vector_i * (1 - output_vector_i)
        else:
            return output_vector_i * (1 - output_vector_i)
    
    # For hidden layer
    # dE/dnet_sum_hidden = dotproduct(weights_ji, delta_o)
    def hidden_loss_derivative(self, grad_next, layer_number, num_of_neurons_next):
        grad = []
        for i in range(num_of_neurons_next):
            weights_next = self.weights[layer_number+1][:, i]
            grad.append(np.dot(weights_next, grad_next))
        return np.array(grad)
    
    # Training using Backpropogation
    def train(self, X_train, y_train, epochs = 1, batch_size = 0): 
        
        # initializing Jacobian
        for i in range(self.num_layers):
            size = self.layers_info['Layer_'+str(i+1)]['size']            
            input_size = 0
            self.jacobian_biases.append(np.zeros((size)))
            if i == 0:
                input_size = X_train.shape[1]
            else:
                input_size = len(self.layers[i-1])
            self.jacobian_weights.append(np.zeros((size, input_size)))
        
        # Creation of batches
        self.x = X_train
        self.y = y_train
        if batch_size == 0:
            batch_size = len(y_train)
        batches = int(abs(len(X_train) / batch_size))
        X_train = np.array_split(X_train, batches)
        y_train = np.array_split(y_train, batches)
        
        for epoch in range(epochs): 
            for batch in range(batches):
                for record, label in zip(X_train[batch], y_train[batch]):                    
                    true_output = np.array(label)
                    predicted_output = self.neural_network_output(np.array(record))

                    # gradient of output layer
                    output_vector_i = self.layers[self.num_layers-1]
                    net_sum_i = self.net_sum[self.num_layers-1]
                    activation = self.layers_info['Layer_'+str(self.num_layers)]['activation']
                    # derivative of output loss * derivetive of activation
                    grad_o = self.output_loss_derivative(true_output, predicted_output) * self.activation_derivative(output_vector_i, net_sum_i, activation)
                    
                    # Jacobian Output Layer - Jw = input * grad_o and Jb = 1.0 * grad_o
                    if self.num_layers-2 < 0:
                        self.jacobian_weights[self.num_layers-1] += grad_o[:, None] @ np.array(record)[None, :]
                    else:
                        self.jacobian_weights[self.num_layers-1] += grad_o[:, None] @ self.layers[self.num_layers-2][None, :]
                    self.jacobian_biases[self.num_layers-1] += grad_o
                    
                    # gradient of hidden layer
                    grad_next = grad_o
                    for i in range(self.num_layers-2, -1, -1):
                        output_vector_i = self.layers[i]
                        net_sum_i = self.layers[i]
                        activation = self.layers_info['Layer_'+str(i+1)]['activation']
                        num_of_neurons_next = self.layers_info['Layer_'+str(i+2)]['size']
                        
                        # derivative of hidden loss * derivetive of activation
                        loss_grad = self.hidden_loss_derivative(grad_next, i, num_of_neurons_next)
                        activation_grad = self.activation_derivative(output_vector_i, net_sum_i, activation) 
                        grad_h = np.tensordot(loss_grad, activation_grad, axes=0)[0]
                        
                        # Jacobian Hidden layer - Jw = input * grad_next and Jb = 1.0 * grad_next
                        if i-1 < 0:
                            self.jacobian_weights[i] += grad_h[:, None] @ np.array(record)[None, :]
                        else:
                            self.jacobian_weights[i] += grad_h[:, None] @ self.layers[i-1][None, :]
                        self.jacobian_biases[i] += grad_h
                        
                        # change the gradient
                        grad_next = grad_h
                
                # Divide accumulated jacobian by number of records in the batch
                for i in range(self.num_layers):
                    self.jacobian_weights[i] = self.jacobian_weights[i] / len(record)
                    self.jacobian_biases[i] = self.jacobian_biases[i] / len(record)
                
                # Update weights and biases
                for i in range(self.num_layers-1, -1, -1):
                    self.weights[i] += self.lr * self.jacobian_weights[i]
                    self.biases[i] += self.lr * self.jacobian_biases[i]
            
            # Accuracy and Loss
            print('Epoch ',epoch)
            self.evaluate(self.x, self.y)
            print('')
            
    
    # Predict
    def predict(self, X_test):
        predictions = []
        classes = self.layers_info['Layer_'+str(self.num_layers)]['size']
        for record in X_test:
            predicted_output = self.neural_network_output(record)
            if classes == 1:
                if predicted_output[0] >= 0.5:
                    predictions.append(1)
                else:
                    predictions.append(0)
            else:
                predict = np.argmax(predicted_output)
                predictions.append(predict)
        return predictions
    
    # Accuracy and Loss
    def evaluate(self, X_test, y_test):       
        # Loss
        loss = 0
        for record, label in zip(X_test, y_test):
            true_output = np.array(label)
            predicted_output = self.neural_network_output(record)
            loss += self.loss_function(true_output, predicted_output)        
        print('Loss: ',sum(loss)/(len(X_test) * len(loss)))
        
        # Accuracy
        true_output = []
        if len(y_test.shape) > 1:
            for i in y_test:
                true_output.append(np.argmax(i))
        else:
            true_output = y_test
        true_output = np.array(true_output)
        predicted_output = self.predict(X_test)
        count = 0
        for true, pred in zip(true_output, predicted_output):
            if (true == pred):
                count += 1
        accuracy = count/len(true_output)
        print('Accuracy: ',accuracy)


# Testing

Feed-froward Neural Network

In [9]:
model = Model()

model.add_dense_layer(4, 3, activation='relu')
model.add_dense_layer(1, activation='sigmoid')

Creating Dataset - XOR Dataset

In [5]:
X_train = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [0, 1, 0], [1, 0, 0], [1, 1, 1], [0, 0, 0]])
y_train = np.array([0, 1, 1, 1, 1, 0, 0])

Setting Hyperparameters: <br>
Loss Function: Mean Square Error(mse) <br> 
Learning rate: 0.01

In [10]:
model.set_parameters(lr=0.1, loss='mse')

Gradient Descent Optimization

In [7]:
model.train(X_train, y_train, epochs=1000)

Epoch  0
Loss:  0.16819650044716714
Accuracy:  0.42857142857142855

Epoch  1
Loss:  0.16775098766226984
Accuracy:  0.42857142857142855

Epoch  2
Loss:  0.16719904509816974
Accuracy:  0.42857142857142855

Epoch  3
Loss:  0.16653369242726732
Accuracy:  0.42857142857142855

Epoch  4
Loss:  0.1658289827886948
Accuracy:  0.42857142857142855

Epoch  5
Loss:  0.1650984952892576
Accuracy:  0.42857142857142855

Epoch  6
Loss:  0.16434677602107445
Accuracy:  0.42857142857142855

Epoch  7
Loss:  0.1635754042057091
Accuracy:  0.42857142857142855

Epoch  8
Loss:  0.1627850288878279
Accuracy:  0.42857142857142855

Epoch  9
Loss:  0.16197605544479682
Accuracy:  0.42857142857142855

Epoch  10
Loss:  0.16114887976426634
Accuracy:  0.42857142857142855

Epoch  11
Loss:  0.16030397068995367
Accuracy:  0.42857142857142855

Epoch  12
Loss:  0.1594419012900937
Accuracy:  0.42857142857142855

Epoch  13
Loss:  0.15856336250709904
Accuracy:  0.42857142857142855

Epoch  14
Loss:  0.15766917030307617
Accuracy:  0

Epoch  220
Loss:  0.11698030031374618
Accuracy:  0.5714285714285714

Epoch  221
Loss:  0.11692199087969822
Accuracy:  0.5714285714285714

Epoch  222
Loss:  0.11686668194175982
Accuracy:  0.5714285714285714

Epoch  223
Loss:  0.11680542644997137
Accuracy:  0.5714285714285714

Epoch  224
Loss:  0.11674977740966126
Accuracy:  0.5714285714285714

Epoch  225
Loss:  0.11669195998974095
Accuracy:  0.5714285714285714

Epoch  226
Loss:  0.11663628015090233
Accuracy:  0.5714285714285714

Epoch  227
Loss:  0.11657594391349287
Accuracy:  0.5714285714285714

Epoch  228
Loss:  0.11652064578644879
Accuracy:  0.5714285714285714

Epoch  229
Loss:  0.11645888037071044
Accuracy:  0.5714285714285714

Epoch  230
Loss:  0.11640285639753471
Accuracy:  0.5714285714285714

Epoch  231
Loss:  0.1163453853603106
Accuracy:  0.5714285714285714

Epoch  232
Loss:  0.11628879909416066
Accuracy:  0.5714285714285714

Epoch  233
Loss:  0.11622886581527782
Accuracy:  0.5714285714285714

Epoch  234
Loss:  0.116172904468307

Epoch  446
Loss:  0.10571173334887064
Accuracy:  0.8571428571428571

Epoch  447
Loss:  0.10565417317619716
Accuracy:  0.8571428571428571

Epoch  448
Loss:  0.10560112735426504
Accuracy:  0.8571428571428571

Epoch  449
Loss:  0.1055431410971384
Accuracy:  0.8571428571428571

Epoch  450
Loss:  0.10549750697299105
Accuracy:  0.8571428571428571

Epoch  451
Loss:  0.10543932440460581
Accuracy:  0.8571428571428571

Epoch  452
Loss:  0.10538491423650011
Accuracy:  0.8571428571428571

Epoch  453
Loss:  0.10532664859104297
Accuracy:  0.8571428571428571

Epoch  454
Loss:  0.10527772651776086
Accuracy:  0.8571428571428571

Epoch  455
Loss:  0.10521768673364937
Accuracy:  0.8571428571428571

Epoch  456
Loss:  0.1051714699782336
Accuracy:  0.8571428571428571

Epoch  457
Loss:  0.105112939115678
Accuracy:  0.8571428571428571

Epoch  458
Loss:  0.1050568175061536
Accuracy:  0.8571428571428571

Epoch  459
Loss:  0.10499814663160521
Accuracy:  0.8571428571428571

Epoch  460
Loss:  0.10494751989432527
A

Epoch  673
Loss:  0.09088305798708331
Accuracy:  0.8571428571428571

Epoch  674
Loss:  0.09081777471904738
Accuracy:  0.8571428571428571

Epoch  675
Loss:  0.0907569745294849
Accuracy:  0.8571428571428571

Epoch  676
Loss:  0.09069034847754112
Accuracy:  0.8571428571428571

Epoch  677
Loss:  0.09063120297016525
Accuracy:  0.8571428571428571

Epoch  678
Loss:  0.09056802739980406
Accuracy:  0.8571428571428571

Epoch  679
Loss:  0.09050086854410866
Accuracy:  0.8571428571428571

Epoch  680
Loss:  0.09044010642822055
Accuracy:  0.8571428571428571

Epoch  681
Loss:  0.09037692498978596
Accuracy:  0.8571428571428571

Epoch  682
Loss:  0.09031684207612076
Accuracy:  0.8571428571428571

Epoch  683
Loss:  0.09025086361447003
Accuracy:  0.8571428571428571

Epoch  684
Loss:  0.09019346029253374
Accuracy:  0.8571428571428571

Epoch  685
Loss:  0.09012999408811959
Accuracy:  0.8571428571428571

Epoch  686
Loss:  0.0900644594000468
Accuracy:  0.8571428571428571

Epoch  687
Loss:  0.0900082335626706

Epoch  838
Loss:  0.08277409157698841
Accuracy:  0.8571428571428571

Epoch  839
Loss:  0.0827414256937745
Accuracy:  0.8571428571428571

Epoch  840
Loss:  0.08270515315515121
Accuracy:  0.8571428571428571

Epoch  841
Loss:  0.08266980824835693
Accuracy:  0.8571428571428571

Epoch  842
Loss:  0.0826374578960425
Accuracy:  0.8571428571428571

Epoch  843
Loss:  0.08260171197390957
Accuracy:  0.8571428571428571

Epoch  844
Loss:  0.08256686026414742
Accuracy:  0.8571428571428571

Epoch  845
Loss:  0.08253476657704108
Accuracy:  0.8571428571428571

Epoch  846
Loss:  0.08249957711853882
Accuracy:  0.8571428571428571

Epoch  847
Loss:  0.08246523043894356
Accuracy:  0.8571428571428571

Epoch  848
Loss:  0.08243333776678352
Accuracy:  0.8571428571428571

Epoch  849
Loss:  0.08239873236685034
Accuracy:  0.8571428571428571

Epoch  850
Loss:  0.08236490170190544
Accuracy:  0.8571428571428571

Epoch  851
Loss:  0.08233315743438219
Accuracy:  0.8571428571428571

Epoch  852
Loss:  0.0822991615692721

Model Evaluation

In [8]:
print(model.predict(X_train), model.predict(np.array([[1,1,0]])))
model.evaluate(X_train, y_train)

[0, 1, 1, 1, 1, 1, 0] [1]
Loss:  0.07867639374088777
Accuracy:  0.8571428571428571


Stochastic Gradient Desccent Optimization 

In [11]:
model.train(X_train, y_train, epochs=1000, batch_size=1)

Epoch  0
Loss:  0.1431782081987901
Accuracy:  0.42857142857142855

Epoch  1
Loss:  0.14296416506383755
Accuracy:  0.42857142857142855

Epoch  2
Loss:  0.14279114629935324
Accuracy:  0.42857142857142855

Epoch  3
Loss:  0.14265471593671386
Accuracy:  0.42857142857142855

Epoch  4
Loss:  0.1430113580078935
Accuracy:  0.42857142857142855

Epoch  5
Loss:  0.1434791176576769
Accuracy:  0.42857142857142855

Epoch  6
Loss:  0.14325839476984606
Accuracy:  0.42857142857142855

Epoch  7
Loss:  0.14288193733255802
Accuracy:  0.42857142857142855

Epoch  8
Loss:  0.14252802224182673
Accuracy:  0.42857142857142855

Epoch  9
Loss:  0.14200002188871758
Accuracy:  0.42857142857142855

Epoch  10
Loss:  0.14138702521977367
Accuracy:  0.42857142857142855

Epoch  11
Loss:  0.14079410844775503
Accuracy:  0.42857142857142855

Epoch  12
Loss:  0.1402573136880621
Accuracy:  0.42857142857142855

Epoch  13
Loss:  0.14000564545326263
Accuracy:  0.42857142857142855

Epoch  14
Loss:  0.13977466063767818
Accuracy:  

Epoch  207
Loss:  0.06784595188556798
Accuracy:  0.8571428571428571

Epoch  208
Loss:  0.06751620178242107
Accuracy:  0.8571428571428571

Epoch  209
Loss:  0.06713710125549285
Accuracy:  0.8571428571428571

Epoch  210
Loss:  0.06683625638656071
Accuracy:  0.8571428571428571

Epoch  211
Loss:  0.06650748452862307
Accuracy:  1.0

Epoch  212
Loss:  0.06608359378470531
Accuracy:  1.0

Epoch  213
Loss:  0.0658399144088809
Accuracy:  1.0

Epoch  214
Loss:  0.06541781534958097
Accuracy:  1.0

Epoch  215
Loss:  0.06512415814775789
Accuracy:  1.0

Epoch  216
Loss:  0.06476958130235683
Accuracy:  1.0

Epoch  217
Loss:  0.06435643449255049
Accuracy:  1.0

Epoch  218
Loss:  0.06407480461353375
Accuracy:  1.0

Epoch  219
Loss:  0.06376021599680515
Accuracy:  1.0

Epoch  220
Loss:  0.06339450302536648
Accuracy:  1.0

Epoch  221
Loss:  0.06314593689025012
Accuracy:  1.0

Epoch  222
Loss:  0.06276604220408658
Accuracy:  1.0

Epoch  223
Loss:  0.062451871711408986
Accuracy:  1.0

Epoch  224
Loss:  0.06

Epoch  413
Loss:  0.023412357617213168
Accuracy:  1.0

Epoch  414
Loss:  0.023271626132772227
Accuracy:  1.0

Epoch  415
Loss:  0.023142943802675297
Accuracy:  1.0

Epoch  416
Loss:  0.023048224951358387
Accuracy:  1.0

Epoch  417
Loss:  0.022943680635677446
Accuracy:  1.0

Epoch  418
Loss:  0.02279747573877802
Accuracy:  1.0

Epoch  419
Loss:  0.022701692941252138
Accuracy:  1.0

Epoch  420
Loss:  0.02255459008777205
Accuracy:  1.0

Epoch  421
Loss:  0.0224709152098932
Accuracy:  1.0

Epoch  422
Loss:  0.022352122316855103
Accuracy:  1.0

Epoch  423
Loss:  0.02223713895847299
Accuracy:  1.0

Epoch  424
Loss:  0.02213890746832162
Accuracy:  1.0

Epoch  425
Loss:  0.022037426277096633
Accuracy:  1.0

Epoch  426
Loss:  0.02189704922637258
Accuracy:  1.0

Epoch  427
Loss:  0.02180742158099399
Accuracy:  1.0

Epoch  428
Loss:  0.021726273513088902
Accuracy:  1.0

Epoch  429
Loss:  0.02158874336426866
Accuracy:  1.0

Epoch  430
Loss:  0.02150071403814096
Accuracy:  1.0

Epoch  431
Loss:  0.

Epoch  603
Loss:  0.010407510999377548
Accuracy:  1.0

Epoch  604
Loss:  0.010379592806669388
Accuracy:  1.0

Epoch  605
Loss:  0.0103380425942446
Accuracy:  1.0

Epoch  606
Loss:  0.010295565345773916
Accuracy:  1.0

Epoch  607
Loss:  0.010271575043692237
Accuracy:  1.0

Epoch  608
Loss:  0.010238397027054476
Accuracy:  1.0

Epoch  609
Loss:  0.010194288386738056
Accuracy:  1.0

Epoch  610
Loss:  0.01015250372856436
Accuracy:  1.0

Epoch  611
Loss:  0.01012021465088701
Accuracy:  1.0

Epoch  612
Loss:  0.010090602600728376
Accuracy:  1.0

Epoch  613
Loss:  0.010066541195116402
Accuracy:  1.0

Epoch  614
Loss:  0.01003339945911277
Accuracy:  1.0

Epoch  615
Loss:  0.009996578680521099
Accuracy:  1.0

Epoch  616
Loss:  0.009953488994916132
Accuracy:  1.0

Epoch  617
Loss:  0.0099275003958517
Accuracy:  1.0

Epoch  618
Loss:  0.009894037411106258
Accuracy:  1.0

Epoch  619
Loss:  0.009846754801655968
Accuracy:  1.0

Epoch  620
Loss:  0.009822026637769621
Accuracy:  1.0

Epoch  621
Loss: 

Epoch  799
Loss:  0.005993569994462834
Accuracy:  1.0

Epoch  800
Loss:  0.005988840375828851
Accuracy:  1.0

Epoch  801
Loss:  0.0059679560342820945
Accuracy:  1.0

Epoch  802
Loss:  0.005954754228094451
Accuracy:  1.0

Epoch  803
Loss:  0.005939594927374626
Accuracy:  1.0

Epoch  804
Loss:  0.005927004314789263
Accuracy:  1.0

Epoch  805
Loss:  0.005912290936043139
Accuracy:  1.0

Epoch  806
Loss:  0.005901340702224546
Accuracy:  1.0

Epoch  807
Loss:  0.00588755570207275
Accuracy:  1.0

Epoch  808
Loss:  0.005868679526462595
Accuracy:  1.0

Epoch  809
Loss:  0.005863057480460916
Accuracy:  1.0

Epoch  810
Loss:  0.005847902148665393
Accuracy:  1.0

Epoch  811
Loss:  0.005837803576441326
Accuracy:  1.0

Epoch  812
Loss:  0.0058226670343735415
Accuracy:  1.0

Epoch  813
Loss:  0.005806684290570062
Accuracy:  1.0

Epoch  814
Loss:  0.0057978472215957765
Accuracy:  1.0

Epoch  815
Loss:  0.005783634787550472
Accuracy:  1.0

Epoch  816
Loss:  0.0057712623081795445
Accuracy:  1.0

Epoch  

Model Evaluation

In [12]:
print(model.predict(X_train), model.predict(np.array([[1,1,0]])))
model.evaluate(X_train, y_train)

[0, 1, 1, 1, 1, 0, 0] [0]
Loss:  0.004067040241527103
Accuracy:  1.0
