<a href="https://colab.research.google.com/github/AditMeh/Automatic-differentiation-notes/blob/main/simple_graphs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>



#### Imports


In [None]:
import numpy as np
import math

**Sources:**
- https://stats.stackexchange.com/questions/224140/step-by-step-example-of-reverse-mode-automatic-differentiation

- https://www.youtube.com/watch?v=EEbnprb_YTU&ab_channel=NathanSprague

- https://www.youtube.com/watch?v=twTIGuVhKbQ&ab_channel=JorisGillis

In [9]:
import numpy as np

class NeuralNet:
    def __init__(self, layer_list):
        self.layers = len(layer_list)
        self.weights = []
        self.biases = []

        for layer_index in range(1, len(layer_list)):
            self.weights.append(np.random.rand(layer_list[layer_index], layer_list[layer_index - 1])*0.01)
            self.biases.append(np.random.rand(layer_list[layer_index], 1)*0.01)
        
        print("Weights")
        print([element.shape for element in self.weights])

        print("\n" + "Biases")
        print([element.shape for element in self.biases])
    
    def sigmoid(self, x):
        return 1/(1 + (np.e)**(-x))
    def sigmoid_prime(self, x):
        return self.sigmoid(x)*(1 - self.sigmoid(x))
    def compute_cost(self, real, predicted):
        return np.sum(1/2*(predicted - real)**2)
    def cost_derivative(self, real, predicted):
        return (predicted - real)
    
    def feedforward(self, x, output):
       self.x = x
       current_a = self.x
       self.z = []
       self.a = [self.x]
       for w_i, b_i in zip(self.weights, self.biases):
           z_i = np.dot(w_i, current_a) + b_i
           self.z.append(z_i)
           current_a = self.sigmoid(z_i)
           self.a.append(current_a)

       return self.compute_cost(output, current_a)
        
    def compute_gradients_backprop(self, output):
        w_i_grad = [np.zeros(element.shape) for element in self.weights]
        b_i_grad = [np.zeros(element.shape) for element in self.biases]


        #compute final layer error
        delta  = self.cost_derivative(output, (self.a)[-1]) * self.sigmoid_prime((self.z)[-1])
        
        
        dCdW_final_layer = np.dot(delta, self.a[-2].T)
        
        w_i_grad[-1] = dCdW_final_layer
        b_i_grad[-1] = delta

        for i in range(2, self.layers):
            z_current = self.z[-i]


            delta = np.dot(self.weights[-i + 1].T, delta) * self.sigmoid_prime(z_current)

            dW = np.dot(delta, self.a[-i - 1].T)

            w_i_grad[-i] = dW
            b_i_grad[-i] = delta
        return w_i_grad, b_i_grad

    def update_weights_and_biases(self, w_grad, b_grad, learning_rate):        

        #apply gradients
        for i in range(len(self.weights)): 
            self.weights[i] -= learning_rate*w_grad[i]
            self.biases[i] -= learning_rate*b_grad[i]



In [24]:
#Testing on MNIST using SGD

from tensorflow.keras.datasets import mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1] * X_train.shape[2]) / 255

X_test = X_test.reshape(X_test.shape[0], X_test.shape[1] * X_test.shape[2])

def prepare_mini_batches(minibatch_size):
    random_indexes = []
    indexes = np.random.choice(X_train.shape[0], minibatch_size, replace=False)
    random_indexes.append(indexes) 
    return random_indexes

def one_hot_encode(length, index):
    output = [0 for i in range(length)]
    output[index] = 1
    return np.asarray(output).reshape(length, 1)


def compute_test_loss(test_set, net):
    total_loss = []
    for i in range(len(X_test)):
        loss = net.feedforward(X_test[i].reshape(X_test[i].shape[0], 1), one_hot_encode(10 ,Y_test[i]))
        total_loss.append(loss)
    return sum(total_loss)/len(total_loss)

#instantiate neural net
nn = NeuralNet([784, 392, 196, 10])


#training loop
epochs = 300
mini_size = 32
for i in range(epochs):
    random_indexes = prepare_mini_batches(mini_size)

    for minibatch in random_indexes:
        grads_w = [np.zeros(element.shape) for element in nn.weights]
        grads_b = [np.zeros(element.shape) for element in nn.biases]
        cost = []
        for j in minibatch:
            cost_iter = nn.feedforward(X_train[j].reshape(X_train[j].shape[0], 1), one_hot_encode(10 ,Y_train[j]))
            cost.append(cost_iter)
            grad_w, grad_b = nn.compute_gradients_backprop(one_hot_encode(10, Y_train[j]))

            for i in range(len(grad_w)):
                grads_w[i] += grad_w[i]
                grads_b[i] += grad_b[i]
        grads_w = [grads_w[i]/mini_size for i in range(len(grad_w))]
        grads_b = [grads_b[i]/mini_size for i in range(len(grad_b))]

        #update weights
        print("Cost for batch  = " + str(sum(cost)/len(cost)))
        print("updating weights")
        nn.update_weights_and_biases(grads_w, grads_b, 0.01)
        print("test loss = " + str(compute_test_loss(X_test, nn)))







Weights
[(392, 784), (196, 392), (10, 196)]

Biases
[(392, 1), (196, 1), (10, 1)]
Cost for batch  = 2.1398411560725386
updating weights
test loss = 2.058023376824719
Cost for batch  = 1.9533218804125139
updating weights
test loss = 1.8496265713740996
Cost for batch  = 1.7755249384113183
updating weights
test loss = 1.6518069431240399
Cost for batch  = 1.6041963240614319
updating weights
test loss = 1.4706860062097227
Cost for batch  = 1.442206608590869
updating weights
test loss = 1.3121096299662305
Cost for batch  = 1.3061595424652779
updating weights
test loss = 1.1750746014358078
Cost for batch  = 1.1836407393363197
updating weights
test loss = 1.059658966851878
Cost for batch  = 1.0794004723895567
updating weights
test loss = 0.9629488865477049
Cost for batch  = 0.9952508868351942
updating weights
test loss = 0.8832232439647836
Cost for batch  = 0.9193169965605776
updating weights
test loss = 0.8174836380757027
Cost for batch  = 0.854990241267479
updating weights
test loss = 0.7633

KeyboardInterrupt: ignored