<a href="https://colab.research.google.com/github/AditMeh/deep-learning/blob/main/Vectorized_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [37]:
import numpy as np
def compute_dw(delta, a):
    """Compute per-sample weight gradients as a batch of outer products.

    For every sample ``b`` the result satisfies
    ``dW[b, j, k] = a[b, j] * delta[b, k]``: the outer product of the
    previous layer's activations with the current layer's error term.

    Args:
        delta: Array of shape ``(batch_size, num_curr_neurons)``.
        a: Array of shape ``(batch_size, num_prev_neurons)``.

    Returns:
        Array of shape ``(batch_size, num_prev_neurons, num_curr_neurons)``.

    Raises:
        ValueError: If either argument is not 2-D or the batch sizes differ.
    """
    delta = np.asarray(delta)
    a = np.asarray(a)

    if delta.ndim != 2 or a.ndim != 2:
        raise ValueError(
            "delta and a must both be 2-D, got shapes "
            f"{delta.shape} and {a.shape}"
        )
    # Checked explicitly: broadcasting would otherwise silently accept a
    # batch dimension of 1 on one side.
    if delta.shape[0] != a.shape[0]:
        raise ValueError(
            "delta and a must share the same batch size, got "
            f"{delta.shape[0]} and {a.shape[0]}"
        )

    # (batch, prev, 1) * (batch, 1, curr) -> (batch, prev, curr).
    # Broadcasting yields the outer product without materialising tiled
    # copies of either operand.
    return a[:, :, np.newaxis] * delta[:, np.newaxis, :]

In [109]:

class NeuralNet:
    """Fully connected feed-forward network with sigmoid activations.

    The network uses a quadratic cost. Gradients are computed for every
    sample of a mini-batch separately and averaged over the batch when the
    parameters are updated.
    """

    def __init__(self, layer_list):
        """Create randomly initialised weights and biases.

        Args:
            layer_list: Number of neurons in each layer, input layer first.
        """
        # Number of layers, counting the input layer.
        self.layers = len(layer_list)
        # weights[i] has shape (layer_list[i], layer_list[i + 1]).
        self.weights = []
        # biases[i] has shape (layer_list[i + 1],).
        self.biases = []

        # State cached by feedforward() and consumed by backward().
        self.x = None
        self.output = None
        self.z = []
        self.a = []

        # Randomly initialise weights and biases; each value is scaled by
        # 0.01 to prevent vanishing gradients caused by the saturation of
        # the sigmoid.
        for fan_in, fan_out in zip(layer_list[:-1], layer_list[1:]):
            self.weights.append(np.random.rand(fan_in, fan_out) * 0.01)
            self.biases.append(np.random.rand(fan_out) * 0.01)

        print("Weights")
        print([element.shape for element in self.weights])

        print("\n" + "Biases")
        print([element.shape for element in self.biases])

    def sigmoid(self, x):
        """Return the element-wise logistic sigmoid of ``x``.

        Uses ``exp(-|x|)``, which always lies in ``[0, 1]``, so the
        computation cannot overflow for inputs of large magnitude.
        """
        x = np.asarray(x)
        exp_neg_abs = np.exp(-np.abs(x))
        result = np.where(
            x >= 0,
            1 / (1 + exp_neg_abs),            # 1 / (1 + e^-x) for x >= 0
            exp_neg_abs / (1 + exp_neg_abs),  # e^x / (1 + e^x) for x < 0
        )
        # Indexing with () turns a 0-d result back into a scalar and leaves
        # arrays of higher rank unchanged.
        return result[()]

    def sigmoid_prime(self, x):
        """Return the derivative of the sigmoid evaluated at ``x``."""
        s = self.sigmoid(x)
        return s * (1 - s)

    def compute_cost(self, real, predicted, batch_size):
        """Return the quadratic cost summed over outputs, averaged over the batch."""
        return (1 / batch_size) * (np.sum(1 / 2 * (predicted - real) ** 2))

    def cost_derivative(self, real, predicted):
        """Return the derivative of the quadratic cost w.r.t. the prediction."""
        return (predicted - real)

    def feedforward(self, x, output):
        """Run a forward pass and cache the intermediate values.

        Args:
            x: Input batch; ``x.shape[0]`` is used as the batch size.
            output: Target values compared against the final activations.

        Returns:
            The cost of the batch as computed by ``compute_cost``.
        """
        self.x = x
        self.output = output

        # The activation of the input layer is the input itself.
        current_a = self.x

        self.z = []
        self.a = [self.x]
        for w_i, b_i in zip(self.weights, self.biases):
            z_i = np.dot(current_a, w_i) + b_i

            self.z.append(z_i)
            current_a = self.sigmoid(z_i)
            self.a.append(current_a)

        return self.compute_cost(output, current_a, self.x.shape[0])

    def backward(self):
        """Back-propagate the error of the most recent forward pass.

        Returns:
            A tuple ``(w_i_grad, b_i_grad)`` of lists ordered like
            ``self.weights`` / ``self.biases``. Each entry holds one gradient
            per sample along its first axis.

        Raises:
            RuntimeError: If ``feedforward`` has not been called yet.
        """
        if self.x is None:
            raise RuntimeError("feedforward() must be called before backward()")

        # Error of the final layer.
        delta = (
            self.cost_derivative(self.output, self.a[-1])
            * self.sigmoid_prime(self.z[-1])
        )

        # Gradients are collected from the last layer towards the first and
        # reversed at the end.
        w_i_grad = [compute_dw(delta, self.a[-2])]
        b_i_grad = [delta]

        for i in range(2, self.layers):
            # Propagate the error one layer back through the weights that
            # connect layer -i to layer -i + 1.
            delta = (
                np.dot(delta, self.weights[-i + 1].T)
                * self.sigmoid_prime(self.z[-i])
            )

            # dC/dW for this layer pairs the error with the activations
            # feeding into it.
            w_i_grad.append(compute_dw(delta, self.a[-i - 1]))
            b_i_grad.append(delta)

        w_i_grad.reverse()
        b_i_grad.reverse()
        return w_i_grad, b_i_grad

    def update_weights_and_biases(self, w_grad, b_grad, learning_rate):
        """Apply one gradient-descent step using batch-averaged gradients.

        Args:
            w_grad: Per-sample weight gradients as returned by ``backward``.
            b_grad: Per-sample bias gradients as returned by ``backward``.
            learning_rate: Step size applied to the averaged gradients.
        """
        batch_size = w_grad[0].shape[0]

        # Average each per-sample gradient over the batch axis, then step.
        for i, (layer_w_grad, layer_b_grad) in enumerate(zip(w_grad, b_grad)):
            self.weights[i] -= learning_rate * (np.sum(layer_w_grad, axis=0) / batch_size)
            self.biases[i] -= learning_rate * (np.sum(layer_b_grad, axis=0) / batch_size)

In [43]:
# Smoke test: push a batch of 10 all-ones samples with 50 features through a
# small network and run one backward pass.
# The names avoid shadowing the builtin `input`.
sample_inputs = np.ones((10, 50))

# One target value per output neuron, so the target shape matches the
# network's 2-neuron output layer instead of relying on broadcasting.
sample_targets = np.ones((10, 2))

nn = NeuralNet([50, 20, 30, 2])

nn.feedforward(sample_inputs, sample_targets)
nn.backward()

Weights
[(50, 20), (20, 30), (30, 2)]

Biases
[(20,), (30,), (2,)]


([(10, 50, 20), (10, 20, 30), (10, 30, 2)], [(10, 20), (10, 30), (10, 2)])

In [111]:
from tensorflow.keras.datasets import mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Flatten every image into a single feature vector and divide by 255 so the
# training inputs are scaled down before they reach the sigmoid layers.
X_train = X_train.reshape(X_train.shape[0], -1) / 255

# The test set must go through exactly the same flattening and scaling as the
# training set, otherwise the network would be evaluated on inputs with a
# different value range from the one it was trained on.
X_test = X_test.reshape(X_test.shape[0], -1) / 255

from sklearn.preprocessing import OneHotEncoder

def prepare_mini_batches(minibatch_size: int, dataset_size=None):
    """Draw the sample indexes for one random mini-batch.

    Args:
        minibatch_size: Number of distinct indexes to draw.
        dataset_size: Number of samples to draw from. Defaults to the number
            of rows in the module-level ``X_train``.

    Returns:
        A 1-D NumPy array of ``minibatch_size`` distinct indexes in
        ``[0, dataset_size)``.

    Raises:
        ValueError: Raised by ``np.random.choice`` when ``minibatch_size``
            exceeds ``dataset_size`` (sampling is without replacement).
    """
    if dataset_size is None:
        dataset_size = X_train.shape[0]
    return np.random.choice(dataset_size, minibatch_size, replace=False)




# One-hot encoder fitted on the training labels (as a single column).
encoder = OneHotEncoder()
encoder.fit(Y_train.reshape(-1, 1))

nn = NeuralNet([784, 362, 100, 10])

# Training hyper-parameters.
epochs = 50
mini_size = 32
lr = 0.01

# Note: every "epoch" below is one gradient step on a single random
# mini-batch of `mini_size` samples, not a full pass over the training set.
for i in range(epochs):
    print(f"EPOCH: {i + 1}")

    # Pick the samples for this step and one-hot encode their labels.
    random_indexes = prepare_mini_batches(mini_size)
    batch_inputs = X_train[random_indexes]
    batch_labels = Y_train[random_indexes].reshape(-1, 1)
    batch_targets = encoder.transform(batch_labels).toarray()

    # Forward pass: report the cost of the current mini-batch.
    cost_iter = nn.feedforward(batch_inputs, batch_targets)
    print(cost_iter)

    # Backward pass followed by a gradient-descent update.
    grads_w, grads_b = nn.backward()
    nn.update_weights_and_biases(grads_w, grads_b, learning_rate=lr)





Weights
[(784, 362), (362, 100), (100, 10)]

Biases
[(362,), (100,), (10,)]
EPOCH: 1
1.6731024220610091
EPOCH: 2
1.5950638754436333
EPOCH: 3
1.5170748465424986
EPOCH: 4
1.4417938946739213
EPOCH: 5
1.3720806144030686
EPOCH: 6
1.3083736597711497
EPOCH: 7
1.2451773951446292
EPOCH: 8
1.1891888293997566
EPOCH: 9
1.1390783754304312
EPOCH: 10
1.0873418184531585
EPOCH: 11
1.043464803219305
EPOCH: 12
1.0015507013468286
EPOCH: 13
0.9643101019944917
EPOCH: 14
0.9299562413669962
EPOCH: 15
0.8964673678843496
EPOCH: 16
0.8678009361226471
EPOCH: 17
0.8436526652873632
EPOCH: 18
0.8160708517717126
EPOCH: 19
0.7917850022967567
EPOCH: 20
0.7741077400026235
EPOCH: 21
0.7525998391819165
EPOCH: 22
0.7351437315596149
EPOCH: 23
0.7208435933893589
EPOCH: 24
0.7028542948384227
EPOCH: 25
0.6913736934932853
EPOCH: 26
0.6758103746392391
EPOCH: 27
0.6666772715910146
EPOCH: 28
0.6528511139830022
EPOCH: 29
0.6417076440128607
EPOCH: 30
0.6332880327484706
EPOCH: 31
0.622358490353623
EPOCH: 32
0.6159571917075448
EPOCH: 

In [34]:
## Scratch check of the tile/reshape/transpose trick used to build the
## per-sample outer products. The axes come out reversed here, so review which
## axis order the operation actually needs.

delta = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
a = np.array([[5, 5], [6, 6]])

batch_size, num_prev_neurons = a.shape
num_curr_neurons = delta.shape[1]

# Repeat every delta row once per previous-layer neuron, then swap the last
# two axes so the result is (batch, curr, prev).
delta = (
    np.tile(delta, (1, num_prev_neurons))
    .reshape(batch_size, num_prev_neurons, num_curr_neurons)
    .transpose(0, 2, 1)
)

# Repeat every activation row once per current-layer neuron: (batch, curr, prev).
a = np.tile(a, (1, num_curr_neurons)).reshape(
    batch_size, num_curr_neurons, num_prev_neurons
)

k = delta * a

print(k)
print(k.transpose(0, 2, 1).shape)


[[[ 5  5]
  [10 10]
  [15 15]
  [20 20]]

 [[30 30]
  [36 36]
  [42 42]
  [48 48]]]
(2, 2, 4)


In [46]:
# Scratch check: subtracting a 1-D vector from a 2-D array broadcasts the
# vector across every row.
a = np.array([[1, 2, 3, 4, 5]] * 2)
p = np.array([1, 2, 3, 4, 4])

print(p.shape)
print(np.subtract(a, p))

(5,)
[[0 0 0 0 1]
 [0 0 0 0 1]]


In [91]:
# Scratch check: summing a 3-D array over axis 1 collapses the middle axis,
# adding the two rows inside each outer entry.
a = np.array([
    [[1, 2, 3, 4, 3], [1, 2, 3, 4, 5]],
    [[1, 2, 3, 4, 5], [0, 0, 1, 1, 1]],
])

print(a.sum(axis=1))




[[2 4 6 8 8]
 [1 2 4 5 6]]
