# Assignment 02 Part 2: Neural Net Template

This file contains the template code for the Neural Net with hidden layers.

### Artificial Neural Net Class

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd drive/MyDrive/PhD/

/content/drive/MyDrive/PhD


In [None]:
import pandas as pd
import numpy as np

In [76]:
def load_data():
  train_data = np.array(pd.read_csv('mnist_test.csv'))
  #train_data = np.array(train_data)
  train_labels = train_data[:, 0]
  train_data = train_data[:, 1:]
  m, n = train_data.shape
  train_x = train_data.reshape(m, n) # m number of samples, n number of features
  train_y = train_labels.reshape(m, 1)
  
  test_data = np.array(pd.read_csv('mnist_train_10.csv'))
  test_labels = test_data[:, 0]
  test_data = test_data[:, 1:]
  m1, n1 = test_data.shape
  test_y = test_labels.reshape(m1, 1)
  test_x = test_data.reshape(m1, n1)

  sum = np.sum(train_x, axis=1, keepdims=True)
  train_x = train_x / sum
  sum = np.sum(test_x, axis=1, keepdims=True)
  test_x = test_x/sum
 # print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)
  return train_x, train_y, test_x, test_y
  

In [75]:
def initialize_parameters(layer_dims):
    
    np.random.seed(1)
    parameters = {}
    L = len(layer_dims)            # number of layers in the network

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1])*0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
        
        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))

        
    return parameters

In [45]:
def L_model_forward(X, parameters):

    caches = []
    A = X
    L = len(parameters) // 2                  # number of layers in the neural network
    #print('number of layers: ', L)
    
    # Implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list.
    for l in range(1, L):
        A_prev = A 
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation = "relu")
        caches.append(cache)
    
    # Implement LINEAR -> SIGMOID. Add "cache" to the "caches" list.
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation = "sigmoid")
    caches.append(cache)
    
    assert(AL.shape == (X.shape[0], parameters['b' + str(L)].shape[0]))
            
    return AL, caches

In [46]:
def linear_activation_forward(A_prev, W, b, activation):
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    
    assert (A.shape == (A_prev.shape[0], W.shape[0]))
    cache = (linear_cache, activation_cache)

    return A, cache

In [50]:
def linear_forward(A, W, b):
    Z = np.dot(A, W.T) + b.T       # W.dot(A) + b
    
    assert(Z.shape == ( A.shape[0], W.shape[0]))
    cache = (A, W, b)
    #print(A.shape, W.shape, b.shape, Z.shape)
    
    return Z, cache

In [51]:
def sigmoid(Z):
    A = 1/(1+np.exp(-Z))
    cache = Z
    return A, cache

def relu(Z):
    A = np.maximum(0,Z)
    assert(A.shape == Z.shape)
    cache = Z 
    return A, cache


In [52]:
def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = -np.sum(np.multiply(Y, np.log10(AL)) + np.multiply((1-Y), np.log(1-AL)))/m
    #cost = (1./m) * (-np.dot(Y,np.log(AL).T) - np.dot(1-Y, np.log(1-AL).T))
   # cost = np.squeeze(cost)      # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).
  #  assert(cost.shape == ())
    
    return cost

In [53]:
def gradient(Z, activation):
    if activation=='sigmoid':
        A, _ = sigmoid(Z)
        return A * (1 - A)
    elif activation == 'relu':
        A, _ = relu(Z)
        return np.where(A>0, 1, 0)


In [78]:
import numpy as np
import random
from sklearn import metrics
class ANN:

    #==========================================#
    # The init method is called when an object #
    # is created. It can be used to initialize #
    # the attributes of the class.             #
    #==========================================#
    def __init__(self, no_inputs, no_hidden_layers=1, hidden_layer_size=28, max_iterations=20, learning_rate=0.1, no_outputs=10):

        self.no_inputs = no_inputs
        self.no_hidden_layers = no_hidden_layers
        self.hidden_layer_size = hidden_layer_size
        self.no_outputs = no_outputs
        # TODO initialise weights

        self.layer_dims = [self.no_inputs, 28, self.no_outputs]
        self.parameters = initialize_parameters(self.layer_dims)

        for i in range(1, len(self.layer_dims)):
          print(self.parameters['W'+str(i)].shape)
          print(self.parameters['b'+str(i)].shape)


        self.max_iterations = max_iterations
        self.learning_rate = learning_rate

    #===================================#
    # Performs the activation function. #
    # Expects an array of values of     #
    # shape (1,N) where N is the number #
    # of nodes in the layer.            #
    #===================================#
   
    #===============================#
    # Trains the net using labelled #
    # training data.                #
    #===============================#
    def train(self,  training_data, labels):
      #assert len(training_data) == len(labels)
        #return
      m = training_data.shape[0]
      #print('before', self.parameters['W'+str(1)])
      for i in range(0, self.max_iterations):
          
          # Forward propagation
          self.AL, self.caches = L_model_forward(training_data, self.parameters)      # caches=[(A, W, b), Z] is list of each layer parameters activatio,
                                                                                      # weight, bias, and input of the layer Z; 
                                                                                      # and AL is the output logits
          L = len(self.parameters) //2  
          delta_L = self.AL - labels
          errors = []
          errors.append(delta_L)
          for l in range(L, 1, -1):
              delta = np.dot(delta_L, self.parameters['W'+str(l)]) * gradient(self.caches[l-2][1], 'relu')
              errors.append(delta)
              delta_L = delta
          c = 0
          for ll in range(L, 0, -1):
              grad_w = np.dot(errors[c].T, self.caches[ll-1][0][0])
              grad_b = np.sum(errors[c], axis=0, keepdims=True).T     #.reshape(errors[c].shape[1], 1)
              self.parameters['W'+str(ll)] = self.parameters['W'+str(ll)] - self.learning_rate*grad_w
              self.parameters['b'+str(ll)] = self.parameters['b'+str(ll)] - self.learning_rate*grad_b
              c = c+1
          
      #print('after', self.parameters['W'+str(1)])
                            
          # Print the cost every 100 training example
     #     if print_cost and i % 100 == 0:
     #         print ("Cost after iteration %i: %f" %(i, cost))


    #=========================================#
    # Tests the prediction on each element of #
    # the testing data. Prints the precision, #
    # recall, and accuracy.                   #
    #=========================================#
    def test(self, testing_data, labels):
        assert len(testing_data) == len(labels)
        self.logits, _ = L_model_forward(testing_data, self.parameters)      # caches=[(A, W, b), Z] is list of each layer parameters activatio,
                                                                                      # weight, bias, and input of the layer Z; 
                                                                                      # and AL is the output logits
        #predictions = np.amax(self.logits, axis=1).reshape(labels.shape[0], 1)
        predictions = self.logits.argmax(axis=1).reshape(labels.shape[0], 1)
        true_positives = np.sum(np.where(predictions==self.logits, 1, 0))
        print(predictions[1:6, :], labels[1:6, :])
        #accuracy = true_positives/labels.shape[0]
        
        print(metrics.classification_report(labels, predictions, labels=[0,1,2,3,4,5,6,7,8,9], digits=4))
        

### Main method

The following cell(s) should complete parts 2.1 to 2.5.

In [80]:
if __name__ == '__main__':
  # TODO load training data
  # TODO load testing data
  train_x, train_y, test_x, test_y = load_data()
  m, n_x = train_x.shape
  no_outputs=10   # number of classes
  train_labels = np.zeros((m, no_outputs))
  for i in range(m):
    train_labels[i, train_y[i]] = 1
  #print(train_labels[1, :], train_y[1, :])

  # e.g. net = ANN(0)
  no_inputs=n_x; no_hidden_layers=1; hidden_layer_size=28; max_iterations=20;  learning_rate=0.1; 
  model = ANN(no_inputs, no_hidden_layers, hidden_layer_size, max_iterations, learning_rate, no_outputs)
  model.train(train_x, train_labels)
  model.test(test_x, test_y)
  # TODO call train
  # TODO call test

(28, 784)
(28, 1)
(10, 28)
(10, 1)


  


[[1]
 [1]
 [1]
 [1]
 [1]] [[4]
 [1]
 [9]
 [2]
 [1]]
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000         1
           1     0.3333    1.0000    0.5000         3
           2     0.0000    0.0000    0.0000         1
           3     0.0000    0.0000    0.0000         1
           4     0.0000    0.0000    0.0000         2
           5     0.0000    0.0000    0.0000         0
           6     0.0000    0.0000    0.0000         0
           7     0.0000    0.0000    0.0000         0
           8     0.0000    0.0000    0.0000         0
           9     0.0000    0.0000    0.0000         1

   micro avg     0.3333    0.3333    0.3333         9
   macro avg     0.0333    0.1000    0.0500         9
weighted avg     0.1111    0.3333    0.1667         9



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [49]:
prob = np.array([[0, 0.002, 3], [0, 0.002, 3]])
prob  = np.where(prob > 1.0e-10, prob, 1.0e-10)
np.log(1.0e-10)

-23.025850929940457

In [None]:
pp = (2,4,5)
aa = 3
pp1 = (pp, aa)
pp1[0][0]

2

In [None]:
for i in range(5, 1, -1):
  print(i)
aa = np.array([2,4,5])
np.where(aa<3, 0, 333)

5
4
3
2


array([  0, 333, 333])

In [None]:
prob =np.array([0.002, 0.3]) # np.random.randint(5, size=4) /4
print(prob)

result = np.where(prob > 0.0000000001, prob, -10)
# print(result)
np.log(result, out=result, where=result > 0)
print(result)

[0.002 0.3  ]
[-6.2146081 -1.2039728]
