**CISC 452 Assignment 2**

**Chris Gray**

**10185372**

**14cmg5**

In [0]:
# Install the pinned TensorFlow build and NumPy; in the code visible below TensorFlow is only imported for its MNIST dataset loader.
!pip install tensorflow-gpu==2.0.0rc numpy --no-cache-dir

In [0]:

from tensorflow.keras.datasets import mnist
import numpy as np
import pandas as pd
import time
import random
import os
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from math import exp

class Model():
  """Fully connected feed-forward network with sigmoid units.

  The network is trained with mini-batch back-propagation plus a momentum
  term. Weights and biases are drawn from a standard normal distribution.
  Inputs and labels must be column vectors (shape (n, 1)): every layer computes
  np.dot(weight, activation) + bias with biases of shape (nodes, 1), and the
  weight gradients are formed as outer products via transpose().
  """
  
  def __init__(self, Layers, LearningRate = 5.0, Epochs = 100, BatchSize = 32, MomentiumFactor = 0.75, MSE_Threshold = 0.250):
    """Build the network and its training state.

    Layers          -- node count of every layer, input layer first
    LearningRate    -- step size; Train multiplies it by 0.9 every 10th epoch
    Epochs          -- maximum number of passes over the training data
    BatchSize       -- number of samples per mini-batch
    MomentiumFactor -- weight given to the previous batch's delta; decayed
                       together with the learning rate
    MSE_Threshold   -- training stops once the metric measured on every 10th
                       epoch is less than or equal to this value
    """
    self.NumLayers = len(Layers)
    self.Layers = Layers
    self.Weights =self.__Gen_Weights() 
    self.Biases = self.__Gen_Biases() 
    self.LearningRate = LearningRate
    self.Epochs = Epochs
    self.BatchSize = BatchSize
    self.MomentiumFactor = MomentiumFactor
    self.MSE_Threshold = MSE_Threshold
    self.__MSE = 999 # start with a large value; it is only refreshed on every 10th epoch
    self.__mWeights = self.__Gen_Empty_Weights() # last weight delta, reused as the momentum term
    self.__mBiases = self.__Gen_Empty_Biases() # last bias delta, reused as the momentum term
  
  def __call__(self,inputs):
    """Feed `inputs` forward through every layer and return the output-layer activations."""
    activation = inputs
    for weight, bias in zip(self.Weights, self.Biases):
      activation = np.dot(weight,activation) + bias # net = Wx + b
      activation = sigmoid(activation) # apply the activation function
    return activation
  
  # Public Functions
  
  def Train(self,TrainingData):
    """Train the network on `TrainingData`, a sequence of (input, label) pairs.

    The data is shuffled once and split into mini-batches that are reused for
    every epoch. Every 10th epoch the learning rate and momentum factor are
    multiplied by 0.9 and the training metrics are printed. Training ends after
    `Epochs` epochs or as soon as the last measured MSE is at or below
    `MSE_Threshold`.
    """
    # Shuffle a private copy so the caller's sequence keeps its original order
    shuffled = list(TrainingData)
    random.shuffle(shuffled)
    batches = self.__Gen_Batches(shuffled) # create batches from the shuffled data
    for epoch in range(1,(self.Epochs+1)): 
      for batch in batches:
        self.__Batch_Train(batch)
      if epoch % 10 == 0:
        # Decrease learning rate and momentum factor as training progresses
        self.LearningRate *=0.9
        self.MomentiumFactor *=0.9
        self.__Print_Epoch_Metrics(epoch,shuffled)
        
      # stop as soon as the most recently measured MSE reaches the goal
      if self.__MSE <= self.MSE_Threshold:
        break

  def Evaluate_Data(self, data):
    """Run the network over `data` and return (correct, acc, mse, ds, ys).

    correct -- number of samples whose predicted class equals the label class
    acc     -- accuracy in percent
    mse     -- mean of (d - y)**2 where d and y are the argmax class indices
               of the label and of the network output
    ds, ys  -- lists of label classes and predicted classes, in data order

    NOTE(review): the "MSE" is computed on class indices, not on the raw
    output vectors. MSE_Threshold is compared against this value, so the
    definition is kept as-is.
    """
    # An empty dataset has no meaningful metrics; avoid dividing by zero
    if len(data) == 0:
      return 0, 0.0, 0.0, [], []

    correct = 0 
    mse = 0
    ds = []
    ys = []
    for x , d in data:
      d = np.argmax(d) # class index of the one-hot label
      ds.append(d)
      
      y = np.argmax(self(x)) # class index with the strongest output
      ys.append(y)
      
      mse += Mean_Squared_Error(d,y)
      if d == y:
        correct += 1
        
    acc = correct / len(data) * 100
    mse = mse / len(data) # divide the summed error by the number of samples
    return correct , acc , mse , ds , ys
      
  # Private Functions
  
  def __Error(self, desired, actual):
    """Return desired - actual (the error term used for the squared-error loss)."""
    return desired - actual
  
  def __Batch_Train(self, batch):
    """Apply one weight/bias update computed from the (input, label) pairs in `batch`."""
    delta_Ws = self.__Gen_Empty_Weights()
    delta_Bs = self.__Gen_Empty_Biases()
    
    for inputs, label in batch:
      dWs , dBs = self.__Back_Propagation(inputs,label)
      # sum the deltas of every sample in the batch
      for i in range(len(delta_Ws)):
        delta_Ws[i] += dWs[i] 
      for i in range(len(delta_Bs)):
        delta_Bs[i] += dBs[i]
        
    # NOTE(review): only the plain gradient step (deltaW / deltaB) is remembered
    # for the next batch, not the momentum-augmented update that was applied.
    for i in range(len(self.Weights)):
      deltaW = (self.LearningRate / len(batch)) * delta_Ws[i] # mean delta scaled by the learning rate
      self.Weights[i] += deltaW + (self.MomentiumFactor * self.__mWeights[i]) # apply delta plus momentum
      self.__mWeights[i] = deltaW # remember the delta as momentum for the next update
      
    for i in range(len(self.Biases)):
      deltaB = (self.LearningRate / len(batch)) * delta_Bs[i] # mean delta scaled by the learning rate
      self.Biases[i] += deltaB + (self.MomentiumFactor * self.__mBiases[i]) # apply delta plus momentum
      self.__mBiases[i] = deltaB # remember the delta as momentum for the next update
  
  def __Back_Propagation(self, inputs,label):
    """Return (delta_Ws, delta_Bs), the per-layer weight and bias deltas for one sample."""
    delta_Ws = self.__Gen_Empty_Weights()
    delta_Bs = self.__Gen_Empty_Biases()
    
    activations = [] # net input of each layer, before the sigmoid: a
    function_activations = [inputs] # output of each layer, after the sigmoid: f(a)
    
    # Forward pass, recording both a and f(a) for every layer
    for weight,bias in zip(self.Weights, self.Biases):
      net = np.dot(weight,inputs) + bias # net = Wx + b
      activations.append(net)
      
      fNet = sigmoid(net) # output of this layer
      function_activations.append(fNet)
      inputs = fNet
    
    outputLayer = -1 # start with the error on the output layer
    
    actual = function_activations[outputLayer] # last f(a) is the network output
    error = label - actual # d - y
    
    fPrimeOutput = sigmoid_prime(activations[outputLayer]) # f'(a) of the output layer
    delta = error * fPrimeOutput
    
    delta_Bs[outputLayer] = delta
    # outer product of the delta with the activations feeding the output layer
    delta_Ws[outputLayer] = np.dot(delta,function_activations[outputLayer -1].transpose()) 
    
    for i in range(2,self.NumLayers):
      layer = -i # walk backwards from the last hidden layer towards the input
      a = activations[layer]
      fPrimeOfLayer = sigmoid_prime(a) # f'(a) of the layer being processed
      
      # propagate the delta of the following layer back through its weights
      delta = np.dot(self.Weights[layer + 1].transpose(),delta) * fPrimeOfLayer
      
      delta_Bs[layer] = delta
      # outer product of the delta with the activations feeding this layer
      delta_Ws[layer] = np.dot(delta,function_activations[layer-1].transpose()) 
      
    return delta_Ws, delta_Bs
  
  def __Print_Epoch_Metrics(self, epoch,TrainingData):
    """Evaluate `TrainingData`, store the resulting MSE and print accuracy and MSE."""
    print("Epoch #%d" %(epoch))
    c, a,mse,ds,ys= self.Evaluate_Data(TrainingData)
    self.__MSE = mse # consulted by Train for early stopping
    print("Accuracy: %d/%d | %2.2f%% | MSE: %2.4f" %(c,len(TrainingData),a,mse))  
  
  def __Gen_Batches(self, data):
    """Split `data` into consecutive slices of at most `BatchSize` items."""
    batches = []
    for i in range(0,len(data),self.BatchSize):
      batches.append(data[i:i+self.BatchSize])
    return batches
  
  def __Gen_Weights(self):
    """Return one random (outputs, inputs) weight matrix per pair of adjacent layers."""
    weights = []
    for inputDimension,outputDimension in zip(self.Layers[:-1],self.Layers[1:]):
      weights.append(np.random.randn(outputDimension,inputDimension))
    return weights
  
  def __Gen_Biases(self):
    """Return one random (nodes, 1) bias vector for every layer after the input layer."""
    biases = []
    for nodes in self.Layers[1:]:
      biases.append(np.random.randn(nodes,1))
    return biases
  
  def __Gen_Empty_Weights(self):
    """Return zero matrices with the same shapes as the weight matrices."""
    weights = []
    for w in self.Weights:
      weights.append(np.zeros(w.shape))
    return weights
  
  def __Gen_Empty_Biases(self):
    """Return zero vectors with the same shapes as the bias vectors."""
    biases = []
    for b in self.Biases:
      biases.append(np.zeros(b.shape))
    return biases
  
  
def sigmoid(x):
  """Sigmoid activation function: 1 / (1 + e^-x), evaluated with np.exp."""
  decay = np.exp(-x)
  denominator = 1.0 + decay
  return 1.0 / denominator

def sigmoid_prime(x):
  """Derivative of the sigmoid function: sigmoid(x) * (1 - sigmoid(x))."""
  s = sigmoid(x) # evaluate once and reuse; the result is identical to evaluating twice
  return s*(1-s)

def Mean_Squared_Error(d,y):
  """Return the mean of the element-wise squared difference between d and y."""
  residual = d - y
  squared = residual**2
  return np.mean(squared)
    
def Normilize_Data(data, maxValue = 255):
  """Scale `data` by 1 / maxValue and return it as a new float array.

  With the default maxValue of 255, values in 0..255 end up in 0..1. Keeping
  the inputs small stops large values from having more sway on the
  activations. The input is copied, so the caller's data is not modified.
  """
  # One vectorised division replaces the former per-row Python loop
  return np.array(data,dtype=float) / maxValue
  
def Flatten_Data(data):
  """Return a list in which every array of `data` is reshaped to a (size, 1) column vector."""
  return [np.reshape(sample,(sample.size,1)) for sample in data]

def Hot_Encode(data, numClasses = 10):
  """One-hot encode integer labels as (numClasses, 1) column vectors.

  e.g. with the default 10 classes a label of 0 becomes
  [1,0,0,0,0,0,0,0,0,0] (as a column). `numClasses` defaults to 10, the
  value that used to be hard-coded, so existing callers are unaffected.
  """
  encodedData = []
  for label in data:
    encoded = np.zeros((numClasses,1))
    encoded[label] = 1 # switch on the row matching the class index
    encodedData.append(encoded)
  return encodedData

def Get_Data():
  """Load MNIST and return (TrainingData, TestingData).

  Each returned value is a list of (input, label) pairs: inputs are flattened
  to column vectors and divided by 255, labels are one-hot encoded.
  """
  (trainImages,trainLabels),(testImages,testLabels) = mnist.load_data()
  
  # Flatten every image to a column vector and scale it into 0..1
  trainInputs = Normilize_Data(Flatten_Data(trainImages))
  testInputs = Normilize_Data(Flatten_Data(testImages))
  
  # One-hot encode the digit labels
  trainTargets = Hot_Encode(trainLabels)
  testTargets = Hot_Encode(testLabels)
  
  # Pair every input with its label
  return list(zip(trainInputs, trainTargets)), list(zip(testInputs, testTargets))

def Print_Confusion_Matrix(ds,ys):
  """Print the confusion matrix of desired classes `ds` versus predicted classes `ys`."""
  print(pd.DataFrame(confusion_matrix(ds,ys)))

def Print_Classification_Report(ds,ys):
  """Print the classification report (precision, recall and other metrics) for `ds` versus `ys`."""
  digitNames = [
    "Zero",
    "One",
    "Two",
    "Three",
    "Four",
    "Five",
    "Six",
    "Seven",
    "Eight",
    "Nine",
  ]
  report = classification_report(ds, ys, target_names=digitNames)
  print(report)

  
def main():
  """Load the data, train a 784-30-15-10 network and print train/test metrics."""
  separator = "-----------------------------------------------"
  TrainingData,TestingData = Get_Data()

  # Check that the data has the expected shape
  firstInput, firstLabel = TrainingData[0]
  print("Data Shapes")
  print("X shape:",firstInput.shape)
  print("Y shape:",firstLabel.shape)

  # Hyper-parameters of the run
  hyperParameters = {
    "Layers": [784,30,15,10],
    "LearningRate": 5,
    "BatchSize": 32,
    "Epochs": 150,
    "MomentiumFactor": 0.8,
    "MSE_Threshold": 0.25,
  }
  model = Model(**hyperParameters)

  print(separator)

  print("Beginning to train the network...")
  start = time.time()
  model.Train(TrainingData)
  elapsed = time.time() - start
  print("Training Complete")
  print("Training time: %ds"%(elapsed))

  # Same report for the training set and then the testing set
  reports = (
    ("Evaluating Training Data", TrainingData),
    ("Evaluating Testing Data", TestingData),
  )
  for title, dataset in reports:
    print(separator)
    print(title)
    correct , acc , mse , ds , ys = model.Evaluate_Data(dataset)
    print("Accuracy: %d/%d | %2.2f%% | MSE: %2.4f" %(correct,len(dataset),acc,mse))
    Print_Confusion_Matrix(ds,ys)
    Print_Classification_Report(ds,ys)
  

# Run the whole pipeline (load data, train, evaluate) when this cell executes
main()

Data Shapes
X shape: (784, 1)
Y shape: (10, 1)

Layer:  1
Weights:  (30, 784)
Bias:  (30, 1)
Layer:  2
Weights:  (15, 30)
Bias:  (15, 1)
Layer:  3
Weights:  (10, 15)
Bias:  (10, 1)
-----------------------------------------------
Beginning to train the network...
Epoch #10
Accuracy: 57248/60000 | 95.41% | MSE: 0.7843
Epoch #20
Accuracy: 57466/60000 | 95.78% | MSE: 0.7987
Epoch #30
Accuracy: 58065/60000 | 96.78% | MSE: 0.5694
Epoch #40
Accuracy: 58340/60000 | 97.23% | MSE: 0.4985
Epoch #50
Accuracy: 58825/60000 | 98.04% | MSE: 0.3424
Epoch #60
Accuracy: 58925/60000 | 98.21% | MSE: 0.3237
Epoch #70
Accuracy: 58990/60000 | 98.32% | MSE: 0.3056
Epoch #80
Accuracy: 59070/60000 | 98.45% | MSE: 0.2846
Epoch #90
Accuracy: 59108/60000 | 98.51% | MSE: 0.2776
Epoch #100
Accuracy: 59142/60000 | 98.57% | MSE: 0.2685
Epoch #110
Accuracy: 59165/60000 | 98.61% | MSE: 0.2586
Epoch #120
Accuracy: 59176/60000 | 98.63% | MSE: 0.2544
Epoch #130
Accuracy: 59185/60000 | 98.64% | MSE: 0.2507
Epoch #140
Accurac