<a href="https://colab.research.google.com/github/CadeHarger/portfolio/blob/main/Personal_Projects/Neural_Network_From_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Import Relevant Modules
import random
import math
import time
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
#from tensorflow.keras import layers

In [None]:
#@title Load the MNIST Dataset
np.set_printoptions(linewidth=200)  # let wide rows (e.g. one prediction per class) print on one line
(fdataSet, labelSet), (testDataSet, testLabelSet) = tf.keras.datasets.mnist.load_data()
# Flatten every image into one row with a single vectorised reshape instead of
# a per-image Python loop; the row width (784 for MNIST) is derived from the
# data rather than hard-coded.
dataSet = fdataSet.reshape(len(fdataSet), -1).astype(np.float32)
# Scale the raw pixel values by 1/255 so the inputs fall in [0, 1].
dataSet /= 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
class Node:
  """A trainable node: one weight per incoming connection plus a bias.

  Per-example pre-activations (``z``) and activations are kept in arrays that
  are sized by ``enableNP``. Gradients are collected with ``addWGradient`` /
  ``addBGradient`` and applied (averaged) by ``update``.
  """

  def __init__(self, count):
    """Create a node with ``count`` incoming weights, all initialised to zero."""
    self.bias = 0
    self.activation = []
    self.z = []
    self.weights = np.zeros((count), dtype=np.float32)
    self.wGradients = []
    self.bGradients = []

  def enableNP(self, batchSize):
    """Allocate the per-example ``z`` and activation arrays for ``batchSize`` examples."""
    self.activation = np.zeros((batchSize), dtype = np.float32)
    self.z = np.zeros((batchSize), dtype = np.float32)

  def setZ(self, zeta, example):
    """Store the pre-activation value for the given example index."""
    self.z[example] = zeta

  def setActivation(self, a, example):
    """Store the activation value for the given example index."""
    self.activation[example] = a

  def setWeights(self, w):
    """Replace the whole weight vector."""
    self.weights = w

  def addWGradient(self, gradient):
    """Queue one weight-gradient vector to be applied by the next ``update``."""
    self.wGradients.append(gradient)

  def addBGradient(self, gradient):
    """Queue one bias gradient to be applied by the next ``update``."""
    self.bGradients.append(gradient)

  def update(self, lr):
    """Apply the mean of the queued gradients scaled by ``lr``, then clear the queues.

    Does nothing to a parameter whose queue is empty, which avoids a division
    by zero that would otherwise turn the weights into NaN.
    """
    if self.wGradients:
      self.weights = np.subtract(self.weights, np.sum(self.wGradients, 0) * lr / len(self.wGradients))
    if self.bGradients:
      self.bias -= np.sum(self.bGradients) * lr / len(self.bGradients)
    # Reset the instance queues (not locals) so gradients from this step are
    # not averaged into every following step.
    self.wGradients = []
    self.bGradients = []
class InputNode:
  """Input-layer node: holds one input value per example and has nothing to train."""

  def __init__(self):
    # Scalar placeholder until enableNP allocates the per-example array.
    self.activation = 0

  def enableNP(self, batchSize):
    """Allocate a float32 activation slot for each of ``batchSize`` examples."""
    self.activation = np.zeros(batchSize, dtype=np.float32)

  def setInput(self, n, example):
    """Record input value ``n`` for the example at index ``example``."""
    self.activation[example] = n

  def setWeights(self, w):
    """No-op: present so input nodes accept the same calls as trainable nodes."""
    return None

  def update(self, lr):
    """No-op: input nodes have no parameters to update."""
    return None


class Model:
  """A fully connected feed-forward network assembled from per-node objects.

  ``layers`` holds one list of nodes per layer and ``activations`` holds the
  activation object for the layer at the same index (``None`` for the input
  layer). Typical use: ``addInput`` once, ``addLayer`` for each following
  layer, then ``train``.
  """

  def __init__(self, lf='squaredError'):
    """Create an empty model.

    lf: loss name. 'categoricalCrossEntropy' selects CategoricalCrossEntropy;
        'squaredError' (the default) and any unrecognised name select SquaredError.
    """
    self.layers = [] # Contains every layer in the network as lists of nodes
    self.activations = [] # Contains the activation for every layer (with the same indexes)
    self.values = [] # The key for one-hot encoded labels
    if lf == 'categoricalCrossEntropy':
      self.lossFunc = CategoricalCrossEntropy()
    else:
      self.lossFunc = SquaredError()

  def addLayer(self, units, activation):
    """Append a layer of ``units`` nodes, each with one weight per node of the previous layer.

    activation: 'relu' or 'softmax'; any other value selects the base
                ActivationFunction.
    Raises IndexError if no layer has been added yet (call addInput first).
    """
    if not self.layers:
      raise IndexError("addInput must be called before addLayer")
    layer = [Node(len(self.layers[-1])) for x in range(units)] #Create every neuron and weight
    self.layers.append(layer)
    if activation == 'relu':
      self.activations.append(Relu())
    elif activation == 'softmax':
      self.activations.append(Softmax())
    else:
      self.activations.append(ActivationFunction())

  def addInput(self, size):
    """Append an input layer of ``size`` input nodes (it has no activation object)."""
    self.layers.append([InputNode() for x in range(size)])
    self.activations.append(None)

  def randomizeWeights(self, length, batchSize):
    """Randomise every non-input node's weights and allocate per-example storage.

    Weights are drawn uniformly from [-1/sqrt(n), 1/sqrt(n)) where n is the
    size of the previous layer. ``length`` is accepted for backward
    compatibility and is not used.
    """
    for layer in range(len(self.layers)):
      for node in self.layers[layer]:
        if layer != 0:
          prevLayerLen = len(self.layers[layer - 1])
          bound = 1 / math.sqrt(prevLayerLen)
          node.setWeights(-bound + np.random.rand(prevLayerLen) * (2 * bound))
        node.enableNP(batchSize)

  def oneHotEncode(self, labelSet):
    """One-hot encode ``labelSet`` and record the label key in ``self.values``.

    Column k of the result corresponds to ``self.values[k]``; labels are
    appended to the key in the order they are first seen, so the columns are
    not necessarily in sorted label order.
    """
    for value in labelSet:
      if value not in self.values:
        self.values.append(value)
    temp = np.zeros((labelSet.shape[0], len(self.values)), dtype=np.int8)
    for x in range(labelSet.shape[0]):
      temp[x][self.values.index(labelSet[x])] = 1
    return temp

  def predict(self, data):
    """Run a forward pass for every row of ``data`` and return the output-layer activations.

    ``data`` may hold a batch or less, but not more: each row index is used as
    the example slot in the nodes' per-example arrays, which are sized by the
    batch size given to randomizeWeights/train.
    """
    predictions = np.zeros((data.shape[0], len(self.layers[-1])), dtype=np.float32) #2d array storing every prediction
    for example in range(len(data)):
      for x in range(data[example].shape[0]):
        self.layers[0][x].setInput(data[example][x], example)
      predictions[example] = self.calculate(self.layers, self.activations, example)
    return predictions

  def calculate(self, resultLayer, layerActivation, example):
    """Forward propagation by recursion over the layer list.

    resultLayer / layerActivation: the layers (and their activation objects) up
    to and including the layer whose output is wanted.
    Returns the activation vector of the last layer in ``resultLayer`` and
    stores z and the activation of every visited node for ``example``.
    """
    if len(resultLayer) == 1:
      inputs = np.zeros(len(resultLayer[0]), dtype=np.float32)
      for x in range(len(resultLayer[0])):
        inputs[x] = resultLayer[0][x].activation[example]
      return inputs
    prevLayerOutput = self.calculate(resultLayer[:-1], layerActivation[:-1], example) #retrieve the activations of the previous layer
    weitrix = np.zeros((len(resultLayer[-1]), prevLayerOutput.shape[0]), dtype=np.float32) # Each row is a node's weights
    biases = np.zeros((len(resultLayer[-1])), dtype=np.float32)
    for node in range(len(resultLayer[-1])):
      weitrix[node] = resultLayer[-1][node].weights
      biases[node] = resultLayer[-1][node].bias
    z = np.add(np.matmul(weitrix, prevLayerOutput), biases)
    # Write activations into a separate array: activation functions that read
    # the whole vector (softmax) must see the untouched z values, not a vector
    # that is being overwritten entry by entry.
    activated = np.zeros_like(z)
    for x in range(z.shape[0]):
      resultLayer[-1][x].setZ(z[x], example)
      activated[x] = layerActivation[-1].fx(z, x) # Applies the activation function
      resultLayer[-1][x].setActivation(activated[x], example)
    return activated

  def _backpropagate(self, prediction, target, example):
    """Queue weight and bias gradients on every non-input node for one example.

    Uses an element-wise chain rule: each node's gradient is its activation
    derivative times the gradient arriving at that node from the layer above
    (or from the loss, for the output layer).
    """
    upstream = self.lossFunc.dfx(prediction, target)
    for currentLayer in range(len(self.layers) - 1, 0, -1):
      layer = self.layers[currentLayer]
      prevLayer = self.layers[currentLayer - 1]
      zVector = np.array([node.z[example] for node in layer], dtype=np.float32)
      # Activations feeding this layer are the same for every node in it.
      prevActivations = np.array([prev.activation[example] for prev in prevLayer], dtype=np.float32)
      # Gradient w.r.t. the previous layer's activations, accumulated here and
      # handed down only after this layer has consumed its own upstream gradient.
      downstream = np.zeros((len(prevLayer)), dtype=np.float32)
      for nodeNo, node in enumerate(layer):
        bGradient = self.activations[currentLayer].dfx(zVector, nodeNo) * upstream[nodeNo]
        node.addWGradient(prevActivations * bGradient)
        node.addBGradient(bGradient)
        downstream = np.add(downstream, np.asarray(node.weights) * bGradient)
      upstream = downstream

  def train(self, epochs, dataSet, labelSet, batchSize, lr):
    """Train with mini-batch gradient descent.

    Weights are re-randomised at the start of every call. Examples beyond the
    last full batch are not used. 1-D labels are one-hot encoded when the
    output layer has more than one node (see oneHotEncode for the column
    order) and reshaped to a single column otherwise.
    """
    if len(labelSet.shape) == 1:
      if len(self.layers[-1]) != 1:
        labelSet = self.oneHotEncode(labelSet) # One-hot encode the labels if it needs it (detection flawed)
      else:
        labelSet = labelSet.reshape(-1, 1)
    self.randomizeWeights(len(self.layers[0]), batchSize)
    batchCount = dataSet.shape[0] // batchSize
    x_batches = np.zeros((batchCount, batchSize, dataSet.shape[1]), dtype=np.float32)
    y_batches = np.zeros((batchCount, batchSize, labelSet.shape[1]), dtype=np.float32)
    for x in range(batchCount):
      x_batches[x] = dataSet[batchSize * x: batchSize * x + batchSize]
      y_batches[x] = labelSet[batchSize * x: batchSize * x + batchSize]
    # TODO: print the loss after each batch and time each stage.
    # TODO: the random initialisation assumes inputs normalised to 0.0-1.0.
    for epoch in range(epochs):
      print("Epoch: ", epoch)
      for batch in range(x_batches.shape[0]):
        print("   Batch: ", batch)
        predictions = self.predict(x_batches[batch])
        for example in range(predictions.shape[0]):
          self._backpropagate(predictions[example], y_batches[batch][example], example)
        # Apply the computed gradients to every node
        for layer in self.layers:
          for node in layer:
            node.update(lr)


class ActivationFunction:
  """Identity (linear) activation; also the base class for the other activations.

  Every activation exposes ``fx(z, i)`` and ``dfx(z, i)`` where ``z`` is the
  pre-activation vector of a layer and ``i`` is the index of the node being
  evaluated.
  """
  def __init__(self):
    return
  def fx(self, z, i):
    """Return z[i] unchanged."""
    return z[i]
  def dfx(self, z, i=None):
    """Return the derivative of the identity, which is 1 for every node.

    ``i`` is accepted (and ignored) so this class can be called with the same
    ``dfx(z, i)`` signature as its subclasses; it stays optional so existing
    ``dfx(z)`` calls keep working.
    """
    return 1

class Relu(ActivationFunction):
  """Rectified linear unit, evaluated one node at a time."""
  def fx(self, z, i):
    """Return z[i] when it is positive, otherwise 0."""
    value = z[i]
    return value if value > 0 else 0
  def dfx(self, z, i):
    """Return 1 when z[i] is positive, otherwise 0."""
    isPositive = z[i] > 0
    return isPositive * 1

class Softmax(ActivationFunction):
  """Softmax activation over a layer's pre-activation vector, evaluated per node."""
  def fx(self, z, i):
    """Return the softmax probability of node ``i`` given the full vector ``z``.

    The maximum of ``z`` is subtracted before exponentiating. This leaves the
    result unchanged mathematically but keeps exp() from overflowing for large
    scores.
    """
    shifted = np.asarray(z, dtype=np.float64) - np.max(z)
    exps = np.exp(shifted)
    return exps[i] / np.sum(exps)
  def dfx(self, z, i):
    """Return d softmax_i / d z_i = p_i * (1 - p_i), the diagonal Jacobian entry.

    Summing the whole Jacobian row (diagonal plus the -p_i * p_j cross terms)
    is identically zero because the softmax outputs always sum to one, so that
    sum cannot be used as a gradient factor. Only the diagonal term is
    returned; the cross terms cannot be expressed through this per-node
    interface.
    """
    pi = self.fx(z, i)
    return pi * (1 - pi)


class LossFunction:
  """Base class for losses; both methods simply hand back the target ``y``.

  Subclasses override ``fx(a, y)`` (loss value) and ``dfx(a, y)`` (derivative
  with respect to ``a``).
  """
  def __init__(self):
    pass
  def fx(self, a, y):
    """Placeholder loss: ignores ``a`` and returns ``y``."""
    target = y
    return target
  def dfx(self, a, y):
    """Placeholder derivative: ignores ``a`` and returns ``y``."""
    target = y
    return target

class SquaredError(LossFunction):
  """Squared-error loss, applied element-wise to prediction ``a`` and target ``y``."""
  def fx(self, a, y):
    """Return (a - y) squared."""
    residual = a - y
    return residual * residual
  def dfx(self, a, y):
    """Return the derivative with respect to ``a``: 2 * (a - y)."""
    residual = a - y
    return 2 * residual

class CategoricalCrossEntropy(LossFunction):
  """Softmax cross-entropy between scores ``a`` and a one-hot target ``y``.

  Both methods apply softmax to ``a`` themselves, i.e. ``a`` is treated as
  unnormalised scores.
  NOTE(review): Model.train passes the output layer's activations as ``a``; if
  that layer already applies softmax the normalisation happens twice - confirm
  the intended pairing.
  """
  def fx(self, a, y):
    """Return the loss -sum(y * log(softmax(a))).

    The per-class terms are summed: with a one-hot ``y`` every non-target term
    is zero, so taking the minimum would always report a loss of 0. The
    log-softmax is computed with the maximum subtracted so exp() cannot
    overflow.
    """
    scores = np.asarray(a)
    shifted = scores - np.max(scores)
    logProbs = shifted - np.log(np.sum(np.exp(shifted)))
    return (-1 * y * logProbs).sum()
  def dfx(self, a, y):
    """Return the derivative with respect to the scores: softmax(a) - y."""
    scores = np.asarray(a)
    exps = np.exp(scores - np.max(scores))  # shift by the max to avoid overflow
    return exps / np.sum(exps) - y

# Build a 784 -> 100 -> 50 -> 10 classifier and train it for one epoch.
# NOTE(review): the output layer is softmax and CategoricalCrossEntropy.dfx
# applies exp/sum to its input again, so the normalisation is applied twice
# during training - confirm this pairing is intended.
model = Model(lf='categoricalCrossEntropy')
model.addInput(size=784)
model.addLayer(units=100, activation='relu')
model.addLayer(units=50, activation='relu')
model.addLayer(units=10, activation='softmax')
model.train(
  epochs=1,
  dataSet=dataSet,
  labelSet=labelSet,
  batchSize=400,
  lr=0.003,
)


Epoch:  0
   Batch:  0
   Batch:  1
   Batch:  2
   Batch:  3
   Batch:  4
   Batch:  5
   Batch:  6
   Batch:  7
   Batch:  8
   Batch:  9
   Batch:  10
   Batch:  11
   Batch:  12
   Batch:  13
   Batch:  14
   Batch:  15
   Batch:  16
   Batch:  17
   Batch:  18
   Batch:  19
   Batch:  20
   Batch:  21
   Batch:  22
   Batch:  23
   Batch:  24
   Batch:  25
   Batch:  26
   Batch:  27
   Batch:  28
   Batch:  29
   Batch:  30
   Batch:  31
   Batch:  32
   Batch:  33
   Batch:  34
   Batch:  35
   Batch:  36
   Batch:  37
   Batch:  38
   Batch:  39
   Batch:  40
   Batch:  41
   Batch:  42
   Batch:  43
   Batch:  44
   Batch:  45
   Batch:  46
   Batch:  47
   Batch:  48
   Batch:  49
   Batch:  50
   Batch:  51
   Batch:  52
   Batch:  53
   Batch:  54
   Batch:  55
   Batch:  56
   Batch:  57
   Batch:  58
   Batch:  59
   Batch:  60
   Batch:  61
   Batch:  62
   Batch:  63


KeyboardInterrupt: ignored

In [None]:
# Spot-check 20 training examples: print the label key (column order of the
# predictions), the raw prediction rows, and the true labels.
test = model.predict(dataSet[4000:4020])
print(model.values, test, labelSet[4000:4020], sep="\n")

[5, 0, 4, 1, 9, 2, 3, 6, 7, 8]
[[0.0973471  0.10171684 0.09842508 0.09840237 0.09686217 0.09354674 0.09587026 0.08968022 0.08964325 0.09280908]
 [0.09813985 0.10389046 0.09434298 0.09765258 0.10133725 0.09671469 0.08936594 0.09208387 0.09349604 0.08929063]
 [0.09683602 0.10607359 0.09458646 0.10036898 0.09826672 0.09272058 0.09127555 0.09059379 0.08894579 0.09260476]
 [0.09701217 0.10001724 0.09569116 0.09724838 0.10013254 0.09415543 0.0933549  0.09067732 0.0913     0.09227415]
 [0.09684304 0.10446884 0.09773659 0.09906532 0.10028784 0.09340028 0.08944467 0.093858   0.08965965 0.09233219]
 [0.09851065 0.10435416 0.09416638 0.09957847 0.09718162 0.0934089  0.09014022 0.09247489 0.08910544 0.09500491]
 [0.09896956 0.10154279 0.09565723 0.09924411 0.0987475  0.09400031 0.09117657 0.09273111 0.09192222 0.09203524]
 [0.09851333 0.10559402 0.09418648 0.0993282  0.10051779 0.09387708 0.0908221  0.09416704 0.09117511 0.09050097]
 [0.09915796 0.10419482 0.09653349 0.09846865 0.10043537 0.091872

In [None]:
def accuracy(predictions, labels, needsOneHot, values=None):
  """Return the fraction of rows in ``predictions`` whose top class matches ``labels``.

  predictions: one row of per-class scores per example; the predicted class is
               the column with the highest score (first one on ties).
  labels:      when ``needsOneHot`` is true, one class identifier per example;
               otherwise one one-hot row per example, compared by the position
               of its largest entry.
  needsOneHot: true when ``labels`` are plain class identifiers rather than
               one-hot rows.
  values:      optional key mapping a prediction column index to its class
               identifier (e.g. ``model.values``). When omitted, the column
               index itself is compared with the label.
  Returns 0.0 for an empty ``predictions``.
  """
  # TODO: look in book for the ML definition of accuracy.
  # TODO: create a user-friendly predict function (batch-size independent, no backprop prep).
  count = len(predictions)
  if count == 0:
    return 0.0
  total = 0.0
  for x in range(count):
    predicted = np.argmax(predictions[x])
    if needsOneHot:
      if values is not None:
        predicted = values[predicted]
      total += 1 * (predicted == labels[x])
    else:
      total += 1 * (predicted == np.argmax(labels[x]))

  return total / count

# Average the per-batch accuracy over the first `full` training examples.
total = 0.0
bat = 400     # evaluation chunk size; predict handles at most one training batch at a time
full = 20000  # number of examples to evaluate
for x in range(0, full, bat):
  print(x)
  # Prediction columns follow model.values (labels in first-seen order), not
  # the digit value, so translate each label into its column index before
  # comparing it with the index of the highest-scoring column.
  classIndices = np.array([model.values.index(label) for label in labelSet[x:x + bat]])
  total += accuracy(model.predict(dataSet[x:x + bat]), classIndices, True)
print(total / (full / bat))

0
400
800
1200
1600
2000
2400
2800
3200
3600
4000
4400
4800
5200
5600
6000
6400
6800
7200
7600
8000
8400
8800
9200
9600
10000
10400
10800
11200
11600
12000
12400
12800
13200
13600
14000
14400
14800
15200
15600
16000
16400
16800
17200
17600
18000
18400
18800
19200
19600
0.12774999999999997
