<a href="https://colab.research.google.com/github/CrzPhil/IN3063-Coursework/blob/main/Coursework.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# IN3063 - Coursework

## Libraries

In [1]:
import math
import numpy as np
from numpy.random import default_rng
import matplotlib.pyplot as plt

## Sigmoid & ReLU

- By Aymen
- Reference:
    - https://towardsdatascience.com/lets-code-a-neural-network-in-plain-numpy-ae7e74410795
    - https://www.sharpsightlabs.com/blog/numpy-relu/
    - Lab 6

In [2]:
# Forward pass for Sigmoid
def forward_sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Backward pass for Sigmoid
def backward_sigmoid(x):
    return forward_sigmoid(x) * (1 - forward_sigmoid(x))

In [3]:
# Forward pass for ReLU
def forward_relu(x):
    return np.maximum(0, x)

# Backward pass for ReLU
def backward_relu(x):
    return np.where(x > 0, 1, 0)

## Softmax

- By Aymen
- Using the Numpy version
- Reference:
    - https://towardsdatascience.com/softmax-function-simplified-714068bf8156
    - https://en.wikipedia.org/wiki/Softmax_function
    - https://www.sharpsightlabs.com/blog/numpy-softmax/

In [4]:
# Forward pass for Softmax
def forward_softmax(x):
    exponential = np.exp(x - np.max(x))
    return exponential / exponential.sum() # calculates softmax probability

# Backward pass for Softmax
def backward_softmax(x):
    return np.reshape(forward_softmax(x) * (1 - forward_softmax(x)), (1, -1)) # computes gradient of softmax

# Testing:
x = np.array([100.0, 2000.0, 300.0]) # large numbers
print("Forward pass result:", forward_softmax(x))
print("Backward pass result:", backward_softmax(x))
print ("\n")

x = np.array([1.0, 2.0, 3.0]) # small numbers
print("Forward pass result:", forward_softmax(x))
print("Backward pass result:", backward_softmax(x))

Forward pass result: [0. 1. 0.]
Backward pass result: [[0. 0. 0.]]


Forward pass result: [0.09003057 0.24472847 0.66524096]
Backward pass result: [[0.08192507 0.18483645 0.22269543]]


## Dropout

- By Adam
- References
  - Lecture 7
  - https://stackoverflow.com/questions/70836518/typeerror-bad-operand-type-for-unary-list-python
  - https://stackoverflow.com/questions/25854380/enforce-arguments-to-a-specific-list-of-values

In [5]:
'''
Valid value structure constants.
They're defined here so they aren't recreated every time the function is ran.
'''
ACTIVATION_FUNCTIONS = {
    "sigmoid": [forward_sigmoid, backward_sigmoid],
    "relu": [forward_relu, backward_relu],
    "softmax": [forward_softmax, backward_softmax]
}

VALID_DIRECTIONS = ["forward", "backward"]

'''
Dropout function
    x = input vector
    probability is a float between 0.0 and 1.0
    activation_function is a string that corresponds to one of the key values above
        determines which activation function to use
    direction is a string that corresponds to one of the array values above
        determines whether to use a forward or backward pass activation function
    inverted is a boolean
        determines whether or not use inverted dropout
    train is a boolean
        determines whether to train or test
'''
def dropout(x, probability, activation_function, direction, inverted, train):
    if activation_function not in ACTIVATION_FUNCTIONS.keys():
        raise ValueError(f"Activation function must be one of {ACTIVATION_FUNCTIONS.keys()}")

    if direction not in VALID_DIRECTIONS:
        raise ValueError(f"Direction must be one of {VALID_DIRECTIONS}")

    value_index = 0 if direction == "forward" else 1

    H1 = ACTIVATION_FUNCTIONS[activation_function][value_index](x)
    mask = (np.random.rand(*H1.shape) < probability)

    if inverted:
        return H1 * (mask / probability) if train else H1
    else:
        return H1 * mask if train else H1 * probability

# Testing the function
#     Starting by defining x
x = np.array([2.0, 4.0, 7.0, 8.0])

#     Training
H1_dropped = dropout(x, 0.5, "sigmoid", "forward", False, True)
print(H1_dropped)

#     Testing
H1_dropped = dropout(x, 0.5, "sigmoid", "forward", False, False)
print(H1_dropped)

#     Training, inverted
H1_dropped = dropout(x, 0.5, "sigmoid", "forward", True, True)
print(H1_dropped)

#     Testing, inverted
H1_dropped = dropout(x, 0.5, "sigmoid", "forward", True, True)
print(H1_dropped)

[0.88079708 0.98201379 0.99908895 0.99966465]
[0.44039854 0.4910069  0.49954447 0.49983232]
[1.76159416 1.96402758 1.9981779  1.9993293 ]
[0.         1.96402758 1.9981779  1.9993293 ]


## Neural Network
- By Philip  
References:
  - Lecture 6
  - Lecture 7

In [6]:
# Reading the MNIST dataset as per http://yann.lecun.com/exdb/mnist/
import os
import struct

def read_idx(filename):
    with open(filename, 'rb') as file:
        # Read two bytes (big endian and unsigned)
        zero, data_type, dims = struct.unpack('>HBB', file.read(4))
        # Four byte integer big endian
        shape = tuple(struct.unpack('>I', file.read(4))[0] for d in range(dims))
        return np.frombuffer(file.read(), dtype=np.uint8).reshape(shape)

def load_mnist(path):
    # Paths to the files
    train_images_path = os.path.join(path, 'train-images-idx3-ubyte')
    train_labels_path = os.path.join(path, 'train-labels-idx1-ubyte')
    test_images_path = os.path.join(path, 't10k-images-idx3-ubyte')
    test_labels_path = os.path.join(path, 't10k-labels-idx1-ubyte')

    # Loading the datasets
    train_images = read_idx(train_images_path)
    train_labels = read_idx(train_labels_path)
    test_images = read_idx(test_images_path)
    test_labels = read_idx(test_labels_path)

    return train_images, train_labels, test_images, test_labels

In [7]:
# Example use
t_images, t_labels, test_images, test_labels = load_mnist('./dataset')

In [8]:
t_images[0]

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,
         18,  18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,
          0,   0],
       [  

In [None]:
class NeuralNet:
  def __init__(self, activation_function, layers: int, batch_size: int, neurons: list):
    """
    Initialises a new instance of the NeuralNet class.

    Parameters:
    activation_function (func): The activation function to be used in the network layers.
                                The function is used in all layers.
    layers (int): The number of layers in the neural network.
    batch_size (int): The size of the batches used in training. This affects how the data is split during training iterations.
    neurons (list of int): The number of neurons in each layer. This should be a list where each element represents
                            the number of neurons in the respective layer of the network.
    Returns:
    None
    """
    self.activation_function = activation_function
    self.layers = layers
    self.batch_size = batch_size
    self.neurons = neurons
    # Will be initialised once features are known
    self.weights = []
    self.biases = []

  def init_weights_and_biases(self, input_features):
    # Initialise weights and biases based on the layers, neurons, and input features
    # Fully connected through weights
    for i in range(self.layers):
      if i == 0:
          layer_weights = np.random.randn(self.neurons[i], input_features) * 0.01
      else:
          layer_weights = np.random.randn(self.neurons[i], self.neurons[i - 1]) * 0.01
      layer_bias = np.zeros((self.neurons[i], 1))
      self.weights.append(layer_weights)
      self.biases.append(layer_bias)

  def forward_pass(self, X):
    activations = [X]
    for i in range(self.layers):
      Z = np.dot(self.weights[i], activations[-1]) + self.biases[i]
      A = self.apply_activation(Z)
      activations.append(A)
    return activations

  def apply_activation(self, Z):
    return self.activation_function(Z)

  def backward_pass(self, Y, activations):
    m = Y.shape[1]
    n = len(self.weights)
    gradients = {}

    # Output layer
    dA = activations[-1] - Y  # Derivative of loss wrt (with respect to) output

    for i in reversed(range(n)):
      dZ = dA * self.activation_function[1](activations[i+1])
      dW = np.dot(dZ, activations[i].T) / m
      db = np.sum(dZ, axis=1, keepdims=True) / m
      if i > 0:
        dA = np.dot(self.weights[i].T, dZ)

      gradients['dW' + str(i + 1)] = dW
      gradients['db' + str(i + 1)] = db

    return gradients

  def update_weights_and_biases(self, gradients, learning_rate):
    for i in range(self.layers):
      pass

  def train_network(self, epochs, batch_size, learning_rate, X_train, Y_train):
    # Initialise weights & biases
    self.init_weights_and_biases(X_train)
    loss_across_epochs = []

    for epoch in range(epochs):
      # Maybe shuffle training data before batching it?

      # Iterate batches
      for i in range(0, X_train.shape[0], batch_size):
        X_batch = X_train[i:i + batch_size]
        Y_batch = Y_train[i:i + batch_size]

        # Forward pass over the batch
        activations = self.forward_pass(X_batch)

        # Backward pass over the batch (get gradients)
        gradients = self.backward_pass(activations, Y_batch)

        # Update weights & biases
        self.update_weights_and_biases(gradients, learning_rate)


  def evaluate_model(self, x_test, y_test, X_train, Y_train, loss_list):
    pass