<a href="https://colab.research.google.com/github/MartinSoldani/neural-networks-and-deep-learning/blob/master/NEW_COMP4329_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
import os

# Attach Google Drive to the Colab runtime so the shared folder is reachable
drive.mount('/content/drive')

# Location of the shared project folder, and of the data directory inside it
project_path = '/content/drive/MyDrive/A1 Deep Learning Shared'
data_path = os.path.join(project_path, 'data')

# Report whether the shared folder is visible; list its contents when it is
if not os.path.exists(project_path):
    print(f"❌ Shared folder not found at: {project_path}")
else:
    print(f"✅ Shared folder is found: {project_path}")
    print("📁 Contents:")
    print(os.listdir(project_path))




Mounted at /content/drive
✅ Shared folder is found: /content/drive/MyDrive/A1 Deep Learning Shared
📁 Contents:
['data', '.ipynb_checkpoints', 'NEW COMP4329 Code.ipynb']


In [2]:
import numpy as np
import pandas as pd
import random
import time

In [3]:
# Read the four .npy arrays from the data directory, in the same order as before:
# test features, test labels, train features, train labels.
test_data, test_label, train_data, train_label = (
    np.load(os.path.join(data_path, filename))
    for filename in (
        'test_data.npy',
        'test_label.npy',
        'train_data.npy',
        'train_label.npy',
    )
)

In [4]:
class Activation(object):
    """Pairs an activation function with its derivative.

    After construction, ``f`` maps pre-activations to activations and
    ``f_deriv`` returns the derivative. Every derivative here is written in
    terms of the activation *output* ``a = f(x)``, not the pre-activation ``x``.

    Supported names are 'tanh' (default), 'logistic' and 'relu'. ``None``
    selects the identity (no non-linearity). Any other name raises
    ``ValueError`` instead of leaving ``f``/``f_deriv`` undefined.
    """

    def __tanh(self, x):
        return np.tanh(x)

    def __tanh_deriv(self, a):
        # d/dx tanh(x) = 1 - tanh(x)^2, with a = tanh(x)
        return 1.0 - a**2

    def __logistic(self, x):
        return 1.0 / (1.0 + np.exp(-x))

    def __logistic_deriv(self, a):
        # d/dx sigma(x) = sigma(x) * (1 - sigma(x)), with a = sigma(x)
        return a * (1 - a)

    def __relu(self, x):
        return np.maximum(0, x)

    def __relu_deriv(self, a):
        # a = relu(x) is positive exactly where x is positive
        return 1 * (a > 0)

    def __linear(self, x):
        return x

    def __linear_deriv(self, a):
        # The identity has slope 1 everywhere
        return np.ones_like(a, dtype=float)

    def __init__(self, activation='tanh'):
        """
        :type activation: string or None
        :param activation: 'tanh', 'logistic', 'relu', or None for identity

        :raises ValueError: if ``activation`` is not one of the supported names
        """
        table = {
            'logistic': (self.__logistic, self.__logistic_deriv),
            'tanh': (self.__tanh, self.__tanh_deriv),
            'relu': (self.__relu, self.__relu_deriv),
            None: (self.__linear, self.__linear_deriv),
        }
        if activation not in table:
            raise ValueError(
                "Unknown activation %r; expected 'logistic', 'tanh', 'relu' or None"
                % (activation,)
            )
        self.f, self.f_deriv = table[activation]

In [5]:
class HiddenLayer(object):
    """Fully-connected layer computing ``activation(dot(input, weights) + bias)``."""

    def __init__(self,
                 num_inputs,
                 num_neurons,
                 activation_last_layer,
                 activation,
                 weights=None,
                 bias=None):
        """
        Build one fully-connected layer. The weight matrix has shape
        (num_inputs, num_neurons) and the bias vector has shape (num_neurons,).

        :type num_inputs: int
        :param num_inputs: dimensionality of input

        :type num_neurons: int
        :param num_neurons: number of neurons (units) in this layer

        :type activation_last_layer: string or None
        :param activation_last_layer: name of the activation of the layer that
                           feeds this one; its derivative is used in backward().
                           Falsy (e.g. None) for the first layer, in which case
                           backward() does not propagate delta any further.

        :type activation: string or None
        :param activation: Non linearity to be applied in this layer;
                           None leaves the layer linear

        :type weights: array-like or None
        :param weights: optional initial weights of shape
                           (num_inputs, num_neurons); copied, not aliased

        :type bias: array-like or None
        :param bias: optional initial bias of shape (num_neurons,); copied

        :raises ValueError: if ``weights`` or ``bias`` has the wrong shape
        """
        self.input = None
        self.output = None

        # forward() treats None as "no non-linearity", so do not build an
        # Activation for it.
        self.activation = Activation(activation).f if activation else None

        self.activation_deriv = None
        if activation_last_layer:
            self.activation_deriv = Activation(activation_last_layer).f_deriv

        if weights is None:
            # this range is based on a popular method (Xavier/Glorot init)
            # that helps prevent exploding/vanishing gradients
            limit = np.sqrt(6. / (num_inputs + num_neurons))
            self.weights = np.random.uniform(
                    low=-limit,
                    high=limit,
                    size=(num_inputs, num_neurons)
            )
        else:
            # Copy so the in-place parameter updates never touch the caller's array
            self.weights = np.array(weights, dtype=float)
            if self.weights.shape != (num_inputs, num_neurons):
                raise ValueError(
                    "weights must have shape %r, got %r"
                    % ((num_inputs, num_neurons), self.weights.shape)
                )

        if bias is None:
            self.bias = np.zeros(num_neurons,)
        else:
            self.bias = np.array(bias, dtype=float)
            if self.bias.shape != (num_neurons,):
                raise ValueError(
                    "bias must have shape %r, got %r"
                    % ((num_neurons,), self.bias.shape)
                )

        self.grad_W = np.zeros(self.weights.shape)
        self.grad_b = np.zeros(self.bias.shape)


    def forward(self, input):
        '''
        Compute this layer's output for one input and remember the input.

        :type input: numpy.array
        :param input: array whose last dimension is num_inputs
        :return: the layer output (also stored in self.output)
        '''
        z = np.dot(input, self.weights) + self.bias
        self.output = (
            z if self.activation is None
            else self.activation(z)
        )

        # We save the input so that it can be used later during the backward pass
        # FORMULA BP4
        # To compute the gradient of the loss with respect to the weights we need:
        #   The original input to the layer
        #   The error signal (delta) coming from the next layer
        self.input = input
        return self.output

    def backward(self, delta, output_layer=False):
        '''
        Store this layer's gradients and return the delta for the layer below.

        Must be called after forward(), which records self.input.

        :param delta: gradient of the loss with respect to this layer's
                      pre-activation (for the output layer, the activation
                      derivative is expected to be folded in by the loss)
        :param output_layer: accepted for backward compatibility and ignored.
                      The incoming delta has the same meaning for every layer,
                      so the output layer must propagate it like any other;
                      skipping that step hands the previous layer a delta of
                      the wrong width.
        :return: delta with respect to the previous layer's pre-activation, or
                 the incoming delta unchanged when there is no previous
                 activation (first layer)
        '''
        # compute BP4
        self.grad_W = np.atleast_2d(self.input).T.dot(np.atleast_2d(delta))
        # compute BP3
        self.grad_b = delta
        if self.activation_deriv is not None:
            # compute BP2: self.input is the previous layer's activation output,
            # which is the argument activation_deriv is applied to
            delta = delta.dot(self.weights.T) * self.activation_deriv(self.input)

        return delta

In [6]:
class MLP:
    """
    Multi-layer perceptron built from HiddenLayer objects and trained one
    sample at a time with gradient descent on a squared-error loss.
    """
    def __init__(self, layers, activation=[None,'tanh','tanh']):
        """
        :param layers: A list containing the number of units in each layer.
        Should be at least two values
        :param activation: One activation name per entry of ``layers``
        ("logistic", "tanh" or "relu"); the first entry belongs to the input
        and is normally None. Entries beyond len(layers) are ignored.
        :raises ValueError: if fewer activations than layer sizes are given
        """
        if len(activation) < len(layers):
            raise ValueError(
                "need one activation per layer size: got %d activations for %d sizes"
                % (len(activation), len(layers))
            )

        ### initialize layers
        self.layers = []
        self.params = []

        # Copy so the shared default list is never aliased, and trim so that
        # self.activation[-1] is always the output layer's activation.
        self.activation = list(activation[:len(layers)])
        for i in range(len(layers) - 1):
            self.layers.append(
                HiddenLayer(layers[i], layers[i+1], self.activation[i], self.activation[i+1])
            )

    # forward progress: pass the information through the layers and out the results of final output layer
    def forward(self, input):
        """Feed ``input`` through every layer in order and return the last output."""
        output = input
        for layer in self.layers:
            output = layer.forward(output)
        return output

    # define the objection/loss function, we use mean square error (MSE) as the loss
    # you can try other loss, such as cross entropy.
    # when you try to change the loss, you should also consider the backward formula for the new loss as well!
    def criterion_MSE(self, y, y_hat):
        """
        Squared-error loss for a single example.

        :param y: target for one example
        :param y_hat: network output for the same example
        :return: (loss, delta). ``loss`` is the mean of the squared errors.
                 ``delta`` is -2 * (y - y_hat) times the output activation's
                 derivative at y_hat (FORMULA BP1). The 1/n factor of the mean
                 is not applied to delta, so it acts as a scale on the
                 learning rate.
        """
        # get activation derivative of last layer
        activation_deriv = Activation(self.activation[-1]).f_deriv
        # MSE FOR JUST ONE EXAMPLE AT A TIME (NO SUMMATION WHATSOEVER)
        error = y - y_hat
        loss = np.mean(error**2)
        # (z1 - t)^2 -> 2 (z1 - t) * act_deriv(z1)
        delta = (-2 * error) * activation_deriv(y_hat)
        return loss, delta

    # backward progress
    def backward(self, delta):
        """
        Back-propagate ``delta`` from the output layer down to the first layer.

        Every layer, the output layer included, must hand the previous layer a
        delta mapped through its own weights, so all layers are called the
        same way.
        """
        for layer in reversed(self.layers):
            delta = layer.backward(delta)

    # update the network weights after backward.
    # make sure you run the backward function before the update function!
    def update(self, lr):
        """Take one gradient-descent step of size ``lr`` on every layer, in place."""
        for layer in self.layers:
            layer.weights -= lr * layer.grad_W
            layer.bias -= lr * layer.grad_b

    # define the training function
    # it will return all losses within the whole training process.
    def fit(self, input, target, learning_rate=0.1, epochs=100):
        """
        Online learning.
        :param input: Input data or features
        :param target: Input targets
        :param learning_rate: parameters defining the speed of learning
        :param epochs: number of times the dataset is presented to the network for learning
        :return: array of length ``epochs`` holding the mean per-sample loss of each epoch
        :raises ValueError: if input and target have different numbers of samples
        """
        input = np.array(input)
        target = np.array(target)
        if input.shape[0] != target.shape[0]:
            raise ValueError(
                "input and target must have the same number of samples: %d != %d"
                % (input.shape[0], target.shape[0])
            )

        num_samples = input.shape[0]
        to_return = np.zeros(epochs)

        for k in range(epochs):
            loss = np.zeros(num_samples)

            for it in range(num_samples):
                # samples are drawn with replacement, one per update
                i = np.random.randint(num_samples)

                # forward pass
                y_hat = self.forward(input[i])

                # backward pass
                # calculate BP1 here: cost of last layer (output layer)
                loss[it], delta = self.criterion_MSE(target[i], y_hat)
                self.backward(delta)

                # update
                self.update(learning_rate)

            to_return[k] = np.mean(loss)
        return to_return

    # define the prediction function
    # we can use predict function to predict the results of new data, by using the well-trained network.
    def predict(self, x):
        """
        Run the network on every row of ``x``.

        :param x: array-like with one sample per row
        :return: array of shape (n_samples,) when the network has a single
                 output unit, otherwise (n_samples, n_outputs)
        """
        x = np.array(x)
        if x.shape[0] == 0:
            return np.zeros(0)

        # One row per sample; a scalar slot cannot hold a multi-unit output
        outputs = np.array([np.ravel(self.forward(sample)) for sample in x], dtype=float)
        if outputs.shape[1] == 1:
            return outputs[:, 0]
        return outputs

In [None]:
# Hyperparameters
SEED = 42
LAYER_NEURONS = [128, 3, 2, 10]
LAYER_ACTIVATION_FUNCS = [None, 'relu', 'relu', 'relu']

# Weight initialisation and sample selection both draw from NumPy's global RNG;
# seeding it makes a Restart & Run All reproduce the same loss curve.
np.random.seed(SEED)

nn = MLP(LAYER_NEURONS, LAYER_ACTIVATION_FUNCS)

# NOTE(review): the network emits 10 values per sample and the loss subtracts
# them from train_label element-wise. Confirm train_label is one-hot encoded
# (shape (n_samples, 10)); integer class ids would be broadcast against every
# output unit.
t0 = time.time()
trial1 = nn.fit(train_data, train_label)
t1 = time.time()

print("============= Model Build Done =============")
print(f"Time taken to build model: {round(t1 - t0, 4)} seconds.")


Time taken to build model: 262.7205 seconds.
