In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import joblib


In [9]:
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_grad(x):
    return x * (1 - x)

def tanh(x):
    return np.tanh(x)

def tanh_grad(x):
    return 1 - x ** 2

def relu(x):
    return np.maximum(0, x)

def relu_grad(x):
    return np.where(x > 0, 1, 0)

def leaky_relu(x):
    return np.where(x > 0, x, x * 0.01)

def leaky_relu_grad(x):
    return np.where(x > 0, 1, 0.01)

def linear(x):
    return x

def linear_grad(x):
    return np.ones_like(x)

def softmax(x):
    e_x = np.exp(x - x.max(axis=0, keepdims=True))
    return e_x / e_x.sum(axis=0, keepdims=True)

def weight_zero(number, prev):
    return np.zeros((number, prev))

def weight_random(number, prev):
    return np.random.uniform(-0.5, 0.5, (number, prev))

def weight_normal(number, prev):
    return np.random.normal(0, 1, (number, prev))

class neuralnetwork:

    def __init__(self, layers, Layer_sizes, lr, activation, initialisation, epochs, batch_size):
        self.layers = layers
        self.Layer_sizes = Layer_sizes
        self.lr = lr
        self.activation = activation
        self.initialisation = initialisation
        self.epochs = epochs
        self.batch_size = batch_size
        self.weights = []
        self.biases = []

    def weight_initialisation(self, number, prev):
        if self.initialisation == 'zero':
            xyy=weight_zero(number, prev)
            return xyy
        elif self.initialisation == 'random':
            xyy=weight_random(number, prev)
            return xyy
        elif self.initialisation == 'normal':
            xyy=weight_normal(number, prev)
            return xyy
        else:
            print("Invalid initialisation")
            br
            return None

    def activation_gradient(self, x, derivative=False):
        if derivative:
            if self.activation == 'sigmoid':
                return sigmoid_grad(x)
            elif self.activation == 'tanh':
                return tanh_grad(x)
            elif self.activation == 'relu':
                return relu_grad(x)
            elif self.activation == 'leaky_relu':
                return leaky_relu_grad(x)
            elif self.activation == 'linear':
                return linear_grad(x)
            else:
                print("Invalid activation")
                return None
        else:
            if self.activation == 'sigmoid':
                return sigmoid(x)
            elif self.activation == 'tanh':
                return tanh(x)
            elif self.activation == 'relu':
                return relu(x)
            elif self.activation == 'leaky_relu':
                return leaky_relu(x)
            elif self.activation == 'linear':
                return linear(x)
            elif self.activation == 'softmax':
                return softmax(x)
            else:
                print("Invalid activation")
                return None

    def forward_propagation(self, X):
        input = [X]
        for i in range(len(self.weights) - 1):
            yy=np.dot(self.weights[i], input[-1])
            input_eqn = self.biases[i] + yy
            gradient_descent = self.activation_gradient(input_eqn, derivative=False)
            input.append(gradient_descent)
        f=np.dot(self.weights[-1], input[-1])
        input_eqn = self.biases[-1] + f
        input.append(softmax(input_eqn))
        return input

    def backward_propagation(self, X_batch, y_batch):
        derivative_1 = self.forward_propagation(X_batch)[-1] - y_batch
        derivatives = []
        derivatives.append(derivative_1 / self.batch_size)

        derivative_1 = derivatives[0]
        for i in range(len(self.weights) - 2, -1, -1):
            gg=self.weights[i + 1].T.dot(derivative_1)
            hh=self.activation_gradient(self.forward_propagation(X_batch)[i + 1], derivative=True)
            derivative_2 = gg * hh
            derivatives.append(derivative_2 / self.batch_size)
            derivative_1 = derivative_2

        derivative_1 = derivatives[0]
        a=np.dot(derivative_1, self.forward_propagation(X_batch)[-2].T)
        self.weights[-1] += -self.lr * a
        b=np.sum(derivative_1)
        self.biases[-1] += -self.lr * b

        k = 1
        i = len(self.weights) - 2
        while i >= 0:
            derivative_2 = derivatives[k]
            c=np.dot(derivative_2, self.forward_propagation(X_batch)[i].T)
            self.weights[i] += -self.lr * c
            d=np.sum(derivative_2)
            self.biases[i] += -self.lr * d
            k += 1
            i -= 1

    def calculate_losses(self, X, y):
        input = self.forward_propagation(X)
        loss = np.sum(-y * np.log(input[-1])) / X.shape[1]
        return loss
    

    def plot_losses(self, loss_train, loss_val, save_path=None):
        plt.title('Loss for activation function ' + self.activation + ' and initialization ' + self.initialisation)
        plt.plot(loss_train, label='train')
        plt.plot(loss_val, label='val')
        plt.legend()

        if save_path is None:
            # Automatically generate file name based on activation and initialization
            file_name = f'loss_plot_{self.activation}_{self.initialisation}.png'
            save_path = os.path.join(os.getcwd(), file_name)

        plt.savefig(save_path)
        plt.close()
    def fit(self, X_train, y_train, X_test, y_test):
        print("Started Training")
        loss_train = []
        loss_val = []
        prev = X_train.shape[0]
        for j in self.Layer_sizes:
            self.weights.append(self.weight_initialisation(j, prev))
            self.biases.append(np.zeros((j, 1)))
            prev = j

        self.weights.append(self.weight_initialisation(y_train.shape[0], prev))
        self.biases.append(np.zeros((y_train.shape[0], 1)))

        j = 0
        while j < self.epochs:
            start = 0
            while start < X_train.shape[1]:
                end = start + self.batch_size
                part_x = X_train[:, start:end]
                part_y = y_train[:, start:end]

                input = self.forward_propagation(part_x)
                self.backward_propagation(part_x, part_y)

                start = end  

            print("Epoch: ", j, "Train Loss: ", self.calculate_losses(X_train, y_train))
            print("Epoch: ", j, "Val Loss: ", self.calculate_losses(X_test, y_test))
            loss_train.append(self.calculate_losses(X_train, y_train))
            loss_val.append(self.calculate_losses(X_test, y_test))

            j += 1


        self.plot_losses(loss_train, loss_val)

        print("Training completed")

    def predict_forward_pass(self, input_data):
        input = [input_data]
        for i in range(len(self.weights) - 1):
            yy=np.dot(self.weights[i], input[-1])
            input_eqn = self.biases[i] + yy
            input.append(self.activation_gradient(input_eqn, derivative=False))

        input_eqn = self.biases[-1] + np.dot(self.weights[-1], input[-1])
        input.append(softmax(input_eqn))
        return input

    def _make_predictions(self, X_test):
        predictions = []
        for i in X_test:
            i.shape += (1,)
            input = self.predict_forward_pass(i)
            predictions.append(input[-1].argmax())
        return predictions

    def predict(self, X_test):
        return self._make_predictions(X_test)
    
    def score(self, Y_PRED, y_test):
 
        accuracy = 0
        i = 0
        accuracy = 0

        while i < len(Y_PRED):
            if Y_PRED[i] == y_test[i].argmax():
                accuracy += 1
            i += 1

        return accuracy / len(Y_PRED)

    def forward_pass_predict_prob(self, input_data):
        input = [input_data]
        for i in range(len(self.weights) - 1):
            input_eqn = self.biases[i] + np.dot(self.weights[i], input[-1])
            input.append(self.activation_gradient(input_eqn, derivative=False))
        input_eqn = self.biases[-1] + np.dot(self.weights[-1], input[-1])
        input.append(softmax(input_eqn))
        return input

    def predict_proba_single(self, input_data):
        input_data.shape += (1,)
        input = self.forward_pass_predict_prob(input_data)
        return input[-1]

    def predict_proba(self, X_test):
        predictions = [self.predict_proba_single(i) for i in X_test]
        return predictions
    def save_model(self, filename):
        model_info = {
            'layers': self.layers,
            'Layer_sizes': self.Layer_sizes,
            'lr': self.lr,
            'activation': self.activation,
            'initialisation': self.initialisation,
            'epochs': self.epochs,
            'batch_size': self.batch_size,
            'weights': self.weights,
            'biases': self.biases
        }

        joblib.dump(model_info, filename)


In [7]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)
x_data, y_data = mnist.data, mnist.target.astype(int)
x_data = x_data / 255.0
x_data = x_data.values.reshape((-1, 28 * 28))
y_data = np.eye(10)[y_data]
x_train, x_test = x_data[:60000], x_data[60000:]
y_train, y_test = y_data[:60000], y_data[60000:]

  warn(


In [10]:
activation=["sigmoid","tanh","relu","leaky_relu","linear"]
initialisation=["zero","random","normal"]
for i in activation:
    for j in initialisation:
        print(i,j)
        nn=neuralnetwork(layers=4, Layer_sizes=[256, 128, 64, 32], lr=0.1, activation=i, initialisation=j, epochs=100, batch_size=128)
        nn.fit(x_train.T,y_train.T,x_test.T,y_test.T)
        y_pred=nn.predict(x_test)
        nn.score(y_pred,y_test)
        nn.save_model(i+"_"+j+".joblib")

sigmoid zero
Started Training
Epoch:  0 Train Loss:  2.3015161070048085
Epoch:  0 Val Loss:  2.3014862072172457
Epoch:  1 Train Loss:  2.3015160744464884
Epoch:  1 Val Loss:  2.3014861583254462
Epoch:  2 Train Loss:  2.3015160420438496
Epoch:  2 Val Loss:  2.301486109615177
Epoch:  3 Train Loss:  2.301516009796685
Epoch:  3 Val Loss:  2.3014860610862287
Epoch:  4 Train Loss:  2.301515977704788
Epoch:  4 Val Loss:  2.301486012738391
Epoch:  5 Train Loss:  2.3015159457679504
Epoch:  5 Val Loss:  2.3014859645714525
Epoch:  6 Train Loss:  2.3015159139859653
Epoch:  6 Val Loss:  2.301485916585203
Epoch:  7 Train Loss:  2.3015158823586246
Epoch:  7 Val Loss:  2.3014858687794293
Epoch:  8 Train Loss:  2.3015158508857176
Epoch:  8 Val Loss:  2.3014858211539195
Epoch:  9 Train Loss:  2.301515819567036
Epoch:  9 Val Loss:  2.3014857737084617
Epoch:  10 Train Loss:  2.3015157884023716
Epoch:  10 Val Loss:  2.3014857264428423
Epoch:  11 Train Loss:  2.3015157573915146
Epoch:  11 Val Loss:  2.30148

  e_x = np.exp(x - x.max(axis=0, keepdims=True))


Epoch:  0 Train Loss:  nan
Epoch:  0 Val Loss:  nan
Epoch:  1 Train Loss:  nan
Epoch:  1 Val Loss:  nan
Epoch:  2 Train Loss:  nan
Epoch:  2 Val Loss:  nan
Epoch:  3 Train Loss:  nan
Epoch:  3 Val Loss:  nan
Epoch:  4 Train Loss:  nan
Epoch:  4 Val Loss:  nan
Epoch:  5 Train Loss:  nan
Epoch:  5 Val Loss:  nan
Epoch:  6 Train Loss:  nan
Epoch:  6 Val Loss:  nan
Epoch:  7 Train Loss:  nan
Epoch:  7 Val Loss:  nan
Epoch:  8 Train Loss:  nan
Epoch:  8 Val Loss:  nan
Epoch:  9 Train Loss:  nan
Epoch:  9 Val Loss:  nan
Epoch:  10 Train Loss:  nan
Epoch:  10 Val Loss:  nan
Epoch:  11 Train Loss:  nan
Epoch:  11 Val Loss:  nan
Epoch:  12 Train Loss:  nan
Epoch:  12 Val Loss:  nan
Epoch:  13 Train Loss:  nan
Epoch:  13 Val Loss:  nan
Epoch:  14 Train Loss:  nan
Epoch:  14 Val Loss:  nan
Epoch:  15 Train Loss:  nan
Epoch:  15 Val Loss:  nan
Epoch:  16 Train Loss:  nan
Epoch:  16 Val Loss:  nan
Epoch:  17 Train Loss:  nan
Epoch:  17 Val Loss:  nan
Epoch:  18 Train Loss:  nan
Epoch:  18 Val Loss: