In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris, load_digits
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer, OneHotEncoder

class MatrixMultiplicationLayer:
    def __init__(self, X, W):
        self.X = X
        self.W = W
        
    def forward(self):
        self.output = np.dot(self.X, self.W)

    def backward(self):
        self.doutput_dX = (self.W)
        self.doutput_dW = (self.X).T 

class BiasAdditionLayer:
    
    def __init__(self, output : np.ndarray , B : np.ndarray):
        self.B = B
        self.output = output
    
    def forward(self):
        self.output = self.output + self.B

    def backward(self):
        self.doutput_dB = np.identity(self.B.shape[1])

class MeanSquaredLossLayer:

    def __init__(self, target: np.ndarray, predicted: np.ndarray):

        self.predicted = predicted
        self.target = target 
        
    def forward(self):
        error = self.predicted - self.target
        self.L = np.mean(np.square(error))

    def backward(self):
        self.dL_dpredicted = (2 / len(self.target)) * (self.predicted - self.target).T
        
# softmax(x)i = e^(x_i)/∑ j={1,n}e^(x_j)
# Here,softmax(x)i represents the i-th element of the output vector after applying the softmax function to 
# x, and x_i represents the i-th element of the input vector x. The softmax function normalizes the values of 
# x into a probability distribution where each element of the output vector 
# softmax(x) is in the range [0, 1] and the sum of all elements equals 1.

class SoftmaxLayer:

    def __init__(self, Z):
        self.Z = Z 
        
    def forward(self):
        exp_X = np.exp(self.Z - np.max(self.Z, axis=1, keepdims=True))
        self.output = exp_X / np.sum(exp_X, axis=1, keepdims=True)

    def backward(self):
        self.doutput_dZ = np.diag(self.output.reshape(-1)) - (self.output.T)@((self.output))
        
# sigmoid(x)= 1/1+e^−x
# Here,sigmoid(x) represents the output of the sigmoid function for input x. The sigmoid function maps any 
# real-valued number to the range [0, 1]. It has an S-shaped curve, with the function outputting values 
# close to 0 for large negative inputs and values close to 1 for large positive inputs. It is often used 
# to convert raw scores into probabilities in binary classification tasks or as an activation function 
# in neural networks

class SigmoidLayer:

    def __init__(self, Z):
        self.Z = Z
        
    def forward(self):
        self.output = 1 / (1 + np.exp(-self.Z))

    def backward(self):
        self.doutput_dZ = np.diag(np.multiply(self.output,(1 - self.output)).reshape(-1))
        
# CrossEntropyLoss= (1/N) * ∑ i={1,N} ∑ c={1,C} (yi,c * log(p i,c)) 
# Here:

# N is the number of samples.

# C is the number of classes.

# yi,c is a binary indicator (0 or 1) for whether class c is the correct classification for sample i.

# pi,c is the predicted probability that sample i belongs to class c.

# The Cross-Entropy Loss penalizes models more heavily when they make large errors in classification, 
# as the logarithm term amplifies the loss for confident, incorrect predictions. It is commonly used as 
# the loss function in multi-class classification problems, especially when combined with softmax activation 
# in the output layer of neural networks.

class CrossEntropyLossLayer:

    def __init__(self, target,predicted):
        self.target = target
        self.predicted = predicted
        
    def forward(self):
        epsilon = 1e-15
        self.L = -np.sum(self.target * np.log(self.predicted + epsilon))
    def backward(self):
        self.dL_dpredicted =  -1*((self.target) / (len(self.predicted)+1e-40)).T

# LinearActivation(x)=x
# While Linear Activation is rarely used within hidden layers of neural networks due to its inability 
# to introduce non-linearity, it is often used as the activation function in the output layer of regression 
# models, where the network is tasked with predicting continuous values.
class LinearActivation:

    def __init__(self, Z):
        self.Z = Z 
        
    def forward(self):
        self.output = self.Z
         
    def backward(self):
        self.doutput_dZ = np.identity(self.Z.shape[1])
        
# TanhActivation(x)= (e^x - e^−x) /(e^x + e^−x)
# The Tanh Activation function is often used in hidden layers of neural networks to introduce non-linearity. 
# It is particularly useful when dealing with data that has negative values since it can map negative inputs to 
# negative outputs.

class TanhActivation:

    def __init__(self, Z):
        self.Z = Z
        
    def forward(self):
        self.output = np.tanh(self.Z)
    
    def backward(self):
        self.doutput_dZ =  np.diag(1 - self.output.reshape(-1)**2)
# ReLU(x)=max(0,x)
# ReLU has become the default choice for many neural network architectures due to its simplicity and 
# effectiveness in combating the vanishing gradient problem during training. It is particularly effective in 
# deep neural networks, where it helps mitigate the vanishing gradient problem by allowing gradients to flow 
# more freely during backpropagation.

class ReLUActivation:

    def __init__(self, Z): 
        self.Z = Z 
        self.Leak = 0.01
        
    def forward(self):
        self.output = np.maximum(0,self.Z)
      
    def backward(self):
        self.doutput_dZ = np.diag([1. if x>=0 else 0.01 for x in self.output.reshape(-1)])

In [2]:
from sklearn.model_selection import train_test_split

def load_data_sklearn(dataset_name='california', normalize_X=False, normalize_y=False, one_hot_encode_y=False, test_size=0.2):
    if dataset_name == 'california':
        california_data = fetch_california_housing()
        data = {'data': california_data.data, 'target': california_data.target}       
    elif dataset_name == 'iris':
        iris_data = load_iris()
        data = {'data': iris_data.data, 'target': iris_data.target}
    elif dataset_name == 'mnist':
        mnist_data = load_digits()
        data = {'data': (mnist_data.data >= 8), 'target': (mnist_data.target).astype(int)}

    X = data['data']
    y = data['target'].reshape(-1, 1)

    if normalize_X:
        normalizer = Normalizer()
        X = normalizer.fit_transform(X)

    if normalize_y:
        normalizer = Normalizer()
        y = normalizer.fit_transform(y)

    if one_hot_encode_y:
        encoder = OneHotEncoder()
        y = encoder.fit_transform(y).toarray()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    return X_train, y_train, X_test, y_test

In [3]:
class Layer:
    def __init__(self, dimenshion_of_input,output_neuron, activation_name="linear", seed=42):
        np.random.seed(seed)
        
        self.dimenshion_of_input = dimenshion_of_input
        self.output_neuron = output_neuron
        
        # random initialization of input X  and output Z
        self.X = np.random.random((1, self.dimenshion_of_input))   # assigned during SGD
        self.Z = np.random.random((1, self.output_neuron))
        
        self.W = np.random.random((dimenshion_of_input, output_neuron)) * \
            np.sqrt(2 / (dimenshion_of_input + output_neuron))
        self.B = np.random.random((1, output_neuron))*np.sqrt(2 / (1 + output_neuron))
        
        self.multiply_layer = MatrixMultiplicationLayer(self.X, self.W)
        self.bias_layer = BiasAdditionLayer(self.B, self.B)

        if activation_name == 'linear':
            self.activation_layer = LinearActivation(self.Z)
        elif activation_name == 'sigmoid':
            self.activation_layer = SigmoidLayer(self.Z)
        elif activation_name == 'softmax':
            self.activation_layer = SoftmaxLayer(self.Z)
        elif activation_name == 'tanh':
            self.activation_layer = TanhActivation(self.Z)
        elif activation_name == 'relu':
            self.activation_layer = ReLUActivation(self.Z)
            
    def forward_layer(self):
        self.multiply_layer.forward()
        self.bias_layer.output = self.multiply_layer.output
        self.bias_layer.forward()
        self.activation_layer.Z = self.bias_layer.output
        self.activation_layer.forward()
        self.Z = self.activation_layer.output
    def backward_layer(self):
        self.activation_layer.backward()
        self.bias_layer.backward()
        self.multiply_layer.backward()

class NeuralNetwork(Layer):
    """
    Input  - layers : list of layer objects , loss_name : Name of loss layer
    """

    # [ "mean_squared", "cross_entropy"]
    def __init__(self, layers, loss_name="mean_squared", learning_rate=0.01, seed=42):
        np.random.seed(seed)

        self.layers = layers
        self.n_layers = len(layers)  # number of layers in neural network
        self.learning_rate = learning_rate

        self.inp_shape = self.layers[0].X.shape
        self.out_shape = self.layers[-1].Z.shape

        # random initialization of input X  and output Z
        self.X = np.random.random(self.inp_shape)   # assigned during SGD
        self.Y = np.random.random(self.out_shape)  # output of neural network

        # define loss layer
        if loss_name == "mean_squared":
            self.loss_layer = MeanSquaredLossLayer(self.Y, self.Y)
        if loss_name == "cross_entropy":
            self.loss_layer = CrossEntropyLossLayer(self.Y, self.Y)

    def forward(self):
        self.layers[0].X = self.X
        self.loss_layer.target = self.Y

        self.layers[0].forward_layer()
        for i in range(1, self.n_layers):
            self.layers[i].X = self.layers[i-1].Z
            self.layers[i].forward_layer()

        self.loss_layer.predicted = self.layers[-1].Z
        self.loss_layer.forward()

    def backward(self):

#         self.loss_layer.Z = self.Y
        self.loss_layer.backward()
        self.grad_nn = self.loss_layer.dL_dpredicted
        for i in range(self.n_layers-1, -1, -1):
            self.layers[i].backward_layer()

            dL_dZ = np.dot(
                self.layers[i].activation_layer.doutput_dZ, self.grad_nn)
            dL_dW = np.dot(self.layers[i].multiply_layer.doutput_dW, dL_dZ.T)
            dL_dB = np.dot(self.layers[i].bias_layer.doutput_dB, dL_dZ).T

            # Update W & B
            self.layers[i].W -= self.learning_rate*dL_dW
            self.layers[i].B -= self.learning_rate*dL_dB

            # Update outer_grad
            self.grad_nn = np.dot(self.layers[i].multiply_layer.doutput_dX, dL_dZ)

            del dL_dZ, dL_dW, dL_dB

In [4]:
def createLayers(inp_shape, layers_sizes, layers_activations):
    layers = []
    n_layers = len(layers_sizes)
    layer_0 = Layer(inp_shape, layers_sizes[0], layers_activations[0])
    layers.append(layer_0)
    inp_shape_next = layers_sizes[0]
    for i in range(1, n_layers):
        layer_i = Layer(inp_shape_next, layers_sizes[i], layers_activations[i])
        layers.append(layer_i)
        inp_shape_next = layers_sizes[i]

    out_shape = inp_shape_next
    return inp_shape, out_shape, layers

In [5]:
## CNN Model

def rotateMatrix(mat):
    N = len(mat)
    rot_mat = np.zeros((N, N))

    for t1 in range(N):
        k = N - 1
        for t2 in range(N):
            rot_mat[t1][t2] = mat[k][N - 1 - t2]
        k -= 1

    return rot_mat

# flatten operation:

def flatten(inp_mat):
    flatten_vector = []

    for i in range(len(inp_mat)):  # number of rows
        for j in range(len(inp_mat[0])):  # number of columns
            flatten_vector.append(inp_mat[i][j])

    flatten_vector = np.array(flatten_vector)
    return flatten_vector

class ConvolutionalLayer:
    
    def __init__(self, input_shape, activation='tanh', filter_shape=(1, 1), learning_rate=0.01, num_output_channels=1, seed=42):
        np.random.seed(seed)

        assert (input_shape[1] >= filter_shape[0] and input_shape[2] >= filter_shape[1]), \
            f"Error: Input shape {input_shape} is incompatible with filter shape {filter_shape}"

        self.input_shape = input_shape
        self.num_input_channels = input_shape[0]  
        self.num_output_channels = num_output_channels  
        self.learning_rate = learning_rate

        # Initialize filters, biases, output, and flatten output
        self.filter_shape = (self.num_output_channels, self.num_input_channels, *filter_shape)
        self.filters = np.random.rand(*self.filter_shape)
        self.biases = np.random.rand(self.num_output_channels, input_shape[1] - filter_shape[0] + 1, input_shape[2] - filter_shape[1] + 1)
        self.output_shape = self.biases.shape
        self.flatten_shape = np.prod(self.output_shape)
        self.output = np.zeros(self.output_shape)
        self.flatten_output = np.zeros((1, self.flatten_shape))

        # Define activation function
        if activation == 'tanh':
            self.activation_layer = TanhActivation(self.output)

    def forward(self):
        # Add bias to output
        self.output = self.biases.copy()

        # Perform convolution
        for i in range(self.num_output_channels):
            for j in range(self.num_input_channels):
                self.output[i] += self.convolve(self.input[j], self.filters[i, j])

        # Flatten output
        self.flatten_output = self.output.reshape(1, -1)

        # Forward pass through activation layer
        self.activation_layer.Z = self.flatten_output
        self.activation_layer.forward()

    def backward(self, gradient_nn):
        # Backward pass through activation layer
        self.activation_layer.backward()
        loss_gradient = np.dot(self.activation_layer.doutput_dZ, gradient_nn)

        # Reshape loss gradient to match output shape
        loss_gradient = loss_gradient.reshape(self.output_shape)

        # Initialize gradients
        self.filters_gradient = np.zeros(self.filter_shape)
        self.input_gradient = np.zeros(self.input_shape)
        self.biases_gradient = loss_gradient

        # Pad loss gradient
        padded_loss_gradient = np.pad(loss_gradient, (
            (0, 0),
            (self.filter_shape[2] - 1, self.filter_shape[2] - 1),
            (self.filter_shape[3] - 1, self.filter_shape[3] - 1)
        ))

        # Compute gradients for filters and input
        i = 0
        while i < self.num_output_channels:
            j = 0
            while j < self.num_input_channels:
                # Compute filter gradient
                self.filters_gradient[i, j] = self.convolve(self.input[j], loss_gradient[i])
                
                # Rotate the filter by 180 degrees
                rot180_filter_ij = np.rot90(np.rot90(self.filters[i, j], axes=(0, 1)), axes=(0, 1))
                
                # Compute input gradient
                k = 0
                while k < self.num_output_channels:
                    l = 0
                    while l < self.num_input_channels:
                        self.input_gradient[l] += self.convolve(padded_loss_gradient[k], rot180_filter_ij)
                        l += 1
                    k += 1
                    l = 0  # Reset inner loop index
                
                j += 1
            i += 1
        
        # Update filters and biases
        self.filters -= self.learning_rate * self.filters_gradient
        self.biases -= self.learning_rate * self.biases_gradient

    def convolve(self, x, y):
        x_conv_y = np.zeros((x.shape[0] - y.shape[0] + 1, x.shape[1] - y.shape[1] + 1))
        i = 0
        while i < x.shape[0] - y.shape[0] + 1:
            j = 0
            while j < x.shape[1] - y.shape[1] + 1:
                tmp = x[i:i + y.shape[0], j:j + y.shape[1]]
                tmp = np.multiply(tmp, y)
                x_conv_y[i, j] = np.sum(tmp)
                j += 1
            i += 1
        return x_conv_y


class CNN : 
    """
    Implementation of Convolutional Neural Network
    """
    def __init__(self, 
                convolutional_layer,                   # convolutional layer 
                nn,                                    # feed forward neural network
                seed = 42): 

        self.nn = nn 
        self.convolutional_layer = convolutional_layer 
        self.X = _ # assigned during SGD 
        self.Y = _ # assigned during SGD 
    
    def forward(self):
        # forward pass of convolutional layer 
        self.convolutional_layer.input = self.X 
        self.convolutional_layer.forward()

        # forward pass of neural network 
        self.nn.X = self.convolutional_layer.activation_layer.output
        self.nn.Y = self.Y 
        self.nn.forward()  
    
    def backward(self): 
        # backward pass of neural network 
        self.nn.backward() 

        # backward pass of convolutional network 
        self.convolutional_layer.backward( self.nn.grad_nn ) 

def SGD_CNN(X_train,
            y_train,
            X_test,
            y_test,
            cnn,
            inp_shape,
            out_shape,
            n_iterations=1000,
            task="classification"):
    
    # Define a custom progress bar function
    def progress_bar(iteration, total_iterations):
        progress = (iteration + 1) / total_iterations
        bar_length = 30
        filled_length = int(bar_length * progress)
        bar = '=' * filled_length + '-' * (bar_length - filled_length)
        print(f'\rTraining ... [{bar}] {progress * 100:.2f}%', end='', flush=True)
        if iteration == total_iterations - 1:
            print()
            
    # Initialize iteration counter
    iteration = 0

    # Loop until iteration reaches n_iterations
    while iteration < n_iterations:
        # Display the progress bar
        progress_bar(iteration, n_iterations)
        randomIndx = np.random.randint(len(X_train))
        X_sample = X_train[randomIndx, :].reshape(inp_shape)
        Y_sample = y_train[randomIndx, :].reshape(out_shape)

        cnn.X = X_sample
        cnn.Y = Y_sample

        cnn.forward()  # Forward Pass
        cnn.backward()  # Backward Pass

        # Increment the iteration counter
        iteration += 1

    # Clear the progress bar after training
    print('\033[K', end='', flush=True)
    
    # Lets run ONLY forward pass for train and test data and check accuracy/error

    if task == "classification":
        X_train = X_train.reshape(-1, 8, 8)
        y_true = np.argmax(y_train, axis=1)
        acc = 0
        for i in range(len(X_train)):
            cnn.X = X_train[i][np.newaxis, :, :]
            cnn.Y = y_train[i]
            cnn.forward()
            y_pred_i = np.argmax(cnn.nn.loss_layer.predicted, axis=1)
            if (y_pred_i == y_true[i]):
                acc += 1
        
        print("Classification Accuracy (Training Data ):" + str(acc) + "/" + str(len(y_true)) + " = " + str(acc*100/len(y_true)) + " %" )

        X_test = X_test.reshape(-1, 8, 8)
        y_true = np.argmax(y_test, axis=1)
        acc = 0
        for i in range(len(X_test)):
            cnn.X = X_test[i][np.newaxis, :, :]
            cnn.Y = y_test[i]
            cnn.forward()
            y_pred_i = np.argmax(cnn.nn.loss_layer.predicted, axis=1)
            if (y_pred_i == y_true[i]):
                acc += 1
        
        print("Classification Accuracy (Testing Data ):" + str(acc) + "/" + str(len(y_true)) + " = " + str(acc*100/len(y_true)) + " %" )

In [6]:
X_train, y_train, X_test, y_test = load_data_sklearn('mnist', one_hot_encode_y=True)

In [7]:
conv_inp_shape = (1,8,8)   # sklearn digit dataset has images of shape 1 x 8 x 8
Co = 16  # 16 channel output 
conv_filter_shape = (3,3)
conv_activation = 'tanh'
convolutional_layer = ConvolutionalLayer(conv_inp_shape,
                                        activation = conv_activation,
                                        filter_shape = conv_filter_shape, 
                                        learning_rate = 0.01,
                                        num_output_channels = Co
                                        )
nn_inp_shape = convolutional_layer.flatten_shape 
layers_sizes = [10]
layers_activations = ['softmax']

nn_inp_shape, nn_out_shape, layers = createLayers(nn_inp_shape, layers_sizes, layers_activations)
loss_nn = 'cross_entropy'

nn = NeuralNetwork(layers, loss_nn, learning_rate=0.01)

cnn = CNN(convolutional_layer, nn)
out_shape =  (1, layers_sizes[-1])  # one_hot encoded ouptut 

SGD_CNN(X_train,y_train,X_test,y_test, cnn,conv_inp_shape, out_shape,n_iterations=5000)

[KClassification Accuracy (Training Data ):149/1437 = 10.368823938761308 %
Classification Accuracy (Testing Data ):29/360 = 8.055555555555555 %
