***Abstract Base Class : Layer***

In [5]:
# #Base Class
# class Layer:

#     def __init__(self):
#         self.input= None
#         self.output= None

#     # computes the output Y of a layer for a given input X
#     def forward_propagation(self, input):
#         raise NotImplementedError

#     # computes dE/dX for a given dE/dY (and update parameters if any)
#     def backward_propagation(self, output_error, learning_rate):
#         raise NotImplementedError

In [6]:
#from layer import Layer (IF LAYER CLASS WAS IN DIFFERENT FILE NAMED layer.py)
import numpy as np

***CODING THE FULLY CONNECTED LAYER***

In [7]:
# inherit from base class Layer
class FClayer ():
    def __init__(self, input_size, output_size):
        # input_size = number of input neurons
        # output_size = number of output neurons
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5
    
    # returns output for a given input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output
    
    # computes weight_error dE/dW, dE/dB for a given output_error=dE/dY.; Returns input_error=dE/dX.
    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weight_error = np.dot(self.input.T, output_error)
        #dBias = ouput_error
        
        #UPDATING PARAMETERS
        self.weights -= learning_rate*weight_error
        self.bias -= learning_rate*output_error
        return input_error

***CODING THE ACTIVATION LAYER***

In [8]:
class ActivationLayer():

    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime
        
    # Return activated Input    
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    # Returns input_error=dE/dX for a given output_error=dE/dY.
    # learning_rate is not used because there is no "learnable" parameters.
    def backward_propagation(self, output_error, learning_rate):
        return self.activation_prime(self.input) * output_error

In [9]:
#Activation function and its derivative
def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1-np.tanh(x)**2

In [10]:
#loss function and it's derivative

def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2*(y_pred - y_true)/y_true.size

***BUILDING NETWORK CLASS***

In [11]:
class Network:

    def __init__(self):
        self.layers = []
        self.loss= None
        self.loss_prime = None

    # Adding layer to the network
    def add(self,layer):
        self.layers.append(layer)
        
    # Set loss to use
    def use(self, loss, loss_prime):
        self.loss= loss
        self.loss_prime = loss_prime

    # Predicting output for the given input
    def predict(self, input_data):
        result = []
        samples = len(input_data)

        # running through all samples
        for i in range (samples):
            output = input_data[i]
            #forward propogation function used
            for layer in self.layers:
                output = layer.forward_propagation(output)
            result.append(output)
         
        return result  

    #training the network
    def fit(self, x_train, y_train, epochs, learning_rate):
        samples = len(x_train)

        #training loop
        for i in range(epochs):
            err = 0
            #forward propogation
            for j in range (samples):
                output = x_train[j]
                for layer in self.layers:
                    output = layer.forward_propagation(output)

                #compute loss
                err += self.loss(y_train[j], output)

                
                #backward propogation
                error = self.loss_prime(y_train[j], output)

                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d %d error=%f' %(i+1,epochs,err))
            accuracy = 100-err
            print('accuracy= %f' %(accuracy))

***CHECKING THE MODEL FOR XOR***

***Model Implementation on MNIST Dataset***

In [12]:
from keras.datasets import mnist

2024-02-26 20:07:14.532695: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-26 20:07:14.569334: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-26 20:07:14.569860: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [13]:
#A function from Keras utils module for one-hot encoding categorical variables (converting class vectors to binary class matrices).

from keras.utils import to_categorical

In [14]:
#from keras.utils import np_utils

In [15]:
# loading mnist from server
(x_train,y_train),(x_test,y_test) = mnist.load_data()

*RESHAPING AND NORMALISATION*

In [16]:
#Reshapes the input data to have a shape of (num_samples, 1, 28*28)
x_train = x_train.reshape(x_train.shape[0], 1, 28*28)
x_train = x_train.astype('float32')
#normalizes pixel values to the range [0, 1] by dividing by 255
x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train)

x_test = x_test.reshape(x_test.shape[0], 1, 28*28)
x_test = x_test.astype('float32')
x_test /= 255

y_test = to_categorical(y_test)

In [17]:
# Network
net = Network()
net.add(FClayer(28*28, 100))                # input_shape=(1, 28*28)    ;   output_shape=(1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FClayer(100, 50))                   # input_shape=(1, 100)      ;   output_shape=(1, 50)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FClayer(50, 10))                    # input_shape=(1, 50)       ;   output_shape=(1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

In [18]:
net.use(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], epochs= 35, learning_rate=0.1)

# test on 3 samples
out = net.predict(x_test)
print("\n")
print("predicted values : ")
print(out, end="\n")
print("true values : ")
print(y_test[0:3])

epoch 1 35 error=0.243465
accuracy= 99.756535
epoch 2 35 error=0.092766
accuracy= 99.907234
epoch 3 35 error=0.071676
accuracy= 99.928324
epoch 4 35 error=0.059631
accuracy= 99.940369
epoch 5 35 error=0.051016
accuracy= 99.948984
epoch 6 35 error=0.044643
accuracy= 99.955357
epoch 7 35 error=0.039419
accuracy= 99.960581
epoch 8 35 error=0.035309
accuracy= 99.964691
epoch 9 35 error=0.031926
accuracy= 99.968074
epoch 10 35 error=0.029206
accuracy= 99.970794
epoch 11 35 error=0.026794
accuracy= 99.973206
epoch 12 35 error=0.024731
accuracy= 99.975269
epoch 13 35 error=0.022911
accuracy= 99.977089
epoch 14 35 error=0.021336
accuracy= 99.978664
epoch 15 35 error=0.019951
accuracy= 99.980049
epoch 16 35 error=0.018770
accuracy= 99.981230
epoch 17 35 error=0.017689
accuracy= 99.982311
epoch 18 35 error=0.016796
accuracy= 99.983204
epoch 19 35 error=0.015930
accuracy= 99.984070
epoch 20 35 error=0.015155
accuracy= 99.984845
epoch 21 35 error=0.014457
accuracy= 99.985543
epoch 22 35 error=0.01