In [79]:
# !pip install scikit-learn


In [80]:
import numpy as np
import pandas as pd
import matplotlib
import math
from sklearn.preprocessing import StandardScaler, MinMaxScaler

<h2>SOFTMAX LAYER</h2>

In [81]:
class Softmax_Layer:
    def __init__(self):
        self.layer_type = 'Softmax'
    
    def __str__(self):
        return f"{self.layer_type} Layer"
    
    def forward(self, X):
        Z = np.exp(X)
        return Z / np.einsum('ij->j', Z)
    
    def backward(self, dZ, learning_rate=0.0001):
        return np.copy(dZ)

<h2>ReLU ACTIVATION </h2>

In [82]:
class ReLU_Activation:
    def __init__(self):
        self.layer_type = 'ReLU'
    
    def __str__(self):
        return f"{self.layer_type} Activation"
    
    def forward(self, X):
        self.X = X

        Z = np.copy(X)
        Z[Z < 0] = 0
        return Z
    
    def backward(self, dZ, learning_rate=0.0001):
        dX = np.copy(self.X)

        dX[dX < 0] = 0
        dX[dX > 0] = 1
        return dX * dZ

<h2>FULLY CONNECTED LAYER</h2>

In [83]:
class Fully_Connected_Layer:
    def __init__(self, output_dim):
        self.output_dim = output_dim
        self.W = None
        self.b = None

    def __str__(self):
        return f"Fully_Connected_Layer(output_dim={self.output_dim})"
    
    def forward(self, X):
        self.X = X

        if self.W is None:
            self.W = np.random.randn(X.shape[1], self.output_dim) * math.sqrt(2 / X.shape[0])
        
        if self.b is None:
            self.b = np.zeros((1, self.output_dim))

        Z = np.einsum('ij,jk->ik', X, self.W) + self.b
        
        return Z
    
    def backward(self, dZ, learning_rate=0.0001):
        dW = np.einsum('ij,ik->jk', self.X, dZ) / self.X.shape[1] # check here
        db = np.einsum('ij->j', dZ) / self.X.shape[0] # check here
        dX = np.einsum('ij,jk->ik', dZ, self.W.T)

        self.W = self.W - learning_rate * dW
        self.b = self.b - learning_rate * db

        return dX

<h2>FLATENNING LAYER</h2>

In [84]:
class Flatenning_Layer:
    def __init__(self):
        self.layer_type = 'Flatten'
    
    def __str__(self):
        return f"{self.layer_type} Layer"
    
    def forward(self, X):
        self.input_shape = X.shape
        return X.reshape((X.shape[0], -1)) # check here
    
    def backward(self, dZ, learning_rate=0.0001):
        dX = np.copy(dZ)
        return dX.reshape(self.input_shape) # check here

In [85]:
# build a NN model
class Model:
    def __init__(self):
        self.layers = []
    
    def add(self, layer):
        self.layers.append(layer)
    
    def __str__(self):
        return f"Model: {self.layers}"
    
    def forward(self, X):
        for layer in self.layers:
            X = layer.forward(X)
        return X
    
    def backward(self, dZ, learning_rate=0.0001):
        for layer in reversed(self.layers):
            dZ = layer.backward(dZ, learning_rate)
        return dZ
    
    def fit(self, X, Y, learning_rate=0.0001, epochs=100, batch_size=32, print_loss=False):
        m = X.shape[0]
        for epoch in range(epochs):
            for i in range(0, m, batch_size):
                X_batch = X[i:i+batch_size]
                Y_batch = Y[i:i+batch_size]
                # print(X_batch.shape)
                Z = self.forward(X_batch)
                # print(Z.shape)
                # print(Y_batch.shape)
                dZ = Z - Y_batch
                # print("dZ calc done in fit")
                self.backward(dZ, learning_rate)

            if print_loss and epoch % 100 == 0:
                loss = self.calculate_loss(X, Y)
                print(f"Loss after epoch {epoch}: {loss}")
    
    def predict(self, X):
        Z = self.forward(X)
        # print(Z.shape)
        # print(Z)
        return np.argmax(Z, axis=1)+1
    
    def calculate_loss(self, X, Y):
        m = X.shape[0]
        # Y is one hot encoded
        P = self.forward(X)
        log_likelihood = -np.log(P[range(m), Y.argmax(axis=1)])
        loss = np.sum(log_likelihood) / m
        return loss

In [86]:
data = np.loadtxt("./Toy Dataset/testNN.txt")
X = data[:, 0:4]

# normalize the data
scaler = StandardScaler()
# scaler = MinMaxScaler()
X= scaler.fit_transform(X)
X

array([[ 1.35320359,  1.36824164,  1.34494831,  1.36689489],
       [ 0.5577194 ,  0.42359018,  0.45403648,  0.43159088],
       [-0.39373132, -0.48317386, -0.46162906, -0.40522801],
       ...,
       [-0.39984943, -0.39227624, -0.44497152, -0.42093613],
       [ 1.52610261,  1.35135152,  1.37790393,  1.35749308],
       [-1.27088231, -1.35384488, -1.35058138, -1.32316619]])

In [87]:
Y = data[:, 4]
# print(Y.shape)
# one hot encoding
Y_one_hot = np.zeros((Y.shape[0], 4))
for i in range(Y.shape[0]):
    Y_one_hot[i, int(Y[i])-1] = 1
Y_one_hot = Y_one_hot

In [88]:
basic = Model()
basic.add(Fully_Connected_Layer(6))
basic.add(ReLU_Activation())
basic.add(Fully_Connected_Layer(8))
basic.add(ReLU_Activation())
basic.add(Fully_Connected_Layer(5))
basic.add(ReLU_Activation())
basic.add(Fully_Connected_Layer(4))
basic.add(Softmax_Layer())

basic.fit(X, Y_one_hot, learning_rate=0.00001, epochs=1001, batch_size=32, print_loss=True)

Loss after epoch 0: 6.202386346566593
Loss after epoch 100: 6.198477630583524
Loss after epoch 200: 6.194381263810961
Loss after epoch 300: 6.190011376855234
Loss after epoch 400: 6.184769501506302
Loss after epoch 500: 6.178455115408696
Loss after epoch 600: 6.171538668616236
Loss after epoch 700: 6.164443286786684
Loss after epoch 800: 6.156438712049125
Loss after epoch 900: 6.147377475364741
Loss after epoch 1000: 6.137167243545605


In [89]:
test_data = np.loadtxt("./Toy Dataset/testNN.txt")
test_X = test_data[:, 0:4]
test_X = scaler.transform(test_X)
test_Y = test_data[:, 4]
test_Y = test_Y.astype(int)
test_Y_one_hot = np.zeros((test_Y.shape[0], 4))
for i in range(test_Y.shape[0]):
    test_Y_one_hot[i, int(test_Y[i])-1] = 1


In [90]:
def accuracy(Y, Y_pred):
    return np.sum(Y == Y_pred) / Y.shape[0]

In [91]:
# predict using model
predictions = basic.predict(test_X)
# print("Y \n", test_Y)
print("Predictions: \n", predictions)
print("Accuracy: ", accuracy(test_Y, predictions))

Predictions: 
 [4 2 1 1 4 4 4 2 1 1 1 1 2 1 2 4 2 1 4 1 1 4 2 1 2 1 4 1 1 1 4 4 2 4 4 1 4
 1 4 1 4 2 1 2 1 1 2 2 4 1 1 1 1 4 4 1 2 4 4 4 2 1 1 4 1 2 4 1 2 1 1 4 1 1
 1 1 2 4 1 2 4 4 4 1 2 4 2 4 4 4 1 1 1 1 1 4 2 4 4 1 1 1 1 1 1 1 1 1 1 2 1
 2 1 1 4 4 4 4 2 2 2 1 1 2 1 4 1 1 1 2 4 2 2 2 2 4 1 1 1 4 2 1 2 1 1 2 2 2
 4 2 2 1 1 2 1 1 1 2 2 4 4 4 4 1 4 1 1 1 2 4 2 1 2 2 1 1 2 2 1 1 1 2 1 2 4
 1 2 4 2 2 1 1 1 1 1 1 4 1 4 4 1 1 1 2 1 4 1 1 2 4 2 1 1 4 4 1 1 1 2 1 1 1
 1 1 1 1 1 2 1 4 1 1 2 2 1 1 1 2 4 2 1 2 4 1 4 1 4 4 2 1 4 1 2 2 2 1 1 1 4
 4 1 4 1 1 1 2 2 2 1 1 1 1 2 1 1 2 1 4 2 2 1 4 2 2 1 2 1 2 4 1 2 1 2 1 4 4
 4 1 1 2 4 1 1 4 4 1 4 4 2 1 1 1 2 1 4 1 1 4 2 2 1 1 1 2 2 4 2 2 4 1 1 4 4
 1 1 1 1 1 2 1 1 2 1 1 1 1 1 4 1 4 2 2 2 4 4 1 4 1 1 1 1 1 4 1 4 2 4 1 1 1
 2 1 1 1 2 4 4 1 2 1 1 2 1 2 4 1 1 1 1 2 1 2 1 2 1 1 1 1 1 2 1 1 2 1 1 1 1
 4 1 4 2 1 1 2 1 1 1 4 2 1 2 1 1 2 1 1 1 2 1 4 1 1 2 1 4 2 4 2 1 1 1 2 1 2
 1 1 2 2 4 1 1 1 2 4 4 4 4 1 4 4 4 4 1 2 1 2 4 1 4 1 2 2 2 1 1 4 2 1 2 1 4
 4 1 1 4 1