In [2]:
import pandas as pd
import numpy as np

## **GET DATA**

In [3]:
train_data=pd.read_csv("data/ann/mnist_train.csv", header=None)
test_data=pd.read_csv("data/ann/mnist_test.csv", header=None)

# **DEFINE NEURAL NETWORK**

In [15]:
class ANN:

    def __init__(self, iters, alpha, layers, node_per_layer):
        self.iters = iters
        self.alpha = alpha # learning rate
        self.layers = layers
        self.node_per_layer = node_per_layer
        self.weights, self.bias = self.initialize_parameters()


    def initialize_parameters(self):
        weights = {}
        bias = {}
        W0 = np.random.randn(784, self.node_per_layer[0]) / np.sqrt(784)
        W1 = np.random.randn(self.node_per_layer[0], self.node_per_layer[1]) / np.sqrt(self.node_per_layer[0])
        W2 = np.random.randn(self.node_per_layer[1], self.node_per_layer[2]) / np.sqrt(self.node_per_layer[1])
        W3 = np.random.randn(self.node_per_layer[2], 10) / np.sqrt(self.node_per_layer[2])
        weights["W0"] = W0
        weights["W1"] = W1
        weights["W2"] = W2
        weights["W3"] = W3

        B0 = np.random.randn(1, self.node_per_layer[0])
        B1 = np.random.randn(1, self.node_per_layer[1])
        B2 = np.random.randn(1, self.node_per_layer[2])
        B3 = np.random.randn(1, 10)
        bias["B0"] = B0
        bias["B1"] = B1
        bias["B2"] = B2
        bias["B3"] = B3

        return weights, bias


    # UTILITY FUNCTIONS
    def relu(self, Z):
        return np.maximum(0, Z)


    def diff_relu(self, Z):
        return Z > 0


    def softmax(self, Z):
        exp_scores = np.exp(Z - Z.max())
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return probs


    def diff_softmax(self, Z):
        exp_scores = np.exp(Z - Z.max())
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True) * (1 - exp_scores / np.sum(exp_scores, axis=1, keepdims=True))


    def error_calculation(self, out, y):
        return 1/(2 * 10) * np.sum((out - y)**2)


    def sigmoid(self, Z):
        return 1/(1 + np.exp(-Z))


    def diff_sigmoid(self, Z):
        return (np.exp(-Z))/((np.exp(-Z)+1)**2)


    def one_hot_encode(self, y):
        arr = [0] * 10
        arr[y[0]] = 1
        return np.array(arr)


    def calculate_accuracy(self, out, y):
        out = np.array(out)
        out = out.reshape(out.shape[0], 1)
        y_ = y.reshape(y.shape[0], 1)
        sum = 0
        for i in range(y.shape[0]):
            if(out[i] == y_[i]):
                sum = sum + 1
        return sum/ y_.shape[0] * 100


    # FORWARD PASS
    def forward_pass(self, x):
        # convert input to 2-d array using reshape
        x = x.values.reshape(1, 784)

        Z1 = x.dot(self.weights["W0"]) + self.bias["B0"]
        A1 = self.sigmoid(Z1)

        Z2 = A1.dot(self.weights["W1"]) + self.bias["B1"]
        A2 = self.sigmoid(Z2)

        Z3 = A2.dot(self.weights["W2"]) + self.bias["B2"]
        A3 = self.sigmoid(Z3)

        Z4 = A3.dot(self.weights["W3"]) + self.bias["B3"]
        A4 = self.softmax(Z4)

        Z = (Z1, Z2, Z3, Z4)
        A = (A1, A2, A3, A4)
        return A, Z


    # BACK PROPAGATION
    def back_prop(self, x, y_, A, Z):
        y_ = self.one_hot_encode(y_).reshape(1, 10)

        dK4 = 2 / 10 * (A[3] - y_) * self.diff_softmax(Z[3])
        dB3 = np.sum(dK4, axis=0, keepdims=True)
        # dW3 = (A[2].T).dot(dK4)
        dW3 = np.outer(dK4.T, A[2].T)
        
        dK3 = (dK4).dot(self.weights["W3"].T) * self.diff_sigmoid(Z[2])
        dB2 = np.sum(dK3, axis=0)
        dW2 = np.outer(dK3.T, A[1].T)

        dK2 = (dK3).dot(self.weights["W2"].T) * self.diff_sigmoid(Z[1])
        dB1 = np.sum(dK2, axis=0)
        dW1 = np.outer(dK2.T, A[0].T)

        x = x.values.reshape(1, 784)
        dK1 = (dK2).dot(self.weights["W1"].T) * self.diff_sigmoid(Z[0])
        dB0 = np.sum(dK1, axis=0)
        dW0 = np.outer(dK1.T, x.T)

        dW = (dW0, dW1, dW2, dW3)
        dB = (dB0, dB1, dB2, dB3)
        return dW, dB


    # UPDATE PARAMETERS
    def update_parameters(self, dW, dB):
        self.weights["W0"] = self.weights["W0"] - (self.alpha * dW[0].T)
        self.weights["W1"] = self.weights["W1"] - (self.alpha * dW[1].T)
        self.weights["W2"] = self.weights["W2"] - (self.alpha * dW[2].T)
        self.weights["W3"] = self.weights["W3"] - (self.alpha * dW[3].T)

        self.bias["B0"] = self.bias["B0"] - (self.alpha * dB[0])
        self.bias["B1"] = self.bias["B1"] - (self.alpha * dB[1])
        self.bias["B2"] = self.bias["B2"] - (self.alpha * dB[2])
        self.bias["B3"] = self.bias["B3"] - (self.alpha * dB[3])


    # TRAIN
    def train_model(self, X, y):
        y = y.reshape(y.shape[1], 1)
        for i in range(self.iters):
            predictions = []
            print(f"ITERATION {i + 1} : ")
            for j in range(y.shape[0]):
                x = X.iloc[j]
                A, Z = self.forward_pass(x)
                prediction = np.argmax(A[3])
                predictions.append(prediction)

                dW, dB = self.back_prop(x, y[j], A, Z)
                self.update_parameters(dW, dB)
            print("     Accuracy : ", self.calculate_accuracy(predictions, y), "%")
            predictions.clear()
            print("-" * 40)


    # TEST
    def test_model(self, X_test, y_test):
        predictions = []
        y_test = y_test.reshape(y_test.shape[1], 1)
        for i in range(y_test.shape[0]):
            A, Z = self.forward_pass(X_test.iloc[i])
            prediction = np.argmax(A[3])
            predictions.append(prediction)
        print("        ACCURACY ON TEST DATASET:")
        print("Accuracy:", self.calculate_accuracy(predictions, y_test), "%")
            

# **NORMALIZING DATA**

In [9]:
y_train = train_data.iloc[:, 0]
y_train = y_train.values.reshape(1, y_train.shape[0])
print(y_train.shape)
X_train = train_data.iloc[:, 1:]
X_train = (X_train/255).astype("float32")

y_test = test_data.iloc[:, 0]
y_test = y_test.values.reshape(1, y_test.shape[0])
X_test = test_data.iloc[:, 1:]
X_test = (X_test/255).astype("float32")

(1, 60000)


In [16]:
iters = 20
alpha = 0.05
layers = 3
node_per_layer = [261, 87, 29]
ann = ANN(iters, alpha, layers, node_per_layer)
ann.train_model(X_train, y_train)
ann.test_model(X_test, y_test)

ITERATION 1 : 
     Accuracy :  10.988333333333333 %
----------------------------------------
ITERATION 2 : 
     Accuracy :  15.735 %
----------------------------------------
ITERATION 3 : 
     Accuracy :  28.15166666666667 %
----------------------------------------
ITERATION 4 : 
     Accuracy :  42.695 %
----------------------------------------
ITERATION 5 : 
     Accuracy :  56.88833333333333 %
----------------------------------------
ITERATION 6 : 
     Accuracy :  69.47166666666666 %
----------------------------------------
ITERATION 7 : 
     Accuracy :  65.735 %
----------------------------------------
ITERATION 8 : 
     Accuracy :  70.86333333333333 %
----------------------------------------
ITERATION 9 : 
     Accuracy :  80.34833333333333 %
----------------------------------------
ITERATION 10 : 
     Accuracy :  87.34833333333333 %
----------------------------------------
ITERATION 11 : 
     Accuracy :  89.08333333333334 %
----------------------------------------
ITERATI