In [19]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [20]:
class Neural_Network_Image:
    def __init__(self):
        #For Training
        self.hidden_neurons = 10
        self.alpha = 0.1
        self.iter = 500
        self.test_ratio = 0.2
        self.x_test, self.y_test = None, None
        self.x_train, self.y_train = None, None
        self.pixels = None
        self.output_size = 10
        ###############
        self.m = None #Sample size
        self.W1, self.b1, self.W2, self.b2 = None, None, None, None
    
    def prepare_data(self, data):
        #This is used for Raw Data,do not call if Data are already in shape
        data = np.array(data)
        self.m, n = data.shape
        self.pixels = n - 1
        np.random.shuffle(data)
        test_size = int(self.test_ratio * self.m)
        test_set = data[:test_size].T
        self.y_test = test_set[0]
        self.x_test = test_set[1:] / 255.0
        train_set = data[test_size:].T
        self.y_train = train_set[0]
        self.x_train = train_set[1:] / 255.0

    def prepare_data_tensorflow(self, type):
        #This is only for MNIST and FASHION of Tensorflow's database
        if type=='fashion':
            fashion_mnist = tf.keras.datasets.fashion_mnist
            (x_train, self.y_train), (x_test, self.y_test) = fashion_mnist.load_data()
        if type=='mnist':
            mnist = tf.keras.datasets.mnist
            (x_train, self.y_train), (x_test, self.y_test) = mnist.load_data()
        self.x_train = x_train.T.reshape(784,60000) / 255.0
        self.x_test = x_test.T.reshape(784,10000) / 255.0
        self.pixels = 784
        self.m = 60000
    
    def init_params(self):
        W1 = np.random.rand(self.hidden_neurons, self.pixels) - 0.5
        b1 = np.random.rand(self.hidden_neurons, 1) - 0.5
        W2 = np.random.rand(self.output_size, self.hidden_neurons) - 0.5
        b2 = np.random.rand(self.output_size, 1) - 0.5
        return W1, b1, W2, b2
    
    def ReLU(self, Z):
        return np.maximum(0, Z)
  
    def ReLU_deriv(self, Z):
        return Z > 0

    def SoftMax(self, Z):
        A = np.exp(Z) / sum(np.exp(Z))
        return A

    def one_hot(self, Y):
        one_hot_Y = np.zeros((Y.size, Y.max() + 1)) #For digit reg, 10 x m
        one_hot_Y[np.arange(Y.size), Y] = 1
        return one_hot_Y.T
    
    def get_predictions(self, A2):
        return np.argmax(A2, 0)

    def get_accuracy(self, predictions, actual):
        return np.sum(predictions == actual) / actual.size
    
    def forward(self, X, W1, b1, W2, b2):
        Z1 = W1 @ X
        A1 = self.ReLU(Z1 + b1)
        Z2 = W2 @ A1
        A2 = self.SoftMax(Z2 + b2)
        return Z1, A1, A2
    
    def backward(self, X, one_hot_Y, Z1, A1, A2, W2):
        dZ2 =  A2 - one_hot_Y
        dW2 =  1/self.m * dZ2 @ A1.T
        db2 = 1/self.m * np.sum(dZ2)
        dZ1 = W2.T @ dZ2 * self.ReLU_deriv(Z1)
        dW1 = 1/self.m * dZ1 @ X.T
        db1 = 1/self.m * np.sum(dZ1)
        return dW1, db1, dW2, db2
    
    def gradient_descent(self):
        one_hot_Y = self.one_hot(self.y_train)
        W1, b1, W2, b2 = self.init_params()
        for i in range(self.iter+1):
            Z1, A1, A2 = self.forward(self.x_train, W1, b1, W2, b2)
            dW1, db1, dW2, db2 = self.backward(self.x_train, one_hot_Y, Z1, A1, A2, W2)
            if (i % 50 == 0):
                predictions = self.get_predictions(A2)
                accuracy = self.get_accuracy(predictions, self.y_train)
                print(f'epoch {i}_accuracy {(accuracy * 100):.3f} %')
        
            #Update Weights and biases
            W1 = W1 - self.alpha * dW1
            b1 = b1 - self.alpha * db1
            W2 = W2 - self.alpha * dW2
            b2 = b2 - self.alpha * db2
        
        self.W1, self.b1, self.W2, self.b2 = W1, b1, W2, b2

    def predict(self, test_data=None, test_label=None):
        if test_data == None:
            test_data = self.x_test
            test_label = self.y_test
        _,_,A2 = self.forward(test_data, self.W1, self.b1, self.W2, self.b2)
        predictions = self.get_predictions(A2)
        accuracy = self.get_accuracy(predictions, test_label)
        print(f'accuracy {(accuracy * 100):.2f} % on {test_label.size} tests')

    def fit(self, train_data=None, tensorflow_data=None):
        if tensorflow_data:
            #Options: mnist or fashion
            self.prepare_data_tensorflow(tensorflow_data)
        else:
            self.prepare_data(train_data)
        self.gradient_descent()

In [21]:
data = pd.read_csv("datasets/mnist_test.csv")

In [22]:
nn = Neural_Network_Image()
nn.fit(train_data=data,tensorflow_data=None)

epoch 0_accuracy 12.375 %
epoch 20_accuracy 25.875 %
epoch 40_accuracy 34.913 %
epoch 60_accuracy 40.062 %
epoch 80_accuracy 46.150 %
epoch 100_accuracy 56.500 %
epoch 120_accuracy 62.213 %
epoch 140_accuracy 66.850 %
epoch 160_accuracy 70.125 %
epoch 180_accuracy 72.312 %
epoch 200_accuracy 74.275 %
epoch 220_accuracy 75.750 %
epoch 240_accuracy 76.850 %
epoch 260_accuracy 77.825 %
epoch 280_accuracy 78.850 %
epoch 300_accuracy 79.463 %
epoch 320_accuracy 80.125 %
epoch 340_accuracy 80.688 %
epoch 360_accuracy 81.350 %
epoch 380_accuracy 81.812 %
epoch 400_accuracy 82.450 %
epoch 420_accuracy 82.763 %
epoch 440_accuracy 83.188 %
epoch 460_accuracy 83.525 %
epoch 480_accuracy 83.788 %
epoch 500_accuracy 84.213 %


In [23]:
nn.predict()

accuracy 83.55 % on 2000 tests
