In [None]:
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, plot_confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sns

import time
%matplotlib inline

In [3]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [4]:
   
def to_grey(data):
    assert data.shape[-1] == 3072
    return ((0.3 * data[:,:1024]) + (0.59 * data[:,1024:2048]) + (0.11 * data[:,2048:])) / 255
    
def flatten(data):
    return np.reshape(data, (-1, np.prod(data.shape[1:])))

def pre_process(x, y):
    x = flatten(x[:10000]) / 255
    y = y[:10000]
    I = np.eye(len(np.unique(y)))
    return x, np.squeeze(I[y])
    

In [5]:
x_train, y_train = pre_process(x_train, y_train)
x_test, y_test = pre_process(x_test, y_test)

In [29]:
class NeuralNetwork():

    def __init__(self, in_nodes, h_nodes = 10, output_layer = 10, epochs = 10, lr = 0.1):
        self.lr = lr
        self.epochs = epochs + 1
        self.W1 = np.random.randn(in_nodes, h_nodes) * 0.01
        self.B1 = 0.1
        self.W2 = np.random.randn(h_nodes, 10) * 0.01
        self.B2 = 0.1
        self.lambd = 0

    def relu(self,z):
        return z*(z>0) 

    def relu_prime(self, z):
        return 1.0 *(z>0)

    def cost(self, Y):
        m = Y.shape[0]
        cost = (1/m) * -(np.sum(np.multiply(np.log(self.A2), Y) + np.multiply((1-Y), np.log(1-self.A2))))
        return cost
    
    def nllloss(self, Y, epsilon = 1e-12):
        m = Y.shape[0]
        Y = np.clip(Y, epsilon, 1. - epsilon)
        log_pred_y = np.log(self.A2)
        
        l2_cost = 0
        if self.lambd != 0:        
            l2_cost = self.lambd * (np.sum(np.square(self.W1)) + np.sum(np.square(self.W2))) / (2 * m)
        return -(Y*log_pred_y).mean() + l2_cost
    
    def softmax(self, x, axis=-1):
        # when x is a 2 dimensional tensor
        e = np.exp(x - np.max(x, axis=axis, keepdims=True))
        s = np.sum(e, axis=axis, keepdims=True)
        return e / s  

    def forward(self, X):       
        self.Z1 = np.dot(X, self.W1) + self.B1
        self.A1 = self.relu(self.Z1) 
        self.Z2 = np.dot(self.A1, self.W2) + self.B2
        self.A2 = self.softmax(self.Z2)
        return self.A2
    
    def backprop(self, X, Y):
        m = X.shape[0]        

        dZ2 = self.A2 - Y
        dW2 = (1/m) * (np.dot(self.A1.T, dZ2)) + (self.lambd * self.W2)/m
        db2 = (1/m) * (np.sum(dZ2, axis = 0, keepdims = True))
        dZ1 = np.dot(dZ2,self.W2.T) * self.relu_prime(self.A1)
        dW1 = (1/m) * (np.dot(X.T, dZ1)) + (self.lambd * self.W1)/m
        db1 = (1/m) * (np.sum(dZ1, axis = 0, keepdims = True))
        

        self.W2 = self.W2 - self.lr * dW2
        self.B2 = self.B2 - self.lr * db2
        self.W1 = self.W1 - self.lr * dW1
        self.B1 = self.B1 - self.lr * db1
    

    def fit(self, X, Y, lambd):
        self.lambd = lambd
        for i in range(self.epochs):
            x, y = self.batch(X, Y)
            epoch_cost = 0
            for batch_no in range(len(x)):
                self.forward(x[batch_no])
                self.backprop(x[batch_no], y[batch_no])
                epoch_cost += (np.round(self.nllloss(y[batch_no]), 4))
            self.forward(X)
            cost = np.round(self.nllloss(Y), 4)
            if i % 10 == 0:
                print("Cost after", i, "epochs:", cost, end=".\n")
                
    def batch(self, X, Y, batch_size = 64):
        x_batch = [x_train[i:i+batch_size,:] for i in range(0, len(x_train)-batch_size, batch_size)]
        y_batch = [y_train[i:i+batch_size,:] for i in range(0, len(y_train)-batch_size, batch_size)]
        return x_batch, y_batch
          
    def predict(self, X):
        A2 = self.propagate(X)
        return np.round(A2)    
    
    def __call__(self, X):
        return self.propagate(X)

In [30]:
net = NeuralNetwork(x_train.shape[1], epochs = 100, h_nodes = 100)

In [31]:
net.fit(x_train, y_train, lambd = 0.1)

Cost after 0 epochs: 0.0619.
Cost after 10 epochs: 0.0195.
Cost after 20 epochs: 0.0131.
Cost after 30 epochs: 0.0102.
Cost after 40 epochs: 0.0086.
Cost after 50 epochs: 0.0077.
Cost after 60 epochs: 0.0071.
Cost after 70 epochs: 0.0068.
Cost after 80 epochs: 0.0065.
Cost after 90 epochs: 0.0063.
Cost after 100 epochs: 0.0062.


In [27]:
y_pred = np.argmax(net(x_test), axis = 1)
y = np.argmax(y_test, axis = 1)
print("Model accuracy: {}".format((y_pred == y).mean()))

Model accuracy: 0.9527


In [28]:
print(classification_report(y_pred.flatten(), y.flatten()))

              precision    recall  f1-score   support

           0       0.98      0.97      0.97       993
           1       0.98      0.98      0.98      1136
           2       0.95      0.95      0.95      1028
           3       0.94      0.96      0.95       984
           4       0.95      0.95      0.95       983
           5       0.93      0.96      0.94       864
           6       0.95      0.95      0.95       965
           7       0.94      0.96      0.95      1008
           8       0.96      0.90      0.93      1038
           9       0.94      0.94      0.94      1001

    accuracy                           0.95     10000
   macro avg       0.95      0.95      0.95     10000
weighted avg       0.95      0.95      0.95     10000

