In [5]:
import numpy as np

class QuadraticCost:
    
    @staticmethod
    def fn(a, y): # a, y are vectors
        return 0.5*np.linalg.norm(a-y)**2
    
    @staticmethod
    def delta(z, a, y): # z, a, y are vectors
        return (a-y)*sigmoid_prime(z) 
    
class CrosssEntropyCost:
    
    @staticmethod
    def fn(a, y): 
        return np.sum(np.nan_to_num(- y*np.log(a) - (1-y)*np.log(1-a)))
    
    @staticmethod
    def delta(z, a, y):
        return (a-y)

class Network:
    
    def __init__(self, n_neurons=[784,30,10], cost=CrosssEntropyCost):
        self.n_neurons = n_neurons # each layer's size
        self.n_layer = len(n_neurons)
        self.default_weight_initializer() # how we initialize weight and bias
        self.cost = cost # cost function to use
    
    def default_weight_initializer(self):
        # initialize weights matrix using Gaussian r.v with mean:0, std:1/input_size
        self.weights = [np.random.randn(x,y) / np.sqrt(y) for x, y in zip(self.n_neurons[1:], self.n_neurons[:-1])] 
        self.bias = [np.random.randn(x) for x in self.n_neurons[1:]]
        
    def large_weight_initializer(self):
        # initialize weights matrix using normal Gaussian r.v
        self.weights = [np.random.randn(x,y) for x, y in zip(self.n_neurons[1:], self.n_neurons[:-1])] 
        self.bias = [np.random.randn(x) for x in self.n_neurons[1:]]
        
    # to prevent overfitting and to get generalized hyperparameters, we can use validation-data    
    def fit(self, X, y, learning_rate=0.1, batch_size=10, lmbda=5.0, epochs=30, x_test=None, y_test=None,\
            monitor_training_cost=False, monitor_test_cost=False, monitor_training_accuracy=True, monitor_test_accuracy=False):
        
        # make mini-batch lists for each x and y
        x_batch_list, y_batch_list = self._mini_batch(X, y, batch_size)
        n = len(X)
        
        lr_scheduler = 0
        test_acc_per_epoch = []
        
        for epoch in range(epochs):
            
            for x_batch, y_batch in zip(x_batch_list, y_batch_list):
                
                self._update_mini_batch(x_batch, y_batch, batch_size, n, learning_rate, lmbda)
                
            print(f"\nEPOCH : {epoch} completed!!!")
            
            if monitor_training_cost:
                training_cost = self._total_cost(X, y, lmbda)
                print(f"Training Data Cost : {training_cost}")
                
            if monitor_test_cost:
                test_cost = self._total_cost(x_test, y_test, lmbda)
                print(f"Test Data Cost : {test_cost}")
                
            if monitor_training_accuracy:
                training_acc = self._total_accuracy(X, y)
                print(f"Training Data Accuracy : {training_acc}%")
                
            if monitor_test_accuracy:
                test_acc = self._total_accuracy(x_test, y_test)
                print(f"Test Data Accuracy : {test_acc}%")
            '''
            # Learning rate scheduler
            test_acc = self._total_accuracy(x_test, y_test)
            test_acc_per_epoch.append(test_acc)
            if epoch >= 9:
                if np.argmax(test_acc_per_epoch) < (epoch-9):
                    lr_scheduler += 1
                    learning_rate *= 0.5
                    if lr_scheduler == 10:
                        break   
            '''
                
    def _total_cost(self, X, y, lmbda):
        cost = 0.0
        for x_i, y_i in zip(X, y):
            a = self._feedforward(x_i)
            cost += self.cost.fn(a,y_i) / len(X)
        cost += 0.5*(lmbda/len(X))*np.sum([np.linalg.norm(w)**2 for w in self.weights]) # using Frabenius Norm(Euclidean Norm) for matrix norm
        return cost
    
    def _total_accuracy(self, X, y):
        i = 0
        for x, y in zip(X, y):
            pred = np.argmax(self._feedforward(x))
            if pred == np.argmax(y):
                i += 1
        return i*100 / len(X)
    
    def _mini_batch(self, X, y, batch_size):
        n_data = len(X)
        n_batch = int(n_data / batch_size)
        idxSet = np.random.permutation(n_data)
        x_batch_list = []
        y_batch_list = []
        for i in range(n_batch):
            x_batch = X[idxSet[i*batch_size:(i+1)*batch_size], :]
            x_batch_list.append(x_batch)
            y_batch = y[idxSet[i*batch_size:(i+1)*batch_size], :]
            y_batch_list.append(y_batch)
        return x_batch_list, y_batch_list
            
    def _feedforward(self, a):
        # return last layer's output vector
        for w, b in zip(self.weights, self.bias):
            a = np.dot(w, a) + b
        return sigmoid(a)
    
    def _update_mini_batch(self, x_batch, y_batch, batch_size, n_data, eta, lmbda):
        n_layer = len(self.n_neurons)
        dW = [np.zeros(w.shape) for w in self.weights]
        db = [np.zeros(b.shape) for b in self.bias]

        for x_i, y_i in zip(x_batch, y_batch):
            mini_dW, mini_db = self._backpropagation(x_i, y_i, n_layer)
            dW = [w + nw for w, nw in zip(dW, mini_dW)]
            db = [b + nb for b, nb in zip(db, mini_db)]

        # update using stochastic gradient descent and L2 regularization
        self.weights = [(1-eta*lmbda/n_data)*w - (eta / batch_size) * nw for w, nw in zip(self.weights, dW)]
        self.bias = [b - (eta / batch_size) * nb for b, nb in zip(self.bias, db)]
    
    def _backpropagation(self, x, y_i, n_layer):
        mini_dW = [np.zeros(w.shape) for w in self.weights] # each layer's weight
        mini_db = [np.zeros(b.shape) for b in self.bias] # each layer's bias
        
        # feedforward
        activation = x
        activations = [x]
        zs = []
        for w, b in zip(self.weights, self.bias):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
            
        # backward
        delta = self.cost.delta(zs[-1], activations[-1], y_i)
        mini_db[-1] = delta
        mini_dW[-1] = np.outer(delta,activations[-2])
        
        for i in range(2, n_layer):
            delta = np.multiply(np.dot(self.weights[-i+1].T, delta), sigmoid_prime(zs[-i]))
            mini_db[-i] = delta
            mini_dW[-i] = np.outer(delta,activations[-i-1])
        
        # update each layer's weights and bias using backpropagation for one input
        return mini_dW, mini_db
        
# miscellaneous functions
def sigmoid(x):
    return 1 / (1+np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))


In [8]:
import tensorflow as tf
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train/255.0, x_test/255.0

x_train = x_train.reshape(-1,784)
x_test = x_test.reshape(-1,784)
y_train_onehat = tf.keras.utils.to_categorical(y_train, 10)
y_test_onehat = tf.keras.utils.to_categorical(y_test, 10)

# test
nw = Network()
nw.fit(x_train[:10000], y_train_onehat[:10000], epochs=30, x_test=x_test[:100], y_test=y_test_onehat[:100],\
       monitor_training_cost=False, monitor_test_cost=False, monitor_training_accuracy=False, monitor_test_accuracy=True)




EPOCH : 0 completed!!!
Test Data Accuracy : 86.0%

EPOCH : 1 completed!!!
Test Data Accuracy : 86.0%

EPOCH : 2 completed!!!
Test Data Accuracy : 82.0%

EPOCH : 3 completed!!!
Test Data Accuracy : 82.0%

EPOCH : 4 completed!!!
Test Data Accuracy : 80.0%

EPOCH : 5 completed!!!
Test Data Accuracy : 76.0%

EPOCH : 6 completed!!!
Test Data Accuracy : 73.0%

EPOCH : 7 completed!!!
Test Data Accuracy : 71.0%

EPOCH : 8 completed!!!
Test Data Accuracy : 70.0%

EPOCH : 9 completed!!!
Test Data Accuracy : 71.0%

EPOCH : 10 completed!!!
Test Data Accuracy : 71.0%

EPOCH : 11 completed!!!
Test Data Accuracy : 70.0%

EPOCH : 12 completed!!!
Test Data Accuracy : 69.0%

EPOCH : 13 completed!!!
Test Data Accuracy : 69.0%

EPOCH : 14 completed!!!
Test Data Accuracy : 67.0%

EPOCH : 15 completed!!!
Test Data Accuracy : 66.0%

EPOCH : 16 completed!!!
Test Data Accuracy : 65.0%

EPOCH : 17 completed!!!
Test Data Accuracy : 64.0%

EPOCH : 18 completed!!!
Test Data Accuracy : 64.0%

EPOCH : 19 completed!