In [1]:
import numpy as np

In [2]:
train = np.genfromtxt('MNIST_CSV/mnist_train.csv', delimiter=',',dtype='int')
test = np.genfromtxt('MNIST_CSV/mnist_test.csv', delimiter=',',dtype='int')

X_train = train[:,1:].reshape(-1,1,28,28)
y_train = train[:,0]

X_test = test[:,1:].reshape(-1,1,28,28)
y_test = test[:,0]

In [None]:
class Layer:

    def __init__(self):
        pass

class LayerInput(Layer):

    def __init__(self):
        self.activation = 'linear'

    def __repr__(self):
        return f'{type(self).__name__}'
        
class LayerDense(Layer):

    def __init__(self,n_neurons,activation='linear'):
        self.n_neurons = n_neurons
        self.activation = activation

    def __repr__(self):
        return f'{type(self).__name__}: activation = {self.activation}'
    
    def initialize_weights_and_biases(self,n_neurons_prev):
        self.W = np.random.rand(self.n_neurons, n_neurons_prev) - 0.5
        self.b = np.random.rand(self.n_neurons, 1) - 0.5

    def update_weights_and_biases(self,dW,db,lr=0.01):
        self.W -= lr*dW
        self.b -= lr*db
        
class LayerFlatten(Layer):

    def __init__(self):
        self.activation = 'linear'

    def __repr__(self):
        return f'{type(self).__name__}'

In [26]:
class FullyConnected:

    def __init__(self,file=None,layers=None):
        if file:
            self.load(file)
        elif layers:
            self.layers = layers
        else:
            self.layers = [LayerInput()]
            self.score = 0.0
            self.best_score = 0.0
            self.best = None

    def __repr__(self):
        return f'{type(self).__name__}'

    def add_layer(self,layer):
        self.layers.append(layer)
        
    def compile(self,X_shape):
        self.activations = []
        if len(X_shape) == 4:
            _, channels, height, width = X_shape
            input_flat = False
        elif len(X_shape) == 2:
            n_neurons, _ = X_shape
            input_flat = True
        for i,layer in enumerate(self.layers):
            self.activations.append(layer.activation)
            if type(layer) == LayerInput:
                layer.input_shape = (n_neurons, None) if input_flat else (None, channels, height, width)
                layer.output_shape = (n_neurons, None) if input_flat else (None, channels, height, width)
                layer.n_trainable_parameters = 0
            if type(layer) == LayerFlatten:
                n_neurons = channels*height*width
                layer.input_shape = (None, channels, height, width)
                layer.output_shape = (n_neurons, None)
                layer.n_trainable_parameters = 0
                n_neurons, _ = layer.output_shape
            if type(layer) == LayerDense:
                layer.initialize_weights_and_biases(n_neurons)
                layer.input_shape = (n_neurons, None)
                layer.output_shape = (layer.n_neurons, None)
                layer.n_trainable_parameters = layer.n_neurons * (n_neurons + 1)
                n_neurons, _ = layer.output_shape
        self.describe()
                
    def describe(self):
        print(f' -----------------------------------------------------------------------------------------------------------------')
        print(f'| Layer |     Layer type      |      Input shape      |      Output shape     | Activation | Trainable Parameters |')
        print(f' -----------------------------------------------------------------------------------------------------------------')
        for i,layer in enumerate(self.layers):
            if type(layer) == LayerDense:
                layer_type = "{:^21}".format('LayerDense')
            elif type(layer) == LayerFlatten:
                layer_type = "{:^21}".format('LayerFlatten')
            elif type(layer) == LayerInput:
                layer_type = "{:^21}".format('LayerInput')
            input_shape = "{:^23}".format(str(layer.input_shape))
            output_shape = "{:^23}".format(str(layer.output_shape))
            n_trainable_parameters = "{:^22}".format(layer.n_trainable_parameters)
            activation = "{:^12}".format(layer.activation)
            print(f'|{"{:^7}".format(i)}|{layer_type}|{input_shape}|{output_shape}|{activation}|{n_trainable_parameters}|')
        print(f' -----------------------------------------------------------------------------------------------------------------') 

    def one_hot(self,Y):
        Y_gt = np.zeros((Y.size, self.layers[-1].n_neurons))
        Y_gt[np.arange(Y.size), Y] = 1
        return Y_gt.T

    def ReLu(self,A):
        return np.maximum(A,0)
    
    def dReLu(self,Z):
        return Z > 0
    
    def SoftMax(self,A):
        return np.exp(A) / sum(np.exp(A))
    
    def activate(self,A,activation):
        if activation == 'relu':
            return self.ReLu(A)
        if activation == 'softmax':
            return self.SoftMax(A)
        if activation == 'linear':
            return A
        
    def dActivation(self,Z,activation):
        if activation == 'relu':
            return self.dReLu(Z)
        if activation == 'linear':
            return 1

    def forward_flatten(self,A):
        batch_size, _, _, _ = A.shape
        return A.reshape(batch_size,-1).T
    
    def forward_dense(self,A,Z,layer,is_training=False):
        if is_training:
            layer.cache = (A,Z)
        Z = layer.W.dot(A) + layer.b
        A = self.activate(Z,layer.activation)
        return A, Z
    
    def backward_dense(self,dA_prev,layer,batch_size,lr,activation):
        A_prev, Z_prev = layer.cache
        dW = 1/batch_size * dA_prev.dot(A_prev.T)
        db = 1/batch_size * np.sum(dA_prev)
        dA = layer.W.T.dot(dA_prev) * self.dActivation(Z_prev,activation)
        layer.update_weights_and_biases(dW,db,lr)
        layer.cache = None
        return dA

    def forward(self,A,is_training=False):
        Z = A.copy()
        for layer in self.layers:
            if type(layer) == LayerFlatten:
                A = self.forward_flatten(A)
            elif type(layer) == LayerDense:
                A, Z = self.forward_dense(A,Z,layer,is_training)
        return A
    
    def backward(self,dA,batch_size,lr):
        for layer, activation in zip(self.layers[1:][::-1],self.activations[:-1][::-1]):
            if type(layer) == LayerDense:
                dA = self.backward_dense(dA,layer,batch_size,lr,activation)
        
    def fit(self,X_train,y_train,X_test=None,y_test=None,batch_size=100,epochs=10,lr=0.01,verbose=True):
        self.print_progress(verbose,'start')
        for epoch in range(epochs):
            num_batches = X_train.shape[0] // batch_size
            for i in range(0,X_train.shape[0],batch_size):
                X_i = X_train[i:i+batch_size]
                y_i = y_train[i:i+batch_size]
                y_pred_oh = self.forward(X_i,is_training=True)
                y_true_oh = self.one_hot(y_i)
                dA = y_pred_oh - y_true_oh
                self.backward(dA,batch_size,lr)
                self.print_progress(verbose,'batch',epoch=epoch,i=i,num_batches=num_batches,batch_size=batch_size,y_i=y_i,y_pred_oh=y_pred_oh)
            train_acc = self.val(X_train,y_train)
            if (X_test is not None) and (y_test is not None):
                test_acc = self.val(X_test,y_test)
                self.print_progress(verbose,'epoch',epoch=epoch,train_acc=train_acc,test_acc=test_acc)
                self.best_score = max(self.best_score,test_acc)
                self.score = test_acc
            else:
                self.print_progress(verbose,'epoch',epoch=epoch,train_acc=train_acc,test_acc=None)
                self.best_score = max(self.best_score,train_acc)
                self.score = train_acc
        self.print_progress(verbose,'end')

    def print_progress(self,verbose,which,epoch=None,i=None,num_batches=None,batch_size=None,y_i=None,y_pred_oh=None,train_acc=None,test_acc=None):
        if verbose:
            if which == 'start':
                print(f'{"{:^7}".format("epoch")} | {"{:^12}".format("progress")} | {"{:^10}".format("accuracy")}')
                print(f'{"{:<53}".format("-"*53)}')
            elif which == 'batch':
                print(f'{"{:^7}".format(str(epoch))} | [{"{:<10}".format("#"*int(i*10/num_batches/batch_size))}] | {"{:^7}".format(str(round(self.evaluate_accuracy(y_i,np.argmax(y_pred_oh,axis=0)),4)))}',end='\r', flush=True)
            elif which == 'epoch':
                if test_acc is not None:
                    print(f'{"{:^7}".format(str(epoch))} | [{"{:<10}".format("#"*10)}] | train: {"{:^7}".format(str(round(train_acc,4)))} test: {"{:^7}".format(str(round(test_acc,4)))}')
                else:
                    print(f'{"{:^7}".format(str(epoch))} | [{"{:<10}".format("#"*10)}] | train: {"{:^7}".format(str(round(train_acc,4)))}')
            elif which == 'end':
                print(f'{"{:<53}".format("-"*53)}')

    def predict(self,X):
        y = self.forward(X)
        return np.argmax(y,0)

    def evaluate_accuracy(self,y_true,y_pred):
        return (y_true == y_pred).sum() / len(y_true)

    def val(self,X_test,y_true):
        y_pred = self.predict(X_test)
        acc = self.evaluate_accuracy(y_true,y_pred)
        return acc
                
    def save(self,file):
        if not file.endswith('.npz'):
            file = file +'.npz'
        np.savez(file=file,
                 layers=self.layers,
                 activations=self.activations,
                 score = self.score,
                 best_score=self.best_score)
        print(f'Saved as {file}')
        
    def load(self,file):
        if not file.endswith('.npz'):
            file = file + '.npz'
            
        model_archieve = np.load(file,allow_pickle=True)
        self.layers = model_archieve['layers'].tolist()
        self.activations = model_archieve['activations'].tolist()
        self.score = float(model_archieve['score'])
        self.best_score = float(model_archieve['best_score'])
        
        

In [34]:
model = FullyConnected()

model.add_layer(LayerFlatten())
model.add_layer(LayerDense(112,activation='relu'))
model.add_layer(LayerDense(16,activation='relu'))
model.add_layer(LayerDense(10,activation='softmax'))

model.compile(X_test.shape)

 -----------------------------------------------------------------------------------------------------------------
| Layer |     Layer type      |      Input shape      |      Output shape     | Activation | Trainable Parameters |
 -----------------------------------------------------------------------------------------------------------------
|   0   |     LayerInput      |   (None, 1, 28, 28)   |   (None, 1, 28, 28)   |   linear   |          0           |
|   1   |    LayerFlatten     |   (None, 1, 28, 28)   |      (784, None)      |   linear   |          0           |
|   2   |     LayerDense      |      (784, None)      |      (112, None)      |    relu    |        87920         |
|   3   |     LayerDense      |      (112, None)      |      (16, None)       |    relu    |         1808         |
|   4   |     LayerDense      |      (16, None)       |      (10, None)       |  softmax   |         170          |
 -------------------------------------------------------------------------

In [35]:
model.fit(X_train/256,y_train,X_test/256,y_test,batch_size=10000,epochs=20,lr=0.1)

 epoch  |   progress   |  accuracy 
-----------------------------------------------------
   0    | [##########] | train: 0.1278  test: 0.1303 
   1    | [##########] | train: 0.1812  test: 0.1853 
   2    | [##########] | train: 0.2442  test: 0.2514 
   3    | [##########] | train: 0.2693  test: 0.2752 
   4    | [##########] | train: 0.3092  test: 0.3212 
   5    | [##########] | train: 0.3653  test: 0.3839 
   6    | [##########] | train:  0.425  test: 0.4488 
   7    | [##########] | train: 0.4769  test: 0.5002 
   8    | [##########] | train: 0.5232  test: 0.5459 
   9    | [##########] | train: 0.5602  test: 0.5794 
  10    | [##########] | train: 0.5883  test: 0.6058 
  11    | [##########] | train: 0.6126  test: 0.6326 
  12    | [##########] | train:  0.634  test: 0.6559 
  13    | [##########] | train: 0.6542  test: 0.6738 
  14    | [##########] | train: 0.6699  test: 0.6894 
  15    | [##########] | train: 0.6763  test: 0.6928 
  16    | [##########] | train: 0.6828  test: 