In [1]:
import tensorflow as tf
import numpy as np 
import random 
import matplotlib.pyplot as plt 

In [7]:
# Load MNIST, scale the pixel values, and one-hot encode the labels.

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Scale by 255 (presumably the images arrive as 0-255 integers, giving
# floats in [0, 1] -- confirm against the Keras dataset documentation).
x_train, x_test = x_train/255, x_test/255

TRAIN_LENGTH = len(x_train) # 60000
TEST_LENGTH = len(x_test) # 10000

# One-hot encode by picking rows of the identity matrix: row k of
# np.eye(NUM_CLASSES) is all zeros except a 1.0 in column k. This is
# self-contained, so the cell runs on a fresh kernel without relying on
# helper functions defined further down the notebook.
NUM_CLASSES = 10
train_labels = np.eye(NUM_CLASSES)[y_train]   # shape (TRAIN_LENGTH, NUM_CLASSES)
test_labels = np.eye(NUM_CLASSES)[y_test]     # shape (TEST_LENGTH, NUM_CLASSES)

In [9]:
# Defining some key methods

def label_to_output(label, num_classes=10):
    """One-hot encode an integer class label as a plain Python list.

    Parameters
    ----------
    label : int
        Class index, expected in the range ``0 <= label < num_classes``.
    num_classes : int, optional
        Length of the returned vector (defaults to 10).

    Returns
    -------
    list of int
        ``num_classes`` entries, all 0 except a single 1 at position ``label``.

    Raises
    ------
    IndexError
        If ``label`` is outside ``[0, num_classes)``. Negative labels are
        rejected explicitly; plain list indexing would otherwise wrap around
        and silently mark the wrong class.
    """
    if not 0 <= label < num_classes:
        raise IndexError(
            "label {!r} is out of range for {} classes".format(label, num_classes)
        )
    o = [0] * num_classes
    o[label] = 1
    return o

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, element-wise.

    The value is computed in a numerically stable form: the exponential is
    only ever taken of a non-positive number, so inputs with a large negative
    value do not overflow ``np.exp``.

    Parameters
    ----------
    x : scalar or array-like of real numbers

    Returns
    -------
    A NumPy scalar for scalar input, otherwise an ndarray with the shape of ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in [0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(x))
    # For x >= 0 use 1 / (1 + e^-x); for x < 0 use e^x / (1 + e^x).
    # The two expressions are algebraically identical.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as-is.
    return result[()]

def d_sigmoid(x):
    """Return the derivative of the logistic sigmoid evaluated at ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.

    Parameters
    ----------
    x : any input accepted by ``sigmoid``

    Returns
    -------
    Same kind of value as ``sigmoid(x)`` returns.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(x)
    return s * (1 - s)

In [None]:
'''
MLN algorithm:: 

* Input: (m) x 1 x 784

* FCL:   64 neurons
* FCL_w: (784 x 64)
* FCL_b: (1 x 64)

* output layer: 10 neurons
* output_w:     (64 x 10)
* output_b:     (1 x 10)


Forward: 
* x -> Input                                    m x 1 x 784

* z1 = Input * FCL_w + FCL_b                    m x 1 x 64
* a1 = sigmoid(z1)

* z2 = a1 * output_w + output_b                 m x 1 x 10
* y  = sigmoid(z2)                              

Backward: 

Step 1: Error
* output_error  = (y - label) .* d_sigmoid(z2)                          m x 1 x 10

                   (m x 1 x 10)      (64 x 10)^T    (1) x (1 x 64)
* FCL_error     = (output_error * output_w^T) .* (d_sigmoid(z1))        m x 1 x 64


Step 2: Gradient descent

                 (64 x 10)    constant           m x (1 x 10)^T   m x 1 x 64  
* output_w      = output_w - (alpha / m) * sum( output_error^T  *    a1      )^T     64 x 10

                  (1 x 10)    constant           m x 1 x 10
* output_b      = output_b - (alpha / m) * sum( output_error )

                 (784 x 64)   constant           m x (1 x 64)^T   m x 1 x 784  
* FCL_w         = FCL_w    - (alpha / m) * sum( FCL_error^T     *    Input   )^T    784 x 64

                  (1 x 64)    constant           m x 1 x 64
* FCL_b         = FCL_b    - (alpha / m) * sum( FCL_error )


'''


In [None]:
'''

batch size

'''