In [1]:
import os
import numpy as np
import tensorflow as tf
import time
import datetime
import matplotlib.pyplot as plt
np.random.seed(3122)
tf.random.set_seed(3131)
category_num = 10
#MNIST = tf.keras.datasets.fashion_mnist
MNIST = tf.keras.datasets.mnist

In [2]:
#load data and rescale
(x_train, y_train),(x_test, y_test) = MNIST.load_data()
x_train = np.reshape(x_train / 255.0, (len(x_train),-1))
x_test = np.reshape(x_test / 255.0, (len(x_test),-1))

In [3]:
#Use one-hot encoding to represent category
one_hot = lambda m: [1 if i == m else 0 for i in range(category_num)]
y_train = np.array([one_hot(y) for y in y_train])
y_test = np.array([one_hot(y) for y in y_test])

In [4]:
class MLP(tf.keras.Model):
    def __init__(self, size_input, size_hidden, size_output, device=None):
        super(MLP, self).__init__()
        """
        size_input: int, size of input layer
        size_hidden: int, size of hidden layer
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
        """
        self.size_input, self.size_hidden, self.size_output, self.device =\
        size_input, size_hidden, size_output, device

        # Initialize weights between input layer and hidden layer 1
        self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden]))
        # Initialize biases for hidden layer 1
        self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden]))
         # Initialize weights between hidden layer 1 and hidden layer 2
        self.W2 = tf.Variable(tf.random.normal([self.size_hidden, self.size_hidden]))
        # Initialize biases for hidden layer 2
        self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden]))
         # Initialize weights between hidden layer 2 and output layer
        self.W3 = tf.Variable(tf.random.normal([self.size_hidden, self.size_output]))
        # Initialize biases for output layer
        self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))

        # Define variables to be updated during backpropagation
        self.MLP_variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
        # self.MLP_variables = [self.W1, self.W3, self.b1, self.b3]

    
    def forward(self, X, dropout_rate=0):
        """
        forward pass
        X: Tensor, inputs
        """
        if self.device is not None:
            with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
                self.y = self.compute_output(X, dropout_rate=dropout_rate)
        else:
            self.y = self.compute_output(X, dropout_rate=dropout_rate)

        return self.y
  
    def loss(self, y_pred, y_true, L1=0, L2=0):
        '''
        y_pred - Tensor of shape (batch_size, size_output)
        y_true - Tensor of shape (batch_size, size_output)
        '''
        y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
        y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
        l2_penlty = tf.nn.l2_loss(self.W1)*L2
        loss_with_l2 = l2_penlty+tf.losses.categorical_crossentropy(y_true_tf, y_pred_tf)
        return tf.losses.categorical_crossentropy(y_true_tf, y_pred_tf)
  
    def backward(self, X_train, y_train, dropout_rate, learning_rate, L1=0, L2=0):
        """
        backward pass
        """
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        with tf.GradientTape() as tape:
            predicted = self.forward(X_train,dropout_rate=dropout_rate)
            current_loss = self.loss(predicted, y_train)
        grads = tape.gradient(current_loss, self.MLP_variables)
        optimizer.apply_gradients(zip(grads, self.MLP_variables))
        return current_loss, predicted
        
        
    def compute_output(self, X, dropout_rate=0):
        """
        Custom method to obtain output tensor during forward pass
        """
        # Cast X to float32
        X_tf = tf.cast(X, dtype=tf.float32)
        #Remember to normalize your dataset before moving forward
        # Compute values in hidden layer 1
        what = tf.matmul(X_tf, self.W1) + self.b1
        hhat = tf.nn.relu(what)
        hhat = tf.nn.dropout(hhat, rate = dropout_rate)

        # Compute values in hidden layer 2
        what = tf.matmul(hhat, self.W2) + self.b2
        hhat = tf.nn.relu(what)
        hhat = tf.nn.dropout(hhat, rate = dropout_rate)

        # Compute output
        output = tf.matmul(hhat, self.W3) + self.b3
        #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
        #Second add tf.Softmax(output) and then return this variable
        return tf.nn.softmax(output)
#         return output
# Set hyper-parameters



In [5]:
#Hyer-paarameter
Dropout_rate = 0
Batch_Size = 300
Hidden_Size = 128
Epochs_num = 50
Learning_rate = 0.003
L2_Penalty = 0.3

In [6]:
size_input = len(x_train[0])
size_hidden = Hidden_Size
size_output = len(y_train[0])
number_of_train_examples = len(x_train[0])
number_of_test_examples = len(x_test[0])

In [None]:
rand_seed = np.random.randint(low=1,high=1000,size=10)
loss_res = np.zeros(10)
train_acc = np.zeros(10)
test_acc = np.zeros(10)
for k in range(20):
    #np.random.seed(rand_seed[k])
    #tf.random.set_seed(rand_seed[k])
    L2_Penalty = 0.05*k
    #Dropout_rate = 0.05*k
    mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='cpu')
    time_start = time.time()
    epoch = 0
    last_loss = 0
    for epoch in range(Epochs_num):
        time_start = time.time()
        loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
        train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(25, seed=epoch*(1234)).batch(Batch_Size)
        for inputs, outputs in train_ds:
            cur_loss, preds = mlp_on_gpu.backward(inputs, outputs, dropout_rate=Dropout_rate, learning_rate=Learning_rate, L2=L2_Penalty)
            loss_total_gpu += cur_loss
      # Calculate Accuracy
        train_accuracy, test_accuracy = tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.CategoricalAccuracy()
        train_accuracy.update_state(y_train, mlp_on_gpu.forward(x_train))
        test_accuracy.update_state(y_test, mlp_on_gpu.forward(x_test))

        print(f'Epoch = {epoch+1} , Training Cross Entropy:= {np.sum(loss_total_gpu) / len(x_train)} , Training Accuracy:= {train_accuracy.result().numpy()} , Test Accuracy:= {test_accuracy.result().numpy()}')
        time_taken = time.time() - time_start
        print('Time : {:.2f}s'.format(time_taken))
    loss_res[k] = np.sum(loss_total_gpu) / len(x_train)
    train_acc[k] = train_accuracy.result().numpy()
    test_acc[k] = test_accuracy.result().numpy()

    

Epoch = 1 , Training Cross Entropy:= 9.966797916666666 , Training Accuracy:= 0.45445001125335693 , Test Accuracy:= 0.4602000117301941
Time : 2.67s
Epoch = 2 , Training Cross Entropy:= 7.8688515625 , Training Accuracy:= 0.5565500259399414 , Test Accuracy:= 0.5598999857902527
Time : 2.63s
Epoch = 3 , Training Cross Entropy:= 6.717480208333333 , Training Accuracy:= 0.5999500155448914 , Test Accuracy:= 0.6007000207901001
Time : 2.62s
Epoch = 4 , Training Cross Entropy:= 6.270572916666667 , Training Accuracy:= 0.6146000027656555 , Test Accuracy:= 0.6162999868392944
Time : 2.62s
Epoch = 5 , Training Cross Entropy:= 6.123541666666667 , Training Accuracy:= 0.6250166893005371 , Test Accuracy:= 0.6274999976158142
Time : 2.61s
Epoch = 6 , Training Cross Entropy:= 5.9936265625 , Training Accuracy:= 0.6262500286102295 , Test Accuracy:= 0.6291000247001648
Time : 2.62s
Epoch = 7 , Training Cross Entropy:= 5.900351041666666 , Training Accuracy:= 0.6364333629608154 , Test Accuracy:= 0.6366000175476074


Epoch = 8 , Training Cross Entropy:= 6.358015104166666 , Training Accuracy:= 0.6062666773796082 , Test Accuracy:= 0.6014999747276306
Time : 2.69s
Epoch = 9 , Training Cross Entropy:= 6.28583125 , Training Accuracy:= 0.6166999936103821 , Test Accuracy:= 0.6136999726295471
Time : 2.70s
Epoch = 10 , Training Cross Entropy:= 6.195946875 , Training Accuracy:= 0.6189500093460083 , Test Accuracy:= 0.6166999936103821
Time : 2.70s
Epoch = 11 , Training Cross Entropy:= 6.167797916666666 , Training Accuracy:= 0.6176999807357788 , Test Accuracy:= 0.620199978351593
Time : 2.70s
Epoch = 12 , Training Cross Entropy:= 6.106105208333333 , Training Accuracy:= 0.62131667137146 , Test Accuracy:= 0.6215000152587891
Time : 2.69s
Epoch = 13 , Training Cross Entropy:= 6.1037083333333335 , Training Accuracy:= 0.6273833513259888 , Test Accuracy:= 0.6273999810218811
Time : 2.69s
Epoch = 14 , Training Cross Entropy:= 6.028122916666667 , Training Accuracy:= 0.6270666718482971 , Test Accuracy:= 0.6256999969482422
T

Epoch = 15 , Training Cross Entropy:= 5.426417708333333 , Training Accuracy:= 0.6651999950408936 , Test Accuracy:= 0.666700005531311
Time : 2.67s
Epoch = 16 , Training Cross Entropy:= 5.4067125 , Training Accuracy:= 0.6660666465759277 , Test Accuracy:= 0.6669999957084656
Time : 2.67s
Epoch = 17 , Training Cross Entropy:= 5.376924479166667 , Training Accuracy:= 0.6675000190734863 , Test Accuracy:= 0.6664999723434448
Time : 2.68s
Epoch = 18 , Training Cross Entropy:= 5.397786458333333 , Training Accuracy:= 0.6649166941642761 , Test Accuracy:= 0.6653000116348267
Time : 2.66s
Epoch = 19 , Training Cross Entropy:= 5.353049479166667 , Training Accuracy:= 0.6699333190917969 , Test Accuracy:= 0.6700000166893005
Time : 2.69s
Epoch = 20 , Training Cross Entropy:= 5.337302604166666 , Training Accuracy:= 0.6700833439826965 , Test Accuracy:= 0.6693000197410583
Time : 2.67s
Epoch = 21 , Training Cross Entropy:= 5.3530114583333335 , Training Accuracy:= 0.668316662311554 , Test Accuracy:= 0.6682999730

Epoch = 22 , Training Cross Entropy:= 2.6259020833333335 , Training Accuracy:= 0.8375666737556458 , Test Accuracy:= 0.8327000141143799
Time : 2.68s
Epoch = 23 , Training Cross Entropy:= 2.609696614583333 , Training Accuracy:= 0.8366833329200745 , Test Accuracy:= 0.832099974155426
Time : 2.71s
Epoch = 24 , Training Cross Entropy:= 2.6117658854166668 , Training Accuracy:= 0.8418999910354614 , Test Accuracy:= 0.8370000123977661
Time : 2.69s
Epoch = 25 , Training Cross Entropy:= 2.5900638020833333 , Training Accuracy:= 0.8314333558082581 , Test Accuracy:= 0.8305000066757202
Time : 2.67s
Epoch = 26 , Training Cross Entropy:= 2.5540114583333335 , Training Accuracy:= 0.8428833484649658 , Test Accuracy:= 0.8378999829292297
Time : 2.66s
Epoch = 27 , Training Cross Entropy:= 2.5412825520833335 , Training Accuracy:= 0.8427833318710327 , Test Accuracy:= 0.8374000191688538
Time : 2.69s
Epoch = 28 , Training Cross Entropy:= 2.51298671875 , Training Accuracy:= 0.8426166772842407 , Test Accuracy:= 0.8

Epoch = 29 , Training Cross Entropy:= 6.891573958333334 , Training Accuracy:= 0.5739166736602783 , Test Accuracy:= 0.5738999843597412
Time : 2.66s
Epoch = 30 , Training Cross Entropy:= 6.8955078125 , Training Accuracy:= 0.5742499828338623 , Test Accuracy:= 0.5764999985694885
Time : 2.67s
Epoch = 31 , Training Cross Entropy:= 6.869058854166667 , Training Accuracy:= 0.5743833184242249 , Test Accuracy:= 0.5759000182151794
Time : 2.65s
Epoch = 32 , Training Cross Entropy:= 6.869332291666667 , Training Accuracy:= 0.5728166699409485 , Test Accuracy:= 0.5769000053405762
Time : 2.66s
Epoch = 33 , Training Cross Entropy:= 6.849088020833333 , Training Accuracy:= 0.576366662979126 , Test Accuracy:= 0.5774999856948853
Time : 2.67s
Epoch = 34 , Training Cross Entropy:= 6.847454166666667 , Training Accuracy:= 0.5755500197410583 , Test Accuracy:= 0.5770000219345093
Time : 2.67s
Epoch = 35 , Training Cross Entropy:= 6.833017708333333 , Training Accuracy:= 0.5741166472434998 , Test Accuracy:= 0.5753999

Epoch = 36 , Training Cross Entropy:= 4.185298958333333 , Training Accuracy:= 0.7437833547592163 , Test Accuracy:= 0.7368999719619751
Time : 2.71s
Epoch = 37 , Training Cross Entropy:= 4.17910625 , Training Accuracy:= 0.7422666549682617 , Test Accuracy:= 0.7366999983787537
Time : 2.72s
Epoch = 38 , Training Cross Entropy:= 4.1650703125 , Training Accuracy:= 0.7380499839782715 , Test Accuracy:= 0.7311000227928162
Time : 2.69s
Epoch = 39 , Training Cross Entropy:= 4.157534635416667 , Training Accuracy:= 0.7423999905586243 , Test Accuracy:= 0.7368999719619751
Time : 2.73s
Epoch = 40 , Training Cross Entropy:= 4.158047395833333 , Training Accuracy:= 0.7432000041007996 , Test Accuracy:= 0.7365000247955322
Time : 2.69s
Epoch = 41 , Training Cross Entropy:= 4.1502755208333335 , Training Accuracy:= 0.743399977684021 , Test Accuracy:= 0.7383999824523926
Time : 2.71s
Epoch = 42 , Training Cross Entropy:= 4.125791666666666 , Training Accuracy:= 0.7447500228881836 , Test Accuracy:= 0.7379000186920

Epoch = 43 , Training Cross Entropy:= 3.900937760416667 , Training Accuracy:= 0.7592833042144775 , Test Accuracy:= 0.7555999755859375
Time : 2.76s
Epoch = 44 , Training Cross Entropy:= 3.904011458333333 , Training Accuracy:= 0.7605500221252441 , Test Accuracy:= 0.7581999897956848
Time : 3.10s
Epoch = 45 , Training Cross Entropy:= 3.8835200520833335 , Training Accuracy:= 0.7583666443824768 , Test Accuracy:= 0.7554000020027161
Time : 2.92s
Epoch = 46 , Training Cross Entropy:= 3.8720091145833333 , Training Accuracy:= 0.7616833448410034 , Test Accuracy:= 0.7595999836921692
Time : 2.72s
Epoch = 47 , Training Cross Entropy:= 3.8619091145833333 , Training Accuracy:= 0.7620499730110168 , Test Accuracy:= 0.7583000063896179
Time : 2.65s
Epoch = 48 , Training Cross Entropy:= 3.873925520833333 , Training Accuracy:= 0.7587666511535645 , Test Accuracy:= 0.7556999921798706
Time : 2.69s
Epoch = 49 , Training Cross Entropy:= 3.8587994791666667 , Training Accuracy:= 0.7560499906539917 , Test Accuracy:=

Epoch = 50 , Training Cross Entropy:= 5.127490104166666 , Training Accuracy:= 0.6823333501815796 , Test Accuracy:= 0.6808000206947327
Time : 2.70s
Epoch = 1 , Training Cross Entropy:= 12.395439583333333 , Training Accuracy:= 0.3723333477973938 , Test Accuracy:= 0.3684000074863434
Time : 2.67s
Epoch = 2 , Training Cross Entropy:= 9.500958333333333 , Training Accuracy:= 0.438400000333786 , Test Accuracy:= 0.43810001015663147
Time : 2.72s
Epoch = 3 , Training Cross Entropy:= 8.682933333333333 , Training Accuracy:= 0.4841666519641876 , Test Accuracy:= 0.4851999878883362
Time : 2.69s
Epoch = 4 , Training Cross Entropy:= 8.0215 , Training Accuracy:= 0.5199999809265137 , Test Accuracy:= 0.5223000049591064
Time : 2.70s
Epoch = 5 , Training Cross Entropy:= 7.638320833333333 , Training Accuracy:= 0.5321833491325378 , Test Accuracy:= 0.5299999713897705
Time : 2.71s
Epoch = 6 , Training Cross Entropy:= 7.505898958333334 , Training Accuracy:= 0.5396000146865845 , Test Accuracy:= 0.5390999913215637


In [14]:
loss_res.var()

0.8441758087876516

In [15]:
loss_res.mean()

4.772867447916667

In [16]:
train_acc.mean()

0.7036133289337159

In [17]:
train_acc.var()

0.0031539036356461733

In [18]:
test_acc.mean()

0.7009700000286102

In [19]:
test_acc.var()

0.003102608525094617