In [7]:
import os
import numpy as np
import time
import tensorflow as tf

np.random.seed(1234)
tf.random.set_seed(1234)

In [8]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data(
            path='mnist.npz'
        )

size_input   = X_train.shape[1]*X_train.shape[2]
size_hidden1 = 256
size_hidden2 = 128
size_output  = 10

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1]*X_train.shape[2])
X_test  = X_test.reshape(X_test.shape[0], X_test.shape[1]*X_test.shape[2])

y_train = tf.keras.utils.to_categorical(y_train, num_classes=size_output)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=size_output)

X_train = X_train.astype(float) / 255.0
X_test = X_test.astype(float) / 255.0

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)



(60000, 784)
(60000, 10)
(10000, 784)
(10000, 10)


In [2]:
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(50)
test_ds  = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(15)

NameError: name 'tf' is not defined

In [10]:
class MLP(object):
    
    def __init__(self, size_input, size_hidden1, size_hidden2, size_output, device=None):
       
        self.size_input, self.size_hidden1, self.size_hidden2, self.size_output, self.device =\
        size_input, size_hidden1, size_hidden2, size_output, device
        
        self.W1 = tf.Variable(0.01*tf.random.normal([self.size_input, self.size_hidden1]))
        self.b1 = tf.Variable(0.01*tf.random.normal([1, self.size_hidden1]))
        
        self.W2 = tf.Variable(0.01*tf.random.normal([self.size_hidden1, self.size_hidden2]))
        self.b2 = tf.Variable(0.01*tf.random.normal([1, self.size_hidden2]))
        
        self.W3 = tf.Variable(0.01*tf.random.normal([self.size_hidden2, self.size_output]))
        self.b3 = tf.Variable(0.01*tf.random.normal([1, self.size_output]))
        
        self.varibles = [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3]
        
        
    def forward(self, X):
        
        if self.device is not None:
            with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
                self.y = self.compute_output(X)
        else:
            self.y = self.compute_output(X)
            
        return self.y
    
    def loss(self, y_pred, y_true):
        
        y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
        y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
        
        return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
    
    def backward(self, X_train, y_train):
        
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.05)
        
        with tf.GradientTape() as tape:
            predicted = self.forward(X_train)
            current_loss = self.loss(predicted, y_train)
            
        grads = tape.gradient(current_loss, self.varibles)
        optimizer.apply_gradients(zip(grads, self.varibles))
    
    def backward_L1Reg(self, X_train, y_train, myLambda):
        
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.05)
        
        with tf.GradientTape() as tape:
            predicted = self.forward(X_train)
            L1 = (myLambda/X_train.shape[0])*tf.abs((tf.reduce_sum(self.W1) + 
                                                     tf.reduce_sum(self.W2) + 
                                                     tf.reduce_sum(self.W3)))
            
            current_loss = self.loss(predicted, y_train)
            current_loss += L1
            
        grads = tape.gradient(current_loss, self.varibles)
        optimizer.apply_gradients(zip(grads, self.varibles))
        
    def compute_output(self, X):
        
        X_tf = tf.cast(X, dtype=tf.float32)

        w1Hat = tf.matmul(X_tf, self.W1) + self.b1
        h1Hat = tf.nn.relu(w1Hat)
        
        w2Hat = tf.matmul(h1Hat, self.W2) + self.b2
        h2Hat = tf.nn.relu(w2Hat)
        
        w3Hat = tf.matmul(h2Hat, self.W3) + self.b3
        output = tf.nn.softmax(w3Hat)
        
        return output

# Model Training

In [11]:
NUM_EPOCHS = 10
mlp_on_gpu = MLP(size_input, size_hidden1, size_hidden2, size_output, device='gpu')
time_start = time.time()
RegCoefficient = 0.1

for epoch in range(NUM_EPOCHS):
    loss_total = tf.zeros([1, 1], dtype=tf.float32)
    lt = 0
    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed = epoch*(1234)).batch(50)
    
    for inputs, outputs in train_ds:
        preds = mlp_on_gpu.forward(inputs)
        loss_total = loss_total + mlp_on_gpu.loss(preds, outputs)

        # lt += mlp_on_gpu.loss(preds, outputs)
        mlp_on_gpu.backward(inputs, outputs)
        
    print(f'Number of Epoch = {epoch+1} - Average CrossEntropy:= {np.sum(loss_total)/X_train.shape[0]}')
    
time_taken = time.time()-time_start
print(f'\nTotal time taken (in seconds): {time_taken: .2f}')

Number of Epoch = 1 - Average CrossEntropy:= 0.027832185872395835
Number of Epoch = 2 - Average CrossEntropy:= 0.007344691467285156
Number of Epoch = 3 - Average CrossEntropy:= 0.004818090311686198
Number of Epoch = 4 - Average CrossEntropy:= 0.0034687909444173176
Number of Epoch = 5 - Average CrossEntropy:= 0.002708165995279948
Number of Epoch = 6 - Average CrossEntropy:= 0.0021945823669433594
Number of Epoch = 7 - Average CrossEntropy:= 0.0018486012776692709
Number of Epoch = 8 - Average CrossEntropy:= 0.0015724979400634765
Number of Epoch = 9 - Average CrossEntropy:= 0.0013675230662027996
Number of Epoch = 10 - Average CrossEntropy:= 0.0011816466013590495

Total time taken (in seconds):  132.04


# Inference

In [12]:
test_loss_total = tf.Variable(0, dtype=tf.float32)


preds = mlp_on_gpu.forward(X_test)

pred = np.argmax(preds, axis=1)
y_true= np.argmax(y_test, axis=1)

accuracy = (pred==y_true).mean()
print(f'accuracy = {accuracy}')
    


accuracy = 0.9733
