 Deep Learning Begins:
 Assignment 1
 Task 1 Implement 2-layer MLP for MNIST dataset

Import the needed libraries

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd 
import os
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

Load, concatenate, and then properly split dataset (70/30)

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
X = np.concatenate([x_train, x_test])
y = np.concatenate([y_train, y_test])

#then split using train_test_split
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=(0.3))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Python starts with 0 so if label is 5 one-hot encoding will show it in 6th place

# Here we have a class number for each image
print("Class label of first image :", y_train[0])

# Convert to One-Hot Encoding
y_train = tf.keras.utils.to_categorical(y_train, 10) 
y_test = tf.keras.utils.to_categorical(y_test, 10)

print("After converting the output into a vector : ",y_train[0])

Class label of first image : 9
After converting the output into a vector :  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]


In [4]:
# Inspect the image pixel value
x_train[0].min(), x_train[0].max()

(0, 255)

In [5]:
# Normalize the data between [0,1]

x_train = x_train/255.0
x_test = x_test/255.0

x_train[0].min(), x_train[0].max()

(0.0, 1.0)

In [6]:
# Check the shapes of the data

x_train.shape, x_test.shape, y_train.shape, y_test.shape

((49000, 28, 28), (21000, 28, 28), (49000, 10), (21000, 10))

In [7]:
# Reshape so each 'dataset' above is flattened into one vector 

x_train = x_train.reshape(-1, 28*28)
x_test = x_test.reshape(-1, 28*28)


x_train.shape, x_test.shape, y_train.shape, y_test.shape

((49000, 784), (21000, 784), (49000, 10), (21000, 10))

In [8]:
# Build the model

# Define the optimizer
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.0001,
                                    beta_1=0.9,
                                    beta_2=0.999)
# Define the loss
loss_ = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

# Define the number of units per layer
feature_size = 784
unit_size = 100
n_hidden_layers = 2

def build_mlp_model(feature_size, n_hidden_layers, unit_size, opt_, loss_):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(unit_size, activation=tf.nn.relu, input_shape=(feature_size,)),
        tf.keras.layers.Dense(unit_size, activation=tf.nn.relu),
        tf.keras.layers.Dense(10)
    ])
    
    model.compile(optimizer=opt_, 
                  loss=loss_,
                  metrics=['accuracy'])
    
    return model


mlp = build_mlp_model(feature_size, n_hidden_layers, unit_size, adam_opt, loss_)

mlp.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               78500     
                                                                 
 dense_1 (Dense)             (None, 100)               10100     
                                                                 
 dense_2 (Dense)             (None, 10)                1010      
                                                                 
Total params: 89,610
Trainable params: 89,610
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Start training the model

batch_size=32
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50)
history = mlp.fit(x_train, y_train, 
                    epochs=50, 
                    verbose=1,
                    batch_size=batch_size,
                    validation_data = (x_test, y_test),
                    callbacks=[early_stop])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


Train & test accuracies are breaking the ceiling at 0.9999 & 0.9739 respectively. What a beast!

Task 2 Compare the performance of 2-layer MLP when using different activation functions.

In [10]:
# Build the model

# Define the optimizer
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.0001,
                                    beta_1=0.9,
                                    beta_2=0.999)
# Instead of defining the loss, I'm going to use Softmax on the last layer
# Define the loss
# loss_ = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

# Define the number of units per layer
feature_size = 784
unit_size = 100
n_hidden_layers = 2

def build_mlp_model(feature_size, n_hidden_layers, unit_size, opt_, loss_):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(unit_size, activation=tf.nn.tanh, input_shape=(feature_size,)),
        tf.keras.layers.Dense(unit_size, activation=tf.nn.tanh),
        tf.keras.layers.Dense(10, activation=tf.nn.softmax)
    ])
    
    model.compile(optimizer=opt_, 
                  loss=loss_,
                  metrics=['accuracy'])
    
    return model


mlp = build_mlp_model(feature_size, n_hidden_layers, unit_size, adam_opt, loss_)

mlp.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 100)               78500     
                                                                 
 dense_4 (Dense)             (None, 100)               10100     
                                                                 
 dense_5 (Dense)             (None, 10)                1010      
                                                                 
Total params: 89,610
Trainable params: 89,610
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Start training the model using a tanh activation function

batch_size=32
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50)
history = mlp.fit(x_train, y_train, 
                    epochs=50, 
                    verbose=1,
                    batch_size=batch_size,
                    validation_data = (x_test, y_test),
                    callbacks=[early_stop])

Epoch 1/50


  output, from_logits = _get_logits(


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


Fun Fact: you can stop training if you "can't" wait anymore.

Train accuracy: 0.9998
Test accuracy: 0.9738

We are up & running!

In [12]:
# Build the model

# Define the optimizer
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.0001,
                                    beta_1=0.9,
                                    beta_2=0.999)
# Define the loss
loss_ = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

# Define the number of units per layer
feature_size = 784
unit_size = 100
n_hidden_layers = 2

def build_mlp_model(feature_size, n_hidden_layers, unit_size, opt_, loss_):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(unit_size, activation=tf.nn.sigmoid, input_shape=(feature_size,)),
        tf.keras.layers.Dense(unit_size, activation=tf.nn.sigmoid),
        tf.keras.layers.Dense(10)
    ])
    
    model.compile(optimizer=opt_, 
                  loss=loss_,
                  metrics=['accuracy'])
    
    return model


mlp = build_mlp_model(feature_size, n_hidden_layers, unit_size, adam_opt, loss_)

mlp.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 100)               78500     
                                                                 
 dense_7 (Dense)             (None, 100)               10100     
                                                                 
 dense_8 (Dense)             (None, 10)                1010      
                                                                 
Total params: 89,610
Trainable params: 89,610
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Start training the model using a sigmoid activation function

batch_size=32
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50)
history = mlp.fit(x_train, y_train, 
                    epochs=50, 
                    verbose=1,
                    batch_size=batch_size,
                    validation_data = (x_test, y_test),
                    callbacks=[early_stop])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


KeyboardInterrupt: ignored

 Training accuracy is 0.9877 and test accuracy is 0.9722 after 50 epochs. Snap.