## Task 2.4: MLP in Tensorflow

ITU KSADMAL1KU-NLP - Advanced Machine Learning for NLP in KCS 2024

by Stefan Heinrich, Bertram Højer, Christian H. Rasmussen, & material by Kevin Murphy.

All info and static material: https://learnit.itu.dk/course/view.php?id=3024579

-------------------------------------------------------------------------------

This notebook is a prototypical blueprint for working with Deep Learning frameworks, which usually follows four steps: 
- Data loading and preprocessing (often including Exploratory Data Analysis (EDA))
- Building a model by using the Tensorflow or PyTorch API
- Training a model (including initialising) until termination (often: convergence)
- Analysing the model (often including various steps to achieve interpretability of the model)

In the Advanced Machine Learning course, we will detail these steps and often revisit this basic framework.

In [None]:
# @title ##### just check for a reasonable recent installed tensorflow version.
# you can ignore this block

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
    IS_COLAB = True
except Exception:
    IS_COLAB = False

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

if not tf.config.list_physical_devices('GPU'):
    print("No GPU was detected. DNNs can be very slow without a GPU.")
    if IS_COLAB:
        print("Go to Runtime > Change runtime and select a GPU hardware accelerator.")

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds

In [None]:
# @title #### Import dependencies

from __future__ import absolute_import, division, print_function, unicode_literals

from IPython import display
#import os
#import time
import numpy as np
#import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow_datasets as tfds
# TODO: in the future use tensorflow_datasets>4.9.3 when available due to: https://github.com/openvinotoolkit/datumaro/issues/1189

# Seed NumPy *and* TensorFlow: weight initialisation and Dataset.shuffle draw
# from TensorFlow's random generator, which np.random.seed does not affect.
seed = 0  # test with different seeds (for re-running everything)!
np.random.seed(seed)
tf.random.set_seed(seed)

#### Load the data

In [None]:
# @title ##### Variant one: load the dataset from raw data

batch_size = 32

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Add a trailing channel axis so that every image carries an explicit channel.
# The pixel values are deliberately left unscaled here: rescaling to the
# [0.0,1.0] range happens exactly once, in normalize_img below. Dividing by
# 255 here as well would shrink the inputs to [0, 1/255].
x_train, x_test = x_train[..., np.newaxis], x_test[..., np.newaxis]

# Create dataset object:
ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
ds_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))

# The raw images are of type uint8, while the model expects tf.float32.
# Therefore, you need to normalize images.
def normalize_img(image, label):
  """Normalizes images: `uint8` -> `float32` (divided by 255); labels -> `int8`."""
  return tf.cast(image, tf.float32) / 255., tf.cast(label, tf.int8)
ds_train = ds_train.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
ds_test = ds_test.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)

# Report dataset sizes and the element signature (input spec, label spec).
input_spec, output_spec = ds_train.element_spec
print("Number of original training examples:", ds_train.cardinality().numpy())
print("Number of original test examples:", ds_test.cardinality().numpy())
print("Input representation: ", input_spec)
print("Output representation: ", output_spec)

# Preview the first few training images in a rows x cols grid.
cols, rows = 4, 2
figure = plt.figure(figsize=(10, 5))
for position, (img, label) in enumerate(ds_train.take(cols*rows), start=1):
  ax = figure.add_subplot(rows, cols, position)
  ax.set_title(f"Class {label}")
  ax.axis("off")
  # Drop the channel axis: imshow expects a 2-D array for a grayscale image.
  ax.imshow(img[:,:,0], cmap="gray")
plt.show()

# The dataset fits in memory, so cache it first; everything after the cache
# (shuffling, batching) is then applied to the cached elements.
ds_train = ds_train.cache()
ds_test = ds_test.cache()

# Training pipeline: shuffle with a buffer as large as the whole dataset
# (full shuffle), batch after shuffling so batches differ between epochs,
# and prefetch at the very end for performance.
ds_train = (
    ds_train
    .shuffle(ds_train.cardinality().numpy())
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Test pipeline: no shuffling, only batching and prefetching.
ds_test = ds_test.batch(batch_size).prefetch(tf.data.AUTOTUNE)


In [None]:
# @title ##### Variant two: load prepared dataset object

batch_size = 32

# Load the train and test splits as (image, label) pairs, plus the
# dataset metadata object.
mnist_splits, ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)
ds_train, ds_test = mnist_splits

# TFDS provide images of type tf.uint8, while the model expects tf.float32. Therefore, you need to normalize images.
def normalize_img(image, label):
  """Casts the image to `float32` and divides it by 255; casts the label to `int8`."""
  scaled_image = tf.cast(image, tf.float32) / 255.
  compact_label = tf.cast(label, tf.int8)
  return scaled_image, compact_label

# Apply the same normalization to both splits.
ds_train, ds_test = (
    split.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
    for split in (ds_train, ds_test)
)

# Report dataset sizes and the element signature (input spec, label spec).
train_size = ds_train.cardinality().numpy()
test_size = ds_test.cardinality().numpy()
print("Number of original training examples:", train_size)
print("Number of original test examples:", test_size)
print("Input representation: ", ds_train.element_spec[0])
print("Output representation: ", ds_train.element_spec[1])

# Some examples, drawn in a rows x cols grid:
cols, rows = 4, 2
figure = plt.figure(figsize=(10, 5))
for cell, (img, label) in enumerate(ds_train.take(cols*rows), start=1):
  panel = figure.add_subplot(rows, cols, cell)
  panel.set_title(f"Class {label}")
  panel.axis("off")
  # Drop the channel axis: imshow expects a 2-D array for a grayscale image.
  panel.imshow(img[:,:,0], cmap="gray")
plt.show()

# Cache both splits first (the data fits in memory), so later stages
# operate on the cached elements.
ds_train = ds_train.cache()
ds_test = ds_test.cache()

# Shuffle the training split with a buffer covering the full dataset.
full_buffer = ds_train.cardinality().numpy()
ds_train = ds_train.shuffle(full_buffer)

# Batch after shuffling (so batches differ between epochs), then finish each
# pipeline with a prefetch for performance.
ds_train = ds_train.batch(batch_size).prefetch(tf.data.AUTOTUNE)
ds_test = ds_test.batch(batch_size).prefetch(tf.data.AUTOTUNE)



#### Build the model

In [None]:
# Multi-layer perceptron: flatten each image, two hidden ReLU layers,
# and a 10-way softmax output (one probability per digit class).
model = keras.Sequential([
    # Both data-loading variants yield images with an explicit channel axis,
    # i.e. (28, 28, 1), so the declared input shape must include it.
    keras.Input(shape=(28, 28, 1)),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])

model.summary()

#### Train the model

In [None]:
# Define hyperparameters
learning_rate = 0.001
momentum = 0.9
epochs = 5

# Setup for training
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, 
                                    momentum=momentum)
# The model's last layer already applies softmax, so its outputs are
# probabilities, not logits. With from_logits=True the loss would apply a
# second softmax on top of them.
criterion = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]  # a list of metrics!

model.compile(optimizer=optimizer, 
              loss=criterion,
              metrics=metrics)

# Training loop
model.fit(
    ds_train,
    epochs=epochs
)

#### Analyse the model

In [None]:
# @title ##### Prediction and visualisation
# Overall accuracy: evaluate() returns the loss followed by the compiled
# metrics, here a single accuracy value.
train_results = model.evaluate(ds_train)
train_loss, train_acc = train_results
print('Train accuracy:', train_acc)

test_results = model.evaluate(ds_test)
test_loss, test_acc = test_results
print('Test accuracy:', test_acc)

In [None]:
# Derive a histogram over the 10 classes
# by counting over the predictions on the test data:
# hist[true_class][predicted_class] = number of test images.

num_classes = 10
hist = {true_class: {pred_class: 0 for pred_class in range(num_classes)}
        for true_class in range(num_classes)}

for images, labels in ds_test:
  # Call the model directly instead of model.predict(): predict() is meant
  # for large inputs and adds overhead when called once per batch in a loop.
  outputs = model(images, training=False).numpy()
  pred = np.argmax(outputs, axis=1)
  ground_truth = labels.numpy()
  for truth, guess in zip(ground_truth, pred):
    # Convert NumPy scalars to plain ints so they match the dict keys.
    hist[int(truth)][int(guess)] += 1



In [None]:
# One bar chart per true class, showing how often each class was predicted.
fig = plt.figure(figsize=(15, 15))
for slot, (key, counts) in enumerate(hist.items(), start=1):
  ax = fig.add_subplot(3, 4, slot)  # 3x4 grid holds the 10 class panels
  predicted_classes = list(counts)
  ax.bar(predicted_classes, counts.values(), color='r')
  ax.set_title(f"Prediction for {key}")
  ax.set_xticks(range(0,10))
  # Fixed y-range so that all panels are directly comparable.
  ax.set_ylim(0, 1200)

plt.show()