In [1]:
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [2]:
# export CUDA_VISIBLE_DEVICES=1
# os.environ["CUDA_VISIBLE_DEVICES"]="1"
gpus = tf.config.list_physical_devices("GPU")
print(gpus)
tf.config.experimental.set_virtual_device_configuration(gpus[0], 
[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=3300)])

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
# load and prepare data for training
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28 * 28).astype("float32") / 255.0
x_test = x_test.reshape(-1, 28 * 28).astype("float32") / 255.0

## Dense Feed Forward Neural Networks (DFNNs)  also known as multi-layer perceptrons(MLPs)
* well suited for inferencing from tabular data

### Construct DFNN model using Sequential API  
* simple interface
* limited functionality

In [4]:
# Sequential API (Very convenient, not very flexible)
modelDFNN1 = keras.Sequential(
    [
        keras.Input(shape=(28 * 28)),
        layers.Dense(512, activation="relu"),
        layers.Dense(256, activation="relu"),
        layers.Dense(10),
    ]
)

modelDFNN1 = keras.Sequential()
modelDFNN1.add(keras.Input(shape=(784)))
modelDFNN1.add(layers.Dense(512, activation="relu"))
modelDFNN1.add(layers.Dense(256, activation="relu", name="my_layer"))
modelDFNN1.add(layers.Dense(10))

In [5]:
# ToDo #1: List what objects the modelDFNN1 is comprised of (built from)

In [6]:
modelDFNN1.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=keras.optimizers.Adam(lr=0.001),
    metrics=["accuracy"],
)



In [7]:
modelDFNN1.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)
modelDFNN1.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 3s - loss: 2.3229 - accuracy: 0.1857
Epoch 2/5
1875/1875 - 1s - loss: 2.3026 - accuracy: 0.1479
Epoch 3/5
1875/1875 - 1s - loss: 2.3026 - accuracy: 0.1479
Epoch 4/5
1875/1875 - 1s - loss: 2.3026 - accuracy: 0.1479
Epoch 5/5
1875/1875 - 1s - loss: 2.3026 - accuracy: 0.1479
313/313 - 0s - loss: 2.3026 - accuracy: 0.1524


[2.30259108543396, 0.15240000188350677]

In [8]:
# ToDo #2: What train and test accuracy do you obtain when running for 5 epochs ?  




In [9]:
# ToDo #3: Now write code (add code cells to this notebook) to train the model for 20 additional epochs. What train and test accuracy do you obtain? 




### Construct DFNN model using Functional API  
* slightly more complex interface
* additional functionality

In [10]:
# ToDo #4: Now practice using the OO functional API of keras by constructing your own functional model, modelDFNN2.
# To simplify the task, create this model so that it uses the same architecture as the sequential model.
# For the final Dense layer use activation="softmax"


In [11]:
# Functional API (A bit more flexible)
inputs = keras.Input(shape=(784))
x = layers.Dense(512, activation="relu", name="first_layer")(inputs)
x = layers.Dense(256, activation="relu", name="second_layer")(x)
outputs = layers.Dense(10, activation="softmax")(x)
modelDFNN2 = keras.Model(inputs=inputs, outputs=outputs)





In [12]:
# Now to gain experience invoking methods on objects you have created,
# write code to compile(), fit() your new model on the training data. 
# Then write code to evaluate your new model on the test data. 


In [13]:
modelDFNN2.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=keras.optimizers.Adam(lr=0.001),
    metrics=["accuracy"],
)

In [14]:
modelDFNN2.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)
modelDFNN2.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 2s - loss: 0.1859 - accuracy: 0.9430
Epoch 2/5
1875/1875 - 1s - loss: 0.0821 - accuracy: 0.9748
Epoch 3/5
1875/1875 - 1s - loss: 0.0538 - accuracy: 0.9826
Epoch 4/5
1875/1875 - 1s - loss: 0.0409 - accuracy: 0.9865
Epoch 5/5
1875/1875 - 1s - loss: 0.0325 - accuracy: 0.9896
313/313 - 0s - loss: 0.0743 - accuracy: 0.9796


[0.07427527010440826, 0.9796000123023987]

In [None]:
# ToDo #5. Write down what train and test accuracy do you observe? How does it compare to the sequential model?

In [None]:
# For fun: explore the architecture space by adding or removing layers from your model, retrain from scratch. 
# Learn how that impacts model performance. State of the art accuracy is around 99.97% . 
# You do not need to achieve that (nor is that expected here) but to give you an idea.