## Lessons
* create and train a model using two types of Keras APIs (Sequential and Functional)
* checking outputs from inner layers
* SparseCategoricalCrossentropy vs CategoricalCrossentropy

### Resources
* https://www.youtube.com/watch?v=pAhPiF3yiXI&list=PLhhyoLH6IjfxVOdVC1P1L5z5azs0XjMsb&index=3
* https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/TensorFlow/Basics/tutorial3-neuralnetwork.py

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [10]:
# Load the MNIST train/test splits, then unpack images (x) and labels (y).
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split
print(x_train.shape, x_test.shape)

(60000, 28, 28) (10000, 28, 28)


In [11]:
def _flatten_and_scale(images):
    """Flatten each image to a 28*28 vector, cast to float32, and divide by 255.0."""
    return images.reshape(-1, 28*28).astype("float32") / 255.0


# NOTE: x_train / x_test are overwritten here, so re-running this cell without
# reloading the data first would divide the values by 255 a second time.
x_train = _flatten_and_scale(x_train)
x_test = _flatten_and_scale(x_test)
print(x_train.shape, x_test.shape)

(60000, 784) (10000, 784)


In [26]:
# Option A: Sequential API (very convenient, not very flexible)
model = keras.Sequential(
    [
        # `shape` must be a tuple: (28*28) is just the int 784, (28*28,) is a 1-tuple.
        keras.Input(shape=(28*28,)),
        layers.Dense(512, activation="relu"),
        layers.Dense(256, activation="relu"),
        layers.Dense(10),  # no activation here, so the model outputs raw logits
    ]
)

model.compile(
    # SparseCategoricalCrossentropy works with integer class labels;
    # CategoricalCrossentropy would need one-hot encoded labels instead.
    # from_logits=True because the output layer does not apply "softmax";
    # set from_logits to False if the output layer applies "softmax".
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    optimizer = keras.optimizers.Adam(learning_rate=0.001),
    metrics = ['accuracy'],
)

print("Fit...")
model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)
print("Evaluate...")
# Last expression of the cell: its return value is shown as the cell output.
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Fit...
Epoch 1/5
1875/1875 - 7s - loss: 0.1836 - accuracy: 0.9444
Epoch 2/5
1875/1875 - 7s - loss: 0.0794 - accuracy: 0.9760
Epoch 3/5
1875/1875 - 7s - loss: 0.0537 - accuracy: 0.9822
Epoch 4/5
1875/1875 - 7s - loss: 0.0398 - accuracy: 0.9871
Epoch 5/5
1875/1875 - 6s - loss: 0.0329 - accuracy: 0.9890
Evaluate...
313/313 - 0s - loss: 0.0748 - accuracy: 0.9802


[0.07483774423599243, 0.9801999926567078]

In [29]:
# Another way of using the Sequential API (add one layer at a time)
model = keras.Sequential()
# `shape` must be a tuple: (28*28) is just the int 784, (28*28,) is a 1-tuple.
model.add(keras.Input(shape=(28*28,)))
model.add(layers.Dense(512, activation="relu"))
# Inspect the model after adding the first layer.
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()
model.add(layers.Dense(256, activation="relu", name="my_layer"))
model.add(layers.Dense(10))

# Inspect the complete model.
model.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_45 (Dense)             (None, 512)               401920    
Total params: 401,920
Trainable params: 401,920
Non-trainable params: 0
_________________________________________________________________
None
Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_45 (Dense)             (None, 512)               401920    
_________________________________________________________________
my_layer (Dense)             (None, 256)               131328    
_________________________________________________________________
dense_46 (Dense)             (None, 10)                2570      
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________
None


In [42]:
# Option B: Functional API (a bit more flexible)
# `shape` must be a tuple: (28*28) is just the int 784, (28*28,) is a 1-tuple.
inputs = keras.Input(shape=(28*28,))
x = layers.Dense(512, activation="relu", name="first_layer")(inputs)
# NOTE(review): 218 units here vs. 256 in the Sequential examples — confirm this is intended.
x = layers.Dense(218, activation="relu", name="second_layer")(x)
outputs = layers.Dense(10, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    # SparseCategoricalCrossentropy works with integer class labels;
    # CategoricalCrossentropy would need one-hot encoded labels instead.
    # from_logits=False because the output layer applies "softmax";
    # set from_logits to True if the output layer does not apply "softmax".
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    optimizer = keras.optimizers.Adam(learning_rate=0.001),
    metrics = ['accuracy'],
)

# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()
print("Fit...")
model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)
print("Evaluate...")
# Last expression of the cell: its return value is shown as the cell output.
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Model: "functional_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_23 (InputLayer)        [(None, 784)]             0         
_________________________________________________________________
first_layer (Dense)          (None, 512)               401920    
_________________________________________________________________
second_layer (Dense)         (None, 218)               111834    
_________________________________________________________________
dense_51 (Dense)             (None, 10)                2190      
Total params: 515,944
Trainable params: 515,944
Non-trainable params: 0
_________________________________________________________________
None
Fit...
Epoch 1/5
1875/1875 - 7s - loss: 0.1846 - accuracy: 0.9438
Epoch 2/5
1875/1875 - 7s - loss: 0.0781 - accuracy: 0.9759
Epoch 3/5
1875/1875 - 7s - loss: 0.0544 - accuracy: 0.9829
Epoch 4/5
1875/1875 - 7s - loss: 0.0389 - accuracy: 0.98

[0.10425542294979095, 0.972100019454956]

In [47]:
# Checking out the outputs from inner layer(s)
# Each sub-model reuses the trained model's inputs and exposes inner layer
# output(s) as its own outputs.

# Option A. look the layer up by index (-2 = the layer just before the output layer)
subModel = keras.Model(inputs=model.inputs,
                       outputs=[model.layers[-2].output])
# Use the sub-model built just above (the original referenced an undefined `subModel0`).
features = subModel.predict(x_train)
print(features.shape)

# Option B. look the layer up by name
subModel = keras.Model(inputs=model.inputs,
                       outputs=[model.get_layer('first_layer').output])
features = subModel.predict(x_train)
print(features.shape)

# Option C. expose every layer's output; predict() then returns one array per layer
subModel = keras.Model(inputs=model.inputs,
                       outputs=[layer.output for layer in model.layers])
features = subModel.predict(x_train)
for feature in features:
    print(feature.shape)

(60000, 218)
(60000, 512)
(60000, 784)
(60000, 512)
(60000, 218)
(60000, 10)
