In [1]:
# Practicing Keras functional api



In [2]:
# import libraries
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import keras
from keras import layers
import pydot
import graphviz

# 1. Build a simple model
Functional API is more flexible than Sequential API. It can handle models with non-linear topology, shared layers, and even multiple inputs or outputs.

In [3]:
inputs = keras.Input(shape=(784,), name='input_layer') # 784 is the number of input features
# shape of input layer 
print("shape of input layer: , type of input layer: ", inputs.shape, inputs.dtype)

shape of input layer: , type of input layer:  (None, 784) <dtype: 'float32'>


In [4]:
# hidden layer with relu activation
h1 = layers.Dense(64, activation='relu', name='hidden_layer1')(inputs)

In [5]:
h2 = layers.Dense(64, activation='relu', name='hidden_layer2')(h1)

In [6]:
# output layer 
outputs = layers.Dense(10, name='output_layer')(h2)

In [7]:
# create a model
model = keras.Model(inputs=inputs, outputs=outputs, name='mnist_model')
# summary of the model
model.summary()

Model: "mnist_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None, 784)]             0         
                                                                 
 hidden_layer1 (Dense)       (None, 64)                50240     
                                                                 
 hidden_layer2 (Dense)       (None, 64)                4160      
                                                                 
 output_layer (Dense)        (None, 10)                650       
                                                                 
Total params: 55050 (215.04 KB)
Trainable params: 55050 (215.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [8]:
# loading training MNIST dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

x_train = x_train.reshape(60000, 784).astype('float32')/255  # 60000 is the number of samples, divide by 255 to normalize the data
x_test = x_test.reshape(10000, 784).astype('float32')/255

loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True) # raw output from the model, not the probability, SparseCategoricalCrossentropy is better for integer labels
optimizer = keras.optimizers.RMSprop() # the Root Mean Square Propagation algorithm
metrics = [keras.metrics.SparseCategoricalAccuracy()] # accuracy is the metric to eval

model.compile(loss=loss_fn, optimizer=optimizer, metrics=metrics)

In [9]:
# training the model
history = model.fit(x_train, y_train, batch_size=64, epochs=2, validation_split=0.2) # 20% of the training data is used for validation

Epoch 1/2
Epoch 2/2


In [10]:
# evaluate the model
test_scores = model.evaluate(x_test, y_test, verbose=2)
print(f"Test loss: {test_scores[0]}, Test accuracy: {test_scores[1]}")

313/313 - 0s - loss: 0.1358 - sparse_categorical_accuracy: 0.9599 - 273ms/epoch - 874us/step
Test loss: 0.13576193153858185, Test accuracy: 0.9599000215530396


In [11]:
# save the model
path = './weights/mnist_model.keras'
model.save(path)

In [12]:
# delete the model
del model

In [13]:
# load the model
model = keras.models.load_model(path)

In [14]:
# all models are callable
encoder_input = keras.Input(shape=(28, 28, 1), name="original_img")
x = layers.Conv2D(16, 3, activation="relu")(encoder_input)
x = layers.Conv2D(32, 3, activation="relu")(x)
x = layers.MaxPooling2D(3)(x)
x = layers.Conv2D(32, 3, activation="relu")(x)
x = layers.Conv2D(16, 3, activation="relu")(x)
encoder_output = layers.GlobalMaxPooling2D()(x)

encoder = keras.Model(encoder_input, encoder_output, name="encoder")
encoder.summary()

Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 original_img (InputLayer)   [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 26, 26, 16)        160       
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 32)        4640      
                                                                 
 max_pooling2d (MaxPooling2  (None, 8, 8, 32)          0         
 D)                                                              
                                                                 
 conv2d_2 (Conv2D)           (None, 6, 6, 32)          9248      
                                                                 
 conv2d_3 (Conv2D)           (None, 4, 4, 16)          4624      
                                                           

In [15]:
decoder_input = keras.Input(shape=(16,), name="encoded_img")
x = layers.Reshape((4, 4, 1))(decoder_input)
x = layers.Conv2DTranspose(16, 3, activation="relu")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu")(x)
x = layers.UpSampling2D(3)(x)
x = layers.Conv2DTranspose(16, 3, activation="relu")(x)
decoder_output = layers.Conv2DTranspose(1, 3, activation="relu")(x)

decoder = keras.Model(decoder_input, decoder_output, name="decoder")
decoder.summary()

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoded_img (InputLayer)    [(None, 16)]              0         
                                                                 
 reshape (Reshape)           (None, 4, 4, 1)           0         
                                                                 
 conv2d_transpose (Conv2DTr  (None, 6, 6, 16)          160       
 anspose)                                                        
                                                                 
 conv2d_transpose_1 (Conv2D  (None, 8, 8, 32)          4640      
 Transpose)                                                      
                                                                 
 up_sampling2d (UpSampling2  (None, 24, 24, 32)        0         
 D)                                                              
                                                           

In [16]:
autoencoder_input = keras.Input(shape=(28, 28, 1), name="img")
encoded_img = encoder(autoencoder_input)
decoded_img = decoder(encoded_img)
autoencoder = keras.Model(autoencoder_input, decoded_img, name="autoencoder")
autoencoder.summary()

Model: "autoencoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 img (InputLayer)            [(None, 28, 28, 1)]       0         
                                                                 
 encoder (Functional)        (None, 16)                18672     
                                                                 
 decoder (Functional)        (None, 28, 28, 1)         9569      
                                                                 
Total params: 28241 (110.32 KB)
Trainable params: 28241 (110.32 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# composing models by using the Keras.Model with the input and output layers

In [17]:
# ensemble model
def get_model():
    inputs = keras.Input(shape=(128,))
    outputs = layers.Dense(1)(inputs)
    return keras.Model(inputs, outputs)

model1 = get_model()
model2 = get_model()
model3 = get_model()

inputs = keras.Input(shape=(128,))
y1 = model1(inputs)
y2 = model2(inputs)
y3 = model3(inputs)

outputs = layers.average([y1, y2, y3]) # average the output of the three models as layer
ensemble_model = keras.Model(inputs=inputs, outputs=outputs)

ensemble_model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 128)]                0         []                            
                                                                                                  
 model (Functional)          (None, 1)                    129       ['input_4[0][0]']             
                                                                                                  
 model_1 (Functional)        (None, 1)                    129       ['input_4[0][0]']             
                                                                                                  
 model_2 (Functional)        (None, 1)                    129       ['input_4[0][0]']             
                                                                                            

In [18]:
# Complex graph topologies

# For example, if you're building a system for ranking customer issue tickets by priority and routing them to the correct department, then the model will have three inputs:
# 
# the title of the ticket (text input),
# the text body of the ticket (text input), and
# any tags added by the user (categorical input)
#
# This model will have two outputs:
# 
# the priority score between 0 and 1 (scalar sigmoid output), and
# the department that should handle the ticket (softmax output over the set of departments).
num_tags = 12  # Number of unique issue tags
num_words = 10000  # Size of vocabulary obtained when preprocessing text data
num_departments = 4  # Number of departments for predictions

title_input = keras.Input(shape=(None,), name="title")  # Variable-length sequence of ints
body_input = keras.Input(shape=(None,), name="body")  # Variable-length sequence of ints
tags_input = keras.Input(shape=(num_tags,), name="tags")  # Binary vectors of size `num_tags`

# Embed each word in the title into a 64-dimensional vector
title_features = layers.Embedding(num_words, 64)(title_input)
# Embed each word in the text into a 64-dimensional vector
body_features = layers.Embedding(num_words, 64)(body_input)

# Reduce sequence of embedded words in the title into a single 128-dimensional vector
title_features = layers.LSTM(128)(title_features)
# Reduce sequence of embedded words in the body into a single 32-dimensional vector
body_features = layers.LSTM(32)(body_features)

# Merge all available features into a single large vector via concatenation
x = layers.concatenate([title_features, body_features, tags_input])

# Stick a logistic regression for priority prediction on top of the features
priority_pred = layers.Dense(1, name="priority")(x)
# Stick a department classifier on top of the features
department_pred = layers.Dense(num_departments, name="department")(x)

# Instantiate an end-to-end model predicting both priority and department
model = keras.Model(
    inputs=[title_input, body_input, tags_input], # can define multiple inputs
    outputs=[priority_pred, department_pred], # can define multiple outputs
)

model.summary()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 title (InputLayer)          [(None, None)]               0         []                            
                                                                                                  
 body (InputLayer)           [(None, None)]               0         []                            
                                                                                                  
 embedding (Embedding)       (None, None, 64)             640000    ['title[0][0]']               
                                                                                                  
 embedding_1 (Embedding)     (None, None, 64)             640000    ['body[0][0]']                
                                                                                            

In [19]:
keras.utils.plot_model(model, "multi_input_and_output_model.png", show_shapes=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [20]:
# compile the model
optim = keras.optimizers.RMSprop(1e-3)
loss = {
    "priority": keras.losses.BinaryCrossentropy(from_logits=True),
    "department": keras.losses.CategoricalCrossentropy(from_logits=True),
}
loss_weights = {"priority": 1.0, "department": 0.2} # the loss of the priority output is 5 times the loss of the department output
model.compile(optimizer=optim, loss=loss, loss_weights=loss_weights)

In [21]:
# train the model
# Dummy input data
title_data = np.random.randint(num_words, size=(1280, 10))
body_data = np.random.randint(num_words, size=(1280, 100))
tags_data = np.random.randint(2, size=(1280, num_tags)).astype("float32")

# Dummy target data
priority_targets = np.random.random(size=(1280, 1))
dept_targets = np.random.randint(2, size=(1280, num_departments))

History = model.fit(
    {"title": title_data, "body": body_data, "tags": tags_data}, # keys match the input layer names
    {"priority": priority_targets, "department": dept_targets}, # keys match the output layer names
    epochs=2,
    batch_size=32,
)

Epoch 1/2
Epoch 2/2


# ResNet toy model not connected sequentially

In [22]:
inputs = keras.Input(shape=(32, 32, 3), name="img") # 32x32 RGB images input
x = layers.Conv2D(32, 3, activation="relu")(inputs)
x = layers.Conv2D(64, 3, activation="relu")(x)
block_1_output = layers.MaxPooling2D(3)(x) # max pooling layer

x = layers.Conv2D(64, 3, activation="relu", padding="same")(block_1_output)
x = layers.Conv2D(64, 3, activation="relu", padding="same")(x)
block_2_output = layers.add([x, block_1_output]) # skip connection to the first block

x = layers.Conv2D(64, 3, activation="relu", padding="same")(block_2_output)
x = layers.Conv2D(64, 3, activation="relu", padding="same")(x)
block_3_output = layers.add([x, block_2_output]) # skip connection to the second block

x = layers.Conv2D(64, 3, activation="relu")(block_3_output)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(10)(x) # 10 classes output, called functional API because the layers are callable like functions

model = keras.Model(inputs, outputs, name="toy_resnet")
model.summary()

Model: "toy_resnet"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 img (InputLayer)            [(None, 32, 32, 3)]          0         []                            
                                                                                                  
 conv2d_4 (Conv2D)           (None, 30, 30, 32)           896       ['img[0][0]']                 
                                                                                                  
 conv2d_5 (Conv2D)           (None, 28, 28, 64)           18496     ['conv2d_4[0][0]']            
                                                                                                  
 max_pooling2d_1 (MaxPoolin  (None, 9, 9, 64)             0         ['conv2d_5[0][0]']            
 g2D)                                                                                    

In [25]:
# train the model
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

x_train = x_train.astype("float32") / 255.0 # normalize the data
x_test = x_test.astype("float32") / 255.0
y_train = keras.utils.to_categorical(y_train, 10) # one-hot encoding
y_test = keras.utils.to_categorical(y_test, 10) # one-hot encoding

optim = keras.optimizers.RMSprop(1e-3)
loss = keras.losses.CategoricalCrossentropy(from_logits=True)
metrics = [keras.metrics.CategoricalAccuracy()]

model.compile(optimizer=optim, loss=loss, metrics=metrics)


# Train until convergence
epochs = 0
previous_val_loss = float('inf')
while True:
    history = model.fit(x_train, y_train, batch_size=64, epochs=1, validation_split=0.2, verbose=2)
    
    # Check for convergence
    val_loss = history.history['val_loss'][0]
    if val_loss > previous_val_loss:
        print("Validation loss increased. Stopping training.")
        break
    
    previous_val_loss = val_loss
    epochs += 1
    print(f"Epochs trained: {epochs}")

625/625 - 63s - loss: 1.4755 - categorical_accuracy: 0.4598 - val_loss: 1.2660 - val_categorical_accuracy: 0.5393 - 63s/epoch - 101ms/step
Epochs trained: 1
625/625 - 57s - loss: 1.2489 - categorical_accuracy: 0.5513 - val_loss: 1.0945 - val_categorical_accuracy: 0.5966 - 57s/epoch - 91ms/step
Epochs trained: 2
625/625 - 57s - loss: 1.0874 - categorical_accuracy: 0.6147 - val_loss: 0.9787 - val_categorical_accuracy: 0.6495 - 57s/epoch - 92ms/step
Epochs trained: 3
625/625 - 61s - loss: 0.9627 - categorical_accuracy: 0.6599 - val_loss: 1.1865 - val_categorical_accuracy: 0.6056 - 61s/epoch - 97ms/step
Validation loss increased. Stopping training.


# Shared layers and multiple branches

In [26]:
# Embedding for 1000 unique words mapped to 128-dimensional vectors
shared_embedding = layers.Embedding(1000, 128)

# Variable-length sequence of integers
text_input_a = keras.Input(shape=(None,), dtype="int32")

# Variable-length sequence of integers
text_input_b = keras.Input(shape=(None,), dtype="int32")

# Reuse the same layer to encode both inputs
encoded_input_a = shared_embedding(text_input_a)
encoded_input_b = shared_embedding(text_input_b)
# NOTE: to share a layer across different inputs, simply instantiate the layer once, then call it on as many inputs as you want.

In [27]:
# Extract and Reuse Nodes in the Graph of Layers, useful for feature extraction and building complex graphs of layers
vgg19 = keras.applications.VGG19()
features_list = [layer.output for layer in vgg19.layers]
feature_extraction_model = keras.Model(inputs=vgg19.input, outputs=features_list) # extract features from the VGG19 model

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels.h5


In [30]:
img = np.random.random((1, 224, 224, 3)).astype('float32')
extracted_features = feature_extraction_model(img)
print(extracted_features)

[<tf.Tensor: shape=(1, 224, 224, 3), dtype=float32, numpy=
array([[[[0.7180326 , 0.81667626, 0.90034145],
         [0.5271022 , 0.1656228 , 0.53568804],
         [0.52404755, 0.78543353, 0.36643353],
         ...,
         [0.5591577 , 0.9380828 , 0.22866987],
         [0.38242632, 0.38079384, 0.4480558 ],
         [0.3188455 , 0.43608773, 0.04878962]],

        [[0.37114778, 0.6105545 , 0.16170797],
         [0.73311466, 0.92782396, 0.99721014],
         [0.77676797, 0.01243306, 0.6632397 ],
         ...,
         [0.906403  , 0.10811447, 0.9835297 ],
         [0.7936445 , 0.12338267, 0.34680247],
         [0.980885  , 0.19437666, 0.13995156]],

        [[0.7225141 , 0.03970847, 0.7939923 ],
         [0.74938434, 0.4180945 , 0.755542  ],
         [0.60999465, 0.78831875, 0.6992765 ],
         ...,
         [0.7255576 , 0.91949815, 0.8065938 ],
         [0.9734248 , 0.67175126, 0.76745766],
         [0.41174418, 0.5823031 , 0.45413584]],

        ...,

        [[0.9651948 , 0.36079815,

In [None]:
# TODO: use the functional API more often to build models with non-linear topology, shared layers, and multiple inputs or outputs

# Custom Layers

In [31]:
class CustomDense(layers.Layer):
    def __init__(self, units=32):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b


inputs = keras.Input((4,))
outputs = CustomDense(10)(inputs)

model = keras.Model(inputs, outputs)

In [32]:
# for serialization
@keras.saving.register_keras_serializable()
class CustomDense(layers.Layer):
    def __init__(self, units=32):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        return {"units": self.units}


inputs = keras.Input((4,))
outputs = CustomDense(10)(inputs)

model = keras.Model(inputs, outputs)
config = model.get_config()

new_model = keras.Model.from_config(config)

Sequential API, Functional API, and Subclass API are interoperable because they all produce Keras models. You can use them together seamlessly in the same project.