In [1]:
import numpy as np 
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras import layers 

# Introduction 
The Keras _functional API_ provides a way to create models that are more flexible than `tf.keras.Sequential`. It can handle models with non-linear topology, shared layers, and even multiple inputs or outputs. 

The main idea is that a deep learning model is usually a directed acyclic graph (DAG) of layers. The _functional API_ is a way to build such graphs of layers. 

In [2]:
# Symbolic entry point of the graph: every sample is a flat vector of 784 values.
inputs = keras.Input(shape=(784,))
# Show the symbolic shape and the dtype of the input node.
print(
    inputs.shape,
    inputs.dtype,
)

(None, 784) <dtype: 'float32'>


To create a new node in the graph of layers, call a layer on the `inputs` object. 

In [3]:
# Build the layer object first, then call it on `inputs` to add a node to the graph.
dense = layers.Dense(units=64, activation='relu')
x = dense(inputs)

In [4]:
# Second hidden layer, created and applied in one expression.
x = layers.Dense(units=64, activation='relu')(x)
# Final 10-unit layer with no activation.
outputs = layers.Dense(units=10)(x)

In [5]:
# A model is defined by the input and output nodes of the graph of layers.
model = keras.Model(
    inputs=inputs,
    outputs=outputs,
    name='mnist_model',
)

In [6]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

Model: "mnist_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense (Dense)                (None, 64)                50240     
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 10)                650       
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')

In [7]:
# Render the model graph to an image file (requires pydot and graphviz).
keras.utils.plot_model(model, to_file='my_first_model.png')

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


In [8]:
# Same plot, additionally annotated with the input/output shapes of each layer.
keras.utils.plot_model(
    model,
    to_file="my_first_model_with_shape_info.png",
    show_shapes=True,
)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


# Training, evaluation, and inference 

Training works exactly the same way as for `Sequential` models. 

The `fit()` and `evaluate()` methods are available and can be easily customized to implement training routines beyond supervised learning (e.g. GANs). 

In [9]:
# Load MNIST as (images, labels) pairs for the training and test splits.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [11]:
# Inspect the raw array shapes of both splits before preprocessing.
x_train.shape, x_test.shape

((60000, 28, 28), (10000, 28, 28))

In [12]:

# Flatten every 28x28 image into a 784-vector and rescale the pixel values by 1/255
# (to [0, 1], assuming 8-bit pixel data).
# The integer-dtype guard keeps the cell idempotent: once the arrays are float32,
# re-running the cell will not divide them by 255 a second time.
if np.issubdtype(x_train.dtype, np.integer):
    # -1 lets NumPy infer the number of samples instead of hard-coding 60_000 / 10_000.
    x_train = x_train.reshape(-1, 784).astype('float32') / 255
    x_test = x_test.reshape(-1, 784).astype('float32') / 255

In [15]:
# The last Dense layer has no activation, so the loss is told to expect logits.
model.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [16]:
# Train for two epochs, holding out 20% of the training data for validation.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=2,
    validation_split=0.2,
)

Epoch 1/2
Epoch 2/2


In [17]:
# evaluate() returns the loss followed by the compiled metrics (here: accuracy).
test_scores = model.evaluate(x=x_test, y=y_test, verbose=2)
test_loss, test_accuracy = test_scores[0], test_scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

313/313 - 1s - loss: 0.1326 - accuracy: 0.9598
Test loss: 0.13257351517677307
Test accuracy: 0.9598000049591064


# Save and Serialize 

Saving and serializing a model built with the _functional API_ works the same way as for `Sequential` models.

`model.save()`: the saved file includes the model architecture, the model weight values, the model training config (as passed to `model.compile`), and the optimizer and its state, so you can restart training where you left off.

In [18]:
# Round-trip the model through disk: save it, drop the in-memory object,
# then rebuild it from the saved files.
saved_model_dir = './models/my_first_model_training'
model.save(saved_model_dir)
del model
model = keras.models.load_model(saved_model_dir)

INFO:tensorflow:Assets written to: ./models/my_first_model_training\assets


# Defining multiple models with the same graph of layers 

In the functional API, models are created by specifying their inputs and outputs in a graph of layers. A single graph of layers can be used to generate multiple models. 

In [31]:
# Encoder: a stack of 3x3 convolutions with one 3x3 max-pooling step in the middle,
# collapsed to a 16-value code by global max pooling.
encoder_input = keras.Input(shape=(28, 28, 1), name='img')
x = layers.Conv2D(filters=16, kernel_size=3, activation='relu')(encoder_input)
x = layers.Conv2D(filters=32, kernel_size=3, activation='relu')(x)
x = layers.MaxPooling2D(pool_size=3)(x)
x = layers.Conv2D(filters=32, kernel_size=3, activation='relu')(x)
x = layers.Conv2D(filters=16, kernel_size=3, activation='relu')(x)
encoder_output = layers.GlobalMaxPool2D()(x)

# The encoder model covers the graph from the image input to the 16-value code.
encoder = keras.Model(
    inputs=encoder_input,
    outputs=encoder_output,
    name="encoder",
)
encoder.summary()

Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
img (InputLayer)             [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 26, 26, 16)        160       
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 24, 24, 32)        4640      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 6, 6, 32)          9248      
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 4, 4, 16)          4624      
_________________________________________________________________
global_max_pooling2d_4 (Glob (None, 16)                0   

In [32]:
# Decoder layers, attached directly to the encoder output so both halves share one graph.
# The 16-value code is first reshaped into a 4x4 single-channel feature map.
x = layers.Reshape(target_shape=(4, 4, 1))(encoder_output)
x = layers.Conv2DTranspose(filters=16, kernel_size=3, activation='relu')(x)
x = layers.Conv2DTranspose(filters=32, kernel_size=3, activation='relu')(x)
x = layers.UpSampling2D(size=3)(x)
x = layers.Conv2DTranspose(filters=16, kernel_size=3, activation='relu')(x)
decoder_output = layers.Conv2DTranspose(filters=1, kernel_size=3, activation='relu')(x)

In [33]:
# Second model over the same graph: it starts at the encoder input and ends at the decoder output.
autoencoder = keras.Model(
    inputs=encoder_input,
    outputs=decoder_output,
    name='autoencoder',
)
autoencoder.summary()

Model: "autoencoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
img (InputLayer)             [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 26, 26, 16)        160       
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 24, 24, 32)        4640      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 6, 6, 32)          9248      
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 4, 4, 16)          4624      
_________________________________________________________________
global_max_pooling2d_4 (Glob (None, 16)                

# All models are callable (just like layers)

You can treat any model as if it were a layer by invoking it on an `Input` or on the output of another layer. By calling a model you aren't just reusing the architecture of the model, you are also reusing its weights. 

In [34]:
# Standalone encoder: image in, 16-value code out.
encoder_input = keras.Input(shape=(28, 28, 1), name='original_img')
x = layers.Conv2D(filters=16, kernel_size=3, activation='relu')(encoder_input)
x = layers.Conv2D(filters=32, kernel_size=3, activation='relu')(x)
x = layers.MaxPool2D(pool_size=3)(x)
x = layers.Conv2D(filters=32, kernel_size=3, activation='relu')(x)
x = layers.Conv2D(filters=16, kernel_size=3, activation='relu')(x)
encoder_output = layers.GlobalMaxPool2D()(x)

encoder = keras.Model(inputs=encoder_input, outputs=encoder_output, name='encoder')

encoder.summary()

Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
original_img (InputLayer)    [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 26, 26, 16)        160       
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 24, 24, 32)        4640      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 6, 6, 32)          9248      
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 4, 4, 16)          4624      
_________________________________________________________________
global_max_pooling2d_5 (Glob (None, 16)                0   

In [36]:
# Standalone decoder with its own Input, so it can later be called like a layer.
# It takes a 16-value code, reshapes it to a 4x4 map and upsamples it through
# transposed convolutions.
decoder_input = keras.Input(shape=(16,), name='encoder_img')
x = layers.Reshape(target_shape=(4, 4, 1))(decoder_input)
x = layers.Conv2DTranspose(filters=16, kernel_size=3, activation='relu')(x)
x = layers.Conv2DTranspose(filters=32, kernel_size=3, activation='relu')(x)
x = layers.UpSampling2D(size=3)(x)
x = layers.Conv2DTranspose(filters=16, kernel_size=3, activation='relu')(x)
decoder_output = layers.Conv2DTranspose(filters=1, kernel_size=3, activation='relu')(x)

decoder = keras.Model(inputs=decoder_input, outputs=decoder_output, name='decoder')
decoder.summary()

Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_img (InputLayer)     [(None, 16)]              0         
_________________________________________________________________
reshape_4 (Reshape)          (None, 4, 4, 1)           0         
_________________________________________________________________
conv2d_transpose_16 (Conv2DT (None, 6, 6, 16)          160       
_________________________________________________________________
conv2d_transpose_17 (Conv2DT (None, 8, 8, 32)          4640      
_________________________________________________________________
up_sampling2d_4 (UpSampling2 (None, 24, 24, 32)        0         
_________________________________________________________________
conv2d_transpose_18 (Conv2DT (None, 26, 26, 16)        4624      
_________________________________________________________________
conv2d_transpose_19 (Conv2DT (None, 28, 28, 1)         145 

In [38]:
# Chain the two models by calling them like layers on a fresh input.
autoencoder_input = keras.Input(shape=(28, 28, 1), name='img')
encoded_img = encoder(autoencoder_input)
decoded_img = decoder(encoded_img)
autoencoder = keras.Model(
    inputs=autoencoder_input,
    outputs=decoded_img,
    name='autoencoder',
)
autoencoder.summary()

Model: "autoencoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
img (InputLayer)             [(None, 28, 28, 1)]       0         
_________________________________________________________________
encoder (Functional)         (None, 16)                18672     
_________________________________________________________________
decoder (Functional)         (None, 28, 28, 1)         9569      
Total params: 28,241
Trainable params: 28,241
Non-trainable params: 0
_________________________________________________________________


# Manipulate Complex graph Topologies

Functional API makes it easy to manipulate multiple inputs and outputs that cannot be handled with the `Sequential` API. 

Example: building a system for ranking customer issue tickets by priority and routing them to the correct department. The model will have three inputs: 

1. Title of the ticket (text input);
2. Text body (text input);
3. any tags added by the user (categorical). 

Model will have 2 outputs: 
1. Priority score between 0 and 1 (scalar sigmoid output), and 
2. the department that should handle the ticket (softmax output over the set of departments)

In [39]:
num_tags = 12 # number of unique issue tags
num_words = 10_000 # size of the vocabulary obtained when preprocessing the text data
num_departments = 4 # number of departments for prediction

In [40]:
# Ticket title: variable-length sequence of ints.
title_input = keras.Input(shape=(None,), name='title')

# Ticket body: variable-length sequence of ints.
body_input = keras.Input(shape=(None,), name='body')

# Tags: binary vectors of size `num_tags`.
tags_input = keras.Input(shape=(num_tags,), name='tags')

In [41]:
# Embed each word of the title into a 64-dimensional vector.
title_features = layers.Embedding(input_dim=num_words, output_dim=64)(title_input)

# Embed each word of the body into a 64-dimensional vector.
body_features = layers.Embedding(input_dim=num_words, output_dim=64)(body_input)

# Reduce the sequence of embedded title words to a single 128-dimensional vector.
title_features = layers.LSTM(units=128)(title_features)

# Reduce the sequence of embedded body words to a single 32-dimensional vector.
body_features = layers.LSTM(units=32)(body_features)

# Merge all available features into one large vector via concatenation.
x = layers.concatenate(
    [title_features, body_features, tags_input]
)

In [42]:
# Priority head: a single unit on top of the merged features.
# No activation is applied here, so the layer emits a logit.
prority_pred = layers.Dense(units=1, name='priority')(x)

# Department head: one unit per department on top of the same features (also logits).
department_pred = layers.Dense(units=num_departments, name='department')(x)

In [43]:
# End-to-end model: three inputs in, priority and department predictions out.
ticket_inputs = [title_input, body_input, tags_input]
ticket_outputs = [prority_pred, department_pred]
model = keras.Model(inputs=ticket_inputs, outputs=ticket_outputs)

In [44]:
# Plot the multi-input / multi-output graph with layer shapes (requires pydot and graphviz).
keras.utils.plot_model(
    model,
    to_file='multi_input_and_output_model.png',
    show_shapes=True,
)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


When compiling the model, you can assign different losses to each output. You can also assign different weights to each loss (to modulate their contribution to the total training loss)

In [46]:
# Losses and their weights are given as lists, matched to the model outputs by position
# (priority first, department second). Both heads emit logits.
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss=[
        keras.losses.BinaryCrossentropy(from_logits=True),
        keras.losses.CategoricalCrossentropy(from_logits=True),
    ],
    loss_weights=[1.0, 0.2],
)

Because each output layer has its own name, the losses and loss weights can also be assigned by output name using dictionaries. 

In [47]:
# Same configuration as a list-based compile, but keyed by the output layer names.
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss={
        'priority': keras.losses.BinaryCrossentropy(from_logits=True),
        'department': keras.losses.CategoricalCrossentropy(from_logits=True),
    },
    loss_weights={
        'priority': 1.0,
        'department': 0.2,
    },
)

In [48]:
# Training with dummy data.
# A seeded Generator makes the synthetic dataset reproducible across kernel restarts;
# the sample count lives in one place instead of being repeated in every call.
rng = np.random.default_rng(42)
num_samples = 1280

# Inputs: random word ids in [0, num_words) and random binary tag vectors.
title_data = rng.integers(num_words, size=(num_samples, 10))
body_data = rng.integers(num_words, size=(num_samples, 100))
tags_data = rng.integers(2, size=(num_samples, num_tags)).astype('float32')

# Targets: a random priority in [0, 1) and a random binary vector per department head.
priority_targets = rng.random(size=(num_samples, 1))
dept_targets = rng.integers(2, size=(num_samples, num_departments))

In [49]:
# Inputs and targets are passed as dicts keyed by the Input / output layer names.
model.fit(
    x={'title': title_data, 'body': body_data, 'tags': tags_data},
    y={'priority': priority_targets, 'department': dept_targets},
    batch_size=32,
    epochs=2,
)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x18f4a617dc0>

# Toy ResNet Model 
The functional API also makes it easy to manipulate non-linear connectivity topologies (models whose layers are not connected sequentially). A common use case is residual connections, as in a ResNet model. 

In [2]:
# Stem: two unpadded 3x3 convolutions followed by 3x3 max pooling.
inputs = keras.Input(shape=(32, 32, 3), name='img')
x = layers.Conv2D(filters=32, kernel_size=3, activation='relu')(inputs)
x = layers.Conv2D(filters=64, kernel_size=3, activation='relu')(x)
block1_output = layers.MaxPool2D(pool_size=3)(x)

# First residual block: 'same' padding keeps the spatial size unchanged,
# so the result can be added element-wise to the block's input.
x = layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu')(block1_output)
x = layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu')(x)
block_2_output = layers.add([x, block1_output])

# Second residual block, same pattern on top of the previous block's output.
x = layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu')(block_2_output)
x = layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu')(x)
block_3_output = layers.add([x, block_2_output])

# Head: one more convolution, global average pooling, a dense layer with dropout,
# and a final 10-unit layer without activation.
x = layers.Conv2D(filters=64, kernel_size=3, activation='relu')(block_3_output)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(units=256, activation='relu')(x)
x = layers.Dropout(rate=0.5)(x)
outputs = layers.Dense(units=10)(x)


In [3]:
# Wrap the residual graph into a model and print its layer overview.
model = keras.Model(inputs=inputs, outputs=outputs, name='toy_resnet')
model.summary()

Model: "toy_resnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
img (InputLayer)                [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 30, 30, 32)   896         img[0][0]                        
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 28, 28, 64)   18496       conv2d[0][0]                     
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 9, 9, 64)     0           conv2d_1[0][0]                   
_________________________________________________________________________________________

In [4]:
# Plot the graph including the skip connections (requires pydot and graphviz).
keras.utils.plot_model(
    model,
    to_file="mini_resnet.png",
    show_shapes=True,
)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


In [7]:
# Load CIFAR-10 as (images, labels) pairs for the training and test splits.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
# Display the raw training labels.
y_train

array([[6],
       [9],
       [9],
       ...,
       [9],
       [1],
       [1]], dtype=uint8)

In [8]:

# Rescale the images by 1/255 as float32. The integer-dtype guard keeps the cell
# idempotent: re-running it will not rescale data that has already been converted.
if np.issubdtype(x_train.dtype, np.integer):
    x_train = x_train.astype('float32') / 255.0
    x_test = x_test.astype('float32') / 255.0

# One-hot encode the labels over 10 classes. Only a column of class ids
# (shape (n, 1)) is converted, so a second run does not encode the one-hot matrix again.
if y_train.ndim == 2 and y_train.shape[1] == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes=10)
    y_test = keras.utils.to_categorical(y_test, num_classes=10)

In [9]:
# Display the training labels again, now in their one-hot encoded form.
y_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.]], dtype=float32)

In [12]:
# Labels are one-hot encoded and the model emits logits, hence
# CategoricalCrossentropy with from_logits=True.
resnet_loss = keras.losses.CategoricalCrossentropy(from_logits=True)
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss=resnet_loss,
    metrics=['acc'],
)

In [14]:
# Train on the first 1,000 samples only, keeping 20% of them for validation.
model.fit(
    x=x_train[:1_000],
    y=y_train[:1_000],
    batch_size=64,
    epochs=5,
    validation_split=0.2,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x28fc41f18b0>