# Functional API

### Building a complex model using the Functional API

##### Not all Neural network are simply sequential. Some have complex topologies, some have multiple input/output. For ex deep & wide NN connect its some input direct to the output.

#### Lets load the california dataset

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
housing = datasets.fetch_california_housing()
x_train, x_test, y_train, y_test   = train_test_split(housing.data, housing.target, random_state=42)
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, random_state=42)
print(x_train.shape, x_valid.shape, x_test.shape)

(11610, 8) (3870, 8) (5160, 8)


In [4]:
# Reset the name counter
def cls():
    tf.keras.backend.clear_session()
    tf.random.set_seed(42)

## 01. Network with one input and one output. Input alo connect with output directly.

In [14]:
cls()
normalization_layer = keras.layers.Normalization()
input_  = keras.layers.Input(shape=x_train.shape[1:])
normal  = normalization_layer(input_)
hidden1 = keras.layers.Dense(30, activation='relu')(normal)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
concat  = keras.layers.Concatenate()([input_, hidden2])
output_ = keras.layers.Dense(1)(concat)
model   = keras.Model(inputs=[input_], outputs=[output_])
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 8)]          0                                            
__________________________________________________________________________________________________
normalization (Normalization)   (None, 8)            17          input_1[0][0]                    
__________________________________________________________________________________________________
dense (Dense)                   (None, 30)           270         normalization[0][0]              
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 30)           930         dense[0][0]                      
______________________________________________________________________________________________

In [15]:
model.compile(loss="mse", optimizer=keras.optimizers.Adam(learning_rate=1e-3), metrics=[keras.metrics.RootMeanSquaredError()])
normalization_layer.adapt(x_train)
history = model.fit(x_train, y_train, epochs=5, validation_data=(x_valid, y_valid))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [17]:
model.evaluate(x_test, y_test)



[0.548128604888916, 0.7403571009635925]

## 02. Network with multiple input and single output
Support I want to pass 4 starting features directly to the last layer and rest of the features or overlap through the hidden layers


In [7]:
cls()
input_wide  = keras.layers.Input(shape=[5], name='wide_input') # 5 features [0 to 4] directly to the output layer
input_deep  = keras.layers.Input(shape=[6], name='deep_input') # 6 features [2 to 7] through hidden layers.
normal_layer_wide = keras.layers.Normalization()
normal_layer_deep = keras.layers.Normalization()
normal_wide = normal_layer_wide(input_wide) 
normal_deep = normal_layer_deep(input_deep)
hidden1 = keras.layers.Dense(30, activation='relu')(normal_deep)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
concat = keras.layers.Concatenate()([normal_wide, hidden2])
output = keras.layers.Dense(1, name='output')(concat)
model = keras.Model(inputs=[input_wide, input_deep], outputs=[output])
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
deep_input (InputLayer)         [(None, 6)]          0                                            
__________________________________________________________________________________________________
normalization_1 (Normalization) (None, 6)            13          deep_input[0][0]                 
__________________________________________________________________________________________________
wide_input (InputLayer)         [(None, 5)]          0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 30)           210         normalization_1[0][0]            
______________________________________________________________________________________________

In [8]:
model.compile(loss=keras.losses.MeanSquaredError(), optimizer=keras.optimizers.Adam(learning_rate=1e-3), metrics=keras.metrics.RootMeanSquaredError())
x_train_wide, x_train_deep = x_train[:,:5], x_train[:,2:]
x_valid_wide, x_valid_deep = x_valid[:,:5], x_valid[:,2:]
x_test_wide , x_test_deep  = x_test[:,:5] , x_test[:,2:]
normal_layer_wide.adapt(x_train_wide)
normal_layer_deep.adapt(x_train_deep)
history = model.fit({'wide_input':x_train_wide, "deep_input":x_train_deep}, y_train, epochs=5, validation_data=((x_valid_wide, x_valid_deep), y_valid))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [9]:
model.evaluate((x_test_wide,x_test_deep), y_test)



[0.38654041290283203, 0.6217237710952759]

In [10]:
model.predict((x_test_wide[:1], x_test_deep[:1])), y_test[:1]

(array([[0.35351694]], dtype=float32), array([0.477]))

## 03. Handling multiple output

In [52]:
cls()
input_wide  = keras.layers.Input(shape=[5], name='wide_input') # 5 features [0 to 4] directly to the output layer
input_deep  = keras.layers.Input(shape=[6], name='deep_input') # 6 features [2 to 7] through hidden layers.
normal_layer_wide = keras.layers.Normalization()
normal_layer_deep = keras.layers.Normalization()
normal_wide = normal_layer_wide(input_wide) 
normal_deep = normal_layer_deep(input_deep)
hidden1 = keras.layers.Dense(30, activation='relu')(normal_deep)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
concat = keras.layers.Concatenate()([normal_wide, hidden2])
output = keras.layers.Dense(1, name='main_output')(concat)
aux_output = keras.layers.Dense(1, name='aux_output')(hidden2) 
model = keras.Model(inputs=[input_wide, input_deep], outputs=[output, aux_output])
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
deep_input (InputLayer)         [(None, 6)]          0                                            
__________________________________________________________________________________________________
normalization_1 (Normalization) (None, 6)            13          deep_input[0][0]                 
__________________________________________________________________________________________________
wide_input (InputLayer)         [(None, 5)]          0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 30)           210         normalization_1[0][0]            
______________________________________________________________________________________________

In [53]:
model.compile(loss={"main_output":"mse", "aux_output":"mse"}, loss_weights=(0.9, 0.1), optimizer=keras.optimizers.Adam(learning_rate=1e-3), metrics=keras.metrics.RootMeanSquaredError())
normal_layer_wide.adapt(x_train_wide)
normal_layer_deep.adapt(x_train_deep)
history = model.fit((x_train_wide, x_train_deep), (y_train, y_train), epochs=5, validation_data=((x_valid_wide, x_valid_deep), (y_valid,y_valid)))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [54]:
weighted_sum_of_losses, main_loss, aux_loss, main_rmse, aux_rmse = model.evaluate((x_test_wide,x_test_deep), (y_test,y_test))
print(weighted_sum_of_losses, main_loss, aux_loss, main_rmse, aux_rmse)

0.396294504404068 0.380817174911499 0.535590648651123 0.6171038746833801 0.7318406105041504


In [55]:
model.predict((x_test_wide[:1], x_test_deep[:1])), y_test[:1]

([array([[0.5382582]], dtype=float32), array([[0.67674875]], dtype=float32)],
 array([0.477]))

In [58]:
# Model successfully saved and you can read as well
model.save("multi_input_multi_output.h5")
keras.models.load_model('multi_input_multi_output.h5').summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
deep_input (InputLayer)         [(None, 6)]          0                                            
__________________________________________________________________________________________________
normalization_1 (Normalization) (None, 6)            13          deep_input[0][0]                 
__________________________________________________________________________________________________
wide_input (InputLayer)         [(None, 5)]          0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 30)           210         normalization_1[0][0]            
______________________________________________________________________________________________

# Building Dynamic Models using SubClassing API
Both the sequential API and the Functional API are declarative: you start by declaring which layers you want to use and how they should be connected, and only then can you start feeding the data to the model for training or inference

In [11]:
class WideAndDeepModel(keras.Model):
    def __init__(self, units=30, activation="relu", **kwargs):
        super().__init__(**kwargs)  # needed to support naming the model
        self.norm_layer_wide = keras.layers.Normalization()
        self.norm_layer_deep = keras.layers.Normalization()
        self.hidden1 = keras.layers.Dense(units, activation=activation)
        self.hidden2 = keras.layers.Dense(units, activation=activation)
        self.main_output = keras.layers.Dense(1)
        self.aux_output  = keras.layers.Dense(1)
        
    def call(self, inputs):
        input_wide, input_deep = inputs
        norm_wide = self.norm_layer_wide(input_wide)
        norm_deep = self.norm_layer_deep(input_deep)
        hidden1 = self.hidden1(norm_deep)
        hidden2 = self.hidden2(hidden1)
        concat = keras.layers.concatenate([norm_wide, hidden2])
        output = self.main_output(concat)
        aux_output = self.aux_output(hidden2)
        return output, aux_output

cls()
model = WideAndDeepModel(30, activation="relu", name="my_cool_model")

# You can do anything in call() for loops, if statements, low-level tensorflow operations

In [12]:
model.compile(loss=("mse","mse"), loss_weights=(0.9, 0.1), optimizer=keras.optimizers.Adam(learning_rate=1e-3), metrics=keras.metrics.RootMeanSquaredError())
model.norm_layer_wide.adapt(x_train_wide)
model.norm_layer_deep.adapt(x_train_deep)
history = model.fit((x_train_wide, x_train_deep), (y_train, y_train), epochs=5, validation_data=((x_valid_wide, x_valid_deep), (y_valid,y_valid)))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
# Keras will not know about what is placed inside call() so it would let us know the connections of layers, shapes, dtypes, etc
model.summary()

Model: "my_cool_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization (Normalization multiple                  11        
_________________________________________________________________
normalization_1 (Normalizati multiple                  13        
_________________________________________________________________
dense (Dense)                multiple                  210       
_________________________________________________________________
dense_1 (Dense)              multiple                  930       
_________________________________________________________________
dense_2 (Dense)              multiple                  36        
_________________________________________________________________
dense_3 (Dense)              multiple                  31        
Total params: 1,231
Trainable params: 1,207
Non-trainable params: 24
__________________________________________________

In [69]:
weighted_sum_of_losses, main_loss, aux_loss, main_rmse, aux_rmse = model.evaluate((x_test_wide,x_test_deep), (y_test,y_test))
print(weighted_sum_of_losses, main_loss, aux_loss, main_rmse, aux_rmse)

0.396294504404068 0.380817174911499 0.535590648651123 0.6171038746833801 0.7318406105041504


In [70]:
model.predict((x_test_wide[:1], x_test_deep[:1])), y_test[:1]



((array([[0.5382582]], dtype=float32), array([[0.67674875]], dtype=float32)),
 array([0.477]))

In [76]:
# Now it will not save to .h5 format directly because it's not functional or sequential model
model.save("multi_input_multi_output.h5")

NotImplementedError: Saving the model to HDF5 format requires the model to be a Functional model or a Sequential model. It does not work for subclassed models, because such models are defined via the body of a Python method, which isn't safely serializable. Consider saving to the Tensorflow SavedModel format (by setting save_format="tf") or using `save_weights`.

In [77]:
model.save("multi_input_multi_output")

INFO:tensorflow:Assets written to: multi_input_multi_output\assets


In [97]:
# To check how many directories created
[print(i) for i in sorted(Path("multi_input_multi_output").glob("**/*"))]

multi_input_multi_output\assets
multi_input_multi_output\keras_metadata.pb
multi_input_multi_output\saved_model.pb
multi_input_multi_output\variables
multi_input_multi_output\variables\variables.data-00000-of-00001
multi_input_multi_output\variables\variables.index


[None, None, None, None, None, None]

In [99]:
model = keras.models.load_model("multi_input_multi_output")

model.predict((x_test_wide[:1], x_test_deep[:1])), y_test[:1]

((array([[0.5382582]], dtype=float32), array([[0.67674875]], dtype=float32)),
 array([0.477]))

In [101]:
model.save_weights("weights/multi_input_multi_output_weights")

In [102]:
model.load_weights('weights/multi_input_multi_output_weights') # First build the model the load the weights

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1c8fe2e3e50>

In [105]:
# Remove the directory recursively
import shutil
shutil.rmtree("weights", ignore_errors=True)
shutil.rmtree("multi_input_multi_output", ignore_errors=True)


# Miscellaneous
To improve the performance of the Model

## 01. Callbacks

In [19]:
# We are using the same model object()

# Save the last trained epoch 
cb_checkpoint = keras.callbacks.ModelCheckpoint("model/checkpoint") # save_best_only=True will save the best model where validation score low

history = model.fit((x_train_wide, x_train_deep), (y_train, y_train), epochs=5, 
                    validation_data=((x_valid_wide, x_valid_deep), (y_valid,y_valid)),
                    callbacks=[cb_checkpoint])

Epoch 1/5
INFO:tensorflow:Assets written to: model\checkpoint\assets
Epoch 2/5
INFO:tensorflow:Assets written to: model\checkpoint\assets
Epoch 3/5
INFO:tensorflow:Assets written to: model\checkpoint\assets
Epoch 4/5
INFO:tensorflow:Assets written to: model\checkpoint\assets
Epoch 5/5
INFO:tensorflow:Assets written to: model\checkpoint\assets


In [21]:
cb_earlystopping = keras.callbacks.EarlyStopping(patience=3)
history = model.fit((x_train_wide, x_train_deep), (y_train, y_train), epochs=5, 
                    validation_data=((x_valid_wide, x_valid_deep), (y_valid,y_valid)),
                    callbacks=[cb_checkpoint, cb_earlystopping])

Epoch 1/5
INFO:tensorflow:Assets written to: model\checkpoint\assets
Epoch 2/5
INFO:tensorflow:Assets written to: model\checkpoint\assets
Epoch 3/5
INFO:tensorflow:Assets written to: model\checkpoint\assets
Epoch 4/5
INFO:tensorflow:Assets written to: model\checkpoint\assets
Epoch 5/5
INFO:tensorflow:Assets written to: model\checkpoint\assets


## 02. Custom callback

In [22]:
class PrintValTrainRatioCallback(keras.callbacks.Callback):
    # You can also implement
    # on_train_begin(), on_train_end(), on_epoch_begin(), on_epoch_end(), on_batch_begin(), on_batch_end(), on_test_begin(), on_test_end()
    # on_test_batch_begin(), on_predict_begin(), on_predict_batch_begin()
    def on_epoch_end(self, epoch, logs):
        print("\nval/train: {:.2f}".format(logs['val_loss'] / logs['loss']))



In [26]:
cb_val_train_ratio = PrintValTrainRatioCallback()
history = model.fit((x_train_wide, x_train_deep), (y_train, y_train), epochs=5, 
                    validation_data=((x_valid_wide, x_valid_deep), (y_valid,y_valid)),
                    callbacks=[cb_val_train_ratio], verbose=0)


val/train: 0.99

val/train: 1.07

val/train: 0.95

val/train: 0.99

val/train: 1.06
