# Functional API and Callbacks

### Imports

In [1]:
import os
import tensorflow.keras as keras
import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt
from keras.losses import mean_squared_error
from keras.models import Sequential
from keras.layers import Flatten, Dense, SimpleRNN, Input, Concatenate

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

Using TensorFlow backend.


### Data
Same as California Pricing.

In [2]:
# load data and split into training, validation and test set
housing = fetch_california_housing()
x_train, x_test, y_train, y_test = train_test_split(
                                housing.data, housing.target)
x_train, x_val, y_train, y_val = train_test_split(
                                x_train, y_train)

# check data dimensions
x_train.shape

# scale data
scl = StandardScaler()
x_train = scl.fit_transform(x_train)
x_val = scl.transform(x_val)
x_test = scl.transform(x_test)

### Multiple inputs network

In [4]:
input_A = Input(shape = [5], name='wide_input')
input_B = Input(shape = [6], name='deep_input')
hidden1 = Dense(50, activation='relu')(input_B)
hidden2 = Dense(50, activation='relu')(hidden1)
concat = Concatenate()([input_A, hidden2])
output = Dense(1, name='output')(concat)
model = keras.Model(inputs = [input_A, input_B], outputs=[output])

model.compile(loss = 'mse', optimizer = 'sgd')
keras.utils.plot_model(model, "multi_inputs_model.png", show_shapes=True)
model.summary()

# split data into two groups, one for each input
# dataframe has dimensions: (11610, 8)
x_train_A, x_train_B = x_train[:,:5], x_train[:,2:]
x_val_A, x_val_B = x_val[:,:5], x_val[:,2:]
x_test_A, x_test_B = x_test[:,:5], x_test[:,2:]

history = model.fit((x_train_A, x_train_B), y_train, epochs=50, 
                    validation_data=((x_val_A, x_val_B), y_val))


AttributeError: 'tuple' object has no attribute 'layer'

### Multiple outputs network

In [8]:
input_A = Input(shape = [5], name='wide_input')
input_B = Input(shape = [6], name='deep_input')
hidden1 = Dense(50, activation='relu')(input_B)
hidden2 = Dense(50, activation='relu')(hidden1)
concat = Concatenate()([input_A, hidden2])
main_output = Dense(1, name='main_output')(concat)
aux_output = Dense(1, name='aux_output')(hidden2)
model2 = keras.Model(inputs = [input_A, input_B], outputs=[main_output, aux_output])

model2.compile(loss = ['mse', 'mse'], loss_weights=[0.9, 0.1], optimizer = 'sgd')
keras.utils.plot_model(model2, "multi_outputs_model.png", show_shapes=True)
model2.summary()

history = model2.fit([x_train_A, x_train_B], [y_train, y_train], epochs=50, 
                    validation_data=([x_val_A, x_val_B], [y_val, y_val]))

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
deep_input (InputLayer)         (None, 6)            0                                            
__________________________________________________________________________________________________
dense_11 (Dense)                (None, 50)           350         deep_input[0][0]                 
__________________________________________________________________________________________________
wide_input (InputLayer)         (None, 5)            0                                            
__________________________________________________________________________________________________
dense_12 (Dense)                (None, 50)           2550        dense_11[0][0]                   
____________________________________________________________________________________________

Notice that When evaluating the model, the loss is split into total, main and auxiliary. 

In [13]:
total_mse, main_mse, aux_mse = model2.evaluate(
                    [x_test_A, x_test_B], [y_test, y_test])
print(f'Total MSE: {total_mse} \n Main MSE: {main_mse} \n Aux MSE: {aux_mse}')

y_preds_main, y_preds_aux = model2.predict([x_test_A[:5], x_test_B[:5]])
print(f'Main y_preds: {y_preds_main} \n Aux y_preds: {y_preds_aux}')

Total MSE: 0.4038230770318083 
 Main MSE: 0.3991163372993469 
 Aux MSE: 0.4355330169200897
Main y_preds: [[1.6943412]
 [1.4466107]
 [1.9087393]
 [2.3763099]
 [5.0115895]] 
 Aux y_preds: [[1.3439986]
 [1.564224 ]
 [1.8508682]
 [2.3151948]
 [4.2783275]]


### Callbacks

In [14]:
model = Sequential([
        Dense(50, activation='relu', input_shape = x_train.shape[1:]),
        Dense(50, activation='relu'),
        Dense(1)
        ])

1) Checkpointing: model can be saved during or after training to be restored at a later time.

In [15]:
checkpoint_cb = keras.callbacks.ModelCheckpoint('my_model.h5', save_best_only = True)

2) Early Stopping: can be used to avoid overfitting

In [16]:
earlystop_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights= True)

3) TensorBoard: can be used to keep track of training performances in real time.

In [17]:
log_dir = os.path.join(os.curdir, 'logs')
tensorboard_cb = keras.callbacks.TensorBoard(log_dir)

Finally, train the model with callbacks by providing them as argument to the fit function.

In [18]:
model.compile(loss = 'mse', optimizer = 'sgd')
history = model.fit(x_train, y_train, epochs=100, validation_data=(x_val, y_val),
                    callbacks=[checkpoint_cb, earlystop_cb])
model = keras.models.load_model('my_model.h5')

Train on 11610 samples, validate on 3870 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
