Themes of the chapter:

* The Keras functional API
* Using Keras callback
* Working with the TensorBoard visualization tool
* Important best practices for developing state-of-the-art models

## Keras functional API

The functional API allows multimodal inputs, multiple outputs or branched structures (i.e. acyclic graphs).

### Intro to the functional API

In [5]:
''' Intro to the functional API '''

# Main principle - use layers as functions

from keras import Input, layers

input_tensor = Input(shape=(32,))
dense = layers.Dense(32, activation='relu')
output_tensor = dense(input_tensor)

In [1]:
from keras.models import Sequential, Model
from keras import layers
from keras import Input

seq_model = Sequential()
seq_model.add(layers.Dense(32, activation='relu', input_shape=(64,)))
seq_model.add(layers.Dense(32, activation='relu'))
seq_model.add(layers.Dense(10, activation='softmax'))

input_tensor = Input(shape=(64,))
x = layers.Dense(32, activation='relu')(input_tensor)
x = layers.Dense(32, activation='relu')(x)
output_tensor = layers.Dense(10, activation='softmax')

Using Theano backend.


In [2]:
''' In input tensor and output tensor are unrelated, you get Runtime error '''

unrelated_input = Input(shape=(32,1))
bad_model = Model(unrelated_input, output_tensor)

ValueError: Output tensors to a Model must be the output of a TensorFlow `Layer` (thus holding past layer metadata). Found: <keras.layers.core.Dense object at 0x1c20ef2160>

### Multi-input models

In [4]:
''' Implementation of a two-input question-answer model '''

from keras.models import Model
from keras import Input, layers

text_vocabulary_size = 10000
question_vocabulary_size = 10000
answer_vocabulary_size = 500

# Branch 1
text_input = Input(shape=(None,), dtype='int32', name='text')
embedded_text = layers.Embedding(text_vocabulary_size, 64)(text_input) # There was errata
encoded_text = layers.LSTM(32)(embedded_text)

# Branch 2
question_input = Input(shape=(None,),
                       dtype='int32',
                       name='question')
embedded_question = layers.Embedding(question_vocabulary_size, 32)(question_input) # And there too
encoded_question = layers.LSTM(16)(embedded_question)

# Concatenation & following
concatenated = layers.concatenate([encoded_text, encoded_question],
                                  axis=-1)
answer = layers.Dense(answer_vocabulary_size,
                      activation='softmax')(concatenated)

model = Model([text_input, question_input], answer)
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['acc'])

In [5]:
''' Feeding data to a multi-input model '''

import numpy as np

print(model.summary())

num_samples = 1000
max_length = 100

text = np.random.randint(1, text_vocabulary_size,
                         size=(num_samples, max_length))
question = np.random.randint(1, question_vocabulary_size,
                         size=(num_samples, max_length))
answers = np.random.randint(0, 1, size=(num_samples, answer_vocabulary_size))

# Fit using list of inputs
model.fit([text, question], answers, epochs=10, batch_size=128)

# OR

# Fit using a dictionary of inputs
#model.fit({'text': text, 'question': question}, answers,
#          epochs=10, batch_size=128)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               (None, None)         0                                            
__________________________________________________________________________________________________
question (InputLayer)           (None, None)         0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, None, 64)     640000      text[0][0]                       
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, None, 32)     320000      question[0][0]                   
__________________________________________________________________________________________________
lstm_5 (LS

<keras.callbacks.History at 0x1c21a147f0>

### Multi-output model

In [2]:
''' Implementation of a three-output model '''

from keras import layers
from keras import Input
from keras.models import Model

vocabulary_size = 50000
num_income_groups = 10

posts_input = Input(shape=(None,), dtype='int32', name='posts')
embedded_posts = layers.Embedding(vocabulary_size, 256)(posts_input)
x = layers.Conv1D(128, 5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)

age_prediction = layers.Dense(1, name='age')(x)
income_prediction = layers.Dense(num_income_groups,
                                 activation='softmax',
                                 name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

model = Model(posts_input, 
              [age_prediction, income_prediction, gender_prediction])

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
posts (InputLayer)              (None, None)         0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 256)    12800000    posts[0][0]                      
__________________________________________________________________________________________________
conv1d_6 (Conv1D)               (None, None, 128)    163968      embedding_2[0][0]                
__________________________________________________________________________________________________
max_pooling1d_3 (MaxPooling1D)  (None, None, 128)    0           conv1d_6[0][0]                   
__________________________________________________________________________________________________
conv1d_7 (

In [3]:
''' Computation options for multi-output model '''

model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'])
model.compile(optimizer='rmsprop',
              loss={'age': 'mse', 
                    'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'})

Very imbalanced loss contributions will cause the model representations be optimized preferentially for the task with the largest individual loss. You can assign different levels of importance to the loss values.

In [4]:
''' Multi-output model: loss weightning '''

model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'],
              loss_weights=[0.25, 1., 19.])

model.compile(optimizer='rmsprop',
              loss={'age': 'mse',
                    'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'},
              loss_weights={'age': 0.25,
                            'income': 1.,
                            'gender': 10.})

In [8]:
''' Feeding data do the multi-output model '''

import numpy as np
from keras.utils import to_categorical

num_samples = 1000
max_length = 100

posts = np.random.randint(1, vocabulary_size,
                          size=(num_samples, max_length))

age_targets = np.random.randint(10, 110, 
                                size=(num_samples,))
income_targets = np.random.randint(1, num_income_groups,
                                   size=(num_samples,))
income_targets = to_categorical(income_targets)
gender_targets = np.random.randint(0, 1,
                                   size=(num_samples,))

print(posts[0], age_targets[0], income_targets[0], gender_targets[0])

model.fit(posts, [age_targets, income_targets, gender_targets],
          epochs=10, batch_size=64)

#model.fit(posts, {'age': age_targets,
#                  'income': income_targets,
#                  'gender': gender_targets},
#          epochs=10,
#          batch_size=64)

[ 6678 15066  2211 15517 29700 39689 25137 35749 38410  3585 37127 27703
 47282 36086  8457 10156 45894 42184 34690 40079 36743 24087 17001 48053
 41357 40823  6109  4511 33572 35463 22061 47371  2542 24392  3802 36994
 17638 19228 38421  4602  9389   462 17402 21366 33524  5803 26991  3291
 11658 10207 28056  7644 26447  1071 37190  6072 30503  8019 38051  7235
 46840 13061 35982 39424  3917 15809  5048 49601 12788  9111 22795 18508
 33409 49123 44322 11227 36520 43573 15303 46674 37656 19823 16954 48117
   405 41882 29671 48025 24879 44490 17995 44241 17722 26103 16169 45939
 22380 38617  3979 27196] 105 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.] 0
Epoch 1/10


ValueError: CorrMM: impossible output shape
  bottom shape: 64 x 256 x 2 x 1
  weights shape: 256 x 256 x 5 x 1
  top shape: 64 x 256 x -2 x 1

Apply node that caused the error: CorrMM{valid, (1, 1), (1, 1), 1 False}(InplaceDimShuffle{0,2,1,x}.0, Subtensor{::, ::, ::int64, ::int64}.0)
Toposort index: 171
Inputs types: [TensorType(float32, (False, False, False, True)), TensorType(float32, (False, False, False, True))]
Inputs shapes: [(64, 256, 2, 1), (256, 256, 5, 1)]
Inputs strides: [(2048, 8, 4, 4), (4, 1024, -262144, -4)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[InplaceDimShuffle{0,2,3,1}(CorrMM{valid, (1, 1), (1, 1), 1 False}.0)]]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "/Users/A.Miroshnikova/Public/Conda/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/A.Miroshnikova/Public/Conda/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/A.Miroshnikova/Public/Conda/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-2c93c1ba62f7>", line 17, in <module>
    x = layers.Conv1D(256, 5, activation='relu')(x)
  File "/Users/A.Miroshnikova/Public/Conda/anaconda3/lib/python3.6/site-packages/keras/engine/base_layer.py", line 460, in __call__
    output = self.call(inputs, **kwargs)
  File "/Users/A.Miroshnikova/Public/Conda/anaconda3/lib/python3.6/site-packages/keras/layers/convolutional.py", line 160, in call
    dilation_rate=self.dilation_rate[0])
  File "/Users/A.Miroshnikova/Public/Conda/anaconda3/lib/python3.6/site-packages/keras/backend/theano_backend.py", line 1972, in conv1d
    data_format=data_format, dilation_rate=dilation_rate)
  File "/Users/A.Miroshnikova/Public/Conda/anaconda3/lib/python3.6/site-packages/keras/backend/theano_backend.py", line 2013, in conv2d
    filter_dilation=dilation_rate)

HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.