In [3]:
from keras.models import Model
from keras import layers

## Functional API

Makes you use layers as functions (take tensors and return tensors)

In [4]:
input_tensor = layers.Input(shape=(32,))  # tensor
dense = layers.Dense(32, activation='relu')  # layer as function
output_tensor = dense(input_tensor)  # layer called on a tensor, returns tensor

### Multi-input models

text -> embedd -> LSTM ------\\
question -> embedd -> LSTM  --> concatenate -> dense -> answer



In [6]:
text_vocabulary_size = 10000
question_vocabulary_size = 10000
answer_vocabulary_size = 500

# first input to the model
text_input = layers.Input(shape=(None,), dtype='int32', name='text')
embedded_text = layers.Embedding(64, text_vocabulary_size)(text_input)
encoded_text = layers.LSTM(32)(embedded_text)

# second input to the model
question_input = layers.Input(shape=(None,), dtype='int32', name='question')
embedded_quest = layers.Embedding(32, question_vocabulary_size)(question_input)
encoded_quest = layers.LSTM(16)(embedded_quest)

concatenated = layers.concatenate([encoded_text, encoded_quest], axis=-1)
answer = layers.Dense(answer_vocabulary_size, activation='softmax')(concatenated)

model = Model([text_input, question_input], answer)
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['acc'])
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               [(None, None)]       0                                            
__________________________________________________________________________________________________
question (InputLayer)           [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 10000)  640000      text[0][0]                       
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, None, 10000)  320000      question[0][0]                   
____________________________________________________________________________________________

In [None]:
import numpy as np

num_samples = 1000
max_len = 100

text = np.random.randint(1, text_vocabulary_size, size=(num_samples, max_len))
question = np.random.randint(1, question_vocabulary_size, size=(num_samples, max_len))
answers = np.random.randint(0, 1, size=(num_samples, max_len))

model.fit([text, question], answers,
          epochs=10,
          batch_size=128)
# or if inputs are named:
model.fit({'text': text, 'question': question}, answers,
          epochs=10,
          batch_size=128)



### Multi-output (head) models

A simple example is a network that attempts to simultaneously
predict different properties of the data, such as a network that takes as input a series
of social media posts from a single anonymous person and tries to predict attributes of
that person, such as age, gender, and income level.

Social media post -> 1D ConvNet

-> Dense -> Age
-> Dense -> Income
-> Dense -> Gender

In [10]:
vocabulary_size = 50000
num_income_groups = 10

# 1D ConvNet
posts_input = layers.Input(shape=(None, ), dtype='int32', name='posts')
embedded_posts = layers.Embedding(256, vocabulary_size)(posts_input)
x = layers.Conv1D(128, 5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)

# heads
age_prediction = layers.Dense(1, activation='sigmoid', name='age')(x)
income_prediction = layers.Dense(num_income_groups, activation='softmax', name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

model = Model(posts_input, [age_prediction, income_prediction, gender_prediction])

Importantly, training such a model requires the ability to specify different loss func-
tions for different heads of the network: for instance, age prediction is a scalar regres-
sion task, but gender prediction is a binary classification task, requiring a different
training procedure. But because gradient descent requires you to minimize a scalar,
you must combine these losses into a single value in order to train the model. The
simplest way to combine different losses is to sum them all.

In [11]:
model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'])
# or if inputs are named:
model.compile(optimizer='rmsprop',
              loss={'age': 'mse',
                    'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'})

Note that very imbalanced loss contributions will cause the model representations to
be optimized preferentially for the task with the largest individual loss, at the expense
of the other tasks. To remedy this, you can assign different levels of importance to the
loss values in their contribution to the final loss. This is useful in particular if the
losses’ values use different scales. For instance, the mean squared error ( MSE ) loss
used for the age-regression task typically takes a value around 3–5, whereas the cross-
entropy loss used for the gender-classification task can be as low as 0.1. In such a situa-
tion, to balance the contribution of the different losses, you can assign a weight of 10
to the crossentropy loss and a weight of 0.25 to the MSE loss.

In [12]:
model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'],
              loss_weights=[0.25, 1., 10.])
# or if inputs are named:
model.compile(optimizer='rmsprop',
              loss={'age': 'mse',
                    'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'},
              loss_weights={'age': 0.25,
                            'income': 1.,
                            'gender': 10.})

In [17]:
# Training
model.fit(posts, [age_targets, income_targets, gender_targets],
          epochs=10, batch_size=64)

model.fit(posts, {'age': age_targets,
                  'income': income_targets,
                  'gender': gender_targets},
          epochs=10, batch_size=64)

Epoch 1/10


ValueError: in user code:

    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/keras/engine/training.py:853 train_function  *
        return step_function(self, iterator)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/keras/engine/training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/keras/engine/training.py:835 run_step  **
        outputs = model.train_step(data)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/keras/engine/training.py:787 train_step
        y_pred = self(x, training=True)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/keras/engine/base_layer.py:1037 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/keras/engine/functional.py:415 call
        inputs, training=training, mask=mask)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/keras/engine/functional.py:550 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/keras/engine/base_layer.py:1037 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/keras/layers/convolutional.py:249 call
        outputs = self._convolution_op(inputs, self.kernel)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py:1138 convolution_v2
        name=name)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py:1268 convolution_internal
        name=name)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:617 new_func
        return func(*args, **kwargs)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:617 new_func
        return func(*args, **kwargs)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py:2011 conv1d
        name=name)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py:973 conv2d
        data_format=data_format, dilations=dilations, name=name)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:750 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/framework/func_graph.py:601 _create_op_internal
        compute_device)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py:3569 _create_op_internal
        op_def=op_def)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py:2042 __init__
        control_input_ops, op_def)
    /home/przemek/Deep Learning/road-to-deep-learning/road-to-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py:1883 _create_c_op
        raise ValueError(str(e))

    ValueError: Negative dimension size caused by subtracting 5 from 1 for '{{node model_2/conv1d/conv1d}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](model_2/conv1d/conv1d/ExpandDims, model_2/conv1d/conv1d/ExpandDims_1)' with input shapes: [?,1,1,50000], [1,5,50000,128].
