This repository has been archived by the owner on Jan 19, 2019. It is now read-only.
/
decomposable_attention.py
89 lines (73 loc) · 4.42 KB
/
decomposable_attention.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from typing import Dict
from keras.layers import Input
from overrides import overrides
from ...data.instances.entailment.snli_instance import SnliInstance
from ...training.text_trainer import TextTrainer
from ...layers.entailment_models import DecomposableAttentionEntailment
from ...training.models import DeepQaModel
from ...common.params import Params
class DecomposableAttention(TextTrainer):
    '''
    This ``TextTrainer`` implements the Decomposable Attention model described in "A Decomposable
    Attention Model for Natural Language Inference", by Parikh et al., 2016, with some optional
    enhancements before the decomposable attention actually happens.  Specifically, Parikh's
    original model took plain word embeddings as input to the decomposable attention; we allow
    other operations that transform these word embeddings, such as running a biLSTM on them,
    before running the decomposable attention layer.

    Inputs:

    - A "text" sentence, with shape (batch_size, sentence_length)
    - A "hypothesis" sentence, with shape (batch_size, sentence_length)

    Outputs:

    - An entailment decision per input text/hypothesis pair, in {entails, contradicts, neutral}.

    Parameters
    ----------
    num_seq2seq_layers : int, optional (default=0)
        After getting a word embedding, how many stacked seq2seq encoders should we use before
        doing the decomposable attention?  The default of 0 recreates the original decomposable
        attention model.
    share_encoders : bool, optional (default=True)
        Should we use the same seq2seq encoder for the text and hypothesis, or different ones?
    decomposable_attention_params : Dict[str, Any], optional (default={})
        These parameters get passed to the
        :class:`~deep_qa.layers.entailment_models.decomposable_attention.DecomposableAttentionEntailment`
        layer object, and control things like the number of output labels, number of hidden layers
        in the entailment MLPs, etc.  See that class for a complete description of options here.
    '''
    def __init__(self, params: Params):
        # Number of stacked seq2seq encoders applied to the word embeddings before the
        # decomposable attention; 0 reproduces Parikh et al.'s original model.
        self.num_seq2seq_layers = params.pop('num_seq2seq_layers', 0)
        # Whether the text and hypothesis share a single seq2seq encoder per layer.
        self.share_encoders = params.pop('share_encoders', True)
        # Passed verbatim to the DecomposableAttentionEntailment layer in _build_model().
        self.decomposable_attention_params = params.pop('decomposable_attention_params', {})
        super().__init__(params)

    @overrides
    def _instance_type(self):
        # This model trains on SNLI-style text/hypothesis entailment pairs.
        return SnliInstance

    @overrides
    def _build_model(self):
        # Two integer word-index inputs of identical shape: one for the premise ("text"),
        # one for the hypothesis.
        text_input = Input(shape=self._get_sentence_shape(), dtype='int32', name="text_input")
        hypothesis_input = Input(shape=self._get_sentence_shape(), dtype='int32', name="hypothesis_input")
        text_embedding = self._embed_input(text_input)
        hypothesis_embedding = self._embed_input(hypothesis_input)
        # Optionally transform the embeddings with stacked seq2seq encoders.  When
        # share_encoders is True, both sentences request the same "hidden_{i}" encoder,
        # so _get_seq2seq_encoder returns a shared layer; otherwise each sentence gets
        # its own "text_{i}" / "hypothesis_{i}" encoder.
        for i in range(self.num_seq2seq_layers):
            text_encoder_name = "hidden_{}".format(i) if self.share_encoders else "text_{}".format(i)
            text_encoder = self._get_seq2seq_encoder(name=text_encoder_name,
                                                     fallback_behavior="use default params")
            text_embedding = text_encoder(text_embedding)
            hypothesis_encoder_name = "hidden_{}".format(i) if self.share_encoders else "hypothesis_{}".format(i)
            hypothesis_encoder = self._get_seq2seq_encoder(name=hypothesis_encoder_name,
                                                           fallback_behavior="use default params")
            hypothesis_embedding = hypothesis_encoder(hypothesis_embedding)
        # The entailment layer does the actual decomposable attention and produces a
        # distribution over the entailment labels.
        entailment_layer = DecomposableAttentionEntailment(**self.decomposable_attention_params)
        entailment_probabilities = entailment_layer([text_embedding, hypothesis_embedding])
        return DeepQaModel(inputs=[text_input, hypothesis_input], outputs=entailment_probabilities)

    @overrides
    def get_padding_memory_scaling(self, padding_lengths: Dict[str, int]) -> int:
        # The attention computes a word-by-word alignment between the two sentences, so
        # memory use grows with the square of the (padded) sentence length.
        return padding_lengths['num_sentence_words'] ** 2

    @overrides
    def _set_padding_lengths_from_model(self):
        # Input 0 is the text input; its shape (minus the batch dimension) gives the
        # sentence padding lengths.  (A leftover debug print of the input shape was
        # removed here.)
        self._set_text_lengths_from_model_input(self.model.get_input_shape_at(0)[0][1:])

    @classmethod
    def _get_custom_objects(cls):
        # Keras needs the custom layer class registered to deserialize saved models.
        custom_objects = super()._get_custom_objects()
        custom_objects["DecomposableAttentionEntailment"] = DecomposableAttentionEntailment
        return custom_objects