Pyramid model #198

Merged · 12 commits · Apr 27, 2020

Changes from 11 commits
1 change: 1 addition & 0 deletions .gitignore
@@ -12,6 +12,7 @@ private
!delta/data/feat/python_speech_features/english.wav
*.mp3
tags
+venv
gen
*.cxx
*.o
109 changes: 60 additions & 49 deletions delta/data/task/text_match_task.py
@@ -27,6 +27,8 @@
from delta.utils.register import registers
from delta.layers.utils import compute_sen_lens
from delta import utils


# pylint: disable=too-many-instance-attributes


@@ -45,10 +47,10 @@ def __init__(self, config, mode):

self.paths = self.data_config[mode]['paths']
self.paths_after_pre_process = [
one_path + ".after" for one_path in self.paths
]
self.infer_no_label = self.config["data"][utils.INFER].get(
'infer_no_label', False)
self.infer_without_label = bool(mode == utils.INFER and self.infer_no_label)

self.prepare()
@@ -59,26 +61,26 @@ def generate_data(self):
if self.infer_without_label:
column_num = 2
text_ds_left, text_ds_right = load_textline_dataset(
self.paths_after_pre_process, column_num)
else:
column_num = 3
label, text_ds_left, text_ds_right = load_textline_dataset(
self.paths_after_pre_process, column_num)

input_pipeline_func = self.get_input_pipeline(for_export=False)
text_ds_left = text_ds_left.map(
input_pipeline_func, num_parallel_calls=self.num_parallel_calls)
text_ds_right = text_ds_right.map(
input_pipeline_func, num_parallel_calls=self.num_parallel_calls)
text_size_ds_left = text_ds_left.map(
lambda x: compute_sen_lens(x, padding_token=0),
num_parallel_calls=self.num_parallel_calls)
text_size_ds_right = text_ds_right.map(
lambda x: compute_sen_lens(x, padding_token=0),
num_parallel_calls=self.num_parallel_calls)
text_ds_left_right = tf.data.Dataset.zip((text_ds_left, text_ds_right))
text_len_left_right = tf.data.Dataset.zip(
(text_size_ds_left, text_size_ds_right))
if self.infer_without_label:
data_set_left_right = text_ds_left_right
else:
@@ -89,7 +91,7 @@ def generate_data(self):

self.config['data']['vocab_size'] = vocab_size
self.config['data']['{}_data_size'.format(self.mode)] = get_file_len(
self.paths_after_pre_process)

return data_set_left_right, text_len_left_right

@@ -99,8 +101,12 @@ def feature_spec(self):
tf.TensorShape([self.max_seq_len]))]
if not self.infer_without_label:
feature_shapes.append(tf.TensorShape([self.num_classes]))

+feature_shapes = [tuple(feature_shapes), (tf.TensorShape([]), tf.TensorShape([]))]

if len(feature_shapes) == 1:
return feature_shapes[0]

return tuple(feature_shapes)

def export_inputs(self):
@@ -110,79 +116,84 @@ def export_inputs(self):
self.config['data']['vocab_size'] = vocab_size

input_sent_left = tf.placeholder(
shape=(None,), dtype=tf.string, name="input_sent_left")
shape=(None,), dtype=tf.string, name="input_sent_left")
input_sent_right = tf.placeholder(
shape=(None,), dtype=tf.string, name="input_sent_right")
shape=(None,), dtype=tf.string, name="input_sent_right")
input_pipeline_func = self.get_input_pipeline(for_export=True)

token_ids_left = input_pipeline_func(input_sent_left)
token_ids_right = input_pipeline_func(input_sent_right)
token_ids_len_left = tf.map_fn(
lambda x: compute_sen_lens(x, padding_token=0), token_ids_left)
token_ids_len_right = tf.map_fn(
lambda x: compute_sen_lens(x, padding_token=0), token_ids_right)

export_data = {
"export_inputs": {
"input_sent_left": input_sent_left,
"input_sent_right": input_sent_right,
},
"model_inputs": {
"input_x_left": token_ids_left,
"input_x_right": token_ids_right,
+"input_x_left_len": token_ids_len_left,
+"input_x_right_len": token_ids_len_right,
"input_x_len": [token_ids_len_left, token_ids_len_right]
}
}
return export_data

def dataset(self):
"""Data set function"""
-data_set_left_right, text_len_left_right = self.generate_data()
+ds_left_right, ds_left_right_len = self.generate_data()
+text_ds_left_right = tf.data.Dataset.zip((ds_left_right, ds_left_right_len))

logging.debug("data_set_left_right: {}".format(data_set_left_right))
if self.mode == 'train':
if self.need_shuffle:
# shuffle batch size and repeat
logging.debug("shuffle and repeat dataset ...")
-data_set_left_right = data_set_left_right.apply(
+text_ds_left_right = text_ds_left_right.apply(
tf.data.experimental.shuffle_and_repeat(
buffer_size=self.shuffle_buffer_size, count=None))
else:
logging.debug("repeat dataset ...")
-data_set_left_right = data_set_left_right.repeat(count=None)
+text_ds_left_right = text_ds_left_right.repeat(count=None)

feature_shape = self.feature_spec()
logging.debug("feature_shape: {}".format(feature_shape))

-data_set_left_right = data_set_left_right.padded_batch(
-batch_size=self.batch_size, padded_shapes=feature_shape)
-text_len_left_right = text_len_left_right.batch(self.batch_size)
-# logging.debug("data_set_left_right:{}".format(data_set_left_right))
+text_ds_left_right = text_ds_left_right.padded_batch(
+batch_size=self.batch_size, padded_shapes=feature_shape)

-data_set_left_right = data_set_left_right.prefetch(self.num_prefetch_batch)
-text_len_left_right = text_len_left_right.prefetch(self.num_prefetch_batch)
+text_ds_left_right = text_ds_left_right.prefetch(self.num_prefetch_batch)

-iterator = data_set_left_right.make_initializable_iterator()
-iterator_len = text_len_left_right.make_initializable_iterator()
+iterator = text_ds_left_right.make_initializable_iterator()
# pylint: disable=unused-variable
if self.infer_without_label:
-input_x_left, input_x_right = iterator.get_next()
+(input_x_left, input_x_right), (input_x_left_len, input_x_right_len) = iterator.get_next()
else:
-(input_x_left, input_x_right), input_y = iterator.get_next()
+((input_x_left, input_x_right), input_y), (input_x_left_len, input_x_right_len) = iterator.get_next()

-input_x_left_len, input_x_right_len = iterator_len.get_next()
input_x_dict = collections.OrderedDict([("input_x_left", input_x_left),
-("input_x_right", input_x_right)])
+("input_x_right", input_x_right),
+("input_x_left_len", input_x_left_len),
+("input_x_right_len", input_x_right_len),
+])
input_x_len = collections.OrderedDict([
("input_x_left_len", input_x_left_len),
("input_x_right_len", input_x_right_len)
("input_x_left_len", input_x_left_len),
("input_x_right_len", input_x_right_len)
])

return_dict = {
"input_x_dict": input_x_dict,
"input_x_len": input_x_len,
"iterator": iterator,
-"iterator_len": iterator_len,
}

if not self.infer_without_label:
return_dict["input_y_dict"] = collections.OrderedDict([("input_y",
input_y)])
return return_dict

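The substance of this file's change: the feature dataset and the sentence-length dataset are zipped into one tf.data.Dataset, so a single initializable iterator yields both tensors and lengths, and the separate iterator_len goes away. A minimal sketch of that pattern with toy tensors (plain tensorflow.compat.v1 stands in for delta.compat here; all names are illustrative, not DELTA code):

# Sketch: one zipped dataset -> one iterator for features and lengths.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# Toy stand-ins for (left, right) token ids and their lengths.
ds_left_right = tf.data.Dataset.from_tensor_slices(
    (tf.constant([[1, 2, 0], [3, 0, 0]]), tf.constant([[4, 5, 6], [7, 8, 0]])))
ds_left_right_len = tf.data.Dataset.from_tensor_slices(
    (tf.constant([2, 1]), tf.constant([3, 2])))

ds = tf.data.Dataset.zip((ds_left_right, ds_left_right_len)).batch(2)
iterator = ds.make_initializable_iterator()
(left, right), (left_len, right_len) = iterator.get_next()

with tf.Session() as sess:
  sess.run(iterator.initializer)  # only one initializer now
  print(sess.run([left_len, right_len]))  # [array([2, 1]), array([3, 2])]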
2 changes: 1 addition & 1 deletion delta/data/task/text_match_task_test.py
@@ -72,7 +72,7 @@ def test_english(self):
# with self.cached_session(use_gpu=False, force_gpu=False) as sess:
# sess.run(data["iterator"].initializer)
with self.cached_session(use_gpu=False, force_gpu=False) as sess:
sess.run([data["iterator"].initializer, data["iterator_len"].initializer])
sess.run([data["iterator"].initializer])
res = sess.run([
data["input_x_dict"]["input_x_left"],
data["input_x_dict"]["input_x_right"],
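With the lengths folded into input_x_dict, a caller initializes only the one iterator. A hedged sketch of reading them back, mirroring the test above (key names taken from this diff):

# Sketch only, same pattern as the test above.
with self.cached_session(use_gpu=False, force_gpu=False) as sess:
  sess.run(data["iterator"].initializer)
  left_len, right_len = sess.run([
      data["input_x_dict"]["input_x_left_len"],
      data["input_x_dict"]["input_x_right_len"],
  ])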
136 changes: 136 additions & 0 deletions delta/layers/dynamic_pooling.py
@@ -0,0 +1,136 @@
# Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd.
# All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""An implementation of Dynamic Pooling Layer."""
import typing

import delta.compat as tf
from delta.layers.base_layer import Layer


class DynamicPoolingLayer(Layer):
"""
Layer that computes dynamic pooling of one tensor, guided by a dynamic pooling index tensor.
:param psize1: pooling size of dimension 1
:param psize2: pooling size of dimension 2
:param kwargs: Standard layer keyword arguments.
Examples:
>>> import delta
>>> layer = delta.layers.DynamicPoolingLayer(3, 2)
>>> num_batch, left_len, right_len, num_dim = 5, 3, 2, 10
>>> layer.build([[num_batch, left_len, right_len, num_dim],
... [num_batch, left_len, right_len, 3]])
"""

def __init__(self,
psize1: int,
psize2: int,
**kwargs):
""":class:`DynamicPoolingLayer` constructor."""
super().__init__(**kwargs)
self._psize1 = psize1
self._psize2 = psize2

def build(self, input_shape: typing.List[int]):
"""
Build the layer.
:param input_shape: the shapes of the input tensors,
for DynamicPoolingLayer we need two input tensors.
"""
super().build(input_shape)
input_shape_one = input_shape[0]
self._msize1 = input_shape_one[1]
self._msize2 = input_shape_one[2]

def call(self, inputs: list, **kwargs) -> typing.Any:
"""
The computation logic of DynamicPoolingLayer.
:param inputs: two input tensors.
"""
self._validate_dpool_size()
x, dpool_index = inputs
dpool_shape = tf.shape(dpool_index)
batch_index_one = tf.expand_dims(
tf.expand_dims(
tf.range(dpool_shape[0]), axis=-1),
axis=-1)
batch_index = tf.expand_dims(
tf.tile(batch_index_one, [1, self._msize1, self._msize2]),
axis=-1)
dpool_index_ex = tf.concat([batch_index, dpool_index], axis=3)
x_expand = tf.gather_nd(x, dpool_index_ex)
stride1 = self._msize1 // self._psize1
stride2 = self._msize2 // self._psize2

x_pool = tf.nn.max_pool(x_expand,
[1, stride1, stride2, 1],
[1, stride1, stride2, 1],
"VALID")
return x_pool

def compute_output_shape(self, input_shape: list) -> tuple:
"""
Calculate the layer output shape.
:param input_shape: the shapes of the input tensors,
for DynamicPoolingLayer we need two input tensors.
"""
input_shape_one = input_shape[0]
return (None, self._psize1, self._psize2, input_shape_one[3])

def get_config(self) -> dict:
"""Get the config dict of DynamicPoolingLayer."""
config = {
'psize1': self._psize1,
'psize2': self._psize2
}
base_config = super(DynamicPoolingLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))

def _validate_dpool_size(self):
suggestion = self.get_size_suggestion(
self._msize1, self._msize2, self._psize1, self._psize2
)
if suggestion != (self._psize1, self._psize2):
raise ValueError(
"DynamicPooling Layer can not "
f"generate ({self._psize1} x {self._psize2}) output "
f"feature map, please use ({suggestion[0]} x {suggestion[1]})"
f" instead. `model.params['dpool_size'] = {suggestion}` "
)

@classmethod
def get_size_suggestion(
cls,
msize1: int,
msize2: int,
psize1: int,
psize2: int
) -> typing.Tuple[int, int]:
"""
Get `dpool_size` suggestion for a given shape.
Returns the nearest legal `dpool_size` for the given combination of
`(psize1, psize2)`.
:param msize1: size of the left text.
:param msize2: size of the right text.
:param psize1: requested pooling size of dimension 1.
:param psize2: requested pooling size of dimension 2.
:return: the nearest legal `(psize1, psize2)` pair.
"""
stride1 = msize1 // psize1
stride2 = msize2 // psize2
suggestion1 = msize1 // stride1
suggestion2 = msize2 // stride2
return (suggestion1, suggestion2)
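How the pieces fit: get_size_suggestion checks that integer strides can reach the requested map, e.g. msize1=10 with psize1=4 gives stride 10 // 4 = 2, hence only 10 // 2 = 5 pooled rows, so (5, 5) is suggested. A hedged usage sketch follows; the identity dpool_index is purely illustrative (MatchPyramid-style models derive the index from the two sentence lengths), and it assumes a TF 1.x graph context:

# Sketch, not DELTA code: apply DynamicPoolingLayer to toy inputs.
import numpy as np
import delta.compat as tf
from delta.layers.dynamic_pooling import DynamicPoolingLayer

batch, msize1, msize2, dim = 2, 6, 4, 8
# Legal sizes: 6 // (6 // 3) = 3 and 4 // (4 // 2) = 2, so validation passes.
layer = DynamicPoolingLayer(psize1=3, psize2=2)

x = tf.random.uniform((batch, msize1, msize2, dim))
# Identity index: position (i, j) maps to itself; shape [batch, msize1, msize2, 2].
idx = np.indices((msize1, msize2)).transpose(1, 2, 0)
dpool_index = tf.constant(np.tile(idx[None, ...], (batch, 1, 1, 1)), dtype=tf.int32)

y = layer([x, dpool_index])  # -> [batch, 3, 2, dim]

# The suggestion helper on an illegal request:
assert DynamicPoolingLayer.get_size_suggestion(10, 10, 4, 4) == (5, 5)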
