Merge branch 'develop' of https://github.com/PaddlePaddle/models into model_avg
wanghaoshuang committed Mar 27, 2018
2 parents 311c92a + 36ca387 commit 7df53c9
Showing 115 changed files with 8,568 additions and 545 deletions.
4 changes: 4 additions & 0 deletions conv_seq2seq/README.md
@@ -1,3 +1,7 @@
The minimum PaddlePaddle version needed for the code sample in this directory is v0.11.0. If you are on a version of PaddlePaddle earlier than v0.11.0, [please update your installation](http://www.paddlepaddle.org/docs/develop/documentation/en/build_and_install/pip_install_en.html).

---

# Convolutional Sequence to Sequence Learning
This model implements the work in the following paper:

4 changes: 4 additions & 0 deletions ctr/README.cn.md
@@ -1,3 +1,7 @@
The code samples in this directory require PaddlePaddle v0.10.0. If your installed version of PaddlePaddle is below this requirement, please follow the instructions in the [installation documentation](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html) to update your PaddlePaddle installation.

---

# Click-Through Rate Prediction

The files contained in this example's directory are listed below, along with their descriptions:
4 changes: 4 additions & 0 deletions ctr/README.md
@@ -1,3 +1,7 @@
The minimum PaddlePaddle version needed for the code sample in this directory is v0.10.0. If you are on a version of PaddlePaddle earlier than v0.10.0, [please update your installation](http://www.paddlepaddle.org/docs/develop/documentation/en/build_and_install/pip_install_en.html).

---

# Click-Through Rate Prediction

## Introduction
4 changes: 4 additions & 0 deletions deep_fm/README.md
@@ -1,3 +1,7 @@
The minimum PaddlePaddle version needed for the code sample in this directory is v0.11.0. If you are on a version of PaddlePaddle earlier than v0.11.0, [please update your installation](http://www.paddlepaddle.org/docs/develop/documentation/en/build_and_install/pip_install_en.html).

---

# Deep Factorization Machine for Click-Through Rate prediction

## Introduction
4 changes: 4 additions & 0 deletions dssm/README.cn.md
@@ -1,3 +1,7 @@
The code samples in this directory require PaddlePaddle v0.10.0. If your installed version of PaddlePaddle is below this requirement, please follow the instructions in the [installation documentation](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html) to update your PaddlePaddle installation.

---

# Deep Structured Semantic Models (DSSM)
DSSM uses a DNN to learn low-dimensional representation vectors for text in a continuous semantic space and to model the semantic similarity between two sentences. This example demonstrates how to use PaddlePaddle to implement a generic DSSM model for measuring the semantic similarity between two strings. The implementation supports a generic data format, so users can apply the model to real-world scenarios simply by substituting their own data.

4 changes: 4 additions & 0 deletions dssm/README.md
@@ -1,3 +1,7 @@
The minimum PaddlePaddle version needed for the code sample in this directory is v0.10.0. If you are on a version of PaddlePaddle earlier than v0.10.0, [please update your installation](http://www.paddlepaddle.org/docs/develop/documentation/en/build_and_install/pip_install_en.html).

---

# Deep Structured Semantic Models (DSSM)
Deep Structured Semantic Models (DSSM) is a simple but powerful DNN-based model for matching web search queries with URL-based documents. This example demonstrates how to use PaddlePaddle to implement a generic DSSM model for modeling the semantic similarity between two strings.

7 changes: 6 additions & 1 deletion fluid/DeepASR/README.md
@@ -1 +1,6 @@
The minimum PaddlePaddle version needed for the code sample in this directory is the latest develop branch. If you are on an earlier version of PaddlePaddle, [please update your installation](http://www.paddlepaddle.org/docs/develop/documentation/en/build_and_install/pip_install_en.html).

---
### TODO

This project is still under active development.


7 changes: 7 additions & 0 deletions fluid/DeepASR/data_utils/augmentor/tests/__init__.py
@@ -0,0 +1,7 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
168 changes: 163 additions & 5 deletions fluid/DeepASR/data_utils/util.py
@@ -1,9 +1,11 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import time
from six import reraise
from tblib import Traceback
from multiprocessing import Manager, Process
import mmap
import posix_ipc

import numpy as np

@@ -35,21 +37,177 @@ def lodtensor_to_ndarray(lod_tensor):
return ret, lod_tensor.lod()


def batch_to_ndarray(batch_samples, lod):
frame_dim = batch_samples[0][0].shape[1]
batch_feature = np.zeros((lod[-1], frame_dim), dtype="float32")
batch_label = np.zeros((lod[-1], 1), dtype="int64")
start = 0
for sample in batch_samples:
frame_num = sample[0].shape[0]
batch_feature[start:start + frame_num, :] = sample[0]
batch_label[start:start + frame_num, :] = sample[1]
start += frame_num
return (batch_feature, batch_label)
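
# Illustrative sketch (editor's note, not part of the original file): given
# two samples of two and three frames with 4-dim features and lod = [0, 2, 5],
# batch_to_ndarray packs them into a (5, 4) feature matrix and a (5, 1)
# label matrix, with each sample occupying a contiguous row range.
def _example_batch_to_ndarray():
    feats = [np.ones((2, 4), "float32"), np.zeros((3, 4), "float32")]
    labels = [np.zeros((2, 1), "int64"), np.ones((3, 1), "int64")]
    batch_feature, batch_label = batch_to_ndarray(
        list(zip(feats, labels)), lod=[0, 2, 5])
    assert batch_feature.shape == (5, 4) and batch_label.shape == (5, 1)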


def split_infer_result(infer_seq, lod):
infer_batch = []
for i in xrange(0, len(lod[0]) - 1):
infer_batch.append(infer_seq[lod[0][i]:lod[0][i + 1]])
return infer_batch
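
# Illustrative sketch (editor's note): with lod = [[0, 2, 5]], a flat
# inference result of five rows is split back into per-sequence chunks of
# two and three rows, the inverse of the packing done by batch_to_ndarray.
def _example_split_infer_result():
    infer_seq = np.arange(10).reshape(5, 2)
    chunks = split_infer_result(infer_seq, lod=[[0, 2, 5]])
    assert chunks[0].shape == (2, 2) and chunks[1].shape == (3, 2)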


class DaemonProcessGroup(object):
def __init__(self, proc_num, target, args):
self._proc_num = proc_num
self._workers = [
Process(
target=target, args=args) for _ in xrange(self._proc_num)
]

def start_all(self):
for w in self._workers:
w.daemon = True
w.start()

@property
def proc_num(self):
return self._proc_num
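
# Illustrative usage (editor's sketch): spawn two daemon workers running a
# caller-supplied `worker_fn(queue)`; daemon processes exit together with
# the parent, so no explicit join is required on shutdown.
def _example_daemon_group(worker_fn, queue):
    group = DaemonProcessGroup(proc_num=2, target=worker_fn, args=(queue, ))
    group.start_all()
    return group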


class EpochEndSignal(object):
pass


class CriticalException(Exception):
pass


class SharedNDArray(object):
"""SharedNDArray utilizes shared memory to avoid data serialization when
data object shared among different processes. We can reconstruct the
`ndarray` when memory address, shape and dtype provided.
Args:
name (str): Address name of shared memory.
whether_verify (bool): Whether to validate the writing operation.
"""

def __init__(self, name, whether_verify=False):
self._name = name
self._shm = None
self._buf = None
self._array = np.zeros(1, dtype=np.float32)
self._inited = False
self._whether_verify = whether_verify

def zeros_like(self, shape, dtype):
size = int(np.prod(shape)) * np.dtype(dtype).itemsize
if self._inited:
self._shm = posix_ipc.SharedMemory(self._name)
else:
self._shm = posix_ipc.SharedMemory(
self._name, posix_ipc.O_CREAT, size=size)
self._buf = mmap.mmap(self._shm.fd, size)
self._array = np.ndarray(shape, dtype, self._buf, order='C')

def copy(self, ndarray):
size = int(np.prod(ndarray.shape)) * np.dtype(ndarray.dtype).itemsize
self.zeros_like(ndarray.shape, ndarray.dtype)
self._array[:] = ndarray
self._buf.flush()
self._inited = True

if self._whether_verify:
shm = posix_ipc.SharedMemory(self._name)
buf = mmap.mmap(shm.fd, size)
array = np.ndarray(ndarray.shape, ndarray.dtype, buf, order='C')
np.testing.assert_array_equal(array, ndarray)

@property
def ndarray(self):
return self._array

def recycle(self, pool):
self._buf.close()
self._shm.close_fd()
self._inited = False
pool[self._name] = self

def __getstate__(self):
return (self._name, self._array.shape, self._array.dtype, self._inited,
self._whether_verify)

def __setstate__(self, state):
self._name = state[0]
self._inited = state[3]
self.zeros_like(state[1], state[2])
self._whether_verify = state[4]
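
# Illustrative usage (editor's sketch): copy an array into shared memory and
# read it back through the same address name. `/demo_shm` is a hypothetical,
# system-wide-unique segment name chosen for this example only.
def _example_shared_ndarray():
    src = np.random.rand(3, 4).astype("float32")
    shared = SharedNDArray('/demo_shm', whether_verify=True)
    shared.copy(src)  # allocates the segment, writes, and verifies
    np.testing.assert_array_equal(shared.ndarray, src)
    posix_ipc.unlink_shared_memory('/demo_shm')  # final cleanup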


class SharedMemoryPoolManager(object):
"""SharedMemoryPoolManager maintains a multiprocessing.Manager.dict object.
All available addresses are allocated once and will be reused. Though this
class is not process-safe, the pool can be shared between processes. All
shared memory should be unlinked before the main process exited.
Args:
pool_size (int): Size of shared memory pool.
manager (dict): A multiprocessing.Manager object, the pool is
maintained by the proxy process.
name_prefix (str): Address prefix of shared memory.
"""

def __init__(self, pool_size, manager, name_prefix='/deep_asr'):
self._names = []
self._dict = manager.dict()
self._time_prefix = time.strftime('%Y%m%d%H%M%S')

for i in xrange(pool_size):
name = name_prefix + '_' + self._time_prefix + '_' + str(i)
self._dict[name] = SharedNDArray(name)
self._names.append(name)

@property
def pool(self):
return self._dict

def __del__(self):
for name in self._names:
# have to unlink the shared memory
posix_ipc.unlink_shared_memory(name)
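
# Illustrative usage (editor's sketch): allocate a small pool from a
# caller-created multiprocessing.Manager(), borrow one slot, fill it, then
# recycle it back into the pool once the consumer is done with the data.
def _example_pool(manager):
    pool_mgr = SharedMemoryPoolManager(pool_size=4, manager=manager)
    name, shared = pool_mgr.pool.popitem()  # take a free slot
    shared.copy(np.ones((2, 3), dtype="float32"))
    shared.recycle(pool_mgr.pool)  # return the slot for reuse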


def suppress_signal(signo, stack_frame):
pass


def suppress_complaints(verbose, notify=None):
def decorator_maker(func):
        def suppress_wrapper(*args, **kwargs):
try:
func(*args, **kwargs)
except:
                et, ev, tb = sys.exc_info()

                if notify is not None:
                    notify(except_type=et, except_value=ev, traceback=tb)

                if verbose == 1 or isinstance(ev, CriticalException):
                    reraise(et, ev, Traceback(tb).as_traceback())

        return suppress_wrapper

return decorator_maker
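
# Illustrative usage (editor's sketch): ordinary exceptions raised inside the
# wrapped function are swallowed (and optionally reported via `notify`), while
# a CriticalException, or verbose == 1, re-raises with the full traceback.
@suppress_complaints(verbose=0)
def _example_worker():
    raise ValueError("suppressed when verbose == 0")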


class ForceExitWrapper(object):
def __init__(self, exit_flag):
self._exit_flag = exit_flag

@suppress_complaints(verbose=0)
def __call__(self, *args, **kwargs):
self._exit_flag.value = True

def __eq__(self, flag):
return self._exit_flag.value == flag
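
# Illustrative usage (editor's sketch): share an exit flag through a
# caller-created multiprocessing.Manager(); calling the wrapper raises the
# flag, and the `__eq__` overload lets workers poll it as a plain condition.
def _example_force_exit(manager):
    exit_flag = manager.Value('b', False)
    force_exit = ForceExitWrapper(exit_flag)
    force_exit()               # request a global shutdown
    assert force_exit == True  # what worker loops would check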
