add external memory network demo #696

Closed
21 changes: 21 additions & 0 deletions demo/memnet/README.md
@@ -0,0 +1,21 @@
# Memory Network

## Introduction ##
This demo provides a simple example of using external memory, in a way similar to the Neural Turing Machine (NTM), with content-based addressing and differentiable read and write heads.
For more technical details, please refer to the [NTM paper](https://arxiv.org/abs/1410.5401).
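
As a rough illustration of the addressing scheme (a NumPy sketch, not part of the demo; the `scale` factor mirrors the one used in `external_memory.py`): a content-based read scores every memory slot by scaled cosine similarity against a key, normalizes the scores with a softmax, and returns the weighted sum of the slots.

```python
import numpy as np

def content_based_read(memory, key, scale=5.0):
    """memory: (num_slots, slot_size); key: (slot_size,)."""
    # scaled cosine similarity between the key and every memory slot
    sims = scale * memory.dot(key) / (
        np.linalg.norm(memory, axis=1) * np.linalg.norm(key) + 1e-8)
    weights = np.exp(sims - sims.max())
    weights /= weights.sum()      # softmax -> soft read weights over slots
    return weights.dot(memory)    # convex combination of the slots

memory = np.random.randn(5, 6)   # 5 slots, 6 features each
key = np.random.randn(6)
print(content_based_read(memory, key))
```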

## Task Description ##
Here we design a simple task for illustration purposes. The input is a sequence with a variable number of zeros followed by a variable number of non-zero elements, e.g., [0, 0, 0, 3, 1, 5, ...]. The task is to memorize the first non-zero number (e.g., 3) and to output it at the end, after going through the whole sequence.
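
For example, one input/output pair looks like this:

```
input sequence:  [0, 0, 0, 3, 1, 5, 2]
expected output: 3
```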

## Folder Structure ##
* `external_memory.py`: the implementation of the external memory class.
* `external_memory_example.conf`: an example configuration using the external memory class.
* `data_provider_mem.py`: generates the training and testing data for the example.
* `train.sh` and `test.sh`: scripts for running training and testing.

## How to Run ##
* training: `./train.sh`
* testing: `./test.sh`



96 changes: 96 additions & 0 deletions demo/memnet/data_provider_mem.py
@@ -0,0 +1,96 @@
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer.PyDataProvider2 import *
import numpy as np

########################### Parameters for Data Generation #################
gen_range = 8 # same as the size of the dictionary
#--------------- parameters for generating training data -------------------
# each sequence has an all-zero sub-sequence at the beginning, followed by a
# non-zero sub-sequence: seq = [zero_sub_seq, non_zero_sub_seq]

# parameters for non_zero_sub_seq
seq_len = 10      # length of the non_zero_sub_seq
seq_len_min = 2   # minimum length if is_fixed_len is False; in that case
                  # seq_len is the exclusive upper bound, i.e., the length
                  # is sampled from [seq_len_min, seq_len)
# parameters for zero_sub_seq
seq_len_pre = 10
seq_len_pre_min = 2
# number of training data
sample_num = 1000

# -------------- parameters for generating testing data --------------------
seq_len_test = 10
seq_len_min_test = 3
seq_len_pre_test = 10
seq_len_pre_test_min = 2
sample_num_test = 1


seq_len = max(seq_len, seq_len_min)

def gen_data(sample_number, gen_range, seq_len, seq_len_min,
             seq_len_pre, seq_len_pre_min, is_fixed_len=True):
    data = []

    if is_fixed_len:
        seq_len_actual = seq_len
        seq_len_actual_pre = seq_len_pre

    for i in range(0, sample_number):
        if not is_fixed_len:
            seq_len_actual = np.random.randint(seq_len_min, seq_len)
            seq_len_actual_pre = np.random.randint(seq_len_pre_min,
                                                   seq_len_pre)
        # non-zero elements are drawn from [1, gen_range)
        sample0 = np.random.randint(1, gen_range, size=seq_len_actual)
        sample_pre = np.zeros(seq_len_actual_pre, dtype=int)
        sample = np.concatenate([sample_pre, sample0])
        data.append([sample.tolist(), sample0[0]])

    return data

# (unused here) alternative generator: sequences without the zero prefix,
# where the target is the second element of the sequence
def gen_data_prefix(sample_number, gen_range, seq_len, seq_len_min,
                    seq_len_pre, is_fixed_len=True):
    data = []

    if is_fixed_len:
        seq_len_actual = seq_len

    for i in range(0, sample_number):
        if not is_fixed_len:
            seq_len_actual = np.random.randint(seq_len) + 1
            seq_len_actual = max(seq_len_actual, seq_len_min)
        sample = np.random.randint(gen_range, size=seq_len_actual)
        data.append([sample.tolist(), sample[1]])

    return data


data = gen_data(sample_num, gen_range, seq_len, seq_len_min, seq_len_pre, seq_len_pre_min, False)
data_test = gen_data(sample_num_test, gen_range, seq_len_test, seq_len_min_test, seq_len_pre_test, seq_len_pre_test_min, False)
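
# For illustration, one generated training sample might look like (the values
# are random):
#     data[0] == [[0, 0, 3, 1, 5, 2, 7], 3]
# i.e., [input_sequence, first_non_zero_element]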


@provider(input_types={"input_sequence": integer_value_sequence(gen_range + 1),
                       "ground_truth": integer_value(gen_range + 1)})
def process_seq_train(settings, file_name):
    for d in data:
        yield {"input_sequence": d[0], "ground_truth": d[1]}


@provider(input_types={"input_sequence": integer_value_sequence(gen_range + 1),
                       "ground_truth": integer_value(gen_range + 1)})
def process_seq_test(settings, file_name):
    for d in data_test:
        yield {"input_sequence": d[0], "ground_truth": d[1]}
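

# Note: the samples are generated in memory above, so the file_name argument
# passed by the trainer (via dummy.list) is not used by either provider.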
1 change: 1 addition & 0 deletions demo/memnet/dummy.list
@@ -0,0 +1 @@
dummy_file_no_use
151 changes: 151 additions & 0 deletions demo/memnet/external_memory.py
@@ -0,0 +1,151 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *


class ExternalMemory(object):
Collaborator

Need to add comments for the class and its member functions.

Contributor Author

Comments have been added to both the class and member functions.
"""
External memory network class, with differentiable read/write heads.

:param name: name for the external memory
:type name: basestring
:param mem_slot_size: number of slots to be used for the external memory
:type mem_slot_size: int
:param mem_fea_size: size of each memory slot
:type mem_fea_size: int
:param is_test: flag indicating training (is_test=False) or testing (is_test=True)
:type is_test: bool
:param scale: a multiplicative factor applied to the read/write weights
:param scale: int
"""
def __init__(self, name, mem_slot_size, mem_fea_size, is_test=False, scale=5):
self.name = name
self.mem_slot_size = mem_slot_size
self.mem_fea_size = mem_fea_size
self.scale = scale
self.external_memory = memory(name=self.name,
size=mem_fea_size*mem_slot_size,
boot_bias= ParamAttr(initial_std=0.01,
initial_mean=0.))
self.is_test = is_test

def read(self, read_key):
"""
Read head for the external memory.
:param read_key: key used for reading via content-based addressing,
with size as mem_fea_size
:type read_key: LayerOutput
:return: memory_read
:rtype: LayerOutput
"""
cosine_similarity_read = cos_sim(read_key, self.external_memory, scale=self.scale, size=self.mem_slot_size)
norm_cosine_similarity_read = mixed_layer(input=
identity_projection(cosine_similarity_read),
bias_attr = False,
act = SoftmaxActivation(),
size = self.mem_slot_size,
name=self.name+'_read_weight')

memory_read = linear_comb_layer(weights=norm_cosine_similarity_read,
vectors=self.external_memory,
size=self.mem_fea_size, name=self.name+'_read_content')

if self.is_test:
print_layer(input=[norm_cosine_similarity_read, memory_read])

return memory_read

def write(self, write_key):
"""
Write head for the external memory.
:param write_key: the key (and content) used for writing via content-based addressing,
with size as mem_fea_size
:type write_key: LayerOutput
:return: updated memory content
:rtype: LayerOutput
"""
cosine_similarity_write = cos_sim(write_key, self.external_memory,
scale=self.scale, size=self.mem_slot_size)
norm_cosine_similarity_write = mixed_layer(input=
identity_projection(cosine_similarity_write),
bias_attr = False,
act = SoftmaxActivation(),
size = self.mem_slot_size,
name=self.name+'_write_weight')
if self.is_test:
print_layer(input=[norm_cosine_similarity_write])

add_vec = mixed_layer(input = full_matrix_projection(write_key),
bias_attr = None,
act = SoftmaxActivation(),
size = self.mem_fea_size,
name=self.name+'_add_vector')

erase_vec = self.make_constant_vector(self.mem_fea_size, 1.0, write_key, self.name+"_constant_vector")

if self.is_test:
print_layer(input=[erase_vec])
print_layer(input=[add_vec])

        # NTM-style erase-then-add update: with write weights w, erase vector
        # e (all ones here) and add vector a, the new memory is
        #     M_t = M_{t-1} * (1 - outer(w, e)) + outer(w, a)
        out_prod = out_prod_layer(norm_cosine_similarity_write, erase_vec,
                                  name=self.name + "_outer")

        memory_remove = mixed_layer(input=dotmul_operator(
            a=self.external_memory, b=out_prod))

        memory_removed = self.external_memory - memory_remove

        out_prod_add = out_prod_layer(norm_cosine_similarity_write, add_vec,
                                      name=self.name + "_outer_add")
        memory_output = addto_layer(input=[memory_removed, out_prod_add],
                                    name=self.name)

if self.is_test:
print_layer(input=[memory_output])

return memory_output

def make_constant_vector(self, vec_size, value, dummy_input, layer_name):
"""
Auxiliary function for generating a constant vector.
:param vec_size: the size of the constant vector
:type vec_size: int
:param value: value of the elements in the constant vector
:type value: float
:param dummy_input: a dummy input layer to the constant vector network
:type LayerOutput
:param layer_name: name for the constant vector
:type layer_name: basestring
:return: memory_read
:rtype: LayerOutput
"""
constant_scalar = mixed_layer(input=full_matrix_projection(input=dummy_input,
param_attr = ParamAttr(learning_rate = 0,
initial_mean = 0,
initial_std = 0)),
bias_attr = ParamAttr(initial_mean=value,
initial_std=0.0,
learning_rate=0),
act = LinearActivation(),
size = 1,
name = layer_name+'_constant_scalar')
constant = mixed_layer(input=full_matrix_projection(input=constant_scalar,
param_attr=ParamAttr(learning_rate = 0,
initial_mean = 1,
initial_std = 0)),
bias_attr = False,
act = LinearActivation(),
size = vec_size,
name = layer_name)
return constant
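

# Example usage inside a recurrent step function (a minimal sketch; the layer
# names and sizes below are illustrative, see external_memory_example.conf for
# the full setup):
#
#   mem = ExternalMemory('mem', mem_slot_size=5, mem_fea_size=6)
#   read_content = mem.read(read_key_layer)    # soft content-based read
#   updated_mem = mem.write(write_key_layer)   # erase-then-add write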


117 changes: 117 additions & 0 deletions demo/memnet/external_memory_example.conf
@@ -0,0 +1,117 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *
from external_memory import *

######################### parameters ###############################
is_test = get_config_arg('is_test', bool, False)
dict_dim = get_config_arg('dict_size', int, 8) # size of the dictionary
label_dim = dict_dim # the prediction has the same range as the input
word_embedding_dim = get_config_arg('word_emb_dim', int, 6) # dimension of the embedding
mem_fea_size = get_config_arg('mem_fea_size', int, 6) # feature size of each memory slot
mem_slot_size = get_config_arg('mem_slot_size', int, 5) # number of memory slots
controller_signal_dim = get_config_arg('controller_signal_dim', int, 4) # dimension of the controller signal


######################## data source ################################
if not is_test:
define_py_data_sources2(train_list='dummy.list',
test_list=None,
module='data_provider_mem',
obj='process_seq_train')
else:
define_py_data_sources2(train_list=None,
test_list='dummy.list',
module='data_provider_mem',
obj='process_seq_test')


settings(
batch_size=10,
learning_method=AdamOptimizer(),
learning_rate=1e-3)


######################## network configure ################################
data = data_layer(name="input_sequence", size=dict_dim)
gt_label = data_layer(name="ground_truth", size=label_dim)



emb = embedding_layer(input=data, size=word_embedding_dim)

def step_mem(y):
    # pass is_test=True here to print the memory read/write weights and
    # contents when testing
    external_memory = ExternalMemory('external_memory', mem_slot_size,
                                     mem_fea_size, False)
rnn_memory = memory(name="rnn_memory",
size=controller_signal_dim,
boot_bias= ParamAttr(initial_std=0.0,
initial_mean=0.))
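    # naming this mixed_layer 'rnn_memory' feeds its output back into the
    # memory() of the same name above on the next time step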
rnn_mem_out = mixed_layer(input = [full_matrix_projection(y),
full_matrix_projection(rnn_memory)],
bias_attr = None,
act = LinearActivation(),
name='rnn_memory',
size = controller_signal_dim)

control_signal = mixed_layer(input = [full_matrix_projection(y),
full_matrix_projection(rnn_mem_out)],
bias_attr = None,
act = LinearActivation(),
name = 'control_signal',
size = controller_signal_dim)
read_key = mixed_layer(input = [full_matrix_projection(y),
full_matrix_projection(control_signal)],
bias_attr = None,
act = LinearActivation(),
size = mem_fea_size)
memory_read = external_memory.read(read_key)
write_key = mixed_layer(input = [full_matrix_projection(y),
full_matrix_projection(control_signal)],
bias_attr = None,
act = LinearActivation(),
size = mem_fea_size)
    # the write must be issued each step: write() produces a layer named
    # 'external_memory', which the memory() inside ExternalMemory reads back
    # on the next time step
    memory_out = external_memory.write(write_key)
    return memory_read



out = recurrent_group(
name="rnn",
step=step_mem,
input=[emb])

if not is_test:
out = last_seq(input=out)

pred = mixed_layer(input = full_matrix_projection(out),
bias_attr = True,
act = SoftmaxActivation(),
size = label_dim)



if is_test:
pred = last_seq(input=pred)
pred_id = maxid_layer(pred, name="prediction")
print_layer(input=[data])
print_layer(input=[gt_label])
print_layer(input=[pred_id])

cost = cross_entropy(input=pred, label=gt_label, name='cost_cls')
outputs(cost)
else:
outputs(classification_cost(input=pred,
label=gt_label))
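

# To run this configuration (a sketch; the exact command lives in train.sh and
# test.sh, and flags may vary across Paddle versions):
#   paddle train --config=external_memory_example.conf --config_args=is_test=0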