From 2a4a9764451777865dfb96e7ddba5e5c4679ac5e Mon Sep 17 00:00:00 2001
From: wwhu
Date: Fri, 5 May 2017 14:53:00 +0800
Subject: [PATCH 01/43] add code

---
 .../random_schedule_generator.py         |  58 ++++
 scheduled_sampling/scheduled_sampling.py | 278 ++++++++++++++++++
 2 files changed, 336 insertions(+)
 create mode 100644 scheduled_sampling/random_schedule_generator.py
 create mode 100644 scheduled_sampling/scheduled_sampling.py

diff --git a/scheduled_sampling/random_schedule_generator.py b/scheduled_sampling/random_schedule_generator.py
new file mode 100644
index 0000000000..b86c867e4a
--- /dev/null
+++ b/scheduled_sampling/random_schedule_generator.py
@@ -0,0 +1,58 @@
+import numpy as np
+import math
+import pdb
+
+'''
+The random sampling rate for the scheduled sampling algorithm, which uses a
+decayed sampling rate.
+'''
+
+
+class RandomScheduleGenerator:
+    '''
+    schedule_type: is the type of the decay. It supports constant, linear,
+    exponential, and inverse_sigmoid right now.
+    a: parameter of the decay (MUST BE DOUBLE)
+    b: parameter of the decay (MUST BE DOUBLE)
+    '''
+
+    def __init__(self, schedule_type, a, b):
+        self.schedule_type = schedule_type
+        self.a = a
+        self.b = b
+        self.data_processed_ = 0
+        self.schedule_computers = {
+            "constant": lambda a, b, d: a,
+            "linear": lambda a, b, d: max(a, 1 - d / b),
+            "exponential": lambda a, b, d: pow(a, d / b),
+            "inverse_sigmoid": lambda a, b, d: b / (b + exp(d * a / b)),
+        }
+        assert (self.schedule_type in self.schedule_computers)
+        self.schedule_computer = self.schedule_computers[self.schedule_type]
+
+    '''
+    Get the scheduled sampling rate. Usually not needed to be called by the users
+    '''
+
+    def getScheduleRate(self):
+        return self.schedule_computer(self.a, self.b, self.data_processed_)
+
+    '''
+    Get a batch_size of sampled indexes. These indexes can be passed to a
+    MultiplexLayer to select from the ground truth and generated samples
+    from the last time step.
+    '''
+
+    def processBatch(self, batch_size):
+        rate = self.getScheduleRate()
+        numbers = np.random.rand(batch_size)
+        indexes = (numbers >= rate).astype('int32').tolist()
+        self.data_processed_ += batch_size
+        return indexes
+
+
+if __name__ == "__main__":
+    schedule_generator = RandomScheduleGenerator("linear", 0.1, 500000)
+    true_token_flag = schedule_generator.processBatch(5)
+    pdb.set_trace()
+    pass
\ No newline at end of file
diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py
new file mode 100644
index 0000000000..3caf23004a
--- /dev/null
+++ b/scheduled_sampling/scheduled_sampling.py
@@ -0,0 +1,278 @@
+import sys
+import paddle.v2 as paddle
+from random_schedule_generator import RandomScheduleGenerator
+
+
+schedule_generator = RandomScheduleGenerator("linear", 0.75, 1000000)
+
+
+def gen_schedule_data(reader):
+    """
+    Creates a data reader for scheduled sampling.
+
+    Output from the iterator created by the original reader will be
+    appended with "true_token_flag" to indicate whether to use the true token.
+
+    :param reader: the original reader.
+    :type reader: callable
+
+    :return: the new reader with the field "true_token_flag".
+ :rtype: callable + """ + def data_reader(): + for src_ids, trg_ids, trg_ids_next in reader(): + yield src_ids, trg_ids, trg_ids_next, \ + schedule_generator.processBatch(len(trg_ids)) + + return data_reader + + +def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False): + ### Network Architecture + word_vector_dim = 512 # dimension of word vector + decoder_size = 512 # dimension of hidden unit in GRU Decoder network + encoder_size = 512 # dimension of hidden unit in GRU Encoder network + + beam_size = 3 + max_length = 250 + + #### Encoder + src_word_id = paddle.layer.data( + name='source_language_word', + type=paddle.data_type.integer_value_sequence(source_dict_dim)) + src_embedding = paddle.layer.embedding( + input=src_word_id, + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) + src_forward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size) + src_backward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size, reverse=True) + encoded_vector = paddle.layer.concat(input=[src_forward, src_backward]) + + #### Decoder + with paddle.layer.mixed(size=decoder_size) as encoded_proj: + encoded_proj += paddle.layer.full_matrix_projection( + input=encoded_vector) + + backward_first = paddle.layer.first_seq(input=src_backward) + + with paddle.layer.mixed( + size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot: + decoder_boot += paddle.layer.full_matrix_projection( + input=backward_first) + + def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, generated_word, true_token_flag): + + decoder_mem = paddle.layer.memory( + name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) + + context = paddle.networks.simple_attention( + encoded_sequence=enc_vec, + encoded_proj=enc_proj, + decoder_state=decoder_mem) + + current_word = paddle.layer.multiplex([true_token_flag, true_word, generated_word]) + + with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: + decoder_inputs += paddle.layer.full_matrix_projection(input=context) + decoder_inputs += paddle.layer.full_matrix_projection( + input=current_word) + + gru_step = paddle.layer.gru_step( + name='gru_decoder', + input=decoder_inputs, + output_mem=decoder_mem, + size=decoder_size) + + with paddle.layer.mixed( + size=target_dict_dim, + bias_attr=True, + act=paddle.activation.Softmax()) as out: + out += paddle.layer.full_matrix_projection(input=gru_step) + return out + + def gru_decoder_with_attention_test(enc_vec, enc_proj, current_word): + + decoder_mem = paddle.layer.memory( + name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) + + context = paddle.networks.simple_attention( + encoded_sequence=enc_vec, + encoded_proj=enc_proj, + decoder_state=decoder_mem) + + with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: + decoder_inputs += paddle.layer.full_matrix_projection(input=context) + decoder_inputs += paddle.layer.full_matrix_projection( + input=current_word) + + gru_step = paddle.layer.gru_step( + name='gru_decoder', + input=decoder_inputs, + output_mem=decoder_mem, + size=decoder_size) + + with paddle.layer.mixed( + size=target_dict_dim, + bias_attr=True, + act=paddle.activation.Softmax()) as out: + out += paddle.layer.full_matrix_projection(input=gru_step) + return out + + decoder_group_name = "decoder_group" + group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True) + group_inputs = [group_input1, 
group_input2] + + if not is_generating: + trg_embedding = paddle.layer.embedding( + input=paddle.layer.data( + name='target_language_word', + type=paddle.data_type.integer_value_sequence(target_dict_dim)), + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) + group_inputs.append(trg_embedding) + + generated_embedding = paddle.layer.GeneratedInputV2( + size=target_dict_dim, + embedding_name='_target_language_embedding', + embedding_size=word_vector_dim) + group_inputs.append(generated_embedding) + + true_token_flags = paddle.layer.data( + name='true_token_flag', + type=paddle.data_type.integer_value_sequence(2)) + group_inputs.append(true_token_flags) + + # For decoder equipped with attention mechanism, in training, + # target embeding (the groudtruth) is the data input, + # while encoded source sequence is accessed to as an unbounded memory. + # Here, the StaticInput defines a read-only memory + # for the recurrent_group. + decoder = paddle.layer.recurrent_group( + name=decoder_group_name, + step=gru_decoder_with_attention_train, + input=group_inputs) + + lbl = paddle.layer.data( + name='target_language_next_word', + type=paddle.data_type.integer_value_sequence(target_dict_dim)) + cost = paddle.layer.classification_cost(input=decoder, label=lbl) + + return cost + else: + # In generation, the decoder predicts a next target word based on + # the encoded source sequence and the last generated target word. + + # The encoded source sequence (encoder's output) must be specified by + # StaticInput, which is a read-only memory. + # Embedding of the last generated word is automatically gotten by + # GeneratedInputs, which is initialized by a start mark, such as , + # and must be included in generation. + + trg_embedding = paddle.layer.GeneratedInputV2( + size=target_dict_dim, + embedding_name='_target_language_embedding', + embedding_size=word_vector_dim) + group_inputs.append(trg_embedding) + + beam_gen = paddle.layer.beam_search( + name=decoder_group_name, + step=gru_decoder_with_attention_test, + input=group_inputs, + bos_id=0, + eos_id=1, + beam_size=beam_size, + max_length=max_length) + + return beam_gen + + +def main(): + paddle.init(use_gpu=False, trainer_count=1) + is_generating = False + + # source and target dict dim. 
+ dict_size = 30000 + source_dict_dim = target_dict_dim = dict_size + + # train the network + if not is_generating: + cost = seqToseq_net(source_dict_dim, target_dict_dim) + parameters = paddle.parameters.create(cost) + + # define optimize method and trainer + optimizer = paddle.optimizer.Adam( + learning_rate=5e-5, + regularization=paddle.optimizer.L2Regularization(rate=8e-4)) + trainer = paddle.trainer.SGD( + cost=cost, parameters=parameters, update_equation=optimizer) + # define data reader + wmt14_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=8192), + batch_size=5) + + # define event_handler callback + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 10 == 0: + print "\nPass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, + event.metrics) + else: + sys.stdout.write('.') + sys.stdout.flush() + + # start to train + trainer.train( + reader=wmt14_reader, event_handler=event_handler, num_passes=2) + + # generate a english sequence to french + else: + # use the first 3 samples for generation + gen_creator = paddle.dataset.wmt14.gen(dict_size) + gen_data = [] + gen_num = 3 + for item in gen_creator(): + gen_data.append((item[0], )) + if len(gen_data) == gen_num: + break + + beam_gen = seqToseq_net(source_dict_dim, target_dict_dim, is_generating) + # get the pretrained model, whose bleu = 26.92 + parameters = paddle.dataset.wmt14.model() + # prob is the prediction probabilities, and id is the prediction word. + beam_result = paddle.infer( + output_layer=beam_gen, + parameters=parameters, + input=gen_data, + field=['prob', 'id']) + + # get the dictionary + src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size) + + # the delimited element of generated sequences is -1, + # the first element of each generated sequence is the sequence length + seq_list = [] + seq = [] + for w in beam_result[1]: + if w != -1: + seq.append(w) + else: + seq_list.append(' '.join([trg_dict.get(w) for w in seq[1:]])) + seq = [] + + prob = beam_result[0] + beam_size = 3 + for i in xrange(gen_num): + print "\n*******************************************************\n" + print "src:", ' '.join( + [src_dict.get(w) for w in gen_data[i][0]]), "\n" + for j in xrange(beam_size): + print "prob = %f:" % (prob[i][j]), seq_list[i * beam_size + j] + + +if __name__ == '__main__': + main() From 641d0e77300300e48f6c0ed588618e2712dee2d8 Mon Sep 17 00:00:00 2001 From: wwhu Date: Fri, 5 May 2017 15:49:40 +0800 Subject: [PATCH 02/43] bug fix --- scheduled_sampling/scheduled_sampling.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py index 3caf23004a..30d15425d2 100644 --- a/scheduled_sampling/scheduled_sampling.py +++ b/scheduled_sampling/scheduled_sampling.py @@ -62,7 +62,7 @@ def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False): decoder_boot += paddle.layer.full_matrix_projection( input=backward_first) - def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, generated_word, true_token_flag): + def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, true_token_flag): decoder_mem = paddle.layer.memory( name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) @@ -72,7 +72,10 @@ def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, generated_wor encoded_proj=enc_proj, decoder_state=decoder_mem) - current_word = 
paddle.layer.multiplex([true_token_flag, true_word, generated_word]) + generated_word_memory = paddle.layer.memory( + name='generated_word', size=1, boot_with_const_id=0) + + current_word = paddle.layer.multiplex([true_token_flag, true_word, generated_word_memory]) with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: decoder_inputs += paddle.layer.full_matrix_projection(input=context) @@ -90,6 +93,9 @@ def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, generated_wor bias_attr=True, act=paddle.activation.Softmax()) as out: out += paddle.layer.full_matrix_projection(input=gru_step) + + max_id(input=out, name='generated_word') + return out def gru_decoder_with_attention_test(enc_vec, enc_proj, current_word): From 01f506b20be132321aceca00c4f31f8a0c3c7ca4 Mon Sep 17 00:00:00 2001 From: wwhu Date: Fri, 5 May 2017 15:52:10 +0800 Subject: [PATCH 03/43] bug fix --- scheduled_sampling/scheduled_sampling.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py index 30d15425d2..74e22da6cb 100644 --- a/scheduled_sampling/scheduled_sampling.py +++ b/scheduled_sampling/scheduled_sampling.py @@ -140,12 +140,6 @@ def gru_decoder_with_attention_test(enc_vec, enc_proj, current_word): param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) group_inputs.append(trg_embedding) - generated_embedding = paddle.layer.GeneratedInputV2( - size=target_dict_dim, - embedding_name='_target_language_embedding', - embedding_size=word_vector_dim) - group_inputs.append(generated_embedding) - true_token_flags = paddle.layer.data( name='true_token_flag', type=paddle.data_type.integer_value_sequence(2)) From 6b4d2744fabbd537526a712643700f1cfa652d9c Mon Sep 17 00:00:00 2001 From: wwhu Date: Fri, 5 May 2017 15:58:29 +0800 Subject: [PATCH 04/43] bug fix --- scheduled_sampling/scheduled_sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py index 74e22da6cb..3e58786f36 100644 --- a/scheduled_sampling/scheduled_sampling.py +++ b/scheduled_sampling/scheduled_sampling.py @@ -75,7 +75,7 @@ def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, true_token_fl generated_word_memory = paddle.layer.memory( name='generated_word', size=1, boot_with_const_id=0) - current_word = paddle.layer.multiplex([true_token_flag, true_word, generated_word_memory]) + current_word = paddle.layer.multiplex(input=[true_token_flag, true_word, generated_word_memory]) with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: decoder_inputs += paddle.layer.full_matrix_projection(input=context) From 3bd88f6a6734c1dd0bdb96583e5991868a7c9517 Mon Sep 17 00:00:00 2001 From: wwhu Date: Fri, 5 May 2017 16:09:11 +0800 Subject: [PATCH 05/43] bug fix --- scheduled_sampling/scheduled_sampling.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py index 3e58786f36..f52ac2151e 100644 --- a/scheduled_sampling/scheduled_sampling.py +++ b/scheduled_sampling/scheduled_sampling.py @@ -75,7 +75,12 @@ def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, true_token_fl generated_word_memory = paddle.layer.memory( name='generated_word', size=1, boot_with_const_id=0) - current_word = paddle.layer.multiplex(input=[true_token_flag, true_word, generated_word_memory]) + generated_word_emb = embedding( + 
input=generated_word_memory, + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) + + current_word = paddle.layer.multiplex(input=[true_token_flag, true_word, generated_word_emb]) with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: decoder_inputs += paddle.layer.full_matrix_projection(input=context) From 363b62d130ab03f29b3134d76f22f75c878a14ee Mon Sep 17 00:00:00 2001 From: wwhu Date: Mon, 8 May 2017 11:09:24 +0800 Subject: [PATCH 06/43] bug fix --- scheduled_sampling/scheduled_sampling.py | 49 +++++++++++------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py index f52ac2151e..24c15756b6 100644 --- a/scheduled_sampling/scheduled_sampling.py +++ b/scheduled_sampling/scheduled_sampling.py @@ -22,7 +22,7 @@ def gen_schedule_data(reader): def data_reader(): for src_ids, trg_ids, trg_ids_next in reader(): yield src_ids, trg_ids, trg_ids_next, \ - schedule_generator.processBatch(len(trg_ids)) + [0] + schedule_generator.processBatch(len(trg_ids) - 1) return data_reader @@ -72,11 +72,13 @@ def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, true_token_fl encoded_proj=enc_proj, decoder_state=decoder_mem) - generated_word_memory = paddle.layer.memory( - name='generated_word', size=1, boot_with_const_id=0) + gru_out_memory = paddle.layer.memory( + name='gru_out', size=target_dict_dim) # , boot_with_const_id=0) - generated_word_emb = embedding( - input=generated_word_memory, + generated_word = paddle.layer.max_id(input=gru_out_memory) + + generated_word_emb = paddle.layer.embedding( + input=generated_word, size=word_vector_dim, param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) @@ -94,13 +96,12 @@ def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, true_token_fl size=decoder_size) with paddle.layer.mixed( + name='gru_out', size=target_dict_dim, bias_attr=True, act=paddle.activation.Softmax()) as out: out += paddle.layer.full_matrix_projection(input=gru_step) - max_id(input=out, name='generated_word') - return out def gru_decoder_with_attention_test(enc_vec, enc_proj, current_word): @@ -150,11 +151,6 @@ def gru_decoder_with_attention_test(enc_vec, enc_proj, current_word): type=paddle.data_type.integer_value_sequence(2)) group_inputs.append(true_token_flags) - # For decoder equipped with attention mechanism, in training, - # target embeding (the groudtruth) is the data input, - # while encoded source sequence is accessed to as an unbounded memory. - # Here, the StaticInput defines a read-only memory - # for the recurrent_group. decoder = paddle.layer.recurrent_group( name=decoder_group_name, step=gru_decoder_with_attention_train, @@ -167,15 +163,6 @@ def gru_decoder_with_attention_test(enc_vec, enc_proj, current_word): return cost else: - # In generation, the decoder predicts a next target word based on - # the encoded source sequence and the last generated target word. - - # The encoded source sequence (encoder's output) must be specified by - # StaticInput, which is a read-only memory. - # Embedding of the last generated word is automatically gotten by - # GeneratedInputs, which is initialized by a start mark, such as , - # and must be included in generation. 
- trg_embedding = paddle.layer.GeneratedInputV2( size=target_dict_dim, embedding_name='_target_language_embedding', @@ -197,6 +184,7 @@ def gru_decoder_with_attention_test(enc_vec, enc_proj, current_word): def main(): paddle.init(use_gpu=False, trainer_count=1) is_generating = False + model_path_for_generating = 'params_pass_1.tar.gz' # source and target dict dim. dict_size = 30000 @@ -215,10 +203,14 @@ def main(): cost=cost, parameters=parameters, update_equation=optimizer) # define data reader wmt14_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.wmt14.train(dict_size), buf_size=8192), + gen_schedule_data( + paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=8192)), batch_size=5) + feeding = {'source_language_word': 0, 'target_language_word': 1, + 'target_language_next_word': 2, 'true_token_flag': 3} + # define event_handler callback def event_handler(event): if isinstance(event, paddle.event.EndIteration): @@ -229,10 +221,14 @@ def event_handler(event): else: sys.stdout.write('.') sys.stdout.flush() + if isinstance(event, paddle.event.EndPass): + # save parameters + with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f: + parameters.to_tar(f) # start to train trainer.train( - reader=wmt14_reader, event_handler=event_handler, num_passes=2) + reader=wmt14_reader, event_handler=event_handler, feeding=feeding, num_passes=2) # generate a english sequence to french else: @@ -246,8 +242,9 @@ def event_handler(event): break beam_gen = seqToseq_net(source_dict_dim, target_dict_dim, is_generating) - # get the pretrained model, whose bleu = 26.92 - parameters = paddle.dataset.wmt14.model() + # get the trained model + with gzip.open(model_path_for_generating, 'r') as f: + parameters = Parameters.from_tar(f) # prob is the prediction probabilities, and id is the prediction word. beam_result = paddle.infer( output_layer=beam_gen, From bb93d5c09349a30dcce55b1795374771632f4e10 Mon Sep 17 00:00:00 2001 From: wwhu Date: Mon, 8 May 2017 15:50:48 +0800 Subject: [PATCH 07/43] correct the code style --- .../random_schedule_generator.py | 2 -- scheduled_sampling/scheduled_sampling.py | 24 +++++++++++++------ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/scheduled_sampling/random_schedule_generator.py b/scheduled_sampling/random_schedule_generator.py index b86c867e4a..046dce6352 100644 --- a/scheduled_sampling/random_schedule_generator.py +++ b/scheduled_sampling/random_schedule_generator.py @@ -1,7 +1,6 @@ import numpy as np import math import pdb - ''' The random sampling rate for scheduled sampling algoithm, which uses devcayed sampling rate. @@ -55,4 +54,3 @@ def processBatch(self, batch_size): schedule_generator = RandomScheduleGenerator("linear", 0.1, 500000) true_token_flag = schedule_generator.processBatch(5) pdb.set_trace() - pass \ No newline at end of file diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py index 24c15756b6..e641c44846 100644 --- a/scheduled_sampling/scheduled_sampling.py +++ b/scheduled_sampling/scheduled_sampling.py @@ -2,7 +2,6 @@ import paddle.v2 as paddle from random_schedule_generator import RandomScheduleGenerator - schedule_generator = RandomScheduleGenerator("linear", 0.75, 1000000) @@ -19,6 +18,7 @@ def gen_schedule_data(reader): :return: the new reader with the field "true_token_flag". 
:rtype: callable """ + def data_reader(): for src_ids, trg_ids, trg_ids_next in reader(): yield src_ids, trg_ids, trg_ids_next, \ @@ -62,7 +62,8 @@ def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False): decoder_boot += paddle.layer.full_matrix_projection( input=backward_first) - def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, true_token_flag): + def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, + true_token_flag): decoder_mem = paddle.layer.memory( name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) @@ -82,7 +83,8 @@ def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, true_token_fl size=word_vector_dim, param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) - current_word = paddle.layer.multiplex(input=[true_token_flag, true_word, generated_word_emb]) + current_word = paddle.layer.multiplex( + input=[true_token_flag, true_word, generated_word_emb]) with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: decoder_inputs += paddle.layer.full_matrix_projection(input=context) @@ -208,8 +210,12 @@ def main(): paddle.dataset.wmt14.train(dict_size), buf_size=8192)), batch_size=5) - feeding = {'source_language_word': 0, 'target_language_word': 1, - 'target_language_next_word': 2, 'true_token_flag': 3} + feeding = { + 'source_language_word': 0, + 'target_language_word': 1, + 'target_language_next_word': 2, + 'true_token_flag': 3 + } # define event_handler callback def event_handler(event): @@ -223,12 +229,16 @@ def event_handler(event): sys.stdout.flush() if isinstance(event, paddle.event.EndPass): # save parameters - with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f: + with gzip.open('params_pass_%d.tar.gz' % event.pass_id, + 'w') as f: parameters.to_tar(f) # start to train trainer.train( - reader=wmt14_reader, event_handler=event_handler, feeding=feeding, num_passes=2) + reader=wmt14_reader, + event_handler=event_handler, + feeding=feeding, + num_passes=2) # generate a english sequence to french else: From b1ab60da42d808d7ff91f724474e4d15deb0e423 Mon Sep 17 00:00:00 2001 From: wwhu Date: Wed, 10 May 2017 11:29:14 +0800 Subject: [PATCH 08/43] adjust some comments --- .../random_schedule_generator.py | 47 ++++++++----------- scheduled_sampling/scheduled_sampling.py | 2 +- 2 files changed, 20 insertions(+), 29 deletions(-) diff --git a/scheduled_sampling/random_schedule_generator.py b/scheduled_sampling/random_schedule_generator.py index 046dce6352..7569eaffc2 100644 --- a/scheduled_sampling/random_schedule_generator.py +++ b/scheduled_sampling/random_schedule_generator.py @@ -1,21 +1,20 @@ import numpy as np import math -import pdb -''' -The random sampling rate for scheduled sampling algoithm, which uses devcayed -sampling rate. -''' class RandomScheduleGenerator: - ''' - schduled_type: is the type of the decay. It supports constant, linear, - exponential, and inverse_sigmoid right now. - a: parameter of the decay (MUST BE DOUBLE) - b: parameter of the decay (MUST BE DOUBLE) - ''' + """ + The random sampling rate for scheduled sampling algoithm, which uses devcayed + sampling rate. + """ def __init__(self, schedule_type, a, b): + """ + schduled_type: is the type of the decay. It supports constant, linear, + exponential, and inverse_sigmoid right now. 
+ a: parameter of the decay (MUST BE DOUBLE) + b: parameter of the decay (MUST BE DOUBLE) + """ self.schedule_type = schedule_type self.a = a self.b = b @@ -24,33 +23,25 @@ def __init__(self, schedule_type, a, b): "constant": lambda a, b, d: a, "linear": lambda a, b, d: max(a, 1 - d / b), "exponential": lambda a, b, d: pow(a, d / b), - "inverse_sigmoid": lambda a, b, d: b / (b + exp(d * a / b)), + "inverse_sigmoid": lambda a, b, d: b / (b + math.exp(d * a / b)), } assert (self.schedule_type in self.schedule_computers) self.schedule_computer = self.schedule_computers[self.schedule_type] - ''' - Get the schedule sampling rate. Usually not needed to be called by the users - ''' - def getScheduleRate(self): + """ + Get the schedule sampling rate. Usually not needed to be called by the users + """ return self.schedule_computer(self.a, self.b, self.data_processed_) - ''' - Get a batch_size of sampled indexes. These indexes can be passed to a - MultiplexLayer to select from the grouth truth and generated samples - from the last time step. - ''' - def processBatch(self, batch_size): + """ + Get a batch_size of sampled indexes. These indexes can be passed to a + MultiplexLayer to select from the grouth truth and generated samples + from the last time step. + """ rate = self.getScheduleRate() numbers = np.random.rand(batch_size) indexes = (numbers >= rate).astype('int32').tolist() self.data_processed_ += batch_size return indexes - - -if __name__ == "__main__": - schedule_generator = RandomScheduleGenerator("linear", 0.1, 500000) - true_token_flag = schedule_generator.processBatch(5) - pdb.set_trace() diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py index e641c44846..1dead8969a 100644 --- a/scheduled_sampling/scheduled_sampling.py +++ b/scheduled_sampling/scheduled_sampling.py @@ -74,7 +74,7 @@ def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, decoder_state=decoder_mem) gru_out_memory = paddle.layer.memory( - name='gru_out', size=target_dict_dim) # , boot_with_const_id=0) + name='gru_out', size=target_dict_dim) generated_word = paddle.layer.max_id(input=gru_out_memory) From 6f4ea4b8450cf5bf6f65191e258b88d61a473285 Mon Sep 17 00:00:00 2001 From: wwhu Date: Wed, 10 May 2017 15:55:14 +0800 Subject: [PATCH 09/43] add copyright --- scheduled_sampling/random_schedule_generator.py | 14 ++++++++++++++ scheduled_sampling/scheduled_sampling.py | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/scheduled_sampling/random_schedule_generator.py b/scheduled_sampling/random_schedule_generator.py index 7569eaffc2..694a98611f 100644 --- a/scheduled_sampling/random_schedule_generator.py +++ b/scheduled_sampling/random_schedule_generator.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import numpy as np import math diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py index 1dead8969a..c561995be9 100644 --- a/scheduled_sampling/scheduled_sampling.py +++ b/scheduled_sampling/scheduled_sampling.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import paddle.v2 as paddle from random_schedule_generator import RandomScheduleGenerator From dbf7106ebf455066d5be48e252bb02807ee5d622 Mon Sep 17 00:00:00 2001 From: wwhu Date: Wed, 10 May 2017 15:56:47 +0800 Subject: [PATCH 10/43] remove copyright --- scheduled_sampling/random_schedule_generator.py | 14 -------------- scheduled_sampling/scheduled_sampling.py | 14 -------------- 2 files changed, 28 deletions(-) diff --git a/scheduled_sampling/random_schedule_generator.py b/scheduled_sampling/random_schedule_generator.py index 694a98611f..7569eaffc2 100644 --- a/scheduled_sampling/random_schedule_generator.py +++ b/scheduled_sampling/random_schedule_generator.py @@ -1,17 +1,3 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - import numpy as np import math diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py index c561995be9..1dead8969a 100644 --- a/scheduled_sampling/scheduled_sampling.py +++ b/scheduled_sampling/scheduled_sampling.py @@ -1,17 +1,3 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
 import sys
 import paddle.v2 as paddle
 from random_schedule_generator import RandomScheduleGenerator

From 1fd8161472bf87fe23e5c27c1407c131859f3307 Mon Sep 17 00:00:00 2001
From: wwhu
Date: Mon, 15 May 2017 15:42:55 +0800
Subject: [PATCH 11/43] add doc

---
 scheduled_sampling/README.md             | 165 ++++++++++++++++++++++
 scheduled_sampling/scheduled_sampling.py |  39 +++++-
 2 files changed, 200 insertions(+), 4 deletions(-)

diff --git a/scheduled_sampling/README.md b/scheduled_sampling/README.md
index a0990367ef..18371b6a12 100644
--- a/scheduled_sampling/README.md
+++ b/scheduled_sampling/README.md
@@ -1 +1,164 @@
-TBD
+# Scheduled Sampling
+
+## Overview
+The training objective of a sequence generation task is to maximize the probability of the target sequence given the source input. During training, the model takes the ground-truth elements of the target sequence as the decoder input at every step and maximizes the probability of the next element; at generation time, the element decoded at the previous step is used as the current input to generate the next element. The probability distributions of the decoder inputs are therefore inconsistent between training and generation. If a wrong element is generated early in the sequence, the subsequent input states are affected, and the error keeps accumulating as generation proceeds.
+Scheduled sampling is a method for resolving this mismatch between the input distributions at training and generation time. Early in training, it mostly feeds ground-truth elements to the decoder, quickly guiding the model from its randomly initialized state to a reasonable one. As training proceeds, it gradually feeds more generated elements to the decoder, which resolves the distribution mismatch.
+
+## The Algorithm
+Scheduled sampling applies only to the training of sequence-to-sequence models; it is not used at generation time.
+When generating the `t`-th element, a standard sequence-to-sequence model uses the ground-truth element `y(t-1)` of the previous step as the decoder input. Let `g(t-1)` denote the element generated at the previous step; scheduled sampling instead uses `g(t-1)` as the decoder input with a certain probability.
+Suppose training has reached the `i`-th mini-batch. At step `t`, scheduled sampling uses the previous ground-truth element `y(t-1)` as the decoder input with probability `epsilon_i`, and the previously generated element `g(t-1)` with probability `1-epsilon_i`.
+As `i` grows, `epsilon_i` decreases, the decoder tends more and more toward taking generated elements as input, and the data distributions at training and generation time become increasingly consistent.
+`epsilon_i` can be decayed in different ways; common choices are listed below, followed by a small numeric illustration:
+
+ - Linear decay: `epsilon_i=max(epsilon,k-c*i)`, where `epsilon` bounds the minimum value of `epsilon_i`, and `k` and `c` control the magnitude of the linear decay.
+ - Exponential decay: `epsilon_i=k^i`, where `0<k<1` and `k` controls the magnitude of the decay.
+ - Inverse sigmoid decay: `epsilon_i=k/(k+exp(i/k))`, where `k>1` and `k` likewise controls the magnitude of the decay.
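+
+The minimal sketch below (a standalone illustration, not part of `scheduled_sampling.py`) evaluates the same decay formulas implemented by `RandomScheduleGenerator` with `a=0.75` and `b=1000000`, the values used by the training script:
+
+```python
+import math
+
+# a: decay parameter, b: scale parameter, d: samples processed so far.
+# float(b) guards against Python 2 integer division; the generator's
+# docstring requires double-typed parameters for the same reason.
+schedules = {
+    "linear": lambda a, b, d: max(a, 1 - d / float(b)),
+    "exponential": lambda a, b, d: pow(a, d / float(b)),
+    "inverse_sigmoid": lambda a, b, d: b / (b + math.exp(d * a / b)),
+}
+
+for name in ["linear", "exponential", "inverse_sigmoid"]:
+    rates = [schedules[name](0.75, 1000000, d) for d in (0, 500000, 1000000)]
+    print "%s: %s" % (name, rates)
+```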
+
+## Implementation
+Since scheduled sampling is a refinement of the sequence-to-sequence model, the overall implementation framework closely follows that model. To keep the focus of this document, only the parts related to scheduled sampling are described here; see `scheduled_sampling.py` for the complete code.
+
+First, define the class `RandomScheduleGenerator`, which controls the decayed sampling probability:
+```python
+import numpy as np
+import math
+
+
+class RandomScheduleGenerator:
+    """
+    The random sampling rate for the scheduled sampling algorithm, which uses a
+    decayed sampling rate.
+    """
+
+    def __init__(self, schedule_type, a, b):
+        """
+        schedule_type: is the type of the decay. It supports constant, linear,
+        exponential, and inverse_sigmoid right now.
+        a: parameter of the decay (MUST BE DOUBLE)
+        b: parameter of the decay (MUST BE DOUBLE)
+        """
+        self.schedule_type = schedule_type
+        self.a = a
+        self.b = b
+        self.data_processed_ = 0
+        self.schedule_computers = {
+            "constant": lambda a, b, d: a,
+            "linear": lambda a, b, d: max(a, 1 - d / b),
+            "exponential": lambda a, b, d: pow(a, d / b),
+            "inverse_sigmoid": lambda a, b, d: b / (b + math.exp(d * a / b)),
+        }
+        assert (self.schedule_type in self.schedule_computers)
+        self.schedule_computer = self.schedule_computers[self.schedule_type]
+
+    def getScheduleRate(self):
+        """
+        Get the scheduled sampling rate. Usually not needed to be called by the users
+        """
+        return self.schedule_computer(self.a, self.b, self.data_processed_)
+
+    def processBatch(self, batch_size):
+        """
+        Get a batch_size of sampled indexes. These indexes can be passed to a
+        MultiplexLayer to select from the ground truth and generated samples
+        from the last time step.
+        """
+        rate = self.getScheduleRate()
+        numbers = np.random.rand(batch_size)
+        indexes = (numbers >= rate).astype('int32').tolist()
+        self.data_processed_ += batch_size
+        return indexes
+```
+The `__init__` method defines several ways to decay the sampling probability, and `processBatch` samples according to the current probability, which ultimately decides whether the ground-truth element or the generated element is used during decoding.
+
+The data reader is then wrapped so that `true_token_flag`, sampled from `RandomScheduleGenerator`, is appended as an additional data input that controls which element the decoder uses.
+
+```python
+schedule_generator = RandomScheduleGenerator("linear", 0.75, 1000000)
+
+def gen_schedule_data(reader):
+    """
+    Creates a data reader for scheduled sampling.
+
+    Output from the iterator created by the original reader will be
+    appended with "true_token_flag" to indicate whether to use the true token.
+
+    :param reader: the original reader.
+    :type reader: callable
+
+    :return: the new reader with the field "true_token_flag".
+    :rtype: callable
+    """
+
+    def data_reader():
+        for src_ids, trg_ids, trg_ids_next in reader():
+            yield src_ids, trg_ids, trg_ids_next, \
+                [0] + schedule_generator.processBatch(len(trg_ids) - 1)
+
+    return data_reader
+```
+
+The decoder function called by `recurrent_group` at each step during training is as follows:
+
+```python
+    def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word,
+                                         true_token_flag):
+        """
+        The decoder step for training.
+        :param enc_vec: the encoder vector for attention
+        :type enc_vec: Layer
+        :param enc_proj: the encoder projection for attention
+        :type enc_proj: Layer
+        :param true_word: the ground-truth target word
+        :type true_word: Layer
+        :param true_token_flag: the flag of using the ground-truth target word
+        :type true_token_flag: Layer
+        :return: the softmax output layer
+        :rtype: Layer
+        """
+
+        decoder_mem = paddle.layer.memory(
+            name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
+
+        context = paddle.networks.simple_attention(
+            encoded_sequence=enc_vec,
+            encoded_proj=enc_proj,
+            decoder_state=decoder_mem)
+
+        gru_out_memory = paddle.layer.memory(
+            name='gru_out', size=target_dict_dim)
+
+        generated_word = paddle.layer.max_id(input=gru_out_memory)
+
+        generated_word_emb = paddle.layer.embedding(
+            input=generated_word,
+            size=word_vector_dim,
+            param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
+
+        current_word = paddle.layer.multiplex(
+            input=[true_token_flag, true_word, generated_word_emb])
+
+        with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
+            decoder_inputs += paddle.layer.full_matrix_projection(input=context)
+            decoder_inputs += paddle.layer.full_matrix_projection(
+                input=current_word)
+
+        gru_step = paddle.layer.gru_step(
+            name='gru_decoder',
+            input=decoder_inputs,
+            output_mem=decoder_mem,
+            size=decoder_size)
+
+        with paddle.layer.mixed(
+                name='gru_out',
+                size=target_dict_dim,
+                bias_attr=True,
+                act=paddle.activation.Softmax()) as out:
+            out += paddle.layer.full_matrix_projection(input=gru_step)
+
+        return out
+```
+
+This function uses the `memory` layer `gru_out_memory` to remember the elements generated at previous time steps, and the `multiplex` layer to choose whether a generated element is used as the decoder input.
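+
+The wrapped reader and the extra flag field are passed to the trainer as follows (a condensed excerpt of `main()` in `scheduled_sampling.py`, where `trainer` and `event_handler` are constructed):
+
+```python
+wmt14_reader = paddle.batch(
+    gen_schedule_data(
+        paddle.reader.shuffle(
+            paddle.dataset.wmt14.train(30000), buf_size=8192)),  # dict_size
+    batch_size=5)
+
+# Route the appended "true_token_flag" field to its data layer.
+feeding = {
+    'source_language_word': 0,
+    'target_language_word': 1,
+    'target_language_next_word': 2,
+    'true_token_flag': 3
+}
+
+trainer.train(
+    reader=wmt14_reader,
+    event_handler=event_handler,
+    feeding=feeding,
+    num_passes=2)
+```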
+
+### Training results (to be added once hyper-parameter tuning is complete)
diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py
index 1dead8969a..a516bd2d4b 100644
--- a/scheduled_sampling/scheduled_sampling.py
+++ b/scheduled_sampling/scheduled_sampling.py
@@ -28,6 +28,17 @@ def gen_schedule_data(reader):
 
 
 def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
+    """
+    The definition of the sequence to sequence model
+    :param source_dict_dim: the dictionary size of the source language
+    :type source_dict_dim: int
+    :param target_dict_dim: the dictionary size of the target language
+    :type target_dict_dim: int
+    :param is_generating: whether in generating mode
+    :type is_generating: Bool
+    :return: the last layer of the network
+    :rtype: Layer
+    """
     ### Network Architecture
     word_vector_dim = 512  # dimension of word vector
     decoder_size = 512  # dimension of hidden unit in GRU Decoder network
@@ -41,9 +52,7 @@ def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
         name='source_language_word',
         type=paddle.data_type.integer_value_sequence(source_dict_dim))
     src_embedding = paddle.layer.embedding(
-        input=src_word_id,
-        size=word_vector_dim,
-        param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
+        input=src_word_id, size=word_vector_dim)
     src_forward = paddle.networks.simple_gru(
         input=src_embedding, size=encoder_size)
     src_backward = paddle.networks.simple_gru(
@@ -64,6 +73,19 @@ def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
 
     def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word,
                                          true_token_flag):
+        """
+        The decoder step for training.
+        :param enc_vec: the encoder vector for attention
+        :type enc_vec: Layer
+        :param enc_proj: the encoder projection for attention
+        :type enc_proj: Layer
+        :param true_word: the ground-truth target word
+        :type true_word: Layer
+        :param true_token_flag: the flag of using the ground-truth target word
+        :type true_token_flag: Layer
+        :return: the softmax output layer
+        :rtype: Layer
+        """
 
         decoder_mem = paddle.layer.memory(
             name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
@@ -107,6 +129,17 @@ def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word,
         return out
 
     def gru_decoder_with_attention_test(enc_vec, enc_proj, current_word):
+        """
+        The decoder step for generating.
+        :param enc_vec: the encoder vector for attention
+        :type enc_vec: Layer
+        :param enc_proj: the encoder projection for attention
+        :type enc_proj: Layer
+        :param current_word: the previously generated word
+        :type current_word: Layer
+        :return: the softmax output layer
+        :rtype: Layer
+        """
 
         decoder_mem = paddle.layer.memory(
             name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)

From 6dd3895efd4176a6d6baa771912006d49c0a015f Mon Sep 17 00:00:00 2001
From: wwhu
Date: Wed, 24 May 2017 17:45:04 +0800
Subject: [PATCH 12/43] add v2 API for imagenet models

---
 image_classification/alexnet.py   |  48 +++++++++
 image_classification/googlenet.py | 161 ++++++++++++++++++++++++++++++
 image_classification/resnet.py    |  93 +++++++++++++++++
 image_classification/train.py     |  75 ++++++++++----
 4 files changed, 355 insertions(+), 22 deletions(-)
 create mode 100644 image_classification/alexnet.py
 create mode 100644 image_classification/googlenet.py
 create mode 100644 image_classification/resnet.py
 mode change 100644 => 100755 image_classification/train.py

diff --git a/image_classification/alexnet.py b/image_classification/alexnet.py
new file mode 100644
index 0000000000..eaa7a3dc54
--- /dev/null
+++ b/image_classification/alexnet.py
@@ -0,0 +1,48 @@
+import paddle.v2 as paddle
+
+__all__ = ['alexnet']
+
+
+def alexnet(input):
+    conv1 = paddle.layer.img_conv(
+        input=input,
+        filter_size=11,
+        num_channels=3,
+        num_filters=96,
+        stride=4,
+        padding=1)
+    cmrnorm1 = paddle.layer.img_cmrnorm(
+        input=conv1, size=5, scale=0.0001, power=0.75)
+    pool1 = paddle.layer.img_pool(input=cmrnorm1, pool_size=3, stride=2)
+
+    conv2 = paddle.layer.img_conv(
+        input=pool1,
+        filter_size=5,
+        num_filters=256,
+        stride=1,
+        padding=2,
+        groups=1)
+    cmrnorm2 = paddle.layer.img_cmrnorm(
+        input=conv2, size=5, scale=0.0001, power=0.75)
+    pool2 = paddle.layer.img_pool(input=cmrnorm2, pool_size=3, stride=2)
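+
+    # conv3-conv5 of the original AlexNet, collapsed into a single
+    # img_conv_group below: three 3x3 convolutions with 384, 384 and 256
+    # filters respectively, followed by 3x3 max pooling.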
+ + pool3 = paddle.networks.img_conv_group( + input=pool2, + pool_size=3, + pool_stride=2, + conv_num_filter=[384, 384, 256], + conv_filter_size=3, + pool_type=paddle.pooling.Max()) + + fc1 = paddle.layer.fc( + input=pool3, + size=4096, + act=paddle.activation.Relu(), + layer_attr=paddle.attr.Extra(drop_rate=0.5)) + fc2 = paddle.layer.fc( + input=fc1, + size=4096, + act=paddle.activation.Relu(), + layer_attr=paddle.attr.Extra(drop_rate=0.5)) + + return fc2 diff --git a/image_classification/googlenet.py b/image_classification/googlenet.py new file mode 100644 index 0000000000..60cfa9d4f8 --- /dev/null +++ b/image_classification/googlenet.py @@ -0,0 +1,161 @@ +import paddle.v2 as paddle + +__all__ = ['googlenet'] + + +def inception(name, input, channels, filter1, filter3R, filter3, filter5R, + filter5, proj): + cov1 = paddle.layer.conv_projection( + input=input, + filter_size=1, + num_channels=channels, + num_filters=filter1, + stride=1, + padding=0) + + cov3r = paddle.layer.img_conv( + name=name + '_3r', + input=input, + filter_size=1, + num_channels=channels, + num_filters=filter3R, + stride=1, + padding=0) + cov3 = paddle.layer.conv_projection( + input=cov3r, filter_size=3, num_filters=filter3, stride=1, padding=1) + + cov5r = paddle.layer.img_conv( + name=name + '_5r', + input=input, + filter_size=1, + num_channels=channels, + num_filters=filter5R, + stride=1, + padding=0) + cov5 = paddle.layer.conv_projection( + input=cov5r, filter_size=5, num_filters=filter5, stride=1, padding=2) + + pool1 = paddle.layer.img_pool( + name=name + '_max', + input=input, + pool_size=3, + num_channels=channels, + stride=1, + padding=1) + covprj = paddle.layer.conv_projection( + input=pool1, filter_size=1, num_filters=proj, stride=1, padding=0) + + cat = paddle.layer.concat( + name=name, + input=[cov1, cov3, cov5, covprj], + bias_attr=True, + act=paddle.activation.Relu()) + return cat + + +def googlenet(input): + # stage 1 + conv1 = paddle.layer.img_conv( + name="conv1", + input=input, + filter_size=7, + num_channels=3, + num_filters=64, + stride=2, + padding=3) + pool1 = paddle.layer.img_pool( + name="pool1", input=conv1, pool_size=3, num_channels=64, stride=2) + + # stage 2 + conv2_1 = paddle.layer.img_conv( + name="conv2_1", + input=pool1, + filter_size=1, + num_filters=64, + stride=1, + padding=0) + conv2_2 = paddle.layer.img_conv( + name="conv2_2", + input=conv2_1, + filter_size=3, + num_filters=192, + stride=1, + padding=1) + pool2 = paddle.layer.img_pool( + name="pool2", input=conv2_2, pool_size=3, num_channels=192, stride=2) + + # stage 3 + ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32) + ince3b = inception("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64) + pool3 = paddle.layer.img_pool( + name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2) + + # stage 4 + ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64) + ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64) + ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64) + ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64) + ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128) + pool4 = paddle.layer.img_pool( + name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2) + + # stage 5 + ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128) + ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128) + pool5 = paddle.layer.img_pool( + name="pool5", + input=ince5b, + num_channels=1024, + pool_size=7, + 
stride=7, + pool_type=paddle.pooling.Avg()) + dropout = paddle.layer.addto( + input=pool5, + layer_attr=paddle.attr.Extra(drop_rate=0.4), + act=paddle.activation.Linear()) + + # fc for output 1 + pool_o1 = paddle.layer.img_pool( + name="pool_o1", + input=ince4a, + num_channels=512, + pool_size=5, + stride=3, + pool_type=paddle.pooling.Avg()) + conv_o1 = paddle.layer.img_conv( + name="conv_o1", + input=pool_o1, + filter_size=1, + num_filters=128, + stride=1, + padding=0) + fc_o1 = paddle.layer.fc( + name="fc_o1", + input=conv_o1, + size=1024, + layer_attr=paddle.attr.Extra(drop_rate=0.7), + act=paddle.activation.Relu()) + + # fc for output 2 + pool_o2 = paddle.layer.img_pool( + name="pool_o2", + input=ince4d, + num_channels=528, + pool_size=5, + stride=3, + pool_type=paddle.pooling.Avg()) + conv_o2 = paddle.layer.img_conv( + name="conv_o2", + input=pool_o2, + filter_size=1, + num_filters=128, + stride=1, + padding=0) + fc_o2 = paddle.layer.fc( + name="fc_o2", + input=conv_o2, + size=1024, + layer_attr=paddle.attr.Extra(drop_rate=0.7), + act=paddle.activation.Relu()) + + return dropout, fc_o1, fc_o2 diff --git a/image_classification/resnet.py b/image_classification/resnet.py new file mode 100644 index 0000000000..1da44aadb3 --- /dev/null +++ b/image_classification/resnet.py @@ -0,0 +1,93 @@ +import paddle.v2 as paddle + +__all__ = ['resnet_imagenet', 'resnet_cifar10'] + + +def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + active_type=paddle.activation.Relu(), + ch_in=None): + tmp = paddle.layer.img_conv( + input=input, + filter_size=filter_size, + num_channels=ch_in, + num_filters=ch_out, + stride=stride, + padding=padding, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.batch_norm(input=tmp, act=active_type) + + +def shortcut(input, n_out, stride, b_projection): + if b_projection: + return conv_bn_layer(input, n_out, 1, stride, 0, + paddle.activation.Linear()) + else: + return input + + +def basicblock(input, ch_out, stride, b_projection): + # TODO: bug fix for ch_in = input.num_filters + conv1 = conv_bn_layer(input, ch_out, 3, stride, 1) + conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, paddle.activation.Linear()) + short = shortcut(input, ch_out, stride, b_projection) + return paddle.layer.addto( + input=[conv2, short], act=paddle.activation.Relu()) + + +def bottleneck(input, ch_out, stride, b_projection): + # TODO: bug fix for ch_in = input.num_filters + conv1 = conv_bn_layer(input, ch_out, 1, stride, 0) + conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1) + conv3 = conv_bn_layer(conv2, ch_out * 4, 1, 1, 0, + paddle.activation.Linear()) + short = shortcut(input, ch_out * 4, stride, b_projection) + return paddle.layer.addto( + input=[conv3, short], act=paddle.activation.Relu()) + + +def layer_warp(block_func, input, features, count, stride): + conv = block_func(input, features, stride, True) + for i in range(1, count): + conv = block_func(conv, features, 1, False) + return conv + + +def resnet_imagenet(input, depth=50): + cfg = { + 18: ([2, 2, 2, 1], basicblock), + 34: ([3, 4, 6, 3], basicblock), + 50: ([3, 4, 6, 3], bottleneck), + 101: ([3, 4, 23, 3], bottleneck), + 152: ([3, 8, 36, 3], bottleneck) + } + stages, block_func = cfg[depth] + conv1 = conv_bn_layer( + input, ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3) + pool1 = paddle.layer.img_pool(input=conv1, pool_size=3, stride=2) + res1 = layer_warp(block_func, pool1, 64, stages[0], 1) + res2 = layer_warp(block_func, res1, 128, stages[1], 2) + res3 = layer_warp(block_func, res2, 256, 
stages[2], 2) + res4 = layer_warp(block_func, res3, 512, stages[3], 2) + pool2 = paddle.layer.img_pool( + input=res4, pool_size=7, stride=1, pool_type=paddle.pooling.Avg()) + return pool2 + + +def resnet_cifar10(input, depth=32): + # depth should be one of 20, 32, 44, 56, 110, 1202 + assert (depth - 2) % 6 == 0 + n = (depth - 2) / 6 + nStages = {16, 64, 128} + conv1 = conv_bn_layer( + input, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, n, 1) + res2 = layer_warp(basicblock, res1, 32, n, 2) + res3 = layer_warp(basicblock, res2, 64, n, 2) + pool = paddle.layer.img_pool( + input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) + return pool diff --git a/image_classification/train.py b/image_classification/train.py old mode 100644 new mode 100755 index d917bd8019..a8817c606f --- a/image_classification/train.py +++ b/image_classification/train.py @@ -1,38 +1,63 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License - import gzip - import paddle.v2 as paddle import reader import vgg +import resnet +import alexnet +import googlenet +import argparse +import os DATA_DIM = 3 * 224 * 224 -CLASS_DIM = 1000 +CLASS_DIM = 100 BATCH_SIZE = 128 def main(): + # parse the argument + parser = argparse.ArgumentParser() + parser.add_argument( + 'data_dir', + help='The data directory which contains train.list and val.list') + parser.add_argument( + 'model', + help='The model for image classification', + choices=['alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet']) + args = parser.parse_args() # PaddlePaddle init - paddle.init(use_gpu=True, trainer_count=4) + paddle.init(use_gpu=True, trainer_count=1) image = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(DATA_DIM)) lbl = paddle.layer.data( name="label", type=paddle.data_type.integer_value(CLASS_DIM)) - net = vgg.vgg13(image) + + extra_layers = None + if args.model == 'alexnet': + net = alexnet.alexnet(image) + elif args.model == 'vgg13': + net = vgg.vgg13(image) + elif args.model == 'vgg16': + net = vgg.vgg16(image) + elif args.model == 'vgg19': + net = vgg.vgg19(image) + elif args.model == 'resnet': + net = resnet.resnet_imagenet(image) + elif args.model == 'googlenet': + net, fc_o1, fc_o2 = googlenet.googlenet(image) + out1 = paddle.layer.fc( + input=fc_o1, size=CLASS_DIM, act=paddle.activation.Softmax()) + loss1 = paddle.layer.cross_entropy_cost( + input=out1, label=lbl, coeff=0.3) + paddle.evaluator.classification_error(input=out1, label=lbl) + out2 = paddle.layer.fc( + input=fc_o2, size=CLASS_DIM, act=paddle.activation.Softmax()) + loss2 = paddle.layer.cross_entropy_cost( + input=out2, label=lbl, coeff=0.3) + paddle.evaluator.classification_error(input=out2, label=lbl) + extra_layers = [loss1, loss2] + out = paddle.layer.fc( input=net, size=CLASS_DIM, act=paddle.activation.Softmax()) cost = paddle.layer.classification_cost(input=out, label=lbl) @@ -45,16 +70,19 @@ def main(): momentum=0.9, 
         regularization=paddle.optimizer.L2Regularization(rate=0.0005 *
                                                          BATCH_SIZE),
-        learning_rate=0.01 / BATCH_SIZE,
+        learning_rate=0.001 / BATCH_SIZE,
         learning_rate_decay_a=0.1,
         learning_rate_decay_b=128000 * 35,
         learning_rate_schedule="discexp", )
 
     train_reader = paddle.batch(
-        paddle.reader.shuffle(reader.test_reader("train.list"), buf_size=1000),
+        paddle.reader.shuffle(
+            reader.test_reader(os.path.join(args.data_dir, 'train.list')),
+            buf_size=1000),
         batch_size=BATCH_SIZE)
     test_reader = paddle.batch(
-        reader.train_reader("test.list"), batch_size=BATCH_SIZE)
+        reader.train_reader(os.path.join(args.data_dir, 'val.list')),
+        batch_size=BATCH_SIZE)
 
     # End batch and end pass event handler
     def event_handler(event):
@@ -71,7 +99,10 @@ def event_handler(event):
 
     # Create trainer
     trainer = paddle.trainer.SGD(
-        cost=cost, parameters=parameters, update_equation=optimizer)
+        cost=cost,
+        parameters=parameters,
+        update_equation=optimizer,
+        extra_layers=extra_layers)
 
     trainer.train(
         reader=train_reader, num_passes=200, event_handler=event_handler)

From 8848164129b0e38898c7752915880d38f153edec Mon Sep 17 00:00:00 2001
From: wwhu
Date: Thu, 1 Jun 2017 15:29:34 +0800
Subject: [PATCH 13/43] add doc and reorganize net output

---
 image_classification/README.md    | 183 +++++++++++++++++++++++++++++-
 image_classification/alexnet.py   |   6 +-
 image_classification/googlenet.py |  91 +++++++++++++--
 image_classification/resnet.py    |  12 +-
 image_classification/train.py     |  18 +--
 image_classification/vgg.py       |  18 +--
 6 files changed, 290 insertions(+), 38 deletions(-)

diff --git a/image_classification/README.md b/image_classification/README.md
index a0990367ef..0010fe5b0a 100644
--- a/image_classification/README.md
+++ b/image_classification/README.md
@@ -1 +1,182 @@
-TBD
+Image Classification
+=======================
+
+This document describes how to use the AlexNet, VGG, GoogLeNet and ResNet models for image classification in PaddlePaddle. For a description of the image classification problem and an introduction to the four models, please refer to the [PaddlePaddle book](https://github.com/PaddlePaddle/book/tree/develop/03.image_classification).
+
+## Data Format
+reader.py defines the data format. It reads an image list file and parses the image path and the class label out of each entry.
+
+The image list file is a text file in which every line consists of an image path and a class label, separated by a tab character. Class labels are represented as integers, the smallest being 0. A fragment of an image list file looks like this:
+
+```
+dataset_100/train_images/n03982430_23191.jpeg 1
+dataset_100/train_images/n04461696_23653.jpeg 7
+dataset_100/train_images/n02441942_3170.jpeg 8
+dataset_100/train_images/n03733281_31716.jpeg 2
+dataset_100/train_images/n03424325_240.jpeg 0
+dataset_100/train_images/n02643566_75.jpeg 8
+```
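+
+As an illustration of this format, the minimal sketch below parses such a list file; `parse_image_list` is a hypothetical helper written for this walkthrough, not a function provided by `reader.py`:
+
+```python
+def parse_image_list(list_path):
+    """Yield (image_path, label) pairs from an image list file."""
+    # Hypothetical helper: each line holds an image path, a tab,
+    # and an integer class label counting from 0.
+    with open(list_path) as f:
+        for line in f:
+            path, label = line.strip().split('\t')
+            yield path, int(label)
+```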
+
+## Training the Model
+
+### Initialization
+
+At initialization, import the required packages and initialize PaddlePaddle.
+
+```python
+import gzip
+import paddle.v2 as paddle
+import reader
+import vgg
+import resnet
+import alexnet
+import googlenet
+import argparse
+import os
+
+# PaddlePaddle init
+paddle.init(use_gpu=False, trainer_count=1)
+```
+
+### Defining Parameters and Inputs
+
+Set the algorithm parameters (such as the data dimension, the number of classes and the batch size), and define the data input layer `image` and the class label `lbl`.
+
+```python
+DATA_DIM = 3 * 224 * 224
+CLASS_DIM = 100
+BATCH_SIZE = 128
+
+image = paddle.layer.data(
+    name="image", type=paddle.data_type.dense_vector(DATA_DIM))
+lbl = paddle.layer.data(
+    name="label", type=paddle.data_type.integer_value(CLASS_DIM))
+```
+
+### Obtaining the Model
+
+One of the AlexNet, VGG, GoogLeNet and ResNet models can be chosen for image classification. Calling the corresponding function returns the final Softmax layer of the network.
+
+1. Using the AlexNet model
+
+Given the input layer `image` and the number of classes `CLASS_DIM`, the Softmax layer of AlexNet is obtained with the following code.
+
+```python
+out = alexnet.alexnet(image, class_dim=CLASS_DIM)
+```
+
+2. Using the VGG model
+
+Depending on the number of layers, VGG comes in the VGG13, VGG16 and VGG19 variants. The code for the VGG16 model is as follows:
+
+```python
+out = vgg.vgg16(image, class_dim=CLASS_DIM)
+```
+
+Similarly, VGG13 and VGG19 can be obtained through the `vgg.vgg13` and `vgg.vgg19` functions, respectively.
+
+3. Using the GoogLeNet model
+
+During training, GoogLeNet uses two auxiliary classifiers to strengthen the gradient signal and add extra regularization, so `googlenet.googlenet` returns three Softmax layers in total, as shown in the following code:
+
+```python
+out, out1, out2 = googlenet.googlenet(image, class_dim=CLASS_DIM)
+loss1 = paddle.layer.cross_entropy_cost(
+    input=out1, label=lbl, coeff=0.3)
+paddle.evaluator.classification_error(input=out1, label=lbl)
+loss2 = paddle.layer.cross_entropy_cost(
+    input=out2, label=lbl, coeff=0.3)
+paddle.evaluator.classification_error(input=out2, label=lbl)
+extra_layers = [loss1, loss2]
+```
+
+For the two auxiliary outputs, a loss and a classification-error evaluator are attached to each, and the losses are later passed to SGD as `extra_layers`.
+
+4. Using the ResNet model
+
+The ResNet model can be obtained with the following code:
+
+```python
+out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM)
+```
+
+### Defining the Loss Function
+
+```python
+cost = paddle.layer.classification_cost(input=out, label=lbl)
+```
+
+### Creating Parameters and the Optimizer
+
+```python
+# Create parameters
+parameters = paddle.parameters.create(cost)
+
+# Create optimizer
+optimizer = paddle.optimizer.Momentum(
+    momentum=0.9,
+    regularization=paddle.optimizer.L2Regularization(rate=0.0005 *
+                                                     BATCH_SIZE),
+    learning_rate=0.001 / BATCH_SIZE,
+    learning_rate_decay_a=0.1,
+    learning_rate_decay_b=128000 * 35,
+    learning_rate_schedule="discexp", )
+```
+
+### Defining the Data Readers and the Event Handler
+
+When reading the data, the image list files of the training set and the validation set need to be specified separately; here they are assumed to be `train.list` and `val.list`.
+
+```python
+train_reader = paddle.batch(
+    paddle.reader.shuffle(
+        reader.test_reader('train.list'),
+        buf_size=1000),
+    batch_size=BATCH_SIZE)
+test_reader = paddle.batch(
+    reader.train_reader('val.list'),
+    batch_size=BATCH_SIZE)
+
+# End batch and end pass event handler
+def event_handler(event):
+    if isinstance(event, paddle.event.EndIteration):
+        if event.batch_id % 1 == 0:
+            print "\nPass %d, Batch %d, Cost %f, %s" % (
+                event.pass_id, event.batch_id, event.cost, event.metrics)
+    if isinstance(event, paddle.event.EndPass):
+        with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
+            parameters.to_tar(f)
+
+        result = trainer.test(reader=test_reader)
+        print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
+```
+
+### Defining the Trainer
+
+For AlexNet, VGG and ResNet, the trainer can be defined as follows:
+
+```python
+# Create trainer
+trainer = paddle.trainer.SGD(
+    cost=cost,
+    parameters=parameters,
+    update_equation=optimizer)
+```
+
+GoogLeNet has two extra output layers, so `extra_layers` needs to be specified, as shown below:
+
+```python
+# Create trainer
+trainer = paddle.trainer.SGD(
+    cost=cost,
+    parameters=parameters,
+    update_equation=optimizer,
+    extra_layers=extra_layers)
+```
+
+### Starting Training
+
+```python
+trainer.train(
+    reader=train_reader, num_passes=200, event_handler=event_handler)
+```
diff --git a/image_classification/alexnet.py b/image_classification/alexnet.py
index eaa7a3dc54..8aa53814b1 100644
--- a/image_classification/alexnet.py
+++ b/image_classification/alexnet.py
@@ -3,7 +3,7 @@
 __all__ = ['alexnet']
 
 
-def alexnet(input):
+def alexnet(input, class_dim=100):
     conv1 = paddle.layer.img_conv(
         input=input,
         filter_size=11,
@@ -45,4 +45,6 @@ def alexnet(input):
         act=paddle.activation.Relu(),
         layer_attr=paddle.attr.Extra(drop_rate=0.5))
 
-    return fc2
+    out = paddle.layer.fc(
+        input=fc2, size=class_dim, act=paddle.activation.Softmax())
+    return out
diff --git a/image_classification/googlenet.py b/image_classification/googlenet.py
index 60cfa9d4f8..2e4153ccb6 100644
--- a/image_classification/googlenet.py
+++ b/image_classification/googlenet.py
@@ -53,7 +53,69 @@ def inception(name, input, channels, filter1, filter3R, filter3, filter5R,
     return cat
 
 
+def inception2(name, input, channels, filter1, filter3R, filter3, filter5R,
+               filter5, proj):
+    cov1 = paddle.layer.img_conv(
name=name + '_1', + input=input, + filter_size=1, + num_channels=channels, + num_filters=filter1, + stride=1, + padding=0) + + cov3r = paddle.layer.img_conv( + name=name + '_3r', + input=input, + filter_size=1, + num_channels=channels, + num_filters=filter3R, + stride=1, + padding=0) + cov3 = paddle.layer.img_conv( + name=name + '_3', + input=cov3r, + filter_size=3, + num_filters=filter3, + stride=1, + padding=1) + + cov5r = paddle.layer.img_conv( + name=name + '_5r', + input=input, + filter_size=1, + num_channels=channels, + num_filters=filter5R, + stride=1, + padding=0) + cov5 = paddle.layer.img_conv( + name=name + '_5', + input=cov5r, + filter_size=5, + num_filters=filter5, + stride=1, + padding=2) + + pool1 = paddle.layer.img_pool( + name=name + '_max', + input=input, + pool_size=3, + num_channels=channels, + stride=1, + padding=1) + covprj = paddle.layer.img_conv( + name=name + '_proj', + input=pool1, + filter_size=1, + num_filters=proj, + stride=1, + padding=0) + + cat = paddle.layer.concat(name=name, input=[cov1, cov3, cov5, covprj]) + return cat + + +def googlenet(input, class_dim=100): # stage 1 conv1 = paddle.layer.img_conv( name="conv1", @@ -85,23 +147,23 @@ def googlenet(input): name="pool2", input=conv2_2, pool_size=3, num_channels=192, stride=2) # stage 3 - ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32) - ince3b = inception("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64) + ince3a = inception2("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32) + ince3b = inception2("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64) pool3 = paddle.layer.img_pool( name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2) # stage 4 - ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64) - ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64) - ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64) - ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64) - ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128) + ince4a = inception2("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64) + ince4b = inception2("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64) + ince4c = inception2("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64) + ince4d = inception2("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64) + ince4e = inception2("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128) pool4 = paddle.layer.img_pool( name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2) # stage 5 - ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128) - ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128) + ince5a = inception2("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128) + ince5b = inception2("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128) pool5 = paddle.layer.img_pool( name="pool5", input=ince5b, @@ -114,6 +176,9 @@ def googlenet(input): layer_attr=paddle.attr.Extra(drop_rate=0.4), act=paddle.activation.Linear()) + out = paddle.layer.fc( + input=dropout, size=class_dim, act=paddle.activation.Softmax()) + # fc for output 1 pool_o1 = paddle.layer.img_pool( name="pool_o1", @@ -135,6 +200,8 @@ def googlenet(input): size=1024, layer_attr=paddle.attr.Extra(drop_rate=0.7), act=paddle.activation.Relu()) + out1 = paddle.layer.fc( + input=fc_o1, size=class_dim, act=paddle.activation.Softmax()) # fc for output 2 pool_o2 = paddle.layer.img_pool( @@ -157,5 +224,7 @@ def googlenet(input): size=1024, layer_attr=paddle.attr.Extra(drop_rate=0.7), act=paddle.activation.Relu()) + out2 = 
paddle.layer.fc( + input=fc_o2, size=class_dim, act=paddle.activation.Softmax()) - return dropout, fc_o1, fc_o2 + return out, out1, out2 diff --git a/image_classification/resnet.py b/image_classification/resnet.py index 1da44aadb3..7ef551b3bb 100644 --- a/image_classification/resnet.py +++ b/image_classification/resnet.py @@ -57,7 +57,7 @@ def layer_warp(block_func, input, features, count, stride): return conv -def resnet_imagenet(input, depth=50): +def resnet_imagenet(input, depth=50, class_dim=100): cfg = { 18: ([2, 2, 2, 1], basicblock), 34: ([3, 4, 6, 3], basicblock), @@ -75,10 +75,12 @@ def resnet_imagenet(input, depth=50): res4 = layer_warp(block_func, res3, 512, stages[3], 2) pool2 = paddle.layer.img_pool( input=res4, pool_size=7, stride=1, pool_type=paddle.pooling.Avg()) - return pool2 + out = paddle.layer.fc( + input=pool2, size=class_dim, act=paddle.activation.Softmax()) + return out -def resnet_cifar10(input, depth=32): +def resnet_cifar10(input, depth=32, class_dim=10): # depth should be one of 20, 32, 44, 56, 110, 1202 assert (depth - 2) % 6 == 0 n = (depth - 2) / 6 @@ -90,4 +92,6 @@ def resnet_cifar10(input, depth=32): res3 = layer_warp(basicblock, res2, 64, n, 2) pool = paddle.layer.img_pool( input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) - return pool + out = paddle.layer.fc( + input=pool, size=class_dim, act=paddle.activation.Softmax()) + return out diff --git a/image_classification/train.py b/image_classification/train.py index a8817c606f..3613561629 100755 --- a/image_classification/train.py +++ b/image_classification/train.py @@ -35,31 +35,25 @@ def main(): extra_layers = None if args.model == 'alexnet': - net = alexnet.alexnet(image) + out = alexnet.alexnet(image, class_dim=CLASS_DIM) elif args.model == 'vgg13': - net = vgg.vgg13(image) + out = vgg.vgg13(image, class_dim=CLASS_DIM) elif args.model == 'vgg16': - net = vgg.vgg16(image) + out = vgg.vgg16(image, class_dim=CLASS_DIM) elif args.model == 'vgg19': - net = vgg.vgg19(image) + out = vgg.vgg19(image, class_dim=CLASS_DIM) elif args.model == 'resnet': - net = resnet.resnet_imagenet(image) + out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM) elif args.model == 'googlenet': - net, fc_o1, fc_o2 = googlenet.googlenet(image) - out1 = paddle.layer.fc( - input=fc_o1, size=CLASS_DIM, act=paddle.activation.Softmax()) + out, out1, out2 = googlenet.googlenet(image, class_dim=CLASS_DIM) loss1 = paddle.layer.cross_entropy_cost( input=out1, label=lbl, coeff=0.3) paddle.evaluator.classification_error(input=out1, label=lbl) - out2 = paddle.layer.fc( - input=fc_o2, size=CLASS_DIM, act=paddle.activation.Softmax()) loss2 = paddle.layer.cross_entropy_cost( input=out2, label=lbl, coeff=0.3) paddle.evaluator.classification_error(input=out2, label=lbl) extra_layers = [loss1, loss2] - out = paddle.layer.fc( - input=net, size=CLASS_DIM, act=paddle.activation.Softmax()) cost = paddle.layer.classification_cost(input=out, label=lbl) # Create parameters diff --git a/image_classification/vgg.py b/image_classification/vgg.py index e21504ab54..b272320b26 100644 --- a/image_classification/vgg.py +++ b/image_classification/vgg.py @@ -17,7 +17,7 @@ __all__ = ['vgg13', 'vgg16', 'vgg19'] -def vgg(input, nums): +def vgg(input, nums, class_dim=100): def conv_block(input, num_filter, groups, num_channels=None): return paddle.networks.img_conv_group( input=input, @@ -48,19 +48,21 @@ def conv_block(input, num_filter, groups, num_channels=None): size=fc_dim, act=paddle.activation.Relu(), layer_attr=paddle.attr.Extra(drop_rate=0.5)) 
- return fc2 + out = paddle.layer.fc( + input=fc2, size=class_dim, act=paddle.activation.Softmax()) + return out -def vgg13(input): +def vgg13(input, class_dim=100): nums = [2, 2, 2, 2, 2] - return vgg(input, nums) + return vgg(input, nums, class_dim) -def vgg16(input): +def vgg16(input, class_dim=100): nums = [2, 2, 3, 3, 3] - return vgg(input, nums) + return vgg(input, nums, class_dim) -def vgg19(input): +def vgg19(input, class_dim=100): nums = [2, 2, 4, 4, 4] - return vgg(input, nums) + return vgg(input, nums, class_dim) From d7d1ae5a9eb8a02ea63af2e55fb782ab74e2a1a9 Mon Sep 17 00:00:00 2001 From: wwhu Date: Fri, 2 Jun 2017 14:00:09 +0800 Subject: [PATCH 14/43] minor revision --- image_classification/README.md | 5 ++++ image_classification/googlenet.py | 50 ------------------------------- image_classification/resnet.py | 2 -- 3 files changed, 5 insertions(+), 52 deletions(-) diff --git a/image_classification/README.md b/image_classification/README.md index 0010fe5b0a..39167fa19e 100644 --- a/image_classification/README.md +++ b/image_classification/README.md @@ -123,6 +123,11 @@ optimizer = paddle.optimizer.Momentum( learning_rate_schedule="discexp", ) ``` +通过 `learning_rate_decay_a` (简写$a$) 、`learning_rate_decay_b` (简写$b$) 和 `learning_rate_schedule` 指定学习率调整策略,这里采用离散指数的方式调节学习率,计算公式如下, $n$ 代表已经处理过的累计总样本数,$lr_{0}$ 即为参数里设置的 `learning_rate`。 + +$$ lr = lr_{0} * a^ {\lfloor \frac{n}{ b}\rfloor} $$ + + ### 定义数据读取方法和事件处理程序 读取数据时需要分别指定训练集和验证集的图像列表文件,这里假设这两个文件分别为`train.list`和`val.list`。 diff --git a/image_classification/googlenet.py b/image_classification/googlenet.py index 2e4153ccb6..e21a036024 100644 --- a/image_classification/googlenet.py +++ b/image_classification/googlenet.py @@ -3,56 +3,6 @@ __all__ = ['googlenet'] -def inception(name, input, channels, filter1, filter3R, filter3, filter5R, - filter5, proj): - cov1 = paddle.layer.conv_projection( - input=input, - filter_size=1, - num_channels=channels, - num_filters=filter1, - stride=1, - padding=0) - - cov3r = paddle.layer.img_conv( - name=name + '_3r', - input=input, - filter_size=1, - num_channels=channels, - num_filters=filter3R, - stride=1, - padding=0) - cov3 = paddle.layer.conv_projection( - input=cov3r, filter_size=3, num_filters=filter3, stride=1, padding=1) - - cov5r = paddle.layer.img_conv( - name=name + '_5r', - input=input, - filter_size=1, - num_channels=channels, - num_filters=filter5R, - stride=1, - padding=0) - cov5 = paddle.layer.conv_projection( - input=cov5r, filter_size=5, num_filters=filter5, stride=1, padding=2) - - pool1 = paddle.layer.img_pool( - name=name + '_max', - input=input, - pool_size=3, - num_channels=channels, - stride=1, - padding=1) - covprj = paddle.layer.conv_projection( - input=pool1, filter_size=1, num_filters=proj, stride=1, padding=0) - - cat = paddle.layer.concat( - name=name, - input=[cov1, cov3, cov5, covprj], - bias_attr=True, - act=paddle.activation.Relu()) - return cat - - def inception2(name, input, channels, filter1, filter3R, filter3, filter5R, filter5, proj): cov1 = paddle.layer.img_conv( diff --git a/image_classification/resnet.py b/image_classification/resnet.py index 7ef551b3bb..63bc4409b7 100644 --- a/image_classification/resnet.py +++ b/image_classification/resnet.py @@ -31,7 +31,6 @@ def shortcut(input, n_out, stride, b_projection): def basicblock(input, ch_out, stride, b_projection): - # TODO: bug fix for ch_in = input.num_filters conv1 = conv_bn_layer(input, ch_out, 3, stride, 1) conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, paddle.activation.Linear()) short = shortcut(input, 
ch_out, stride, b_projection) @@ -40,7 +39,6 @@ def basicblock(input, ch_out, stride, b_projection): def bottleneck(input, ch_out, stride, b_projection): - # TODO: bug fix for ch_in = input.num_filters conv1 = conv_bn_layer(input, ch_out, 1, stride, 0) conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1) conv3 = conv_bn_layer(conv2, ch_out * 4, 1, 1, 0, From 98b2a225af942739ba48e59b8c17ecc4ac212e86 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 8 Jun 2017 17:18:38 +0800 Subject: [PATCH 15/43] Add error rate calculation script. --- deep_speech_2/error_rate.py | 138 ++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 deep_speech_2/error_rate.py diff --git a/deep_speech_2/error_rate.py b/deep_speech_2/error_rate.py new file mode 100644 index 0000000000..4739238e72 --- /dev/null +++ b/deep_speech_2/error_rate.py @@ -0,0 +1,138 @@ +# -- * -- coding: utf-8 -- * -- +import numpy as np + + +def levenshtein_distance(ref, hyp): + ref_len = len(ref) + hyp_len = len(hyp) + + # special case + if ref == hyp: + return 0 + if ref_len == 0: + return hyp_len + if hyp_len == 0: + return ref_len + + distance = np.zeros((ref_len + 1) * (hyp_len + 1), dtype=np.int64) + distance = distance.reshape((ref_len + 1, hyp_len + 1)) + + # initialization distance matrix + for j in xrange(hyp_len + 1): + distance[0][j] = j + for i in xrange(ref_len + 1): + distance[i][0] = i + + # calculate levenshtein distance + for i in xrange(1, ref_len + 1): + for j in xrange(1, hyp_len + 1): + if ref[i - 1] == hyp[j - 1]: + distance[i][j] = distance[i - 1][j - 1] + else: + s_num = distance[i - 1][j - 1] + 1 + i_num = distance[i][j - 1] + 1 + d_num = distance[i - 1][j] + 1 + distance[i][j] = min(s_num, i_num, d_num) + + return distance[ref_len][hyp_len] + + +def wer(reference, hypophysis, delimiter=' ', filter_none=True): + """ + Calculate word error rate (WER). WER is a popular evaluation metric used + in speech recognition. It compares a reference to an hypophysis and + is defined like this: + + .. math:: + WER = (Sw + Dw + Iw) / Nw + + where + + .. code-block:: text + + Sw is the number of words subsituted, + Dw is the number of words deleted, + Iw is the number of words inserted, + Nw is the number of words in the reference + + We can use levenshtein distance to calculate WER. Take an attention that + this function will truncate the beginning and ending delimiter for + reference and hypophysis sentences before calculating WER. + + :param reference: The reference sentence. + :type reference: str + :param hypophysis: The hypophysis sentence. + :type reference: str + :param delimiter: Delimiter of input sentences. + :type delimiter: char + :param filter_none: Whether to remove None value when splitting sentence. + :type filter_none: bool + :return: WER + :rtype: float + """ + + if len(reference.strip(delimiter)) == 0: + raise ValueError("Reference's word number should be greater than 0.") + + if filter_none == True: + ref_words = filter(None, reference.strip(delimiter).split(delimiter)) + hyp_words = filter(None, hypophysis.strip(delimiter).split(delimiter)) + else: + ref_words = reference.strip(delimiter).split(delimiter) + hyp_words = reference.strip(delimiter).split(delimiter) + + edit_distance = levenshtein_distance(ref_words, hyp_words) + wer = float(edit_distance) / len(ref_words) + return wer + + +def cer(reference, hypophysis, squeeze=True, ignore_case=False, strip_char=''): + """ + Calculate charactor error rate (CER). CER will compare reference text and + hypophysis text in char-level. 
CER is defined as: + + .. math:: + CER = (Sc + Dc + Ic) / Nc + + where + + .. code-block:: text + + Sc is the number of character substituted, + Dc is the number of deleted, + Ic is the number of inserted + Nc is the number of characters in the reference + + We can use levenshtein distance to calculate CER. Chinese input should be + encoded to unicode. + + :param reference: The reference sentence. + :type reference: str + :param hypophysis: The hypophysis sentence. + :type reference: str + :param squeeze: If set true, consecutive space character + will be squeezed to one + :type squeezed: bool + :param ignore_case: Whether ignoring character case. + :type ignore_case: bool + :param strip_char: If not set to '', strip_char in beginning and ending of + sentence will be truncated. + :type strip_char: char + :return: CER + :rtype: float + """ + if ignore_case == True: + reference = reference.lower() + hypophysis = hypophysis.lower() + if strip_char != '': + reference = reference.strip(strip_char) + hypophysis = hypophysis.strip(strip_char) + if squeeze == True: + reference = ' '.join(filter(None, reference.split(' '))) + hypophysis = ' '.join(filter(None, hypophysis.split(' '))) + + if len(reference) == 0: + raise ValueError("Length of reference should be greater than 0.") + edit_distance = levenshtein_distance(reference, hypophysis) + cer = float(edit_distance) / len(reference) + return cer From 8e3c26fe72cd7ec79b11ce359ce1bf040d2e5e86 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 8 Jun 2017 21:35:17 +0800 Subject: [PATCH 16/43] Fix typos and follow comments. --- deep_speech_2/error_rate.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/deep_speech_2/error_rate.py b/deep_speech_2/error_rate.py index 4739238e72..f216177e00 100644 --- a/deep_speech_2/error_rate.py +++ b/deep_speech_2/error_rate.py @@ -14,8 +14,7 @@ def levenshtein_distance(ref, hyp): if hyp_len == 0: return ref_len - distance = np.zeros((ref_len + 1) * (hyp_len + 1), dtype=np.int64) - distance = distance.reshape((ref_len + 1, hyp_len + 1)) + distance = np.zeros((ref_len + 1, hyp_len + 1), dtype=np.int64) # initialization distance matrix for j in xrange(hyp_len + 1): @@ -40,7 +39,7 @@ def levenshtein_distance(ref, hyp): def wer(reference, hypophysis, delimiter=' ', filter_none=True): """ Calculate word error rate (WER). WER is a popular evaluation metric used - in speech recognition. It compares a reference to an hypophysis and + in speech recognition. It compares a reference with an hypophysis and is defined like this: .. math:: @@ -55,8 +54,8 @@ def wer(reference, hypophysis, delimiter=' ', filter_none=True): Iw is the number of words inserted, Nw is the number of words in the reference - We can use levenshtein distance to calculate WER. Take an attention that - this function will truncate the beginning and ending delimiter for + We can use levenshtein distance to calculate WER. Please draw an attention + that this function will truncate the beginning and ending delimiter for reference and hypophysis sentences before calculating WER. :param reference: The reference sentence. @@ -111,12 +110,12 @@ def cer(reference, hypophysis, squeeze=True, ignore_case=False, strip_char=''): :param hypophysis: The hypophysis sentence. :type reference: str :param squeeze: If set true, consecutive space character - will be squeezed to one - :type squeezed: bool - :param ignore_case: Whether ignoring character case. 
+ will be squeezed to one + :type squeeze: bool + :param ignore_case: Whether case-sensitive or not. :type ignore_case: bool :param strip_char: If not set to '', strip_char in beginning and ending of - sentence will be truncated. + sentence will be truncated. :type strip_char: char :return: CER :rtype: float From 9752884e3317095716a61fb523e7207a49e605a6 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 12 Jun 2017 12:51:01 +0800 Subject: [PATCH 17/43] Follow comments. --- deep_speech_2/error_rate.py | 94 ++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 49 deletions(-) diff --git a/deep_speech_2/error_rate.py b/deep_speech_2/error_rate.py index f216177e00..2bb6371149 100644 --- a/deep_speech_2/error_rate.py +++ b/deep_speech_2/error_rate.py @@ -1,4 +1,9 @@ -# -- * -- coding: utf-8 -- * -- +# -*- coding: utf-8 -*- +""" + This module provides functions to calculate error rate in different level. + e.g. wer for word-level, cer for char-level. +""" + import numpy as np @@ -14,9 +19,9 @@ def levenshtein_distance(ref, hyp): if hyp_len == 0: return ref_len - distance = np.zeros((ref_len + 1, hyp_len + 1), dtype=np.int64) + distance = np.zeros((ref_len + 1, hyp_len + 1), dtype=np.int32) - # initialization distance matrix + # initialize distance matrix for j in xrange(hyp_len + 1): distance[0][j] = j for i in xrange(ref_len + 1): @@ -36,11 +41,10 @@ def levenshtein_distance(ref, hyp): return distance[ref_len][hyp_len] -def wer(reference, hypophysis, delimiter=' ', filter_none=True): +def wer(reference, hypothesis, ignore_case=False, delimiter=' '): """ - Calculate word error rate (WER). WER is a popular evaluation metric used - in speech recognition. It compares a reference with an hypophysis and - is defined like this: + Calculate word error rate (WER). WER compares reference text and + hypothesis text in word-level. WER is defined as: .. math:: WER = (Sw + Dw + Iw) / Nw @@ -54,41 +58,39 @@ def wer(reference, hypophysis, delimiter=' ', filter_none=True): Iw is the number of words inserted, Nw is the number of words in the reference - We can use levenshtein distance to calculate WER. Please draw an attention - that this function will truncate the beginning and ending delimiter for - reference and hypophysis sentences before calculating WER. + We can use levenshtein distance to calculate WER. Please draw an attention that + empty items will be removed when splitting sentences by delimiter. :param reference: The reference sentence. - :type reference: str - :param hypophysis: The hypophysis sentence. - :type reference: str + :type reference: basestring + :param hypothesis: The hypothesis sentence. + :type hypothesis: basestring + :param ignore_case: Whether case-sensitive or not. + :type ignore_case: bool :param delimiter: Delimiter of input sentences. :type delimiter: char - :param filter_none: Whether to remove None value when splitting sentence. - :type filter_none: bool - :return: WER + :return: Word error rate. 
:rtype: float """ + if ignore_case == True: + reference = reference.lower() + hypothesis = hypothesis.lower() - if len(reference.strip(delimiter)) == 0: - raise ValueError("Reference's word number should be greater than 0.") + ref_words = filter(None, reference.split(delimiter)) + hyp_words = filter(None, hypothesis.split(delimiter)) - if filter_none == True: - ref_words = filter(None, reference.strip(delimiter).split(delimiter)) - hyp_words = filter(None, hypophysis.strip(delimiter).split(delimiter)) - else: - ref_words = reference.strip(delimiter).split(delimiter) - hyp_words = reference.strip(delimiter).split(delimiter) + if len(ref_words) == 0: + raise ValueError("Reference's word number should be greater than 0.") edit_distance = levenshtein_distance(ref_words, hyp_words) wer = float(edit_distance) / len(ref_words) return wer -def cer(reference, hypophysis, squeeze=True, ignore_case=False, strip_char=''): +def cer(reference, hypothesis, ignore_case=False): """ - Calculate charactor error rate (CER). CER will compare reference text and - hypophysis text in char-level. CER is defined as: + Calculate charactor error rate (CER). CER compares reference text and + hypothesis text in char-level. CER is defined as: .. math:: CER = (Sc + Dc + Ic) / Nc @@ -97,41 +99,35 @@ def cer(reference, hypophysis, squeeze=True, ignore_case=False, strip_char=''): .. code-block:: text - Sc is the number of character substituted, - Dc is the number of deleted, - Ic is the number of inserted + Sc is the number of characters substituted, + Dc is the number of characters deleted, + Ic is the number of characters inserted Nc is the number of characters in the reference We can use levenshtein distance to calculate CER. Chinese input should be - encoded to unicode. + encoded to unicode. Please draw an attention that the leading and tailing + white space characters will be truncated and multiple consecutive white + space characters in a sentence will be replaced by one white space character. :param reference: The reference sentence. - :type reference: str - :param hypophysis: The hypophysis sentence. - :type reference: str - :param squeeze: If set true, consecutive space character - will be squeezed to one - :type squeeze: bool + :type reference: basestring + :param hypothesis: The hypothesis sentence. + :type hypothesis: basestring :param ignore_case: Whether case-sensitive or not. :type ignore_case: bool - :param strip_char: If not set to '', strip_char in beginning and ending of - sentence will be truncated. - :type strip_char: char - :return: CER + :return: Character error rate. 
:rtype: float """ if ignore_case == True: reference = reference.lower() - hypophysis = hypophysis.lower() - if strip_char != '': - reference = reference.strip(strip_char) - hypophysis = hypophysis.strip(strip_char) - if squeeze == True: - reference = ' '.join(filter(None, reference.split(' '))) - hypophysis = ' '.join(filter(None, hypophysis.split(' '))) + hypothesis = hypothesis.lower() + + reference = ' '.join(filter(None, reference.split(' '))) + hypothesis = ' '.join(filter(None, hypothesis.split(' '))) if len(reference) == 0: raise ValueError("Length of reference should be greater than 0.") - edit_distance = levenshtein_distance(reference, hypophysis) + + edit_distance = levenshtein_distance(reference, hypothesis) cer = float(edit_distance) / len(reference) return cer From 0116bc8dd26182b2f04322a100e1dd52a978e49e Mon Sep 17 00:00:00 2001 From: wwhu Date: Tue, 13 Jun 2017 19:05:14 +0800 Subject: [PATCH 18/43] add infer.py and flower dataset --- image_classification/README.md | 88 ++++++++++++++++++++++++++-------- image_classification/infer.py | 83 ++++++++++++++++++++++++++++++++ image_classification/resnet.py | 32 ++++++------- image_classification/train.py | 15 +++--- 4 files changed, 176 insertions(+), 42 deletions(-) create mode 100644 image_classification/infer.py diff --git a/image_classification/README.md b/image_classification/README.md index 39167fa19e..acb8b45109 100644 --- a/image_classification/README.md +++ b/image_classification/README.md @@ -3,20 +3,6 @@ 这里将介绍如何在PaddlePaddle下使用AlexNet、VGG、GoogLeNet和ResNet模型进行图像分类。图像分类问题的描述和这四种模型的介绍可以参考[PaddlePaddle book](https://github.com/PaddlePaddle/book/tree/develop/03.image_classification)。 -## 数据格式 -reader.py定义了数据格式,它读取一个图像列表文件,并从中解析出图像路径和类别标签。 - -图像列表文件是一个文本文件,其中每一行由一个图像路径和类别标签构成,二者以跳格符(Tab)隔开。类别标签用整数表示,其最小值为0。下面给出一个图像列表文件的片段示例: - -``` -dataset_100/train_images/n03982430_23191.jpeg 1 -dataset_100/train_images/n04461696_23653.jpeg 7 -dataset_100/train_images/n02441942_3170.jpeg 8 -dataset_100/train_images/n03733281_31716.jpeg 2 -dataset_100/train_images/n03424325_240.jpeg 0 -dataset_100/train_images/n02643566_75.jpeg 8 -``` - ## 训练模型 ### 初始化 @@ -25,14 +11,14 @@ dataset_100/train_images/n02643566_75.jpeg 8 ```python import gzip +import paddle.v2.dataset.flowers as flowers import paddle.v2 as paddle import reader import vgg import resnet import alexnet import googlenet -import argparse -import os + # PaddlePaddle init paddle.init(use_gpu=False, trainer_count=1) @@ -44,7 +30,7 @@ paddle.init(use_gpu=False, trainer_count=1) ```python DATA_DIM = 3 * 224 * 224 -CLASS_DIM = 100 +CLASS_DIM = 102 BATCH_SIZE = 128 image = paddle.layer.data( @@ -128,9 +114,35 @@ optimizer = paddle.optimizer.Momentum( $$ lr = lr_{0} * a^ {\lfloor \frac{n}{ b}\rfloor} $$ -### 定义数据读取方法和事件处理程序 +### 定义数据读取 + +首先以[花卉数据](http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html)为例说明如何定义输入。下面的代码定义了花卉数据训练集和验证集的输入: + +```python +train_reader = paddle.batch( + paddle.reader.shuffle( + flowers.train(), + buf_size=1000), + batch_size=BATCH_SIZE) +test_reader = paddle.batch( + flowers.valid(), + batch_size=BATCH_SIZE) +``` + +若需要使用其他数据,则需要先建立图像列表文件。`reader.py`定义了这种文件的读取方式,它从图像列表文件中解析出图像路径和类别标签。 + +图像列表文件是一个文本文件,其中每一行由一个图像路径和类别标签构成,二者以跳格符(Tab)隔开。类别标签用整数表示,其最小值为0。下面给出一个图像列表文件的片段示例: -读取数据时需要分别指定训练集和验证集的图像列表文件,这里假设这两个文件分别为`train.list`和`val.list`。 +``` +dataset_100/train_images/n03982430_23191.jpeg 1 +dataset_100/train_images/n04461696_23653.jpeg 7 +dataset_100/train_images/n02441942_3170.jpeg 8 +dataset_100/train_images/n03733281_31716.jpeg 2 
+dataset_100/train_images/n03424325_240.jpeg 0 +dataset_100/train_images/n02643566_75.jpeg 8 +``` + +训练时需要分别指定训练集和验证集的图像列表文件。这里假设这两个文件分别为`train.list`和`val.list`,数据读取方式如下: ```python train_reader = paddle.batch( @@ -141,7 +153,10 @@ train_reader = paddle.batch( test_reader = paddle.batch( reader.train_reader('val.list'), batch_size=BATCH_SIZE) +``` +### 定义事件处理程序 +```python # End batch and end pass event handler def event_handler(event): if isinstance(event, paddle.event.EndIteration): @@ -185,3 +200,38 @@ trainer = paddle.trainer.SGD( trainer.train( reader=train_reader, num_passes=200, event_handler=event_handler) ``` + +## 应用模型 +模型训练好后,可以使用下面的代码预测给定图片的类别。 + +```python +# load parameters +with gzip.open('params_pass_10.tar.gz', 'r') as f: + parameters = paddle.parameters.Parameters.from_tar(f) + +def load_image(file): + im = Image.open(file) + im = im.resize((224, 224), Image.ANTIALIAS) + im = np.array(im).astype(np.float32) + # The storage order of the loaded image is W(widht), + # H(height), C(channel). PaddlePaddle requires + # the CHW order, so transpose them. + im = im.transpose((2, 0, 1)) # CHW + # In the training phase, the channel order of CIFAR + # image is B(Blue), G(green), R(Red). But PIL open + # image in RGB mode. It must swap the channel order. + im = im[(2, 1, 0), :, :] # BGR + im = im.flatten() + im = im / 255.0 + return im + +file_list = [line.strip() for line in open(image_list_file)] +test_data = [(load_image(image_file),) for image_file in file_list] +probs = paddle.infer( + output_layer=out, parameters=parameters, input=test_data) +lab = np.argsort(-probs) +for file_name, result in zip(file_list, lab): + print "Label of %s is: %d" % (file_name, result[0]) +``` + +首先从文件中加载训练好的模型(代码里以第10轮迭代的结果为例),然后读取`image_list_file`中的图像。`image_list_file`是一个文本文件,每一行为一个图像路径。`load_image`是一个加载图像的函数。代码使用`paddle.infer`判断`image_list_file`中每个图像的类别,并进行输出。 diff --git a/image_classification/infer.py b/image_classification/infer.py new file mode 100644 index 0000000000..c48a29336f --- /dev/null +++ b/image_classification/infer.py @@ -0,0 +1,83 @@ +import gzip +import paddle.v2 as paddle +import reader +import vgg +import resnet +import alexnet +import googlenet +import argparse +import os +from PIL import Image +import numpy as np + +WIDTH = 224 +HEIGHT = 224 +DATA_DIM = 3 * WIDTH * HEIGHT +CLASS_DIM = 102 + + +def main(): + # parse the argument + parser = argparse.ArgumentParser() + parser.add_argument( + 'data_list', + help='The path of data list file, which consists of one image path per line' + ) + parser.add_argument( + 'model', + help='The model for image classification', + choices=['alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet']) + parser.add_argument( + 'params_path', help='The file which stores the parameters') + args = parser.parse_args() + + # PaddlePaddle init + paddle.init(use_gpu=True, trainer_count=1) + + image = paddle.layer.data( + name="image", type=paddle.data_type.dense_vector(DATA_DIM)) + + if args.model == 'alexnet': + out = alexnet.alexnet(image, class_dim=CLASS_DIM) + elif args.model == 'vgg13': + out = vgg.vgg13(image, class_dim=CLASS_DIM) + elif args.model == 'vgg16': + out = vgg.vgg16(image, class_dim=CLASS_DIM) + elif args.model == 'vgg19': + out = vgg.vgg19(image, class_dim=CLASS_DIM) + elif args.model == 'resnet': + out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM) + elif args.model == 'googlenet': + out, _, _ = googlenet.googlenet(image, class_dim=CLASS_DIM) + + # load parameters + with gzip.open(args.params_path, 'r') as f: + parameters = 
paddle.parameters.Parameters.from_tar(f) + + def load_image(file): + im = Image.open(file) + im = im.resize((WIDTH, HEIGHT), Image.ANTIALIAS) + im = np.array(im).astype(np.float32) + # The storage order of the loaded image is W(widht), + # H(height), C(channel). PaddlePaddle requires + # the CHW order, so transpose them. + im = im.transpose((2, 0, 1)) # CHW + # In the training phase, the channel order of CIFAR + # image is B(Blue), G(green), R(Red). But PIL open + # image in RGB mode. It must swap the channel order. + im = im[(2, 1, 0), :, :] # BGR + im = im.flatten() + im = im / 255.0 + return im + + file_list = [line.strip() for line in open(args.data_list)] + test_data = [(load_image(image_file), ) for image_file in file_list] + probs = paddle.infer( + output_layer=out, parameters=parameters, input=test_data) + lab = np.argsort(-probs) + for file_name, result in zip(file_list, lab): + print "Label of %s is: %d" % (file_name, result[0]) + + +if __name__ == '__main__': + main() diff --git a/image_classification/resnet.py b/image_classification/resnet.py index 63bc4409b7..9c3c46d8ca 100644 --- a/image_classification/resnet.py +++ b/image_classification/resnet.py @@ -22,36 +22,36 @@ def conv_bn_layer(input, return paddle.layer.batch_norm(input=tmp, act=active_type) -def shortcut(input, n_out, stride, b_projection): - if b_projection: - return conv_bn_layer(input, n_out, 1, stride, 0, +def shortcut(input, ch_in, ch_out, stride): + if ch_in != ch_out: + return conv_bn_layer(input, ch_out, 1, stride, 0, paddle.activation.Linear()) else: return input -def basicblock(input, ch_out, stride, b_projection): +def basicblock(input, ch_in, ch_out, stride): + short = shortcut(input, ch_in, ch_out, stride) conv1 = conv_bn_layer(input, ch_out, 3, stride, 1) conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, paddle.activation.Linear()) - short = shortcut(input, ch_out, stride, b_projection) return paddle.layer.addto( - input=[conv2, short], act=paddle.activation.Relu()) + input=[short, conv2], act=paddle.activation.Relu()) -def bottleneck(input, ch_out, stride, b_projection): +def bottleneck(input, ch_in, ch_out, stride): + short = shortcut(input, ch_in, ch_out * 4, stride) conv1 = conv_bn_layer(input, ch_out, 1, stride, 0) conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1) conv3 = conv_bn_layer(conv2, ch_out * 4, 1, 1, 0, paddle.activation.Linear()) - short = shortcut(input, ch_out * 4, stride, b_projection) return paddle.layer.addto( - input=[conv3, short], act=paddle.activation.Relu()) + input=[short, conv3], act=paddle.activation.Relu()) -def layer_warp(block_func, input, features, count, stride): - conv = block_func(input, features, stride, True) +def layer_warp(block_func, input, ch_in, ch_out, count, stride): + conv = block_func(input, ch_in, ch_out, stride) for i in range(1, count): - conv = block_func(conv, features, 1, False) + conv = block_func(conv, ch_in, ch_out, 1) return conv @@ -67,10 +67,10 @@ def resnet_imagenet(input, depth=50, class_dim=100): conv1 = conv_bn_layer( input, ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3) pool1 = paddle.layer.img_pool(input=conv1, pool_size=3, stride=2) - res1 = layer_warp(block_func, pool1, 64, stages[0], 1) - res2 = layer_warp(block_func, res1, 128, stages[1], 2) - res3 = layer_warp(block_func, res2, 256, stages[2], 2) - res4 = layer_warp(block_func, res3, 512, stages[3], 2) + res1 = layer_warp(block_func, pool1, 64, 64, stages[0], 1) + res2 = layer_warp(block_func, res1, 64, 128, stages[1], 2) + res3 = layer_warp(block_func, res2, 128, 256, stages[2], 2) 
+ res4 = layer_warp(block_func, res3, 256, 512, stages[3], 2) pool2 = paddle.layer.img_pool( input=res4, pool_size=7, stride=1, pool_type=paddle.pooling.Avg()) out = paddle.layer.fc( diff --git a/image_classification/train.py b/image_classification/train.py index 3613561629..0a3fdb49a2 100755 --- a/image_classification/train.py +++ b/image_classification/train.py @@ -1,4 +1,5 @@ import gzip +import paddle.v2.dataset.flowers as flowers import paddle.v2 as paddle import reader import vgg @@ -6,19 +7,15 @@ import alexnet import googlenet import argparse -import os DATA_DIM = 3 * 224 * 224 -CLASS_DIM = 100 +CLASS_DIM = 102 BATCH_SIZE = 128 def main(): # parse the argument parser = argparse.ArgumentParser() - parser.add_argument( - 'data_dir', - help='The data directory which contains train.list and val.list') parser.add_argument( 'model', help='The model for image classification', @@ -71,11 +68,15 @@ def main(): train_reader = paddle.batch( paddle.reader.shuffle( - reader.test_reader(os.path.join(args.data_dir, 'train.list')), + flowers.train(), + # To use other data, replace the above line with: + # reader.test_reader('train.list'), buf_size=1000), batch_size=BATCH_SIZE) test_reader = paddle.batch( - reader.train_reader(os.path.join(args.data_dir, 'val.list')), + flowers.valid(), + # To use other data, replace the above line with: + # reader.train_reader('val.list'), batch_size=BATCH_SIZE) # End batch and end pass event handler From 208ca38a204748108d088bc1b6336e2d965dc71d Mon Sep 17 00:00:00 2001 From: wwhu Date: Tue, 13 Jun 2017 19:34:00 +0800 Subject: [PATCH 19/43] fix bug for resnet_cifar10 and adjust learning rate --- image_classification/resnet.py | 6 +++--- image_classification/train.py | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/image_classification/resnet.py b/image_classification/resnet.py index 9c3c46d8ca..eeed714167 100644 --- a/image_classification/resnet.py +++ b/image_classification/resnet.py @@ -85,9 +85,9 @@ def resnet_cifar10(input, depth=32, class_dim=10): nStages = {16, 64, 128} conv1 = conv_bn_layer( input, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) - res1 = layer_warp(basicblock, conv1, 16, n, 1) - res2 = layer_warp(basicblock, res1, 32, n, 2) - res3 = layer_warp(basicblock, res2, 64, n, 2) + res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) + res2 = layer_warp(basicblock, res1, 16, 32, n, 2) + res3 = layer_warp(basicblock, res2, 32, 64, n, 2) pool = paddle.layer.img_pool( input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) out = paddle.layer.fc( diff --git a/image_classification/train.py b/image_classification/train.py index 0a3fdb49a2..b3de41348d 100755 --- a/image_classification/train.py +++ b/image_classification/train.py @@ -31,6 +31,7 @@ def main(): name="label", type=paddle.data_type.integer_value(CLASS_DIM)) extra_layers = None + learning_rate = 0.01 if args.model == 'alexnet': out = alexnet.alexnet(image, class_dim=CLASS_DIM) elif args.model == 'vgg13': @@ -41,6 +42,7 @@ def main(): out = vgg.vgg19(image, class_dim=CLASS_DIM) elif args.model == 'resnet': out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM) + learning_rate = 0.1 elif args.model == 'googlenet': out, out1, out2 = googlenet.googlenet(image, class_dim=CLASS_DIM) loss1 = paddle.layer.cross_entropy_cost( @@ -61,7 +63,7 @@ def main(): momentum=0.9, regularization=paddle.optimizer.L2Regularization(rate=0.0005 * BATCH_SIZE), - learning_rate=0.001 / BATCH_SIZE, + learning_rate=learning_rate / BATCH_SIZE, learning_rate_decay_a=0.1, 
learning_rate_decay_b=128000 * 35, learning_rate_schedule="discexp", ) From e9b94cabbf46578058407c2b051a8e13f55e0420 Mon Sep 17 00:00:00 2001 From: wwhu Date: Tue, 13 Jun 2017 19:41:25 +0800 Subject: [PATCH 20/43] fix bug --- image_classification/resnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/image_classification/resnet.py b/image_classification/resnet.py index eeed714167..ca9330e63b 100644 --- a/image_classification/resnet.py +++ b/image_classification/resnet.py @@ -51,7 +51,7 @@ def bottleneck(input, ch_in, ch_out, stride): def layer_warp(block_func, input, ch_in, ch_out, count, stride): conv = block_func(input, ch_in, ch_out, stride) for i in range(1, count): - conv = block_func(conv, ch_in, ch_out, 1) + conv = block_func(conv, ch_out, ch_out, 1) return conv From d8345eb658940c846a412f46757f078c6d436a41 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 14 Jun 2017 15:00:10 +0800 Subject: [PATCH 21/43] Add unittest. --- deep_speech_2/tests/test_error_rate.py | 29 ++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 deep_speech_2/tests/test_error_rate.py diff --git a/deep_speech_2/tests/test_error_rate.py b/deep_speech_2/tests/test_error_rate.py new file mode 100644 index 0000000000..bb6dca30a0 --- /dev/null +++ b/deep_speech_2/tests/test_error_rate.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +import unittest +import sys +sys.path.append('..') +import error_rate + + +class TestParse(unittest.TestCase): + def test_wer(self): + ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night' + hyp = 'i GOT IT TO the FULLEST i LOVE TO portable FROM OF STORES last night' + word_error_rate = error_rate.wer(ref, hyp) + self.assertTrue(abs(word_error_rate - 0.769230769231) < 1e-6) + + def test_cer_en(self): + ref = 'werewolf' + hyp = 'weae wolf' + char_error_rate = error_rate.cer(ref, hyp) + self.assertTrue(abs(char_error_rate - 0.25) < 1e-6) + + def test_cer_zh(self): + ref = u'我是中国人' + hyp = u'我是 美洲人' + char_error_rate = error_rate.cer(ref, hyp) + self.assertTrue(abs(char_error_rate - 0.6) < 1e-6) + + +if __name__ == '__main__': + unittest.main() From 02da6a2b1d5b44945a7e771de7c99e526e83f0bb Mon Sep 17 00:00:00 2001 From: zhaopu Date: Wed, 14 Jun 2017 18:06:27 +0800 Subject: [PATCH 22/43] update README.md --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 8fd9edfec3..87ac8f2c5a 100644 --- a/README.md +++ b/README.md @@ -55,5 +55,13 @@ PaddlePaddle提供了丰富的运算单元,帮助大家以模块化的方式 - 6.1 [无注意力机制的编码器解码器模型](https://github.com/PaddlePaddle/models/tree/develop/nmt_without_attention) +## 7. 语言模型 + +语言模型是自然语言处理领域里一个重要的基础模型,它是一个概率分布模型,利用它可以确定哪个词序列的可能性更大,或者给定若干个词,可以预测下一个最可能出现的词。语言模型被应用在很多领域,如:自动写作、QA、机器翻译、拼写检查、语音识别、词性标注等。 + +在语言模型的例子中,我们以文本生成为例,提供了RNN LM(包括LSTM、GRU)和N-Gram LM,供大家学习和使用。用户可以通过文档中的 “使用说明” 快速上手:适配训练语料,以训练 “自动写诗”、“自动写散文” 等有趣的模型。 + +- 7.1 [语言模型](https://github.com/PaddlePaddle/models/tree/develop/language_model) + ## Copyright and License PaddlePaddle is provided under the [Apache-2.0 license](LICENSE). 
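To make the expected values in `test_error_rate.py` above concrete, here is a minimal usage sketch. It assumes the `error_rate.py` module from the preceding patches is importable; the edit counts quoted in the comments describe one optimal Levenshtein alignment (others exist with the same cost).

```python
# -*- coding: utf-8 -*-
import error_rate  # assumes deep_speech_2/error_rate.py is on the path

ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night'
hyp = 'i GOT IT TO the FULLEST i LOVE TO portable FROM OF STORES last night'

# The reference has 13 words and the cheapest word-level alignment costs
# 10 edits (e.g. 6 substitutions, 3 insertions, 1 deletion), so
# wer() returns 10.0 / 13 ~= 0.769230769231, as asserted in test_wer.
print error_rate.wer(ref, hyp)

# Char-level: u'我是中国人' has 5 characters and the cheapest alignment
# against u'我是 美洲人' costs 3 edits, so cer() returns 3.0 / 5 = 0.6,
# as asserted in test_cer_zh.
print error_rate.cer(u'我是中国人', u'我是 美洲人')
```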
From 5f8bdf0b08d0ad33d612bc15ec36bc211b38b0f3 Mon Sep 17 00:00:00 2001 From: zhaopu Date: Wed, 14 Jun 2017 18:14:55 +0800 Subject: [PATCH 23/43] update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 87ac8f2c5a..a53bc0fc6f 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ PaddlePaddle提供了丰富的运算单元,帮助大家以模块化的方式 在语言模型的例子中,我们以文本生成为例,提供了RNN LM(包括LSTM、GRU)和N-Gram LM,供大家学习和使用。用户可以通过文档中的 “使用说明” 快速上手:适配训练语料,以训练 “自动写诗”、“自动写散文” 等有趣的模型。 -- 7.1 [语言模型](https://github.com/PaddlePaddle/models/tree/develop/language_model) +- 7.1 [基于LSTM、GRU、N-Gram的文本生成模型](https://github.com/PaddlePaddle/models/tree/develop/language_model) ## Copyright and License PaddlePaddle is provided under the [Apache-2.0 license](LICENSE). From b72aec53ae5c18cb9f46c509939e5abb4df934fc Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 14 Jun 2017 18:14:50 +0800 Subject: [PATCH 24/43] Enable min_batch_num in train.py and update train info print. --- deep_speech_2/compute_mean_std.py | 0 deep_speech_2/data_utils/__init__.py | 0 deep_speech_2/data_utils/audio.py | 0 deep_speech_2/data_utils/augmentor/__init__.py | 0 deep_speech_2/data_utils/augmentor/augmentation.py | 0 deep_speech_2/data_utils/augmentor/base.py | 0 deep_speech_2/data_utils/augmentor/volume_perturb.py | 0 deep_speech_2/data_utils/featurizer/__init__.py | 0 .../data_utils/featurizer/audio_featurizer.py | 0 .../data_utils/featurizer/speech_featurizer.py | 0 deep_speech_2/data_utils/featurizer/text_featurizer.py | 0 deep_speech_2/data_utils/normalizer.py | 0 deep_speech_2/data_utils/speech.py | 0 deep_speech_2/data_utils/utils.py | 0 deep_speech_2/datasets/run_all.sh | 0 deep_speech_2/decoder.py | 0 deep_speech_2/train.py | 10 ++++++---- 17 files changed, 6 insertions(+), 4 deletions(-) mode change 100755 => 100644 deep_speech_2/compute_mean_std.py mode change 100755 => 100644 deep_speech_2/data_utils/__init__.py mode change 100755 => 100644 deep_speech_2/data_utils/audio.py mode change 100755 => 100644 deep_speech_2/data_utils/augmentor/__init__.py mode change 100755 => 100644 deep_speech_2/data_utils/augmentor/augmentation.py mode change 100755 => 100644 deep_speech_2/data_utils/augmentor/base.py mode change 100755 => 100644 deep_speech_2/data_utils/augmentor/volume_perturb.py mode change 100755 => 100644 deep_speech_2/data_utils/featurizer/__init__.py mode change 100755 => 100644 deep_speech_2/data_utils/featurizer/audio_featurizer.py mode change 100755 => 100644 deep_speech_2/data_utils/featurizer/speech_featurizer.py mode change 100755 => 100644 deep_speech_2/data_utils/featurizer/text_featurizer.py mode change 100755 => 100644 deep_speech_2/data_utils/normalizer.py mode change 100755 => 100644 deep_speech_2/data_utils/speech.py mode change 100755 => 100644 deep_speech_2/data_utils/utils.py mode change 100755 => 100644 deep_speech_2/datasets/run_all.sh mode change 100755 => 100644 deep_speech_2/decoder.py diff --git a/deep_speech_2/compute_mean_std.py b/deep_speech_2/compute_mean_std.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/__init__.py b/deep_speech_2/data_utils/__init__.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/audio.py b/deep_speech_2/data_utils/audio.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/augmentor/__init__.py b/deep_speech_2/data_utils/augmentor/__init__.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/augmentor/augmentation.py 
b/deep_speech_2/data_utils/augmentor/augmentation.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/augmentor/base.py b/deep_speech_2/data_utils/augmentor/base.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/augmentor/volume_perturb.py b/deep_speech_2/data_utils/augmentor/volume_perturb.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/featurizer/__init__.py b/deep_speech_2/data_utils/featurizer/__init__.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/featurizer/audio_featurizer.py b/deep_speech_2/data_utils/featurizer/audio_featurizer.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/featurizer/speech_featurizer.py b/deep_speech_2/data_utils/featurizer/speech_featurizer.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/featurizer/text_featurizer.py b/deep_speech_2/data_utils/featurizer/text_featurizer.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/normalizer.py b/deep_speech_2/data_utils/normalizer.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/speech.py b/deep_speech_2/data_utils/speech.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/data_utils/utils.py b/deep_speech_2/data_utils/utils.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/datasets/run_all.sh b/deep_speech_2/datasets/run_all.sh old mode 100755 new mode 100644 diff --git a/deep_speech_2/decoder.py b/deep_speech_2/decoder.py old mode 100755 new mode 100644 diff --git a/deep_speech_2/train.py b/deep_speech_2/train.py index 7ac4626f4c..6074aa358d 100644 --- a/deep_speech_2/train.py +++ b/deep_speech_2/train.py @@ -143,11 +143,13 @@ def data_generator(): train_batch_reader = train_generator.batch_reader_creator( manifest_path=args.train_manifest_path, batch_size=args.batch_size, + min_batch_size=args.trainer_count, sortagrad=args.use_sortagrad if args.init_model_path is None else False, batch_shuffle=True) test_batch_reader = test_generator.batch_reader_creator( manifest_path=args.dev_manifest_path, batch_size=args.batch_size, + min_batch_size=1, # must be 1, but will have errors. sortagrad=False, batch_shuffle=False) @@ -157,11 +159,11 @@ def event_handler(event): if isinstance(event, paddle.event.EndIteration): cost_sum += event.cost cost_counter += 1 - if event.batch_id % 50 == 0: - print("\nPass: %d, Batch: %d, TrainCost: %f" % - (event.pass_id, event.batch_id, cost_sum / cost_counter)) + if (event.batch_id + 1) % 100 == 0: + print("\nPass: %d, Batch: %d, TrainCost: %f" % ( + event.pass_id, event.batch_id + 1, cost_sum / cost_counter)) cost_sum, cost_counter = 0.0, 0 - with gzip.open("params_tmp.tar.gz", 'w') as f: + with gzip.open("params.tar.gz", 'w') as f: parameters.to_tar(f) else: sys.stdout.write('.') From ae3af235eab9fdd5793e7bdab4c498b474193cca Mon Sep 17 00:00:00 2001 From: zhaopu Date: Wed, 14 Jun 2017 18:20:44 +0800 Subject: [PATCH 25/43] update readme --- README.md | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index a53bc0fc6f..fb0e20bf42 100644 --- a/README.md +++ b/README.md @@ -14,54 +14,57 @@ PaddlePaddle提供了丰富的运算单元,帮助大家以模块化的方式 在词向量的例子中,我们向大家展示如何使用Hierarchical-Sigmoid 和噪声对比估计(Noise Contrastive Estimation,NCE)来加速词向量的学习。 - 1.1 [Hsigmoid加速词向量训练](https://github.com/PaddlePaddle/models/tree/develop/word_embedding) +- 1.2 [噪声对比估计加速词向量训练](https://github.com/PaddlePaddle/models/tree/develop/nce_cost) -## 2. 点击率预估 + +## 2. 
语言模型 + +语言模型是自然语言处理领域里一个重要的基础模型,它是一个概率分布模型,利用它可以确定哪个词序列的可能性更大,或者给定若干个词,可以预测下一个最可能出现的词。语言模型被应用在很多领域,如:自动写作、QA、机器翻译、拼写检查、语音识别、词性标注等。 + +在语言模型的例子中,我们以文本生成为例,提供了RNN LM(包括LSTM、GRU)和N-Gram LM,供大家学习和使用。用户可以通过文档中的 “使用说明” 快速上手:适配训练语料,以训练 “自动写诗”、“自动写散文” 等有趣的模型。 + +- 2.1 [基于LSTM、GRU、N-Gram的文本生成模型](https://github.com/PaddlePaddle/models/tree/develop/language_model) + +## 3. 点击率预估 点击率预估模型预判用户对一条广告点击的概率,对每次广告的点击情况做出预测,是广告技术的核心算法之一。逻谛斯克回归对大规模稀疏特征有着很好的学习能力,在点击率预估任务发展的早期一统天下。近年来,DNN 模型由于其强大的学习能力逐渐接过点击率预估任务的大旗。 在点击率预估的例子中,我们给出谷歌提出的 Wide & Deep 模型。这一模型融合了适用于学习抽象特征的 DNN 和适用于大规模稀疏特征的逻谛斯克回归两者模型的优点,可以作为一种相对成熟的模型框架使用, 在工业界也有一定的应用。 -- 2.1 [Wide & deep 点击率预估模型](https://github.com/PaddlePaddle/models/tree/develop/ctr) +- 3.1 [Wide & deep 点击率预估模型](https://github.com/PaddlePaddle/models/tree/develop/ctr) -## 3. 文本分类 +## 4. 文本分类 文本分类是自然语言处理领域最基础的任务之一,深度学习方法能够免除复杂的特征工程,直接使用原始文本作为输入,数据驱动地最优化分类准确率。 在文本分类的例子中,我们以情感分类任务为例,提供了基于DNN的非序列文本分类模型,以及基于CNN的序列模型供大家学习和使用(基于LSTM的模型见PaddleBook中[情感分类](https://github.com/PaddlePaddle/book/blob/develop/06.understand_sentiment/README.cn.md)一课)。 -- 3.1 [基于 DNN / CNN 的情感分类](https://github.com/PaddlePaddle/models/tree/develop/text_classification) +- 4.1 [基于 DNN / CNN 的情感分类](https://github.com/PaddlePaddle/models/tree/develop/text_classification) -## 4. 排序学习 +## 5. 排序学习 排序学习(Learning to Rank, LTR)是信息检索和搜索引擎研究的核心问题之一,通过机器学习方法学习一个分值函数对待排序的候选进行打分,再根据分值的高低确定序关系。深度神经网络可以用来建模分值函数,构成各类基于深度学习的LTR模型。 在排序学习的例子中,我们介绍基于 RankLoss 损失函数的 Pairwise 排序模型和基于LambdaRank损失函数的Listwise排序模型(Pointwise学习策略见PaddleBook中[推荐系统](https://github.com/PaddlePaddle/book/blob/develop/05.recommender_system/README.cn.md)一课)。 -- 4.1 [基于 Pairwise 和 Listwise 的排序学习](https://github.com/PaddlePaddle/models/tree/develop/ltr) +- 5.1 [基于 Pairwise 和 Listwise 的排序学习](https://github.com/PaddlePaddle/models/tree/develop/ltr) -## 5. 序列标注 +## 6. 序列标注 给定输入序列,序列标注模型为序列中每一个元素贴上一个类别标签,是自然语言处理领域最基础的任务之一。随着深度学习的不断探索和发展,利用循环神经网络学习输入序列的特征表示,条件随机场(Conditional Random Field, CRF)在特征基础上完成序列标注任务,逐渐成为解决序列标注问题的标配解决方案。 在序列标注的例子中,我们以命名实体识别(Named Entity Recognition,NER)任务为例,介绍如何训练一个端到端的序列标注模型。 -- 5.1 [命名实体识别](https://github.com/PaddlePaddle/models/tree/develop/sequence_tagging_for_ner) +- 6.1 [命名实体识别](https://github.com/PaddlePaddle/models/tree/develop/sequence_tagging_for_ner) -## 6. 序列到序列学习 +## 7. 序列到序列学习 序列到序列学习实现两个甚至是多个不定长模型之间的映射,有着广泛的应用,包括:机器翻译、智能对话与问答、广告创意语料生成、自动编码(如金融画像编码)、判断多个文本串之间的语义相关性等。 在序列到序列学习的例子中,我们以机器翻译任务为例,提供了多种改进模型,供大家学习和使用。包括:不带注意力机制的序列到序列映射模型,这一模型是所有序列到序列学习模型的基础;使用 scheduled sampling 改善 RNN 模型在生成任务中的错误累积问题;带外部记忆机制的神经机器翻译,通过增强神经网络的记忆能力,来完成复杂的序列到序列学习任务。 -- 6.1 [无注意力机制的编码器解码器模型](https://github.com/PaddlePaddle/models/tree/develop/nmt_without_attention) - -## 7. 语言模型 - -语言模型是自然语言处理领域里一个重要的基础模型,它是一个概率分布模型,利用它可以确定哪个词序列的可能性更大,或者给定若干个词,可以预测下一个最可能出现的词。语言模型被应用在很多领域,如:自动写作、QA、机器翻译、拼写检查、语音识别、词性标注等。 - -在语言模型的例子中,我们以文本生成为例,提供了RNN LM(包括LSTM、GRU)和N-Gram LM,供大家学习和使用。用户可以通过文档中的 “使用说明” 快速上手:适配训练语料,以训练 “自动写诗”、“自动写散文” 等有趣的模型。 +- 7.1 [无注意力机制的编码器解码器模型](https://github.com/PaddlePaddle/models/tree/develop/nmt_without_attention) -- 7.1 [基于LSTM、GRU、N-Gram的文本生成模型](https://github.com/PaddlePaddle/models/tree/develop/language_model) ## Copyright and License PaddlePaddle is provided under the [Apache-2.0 license](LICENSE). 
From a84bdf646c8a609acaf3710a48f52bb99ee6af11 Mon Sep 17 00:00:00 2001
From: chrisxu2016 <823254351@qq.com>
Date: Thu, 15 Jun 2017 03:08:30 +0800
Subject: [PATCH 26/43] add augmentation

---
 deep_speech_2/data_utils/audio.py             | 398 ++++++++++++++++-
 .../data_utils/augmentor/audio_database.py    | 401 ++++++++++++++++++
 .../data_utils/augmentor/augmentation.py      |  15 +
 .../data_utils/augmentor/implus_response.py   |  76 ++++
 .../data_utils/augmentor/noise_speech.py      | 318 ++++++++++++++
 .../online_bayesian_normalization.py          |  57 +++
 .../data_utils/augmentor/resampler.py         |  30 ++
 .../data_utils/augmentor/speed_perturb.py     |  53 +++
 .../data_utils/augmentor/volume_perturb.py    |   4 +-
 9 files changed, 1339 insertions(+), 13 deletions(-)
 create mode 100755 deep_speech_2/data_utils/augmentor/audio_database.py
 create mode 100755 deep_speech_2/data_utils/augmentor/implus_response.py
 create mode 100755 deep_speech_2/data_utils/augmentor/noise_speech.py
 create mode 100755 deep_speech_2/data_utils/augmentor/online_bayesian_normalization.py
 create mode 100755 deep_speech_2/data_utils/augmentor/resampler.py
 create mode 100755 deep_speech_2/data_utils/augmentor/speed_perturb.py

diff --git a/deep_speech_2/data_utils/audio.py b/deep_speech_2/data_utils/audio.py
index 916c8ac1ae..aef13c30ff 100755
--- a/deep_speech_2/data_utils/audio.py
+++ b/deep_speech_2/data_utils/audio.py
@@ -6,6 +6,10 @@
 import numpy as np
 import io
 import soundfile
+import random
+import warnings
+import scikits.samplerate
+from scipy import signal
 
 
 class AudioSegment(object):
@@ -62,6 +66,69 @@ def from_file(cls, file):
         samples, sample_rate = soundfile.read(file, dtype='float32')
         return cls(samples, sample_rate)
 
+    @classmethod
+    def slice_from_file(cls, fname, start=None, end=None):
+        """
+        Loads a small section of an audio file without having to load
+        the entire file into memory, which can be incredibly wasteful.
+
+        :param fname: input audio file name
+        :type fname: basestring
+        :param start: start time in seconds (supported granularity is ms)
+            If start is negative, it wraps around from the end. If not
+            provided, this function reads from the very beginning.
+        :type start: float
+        :param end: end time in seconds (supported granularity is ms)
+            If end is negative, it wraps around from the end. If not
+            provided, the default behavior is to read to the end of the
+            file.
+        :type end: float
+
+        :return: the specified slice of input audio in the
+            audio.AudioSegment format.
+        """
+        sndfile = soundfile.SoundFile(fname)
+
+        sample_rate = sndfile.samplerate
+        if sndfile.channels != 1:
+            raise TypeError("{} has more than 1 channel.".format(fname))
+
+        duration = float(len(sndfile)) / sample_rate
+
+        if start is None:
+            start = 0.0
+        if end is None:
+            end = duration
+
+        if start < 0.0:
+            start += duration
+        if end < 0.0:
+            end += duration
+
+        if start < 0.0:
+            raise IndexError("The slice start position ({} s) is out of "
+                             "bounds. Filename: {}".format(start, fname))
+        if end < 0.0:
+            raise IndexError("The slice end position ({} s) is out of "
+                             "bounds. Filename: {}".format(end, fname))
+
+        if start > end:
+            raise IndexError("The slice start position ({} s) is later than "
+                             "the slice end position ({} s)."
+                             .format(start, end))
+
+        if end > duration:
+            raise ValueError("The slice end time ({} s) is out of "
+                             "bounds (> {} s). Filename: {}"
+                             .format(end, duration, fname))
+
+        start_frame = int(start * sample_rate)
+        end_frame = int(end * sample_rate)
+        sndfile.seek(start_frame)
+        data = sndfile.read(frames=end_frame - start_frame, dtype='float32')
+
+        return cls(data, sample_rate)
+
     @classmethod
     def from_bytes(cls, bytes):
         """Create audio segment from a byte string containing audio samples.
@@ -75,6 +142,44 @@ def from_bytes(cls, bytes):
             io.BytesIO(bytes), dtype='float32')
         return cls(samples, sample_rate)
 
+    @classmethod
+    def make_silence(cls, duration, sample_rate):
+        """Creates a silent audio segment of the given duration and
+        sample rate.
+
+        :param duration: length of silence in seconds
+        :type duration: scalar
+        :param sample_rate: sample rate
+        :type sample_rate: scalar
+        :returns: silence of the given duration
+        :rtype: AudioSegment
+        """
+        samples = np.zeros(int(float(duration) * sample_rate))
+        return cls(samples, sample_rate)
+
+    @classmethod
+    def concatenate(cls, *segments):
+        """Concatenate an arbitrary number of audio segments together.
+
+        :param *segments: input audio segments
+        :type *segments: [AudioSegment]
+        """
+        # Perform basic sanity-checks.
+        N = len(segments)
+        if N == 0:
+            raise ValueError("No audio segments are given to concatenate.")
+        sample_rate = segments[0]._sample_rate
+        for segment in segments:
+            if sample_rate != segment._sample_rate:
+                raise ValueError("Can't concatenate segments with "
+                                 "different sample rates.")
+            if type(segment) is not cls:
+                raise TypeError("Only audio segments of the same type "
+                                "instance can be concatenated.")
+
+        samples = np.concatenate([seg.samples for seg in segments])
+        return cls(samples, sample_rate)
+
     def to_wav_file(self, filepath, dtype='float32'):
         """Save audio segment to disk as wav file.
@@ -143,23 +248,293 @@ def change_speed(self, speed_rate):
         new_indices = np.linspace(start=0, stop=old_length, num=new_length)
         self._samples = np.interp(new_indices, old_indices, self._samples)
 
-    def normalize(self, target_sample_rate):
-        raise NotImplementedError()
+    def normalize(self, target_db=-20, max_gain_db=300.0):
+        """Normalize audio to a desired RMS value in decibels.
+
+        Note that this is an in-place transformation.
+
+        :param target_db: Target RMS value in decibels. This value
+            should be less than 0.0 as 0.0 is full-scale audio.
+        :type target_db: float, optional
+        :param max_gain_db: Max amount of gain in dB that can be applied
+            for normalization. This is to prevent nans when attempting
+            to normalize a signal consisting of all zeros.
+        :type max_gain_db: float, optional
 
-    def resample(self, target_sample_rate):
-        raise NotImplementedError()
+        :raises ValueError: if the required gain to normalize the
+            segment to the target_db value exceeds max_gain_db.
+        """
+        gain = target_db - self.rms_db
+        if gain > max_gain_db:
+            raise ValueError(
+                "Unable to normalize segment to {} dB because it has an RMS "
+                "value of {} dB and the difference exceeds max_gain_db ({} dB)"
+                .format(target_db, self.rms_db, max_gain_db))
+        self.apply_gain(gain)
+
+    def normalize_online_bayesian(self,
+                                  target_db,
+                                  prior_db,
+                                  prior_samples,
+                                  startup_delay=0.0):
+        """
+        Normalize audio using a production-compatible online/causal algorithm.
+        This uses an exponential likelihood and gamma prior to make
+        online estimates of the RMS even when there are very few samples.
+ + Note that this is an in-place transformation. + + :param target_db: Target RMS value in decibels + :type target_bd: scalar + :param prior_db: Prior RMS estimate in decibels + :type prior_db: scalar + :param prior_samples: Prior strength in number of samples + :type prior_samples: scalar + :param startup_delay: Default: 0.0 s. If provided, this + function will accrue statistics for the first startup_delay + seconds before applying online normalization. + :type startup_delay: scalar + """ + # Estimate total RMS online + startup_sample_idx = min(self.num_samples - 1, + int(self.sample_rate * startup_delay)) + prior_mean_squared = 10.**(prior_db / 10.) + prior_sum_of_squares = prior_mean_squared * prior_samples + cumsum_of_squares = np.cumsum(self.samples**2) + sample_count = np.arange(len(self)) + 1 + if startup_sample_idx > 0: + cumsum_of_squares[:startup_sample_idx] = \ + cumsum_of_squares[startup_sample_idx] + sample_count[:startup_sample_idx] = \ + sample_count[startup_sample_idx] + mean_squared_estimate = ((cumsum_of_squares + prior_sum_of_squares) / + (sample_count + prior_samples)) + rms_estimate_db = 10 * np.log10(mean_squared_estimate) + + # Compute required time-varying gain + gain_db = target_db - rms_estimate_db + + # Apply gain to new segment + self.apply_gain(gain_db) + + def normalize_ewma(self, + target_db, + decay_rate, + startup_delay, + rms_eps=1e-6, + max_gain_db=300.0): + startup_sample_idx = min(self.num_samples - 1, + int(self.sample_rate * startup_delay)) + mean_sq = self.samples**2 + if startup_sample_idx > 0: + mean_sq[:startup_sample_idx] = \ + np.sum(mean_sq[:startup_sample_idx]) / startup_sample_idx + idx_start = max(0, startup_sample_idx - 1) + initial_condition = mean_sq[idx_start] * decay_rate + mean_sq[idx_start:] = lfilter( + [1.0 - decay_rate], [1.0, -decay_rate], + mean_sq[idx_start:], + axis=0, + zi=[initial_condition])[0] + rms_estimate_db = 10.0 * np.log10(mean_sq + rms_eps) + gain_db = target_db - rms_estimate_db + if np.any(gain_db > max_gain_db): + warnings.warn( + "Unable to normalize segment to {} dB because it has an RMS " + "value of {} dB and the difference exceeds max_gain_db ({} dB)" + .format(target_db, self.rms_db, max_gain_db), + NormalizationWarning) + gain_db = np.minimum(gain_db, max_gain_db) + self.apply_gain(gain_db) + + def resample(self, target_sample_rate, quality='sinc_medium'): + """Resample audio and return new AudioSegment. + This resamples the audio to a new sample rate and returns a brand + new AudioSegment. The existing AudioSegment is unchanged. + + Note that this is an in-place transformation. + + :param new_sample_rate: target sample rate + :type new_sample_rate: scalar + :param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}. + Sets resampling speed/quality tradeoff. + See http://www.mega-nerd.com/SRC/api_misc.html#Converters + :type quality: basestring + """ + resample_ratio = target_sample_rate / self._sample_rate + new_samples = scikits.samplerate.resample( + self._samples, r=resample_ratio, type=quality) + self._samples = new_samples + self._sample_rate = new_sample_rate def pad_silence(self, duration, sides='both'): - raise NotImplementedError() + """Pads this audio sample with a period of silence. + + Note that this is an in-place transformation. + + :param duration: length of silence in seconds to pad + :type duration: float + :param sides: + 'beginning' - adds silence in the beginning + 'end' - adds silence in the end + 'both' - adds silence in both the beginning and the end. 
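+            (For example, with illustrative numbers: duration=0.5 and
+            sides='both' turns a 2.0 s segment into a 3.0 s segment,
+            since the silence is added on each padded side.)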
+ :type sides: basestring + """ + if duration == 0.0: + return self + cls = type(self) + silence = cls.make_silence(duration, self._sample_rate) + if sides == "beginning": + padded = cls.concatenate(silence, self) + elif sides == "end": + padded = cls.concatenate(self, silence) + elif sides == "both": + padded = cls.concatenate(silence, self, silence) + else: + raise ValueError("Unknown value for the kwarg 'sides'") + self._samples = padded._samples + self._sample_rate = padded._sample_rate def subsegment(self, start_sec=None, end_sec=None): - raise NotImplementedError() + """Return new AudioSegment containing audio between given boundaries. + + :param start_sec: Beginning of subsegment in seconds, + (beginning of segment if None). + :type start_sec: scalar + :param end_sec: End of subsegment in seconds, + (end of segment if None). + :type end_sec: scalar + + :return: New AudioSegment containing specified + subsegment. + :trype: AudioSegment + """ + # Default boundaries + if start_sec is None: + start_sec = 0.0 + if end_sec is None: + end_sec = self.duration + + # negative boundaries are relative to end of segment + if start_sec < 0.0: + start_sec = self.duration + start_sec + if end_sec < 0.0: + end_sec = self.duration + end_sec - def convolve(self, filter, allow_resample=False): - raise NotImplementedError() + start_sample = int(round(start_sec * self._sample_rate)) + end_sample = int(round(end_sec * self._sample_rate)) + samples = self._samples[start_sample:end_sample] - def convolve_and_normalize(self, filter, allow_resample=False): - raise NotImplementedError() + return type(self)(samples, sample_rate=self._sample_rate) + + def random_subsegment(self, subsegment_length, rng=None): + """ + Return a random subsegment of a specified length in seconds. + + :param subsegment_length: Subsegment length in seconds. + :type subsegment_length: scalar + :param rng: Random number generator state + :type rng: random.Random [optional] + + + :return:clip (SpeechDLSegment): New SpeechDLSegmen containing random + subsegment of original segment. + """ + if rng is None: + rng = random.Random() + + if subsegment_length > self.duration: + raise ValueError("Length of subsegment must not be greater " + "than original segment.") + start_time = rng.uniform(0.0, self.duration - subsegment_length) + return self.subsegment(start_time, start_time + subsegment_length) + + def convolve(self, ir, allow_resampling=False): + """Convolve this audio segment with the given filter. + + :param ir: impulse response + :type ir: AudioSegment + :param allow_resampling: indicates whether resampling is allowed + when the ir has a different sample rate from this signal. + :type allow_resampling: boolean + """ + if allow_resampling and self.sample_rate != ir.sample_rate: + ir = ir.resample(self.sample_rate) + + if self.sample_rate != ir.sample_rate: + raise ValueError("Impulse response sample rate ({}Hz) is " + "equal to base signal sample rate ({}Hz)." + .format(ir.sample_rate, self.sample_rate)) + + samples = signal.fftconvolve(self.samples, ir.samples, "full") + self._samples = samples + + def convolve_and_normalize(self, ir, allow_resample=False): + """Convolve and normalize the resulting audio segment so that it + has the same average power as the input signal. + + :param ir: impulse response + :type ir: AudioSegment + :param allow_resampling: indicates whether resampling is allowed + when the ir has a different sample rate from this signal. 
+ :type allow_resampling: boolean + """ + self.convolve(ir, allow_resampling=allow_resampling) + self.normalize(target_db=self.rms_db) + + def add_noise(self, + noise, + snr_dB, + allow_downsampling=False, + max_gain_db=300.0, + rng=None): + """Adds the given noise segment at a specific signal-to-noise ratio. + If the noise segment is longer than this segment, a random subsegment + of matching length is sampled from it and used instead. + + :param noise: Noise signal to add. + :type noise: SpeechDLSegment + :param snr_dB: Signal-to-Noise Ratio, in decibels. + :type snr_dB: scalar + :param allow_downsampling: whether to allow the noise signal + to be downsampled to match the base signal sample rate. + :type allow_downsampling: boolean + :param max_gain_db: Maximum amount of gain to apply to noise + signal before adding it in. This is to prevent attempting + to apply infinite gain to a zero signal. + :type max_gain_db: scalar + :param rng: Random number generator state. + :type rng: random.Random + + Returns: + SpeechDLSegment: signal with noise added. + """ + if rng is None: + rng = random.Random() + + if allow_downsampling and noise.sample_rate > self.sample_rate: + noise = noise.resample(self.sample_rate) + + if noise.sample_rate != self.sample_rate: + raise ValueError("Noise sample rate ({}Hz) is not equal to " + "base signal sample rate ({}Hz)." + .format(noise.sample_rate, self.sample_rate)) + if noise.duration < self.duration: + raise ValueError("Noise signal ({} sec) must be at " + "least as long as base signal ({} sec)." + .format(noise.duration, self.duration)) + noise_gain_db = self.rms_db - noise.rms_db - snr_dB + noise_gain_db = min(max_gain_db, noise_gain_db) + noise_subsegment = noise.random_subsegment(self.duration, rng=rng) + output = self + self.tranform_noise(noise_subsegment, noise_gain_db) + self._samples = output._samples + self._sample_rate = output._sample_rate @property def samples(self): @@ -186,7 +554,7 @@ def num_samples(self): :return: Number of samples. :rtype: int """ - return self._samples.shape(0) + return self._samples.shape[0] @property def duration(self): @@ -250,3 +618,9 @@ def _convert_samples_from_float32(self, samples, dtype): else: raise TypeError("Unsupported sample type: %s." % samples.dtype) return output_samples.astype(dtype) + + def tranform_noise(self, noise_subsegment, noise_gain_db): + """ tranform noise file + """ + return type(self)(noise_subsegment._samples * (10.**( + noise_gain_db / 20.)), noise_subsegment._sample_rate) diff --git a/deep_speech_2/data_utils/augmentor/audio_database.py b/deep_speech_2/data_utils/augmentor/audio_database.py new file mode 100755 index 0000000000..e41c6dd72b --- /dev/null +++ b/deep_speech_2/data_utils/augmentor/audio_database.py @@ -0,0 +1,401 @@ +from __future__ import print_function +from collections import defaultdict +import bisect +import logging +import numpy as np +import os +import random +import sys + +UNK_TAG = "" + + +def stream_audio_index(fname, UNK=UNK_TAG): + """Reads an audio index file and emits one record in the index at a time. + + :param fname: audio index path + :type fname: basestring + :param UNK: UNK token to denote that certain audios are not tagged. + :type UNK: basesring + + Yields: + idx, duration, size, relpath, tags (int, float, int, str, list(str)): + audio file id, length of the audio in seconds, size in byte, + relative path w.r.t. 
to the root noise directory, list of tags + """ + with open(fname) as audio_index_file: + for i, line in enumerate(audio_index_file): + tok = line.strip().split("\t") + assert len(tok) >= 4, \ + "Invalid line at line {} in file {}".format( + i + 1, audio_index_file) + idx = int(tok[0]) + duration = float(tok[1]) + # Sometimes, the duration can round down to 0.0 + assert duration >= 0.0, \ + "Invalid duration at line {} in file {}".format( + i + 1, audio_index_file) + size = int(tok[2]) + assert size > 0, \ + "Invalid size at line {} in file {}".format( + i + 1, audio_index_file) + relpath = tok[3] + if len(tok) == 4: + tags = [UNK_TAG] + else: + tags = tok[4:] + yield idx, duration, size, relpath, tags + + +def truncate_float(val, ndigits=6): + """ Truncates a floating-point value to have the desired number of + digits after the decimal point. + + :param val: input value. + :type val: float + :parma ndigits: desired number of digits. + :type ndigits: int + + :return: truncated value + :rtype: float + """ + p = 10.0**ndigits + return float(int(val * p)) / p + + +def print_audio_index(idx, duration, size, relpath, tags, file=sys.stdout): + """Prints an audio record to the index file. + + :param idx: Audio file id. + :type idx: int + :param duration: length of the audio in seconds + :type duration: float + :param size: size of the file in bytes + :type size: int + :param relpath: relative path w.r.t. to the root noise directory. + :type relpath: basestring + :parma tags: list of tags + :parma tags: list(str) + :parma file: file to which we want to write an audio record. + :type file: sys.stdout + """ + file.write("{}\t{:.6f}\t{}\t{}" + .format(idx, truncate_float(duration, ndigits=6), size, relpath)) + for tag in tags: + file.write("\t{}".format(tag)) + file.write("\n") + + +class AudioIndex(object): + """ In-memory index of audio files that do not have annotations. + This supports duration-based sampling and sampling from a target + distribution. + + Each line in the index file consists of the following fields: + (id (int), duration (float), size (int), relative path (str), + list of tags ([str])) + """ + + def __init__(self): + self.audio_dir = None + self.index_fname = None + self.tags = None + self.bin_size = 2.0 + self.clear() + + def clear(self): + """ Clears the index + + Returns: + None + """ + self.idx_to_record = {} + # The list of indices correspond to audio files whose duration is + # greater than or equal to the key. + self.duration_to_id_set = {} + self.duration_to_id_set_per_tag = defaultdict(lambda: {}) + self.duration_to_list = defaultdict(lambda: []) + self.duration_to_list_per_tag = defaultdict( + lambda: defaultdict(lambda: [])) + self.tag_to_id_set = defaultdict(lambda: set()) + self.shared_duration_bins = [] + self.id_set_complete = set() + self.id_set = set() + self.duration_bins = [] + + def has_audio(self, distr=None): + """ + :param distr: The target distribution of audio tags that we want to + match. If this is not supplied, the function simply checks that + there are some audio files. + :parma distr: dict + :return: True if there are audio files. + :rtype: boolean + """ + if distr is None: + return len(self.id_set) > 0 + else: + for tag in distr: + if tag not in self.duration_to_list_per_tag: + return False + return True + + def _load_all_records_from_disk(self, audio_dir, idx_fname, bin_size): + """Loads all audio records from the disk into memory and groups them + into chunks based on their duration and the bin_size granalarity. 
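+
+        (Illustrative example: with bin_size=2.0 and durations between
+        1.2 s and 9.0 s, the computed bin lower bounds are 1.2, 3.2, 5.2,
+        7.2 and 9.2, and each bin's id set contains every audio at least
+        that long.)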
+ + Once all the records are read, indices are built from these records + by another function so that the audio samples can be drawn efficiently. + + Updates: + self.audio_dir (path): audio root directory + self.idx_fname (path): audio database index filename + self.bin_size (float): granularity of bins + self.idx_to_record (dict): maps from the audio id to + (duration, file_size, relative_path, tags) + self.tag_to_id_set (dict): maps from the tag to + the set of id's of audios that have this tag. + self.id_set_complete (set): set of all audio id's in the index file + self.min_duration (float): minimum audio duration observed in the + index file + self.duration_bins (list): the lower bounds on the duration of + audio files falling in each bin + self.duration_to_id_set (dict): contains (k, v) where v is the set + of id's of audios whose lengths are longer than or equal to k. + (e.g. k is the duration lower bound of this bin). + self.duration_to_id_set_per_tag (dict): Something like above but + has a finer granularity mapping from the tag to + duration_to_id_set. + self.shared_duration_bins (list): list of sets where each set + contains duration lower bounds whose audio id sets are the + same. The rationale for having this is that there are a few + but extremely long audio files which lead to a lot of bins. + When the id sets do not change across various minimum duration + boundaries, we + cluster these together and make them point to the same id set + reference. + + :return: whether the records were read from the disk. The assumption is + that the audio index file on disk and the actual audio files + are constructed once and never change during training. We only + re-read when either the directory or the index file path change. + """ + if self.audio_dir == audio_dir and self.idx_fname == idx_fname and \ + self.bin_size == bin_size: + # The audio directory and/or the list of audio files + # haven't changed. No need to load the list again. + return False + + # Remember where the audio index is most recently read from. + self.audio_dir = audio_dir + self.idx_fname = idx_fname + self.bin_size = bin_size + + # Read in the idx and compute the number of bins necessary + self.clear() + rank = [] + min_duration = float('inf') + max_duration = float('-inf') + for idx, duration, file_size, relpath, tags in \ + stream_audio_index(idx_fname): + self.idx_to_record[idx] = (duration, file_size, relpath, tags) + max_duration = max(max_duration, duration) + min_duration = min(min_duration, duration) + rank.append((duration, idx)) + for tag in tags: + self.tag_to_id_set[tag].add(idx) + if len(rank) == 0: + # file is empty + raise IOError("Index file {} is empty".format(idx_fname)) + for tag in self.tag_to_id_set: + self.id_set_complete |= self.tag_to_id_set[tag] + dur = min_duration + self.min_duration = min_duration + while dur < max_duration + bin_size: + self.duration_bins.append(dur) + dur += bin_size + + # Sort in decreasing order of duration and populate + # the cumulative indices lists. + rank.sort(reverse=True) + + # These are indices for `rank` and used to keep track of whether + # there are new records to add in the current bin. + last = 0 + cur = 0 + + # The set of audios falling in the previous bin; in the case, + # where we don't find new audios for the current bin, we store + # the reference to the last set so as to conserve memory. + # This is not such a big problem if the audio duration is + # bounded by a small number like 30 seconds and the + # bin size is big enough. 
But, for raw freesound audios, + # some audios can be as long as a few hours! + last_audio_set = set() + + # The same but for each tag so that we can pick audios based on + # tags and also some user-specified tag distribution. + last_audio_set_per_tag = defaultdict(lambda: set()) + + # Set of lists of bins sharing the same audio sets. + shared = set() + + for i in range(len(self.duration_bins) - 1, -1, -1): + lower_bound = self.duration_bins[i] + new_audio_idxs = set() + new_audio_idxs_per_tag = defaultdict(lambda: set()) + while cur < len(rank) and rank[cur][0] >= lower_bound: + idx = rank[cur][1] + tags = self.idx_to_record[idx][3] + new_audio_idxs.add(idx) + for tag in tags: + new_audio_idxs_per_tag[tag].add(idx) + cur += 1 + # This makes certain that the same list is shared across + # different bins if no new indices are added. + if cur == last: + shared.add(lower_bound) + else: + last_audio_set = last_audio_set | new_audio_idxs + for tag in new_audio_idxs_per_tag: + last_audio_set_per_tag[tag] = \ + last_audio_set_per_tag[tag] | \ + new_audio_idxs_per_tag[tag] + if len(shared) > 0: + self.shared_duration_bins.append(shared) + shared = set([lower_bound]) + ### last_audio_set = set() should set blank + last = cur + self.duration_to_id_set[lower_bound] = last_audio_set + for tag in last_audio_set_per_tag: + self.duration_to_id_set_per_tag[lower_bound][tag] = \ + last_audio_set_per_tag[tag] + + # The last `shared` record isn't added to the `shared_duration_bins`. + self.shared_duration_bins.append(shared) + + # We make sure that the while loop above has exhausted through the + # `rank` list by checking if the `cur`rent index in `rank` equals + # the length of the array, which is the halting condition. + assert cur == len(rank) + + return True + + def _build_index_from_records(self, tag_list): + """ Uses the in-memory records read from the index file to build + an in-memory index restricted to the given tag list. + + :param tag_list: List of tags we are interested in sampling from. + :type tag_list: list(str) + + Updates: + self.id_set (set): the set of all audio id's that can be sampled. + self.duration_to_list (dict): maps from the duration lower bound + to the id's of audios longer than this duration. + self.duration_to_list_per_tag (dict): maps from the tag to + the same structure as self.duration_to_list. This is to support + sampling from a target noise distribution. + + :return: whether the index was built from scratch + """ + if self.tags == tag_list: + return False + + self.tags = tag_list + if len(tag_list) == 0: + self.id_set = self.id_set_complete + else: + self.id_set = set() + for tag in tag_list: + self.id_set |= self.tag_to_id_set[tag] + + # Next, we need to take a subset of the audio files + for shared in self.shared_duration_bins: + # All bins in `shared' have the same index lists + # so we can intersect once and set all of them to this list. 
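+            # (Sketch with illustrative numbers: if the bins with lower
+            # bounds 3.2 and 5.2 share one id set, the intersection below
+            # is computed once, and both duration_to_list[3.2] and
+            # duration_to_list[5.2] end up referencing the same list.)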
+ lb = list(shared)[0] + intersected = list(self.id_set & self.duration_to_id_set[lb]) + duration_to_id_set = self.duration_to_id_set_per_tag[lb] + intersected_per_tag = { + tag: self.tag_to_id_set[tag] & duration_to_id_set[tag] + for tag in duration_to_id_set + } + for bin_key in shared: + self.duration_to_list[bin_key] = intersected + for tag in intersected_per_tag: + self.duration_to_list_per_tag[tag][bin_key] = \ + intersected_per_tag[tag] + assert len(self.duration_to_list) == len(self.duration_to_id_set) + return True + + def refresh_records_from_index_file(self, + audio_dir, + idx_fname, + tag_list, + bin_size=2.0): + """ Loads the index file and populates the records + for building the internal index. + + If the audio directory or index file name has changed, the whole index + is reloaded from scratch. If only the tag_list is changed, then the + desired index is built from the complete, in-memory record. + + :param audio_dir: audio directory + :type audio_dir: basestring + :param idx_fname: audio index file name + :type idex_fname: basestring + :param tag_list: list of tags we are interested in loading; + if empty, we load all. + :type tag_list: list + :param bin_size: optional argument for controlling the granularity + of duration bins + :type bin_size: float + """ + if tag_list is None: + tag_list = [] + reloaded_records = self._load_all_records_from_disk(audio_dir, + idx_fname, bin_size) + if reloaded_records or self.tags != tag_list: + self._build_index_from_records(tag_list) + logger.info('loaded {} audio files from {}' + .format(len(self.id_set), idx_fname)) + + def sample_audio(self, duration, rng=None, distr=None): + """ Uniformly draws an audio record of at least the desired duration + + :param duration: minimum desired audio duration + :type duration: float + :param rng: random number generator + :type rng: random.Random + :param distr: target distribution of audio tags. If not provided, + :type distr: dict + all audio files are sampled uniformly at random. + + :returns: success, (duration, file_size, path) + """ + if duration < 0.0: + duration = self.min_duration + i = bisect.bisect_left(self.duration_bins, duration) + if i == len(self.duration_bins): + return False, None + bin_key = self.duration_bins[i] + if distr is None: + indices = self.duration_to_list[bin_key] + else: + # If a desired audio distribution is given, we sample from it. 
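+            # (A tag is drawn first according to the normalized target
+            # distribution, then an audio id is drawn uniformly from that
+            # tag's ids within this duration bin.)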
+            if rng is None:
+                rng = random.Random()
+            nprng = np.random.RandomState(rng.getrandbits(32))
+            tags = distr.keys()
+            prob_masses = np.array(distr.values(), dtype=float)
+            prob_masses /= np.sum(prob_masses)
+            tag = nprng.choice(tags, p=prob_masses)
+            indices = self.duration_to_list_per_tag[tag][bin_key]
+        if len(indices) == 0:
+            return False, None
+        else:
+            if rng is None:
+                rng = random.Random()
+            # duration, file size and relative path from root
+            s = self.idx_to_record[rng.sample(indices, 1)[0]]
+            s = (s[0], s[1], os.path.join(self.audio_dir, s[2]))
+            return True, s
diff --git a/deep_speech_2/data_utils/augmentor/augmentation.py b/deep_speech_2/data_utils/augmentor/augmentation.py
index abe1a0ec89..c0a70ad186 100755
--- a/deep_speech_2/data_utils/augmentor/augmentation.py
+++ b/deep_speech_2/data_utils/augmentor/augmentation.py
@@ -6,6 +6,11 @@
 import json
 import random
 from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor
+from data_utils.augmentor.resampler import ResamplerAugmentor
+from data_utils.augmentor.speed_perturb import SpeedPerturbationAugmentor
+from data_utils.augmentor.online_bayesian_normalization import \
+    OnlineBayesianNormalizationAugmentor
+from data_utils.augmentor.implus_response import ImpulseResponseAugmentor
+from data_utils.augmentor.noise_speech import NoiseSpeechAugmentor
 
 
 class AugmentationPipeline(object):
@@ -76,5 +81,15 @@ def _get_augmentor(self, augmentor_type, params):
         """Return an augmentation model by the type name, and pass in params."""
         if augmentor_type == "volume":
             return VolumePerturbAugmentor(self._rng, **params)
+        elif augmentor_type == "resample":
+            return ResamplerAugmentor(self._rng, **params)
+        elif augmentor_type == "speed":
+            return SpeedPerturbationAugmentor(self._rng, **params)
+        elif augmentor_type == "online_bayesian_normalization":
+            return OnlineBayesianNormalizationAugmentor(self._rng, **params)
+        elif augmentor_type == "impulse_response":
+            return ImpulseResponseAugmentor(self._rng, **params)
+        elif augmentor_type == "noise_speech":
+            return NoiseSpeechAugmentor(self._rng, **params)
         else:
             raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
diff --git a/deep_speech_2/data_utils/augmentor/implus_response.py b/deep_speech_2/data_utils/augmentor/implus_response.py
new file mode 100755
index 0000000000..cc2053421a
--- /dev/null
+++ b/deep_speech_2/data_utils/augmentor/implus_response.py
@@ -0,0 +1,76 @@
+"""Impulse response augmentation."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from . import base
+from . import audio_database
+from data_utils.speech import SpeechSegment
+
+
+class ImpulseResponseAugmentor(base.AugmentorBase):
+    """ Instantiates an impulse response model.
+
+    :param rng: Random generator object.
+    :type rng: random.Random
+    :param ir_dir: directory containing impulse responses
+    :type ir_dir: basestring
+    :param index_file: index of impulse responses of interest in ir_dir
+    :type index_file: basestring
+    :param tags: optional parameter for specifying what
+        particular impulse responses to apply.
+    :type tags: list
+    :param tag_distr: optional noise distribution
+    :type tag_distr: dict
+    """
+
+    def __init__(self, rng, ir_dir, index_file, tags=None, tag_distr=None):
+        # Define all required parameter maps here.
+        self.ir_dir = ir_dir
+        self.index_file = index_file
+
+        self.tags = tags
+        self.tag_distr = tag_distr
+
+        self.audio_index = audio_database.AudioIndex()
+        self.rng = rng
+
+    def _init_data(self):
+        """ Preloads data from disk (e.g. the list of files) to make later
+        loading faster. If the data configuration remains the same, this
+        function does nothing.
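+
+        A hypothetical construction of this augmentor through the
+        pipeline (the paths are placeholders, not real files):
+
+            _get_augmentor("impulse_response",
+                           {"ir_dir": "/data/ir", "index_file": "ir.index"})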
+ + """ + self.audio_index.refresh_records_from_index_file( + self.ir_dir, self.index_file, self.tags) + + def transform_audio(self, audio_segment): + """ Convolves the input audio with an impulse response. + + :param audio_segment: input audio + :type audio_segment: AudioSegemnt + """ + # This handles the cases where the data source or directories change. + self._init_data() + + read_size = 0 + tag_distr = self.tag_distr + if not self.audio_index.has_audio(tag_distr): + if tag_distr is None: + if not self.tags: + raise RuntimeError("The ir index does not have audio " + "files to sample from.") + else: + raise RuntimeError("The ir index does not have audio " + "files of the given tags to sample " + "from.") + else: + raise RuntimeError("The ir index does not have audio " + "files to match the target ir " + "distribution.") + else: + # Querying with a negative duration triggers the index to search + # from all impulse responses. + success, record = self.audio_index.sample_audio( + -1.0, rng=self.rng, distr=tag_distr) + if success is True: + _, read_size, ir_fname = record + ir_wav = SpeechSegment.from_file(ir_fname) + audio_segment.convolve(ir_wav, allow_resampling=True) diff --git a/deep_speech_2/data_utils/augmentor/noise_speech.py b/deep_speech_2/data_utils/augmentor/noise_speech.py new file mode 100755 index 0000000000..8cf7c27b66 --- /dev/null +++ b/deep_speech_2/data_utils/augmentor/noise_speech.py @@ -0,0 +1,318 @@ +""" noise speech +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import logging +import numpy as np +import os +from collections import defaultdict + +from . import base +from . import audio_database +from data_utils.speech import SpeechSegment + +TURK = "turk" +USE_AUDIO_DATABASE_SOURCES = frozenset(["freesound", "chime"]) +HALF_NOISE_LENGTH_MIN_THRESHOLD = 3.0 +FIND_NOISE_MAX_ATTEMPTS = 20 + +logger = logging.getLogger(__name__) + + +def get_first_smaller(items, value): + index = bisect.bisect_left(items, value) - 1 + assert items[index] < value, \ + 'get_first_smaller failed! %d %d' % (items[index], value) + return items[index] + + +def get_first_larger(items, value): + 'Find leftmost value greater than value' + index = bisect.bisect_right(items, value) + assert index < len(items), \ + "no noise bin exists for this audio length (%f)" % value + assert items[index] > value, \ + 'get_first_larger failed! %d %d' % (items[index], value) + return items[index] + + +def _get_turk_noise_files(noise_dir, index_file): + """ Creates a map from duration => a list of noise filenames + + :param noise_dir: Directory of noise files which contains + "noise-samples-list" + :type noise_dir: basestring + :param index_file: Noise list + :type index_file: basestring + + returns:noise_files (defaultdict): A map of bins to noise files. + Each key is the duration, and the value is a list of noise + files binned to this duration. Each bin is 2 secs. 
+ + Note: noise-samples-list should contain one line per noise (wav) file + along with its duration in milliseconds + """ + noise_files = defaultdict(list) + if not os.path.exists(index_file): + logger.error('No noise files were found at {}'.format(index_file)) + return noise_files + num_noise_files = 0 + rounded_durations = list(range(0, 65, 2)) + with open(index_file, 'r') as fl: + for line in fl: + fname = os.path.join(noise_dir, line.strip().split()[0]) + duration = float(line.strip().split()[1]) / 1000 + # bin the noise files into length bins rounded by 2 sec + bin_id = get_first_smaller(rounded_durations, duration) + noise_files[bin_id].append(fname) + num_noise_files += 1 + logger.info('Loaded {} turk noise files'.format(num_noise_files)) + return noise_files + + +class NoiseSpeechAugmentor(base.AugmentorBase): + """ Noise addition block + + :param snr_min: minimum signal-to-noise ratio + :type snr_min: float + :param snr_max: maximum signal-to-noise ratio + :type snr_max: float + :param noise_dir: root of where noise files are stored + :type noise_fir: basestring + :param index_file: index of noises of interest in noise_dir + :type index_file: basestring + :param source: select one from + - turk + - freesound + - chime + Note that this field is no longer required for the freesound + and chime + :type source: string + :param tags: optional parameter for specifying what + particular noises we want to add. See above for the available tags. + :type tags: list + :param tag_distr: optional noise distribution + :type tag_distr: dict + """ + + def __init__(self, + rng, + snr_min, + snr_max, + noise_dir, + source, + allow_downsampling=None, + index_file=None, + tags=None, + tag_distr=None): + # Define all required parameter maps here. + self.rng = rng + self.snr_min = snr_min + self.snr_max = snr_max + self.noise_dir = noise_dir + self.source = source + + self.allow_downsampling = allow_downsampling + self.index_file = index_file + self.tags = tags + self.tag_distr = tag_distr + + # When new noise sources are added, make sure to define the + # associated bookkeeping variables here. + self.turk_noise_files = [] + self.turk_noise_dir = None + self.audio_index = audio_database.AudioIndex() + + def _init_data(self): + """ Preloads stuff from disk in an attempt (e.g. list of files, etc) + to make later loading faster. If the data configuration remains the + same, this function does nothing. 
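+
+        (For example, with source="turk" and no index_file, the index
+        path defaults to <noise_dir>/noise-samples-list; for the
+        audio-database sources it defaults to
+        <noise_dir>/audio_index_commercial.txt.)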
+ + """ + noise_dir = self.noise_dir + index_file = self.index_file + source = self.source + if not index_file: + if source == TURK: + index_file = os.path.join(noise_dir, 'noise-samples-list') + logger.debug("index_file not provided; " + "defaulting to " + + index_file) + else: + if source != "": + assert source in USE_AUDIO_DATABASE_SOURCES, \ + "{} not supported by audio_database".format(source) + index_file = os.path.join(noise_dir, + "audio_index_commercial.txt") + logger.debug("index_file not provided; " + "defaulting to " + + index_file) + + if source == TURK: + if self.turk_noise_dir != noise_dir: + self.turk_noise_dir = noise_dir + self.turk_noise_files = _get_turk_noise_files(noise_dir, + index_file) + # elif source == TODO_SUPPORT_NON_AUDIO_DATABASE_BASED_SOURCES: + else: + if source != "": + assert source in USE_AUDIO_DATABASE_SOURCES, \ + "{} not supported by audio_database".format(source) + self.audio_index.refresh_records_from_index_file( + self.noise_dir, index_file, self.tags) + + def transform_audio(self, audio_segment): + """Adds walla noise + + :param audio_segment: Input audio + :type audio_segment: SpeechSegment + """ + # This handles the cases where the data source or directories change. + self._init_data + source = self.source + allow_downsampling = self.allow_downsampling + if source == TURK: + self._add_turk_noise(audio_segment, self.rng, allow_downsampling) + # elif source == TODO_SUPPORT_NON_AUDIO_DATABASE_BASED_SOURCES: + else: + self._add_noise(audio_segment, self.rng, allow_downsampling) + + def _sample_snr(self): + """ Returns a float sampled in [`self.snr_min`, `self.snr_max`] + if both `self.snr_min` and `self.snr_max` are non-zero. + """ + snr_min = self.snr_min + snr_max = self.snr_max + sampled_snr = self.rng.uniform(snr_min, snr_max) + return sampled_snr + + def _add_turk_noise(self, audio_segment, allow_downsampling): + """ Adds a turk noise to the input audio. + + :param audio_segment: input audio + :type audio_segment: audiosegment + :param allow_downsampling: indicates whether downsampling + is allowed + :type allow_downsampling: boolean + """ + read_size = 0 + if len(self.turk_noise_files) > 0: + snr = self._sample_snr(self.rng) + # Draw the noise file randomly from noise files that are + # slightly longer than the utterance + noise_bins = sorted(self.turk_noise_files.keys()) + # note some bins can be empty, so we can't just round up + # to the nearest 2-sec interval + rounded_duration = get_first_larger(noise_bins, + audio_segment.duration) + noise_fname = \ + self.rng.sample(self.turk_noise_files[rounded_duration], 1)[0] + noise = SpeechSegment.from_wav_file(noise_fname) + logger.debug('noise_fname {}'.format(noise_fname)) + logger.debug('snr {}'.format(snr)) + read_size = len(noise) * 2 + # May throw exceptions, but this is caught by + # AudioFeaturizer.get_audio_files. + audio_segment.add_noise( + noise, snr, rng=self.rng, allow_downsampling=allow_downsampling) + + def _add_noise(self, audio_segment, allow_downsampling): + """ Adds a noise indexed in audio_database.AudioIndex. 
+ + :param audio_segment: input audio + :type audio_segment: SpeechSegment + :param allow_downsampling: indicates whether downsampling + is allowed + :type allow_downsampling: boolean + + Returns: + (SpeechSegment, int) + - sound with turk noise added + - number of bytes read from disk + """ + read_size = 0 + tag_distr = self.tag_distr + if not self.audio_index.has_audio(tag_distr): + if tag_distr is None: + if not self.tags: + raise RuntimeError("The noise index does not have audio " + "files to sample from.") + else: + raise RuntimeError("The noise index does not have audio " + "files of the given tags to sample " + "from.") + else: + raise RuntimeError("The noise index does not have audio " + "files to match the target noise " + "distribution.") + else: + # Compute audio segment related statistics + audio_duration = audio_segment.duration + + # Sample relevant augmentation parameters. + snr = self._sample_snr(self.rng) + + # Perhaps, we may not have a sufficiently long noise, so we need + # to search iteratively. + min_duration = audio_duration + 0.25 + for _ in range(FIND_NOISE_MAX_ATTEMPTS): + logger.debug("attempting to find noise of length " + "at least {}".format(min_duration)) + + success, record = \ + self.audio_index.sample_audio(min_duration, + rng=self.rng, + distr=tag_distr) + + if success is True: + noise_duration, read_size, noise_fname = record + + # Assert after logging so we know + # what caused augmentation to fail. + logger.debug("noise_fname {}".format(noise_fname)) + logger.debug("snr {}".format(snr)) + assert noise_duration >= min_duration + break + + # Decrease the desired minimum duration linearly. + # If the value becomes smaller than some threshold, + # we half the value instead. + if min_duration > HALF_NOISE_LENGTH_MIN_THRESHOLD: + min_duration -= 2.0 + else: + min_duration *= 0.5 + + if success is False: + logger.info("Failed to find a noise file") + return + + diff_duration = audio_duration + 0.25 - noise_duration + if diff_duration >= 0.0: + # Here, the noise is shorter than the audio file, so + # we pad with zeros to make sure the noise sound is applied + # with a uniformly random shift. + noise = SpeechSegment.from_file(noise_fname) + noise = noise.pad_silence(diff_duration, sides="both") + else: + # The noise clip is at least ~25 ms longer than the audio + # segment here. + diff_duration = int(noise_duration * audio_segment.sample_rate) - \ + int(audio_duration * audio_segment.sample_rate) - \ + int(0.02 * audio_segment.sample_rate) + start = float(self.rng.randint(0, diff_duration)) / \ + audio.sample_rate + finish = min(start + audio_duration + 0.2, noise_duration) + noise = SpeechSegment.slice_from_file(noise_fname, start, + finish) + + if len(noise) < len(audio_segment): + # This is to ensure that the noise clip is at least as + # long as the audio segment. + num_samples_to_pad = len(audio_segment) - len(noise) + # Padding this amount of silence on both ends ensures that + # the placement of the noise clip is uniformly random. 
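+                # (Worked example with made-up numbers: for a 5.00 s
+                # utterance and a 4.90 s noise clip, num_samples_to_pad
+                # covers 0.10 s; padding that much silence on both ends
+                # yields a 5.10 s clip, so every placement of the 5.00 s
+                # window remains reachable.)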
+                silence = SpeechSegment(
+                    np.zeros(num_samples_to_pad), audio_segment.sample_rate)
+                noise = SpeechSegment.concatenate(silence, noise, silence)
+
+            audio_segment.add_noise(
+                noise, snr, rng=self.rng, allow_downsampling=allow_downsampling)
diff --git a/deep_speech_2/data_utils/augmentor/online_bayesian_normalization.py b/deep_speech_2/data_utils/augmentor/online_bayesian_normalization.py
new file mode 100755
index 0000000000..bc2d6c1b65
--- /dev/null
+++ b/deep_speech_2/data_utils/augmentor/online_bayesian_normalization.py
@@ -0,0 +1,57 @@
+"""Online Bayesian normalization."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from . import base
+
+
+class OnlineBayesianNormalizationAugmentor(base.AugmentorBase):
+    """
+    Instantiates an online Bayesian normalization module.
+
+    :param target_db: Target RMS value in decibels
+    :type target_db: float
+    :param prior_db: Prior RMS estimate in decibels
+    :type prior_db: float
+    :param prior_samples: Prior strength in number of samples
+    :type prior_samples: int
+    :param startup_delay: Start-up delay in seconds during
+        which normalization statistics are accrued.
+    :type startup_delay: float
+    """
+
+    def __init__(self,
+                 rng,
+                 target_db,
+                 prior_db,
+                 prior_samples,
+                 startup_delay=0.0):
+        self.target_db = target_db
+        self.prior_db = prior_db
+        self.prior_samples = prior_samples
+        self.startup_delay = startup_delay
+        self.rng = rng
+
+    def transform_audio(self, audio_segment):
+        """
+        Normalizes the input audio using the online Bayesian approach.
+
+        Note that this is an in-place transformation.
+
+        :param audio_segment: input audio
+        :type audio_segment: SpeechSegment
+        """
+        audio_segment.normalize_online_bayesian(
+            self.target_db,
+            self.prior_db,
+            self.prior_samples,
+            startup_delay=self.startup_delay)
diff --git a/deep_speech_2/data_utils/augmentor/resampler.py b/deep_speech_2/data_utils/augmentor/resampler.py
new file mode 100755
index 0000000000..1b959be56c
--- /dev/null
+++ b/deep_speech_2/data_utils/augmentor/resampler.py
@@ -0,0 +1,30 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from . import base
+
+
+class ResamplerAugmentor(base.AugmentorBase):
+    """ Instantiates a resampler module.
+
+    :param new_sample_rate: New sample rate in Hz
+    :type new_sample_rate: int
+    :param rng: Random generator object.
+    :type rng: random.Random
+    """
+
+    def __init__(self, rng, new_sample_rate):
+        self.new_sample_rate = new_sample_rate
+        self._rng = rng
+
+    def transform_audio(self, audio_segment):
+        """ Resamples the input audio to the target sample rate.
+
+        Note that this is an in-place transformation.
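+
+        (A sketch of the effect with made-up numbers: an 8 kHz segment
+        resampled to 16 kHz doubles its sample count while keeping the
+        duration unchanged.)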
+
+        :param audio_segment: input audio
+        :type audio_segment: SpeechSegment
+        """
+        audio_segment.resample(self.new_sample_rate)
\ No newline at end of file
diff --git a/deep_speech_2/data_utils/augmentor/speed_perturb.py b/deep_speech_2/data_utils/augmentor/speed_perturb.py
new file mode 100755
index 0000000000..e09be5f74e
--- /dev/null
+++ b/deep_speech_2/data_utils/augmentor/speed_perturb.py
@@ -0,0 +1,53 @@
+"""Speed perturbation module for making ASR robust to different voice
+types (high-pitched, low-pitched, etc).
+Samples uniformly between speed_min and speed_max.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from . import base
+
+
+class SpeedPerturbationAugmentor(base.AugmentorBase):
+    """
+    Instantiates a speed perturbation module.
+
+    See reference paper here:
+
+    http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf
+
+    :param speed_min: Lower bound on the new rate to sample
+    :type speed_min: float
+    :param speed_max: Upper bound on the new rate to sample
+    :type speed_max: float
+    """
+
+    def __init__(self, rng, speed_min, speed_max):
+        if speed_min < 0.9:
+            raise ValueError(
+                "Sampling speed below 0.9 can cause unnatural effects")
+        if speed_max > 1.1:
+            raise ValueError(
+                "Sampling speed above 1.1 can cause unnatural effects")
+        self.speed_min = speed_min
+        self.speed_max = speed_max
+        self.rng = rng
+
+    def transform_audio(self, audio_segment):
+        """
+        Samples a new speed rate from the given range and
+        changes the speed of the given audio clip.
+
+        Note that this is an in-place transformation.
+
+        :param audio_segment: input audio
+        :type audio_segment: SpeechSegment
+        """
+        sampled_speed = self.rng.uniform(self.speed_min, self.speed_max)
+        audio_segment.change_speed(sampled_speed)
diff --git a/deep_speech_2/data_utils/augmentor/volume_perturb.py b/deep_speech_2/data_utils/augmentor/volume_perturb.py
index a5a9f6cada..15055b9154 100755
--- a/deep_speech_2/data_utils/augmentor/volume_perturb.py
+++ b/deep_speech_2/data_utils/augmentor/volume_perturb.py
@@ -3,10 +3,10 @@
 from __future__ import division
 from __future__ import print_function
 
-from data_utils.augmentor.base import AugmentorBase
+from . import base
 
 
-class VolumePerturbAugmentor(AugmentorBase):
+class VolumePerturbAugmentor(base.AugmentorBase):
     """Augmentation model for adding random volume perturbation.
     
     This is used for multi-loudness training of PCEN.
See From bdffa40ec943b98abc6a98932995f50c58481f42 Mon Sep 17 00:00:00 2001 From: wwhu Date: Thu, 15 Jun 2017 10:21:56 +0800 Subject: [PATCH 27/43] add xmap for image list and modify the image reader of infer.py --- image_classification/README.md | 26 ++++----------- image_classification/alexnet.py | 2 +- image_classification/googlenet.py | 24 +++++++------- image_classification/infer.py | 19 ++--------- image_classification/reader.py | 53 +++++++++++++++++-------------- image_classification/resnet.py | 4 +-- image_classification/train.py | 4 +-- image_classification/vgg.py | 8 ++--- 8 files changed, 59 insertions(+), 81 deletions(-) diff --git a/image_classification/README.md b/image_classification/README.md index acb8b45109..94a0a1b70e 100644 --- a/image_classification/README.md +++ b/image_classification/README.md @@ -147,11 +147,11 @@ dataset_100/train_images/n02643566_75.jpeg 8 ```python train_reader = paddle.batch( paddle.reader.shuffle( - reader.test_reader('train.list'), + reader.train_reader('train.list'), buf_size=1000), batch_size=BATCH_SIZE) test_reader = paddle.batch( - reader.train_reader('val.list'), + reader.test_reader('val.list'), batch_size=BATCH_SIZE) ``` @@ -209,24 +209,10 @@ trainer.train( with gzip.open('params_pass_10.tar.gz', 'r') as f: parameters = paddle.parameters.Parameters.from_tar(f) -def load_image(file): - im = Image.open(file) - im = im.resize((224, 224), Image.ANTIALIAS) - im = np.array(im).astype(np.float32) - # The storage order of the loaded image is W(widht), - # H(height), C(channel). PaddlePaddle requires - # the CHW order, so transpose them. - im = im.transpose((2, 0, 1)) # CHW - # In the training phase, the channel order of CIFAR - # image is B(Blue), G(green), R(Red). But PIL open - # image in RGB mode. It must swap the channel order. 
- im = im[(2, 1, 0), :, :] # BGR - im = im.flatten() - im = im / 255.0 - return im - file_list = [line.strip() for line in open(image_list_file)] -test_data = [(load_image(image_file),) for image_file in file_list] +test_data = [(paddle.image.load_and_transform(image_file, 256, 224, False) + .flatten().astype('float32'), ) + for image_file in file_list] probs = paddle.infer( output_layer=out, parameters=parameters, input=test_data) lab = np.argsort(-probs) @@ -234,4 +220,4 @@ for file_name, result in zip(file_list, lab): print "Label of %s is: %d" % (file_name, result[0]) ``` -首先从文件中加载训练好的模型(代码里以第10轮迭代的结果为例),然后读取`image_list_file`中的图像。`image_list_file`是一个文本文件,每一行为一个图像路径。`load_image`是一个加载图像的函数。代码使用`paddle.infer`判断`image_list_file`中每个图像的类别,并进行输出。 +首先从文件中加载训练好的模型(代码里以第10轮迭代的结果为例),然后读取`image_list_file`中的图像。`image_list_file`是一个文本文件,每一行为一个图像路径。代码使用`paddle.infer`判断`image_list_file`中每个图像的类别,并进行输出。 diff --git a/image_classification/alexnet.py b/image_classification/alexnet.py index 8aa53814b1..5262a97faf 100644 --- a/image_classification/alexnet.py +++ b/image_classification/alexnet.py @@ -3,7 +3,7 @@ __all__ = ['alexnet'] -def alexnet(input, class_dim=100): +def alexnet(input, class_dim): conv1 = paddle.layer.img_conv( input=input, filter_size=11, diff --git a/image_classification/googlenet.py b/image_classification/googlenet.py index e21a036024..474f948f02 100644 --- a/image_classification/googlenet.py +++ b/image_classification/googlenet.py @@ -3,8 +3,8 @@ __all__ = ['googlenet'] -def inception2(name, input, channels, filter1, filter3R, filter3, filter5R, - filter5, proj): +def inception(name, input, channels, filter1, filter3R, filter3, filter5R, + filter5, proj): cov1 = paddle.layer.img_conv( name=name + '_1', input=input, @@ -65,7 +65,7 @@ def inception2(name, input, channels, filter1, filter3R, filter3, filter5R, return cat -def googlenet(input, class_dim=100): +def googlenet(input, class_dim): # stage 1 conv1 = paddle.layer.img_conv( name="conv1", @@ -97,23 +97,23 @@ def googlenet(input, class_dim=100): name="pool2", input=conv2_2, pool_size=3, num_channels=192, stride=2) # stage 3 - ince3a = inception2("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32) - ince3b = inception2("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64) + ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32) + ince3b = inception("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64) pool3 = paddle.layer.img_pool( name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2) # stage 4 - ince4a = inception2("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64) - ince4b = inception2("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64) - ince4c = inception2("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64) - ince4d = inception2("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64) - ince4e = inception2("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128) + ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64) + ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64) + ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64) + ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64) + ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128) pool4 = paddle.layer.img_pool( name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2) # stage 5 - ince5a = inception2("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128) - ince5b = inception2("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128) + ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128) + 
ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128) pool5 = paddle.layer.img_pool( name="pool5", input=ince5b, diff --git a/image_classification/infer.py b/image_classification/infer.py index c48a29336f..659c4f2a8e 100644 --- a/image_classification/infer.py +++ b/image_classification/infer.py @@ -54,24 +54,9 @@ def main(): with gzip.open(args.params_path, 'r') as f: parameters = paddle.parameters.Parameters.from_tar(f) - def load_image(file): - im = Image.open(file) - im = im.resize((WIDTH, HEIGHT), Image.ANTIALIAS) - im = np.array(im).astype(np.float32) - # The storage order of the loaded image is W(widht), - # H(height), C(channel). PaddlePaddle requires - # the CHW order, so transpose them. - im = im.transpose((2, 0, 1)) # CHW - # In the training phase, the channel order of CIFAR - # image is B(Blue), G(green), R(Red). But PIL open - # image in RGB mode. It must swap the channel order. - im = im[(2, 1, 0), :, :] # BGR - im = im.flatten() - im = im / 255.0 - return im - file_list = [line.strip() for line in open(args.data_list)] - test_data = [(load_image(image_file), ) for image_file in file_list] + test_data = [(paddle.image.load_and_transform(image_file, 256, 224, False) + .flatten().astype('float32'), ) for image_file in file_list] probs = paddle.infer( output_layer=out, parameters=parameters, input=test_data) lab = np.argsort(-probs) diff --git a/image_classification/reader.py b/image_classification/reader.py index b58807e3a3..b6bad1a24c 100644 --- a/image_classification/reader.py +++ b/image_classification/reader.py @@ -1,44 +1,51 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and
-# limitations under the License
-
 import random
 from paddle.v2.image import load_and_transform
+import paddle.v2 as paddle
+from multiprocessing import cpu_count
+
+
+def train_mapper(sample):
+    '''
+    Map an image path to the data format needed by the model input layer
+    for the training set.
+    '''
+    img, label = sample
+    img = paddle.image.load_image(img)
+    img = paddle.image.simple_transform(img, 256, 224, True)
+    return img.flatten().astype('float32'), label
+
+
+def test_mapper(sample):
+    '''
+    Map an image path to the data format needed by the model input layer
+    for the test set.
+    '''
+    img, label = sample
+    img = paddle.image.load_image(img)
+    # No random crop/flip at test time, hence is_train=False.
+    img = paddle.image.simple_transform(img, 256, 224, False)
+    return img.flatten().astype('float32'), label
 
-def train_reader(train_list):
+def train_reader(train_list, buffered_size=1024):
     def reader():
         with open(train_list, 'r') as f:
             lines = [line.strip() for line in f]
-            random.shuffle(lines)
             for line in lines:
                 img_path, lab = line.strip().split('\t')
-                im = load_and_transform(img_path, 256, 224, True)
-                yield im.flatten().astype('float32'), int(lab)
+                yield img_path, int(lab)
 
-    return reader
+    # xmap_readers runs the mapper over the reader's output concurrently
+    # with cpu_count() worker threads, buffering up to buffered_size samples.
+    return paddle.reader.xmap_readers(train_mapper, reader,
+                                      cpu_count(), buffered_size)
 
 
-def test_reader(test_list):
+def test_reader(test_list, buffered_size=1024):
     def reader():
         with open(test_list, 'r') as f:
             lines = [line.strip() for line in f]
             for line in lines:
                 img_path, lab = line.strip().split('\t')
-                im = load_and_transform(img_path, 256, 224, False)
-                yield im.flatten().astype('float32'), int(lab)
+                yield img_path, int(lab)
 
-    return reader
+    return paddle.reader.xmap_readers(test_mapper, reader,
+                                      cpu_count(), buffered_size)
 
 
 if __name__ == '__main__':
diff --git a/image_classification/resnet.py b/image_classification/resnet.py
index ca9330e63b..5a9f24322c 100644
--- a/image_classification/resnet.py
+++ b/image_classification/resnet.py
@@ -55,7 +55,7 @@ def layer_warp(block_func, input, ch_in, ch_out, count, stride):
     return conv
 
 
-def resnet_imagenet(input, depth=50, class_dim=100):
+def resnet_imagenet(input, class_dim, depth=50):
     cfg = {
         18: ([2, 2, 2, 1], basicblock),
         34: ([3, 4, 6, 3], basicblock),
@@ -78,7 +78,7 @@ def resnet_imagenet(input, depth=50, class_dim=100):
     return out
 
 
-def resnet_cifar10(input, depth=32, class_dim=10):
+def resnet_cifar10(input, class_dim, depth=32):
     # depth should be one of 20, 32, 44, 56, 110, 1202
     assert (depth - 2) % 6 == 0
     n = (depth - 2) / 6
diff --git a/image_classification/train.py b/image_classification/train.py
index b3de41348d..63d5b97aad 100755
--- a/image_classification/train.py
+++ b/image_classification/train.py
@@ -72,13 +72,13 @@ def main():
         paddle.reader.shuffle(
             flowers.train(),
             # To use other data, replace the above line with:
-            # reader.test_reader('train.list'),
+            # reader.train_reader('train.list'),
             buf_size=1000),
         batch_size=BATCH_SIZE)
     test_reader = paddle.batch(
         flowers.valid(),
         # To use other data, replace the above line with:
-        # reader.train_reader('val.list'),
+        # reader.test_reader('val.list'),
         batch_size=BATCH_SIZE)
 
     # End batch and end pass event handler
diff --git a/image_classification/vgg.py b/image_classification/vgg.py
index b272320b26..8d6b115a85 100644
--- a/image_classification/vgg.py
+++ b/image_classification/vgg.py
@@ -17,7 +17,7 @@
 __all__ = ['vgg13', 'vgg16', 'vgg19']
 
 
-def vgg(input, nums, class_dim=100):
+def vgg(input, nums, class_dim):
     def conv_block(input, num_filter, groups, num_channels=None):
         return paddle.networks.img_conv_group(
input=input, @@ -53,16 +53,16 @@ def conv_block(input, num_filter, groups, num_channels=None): return out -def vgg13(input, class_dim=100): +def vgg13(input, class_dim): nums = [2, 2, 2, 2, 2] return vgg(input, nums, class_dim) -def vgg16(input, class_dim=100): +def vgg16(input, class_dim): nums = [2, 2, 3, 3, 3] return vgg(input, nums, class_dim) -def vgg19(input, class_dim=100): +def vgg19(input, class_dim): nums = [2, 2, 4, 4, 4] return vgg(input, nums, class_dim) From f545367cfb4f924463c0594e40cf01a9f0c1b492 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 15 Jun 2017 17:05:00 +0800 Subject: [PATCH 28/43] Add shuffle type of instance_shuffle and batch_shuffle_clipped. --- deep_speech_2/data_utils/data.py | 50 ++++++++++++++----- .../datasets/librispeech/librispeech.py | 3 +- deep_speech_2/decoder.py | 6 +-- deep_speech_2/infer.py | 11 ++-- deep_speech_2/train.py | 16 ++++-- deep_speech_2/utils.py | 25 ++++++++++ 6 files changed, 82 insertions(+), 29 deletions(-) create mode 100644 deep_speech_2/utils.py diff --git a/deep_speech_2/data_utils/data.py b/deep_speech_2/data_utils/data.py index 48e03fe85d..424343a48f 100644 --- a/deep_speech_2/data_utils/data.py +++ b/deep_speech_2/data_utils/data.py @@ -80,7 +80,7 @@ def batch_reader_creator(self, padding_to=-1, flatten=False, sortagrad=False, - batch_shuffle=False): + shuffle_method="batch_shuffle"): """ Batch data reader creator for audio data. Return a callable generator function to produce batches of data. @@ -104,12 +104,22 @@ def batch_reader_creator(self, :param sortagrad: If set True, sort the instances by audio duration in the first epoch for speed up training. :type sortagrad: bool - :param batch_shuffle: If set True, instances are batch-wise shuffled. - For more details, please see - ``_batch_shuffle.__doc__``. - If sortagrad is True, batch_shuffle is disabled + :param shuffle_method: Shuffle method. Options: + '' or None: no shuffle. + 'instance_shuffle': instance-wise shuffle. + 'batch_shuffle': similarly-sized instances are + put into batches, and then + batch-wise shuffle the batches. + For more details, please see + ``_batch_shuffle.__doc__``. + 'batch_shuffle_clipped': 'batch_shuffle' with + head shift and tail + clipping. For more + details, please see + ``_batch_shuffle``. + If sortagrad is True, shuffle is disabled for the first epoch. - :type batch_shuffle: bool + :type shuffle_method: None|str :return: Batch reader function, producing batches of data when called. :rtype: callable """ @@ -123,8 +133,20 @@ def batch_reader(): # sort (by duration) or batch-wise shuffle the manifest if self._epoch == 0 and sortagrad: manifest.sort(key=lambda x: x["duration"]) - elif batch_shuffle: - manifest = self._batch_shuffle(manifest, batch_size) + else: + if shuffle_method == "batch_shuffle": + manifest = self._batch_shuffle( + manifest, batch_size, clipped=False) + elif shuffle_method == "batch_shuffle_clipped": + manifest = self._batch_shuffle( + manifest, batch_size, clipped=True) + elif shuffle_method == "instance_shuffle": + self._rng.shuffle(manifest) + elif not shuffle_method: + pass + else: + raise ValueError("Unknown shuffle method %s." 
             # prepare batches
             instance_reader = self._instance_reader_creator(manifest)
             batch = []
@@ -218,7 +240,7 @@ def _padding_batch(self, batch, padding_to=-1, flatten=False):
             new_batch.append((padded_audio, text))
         return new_batch
 
-    def _batch_shuffle(self, manifest, batch_size):
+    def _batch_shuffle(self, manifest, batch_size, clipped=False):
         """Put similarly-sized instances into minibatches for better efficiency
         and make a batch-wise shuffle.
 
@@ -233,6 +255,9 @@ def _batch_shuffle(self, manifest, batch_size):
         :param batch_size: Batch size. This size is also used for generating
                            a random number for batch shuffle.
         :type batch_size: int
+        :param clipped: Whether to clip the heading (small shift) and trailing
+                        (incomplete batch) instances.
+        :type clipped: bool
         :return: Batch shuffled manifest.
         :rtype: list
         """
@@ -241,7 +266,8 @@ def _batch_shuffle(self, manifest, batch_size):
         batch_manifest = zip(*[iter(manifest[shift_len:])] * batch_size)
         self._rng.shuffle(batch_manifest)
         batch_manifest = list(sum(batch_manifest, ()))
-        res_len = len(manifest) - shift_len - len(batch_manifest)
-        batch_manifest.extend(manifest[-res_len:])
-        batch_manifest.extend(manifest[0:shift_len])
+        if not clipped:
+            res_len = len(manifest) - shift_len - len(batch_manifest)
+            batch_manifest.extend(manifest[-res_len:])
+            batch_manifest.extend(manifest[0:shift_len])
         return batch_manifest
diff --git a/deep_speech_2/datasets/librispeech/librispeech.py b/deep_speech_2/datasets/librispeech/librispeech.py
index faf038cc19..87e52ae4aa 100644
--- a/deep_speech_2/datasets/librispeech/librispeech.py
+++ b/deep_speech_2/datasets/librispeech/librispeech.py
@@ -37,8 +37,7 @@
 MD5_TRAIN_CLEAN_360 = "c0e676e450a7ff2f54aeade5171606fa"
 MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708"
 
-parser = argparse.ArgumentParser(
-    description='Downloads and prepare LibriSpeech dataset.')
+parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument(
     "--target_dir",
     default=DATA_HOME + "/Libri",
diff --git a/deep_speech_2/decoder.py b/deep_speech_2/decoder.py
index 8314885ce6..77d950b8db 100644
--- a/deep_speech_2/decoder.py
+++ b/deep_speech_2/decoder.py
@@ -8,8 +8,7 @@
 
 def ctc_best_path_decode(probs_seq, vocabulary):
-    """
-    Best path decoding, also called argmax decoding or greedy decoding.
+    """Best path decoding, also called argmax decoding or greedy decoding.
 
     The path consisting of the most probable tokens is further post-processed
     to remove consecutive repetitions and all blanks.
 
@@ -38,8 +37,7 @@ def ctc_best_path_decode(probs_seq, vocabulary):
 
 def ctc_decode(probs_seq, vocabulary, method):
-    """
-    CTC-like sequence decoding from a sequence of likelihood probabilities.
+    """CTC-like sequence decoding from a sequence of likelihood probabilities.
 
     :param probs_seq: 2-D list of probabilities over the vocabulary for each
                       character. Each element is a list of float probabilities
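The zip-based regrouping in `_batch_shuffle` above is terse; the following standalone sketch (an illustration of the same idiom on a plain list, not the project's own code path) shows what the random head shift, fixed-size grouping, batch-wise shuffle, and optional clipping do:

```python
import random


def batch_shuffle_sketch(manifest, batch_size, clipped=False, rng=None):
    """Illustrative mimic of _batch_shuffle on a plain list."""
    rng = rng or random.Random(0)
    # 1. drop a small random head so batch boundaries vary between epochs
    shift_len = rng.randint(0, batch_size - 1)
    # 2. group the remainder into batch_size-tuples (the tail is dropped)
    batches = list(zip(*[iter(manifest[shift_len:])] * batch_size))
    # 3. shuffle whole batches; in DeepSpeech2 the manifest is sorted by
    #    duration first, so each batch holds similarly-sized utterances
    rng.shuffle(batches)
    out = [x for batch in batches for x in batch]
    if not clipped:
        # 4. re-append the clipped tail and head instead of discarding them
        res_len = len(manifest) - shift_len - len(out)
        if res_len > 0:  # guard: manifest[-0:] would copy the whole list
            out.extend(manifest[-res_len:])
        out.extend(manifest[:shift_len])
    return out


print(batch_shuffle_sketch(list(range(10)), batch_size=3))
# e.g. [4, 5, 6, 1, 2, 3, 7, 8, 9, 0] -- all 10 instances kept
print(batch_shuffle_sketch(list(range(10)), batch_size=3, clipped=True))
# e.g. [4, 5, 6, 1, 2, 3, 7, 8, 9] -- shifted head / ragged tail clipped
```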
- """ + """Max-ctc-decoding for DeepSpeech2.""" # initialize data generator data_generator = DataGenerator( vocab_filepath=args.vocab_filepath, @@ -98,7 +96,7 @@ def infer(): manifest_path=args.decode_manifest_path, batch_size=args.num_samples, sortagrad=False, - batch_shuffle=False) + shuffle_method=None) infer_data = batch_reader().next() # run inference @@ -123,6 +121,7 @@ def infer(): def main(): + utils.print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=1) infer() diff --git a/deep_speech_2/train.py b/deep_speech_2/train.py index 6074aa358d..c60a039b69 100644 --- a/deep_speech_2/train.py +++ b/deep_speech_2/train.py @@ -12,6 +12,7 @@ import paddle.v2 as paddle from model import deep_speech2 from data_utils.data import DataGenerator +import utils parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -51,6 +52,12 @@ default=True, type=distutils.util.strtobool, help="Use sortagrad or not. (default: %(default)s)") +parser.add_argument( + "--shuffle_method", + default='instance_shuffle', + type=str, + help="Shuffle method: 'instance_shuffle', 'batch_shuffle', " + "'batch_shuffle_batch'. (default: %(default)s)") parser.add_argument( "--trainer_count", default=4, @@ -93,9 +100,7 @@ def train(): - """ - DeepSpeech2 training. - """ + """DeepSpeech2 training.""" # initialize data generator def data_generator(): @@ -145,13 +150,13 @@ def data_generator(): batch_size=args.batch_size, min_batch_size=args.trainer_count, sortagrad=args.use_sortagrad if args.init_model_path is None else False, - batch_shuffle=True) + shuffle_method=args.shuffle_method) test_batch_reader = test_generator.batch_reader_creator( manifest_path=args.dev_manifest_path, batch_size=args.batch_size, min_batch_size=1, # must be 1, but will have errors. sortagrad=False, - batch_shuffle=False) + shuffle_method=None) # create event handler def event_handler(event): @@ -186,6 +191,7 @@ def event_handler(event): def main(): + utils.print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) train() diff --git a/deep_speech_2/utils.py b/deep_speech_2/utils.py new file mode 100644 index 0000000000..9ca363c8f5 --- /dev/null +++ b/deep_speech_2/utils.py @@ -0,0 +1,25 @@ +"""Contains common utility functions.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +def print_arguments(args): + """Print argparse's arguments. + + Usage: + + .. code-block:: python + + parser = argparse.ArgumentParser() + parser.add_argument("name", default="Jonh", type=str, help="User name.") + args = parser.parse_args() + print_arguments(args) + + :param args: Input argparse.Namespace for printing. 
+    :type args: argparse.Namespace
+    """
+    print("----- Configuration Arguments -----")
+    for arg, value in vars(args).iteritems():
+        print("%s: %s" % (arg, value))
+    print("------------------------------------")

From 59447821513ae8e3ee6aba44a5aff2c336870d8b Mon Sep 17 00:00:00 2001
From: wwhu
Date: Thu, 15 Jun 2017 19:13:50 +0800
Subject: [PATCH 29/43] revise README and some small problems

---
 scheduled_sampling/img/Scheduled_Sampling.jpg | Bin 0 -> 60598 bytes
 scheduled_sampling/img/decay.jpg              | Bin 0 -> 45631 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 scheduled_sampling/img/Scheduled_Sampling.jpg
 create mode 100644 scheduled_sampling/img/decay.jpg

diff --git a/scheduled_sampling/img/Scheduled_Sampling.jpg b/scheduled_sampling/img/Scheduled_Sampling.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..27f568a45f41af0aa3c0d982e64233e058302100
GIT binary patch
[binary JPEG payloads for scheduled_sampling/img/Scheduled_Sampling.jpg (60598 bytes) and scheduled_sampling/img/decay.jpg (45631 bytes) omitted]
z(Y_^V|D|lMbFCM-JU)wb9EFrtUXnK6ds1Jk8O4FSi5^`g@CPCHk#~Zo1&s(y7!0E3 z@QA3VzslreW+CxBg`Nnh6dM;^+6y%pf{}9S_PgY-ZHJqk%EMgCPsHf4JQOlm#M7*i zK+{T$q(yNfDK#-*L^fL}*))SJJl~S<>zO(F)k$^o`)P9vX`ZGNO36V=T!)(@FYdii zm;U0pm^}hTGR8$g_af!r`hUi}8+weC<8P7+#{e^3sm=l6Ux@w3zjUO3W zR#-fGm#&GeuRA=D+Wat1Thx?4&R|y<@p5Iwv(%m|uMR;DfYKF|3;HRFT~Tfr4pd6` z1J~Vlf&NgO1bi@bpPDYw&y)y!5=~dtR=+dfEZ7tGz6*iyVKA8eSX+?DJ*qJsSwo7X)H+K8|sR=vpO41?#8g_`nO7Ly? z5b(^0C%$`DE|=}|B^#N>SWJ?-LI;z2typ-6l#&dG^`>5bRjWCh=s*i%Siw1&y-<=C zE)>p(boV35u`<8fo-OXB+mXGPJj;iil7mwT8|D)Tj)kkHUXYB=`-{BqF1YS7`q-0s z6c;IVjg1dZGQy466Cm$%sKYb^q&O+6r7g0aeH`esI}+gR>izpiHsxoFqByUm5w-aq zb8~vcD_?h%#9i&rOuF?VP5GGQ5$E!pqxJxjA%Gr}%5-HY2VlY-po&~X6E@z< z=n_CCl!t0{y{bOR5?6)MO@{3qAMJG8^=_prEa!itGs#A*LyYR-=emlGxHli*f84DiVjO!N}K^ewE~# z`Od%7^XpvK`FsAo@89=%-{-mC>wcd5KJtt@T3)*Tv4_x#Be;vThznub+ei)KBwqvm zi{&~V2A^SWFX2F(APvo7ROaM87F0X$eqzt!calajebRg(wN`|!s9jk+>*p);!GEI) zC=~e_KVTpdPGU3}rG+?%sV^+_Yq$AS)#=YFp&B(bORl4tvr|WErr)iz8 zs$ogrfwUV;j~x^6EYQ&B$5(+zmBwv@G$_mFc{NUns=pJDh-Mn~w#oNlFIY-lP@vw_ z_XD%09lUreck>6DR-;F+^3*Aq&$V`#PLPtCI2w=J(USgT?r5@TmOV?NW4z`EA(?SH zigz08nF8WBhDLWnqw;D6wqZ;yWPZ?rTxPtWc%mY_3XM{VaDF8tfGpwiS3wA$rUJPf z%IkSQFml;E*GU%ATw|MpZNmJRr;p&|TK&)9E*i!r%y_NzW%K5f=pU*wlP|WqC0xD0 zq&708J&sPE0F|?7J;5|u`T&|NZ58a=4M@KZd7grmx?P&mpWOJhQEu3fkvF65a)r*Y zuO^#9RZ>*gORsj_ubMXA(E5BJB3&>cf+X zHa`aOyf?pq`zSrGx+^!vGGw&okC`LB@dqgXPTaonY5oy*TA;Q9XVY-O} zK3MuhJO*QQiL5;9R-I$l!_c+0rob;fZ`{bHs<|dRro8D)KC}0()9HWNZpZez+9nB1^=8pzd?%+ z4-TUf7;5ZjBc0%(W~U`GpTTZ3AEcr16kiZkyAF4=WZG;R2s{}X|1BgtIQn|c^5X~^(8lOK9WT+%otV85gCcsLkG>m{R;{nu!72w#GEHD)^?FvP; z#}yw8#@`X6uxGDdS8HY!R`Yqp;g*FH+!ojVzI{@Ioh|88NBg)%RqzDt1dJ9y6kxA8 z5`26KEXE3_S=|$uKWwyben6R;k`=>$K#w}3bBctn#}$q(j$a&6W!FlpA*;9P?a_Tl zWTAh2gK8nqdI4iuHvB^Yybuekv&t(^aEj;>$pu}dw0>HtgOOMRdXwIUrDYTC2}<5M zkReF@B?hu=2TQ2<%rNF;dv*^X&))rj^TVm|JatO~zVUQ8Zg5{qxaf>Kcl<7~N&0|@ z7!+F*N74_%ko@$ind!rQM1={aBi0Qj)zS9nn@3iXQ_C&OxtO`ygS-FE`NL}@i2^Bk zi#gPK^Ap6m{*m|dOBa6R(F zVa1qz{gdY5{6kg4MXP6$M(U4^1QEHEj{2%6Tyfky9@pn9i7sE(-YQ5WYWL=!^moN* zEf10xb)XlaK7lB?^~A_%+zzF6!%hL%P-#h$Nq3j_z;>C%MZAw$Yz1f(k_XcpBwytZ zLdH|X6L0YJ#Dw;$JB5glG;Tc+W9*)}>X=1QXa1(U)KcxINc)aRUk)IE84AmUOlHkr|(K>ff?t)>ZiXklGFv?F(6yA!XtmGYBb9e^Kje#@BjDz_4y$GeEk;^2c<^< literal 0 HcmV?d00001 From c4beb354936780f05a25c8c33a9956aec1ee5eb3 Mon Sep 17 00:00:00 2001 From: wwhu Date: Thu, 15 Jun 2017 19:14:36 +0800 Subject: [PATCH 30/43] revise README and some small problems --- scheduled_sampling/README.md | 93 ++++++++++++++++++------ scheduled_sampling/scheduled_sampling.py | 20 ++--- 2 files changed, 81 insertions(+), 32 deletions(-) diff --git a/scheduled_sampling/README.md b/scheduled_sampling/README.md index 18371b6a12..d85c90bb96 100644 --- a/scheduled_sampling/README.md +++ b/scheduled_sampling/README.md @@ -1,24 +1,46 @@ # Scheduled Sampling ## 概述 -序列生成任务的训练目标是在给定源输入的条件下,最大化目标序列的概率。训练时该模型将目标序列中的真实元素作为解码阶段每一步的输入,然后最大化下一个元素的概率。生成时上一步解码得到的元素被用作当前的输入,然后生成下一个元素。可见这种情况下训练阶段和生成阶段的解码层输入数据的概率分布并不一致。如果序列前面生成了错误的元素,后面的输入状态将会收到影响,而该误差会随着生成过程不断向后累积。 -Scheduled Sampling是一种解决训练和生成时输入数据分布不一致的方法。在训练早期该方法主要使用真实元素作为解码输入,以将模型从随机初始化的状态快速引导至一个合理的状态。随着训练的进行该方法会逐渐更多的使用生成元素作为解码输入,以解决数据分布不一致的问题。 + +序列生成任务的生成目标是在给定源输入的条件下,最大化目标序列的概率。训练时该模型将目标序列中的真实元素作为解码器每一步的输入,然后最大化下一个元素的概率。生成时上一步解码得到的元素被用作当前的输入,然后生成下一个元素。可见这种情况下训练阶段和生成阶段的解码器输入数据的概率分布并不一致。 + +Scheduled Sampling\[[1](#参考文献)\]是一种解决训练和生成时输入数据分布不一致的方法。在训练早期该方法主要使用目标序列中的真实元素作为解码器输入,可以将模型从随机初始化的状态快速引导至一个合理的状态。随着训练的进行,该方法会逐渐更多地使用生成的元素作为解码器输入,以解决数据分布不一致的问题。 + +标准的序列到序列模型中,如果序列前面生成了错误的元素,后面的输入状态将会收到影响,而该误差会随着生成过程不断向后累积。Scheduled 
Sampling feeds the generated element to the decoder with a certain probability, so even if earlier elements were generated incorrectly, the training objective is still to maximize the probability of the true target sequence, and the model keeps training in the right direction. This way the model becomes more tolerant of its own mistakes.

 ## Algorithm
-Scheduled Sampling is applied only to the training of sequence-to-sequence models; the generation phase does not need it.
-When generating the `t`-th element at decoding time, the standard sequence-to-sequence model uses the ground-truth element `y(t-1)` of the previous step as input. Let `g(t-1)` be the element generated at the previous step; the Scheduled Sampling algorithm uses `g(t-1)` as the decoder input with a certain probability.
-Suppose training has reached the `i`-th mini-batch. At step `t`, Scheduled Sampling uses the ground-truth element `y(t-1)` of the previous step as the decoder input with probability `epsilon_i`, and the element `g(t-1)` generated at the previous step with probability `1-epsilon_i`.
-As `i` grows, `epsilon_i` keeps decreasing, the decoder tends more and more to use generated elements as input, and the data distributions of the training and generation phases become increasingly consistent.
-`epsilon_i` can decay in different ways; common schemes are:
+Scheduled Sampling is applied only during the training phase of sequence-to-sequence models; the generation phase does not need it.
+
+When maximizing the probability of the $t$-th element during training, the standard sequence-to-sequence model uses the ground-truth element $y_{t-1}$ of the previous step as input. Let $g_{t-1}$ be the element generated at the previous step; the Scheduled Sampling algorithm uses $g_{t-1}$ as the decoder input with a certain probability.
+
+Suppose training has reached the $i$-th mini-batch. Scheduled Sampling defines a probability $\epsilon_i$ that controls the decoder input. $\epsilon_i$ is a variable that decays as $i$ grows; common definitions are:
+
+ - Linear decay: $\epsilon_i=\max(\epsilon,k-c \cdot i)$, where $\epsilon$ bounds the minimum value of $\epsilon_i$, and $k$ and $c$ control the magnitude of the linear decay.
+
+ - Exponential decay: $\epsilon_i=k^i$, where $0<k<1$ and $k$ controls the magnitude of the decay.
+
+ - Inverse sigmoid decay: $\epsilon_i=k/(k+e^{i/k})$, where $k>1$ and $k$ likewise controls the magnitude of the decay.

- - Linear decay: `epsilon_i=max(epsilon,k-c*i)`, where `epsilon` bounds the minimum value of `epsilon_i`, and `k` and `c` control the magnitude of the linear decay.
- - Exponential decay: `epsilon_i=k^i`, where `0<k<1` and `k` controls the magnitude of the decay.
- - Inverse sigmoid decay: `epsilon_i=k/(k+exp(i/k))`, where `k>1` and `k` likewise controls the magnitude of the decay.
+
+Figure 1 shows the decay curves of these three schemes.
+
+[figure]
+Figure 1. Decay curves of linear decay, exponential decay, and inverse sigmoid decay
+
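To make the three schedules concrete, here is a small standalone sketch. It is illustrative only: the parameter values are invented, and it follows the formulas above rather than the exact `a`/`b` parameterization of the `RandomScheduleGenerator` class shown later in this patch.

```python
import numpy as np

def epsilon(i, schedule, eps=0.1, k=1.0, c=1e-6):
    """Evaluate one of the decay schedules above at mini-batch index i."""
    if schedule == "linear":
        return max(eps, k - c * i)             # requires k, c > 0
    elif schedule == "exponential":
        return k ** i                          # requires 0 < k < 1
    elif schedule == "inverse_sigmoid":
        return k / (k + np.exp(float(i) / k))  # requires k > 1
    raise ValueError("unknown schedule: %s" % schedule)

for i in [0, 100000, 500000]:
    print("i=%6d  linear=%.3f  exponential=%.3f  inverse_sigmoid=%.3f" % (
        i,
        epsilon(i, "linear"),
        epsilon(i, "exponential", k=0.999995),
        epsilon(i, "inverse_sigmoid", k=30000.0)))
```

All three curves start near 1 (the decoder is almost always fed the ground truth) and decay towards 0 (the decoder is almost always fed its own previous output), which is the behavior plotted in Figure 1.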
+
+As shown in Figure 2, at decoder step $t$, Scheduled Sampling uses the ground-truth element $y_{t-1}$ of the previous step as the decoder input with probability $\epsilon_i$, and the element $g_{t-1}$ generated at the previous step with probability $1-\epsilon_i$. As Figure 1 shows, $\epsilon_i$ keeps decreasing as $i$ grows, so the decoder tends more and more to use generated elements as input, and the data distributions of the training and generation phases become increasingly consistent.
+
+[figure]
+Figure 2. Scheduled Sampling choosing different elements as the decoder input
+
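The per-step choice in Figure 2 can also be simulated directly. The sketch below uses invented token ids; `flags` plays the role of the `true_token_flag` input introduced in the implementation section, and the element-wise selection mirrors what the `multiplex` layer computes.

```python
import numpy as np

rng = np.random.RandomState(0)
epsilon_i = 0.75  # current schedule rate

true_words = np.array([12, 7, 42, 3, 9])        # y_{t-1}: ground-truth tokens
generated_words = np.array([12, 8, 40, 3, 11])  # g_{t-1}: the model's own outputs

# Flag 0 -> keep the ground-truth token (probability epsilon_i).
# Flag 1 -> keep the generated token   (probability 1 - epsilon_i).
flags = (rng.rand(len(true_words)) >= epsilon_i).astype('int32')

decoder_inputs = np.where(flags == 0, true_words, generated_words)
print(flags)           # one 0/1 flag per position
print(decoder_inputs)  # the tokens the decoder would actually consume
```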
## 模型实现 -由于Scheduled Sampling是对Sequence to Sequence模型的改进,其整体实现框架与Sequence to Sequence模型较为相似。为突出本文重点,这里仅介绍与Scheduled Sampling相关的部分,完整的代码见`scheduled_sampling.py`。 -首先定义控制衰减概率的类`RandomScheduleGenerator`,如下: +由于Scheduled Sampling是对序列到序列模型的改进,其整体实现框架与序列到序列模型较为相似。为突出本文重点,这里仅介绍与Scheduled Sampling相关的部分,完整的代码见`scheduled_sampling.py`。 + +首先导入需要的包,并定义控制衰减概率的类`RandomScheduleGenerator`,如下: + ```python import numpy as np import math @@ -28,8 +50,16 @@ class RandomScheduleGenerator: """ The random sampling rate for scheduled sampling algoithm, which uses devcayed sampling rate. + """ + ... +``` +下面将分别定义类`RandomScheduleGenerator`的`__init__`、`getScheduleRate`和`processBatch`三个方法。 + +`__init__`方法对类进行初始化,其`schedule_type`参数指定了使用哪种衰减方式,可选的方式有`constant`、`linear`、`exponential`和`inverse_sigmoid`。`constant`指对所有的mini-batch使用固定的$\epsilon_i$,`linear`指线性衰减方式,`exponential`表示指数衰减方式,`inverse_sigmoid`表示反向Sigmoid衰减。`__init__`方法的参数`a`和`b`表示衰减方法的参数,需要在验证集上调优。`self.schedule_computers`将衰减方式映射为计算$\epsilon_i$的函数。最后一行根据`schedule_type`将选择的衰减函数赋给`self.schedule_computer`变量。 + +```python def __init__(self, schedule_type, a, b): """ schduled_type: is the type of the decay. It supports constant, linear, @@ -49,13 +79,22 @@ class RandomScheduleGenerator: } assert (self.schedule_type in self.schedule_computers) self.schedule_computer = self.schedule_computers[self.schedule_type] +``` +`getScheduleRate`根据衰减函数和已经处理的数据量计算$\epsilon_i$。 + +```python def getScheduleRate(self): """ Get the schedule sampling rate. Usually not needed to be called by the users """ return self.schedule_computer(self.a, self.b, self.data_processed_) +``` + +`processBatch`方法根据概率值$\epsilon_i$进行采样,得到`indexes`,`indexes`中每个元素取值为`0`的概率为$\epsilon_i$,取值为`1`的概率为$1-\epsilon_i$。`indexes`决定了解码器的输入是真实元素还是生成的元素,取值为`0`表示使用真实元素,取值为`1`表示使用生成的元素。 + +```python def processBatch(self, batch_size): """ Get a batch_size of sampled indexes. These indexes can be passed to a @@ -68,10 +107,16 @@ class RandomScheduleGenerator: self.data_processed_ += batch_size return indexes ``` -其中`__init__`方法定义了几种不同的衰减概率,`processBatch`方法根据该概率进行采样,最终确定解码时是使用真实元素还是使用生成的元素。 +Scheduled Sampling需要在序列到序列模型的基础上增加一个输入`true_token_flag`,以控制解码器输入。 -这里对数据reader进行封装,加入从`RandomScheduleGenerator`采样得到的`true_token_flag`作为另一组数据输入,控制解码使用的元素。 +```python +true_token_flags = paddle.layer.data( + name='true_token_flag', + type=paddle.data_type.integer_value_sequence(2)) +``` + +这里还需要对原始reader进行封装,增加`true_token_flag`的数据生成器。下面以线性衰减为例说明如何调用上面定义的`RandomScheduleGenerator`产生`true_token_flag`的输入数据。 ```python schedule_generator = RandomScheduleGenerator("linear", 0.75, 1000000) @@ -98,7 +143,9 @@ def gen_schedule_data(reader): return data_reader ``` -训练时`recurrent_group`每一步调用的解码函数如下: +这段代码在原始输入数据(即源序列元素`src_ids`、目标序列元素`trg_ids`和目标序列下一个元素`trg_ids_next`)后追加了控制解码器输入的数据。由于解码器第一个元素是序列开始符,因此将追加的数据第一个元素设置为`0`,表示解码器第一步始终使用真实目标序列的第一个元素(即序列开始符)。 + +训练时`recurrent_group`每一步调用的解码器函数如下: ```python def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, @@ -106,15 +153,15 @@ def gen_schedule_data(reader): """ The decoder step for training. 
     :param enc_vec: the encoder vector for attention
-    :type enc_vec: Layer
+    :type enc_vec: LayerOutput
     :param enc_proj: the encoder projection for attention
-    :type enc_proj: Layer
+    :type enc_proj: LayerOutput
     :param true_word: the ground-truth target word
-    :type true_word: Layer
+    :type true_word: LayerOutput
     :param true_token_flag: the flag of using the ground-truth target word
-    :type true_token_flag: Layer
+    :type true_token_flag: LayerOutput
     :return: the softmax output layer
-    :rtype: Layer
+    :rtype: LayerOutput
     """

     decoder_mem = paddle.layer.memory(
@@ -159,6 +206,8 @@ def gen_schedule_data(reader):
     return out
 ```
-This function uses the `memory` layer `gru_out_memory` to remember the elements generated at the different time steps, and uses a `multiplex` layer to choose whether to use the generated element as the decoder input.
+This function uses the `memory` layer `gru_out_memory` to remember the element generated at the previous time step and, based on `gru_out_memory`, picks the most probable word `generated_word` as the generated word. The `multiplex` layer then chooses between the ground-truth element `true_word` and the generated element `generated_word`, and the chosen result becomes the decoder input. The `multiplex` layer takes three inputs: `true_token_flag`, `true_word`, and `generated_word_emb`. For each element of these inputs, if the value in `true_token_flag` is `0`, the `multiplex` layer outputs the corresponding element of `true_word`; if the value in `true_token_flag` is `1`, it outputs the corresponding element of `generated_word_emb`.
+
+## References
-
-### Training results: to be added once hyper-parameter tuning is finished
+[1] Bengio S, Vinyals O, Jaitly N, et al. [Scheduled sampling for sequence prediction with recurrent neural networks](http://papers.nips.cc/paper/5956-scheduled-sampling-for-sequence-prediction-with-recurrent-neural-networks)//Advances in Neural Information Processing Systems. 2015: 1171-1179.
diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py
index a516bd2d4b..a896fd6c59 100644
--- a/scheduled_sampling/scheduled_sampling.py
+++ b/scheduled_sampling/scheduled_sampling.py
@@ -37,7 +37,7 @@ def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
     :param is_generating: whether in generating mode
     :type is_generating: Bool
     :return: the last layer of the network
-    :rtype: Layer
+    :rtype: LayerOutput
     """
     ### Network Architecture
     word_vector_dim = 512  # dimension of word vector
@@ -76,15 +76,15 @@ def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word,
         """
         The decoder step for training.
         :param enc_vec: the encoder vector for attention
-        :type enc_vec: Layer
+        :type enc_vec: LayerOutput
         :param enc_proj: the encoder projection for attention
-        :type enc_proj: Layer
+        :type enc_proj: LayerOutput
         :param true_word: the ground-truth target word
-        :type true_word: Layer
+        :type true_word: LayerOutput
         :param true_token_flag: the flag of using the ground-truth target word
-        :type true_token_flag: Layer
+        :type true_token_flag: LayerOutput
         :return: the softmax output layer
-        :rtype: Layer
+        :rtype: LayerOutput
         """

         decoder_mem = paddle.layer.memory(
@@ -132,13 +132,13 @@ def gru_decoder_with_attention_test(enc_vec, enc_proj, current_word):
         """
         The decoder step for generating.
         :param enc_vec: the encoder vector for attention
-        :type enc_vec: Layer
+        :type enc_vec: LayerOutput
         :param enc_proj: the encoder projection for attention
-        :type enc_proj: Layer
+        :type enc_proj: LayerOutput
         :param current_word: the previously generated word
-        :type current_word: Layer
+        :type current_word: LayerOutput
         :return: the softmax output layer
-        :rtype: Layer
+        :rtype: LayerOutput
         """

         decoder_mem = paddle.layer.memory(

From 8a30af944e663f1095f7c36ef8d164fca707ffae Mon Sep 17 00:00:00 2001
From: wwhu
Date: Thu, 15 Jun 2017 19:23:12 +0800
Subject: [PATCH 31/43] resize figure

---
 scheduled_sampling/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scheduled_sampling/README.md b/scheduled_sampling/README.md
index d85c90bb96..9af4387e12 100644
--- a/scheduled_sampling/README.md
+++ b/scheduled_sampling/README.md
@@ -24,14 +24,14 @@ Scheduled Sampling is applied only during the training phase of sequence-to-sequence models
 Figure 1 shows the decay curves of these three schemes.
 
-[figure 1 image, previous size]
+[figure 1 image, resized]
 Figure 1. Decay curves of linear decay, exponential decay, and inverse sigmoid decay
 
 As shown in Figure 2, at decoder step $t$, Scheduled Sampling uses the ground-truth element $y_{t-1}$ of the previous step as the decoder input with probability $\epsilon_i$, and the element $g_{t-1}$ generated at the previous step with probability $1-\epsilon_i$. As Figure 1 shows, $\epsilon_i$ keeps decreasing as $i$ grows, so the decoder tends more and more to use generated elements as input, and the data distributions of the training and generation phases become increasingly consistent.
 
-[figure 2 image, previous size]
+[figure 2 image, resized]
 Figure 2. Scheduled Sampling choosing different elements as the decoder input
 
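Before the series moves on to the DeepSpeech 2 data-utility changes below, here is a self-contained toy run of the reader wrapper quoted in the PATCH 30 README. The token ids are invented for illustration; `RandomScheduleGenerator` is the class from this series, and the wrapper body is the one shown above.

```python
from random_schedule_generator import RandomScheduleGenerator

schedule_generator = RandomScheduleGenerator("linear", 0.75, 1000000)

def toy_reader():
    # Invented (src_ids, trg_ids, trg_ids_next) triples.
    yield [1, 4, 3], [0, 5, 6], [5, 6, 2]
    yield [4, 2], [0, 7, 8], [7, 8, 2]

def gen_schedule_data(reader):
    # Same wrapper as in the PATCH 30 README: append one 0/1 flag
    # per target position, sampled at the current schedule rate.
    def data_reader():
        for src_ids, trg_ids, trg_ids_next in reader():
            yield src_ids, trg_ids, trg_ids_next, \
                schedule_generator.processBatch(len(trg_ids))
    return data_reader

for sample in gen_schedule_data(toy_reader)():
    print("%s -> flags %s" % (sample[1], sample[3]))
```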
From 42ba74efff16f79c76f80dcf9587be533e321530 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Fri, 16 Jun 2017 15:17:43 +0800 Subject: [PATCH 32/43] add audio part --- deep_speech_2/data_utils/audio.py | 457 ++++++++---------- .../data_utils/augmentor/audio_database.py | 401 --------------- .../data_utils/augmentor/augmentation.py | 15 - .../data_utils/augmentor/implus_response.py | 76 --- .../data_utils/augmentor/noise_speech.py | 318 ------------ .../online_bayesian_normalization.py | 57 --- .../data_utils/augmentor/resampler.py | 30 -- .../data_utils/augmentor/speed_perturb.py | 53 -- .../data_utils/augmentor/volume_perturb.py | 4 +- deep_speech_2/requirements.txt | 2 + 10 files changed, 215 insertions(+), 1198 deletions(-) delete mode 100755 deep_speech_2/data_utils/augmentor/audio_database.py delete mode 100755 deep_speech_2/data_utils/augmentor/implus_response.py delete mode 100755 deep_speech_2/data_utils/augmentor/noise_speech.py delete mode 100755 deep_speech_2/data_utils/augmentor/online_bayesian_normalization.py delete mode 100755 deep_speech_2/data_utils/augmentor/resampler.py delete mode 100755 deep_speech_2/data_utils/augmentor/speed_perturb.py diff --git a/deep_speech_2/data_utils/audio.py b/deep_speech_2/data_utils/audio.py index aef13c30ff..ee4e6d84e1 100755 --- a/deep_speech_2/data_utils/audio.py +++ b/deep_speech_2/data_utils/audio.py @@ -8,6 +8,7 @@ import soundfile import scikits.samplerate from scipy import signal +import random class AudioSegment(object): @@ -46,6 +47,32 @@ def __ne__(self, other): """Return whether two objects are unequal.""" return not self.__eq__(other) + def __len__(self): + """Returns length of segment in samples.""" + return self.num_samples + + def __add__(self, other): + """Add samples from another segment to those of this segment and return + a new segment (sample-wise addition, not segment concatenation). + + :param other: Segment containing samples to be + added in. + :type other: AudioSegment + :return: New segment containing resulting samples. + :rtype: AudioSegment + :raise TypeError: If sample rates of segments don't match, + or if length of segments don't match. + """ + if type(self) != type(other): + raise TypeError("Cannot add segment of different type: {}" + .format(type(other))) + if self._sample_rate != other._sample_rate: + raise TypeError("Sample rates must match to add segments.") + if len(self._samples) != len(other._samples): + raise TypeError("Segment lengths must match to add segments.") + samples = self.samples + other.samples + return type(self)(samples, sample_rate=self._sample_rate) + def __str__(self): """Return human-readable representation of segment.""" return ("%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, " @@ -64,69 +91,6 @@ def from_file(cls, file): samples, sample_rate = soundfile.read(file, dtype='float32') return cls(samples, sample_rate) - @classmethod - def slice_from_file(cls, fname, start=None, end=None): - """ - Loads a small section of an audio without having to load - the entire file into the memory which can be incredibly wasteful. - - :param fname: input audio file name - :type fname: bsaestring - :param start: start time in seconds (supported granularity is ms) - If start is negative, it wraps around from the end. If not - provided, this function reads from the very beginning. - :type start: float - :param end: start time in seconds (supported granularity is ms) - If end is negative, it wraps around from the end. 
If not
-            provided, the default behvaior is to read to the
-            end of the file.
-        :type end: float
-
-        :return: the specified slice of input audio in the audio.AudioSegment
-            format.
-        """
-        sndfile = soundfile.SoundFile(fname)
-
-        sample_rate = sndfile.samplerate
-        if sndfile.channels != 1:
-            raise TypeError("{} has more than 1 channel.".format(fname))
-
-        duration = float(len(sndfile)) / sample_rate
-
-        if start is None:
-            start = 0.0
-        if end is None:
-            end = duration
-
-        if start < 0.0:
-            start += duration
-        if end < 0.0:
-            end += duration
-
-        if start < 0.0:
-            raise IndexError("The slice start position ({} s) is out of "
-                             "bounds. Filename: {}".format(start, fname))
-        if end < 0.0:
-            raise IndexError("The slice end position ({} s) is out of bounds "
-                             "Filename: {}".format(end, fname))
-
-        if start > end:
-            raise IndexError("The slice start position ({} s) is later than "
-                             "the slice end position ({} s)."
-                             .format(start, end))
-
-        if end > duration:
-            raise ValueError("The slice end time ({} s) is out of "
-                             "bounds (> {} s) Filename: {}"
-                             .format(end, duration, fname))
-
-        start_frame = int(start * sample_rate)
-        end_frame = int(end * sample_rate)
-        sndfile.seek(start_frame)
-        data = sndfile.read(frames=end_frame - start_frame, dtype='float32')
-
-        return cls(data, sample_rate)
-
     @classmethod
     def from_bytes(cls, bytes):
         """Create audio segment from a byte string containing audio samples.
@@ -140,43 +104,30 @@ def from_bytes(cls, bytes):
             io.BytesIO(bytes), dtype='float32')
         return cls(samples, sample_rate)
 
-    @classmethod
-    def make_silence(cls, duration, sample_rate):
-        """Creates a silent audio segment of the given duration and
-        sample rate.
-
-        :param duration: length of silence in seconds
-        :type duration: scalar
-        :param sample_rate: sample rate
-        :type sample_rate: scalar
-        :returns: silence of the given duration
-        :rtype: AudioSegment
-        """
-        samples = np.zeros(int(float(duration) * sample_rate))
-        return cls(samples, sample_rate)
-
-    @classmethod
-    def concatenate(cls, *segments):
+    def concatenate(self, *segments):
         """Concatenate an arbitrary number of audio segments together.
 
-        :param *segments: input audio segments
-        :type *segments: [AudioSegment]
+        :param *segments: Input audio segments.
+        :type *segments: AudioSegment
+        :return: Audio segment instance.
+        :rtype: AudioSegment
+        :raises ValueError: If the number of segments is zero, or if the
+                            sample rates of two audio segments do not match.
+        :raises TypeError: If an item of segments is not an AudioSegment
+                           instance.
         """
         # Perform basic sanity-checks.
-        N = len(segments)
-        if N == 0:
+        if len(segments) == 0:
             raise ValueError("No audio segments are given to concatenate.")
         sample_rate = segments[0]._sample_rate
-        for segment in segments:
-            if sample_rate != segment._sample_rate:
+        for seg in segments:
+            if sample_rate != seg._sample_rate:
                 raise ValueError("Can't concatenate segments with "
                                  "different sample rates")
-            if type(segment) is not cls:
+            if type(seg) is not type(self):
                 raise TypeError("Only audio segments of the same type "
                                 "instance can be concatenated.")
-        samples = np.concatenate([seg.samples for seg in segments])
-        return cls(samples, sample_rate)
+        samples = np.concatenate([seg.samples for seg in segments])
+        return type(self)(samples, sample_rate)
 
     def to_wav_file(self, filepath, dtype='float32'):
         """Save audio segment to disk as wav file.
@@ -203,6 +154,65 @@ def to_wav_file(self, filepath, dtype='float32'):
             format='WAV',
             subtype=subtype_map[dtype])
 
+    def slice_from_file(self, file, start=None, end=None):
+        """Loads a small section of an audio without having to load
+        the entire file into the memory which can be incredibly wasteful.
+
+        :param file: Input audio filepath.
+        :type file: basestring
+        :param start: Start time in seconds. If start is negative, it wraps
+                      around from the end. If not provided, this function
+                      reads from the very beginning.
+        :type start: float
+        :param end: End time in seconds. If end is negative, it wraps around
+                    from the end. If not provided, the default behavior is
+                    to read to the end of the file.
+        :type end: float
+        :return: The specified slice of input audio in the audio.AudioSegment
+                 format.
+        :rtype: AudioSegment
+        :raises ValueError: If the start or end positions are invalid, or if
+                            the requested time span is out of bounds.
+        """
+        sndfile = soundfile.SoundFile(file)
+        sample_rate = sndfile.samplerate
+        duration = float(len(sndfile)) / sample_rate
+        start = 0. if start is None else start
+        end = duration if end is None else end
+        if start < 0.0:
+            start += duration
+        if end < 0.0:
+            end += duration
+        if start < 0.0:
+            raise ValueError("The slice start position (%f s) is out of "
+                             "bounds. Filename: %s" % (start, file))
+        if end < 0.0:
+            raise ValueError("The slice end position (%f s) is out of bounds "
+                             "Filename: %s" % (end, file))
+        if start > end:
+            raise ValueError("The slice start position (%f s) is later than "
+                             "the slice end position (%f s)." % (start, end))
+        if end > duration:
+            raise ValueError("The slice end time (%f s) is out of bounds "
+                             "(> %f s) Filename: %s" % (end, duration, file))
+        start_frame = int(start * sample_rate)
+        end_frame = int(end * sample_rate)
+        sndfile.seek(start_frame)
+        data = sndfile.read(frames=end_frame - start_frame, dtype='float32')
+        return type(self)(data, sample_rate)
+
+    def make_silence(self, duration, sample_rate):
+        """Creates a silent audio segment of the given duration and
+        sample rate.
+
+        :param duration: Length of silence in seconds.
+        :type duration: float
+        :param sample_rate: Sample rate.
+        :type sample_rate: float
+        :return: Silence of the given duration.
+        :rtype: AudioSegment
+        """
+        samples = np.zeros(int(duration * sample_rate))
+        return type(self)(samples, sample_rate)
+
     def to_bytes(self, dtype='float32'):
         """Create a byte string containing the audio content.
This is to prevent nans when attempting + to normalize a signal consisting of all zeros. + :type max_gain_db: float + :raises ValueError: If the required gain to normalize the segment to + the target_db value exceeds max_gain_db. """ gain = target_db - self.rms_db if gain > max_gain_db: raise ValueError( - "Unable to normalize segment to {} dB because it has an RMS " - "value of {} dB and the difference exceeds max_gain_db ({} dB)" - .format(target_db, self.rms_db, max_gain_db)) - gain = min(max_gain_db, target_db - self.rms_db) - self.apply_gain(gain) + "Unable to normalize segment to %f dB because it has an RMS " + "value of %f dB and the difference exceeds max_gain_db (%f dB)" + % (target_db, self.rms_db, max_gain_db)) + self.apply_gain(min(max_gain_db, target_db - self.rms_db)) def normalize_online_bayesian(self, target_db, prior_db, prior_samples, startup_delay=0.0): - """ - Normalize audio using a production-compatible online/causal algorithm. - This uses an exponential likelihood and gamma prior to make - online estimates of the RMS even when there are very few samples. + """Normalize audio using a production-compatible online/causal algorithm. + This uses an exponential likelihood and gamma prior to make online estimates + of the RMS even when there are very few samples. Note that this is an in-place transformation. :param target_db: Target RMS value in decibels - :type target_bd: scalar + :type target_bd: float :param prior_db: Prior RMS estimate in decibels - :type prior_db: scalar + :type prior_db: float :param prior_samples: Prior strength in number of samples - :type prior_samples: scalar - :param startup_delay: Default: 0.0 s. If provided, this - function will accrue statistics for the first startup_delay - seconds before applying online normalization. - :type startup_delay: scalar + :type prior_samples: float + :param startup_delay: Default 0.0 s. If provided, this function will accrue + statistics for the first startup_delay seconds before + applying online normalization. 
+ :type startup_delay: float """ # Estimate total RMS online startup_sample_idx = min(self.num_samples - 1, @@ -309,88 +316,54 @@ def normalize_online_bayesian(self, mean_squared_estimate = ((cumsum_of_squares + prior_sum_of_squares) / (sample_count + prior_samples)) rms_estimate_db = 10 * np.log10(mean_squared_estimate) - # Compute required time-varying gain gain_db = target_db - rms_estimate_db - - # Apply gain to new segment - self.apply_gain(gain_db) - - def normalize_ewma(self, - target_db, - decay_rate, - startup_delay, - rms_eps=1e-6, - max_gain_db=300.0): - startup_sample_idx = min(self.num_samples - 1, - int(self.sample_rate * startup_delay)) - mean_sq = self.samples**2 - if startup_sample_idx > 0: - mean_sq[:startup_sample_idx] = \ - np.sum(mean_sq[:startup_sample_idx]) / startup_sample_idx - idx_start = max(0, startup_sample_idx - 1) - initial_condition = mean_sq[idx_start] * decay_rate - mean_sq[idx_start:] = lfilter( - [1.0 - decay_rate], [1.0, -decay_rate], - mean_sq[idx_start:], - axis=0, - zi=[initial_condition])[0] - rms_estimate_db = 10.0 * np.log10(mean_sq + rms_eps) - gain_db = target_db - rms_estimate_db - if np.any(gain_db > max_gain_db): - warnings.warn( - "Unable to normalize segment to {} dB because it has an RMS " - "value of {} dB and the difference exceeds max_gain_db ({} dB)" - .format(target_db, self.rms_db, max_gain_db), - NormalizationWarning) - gain_db = np.minimum(gain_db, max_gain_db) self.apply_gain(gain_db) def resample(self, target_sample_rate, quality='sinc_medium'): - """Resample audio and return new AudioSegment. - This resamples the audio to a new sample rate and returns a brand - new AudioSegment. The existing AudioSegment is unchanged. + """Resample audio segment. This resamples the audio to a new + sample rate. Note that this is an in-place transformation. - :param new_sample_rate: target sample rate - :type new_sample_rate: scalar + :param target_sample_rate: Target sample rate + :type target_sample_rate: int :param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}. - Sets resampling speed/quality tradeoff. - See http://www.mega-nerd.com/SRC/api_misc.html#Converters + Sets resampling speed/quality tradeoff. + See http://www.mega-nerd.com/SRC/api_misc.html#Converters :type quality: basestring """ resample_ratio = target_sample_rate / self._sample_rate new_samples = scikits.samplerate.resample( self._samples, r=resample_ratio, type=quality) self._samples = new_samples - self._sample_rate = new_sample_rate + self._sample_rate = target_sample_rate def pad_silence(self, duration, sides='both'): """Pads this audio sample with a period of silence. Note that this is an in-place transformation. - :param duration: length of silence in seconds to pad + :param duration: Length of silence in seconds to pad :type duration: float - :param sides: - 'beginning' - adds silence in the beginning - 'end' - adds silence in the end - 'both' - adds silence in both the beginning and the end. - :type sides: basestring + :param sides: Position for padding + 'beginning' - adds silence in the beginning + 'end' - adds silence in the end + 'both' - adds silence in both the beginning and the end. 
+ :type sides: str + :raises ValueError: If the sides not surport """ if duration == 0.0: return self - cls = type(self) - silence = cls.make_silence(duration, self._sample_rate) + silence = self.make_silence(duration, self._sample_rate) if sides == "beginning": - padded = cls.concatenate(silence, self) + padded = self.concatenate(silence, self) elif sides == "end": - padded = cls.concatenate(self, silence) + padded = self.concatenate(self, silence) elif sides == "both": - padded = cls.concatenate(silence, self, silence) + padded = self.concatenate(silence, self, silence) else: - raise ValueError("Unknown value for the kwarg 'sides'") + raise ValueError("Unknown value for the kwarg %s" % sides) self._samples = padded._samples self._sample_rate = padded._sample_rate @@ -398,88 +371,83 @@ def subsegment(self, start_sec=None, end_sec=None): """Return new AudioSegment containing audio between given boundaries. :param start_sec: Beginning of subsegment in seconds, - (beginning of segment if None). - :type start_sec: scalar + (beginning of segment if None). + :type start_sec: float :param end_sec: End of subsegment in seconds, - (end of segment if None). - :type end_sec: scalar - - :return: New AudioSegment containing specified - subsegment. - :trype: AudioSegment + (end of segment if None). + :type end_sec: float + :return: New AudioSegment containing specified subsegment. + :rtype: AudioSegment """ - # Default boundaries - if start_sec is None: - start_sec = 0.0 - if end_sec is None: - end_sec = self.duration - + start_sec = 0.0 if start_sec is None else start_sec + end_sec = self.duration if end_sec is None else end_sec # negative boundaries are relative to end of segment if start_sec < 0.0: start_sec = self.duration + start_sec if end_sec < 0.0: end_sec = self.duration + end_sec - start_sample = int(round(start_sec * self._sample_rate)) end_sample = int(round(end_sec * self._sample_rate)) samples = self._samples[start_sample:end_sample] - return type(self)(samples, sample_rate=self._sample_rate) def random_subsegment(self, subsegment_length, rng=None): - """ - Return a random subsegment of a specified length in seconds. + """Return a random subsegment of a specified length in seconds. :param subsegment_length: Subsegment length in seconds. - :type subsegment_length: scalar + :type subsegment_length: float :param rng: Random number generator state - :type rng: random.Random [optional] - - - :return:clip (SpeechDLSegment): New SpeechDLSegmen containing random - subsegment of original segment. + :type rng: random.Random + :return: New AudioSegment containing random subsegment + of original segment + :rtype: AudioSegment + :raises ValueError: If the length of subsegment greater than origineal + segemnt. """ - if rng is None: - rng = random.Random() - + rng = random.Random() if rng is None else rng if subsegment_length > self.duration: raise ValueError("Length of subsegment must not be greater " "than original segment.") start_time = rng.uniform(0.0, self.duration - subsegment_length) return self.subsegment(start_time, start_time + subsegment_length) - def convolve(self, ir, allow_resampling=False): + def convolve(self, impulse_segment, allow_resample=False): """Convolve this audio segment with the given filter. - :param ir: impulse response - :type ir: AudioSegment - :param allow_resampling: indicates whether resampling is allowed - when the ir has a different sample rate from this signal. 
- :type allow_resampling: boolean - """ - if allow_resampling and self.sample_rate != ir.sample_rate: - ir = ir.resample(self.sample_rate) - - if self.sample_rate != ir.sample_rate: - raise ValueError("Impulse response sample rate ({}Hz) is " - "equal to base signal sample rate ({}Hz)." - .format(ir.sample_rate, self.sample_rate)) + Note that this is an in-place transformation. - samples = signal.fftconvolve(self.samples, ir.samples, "full") + :param impulse_segment: Impulse response segments. + :type impulse_segment: AudioSegment + :param allow_resample: indicates whether resampling is allowed when + the impulse_segment has a different sample + rate from this signal. + :type allow_resample: boolean + :raises ValueError: If the sample rate is not match between two + audio segments and resample is not allowed. + """ + if allow_resample and self.sample_rate != impulse_segment.sample_rate: + impulse_segment = impulse_segment.resample(self.sample_rate) + if self.sample_rate != impulse_segment.sample_rate: + raise ValueError("Impulse segment's sample rate (%d Hz) is not" + "equal to base signal sample rate (%d Hz)." % + (impulse_segment.sample_rate, self.sample_rate)) + samples = signal.fftconvolve(self.samples, impulse_segment.samples, + "full") self._samples = samples - def convolve_and_normalize(self, ir, allow_resample=False): + def convolve_and_normalize(self, impulse_segment, allow_resample=False): """Convolve and normalize the resulting audio segment so that it has the same average power as the input signal. - :param ir: impulse response - :type ir: AudioSegment - :param allow_resampling: indicates whether resampling is allowed - when the ir has a different sample rate from this signal. - :type allow_resampling: boolean + :param impulse_segment: Impulse response segments. + :type impulse_segment: AudioSegment + :param allow_resample: indicates whether resampling is allowed when + the impulse_segment has a different sample rate from this signal. + :type allow_resample: boolean """ - self.convolve(ir, allow_resampling=allow_resampling) - self.normalize(target_db=self.rms_db) + target_db = self.rms_db + self.convolve(impulse_segment, allow_resample=allow_resample) + self.normalize(target_db) def add_noise(self, noise, @@ -492,36 +460,33 @@ def add_noise(self, of matching length is sampled from it and used instead. :param noise: Noise signal to add. - :type noise: SpeechDLSegment + :type noise: AudioSegment :param snr_dB: Signal-to-Noise Ratio, in decibels. - :type snr_dB: scalar - :param allow_downsampling: whether to allow the noise signal - to be downsampled to match the base signal sample rate. + :type snr_dB: float + :param allow_downsampling: whether to allow the noise signal to be downsampled + to match the base signal sample rate. :type allow_downsampling: boolean - :param max_gain_db: Maximum amount of gain to apply to noise - signal before adding it in. This is to prevent attempting - to apply infinite gain to a zero signal. - :type max_gain_db: scalar + :param max_gain_db: Maximum amount of gain to apply to noise signal before + adding it in. This is to prevent attempting to apply infinite + gain to a zero signal. + :type max_gain_db: float :param rng: Random number generator state. :type rng: random.Random - - Returns: - SpeechDLSegment: signal with noise added. + :raises ValueError: If the sample rate does not match between the two audio segments + and resample is not allowed, or if the duration of noise segments + is shorter than original audio segments. 
""" - if rng is None: - rng = random.Random() - + rng = random.Random() if rng is None else rng if allow_downsampling and noise.sample_rate > self.sample_rate: noise = noise.resample(self.sample_rate) - if noise.sample_rate != self.sample_rate: - raise ValueError("Noise sample rate ({}Hz) is not equal to " - "base signal sample rate ({}Hz)." - .format(noise.sample_rate, self.sample_rate)) + raise ValueError("Noise sample rate (%d Hz) is not equal to " + "base signal sample rate (%d Hz)." % + (noise.sample_rate, self.sample_rate)) if noise.duration < self.duration: - raise ValueError("Noise signal ({} sec) must be at " - "least as long as base signal ({} sec)." - .format(noise.duration, self.duration)) + raise ValueError("Noise signal (%f sec) must be at " + "least as long as base signal (%f sec)." % + (noise.duration, self.duration)) noise_gain_db = self.rms_db - noise.rms_db - snr_dB noise_gain_db = min(max_gain_db, noise_gain_db) noise_subsegment = noise.random_subsegment(self.duration, rng=rng) @@ -529,6 +494,12 @@ def add_noise(self, self._samples = output._samples self._sample_rate = output._sample_rate + def tranform_noise(self, noise_subsegment, noise_gain_db): + """ tranform noise file + """ + return type(self)(noise_subsegment._samples * (10.**( + noise_gain_db / 20.)), noise_subsegment._sample_rate) + @property def samples(self): """Return audio samples. @@ -618,9 +589,3 @@ def _convert_samples_from_float32(self, samples, dtype): else: raise TypeError("Unsupported sample type: %s." % samples.dtype) return output_samples.astype(dtype) - - def tranform_noise(self, noise_subsegment, noise_gain_db): - """ tranform noise file - """ - return type(self)(noise_subsegment._samples * (10.**( - noise_gain_db / 20.)), noise_subsegment._sample_rate) diff --git a/deep_speech_2/data_utils/augmentor/audio_database.py b/deep_speech_2/data_utils/augmentor/audio_database.py deleted file mode 100755 index e41c6dd72b..0000000000 --- a/deep_speech_2/data_utils/augmentor/audio_database.py +++ /dev/null @@ -1,401 +0,0 @@ -from __future__ import print_function -from collections import defaultdict -import bisect -import logging -import numpy as np -import os -import random -import sys - -UNK_TAG = "" - - -def stream_audio_index(fname, UNK=UNK_TAG): - """Reads an audio index file and emits one record in the index at a time. - - :param fname: audio index path - :type fname: basestring - :param UNK: UNK token to denote that certain audios are not tagged. - :type UNK: basesring - - Yields: - idx, duration, size, relpath, tags (int, float, int, str, list(str)): - audio file id, length of the audio in seconds, size in byte, - relative path w.r.t. to the root noise directory, list of tags - """ - with open(fname) as audio_index_file: - for i, line in enumerate(audio_index_file): - tok = line.strip().split("\t") - assert len(tok) >= 4, \ - "Invalid line at line {} in file {}".format( - i + 1, audio_index_file) - idx = int(tok[0]) - duration = float(tok[1]) - # Sometimes, the duration can round down to 0.0 - assert duration >= 0.0, \ - "Invalid duration at line {} in file {}".format( - i + 1, audio_index_file) - size = int(tok[2]) - assert size > 0, \ - "Invalid size at line {} in file {}".format( - i + 1, audio_index_file) - relpath = tok[3] - if len(tok) == 4: - tags = [UNK_TAG] - else: - tags = tok[4:] - yield idx, duration, size, relpath, tags - - -def truncate_float(val, ndigits=6): - """ Truncates a floating-point value to have the desired number of - digits after the decimal point. 
- - :param val: input value. - :type val: float - :parma ndigits: desired number of digits. - :type ndigits: int - - :return: truncated value - :rtype: float - """ - p = 10.0**ndigits - return float(int(val * p)) / p - - -def print_audio_index(idx, duration, size, relpath, tags, file=sys.stdout): - """Prints an audio record to the index file. - - :param idx: Audio file id. - :type idx: int - :param duration: length of the audio in seconds - :type duration: float - :param size: size of the file in bytes - :type size: int - :param relpath: relative path w.r.t. to the root noise directory. - :type relpath: basestring - :parma tags: list of tags - :parma tags: list(str) - :parma file: file to which we want to write an audio record. - :type file: sys.stdout - """ - file.write("{}\t{:.6f}\t{}\t{}" - .format(idx, truncate_float(duration, ndigits=6), size, relpath)) - for tag in tags: - file.write("\t{}".format(tag)) - file.write("\n") - - -class AudioIndex(object): - """ In-memory index of audio files that do not have annotations. - This supports duration-based sampling and sampling from a target - distribution. - - Each line in the index file consists of the following fields: - (id (int), duration (float), size (int), relative path (str), - list of tags ([str])) - """ - - def __init__(self): - self.audio_dir = None - self.index_fname = None - self.tags = None - self.bin_size = 2.0 - self.clear() - - def clear(self): - """ Clears the index - - Returns: - None - """ - self.idx_to_record = {} - # The list of indices correspond to audio files whose duration is - # greater than or equal to the key. - self.duration_to_id_set = {} - self.duration_to_id_set_per_tag = defaultdict(lambda: {}) - self.duration_to_list = defaultdict(lambda: []) - self.duration_to_list_per_tag = defaultdict( - lambda: defaultdict(lambda: [])) - self.tag_to_id_set = defaultdict(lambda: set()) - self.shared_duration_bins = [] - self.id_set_complete = set() - self.id_set = set() - self.duration_bins = [] - - def has_audio(self, distr=None): - """ - :param distr: The target distribution of audio tags that we want to - match. If this is not supplied, the function simply checks that - there are some audio files. - :parma distr: dict - :return: True if there are audio files. - :rtype: boolean - """ - if distr is None: - return len(self.id_set) > 0 - else: - for tag in distr: - if tag not in self.duration_to_list_per_tag: - return False - return True - - def _load_all_records_from_disk(self, audio_dir, idx_fname, bin_size): - """Loads all audio records from the disk into memory and groups them - into chunks based on their duration and the bin_size granalarity. - - Once all the records are read, indices are built from these records - by another function so that the audio samples can be drawn efficiently. - - Updates: - self.audio_dir (path): audio root directory - self.idx_fname (path): audio database index filename - self.bin_size (float): granularity of bins - self.idx_to_record (dict): maps from the audio id to - (duration, file_size, relative_path, tags) - self.tag_to_id_set (dict): maps from the tag to - the set of id's of audios that have this tag. - self.id_set_complete (set): set of all audio id's in the index file - self.min_duration (float): minimum audio duration observed in the - index file - self.duration_bins (list): the lower bounds on the duration of - audio files falling in each bin - self.duration_to_id_set (dict): contains (k, v) where v is the set - of id's of audios whose lengths are longer than or equal to k. 
- (e.g. k is the duration lower bound of this bin). - self.duration_to_id_set_per_tag (dict): Something like above but - has a finer granularity mapping from the tag to - duration_to_id_set. - self.shared_duration_bins (list): list of sets where each set - contains duration lower bounds whose audio id sets are the - same. The rationale for having this is that there are a few - but extremely long audio files which lead to a lot of bins. - When the id sets do not change across various minimum duration - boundaries, we - cluster these together and make them point to the same id set - reference. - - :return: whether the records were read from the disk. The assumption is - that the audio index file on disk and the actual audio files - are constructed once and never change during training. We only - re-read when either the directory or the index file path change. - """ - if self.audio_dir == audio_dir and self.idx_fname == idx_fname and \ - self.bin_size == bin_size: - # The audio directory and/or the list of audio files - # haven't changed. No need to load the list again. - return False - - # Remember where the audio index is most recently read from. - self.audio_dir = audio_dir - self.idx_fname = idx_fname - self.bin_size = bin_size - - # Read in the idx and compute the number of bins necessary - self.clear() - rank = [] - min_duration = float('inf') - max_duration = float('-inf') - for idx, duration, file_size, relpath, tags in \ - stream_audio_index(idx_fname): - self.idx_to_record[idx] = (duration, file_size, relpath, tags) - max_duration = max(max_duration, duration) - min_duration = min(min_duration, duration) - rank.append((duration, idx)) - for tag in tags: - self.tag_to_id_set[tag].add(idx) - if len(rank) == 0: - # file is empty - raise IOError("Index file {} is empty".format(idx_fname)) - for tag in self.tag_to_id_set: - self.id_set_complete |= self.tag_to_id_set[tag] - dur = min_duration - self.min_duration = min_duration - while dur < max_duration + bin_size: - self.duration_bins.append(dur) - dur += bin_size - - # Sort in decreasing order of duration and populate - # the cumulative indices lists. - rank.sort(reverse=True) - - # These are indices for `rank` and used to keep track of whether - # there are new records to add in the current bin. - last = 0 - cur = 0 - - # The set of audios falling in the previous bin; in the case, - # where we don't find new audios for the current bin, we store - # the reference to the last set so as to conserve memory. - # This is not such a big problem if the audio duration is - # bounded by a small number like 30 seconds and the - # bin size is big enough. But, for raw freesound audios, - # some audios can be as long as a few hours! - last_audio_set = set() - - # The same but for each tag so that we can pick audios based on - # tags and also some user-specified tag distribution. - last_audio_set_per_tag = defaultdict(lambda: set()) - - # Set of lists of bins sharing the same audio sets. - shared = set() - - for i in range(len(self.duration_bins) - 1, -1, -1): - lower_bound = self.duration_bins[i] - new_audio_idxs = set() - new_audio_idxs_per_tag = defaultdict(lambda: set()) - while cur < len(rank) and rank[cur][0] >= lower_bound: - idx = rank[cur][1] - tags = self.idx_to_record[idx][3] - new_audio_idxs.add(idx) - for tag in tags: - new_audio_idxs_per_tag[tag].add(idx) - cur += 1 - # This makes certain that the same list is shared across - # different bins if no new indices are added. 
- if cur == last: - shared.add(lower_bound) - else: - last_audio_set = last_audio_set | new_audio_idxs - for tag in new_audio_idxs_per_tag: - last_audio_set_per_tag[tag] = \ - last_audio_set_per_tag[tag] | \ - new_audio_idxs_per_tag[tag] - if len(shared) > 0: - self.shared_duration_bins.append(shared) - shared = set([lower_bound]) - ### last_audio_set = set() should set blank - last = cur - self.duration_to_id_set[lower_bound] = last_audio_set - for tag in last_audio_set_per_tag: - self.duration_to_id_set_per_tag[lower_bound][tag] = \ - last_audio_set_per_tag[tag] - - # The last `shared` record isn't added to the `shared_duration_bins`. - self.shared_duration_bins.append(shared) - - # We make sure that the while loop above has exhausted through the - # `rank` list by checking if the `cur`rent index in `rank` equals - # the length of the array, which is the halting condition. - assert cur == len(rank) - - return True - - def _build_index_from_records(self, tag_list): - """ Uses the in-memory records read from the index file to build - an in-memory index restricted to the given tag list. - - :param tag_list: List of tags we are interested in sampling from. - :type tag_list: list(str) - - Updates: - self.id_set (set): the set of all audio id's that can be sampled. - self.duration_to_list (dict): maps from the duration lower bound - to the id's of audios longer than this duration. - self.duration_to_list_per_tag (dict): maps from the tag to - the same structure as self.duration_to_list. This is to support - sampling from a target noise distribution. - - :return: whether the index was built from scratch - """ - if self.tags == tag_list: - return False - - self.tags = tag_list - if len(tag_list) == 0: - self.id_set = self.id_set_complete - else: - self.id_set = set() - for tag in tag_list: - self.id_set |= self.tag_to_id_set[tag] - - # Next, we need to take a subset of the audio files - for shared in self.shared_duration_bins: - # All bins in `shared' have the same index lists - # so we can intersect once and set all of them to this list. - lb = list(shared)[0] - intersected = list(self.id_set & self.duration_to_id_set[lb]) - duration_to_id_set = self.duration_to_id_set_per_tag[lb] - intersected_per_tag = { - tag: self.tag_to_id_set[tag] & duration_to_id_set[tag] - for tag in duration_to_id_set - } - for bin_key in shared: - self.duration_to_list[bin_key] = intersected - for tag in intersected_per_tag: - self.duration_to_list_per_tag[tag][bin_key] = \ - intersected_per_tag[tag] - assert len(self.duration_to_list) == len(self.duration_to_id_set) - return True - - def refresh_records_from_index_file(self, - audio_dir, - idx_fname, - tag_list, - bin_size=2.0): - """ Loads the index file and populates the records - for building the internal index. - - If the audio directory or index file name has changed, the whole index - is reloaded from scratch. If only the tag_list is changed, then the - desired index is built from the complete, in-memory record. - - :param audio_dir: audio directory - :type audio_dir: basestring - :param idx_fname: audio index file name - :type idex_fname: basestring - :param tag_list: list of tags we are interested in loading; - if empty, we load all. 
- :type tag_list: list - :param bin_size: optional argument for controlling the granularity - of duration bins - :type bin_size: float - """ - if tag_list is None: - tag_list = [] - reloaded_records = self._load_all_records_from_disk(audio_dir, - idx_fname, bin_size) - if reloaded_records or self.tags != tag_list: - self._build_index_from_records(tag_list) - logger.info('loaded {} audio files from {}' - .format(len(self.id_set), idx_fname)) - - def sample_audio(self, duration, rng=None, distr=None): - """ Uniformly draws an audio record of at least the desired duration - - :param duration: minimum desired audio duration - :type duration: float - :param rng: random number generator - :type rng: random.Random - :param distr: target distribution of audio tags. If not provided, - :type distr: dict - all audio files are sampled uniformly at random. - - :returns: success, (duration, file_size, path) - """ - if duration < 0.0: - duration = self.min_duration - i = bisect.bisect_left(self.duration_bins, duration) - if i == len(self.duration_bins): - return False, None - bin_key = self.duration_bins[i] - if distr is None: - indices = self.duration_to_list[bin_key] - else: - # If a desired audio distribution is given, we sample from it. - if rng is None: - rng = random.Random() - nprng = np.random.RandomState(rng.getrandbits(32)) - prob_masses = distr.values() - prob_masses /= np.sum(prob_masses) - tag = nprng.choice(distr.keys(), p=prob_masses) - indices = self.duration_to_list_per_tag[tag][bin_key] - if len(indices) == 0: - return False, None - else: - if rng is None: - rng = random.Random() - # duration, file size and relative path from root - s = self.idx_to_record[rng.sample(indices, 1)[0]] - s = (s[0], s[1], os.path.join(self.audio_dir, s[2])) - return True, s diff --git a/deep_speech_2/data_utils/augmentor/augmentation.py b/deep_speech_2/data_utils/augmentor/augmentation.py index c0a70ad186..abe1a0ec89 100755 --- a/deep_speech_2/data_utils/augmentor/augmentation.py +++ b/deep_speech_2/data_utils/augmentor/augmentation.py @@ -6,11 +6,6 @@ import json import random from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor -from data_utils.augmentor.resamler import ResamplerAugmentor -from data_utils.augmentor.speed_perturb import SpeedPerturbatioAugmentor -from data_utils.augmentor.online_bayesian_normalization import OnlineBayesianNormalizationAugmentor -from data_utils.augmentor.Impulse_response import ImpulseResponseAugmentor -from data_utils.augmentor.noise_speech import NoiseSpeechAugmentor class AugmentationPipeline(object): @@ -81,15 +76,5 @@ def _get_augmentor(self, augmentor_type, params): """Return an augmentation model by the type name, and pass in params.""" if augmentor_type == "volume": return VolumePerturbAugmentor(self._rng, **params) - if augmentor_type == "resamle": - return ResamplerAugmentor(self._rng, **params) - if augmentor_type == "speed": - return SpeedPerturbatioAugmentor(self._rng, **params) - if augmentor_type == "online_bayesian_normalization": - return OnlineBayesianNormalizationAugmentor(self._rng, **params) - if augmentor_type == "Impulse_response": - return ImpulseResponseAugmentor(self._rng, **params) - if augmentor_type == "noise_speech": - return NoiseSpeechAugmentor(self._rng, **params) else: raise ValueError("Unknown augmentor type [%s]." 
% augmentor_type) diff --git a/deep_speech_2/data_utils/augmentor/implus_response.py b/deep_speech_2/data_utils/augmentor/implus_response.py deleted file mode 100755 index cc2053421a..0000000000 --- a/deep_speech_2/data_utils/augmentor/implus_response.py +++ /dev/null @@ -1,76 +0,0 @@ -""" Impulse response""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . import base -from . import audio_database -from data_utils.speech import SpeechSegment - - -class ImpulseResponseAugmentor(base.AugmentorBase): - """ Instantiates an impulse response model - - :param ir_dir: directory containing impulse responses - :type ir_dir: basestring - :param tags: optional parameter for specifying what - particular impulse responses to apply. - :type tags: list - :parm tag_distr: optional noise distribution - :type tag_distr: dict - """ - - def __init__(self, rng, ir_dir, index_file, tags=None, tag_distr=None): - # Define all required parameter maps here. - self.ir_dir = ir_dir - self.index_file = index_file - - self.tags = tags - self.tag_distr = tag_distr - - self.audio_index = audio_database.AudioIndex() - self.rng = rng - - def _init_data(self): - """ Preloads stuff from disk in an attempt (e.g. list of files, etc) - to make later loading faster. If the data configuration remains the - same, this function does nothing. - - """ - self.audio_index.refresh_records_from_index_file( - self.ir_dir, self.index_file, self.tags) - - def transform_audio(self, audio_segment): - """ Convolves the input audio with an impulse response. - - :param audio_segment: input audio - :type audio_segment: AudioSegemnt - """ - # This handles the cases where the data source or directories change. - self._init_data() - - read_size = 0 - tag_distr = self.tag_distr - if not self.audio_index.has_audio(tag_distr): - if tag_distr is None: - if not self.tags: - raise RuntimeError("The ir index does not have audio " - "files to sample from.") - else: - raise RuntimeError("The ir index does not have audio " - "files of the given tags to sample " - "from.") - else: - raise RuntimeError("The ir index does not have audio " - "files to match the target ir " - "distribution.") - else: - # Querying with a negative duration triggers the index to search - # from all impulse responses. - success, record = self.audio_index.sample_audio( - -1.0, rng=self.rng, distr=tag_distr) - if success is True: - _, read_size, ir_fname = record - ir_wav = SpeechSegment.from_file(ir_fname) - audio_segment.convolve(ir_wav, allow_resampling=True) diff --git a/deep_speech_2/data_utils/augmentor/noise_speech.py b/deep_speech_2/data_utils/augmentor/noise_speech.py deleted file mode 100755 index 8cf7c27b66..0000000000 --- a/deep_speech_2/data_utils/augmentor/noise_speech.py +++ /dev/null @@ -1,318 +0,0 @@ -""" noise speech -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import logging -import numpy as np -import os -from collections import defaultdict - -from . import base -from . import audio_database -from data_utils.speech import SpeechSegment - -TURK = "turk" -USE_AUDIO_DATABASE_SOURCES = frozenset(["freesound", "chime"]) -HALF_NOISE_LENGTH_MIN_THRESHOLD = 3.0 -FIND_NOISE_MAX_ATTEMPTS = 20 - -logger = logging.getLogger(__name__) - - -def get_first_smaller(items, value): - index = bisect.bisect_left(items, value) - 1 - assert items[index] < value, \ - 'get_first_smaller failed! 
%d %d' % (items[index], value) - return items[index] - - -def get_first_larger(items, value): - 'Find leftmost value greater than value' - index = bisect.bisect_right(items, value) - assert index < len(items), \ - "no noise bin exists for this audio length (%f)" % value - assert items[index] > value, \ - 'get_first_larger failed! %d %d' % (items[index], value) - return items[index] - - -def _get_turk_noise_files(noise_dir, index_file): - """ Creates a map from duration => a list of noise filenames - - :param noise_dir: Directory of noise files which contains - "noise-samples-list" - :type noise_dir: basestring - :param index_file: Noise list - :type index_file: basestring - - returns:noise_files (defaultdict): A map of bins to noise files. - Each key is the duration, and the value is a list of noise - files binned to this duration. Each bin is 2 secs. - - Note: noise-samples-list should contain one line per noise (wav) file - along with its duration in milliseconds - """ - noise_files = defaultdict(list) - if not os.path.exists(index_file): - logger.error('No noise files were found at {}'.format(index_file)) - return noise_files - num_noise_files = 0 - rounded_durations = list(range(0, 65, 2)) - with open(index_file, 'r') as fl: - for line in fl: - fname = os.path.join(noise_dir, line.strip().split()[0]) - duration = float(line.strip().split()[1]) / 1000 - # bin the noise files into length bins rounded by 2 sec - bin_id = get_first_smaller(rounded_durations, duration) - noise_files[bin_id].append(fname) - num_noise_files += 1 - logger.info('Loaded {} turk noise files'.format(num_noise_files)) - return noise_files - - -class NoiseSpeechAugmentor(base.AugmentorBase): - """ Noise addition block - - :param snr_min: minimum signal-to-noise ratio - :type snr_min: float - :param snr_max: maximum signal-to-noise ratio - :type snr_max: float - :param noise_dir: root of where noise files are stored - :type noise_fir: basestring - :param index_file: index of noises of interest in noise_dir - :type index_file: basestring - :param source: select one from - - turk - - freesound - - chime - Note that this field is no longer required for the freesound - and chime - :type source: string - :param tags: optional parameter for specifying what - particular noises we want to add. See above for the available tags. - :type tags: list - :param tag_distr: optional noise distribution - :type tag_distr: dict - """ - - def __init__(self, - rng, - snr_min, - snr_max, - noise_dir, - source, - allow_downsampling=None, - index_file=None, - tags=None, - tag_distr=None): - # Define all required parameter maps here. - self.rng = rng - self.snr_min = snr_min - self.snr_max = snr_max - self.noise_dir = noise_dir - self.source = source - - self.allow_downsampling = allow_downsampling - self.index_file = index_file - self.tags = tags - self.tag_distr = tag_distr - - # When new noise sources are added, make sure to define the - # associated bookkeeping variables here. - self.turk_noise_files = [] - self.turk_noise_dir = None - self.audio_index = audio_database.AudioIndex() - - def _init_data(self): - """ Preloads stuff from disk in an attempt (e.g. list of files, etc) - to make later loading faster. If the data configuration remains the - same, this function does nothing. 
- - """ - noise_dir = self.noise_dir - index_file = self.index_file - source = self.source - if not index_file: - if source == TURK: - index_file = os.path.join(noise_dir, 'noise-samples-list') - logger.debug("index_file not provided; " + "defaulting to " + - index_file) - else: - if source != "": - assert source in USE_AUDIO_DATABASE_SOURCES, \ - "{} not supported by audio_database".format(source) - index_file = os.path.join(noise_dir, - "audio_index_commercial.txt") - logger.debug("index_file not provided; " + "defaulting to " + - index_file) - - if source == TURK: - if self.turk_noise_dir != noise_dir: - self.turk_noise_dir = noise_dir - self.turk_noise_files = _get_turk_noise_files(noise_dir, - index_file) - # elif source == TODO_SUPPORT_NON_AUDIO_DATABASE_BASED_SOURCES: - else: - if source != "": - assert source in USE_AUDIO_DATABASE_SOURCES, \ - "{} not supported by audio_database".format(source) - self.audio_index.refresh_records_from_index_file( - self.noise_dir, index_file, self.tags) - - def transform_audio(self, audio_segment): - """Adds walla noise - - :param audio_segment: Input audio - :type audio_segment: SpeechSegment - """ - # This handles the cases where the data source or directories change. - self._init_data - source = self.source - allow_downsampling = self.allow_downsampling - if source == TURK: - self._add_turk_noise(audio_segment, self.rng, allow_downsampling) - # elif source == TODO_SUPPORT_NON_AUDIO_DATABASE_BASED_SOURCES: - else: - self._add_noise(audio_segment, self.rng, allow_downsampling) - - def _sample_snr(self): - """ Returns a float sampled in [`self.snr_min`, `self.snr_max`] - if both `self.snr_min` and `self.snr_max` are non-zero. - """ - snr_min = self.snr_min - snr_max = self.snr_max - sampled_snr = self.rng.uniform(snr_min, snr_max) - return sampled_snr - - def _add_turk_noise(self, audio_segment, allow_downsampling): - """ Adds a turk noise to the input audio. - - :param audio_segment: input audio - :type audio_segment: audiosegment - :param allow_downsampling: indicates whether downsampling - is allowed - :type allow_downsampling: boolean - """ - read_size = 0 - if len(self.turk_noise_files) > 0: - snr = self._sample_snr(self.rng) - # Draw the noise file randomly from noise files that are - # slightly longer than the utterance - noise_bins = sorted(self.turk_noise_files.keys()) - # note some bins can be empty, so we can't just round up - # to the nearest 2-sec interval - rounded_duration = get_first_larger(noise_bins, - audio_segment.duration) - noise_fname = \ - self.rng.sample(self.turk_noise_files[rounded_duration], 1)[0] - noise = SpeechSegment.from_wav_file(noise_fname) - logger.debug('noise_fname {}'.format(noise_fname)) - logger.debug('snr {}'.format(snr)) - read_size = len(noise) * 2 - # May throw exceptions, but this is caught by - # AudioFeaturizer.get_audio_files. - audio_segment.add_noise( - noise, snr, rng=self.rng, allow_downsampling=allow_downsampling) - - def _add_noise(self, audio_segment, allow_downsampling): - """ Adds a noise indexed in audio_database.AudioIndex. 
- - :param audio_segment: input audio - :type audio_segment: SpeechSegment - :param allow_downsampling: indicates whether downsampling - is allowed - :type allow_downsampling: boolean - - Returns: - (SpeechSegment, int) - - sound with turk noise added - - number of bytes read from disk - """ - read_size = 0 - tag_distr = self.tag_distr - if not self.audio_index.has_audio(tag_distr): - if tag_distr is None: - if not self.tags: - raise RuntimeError("The noise index does not have audio " - "files to sample from.") - else: - raise RuntimeError("The noise index does not have audio " - "files of the given tags to sample " - "from.") - else: - raise RuntimeError("The noise index does not have audio " - "files to match the target noise " - "distribution.") - else: - # Compute audio segment related statistics - audio_duration = audio_segment.duration - - # Sample relevant augmentation parameters. - snr = self._sample_snr(self.rng) - - # Perhaps, we may not have a sufficiently long noise, so we need - # to search iteratively. - min_duration = audio_duration + 0.25 - for _ in range(FIND_NOISE_MAX_ATTEMPTS): - logger.debug("attempting to find noise of length " - "at least {}".format(min_duration)) - - success, record = \ - self.audio_index.sample_audio(min_duration, - rng=self.rng, - distr=tag_distr) - - if success is True: - noise_duration, read_size, noise_fname = record - - # Assert after logging so we know - # what caused augmentation to fail. - logger.debug("noise_fname {}".format(noise_fname)) - logger.debug("snr {}".format(snr)) - assert noise_duration >= min_duration - break - - # Decrease the desired minimum duration linearly. - # If the value becomes smaller than some threshold, - # we half the value instead. - if min_duration > HALF_NOISE_LENGTH_MIN_THRESHOLD: - min_duration -= 2.0 - else: - min_duration *= 0.5 - - if success is False: - logger.info("Failed to find a noise file") - return - - diff_duration = audio_duration + 0.25 - noise_duration - if diff_duration >= 0.0: - # Here, the noise is shorter than the audio file, so - # we pad with zeros to make sure the noise sound is applied - # with a uniformly random shift. - noise = SpeechSegment.from_file(noise_fname) - noise = noise.pad_silence(diff_duration, sides="both") - else: - # The noise clip is at least ~25 ms longer than the audio - # segment here. - diff_duration = int(noise_duration * audio_segment.sample_rate) - \ - int(audio_duration * audio_segment.sample_rate) - \ - int(0.02 * audio_segment.sample_rate) - start = float(self.rng.randint(0, diff_duration)) / \ - audio.sample_rate - finish = min(start + audio_duration + 0.2, noise_duration) - noise = SpeechSegment.slice_from_file(noise_fname, start, - finish) - - if len(noise) < len(audio_segment): - # This is to ensure that the noise clip is at least as - # long as the audio segment. - num_samples_to_pad = len(audio_segment) - len(noise) - # Padding this amount of silence on both ends ensures that - # the placement of the noise clip is uniformly random. 
- silence = SpeechSegment( - np.zeros(num_samples_to_pad), audio_segment.sample_rate) - noise = SpeechSegment.concatenate(silence, noise, silence) - - audio_segment.add_noise( - noise, snr, rng=self.rng, allow_downsampling=allow_downsampling) diff --git a/deep_speech_2/data_utils/augmentor/online_bayesian_normalization.py b/deep_speech_2/data_utils/augmentor/online_bayesian_normalization.py deleted file mode 100755 index bc2d6c1b65..0000000000 --- a/deep_speech_2/data_utils/augmentor/online_bayesian_normalization.py +++ /dev/null @@ -1,57 +0,0 @@ -""" Online bayesian normalization -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . import base - - -class OnlineBayesianNormalizationAugmentor(base.AugmentorBase): - """ - Instantiates an online bayesian normalization module. - :param target_db: Target RMS value in decibels - :type target_db: func[int->scalar] - :param prior_db: Prior RMS estimate in decibels - :type prior_db: func[int->scalar] - :param prior_samples: Prior strength in number of samples - :type prior_samples: func[int->scalar] - :param startup_delay: Start-up delay in seconds during - which normalization statistics is accrued. - :type starup_delay: func[int->scalar] - """ - - def __init__(self, - rng, - target_db, - prior_db, - prior_samples, - startup_delay=base.parse_parameter_from(0.0)): - - self.target_db = target_db - self.prior_db = prior_db - self.prior_samples = prior_samples - self.startup_delay = startup_delay - self.rng = rng - - def transform_audio(self, audio_segment): - """ - Normalizes the input audio using the online Bayesian approach. - - :param audio_segment: input audio - :type audio_segment: SpeechSegment - :param iteration: current iteration - :type iteration: int - :param text: audio transcription - :type text: basestring - :param rng: RNG to use for augmentation - :type rng: random.Random - - """ - read_size = 0 - target_db = self.target_db(iteration) - prior_db = self.prior_db(iteration) - prior_samples = self.prior_samples(iteration) - startup_delay = self.startup_delay(iteration) - audio.normalize_online_bayesian( - target_db, prior_db, prior_samples, startup_delay=startup_delay) diff --git a/deep_speech_2/data_utils/augmentor/resampler.py b/deep_speech_2/data_utils/augmentor/resampler.py deleted file mode 100755 index 1b959be56c..0000000000 --- a/deep_speech_2/data_utils/augmentor/resampler.py +++ /dev/null @@ -1,30 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . import base - - -class ResamplerAugmentor(base.AugmentorBase): - """ Instantiates a resampler module. - - :param new_sample_rate: New sample rate in Hz - :type new_sample_rate: func[int->scalar] - :param rng: Random generator object. - :type rng: random.Random - """ - - def __init__(self, rng, new_sample_rate): - self.new_sample_rate = new_sample_rate - self._rng = rng - - def transform_audio(self, audio_segment): - """ Resamples the input audio to the target sample rate. - - Note that this is an in-place transformation. 
- - :param audio: input audio - :type audio: SpeechDLSegment - """ - new_sample_rate = self.new_sample_rate - audio.resample(new_sample_rate) \ No newline at end of file diff --git a/deep_speech_2/data_utils/augmentor/speed_perturb.py b/deep_speech_2/data_utils/augmentor/speed_perturb.py deleted file mode 100755 index e09be5f74e..0000000000 --- a/deep_speech_2/data_utils/augmentor/speed_perturb.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Speed perturbation module for making ASR robust to different voice -types (high pitched, low pitched, etc) -Samples uniformly between speed_min and speed_max -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . import base - - -class SpeedPerturbatioAugmentor(base.AugmentorBase): - """ - Instantiates a speed perturbation module. - - See reference paper here: - - http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf - - :param speed_min: Lower bound on new rate to sample - :type speed_min: func[int->scalar] - :param speed_max: Upper bound on new rate to sample - :type speed_max: func[int->scalar] - """ - - def __init__(self, rng, speed_min, speed_max): - - if (speed_min < 0.9): - raise ValueError( - "Sampling speed below 0.9 can cause unnatural effects") - if (speed_min > 1.1): - raise ValueError( - "Sampling speed above 1.1 can cause unnatural effects") - self.speed_min = speed_min - self.speed_max = speed_max - self.rng = rng - - def transform_audio(self, audio_segment): - """ - Samples a new speed rate from the given range and - changes the speed of the given audio clip. - - Note that this is an in-place transformation. - - :param audio_segment: input audio - :type audio_segment: SpeechDLSegment - """ - read_size = 0 - speed_min = self.speed_min(iteration) - speed_max = self.speed_max(iteration) - sampled_speed = rng.uniform(speed_min, speed_max) - audio = audio.change_speed(sampled_speed) diff --git a/deep_speech_2/data_utils/augmentor/volume_perturb.py b/deep_speech_2/data_utils/augmentor/volume_perturb.py index 15055b9154..a5a9f6cada 100755 --- a/deep_speech_2/data_utils/augmentor/volume_perturb.py +++ b/deep_speech_2/data_utils/augmentor/volume_perturb.py @@ -3,10 +3,10 @@ from __future__ import division from __future__ import print_function -from . import base +from data_utils.augmentor.base import AugmentorBase -class VolumePerturbAugmentor(base.AugmentorBase): +class VolumePerturbAugmentor(AugmentorBase): """Augmentation model for adding random volume perturbation. This is used for multi-loudness training of PCEN. 
See diff --git a/deep_speech_2/requirements.txt b/deep_speech_2/requirements.txt index 58a93debe4..c37e88ffe7 100644 --- a/deep_speech_2/requirements.txt +++ b/deep_speech_2/requirements.txt @@ -1,2 +1,4 @@ SoundFile==0.9.0.post1 wget==3.2 +scikits.samplerate==0.3.3 +scipy==0.13.0b1 From 602dcc81933ec97b606e1310900e202326f6bcea Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Fri, 16 Jun 2017 18:29:56 +0800 Subject: [PATCH 33/43] modify audio and speech --- deep_speech_2/data_utils/audio.py | 14 ++++++++------ deep_speech_2/data_utils/speech.py | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/deep_speech_2/data_utils/audio.py b/deep_speech_2/data_utils/audio.py index ee4e6d84e1..066437dc6c 100755 --- a/deep_speech_2/data_utils/audio.py +++ b/deep_speech_2/data_utils/audio.py @@ -104,7 +104,8 @@ def from_bytes(cls, bytes): io.BytesIO(bytes), dtype='float32') return cls(samples, sample_rate) - def concatenate(self, *segments): + @classmethod + def concatenate(cls, *segments): """Concatenate an arbitrary number of audio segments together. :param *segments: Input audio segments @@ -123,11 +124,11 @@ def concatenate(self, *segments): if sample_rate != seg._sample_rate: raise ValueError("Can't concatenate segments with " "different sample rates") - if type(seg) is not type(self): + if type(seg) is not cls: raise TypeError("Only audio segments of the same type " "instance can be concatenated.") samples = np.concatenate([seg.samples for seg in segments]) - return type(self)(samples, sample_rate) + return cls(samples, sample_rate) def to_wav_file(self, filepath, dtype='float32'): """Save audio segment to disk as wav file. @@ -355,13 +356,14 @@ def pad_silence(self, duration, sides='both'): """ if duration == 0.0: return self + cls = type(self) silence = self.make_silence(duration, self._sample_rate) if sides == "beginning": - padded = self.concatenate(silence, self) + padded = cls.concatenate(silence, self) elif sides == "end": - padded = self.concatenate(self, silence) + padded = cls.concatenate(self, silence) elif sides == "both": - padded = self.concatenate(silence, self, silence) + padded = cls.concatenate(silence, self, silence) else: raise ValueError("Unknown value for the kwarg %s" % sides) self._samples = padded._samples diff --git a/deep_speech_2/data_utils/speech.py b/deep_speech_2/data_utils/speech.py index 48db595b41..5d1fc15a72 100755 --- a/deep_speech_2/data_utils/speech.py +++ b/deep_speech_2/data_utils/speech.py @@ -65,6 +65,32 @@ def from_bytes(cls, bytes, transcript): audio = AudioSegment.from_bytes(bytes) return cls(audio.samples, audio.sample_rate, transcript) + @classmethod + def concatenate(cls, *segments): + """Concatenate an arbitrary number of audio segments together. + + :param *segments: Input speech segments + :type *segments: SpeechSegment + :return: Speech segment instance. + :rtype: SpeechSegment + :raises ValueError: If number of segments is zero, or if sample_rate + not match between two audio segments + :raises TypeError: If item of segments is not Audiosegment instance + """ + # Perform basic sanity-checks. 
+ if len(segments) == 0: + raise ValueError("No audio segments are given to concatenate.") + sample_rate = segments[0]._sample_rate + for seg in segments: + if sample_rate != seg._sample_rate: + raise ValueError("Can't concatenate segments with " + "different sample rates") + if type(seg) is not cls: + raise TypeError("Only speech segments of the same type " + "instance can be concatenated.") + samples = np.concatenate([seg.samples for seg in segments]) + return cls(samples, sample_rate, seg._transcript) + @property def transcript(self): """Return the transcript text. From 193601a5e7439e45e4873a061db24c309daefafa Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Sat, 17 Jun 2017 09:03:18 +0800 Subject: [PATCH 34/43] add audio file --- deep_speech_2/data_utils/audio.py | 245 +++++++++++++---------------- deep_speech_2/data_utils/speech.py | 55 ++++++- 2 files changed, 161 insertions(+), 139 deletions(-) diff --git a/deep_speech_2/data_utils/audio.py b/deep_speech_2/data_utils/audio.py index 066437dc6c..1f75da8ac3 100755 --- a/deep_speech_2/data_utils/audio.py +++ b/deep_speech_2/data_utils/audio.py @@ -47,32 +47,6 @@ def __ne__(self, other): """Return whether two objects are unequal.""" return not self.__eq__(other) - def __len__(self): - """Returns length of segment in samples.""" - return self.num_samples - - def __add__(self, other): - """Add samples from another segment to those of this segment and return - a new segment (sample-wise addition, not segment concatenation). - - :param other: Segment containing samples to be - added in. - :type other: AudioSegment - :return: New segment containing resulting samples. - :rtype: AudioSegment - :raise TypeError: If sample rates of segments don't match, - or if length of segments don't match. - """ - if type(self) != type(other): - raise TypeError("Cannot add segment of different type: {}" - .format(type(other))) - if self._sample_rate != other._sample_rate: - raise TypeError("Sample rates must match to add segments.") - if len(self._samples) != len(other._samples): - raise TypeError("Segment lengths must match to add segments.") - samples = self.samples + other.samples - return type(self)(samples, sample_rate=self._sample_rate) - def __str__(self): """Return human-readable representation of segment.""" return ("%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, " @@ -108,13 +82,13 @@ def from_bytes(cls, bytes): def concatenate(cls, *segments): """Concatenate an arbitrary number of audio segments together. - :param *segments: Input audio segments + :param *segments: Input audio segments. :type *segments: AudioSegment - :return: Audio segment instance. + :return: Audio segment instance as concatenating results. :rtype: AudioSegment - :raises ValueError: If number of segments is zero, or if sample_rate - not match between two audio segments - :raises TypeError: If item of segments is not Audiosegment instance + :raises ValueError: If the number of segments is zero, or if the + sample_rate of any two segments does not match. + :raises TypeError: If every segment in is not Audiosegment instance. """ # Perform basic sanity-checks. if len(segments) == 0: @@ -155,12 +129,13 @@ def to_wav_file(self, filepath, dtype='float32'): format='WAV', subtype=subtype_map[dtype]) - def slice_from_file(self, file, start=None, end=None): + @classmethod + def slice_from_file(cls, file, start=None, end=None): """Loads a small section of an audio without having to load the entire file into the memory which can be incredibly wasteful. 
- :param file: Input audio filepath - :type file: basestring + :param file: Input audio filepath or file object. + :type file: basestring|file :param start: Start time in seconds. If start is negative, it wraps around from the end. If not provided, this function reads from the very beginning. @@ -169,9 +144,11 @@ def slice_from_file(self, file, start=None, end=None): from the end. If not provided, the default behvaior is to read to the end of the file. :type end: float - :return: The specified slice of input audio in the audio.AudioSegment format. + :return: AudioSegment instance of the specified slice of the input + audio file. :rtype: AudioSegment - :rainse ValueError: If the position is error, or if the time is out bounds. + :raise ValueError: If start or end is incorrectly set, e.g. out of + bounds in time. """ sndfile = soundfile.SoundFile(file) sample_rate = sndfile.samplerate @@ -184,40 +161,60 @@ def slice_from_file(self, file, start=None, end=None): end += duration if start < 0.0: raise ValueError("The slice start position (%f s) is out of " - "bounds. Filename: %s" % (start, file)) + "bounds." % start) if end < 0.0: - raise ValueError("The slice end position (%f s) is out of bounds " - "Filename: %s" % (end, file)) + raise ValueError("The slice end position (%f s) is out of bounds." % + end) if start > end: raise ValueError("The slice start position (%f s) is later than " "the slice end position (%f s)." % (start, end)) if end > duration: - raise ValueError("The slice end time (%f s) is out of bounds " - "(> %f s) Filename: %s" % (end, duration, file)) + raise ValueError("The slice end position (%f s) is out of bounds " + "(> %f s)" % (end, duration)) start_frame = int(start * sample_rate) end_frame = int(end * sample_rate) sndfile.seek(start_frame) data = sndfile.read(frames=end_frame - start_frame, dtype='float32') - return type(self)(data, sample_rate) + return cls(data, sample_rate) - def make_silence(self, duration, sample_rate): + @classmethod + def make_silence(cls, duration, sample_rate): """Creates a silent audio segment of the given duration and sample rate. - :param duration: Length of silence in seconds + :param duration: Length of silence in seconds. :type duration: float - :param sample_rate: Sample rate + :param sample_rate: Sample rate. :type sample_rate: float - :return: Silence of the given duration + :return: Silent AudioSegment instance of the given duration. :rtype: AudioSegment """ samples = np.zeros(int(duration * sample_rate)) - return type(self)(samples, sample_rate) + return cls(samples, sample_rate) + + def superimposed(self, other): + """Add samples from another segment to those of this segment + (sample-wise addition, not segment concatenation). + + :param other: Segment containing samples to be added in. + :type other: AudioSegments + :raise TypeError: If type of two segments don't match. + :raise ValueError: If the sample_rate of two segments not equal, or if + the length of segments don't match. + """ + if type(self) != type(other): + raise TypeError("Cannot add segments of different types: %s " + "and %s." % (type(self), type(other))) + if self._sample_rate != other._sample_rate: + raise ValueError("Sample rates must match to add segments.") + if len(self._samples) != len(other._samples): + raise ValueError("Segment lengths must match to add segments.") + self._samples += other._samples def to_bytes(self, dtype='float32'): """Create a byte string containing the audio content. - :param dtype: Data type for export samples. 
Options: 'int16', 'int32',
+        :param dtype: Data type for export samples. Options: 'int16','int32',
             'float32', 'float64'. Default is 'float32'.
         :type dtype: str
         :return: Byte string containing audio content.
@@ -258,16 +255,17 @@ def change_speed(self, speed_rate):
         self._samples = np.interp(new_indices, old_indices, self._samples)
 
     def normalize(self, target_db=-20, max_gain_db=300.0):
-        """Normalize audio to be desired RMS value in decibels.
+        """Normalize audio to be of the desired RMS value in decibels.
 
         Note that this is an in-place transformation.
 
-        :param target_db: Target RMS value in decibels. This value should
-            be less than 0.0 as 0.0 is full-scale audio.
+        :param target_db: Target RMS value in decibels. This value should be
+            less than 0.0 as 0.0 is full-scale audio.
         :type target_db: float
         :param max_gain_db: Max amount of gain in dB that can be applied for
-            normalization. This is to prevent nans when attempting
-            to normalize a signal consisting of all zeros.
+            normalization. This is to prevent nans when
+            attempting to normalize a signal consisting of
+            all zeros.
         :type max_gain_db: float
         :raises ValueError: If the required gain to normalize the segment to
             the target_db value exceeds max_gain_db.
         """
         gain = target_db - self.rms_db
         if gain > max_gain_db:
             raise ValueError(
-                "Unable to normalize segment to %f dB because it has an RMS "
-                "value of %f dB and the difference exceeds max_gain_db (%f dB)"
-                % (target_db, self.rms_db, max_gain_db))
+                "Unable to normalize segment to %f dB because the "
+                "required gain exceeds max_gain_db (%f dB)" %
+                (target_db, max_gain_db))
         self.apply_gain(min(max_gain_db, target_db - self.rms_db))
 
     def normalize_online_bayesian(self,
                                   target_db,
                                   prior_db,
                                   prior_samples,
                                   startup_delay=0.0):
-        """Normalize audio using a production-compatible online/causal algorithm.
-        This uses an exponential likelihood and gamma prior to make online estimates
-        of the RMS even when there are very few samples.
+        """Normalize audio using a production-compatible online/causal
+        algorithm. This uses an exponential likelihood and gamma prior to
+        make online estimates of the RMS even when there are very few samples.
 
         Note that this is an in-place transformation.
 
-        :param target_db: Target RMS value in decibels
+        :param target_db: Target RMS value in decibels.
         :type target_db: float
-        :param prior_db: Prior RMS estimate in decibels
+        :param prior_db: Prior RMS estimate in decibels.
         :type prior_db: float
-        :param prior_samples: Prior strength in number of samples
+        :param prior_samples: Prior strength in number of samples.
         :type prior_samples: float
-        :param startup_delay: Default 0.0 s. If provided, this function will accrue
-                              statistics for the first startup_delay seconds before
-                              applying online normalization.
+        :param startup_delay: Default 0.0 s. If provided, this function will
+                              accrue statistics for the first startup_delay
+                              seconds before applying online normalization.
         :type startup_delay: float
         """
-        # Estimate total RMS online
+        # Estimate total RMS online.
         startup_sample_idx = min(self.num_samples - 1,
                                  int(self.sample_rate * startup_delay))
         prior_mean_squared = 10.**(prior_db / 10.)
         prior_sum_of_squares = prior_mean_squared * prior_samples
         cumsum_of_squares = np.cumsum(self.samples**2)
-        sample_count = np.arange(len(self)) + 1
+        sample_count = np.arange(self.num_samples) + 1
         if startup_sample_idx > 0:
             cumsum_of_squares[:startup_sample_idx] = \
                 cumsum_of_squares[startup_sample_idx]
@@ -317,42 +315,40 @@ def normalize_online_bayesian(self,
         mean_squared_estimate = ((cumsum_of_squares + prior_sum_of_squares) /
                                  (sample_count + prior_samples))
         rms_estimate_db = 10 * np.log10(mean_squared_estimate)
-        # Compute required time-varying gain
+        # Compute required time-varying gain.
         gain_db = target_db - rms_estimate_db
         self.apply_gain(gain_db)
 
     def resample(self, target_sample_rate, quality='sinc_medium'):
-        """Resample audio segment. This resamples the audio to a new
-        sample rate.
+        """Resample the audio to a target sample rate.
 
         Note that this is an in-place transformation.
 
-        :param target_sample_rate: Target sample rate
+        :param target_sample_rate: Target sample rate.
         :type target_sample_rate: int
         :param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}.
                         Sets resampling speed/quality tradeoff. See
                         http://www.mega-nerd.com/SRC/api_misc.html#Converters
-        :type quality: basestring
+        :type quality: str
         """
         resample_ratio = target_sample_rate / self._sample_rate
-        new_samples = scikits.samplerate.resample(
+        self._samples = scikits.samplerate.resample(
             self._samples, r=resample_ratio, type=quality)
-        self._samples = new_samples
         self._sample_rate = target_sample_rate
 
     def pad_silence(self, duration, sides='both'):
-        """Pads this audio sample with a period of silence.
+        """Pad this audio sample with a period of silence.
 
         Note that this is an in-place transformation.
 
-        :param duration: Length of silence in seconds to pad
+        :param duration: Length of silence in seconds to pad.
         :type duration: float
-        :param sides: Position for padding
-                      'beginning' - adds silence in the beginning
-                      'end' - adds silence in the end
+        :param sides: Position for padding:
+                      'beginning' - adds silence in the beginning;
+                      'end' - adds silence in the end;
                       'both' - adds silence in both the beginning and the end.
         :type sides: str
-        :raises ValueError: If the sides not surport
+        :raises ValueError: If sides is not supported.
         """
         if duration == 0.0:
             return self
@@ -367,51 +363,41 @@ def pad_silence(self, duration, sides='both'):
         else:
             raise ValueError("Unknown value for the kwarg %s" % sides)
         self._samples = padded._samples
-        self._sample_rate = padded._sample_rate
 
     def subsegment(self, start_sec=None, end_sec=None):
         """Return new AudioSegment containing audio between given boundaries.
 
-        :param start_sec: Beginning of subsegment in seconds,
-                          (beginning of segment if None).
+        :param start_sec: Beginning of subsegment in seconds.
         :type start_sec: float
-        :param end_sec: End of subsegment in seconds,
-                        (end of segment if None).
+        :param end_sec: End of subsegment in seconds.
        :type end_sec: float
-        :return: New AudioSegment containing specified subsegment.
- :rtype: AudioSegment """ start_sec = 0.0 if start_sec is None else start_sec end_sec = self.duration if end_sec is None else end_sec - # negative boundaries are relative to end of segment if start_sec < 0.0: start_sec = self.duration + start_sec if end_sec < 0.0: end_sec = self.duration + end_sec start_sample = int(round(start_sec * self._sample_rate)) end_sample = int(round(end_sec * self._sample_rate)) - samples = self._samples[start_sample:end_sample] - return type(self)(samples, sample_rate=self._sample_rate) + self._samples = self._samples[start_sample:end_sample] def random_subsegment(self, subsegment_length, rng=None): """Return a random subsegment of a specified length in seconds. :param subsegment_length: Subsegment length in seconds. :type subsegment_length: float - :param rng: Random number generator state + :param rng: Random number generator state. :type rng: random.Random - :return: New AudioSegment containing random subsegment - of original segment - :rtype: AudioSegment - :raises ValueError: If the length of subsegment greater than origineal - segemnt. + :raises ValueError: If the length of subsegment greater than + origineal segemnt. """ rng = random.Random() if rng is None else rng if subsegment_length > self.duration: raise ValueError("Length of subsegment must not be greater " "than original segment.") start_time = rng.uniform(0.0, self.duration - subsegment_length) - return self.subsegment(start_time, start_time + subsegment_length) + self.subsegment(start_time, start_time + subsegment_length) def convolve(self, impulse_segment, allow_resample=False): """Convolve this audio segment with the given filter. @@ -420,10 +406,10 @@ def convolve(self, impulse_segment, allow_resample=False): :param impulse_segment: Impulse response segments. :type impulse_segment: AudioSegment - :param allow_resample: indicates whether resampling is allowed when - the impulse_segment has a different sample - rate from this signal. - :type allow_resample: boolean + :param allow_resample: Indicates whether resampling is allowed when + the impulse_segment has a different sample + rate from this signal. + :type allow_resample: bool :raises ValueError: If the sample rate is not match between two audio segments and resample is not allowed. """ @@ -443,9 +429,10 @@ def convolve_and_normalize(self, impulse_segment, allow_resample=False): :param impulse_segment: Impulse response segments. :type impulse_segment: AudioSegment - :param allow_resample: indicates whether resampling is allowed when - the impulse_segment has a different sample rate from this signal. - :type allow_resample: boolean + :param allow_resample: Indicates whether resampling is allowed when + the impulse_segment has a different sample + rate from this signal. + :type allow_resample: bool """ target_db = self.rms_db self.convolve(impulse_segment, allow_resample=allow_resample) @@ -465,42 +452,36 @@ def add_noise(self, :type noise: AudioSegment :param snr_dB: Signal-to-Noise Ratio, in decibels. :type snr_dB: float - :param allow_downsampling: whether to allow the noise signal to be downsampled - to match the base signal sample rate. - :type allow_downsampling: boolean - :param max_gain_db: Maximum amount of gain to apply to noise signal before - adding it in. This is to prevent attempting to apply infinite - gain to a zero signal. + :param allow_downsampling: Whether to allow the noise signal to be + downsampled to match the base signal sample + rate. 
+ :type allow_downsampling: bool + :param max_gain_db: Maximum amount of gain to apply to noise signal + before adding it in. This is to prevent attempting + to apply infinite gain to a zero signal. :type max_gain_db: float :param rng: Random number generator state. - :type rng: random.Random - :raises ValueError: If the sample rate does not match between the two audio segments - and resample is not allowed, or if the duration of noise segments - is shorter than original audio segments. + :type rng: None|random.Random + :raises ValueError: If the sample rate does not match between the two + audio segments and resample is not allowed, or if + the duration of noise segments is shorter than + original audio segments. """ rng = random.Random() if rng is None else rng if allow_downsampling and noise.sample_rate > self.sample_rate: noise = noise.resample(self.sample_rate) if noise.sample_rate != self.sample_rate: - raise ValueError("Noise sample rate (%d Hz) is not equal to " - "base signal sample rate (%d Hz)." % - (noise.sample_rate, self.sample_rate)) + raise ValueError("Noise sample rate (%d Hz) is not equal to base " + "signal sample rate (%d Hz)." % (noise.sample_rate, + self.sample_rate)) if noise.duration < self.duration: - raise ValueError("Noise signal (%f sec) must be at " - "least as long as base signal (%f sec)." % + raise ValueError("Noise signal (%f sec) must be at least as long as" + " base signal (%f sec)." % (noise.duration, self.duration)) - noise_gain_db = self.rms_db - noise.rms_db - snr_dB - noise_gain_db = min(max_gain_db, noise_gain_db) - noise_subsegment = noise.random_subsegment(self.duration, rng=rng) - output = self + self.tranform_noise(noise_subsegment, noise_gain_db) - self._samples = output._samples - self._sample_rate = output._sample_rate - - def tranform_noise(self, noise_subsegment, noise_gain_db): - """ tranform noise file - """ - return type(self)(noise_subsegment._samples * (10.**( - noise_gain_db / 20.)), noise_subsegment._sample_rate) + noise_gain_db = min(self.rms_db - noise.rms_db - snr_dB, max_gain_db) + noise.random_subsegment(self.duration, rng=rng) + noise.apply_gain(noise_gain_db) + self.superimposed(noise) @property def samples(self): @@ -571,7 +552,7 @@ def _convert_samples_from_float32(self, samples, dtype): Audio sample type is usually integer or float-point. For integer type, float32 will be rescaled from [-1, 1] to the maximum range supported by the integer type. - + This is for writing a audio file. """ dtype = np.dtype(dtype) diff --git a/deep_speech_2/data_utils/speech.py b/deep_speech_2/data_utils/speech.py index 5d1fc15a72..443df68c63 100755 --- a/deep_speech_2/data_utils/speech.py +++ b/deep_speech_2/data_utils/speech.py @@ -67,20 +67,20 @@ def from_bytes(cls, bytes, transcript): @classmethod def concatenate(cls, *segments): - """Concatenate an arbitrary number of audio segments together. + """Concatenate an arbitrary number of speech segments together. - :param *segments: Input speech segments + :param *segments: Input speech segments. :type *segments: SpeechSegment :return: Speech segment instance. :rtype: SpeechSegment - :raises ValueError: If number of segments is zero, or if sample_rate - not match between two audio segments - :raises TypeError: If item of segments is not Audiosegment instance + :raises ValueError: If the number of segments is zero, or if the + sample_rate of any two segments does not match. + :raises TypeError: If every segment in is not Audiosegment instance. """ - # Perform basic sanity-checks. 
if len(segments) == 0: raise ValueError("No audio segments are given to concatenate.") sample_rate = segments[0]._sample_rate + transcripts = "" for seg in segments: if sample_rate != seg._sample_rate: raise ValueError("Can't concatenate segments with " @@ -88,8 +88,49 @@ def concatenate(cls, *segments): if type(seg) is not cls: raise TypeError("Only speech segments of the same type " "instance can be concatenated.") + transcripts += seg._transcript samples = np.concatenate([seg.samples for seg in segments]) - return cls(samples, sample_rate, seg._transcript) + return cls(samples, sample_rate, transcripts) + + @classmethod + def slice_from_file(cls, filepath, start=None, end=None, transcript=""): + """Loads a small section of an speech without having to load + the entire file into the memory which can be incredibly wasteful. + + :param filepath: Filepath or file object to audio file. + :type filepath: basestring|file + :param start: Start time in seconds. If start is negative, it wraps + around from the end. If not provided, this function + reads from the very beginning. + :type start: float + :param end: End time in seconds. If end is negative, it wraps around + from the end. If not provided, the default behvaior is + to read to the end of the file. + :type end: float + :param transcript: Transcript text for the speech. if not provided, + the defaults is an empty string. + :type transript: basestring + :return: SpeechSegment instance of the specified slice of the input + speech file. + :rtype: SpeechSegment + """ + audio = Audiosegment.slice_from_file(filepath, start, end) + return cls(audio.samples, audio.sample_rate, transcripts) + + @classmethod + def make_silence(cls, duration, sample_rate): + """Creates a silent speech segment of the given duration and + sample rate. + + :param duration: Length of silence in seconds. + :type duration: float + :param sample_rate: Sample rate. + :type sample_rate: float + :return: Silence of the given duration. 
+ :rtype: AudioSegment + """ + audio = AudioSegment.make_silence(duration, sample_rate) + return cls(audio.samples, audio.sample_rate, "") @property def transcript(self): From 4a181015387d3756af8570de96cff4958e77d908 Mon Sep 17 00:00:00 2001 From: Hu Weiwei Date: Sat, 17 Jun 2017 11:28:16 +0800 Subject: [PATCH 35/43] remove V2 postfix --- scheduled_sampling/scheduled_sampling.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scheduled_sampling/scheduled_sampling.py b/scheduled_sampling/scheduled_sampling.py index a896fd6c59..e2e328ea6a 100644 --- a/scheduled_sampling/scheduled_sampling.py +++ b/scheduled_sampling/scheduled_sampling.py @@ -168,8 +168,8 @@ def gru_decoder_with_attention_test(enc_vec, enc_proj, current_word): return out decoder_group_name = "decoder_group" - group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True) - group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True) + group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True) group_inputs = [group_input1, group_input2] if not is_generating: @@ -198,7 +198,7 @@ def gru_decoder_with_attention_test(enc_vec, enc_proj, current_word): return cost else: - trg_embedding = paddle.layer.GeneratedInputV2( + trg_embedding = paddle.layer.GeneratedInput( size=target_dict_dim, embedding_name='_target_language_embedding', embedding_size=word_vector_dim) From 7afa9db6011901b12cafa42c4adc5691a4b0d369 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Sun, 18 Jun 2017 13:58:13 +0800 Subject: [PATCH 36/43] Extend ci: 1. First install dependencies; 2. Surpport abosulate import. --- .travis.yml | 5 ++--- .travis/unittest.sh | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) create mode 100755 .travis/unittest.sh diff --git a/.travis.yml b/.travis.yml index f069c12016..a5559a7963 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,9 +21,8 @@ before_install: - docker pull paddlepaddle/paddle:latest script: - .travis/precommit.sh - - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c - "cd /py_unittest && find . -name 'tests' -type d -print0 | xargs -0 -I{} -n1 bash -c 'cd {}; - python -m unittest discover -v'" + - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c + 'cd /py_unittest; sh .travis/unittest.sh' notifications: email: diff --git a/.travis/unittest.sh b/.travis/unittest.sh new file mode 100755 index 0000000000..23f15094c8 --- /dev/null +++ b/.travis/unittest.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +abort(){ + echo "Run unittest failed" 1>&2 + echo "Please check your code" 1>&2 + exit 1 +} + +unittest(){ + cd $1 > /dev/null + if [ -f "requirements.txt" ]; then + pip install -r requirements.txt + fi + if [ $? != 0 ]; then + exit 1 + fi + find . -name 'tests' -type d -print0 | \ + xargs -0 -I{} -n1 bash -c \ + 'python -m unittest discover -v -s {}' + cd - > /dev/null +} + +trap 'abort' 0 +set -e + +for proj in */ ; do + if [ -d $proj ]; then + unittest $proj + if [ $? != 0 ]; then + exit 1 + fi + fi +done + +trap : 0 From 0322d7526d12f8f48f771c5e61b8724c01988412 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Sun, 18 Jun 2017 14:31:57 +0800 Subject: [PATCH 37/43] Follow comments. 
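
For reviewers, a minimal usage sketch of the two metrics this patch touches.
The strings and expected values are the ones asserted by the unit tests
updated below; it is an illustration only, not code contained in this patch:

    import error_rate

    ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night'
    hyp = 'i GOT IT TO the FULLEST i LOVE TO portable FROM OF STORES last night'
    print(error_rate.wer(ref, hyp))  # ~0.7692: 10 word edits / 13 reference words
    print(error_rate.cer('werewolf', 'weae wolf'))  # 0.25: 2 edits / 8 reference chars
    error_rate.wer(' ', 'any hypothesis')  # raises ValueError (empty reference)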
--- deep_speech_2/error_rate.py | 16 ++++++---- deep_speech_2/tests/test_error_rate.py | 44 +++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/deep_speech_2/error_rate.py b/deep_speech_2/error_rate.py index 2bb6371149..08fe125581 100644 --- a/deep_speech_2/error_rate.py +++ b/deep_speech_2/error_rate.py @@ -1,9 +1,11 @@ # -*- coding: utf-8 -*- -""" - This module provides functions to calculate error rate in different level. - e.g. wer for word-level, cer for char-level. +"""This module provides functions to calculate error rate in different level. +e.g. wer for word-level, cer for char-level. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import numpy as np @@ -42,8 +44,7 @@ def levenshtein_distance(ref, hyp): def wer(reference, hypothesis, ignore_case=False, delimiter=' '): - """ - Calculate word error rate (WER). WER compares reference text and + """Calculate word error rate (WER). WER compares reference text and hypothesis text in word-level. WER is defined as: .. math:: @@ -71,6 +72,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): :type delimiter: char :return: Word error rate. :rtype: float + :raises ValueError: If reference length is zero. """ if ignore_case == True: reference = reference.lower() @@ -88,8 +90,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): def cer(reference, hypothesis, ignore_case=False): - """ - Calculate charactor error rate (CER). CER compares reference text and + """Calculate charactor error rate (CER). CER compares reference text and hypothesis text in char-level. CER is defined as: .. math:: @@ -117,6 +118,7 @@ def cer(reference, hypothesis, ignore_case=False): :type ignore_case: bool :return: Character error rate. :rtype: float + :raises ValueError: If reference length is zero. 
""" if ignore_case == True: reference = reference.lower() diff --git a/deep_speech_2/tests/test_error_rate.py b/deep_speech_2/tests/test_error_rate.py index bb6dca30a0..57a6ccd687 100644 --- a/deep_speech_2/tests/test_error_rate.py +++ b/deep_speech_2/tests/test_error_rate.py @@ -1,29 +1,63 @@ # -*- coding: utf-8 -*- +"""Test error rate.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import unittest -import sys -sys.path.append('..') import error_rate class TestParse(unittest.TestCase): - def test_wer(self): + def test_wer_1(self): ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night' hyp = 'i GOT IT TO the FULLEST i LOVE TO portable FROM OF STORES last night' word_error_rate = error_rate.wer(ref, hyp) self.assertTrue(abs(word_error_rate - 0.769230769231) < 1e-6) - def test_cer_en(self): + def test_wer_2(self): + ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night' + word_error_rate = error_rate.wer(ref, ref) + self.assertEqual(word_error_rate, 0.0) + + def test_wer_3(self): + ref = ' ' + hyp = 'Hypothesis sentence' + try: + word_error_rate = error_rate.wer(ref, hyp) + except Exception as e: + self.assertTrue(isinstance(e, ValueError)) + + def test_cer_1(self): ref = 'werewolf' hyp = 'weae wolf' char_error_rate = error_rate.cer(ref, hyp) self.assertTrue(abs(char_error_rate - 0.25) < 1e-6) - def test_cer_zh(self): + def test_cer_2(self): + ref = 'werewolf' + char_error_rate = error_rate.cer(ref, ref) + self.assertEqual(char_error_rate, 0.0) + + def test_cer_3(self): ref = u'我是中国人' hyp = u'我是 美洲人' char_error_rate = error_rate.cer(ref, hyp) self.assertTrue(abs(char_error_rate - 0.6) < 1e-6) + def test_cer_4(self): + ref = u'我是中国人' + char_error_rate = error_rate.cer(ref, ref) + self.assertFalse(char_error_rate, 0.0) + + def test_cer_5(self): + ref = '' + hyp = 'Hypothesis' + try: + char_error_rate = error_rate.cer(ref, hyp) + except Exception as e: + self.assertTrue(isinstance(e, ValueError)) + if __name__ == '__main__': unittest.main() From 3d4aba57e434ee34edb0ac65f8a2408857cfc0bb Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Sun, 18 Jun 2017 16:23:30 +0800 Subject: [PATCH 38/43] add audio augmentation --- deep_speech_2/data_utils/audio.py | 3 ++- deep_speech_2/data_utils/speech.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/deep_speech_2/data_utils/audio.py b/deep_speech_2/data_utils/audio.py index 1f75da8ac3..3c671b69b8 100755 --- a/deep_speech_2/data_utils/audio.py +++ b/deep_speech_2/data_utils/audio.py @@ -88,7 +88,8 @@ def concatenate(cls, *segments): :rtype: AudioSegment :raises ValueError: If the number of segments is zero, or if the sample_rate of any two segments does not match. - :raises TypeError: If every segment in is not Audiosegment instance. + :raises TypeError: If every item in segments is not Audiosegment + instance. """ # Perform basic sanity-checks. if len(segments) == 0: diff --git a/deep_speech_2/data_utils/speech.py b/deep_speech_2/data_utils/speech.py index 443df68c63..66f22b2471 100755 --- a/deep_speech_2/data_utils/speech.py +++ b/deep_speech_2/data_utils/speech.py @@ -75,7 +75,8 @@ def concatenate(cls, *segments): :rtype: SpeechSegment :raises ValueError: If the number of segments is zero, or if the sample_rate of any two segments does not match. - :raises TypeError: If every segment in is not Audiosegment instance. + :raises TypeError: If every item in segments is not Audiosegment + instance. 
""" if len(segments) == 0: raise ValueError("No audio segments are given to concatenate.") From bfa4dd987282bc683c3a5cfe20421e5ce156caa0 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Sun, 18 Jun 2017 16:47:09 +0800 Subject: [PATCH 39/43] add audio augmentation --- deep_speech_2/data_utils/audio.py | 6 +++--- deep_speech_2/data_utils/speech.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/deep_speech_2/data_utils/audio.py b/deep_speech_2/data_utils/audio.py index 3c671b69b8..1ad20bf328 100755 --- a/deep_speech_2/data_utils/audio.py +++ b/deep_speech_2/data_utils/audio.py @@ -88,7 +88,7 @@ def concatenate(cls, *segments): :rtype: AudioSegment :raises ValueError: If the number of segments is zero, or if the sample_rate of any two segments does not match. - :raises TypeError: If every item in segments is not Audiosegment + :raises TypeError: If every item in segments is not AudioSegment instance. """ # Perform basic sanity-checks. @@ -296,7 +296,7 @@ def normalize_online_bayesian(self, :type prior_db: float :param prior_samples: Prior strength in number of samples. :type prior_samples: float - :param startup_delay: Default 0.0 s. If provided, this function will + :param startup_delay: Default 0.0s. If provided, this function will accrue statistics for the first startup_delay seconds before applying online normalization. :type startup_delay: float @@ -401,7 +401,7 @@ def random_subsegment(self, subsegment_length, rng=None): self.subsegment(start_time, start_time + subsegment_length) def convolve(self, impulse_segment, allow_resample=False): - """Convolve this audio segment with the given filter. + """Convolve this audio segment with the given impulse_segment. Note that this is an in-place transformation. diff --git a/deep_speech_2/data_utils/speech.py b/deep_speech_2/data_utils/speech.py index 66f22b2471..94ead1e8f9 100755 --- a/deep_speech_2/data_utils/speech.py +++ b/deep_speech_2/data_utils/speech.py @@ -75,11 +75,11 @@ def concatenate(cls, *segments): :rtype: SpeechSegment :raises ValueError: If the number of segments is zero, or if the sample_rate of any two segments does not match. - :raises TypeError: If every item in segments is not Audiosegment + :raises TypeError: If every item in segments is not SpeechSegment instance. """ if len(segments) == 0: - raise ValueError("No audio segments are given to concatenate.") + raise ValueError("No speech segments are given to concatenate.") sample_rate = segments[0]._sample_rate transcripts = "" for seg in segments: @@ -116,7 +116,7 @@ def slice_from_file(cls, filepath, start=None, end=None, transcript=""): :rtype: SpeechSegment """ audio = Audiosegment.slice_from_file(filepath, start, end) - return cls(audio.samples, audio.sample_rate, transcripts) + return cls(audio.samples, audio.sample_rate, transcript) @classmethod def make_silence(cls, duration, sample_rate): @@ -128,7 +128,7 @@ def make_silence(cls, duration, sample_rate): :param sample_rate: Sample rate. :type sample_rate: float :return: Silence of the given duration. 
- :rtype: AudioSegment + :rtype: SpeechSegment """ audio = AudioSegment.make_silence(duration, sample_rate) return cls(audio.samples, audio.sample_rate, "") From 6f7a0ba65a6bc9b7a3c31bd7cf4f379c0d39efac Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Sun, 18 Jun 2017 17:11:58 +0800 Subject: [PATCH 40/43] add audio file --- deep_speech_2/data_utils/audio.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/deep_speech_2/data_utils/audio.py b/deep_speech_2/data_utils/audio.py index 1ad20bf328..fd1f93df52 100755 --- a/deep_speech_2/data_utils/audio.py +++ b/deep_speech_2/data_utils/audio.py @@ -87,7 +87,7 @@ def concatenate(cls, *segments): :return: Audio segment instance as concatenating results. :rtype: AudioSegment :raises ValueError: If the number of segments is zero, or if the - sample_rate of any two segments does not match. + sample_rate of any two segment does not match. :raises TypeError: If every item in segments is not AudioSegment instance. """ @@ -412,7 +412,7 @@ def convolve(self, impulse_segment, allow_resample=False): rate from this signal. :type allow_resample: bool :raises ValueError: If the sample rate is not match between two - audio segments and resample is not allowed. + audio segments when resample is not allowed. """ if allow_resample and self.sample_rate != impulse_segment.sample_rate: impulse_segment = impulse_segment.resample(self.sample_rate) @@ -464,8 +464,8 @@ def add_noise(self, :param rng: Random number generator state. :type rng: None|random.Random :raises ValueError: If the sample rate does not match between the two - audio segments and resample is not allowed, or if - the duration of noise segments is shorter than + audio segments when downsampling is not allowed, or + if the duration of noise segments is shorter than original audio segments. """ rng = random.Random() if rng is None else rng From 1b7c7c61d143fd99a9d00e8bfb3a870055dc1a54 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Sun, 18 Jun 2017 18:22:48 +0800 Subject: [PATCH 41/43] add audio file --- deep_speech_2/data_utils/audio.py | 4 ++-- deep_speech_2/data_utils/speech.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deep_speech_2/data_utils/audio.py b/deep_speech_2/data_utils/audio.py index fd1f93df52..37f4f0ba50 100755 --- a/deep_speech_2/data_utils/audio.py +++ b/deep_speech_2/data_utils/audio.py @@ -82,8 +82,8 @@ def from_bytes(cls, bytes): def concatenate(cls, *segments): """Concatenate an arbitrary number of audio segments together. - :param *segments: Input audio segments. - :type *segments: AudioSegment + :param *segments: Input audio segments to be concatenated. + :type *segments: tuple of AudioSegment :return: Audio segment instance as concatenating results. :rtype: AudioSegment :raises ValueError: If the number of segments is zero, or if the diff --git a/deep_speech_2/data_utils/speech.py b/deep_speech_2/data_utils/speech.py index 94ead1e8f9..00190009ac 100755 --- a/deep_speech_2/data_utils/speech.py +++ b/deep_speech_2/data_utils/speech.py @@ -69,8 +69,8 @@ def from_bytes(cls, bytes, transcript): def concatenate(cls, *segments): """Concatenate an arbitrary number of speech segments together. - :param *segments: Input speech segments. - :type *segments: SpeechSegment + :param *segments: Input speech segments to be concatenated. + :type *segments: tuple of SpeechSegment :return: Speech segment instance. 
:rtype: SpeechSegment :raises ValueError: If the number of segments is zero, or if the From e64bd0006114860545a754da2df9a782099a857e Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Mon, 19 Jun 2017 00:08:05 +0800 Subject: [PATCH 42/43] add audio file --- deep_speech_2/data_utils/audio.py | 64 ++++++++++++++++++++---------- deep_speech_2/data_utils/speech.py | 10 ++--- 2 files changed, 49 insertions(+), 25 deletions(-) diff --git a/deep_speech_2/data_utils/audio.py b/deep_speech_2/data_utils/audio.py index 37f4f0ba50..5d02feb60d 100755 --- a/deep_speech_2/data_utils/audio.py +++ b/deep_speech_2/data_utils/audio.py @@ -9,6 +9,7 @@ import scikits.samplerate from scipy import signal import random +import copy class AudioSegment(object): @@ -87,9 +88,8 @@ def concatenate(cls, *segments): :return: Audio segment instance as concatenating results. :rtype: AudioSegment :raises ValueError: If the number of segments is zero, or if the - sample_rate of any two segment does not match. - :raises TypeError: If every item in segments is not AudioSegment - instance. + sample_rate of any segments does not match. + :raises TypeError: If any segment is not AudioSegment instance. """ # Perform basic sanity-checks. if len(segments) == 0: @@ -101,7 +101,7 @@ def concatenate(cls, *segments): "different sample rates") if type(seg) is not cls: raise TypeError("Only audio segments of the same type " - "instance can be concatenated.") + "can be concatenated.") samples = np.concatenate([seg.samples for seg in segments]) return cls(samples, sample_rate) @@ -180,8 +180,7 @@ def slice_from_file(cls, file, start=None, end=None): @classmethod def make_silence(cls, duration, sample_rate): - """Creates a silent audio segment of the given duration and - sample rate. + """Creates a silent audio segment of the given duration and sample rate. :param duration: Length of silence in seconds. :type duration: float @@ -193,15 +192,17 @@ def make_silence(cls, duration, sample_rate): samples = np.zeros(int(duration * sample_rate)) return cls(samples, sample_rate) - def superimposed(self, other): + def superimpose(self, other): """Add samples from another segment to those of this segment (sample-wise addition, not segment concatenation). + Note that this is an in-place transformation. + :param other: Segment containing samples to be added in. :type other: AudioSegments :raise TypeError: If type of two segments don't match. - :raise ValueError: If the sample_rate of two segments not equal, or if - the length of segments don't match. + :raise ValueError: If the sample rates of the two segments are not + equal, or if the lengths of segments don't match. """ if type(self) != type(other): raise TypeError("Cannot add segments of different types: %s " @@ -215,7 +216,7 @@ def superimposed(self, other): def to_bytes(self, dtype='float32'): """Create a byte string containing the audio content. - :param dtype: Data type for export samples. Options: 'int16','int32', + :param dtype: Data type for export samples. Options: 'int16', 'int32', 'float32', 'float64'. Default is 'float32'. :type dtype: str :return: Byte string containing audio content. 
@@ -362,16 +363,20 @@ def pad_silence(self, duration, sides='both'):
         elif sides == "both":
             padded = cls.concatenate(silence, self, silence)
         else:
-            raise ValueError("Unknown value for the kwarg %s" % sides)
+            raise ValueError("Unknown value for sides: %s" % sides)
         self._samples = padded._samples
 
     def subsegment(self, start_sec=None, end_sec=None):
-        """Return new AudioSegment containing audio between given boundaries.
+        """Cut the AudioSegment between given boundaries.
+
+        Note that this is an in-place transformation.
 
         :param start_sec: Beginning of subsegment in seconds.
         :type start_sec: float
         :param end_sec: End of subsegment in seconds.
         :type end_sec: float
+        :raise ValueError: If start_sec or end_sec is incorrectly set, e.g. out
+            of bounds in time.
         """
         start_sec = 0.0 if start_sec is None else start_sec
         end_sec = self.duration if end_sec is None else end_sec
@@ -379,19 +384,33 @@ def subsegment(self, start_sec=None, end_sec=None):
             start_sec = self.duration + start_sec
         if end_sec < 0.0:
             end_sec = self.duration + end_sec
+        if start_sec < 0.0:
+            raise ValueError("The slice start position (%f s) is out of "
+                             "bounds." % start_sec)
+        if end_sec < 0.0:
+            raise ValueError("The slice end position (%f s) is out of bounds." %
+                             end_sec)
+        if start_sec > end_sec:
+            raise ValueError("The slice start position (%f s) is later than "
+                             "the end position (%f s)." % (start_sec, end_sec))
+        if end_sec > self.duration:
+            raise ValueError("The slice end position (%f s) is out of bounds "
+                             "(> %f s)" % (end_sec, self.duration))
         start_sample = int(round(start_sec * self._sample_rate))
         end_sample = int(round(end_sec * self._sample_rate))
         self._samples = self._samples[start_sample:end_sample]
 
     def random_subsegment(self, subsegment_length, rng=None):
-        """Return a random subsegment of a specified length in seconds.
+        """Cut the specified length of the audio segment at a random position.
+
+        Note that this is an in-place transformation.
 
         :param subsegment_length: Subsegment length in seconds.
         :type subsegment_length: float
         :param rng: Random number generator state.
         :type rng: random.Random
-        :raises ValueError: If the length of subsegment greater than
-            origineal segemnt.
+        :raises ValueError: If the length of subsegment is greater than
+            the original segment.
         """
         rng = random.Random() if rng is None else rng
         if subsegment_length > self.duration:
             raise ValueError("Length of subsegment must not be greater "
                              "than original segment.")
         start_time = rng.uniform(0.0, self.duration - subsegment_length)
         self.subsegment(start_time, start_time + subsegment_length)
 
     def convolve(self, impulse_segment, allow_resample=False):
-        """Convolve this audio segment with the given impulse_segment.
+        """Convolve this audio segment with the given impulse segment.
 
         Note that this is an in-place transformation.
 
@@ -428,6 +447,8 @@ def convolve_and_normalize(self, impulse_segment, allow_resample=False):
         """Convolve and normalize the resulting audio segment so that it
         has the same average power as the input signal.
 
+        Note that this is an in-place transformation.
+
         :param impulse_segment: Impulse response segments.
         :type impulse_segment: AudioSegment
         :param allow_resample: Indicates whether resampling is allowed when
@@ -445,10 +466,12 @@ def add_noise(self,
                   allow_downsampling=False,
                   max_gain_db=300.0,
                   rng=None):
-        """Adds the given noise segment at a specific signal-to-noise ratio.
+        """Add the given noise segment at a specific signal-to-noise ratio.
         If the noise segment is longer than this segment, a random subsegment
         of matching length is sampled from it and used instead.
 
+        Note that this is an in-place transformation.
diff --git a/deep_speech_2/data_utils/speech.py b/deep_speech_2/data_utils/speech.py
index 00190009ac..fc031ff46f 100755
--- a/deep_speech_2/data_utils/speech.py
+++ b/deep_speech_2/data_utils/speech.py
@@ -67,7 +67,8 @@ def from_bytes(cls, bytes, transcript):
 
     @classmethod
     def concatenate(cls, *segments):
-        """Concatenate an arbitrary number of speech segments together.
+        """Concatenate an arbitrary number of speech segments together; both
+        the audio and the transcripts will be concatenated.
 
         :param *segments: Input speech segments to be concatenated.
         :type *segments: tuple of SpeechSegment
@@ -75,8 +76,7 @@ def concatenate(cls, *segments):
         :rtype: SpeechSegment
         :raises ValueError: If the number of segments is zero, or if the
                             sample_rate of any two segments does not match.
-        :raises TypeError: If every item in segments is not SpeechSegment
-                           instance.
+        :raises TypeError: If any segment is not a SpeechSegment instance.
         """
         if len(segments) == 0:
             raise ValueError("No speech segments are given to concatenate.")
@@ -94,7 +94,7 @@ def concatenate(cls, *segments):
         return cls(samples, sample_rate, transcripts)
 
     @classmethod
-    def slice_from_file(cls, filepath, start=None, end=None, transcript=""):
+    def slice_from_file(cls, filepath, transcript, start=None, end=None):
         """Loads a small section of an speech without having to load
         the entire file into the memory which can be incredibly wasteful.
 
@@ -121,7 +121,7 @@ def slice_from_file(cls, filepath, start=None, end=None, transcript=""):
     @classmethod
     def make_silence(cls, duration, sample_rate):
         """Creates a silent speech segment of the given duration and
-        sample rate.
+        sample rate; the transcript will be an empty string.
 
         :param duration: Length of silence in seconds.
         :type duration: float
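Note that the patched `slice_from_file` line is written here with `transcript` moved before the defaulted arguments; dropping only the `=""` default, as the raw hunk did, would be a SyntaxError in Python. For reference, a hedged usage sketch of the three `SpeechSegment` helpers touched above (the WAV path and transcript are hypothetical; keyword arguments follow the corrected signature):

    seg = SpeechSegment.slice_from_file(
        "utt.wav", transcript="hello world", start=0.5, end=2.0)
    pad = SpeechSegment.make_silence(0.3, seg.sample_rate)  # transcript is ""
    # Both the audio samples and the transcripts are concatenated.
    joined = SpeechSegment.concatenate(pad, seg, pad)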
From ada409670ccb7f786302312e7b87111fd2741d9f Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Mon, 19 Jun 2017 11:31:34 +0800
Subject: [PATCH 43/43] Follow comments.

---
 deep_speech_2/error_rate.py            | 18 ++++++++++++------
 deep_speech_2/tests/test_error_rate.py |  8 ++------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/deep_speech_2/error_rate.py b/deep_speech_2/error_rate.py
index 08fe125581..0cf17921c0 100644
--- a/deep_speech_2/error_rate.py
+++ b/deep_speech_2/error_rate.py
@@ -2,14 +2,20 @@
 """This module provides functions to calculate error rate in different level.
 e.g. wer for word-level, cer for char-level.
 """
-
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+
 import numpy as np
 
 
-def levenshtein_distance(ref, hyp):
+def _levenshtein_distance(ref, hyp):
+    """Levenshtein distance is a string metric for measuring the difference
+    between two sequences. Informally, it is the minimum number of
+    single-character edits (substitutions, insertions or deletions) required
+    to change one word into the other. The edits extend naturally to the
+    word level when calculating the distance between two sentences.
+    """
     ref_len = len(ref)
     hyp_len = len(hyp)
 
@@ -72,7 +78,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
     :type delimiter: char
     :return: Word error rate.
     :rtype: float
-    :raises ValueError: If reference length is zero.
+    :raises ValueError: If the reference length is zero.
     """
     if ignore_case == True:
         reference = reference.lower()
@@ -84,7 +90,7 @@
 
     if len(ref_words) == 0:
         raise ValueError("Reference's word number should be greater than 0.")
-    edit_distance = levenshtein_distance(ref_words, hyp_words)
+    edit_distance = _levenshtein_distance(ref_words, hyp_words)
     wer = float(edit_distance) / len(ref_words)
     return wer
@@ -118,7 +124,7 @@ def cer(reference, hypothesis, ignore_case=False):
     :type ignore_case: bool
     :return: Character error rate.
     :rtype: float
-    :raises ValueError: If reference length is zero.
+    :raises ValueError: If the reference length is zero.
     """
     if ignore_case == True:
         reference = reference.lower()
@@ -130,6 +136,6 @@
     if len(reference) == 0:
         raise ValueError("Length of reference should be greater than 0.")
-    edit_distance = levenshtein_distance(reference, hypothesis)
+    edit_distance = _levenshtein_distance(reference, hypothesis)
     cer = float(edit_distance) / len(reference)
     return cer
diff --git a/deep_speech_2/tests/test_error_rate.py b/deep_speech_2/tests/test_error_rate.py
index 57a6ccd687..be7313f357 100644
--- a/deep_speech_2/tests/test_error_rate.py
+++ b/deep_speech_2/tests/test_error_rate.py
@@ -23,10 +23,8 @@ def test_wer_2(self):
     def test_wer_3(self):
         ref = ' '
         hyp = 'Hypothesis sentence'
-        try:
+        with self.assertRaises(ValueError):
             word_error_rate = error_rate.wer(ref, hyp)
-        except Exception as e:
-            self.assertTrue(isinstance(e, ValueError))
 
     def test_cer_1(self):
         ref = 'werewolf'
@@ -53,10 +51,8 @@ def test_cer_4(self):
     def test_cer_5(self):
         ref = ''
         hyp = 'Hypothesis'
-        try:
+        with self.assertRaises(ValueError):
             char_error_rate = error_rate.cer(ref, hyp)
-        except Exception as e:
-            self.assertTrue(isinstance(e, ValueError))
 
 
 if __name__ == '__main__':
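The body of `_levenshtein_distance` is elided from the hunk above. The recurrence its docstring describes can be sketched as follows; this is a minimal reference implementation using the module's `numpy` import, not necessarily the module's actual (typically memory-optimized) code:

    import numpy as np

    def levenshtein(ref, hyp):
        # dist[i, j] = edit distance between ref[:i] and hyp[:j].
        m, n = len(ref), len(hyp)
        dist = np.zeros((m + 1, n + 1), dtype=np.int64)
        dist[:, 0] = np.arange(m + 1)  # delete all of ref[:i]
        dist[0, :] = np.arange(n + 1)  # insert all of hyp[:j]
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                sub = 0 if ref[i - 1] == hyp[j - 1] else 1
                dist[i, j] = min(dist[i - 1, j] + 1,         # deletion
                                 dist[i, j - 1] + 1,         # insertion
                                 dist[i - 1, j - 1] + sub)   # substitution
        return int(dist[m, n])

    # Character level, and the same recurrence at word level:
    assert levenshtein("kitten", "sitting") == 3
    assert levenshtein("who is there".split(), "who is".split()) == 1

Passing word lists instead of strings is exactly the extension the docstring mentions: `wer` tokenizes by the delimiter and runs the same dynamic program over words, while `cer` runs it over characters.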