From b9c56e1d21411d759cb58b427f3ebbfcdc67df5f Mon Sep 17 00:00:00 2001
From: Yan Chunwei
Date: Fri, 13 Apr 2018 10:56:45 +0800
Subject: [PATCH 01/67] remove cifar10 shuffle (#19)

---
 resnet30/continuous_evaluation.py | 4 ++--
 resnet30/latest_kpis/train_cost_factor.txt | 15 ++++++++++-----
 resnet30/latest_kpis/train_duration_factor.txt | 15 ++++++++++-----
 resnet30/model.py | 2 +-
 resnet30/run.xsh | 2 +-
 resnet50/run.xsh | 7 ++++---
 6 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/resnet30/continuous_evaluation.py b/resnet30/continuous_evaluation.py
index 4aad0b4b..283fcf48 100644
--- a/resnet30/continuous_evaluation.py
+++ b/resnet30/continuous_evaluation.py
@@ -3,8 +3,8 @@
 sys.path.append(os.environ['ceroot'])
 from kpi import CostKpi, DurationKpi
 
-train_cost_kpi = CostKpi('train_cost', 0.05)
-train_duration_kpi = DurationKpi('train_duration', 0.05)
+train_cost_kpi = CostKpi('train_cost', 0.01)
+train_duration_kpi = DurationKpi('train_duration', 0.04)
 
 tracking_kpis = [
     train_cost_kpi,
diff --git a/resnet30/latest_kpis/train_cost_factor.txt b/resnet30/latest_kpis/train_cost_factor.txt
index 09e94b13..040e98ef 100644
--- a/resnet30/latest_kpis/train_cost_factor.txt
+++ b/resnet30/latest_kpis/train_cost_factor.txt
@@ -1,5 +1,10 @@
-[[3.082345724105835]]
-[[2.967602491378784]]
-[[2.708749532699585]]
-[[2.545051336288452]]
-[[2.389655828475952]]
\ No newline at end of file
+[[100.0]]
+[[100.0]]
+[[100.0]]
+[[100.0]]
+[[100.0]]
+[[100.0]]
+[[100.0]]
+[[100.0]]
+[[100.0]]
+[[100.0]]
diff --git a/resnet30/latest_kpis/train_duration_factor.txt b/resnet30/latest_kpis/train_duration_factor.txt
index ad322d8c..13bd6f8b 100644
--- a/resnet30/latest_kpis/train_duration_factor.txt
+++ b/resnet30/latest_kpis/train_duration_factor.txt
@@ -1,5 +1,10 @@
-[10.225940942764282]
-[10.315479040145874]
-[10.815844058990479]
-[10.180259943008423]
-[10.14051604270935]
\ No newline at end of file
+[1000.0]
+[1000.0]
+[1000.0]
+[1000.0]
+[1000.0]
+[1000.0]
+[1000.0]
+[1000.0]
+[1000.0]
+[1000.0]
diff --git a/resnet30/model.py b/resnet30/model.py
index 482d17f3..0140ab72 100644
--- a/resnet30/model.py
+++ b/resnet30/model.py
@@ -102,7 +102,7 @@ def train(batch_size, device, pass_num, iterations):
     fluid.memory_optimize(fluid.default_main_program())
 
     train_reader = paddle.batch(
-        paddle.reader.shuffle(paddle.dataset.cifar.train10(), buf_size=5120),
+        paddle.dataset.cifar.train10(),
         batch_size=batch_size)
 
     test_reader = paddle.batch(
diff --git a/resnet30/run.xsh b/resnet30/run.xsh
index ffd26f00..11393550 100755
--- a/resnet30/run.xsh
+++ b/resnet30/run.xsh
@@ -9,4 +9,4 @@ import sys
 
 model_file = 'model.py'
 
-python @(model_file) --batch_size 500 --iters 5 --device CPU
+python @(model_file) --batch_size 1000 --iters 10 --device CPU
diff --git a/resnet50/run.xsh b/resnet50/run.xsh
index e05f2691..81f7847f 100755
--- a/resnet50/run.xsh
+++ b/resnet50/run.xsh
@@ -2,10 +2,11 @@
 
 export MKL_NUM_THREADS=1
 export OMP_NUM_THREADS=1
-export CUDA_VISIBLE_DEVICES=3
+cudaid=${resnet50_cudaid:=0} # use 0-th card as default
+export CUDA_VISIBLE_DEVICES=$cudaid
 
 # cifar10 128
-FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=128 --data_set=cifar10 --model=resnet_cifar10 --pass_num=30 --gpu_id=$CUDA_VISIBLE_DEVICES
+FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=128 --data_set=cifar10 --model=resnet_cifar10 --pass_num=30 --gpu_id=$cudaid
 
 #flowers 64
-FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=64 --data_set=flowers --model=resnet_imagenet --pass_num=3 --gpu_id=$CUDA_VISIBLE_DEVICES
+FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=64 --data_set=flowers --model=resnet_imagenet --pass_num=3 --gpu_id=$cudaid
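Patch 01 drops the `paddle.reader.shuffle` wrapper so every evaluation run sees batches in the same order, which is what makes the resnet30 KPI series comparable across runs. For readers unfamiliar with that wrapper, here is a plain-Python approximation of what buffered shuffling does (`buffered_shuffle` is our illustrative name, not a Paddle API):

```python
import random

def buffered_shuffle(reader, buf_size):
    """Rough equivalent of paddle.reader.shuffle: fill a buffer of up to
    buf_size samples, shuffle it, then yield - so sample order is random
    only within each buffer."""
    def shuffled():
        buf = []
        for sample in reader():
            buf.append(sample)
            if len(buf) >= buf_size:
                random.shuffle(buf)
                for s in buf:
                    yield s
                buf = []
        random.shuffle(buf)  # flush the partial tail buffer
        for s in buf:
            yield s
    return shuffled

# With the wrapper, two runs over range(8) usually differ; without it, never:
print(list(buffered_shuffle(lambda: iter(range(8)), 4)()))
```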
From b3f7661511be2e6155a20cee532eaff690c9fbce Mon Sep 17 00:00:00 2001
From: root
Date: Mon, 16 Apr 2018 05:24:09 +0000
Subject: [PATCH 02/67] evaluate [494c262a26a1ff29143491fa60fd6ba546d3bebf]

---
 mnist/latest_kpis/test_acc_factor.txt | 10 +++++-----
 mnist/latest_kpis/train_acc_factor.txt | 10 +++++-----
 mnist/latest_kpis/train_duration_factor.txt | 10 +++++-----
 resnet30/latest_kpis/train_cost_factor.txt | 20 +++++++++----------
 .../latest_kpis/train_duration_factor.txt | 20 +++++++++----------
 .../cifar10_128_gpu_memory_factor.txt | 2 +-
 .../cifar10_128_train_acc_factor.txt | 2 +-
 .../cifar10_128_train_speed_factor.txt | 2 +-
 .../flowers_64_gpu_memory_factor.txt | 2 +-
 .../flowers_64_train_speed_factor.txt | 2 +-
 10 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/mnist/latest_kpis/test_acc_factor.txt b/mnist/latest_kpis/test_acc_factor.txt
index f1fd6659..62e7af85 100644
--- a/mnist/latest_kpis/test_acc_factor.txt
+++ b/mnist/latest_kpis/test_acc_factor.txt
@@ -1,5 +1,5 @@
-[0.9749000072479248]
-[0.9811000227928162]
-[0.9858999848365784]
-[0.9860000014305115]
-[0.9872000217437744]
\ No newline at end of file
+[0.9768999814987183]
+[0.9839000105857849]
+[0.9868000149726868]
+[0.9866999983787537]
+[0.9879000186920166]
\ No newline at end of file
diff --git a/mnist/latest_kpis/train_acc_factor.txt b/mnist/latest_kpis/train_acc_factor.txt
index 55a945e1..a51eb966 100644
--- a/mnist/latest_kpis/train_acc_factor.txt
+++ b/mnist/latest_kpis/train_acc_factor.txt
@@ -1,5 +1,5 @@
-[0.9435666799545288]
-[0.982283353805542]
-[0.9876833558082581]
-[0.9906833171844482]
-[0.9932000041007996]
\ No newline at end of file
+[0.9471499919891357]
+[0.9831333160400391]
+[0.9886166453361511]
+[0.9915000200271606]
+[0.9929666519165039]
\ No newline at end of file
diff --git a/mnist/latest_kpis/train_duration_factor.txt b/mnist/latest_kpis/train_duration_factor.txt
index 8253e948..e8fa1ce8 100644
--- a/mnist/latest_kpis/train_duration_factor.txt
+++ b/mnist/latest_kpis/train_duration_factor.txt
@@ -1,5 +1,5 @@
-[38.24392104148865]
-[36.998713970184326]
-[36.87090182304382]
-[36.75976610183716]
-[36.79504203796387]
\ No newline at end of file
+[37.873125076293945]
+[37.53660321235657]
+[37.41462516784668]
+[37.099024057388306]
+[36.901039123535156]
\ No newline at end of file
diff --git a/resnet30/latest_kpis/train_cost_factor.txt b/resnet30/latest_kpis/train_cost_factor.txt
index 040e98ef..65c61b6f 100644
--- a/resnet30/latest_kpis/train_cost_factor.txt
+++ b/resnet30/latest_kpis/train_cost_factor.txt
@@ -1,10 +1,10 @@
-[[100.0]]
-[[100.0]]
-[[100.0]]
-[[100.0]]
-[[100.0]]
-[[100.0]]
-[[100.0]]
-[[100.0]]
-[[100.0]]
-[[100.0]]
+[[3.5852110385894775]]
+[[3.3053879737854004]]
+[[2.8615353107452393]]
+[[2.841454267501831]]
+[[2.748034715652466]]
+[[2.6309971809387207]]
+[[2.6644375324249268]]
+[[2.5834758281707764]]
+[[2.3889408111572266]]
+[[2.501643180847168]]
\ No newline at end of file
diff --git a/resnet30/latest_kpis/train_duration_factor.txt b/resnet30/latest_kpis/train_duration_factor.txt
index 13bd6f8b..20325a62 100644
--- a/resnet30/latest_kpis/train_duration_factor.txt
+++ b/resnet30/latest_kpis/train_duration_factor.txt
@@ -1,10 +1,10 @@
-[1000.0]
-[1000.0]
-[1000.0]
-[1000.0]
-[1000.0]
-[1000.0]
-[1000.0]
-[1000.0]
-[1000.0]
-[1000.0]
+[20.545660972595215]
+[20.668606996536255]
+[20.573415994644165]
+[20.669420957565308]
+[20.579085111618042]
+[20.67686700820923]
+[20.57048511505127]
+[20.665179014205933]
+[20.57060194015503]
+[20.727864027023315]
\ No newline at end of file
diff --git a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt
index 466467aa..16c80a12 100644
--- a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt
+++ b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt
@@ -1 +1 @@
-[1508]
\ No newline at end of file
+[1514]
\ No newline at end of file
diff --git a/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt b/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt
index c276983e..9b7bb93b 100644
--- a/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt
+++ b/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt
@@ -1 +1 @@
-[0.99755859375]
\ No newline at end of file
+[0.982226550579071]
\ No newline at end of file
diff --git a/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt b/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt
index c4fe04e3..14601274 100644
--- a/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt
+++ b/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt
@@ -1 +1 @@
-[404.4730529785156]
\ No newline at end of file
+[413.5191955566406]
\ No newline at end of file
diff --git a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt
index 24c95b88..4fd62d27 100644
--- a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt
+++ b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt
@@ -1 +1 @@
-[11014]
\ No newline at end of file
+[10800]
\ No newline at end of file
diff --git a/resnet50/latest_kpis/flowers_64_train_speed_factor.txt b/resnet50/latest_kpis/flowers_64_train_speed_factor.txt
index 8585524f..fa29944f 100644
--- a/resnet50/latest_kpis/flowers_64_train_speed_factor.txt
+++ b/resnet50/latest_kpis/flowers_64_train_speed_factor.txt
@@ -1 +1 @@
-[78.7945785522461]
\ No newline at end of file
+[78.08560943603516]
\ No newline at end of file

From e4a20c5626c133cb3262c2ed7ca5578d017065af Mon Sep 17 00:00:00 2001
From: kolinwei <331911734@qq.com>
Date: Mon, 23 Apr 2018 20:41:25 +0800
Subject: [PATCH 03/67] add model vgg16

---
 vgg16/continuous_evaluation.py | 22 ++
 .../cifar10_128_gpu_memory_factor.txt | 1 +
 .../cifar10_128_train_speed_factor.txt | 1 +
 .../flowers_32_gpu_memory_factor.txt | 1 +
 .../flowers_32_train_speed_factor.txt | 1 +
 vgg16/model.py | 279 ++++++++++++++++++
 vgg16/run.xsh | 13 +
 7 files changed, 318 insertions(+)
 create mode 100644 vgg16/continuous_evaluation.py
 create mode 100644 vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt
 create mode 100644 vgg16/latest_kpis/cifar10_128_train_speed_factor.txt
 create mode 100644 vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt
 create mode 100644 vgg16/latest_kpis/flowers_32_train_speed_factor.txt
 create mode 100644 vgg16/model.py
 create mode 100755 vgg16/run.xsh

diff --git a/vgg16/continuous_evaluation.py b/vgg16/continuous_evaluation.py
new file mode 100644
index 00000000..c302462d
--- /dev/null
+++ b/vgg16/continuous_evaluation.py
@@ -0,0 +1,22 @@
+"""
+continuous_evaluation.py
+"""
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import AccKpi
+from kpi import CostKpi
+from kpi import DurationKpi
+
+cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.02, 0)
+cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.02, 0)
+
+flowers_32_train_speed_kpi = AccKpi('flowers_32_train_speed', 0.02, 0)
+flowers_32_gpu_memory_kpi = DurationKpi('flowers_32_gpu_memory', 0.02, 0)
+
+tracking_kpis = [
+    cifar10_128_train_speed_kpi,
+    cifar10_128_gpu_memory_kpi,
+    flowers_32_train_speed_kpi,
+    flowers_32_gpu_memory_kpi,
+]
diff --git a/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt b/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt
new file mode 100644
index 00000000..c38a317b
--- /dev/null
+++ b/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt
@@ -0,0 +1 @@
+[2332]
\ No newline at end of file
diff --git a/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt b/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt
new file mode 100644
index 00000000..fdab1315
--- /dev/null
+++ b/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt
@@ -0,0 +1 @@
+[440.19189453125]
\ No newline at end of file
diff --git a/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt b/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt
new file mode 100644
index 00000000..e5cd1eec
--- /dev/null
+++ b/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt
@@ -0,0 +1 @@
+[10606]
\ No newline at end of file
diff --git a/vgg16/latest_kpis/flowers_32_train_speed_factor.txt b/vgg16/latest_kpis/flowers_32_train_speed_factor.txt
new file mode 100644
index 00000000..f15d1c81
--- /dev/null
+++ b/vgg16/latest_kpis/flowers_32_train_speed_factor.txt
@@ -0,0 +1 @@
+[26.19645690917969]
diff --git a/vgg16/model.py b/vgg16/model.py
new file mode 100644
index 00000000..b3e20771
--- /dev/null
+++ b/vgg16/model.py
@@ -0,0 +1,279 @@
+"""
+VGG16 benchmark in Fluid
+"""
+from __future__ import print_function
+
+import sys
+import time
+import numpy as np
+import commands
+import threading
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+import argparse
+import functools
+
+from continuous_evaluation import tracking_kpis
+
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument(
+    '--batch_size', type=int, default=128, help="Batch size for training.")
+parser.add_argument(
+    '--skip_batch_num',
+    type=int,
+    default=5,
+    help='The first num of minibatch num to skip, for better performance test')
+parser.add_argument(
+    '--iterations', type=int, default=80, help='The number of minibatches.')
+parser.add_argument(
+    '--learning_rate',
+    type=float,
+    default=1e-3,
+    help="Learning rate for training.")
+parser.add_argument('--pass_num', type=int, default=50, help="No. of passes.")
+parser.add_argument(
+    '--device',
+    type=str,
+    default='GPU',
+    choices=['CPU', 'GPU'],
+    help="The device type.")
+parser.add_argument(
+    "--gpu_id",
+    type=int,
+    default=3,
+    help="The GPU Card Id. (default: %(default)d)")
+parser.add_argument(
+    '--data_format',
+    type=str,
+    default='NCHW',
+    choices=['NCHW', 'NHWC'],
+    help='The data order, now only support NCHW.')
+parser.add_argument(
+    '--data_set',
+    type=str,
+    default='cifar10',
+    choices=['cifar10', 'flowers'],
+    help='Optional dataset for benchmark.')
+parser.add_argument(
+    '--with_test',
+    action='store_true',
+    help='If set, test the testset during training.')
+args = parser.parse_args()
+
+
+def vgg16_bn_drop(input):
+    """
+    vgg16_bn_drop
+    """
+    def conv_block(input, num_filter, groups, dropouts):
+        """
+        conv_block
+        """
+        return fluid.nets.img_conv_group(
+            input=input,
+            pool_size=2,
+            pool_stride=2,
+            conv_num_filter=[num_filter] * groups,
+            conv_filter_size=3,
+            conv_act='relu',
+            conv_with_batchnorm=True,
+            conv_batchnorm_drop_rate=dropouts,
+            pool_type='max')
+
+    conv1 = conv_block(input, 64, 2, [0.3, 0])
+    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
+    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
+    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
+    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
+
+    drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
+    fc1 = fluid.layers.fc(input=drop, size=512, act=None)
+    bn = fluid.layers.batch_norm(input=fc1, act='relu')
+    drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
+    fc2 = fluid.layers.fc(input=drop2, size=512, act=None)
+    return fc2
+
+
+def main():
+    """
+    main
+    """
+    if args.data_set == "cifar10":
+        classdim = 10
+        if args.data_format == 'NCHW':
+            data_shape = [3, 32, 32]
+        else:
+            data_shape = [32, 32, 3]
+    else:
+        classdim = 102
+        if args.data_format == 'NCHW':
+            data_shape = [3, 224, 224]
+        else:
+            data_shape = [224, 224, 3]
+
+    # Input data
+    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+
+    # Train program
+    net = vgg16_bn_drop(images)
+    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
+    cost = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+    # Evaluator
+    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
+    batch_acc = fluid.layers.accuracy(
+        input=predict, label=label, total=batch_size_tensor)
+
+    # inference program
+    inference_program = fluid.default_main_program().clone()
+    with fluid.program_guard(inference_program):
+        inference_program = fluid.io.get_inference_program(
+            target_vars=[batch_acc, batch_size_tensor])
+
+    # Optimization
+    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
+    opts = optimizer.minimize(avg_cost)
+
+    fluid.memory_optimize(fluid.default_main_program())
+
+    # Initialize executor
+    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
+    exe = fluid.Executor(place)
+
+    # Parameter initialization
+    exe.run(fluid.default_startup_program())
+
+    # data reader
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.cifar.train10()
+            if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
+            buf_size=5120),
+        batch_size=args.batch_size)
+    test_reader = paddle.batch(
+        paddle.dataset.cifar.test10()
+        if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
+        batch_size=args.batch_size)
+
+    # test
+    def test(exe):
+        """
+        test
+        """
+        test_accuracy = fluid.average.WeightedAverage()
+        for batch_id, data in enumerate(test_reader()):
+            img_data = np.array(map(lambda x: x[0].reshape(data_shape),
+                                    data)).astype("float32")
+            y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+            y_data = y_data.reshape([-1, 1])
+
+            acc, weight = exe.run(inference_program,
+                                  feed={"pixel": img_data,
+                                        "label": y_data},
+                                  fetch_list=[batch_acc, batch_size_tensor])
+            test_accuracy.add(value=acc, weight=weight)
+        return test_accuracy.eval()
+
+    train_acc_kpi = None
+    for kpi in tracking_kpis:
+        if kpi.name == '%s_%s_train_acc' % (args.data_set, args.batch_size):
+            train_acc_kpi = kpi
+    train_speed_kpi = None
+    for kpi in tracking_kpis:
+        if kpi.name == '%s_%s_train_speed' % (args.data_set, args.batch_size):
+            train_speed_kpi = kpi
+
+    iters, num_samples, start_time = 0, 0, time.time()
+    accuracy = fluid.average.WeightedAverage()
+    for pass_id in range(args.pass_num):
+        accuracy.reset()
+        train_accs = []
+        train_losses = []
+        for batch_id, data in enumerate(train_reader()):
+            if iters == args.skip_batch_num:
+                start_time = time.time()
+                num_samples = 0
+            if iters == args.iterations:
+                break
+            img_data = np.array(map(lambda x: x[0].reshape(data_shape),
+                                    data)).astype("float32")
+            y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+            y_data = y_data.reshape([-1, 1])
+
+            loss, acc, weight = exe.run(
+                fluid.default_main_program(),
+                feed={"pixel": img_data,
+                      "label": y_data},
+                fetch_list=[avg_cost, batch_acc, batch_size_tensor])
+            accuracy.add(value=acc, weight=weight)
+            iters += 1
+            num_samples += len(y_data)
+            if (batch_id % 10) == 0:
+                print(
+                    "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
+                    (pass_id, iters, loss, acc)
+                )  # The accuracy is the accumulation of batches, but not the current batch.
+
+        # pass_train_acc = accuracy.eval()
+        train_losses.append(loss)
+        train_accs.append(acc)
+        print("Pass: %d, Loss: %f, Train Accuracy: %f\n" %
+              (pass_id, np.mean(train_losses), np.mean(train_accs)))
+        train_elapsed = time.time() - start_time
+        examples_per_sec = num_samples / train_elapsed
+        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
+              (num_samples, train_elapsed, examples_per_sec))
+        #train_acc_kpi.add_record(np.array(train_accs, dtype='float32'))
+        train_speed_kpi.add_record(np.array(examples_per_sec, dtype='float32'))
+        # evaluation
+        if args.with_test:
+            pass_test_acc = test(exe)
+        break
+    #train_acc_kpi.persist()
+    train_speed_kpi.persist()
+
+
+def print_arguments():
+    """
+    print_arguments
+    """
+    print('----------- vgg Configuration Arguments -----------')
+    for arg, value in sorted(vars(args).iteritems()):
+        print('%s: %s' % (arg, value))
+    print('------------------------------------------------')
+
+
+def collect_gpu_memory_data(mem_list):
+    """
+    collect the GPU memory data
+    """
+    while (True):
+        command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv" % args.gpu_id
+        status, output = commands.getstatusoutput(command)
+        if status != 0:
+            print('Get GPU memory data error')
+        else:
+            mem_list.append(int(output.split('\n')[1].split(' ')[0]))
+        time.sleep(1)
+
+
+def save_gpu_data(mem_list):
+    gpu_memory_kpi = None
+    for kpi in tracking_kpis:
+        if kpi.name == '%s_%s_gpu_memory' % (args.data_set, args.batch_size):
+            gpu_memory_kpi = kpi
+    gpu_memory_kpi.add_record(max(mem_list))
+    gpu_memory_kpi.persist()
+
+
+if __name__ == "__main__":
+    print_arguments()
+    mem_data_list = []
+    collect_memory_thread = threading.Thread(
+        target=collect_gpu_memory_data, args=(mem_data_list, ))
+    collect_memory_thread.setDaemon(True)
+    collect_memory_thread.start()
+    main()
+    save_gpu_data(mem_data_list)
diff --git a/vgg16/run.xsh b/vgg16/run.xsh
new file mode 100755
index 00000000..4609cfc6
--- /dev/null
+++ b/vgg16/run.xsh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+export MKL_NUM_THREADS=1
+export OMP_NUM_THREADS=1
+cudaid=${vgg16_cudaid:=0} # use 0-th card as default
+export CUDA_VISIBLE_DEVICES=$cudaid
+
+
+#cifar10 128
+FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=128 --data_set=cifar10 --iterations=300 --gpu_id=$cudaid
+
+#flowers 32
+FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=32 --data_set=flowers --iterations=100 --gpu_id=$cudaid
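The vgg16/model.py above samples GPU memory by shelling out to nvidia-smi from a daemon thread, using the Python 2 `commands` module. Below is a minimal Python 3 rendering of the same idea with `subprocess` swapped in; the function name and the `stop_event` parameter are ours, and it assumes an `nvidia-smi` that supports `--query-compute-apps`:

```python
import subprocess
import threading
import time

def poll_gpu_memory(gpu_id, samples, stop_event, interval=1.0):
    """Append used-memory readings (MiB) for one GPU until stop_event is set."""
    cmd = ["nvidia-smi", "--id=%d" % gpu_id,
           "--query-compute-apps=used_memory", "--format=csv,noheader,nounits"]
    while not stop_event.is_set():
        try:
            for line in subprocess.check_output(cmd).decode().splitlines():
                if line.strip():
                    samples.append(int(line.strip()))
        except (subprocess.CalledProcessError, OSError, ValueError):
            pass  # no GPU process running yet, or nvidia-smi unavailable
        time.sleep(interval)

# As in the model scripts: run this in a daemon thread while training, then
# report max(samples) as the <dataset>_<batch>_gpu_memory KPI.
```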
From b510dd036dca4b2c735ec9fee8fccc4a87618ec8 Mon Sep 17 00:00:00 2001
From: Yan Chunwei
Date: Tue, 24 Apr 2018 07:34:11 +0800
Subject: [PATCH 04/67] reset resnet30 train_duration kpi history

It seems that CPU performance on AWS has a stable difference across machines.
---
 .../latest_kpis/train_duration_factor.txt | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/resnet30/latest_kpis/train_duration_factor.txt b/resnet30/latest_kpis/train_duration_factor.txt
index 20325a62..cfd0474b 100644
--- a/resnet30/latest_kpis/train_duration_factor.txt
+++ b/resnet30/latest_kpis/train_duration_factor.txt
@@ -1,10 +1,10 @@
-[20.545660972595215]
-[20.668606996536255]
-[20.573415994644165]
-[20.669420957565308]
-[20.579085111618042]
-[20.67686700820923]
-[20.57048511505127]
-[20.665179014205933]
-[20.57060194015503]
-[20.727864027023315]
\ No newline at end of file
+[210.545660972595215]
+[210.668606996536255]
+[210.573415994644165]
+[210.669420957565308]
+[210.579085111618042]
+[210.67686700820923]
+[210.57048511505127]
+[210.665179014205933]
+[210.57060194015503]
+[210.727864027023315]
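Patches like this one rebaseline a model by rewriting its latest_kpis/*_factor.txt files, one JSON-encoded sample per line. The kpi module itself is not part of this excerpt, so the following is only a plausible sketch of the tolerance check its CostKpi/DurationKpi classes perform; the `evaluate` method and the file-parsing details are our assumptions, not the real API. The idea: a run fails when its average regresses past the baseline average by more than the ratio passed at construction (e.g. 0.04 for 4%).

```python
import json

class DurationKpi(object):
    """Hypothetical lower-is-better KPI with a relative tolerance."""

    def __init__(self, name, diff_ratio):
        self.name = name
        self.diff_ratio = diff_ratio
        self.records = []

    def add_record(self, value):
        self.records.append(float(value))

    def evaluate(self, baseline_file):
        # Factor files hold one JSON sample per line: "[21.55]" for
        # durations, nested "[[2.41]]" for costs - flatten either shape.
        flatten = lambda v: flatten(v[0]) if isinstance(v, list) else v
        with open(baseline_file) as f:
            base = [flatten(json.loads(line)) for line in f if line.strip()]
        base_avg = sum(base) / len(base)
        cur_avg = sum(self.records) / len(self.records)
        return cur_avg <= base_avg * (1.0 + self.diff_ratio)
```

Under that reading, resetting the history to inflated values (the 210.x durations above, or the 100.0/1000.0 placeholders in patch 01) effectively disarms the check until fresh baselines are recorded.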
From f5de75376c102329e632c01af86da642083f759a Mon Sep 17 00:00:00 2001
From: root
Date: Tue, 24 Apr 2018 00:20:02 +0000
Subject: [PATCH 05/67] evaluate [504e60a881fd7e72d744e256d90eaec4f52e5c7b]

---
 mnist/latest_kpis/train_duration_factor.txt | 10 +++++-----
 resnet30/latest_kpis/train_cost_factor.txt | 20 +++++++++----------
 .../latest_kpis/train_duration_factor.txt | 20 +++++++++----------
 .../cifar10_128_gpu_memory_factor.txt | 2 +-
 .../cifar10_128_train_speed_factor.txt | 2 +-
 .../flowers_64_gpu_memory_factor.txt | 2 +-
 .../flowers_64_train_speed_factor.txt | 2 +-
 7 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/mnist/latest_kpis/train_duration_factor.txt b/mnist/latest_kpis/train_duration_factor.txt
index e8fa1ce8..07c6feef 100644
--- a/mnist/latest_kpis/train_duration_factor.txt
+++ b/mnist/latest_kpis/train_duration_factor.txt
@@ -1,5 +1,5 @@
-[37.873125076293945]
-[37.53660321235657]
-[37.41462516784668]
-[37.099024057388306]
-[36.901039123535156]
\ No newline at end of file
+[36.88168907165527]
+[36.785341024398804]
+[36.939675092697144]
+[36.425740003585815]
+[37.25818109512329]
\ No newline at end of file
diff --git a/resnet30/latest_kpis/train_cost_factor.txt b/resnet30/latest_kpis/train_cost_factor.txt
index 65c61b6f..06457738 100644
--- a/resnet30/latest_kpis/train_cost_factor.txt
+++ b/resnet30/latest_kpis/train_cost_factor.txt
@@ -1,10 +1,10 @@
-[[3.5852110385894775]]
-[[3.3053879737854004]]
-[[2.8615353107452393]]
-[[2.841454267501831]]
-[[2.748034715652466]]
-[[2.6309971809387207]]
-[[2.6644375324249268]]
-[[2.5834758281707764]]
-[[2.3889408111572266]]
-[[2.501643180847168]]
\ No newline at end of file
+[[3.593585252761841]]
+[[3.2120132446289062]]
+[[2.9515209197998047]]
+[[2.733518123626709]]
+[[2.5107831954956055]]
+[[2.4020209312438965]]
+[[2.422689914703369]]
+[[2.372192144393921]]
+[[2.407041072845459]]
+[[2.4115140438079834]]
\ No newline at end of file
diff --git a/resnet30/latest_kpis/train_duration_factor.txt b/resnet30/latest_kpis/train_duration_factor.txt
index cfd0474b..42296340 100644
--- a/resnet30/latest_kpis/train_duration_factor.txt
+++ b/resnet30/latest_kpis/train_duration_factor.txt
@@ -1,10 +1,10 @@
-[210.545660972595215]
-[210.668606996536255]
-[210.573415994644165]
-[210.669420957565308]
-[210.579085111618042]
-[210.67686700820923]
-[210.57048511505127]
-[210.665179014205933]
-[210.57060194015503]
-[210.727864027023315]
+[22.227629899978638]
+[21.618540048599243]
+[21.557451009750366]
+[21.5592360496521]
+[21.5337131023407]
+[21.54674196243286]
+[21.52670407295227]
+[21.53929376602173]
+[21.61059308052063]
+[21.552989959716797]
\ No newline at end of file
diff --git a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt
index 16c80a12..17cb986d 100644
--- a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt
+++ b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt
@@ -1 +1 @@
-[1514]
\ No newline at end of file
+[1074]
\ No newline at end of file
diff --git a/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt b/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt
index 14601274..a7018699 100644
--- a/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt
+++ b/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt
@@ -1 +1 @@
-[413.5191955566406]
\ No newline at end of file
+[927.919677734375]
\ No newline at end of file
diff --git a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt
index 4fd62d27..08a40c04 100644
--- a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt
+++ b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt
@@ -1 +1 @@
-[10800]
\ No newline at end of file
+[3400]
\ No newline at end of file
diff --git a/resnet50/latest_kpis/flowers_64_train_speed_factor.txt b/resnet50/latest_kpis/flowers_64_train_speed_factor.txt
index fa29944f..4eedd470 100644
--- a/resnet50/latest_kpis/flowers_64_train_speed_factor.txt
+++ b/resnet50/latest_kpis/flowers_64_train_speed_factor.txt
@@ -1 +1 @@
-[78.08560943603516]
\ No newline at end of file
+[101.89627075195312]
\ No newline at end of file

From 9145e56149dbb9b74b09421d947263b2f8b9395f Mon Sep 17 00:00:00 2001
From: kolinwei <331911734@qq.com>
Date: Tue, 24 Apr 2018 09:04:23 +0800
Subject: [PATCH 06/67] add model seq2seq

---
 seq2seq/continuous_evaluation.py | 18 +
 .../latest_kpis/wmb_128_gpu_memory_factor.txt | 1 +
 .../wmb_128_train_speed_factor.txt | 1 +
 seq2seq/model.py | 415 ++++++++++++++++++
 seq2seq/run.xsh | 9 +
 5 files changed, 444 insertions(+)
 create mode 100644 seq2seq/continuous_evaluation.py
 create mode 100644 seq2seq/latest_kpis/wmb_128_gpu_memory_factor.txt
 create mode 100644 seq2seq/latest_kpis/wmb_128_train_speed_factor.txt
 create mode 100644 seq2seq/model.py
 create mode 100755 seq2seq/run.xsh

diff --git a/seq2seq/continuous_evaluation.py b/seq2seq/continuous_evaluation.py
new file mode 100644
index 00000000..a1c86023
--- /dev/null
+++ b/seq2seq/continuous_evaluation.py
@@ -0,0 +1,18 @@
+"""
+continuous_evaluation.py
+"""
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import AccKpi
+from kpi import CostKpi
+from kpi import DurationKpi
+
+wmb_128_train_speed_kpi = AccKpi('wmb_128_train_speed', 0.03, 0)
+wmb_128_gpu_memory_kpi = DurationKpi('wmb_128_gpu_memory', 0.02, 0)
+
+
+tracking_kpis = [
+    wmb_128_train_speed_kpi,
+    wmb_128_gpu_memory_kpi,
+]
diff --git a/seq2seq/latest_kpis/wmb_128_gpu_memory_factor.txt b/seq2seq/latest_kpis/wmb_128_gpu_memory_factor.txt
new file mode 100644
index 00000000..82cc4a46
--- /dev/null
+++ b/seq2seq/latest_kpis/wmb_128_gpu_memory_factor.txt
@@ -0,0 +1 @@
+[3976]
\ No newline at end of file
diff --git a/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt b/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt
new file mode 100644
index 00000000..87d52665
--- /dev/null
+++ b/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt
@@ -0,0 +1 @@
+[2408.184814453125]
\ No newline at end of file
diff --git a/seq2seq/model.py b/seq2seq/model.py
new file mode 100644
index 00000000..124f32f6
--- /dev/null
+++ b/seq2seq/model.py
@@ -0,0 +1,415 @@
+"""seq2seq model for fluid."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import argparse
+import time
+import distutils.util
+import commands
+import threading
+
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+import paddle.fluid.framework as framework
+from paddle.fluid.executor import Executor
+
+from continuous_evaluation import tracking_kpis
+
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument(
+    "--embedding_dim",
+    type=int,
+    default=512,
+    help="The dimension of embedding table. (default: %(default)d)")
+parser.add_argument(
+    "--encoder_size",
+    type=int,
+    default=512,
+    help="The size of encoder bi-rnn unit. (default: %(default)d)")
+parser.add_argument(
+    "--decoder_size",
+    type=int,
+    default=512,
+    help="The size of decoder rnn unit. (default: %(default)d)")
+parser.add_argument(
+    "--batch_size",
+    type=int,
+    default=16,
+    help="The sequence number of a mini-batch data. (default: %(default)d)")
+parser.add_argument(
+    '--skip_batch_num',
+    type=int,
+    default=5,
+    help='The first num of minibatch num to skip, for better performance test')
+parser.add_argument(
+    '--iterations', type=int, default=80, help='The number of minibatches.')
+parser.add_argument(
+    "--dict_size",
+    type=int,
+    default=30000,
+    help="The dictionary capacity. Dictionaries of source sequence and "
+    "target dictionary have same capacity. (default: %(default)d)")
+parser.add_argument(
+    "--pass_num",
+    type=int,
+    default=2,
+    help="The pass number to train. (default: %(default)d)")
+parser.add_argument(
+    "--learning_rate",
+    type=float,
+    default=0.0002,
+    help="Learning rate used to train the model. (default: %(default)f)")
+parser.add_argument(
+    "--infer_only", action='store_true', help="If set, run forward only.")
+parser.add_argument(
+    "--beam_size",
+    type=int,
+    default=3,
+    help="The width for beam searching. (default: %(default)d)")
+parser.add_argument(
+    '--device',
+    type=str,
+    default='GPU',
+    choices=['CPU', 'GPU'],
+    help="The device type.")
+parser.add_argument(
+    "--gpu_id",
+    type=int,
+    default=3,
+    help="The GPU Card Id. (default: %(default)d)")
+parser.add_argument(
+    "--max_length",
+    type=int,
+    default=250,
+    help="The maximum length of sequence when doing generation. "
+    "(default: %(default)d)")
+parser.add_argument(
+    '--with_test',
+    action='store_true',
+    help='If set, test the testset during training.')
+
+
+def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
+    def linear(inputs):
+        return fluid.layers.fc(input=inputs, size=size, bias_attr=True)
+
+    forget_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
+    input_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
+    output_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
+    cell_tilde = fluid.layers.tanh(x=linear([hidden_t_prev, x_t]))
+
+    cell_t = fluid.layers.sums(input=[
+        fluid.layers.elementwise_mul(
+            x=forget_gate, y=cell_t_prev), fluid.layers.elementwise_mul(
+                x=input_gate, y=cell_tilde)
+    ])
+
+    hidden_t = fluid.layers.elementwise_mul(
+        x=output_gate, y=fluid.layers.tanh(x=cell_t))
+
+    return hidden_t, cell_t
+
+
+def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
+                   target_dict_dim, is_generating, beam_size, max_length):
+    """Construct a seq2seq network."""
+
+    def bi_lstm_encoder(input_seq, gate_size):
+        # Linear transformation part for input gate, output gate, forget gate
+        # and cell activation vectors need be done outside of dynamic_lstm.
+        # So the output size is 4 times of gate_size.
+        input_forward_proj = fluid.layers.fc(input=input_seq,
+                                             size=gate_size * 4,
+                                             act=None,
+                                             bias_attr=False)
+        forward, _ = fluid.layers.dynamic_lstm(
+            input=input_forward_proj, size=gate_size * 4, use_peepholes=False)
+        input_reversed_proj = fluid.layers.fc(input=input_seq,
+                                              size=gate_size * 4,
+                                              act=None,
+                                              bias_attr=False)
+        reversed, _ = fluid.layers.dynamic_lstm(
+            input=input_reversed_proj,
+            size=gate_size * 4,
+            is_reverse=True,
+            use_peepholes=False)
+        return forward, reversed
+
+    src_word_idx = fluid.layers.data(
+        name='source_sequence', shape=[1], dtype='int64', lod_level=1)
+
+    src_embedding = fluid.layers.embedding(
+        input=src_word_idx,
+        size=[source_dict_dim, embedding_dim],
+        dtype='float32')
+
+    src_forward, src_reversed = bi_lstm_encoder(
+        input_seq=src_embedding, gate_size=encoder_size)
+
+    encoded_vector = fluid.layers.concat(
+        input=[src_forward, src_reversed], axis=1)
+
+    encoded_proj = fluid.layers.fc(input=encoded_vector,
+                                   size=decoder_size,
+                                   bias_attr=False)
+
+    backward_first = fluid.layers.sequence_pool(
+        input=src_reversed, pool_type='first')
+
+    decoder_boot = fluid.layers.fc(input=backward_first,
+                                   size=decoder_size,
+                                   bias_attr=False,
+                                   act='tanh')
+
+    def lstm_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
+                                    decoder_boot, decoder_size):
+        def simple_attention(encoder_vec, encoder_proj, decoder_state):
+            decoder_state_proj = fluid.layers.fc(input=decoder_state,
+                                                 size=decoder_size,
+                                                 bias_attr=False)
+            decoder_state_expand = fluid.layers.sequence_expand(
+                x=decoder_state_proj, y=encoder_proj)
+            concated = fluid.layers.concat(
+                input=[encoder_proj, decoder_state_expand], axis=1)
+            attention_weights = fluid.layers.fc(input=concated,
+                                                size=1,
+                                                act='tanh',
+                                                bias_attr=False)
+            attention_weights = fluid.layers.sequence_softmax(
+                input=attention_weights)
+            weights_reshape = fluid.layers.reshape(
+                x=attention_weights, shape=[-1])
+            scaled = fluid.layers.elementwise_mul(
+                x=encoder_vec, y=weights_reshape, axis=0)
+            context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
+            return context
+
+        rnn = fluid.layers.DynamicRNN()
+
+        cell_init = fluid.layers.fill_constant_batch_size_like(
+            input=decoder_boot,
+            value=0.0,
+            shape=[-1, decoder_size],
+            dtype='float32')
+        cell_init.stop_gradient = False
+
+        with rnn.block():
+            current_word = rnn.step_input(target_embedding)
+            encoder_vec = rnn.static_input(encoder_vec)
+            encoder_proj = rnn.static_input(encoder_proj)
+            hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
+            cell_mem = rnn.memory(init=cell_init)
+            context = simple_attention(encoder_vec, encoder_proj, hidden_mem)
+            decoder_inputs = fluid.layers.concat(
+                input=[context, current_word], axis=1)
+            h, c = lstm_step(decoder_inputs, hidden_mem, cell_mem, decoder_size)
+            rnn.update_memory(hidden_mem, h)
+            rnn.update_memory(cell_mem, c)
+            out = fluid.layers.fc(input=h,
+                                  size=target_dict_dim,
+                                  bias_attr=True,
+                                  act='softmax')
+            rnn.output(out)
+        return rnn()
+
+    if not is_generating:
+        trg_word_idx = fluid.layers.data(
+            name='target_sequence', shape=[1], dtype='int64', lod_level=1)
+
+        trg_embedding = fluid.layers.embedding(
+            input=trg_word_idx,
+            size=[target_dict_dim, embedding_dim],
+            dtype='float32')
+
+        prediction = lstm_decoder_with_attention(trg_embedding, encoded_vector,
+                                                 encoded_proj, decoder_boot,
+                                                 decoder_size)
+        label = fluid.layers.data(
+            name='label_sequence', shape=[1], dtype='int64', lod_level=1)
+        cost = fluid.layers.cross_entropy(input=prediction, label=label)
+        avg_cost = fluid.layers.mean(x=cost)
+
+        feeding_list = ["source_sequence", "target_sequence", "label_sequence"]
+
+        return avg_cost, feeding_list
+
+
+def to_lodtensor(data, place):
+    seq_lens = [len(seq) for seq in data]
+    cur_len = 0
+    lod = [cur_len]
+    for l in seq_lens:
+        cur_len += l
+        lod.append(cur_len)
+    flattened_data = np.concatenate(data, axis=0).astype("int64")
+    flattened_data = flattened_data.reshape([len(flattened_data), 1])
+    lod_t = core.LoDTensor()
+    lod_t.set(flattened_data, place)
+    lod_t.set_lod([lod])
+    return lod_t, lod[-1]
+
+
+def lodtensor_to_ndarray(lod_tensor):
+    dims = lod_tensor.get_dims()
+    ndarray = np.zeros(shape=dims).astype('float32')
+    for i in xrange(np.product(dims)):
+        ndarray.ravel()[i] = lod_tensor.get_float_element(i)
+    return ndarray
+
+
+def train():
+    avg_cost, feeding_list = seq_to_seq_net(
+        args.embedding_dim,
+        args.encoder_size,
+        args.decoder_size,
+        args.dict_size,
+        args.dict_size,
+        False,
+        beam_size=args.beam_size,
+        max_length=args.max_length)
+
+    # clone from default main program
+    inference_program = fluid.default_main_program().clone()
+
+    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
+    optimizer.minimize(avg_cost)
+
+    fluid.memory_optimize(fluid.default_main_program())
+
+    train_batch_generator = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.wmt14.train(args.dict_size), buf_size=1000),
+        batch_size=args.batch_size)
+
+    test_batch_generator = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.wmt14.test(args.dict_size), buf_size=1000),
+        batch_size=args.batch_size)
+
+    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
+    exe = Executor(place)
+    exe.run(framework.default_startup_program())
+
+    def do_validation():
+        total_loss = 0.0
+        count = 0
+        for batch_id, data in enumerate(test_batch_generator()):
+            src_seq = to_lodtensor(map(lambda x: x[0], data), place)[0]
+            trg_seq = to_lodtensor(map(lambda x: x[1], data), place)[0]
+            lbl_seq = to_lodtensor(map(lambda x: x[2], data), place)[0]
+
+            fetch_outs = exe.run(inference_program,
+                                 feed={
+                                     feeding_list[0]: src_seq,
+                                     feeding_list[1]: trg_seq,
+                                     feeding_list[2]: lbl_seq
+                                 },
+                                 fetch_list=[avg_cost],
+                                 return_numpy=False)
+
+            total_loss += lodtensor_to_ndarray(fetch_outs[0])[0]
+            count += 1
+
+        return total_loss / count
+
+    train_acc_kpi = None
+    for kpi in tracking_kpis:
+        if kpi.name == 'wmb_%s_train_acc' % (args.batch_size):
+            train_acc_kpi = kpi
+    train_speed_kpi = None
+    for kpi in tracking_kpis:
+        if kpi.name == 'wmb_%s_train_speed' % (args.batch_size):
+            train_speed_kpi = kpi
+
+    iters, num_samples, start_time = 0, 0, time.time()
+    for pass_id in xrange(args.pass_num):
+        train_accs = []
+        train_losses = []
+        for batch_id, data in enumerate(train_batch_generator()):
+            if iters == args.skip_batch_num:
+                start_time = time.time()
+                num_samples = 0
+            if iters == args.iterations:
+                break
+            src_seq, word_num = to_lodtensor(map(lambda x: x[0], data), place)
+            num_samples += word_num
+            trg_seq, word_num = to_lodtensor(map(lambda x: x[1], data), place)
+            num_samples += word_num
+            lbl_seq, _ = to_lodtensor(map(lambda x: x[2], data), place)
+
+            fetch_outs = exe.run(framework.default_main_program(),
+                                 feed={
+                                     feeding_list[0]: src_seq,
+                                     feeding_list[1]: trg_seq,
+                                     feeding_list[2]: lbl_seq
+                                 },
+                                 fetch_list=[avg_cost])
+
+            iters += 1
+            loss = np.array(fetch_outs[0])
+            print(
+                "Pass = %d, Iter = %d, Loss = %f" % (pass_id, iters, loss)
+            )  # The accuracy is the accumulation of batches, but not the current batch.
+
+        train_elapsed = time.time() - start_time
+        examples_per_sec = num_samples / train_elapsed
+        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
+              (num_samples, train_elapsed, examples_per_sec))
+        # evaluation
+        train_speed_kpi.add_record(np.array(examples_per_sec, dtype='float32'))
+        if args.with_test:
+            test_loss = do_validation()
+        break
+    train_speed_kpi.persist()
+
+
+def infer():
+    pass
+
+
+def print_arguments(args):
+    print('----------- seq2seq Configuration Arguments -----------')
+    for arg, value in sorted(vars(args).iteritems()):
+        print('%s: %s' % (arg, value))
+    print('------------------------------------------------')
+
+
+def collect_gpu_memory_data(mem_list):
+    """
+    collect the GPU memory data
+    """
+    while (True):
+        command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv" % args.gpu_id
+        status, output = commands.getstatusoutput(command)
+        if status != 0:
+            print('Get GPU memory data error')
+        else:
+            mem_list.append(int(output.split('\n')[1].split(' ')[0]))
+        time.sleep(1)
+
+
+def save_gpu_data(mem_list):
+    gpu_memory_kpi = None
+    for kpi in tracking_kpis:
+        if kpi.name == 'wmb_%s_gpu_memory' % (args.batch_size):
+            gpu_memory_kpi = kpi
+    gpu_memory_kpi.add_record(max(mem_list))
+    gpu_memory_kpi.persist()
+
+
+if __name__ == '__main__':
+    args = parser.parse_args()
+    print_arguments(args)
+    mem_data_list = []
+    collect_memory_thread = threading.Thread(
+        target=collect_gpu_memory_data, args=(mem_data_list, ))
+    collect_memory_thread.setDaemon(True)
+    collect_memory_thread.start()
+    if args.infer_only:
+        infer()
+    else:
+        train()
+    save_gpu_data(mem_data_list)
diff --git a/seq2seq/run.xsh b/seq2seq/run.xsh
new file mode 100755
index 00000000..078cabe0
--- /dev/null
+++ b/seq2seq/run.xsh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+export MKL_NUM_THREADS=1
+export OMP_NUM_THREADS=1
+cudaid=${seq2seq_cudaid:=0} # use 0-th card as default
+export CUDA_VISIBLE_DEVICES=$cudaid
+
+#wmt14 128
+FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=128 --iterations=50 --gpu_id=$cudaid
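Both this seq2seq model and the lstm model in the next patch carry the same `to_lodtensor` helper: a minibatch of variable-length sequences is flattened into one column tensor plus a cumulative offset list (Fluid's LoD). For readers who have not met LoDTensor, here is a NumPy-only sketch of that packing (`pack_sequences` is our illustrative name):

```python
import numpy as np

def pack_sequences(seqs):
    """Flatten variable-length sequences; lod[i]:lod[i+1] spans sequence i."""
    lod = [0]
    for seq in seqs:
        lod.append(lod[-1] + len(seq))
    flat = np.concatenate(seqs).astype("int64").reshape(-1, 1)
    return flat, lod

flat, lod = pack_sequences([[1, 2, 3], [4, 5], [6]])
print(flat.ravel().tolist())  # [1, 2, 3, 4, 5, 6]
print(lod)                    # [0, 3, 5, 6]
```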
From b6478aa8b55889e03d2e43318963545a4f7a0c01 Mon Sep 17 00:00:00 2001
From: kolinwei <331911734@qq.com>
Date: Tue, 24 Apr 2018 11:23:37 +0800
Subject: [PATCH 07/67] add lstm

---
 lstm/continuous_evaluation.py | 18 ++
 .../latest_kpis/imdb_32_gpu_memory_factor.txt | 1 +
 .../imdb_32_train_speed_factor.txt | 1 +
 lstm/model.py | 274 ++++++++++++++++++
 lstm/run.xsh | 9 +
 5 files changed, 303 insertions(+)
 create mode 100644 lstm/continuous_evaluation.py
 create mode 100644 lstm/latest_kpis/imdb_32_gpu_memory_factor.txt
 create mode 100644 lstm/latest_kpis/imdb_32_train_speed_factor.txt
 create mode 100644 lstm/model.py
 create mode 100755 lstm/run.xsh

diff --git a/lstm/continuous_evaluation.py b/lstm/continuous_evaluation.py
new file mode 100644
index 00000000..fbec0024
--- /dev/null
+++ b/lstm/continuous_evaluation.py
@@ -0,0 +1,18 @@
+"""
+continuous_evaluation.py
+"""
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import AccKpi
+from kpi import CostKpi
+from kpi import DurationKpi
+
+imdb_32_train_speed_kpi = AccKpi('imdb_32_train_speed', 0.05, 0)
+imdb_32_gpu_memory_kpi = DurationKpi('imdb_32_gpu_memory', 0.02, 0)
+
+
+tracking_kpis = [
+    imdb_32_train_speed_kpi,
+    imdb_32_gpu_memory_kpi,
+]
diff --git a/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt b/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt
new file mode 100644
index 00000000..7eca9b0b
--- /dev/null
+++ b/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt
@@ -0,0 +1 @@
+[1172]
\ No newline at end of file
diff --git a/lstm/latest_kpis/imdb_32_train_speed_factor.txt b/lstm/latest_kpis/imdb_32_train_speed_factor.txt
new file mode 100644
index 00000000..f05bc2ec
--- /dev/null
+++ b/lstm/latest_kpis/imdb_32_train_speed_factor.txt
@@ -0,0 +1 @@
+[833.5032958984375]
diff --git a/lstm/model.py b/lstm/model.py
new file mode 100644
index 00000000..d5d8c8c9
--- /dev/null
+++ b/lstm/model.py
@@ -0,0 +1,274 @@
+"""
+stacked_dynamic_lstm model for fluid
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import cPickle
+import os
+import random
+import commands
+import threading
+import time
+import numpy as np
+
+import numpy
+import paddle
+import paddle.dataset.imdb as imdb
+import paddle.fluid as fluid
+import paddle.batch as batch
+import paddle.fluid.profiler as profiler
+
+from continuous_evaluation import tracking_kpis
+
+def parse_args():
+    parser = argparse.ArgumentParser("Understand Sentiment by Dynamic RNN.")
+    parser.add_argument(
+        '--batch_size',
+        type=int,
+        default=32,
+        help='The sequence number of a batch data. (default: %(default)d)')
+    parser.add_argument(
+        '--skip_batch_num',
+        type=int,
+        default=5,
+        help='The first num of minibatch num to skip, for better performance test'
+    )
+    parser.add_argument(
+        '--iterations', type=int, default=80, help='The number of minibatches.')
+    parser.add_argument(
+        '--emb_dim',
+        type=int,
+        default=512,
+        help='Dimension of embedding table. (default: %(default)d)')
+    parser.add_argument(
+        '--hidden_dim',
+        type=int,
+        default=512,
+        help='Hidden size of lstm unit. (default: %(default)d)')
+    parser.add_argument(
+        '--pass_num',
+        type=int,
+        default=100,
+        help='Epoch number to train. (default: %(default)d)')
+    parser.add_argument(
+        '--device',
+        type=str,
+        default='CPU',
+        choices=['CPU', 'GPU'],
+        help='The device type.')
+    parser.add_argument(
+        '--gpu_id',
+        type=int,
+        default=3,
+        help='The GPU Card Id. (default: %(default)d)')
+    parser.add_argument(
+        '--crop_size',
+        type=int,
+        default=int(os.environ.get('CROP_SIZE', '1500')),
+        help='The max sentence length of input. Since this model uses a plain RNN,'
+        ' the gradient could explode if the sentence is too long')
+    parser.add_argument(
+        '--with_test',
+        action='store_true',
+        help='If set, test the testset during training.')
+    args = parser.parse_args()
+    return args
+
+
+word_dict = imdb.word_dict()
+
+
+def crop_sentence(reader, crop_size):
+    unk_value = word_dict['<unk>']
+
+    def __impl__():
+        for item in reader():
+            if len([x for x in item[0] if x != unk_value]) < crop_size:
+                yield item
+
+    return __impl__
+
+
+def main():
+    args = parse_args()
+    lstm_size = args.hidden_dim
+
+    data = fluid.layers.data(
+        name="words", shape=[1], lod_level=1, dtype='int64')
+    sentence = fluid.layers.embedding(
+        input=data, size=[len(word_dict), args.emb_dim])
+
+    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')
+
+    rnn = fluid.layers.DynamicRNN()
+    with rnn.block():
+        word = rnn.step_input(sentence)
+        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
+        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])
+
+        def gate_common(
+                ipt,
+                hidden,
+                size, ):
+            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
+            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
+            gate = fluid.layers.sums(input=[gate0, gate1])
+            return gate
+
+        forget_gate = fluid.layers.sigmoid(
+            x=gate_common(word, prev_hidden, lstm_size))
+        input_gate = fluid.layers.sigmoid(
+            x=gate_common(word, prev_hidden, lstm_size))
+        output_gate = fluid.layers.sigmoid(
+            x=gate_common(word, prev_hidden, lstm_size))
+        cell_gate = fluid.layers.tanh(
+            x=gate_common(word, prev_hidden, lstm_size))
+
+        cell = fluid.layers.sums(input=[
+            fluid.layers.elementwise_mul(
+                x=forget_gate, y=prev_cell), fluid.layers.elementwise_mul(
+                    x=input_gate, y=cell_gate)
+        ])
+
+        hidden = fluid.layers.elementwise_mul(
+            x=output_gate, y=fluid.layers.tanh(x=cell))
+
+        rnn.update_memory(prev_cell, cell)
+        rnn.update_memory(prev_hidden, hidden)
+        rnn.output(hidden)
+
+    last = fluid.layers.sequence_pool(rnn(), 'last')
+    logit = fluid.layers.fc(input=last, size=2, act='softmax')
+    loss = fluid.layers.cross_entropy(
+        input=logit,
+        label=fluid.layers.data(
+            name='label', shape=[1], dtype='int64'))
+    loss = fluid.layers.mean(x=loss)
+
+    # add acc
+    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
+    batch_acc = fluid.layers.accuracy(input=logit, label=fluid.layers.data(name='label', \
+        shape=[1], dtype='int64'), total=batch_size_tensor)
+
+    inference_program = fluid.default_main_program().clone()
+    with fluid.program_guard(inference_program):
+        inference_program = fluid.io.get_inference_program(
+            target_vars=[batch_acc, batch_size_tensor])
+
+    adam = fluid.optimizer.Adam()
+    adam.minimize(loss)
+
+    fluid.memory_optimize(fluid.default_main_program())
+
+    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+
+    train_reader = batch(
+        paddle.reader.shuffle(
+            crop_sentence(imdb.train(word_dict), args.crop_size),
+            buf_size=25000),
+        batch_size=args.batch_size)
+
+    train_acc_kpi = None
+    for kpi in tracking_kpis:
+        if kpi.name == 'imdb_%s_train_acc' % (args.batch_size):
+            train_acc_kpi = kpi
+    train_speed_kpi = None
+    for kpi in tracking_kpis:
+        if kpi.name == 'imdb_%s_train_speed' % (args.batch_size):
+            train_speed_kpi = kpi
+
+    iters, num_samples, start_time = 0, 0, time.time()
+    for pass_id in range(args.pass_num):
+        train_accs = []
+        train_losses = []
+        for batch_id, data in enumerate(train_reader()):
+            if iters == args.skip_batch_num:
+                start_time = time.time()
+                num_samples = 0
+            if iters == args.iterations:
+                break
+            tensor_words = to_lodtensor([x[0] for x in data], place)
+            label = numpy.array([x[1] for x in data]).astype("int64")
+            label = label.reshape((-1, 1))
+            loss_np, acc, weight = exe.run(
+                fluid.default_main_program(),
+                feed={"words": tensor_words,
+                      "label": label},
+                fetch_list=[loss, batch_acc, batch_size_tensor])
+            iters += 1
+            for x in data:
+                num_samples += len(x[0])
+            print(
+                "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
+                (pass_id, iters, loss_np, acc)
+            )  # The accuracy is the accumulation of batches, but not the current batch.
+
+        train_elapsed = time.time() - start_time
+        examples_per_sec = num_samples / train_elapsed
+        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
+              (num_samples, train_elapsed, examples_per_sec))
+        train_speed_kpi.add_record(np.array(examples_per_sec, dtype='float32'))
+        break
+    train_speed_kpi.persist()
+
+
+def to_lodtensor(data, place):
+    seq_lens = [len(seq) for seq in data]
+    cur_len = 0
+    lod = [cur_len]
+    for l in seq_lens:
+        cur_len += l
+        lod.append(cur_len)
+    flattened_data = numpy.concatenate(data, axis=0).astype("int64")
+    flattened_data = flattened_data.reshape([len(flattened_data), 1])
+    res = fluid.LoDTensor()
+    res.set(flattened_data, place)
+    res.set_lod([lod])
+    return res
+
+
+def print_arguments(args):
+    print('----------- lstm Configuration Arguments -----------')
+    for arg, value in sorted(vars(args).iteritems()):
+        print('%s: %s' % (arg, value))
+    print('------------------------------------------------')
+
+
+def collect_gpu_memory_data(mem_list):
+    """
+    collect the GPU memory data
+    """
+    while (True):
+        command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv" % args.gpu_id
+        status, output = commands.getstatusoutput(command)
+        if status != 0:
+            print('Get GPU memory data error')
+        else:
+            mem_list.append(int(output.split('\n')[1].split(' ')[0]))
+        time.sleep(1)
+
+
+def save_gpu_data(mem_list):
+    gpu_memory_kpi = None
+    for kpi in tracking_kpis:
+        if kpi.name == 'imdb_%s_gpu_memory' % (args.batch_size):
+            gpu_memory_kpi = kpi
+    gpu_memory_kpi.add_record(max(mem_list))
+    gpu_memory_kpi.persist()
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    print_arguments(args)
+    mem_data_list = []
+    collect_memory_thread = threading.Thread(
+        target=collect_gpu_memory_data, args=(mem_data_list, ))
+    collect_memory_thread.setDaemon(True)
+    collect_memory_thread.start()
+    main()
+    save_gpu_data(mem_data_list)
diff --git a/lstm/run.xsh b/lstm/run.xsh
new file mode 100755
index 00000000..5f140153
--- /dev/null
+++ b/lstm/run.xsh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+export MKL_NUM_THREADS=1
+export OMP_NUM_THREADS=1
+cudaid=${lstm_cudaid:=0} # use 0-th card as default
+export CUDA_VISIBLE_DEVICES=$cudaid
+
+#imdb 32
+FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=32 --iterations=50 --gpu_id=$cudaid
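Every benchmark script in this series measures throughput the same way: skip the first `skip_batch_num` warm-up iterations, restart the clock and sample counter, then divide samples seen by elapsed wall time. Stripped of Paddle specifics (`train_one_batch` below stands in for the real `exe.run(...)` call), the timing logic is:

```python
import time

def measure_examples_per_sec(batches, train_one_batch, skip_batch_num=5):
    num_samples, start = 0, time.time()
    for i, batch in enumerate(batches):
        if i == skip_batch_num:  # warm-up done: restart clock and counter
            num_samples, start = 0, time.time()
        train_one_batch(batch)
        num_samples += len(batch)
    return num_samples / (time.time() - start)

# 20 fake batches of 32 samples, ~10 ms each:
print(measure_examples_per_sec([[0] * 32] * 20, lambda b: time.sleep(0.01)))
```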
From 64d90a6f66e361662281a5a51ef0dac12ae9b948 Mon Sep 17 00:00:00 2001
From: root
Date: Tue, 24 Apr 2018 05:19:58 +0000
Subject: [PATCH 08/67] evaluate [44fa823841549f0405f6f55aa8e51560fc0200ce]

---
 mnist/latest_kpis/train_duration_factor.txt | 10 +++++-----
 resnet30/latest_kpis/train_cost_factor.txt | 20 +++++++++----------
 .../flowers_64_gpu_memory_factor.txt | 2 +-
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/mnist/latest_kpis/train_duration_factor.txt b/mnist/latest_kpis/train_duration_factor.txt
index 07c6feef..ce48626b 100644
--- a/mnist/latest_kpis/train_duration_factor.txt
+++ b/mnist/latest_kpis/train_duration_factor.txt
@@ -1,5 +1,5 @@
-[36.88168907165527]
-[36.785341024398804]
-[36.939675092697144]
-[36.425740003585815]
-[37.25818109512329]
\ No newline at end of file
+[36.52754783630371]
+[36.04332995414734]
+[36.20732808113098]
+[36.188393115997314]
+[35.95417380332947]
\ No newline at end of file
diff --git a/resnet30/latest_kpis/train_cost_factor.txt b/resnet30/latest_kpis/train_cost_factor.txt
index 06457738..fa232989 100644
--- a/resnet30/latest_kpis/train_cost_factor.txt
+++ b/resnet30/latest_kpis/train_cost_factor.txt
@@ -1,10 +1,10 @@
-[[3.593585252761841]]
-[[3.2120132446289062]]
-[[2.9515209197998047]]
-[[2.733518123626709]]
-[[2.5107831954956055]]
-[[2.4020209312438965]]
-[[2.422689914703369]]
-[[2.372192144393921]]
-[[2.407041072845459]]
-[[2.4115140438079834]]
\ No newline at end of file
+[[3.4473652839660645]]
+[[3.0381433963775635]]
+[[2.664496660232544]]
+[[2.4615819454193115]]
+[[2.4176268577575684]]
+[[2.4198837280273438]]
+[[2.4023916721343994]]
+[[2.5021331310272217]]
+[[2.513448715209961]]
+[[2.5482802391052246]]
\ No newline at end of file
diff --git a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt
index 08a40c04..51f01560 100644
--- a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt
+++ b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt
@@ -1 +1 @@
-[3400]
\ No newline at end of file
+[2772]
\ No newline at end of file

From 1b5a58d0f44de8f3578f2f42b67dab0b87463864 Mon Sep 17 00:00:00 2001
From: kolinwei <331911734@qq.com>
Date: Tue, 24 Apr 2018 15:47:14 +0800
Subject: [PATCH 09/67] add model image_classification

---
 image_classification/continuous_evaluation.py | 15 +
 .../latest_kpis/train_acc_top1_kpi_factor.txt | 1 +
 .../latest_kpis/train_acc_top5_kpi_factor.txt | 1 +
 .../latest_kpis/train_cost_kpi_factor.txt | 1 +
 image_classification/reader.py | 164 +++++++++
 image_classification/run.xsh | 9 +
 image_classification/se_resnext.py | 121 +++++++
 image_classification/train.py | 312 ++++++++++++++++++
 image_classification/utility.py | 62 ++++
 9 files changed, 686 insertions(+)
 create mode 100644 image_classification/continuous_evaluation.py
 create mode 100644 image_classification/latest_kpis/train_acc_top1_kpi_factor.txt
 create mode 100644 image_classification/latest_kpis/train_acc_top5_kpi_factor.txt
 create mode 100644 image_classification/latest_kpis/train_cost_kpi_factor.txt
 create mode 100644 image_classification/reader.py
 create mode 100755 image_classification/run.xsh
 create mode 100644 image_classification/se_resnext.py
 create mode 100644 image_classification/train.py
 create mode 100644 image_classification/utility.py

diff --git a/image_classification/continuous_evaluation.py b/image_classification/continuous_evaluation.py
new file mode 100644
index 00000000..2c869f80
--- /dev/null
+++ b/image_classification/continuous_evaluation.py
@@ -0,0 +1,15 @@
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi, DurationKpi, AccKpi
+
+train_acc_top1_kpi = AccKpi('train_acc_top1_kpi', 0.05, 0)
+train_acc_top5_kpi = AccKpi('train_acc_top5_kpi', 0.05, 0)
+train_cost_kpi = CostKpi('train_cost_kpi', 0.05, 0)
+
+
+tracking_kpis = [
+    train_acc_top1_kpi,
+    train_acc_top5_kpi,
+    train_cost_kpi
+]
diff --git a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt
new file mode 100644
index 00000000..fce52a52
--- /dev/null
+++ b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt
@@ -0,0 +1 @@
+[0.27615979313850403]
\ No newline at end of file
diff --git a/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt
new file mode 100644
index 00000000..29b13770
--- /dev/null
+++ b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt
@@ -0,0 +1 @@
+[0.5755798816680908]
\ No newline at end of file
diff --git a/image_classification/latest_kpis/train_cost_kpi_factor.txt b/image_classification/latest_kpis/train_cost_kpi_factor.txt
new file mode 100644
index 00000000..8067248d
--- /dev/null
+++ b/image_classification/latest_kpis/train_cost_kpi_factor.txt
@@ -0,0 +1 @@
+[2.8537046909332275]
\ No newline at end of file
diff --git a/image_classification/reader.py b/image_classification/reader.py
new file mode 100644
index 00000000..50bea712
--- /dev/null
+++ b/image_classification/reader.py
@@ -0,0 +1,164 @@
+import os
+import math
+import random
+import functools
+import numpy as np
+import paddle as paddle
+from PIL import Image, ImageEnhance
+
+random.seed(0)
+
+DATA_DIM = 224
+
+THREAD = 8
+BUF_SIZE = 1024
+
+DATA_DIR = 'ILSVRC2012'
+TRAIN_LIST = 'ILSVRC2012/train_list.txt'
+TEST_LIST = 'ILSVRC2012/test_list.txt'
+
+img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
+img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
+
+
+def resize_short(img, target_size):
+    percent = float(target_size) / min(img.size[0], img.size[1])
+    resized_width = int(round(img.size[0] * percent))
+    resized_height = int(round(img.size[1] * percent))
+    img = img.resize((resized_width, resized_height), Image.LANCZOS)
+    return img
+
+
+def crop_image(img, target_size, center):
+    width, height = img.size
+    size = target_size
+    if center == True:
+        w_start = (width - size) / 2
+        h_start = (height - size) / 2
+    else:
+        w_start = random.randint(0, width - size)
+        h_start = random.randint(0, height - size)
+    w_end = w_start + size
+    h_end = h_start + size
+    img = img.crop((w_start, h_start, w_end, h_end))
+    return img
+
+
+def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
+    aspect_ratio = math.sqrt(random.uniform(*ratio))
+    w = 1. * aspect_ratio
+    h = 1. / aspect_ratio
+
+    bound = min((float(img.size[0]) / img.size[1]) / (w**2),
+                (float(img.size[1]) / img.size[0]) / (h**2))
+    scale_max = min(scale[1], bound)
+    scale_min = min(scale[0], bound)
+
+    target_area = img.size[0] * img.size[1] * random.uniform(scale_min,
+                                                             scale_max)
+    target_size = math.sqrt(target_area)
+    w = int(target_size * w)
+    h = int(target_size * h)
+
+    i = random.randint(0, img.size[0] - w)
+    j = random.randint(0, img.size[1] - h)
+
+    img = img.crop((i, j, i + w, j + h))
+    img = img.resize((size, size), Image.LANCZOS)
+    return img
+
+
+def rotate_image(img):
+    angle = random.randint(-10, 10)
+    img = img.rotate(angle)
+    return img
+
+
+def distort_color(img):
+    def random_brightness(img, lower=0.5, upper=1.5):
+        e = random.uniform(lower, upper)
+        return ImageEnhance.Brightness(img).enhance(e)
+
+    def random_contrast(img, lower=0.5, upper=1.5):
+        e = random.uniform(lower, upper)
+        return ImageEnhance.Contrast(img).enhance(e)
+
+    def random_color(img, lower=0.5, upper=1.5):
+        e = random.uniform(lower, upper)
+        return ImageEnhance.Color(img).enhance(e)
+
+    ops = [random_brightness, random_contrast, random_color]
+    random.shuffle(ops)
+
+    img = ops[0](img)
+    img = ops[1](img)
+    img = ops[2](img)
+
+    return img
+
+
+def process_image(sample, mode, color_jitter, rotate):
+    img_path = sample[0]
+
+    img = Image.open(img_path)
+    if mode == 'train':
+        if rotate: img = rotate_image(img)
+        img = random_crop(img, DATA_DIM)
+    else:
+        img = resize_short(img, DATA_DIM)
+        img = crop_image(img, target_size=DATA_DIM, center=True)
+    if mode == 'train':
+        if color_jitter:
+            img = distort_color(img)
+        if random.randint(0, 1) == 1:
+            img = img.transpose(Image.FLIP_LEFT_RIGHT)
+
+    if img.mode != 'RGB':
+        img = img.convert('RGB')
+
+    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
+    img -= img_mean
+    img /= img_std
+
+    if mode == 'train' or mode == 'test':
+        return img, sample[1]
+    elif mode == 'infer':
+        return img
+
+
+def _reader_creator(file_list,
+                    mode,
+                    shuffle=False,
+                    color_jitter=False,
+                    rotate=False):
+    def reader():
+        with open(file_list) as flist:
+            lines = [line.strip() for line in flist]
+            if shuffle:
+                random.shuffle(lines)
+            for line in lines:
+                if mode == 'train' or mode == 'test':
+                    img_path, label = line.split()
+                    img_path = os.path.join(DATA_DIR, img_path)
+                    yield img_path, int(label)
+                elif mode == 'infer':
+                    img_path = os.path.join(DATA_DIR, line)
+                    yield [img_path]
+
+    mapper = functools.partial(
+        process_image, mode=mode, color_jitter=color_jitter, rotate=rotate)
+
+    return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
+
+
+def train():
+    return _reader_creator(
+        TRAIN_LIST, 'train', shuffle=True, color_jitter=True, rotate=True)
+
+
+def test():
+    return _reader_creator(TEST_LIST, 'test', shuffle=False)
+
+
+def infer(file_list):
+    return _reader_creator(file_list, 'infer', shuffle=False)
diff --git a/image_classification/run.xsh b/image_classification/run.xsh
new file mode 100755
index 00000000..2097e71e
--- /dev/null
+++ b/image_classification/run.xsh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+export MKL_NUM_THREADS=1
+export OMP_NUM_THREADS=1
+cudaid=${se_resnext_cudaid:=3} # use 0-th card as default
+export CUDA_VISIBLE_DEVICES=$cudaid
+
+FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python train.py --batch_size=64
+
diff --git a/image_classification/se_resnext.py b/image_classification/se_resnext.py
new file mode 100644
index 00000000..6b448d40
--- /dev/null
+++ b/image_classification/se_resnext.py
@@ -0,0 +1,121 @@
+import paddle as paddle
+import paddle.fluid as fluid
+
+
+def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1,
+                  act=None):
+    conv = fluid.layers.conv2d(
+        input=input,
+        num_filters=num_filters,
+        filter_size=filter_size,
+        stride=stride,
+        padding=(filter_size - 1) / 2,
+        groups=groups,
+        act=None,
+        bias_attr=False)
+    return fluid.layers.batch_norm(input=conv, act=act)
+
+
+def squeeze_excitation(input, num_channels, reduction_ratio):
+    pool = fluid.layers.pool2d(
+        input=input, pool_size=0, pool_type='avg', global_pooling=True)
+    squeeze = fluid.layers.fc(input=pool,
+                              size=num_channels / reduction_ratio,
+                              act='relu')
+    excitation = fluid.layers.fc(input=squeeze,
+                                 size=num_channels,
+                                 act='sigmoid')
+    scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
+    return scale
+
+
+def shortcut(input, ch_out, stride):
+    ch_in = input.shape[1]
+    if ch_in != ch_out:
+        if stride == 1:
+            filter_size = 1
+        else:
+            filter_size = 3
+        return conv_bn_layer(input, ch_out, filter_size, stride)
+    else:
+        return input
+
+
+def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
+    conv0 = conv_bn_layer(
+        input=input, num_filters=num_filters, filter_size=1, act='relu')
+    conv1 = conv_bn_layer(
+        input=conv0,
+        num_filters=num_filters,
+        filter_size=3,
+        stride=stride,
+        groups=cardinality,
+        act='relu')
+    conv2 = conv_bn_layer(
+        input=conv1, num_filters=num_filters * 2, filter_size=1, act=None)
+    scale = squeeze_excitation(
+        input=conv2,
+        num_channels=num_filters * 2,
+        reduction_ratio=reduction_ratio)
+
+    short = shortcut(input, num_filters * 2, stride)
+
+    return fluid.layers.elementwise_add(x=short, y=scale, act='relu')
+
+
+def SE_ResNeXt(input, class_dim, infer=False, layers=50):
+    supported_layers = [50, 152]
+    if layers not in supported_layers:
+        print("supported layers are", supported_layers, "but input layer is",
+              layers)
+        exit()
+    if layers == 50:
+        cardinality = 32
+        reduction_ratio = 16
+        depth = [3, 4, 6, 3]
+        num_filters = [128, 256, 512, 1024]
+
+        conv = conv_bn_layer(
+            input=input, num_filters=64, filter_size=7, stride=2, act='relu')
+        conv = fluid.layers.pool2d(
+            input=conv,
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=1,
+            pool_type='max')
+    elif layers == 152:
+        cardinality = 64
+        reduction_ratio = 16
+        depth = [3, 8, 36, 3]
+        num_filters = [128, 256, 512, 1024]
+
+        conv = conv_bn_layer(
+            input=input, num_filters=64, filter_size=3, stride=2, act='relu')
+        conv = conv_bn_layer(
+            input=conv, num_filters=64, filter_size=3, stride=1, act='relu')
+        conv = conv_bn_layer(
+            input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
+        conv = fluid.layers.pool2d(
+            input=conv,
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=1,
+            pool_type='max')
+
+    for block in range(len(depth)):
+        for i in range(depth[block]):
+            conv = bottleneck_block(
+                input=conv,
+                num_filters=num_filters[block],
+                stride=2 if i == 0 and block != 0 else 1,
+                cardinality=cardinality,
+                reduction_ratio=reduction_ratio)
+
+    pool = fluid.layers.pool2d(
+        input=conv, pool_size=0, pool_type='avg', global_pooling=True)
+    if not infer:
+        drop = fluid.layers.dropout(x=pool, dropout_prob=0.2)
+    else:
+        drop = pool
+    out = fluid.layers.fc(input=drop, size=class_dim, act='softmax')
+    return out
diff --git a/image_classification/train.py b/image_classification/train.py
new file mode 100644
index 00000000..ef5b53d4
--- /dev/null
+++ b/image_classification/train.py
@@ -0,0 +1,312 @@
+import os
+import numpy as np
+import time
+import sys
+import paddle as paddle
+import
paddle.fluid as fluid +from se_resnext import SE_ResNeXt +import paddle.dataset.flowers as flowers +import reader + +import argparse +import functools +from utility import add_arguments, print_arguments + +from continuous_evaluation import (train_acc_top1_kpi, train_acc_top5_kpi, train_cost_kpi) +parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable +add_arg('batch_size', int, 256, "Minibatch size.") +add_arg('num_layers', int, 50, "How many layers for SE-ResNeXt model.") +add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.") +add_arg('parallel_exe', bool, True, "Whether to use ParallelExecutor to train or not.") +# yapf: enable + + +def train_parallel_do(args, + learning_rate, + batch_size, + num_passes, + init_model=None, + model_save_dir='model', + parallel=True, + use_nccl=True, + lr_strategy=None, + layers=50): + class_dim = 1000 + image_shape = [3, 224, 224] + + image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + if parallel: + places = fluid.layers.get_places() + pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl) + + with pd.do(): + image_ = pd.read_input(image) + label_ = pd.read_input(label) + out = SE_ResNeXt(input=image_, class_dim=class_dim, layers=layers) + cost = fluid.layers.cross_entropy(input=out, label=label_) + avg_cost = fluid.layers.mean(x=cost) + acc_top1 = fluid.layers.accuracy(input=out, label=label_, k=1) + acc_top5 = fluid.layers.accuracy(input=out, label=label_, k=5) + pd.write_output(avg_cost) + pd.write_output(acc_top1) + pd.write_output(acc_top5) + + avg_cost, acc_top1, acc_top5 = pd() + avg_cost = fluid.layers.mean(x=avg_cost) + acc_top1 = fluid.layers.mean(x=acc_top1) + acc_top5 = fluid.layers.mean(x=acc_top5) + else: + out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers) + cost = fluid.layers.cross_entropy(input=out, label=label) + avg_cost = fluid.layers.mean(x=cost) + acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) + acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) + + inference_program = fluid.default_main_program().clone(for_test=True) + + if lr_strategy is None: + optimizer = fluid.optimizer.Momentum( + learning_rate=learning_rate, + momentum=0.9, + regularization=fluid.regularizer.L2Decay(1e-4)) + else: + bd = lr_strategy["bd"] + lr = lr_strategy["lr"] + optimizer = fluid.optimizer.Momentum( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr), + momentum=0.9, + regularization=fluid.regularizer.L2Decay(1e-4)) + + opts = optimizer.minimize(avg_cost) + if args.with_mem_opt: + fluid.memory_optimize(fluid.default_main_program()) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + if init_model is not None: + fluid.io.load_persistables(exe, init_model) + + train_reader = paddle.batch(flowers.train(), batch_size=batch_size) + test_reader = paddle.batch(flowers.test(), batch_size=batch_size) + feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) + + for pass_id in range(num_passes): + train_info = [[], [], []] + test_info = [[], [], []] + for batch_id, data in enumerate(train_reader()): + t1 = time.time() + loss, acc1, acc5 = exe.run( + fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[avg_cost, acc_top1, acc_top5]) + t2 = time.time() + period = t2 - t1 + train_info[0].append(loss[0]) + 
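# each fetch result is a numpy array; [0] pulls out the scalar for logging +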
train_info[1].append(acc1[0]) + train_info[2].append(acc5[0]) + if batch_id % 10 == 0: + print("Pass {0}, trainbatch {1}, loss {2}, \ + acc1 {3}, acc5 {4} time {5}" + .format(pass_id, \ + batch_id, loss[0], acc1[0], acc5[0], \ + "%2.2f sec" % period)) + sys.stdout.flush() + + train_loss = np.array(train_info[0]).mean() + train_acc1 = np.array(train_info[1]).mean() + train_acc5 = np.array(train_info[2]).mean() + for batch_id, data in enumerate(test_reader()): + t1 = time.time() + loss, acc1, acc5 = exe.run( + inference_program, + feed=feeder.feed(data), + fetch_list=[avg_cost, acc_top1, acc_top5]) + t2 = time.time() + period = t2 - t1 + test_info[0].append(loss[0]) + test_info[1].append(acc1[0]) + test_info[2].append(acc5[0]) + if batch_id % 10 == 0: + print("Pass {0},testbatch {1},loss {2}, \ + acc1 {3},acc5 {4},time {5}" + .format(pass_id, \ + batch_id, loss[0], acc1[0], acc5[0], \ + "%2.2f sec" % period)) + sys.stdout.flush() + + test_loss = np.array(test_info[0]).mean() + test_acc1 = np.array(test_info[1]).mean() + test_acc5 = np.array(test_info[2]).mean() + + print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \ + test_loss {4}, test_acc1 {5}, test_acc5 {6}" + .format(pass_id, \ + train_loss, train_acc1, train_acc5, test_loss, test_acc1, \ + test_acc5)) + sys.stdout.flush() + + model_path = os.path.join(model_save_dir, str(pass_id)) + if not os.path.isdir(model_path): + os.makedirs(model_path) + fluid.io.save_persistables(exe, model_path) + + +def train_parallel_exe(args, + learning_rate, + batch_size, + num_passes, + init_model=None, + model_save_dir='model', + parallel=True, + use_nccl=True, + lr_strategy=None, + layers=50): + class_dim = 1000 + image_shape = [3, 224, 224] + + image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers) + cost = fluid.layers.cross_entropy(input=out, label=label) + acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) + acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) + avg_cost = fluid.layers.mean(x=cost) + + test_program = fluid.default_main_program().clone(for_test=True) + + if lr_strategy is None: + optimizer = fluid.optimizer.Momentum( + learning_rate=learning_rate, + momentum=0.9, + regularization=fluid.regularizer.L2Decay(1e-4)) + else: + bd = lr_strategy["bd"] + lr = lr_strategy["lr"] + optimizer = fluid.optimizer.Momentum( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr), + momentum=0.9, + regularization=fluid.regularizer.L2Decay(1e-4)) + + opts = optimizer.minimize(avg_cost) + + if args.with_mem_opt: + fluid.memory_optimize(fluid.default_main_program()) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + fluid.default_startup_program().random_seed = 1000 + exe.run(fluid.default_startup_program()) + + if init_model is not None: + fluid.io.load_persistables(exe, init_model) + + train_reader = paddle.batch(flowers.train(), batch_size=batch_size) + test_reader = paddle.batch(flowers.test(), batch_size=batch_size) + feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) + + train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name) + test_exe = fluid.ParallelExecutor( + use_cuda=True, main_program=test_program, share_vars_from=train_exe) + + fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name] + + for pass_id in range(num_passes): + train_info = [[], [], []] + test_info = [[], [], []] + for batch_id, data in
enumerate(train_reader()): + t1 = time.time() + loss, acc1, acc5 = train_exe.run(fetch_list, + feed=feeder.feed(data)) + t2 = time.time() + period = t2 - t1 + loss = np.mean(np.array(loss)) + acc1 = np.mean(np.array(acc1)) + acc5 = np.mean(np.array(acc5)) + train_info[0].append(loss) + train_info[1].append(acc1) + train_info[2].append(acc5) + if batch_id % 10 == 0: + print("Pass {0}, trainbatch {1}, loss {2}, \ + acc1 {3}, acc5 {4} time {5}" + .format(pass_id, \ + batch_id, loss, acc1, acc5, \ + "%2.2f sec" % period)) + sys.stdout.flush() + + train_loss = np.array(train_info[0]).mean() + train_acc1 = np.array(train_info[1]).mean() + train_acc5 = np.array(train_info[2]).mean() + if pass_id == num_passes - 1: + train_acc_top1_kpi.add_record(train_acc1) + train_acc_top5_kpi.add_record(train_acc5) + train_cost_kpi.add_record(train_loss) + for batch_id, data in enumerate(test_reader()): + t1 = time.time() + loss, acc1, acc5 = test_exe.run(fetch_list, + feed=feeder.feed(data)) + t2 = time.time() + period = t2 - t1 + loss = np.mean(np.array(loss)) + acc1 = np.mean(np.array(acc1)) + acc5 = np.mean(np.array(acc5)) + test_info[0].append(loss) + test_info[1].append(acc1) + test_info[2].append(acc5) + if batch_id % 10 == 0: + print("Pass {0},testbatch {1},loss {2}, \ + acc1 {3},acc5 {4},time {5}" + .format(pass_id, \ + batch_id, loss, acc1, acc5, \ + "%2.2f sec" % period)) + sys.stdout.flush() + + test_loss = np.array(test_info[0]).mean() + test_acc1 = np.array(test_info[1]).mean() + test_acc5 = np.array(test_info[2]).mean() + + print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \ + test_loss {4}, test_acc1 {5}, test_acc5 {6}" + .format(pass_id, \ + train_loss, train_acc1, train_acc5, test_loss, test_acc1, \ + test_acc5)) + sys.stdout.flush() + train_acc_top1_kpi.persist() + train_acc_top5_kpi.persist() + train_cost_kpi.persist() + + +if __name__ == '__main__': + args = parser.parse_args() + print_arguments(args) + + epoch_points = [30, 60, 90] + total_images = 1281167 + batch_size = args.batch_size + step = int(total_images / batch_size + 1) + bd = [e * step for e in epoch_points] + lr = [0.1, 0.01, 0.001, 0.0001] + + lr_strategy = {"bd": bd, "lr": lr} + + use_nccl = True + # layers: 50, 152 + layers = args.num_layers + method = train_parallel_exe if args.parallel_exe else train_parallel_do + method( + args, + learning_rate=0.1, + batch_size=batch_size, + num_passes=5, + init_model=None, + parallel=True, + use_nccl=True, + lr_strategy=lr_strategy, + layers=layers) diff --git a/image_classification/utility.py b/image_classification/utility.py new file mode 100644 index 00000000..506e6007 --- /dev/null +++ b/image_classification/utility.py @@ -0,0 +1,62 @@ +"""Contains common utility functions.""" +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License.
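+ +# Note: object_detection/utility.py added later in this series is this same file (identical blob 506e6007).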
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import distutils.util +import numpy as np +from paddle.fluid import core + + +def print_arguments(args): + """Print argparse's arguments. + + Usage: + + .. code-block:: python + + parser = argparse.ArgumentParser() + parser.add_argument("name", default="Jonh", type=str, help="User name.") + args = parser.parse_args() + print_arguments(args) + + :param args: Input argparse.Namespace for printing. + :type args: argparse.Namespace + """ + print("----------- Configuration Arguments -----------") + for arg, value in sorted(vars(args).iteritems()): + print("%s: %s" % (arg, value)) + print("------------------------------------------------") + + +def add_arguments(argname, type, default, help, argparser, **kwargs): + """Add argparse's argument. + + Usage: + + .. code-block:: python + + parser = argparse.ArgumentParser() + add_argument("name", str, "Jonh", "User name.", parser) + args = parser.parse_args() + """ + type = distutils.util.strtobool if type == bool else type + argparser.add_argument( + "--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) From b2c9714fd7dabe8066c878dd0925cdfeba045417 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Tue, 24 Apr 2018 16:33:34 +0800 Subject: [PATCH 10/67] change image_classification default cudaid --- image_classification/run.xsh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/image_classification/run.xsh b/image_classification/run.xsh index 2097e71e..13db4406 100755 --- a/image_classification/run.xsh +++ b/image_classification/run.xsh @@ -2,7 +2,7 @@ export MKL_NUM_THREADS=1 export OMP_NUM_THREADS=1 -cudaid=${se_resnext_cudaid:=3} # use 0-th card as default +cudaid=${se_resnext_cudaid:=0} # use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python train.py --batch_size=64 From 64d0cf127c2b70220ca2534752d31d5c5844f6e9 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Tue, 24 Apr 2018 20:04:08 +0800 Subject: [PATCH 11/67] add object_detection --- .../latest_kpis/train_acc_top1_kpi_factor.txt | 2 +- .../latest_kpis/train_acc_top5_kpi_factor.txt | 2 +- .../latest_kpis/train_cost_kpi_factor.txt | 2 +- object_detection/continuous_evaluation.py | 13 + object_detection/download.sh | 19 + object_detection/image_util.py | 234 ++++++++++++ object_detection/labels/create_list.py | 64 ++++ object_detection/labels/label_list | 21 + .../latest_kpis/train_cost_factor.txt | 1 + .../latest_kpis/train_speed_factor.txt | 1 + object_detection/mobilenet_ssd.py | 116 ++++++ object_detection/reader.py | 350 +++++++++++++++++ object_detection/run.xsh | 13 + object_detection/train.py | 358 ++++++++++++++++++ object_detection/utility.py | 62 +++ 15 files changed, 1255 insertions(+), 3 deletions(-) create mode 100644 object_detection/continuous_evaluation.py create mode 100755 object_detection/download.sh create mode 100644 object_detection/image_util.py create mode 100644 object_detection/labels/create_list.py create mode 100644 object_detection/labels/label_list create mode 100644 object_detection/latest_kpis/train_cost_factor.txt create mode 100644 object_detection/latest_kpis/train_speed_factor.txt create mode 100644 object_detection/mobilenet_ssd.py create mode 100644 object_detection/reader.py create mode 100755 object_detection/run.xsh create mode 100644 object_detection/train.py create mode 100644 
object_detection/utility.py diff --git a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt index fce52a52..d82c6e08 100644 --- a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt +++ b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt @@ -1 +1 @@ -[0.27615979313850403] \ No newline at end of file +[0.17615979313850403] diff --git a/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt index 29b13770..df4023a8 100644 --- a/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt +++ b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt @@ -1 +1 @@ -[0.5755798816680908] \ No newline at end of file +[0.1855798816680908] diff --git a/image_classification/latest_kpis/train_cost_kpi_factor.txt b/image_classification/latest_kpis/train_cost_kpi_factor.txt index 8067248d..c934b4ef 100644 --- a/image_classification/latest_kpis/train_cost_kpi_factor.txt +++ b/image_classification/latest_kpis/train_cost_kpi_factor.txt @@ -1 +1 @@ -[2.8537046909332275] \ No newline at end of file +[12.8537046909332275] diff --git a/object_detection/continuous_evaluation.py b/object_detection/continuous_evaluation.py new file mode 100644 index 00000000..a49dd968 --- /dev/null +++ b/object_detection/continuous_evaluation.py @@ -0,0 +1,13 @@ +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi, DurationKpi, AccKpi + +train_cost_kpi = AccKpi('train_cost', 0.05, 0) +train_speed_kpi = AccKpi('train_speed', 0.05, 0) + + +tracking_kpis = [ + train_cost_kpi, + train_speed_kpi +] diff --git a/object_detection/download.sh b/object_detection/download.sh new file mode 100755 index 00000000..fe483255 --- /dev/null +++ b/object_detection/download.sh @@ -0,0 +1,19 @@ +DIR="$( cd "$(dirname "$0")" ; pwd -P )" +cd "$DIR" + +# Download the data. +cp labels/* data/pascalvoc/ +cd data/pascalvoc + +echo "Downloading..." +wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar +wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar +wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar +# Extract the data. +echo "Extracting..." +tar -xf VOCtrainval_11-May-2012.tar +tar -xf VOCtrainval_06-Nov-2007.tar +tar -xf VOCtest_06-Nov-2007.tar + +echo "Creating data lists..."
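+# create_list.py was copied in from labels/ by the cp above; it scans the extracted ./VOCdevkit and writes trainval.txt and test.txt, one "<img_path> <annotation_path>" pair per line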
+python create_list.py diff --git a/object_detection/image_util.py b/object_detection/image_util.py new file mode 100644 index 00000000..4ce53048 --- /dev/null +++ b/object_detection/image_util.py @@ -0,0 +1,234 @@ +from PIL import Image, ImageEnhance +import numpy as np +import random +import math + + +class sampler(): + def __init__(self, max_sample, max_trial, min_scale, max_scale, + min_aspect_ratio, max_aspect_ratio, min_jaccard_overlap, + max_jaccard_overlap): + self.max_sample = max_sample + self.max_trial = max_trial + self.min_scale = min_scale + self.max_scale = max_scale + self.min_aspect_ratio = min_aspect_ratio + self.max_aspect_ratio = max_aspect_ratio + self.min_jaccard_overlap = min_jaccard_overlap + self.max_jaccard_overlap = max_jaccard_overlap + + +class bbox(): + def __init__(self, xmin, ymin, xmax, ymax): + self.xmin = xmin + self.ymin = ymin + self.xmax = xmax + self.ymax = ymax + + +def bbox_area(src_bbox): + width = src_bbox.xmax - src_bbox.xmin + height = src_bbox.ymax - src_bbox.ymin + return width * height + + +def generate_sample(sampler): + scale = random.uniform(sampler.min_scale, sampler.max_scale) + min_aspect_ratio = max(sampler.min_aspect_ratio, (scale**2.0)) + max_aspect_ratio = min(sampler.max_aspect_ratio, 1 / (scale**2.0)) + aspect_ratio = random.uniform(min_aspect_ratio, max_aspect_ratio) + bbox_width = scale * (aspect_ratio**0.5) + bbox_height = scale / (aspect_ratio**0.5) + xmin_bound = 1 - bbox_width + ymin_bound = 1 - bbox_height + xmin = random.uniform(0, xmin_bound) + ymin = random.uniform(0, ymin_bound) + xmax = xmin + bbox_width + ymax = ymin + bbox_height + sampled_bbox = bbox(xmin, ymin, xmax, ymax) + return sampled_bbox + + +def jaccard_overlap(sample_bbox, object_bbox): + if sample_bbox.xmin >= object_bbox.xmax or \ + sample_bbox.xmax <= object_bbox.xmin or \ + sample_bbox.ymin >= object_bbox.ymax or \ + sample_bbox.ymax <= object_bbox.ymin: + return 0 + intersect_xmin = max(sample_bbox.xmin, object_bbox.xmin) + intersect_ymin = max(sample_bbox.ymin, object_bbox.ymin) + intersect_xmax = min(sample_bbox.xmax, object_bbox.xmax) + intersect_ymax = min(sample_bbox.ymax, object_bbox.ymax) + intersect_size = (intersect_xmax - intersect_xmin) * ( + intersect_ymax - intersect_ymin) + sample_bbox_size = bbox_area(sample_bbox) + object_bbox_size = bbox_area(object_bbox) + overlap = intersect_size / ( + sample_bbox_size + object_bbox_size - intersect_size) + return overlap + + +def satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): + if sampler.min_jaccard_overlap == 0 and sampler.max_jaccard_overlap == 0: + return True + for i in range(len(bbox_labels)): + object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], + bbox_labels[i][3], bbox_labels[i][4]) + overlap = jaccard_overlap(sample_bbox, object_bbox) + if sampler.min_jaccard_overlap != 0 and \ + overlap < sampler.min_jaccard_overlap: + continue + if sampler.max_jaccard_overlap != 0 and \ + overlap > sampler.max_jaccard_overlap: + continue + return True + return False + + +def generate_batch_samples(batch_sampler, bbox_labels): + sampled_bbox = [] + index = [] + c = 0 + for sampler in batch_sampler: + found = 0 + for i in range(sampler.max_trial): + if found >= sampler.max_sample: + break + sample_bbox = generate_sample(sampler) + if satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): + sampled_bbox.append(sample_bbox) + found = found + 1 + index.append(c) + c = c + 1 + return sampled_bbox + + +def clip_bbox(src_bbox): + src_bbox.xmin = max(min(src_bbox.xmin, 1.0), 0.0) + 
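# box coordinates are normalized, so each side is clamped into [0, 1] +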
src_bbox.ymin = max(min(src_bbox.ymin, 1.0), 0.0) + src_bbox.xmax = max(min(src_bbox.xmax, 1.0), 0.0) + src_bbox.ymax = max(min(src_bbox.ymax, 1.0), 0.0) + return src_bbox + + +def meet_emit_constraint(src_bbox, sample_bbox): + center_x = (src_bbox.xmax + src_bbox.xmin) / 2 + center_y = (src_bbox.ymax + src_bbox.ymin) / 2 + if center_x >= sample_bbox.xmin and \ + center_x <= sample_bbox.xmax and \ + center_y >= sample_bbox.ymin and \ + center_y <= sample_bbox.ymax: + return True + return False + + +def transform_labels(bbox_labels, sample_bbox): + proj_bbox = bbox(0, 0, 0, 0) + sample_labels = [] + for i in range(len(bbox_labels)): + sample_label = [] + object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], + bbox_labels[i][3], bbox_labels[i][4]) + if not meet_emit_constraint(object_bbox, sample_bbox): + continue + sample_width = sample_bbox.xmax - sample_bbox.xmin + sample_height = sample_bbox.ymax - sample_bbox.ymin + proj_bbox.xmin = (object_bbox.xmin - sample_bbox.xmin) / sample_width + proj_bbox.ymin = (object_bbox.ymin - sample_bbox.ymin) / sample_height + proj_bbox.xmax = (object_bbox.xmax - sample_bbox.xmin) / sample_width + proj_bbox.ymax = (object_bbox.ymax - sample_bbox.ymin) / sample_height + proj_bbox = clip_bbox(proj_bbox) + if bbox_area(proj_bbox) > 0: + sample_label.append(bbox_labels[i][0]) + sample_label.append(float(proj_bbox.xmin)) + sample_label.append(float(proj_bbox.ymin)) + sample_label.append(float(proj_bbox.xmax)) + sample_label.append(float(proj_bbox.ymax)) + sample_label.append(bbox_labels[i][5]) + sample_labels.append(sample_label) + return sample_labels + + +def crop_image(img, bbox_labels, sample_bbox, image_width, image_height): + sample_bbox = clip_bbox(sample_bbox) + xmin = int(sample_bbox.xmin * image_width) + xmax = int(sample_bbox.xmax * image_width) + ymin = int(sample_bbox.ymin * image_height) + ymax = int(sample_bbox.ymax * image_height) + sample_img = img[ymin:ymax, xmin:xmax] + sample_labels = transform_labels(bbox_labels, sample_bbox) + return sample_img, sample_labels + + +def random_brightness(img, settings): + prob = random.uniform(0, 1) + if prob < settings._brightness_prob: + delta = random.uniform(-settings._brightness_delta, + settings._brightness_delta) + 1 + img = ImageEnhance.Brightness(img).enhance(delta) + return img + + +def random_contrast(img, settings): + prob = random.uniform(0, 1) + if prob < settings._contrast_prob: + delta = random.uniform(-settings._contrast_delta, + settings._contrast_delta) + 1 + img = ImageEnhance.Contrast(img).enhance(delta) + return img + + +def random_saturation(img, settings): + prob = random.uniform(0, 1) + if prob < settings._saturation_prob: + delta = random.uniform(-settings._saturation_delta, + settings._saturation_delta) + 1 + img = ImageEnhance.Color(img).enhance(delta) + return img + + +def random_hue(img, settings): + prob = random.uniform(0, 1) + if prob < settings._hue_prob: + delta = random.uniform(-settings._hue_delta, settings._hue_delta) + img_hsv = np.array(img.convert('HSV')) + img_hsv[:, :, 0] = img_hsv[:, :, 0] + delta + img = Image.fromarray(img_hsv, mode='HSV').convert('RGB') + return img + + +def distort_image(img, settings): + prob = random.uniform(0, 1) + # Apply different distort order + if prob > 0.5: + img = random_brightness(img, settings) + img = random_contrast(img, settings) + img = random_saturation(img, settings) + img = random_hue(img, settings) + else: + img = random_brightness(img, settings) + img = random_saturation(img, settings) + img = random_hue(img, settings) 
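+ # same distortions as above, but with contrast applied last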
+ img = random_contrast(img, settings) + return img + + +def expand_image(img, bbox_labels, img_width, img_height, settings): + prob = random.uniform(0, 1) + if prob < settings._expand_prob: + if settings._expand_max_ratio - 1 >= 0.01: + expand_ratio = random.uniform(1, settings._expand_max_ratio) + height = int(img_height * expand_ratio) + width = int(img_width * expand_ratio) + h_off = math.floor(random.uniform(0, height - img_height)) + w_off = math.floor(random.uniform(0, width - img_width)) + expand_bbox = bbox(-w_off / img_width, -h_off / img_height, + (width - w_off) / img_width, + (height - h_off) / img_height) + expand_img = np.ones((height, width, 3)) + expand_img = np.uint8(expand_img * np.squeeze(settings._img_mean)) + expand_img = Image.fromarray(expand_img) + expand_img.paste(img, (int(w_off), int(h_off))) + bbox_labels = transform_labels(bbox_labels, expand_bbox) + return expand_img, bbox_labels, width, height + return img, bbox_labels, img_width, img_height diff --git a/object_detection/labels/create_list.py b/object_detection/labels/create_list.py new file mode 100644 index 00000000..1f53b182 --- /dev/null +++ b/object_detection/labels/create_list.py @@ -0,0 +1,64 @@ +import os +import os.path as osp +import re +import random + +devkit_dir = './VOCdevkit' +years = ['2007', '2012'] + + +def get_dir(devkit_dir, year, type): + return osp.join(devkit_dir, 'VOC' + year, type) + + +def walk_dir(devkit_dir, year): + filelist_dir = get_dir(devkit_dir, year, 'ImageSets/Main') + annotation_dir = get_dir(devkit_dir, year, 'Annotations') + img_dir = get_dir(devkit_dir, year, 'JPEGImages') + trainval_list = [] + test_list = [] + added = set() + + for _, _, files in os.walk(filelist_dir): + for fname in files: + img_ann_list = [] + if re.match('[a-z]+_trainval\.txt', fname): + img_ann_list = trainval_list + elif re.match('[a-z]+_test\.txt', fname): + img_ann_list = test_list + else: + continue + fpath = osp.join(filelist_dir, fname) + for line in open(fpath): + name_prefix = line.strip().split()[0] + if name_prefix in added: + continue + added.add(name_prefix) + ann_path = osp.join(annotation_dir, name_prefix + '.xml') + img_path = osp.join(img_dir, name_prefix + '.jpg') + assert os.path.isfile(ann_path), 'file %s not found.' % ann_path + assert os.path.isfile(img_path), 'file %s not found.' 
% img_path + img_ann_list.append((img_path, ann_path)) + + return trainval_list, test_list + + +def prepare_filelist(devkit_dir, years, output_dir): + trainval_list = [] + test_list = [] + for year in years: + trainval, test = walk_dir(devkit_dir, year) + trainval_list.extend(trainval) + test_list.extend(test) + random.shuffle(trainval_list) + with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval: + for item in trainval_list: + ftrainval.write(item[0] + ' ' + item[1] + '\n') + + with open(osp.join(output_dir, 'test.txt'), 'w') as ftest: + for item in test_list: + ftest.write(item[0] + ' ' + item[1] + '\n') + + +if __name__ == '__main__': + prepare_filelist(devkit_dir, years, '.') diff --git a/object_detection/labels/label_list b/object_detection/labels/label_list new file mode 100644 index 00000000..87df23ce --- /dev/null +++ b/object_detection/labels/label_list @@ -0,0 +1,21 @@ +background +aeroplane +bicycle +bird +boat +bottle +bus +car +cat +chair +cow +diningtable +dog +horse +motorbike +person +pottedplant +sheep +sofa +train +tvmonitor diff --git a/object_detection/latest_kpis/train_cost_factor.txt b/object_detection/latest_kpis/train_cost_factor.txt new file mode 100644 index 00000000..f8af6455 --- /dev/null +++ b/object_detection/latest_kpis/train_cost_factor.txt @@ -0,0 +1 @@ +[50.4810791015625] diff --git a/object_detection/latest_kpis/train_speed_factor.txt b/object_detection/latest_kpis/train_speed_factor.txt new file mode 100644 index 00000000..f5dffe7a --- /dev/null +++ b/object_detection/latest_kpis/train_speed_factor.txt @@ -0,0 +1 @@ +[22.96385064308009] diff --git a/object_detection/mobilenet_ssd.py b/object_detection/mobilenet_ssd.py new file mode 100644 index 00000000..06c71d55 --- /dev/null +++ b/object_detection/mobilenet_ssd.py @@ -0,0 +1,116 @@ +import paddle as paddle +import paddle.fluid as fluid +from paddle.fluid.initializer import MSRA +from paddle.fluid.param_attr import ParamAttr + + +def conv_bn(input, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + act='relu', + use_cudnn=True): + parameter_attr = ParamAttr(learning_rate=0.1, initializer=MSRA()) + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=parameter_attr, + bias_attr=False) + parameter_attr = ParamAttr(learning_rate=0.1, initializer=MSRA()) + bias_attr = ParamAttr(learning_rate=0.2) + return fluid.layers.batch_norm(input=conv, act=act) + + +def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride, + scale): + depthwise_conv = conv_bn( + input=input, + filter_size=3, + num_filters=int(num_filters1 * scale), + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + use_cudnn=False) + + pointwise_conv = conv_bn( + input=depthwise_conv, + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0) + return pointwise_conv + + +def extra_block(input, num_filters1, num_filters2, num_groups, stride, scale): + # 1x1 conv + pointwise_conv = conv_bn( + input=input, + filter_size=1, + num_filters=int(num_filters1 * scale), + stride=1, + num_groups=int(num_groups * scale), + padding=0) + + # 3x3 conv + normal_conv = conv_bn( + input=pointwise_conv, + filter_size=3, + num_filters=int(num_filters2 * scale), + stride=2, + num_groups=int(num_groups * scale), + padding=1) + return normal_conv + + +def mobile_net(num_classes, img, img_shape, scale=1.0): + # 300x300 + 
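# the size comments below track the feature map: each stride-2 stage halves it, 300 -> 150 -> 75 -> 38 -> 19 -> 10 -> 5 -> 3 -> 2 +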
tmp = conv_bn(img, 3, int(32 * scale), 2, 1, 3) + # 150x150 + tmp = depthwise_separable(tmp, 32, 64, 32, 1, scale) + tmp = depthwise_separable(tmp, 64, 128, 64, 2, scale) + # 75x75 + tmp = depthwise_separable(tmp, 128, 128, 128, 1, scale) + tmp = depthwise_separable(tmp, 128, 256, 128, 2, scale) + # 38x38 + tmp = depthwise_separable(tmp, 256, 256, 256, 1, scale) + tmp = depthwise_separable(tmp, 256, 512, 256, 2, scale) + + # 19x19 + for i in range(5): + tmp = depthwise_separable(tmp, 512, 512, 512, 1, scale) + module11 = tmp + tmp = depthwise_separable(tmp, 512, 1024, 512, 2, scale) + + # 10x10 + module13 = depthwise_separable(tmp, 1024, 1024, 1024, 1, scale) + module14 = extra_block(module13, 256, 512, 1, 2, scale) + # 5x5 + module15 = extra_block(module14, 128, 256, 1, 2, scale) + # 3x3 + module16 = extra_block(module15, 128, 256, 1, 2, scale) + # 2x2 + module17 = extra_block(module16, 64, 128, 1, 2, scale) + + mbox_locs, mbox_confs, box, box_var = fluid.layers.multi_box_head( + inputs=[module11, module13, module14, module15, module16, module17], + image=img, + num_classes=num_classes, + min_ratio=20, + max_ratio=90, + min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0], + max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0], + aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]], + base_size=img_shape[2], + offset=0.5, + flip=True) + + return mbox_locs, mbox_confs, box, box_var diff --git a/object_detection/reader.py b/object_detection/reader.py new file mode 100644 index 00000000..78efcc4a --- /dev/null +++ b/object_detection/reader.py @@ -0,0 +1,350 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
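+ +# SSD data reader: builds train/test/infer readers for the pascalvoc and coco datasets.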
+ +import image_util +from paddle.utils.image_util import * +import random +from PIL import Image +from PIL import ImageDraw +import numpy as np +import xml.etree.ElementTree +import os +import time +import copy + + +class Settings(object): + def __init__(self, + dataset=None, + data_dir=None, + label_file=None, + resize_h=300, + resize_w=300, + mean_value=[127.5, 127.5, 127.5], + apply_distort=True, + apply_expand=True, + toy=0): + self._dataset = dataset + self._toy = toy + self._data_dir = data_dir + if dataset == "pascalvoc": + self._label_list = [] + label_fpath = os.path.join(data_dir, label_file) + for line in open(label_fpath): + self._label_list.append(line.strip()) + + self._apply_distort = apply_distort + self._apply_expand = apply_expand + self._resize_height = resize_h + self._resize_width = resize_w + self._img_mean = np.array(mean_value)[:, np.newaxis, np.newaxis].astype( + 'float32') + self._expand_prob = 0.5 + self._expand_max_ratio = 4 + self._hue_prob = 0.5 + self._hue_delta = 18 + self._contrast_prob = 0.5 + self._contrast_delta = 0.5 + self._saturation_prob = 0.5 + self._saturation_delta = 0.5 + self._brightness_prob = 0.5 + self._brightness_delta = 0.125 + + @property + def dataset(self): + return self._dataset + + @property + def toy(self): + return self._toy + + @property + def apply_expand(self): + return self._apply_expand + + @property + def apply_distort(self): + return self._apply_distort + + @property + def data_dir(self): + return self._data_dir + + @data_dir.setter + def data_dir(self, data_dir): + self._data_dir = data_dir + + @property + def label_list(self): + return self._label_list + + @property + def resize_h(self): + return self._resize_height + + @property + def resize_w(self): + return self._resize_width + + @property + def img_mean(self): + return self._img_mean + + +def preprocess(img, bbox_labels, mode, settings): + img_width, img_height = img.size + sampled_labels = bbox_labels + if mode == 'train': + if settings._apply_distort: + img = image_util.distort_image(img, settings) + if settings._apply_expand: + img, bbox_labels, img_width, img_height = image_util.expand_image( + img, bbox_labels, img_width, img_height, settings) + # sampling + batch_sampler = [] + # hard-code here + batch_sampler.append( + image_util.sampler(1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0)) + batch_sampler.append( + image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0)) + batch_sampler.append( + image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0)) + batch_sampler.append( + image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0)) + batch_sampler.append( + image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0)) + batch_sampler.append( + image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0)) + batch_sampler.append( + image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0)) + sampled_bbox = image_util.generate_batch_samples(batch_sampler, + bbox_labels) + + img = np.array(img) + if len(sampled_bbox) > 0: + idx = int(random.uniform(0, len(sampled_bbox))) + img, sampled_labels = image_util.crop_image( + img, bbox_labels, sampled_bbox[idx], img_width, img_height) + + img = Image.fromarray(img) + img = img.resize((settings.resize_w, settings.resize_h), Image.ANTIALIAS) + img = np.array(img) + + if mode == 'train': + mirror = int(random.uniform(0, 2)) + if mirror == 1: + img = img[:, ::-1, :] + for i in xrange(len(sampled_labels)): + tmp = sampled_labels[i][1] + sampled_labels[i][1] = 1 - sampled_labels[i][3] + sampled_labels[i][3] = 1 - tmp + # HWC to CHW + if len(img.shape) ==
3: + img = np.swapaxes(img, 1, 2) + img = np.swapaxes(img, 1, 0) + # RGB to BGR + img = img[[2, 1, 0], :, :] + img = img.astype('float32') + img -= settings.img_mean + img = img * 0.007843 + return img, sampled_labels + + +def coco(settings, file_list, mode, shuffle): + # cocoapi + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + + coco = COCO(file_list) + image_ids = coco.getImgIds() + images = coco.loadImgs(image_ids) + category_ids = coco.getCatIds() + category_names = [item['name'] for item in coco.loadCats(category_ids)] + + if not settings.toy == 0: + images = images[:settings.toy] if len(images) > settings.toy else images + print("{} on {} with {} images".format(mode, settings.dataset, len(images))) + + def reader(): + if mode == 'train' and shuffle: + random.shuffle(images) + for image in images: + image_name = image['file_name'] + image_path = os.path.join(settings.data_dir, image_name) + + im = Image.open(image_path) + if im.mode == 'L': + im = im.convert('RGB') + im_width, im_height = im.size + + # layout: category_id | xmin | ymin | xmax | ymax | iscrowd | + # origin_coco_bbox | segmentation | area | image_id | annotation_id + bbox_labels = [] + annIds = coco.getAnnIds(imgIds=image['id']) + anns = coco.loadAnns(annIds) + for ann in anns: + bbox_sample = [] + # start from 1, leave 0 to background + bbox_sample.append( + float(category_ids.index(ann['category_id'])) + 1) + bbox = ann['bbox'] + xmin, ymin, w, h = bbox + xmax = xmin + w + ymax = ymin + h + bbox_sample.append(float(xmin) / im_width) + bbox_sample.append(float(ymin) / im_height) + bbox_sample.append(float(xmax) / im_width) + bbox_sample.append(float(ymax) / im_height) + bbox_sample.append(float(ann['iscrowd'])) + bbox_labels.append(bbox_sample) + im, sample_labels = preprocess(im, bbox_labels, mode, settings) + sample_labels = np.array(sample_labels) + if len(sample_labels) == 0: continue + im = im.astype('float32') + boxes = sample_labels[:, 1:5] + lbls = sample_labels[:, 0].astype('int32') + difficults = sample_labels[:, -1].astype('int32') + yield im, boxes, lbls, difficults + + return reader + + +def pascalvoc(settings, file_list, mode, shuffle): + flist = open(file_list) + images = [line.strip() for line in flist] + if not settings.toy == 0: + images = images[:settings.toy] if len(images) > settings.toy else images + print("{} on {} with {} images".format(mode, settings.dataset, len(images))) + + def reader(): + if mode == 'train' and shuffle: + random.shuffle(images) + for image in images: + image_path, label_path = image.split() + image_path = os.path.join(settings.data_dir, image_path) + label_path = os.path.join(settings.data_dir, label_path) + + im = Image.open(image_path) + if im.mode == 'L': + im = im.convert('RGB') + im_width, im_height = im.size + + # layout: label | xmin | ymin | xmax | ymax | difficult + bbox_labels = [] + root = xml.etree.ElementTree.parse(label_path).getroot() + for object in root.findall('object'): + bbox_sample = [] + # start from 1 + bbox_sample.append( + float(settings.label_list.index(object.find('name').text))) + bbox = object.find('bndbox') + difficult = float(object.find('difficult').text) + bbox_sample.append(float(bbox.find('xmin').text) / im_width) + bbox_sample.append(float(bbox.find('ymin').text) / im_height) + bbox_sample.append(float(bbox.find('xmax').text) / im_width) + bbox_sample.append(float(bbox.find('ymax').text) / im_height) + bbox_sample.append(difficult) + bbox_labels.append(bbox_sample) + im, sample_labels = preprocess(im,
bbox_labels, mode, settings) + sample_labels = np.array(sample_labels) + if len(sample_labels) == 0: continue + im = im.astype('float32') + boxes = sample_labels[:, 1:5] + lbls = sample_labels[:, 0].astype('int32') + difficults = sample_labels[:, -1].astype('int32') + yield im, boxes, lbls, difficults + + return reader + + +def draw_bounding_box_on_image(image, + sample_labels, + image_name, + category_names, + color='red', + thickness=4, + with_text=True, + normalized=True): + image = Image.fromarray(image) + draw = ImageDraw.Draw(image) + im_width, im_height = image.size + if not normalized: + im_width, im_height = 1, 1 + for item in sample_labels: + label = item[0] + category_name = category_names[int(label)] + bbox = item[1:5] + xmin, ymin, xmax, ymax = bbox + (left, right, top, bottom) = (xmin * im_width, xmax * im_width, + ymin * im_height, ymax * im_height) + draw.line( + [(left, top), (left, bottom), (right, bottom), (right, top), + (left, top)], + width=thickness, + fill=color) + if with_text: + if image.mode == 'RGB': + draw.text((left, top), category_name, (255, 255, 0)) + image.save(image_name) + + +def train(settings, file_list, shuffle=True): + file_list = os.path.join(settings.data_dir, file_list) + if settings.dataset == 'coco': + train_settings = copy.copy(settings) + if '2014' in file_list: + sub_dir = "train2014" + elif '2017' in file_list: + sub_dir = "train2017" + train_settings.data_dir = os.path.join(settings.data_dir, sub_dir) + return coco(train_settings, file_list, 'train', shuffle) + else: + return pascalvoc(settings, file_list, 'train', shuffle) + + +def test(settings, file_list): + file_list = os.path.join(settings.data_dir, file_list) + if settings.dataset == 'coco': + test_settings = copy.copy(settings) + if '2014' in file_list: + sub_dir = "val2014" + elif '2017' in file_list: + sub_dir = "val2017" + test_settings.data_dir = os.path.join(settings.data_dir, sub_dir) + return coco(test_settings, file_list, 'test', False) + else: + return pascalvoc(settings, file_list, 'test', False) + + +def infer(settings, image_path): + def reader(): + im = Image.open(image_path) + if im.mode == 'L': + im = im.convert('RGB') + im_width, im_height = im.size + img = im.resize((settings.resize_w, settings.resize_h), + Image.ANTIALIAS) + img = np.array(img) + # HWC to CHW + if len(img.shape) == 3: + img = np.swapaxes(img, 1, 2) + img = np.swapaxes(img, 1, 0) + # RGB to BGR + img = img[[2, 1, 0], :, :] + img = img.astype('float32') + img -= settings.img_mean + img = img * 0.007843 + yield img + + return reader diff --git a/object_detection/run.xsh b/object_detection/run.xsh new file mode 100755 index 00000000..cb961181 --- /dev/null +++ b/object_detection/run.xsh @@ -0,0 +1,13 @@ +#!/bin/bash + +export MKL_NUM_THREADS=1 +export OMP_NUM_THREADS=1 +cudaid=${object_detection_cudaid:=0} # use 0-th card as default +export CUDA_VISIBLE_DEVICES=$cudaid + +if [ !
-d "data/pascalvoc" ];then + mkdir -p data/pascalvoc + ./download.sh +fi +FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python train.py --batch_size=64 --num_passes=1 + diff --git a/object_detection/train.py b/object_detection/train.py new file mode 100644 index 00000000..6d0c38d4 --- /dev/null +++ b/object_detection/train.py @@ -0,0 +1,358 @@ +import os +import time +import numpy as np +import argparse +import functools +import shutil + +import paddle as paddle +import paddle.fluid as fluid +import reader +from mobilenet_ssd import mobile_net +from utility import add_arguments, print_arguments + +from continuous_evaluation import train_cost_kpi, train_speed_kpi + +parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable +add_arg('learning_rate', float, 0.001, "Learning rate.") +add_arg('batch_size', int, 32, "Minibatch size.") +add_arg('num_passes', int, 120, "Epoch number.") +add_arg('iterations', int, 120, "Number of mini-batches to run.") +add_arg('skip_batch_num', int, 5, "Number of mini-batches to skip.") +add_arg('parallel', bool, True, "Whether to use parallel training.") +add_arg('use_gpu', bool, True, "Whether to use GPU or not.") +add_arg('use_nccl', bool, True, "Whether to use NCCL or not.") +add_arg('dataset', str, 'pascalvoc', "coco or pascalvoc.") +add_arg('model_save_dir', str, 'model', "The path to save model.") +add_arg('pretrained_model', str, 'pretrained/ssd_mobilenet_v1_coco/', "The init model path.") +add_arg('apply_distort', bool, True, "Whether to apply distort.") +add_arg('apply_expand', bool, True, "Whether to apply expand.") +add_arg('ap_version', str, '11point', "11point or integral") +add_arg('resize_h', int, 300, "The resized image height.") +add_arg('resize_w', int, 300, "The resized image width.") +add_arg('mean_value_B', float, 127.5, "mean value for B channel which will be subtracted") #123.68 +add_arg('mean_value_G', float, 127.5, "mean value for G channel which will be subtracted") #116.78 +add_arg('mean_value_R', float, 127.5, "mean value for R channel which will be subtracted") #103.94 +add_arg('is_toy', int, 0, "Toy mode for quick debugging: 0 uses all data, n uses only n samples.") +# yapf: enable + + +def parallel_do(args, + train_file_list, + val_file_list, + data_args, + learning_rate, + batch_size, + num_passes, + model_save_dir, + pretrained_model=None): + image_shape = [3, data_args.resize_h, data_args.resize_w] + if data_args.dataset == 'coco': + num_classes = 81 + elif data_args.dataset == 'pascalvoc': + num_classes = 21 + + image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') + gt_box = fluid.layers.data( + name='gt_box', shape=[4], dtype='float32', lod_level=1) + gt_label = fluid.layers.data( + name='gt_label', shape=[1], dtype='int32', lod_level=1) + difficult = fluid.layers.data( + name='gt_difficult', shape=[1], dtype='int32', lod_level=1) + + if args.parallel: + places = fluid.layers.get_places() + pd = fluid.layers.ParallelDo(places, use_nccl=args.use_nccl) + with pd.do(): + image_ = pd.read_input(image) + gt_box_ = pd.read_input(gt_box) + gt_label_ = pd.read_input(gt_label) + difficult_ = pd.read_input(difficult) + locs, confs, box, box_var = mobile_net(num_classes, image_, + image_shape) + loss = fluid.layers.ssd_loss(locs, confs, gt_box_, gt_label_, box, + box_var) + nmsed_out = fluid.layers.detection_output( + locs, confs, box, box_var, nms_threshold=0.45) + loss = fluid.layers.reduce_sum(loss) + pd.write_output(loss) +
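# also export nmsed_out so the mAP evaluator in the test program can use it +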
pd.write_output(nmsed_out) + + loss, nmsed_out = pd() + loss = fluid.layers.mean(loss) + else: + locs, confs, box, box_var = mobile_net(num_classes, image, image_shape) + nmsed_out = fluid.layers.detection_output( + locs, confs, box, box_var, nms_threshold=0.45) + loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, + box_var) + loss = fluid.layers.reduce_sum(loss) + + test_program = fluid.default_main_program().clone(for_test=True) + with fluid.program_guard(test_program): + map_eval = fluid.evaluator.DetectionMAP( + nmsed_out, + gt_label, + gt_box, + difficult, + num_classes, + overlap_threshold=0.5, + evaluate_difficult=False, + ap_version=args.ap_version) + + if data_args.dataset == 'coco': + # learning rate decay in 12, 19 pass, respectively + if '2014' in train_file_list: + boundaries = [82783 / batch_size * 12, 82783 / batch_size * 19] + elif '2017' in train_file_list: + boundaries = [118287 / batch_size * 12, 118287 / batch_size * 19] + elif data_args.dataset == 'pascalvoc': + boundaries = [40000, 60000] + values = [learning_rate, learning_rate * 0.5, learning_rate * 0.25] + optimizer = fluid.optimizer.RMSProp( + learning_rate=fluid.layers.piecewise_decay(boundaries, values), + regularization=fluid.regularizer.L2Decay(0.00005), ) + + optimizer.minimize(loss) + + place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + if pretrained_model: + + def if_exist(var): + return os.path.exists(os.path.join(pretrained_model, var.name)) + + fluid.io.load_vars(exe, pretrained_model, predicate=if_exist) + + train_reader = paddle.batch( + reader.train(data_args, train_file_list), batch_size=batch_size) + test_reader = paddle.batch( + reader.test(data_args, val_file_list), batch_size=batch_size) + feeder = fluid.DataFeeder( + place=place, feed_list=[image, gt_box, gt_label, difficult]) + + def test(pass_id): + _, accum_map = map_eval.get_map_var() + map_eval.reset(exe) + test_map = None + for data in test_reader(): + test_map = exe.run(test_program, + feed=feeder.feed(data), + fetch_list=[accum_map]) + print("Test {0}, map {1}".format(pass_id, test_map[0])) + + for pass_id in range(num_passes): + start_time = time.time() + prev_start_time = start_time + end_time = 0 + for batch_id, data in enumerate(train_reader()): + prev_start_time = start_time + start_time = time.time() + loss_v = exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[loss]) + end_time = time.time() + if batch_id % 20 == 0: + print("Pass {0}, batch {1}, loss {2}, time {3}".format( + pass_id, batch_id, loss_v[0], start_time - prev_start_time)) + test(pass_id) + + if pass_id % 10 == 0 or pass_id == num_passes - 1: + model_path = os.path.join(model_save_dir, str(pass_id)) + print 'save models to %s' % (model_path) + fluid.io.save_persistables(exe, model_path) + + +def parallel_exe(args, + train_file_list, + val_file_list, + data_args, + learning_rate, + batch_size, + num_passes, + model_save_dir='model', + pretrained_model=None): + image_shape = [3, data_args.resize_h, data_args.resize_w] + if data_args.dataset == 'coco': + num_classes = 81 + elif data_args.dataset == 'pascalvoc': + num_classes = 21 + + devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" + devices_num = len(devices.split(",")) + + image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') + gt_box = fluid.layers.data( + name='gt_box', shape=[4], dtype='float32', lod_level=1) + gt_label = fluid.layers.data( + name='gt_label', 
shape=[1], dtype='int32', lod_level=1) + difficult = fluid.layers.data( + name='gt_difficult', shape=[1], dtype='int32', lod_level=1) + + locs, confs, box, box_var = mobile_net(num_classes, image, image_shape) + nmsed_out = fluid.layers.detection_output( + locs, confs, box, box_var, nms_threshold=0.45) + loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var) + loss = fluid.layers.reduce_sum(loss) + + test_program = fluid.default_main_program().clone(for_test=True) + with fluid.program_guard(test_program): + map_eval = fluid.evaluator.DetectionMAP( + nmsed_out, + gt_label, + gt_box, + difficult, + num_classes, + overlap_threshold=0.5, + evaluate_difficult=False, + ap_version=args.ap_version) + + if data_args.dataset == 'coco': + # learning rate decay in 12, 19 pass, respectively + if '2014' in train_file_list: + epocs = 82783 / batch_size + boundaries = [epocs * 12, epocs * 19] + elif '2017' in train_file_list: + epocs = 118287 / batch_size + boundaries = [epocs * 12, epocs * 19] + elif data_args.dataset == 'pascalvoc': + epocs = 19200 / batch_size + boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100] + values = [ + learning_rate, learning_rate * 0.5, learning_rate * 0.25, + learning_rate * 0.1, learning_rate * 0.01 + ] + optimizer = fluid.optimizer.RMSProp( + learning_rate=fluid.layers.piecewise_decay(boundaries, values), + regularization=fluid.regularizer.L2Decay(0.00005), ) + + optimizer.minimize(loss) + + place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + fluid.default_startup_program().random_seed = 1000 + exe.run(fluid.default_startup_program()) + + if pretrained_model: + + def if_exist(var): + return os.path.exists(os.path.join(pretrained_model, var.name)) + + fluid.io.load_vars(exe, pretrained_model, predicate=if_exist) + + if args.parallel: + train_exe = fluid.ParallelExecutor( + use_cuda=args.use_gpu, loss_name=loss.name) + + train_reader = paddle.batch( + reader.train(data_args, train_file_list), batch_size=batch_size) + test_reader = paddle.batch( + reader.test(data_args, val_file_list), batch_size=batch_size) + feeder = fluid.DataFeeder( + place=place, feed_list=[image, gt_box, gt_label, difficult]) + + def save_model(postfix): + model_path = os.path.join(model_save_dir, postfix) + if os.path.isdir(model_path): + shutil.rmtree(model_path) + print 'save models to %s' % (model_path) + fluid.io.save_persistables(exe, model_path) + + best_map = 0.
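+ # best mAP seen on the test set; test() below saves 'best_model' whenever it improves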
+
+    def test(pass_id, best_map):
+        _, accum_map = map_eval.get_map_var()
+        map_eval.reset(exe)
+        test_map = None
+        for data in test_reader():
+            test_map = exe.run(test_program,
+                               feed=feeder.feed(data),
+                               fetch_list=[accum_map])
+        if test_map[0] > best_map:
+            best_map = test_map[0]
+            save_model('best_model')
+        print("Test {0}, map {1}".format(pass_id, test_map[0]))
+        # return the updated value so the caller can keep tracking it
+        return best_map
+
+    train_num = 0
+    total_train_time = 0.0
+    total_iters = 0
+    for pass_id in range(num_passes):
+        every_pass_loss = []
+        iter = 0
+        pass_duration = 0.0
+        for batch_id, data in enumerate(train_reader()):
+            batch_start = time.time()
+            if iter == args.iterations:
+                break
+            if len(data) < devices_num: continue
+            if args.parallel:
+                loss_v, = train_exe.run(fetch_list=[loss.name],
+                                        feed=feeder.feed(data))
+            else:
+                loss_v, = exe.run(fluid.default_main_program(),
+                                  feed=feeder.feed(data),
+                                  fetch_list=[loss])
+            loss_v = np.mean(np.array(loss_v))
+            if batch_id % 20 == 0:
+                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
+                    pass_id, batch_id, loss_v, time.time() - batch_start))
+            if iter >= args.skip_batch_num or pass_id != 0:
+                batch_duration = time.time() - batch_start
+                pass_duration += batch_duration
+                train_num += len(data)
+            every_pass_loss.append(loss_v)
+            iter += 1
+            total_iters += 1
+        #best_map = test(pass_id, best_map)
+        total_train_time += pass_duration
+        print("Pass:%d, Loss:%f, Handle Images Duration: %f\n" % (pass_id, np.mean(every_pass_loss), pass_duration))
+        if pass_id == num_passes - 1:
+            examples_per_sec = train_num / total_train_time
+            train_cost_kpi.add_record(np.mean(every_pass_loss))
+            train_speed_kpi.add_record(np.array(examples_per_sec, dtype='float'))
+            train_cost_kpi.persist()
+            train_speed_kpi.persist()
+    print("Best test map {0}".format(best_map))
+
+
+if __name__ == '__main__':
+    args = parser.parse_args()
+    print_arguments(args)
+
+    data_dir = 'data/pascalvoc'
+    train_file_list = 'trainval.txt'
+    val_file_list = 'test.txt'
+    label_file = 'label_list'
+    model_save_dir = args.model_save_dir
+    if args.dataset == 'coco':
+        data_dir = './data/COCO17'
+        train_file_list = 'annotations/instances_train2017.json'
+        val_file_list = 'annotations/instances_val2017.json'
+        label_file = 'label_list'
+
+    data_args = reader.Settings(
+        dataset=args.dataset,
+        data_dir=data_dir,
+        label_file=label_file,
+        apply_distort=args.apply_distort,
+        apply_expand=args.apply_expand,
+        resize_h=args.resize_h,
+        resize_w=args.resize_w,
+        mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R],
+        toy=args.is_toy)
+    #method = parallel_do
+    method = parallel_exe
+    method(
+        args,
+        train_file_list=train_file_list,
+        val_file_list=val_file_list,
+        data_args=data_args,
+        learning_rate=args.learning_rate,
+        batch_size=args.batch_size,
+        num_passes=args.num_passes,
+        model_save_dir=model_save_dir,
+        pretrained_model=args.pretrained_model)
diff --git a/object_detection/utility.py b/object_detection/utility.py
new file mode 100644
index 00000000..506e6007
--- /dev/null
+++ b/object_detection/utility.py
@@ -0,0 +1,62 @@
+"""Contains common utility functions."""
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import distutils.util +import numpy as np +from paddle.fluid import core + + +def print_arguments(args): + """Print argparse's arguments. + + Usage: + + .. code-block:: python + + parser = argparse.ArgumentParser() + parser.add_argument("name", default="Jonh", type=str, help="User name.") + args = parser.parse_args() + print_arguments(args) + + :param args: Input argparse.Namespace for printing. + :type args: argparse.Namespace + """ + print("----------- Configuration Arguments -----------") + for arg, value in sorted(vars(args).iteritems()): + print("%s: %s" % (arg, value)) + print("------------------------------------------------") + + +def add_arguments(argname, type, default, help, argparser, **kwargs): + """Add argparse's argument. + + Usage: + + .. code-block:: python + + parser = argparse.ArgumentParser() + add_argument("name", str, "Jonh", "User name.", parser) + args = parser.parse_args() + """ + type = distutils.util.strtobool if type == bool else type + argparser.add_argument( + "--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) From a478b5575c358be844078893400704caa84e2ab8 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Wed, 25 Apr 2018 10:02:06 +0800 Subject: [PATCH 12/67] change gpu schedule time --- lstm/model.py | 2 +- resnet50/model.py | 2 +- seq2seq/model.py | 2 +- vgg16/model.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lstm/model.py b/lstm/model.py index d5d8c8c9..a79b8ec0 100644 --- a/lstm/model.py +++ b/lstm/model.py @@ -250,7 +250,7 @@ def collect_gpu_memory_data(mem_list): print('Get GPU memory data error') else: mem_list.append(int(output.split('\n')[1].split(' ')[0])) - time.sleep(1) + time.sleep(0.01) def save_gpu_data(mem_list): diff --git a/resnet50/model.py b/resnet50/model.py index 91977d26..27c7ee0f 100644 --- a/resnet50/model.py +++ b/resnet50/model.py @@ -343,7 +343,7 @@ def collect_gpu_memory_data(mem_list): print('Get GPU memory data error') else: mem_list.append(int(output.split('\n')[1].split(' ')[0])) - time.sleep(1) + time.sleep(0.01) def save_gpu_data(mem_list): diff --git a/seq2seq/model.py b/seq2seq/model.py index 124f32f6..c2c9763e 100644 --- a/seq2seq/model.py +++ b/seq2seq/model.py @@ -388,7 +388,7 @@ def collect_gpu_memory_data(mem_list): print('Get GPU memory data error') else: mem_list.append(int(output.split('\n')[1].split(' ')[0])) - time.sleep(1) + time.sleep(0.01) def save_gpu_data(mem_list): diff --git a/vgg16/model.py b/vgg16/model.py index b3e20771..93954ccb 100644 --- a/vgg16/model.py +++ b/vgg16/model.py @@ -257,7 +257,7 @@ def collect_gpu_memory_data(mem_list): print('Get GPU memory data error') else: mem_list.append(int(output.split('\n')[1].split(' ')[0])) - time.sleep(1) + time.sleep(0.01) def save_gpu_data(mem_list): From c0b7261631c241da657ddc14527a8c580d8f5a14 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Wed, 25 Apr 2018 15:19:47 +0800 Subject: [PATCH 13/67] add model ocr_recognition --- 
ocr_recognition/continuous_evaluation.py | 13 ++ ocr_recognition/crnn_ctc_model.py | 221 ++++++++++++++++++ ocr_recognition/ctc_reader.py | 200 ++++++++++++++++ ocr_recognition/ctc_train.py | 134 +++++++++++ ocr_recognition/eval.py | 70 ++++++ ocr_recognition/images/demo.jpg | Bin 0 -> 3275 bytes ocr_recognition/images/train.jpg | Bin 0 -> 30972 bytes ocr_recognition/inference.py | 65 ++++++ .../latest_kpis/train_avg_loss_factor.txt | 1 + .../latest_kpis/train_seq_err_factor.txt | 1 + ocr_recognition/run.xsh | 8 + ocr_recognition/utility.py | 90 +++++++ 12 files changed, 803 insertions(+) create mode 100644 ocr_recognition/continuous_evaluation.py create mode 100644 ocr_recognition/crnn_ctc_model.py create mode 100644 ocr_recognition/ctc_reader.py create mode 100644 ocr_recognition/ctc_train.py create mode 100644 ocr_recognition/eval.py create mode 100644 ocr_recognition/images/demo.jpg create mode 100644 ocr_recognition/images/train.jpg create mode 100644 ocr_recognition/inference.py create mode 100644 ocr_recognition/latest_kpis/train_avg_loss_factor.txt create mode 100644 ocr_recognition/latest_kpis/train_seq_err_factor.txt create mode 100755 ocr_recognition/run.xsh create mode 100644 ocr_recognition/utility.py diff --git a/ocr_recognition/continuous_evaluation.py b/ocr_recognition/continuous_evaluation.py new file mode 100644 index 00000000..679893c3 --- /dev/null +++ b/ocr_recognition/continuous_evaluation.py @@ -0,0 +1,13 @@ +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi, DurationKpi, AccKpi + +train_avg_loss_kpi = AccKpi('train_avg_loss', 0.05, 0) +train_seq_err_kpi = AccKpi('train_seq_err', 0.05, 0) + + +tracking_kpis = [ + train_avg_loss_kpi, + train_seq_err_kpi, +] diff --git a/ocr_recognition/crnn_ctc_model.py b/ocr_recognition/crnn_ctc_model.py new file mode 100644 index 00000000..df33100e --- /dev/null +++ b/ocr_recognition/crnn_ctc_model.py @@ -0,0 +1,221 @@ +import paddle.fluid as fluid + + +def conv_bn_pool(input, + group, + out_ch, + act="relu", + param=None, + bias=None, + param_0=None, + is_test=False): + tmp = input + for i in xrange(group): + tmp = fluid.layers.conv2d( + input=tmp, + num_filters=out_ch[i], + filter_size=3, + padding=1, + param_attr=param if param_0 is None else param_0, + act=None, # LinearActivation + use_cudnn=True) + tmp = fluid.layers.batch_norm( + input=tmp, + act=act, + param_attr=param, + bias_attr=bias, + is_test=is_test) + tmp = fluid.layers.pool2d( + input=tmp, + pool_size=2, + pool_type='max', + pool_stride=2, + use_cudnn=True, + ceil_mode=True) + + return tmp + + +def ocr_convs(input, + num, + with_bn, + regularizer=None, + gradient_clip=None, + is_test=False): + assert (num % 4 == 0) + + b = fluid.ParamAttr( + regularizer=regularizer, + gradient_clip=gradient_clip, + initializer=fluid.initializer.Normal(0.0, 0.0)) + w0 = fluid.ParamAttr( + regularizer=regularizer, + gradient_clip=gradient_clip, + initializer=fluid.initializer.Normal(0.0, 0.0005)) + w1 = fluid.ParamAttr( + regularizer=regularizer, + gradient_clip=gradient_clip, + initializer=fluid.initializer.Normal(0.0, 0.01)) + tmp = input + tmp = conv_bn_pool( + tmp, 2, [16, 16], param=w1, bias=b, param_0=w0, is_test=is_test) + + tmp = conv_bn_pool(tmp, 2, [32, 32], param=w1, bias=b, is_test=is_test) + tmp = conv_bn_pool(tmp, 2, [64, 64], param=w1, bias=b, is_test=is_test) + tmp = conv_bn_pool(tmp, 2, [128, 128], param=w1, bias=b, is_test=is_test) + return tmp + + +def encoder_net(images, + num_classes, + rnn_hidden_size=200, + regularizer=None, + 
gradient_clip=None, + is_test=False): + conv_features = ocr_convs( + images, + 8, + True, + regularizer=regularizer, + gradient_clip=gradient_clip, + is_test=is_test) + sliced_feature = fluid.layers.im2sequence( + input=conv_features, + stride=[1, 1], + filter_size=[conv_features.shape[2], 1]) + + para_attr = fluid.ParamAttr( + regularizer=regularizer, + gradient_clip=gradient_clip, + initializer=fluid.initializer.Normal(0.0, 0.02)) + bias_attr = fluid.ParamAttr( + regularizer=regularizer, + gradient_clip=gradient_clip, + initializer=fluid.initializer.Normal(0.0, 0.02), + learning_rate=2.0) + bias_attr_nobias = fluid.ParamAttr( + regularizer=regularizer, + gradient_clip=gradient_clip, + initializer=fluid.initializer.Normal(0.0, 0.02)) + + fc_1 = fluid.layers.fc(input=sliced_feature, + size=rnn_hidden_size * 3, + param_attr=para_attr, + bias_attr=bias_attr_nobias) + fc_2 = fluid.layers.fc(input=sliced_feature, + size=rnn_hidden_size * 3, + param_attr=para_attr, + bias_attr=bias_attr_nobias) + + gru_forward = fluid.layers.dynamic_gru( + input=fc_1, + size=rnn_hidden_size, + param_attr=para_attr, + bias_attr=bias_attr, + candidate_activation='relu') + gru_backward = fluid.layers.dynamic_gru( + input=fc_2, + size=rnn_hidden_size, + is_reverse=True, + param_attr=para_attr, + bias_attr=bias_attr, + candidate_activation='relu') + + w_attr = fluid.ParamAttr( + regularizer=regularizer, + gradient_clip=gradient_clip, + initializer=fluid.initializer.Normal(0.0, 0.02)) + b_attr = fluid.ParamAttr( + regularizer=regularizer, + gradient_clip=gradient_clip, + initializer=fluid.initializer.Normal(0.0, 0.0)) + + fc_out = fluid.layers.fc(input=[gru_forward, gru_backward], + size=num_classes + 1, + param_attr=w_attr, + bias_attr=b_attr) + + return fc_out + + +def ctc_train_net(images, label, args, num_classes): + regularizer = fluid.regularizer.L2Decay(args.l2) + gradient_clip = None + if args.parallel: + places = fluid.layers.get_places() + pd = fluid.layers.ParallelDo(places, use_nccl=True) + with pd.do(): + images_ = pd.read_input(images) + label_ = pd.read_input(label) + + fc_out = encoder_net( + images_, + num_classes, + regularizer=regularizer, + gradient_clip=gradient_clip) + + cost = fluid.layers.warpctc( + input=fc_out, + label=label_, + blank=num_classes, + norm_by_times=True) + sum_cost = fluid.layers.reduce_sum(cost) + + decoded_out = fluid.layers.ctc_greedy_decoder( + input=fc_out, blank=num_classes) + + pd.write_output(sum_cost) + pd.write_output(decoded_out) + + sum_cost, decoded_out = pd() + sum_cost = fluid.layers.reduce_sum(sum_cost) + + else: + fc_out = encoder_net( + images, + num_classes, + regularizer=regularizer, + gradient_clip=gradient_clip) + + cost = fluid.layers.warpctc( + input=fc_out, label=label, blank=num_classes, norm_by_times=True) + sum_cost = fluid.layers.reduce_sum(cost) + decoded_out = fluid.layers.ctc_greedy_decoder( + input=fc_out, blank=num_classes) + + casted_label = fluid.layers.cast(x=label, dtype='int64') + error_evaluator = fluid.evaluator.EditDistance( + input=decoded_out, label=casted_label) + + inference_program = fluid.default_main_program().clone(for_test=True) + + optimizer = fluid.optimizer.Momentum( + learning_rate=args.learning_rate, momentum=args.momentum) + _, params_grads = optimizer.minimize(sum_cost) + model_average = fluid.optimizer.ModelAverage( + args.average_window, + params_grads, + min_average_window=args.min_average_window, + max_average_window=args.max_average_window) + + return sum_cost, error_evaluator, inference_program, model_average + + 
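+# Usage sketch for ctc_train_net (illustrative; it mirrors how ctc_train.py in
+# this patch consumes the returned values, where `images` and `label` are the
+# fluid.layers.data inputs and `args` holds the parsed command-line flags):
+#
+#     sum_cost, error_evaluator, inference_program, model_average = \
+#         ctc_train_net(images, label, args, num_classes)
+#     batch_loss, _, batch_seq_error = exe.run(
+#         fluid.default_main_program(),
+#         feed=get_feeder_data(data, place),
+#         fetch_list=[sum_cost] + error_evaluator.metrics)
+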
+def ctc_infer(images, num_classes):
+    fc_out = encoder_net(images, num_classes, is_test=True)
+    return fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes)
+
+
+def ctc_eval(images, label, num_classes):
+    fc_out = encoder_net(images, num_classes, is_test=True)
+    decoded_out = fluid.layers.ctc_greedy_decoder(
+        input=fc_out, blank=num_classes)
+
+    casted_label = fluid.layers.cast(x=label, dtype='int64')
+    error_evaluator = fluid.evaluator.EditDistance(
+        input=decoded_out, label=casted_label)
+
+    cost = fluid.layers.warpctc(
+        input=fc_out, label=label, blank=num_classes, norm_by_times=True)
+
+    return error_evaluator, cost
diff --git a/ocr_recognition/ctc_reader.py b/ocr_recognition/ctc_reader.py
new file mode 100644
index 00000000..0076ca17
--- /dev/null
+++ b/ocr_recognition/ctc_reader.py
@@ -0,0 +1,200 @@
+import os
+import cv2
+import tarfile
+import numpy as np
+from PIL import Image
+from os import path
+import paddle
+from paddle.utils.image_util import load_image
+
+NUM_CLASSES = 10784
+DATA_SHAPE = [1, 48, 512]
+
+DATA_MD5 = "1de60d54d19632022144e4e58c2637b5"
+DATA_URL = "http://cloud.dlnel.org/filepub/?uuid=df937251-3c0b-480d-9a7b-0080dfeee65c"
+CACHE_DIR_NAME = "ctc_data"
+SAVED_FILE_NAME = "data.tar.gz"
+DATA_DIR_NAME = "data"
+TRAIN_DATA_DIR_NAME = "train_images"
+TEST_DATA_DIR_NAME = "test_images"
+TRAIN_LIST_FILE_NAME = "train.list"
+TEST_LIST_FILE_NAME = "test.list"
+
+
+class DataGenerator(object):
+    def __init__(self):
+        pass
+
+    def train_reader(self, img_root_dir, img_label_list, batchsize):
+        '''
+        Reader interface for training.
+
+        :param img_root_dir: The root path of the images for training.
+        :type img_root_dir: str
+
+        :param img_label_list: The path of the file for training.
+        :type img_label_list: str
+
+        '''
+
+        img_label_lines = []
+        if batchsize == 1:
+            to_file = "tmp.txt"
+            cmd = "cat " + img_label_list + " | awk '{print $1,$2,$3,$4;}' | shuf > " + to_file
+            print "cmd: " + cmd
+            os.system(cmd)
+            print "finish batch shuffle"
+            img_label_lines = open(to_file, 'r').readlines()
+        else:
+            to_file = "tmp.txt"
+            #cmd1: partial shuffle
+            cmd = "cat " + img_label_list + " | awk '{printf(\"%04d%.4f %s\\n\", $1, rand(), $0)}' | sort | sed 1,$((1 + RANDOM % 100))d | "
+            #cmd2: batch merge and shuffle
+            cmd += "awk '{printf $2\" \"$3\" \"$4\" \"$5\" \"; if(NR % " + str(
+                batchsize) + " == 0) print \"\";}' | shuf | "
+            #cmd3: batch split
+            cmd += "awk '{if(NF == " + str(
+                batchsize
+            ) + " * 4) {for(i = 0; i < " + str(
+                batchsize
+            ) + "; i++) print $(4*i+1)\" \"$(4*i+2)\" \"$(4*i+3)\" \"$(4*i+4);}}' > " + to_file
+            print "cmd: " + cmd
+            os.system(cmd)
+            print "finish batch shuffle"
+            img_label_lines = open(to_file, 'r').readlines()
+
+        def reader():
+            sizes = len(img_label_lines) / batchsize
+            for i in range(sizes):
+                result = []
+                sz = [0, 0]
+                for j in range(batchsize):
+                    line = img_label_lines[i * batchsize + j]
+                    # h, w, img_name, labels
+                    items = line.split(' ')
+
+                    label = [int(c) for c in items[-1].split(',')]
+                    img = Image.open(os.path.join(img_root_dir, items[
+                        2])).convert('L')  # convert to grayscale
+                    if j == 0:
+                        sz = img.size
+                    img = img.resize((sz[0], sz[1]))
+                    img = np.array(img) - 127.5
+                    img = img[np.newaxis, ...]
+                    result.append([img, label])
+                yield result
+
+        return reader
+
+    def test_reader(self, img_root_dir, img_label_list):
+        '''
+        Reader interface for testing.
+
+        :param img_root_dir: The root path of the images for testing.
+        :type img_root_dir: str
+
+        :param img_label_list: The path of the file for testing.
+        :type img_label_list: str
+        '''
+
+        def reader():
+            for line in open(img_label_list):
+                # h, w, img_name, labels
+                items = line.split(' ')
+
+                label = [int(c) for c in items[-1].split(',')]
+                img = Image.open(os.path.join(img_root_dir, items[2])).convert(
+                    'L')
+                img = np.array(img) - 127.5
+                img = img[np.newaxis, ...]
+                yield img, label
+
+        return reader
+
+    def infer_reader(self, img_root_dir=None, img_label_list=None):
+        '''A reader interface for inference.
+
+        :param img_root_dir: The root path of the images for inference.
+        :type img_root_dir: str
+
+        :param img_label_list: The path of the file for
+        inference. It should be the path of an image file if img_root_dir
+        was None. If img_label_list was set to None, it will read image paths
+        from stdin.
+        :type img_label_list: str
+        '''
+
+        def reader():
+            if img_label_list is not None:
+                for line in open(img_label_list):
+                    if img_root_dir is not None:
+                        # h, w, img_name, labels
+                        items = line.split(' ')
+                        img_name = items[2]
+                        # parse the labels so `label` is always defined below
+                        label = [int(c) for c in items[-1].split(',')]
+                        img_path = os.path.join(img_root_dir, img_name)
+                    else:
+                        img_path = line.strip("\t\n\r")
+                        label = [0]  # dummy label; bare paths carry no labels
+                    img = Image.open(img_path).convert('L')
+                    img = np.array(img) - 127.5
+                    img = img[np.newaxis, ...]
+                    yield img, label
+            else:
+                while True:
+                    img_path = raw_input("Please input the path of image: ")
+                    img = Image.open(img_path).convert('L')
+                    img = np.array(img) - 127.5
+                    img = img[np.newaxis, ...]
+                    yield img, [[0]]
+
+        return reader
+
+
+def num_classes():
+    '''Get classes number of this dataset.
+    '''
+    return NUM_CLASSES
+
+
+def data_shape():
+    '''Get image shape of this dataset. It is a dummy shape for this dataset.
+    '''
+    return DATA_SHAPE
+
+
+def train(batch_size, train_images_dir=None, train_list_file=None):
+    generator = DataGenerator()
+    if train_images_dir is None or train_list_file is None:
+        # fetch the cached dataset once when either default is needed
+        data_dir = download_data()
+    if train_images_dir is None:
+        train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME)
+    if train_list_file is None:
+        train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME)
+    return generator.train_reader(train_images_dir, train_list_file, batch_size)
+
+
+def test(batch_size=1, test_images_dir=None, test_list_file=None):
+    generator = DataGenerator()
+    if test_images_dir is None or test_list_file is None:
+        data_dir = download_data()
+    if test_images_dir is None:
+        test_images_dir = path.join(data_dir, TEST_DATA_DIR_NAME)
+    if test_list_file is None:
+        test_list_file = path.join(data_dir, TEST_LIST_FILE_NAME)
+    return paddle.batch(
+        generator.test_reader(test_images_dir, test_list_file), batch_size)
+
+
+def inference(infer_images_dir=None, infer_list_file=None):
+    generator = DataGenerator()
+    return paddle.batch(
+        generator.infer_reader(infer_images_dir, infer_list_file), 1)
+
+
+def download_data():
+    '''Download train and test data.
+    '''
+    tar_file = paddle.dataset.common.download(
+        DATA_URL, CACHE_DIR_NAME, DATA_MD5, save_name=SAVED_FILE_NAME)
+    data_dir = path.join(path.dirname(tar_file), DATA_DIR_NAME)
+    if not path.isdir(data_dir):
+        t = tarfile.open(tar_file, "r:gz")
+        t.extractall(path=path.dirname(tar_file))
+        t.close()
+    return data_dir
diff --git a/ocr_recognition/ctc_train.py b/ocr_recognition/ctc_train.py
new file mode 100644
index 00000000..3b3285e5
--- /dev/null
+++ b/ocr_recognition/ctc_train.py
@@ -0,0 +1,134 @@
+"""Trainer for OCR CTC model."""
+import paddle.fluid as fluid
+from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
+from crnn_ctc_model import ctc_train_net
+import ctc_reader
+import argparse
+import functools
+import sys
+import time
+import os
+import numpy as np
+from continuous_evaluation import train_avg_loss_kpi, train_seq_err_kpi
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('batch_size', int, 32, "Minibatch size.")
+add_arg('pass_num', int, 100, "Number of training epochs.")
+add_arg('log_period', int, 100, "Log period.")
+add_arg('iterations', int, 10000, "The total iterations.")
+add_arg('save_model_period', int, 15000, "Save model period. '-1' means never saving the model.")
+add_arg('eval_period', int, 15000, "Evaluate period. '-1' means never evaluating the model.")
+add_arg('save_model_dir', str, "./models", "The directory the model is saved to.")
+add_arg('init_model', str, None, "The init model file or directory.")
+add_arg('learning_rate', float, 1.0e-3, "Learning rate.")
+add_arg('l2', float, 0.0004, "L2 regularizer.")
+add_arg('momentum', float, 0.9, "Momentum.")
+add_arg('rnn_hidden_size', int, 200, "Hidden size of rnn layers.")
+add_arg('use_gpu', bool, True, "Whether use GPU to train.")
+add_arg('min_average_window', int, 10000, "Min average window.")
+add_arg('max_average_window', int, 15625, "Max average window. It is proposed to be set as the number of minibatches in a pass.")
+add_arg('average_window', float, 0.15, "Average window.")
+add_arg('parallel', bool, False, "Whether use parallel training.")
+add_arg('train_images', str, None, "The directory of training images. "
+        "None means using the default training images of reader.")
+add_arg('train_list', str, None, "The list file of training images. "
+        "None means using the default train_list file of reader.")
+add_arg('test_images', str, None, "The directory of testing images. "
+        "None means using the default test images of reader.")
+add_arg('test_list', str, None, "The list file of testing images. "
+        "None means using the default test_list file of reader.")
+add_arg('num_classes', int, None, "The number of classes. "
+ "None means using the default num_classes from reader.") +# yapf: enable + + +def train(args, data_reader=ctc_reader): + """OCR CTC training""" + num_classes = data_reader.num_classes( + ) if args.num_classes is None else args.num_classes + data_shape = data_reader.data_shape() + # define network + images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') + label = fluid.layers.data( + name='label', shape=[1], dtype='int32', lod_level=1) + sum_cost, error_evaluator, inference_program, model_average = ctc_train_net( + images, label, args, num_classes) + + # data reader + train_reader = data_reader.train( + args.batch_size, + train_images_dir=args.train_images, + train_list_file=args.train_list) + test_reader = data_reader.test( + test_images_dir=args.test_images, test_list_file=args.test_list) + + # prepare environment + place = fluid.CPUPlace() + if args.use_gpu: + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + # load init model + if args.init_model is not None: + model_dir = args.init_model + model_file_name = None + if not os.path.isdir(args.init_model): + model_dir = os.path.dirname(args.init_model) + model_file_name = os.path.basename(args.init_model) + fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) + print "Init model from: %s." % args.init_model + + for pass_id in range(args.pass_num): + error_evaluator.reset(exe) + batch_id = 1 + total_loss = 0.0 + total_seq_error = 0.0 + # train a pass + for data in train_reader(): + batch_loss, _, batch_seq_error = exe.run( + fluid.default_main_program(), + feed=get_feeder_data(data, place), + fetch_list=[sum_cost] + error_evaluator.metrics) + total_loss += batch_loss[0] + total_seq_error += batch_seq_error[0] + # training log + if batch_id % args.log_period == 0: + print "\nTime: %s; Pass[%d]-batch[%d]; Avg Warp-CTC loss: %s; Avg seq error: %s." 
% (
+                    time.time(), pass_id, batch_id,
+                    total_loss / (batch_id * args.batch_size),
+                    total_seq_error / (batch_id * args.batch_size))
+                sys.stdout.flush()
+            if batch_id == args.iterations - 1:
+                avg_seq_err = batch_seq_error[0] / args.batch_size
+                avg_loss = batch_loss[0] / args.batch_size
+                train_avg_loss_kpi.add_record(np.array(avg_loss, dtype='float32'))
+                train_seq_err_kpi.add_record(np.array(avg_seq_err, dtype='float32'))
+                break
+            # evaluate
+            if batch_id % args.eval_period == 0:
+                with model_average.apply(exe):
+                    error_evaluator.reset(exe)
+                    for data in test_reader():
+                        exe.run(inference_program,
+                                feed=get_feeder_data(data, place))
+                    _, test_seq_error = error_evaluator.eval(exe)
+
+                    print "\nTime: %s; Pass[%d]-batch[%d]; Test seq error: %s.\n" % (
+                        time.time(), pass_id, batch_id, str(test_seq_error[0]))
+
+            batch_id += 1
+    train_avg_loss_kpi.persist()
+    train_seq_err_kpi.persist()
+
+
+def main():
+    args = parser.parse_args()
+    print_arguments(args)
+    train(args, data_reader=ctc_reader)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ocr_recognition/eval.py b/ocr_recognition/eval.py
new file mode 100644
index 00000000..be0a0438
--- /dev/null
+++ b/ocr_recognition/eval.py
@@ -0,0 +1,70 @@
+import paddle.v2 as paddle
+import paddle.fluid as fluid
+from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
+from crnn_ctc_model import ctc_infer
+from crnn_ctc_model import ctc_eval
+import ctc_reader
+import argparse
+import functools
+import os
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('model_path', str, None, "The model path to be used for inference.")
+add_arg('input_images_dir', str, None, "The directory of images.")
+add_arg('input_images_list', str, None, "The list file of images.")
+add_arg('use_gpu', bool, True, "Whether use GPU to eval.")
+# yapf: enable
+
+
+def evaluate(args, eval=ctc_eval, data_reader=ctc_reader):
+    """OCR evaluation"""
+    num_classes = data_reader.num_classes()
+    data_shape = data_reader.data_shape()
+    # define network
+    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+    label = fluid.layers.data(
+        name='label', shape=[1], dtype='int32', lod_level=1)
+    evaluator, cost = eval(images, label, num_classes)
+
+    # data reader
+    test_reader = data_reader.test(
+        test_images_dir=args.input_images_dir,
+        test_list_file=args.input_images_list)
+
+    # prepare environment
+    place = fluid.CPUPlace()
+    if args.use_gpu:
+        place = fluid.CUDAPlace(0)
+
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+
+    # load init model
+    model_dir = args.model_path
+    model_file_name = None
+    if not os.path.isdir(args.model_path):
+        model_dir = os.path.dirname(args.model_path)
+        model_file_name = os.path.basename(args.model_path)
+    fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
+    print "Init model from: %s."
% args.model_path + + evaluator.reset(exe) + count = 0 + for data in test_reader(): + count += 1 + exe.run(fluid.default_main_program(), feed=get_feeder_data(data, place)) + avg_distance, avg_seq_error = evaluator.eval(exe) + print "Read %d samples; avg_distance: %s; avg_seq_error: %s" % ( + count, avg_distance, avg_seq_error) + + +def main(): + args = parser.parse_args() + print_arguments(args) + evaluate(args, data_reader=ctc_reader) + + +if __name__ == "__main__": + main() diff --git a/ocr_recognition/images/demo.jpg b/ocr_recognition/images/demo.jpg new file mode 100644 index 0000000000000000000000000000000000000000..be5aee506f68861583903d04c526523afc299ab8 GIT binary patch literal 3275 zcmV;+3^eoq*#F=F5K2Z#MgRc;000310RRC1+Wgv=4-_35A08bV92_7dE+-%&EF&BoC^soAFflYVG#@89JvcHvE;BST|G)qX2mmkv zya53b0RO}Q9{>OW1pxs80RaI300000000010s{mE1_uZU3Jd?l0JRVR0s#X90t5pE z1q1{D00Dgg0s{a95d{(Xb($mz{*4NnC+Tr5k$a>K1Te(yALI-Fy@X4~p<-QK;r9F)&BCFSzk z%MzZBaaV^U9<&~xN{&gyP?vrH6x6$lMGOu>^rbh@tE_vl#M91qf;t+hf2hS`@T;5| z4PK0mipRLkC~RjuQicVN8#P(A2$tvU`5XNz)_CM%fN;EbrsoUCQfWcUQZWIrPAODl zIOd$(U}u_2R^~oE}CPCQ@^xTGfw z$67|o`qDWlzzTB>z#ig}SROOadJACiMKm^g8X3xio&Xf6=8Crrs?`fgo&J%l=4ZfkN{tNQmMv4?OFQ8__DNjQ}eLG ziqG)oo^7FPiF4-4OB%bY+j+L@COJk7D%PKPmoO{vSa5l(IT<3dyg)etm1IQ}Tn3Fe#HO zNQ?sjNUZw_z7VPvO>ukyv0Hk=m407q=^CX!ExK3eSVi zy@h8DfZWn)T9eCvEu^Ckods+Ik(1Xn;(lH_)@=U(Yjc#~4r`s#Be~Y6o>}A}BYnF; zu7c|BJtodF+^B43rF;BZnS$hP+zm&iYU!yXl>QTgSHU0#`;PK5YPN$AX)c+YBYrumma7@@CF1l^ zfDgSUw{IENH00+6vU%-S_2|TUJ2YhV9qU6#=S#P2^<0{sYlVb`$9kz{aTVZ}G#n|x z#a8hoQOBe>?)Xts==P!wE(IfyywwA4r^RC)#Qs&&6NNt2PU`b*ti-PvKgzk^46`+i z$&I_DB#~Q@O>US#pt$SnRvPRg7Xu`AsO&Eq?cZ_{j+v{h;xbu?#w(t?Sv4I=H;>#a zaaygY!*1yrz$9P_mP>%q9D$4)?;GUDFRTPok;Yf^16NvOqhBXYet?NRGvI*ozOKyT8kc%gH3V8rA1oL5q#&?n(kjTIsqB z_Nw<*%Xe&IenWPq!ii^WsD9${So%%7Tx#>`EtMH6qos5M?tnU)9F>TX)C$bIyPDq7 z=5Rn!T>k)tZe0DhZZgf0ORgp7_I_t=$>whSE2fDT`y~d@a}E#XSw0U%TWg>PK55N# zryHJYaf7}UNWB27B_JafoDk*fMF#8%cV8RHT)J3+Ye z?JuNb(aET->?QjH-I7GFF7ACP@!QR*Pc7Tzd1-{tsHkr5o_`M9mj`kg$2E`PnAAgYCpj`4((Ajf?k%lj8To@YJ7TudFp-2E6amFzTOTvUGm+6( zt#lo74OO~=F=;PTOVcnW`&V0c<6=C+-fWbVPPf*WX| zVpTviPLABD{cD-jQJ-D5j|9IT&ZXBbE^V&uAVNm->+eq28UFyZbj9h@G`=fFl1mT{ zFy^{=pD?LDfKq~YbgJ5o$h5MSV~>?i1#(s~Tv}<8Uqzg}g^$xU(dpWn-rBT_)mP>f zN5byYHSDd7naBs)x*^Kr-jIL?xU5@=A5ysTVkAO`Evsy(ZyP~c$z%`%Y>$ja}hEKf!_RqO+NZ{6NGhFFxV~`d>-nDG? zDDHRw=C1%Tnu6x;J9zgw0CmT$WN8-0TicCJLGwxXbLm&LE7(&`xEua$hmU&C@Qf`U zkV^~~a@D>^BBGvEhBjtwU~^b{ay8bkCZRg*L>Nrdbp`uhh;2gr!2r+an%TP3l6#0{ zZn5W_;(lo0%BkeJe9SluM~< z5#ViB3PvAvRb5IR(IK@{n=z$fWYTiC^J#Hg!GNKALE_OUlFx>m}L)pZe8 zF&d00;9{}w^jEr^5XZ~zGg`LR>1}S2Ai|2DKQIQV&uVTaZRh2wji@j*X@D_`r(vf3 zn&Ib>w>N6ibGa}LU|(qxUR|Mc`?Wb0xT}R#9Tx(ik{K=-qi~|8g5Eer)B~Ste39;r zkp6YeTxnN#7h*P(Fdm~d(OKHZXKt{>HmZuBpr%NnhSSoZTR5}KVEz>%z=(2+0rsg* zPvJ^bmLE!Mjj8WgdJdZ(jtJ; za%#aNiettlQiKEeR7S!xkVgZxPk|DRqW}z1pO+`4RffzZ!_L5NCy%9DILRd7VzK0o zeM0HgvXCQkKfPLUK^#!|XrM2qD=GAk_?t&^jfKz51zyx%XkvGTcK|w90NP#M+_KE^ J9J7;O|JgAdAf*5R literal 0 HcmV?d00001 diff --git a/ocr_recognition/images/train.jpg b/ocr_recognition/images/train.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3d691f1cd6b44c99c1b89286573daf1abd6dcbfa GIT binary patch literal 30972 zcmeIb2V4}(vM@d*6Cgn(N)QQ3&L{$ll0^hmauSyul^{W25d;JY0s;!kB8Z3xNJi3< zqe#vM8&c z5fPvU{sRcpfV;AnoizYxY65%!02~5HiI@R05F!Fkc_NnIU=^a%0Lj;KVgLxW14w_t zxdQ&(KR{|`h8Tw8gN$I*2Ue$&DP}>hq#akaQ33QCh0y! 
z@c0UIe+8-gSR;m`fdbN{fWuE#(HH#*GY8Am6&203uWG5NUseVM0f2+AEpFX%B<27B zCueuptEv|{3=EAp4h{pv024qCFazWkP`6tbw6!nq)7;1XO8dKyoq?~s1HFR(xc)oj zKYt;&v~q)j!XE-FTrAz7P9W?B0A!BPTdwW^a9|EBKkMm!3xp4z0AV^;kU%WxyIN!XY1;XI9 z#AyyL8vAtiaay2mil8n5P7t23wNl@I#vnf=fTfj^G6)NRFq^H1+As7;F1g*gwqO30 zj*qS4zV(81NL=kLE@^`>GYCI$b-B8KFOX-FG%H8d{d!zbM%H#J>c8Oo-g5kEiy&{LFI_#Z?(+uH zC+)IuRaOOIkPhjlm9y@?-awd)(b7U`U$;UaEDz`rSpZgm3vd(sgaXb04&VT;0;<48 z@b4|a6|Aua>;Olw#0spn0!!?OC;>uN4f5rKB&F>gCU$0)=zZ$&DKgRiXhoxU| zTu2#6MM%ZL^E~NgQaRGIq~`z*QV6L6sXVC?SbC21{BIm+|A9{qzy??XIsHcGHsAr? zd7lqG;Nl}@kJG(N9`t(o50o3uyhFe#z#@7Rma-&tbB$->)YdB^v|(4FeazC*b&%BPWn6yRT2e z?KeEZ?_{V2G2HAJ{Njb>}fP^)}a2((}aPENUflK@FuPyHYFIaaD{8avp`)@7hH;nHju=)cQsFmO72!6G? zeOukX`VPpqBgnUvyOkH%cL0E*%PntLI~!Yf4lxlCX^snES7ybb?hF+Y;IMFXus$HvnLe_`U2j z=o62_0ia9@>fw6l8$Gb&?EnBbYjLr!--#$Lg1%{jK-dEP)xlT*z{L;rVV$8WlJAED=xnC3?9i}qEr9}_I_ ze<;E;&X8b0?=>jiRBk2Apl*{1mKh=0ch+-w&Rujr17J$#f99>fU^*^p4mYH zkm`b0NhSaf)A7d>?-77S76K6SiU7>4RHpsJ`Kh0uw(+xF{PYR`Vt*(v)mJR2U%!|8 zBHeZ*cswUQr@yX|c1)v~uC19w;EEXRNG1U|Y#t6-6!j(m_h?~}Z2X{GPB$O`dF|0S z#{PQ>?1U~{4cdwTaLR+;+X=DlHt21_UN8KGW_uFjL;%uLD%yb4Y2p9llYc7_f5Z)C zGXtq30K-c+&~EgZHn(TwE{@dA8uapNcsp|1LAj5&A3naW0#&)teC`w{LM3~S0E|jc z=|6bw zM&0YnXJZ6|#&qXVUoIHE=JW>j%2oSBy;6}eu7W0}IWMIOoyW(g=w+v3%x~9Y%+W@s z;!|d?Bv`r-tgKfXJ)4thy96c?3!gAGz6hgvUpovxI=yMKNVNeG#!&t|zBCwn7dZnX zOG{VV?HU_v-MZY(*kCPrc`D5FSX0-zycBs>d^5cGF>`6nU6(L_4rGv*+mv-G*;{6t zQU88#tNF*`8Uo#9TaUSH^WUUBGJLuTq#iB3h|BXmg^cZ-rLW4;oIx<#i(oI6Axoa7 zk84tXC|JIBn>8ij4MS@&i@}>}HGqQQEI&pBYl&KDq#E^{>BtLrANQ=&8(?GUfy^ZL ze-cQL=4<1MJ>5M)%=K|TUbM=Xw=7q`fdIU7Y2ts4b(qPcaMdv^6rL zAr*P}j1%mEZcLP`Z$3xc+IR?;53D{K@Kr-Iph9Pa*W56AUU_wU)=y{T7&P+0zY-(N1LnZI|^spC{k{&ivnhzeH>XJkfvASbb5SPkI8HCT@z2aW4VwXnTD zGa{&($txTddSh8<1f8EimYEp2c8b z#ES&rSdo(o0cf}?^xVaXvU$TMVfBkTvoFGXI56*h9tkc5VG})qn1bI=$H9dp^A6jy zA1Qh{04=K4tzw>UkO>^Cq|sojb)j#6dOub~=nmz>lp?^^9I2DTYl7yVaTy`=XYdN? ztd(cYFD}!-1X0|c9P98t{;Xu60iN9I?fq!lApKt3)*>5#D=UI9`YKnTJ!hx@ub|pN z24>3UFbQ=J+nH#LzGg|QjX{6=#nse;^sN9-r?Uq;d2vzxJgo$PbdJ4%dD%!6%fs1! 
z5~Gs@TOD7?j#R+!wru6kM{TG4C=(QxQ%yQ~Z^hg#;ukX&f2 zdT{-O*~RWA)a(0M59r%zrvnyWfkP`ISgo3@@T`;KbbrTSy$h|aKOH*mwQ}0h9&Y5K zKva&nqp=6kZn<~Bi^qWUftr(y(LFNQ>R#B*yl_49!pzkbWUQ0EicE7IglWhtTJN#e z?H#)uLFe^M_hP6s>-?xp0PCr%7utye{Uacyp{rg;A=6H_Ca~U40svKDgisRzM|b38 zV~4}kZhdqJo&z19?WHm?ve6)%eu&tv+w^rTmyUQ=${ocrr6;_pTp{G2V22R)1R%m+ z1eafZ#`hSC{a)@g5+3p@Km2f^ePnvWK=hgSm5efrgmTE<#HUC{sXXE@n0rU+#3c7A86)p| z6WP2LG&}h)iFG2_8@V!574_UHx88mCOtw+U)0-^^>6DSUO+~MT z5&-gz#uAhl`*hv|<7xa0x4SkrGhgV5GCL&0)joDm!(`5yG(n>-9uhl#@pu^dPO!hP zKlqGEVD{0&1A!u3bZ`PA5NCQ4{9W=v>2O-z`b8weV`oskCzDo>*n7*2!02 zF;CqFbd>4-HzyHFKb!!}*$zNfHfITdisTJF0`R92?Sq8wzj5=)Qv`rbb!N?_(TJ%h zx9EK(zkh=8P80EU@1|wE({J6oXC&qFUA&q z5<{8$I!^S|%1C%V=uuL7bpG`tvh%aN{P7$G1^J5}1mJ|1aosM>$jV^dfMForfpaY9 zhtf9Rs0xUN1IvrKRCs!q{fPrN0eEB$c9f?HKydODZ0|5^jtPMd2R2UsMER+gUyXr} z<)OpfGNXxQ?j&AJ%QDlIb8~F`Vc(k!WZD|djt?~da=-=886aa@M0sRFH-%Da0p!knuKk-U=8ylx_@f5@HTqE1?0Z{$ z_jECrpdhy()1A63ydg+B>vk-h{D{F)L8K*sH^gWk$?+C=|`=fOs^Kg~^y@qa1+ zCo8>w((pf+tnde+g9!k~I6uCg*&H9=Y&w4nMva(b$$$+(H3&dY*&<10TKH+;-|!@v z$vaK}TDsrD2Irgc5%x7X1mOFER4PK>PZjk4_f>FRo((-{jIz#}b>cVcz0qrpd{rXR z(pXh*a`=Ux{73vb)19&qugDL3@cwJMrLyb&tqgcVM)CaJzt>} zcSrT(?V4S-BBB*{{SE>^39-fT&KR$VE>*Mibr*#tNYQY1KA9WOPbRw~Iy7u{JG#=L zA2NNO0AzWB^X?ig{Ks-p1Ea@$GP=4i%l&!I`YQ3&UVpXzbVA339m@;*EVROoJ`7Hk zyTPgZBu=Be8BYw`@%ZTI`B)zeBmQ3G4y*Gd09V)Z2|xfG_Z;|H|5HCd?c@Jt`_N^T z!eB17@lO_ZIDA^zb$ivyKpOqj_4YGMx1PLz+UfpS$2aizE^ivkk6fvnAnz|;b5qc0 ztVBt+O_&#deBtdC`!WCJ$$|4vU#hi)OK6S1iF}>itw4jl2+p*e;|YL8q%Z-vG@H9w z26+VbN&wVVgX>}IF{a7bAsyx8TdL)BI?BMm)9I@_gZRQM8NG)s zBLLlz2FDTKf2SiN4F0{=e_*&V3m&d0l*19GV*+hB3#g zl*;jhYC|5H)_mS0GuLP|-=InKYTlF;V#AXr+G%Wo1JY{S-HYL;Q;GkLPO9)qeQ-@s z1?+WRAfvx!Twe<}!o{788Dhg2A9z6dWtM4Vh&xZp+rq6NhC7mxei$vw9V~GO;oqvQvIf^KyooxC6Va7^V9)+f?c8IAy|A-!gV|Sg z@bii=17qxv;m9(xoiUVntX8gLOl78H7Xi0KH69L|v}}A>n3moM*G6Z)c1Y-2(q7ur z8Wkh}MMce9zT45eX=|cv1567fpBBZ}6xao{@DDLQ3O?koAqfff1i)(F=*Lki>3PQf z!#+ndg<|H-TPf~3`FDkSIe=M8Ne8-P)<&v75rF$a&FtHJusJMb+FYZ-r zidORJh?!s*J-f_sLKRYt@=w>(=4x8pcHRF!Jzd)$aI$+OwFbcAGodC3=VRZrD z!UBKbQWeX^c9sCVxJCeYrwPE3Z{ezc%HmH3ern*S2KF`ZdZ}76NIv)8s$LV)~!>pYW9{t*q{I!ir`zdcHAH^z;-PR^TdoU-HDH zUgGUBVJ^3OeCKKPE>u#F^}B?aZ=PC0?2HlsRXNYcL~UMfHU0;Y9*t{*_kHCu7vt?aF*B&(ciEXeUn&!p61!+2cI~@$?t*4V}D_m!5VO^U< z_58 zlSy2v#*LfCcVYQs#uCbwCGt?dFIF#Y1UgpP>A*3_=}q^%Rd_xw4x@1y9B6;?PJxZ| zx{S}-jCcB1_YrM*u1y}@AVXWvVv_DZBmjO&TKINz2is$Q=@)x*WxkYjmEyJo2J0au z_vJ-gLSA4#6kYz}f~dFb}?%he&H0++2_ zC)>vnvk|j7enRI>3XfvW;X{M(_p+P!K(bcd_k%4A5PT{D_#&!`H6Z|d;5x}%qi^^6 z^REH=MsR)jH3WYd969}proj5M=BEz6i2i?32dbL*UJRZ2D4W*kxh>hq3G-`_74MfK z7OKp^IlF^cYWt(~$#5Gu6?WVF!?ky`-8$ajtff;K+jzsiS_~Ny{KjsN=Y6ih(n!?X zT7k7(p_?Ih69niELGMtW6i}!OY9*34hLeA%Co6Cpx(LC_L|Jy4~8B z(G0K9wPXci2t1KZ<+Tw&j`&b4M%#`MlZb_5yqhy&6mtI9t`rm#9T3tmF1JM zmXw`Ai1&k8YG2aF|JmZ*|Lt;lU-TpfvNXy3sV7(1Bm22vQE`ke^n!pR7f0N?`1;#@ zY^ukn5Xf2|1{~!s1bqmOb*;vwRaNrR%-UYNo^`!QUqg6P_SqHH;2i0zFNG$-Y&`KP zVf2kEa(oB|jwRK?pEJ^%EYz!*rgU~kZkesnzstNA%d}86%0emL&}(lLC77hc>`nhk z+B}X|AV1;GE&r9Yg64&?%In8vw#toG5lVJJZ6auIx9(v02XtOtIb@C1&m&cFhM&B` zd{sk>a%4>FuBVNJzW#vDa#+?xTD@%ad|Qvvi86H)=p2hNE%t;Ayq?uT4`tr8GF&Oj zQ=I1I-!O)bGbo79Ek1t?1}1I{4-0XTAH@W_YdxZDzR4Moz2kR$E+q!Fo}G!+K!s$? z+})rUU*45U`k3*uB<`(UQt|5^{R@~Y&v%MSADGOU+&6Brqab}ZmVN3#luReeC7@bv zxPE37PHJydl&CMVk|1N$`i3ivHNU&xz4esgJRc8p;KIi^vH2%06NZeKOW^<#Lg^K! zj=ttO1EB(M=~VsPiYM#SqF~PQyW$TcZiYrhk=5S51G<&cd?cHx;0Dh4*}cg?e@?Gg zJu_Q+y_FZP}$w9^LT`uG-^*+px%PZ-_||!gNOBpT$*j0v`tQc<2B}%HI`W1%qC9Pn3r1X8?rP9tDJ`?ka|H7! 
z(bZi1J%j*MGk9+2N#LS*w)_tc@nSbCD$GA> zP7YKjFwfJPz7fW8Av0fDF-%Zx$!Dg8??n+eysVm zgOm)q%umL_g&LPTf}1KEwC>Y&N*Vp2645~2@iZ0JaEr0Dped_}WR3aHxeSA0_>VgT zfQ|#D8f}A4ez`2g;PZHZA$dJheHHuVhP=BJ&pB2l5eukkDlMHj9{FBA8KtE)C}h#| zs`s-?el0HJ6>t0;7raW^E=*dqlZjneG1>>{LpM^vk%>()o-$IYs(r@qk59w$l_p$j zvyS1t?GyRGywt=w^i}#zcQUn3nbR~w!mx|Unw6C7LusbV7LK_WT3Ti$w-Q&apfhrc z(+OF)m|U23BYmETC@$$-!DVXJ(I;~@ujI0qZk5mLCfccP+5|u770JLe?6Ke7UjaUa z-wz`e5&$j>0>BKJW(Q-Cl<+U7sL+EQY)fAkiT^puetO3(7&00?ix26<#)M*R!B8p; z45doJ<{}`ozYM(UAYi1E1+uBaj!MVVfO&bau3-WYD?$Jwe_2I7L;%2Grz05bWP~+E zqgAn>uTr$ZtJUKrh<>v0Qwu+B;lJ4;bb6gOoE+1FbjmSKbvC}Eq<1&f+4+znXS4hM zS?=!-=JozL&iOyfh`QS-f=jPwu0ohgeFQuF*F*Zvkav$qD@kVfVe9r5h>h3Uu+`$7 z@l}XzVajA)GK(;7Th#m@zBbMox$qEN0DAA~IJw*B2-z{(GeMY?(pH@HcSPowLl=EC zHdQAjbdP>g+pgJMH7uKJwl65dp^< zVs-y(5LPf$?piJ+F8OMK}(&j)56 zU(>Vs6q1pSRmVlkYL83=^f*iq^;xkwI(soq36wEL-8gFqj9`f&_x)y(yE8UH)x5U( zEB>?yjbJ{CceJECtFRD*t$2AHl`fZ?X;gZqf&I?0C7+B+&pU}~(1bJBWn`qu47Es^ z(QeI<0==B+4t@MFtpKBja{2Vb0s<-47dmyrZ@LBbTE0J60S({F%b6}blo9S54@-WD zXdM0mm-}pwmYQI7yM{N*?rm?u)P-ZW+1U|xd7-Cl8sH=_JEQvDby|jpXIGxCVOt>F zd6$szwQOXz@i5hc7Zu3ug{%Zfkpfj=eIj=Dc&l+RDkfT%^HoJ-<`)MvD_tR0+_%s- zuP7cd#eSpPOy`xa2l`z0y&VYSQb0Qd?uorb5~W*N;YA|z)g4TngpO^a_`*ALUd3H zF)B0AR}-4GEKi&CQ4?|{S-meeEvezT_43A5!;tjN&Z_ia(!z63z

|YQwunfW3)M z?RR~8s=ezs-y2xkz7&-yQ$MzG%88fad{9d+87m6!!vImWfzQbQCx4k^d?5#raq0Yp4Up zhcX~yYFmcy*w&m)ATtjy`4jsOh*F6OhrJm0CMxD%J%(;j{lvfO@)zu9;~X0i(Fs}= z;qN=~xzmI7?v*b)Bf;oCGp<}>5}jzYa%;S9%zMHrZWd1_aZ{`3u`dJeog{qwxyB6L z9bsGpqAYqlkQ$NW7yVbFu&>DJzO}oi)pi>4?hNitoCEeUdJ0R0%Prd2PG*0w@1J;~v!hP; zCcf2QWFHbSCsWT;hdo6hutHR$tl)+nJz26NW3W!xyN+ngYUljbhk2uINWWL-y##f? z(9B+D#4;B8TUR%OGZHHND^@50ASM79aE1eB?qr4Tr^X`iP0^o2$fxSBeR)LywDFC} z=CeZy6lRq6 zeL~Ig;sN>i9h<+DMSF6&Zv)RNzY@B`RkE{HnzsU-)j=Y5y1P|JS7q&vZ}<3i&{T@8 z@s^A}@kO-EJO@)iLf7i2b3KHq6yNggZaRG$M;obyZXf>}rRAD8zPMt(vj(4HA5w=M zEgHmC;cc2BNbv0n_S3a!i&!~{(GC1LRV>)1MoRHvq4;Yq1OPqn2O7vfH(U16CoX$r z(KGDBlHjH(PV;-H&OZ~7@S5EjL+OmaL|d~mY2{Rk-0%~&g6`=Lvm@^jfWG>-uqkt} z4SoN}aR_Ax3IX8-&%MwcIsKRUv|kMULz^L}6vQ@|!E@$1tjUJ}^ip9n^oJ(@f(*U@ z)+Z^9CBYwpf%DkjJvf>HOEL>bcbe~k-ygq$%*w%*Lh)C#|Ek@9vsXjNGIJuH2#i@l z-hnwlI4vwW9!yKb+T`KD`D&Z)$#Puf+vX9=(!ix_acmOx@Y+kIa}KMpf8H2>BMREx zcSLK@;~3K=*kU39IC^zmnE)h&Wsb|k7&A`8j+moCO%=P~j!|J>O@eFoZ5WpCoZp{~ zy7U(A);p{shNtwh5@KKOKV1%)hT;1W+m|7m$uI)Y{FmOtjsQ4YAQtH%4+ubk1NbJ6 z|20Kh{8&A#O8`cVZ_NXD*3e?9qF~=MbQ(;l0$V4zldyYV%}xXW3=`z#Bff8}-zyp8 zMo9n^^7o}i;7B!)^3nt#Bp=;6T?l8viY0Kfszoh-fDX0AhC`-v{3n&fD4Nt1gYMdb%mHbR3w% zUyn#FcRsvpuczCnhT_*Gvwzwn+EIK>Qu|{c`X@8`mPGdd z2ZT2=PSHC~kM+!LlUCX~H+cOvrPLwfH*A<{cK+8h>&W#xiXu1fv_X87E|YQ#PPJ5`J29y)9vgOLsgG^KI}dk zKF?lk5_a-|(t4WseAXj=CVAIGo!RKp>5UL9)C21GX1G?c@mYR;teT#R7gXRfSIQmH zn|Gt_f%VsU!u|48Uf@V^6>i-v;3VI9`$T8KFvKV9S<|?@UZyiQ-w^M;BX9QneAmI(I%ei}$h$T4d(Uy(<=he7reDlV1RawtHU}w7^Co3e z$mrn*D~VjPCMPx03_6Za#?RLcw3e_7-Wv76ZM9Bmf2EMIFKp-tzB|oe9QSHQJ!c*KKB%e@wJYr=Pe418-xo zgDA31kcQcn#ad1}!pl{gPMIP2W~1PjB+P>*fS&CZN9Sz~mM=HQ%k2c?+}3ZZxTOTl z=GgpppqxEa)?HPuhuF!b-fq~XfBqyD6#^zl`JMD|X^yj-Z>_doQ*iZt* z76Xafffw@NFj8MqDhjteRwe@EE{1O!cLw6g-Xr%GQFeN_OdiZ=73UOq2dDEVthx@J zTc|P`sHXBFx!%i@U(_w&$erjsoL-{Y8bUhDxO_lnH7;hGB`L^f{OLcq_ zV(`=|_ddbAuZ;HfeU;B%dsqEXDL;f?OZr2wQ=H?(&1p|}OlC}NI-YJf08)Y|hU5$Q zckaM=S0}AClXus*+n-_((~Z=dWA z7?m)n=T34MkLFG*=B4jV$(7CAvrF6Q8lb+(sfuywO8FHS&4xH0j3ur|UX=Os7yT3N zd*2K+4K&aLiLVr)o@oY??euHC>W;~9H(K#$+Xq?i>ilKxR{|O~Nb{P633ewoQ@)RP z<4$Dll$2UawGgPd)4^Kc zd|8B-u~}Pm@Y^HV+gGMJE^c*%J2G>4{nXPPW8MuN@h791ZNm$BJwIYx|9hwNHxyuX zDaeC6gFBf!+|k4M&N*E2YHOiCZo=l(s&AEy=eIWUH}>}O} z$q{F+r_!Um>oKSYgL!Rbvza|#+d57dY)1m}gFBJ5O&e>{w3n0D;&#>04Gx3zCbxT+ zRQ}rTz6qkd-`IsOhcs;{>)03znMjY&CCuX<}z96@^qyFB5hW+oU>31BOXKh zxqB65pA}NN%{wwMEL+zd-+t$u`@j#i;< zf)w1QEXros5niSA5$X)*w@E=<`T?KwBVG#(J^giu1%`h1o%pEv2pn|cHv3N8rI4os zv+JFboWHrf^v#KbVT%3cID+rnZ;lYKITrdom2#P^_^>@5otSxL`v<&^-(UMJyT9(P z_Vw`$*&Hf}70hhU7#6{tDNC}2>JB}6nQNjFewgTbTm*Ei7?dMr+ zx&&8qqx7o0XPu-Q3rEJryzZL$$sa7bmotkwZ!luY8Sl-_{di0)VSEdEUVlCMa$of@ z?zso<+01#w8Ek^()^W>`JF&^LV?@pERc^hvRt&%wi}2pVGReR2m;yr*OCxk6MGpm6 zhKv;L2gP_URyJ{8J4Nw~zLiO`%bwXUB*(%n!@i{3%+O2;*-6?`pe%6pjJ~Q0nW3RN z#Y^f7wW(CBXr@{7V_wZlC{i|&Hv0*-0cZDN&V+HN+;rt|rfCMzX0CT(Y-eH>EMj8P zEoDWm%ob+2@M0xMvn)tGnym8yy(C7!@znWbpY@s~wRC}Fx3xF<;vydL8`r1MBRhk( zyVJe2)Qh?uNs8b$1@{DJCKd!nPaTmKoqJ}vn{6}@mhKgO8Lot)yw98hcLx$CK5-{8 z5kIPTJq7{V`o^^=hkO*0(16ghoTV|tU6GPxQ=g`_KjS7~hf|-%CA?i0j zxT@%jwk!^fy}ghPqb)Y{g-k`7H#p=7hfa5|%~h3EI(U>1yEuoaiyjl3i$8c`L5}}b zx4Rrey^m>i*z_p5pYW9;GK-SpFpdyOu2@-2d4$Z z+3wIDJxm%wa&G~`UTK3OsbwZ*ZVrM{4_~0%-W^0h!H|fJ)Xo}rc_lxWl|}4@Fss%> z>7=hVMCEqgchaCCb!H;r6IAqzB}RREv4>x1jC!rppF60k!}*SK{aor{U%_mX6+B~) zh|m5(Xmz%8&TQr?1B?h^XkphW_B6qhs}GT4d!fUfr+KXa%XsknVpam7yrT0 zi5)2hjw=*%eou|(ZSQ^Tj;{23Rc4L0Kz0|)=^MKowmYaZ_(Ezgvh`i*R-VO8#^&2R z9X&5NXmizCKAt9lObv#6n4j?FKuSk?%rf<1FTAiz3hoZOqq;CY(kzfn7gy{lTGLvy zVP;(|g3h{|;j3L-8ZRtqes~2nY>*qD*|b<lTtsvnk(nyqTpf$G!HY9bk3P{tO3| 
zH*zvWR&r{MZL+?ay2nXEj<1h~G9!X!)VS$z>tVV}uiwiDzCY|w;mh{agD$aV}PaW{_GQ{+T2v1Bi(F2O%vs^ZD3iQ>C=r@>Vb8=b1*>f4i2^iBJfjrB*R`E`k zcc}%UpoR*MRV$yrsJV64@x)-~t~m`}Ou8=FoI$D8Ln>`!d2fFTg9XSBZ zW_WM<9z#pAB}k}M9!!1ptgx!9#8jyN21eZRN?LC~=Ill`g+*q-C)t zo1Y}`Q0)o*^QpcNBzy3U{9+^`WP6;ARlEbkX*gW(oPhTyt-jjy=Gcc3>9wX+iUm5N zNK}w9N;t%y9-ZA#O`S3_s^23qnmxo-5uAJSz=+i2kVvivu@@i7-y-68q`>Fv&;+5w z^Pt_DCYW>!z_*aW^pfj$^6wYgU0v7In@+8=eoyN^opZgJ@1)jKMb{}f|B^5@+NHi) zsC!*jZl+3T(5NT3=9rOLO`_Ch@W}wLmlTnKL`Zt1s9;Mo-z|l~bDRJe#20DNc`ag%*=SJ5hK1me(6l~1a<69^lp`NvE;HYG z6h>Vr^+DwcU~~IZ1^7A^FZNn9nc|Xs6e4t+08oz_w(mUP>~rF*P2WBf;0+G7K9u`9u{OsY}Kz-^6w4MzYsT96|2nzxE1M z=#s!m>-IlQ+FTzQIPENC_^hSLkVf&GkOphRe3+cN1knP5x>9qN^;Gi&Ww$K%Yhx3s z9QSOG47sdW#mpx4s;ay9PK&(c(}A8B=_zQ^U}CUh53CkN<>m`zPrHQG8M`nn=?yR> z-idT^p%Yd1Ti|kW5iHsz7tj=}Y|9w7A`)AycnnwT_h<8k3|1=J%u>45%UGx9*{#ry z+A~OLYkCsbd@{K2R?bAt74h+ffzwU3+H=|R3`o@=SsE9)%peIC$^+z?w3ia-^Nj_f z4s?Cs)Fll7m=@2T&_4JS4R0_zr}eO$HwY`3ZLDSI=W<&r({4wu$1dUKhy~e7=clPw z?&anax5e&p`QO&QEL68ps|jmjI}`9(p%r0)Z$`F3QA%b`or$-%Q3?nY*b zuWWssawF456fG>QnCr=^Djn!V8iI~IctHH-H6!?<_ay~(DRf>lJA;#~)yz0=4XqJ6 zWmLV#%#KroGw0PZt)|P4@h!X;!`0$+E!lUm5mL2*4`mjr&?jg8+?wT?`h|*-Jz5#| zyCe1{a)pLf4SGEmTwaWEED>K^kIK74i{)pw;mE9NdYG+!QW^42gz)El&dD)K`7zBq zJRB{&k9d}(?w+M`;=)hyeR%nQ@_YNPYh#(hn? zW0yxpcI)uD+D8<(`?kmva(~%D|M~+z!->INy zsja6~Fp)j6Lgh%rn-;pMT-dtfGZr!KkzmDW5x z+2D5(SZI(ZtA+_3mMDw()Qgefe?3Jq&Bo{H&Ny1WCP^i(7}vPe^ZB{dtV;jtban6gtgUrviev)QO^cBIVVl}ipolWy zYYIgr&Q$swhldqU3zmdVcW4+Eq6AO|#w_ik4jzsI%Ka7Nfb>W!S8OpH4v&fP9+9s? zxvkvxa>kn^=Fgwp>g# zt)axQ5 zLAi6cuCG~9h@Iu2X`v(9goRe~_-do{B9R~Q^+v^^1}L5k!I1P!*7GcT9Rb6!^ z9ZeJ@2+KX|Bqsr90tZu{BU}$BkYl{oZyFL896ZeTjePlZtKw7d+sz{!8;U8Ad_v^hr0*Rc|P zZVLUWE7wWjbMnx-^UWljdWpteGD9B5vE=zjmA9Ji-TM%7S*)S1s;(LIrM;3(DEZ`6 zyPc$Zg_5QneuLP#n(#OI-fv#)kSP(lMes%3pm4UAaen^RKr3T;C*N$?^kkGRuX5}e znle7H_a+&m0Vf$7N6>{0Nr{b}3|{)$xQPWXrb_jYH_0c7?`YbXu$i2OtpB_=&%YOB4m+OxHZnov4QOY)ox4x}RwHC|` zB*Gio(Qbv{qyy~Pj*X*7wUF!2oo4va4f@QF^&Vxcc1MMA*S(H;>_g94npZS_{(98e zVt#|%9wz8>dP_rMP4a_aeE29U;{m-BfkY>|zzq@-sJw{JkTCzVNZtsR`R#LRR z%Ed`)d0Z8dFQeO%ye9gRJ*X3Ii&${P(|KFHXMX@;72bwK$~u=;!)rvxkWtl$Z^t8?*0rfjJ#P#zEM!oN(HBFpRbIOJ=$8vnvs^E)_UUcoon^f z3bGHUJRf59XIR;Fx~n;v(op_k+~xLqy~d+89Csgj&FdU9>U5)vi8aXfPOcEkI)T*o zb?leb!dx)(gl)dDwqhnD!zn`>bX;e>7k3=}*P3@TG6F2zM_yUfD zKQ=`I+~PlGjI_BgdpS`D9j%z(PrrBCqKrYWyJJDwc}+kK)364)fBxq`ijYoR}L9XaK{d3j5<56&#BX@bH+gLB>LP-e3-| z@b$=1vpK%$Gtm(c8VU9gqtbAJg))2}QZH>_59?WORWhn<;I!Uu>X)#oOrK?UHh!*H zyfuVTyJgY8T;{<25gKHL-9WR7)Ff0&{*qtfYK1I2}hw>$2f5XSt4i85o@zj3PnXG)TKn2;U#r!kp4Cm!iLETYT@$ zbBWP0l~Tu+K-2ugf_fgU6UHu*pY@8XRo!HW-XOE?IO2ZMgUqe~Q@NfxG6#_pEu^G8(E37=(Fj_ttF|PNnc~TY z^AO5f;d*2JiayObPLD9bmrtFJ`VNE%EtCX5W37}3ikRgM-yCO|*?-Z{%w$LpTDrmR zwQUN2@0CJt7x`!TYDp$~Z4{05>rsZep8^Cdp)SdR6NY*IDBk;6;aS16c5Fi3tDh^V zXD*Pf(u^swULYz_W3(hzx!n~r)}PEywJ2lSQrRmp>Y(0OBy~~ELT$`wltHBSy@I@n zwd!fOho=Jr!JtVc7ldE3cR`uj4x0OlxE#A38ScJYiCjxElhiq z;U?V0#*5Sn@^vDmpAhSElpJz;QmGDeI^NdQa3h{J8&I2?%~7|Q+Bj@yw6+i~QAOL< zYuI<4yJJ-f)$Aq{!CG+X4Qq(PS*{3h>su$d|C_;MgVbw5qbuiR3gwaBD{=b$c}{Cn z?_e>zR)lr%%jsr}JPl&fbmna-`E3aJ(5tNR+Ohyo|zigIsvk^~ZjEq1(^>D7d0 zJcs!uV!flJBHkT3H?4ffXCn~HKJ#$={8f}tFf`kA>8Mpf()!4mxf0j5#us+S!&zG* zb-|s;v?`os21W%8jzcNtlh%4C}>0xxvbsdd+FmWpBg)kl6g`VO(`Ej>4j~U%= zJ}B!j?Q)`Y&ofk5Fo7(#+i}6cINQS`KK4v5r>Jc9>EMK`nCou?MB06#GN+;sI2g@m zhJB7a0H%r<^ma8n-IaTLb*wHFBH&t(sIw&vL}m8%pv`OLDUot72iVBF-b%1Igl<`d z*NyOB_a+XeXs3P6b^6HRMSvAZrAqd2i7=;j_UG`bs2gETuNz2|_F)K(Sz8+!*fn^l zWNJt|19jz%Ocu}=c`0v1(PCP|)`*O5Oo|TMcGBxKPna+6AklW9yvSc+E0_`JMk=j# z->Bm@ISB=-%rZWNgq%VH^xry=ppPOjY{pIH-eD1t~BOjyT_#g&2nuZsgEUShi 
z=4^g{F?rX4T`adeSdrSefF}i$J{xtwq&lrIFoErtAE-d-1}OtBu)1qAussSh*pw0+ ztIPg9O#=z-pJaaO=BM54H?W_L;isec$4=Cba>|7m4K+qK=H_G@RgKS&c5)T8$~Hca n=#0IxpD3y8fk3|9e>c6tBsLOUmtfG?|6#Yt-+tH)LKyx(G*O9y literal 0 HcmV?d00001 diff --git a/ocr_recognition/inference.py b/ocr_recognition/inference.py new file mode 100644 index 00000000..04175bb1 --- /dev/null +++ b/ocr_recognition/inference.py @@ -0,0 +1,65 @@ +import paddle.v2 as paddle +import paddle.fluid as fluid +from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data +from crnn_ctc_model import ctc_infer +import numpy as np +import ctc_reader +import argparse +import functools +import os + +parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable +add_arg('model_path', str, None, "The model path to be used for inference.") +add_arg('input_images_dir', str, None, "The directory of images.") +add_arg('input_images_list', str, None, "The list file of images.") +add_arg('use_gpu', bool, True, "Whether use GPU to infer.") +# yapf: enable + + +def inference(args, infer=ctc_infer, data_reader=ctc_reader): + """OCR inference""" + num_classes = data_reader.num_classes() + data_shape = data_reader.data_shape() + # define network + images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') + sequence = infer(images, num_classes) + # data reader + infer_reader = data_reader.inference( + infer_images_dir=args.input_images_dir, + infer_list_file=args.input_images_list) + # prepare environment + place = fluid.CPUPlace() + if use_gpu: + place = fluid.CUDAPlace(0) + + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + # load init model + model_dir = args.model_path + model_file_name = None + if not os.path.isdir(args.model_path): + model_dir = os.path.dirname(args.model_path) + model_file_name = os.path.basename(args.model_path) + fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) + print "Init model from: %s." 
% args.model_path + + for data in infer_reader(): + result = exe.run(fluid.default_main_program(), + feed=get_feeder_data( + data, place, need_label=False), + fetch_list=[sequence], + return_numpy=False) + print "result: %s" % (np.array(result[0]).flatten(), ) + + +def main(): + args = parser.parse_args() + print_arguments(args) + inference(args, data_reader=ctc_reader) + + +if __name__ == "__main__": + main() diff --git a/ocr_recognition/latest_kpis/train_avg_loss_factor.txt b/ocr_recognition/latest_kpis/train_avg_loss_factor.txt new file mode 100644 index 00000000..1d7a3785 --- /dev/null +++ b/ocr_recognition/latest_kpis/train_avg_loss_factor.txt @@ -0,0 +1 @@ +[3196.62353515625] \ No newline at end of file diff --git a/ocr_recognition/latest_kpis/train_seq_err_factor.txt b/ocr_recognition/latest_kpis/train_seq_err_factor.txt new file mode 100644 index 00000000..ce2f11e8 --- /dev/null +++ b/ocr_recognition/latest_kpis/train_seq_err_factor.txt @@ -0,0 +1 @@ +[128.0] \ No newline at end of file diff --git a/ocr_recognition/run.xsh b/ocr_recognition/run.xsh new file mode 100755 index 00000000..59e9d990 --- /dev/null +++ b/ocr_recognition/run.xsh @@ -0,0 +1,8 @@ +#!/bin/bash + +export MKL_NUM_THREADS=1 +export OMP_NUM_THREADS=1 +cudaid=${ocr_recognition_cudaid:=0} # use 0-th card as default +export CUDA_VISIBLE_DEVICES=$cudaid + +FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python ctc_train.py --use_gpu=True --batch_size=128 --pass_num=1 --iterations=2000 diff --git a/ocr_recognition/utility.py b/ocr_recognition/utility.py new file mode 100644 index 00000000..67a5bfa0 --- /dev/null +++ b/ocr_recognition/utility.py @@ -0,0 +1,90 @@ +"""Contains common utility functions.""" +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import distutils.util +import numpy as np +from paddle.fluid import core + + +def print_arguments(args): + """Print argparse's arguments. + + Usage: + + .. code-block:: python + + parser = argparse.ArgumentParser() + parser.add_argument("name", default="Jonh", type=str, help="User name.") + args = parser.parse_args() + print_arguments(args) + + :param args: Input argparse.Namespace for printing. + :type args: argparse.Namespace + """ + print("----------- Configuration Arguments -----------") + for arg, value in sorted(vars(args).iteritems()): + print("%s: %s" % (arg, value)) + print("------------------------------------------------") + + +def add_arguments(argname, type, default, help, argparser, **kwargs): + """Add argparse's argument. + + Usage: + + .. 
code-block:: python + + parser = argparse.ArgumentParser() + add_argument("name", str, "Jonh", "User name.", parser) + args = parser.parse_args() + """ + type = distutils.util.strtobool if type == bool else type + argparser.add_argument( + "--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) + + +def to_lodtensor(data, place): + seq_lens = [len(seq) for seq in data] + cur_len = 0 + lod = [cur_len] + for l in seq_lens: + cur_len += l + lod.append(cur_len) + flattened_data = np.concatenate(data, axis=0).astype("int32") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + res = core.LoDTensor() + res.set(flattened_data, place) + res.set_lod([lod]) + return res + + +def get_feeder_data(data, place, need_label=True): + pixel_tensor = core.LoDTensor() + pixel_data = None + pixel_data = np.concatenate( + map(lambda x: x[0][np.newaxis, :], data), axis=0).astype("float32") + pixel_tensor.set(pixel_data, place) + label_tensor = to_lodtensor(map(lambda x: x[1], data), place) + if need_label: + return {"pixel": pixel_tensor, "label": label_tensor} + else: + return {"pixel": pixel_tensor} From 3e10b3009bc08d329c94066bfb13528c13c44d46 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Wed, 25 Apr 2018 15:21:04 +0800 Subject: [PATCH 14/67] change ocr model --- resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt | 2 +- resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt index 466467aa..7fa7ef20 100644 --- a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt +++ b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt @@ -1 +1 @@ -[1508] \ No newline at end of file +[2508] diff --git a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt index 24c95b88..e6fd5379 100644 --- a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt +++ b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt @@ -1 +1 @@ -[11014] \ No newline at end of file +[31014] From 659ecd85f8a6b7ebff55c244726def06ae049a74 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Wed, 25 Apr 2018 17:12:59 +0800 Subject: [PATCH 15/67] add transformer --- ocr_recognition/continuous_evaluation.py | 4 +- transformer/continuous_evaluation.py | 13 + transformer/infer.py | 354 +++++++++++ .../latest_kpis/train_avg_ppl_kpi_factor.txt | 1 + .../train_pass_duration_kpi_factor.txt | 1 + transformer/model.py | 578 ++++++++++++++++++ transformer/optim.py | 37 ++ transformer/run.xsh | 8 + transformer/train.py | 278 +++++++++ transformer/transformer_config.py | 186 ++++++ 10 files changed, 1458 insertions(+), 2 deletions(-) create mode 100644 transformer/continuous_evaluation.py create mode 100644 transformer/infer.py create mode 100644 transformer/latest_kpis/train_avg_ppl_kpi_factor.txt create mode 100644 transformer/latest_kpis/train_pass_duration_kpi_factor.txt create mode 100644 transformer/model.py create mode 100644 transformer/optim.py create mode 100755 transformer/run.xsh create mode 100644 transformer/train.py create mode 100644 transformer/transformer_config.py diff --git a/ocr_recognition/continuous_evaluation.py b/ocr_recognition/continuous_evaluation.py index 679893c3..cb0c82af 100644 --- a/ocr_recognition/continuous_evaluation.py +++ b/ocr_recognition/continuous_evaluation.py @@ -3,8 +3,8 @@ 
sys.path.append(os.environ['ceroot'])
 from kpi import CostKpi, DurationKpi, AccKpi
 
-train_avg_loss_kpi = AccKpi('train_avg_loss', 0.05, 0)
-train_seq_err_kpi = AccKpi('train_seq_err', 0.05, 0)
+train_avg_loss_kpi = CostKpi('train_avg_loss', 0.05, 0)
+train_seq_err_kpi = CostKpi('train_seq_err', 0.05, 0)
 
 
 tracking_kpis = [
diff --git a/transformer/continuous_evaluation.py b/transformer/continuous_evaluation.py
new file mode 100644
index 00000000..9b4cb407
--- /dev/null
+++ b/transformer/continuous_evaluation.py
@@ -0,0 +1,13 @@
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi, DurationKpi, AccKpi
+
+train_avg_ppl_kpi = CostKpi('train_avg_ppl_kpi', 0.05, 0)
+train_pass_duration_kpi = DurationKpi('train_pass_duration_kpi', 0.05, 0)
+
+
+tracking_kpis = [
+    train_avg_ppl_kpi,
+    train_pass_duration_kpi,
+]
diff --git a/transformer/infer.py b/transformer/infer.py
new file mode 100644
index 00000000..432183cd
--- /dev/null
+++ b/transformer/infer.py
@@ -0,0 +1,354 @@
+import numpy as np
+
+import paddle
+import paddle.fluid as fluid
+
+import model
+from model import wrap_encoder as encoder
+from model import wrap_decoder as decoder
+from transformer_config import *
+from train import pad_batch_data
+
+
+def translate_batch(exe,
+                    src_words,
+                    encoder,
+                    enc_in_names,
+                    enc_out_names,
+                    decoder,
+                    dec_in_names,
+                    dec_out_names,
+                    beam_size,
+                    max_length,
+                    n_best,
+                    batch_size,
+                    n_head,
+                    d_model,
+                    src_pad_idx,
+                    trg_pad_idx,
+                    bos_idx,
+                    eos_idx,
+                    unk_idx,
+                    output_unk=True):
+    """
+    Run the encoder program once and run the decoder program multiple times to
+    implement beam search externally.
+    """
+    # Prepare data for encoder and run the encoder.
+    enc_in_data = pad_batch_data(
+        src_words,
+        src_pad_idx,
+        n_head,
+        is_target=False,
+        is_label=False,
+        return_attn_bias=True,
+        return_max_len=False)
+    # Append the data shape input to reshape the output of embedding layer.
+    enc_in_data = enc_in_data + [
+        np.array(
+            [-1, enc_in_data[2].shape[-1], d_model], dtype="int32")
+    ]
+    # Append the shape inputs to reshape before and after softmax in encoder
+    # self attention.
+    enc_in_data = enc_in_data + [
+        np.array(
+            [-1, enc_in_data[2].shape[-1]], dtype="int32"), np.array(
+                enc_in_data[2].shape, dtype="int32")
+    ]
+    enc_output = exe.run(encoder,
+                         feed=dict(zip(enc_in_names, enc_in_data)),
+                         fetch_list=enc_out_names)[0]
+
+    # Beam Search.
+    # To store the beam info.
+    scores = np.zeros((batch_size, beam_size), dtype="float32")
+    prev_branchs = [[] for i in range(batch_size)]
+    next_ids = [[] for i in range(batch_size)]
+    # Use beam_inst_map to map beam idx to the instance idx in batch, since
+    # the size of the fed batch is changing.
+    beam_inst_map = {
+        beam_idx: inst_idx
+        for inst_idx, beam_idx in enumerate(range(batch_size))
+    }
+    # Use active_beams to record the beams that are still alive.
+    active_beams = range(batch_size)
+
+    def beam_backtrace(prev_branchs, next_ids, n_best=beam_size):
+        """
+        Decode and select n_best sequences for one instance by backtrace.
+        """
+        seqs = []
+        for i in range(n_best):
+            k = i
+            seq = []
+            for j in range(len(prev_branchs) - 1, -1, -1):
+                seq.append(next_ids[j][k])
+                k = prev_branchs[j][k]
+            seq = seq[::-1]
+            # Add the <bos>, since next_ids don't include the <bos>.
+            seq = [bos_idx] + seq
+            seqs.append(seq)
+        return seqs
+
+    def init_dec_in_data(batch_size, beam_size, enc_in_data, enc_output):
+        """
+        Initialize the input data for decoder.
+ """ + trg_words = np.array( + [[bos_idx]] * batch_size * beam_size, dtype="int64") + trg_pos = np.array([[1]] * batch_size * beam_size, dtype="int64") + src_max_length, src_slf_attn_bias, trg_max_len = enc_in_data[2].shape[ + -1], enc_in_data[2], 1 + # This is used to remove attention on subsequent words. + trg_slf_attn_bias = np.ones((batch_size * beam_size, trg_max_len, + trg_max_len)) + trg_slf_attn_bias = np.triu(trg_slf_attn_bias, 1).reshape( + [-1, 1, trg_max_len, trg_max_len]) + trg_slf_attn_bias = (np.tile(trg_slf_attn_bias, [1, n_head, 1, 1]) * + [-1e9]).astype("float32") + # This is used to remove attention on the paddings of source sequences. + trg_src_attn_bias = np.tile( + src_slf_attn_bias[:, :, ::src_max_length, :][:, np.newaxis], + [1, beam_size, 1, trg_max_len, 1]).reshape([ + -1, src_slf_attn_bias.shape[1], trg_max_len, + src_slf_attn_bias.shape[-1] + ]) + # Append the shape input to reshape the output of embedding layer. + trg_data_shape = np.array( + [batch_size * beam_size, trg_max_len, d_model], dtype="int32") + # Append the shape inputs to reshape before and after softmax in + # decoder self attention. + trg_slf_attn_pre_softmax_shape = np.array( + [-1, trg_slf_attn_bias.shape[-1]], dtype="int32") + trg_slf_attn_post_softmax_shape = np.array( + trg_slf_attn_bias.shape, dtype="int32") + # Append the shape inputs to reshape before and after softmax in + # encoder-decoder attention. + trg_src_attn_pre_softmax_shape = np.array( + [-1, trg_src_attn_bias.shape[-1]], dtype="int32") + trg_src_attn_post_softmax_shape = np.array( + trg_src_attn_bias.shape, dtype="int32") + enc_output = np.tile( + enc_output[:, np.newaxis], [1, beam_size, 1, 1]).reshape( + [-1, enc_output.shape[-2], enc_output.shape[-1]]) + return trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \ + trg_data_shape, trg_slf_attn_pre_softmax_shape, \ + trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \ + trg_src_attn_post_softmax_shape, enc_output + + def update_dec_in_data(dec_in_data, next_ids, active_beams, beam_inst_map): + """ + Update the input data of decoder mainly by slicing from the previous + input data and dropping the finished instance beams. + """ + trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \ + trg_data_shape, trg_slf_attn_pre_softmax_shape, \ + trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \ + trg_src_attn_post_softmax_shape, enc_output = dec_in_data + trg_cur_len = trg_slf_attn_bias.shape[-1] + 1 + trg_words = np.array( + [ + beam_backtrace(prev_branchs[beam_idx], next_ids[beam_idx]) + for beam_idx in active_beams + ], + dtype="int64") + trg_words = trg_words.reshape([-1, 1]) + trg_pos = np.array( + [range(1, trg_cur_len + 1)] * len(active_beams) * beam_size, + dtype="int64").reshape([-1, 1]) + active_beams = [beam_inst_map[beam_idx] for beam_idx in active_beams] + active_beams_indice = ( + (np.array(active_beams) * beam_size)[:, np.newaxis] + + np.array(range(beam_size))[np.newaxis, :]).flatten() + # This is used to remove attention on subsequent words. + trg_slf_attn_bias = np.ones((len(active_beams) * beam_size, + trg_cur_len, trg_cur_len)) + trg_slf_attn_bias = np.triu(trg_slf_attn_bias, 1).reshape( + [-1, 1, trg_cur_len, trg_cur_len]) + trg_slf_attn_bias = (np.tile(trg_slf_attn_bias, [1, n_head, 1, 1]) * + [-1e9]).astype("float32") + # This is used to remove attention on the paddings of source sequences. 
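The tiling step that follows broadcasts a single query row of the source self-attention bias over every decoder position, since all target positions share the same source padding pattern. A shape-only NumPy sketch of that broadcast (toy sizes, illustrative names):

import numpy as np

batch, n_head, src_len, trg_len = 2, 8, 4, 6
src_slf_attn_bias = np.zeros((batch, n_head, src_len, src_len), dtype="float32")
src_slf_attn_bias[1, :, :, 3] = -1e9      # instance 1 has one padded source slot

# Every query row is identical, so keep one row and repeat it trg_len times.
one_row = src_slf_attn_bias[:, :, ::src_len, :]      # shape (2, 8, 1, 4)
trg_src_attn_bias = np.tile(one_row, [1, 1, trg_len, 1])
print(trg_src_attn_bias.shape)                       # (2, 8, 6, 4)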
+        trg_src_attn_bias = np.tile(trg_src_attn_bias[
+            active_beams_indice, :, ::trg_src_attn_bias.shape[2], :],
+                                    [1, 1, trg_cur_len, 1])
+        # Append the shape input to reshape the output of embedding layer.
+        trg_data_shape = np.array(
+            [len(active_beams) * beam_size, trg_cur_len, d_model],
+            dtype="int32")
+        # Append the shape inputs to reshape before and after softmax in
+        # decoder self attention.
+        trg_slf_attn_pre_softmax_shape = np.array(
+            [-1, trg_slf_attn_bias.shape[-1]], dtype="int32")
+        trg_slf_attn_post_softmax_shape = np.array(
+            trg_slf_attn_bias.shape, dtype="int32")
+        # Append the shape inputs to reshape before and after softmax in
+        # encoder-decoder attention.
+        trg_src_attn_pre_softmax_shape = np.array(
+            [-1, trg_src_attn_bias.shape[-1]], dtype="int32")
+        trg_src_attn_post_softmax_shape = np.array(
+            trg_src_attn_bias.shape, dtype="int32")
+        enc_output = enc_output[active_beams_indice, :, :]
+        return trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \
+            trg_data_shape, trg_slf_attn_pre_softmax_shape, \
+            trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \
+            trg_src_attn_post_softmax_shape, enc_output
+
+    dec_in_data = init_dec_in_data(batch_size, beam_size, enc_in_data,
+                                   enc_output)
+    for i in range(max_length):
+        predict_all = exe.run(decoder,
+                              feed=dict(zip(dec_in_names, dec_in_data)),
+                              fetch_list=dec_out_names)[0]
+        predict_all = np.log(
+            predict_all.reshape([len(beam_inst_map) * beam_size, i + 1, -1])
+            [:, -1, :])
+        predict_all = (predict_all + scores[active_beams].reshape(
+            [len(beam_inst_map) * beam_size, -1])).reshape(
+                [len(beam_inst_map), beam_size, -1])
+        if not output_unk:  # To exclude the <unk> token.
+            predict_all[:, :, unk_idx] = -1e9
+        active_beams = []
+        for beam_idx in range(batch_size):
+            if not beam_inst_map.has_key(beam_idx):
+                continue
+            inst_idx = beam_inst_map[beam_idx]
+            predict = (predict_all[inst_idx, :, :]
+                       if i != 0 else predict_all[inst_idx, 0, :]).flatten()
+            top_k_indice = np.argpartition(predict, -beam_size)[-beam_size:]
+            top_scores_ids = top_k_indice[np.argsort(predict[top_k_indice])
+                                          [::-1]]
+            top_scores = predict[top_scores_ids]
+            scores[beam_idx] = top_scores
+            prev_branchs[beam_idx].append(top_scores_ids /
+                                          predict_all.shape[-1])
+            next_ids[beam_idx].append(top_scores_ids % predict_all.shape[-1])
+            if next_ids[beam_idx][-1][0] != eos_idx:
+                active_beams.append(beam_idx)
+        if len(active_beams) == 0:
+            break
+        dec_in_data = update_dec_in_data(dec_in_data, next_ids, active_beams,
+                                         beam_inst_map)
+        beam_inst_map = {
+            beam_idx: inst_idx
+            for inst_idx, beam_idx in enumerate(active_beams)
+        }
+
+    # Decode beams and select n_best sequences for each instance by backtrace.
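The backtrace performed below walks parent pointers from the last step to the first and then prepends bos_idx. A toy, self-contained run of the same logic (the two lists are invented for illustration):

def beam_backtrace(prev_branchs, next_ids, bos_idx=0, k=0):
    # Walk backwards through the search tree, following parent pointers.
    seq = []
    for j in range(len(prev_branchs) - 1, -1, -1):
        seq.append(next_ids[j][k])
        k = prev_branchs[j][k]
    return [bos_idx] + seq[::-1]

prev_branchs = [[0, 0], [1, 0]]   # parent beam chosen at steps 0 and 1
next_ids = [[5, 7], [9, 3]]       # token emitted at steps 0 and 1
print(beam_backtrace(prev_branchs, next_ids))   # [0, 7, 9]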
+    seqs = [
+        beam_backtrace(prev_branchs[beam_idx], next_ids[beam_idx], n_best)
+        for beam_idx in range(batch_size)
+    ]
+
+    return seqs, scores[:, :n_best].tolist()
+
+
+def main():
+    place = fluid.CUDAPlace(0) if InferTaskConfig.use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    encoder_program = fluid.Program()
+    with fluid.program_guard(main_program=encoder_program):
+        enc_output = encoder(
+            ModelHyperParams.src_vocab_size, ModelHyperParams.max_length + 1,
+            ModelHyperParams.n_layer, ModelHyperParams.n_head,
+            ModelHyperParams.d_key, ModelHyperParams.d_value,
+            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
+            ModelHyperParams.dropout)
+
+    decoder_program = fluid.Program()
+    with fluid.program_guard(main_program=decoder_program):
+        predict = decoder(
+            ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
+            ModelHyperParams.n_layer, ModelHyperParams.n_head,
+            ModelHyperParams.d_key, ModelHyperParams.d_value,
+            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
+            ModelHyperParams.dropout)
+
+    # Load model parameters of encoder and decoder separately from the saved
+    # transformer model.
+    encoder_var_names = []
+    for op in encoder_program.block(0).ops:
+        encoder_var_names += op.input_arg_names
+    encoder_param_names = filter(
+        lambda var_name: isinstance(encoder_program.block(0).var(var_name),
+                                    fluid.framework.Parameter),
+        encoder_var_names)
+    encoder_params = map(encoder_program.block(0).var, encoder_param_names)
+    decoder_var_names = []
+    for op in decoder_program.block(0).ops:
+        decoder_var_names += op.input_arg_names
+    decoder_param_names = filter(
+        lambda var_name: isinstance(decoder_program.block(0).var(var_name),
+                                    fluid.framework.Parameter),
+        decoder_var_names)
+    decoder_params = map(decoder_program.block(0).var, decoder_param_names)
+    fluid.io.load_vars(exe, InferTaskConfig.model_path, vars=encoder_params)
+    fluid.io.load_vars(exe, InferTaskConfig.model_path, vars=decoder_params)
+
+    # This is used here to set dropout to the test mode.
+    encoder_program = fluid.io.get_inference_program(
+        target_vars=[enc_output], main_program=encoder_program)
+    decoder_program = fluid.io.get_inference_program(
+        target_vars=[predict], main_program=decoder_program)
+
+
+    test_data = paddle.batch(
+        paddle.dataset.wmt16.test(ModelHyperParams.src_vocab_size,
+                                  ModelHyperParams.trg_vocab_size),
+        batch_size=InferTaskConfig.batch_size)
+
+    trg_idx2word = paddle.dataset.wmt16.get_dict(
+        "de", dict_size=ModelHyperParams.trg_vocab_size, reverse=True)
+
+    def post_process_seq(seq,
+                         bos_idx=ModelHyperParams.bos_idx,
+                         eos_idx=ModelHyperParams.eos_idx,
+                         output_bos=InferTaskConfig.output_bos,
+                         output_eos=InferTaskConfig.output_eos):
+        """
+        Post-process the beam-search decoded sequence. Truncate from the first
+        <eos> and remove the <bos> and <eos> tokens currently.
+        """
+        eos_pos = len(seq) - 1
+        for i, idx in enumerate(seq):
+            if idx == eos_idx:
+                eos_pos = i
+                break
+        seq = seq[:eos_pos + 1]
+        return filter(
+            lambda idx: (output_bos or idx != bos_idx) and \
+                (output_eos or idx != eos_idx),
+            seq)
+
+    for batch_id, data in enumerate(test_data()):
+        batch_seqs, batch_scores = translate_batch(
+            exe,
+            [item[0] for item in data],
+            encoder_program,
+            encoder_data_input_fields + encoder_util_input_fields,
+            [enc_output.name],
+            decoder_program,
+            decoder_data_input_fields[:-1] + decoder_util_input_fields + (decoder_data_input_fields[-1],),
+            [predict.name],
+            InferTaskConfig.beam_size,
+            InferTaskConfig.max_length,
+            InferTaskConfig.n_best,
+            len(data),
+            ModelHyperParams.n_head,
+            ModelHyperParams.d_model,
+            ModelHyperParams.eos_idx,  # Use eos_idx to pad.
+            ModelHyperParams.eos_idx,  # Use eos_idx to pad.
+            ModelHyperParams.bos_idx,
+            ModelHyperParams.eos_idx,
+            ModelHyperParams.unk_idx,
+            output_unk=InferTaskConfig.output_unk)
+        for i in range(len(batch_seqs)):
+            # Post-process the beam-search decoded sequences.
+            seqs = map(post_process_seq, batch_seqs[i])
+            scores = batch_scores[i]
+            for seq in seqs:
+                print(" ".join([trg_idx2word[idx] for idx in seq]))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt b/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt
new file mode 100644
index 00000000..010dd732
--- /dev/null
+++ b/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt
@@ -0,0 +1 @@
+[122.8649673461914]
\ No newline at end of file
diff --git a/transformer/latest_kpis/train_pass_duration_kpi_factor.txt b/transformer/latest_kpis/train_pass_duration_kpi_factor.txt
new file mode 100644
index 00000000..c7656c0d
--- /dev/null
+++ b/transformer/latest_kpis/train_pass_duration_kpi_factor.txt
@@ -0,0 +1 @@
+[804.0266849994659]
\ No newline at end of file
diff --git a/transformer/model.py b/transformer/model.py
new file mode 100644
index 00000000..f2ffb88e
--- /dev/null
+++ b/transformer/model.py
@@ -0,0 +1,578 @@
+from functools import partial
+import numpy as np
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+from transformer_config import *
+
+
+def position_encoding_init(n_position, d_pos_vec):
+    """
+    Generate the initial values for the sinusoid position encoding table.
+    """
+    position_enc = np.array([[
+        pos / np.power(10000, 2 * (j // 2) / d_pos_vec)
+        for j in range(d_pos_vec)
+    ] if pos != 0 else np.zeros(d_pos_vec) for pos in range(n_position)])
+    position_enc[1:, 0::2] = np.sin(position_enc[1:, 0::2])  # dim 2i
+    position_enc[1:, 1::2] = np.cos(position_enc[1:, 1::2])  # dim 2i+1
+    return position_enc.astype("float32")
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         pre_softmax_shape=None,
+                         post_softmax_shape=None):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logit before
+    computing softmax activation to mask certain selected positions so that
+    they will not be considered in attention weights.
+    """
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError(
+            "Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
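As an aside, the sinusoid table from position_encoding_init above can be sanity-checked with this compact NumPy version (Python 3 float division assumed; row 0 stays all-zero so it can act as the padding position):

import numpy as np

def sinusoid_table(n_position, d_pos_vec):
    pe = np.array([[pos / np.power(10000, 2 * (j // 2) / d_pos_vec)
                    for j in range(d_pos_vec)]
                   if pos != 0 else np.zeros(d_pos_vec)
                   for pos in range(n_position)])
    pe[1:, 0::2] = np.sin(pe[1:, 0::2])   # even dims: sin
    pe[1:, 1::2] = np.cos(pe[1:, 1::2])   # odd dims: cos
    return pe.astype("float32")

table = sinusoid_table(8, 4)
print(table.shape)   # (8, 4)
print(table[0])      # [0. 0. 0. 0.]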
+        """
+        q = layers.fc(input=queries,
+                      size=d_key * n_head,
+                      param_attr=fluid.initializer.Xavier(
+                          uniform=False,
+                          fan_in=d_model * d_key,
+                          fan_out=n_head * d_key),
+                      bias_attr=False,
+                      num_flatten_dims=2)
+        k = layers.fc(input=keys,
+                      size=d_key * n_head,
+                      param_attr=fluid.initializer.Xavier(
+                          uniform=False,
+                          fan_in=d_model * d_key,
+                          fan_out=n_head * d_key),
+                      bias_attr=False,
+                      num_flatten_dims=2)
+        v = layers.fc(input=values,
+                      size=d_value * n_head,
+                      param_attr=fluid.initializer.Xavier(
+                          uniform=False,
+                          fan_in=d_model * d_value,
+                          fan_out=n_head * d_value),
+                      bias_attr=False,
+                      num_flatten_dims=2)
+        return q, k, v
+
+    def __split_heads(x, n_head):
+        """
+        Reshape the last dimension of input tensor x so that it becomes two
+        dimensions and then transpose. Specifically, input a tensor with shape
+        [bs, max_sequence_length, n_head * hidden_dim] then output a tensor
+        with shape [bs, n_head, max_sequence_length, hidden_dim].
+        """
+        if n_head == 1:
+            return x
+
+        hidden_size = x.shape[-1]
+        # The value 0 in shape attr means copying the corresponding dimension
+        # size of the input as the output dimension size.
+        reshaped = layers.reshape(
+            x=x, shape=[0, -1, n_head, hidden_size // n_head])
+
+        # permute the dimensions into:
+        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
+        return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
+
+    def __combine_heads(x):
+        """
+        Transpose and then reshape the last two dimensions of input tensor x
+        so that it becomes one dimension, which is reverse to __split_heads.
+        """
+        if len(x.shape) == 3: return x
+        if len(x.shape) != 4:
+            raise ValueError("Input(x) should be a 4-D Tensor.")
+
+        trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
+        # The value 0 in shape attr means copying the corresponding dimension
+        # size of the input as the output dimension size.
+        return layers.reshape(
+            x=trans_x,
+            shape=map(int, [0, -1, trans_x.shape[2] * trans_x.shape[3]]))
+
+    def scaled_dot_product_attention(q, k, v, attn_bias, d_model,
+                                     dropout_rate):
+        """
+        Scaled Dot-Product Attention
+        """
+        scaled_q = layers.scale(x=q, scale=d_model**-0.5)
+        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
+        weights = layers.reshape(
+            x=layers.elementwise_add(
+                x=product, y=attn_bias) if attn_bias else product,
+            shape=[-1, product.shape[-1]],
+            actual_shape=pre_softmax_shape,
+            act="softmax")
+        weights = layers.reshape(
+            x=weights, shape=product.shape, actual_shape=post_softmax_shape)
+        if dropout_rate:
+            weights = layers.dropout(
+                weights, dropout_prob=dropout_rate, is_test=False)
+        out = layers.matmul(weights, v)
+        return out
+
+    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
+
+    q = __split_heads(q, n_head)
+    k = __split_heads(k, n_head)
+    v = __split_heads(v, n_head)
+
+    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_model,
+                                                  dropout_rate)
+
+    out = __combine_heads(ctx_multiheads)
+
+    # Project back to the model size.
+    proj_out = layers.fc(input=out,
+                         size=d_model,
+                         param_attr=fluid.initializer.Xavier(uniform=False),
+                         bias_attr=False,
+                         num_flatten_dims=2)
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, which is applied to each position separately and identically.
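For reference, the attention core above reduces to a few lines of NumPy; note that this model scales by d_model ** -0.5, whereas the Transformer paper scales by d_key ** -0.5 (a sketch with toy shapes, not the fluid implementation):

import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def attention(q, k, v, attn_bias, d_model):
    product = (q * d_model ** -0.5) @ np.swapaxes(k, -1, -2)
    weights = softmax(product + attn_bias)   # bias is 0 or -1e9 per position
    return weights @ v

q = k = v = np.random.rand(2, 8, 5, 64)      # [batch, n_head, seq_len, dim_per_head]
bias = np.zeros((2, 8, 5, 5))
print(attention(q, k, v, bias, d_model=512).shape)   # (2, 8, 5, 64)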
+    """
+    hidden = layers.fc(input=x,
+                       size=d_inner_hid,
+                       num_flatten_dims=2,
+                       param_attr=fluid.initializer.Uniform(
+                           low=-(d_hid**-0.5), high=(d_hid**-0.5)),
+                       act="relu")
+    out = layers.fc(input=hidden,
+                    size=d_hid,
+                    num_flatten_dims=2,
+                    param_attr=fluid.initializer.Uniform(
+                        low=-(d_inner_hid**-0.5), high=(d_inner_hid**-0.5)))
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.):
+    """
+    Add residual connection, layer normalization and dropout to the out tensor
+    optionally according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.initializer.Constant(1.),
+                bias_attr=fluid.initializer.Constant(0.))
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def prepare_encoder(src_word,
+                    src_pos,
+                    src_vocab_size,
+                    src_emb_dim,
+                    src_max_len,
+                    dropout_rate=0.,
+                    src_data_shape=None,
+                    pos_enc_param_name=None):
+    """Add word embeddings and position encodings.
+    The output tensor has a shape of:
+    [batch_size, max_src_length_in_batch, d_model].
+    This module is used at the bottom of the encoder stacks.
+    """
+    src_word_emb = layers.embedding(
+        src_word,
+        size=[src_vocab_size, src_emb_dim],
+        param_attr=fluid.initializer.Normal(0., 1.))
+    src_pos_enc = layers.embedding(
+        src_pos,
+        size=[src_max_len, src_emb_dim],
+        param_attr=fluid.ParamAttr(
+            name=pos_enc_param_name, trainable=False))
+    enc_input = src_word_emb + src_pos_enc
+    enc_input = layers.reshape(
+        x=enc_input,
+        shape=[-1, src_max_len, src_emb_dim],
+        actual_shape=src_data_shape)
+    return layers.dropout(
+        enc_input, dropout_prob=dropout_rate,
+        is_test=False) if dropout_rate else enc_input
+
+
+prepare_encoder = partial(
+    prepare_encoder, pos_enc_param_name=pos_enc_param_names[0])
+prepare_decoder = partial(
+    prepare_encoder, pos_enc_param_name=pos_enc_param_names[1])
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  dropout_rate=0.,
+                  pre_softmax_shape=None,
+                  post_softmax_shape=None):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention followed by
+    position-wise feed-forward networks, and both of the two components are
+    accompanied by the post_process_layer to add residual connection, layer
+    normalization and dropout.
+    """
+    attn_output = multi_head_attention(
+        enc_input, enc_input, enc_input, attn_bias, d_key, d_value, d_model,
+        n_head, dropout_rate, pre_softmax_shape, post_softmax_shape)
+    attn_output = post_process_layer(enc_input, attn_output, "dan",
+                                     dropout_rate)
+    ffd_output = positionwise_feed_forward(attn_output, d_inner_hid, d_model)
+    return post_process_layer(attn_output, ffd_output, "dan", dropout_rate)
+
+
+def encoder(enc_input,
+            attn_bias,
+            n_layer,
+            n_head,
+            d_key,
+            d_value,
+            d_model,
+            d_inner_hid,
+            dropout_rate=0.,
+            pre_softmax_shape=None,
+            post_softmax_shape=None):
+    """
+    The encoder is composed of a stack of identical layers returned by calling
+    encoder_layer.
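The "dan" command string used throughout applies, in order, dropout, residual add, and layer normalization. A minimal NumPy sketch of that post-processing, with dropout treated as identity (eval mode):

import numpy as np

def layer_norm(x, eps=1e-6):
    return (x - x.mean(-1, keepdims=True)) / (x.std(-1, keepdims=True) + eps)

def post_process(prev_out, out, cmds="dan"):
    for cmd in cmds:
        if cmd == "d":            # dropout: identity at eval time
            pass
        elif cmd == "a":          # residual connection
            out = out + prev_out
        elif cmd == "n":          # layer normalization over the last dim
            out = layer_norm(out)
    return out

x = np.random.rand(2, 5, 8)
sub_layer_out = np.random.rand(2, 5, 8)
print(post_process(x, sub_layer_out).shape)   # (2, 5, 8)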
+ """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + pre_softmax_shape, + post_softmax_shape, ) + enc_input = enc_output + return enc_output + + +def decoder_layer(dec_input, + enc_output, + slf_attn_bias, + dec_enc_attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate=0., + slf_attn_pre_softmax_shape=None, + slf_attn_post_softmax_shape=None, + src_attn_pre_softmax_shape=None, + src_attn_post_softmax_shape=None): + """ The layer to be stacked in decoder part. + The structure of this module is similar to that in the encoder part except + a multi-head attention is added to implement encoder-decoder attention. + """ + slf_attn_output = multi_head_attention( + dec_input, + dec_input, + dec_input, + slf_attn_bias, + d_key, + d_value, + d_model, + n_head, + dropout_rate, + slf_attn_pre_softmax_shape, + slf_attn_post_softmax_shape, ) + slf_attn_output = post_process_layer( + dec_input, + slf_attn_output, + "dan", # residual connection + dropout + layer normalization + dropout_rate, ) + enc_attn_output = multi_head_attention( + slf_attn_output, + enc_output, + enc_output, + dec_enc_attn_bias, + d_key, + d_value, + d_model, + n_head, + dropout_rate, + src_attn_pre_softmax_shape, + src_attn_post_softmax_shape, ) + enc_attn_output = post_process_layer( + slf_attn_output, + enc_attn_output, + "dan", # residual connection + dropout + layer normalization + dropout_rate, ) + ffd_output = positionwise_feed_forward( + enc_attn_output, + d_inner_hid, + d_model, ) + dec_output = post_process_layer( + enc_attn_output, + ffd_output, + "dan", # residual connection + dropout + layer normalization + dropout_rate, ) + return dec_output + + +def decoder(dec_input, + enc_output, + dec_slf_attn_bias, + dec_enc_attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate=0., + slf_attn_pre_softmax_shape=None, + slf_attn_post_softmax_shape=None, + src_attn_pre_softmax_shape=None, + src_attn_post_softmax_shape=None): + """ + The decoder is composed of a stack of identical decoder_layer layers. + """ + for i in range(n_layer): + dec_output = decoder_layer( + dec_input, + enc_output, + dec_slf_attn_bias, + dec_enc_attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + slf_attn_pre_softmax_shape, + slf_attn_post_softmax_shape, + src_attn_pre_softmax_shape, + src_attn_post_softmax_shape, ) + dec_input = dec_output + return dec_output + + +def make_all_inputs(input_fields): + """ + Define the input data layers for the transformer model. 
+ """ + inputs = [] + for input_field in input_fields: + input_var = layers.data( + name=input_field, + shape=input_descs[input_field][0], + dtype=input_descs[input_field][1], + append_batch_size=False) + inputs.append(input_var) + fluid.default_startup_program().global_block().clone_variable( + input_var) + return inputs + + +def transformer( + src_vocab_size, + trg_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + label_smooth_eps, ): + enc_inputs = make_all_inputs(encoder_data_input_fields + + encoder_util_input_fields) + + enc_output = wrap_encoder( + src_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + enc_inputs, ) + + dec_inputs = make_all_inputs(decoder_data_input_fields[:-1] + + decoder_util_input_fields) + + predict = wrap_decoder( + trg_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + dec_inputs, + enc_output, ) + + # Padding index do not contribute to the total loss. The weights is used to + # cancel padding index in calculating the loss. + label, weights = make_all_inputs(label_data_input_fields) + if label_smooth_eps: + label = layers.label_smooth( + label=layers.one_hot( + input=label, depth=trg_vocab_size), + epsilon=label_smooth_eps) + cost = layers.softmax_with_cross_entropy( + logits=predict, + label=label, + soft_label=True if label_smooth_eps else False) + # cost = layers.softmax_with_cross_entropy(logits=predict, label=gold) + weighted_cost = cost * weights + sum_cost = layers.reduce_sum(weighted_cost) + token_num = layers.reduce_sum(weights) + avg_cost = sum_cost / token_num + return sum_cost, avg_cost, predict, token_num + + +def wrap_encoder(src_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + enc_inputs=None): + """ + The wrapper assembles together all needed layers for the encoder. + """ + if enc_inputs is None: + # This is used to implement independent encoder program in inference. + src_word, src_pos, src_slf_attn_bias, src_data_shape, \ + slf_attn_pre_softmax_shape, slf_attn_post_softmax_shape = \ + make_all_inputs(encoder_data_input_fields + + encoder_util_input_fields) + else: + src_word, src_pos, src_slf_attn_bias, src_data_shape, \ + slf_attn_pre_softmax_shape, slf_attn_post_softmax_shape = \ + enc_inputs + enc_input = prepare_encoder( + src_word, + src_pos, + src_vocab_size, + d_model, + max_length, + dropout_rate, + src_data_shape, ) + enc_output = encoder( + enc_input, + src_slf_attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + slf_attn_pre_softmax_shape, + slf_attn_post_softmax_shape, ) + return enc_output + + +def wrap_decoder(trg_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + dec_inputs=None, + enc_output=None): + """ + The wrapper assembles together all needed layers for the decoder. + """ + if dec_inputs is None: + # This is used to implement independent decoder program in inference. 
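The cost assembled in transformer() above combines label smoothing with a per-token weight mask so that padding positions contribute nothing. A NumPy sketch of the same objective (toy shapes; eps plays the role of label_smooth_eps):

import numpy as np

def smoothed_weighted_cost(logits, labels, weights, eps=0.1):
    vocab = logits.shape[-1]
    soft = np.eye(vocab)[labels] * (1. - eps) + eps / vocab   # label smoothing
    log_probs = logits - np.log(np.exp(logits).sum(-1, keepdims=True))
    cost = -(soft * log_probs).sum(-1)            # per-token cross entropy
    sum_cost = (cost * weights).sum()             # paddings have weight 0
    token_num = weights.sum()
    return sum_cost, sum_cost / token_num

logits = np.random.rand(6, 10)                    # 6 target positions, vocab 10
labels = np.array([1, 4, 2, 1, 0, 0])
weights = np.array([1., 1., 1., 1., 0., 0.])      # last two positions are padding
print(smoothed_weighted_cost(logits, labels, weights))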
+ trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \ + enc_output, trg_data_shape, slf_attn_pre_softmax_shape, \ + slf_attn_post_softmax_shape, src_attn_pre_softmax_shape, \ + src_attn_post_softmax_shape = make_all_inputs( + decoder_data_input_fields + decoder_util_input_fields) + else: + trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \ + trg_data_shape, slf_attn_pre_softmax_shape, \ + slf_attn_post_softmax_shape, src_attn_pre_softmax_shape, \ + src_attn_post_softmax_shape = dec_inputs + + dec_input = prepare_decoder( + trg_word, + trg_pos, + trg_vocab_size, + d_model, + max_length, + dropout_rate, + trg_data_shape, ) + dec_output = decoder( + dec_input, + enc_output, + trg_slf_attn_bias, + trg_src_attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + slf_attn_pre_softmax_shape, + slf_attn_post_softmax_shape, + src_attn_pre_softmax_shape, + src_attn_post_softmax_shape, ) + # Return logits for training and probs for inference. + predict = layers.reshape( + x=layers.fc(input=dec_output, + size=trg_vocab_size, + bias_attr=False, + num_flatten_dims=2), + shape=[-1, trg_vocab_size], + act="softmax" if dec_inputs is None else None) + return predict diff --git a/transformer/optim.py b/transformer/optim.py new file mode 100644 index 00000000..56b5af3b --- /dev/null +++ b/transformer/optim.py @@ -0,0 +1,37 @@ +import numpy as np + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + + +class LearningRateScheduler(object): + """ + Wrapper for learning rate scheduling as described in the Transformer paper. + LearningRateScheduler adapts the learning rate externally and the adapted + learning rate will be feeded into the main_program as input data. + """ + + def __init__(self, + d_model, + warmup_steps, + learning_rate=0.001, + current_steps=0, + name="learning_rate"): + self.current_steps = current_steps + self.warmup_steps = warmup_steps + self.d_model = d_model + self.static_lr = learning_rate + self.learning_rate = layers.create_global_var( + name=name, + shape=[1], + value=float(learning_rate), + dtype="float32", + persistable=True) + + def update_learning_rate(self): + self.current_steps += 1 + lr_value = np.power(self.d_model, -0.5) * np.min([ + np.power(self.current_steps, -0.5), + np.power(self.warmup_steps, -1.5) * self.current_steps + ]) + return np.array([lr_value], dtype="float32") diff --git a/transformer/run.xsh b/transformer/run.xsh new file mode 100755 index 00000000..2f6f1ffd --- /dev/null +++ b/transformer/run.xsh @@ -0,0 +1,8 @@ +#!/bin/bash + +export MKL_NUM_THREADS=1 +export OMP_NUM_THREADS=1 +cudaid=${transformer_cudaid:=0} # use 0-th card as default +export CUDA_VISIBLE_DEVICES=$cudaid + +FLAGS_benchmark=true python train.py diff --git a/transformer/train.py b/transformer/train.py new file mode 100644 index 00000000..5d10fbf3 --- /dev/null +++ b/transformer/train.py @@ -0,0 +1,278 @@ +import os +import time +import numpy as np + +import paddle +import paddle.fluid as fluid + +from model import transformer, position_encoding_init +from optim import LearningRateScheduler +from transformer_config import * +from continuous_evaluation import train_avg_ppl_kpi, train_pass_duration_kpi + + +def pad_batch_data(insts, + pad_idx, + n_head, + is_target=False, + is_label=False, + return_attn_bias=True, + return_max_len=True, + return_num_token=False): + """ + Pad the instances to the max sequence length in batch, and generate the + corresponding position data and attention bias. 
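The scheduler in optim.py above implements the Transformer warmup schedule; a one-function sketch makes its shape easy to check (note this version does not multiply in the static learning_rate it stores):

def noam_lr(step, d_model=512, warmup_steps=4000):
    # Linear warmup up to `warmup_steps`, then inverse-sqrt decay.
    return d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

for step in (1, 1000, 4000, 16000):
    print(step, round(noam_lr(step), 6))
# lr peaks at step 4000 (about 7e-4 for d_model=512) and decays afterwards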
+ """ + return_list = [] + max_len = max(len(inst) for inst in insts) + num_token = reduce( + lambda x, y: x + y, + [len(inst) for inst in insts]) if return_num_token else 0 + # Any token included in dict can be used to pad, since the paddings' loss + # will be masked out by weights and make no effect on parameter gradients. + inst_data = np.array( + [inst + [pad_idx] * (max_len - len(inst)) for inst in insts]) + return_list += [inst_data.astype("int64").reshape([-1, 1])] + if is_label: # label weight + inst_weight = np.array([[1.] * len(inst) + [0.] * (max_len - len(inst)) + for inst in insts]) + return_list += [inst_weight.astype("float32").reshape([-1, 1])] + else: # position data + inst_pos = np.array([ + range(1, len(inst) + 1) + [0] * (max_len - len(inst)) + for inst in insts + ]) + return_list += [inst_pos.astype("int64").reshape([-1, 1])] + if return_attn_bias: + if is_target: + # This is used to avoid attention on paddings and subsequent + # words. + slf_attn_bias_data = np.ones( + (inst_data.shape[0], max_len, max_len)) + slf_attn_bias_data = np.triu(slf_attn_bias_data, 1).reshape( + [-1, 1, max_len, max_len]) + slf_attn_bias_data = np.tile(slf_attn_bias_data, + [1, n_head, 1, 1]) * [-1e9] + else: + # This is used to avoid attention on paddings. + slf_attn_bias_data = np.array([[0] * len(inst) + [-1e9] * + (max_len - len(inst)) + for inst in insts]) + slf_attn_bias_data = np.tile( + slf_attn_bias_data.reshape([-1, 1, 1, max_len]), + [1, n_head, max_len, 1]) + return_list += [slf_attn_bias_data.astype("float32")] + if return_max_len: + return_list += [max_len] + if return_num_token: + return_list += [num_token] + return return_list if len(return_list) > 1 else return_list[0] + + +def prepare_batch_input(insts, data_input_names, util_input_names, src_pad_idx, + trg_pad_idx, n_head, d_model): + """ + Put all padded data needed by training into a dict. + """ + src_word, src_pos, src_slf_attn_bias, src_max_len = pad_batch_data( + [inst[0] for inst in insts], src_pad_idx, n_head, is_target=False) + trg_word, trg_pos, trg_slf_attn_bias, trg_max_len = pad_batch_data( + [inst[1] for inst in insts], trg_pad_idx, n_head, is_target=True) + trg_src_attn_bias = np.tile(src_slf_attn_bias[:, :, ::src_max_len, :], + [1, 1, trg_max_len, 1]).astype("float32") + + # These shape tensors are used in reshape_op. 
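A toy source-side call of pad_batch_data above (is_target=False, is_label=False), assuming pad_idx=1 to match this repo's convention of padding with eos_idx:

import numpy as np

insts = [[3, 7, 9], [4, 5]]        # two source sequences
pad_idx, n_head = 1, 2
max_len = max(len(inst) for inst in insts)
words = np.array([inst + [pad_idx] * (max_len - len(inst)) for inst in insts])
pos = np.array([list(range(1, len(inst) + 1)) + [0] * (max_len - len(inst))
                for inst in insts])
bias = np.array([[0.] * len(inst) + [-1e9] * (max_len - len(inst))
                 for inst in insts])
bias = np.tile(bias.reshape([-1, 1, 1, max_len]), [1, n_head, max_len, 1])
print(words)        # [[3 7 9] [4 5 1]]
print(pos)          # [[1 2 3] [1 2 0]] -- position 0 marks padding
print(bias.shape)   # (2, 2, 3, 3)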
+ src_data_shape = np.array([-1, src_max_len, d_model], dtype="int32") + trg_data_shape = np.array([-1, trg_max_len, d_model], dtype="int32") + src_slf_attn_pre_softmax_shape = np.array( + [-1, src_slf_attn_bias.shape[-1]], dtype="int32") + src_slf_attn_post_softmax_shape = np.array( + [-1] + list(src_slf_attn_bias.shape[1:]), dtype="int32") + trg_slf_attn_pre_softmax_shape = np.array( + [-1, trg_slf_attn_bias.shape[-1]], dtype="int32") + trg_slf_attn_post_softmax_shape = np.array( + [-1] + list(trg_slf_attn_bias.shape[1:]), dtype="int32") + trg_src_attn_pre_softmax_shape = np.array( + [-1, trg_src_attn_bias.shape[-1]], dtype="int32") + trg_src_attn_post_softmax_shape = np.array( + [-1] + list(trg_src_attn_bias.shape[1:]), dtype="int32") + + lbl_word, lbl_weight, num_token = pad_batch_data( + [inst[2] for inst in insts], + trg_pad_idx, + n_head, + is_target=False, + is_label=True, + return_attn_bias=False, + return_max_len=False, + return_num_token=True) + + data_input_dict = dict( + zip(data_input_names, [ + src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, + trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight + ])) + util_input_dict = dict( + zip(util_input_names, [ + src_data_shape, src_slf_attn_pre_softmax_shape, + src_slf_attn_post_softmax_shape, trg_data_shape, + trg_slf_attn_pre_softmax_shape, trg_slf_attn_post_softmax_shape, + trg_src_attn_pre_softmax_shape, trg_src_attn_post_softmax_shape + ])) + return data_input_dict, util_input_dict, np.asarray( + [num_token], dtype="float32") + + +def read_multiple(reader, count): + def __impl__(): + res = [] + for item in reader(): + res.append(item) + if len(res) == count: + yield res + res = [] + + if len(res) == count: + yield res + + return __impl__ + + +def main(): + place = fluid.CUDAPlace(0) if TrainTaskConfig.use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + + sum_cost, avg_cost, predict, token_num = transformer( + ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size, + ModelHyperParams.max_length + 1, ModelHyperParams.n_layer, + ModelHyperParams.n_head, ModelHyperParams.d_key, + ModelHyperParams.d_value, ModelHyperParams.d_model, + ModelHyperParams.d_inner_hid, ModelHyperParams.dropout, + TrainTaskConfig.label_smooth_eps) + + lr_scheduler = LearningRateScheduler(ModelHyperParams.d_model, + TrainTaskConfig.warmup_steps, + TrainTaskConfig.learning_rate) + optimizer = fluid.optimizer.Adam( + learning_rate=lr_scheduler.learning_rate, + beta1=TrainTaskConfig.beta1, + beta2=TrainTaskConfig.beta2, + epsilon=TrainTaskConfig.eps) + optimizer.minimize(sum_cost) + + dev_count = fluid.core.get_cuda_device_count() + + train_data = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.wmt16.train(ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size), + buf_size=100000), + batch_size=TrainTaskConfig.batch_size) + + # Program to do validation. 
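read_multiple above groups `count` consecutive mini-batches so that each device receives one per step; a trimmed, self-contained sketch (a trailing partial group is discarded, matching the original's behavior):

def read_multiple(reader, count):
    # Group `count` consecutive mini-batches, one per device.
    def __impl__():
        res = []
        for item in reader():
            res.append(item)
            if len(res) == count:
                yield res
                res = []
    return __impl__

batches = lambda: iter([0, 1, 2, 3, 4])
print(list(read_multiple(batches, 2)()))   # [[0, 1], [2, 3]]; the odd batch 4 is dropped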
+ test_program = fluid.default_main_program().clone() + with fluid.program_guard(test_program): + test_program = fluid.io.get_inference_program([avg_cost]) + val_data = paddle.batch( + paddle.dataset.wmt16.validation(ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size), + batch_size=TrainTaskConfig.batch_size) + + def test(exe): + test_total_cost = 0 + test_total_token = 0 + test_data = read_multiple(reader=val_data, count=dev_count) + for batch_id, data in enumerate(test_data()): + feed_list = [] + for place_id, data_buffer in enumerate(data): + data_input_dict, util_input_dict,_ = prepare_batch_input( + data_buffer, data_input_names, util_input_names, + ModelHyperParams.eos_idx, ModelHyperParams.eos_idx, + ModelHyperParams.n_head, ModelHyperParams.d_model) + feed_list.append( + dict(data_input_dict.items() + util_input_dict.items())) + + outs = exe.run( + feed=feed_list, + fetch_list=[sum_cost.name, token_num.name]) + sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[1]) + test_total_cost += sum_cost_val.sum() + test_total_token += token_num_val.sum() + test_avg_cost = test_total_cost / test_total_token + test_ppl = np.exp([min(test_avg_cost, 100)]) + return test_avg_cost, test_ppl + + # Initialize the parameters. + if TrainTaskConfig.ckpt_path: + fluid.io.load_persistables(exe, TrainTaskConfig.ckpt_path) + lr_scheduler.current_steps = TrainTaskConfig.start_step + else: + exe.run(fluid.framework.default_startup_program()) + + data_input_names = encoder_data_input_fields + decoder_data_input_fields[: + -1] + label_data_input_fields + util_input_names = encoder_util_input_fields + decoder_util_input_fields + + train_exe = fluid.ParallelExecutor( + use_cuda=TrainTaskConfig.use_gpu, loss_name=sum_cost.name, customize_loss_grad=True) + + test_exe = fluid.ParallelExecutor( + use_cuda=TrainTaskConfig.use_gpu, main_program=test_program, share_vars_from=train_exe) + + + init = False + train_data = read_multiple(reader=train_data, count=dev_count) + + for pass_id in xrange(TrainTaskConfig.pass_num): + pass_start_time = time.time() + for batch_id, data in enumerate(train_data()): + feed_list = [] + total_num_token = 0 + lr_rate = lr_scheduler.update_learning_rate() + for place_id, data_buffer in enumerate(data): + data_input_dict, util_input_dict, num_token = prepare_batch_input( + data_buffer, data_input_names, util_input_names, + ModelHyperParams.eos_idx, ModelHyperParams.eos_idx, + ModelHyperParams.n_head, ModelHyperParams.d_model) + total_num_token += num_token + feed_list.append( + dict(data_input_dict.items() + util_input_dict.items() + + {lr_scheduler.learning_rate.name: lr_rate}.items())) + + if not init: + for pos_enc_param_name in pos_enc_param_names: + tensor = position_encoding_init( + ModelHyperParams.max_length + 1, + ModelHyperParams.d_model) + feed_list[place_id][pos_enc_param_name] = tensor + for feed_dict in feed_list: + feed_dict[ + sum_cost.name + + "@GRAD"] = 1. 
/ total_num_token if TrainTaskConfig.use_avg_cost else np.asarray(
+                        [1.], dtype="float32")
+            outs = train_exe.run(fetch_list=[sum_cost.name, token_num.name],
+                                 feed=feed_list)
+            sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[1])
+            total_sum_cost = sum_cost_val.sum(
+            )  # sum the cost from multi devices
+            total_token_num = token_num_val.sum()
+            total_avg_cost = total_sum_cost / total_token_num
+            print("epoch: %d, batch: %d, sum loss: %f, avg loss: %f, ppl: %f" %
+                  (pass_id, batch_id, total_sum_cost, total_avg_cost,
+                   np.exp([min(total_avg_cost, 100)])))
+            init = True
+        # Validate and save the model for inference.
+        val_avg_cost, val_ppl = test(test_exe)
+        pass_end_time = time.time()
+        time_consumed = pass_end_time - pass_start_time
+        print("pass_id = " + str(pass_id) + " time_consumed = " + str(
+            time_consumed))
+        if pass_id == TrainTaskConfig.pass_num - 1:
+            train_avg_ppl_kpi.add_record(np.array(val_ppl, dtype='float32'))
+            train_pass_duration_kpi.add_record(time_consumed)
+            train_avg_ppl_kpi.persist()
+            train_pass_duration_kpi.persist()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/transformer/transformer_config.py b/transformer/transformer_config.py
new file mode 100644
index 00000000..4cd16924
--- /dev/null
+++ b/transformer/transformer_config.py
@@ -0,0 +1,186 @@
+class TrainTaskConfig(object):
+    use_gpu = True
+    # the epoch number to train.
+    pass_num = 5
+    # the number of sequences contained in a mini-batch.
+    batch_size = 64
+    # the hyper parameters for Adam optimizer.
+    learning_rate = 0.001
+    beta1 = 0.9
+    beta2 = 0.98
+    eps = 1e-9
+    # the parameters for learning rate scheduling.
+    warmup_steps = 4000
+    # the flag indicating whether to use average loss or sum loss when
+    # training.
+    use_avg_cost = True
+    # the weight used to mix up the ground-truth distribution and the fixed
+    # uniform distribution in label smoothing when training.
+    # Set this as zero if label smoothing is not wanted.
+    label_smooth_eps = 0.1
+    # the directory for saving trained models.
+    model_dir = "trained_models"
+    # the directory for saving checkpoints.
+    ckpt_dir = "trained_ckpts"
+    # the directory for loading a checkpoint.
+    # If provided, continue training from the checkpoint.
+    ckpt_path = None
+    # the parameter to initialize the learning rate scheduler.
+    # It should be provided if checkpoints are used, since the checkpoint
+    # doesn't include the training step counter currently.
+    start_step = 0
+
+
+class InferTaskConfig(object):
+    use_gpu = True
+    # the number of examples in one run for sequence generation.
+    batch_size = 10
+    # the parameters for beam search.
+    beam_size = 5
+    max_length = 30
+    # the number of decoded sentences to output.
+    n_best = 1
+    # the flags indicating whether to output the special tokens.
+    output_bos = False
+    output_eos = False
+    output_unk = False
+    # the directory for loading the trained model.
+    model_path = 'trained_models/pass_10.infer.model'
+
+
+class ModelHyperParams(object):
+    # This model directly uses paddle.dataset.wmt16 in which <bos>, <eos> and
+    # <unk> tokens have already been added. As for the <pad> token, any token
+    # included in dict can be used to pad, since the paddings' loss will be
+    # masked out and have no effect on parameter gradients.
+    # size of source word dictionary.
+    src_vocab_size = 10000
+    # size of target word dictionary
+    trg_vocab_size = 10000
+    # index for <bos> token
+    bos_idx = 0
+    # index for <eos> token
+    eos_idx = 1
+    # index for <unk> token
+    unk_idx = 2
+    # max length of sequences.
+ # The size of position encoding table should at least plus 1, since the + # sinusoid position encoding starts from 1 and 0 can be used as the padding + # token for position encoding. + max_length = 50 + # the dimension for word embeddings, which is also the last dimension of + # the input and output of multi-head attention, position-wise feed-forward + # networks, encoder and decoder. + d_model = 512 + # size of the hidden layer in position-wise feed-forward networks. + d_inner_hid = 1024 + # the dimension that keys are projected to for dot-product attention. + d_key = 64 + # the dimension that values are projected to for dot-product attention. + d_value = 64 + # number of head used in multi-head attention. + n_head = 8 + # number of sub-layers to be stacked in the encoder and decoder. + n_layer = 6 + # dropout rate used by all dropout layers. + dropout = 0.1 + + +# Here list the data shapes and data types of all inputs. +# The shapes here act as placeholder and are set to pass the infer-shape in +# compile time. +input_descs = { + # The actual data shape of src_word is: + # [batch_size * max_src_len_in_batch, 1] + "src_word": [(1 * (ModelHyperParams.max_length + 1), 1L), "int64"], + # The actual data shape of src_pos is: + # [batch_size * max_src_len_in_batch, 1] + "src_pos": [(1 * (ModelHyperParams.max_length + 1), 1L), "int64"], + # This input is used to remove attention weights on paddings in the + # encoder. + # The actual data shape of src_slf_attn_bias is: + # [batch_size, n_head, max_src_len_in_batch, max_src_len_in_batch] + "src_slf_attn_bias": + [(1, ModelHyperParams.n_head, (ModelHyperParams.max_length + 1), + (ModelHyperParams.max_length + 1)), "float32"], + # This shape input is used to reshape the output of embedding layer. + "src_data_shape": [(3L, ), "int32"], + # This shape input is used to reshape before softmax in self attention. + "src_slf_attn_pre_softmax_shape": [(2L, ), "int32"], + # This shape input is used to reshape after softmax in self attention. + "src_slf_attn_post_softmax_shape": [(4L, ), "int32"], + # The actual data shape of trg_word is: + # [batch_size * max_trg_len_in_batch, 1] + "trg_word": [(1 * (ModelHyperParams.max_length + 1), 1L), "int64"], + # The actual data shape of trg_pos is: + # [batch_size * max_trg_len_in_batch, 1] + "trg_pos": [(1 * (ModelHyperParams.max_length + 1), 1L), "int64"], + # This input is used to remove attention weights on paddings and + # subsequent words in the decoder. + # The actual data shape of trg_slf_attn_bias is: + # [batch_size, n_head, max_trg_len_in_batch, max_trg_len_in_batch] + "trg_slf_attn_bias": [(1, ModelHyperParams.n_head, + (ModelHyperParams.max_length + 1), + (ModelHyperParams.max_length + 1)), "float32"], + # This input is used to remove attention weights on paddings of the source + # input in the encoder-decoder attention. + # The actual data shape of trg_src_attn_bias is: + # [batch_size, n_head, max_trg_len_in_batch, max_src_len_in_batch] + "trg_src_attn_bias": [(1, ModelHyperParams.n_head, + (ModelHyperParams.max_length + 1), + (ModelHyperParams.max_length + 1)), "float32"], + # This shape input is used to reshape the output of embedding layer. + "trg_data_shape": [(3L, ), "int32"], + # This shape input is used to reshape before softmax in self attention. + "trg_slf_attn_pre_softmax_shape": [(2L, ), "int32"], + # This shape input is used to reshape after softmax in self attention. 
+ "trg_slf_attn_post_softmax_shape": [(4L, ), "int32"], + # This shape input is used to reshape before softmax in encoder-decoder + # attention. + "trg_src_attn_pre_softmax_shape": [(2L, ), "int32"], + # This shape input is used to reshape after softmax in encoder-decoder + # attention. + "trg_src_attn_post_softmax_shape": [(4L, ), "int32"], + # This input is used in independent decoder program for inference. + # The actual data shape of enc_output is: + # [batch_size, max_src_len_in_batch, d_model] + "enc_output": [(1, (ModelHyperParams.max_length + 1), + ModelHyperParams.d_model), "float32"], + # The actual data shape of label_word is: + # [batch_size * max_trg_len_in_batch, 1] + "lbl_word": [(1 * (ModelHyperParams.max_length + 1), 1L), "int64"], + # This input is used to mask out the loss of paddding tokens. + # The actual data shape of label_weight is: + # [batch_size * max_trg_len_in_batch, 1] + "lbl_weight": [(1 * (ModelHyperParams.max_length + 1), 1L), "float32"], +} + + +# Names of position encoding table which will be initialized externally. +pos_enc_param_names = ( + "src_pos_enc_table", + "trg_pos_enc_table", ) +# separated inputs for different usages. +encoder_data_input_fields = ( + "src_word", + "src_pos", + "src_slf_attn_bias", ) +encoder_util_input_fields = ( + "src_data_shape", + "src_slf_attn_pre_softmax_shape", + "src_slf_attn_post_softmax_shape", ) +decoder_data_input_fields = ( + "trg_word", + "trg_pos", + "trg_slf_attn_bias", + "trg_src_attn_bias", + "enc_output", ) +decoder_util_input_fields = ( + "trg_data_shape", + "trg_slf_attn_pre_softmax_shape", + "trg_slf_attn_post_softmax_shape", + "trg_src_attn_pre_softmax_shape", + "trg_src_attn_post_softmax_shape", ) +label_data_input_fields = ( + "lbl_word", + "lbl_weight", ) + From 161321db40ce7d71ab40c4bb7fd54449b52fd75e Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Wed, 25 Apr 2018 17:19:56 +0800 Subject: [PATCH 16/67] change diff ratio --- image_classification/continuous_evaluation.py | 6 +++--- lstm/continuous_evaluation.py | 4 ++-- object_detection/continuous_evaluation.py | 4 ++-- ocr_recognition/continuous_evaluation.py | 4 ++-- resnet30/continuous_evaluation.py | 4 ++-- resnet50/continuous_evaluation.py | 10 +++++----- seq2seq/continuous_evaluation.py | 4 ++-- transformer/continuous_evaluation.py | 4 ++-- vgg16/continuous_evaluation.py | 8 ++++---- 9 files changed, 24 insertions(+), 24 deletions(-) diff --git a/image_classification/continuous_evaluation.py b/image_classification/continuous_evaluation.py index 2c869f80..a15941b9 100644 --- a/image_classification/continuous_evaluation.py +++ b/image_classification/continuous_evaluation.py @@ -3,9 +3,9 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_acc_top1_kpi = AccKpi('train_acc_top1_kpi', 0.05, 0) -train_acc_top5_kpi = AccKpi('train_acc_top5_kpi', 0.05, 0) -train_cost_kpi = CostKpi('train_cost_kpi', 0.05, 0) +train_acc_top1_kpi = AccKpi('train_acc_top1_kpi', 0.1, 0) +train_acc_top5_kpi = AccKpi('train_acc_top5_kpi', 0.1, 0) +train_cost_kpi = CostKpi('train_cost_kpi', 0.1, 0) tracking_kpis = [ diff --git a/lstm/continuous_evaluation.py b/lstm/continuous_evaluation.py index fbec0024..660e137d 100644 --- a/lstm/continuous_evaluation.py +++ b/lstm/continuous_evaluation.py @@ -8,8 +8,8 @@ from kpi import CostKpi from kpi import DurationKpi -imdb_32_train_speed_kpi = AccKpi('imdb_32_train_speed', 0.05, 0) -imdb_32_gpu_memory_kpi = DurationKpi('imdb_32_gpu_memory', 0.02, 0) +imdb_32_train_speed_kpi = 
AccKpi('imdb_32_train_speed', 0.1, 0) +imdb_32_gpu_memory_kpi = DurationKpi('imdb_32_gpu_memory', 0.1, 0) tracking_kpis = [ diff --git a/object_detection/continuous_evaluation.py b/object_detection/continuous_evaluation.py index a49dd968..87b05c2b 100644 --- a/object_detection/continuous_evaluation.py +++ b/object_detection/continuous_evaluation.py @@ -3,8 +3,8 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_cost_kpi = AccKpi('train_cost', 0.05, 0) -train_speed_kpi = AccKpi('train_speed', 0.05, 0) +train_cost_kpi = AccKpi('train_cost', 0.1, 0) +train_speed_kpi = AccKpi('train_speed', 0.1, 0) tracking_kpis = [ diff --git a/ocr_recognition/continuous_evaluation.py b/ocr_recognition/continuous_evaluation.py index cb0c82af..15e4028f 100644 --- a/ocr_recognition/continuous_evaluation.py +++ b/ocr_recognition/continuous_evaluation.py @@ -3,8 +3,8 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_avg_loss_kpi = CostKpi('train_avg_loss', 0.05, 0) -train_seq_err_kpi = CostKpi('train_seq_err', 0.05, 0) +train_avg_loss_kpi = CostKpi('train_avg_loss', 0.1, 0) +train_seq_err_kpi = CostKpi('train_seq_err', 0.1, 0) tracking_kpis = [ diff --git a/resnet30/continuous_evaluation.py b/resnet30/continuous_evaluation.py index 283fcf48..b33c7877 100644 --- a/resnet30/continuous_evaluation.py +++ b/resnet30/continuous_evaluation.py @@ -3,8 +3,8 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi -train_cost_kpi = CostKpi('train_cost', 0.01) -train_duration_kpi = DurationKpi('train_duration', 0.04) +train_cost_kpi = CostKpi('train_cost', 0.1) +train_duration_kpi = DurationKpi('train_duration', 0.1) tracking_kpis = [ train_cost_kpi, diff --git a/resnet50/continuous_evaluation.py b/resnet50/continuous_evaluation.py index 4fa0d179..a7ef0852 100644 --- a/resnet50/continuous_evaluation.py +++ b/resnet50/continuous_evaluation.py @@ -3,12 +3,12 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -cifar10_128_train_acc_kpi = AccKpi('cifar10_128_train_acc', 0.05, 0) -cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.05, 0) -cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.05, 0) +cifar10_128_train_acc_kpi = AccKpi('cifar10_128_train_acc', 0.1, 0) +cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.1, 0) +cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.1, 0) -flowers_64_train_speed_kpi = AccKpi('flowers_64_train_speed', 0.05, 0) -flowers_64_gpu_memory_kpi = DurationKpi('flowers_64_gpu_memory', 0.05, 0) +flowers_64_train_speed_kpi = AccKpi('flowers_64_train_speed', 0.1, 0) +flowers_64_gpu_memory_kpi = DurationKpi('flowers_64_gpu_memory', 0.1, 0) tracking_kpis = [ cifar10_128_train_acc_kpi, diff --git a/seq2seq/continuous_evaluation.py b/seq2seq/continuous_evaluation.py index a1c86023..8b71cb9f 100644 --- a/seq2seq/continuous_evaluation.py +++ b/seq2seq/continuous_evaluation.py @@ -8,8 +8,8 @@ from kpi import CostKpi from kpi import DurationKpi -wmb_128_train_speed_kpi = AccKpi('wmb_128_train_speed', 0.03, 0) -wmb_128_gpu_memory_kpi = DurationKpi('wmb_128_gpu_memory', 0.02, 0) +wmb_128_train_speed_kpi = AccKpi('wmb_128_train_speed', 0.1, 0) +wmb_128_gpu_memory_kpi = DurationKpi('wmb_128_gpu_memory', 0.1, 0) tracking_kpis = [ diff --git a/transformer/continuous_evaluation.py b/transformer/continuous_evaluation.py index 9b4cb407..c4bb2a85 100644 --- a/transformer/continuous_evaluation.py +++ 
b/transformer/continuous_evaluation.py @@ -3,8 +3,8 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_avg_ppl_kpi = CostKpi('train_avg_ppl_kpi', 0.05, 0) -train_pass_duration_kpi = DurationKpi('train_pass_duration_kpi', 0.05, 0) +train_avg_ppl_kpi = CostKpi('train_avg_ppl_kpi', 0.1, 0) +train_pass_duration_kpi = DurationKpi('train_pass_duration_kpi', 0.1, 0) tracking_kpis = [ diff --git a/vgg16/continuous_evaluation.py b/vgg16/continuous_evaluation.py index c302462d..3492a1db 100644 --- a/vgg16/continuous_evaluation.py +++ b/vgg16/continuous_evaluation.py @@ -8,11 +8,11 @@ from kpi import CostKpi from kpi import DurationKpi -cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.02, 0) -cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.02, 0) +cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.1, 0) +cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.1, 0) -flowers_32_train_speed_kpi = AccKpi('flowers_32_train_speed', 0.02, 0) -flowers_32_gpu_memory_kpi = DurationKpi('flowers_32_gpu_memory', 0.02, 0) +flowers_32_train_speed_kpi = AccKpi('flowers_32_train_speed', 0.1, 0) +flowers_32_gpu_memory_kpi = DurationKpi('flowers_32_gpu_memory', 0.1, 0) tracking_kpis = [ cifar10_128_train_speed_kpi, From 4ed9b442138c0f549f27626c8af44934c455bb60 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Thu, 26 Apr 2018 11:28:08 +0800 Subject: [PATCH 17/67] Update continuous_evaluation.py --- resnet50/continuous_evaluation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resnet50/continuous_evaluation.py b/resnet50/continuous_evaluation.py index 2e7e5eb4..4fa0d179 100644 --- a/resnet50/continuous_evaluation.py +++ b/resnet50/continuous_evaluation.py @@ -5,10 +5,10 @@ cifar10_128_train_acc_kpi = AccKpi('cifar10_128_train_acc', 0.05, 0) cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.05, 0) -cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.01, 0) +cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.05, 0) flowers_64_train_speed_kpi = AccKpi('flowers_64_train_speed', 0.05, 0) -flowers_64_gpu_memory_kpi = DurationKpi('flowers_64_gpu_memory', 0.01, 0) +flowers_64_gpu_memory_kpi = DurationKpi('flowers_64_gpu_memory', 0.05, 0) tracking_kpis = [ cifar10_128_train_acc_kpi, From 1a9ed9e131767b6238a9a54f8d7748a896e6ad58 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Thu, 26 Apr 2018 11:30:03 +0800 Subject: [PATCH 18/67] Update flowers_64_gpu_memory_factor.txt --- resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt index 51f01560..24b11164 100644 --- a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt +++ b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt @@ -1 +1 @@ -[2772] \ No newline at end of file +[4772] From 5c1ff88b4460a467d4bfd26fc64be8eaeabbee25 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 26 Apr 2018 05:17:51 +0000 Subject: [PATCH 19/67] evalute [c02ba51de015cdfde510543a8cdacf66900f5ee9] --- resnet30/latest_kpis/train_cost_factor.txt | 20 +++++++++---------- .../flowers_64_gpu_memory_factor.txt | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/resnet30/latest_kpis/train_cost_factor.txt b/resnet30/latest_kpis/train_cost_factor.txt index fa232989..25ce5558 100644 --- 
a/resnet30/latest_kpis/train_cost_factor.txt +++ b/resnet30/latest_kpis/train_cost_factor.txt @@ -1,10 +1,10 @@ -[[3.4473652839660645]] -[[3.0381433963775635]] -[[2.664496660232544]] -[[2.4615819454193115]] -[[2.4176268577575684]] -[[2.4198837280273438]] -[[2.4023916721343994]] -[[2.5021331310272217]] -[[2.513448715209961]] -[[2.5482802391052246]] \ No newline at end of file +[[2.992563486099243]] +[[2.9548912048339844]] +[[2.6905064582824707]] +[[2.600815534591675]] +[[2.477994203567505]] +[[2.375748634338379]] +[[2.3474063873291016]] +[[2.298875570297241]] +[[2.3196849822998047]] +[[2.2911221981048584]] \ No newline at end of file diff --git a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt index 24b11164..ccc41822 100644 --- a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt +++ b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt @@ -1 +1 @@ -[4772] +[3686] \ No newline at end of file From b1305af005fe20309926ab9cb3e744afe4221d05 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Thu, 26 Apr 2018 15:39:13 +0800 Subject: [PATCH 20/67] Update train_cost_factor.txt --- resnet30/latest_kpis/train_cost_factor.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/resnet30/latest_kpis/train_cost_factor.txt b/resnet30/latest_kpis/train_cost_factor.txt index 25ce5558..4d6a8816 100644 --- a/resnet30/latest_kpis/train_cost_factor.txt +++ b/resnet30/latest_kpis/train_cost_factor.txt @@ -1,10 +1,10 @@ -[[2.992563486099243]] -[[2.9548912048339844]] -[[2.6905064582824707]] -[[2.600815534591675]] -[[2.477994203567505]] -[[2.375748634338379]] -[[2.3474063873291016]] -[[2.298875570297241]] -[[2.3196849822998047]] -[[2.2911221981048584]] \ No newline at end of file +[[3.992563486099243]] +[[3.9548912048339844]] +[[3.6905064582824707]] +[[3.600815534591675]] +[[3.477994203567505]] +[[3.375748634338379]] +[[3.3474063873291016]] +[[3.298875570297241]] +[[3.3196849822998047]] +[[3.2911221981048584]] From 1bd0d9648f558a8eb53d37793b0fba7b0c917169 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Thu, 26 Apr 2018 15:49:58 +0800 Subject: [PATCH 21/67] change model gen gpu memory function --- image_classification/run.xsh | 2 +- image_classification/train.py | 2 +- lstm/get_gpu_data.py | 44 ++++++++++++++++++++++++++++++++++ lstm/model.py | 29 ++++++++++++---------- lstm/run.xsh | 7 +++++- object_detection/run.xsh | 2 +- ocr_recognition/run.xsh | 2 +- resnet50/get_gpu_data.py | 2 +- resnet50/model.py | 29 ++++++++++++---------- resnet50/run.xsh | 6 +++++ seq2seq/get_gpu_data.py | 44 ++++++++++++++++++++++++++++++++++ seq2seq/model.py | 29 ++++++++++++---------- seq2seq/run.xsh | 5 ++++ vgg16/get_gpu_data.py | 45 +++++++++++++++++++++++++++++++++++ vgg16/model.py | 29 ++++++++++++---------- vgg16/run.xsh | 6 +++++ 16 files changed, 229 insertions(+), 54 deletions(-) create mode 100644 lstm/get_gpu_data.py create mode 100644 seq2seq/get_gpu_data.py create mode 100644 vgg16/get_gpu_data.py diff --git a/image_classification/run.xsh b/image_classification/run.xsh index 13db4406..ad83cce1 100755 --- a/image_classification/run.xsh +++ b/image_classification/run.xsh @@ -5,5 +5,5 @@ export OMP_NUM_THREADS=1 cudaid=${se_resnext_cudaid:=0} # use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid -FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python train.py --batch_size=64 +FLAGS_benchmark=true python train.py --batch_size=64 diff --git a/image_classification/train.py 
b/image_classification/train.py index ef5b53d4..99d775f0 100644 --- a/image_classification/train.py +++ b/image_classification/train.py @@ -304,7 +304,7 @@ def train_parallel_exe(args, args, learning_rate=0.1, batch_size=batch_size, - num_passes=5, + num_passes=10, init_model=None, parallel=True, use_nccl=True, diff --git a/lstm/get_gpu_data.py b/lstm/get_gpu_data.py new file mode 100644 index 00000000..7f06dd9e --- /dev/null +++ b/lstm/get_gpu_data.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +######################################################################## +# +# Copyright (c) 2018 Baidu.com, Inc. All Rights Reserved +# +######################################################################## + +""" +File: get_gpu_data.py +Author: paddle(paddle@baidu.com) +Date: 2018/04/02 15:57:14 +""" +import argparse +from continuous_evaluation import tracking_kpis + + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + '--batch_size', type=int, default=128, help="Batch size for training.") +parser.add_argument( + '--data_set', + type=str, + default='imdb', + help='Optional dataset for benchmark.') +args = parser.parse_args() + +def save_gpu_data(): + mem_list = [] + with open('memory.txt', 'r') as f: + for i , data in enumerate(f.readlines()): + if i == 0: + continue + mem_list.append(int(data.split("\n")[0].split(" ")[0])) + gpu_memory_factor = None + for kpi in tracking_kpis: + if kpi.name == '%s_%s_gpu_memory' % (args.data_set, args.batch_size): + gpu_memory_kpi = kpi + gpu_memory_kpi.add_record(max(mem_list)) + gpu_memory_kpi.persist() + +if __name__ == "__main__": + save_gpu_data() + diff --git a/lstm/model.py b/lstm/model.py index a79b8ec0..0f442ebf 100644 --- a/lstm/model.py +++ b/lstm/model.py @@ -10,6 +10,7 @@ import os import random import commands +import subprocess import threading import time import numpy as np @@ -239,18 +240,21 @@ def print_arguments(args): print('------------------------------------------------') -def collect_gpu_memory_data(mem_list): +def collect_gpu_memory_data(alive): """ collect the GPU memory data """ - while(True): - command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv" % args.gpu_id - status, output = commands.getstatusoutput(command) - if status != 0: - print('Get GPU memory data error') - else: - mem_list.append(int(output.split('\n')[1].split(' ')[0])) - time.sleep(0.01) + global is_alive + status, output = commands.getstatusoutput('rm -rf memory.txt') + if status == 0: + print('del memory.txt') + command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv -lms 1 > memory.txt" % args.gpu_id + p = subprocess.Popen(command, shell=True) + if p.pid < 0: + print('Get GPU memory data error') + while(is_alive): + time.sleep(1) + p.kill() def save_gpu_data(mem_list): @@ -265,10 +269,11 @@ def save_gpu_data(mem_list): if __name__ == '__main__': args = parse_args() print_arguments(args) - mem_data_list = [] + global is_alive + is_alive = True collect_memory_thread = threading.Thread( - target=collect_gpu_memory_data, args=(mem_data_list,)) + target=collect_gpu_memory_data, args=(is_alive,)) collect_memory_thread.setDaemon(True) collect_memory_thread.start() main() - save_gpu_data(mem_data_list) + is_alive = False diff --git a/lstm/run.xsh b/lstm/run.xsh index 5f140153..d184f534 100755 --- a/lstm/run.xsh +++ b/lstm/run.xsh @@ -2,8 +2,13 @@ export MKL_NUM_THREADS=1 export OMP_NUM_THREADS=1 -cudaid=${resnet50_cudaid:=0} # use 0-th card as default +cudaid=${lstm_cudaid:=0} # 
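The `collect_gpu_memory_data` rewrite above (and its clones in resnet50, seq2seq and vgg16 below) has three quirks worth noting: the `alive` argument handed to the thread is dead code, since a Python bool is passed by value and the loop actually re-reads the module-level `is_alive` (hence the `global is_alive` re-declaration); the `p.pid < 0` check can never fire, because `subprocess.Popen` raises `OSError` on failure rather than returning a negative pid; and `p.kill()` only terminates the `shell=True` wrapper, which is why the `run.xsh` scripts below have to `kill -9` any surviving `nvidia-smi` by hand. A minimal sketch of the same sampler with those issues addressed, assuming a POSIX host; this is our illustration, not code from the series:

```python
import os
import signal
import subprocess
import threading

class GpuMemorySampler(object):
    """Run `nvidia-smi ... -lms 1 > memory.txt` until told to stop (sketch)."""

    def __init__(self, gpu_id, out_path="memory.txt"):
        cmd = ("nvidia-smi --id=%s --query-compute-apps=used_memory "
               "--format=csv -lms 1 > %s" % (gpu_id, out_path))
        # preexec_fn=os.setsid puts the shell *and* its nvidia-smi child into
        # a fresh process group, so a single killpg() reaches both of them.
        self.proc = subprocess.Popen(cmd, shell=True, preexec_fn=os.setsid)
        self.stop_event = threading.Event()

    def run(self):
        # Replaces `while(is_alive): time.sleep(1)`: Event.wait() returns True
        # as soon as stop() is called, or False after the one-second timeout.
        while not self.stop_event.wait(timeout=1.0):
            pass
        os.killpg(os.getpgid(self.proc.pid), signal.SIGTERM)
        self.proc.wait()

    def stop(self):
        self.stop_event.set()
```

The main script would start `sampler.run` on a daemon thread before training and call `sampler.stop()` at the point where the current code sets `is_alive = False`.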
use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid #imdb 32 FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=32 --iterations=50 --gpu_id=$cudaid +python get_gpu_data.py --batch_size=32 --data_set=imdb +for pid in $(ps -ef | grep nvidia-smi | grep -v grep | cut -c 9-15); do + echo $pid + kill -9 $pid +done diff --git a/object_detection/run.xsh b/object_detection/run.xsh index cb961181..1ae0c11c 100755 --- a/object_detection/run.xsh +++ b/object_detection/run.xsh @@ -9,5 +9,5 @@ if [ ! -d "data/pascalvoc" ];then mkdir -p data/pascalvoc ./download.sh fi -FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python train.py --batch_size=64 --num_passes=1 +FLAGS_benchmark=true python train.py --batch_size=64 --num_passes=5 diff --git a/ocr_recognition/run.xsh b/ocr_recognition/run.xsh index 59e9d990..9b2bf69b 100755 --- a/ocr_recognition/run.xsh +++ b/ocr_recognition/run.xsh @@ -5,4 +5,4 @@ export OMP_NUM_THREADS=1 cudaid=${ocr_recognition_cudaid:=0} # use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid -FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python ctc_train.py --use_gpu=True --batch_size=128 --pass_num=1 --iterations=2000 +FLAGS_benchmark=true python ctc_train.py --use_gpu=True --batch_size=128 --pass_num=1 --iterations=10000 diff --git a/resnet50/get_gpu_data.py b/resnet50/get_gpu_data.py index 83e57e34..831c9eed 100644 --- a/resnet50/get_gpu_data.py +++ b/resnet50/get_gpu_data.py @@ -28,7 +28,7 @@ def save_gpu_data(): mem_list = [] - with open('mem.log', 'r') as f: + with open('memory.txt', 'r') as f: for i , data in enumerate(f.readlines()): if i == 0: continue diff --git a/resnet50/model.py b/resnet50/model.py index 27c7ee0f..4c9088fb 100644 --- a/resnet50/model.py +++ b/resnet50/model.py @@ -7,6 +7,7 @@ import numpy as np import time import commands +import subprocess import threading import cProfile @@ -332,18 +333,21 @@ def test(exe): print(s.getvalue()) -def collect_gpu_memory_data(mem_list): +def collect_gpu_memory_data(alive): """ collect the GPU memory data """ - while(True): - command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv" % args.gpu_id - status, output = commands.getstatusoutput(command) - if status != 0: - print('Get GPU memory data error') - else: - mem_list.append(int(output.split('\n')[1].split(' ')[0])) - time.sleep(0.01) + global is_alive + status, output = commands.getstatusoutput('rm -rf memory.txt') + if status == 0: + print('del memory.txt') + command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv -lms 1 > memory.txt" % args.gpu_id + p = subprocess.Popen(command, shell=True) + if p.pid < 0: + print('Get GPU memory data error') + while(is_alive): + time.sleep(1) + p.kill() def save_gpu_data(mem_list): @@ -362,12 +366,13 @@ def save_gpu_data(mem_list): } args = parse_args() print_arguments(args) + global is_alive + is_alive = True if args.data_format == 'NHWC': raise ValueError('Only support NCHW data_format now.') - mem_data_list = [] if args.device == 'GPU': collect_memory_thread = threading.Thread( - target=collect_gpu_memory_data, args=(mem_data_list,)) + target=collect_gpu_memory_data, args=(is_alive, )) collect_memory_thread.setDaemon(True) collect_memory_thread.start() if args.use_nvprof and args.device == 'GPU': @@ -375,4 +380,4 @@ def save_gpu_data(mem_list): run_benchmark(model_map[args.model], args) else: run_benchmark(model_map[args.model], args) - save_gpu_data(mem_data_list) + is_alive = False diff --git 
a/resnet50/run.xsh b/resnet50/run.xsh index 81f7847f..e18303da 100755 --- a/resnet50/run.xsh +++ b/resnet50/run.xsh @@ -7,6 +7,12 @@ export CUDA_VISIBLE_DEVICES=$cudaid # cifar10 128 FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=128 --data_set=cifar10 --model=resnet_cifar10 --pass_num=30 --gpu_id=$cudaid +python get_gpu_data.py --batch_size=128 --data_set=cifar10 #flowers 64 FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=64 --data_set=flowers --model=resnet_imagenet --pass_num=3 --gpu_id=$cudaid +python get_gpu_data.py --batch_size=64 --data_set=flowers +for pid in $(ps -ef | grep nvidia-smi | grep -v grep | cut -c 9-15); do + echo $pid + kill -9 $pid +done diff --git a/seq2seq/get_gpu_data.py b/seq2seq/get_gpu_data.py new file mode 100644 index 00000000..f5c43404 --- /dev/null +++ b/seq2seq/get_gpu_data.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +######################################################################## +# +# Copyright (c) 2018 Baidu.com, Inc. All Rights Reserved +# +######################################################################## + +""" +File: get_gpu_data.py +Author: paddle(paddle@baidu.com) +Date: 2018/04/02 15:57:14 +""" +import argparse +from continuous_evaluation import tracking_kpis + + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + '--batch_size', type=int, default=128, help="Batch size for training.") +parser.add_argument( + '--data_set', + type=str, + default='wmb', + help='Optional dataset for benchmark.') +args = parser.parse_args() + +def save_gpu_data(): + mem_list = [] + with open('memory.txt', 'r') as f: + for i , data in enumerate(f.readlines()): + if i == 0: + continue + mem_list.append(int(data.split("\n")[0].split(" ")[0])) + gpu_memory_factor = None + for kpi in tracking_kpis: + if kpi.name == '%s_%s_gpu_memory' % (args.data_set, args.batch_size): + gpu_memory_kpi = kpi + gpu_memory_kpi.add_record(max(mem_list)) + gpu_memory_kpi.persist() + +if __name__ == "__main__": + save_gpu_data() + diff --git a/seq2seq/model.py b/seq2seq/model.py index c2c9763e..c8534e5f 100644 --- a/seq2seq/model.py +++ b/seq2seq/model.py @@ -8,6 +8,7 @@ import time import distutils.util import commands +import subprocess import threading import paddle @@ -377,18 +378,21 @@ def print_arguments(args): print('------------------------------------------------') -def collect_gpu_memory_data(mem_list): +def collect_gpu_memory_data(alive): """ collect the GPU memory data """ - while(True): - command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv" % args.gpu_id - status, output = commands.getstatusoutput(command) - if status != 0: - print('Get GPU memory data error') - else: - mem_list.append(int(output.split('\n')[1].split(' ')[0])) - time.sleep(0.01) + global is_alive + status, output = commands.getstatusoutput('rm -rf memory.txt') + if status == 0: + print('del memory.txt') + command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv -lms 1 > memory.txt" % args.gpu_id + p = subprocess.Popen(command, shell=True) + if p.pid < 0: + print('Get GPU memory data error') + while(is_alive): + time.sleep(1) + p.kill() def save_gpu_data(mem_list): @@ -403,13 +407,14 @@ def save_gpu_data(mem_list): if __name__ == '__main__': args = parser.parse_args() print_arguments(args) - mem_data_list = [] + global is_alive + is_alive = True collect_memory_thread = threading.Thread( - 
target=collect_gpu_memory_data, args=(mem_data_list,)) + target=collect_gpu_memory_data, args=(is_alive,)) collect_memory_thread.setDaemon(True) collect_memory_thread.start() if args.infer_only: infer() else: train() - save_gpu_data(mem_data_list) + is_alive = False diff --git a/seq2seq/run.xsh b/seq2seq/run.xsh index 078cabe0..2e315c0c 100755 --- a/seq2seq/run.xsh +++ b/seq2seq/run.xsh @@ -7,3 +7,8 @@ export CUDA_VISIBLE_DEVICES=$cudaid #imdb 128 FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=128 --iterations=50 --gpu_id=$cudaid +python get_gpu_data.py --batch_size=128 --data_set=wmb +for pid in $(ps -ef | grep nvidia-smi | grep -v grep | cut -c 9-15); do + echo $pid + kill -9 $pid +done diff --git a/vgg16/get_gpu_data.py b/vgg16/get_gpu_data.py new file mode 100644 index 00000000..831c9eed --- /dev/null +++ b/vgg16/get_gpu_data.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +######################################################################## +# +# Copyright (c) 2018 Baidu.com, Inc. All Rights Reserved +# +######################################################################## + +""" +File: get_gpu_data.py +Author: paddle(paddle@baidu.com) +Date: 2018/04/02 15:57:14 +""" +import argparse +from continuous_evaluation import tracking_kpis + + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + '--batch_size', type=int, default=128, help="Batch size for training.") +parser.add_argument( + '--data_set', + type=str, + default='cifar10', + choices=['cifar10', 'flowers'], + help='Optional dataset for benchmark.') +args = parser.parse_args() + +def save_gpu_data(): + mem_list = [] + with open('memory.txt', 'r') as f: + for i , data in enumerate(f.readlines()): + if i == 0: + continue + mem_list.append(int(data.split("\n")[0].split(" ")[0])) + gpu_memory_factor = None + for kpi in tracking_kpis: + if kpi.name == '%s_%s_gpu_memory' % (args.data_set, args.batch_size): + gpu_memory_kpi = kpi + gpu_memory_kpi.add_record(max(mem_list)) + gpu_memory_kpi.persist() + +if __name__ == "__main__": + save_gpu_data() + diff --git a/vgg16/model.py b/vgg16/model.py index 93954ccb..25b5719b 100644 --- a/vgg16/model.py +++ b/vgg16/model.py @@ -7,6 +7,7 @@ import time import numpy as np import commands +import subprocess import threading import paddle import paddle.fluid as fluid @@ -246,18 +247,21 @@ def print_arguments(): print('------------------------------------------------') -def collect_gpu_memory_data(mem_list): +def collect_gpu_memory_data(alive): """ collect the GPU memory data """ - while(True): - command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv" % args.gpu_id - status, output = commands.getstatusoutput(command) - if status != 0: - print('Get GPU memory data error') - else: - mem_list.append(int(output.split('\n')[1].split(' ')[0])) - time.sleep(0.01) + global is_alive + status, output = commands.getstatusoutput('rm -rf memory.txt') + if status == 0: + print('del memory.txt') + command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv -lms 1 > memory.txt" % args.gpu_id + p = subprocess.Popen(command, shell=True) + if p.pid < 0: + print('Get GPU memory data error') + while(is_alive): + time.sleep(1) + p.kill() def save_gpu_data(mem_list): @@ -271,9 +275,10 @@ def save_gpu_data(mem_list): if __name__ == "__main__": print_arguments() - mem_data_list = [] - collect_memory_thread = threading.Thread(target=collect_gpu_memory_data, args=(mem_data_list,)) + 
global is_alive + is_alive = True + collect_memory_thread = threading.Thread(target=collect_gpu_memory_data, args=(is_alive,)) collect_memory_thread.setDaemon(True) collect_memory_thread.start() main() - save_gpu_data(mem_data_list) + is_alive = False diff --git a/vgg16/run.xsh b/vgg16/run.xsh index 4609cfc6..7984ae3f 100755 --- a/vgg16/run.xsh +++ b/vgg16/run.xsh @@ -8,6 +8,12 @@ export CUDA_VISIBLE_DEVICES=$cudaid #cifar10 128 FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=128 --data_set=cifar10 --iterations=300 --gpu_id=$cudaid +python get_gpu_data.py --batch_size=128 --data_set=cifar10 #flowers 32 FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.0 python model.py --device=GPU --batch_size=32 --data_set=flowers --iterations=100 --gpu_id=$cudaid +python get_gpu_data.py --batch_size=32 --data_set=flowers +for pid in $(ps -ef | grep nvidia-smi | grep -v grep | cut -c 9-15); do + echo $pid + kill -9 $pid +done From 23adafea7e96da3b1f4b71c24b577dd1c3a9dfad Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Thu, 26 Apr 2018 16:03:22 +0800 Subject: [PATCH 22/67] run.sh add FLAGS_fraction_of_gpu_memory_to_use=0.9 --- image_classification/run.xsh | 2 +- object_detection/run.xsh | 2 +- ocr_recognition/run.xsh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/image_classification/run.xsh b/image_classification/run.xsh index ad83cce1..55d4ca84 100755 --- a/image_classification/run.xsh +++ b/image_classification/run.xsh @@ -5,5 +5,5 @@ export OMP_NUM_THREADS=1 cudaid=${se_resnext_cudaid:=0} # use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid -FLAGS_benchmark=true python train.py --batch_size=64 +FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.9 python train.py --batch_size=64 diff --git a/object_detection/run.xsh b/object_detection/run.xsh index 1ae0c11c..d65d63a5 100755 --- a/object_detection/run.xsh +++ b/object_detection/run.xsh @@ -9,5 +9,5 @@ if [ ! 
-d "data/pascalvoc" ];then mkdir -p data/pascalvoc ./download.sh fi -FLAGS_benchmark=true python train.py --batch_size=64 --num_passes=5 +FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.9 python train.py --batch_size=64 --num_passes=5 diff --git a/ocr_recognition/run.xsh b/ocr_recognition/run.xsh index 9b2bf69b..73597253 100755 --- a/ocr_recognition/run.xsh +++ b/ocr_recognition/run.xsh @@ -5,4 +5,4 @@ export OMP_NUM_THREADS=1 cudaid=${ocr_recognition_cudaid:=0} # use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid -FLAGS_benchmark=true python ctc_train.py --use_gpu=True --batch_size=128 --pass_num=1 --iterations=10000 +FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.9 python ctc_train.py --use_gpu=True --batch_size=128 --pass_num=1 --iterations=10000 From 981f2252ab07cda02627737d80df7406841388d4 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Thu, 26 Apr 2018 16:08:44 +0800 Subject: [PATCH 23/67] change image_classification batch_size --- image_classification/run.xsh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/image_classification/run.xsh b/image_classification/run.xsh index 55d4ca84..d6c0aace 100755 --- a/image_classification/run.xsh +++ b/image_classification/run.xsh @@ -5,5 +5,5 @@ export OMP_NUM_THREADS=1 cudaid=${se_resnext_cudaid:=0} # use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid -FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.9 python train.py --batch_size=64 +FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.9 python train.py --batch_size=32 From b86895eb64487b79a35404ff85550e894f8a2fcf Mon Sep 17 00:00:00 2001 From: root Date: Thu, 26 Apr 2018 10:03:38 +0000 Subject: [PATCH 24/67] evalute [6d934560c75f920ebb618cf71810a07c9dca8e8d] --- resnet30/latest_kpis/train_cost_factor.txt | 20 +++++++++---------- .../cifar10_128_train_acc_factor.txt | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/resnet30/latest_kpis/train_cost_factor.txt b/resnet30/latest_kpis/train_cost_factor.txt index 4d6a8816..4988e426 100644 --- a/resnet30/latest_kpis/train_cost_factor.txt +++ b/resnet30/latest_kpis/train_cost_factor.txt @@ -1,10 +1,10 @@ -[[3.992563486099243]] -[[3.9548912048339844]] -[[3.6905064582824707]] -[[3.600815534591675]] -[[3.477994203567505]] -[[3.375748634338379]] -[[3.3474063873291016]] -[[3.298875570297241]] -[[3.3196849822998047]] -[[3.2911221981048584]] +[[2.9115326404571533]] +[[2.639657974243164]] +[[2.4914097785949707]] +[[2.3820090293884277]] +[[2.3763906955718994]] +[[2.3482797145843506]] +[[2.3738651275634766]] +[[2.3713483810424805]] +[[2.382887601852417]] +[[2.3706557750701904]] \ No newline at end of file diff --git a/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt b/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt index 9b7bb93b..5a653718 100644 --- a/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt +++ b/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt @@ -1 +1 @@ -[0.982226550579071] \ No newline at end of file +[0.996386706829071] \ No newline at end of file From fe0a80e931f367921a4f53f77fbbc78d525e0fe8 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Thu, 26 Apr 2018 18:49:31 +0800 Subject: [PATCH 25/67] change baseline --- .../latest_kpis/train_acc_top1_kpi_factor.txt | 2 +- .../latest_kpis/train_acc_top5_kpi_factor.txt | 2 +- .../latest_kpis/train_cost_kpi_factor.txt | 2 +- lstm/latest_kpis/imdb_32_gpu_memory_factor.txt | 2 +- lstm/latest_kpis/imdb_32_train_speed_factor.txt | 2 +- 
object_detection/continuous_evaluation.py | 2 +- object_detection/latest_kpis/train_cost_factor.txt | 2 +- object_detection/latest_kpis/train_speed_factor.txt | 2 +- object_detection/run.xsh | 8 ++++---- object_detection/train.py | 2 +- ocr_recognition/latest_kpis/train_avg_loss_factor.txt | 2 +- ocr_recognition/latest_kpis/train_seq_err_factor.txt | 2 +- resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt | 2 +- resnet50/latest_kpis/cifar10_128_train_acc_factor.txt | 2 +- resnet50/latest_kpis/cifar10_128_train_speed_factor.txt | 2 +- resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt | 2 +- resnet50/latest_kpis/flowers_64_train_speed_factor.txt | 2 +- seq2seq/latest_kpis/wmb_128_gpu_memory_factor.txt | 2 +- seq2seq/latest_kpis/wmb_128_train_speed_factor.txt | 2 +- transformer/latest_kpis/train_avg_ppl_kpi_factor.txt | 2 +- .../latest_kpis/train_pass_duration_kpi_factor.txt | 2 +- vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt | 2 +- vgg16/latest_kpis/cifar10_128_train_speed_factor.txt | 2 +- vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt | 2 +- vgg16/latest_kpis/flowers_32_train_speed_factor.txt | 2 +- 25 files changed, 28 insertions(+), 28 deletions(-) diff --git a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt index d82c6e08..f7a99049 100644 --- a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt +++ b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt @@ -1 +1 @@ -[0.17615979313850403] +[0.07615979313850403] diff --git a/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt index df4023a8..d322f77d 100644 --- a/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt +++ b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt @@ -1 +1 @@ -[0.1855798816680908] +[0.1055798816680908] diff --git a/image_classification/latest_kpis/train_cost_kpi_factor.txt b/image_classification/latest_kpis/train_cost_kpi_factor.txt index c934b4ef..05395bf3 100644 --- a/image_classification/latest_kpis/train_cost_kpi_factor.txt +++ b/image_classification/latest_kpis/train_cost_kpi_factor.txt @@ -1 +1 @@ -[12.8537046909332275] +[90.8537046909332275] diff --git a/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt b/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt index 7eca9b0b..5e7028fb 100644 --- a/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt +++ b/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt @@ -1 +1 @@ -[1172] \ No newline at end of file +[17172] diff --git a/lstm/latest_kpis/imdb_32_train_speed_factor.txt b/lstm/latest_kpis/imdb_32_train_speed_factor.txt index f05bc2ec..069acdeb 100644 --- a/lstm/latest_kpis/imdb_32_train_speed_factor.txt +++ b/lstm/latest_kpis/imdb_32_train_speed_factor.txt @@ -1 +1 @@ -[833.5032958984375] +[133.5032958984375] diff --git a/object_detection/continuous_evaluation.py b/object_detection/continuous_evaluation.py index 87b05c2b..0f811ae2 100644 --- a/object_detection/continuous_evaluation.py +++ b/object_detection/continuous_evaluation.py @@ -3,7 +3,7 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_cost_kpi = AccKpi('train_cost', 0.1, 0) +train_cost_kpi = CostKpi('train_cost', 0.1, 0) train_speed_kpi = AccKpi('train_speed', 0.1, 0) diff --git a/object_detection/latest_kpis/train_cost_factor.txt b/object_detection/latest_kpis/train_cost_factor.txt index f8af6455..a2c6ef46 100644 --- 
a/object_detection/latest_kpis/train_cost_factor.txt +++ b/object_detection/latest_kpis/train_cost_factor.txt @@ -1 +1 @@ -[50.4810791015625] +[80.4810791015625] diff --git a/object_detection/latest_kpis/train_speed_factor.txt b/object_detection/latest_kpis/train_speed_factor.txt index f5dffe7a..a2d8dc16 100644 --- a/object_detection/latest_kpis/train_speed_factor.txt +++ b/object_detection/latest_kpis/train_speed_factor.txt @@ -1 +1 @@ -[22.96385064308009] +[12.96385064308009] diff --git a/object_detection/run.xsh b/object_detection/run.xsh index d65d63a5..33efde97 100755 --- a/object_detection/run.xsh +++ b/object_detection/run.xsh @@ -5,9 +5,9 @@ export OMP_NUM_THREADS=1 cudaid=${object_detection_cudaid:=0} # use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid -if [ ! -d "data/pascalvoc" ];then - mkdir -p data/pascalvoc - ./download.sh -fi +#if [ ! -d "data/pascalvoc" ];then +# mkdir -p data/pascalvoc +# ./download.sh +#fi FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.9 python train.py --batch_size=64 --num_passes=5 diff --git a/object_detection/train.py b/object_detection/train.py index 6d0c38d4..d5ebfa30 100644 --- a/object_detection/train.py +++ b/object_detection/train.py @@ -323,7 +323,7 @@ def test(pass_id, best_map): args = parser.parse_args() print_arguments(args) - data_dir = 'data/pascalvoc' + data_dir = '/data/pascalvoc' train_file_list = 'trainval.txt' val_file_list = 'test.txt' label_file = 'label_list' diff --git a/ocr_recognition/latest_kpis/train_avg_loss_factor.txt b/ocr_recognition/latest_kpis/train_avg_loss_factor.txt index 1d7a3785..e1a834b0 100644 --- a/ocr_recognition/latest_kpis/train_avg_loss_factor.txt +++ b/ocr_recognition/latest_kpis/train_avg_loss_factor.txt @@ -1 +1 @@ -[3196.62353515625] \ No newline at end of file +[8196.62353515625] diff --git a/ocr_recognition/latest_kpis/train_seq_err_factor.txt b/ocr_recognition/latest_kpis/train_seq_err_factor.txt index ce2f11e8..07787fb2 100644 --- a/ocr_recognition/latest_kpis/train_seq_err_factor.txt +++ b/ocr_recognition/latest_kpis/train_seq_err_factor.txt @@ -1 +1 @@ -[128.0] \ No newline at end of file +[828.0] diff --git a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt index 7fa7ef20..7078a541 100644 --- a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt +++ b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt @@ -1 +1 @@ -[2508] +[5508] diff --git a/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt b/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt index c276983e..736ea74e 100644 --- a/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt +++ b/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt @@ -1 +1 @@ -[0.99755859375] \ No newline at end of file +[0.91755859375] diff --git a/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt b/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt index c4fe04e3..7fbefa39 100644 --- a/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt +++ b/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt @@ -1 +1 @@ -[404.4730529785156] \ No newline at end of file +[344.4730529785156] diff --git a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt index e6fd5379..22992733 100644 --- a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt +++ b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt @@ -1 +1 @@ -[31014] +[41014] diff --git a/resnet50/latest_kpis/flowers_64_train_speed_factor.txt 
b/resnet50/latest_kpis/flowers_64_train_speed_factor.txt index 8585524f..37c9f66b 100644 --- a/resnet50/latest_kpis/flowers_64_train_speed_factor.txt +++ b/resnet50/latest_kpis/flowers_64_train_speed_factor.txt @@ -1 +1 @@ -[78.7945785522461] \ No newline at end of file +[48.7945785522461] diff --git a/seq2seq/latest_kpis/wmb_128_gpu_memory_factor.txt b/seq2seq/latest_kpis/wmb_128_gpu_memory_factor.txt index 82cc4a46..e5d1e87f 100644 --- a/seq2seq/latest_kpis/wmb_128_gpu_memory_factor.txt +++ b/seq2seq/latest_kpis/wmb_128_gpu_memory_factor.txt @@ -1 +1 @@ -[3976] \ No newline at end of file +[6976] diff --git a/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt b/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt index 87d52665..9c67e0f0 100644 --- a/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt +++ b/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt @@ -1 +1 @@ -[2408.184814453125] \ No newline at end of file +[1408.184814453125] diff --git a/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt b/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt index 010dd732..71129a12 100644 --- a/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt +++ b/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt @@ -1 +1 @@ -[122.8649673461914] \ No newline at end of file +[422.8649673461914] diff --git a/transformer/latest_kpis/train_pass_duration_kpi_factor.txt b/transformer/latest_kpis/train_pass_duration_kpi_factor.txt index c7656c0d..ec75001d 100644 --- a/transformer/latest_kpis/train_pass_duration_kpi_factor.txt +++ b/transformer/latest_kpis/train_pass_duration_kpi_factor.txt @@ -1 +1 @@ -[804.0266849994659] \ No newline at end of file +[1004.0266849994659] diff --git a/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt b/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt index c38a317b..19f67e6e 100644 --- a/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt +++ b/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt @@ -1 +1 @@ -[2332] \ No newline at end of file +[6332] diff --git a/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt b/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt index fdab1315..92d74724 100644 --- a/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt +++ b/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt @@ -1 +1 @@ -[440.19189453125] \ No newline at end of file +[140.19189453125] diff --git a/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt b/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt index e5cd1eec..3267b65b 100644 --- a/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt +++ b/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt @@ -1 +1 @@ -[10606] \ No newline at end of file +[20606] diff --git a/vgg16/latest_kpis/flowers_32_train_speed_factor.txt b/vgg16/latest_kpis/flowers_32_train_speed_factor.txt index f15d1c81..bf4c61dc 100644 --- a/vgg16/latest_kpis/flowers_32_train_speed_factor.txt +++ b/vgg16/latest_kpis/flowers_32_train_speed_factor.txt @@ -1 +1 @@ -[26.19645690917969] +[16.19645690917969] From f35aefb4dff2b168a0214fa7e7bf020e57ad532f Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Thu, 26 Apr 2018 19:15:39 +0800 Subject: [PATCH 26/67] change image_classification passnum --- image_classification/latest_kpis/train_acc_top1_kpi_factor.txt | 2 +- image_classification/latest_kpis/train_acc_top5_kpi_factor.txt | 2 +- image_classification/latest_kpis/train_cost_kpi_factor.txt | 2 +- image_classification/train.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt index f7a99049..614ca030 100644 --- a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt +++ b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt @@ -1 +1 @@ -[0.07615979313850403] +[0.03615979313850403] diff --git a/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt index d322f77d..d46d55c2 100644 --- a/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt +++ b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt @@ -1 +1 @@ -[0.1055798816680908] +[0.0155798816680908] diff --git a/image_classification/latest_kpis/train_cost_kpi_factor.txt b/image_classification/latest_kpis/train_cost_kpi_factor.txt index 05395bf3..c6e4bc36 100644 --- a/image_classification/latest_kpis/train_cost_kpi_factor.txt +++ b/image_classification/latest_kpis/train_cost_kpi_factor.txt @@ -1 +1 @@ -[90.8537046909332275] +[190.8537046909332275] diff --git a/image_classification/train.py b/image_classification/train.py index 99d775f0..ef5b53d4 100644 --- a/image_classification/train.py +++ b/image_classification/train.py @@ -304,7 +304,7 @@ def train_parallel_exe(args, args, learning_rate=0.1, batch_size=batch_size, - num_passes=10, + num_passes=5, init_model=None, parallel=True, use_nccl=True, From f2078567dcb91c101bcbae6c57a151e9f84382eb Mon Sep 17 00:00:00 2001 From: root Date: Thu, 26 Apr 2018 11:57:50 +0000 Subject: [PATCH 27/67] evalute [c816121d11f7aed2939c5b859423883ce8bab050] --- resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt index ccc41822..8ddb2138 100644 --- a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt +++ b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt @@ -1 +1 @@ -[3686] \ No newline at end of file +[3114] \ No newline at end of file From ee4abc28c5706a71c1bad91f59a637889d76c880 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Fri, 27 Apr 2018 12:04:13 +0800 Subject: [PATCH 28/67] update ratio diff --- image_classification/continuous_evaluation.py | 6 +++--- lstm/continuous_evaluation.py | 4 ++-- object_detection/continuous_evaluation.py | 4 ++-- object_detection/run.xsh | 2 +- ocr_recognition/continuous_evaluation.py | 4 ++-- ocr_recognition/ctc_train.py | 10 +++++----- ocr_recognition/run.xsh | 2 +- resnet30/continuous_evaluation.py | 4 ++-- resnet50/continuous_evaluation.py | 10 +++++----- seq2seq/continuous_evaluation.py | 4 ++-- transformer/continuous_evaluation.py | 4 ++-- vgg16/continuous_evaluation.py | 8 ++++---- 12 files changed, 31 insertions(+), 31 deletions(-) diff --git a/image_classification/continuous_evaluation.py b/image_classification/continuous_evaluation.py index a15941b9..4033e7c2 100644 --- a/image_classification/continuous_evaluation.py +++ b/image_classification/continuous_evaluation.py @@ -3,9 +3,9 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_acc_top1_kpi = AccKpi('train_acc_top1_kpi', 0.1, 0) -train_acc_top5_kpi = AccKpi('train_acc_top5_kpi', 0.1, 0) -train_cost_kpi = CostKpi('train_cost_kpi', 0.1, 0) +train_acc_top1_kpi = AccKpi('train_acc_top1_kpi', 0.2, 0) +train_acc_top5_kpi = AccKpi('train_acc_top5_kpi', 0.2, 0) +train_cost_kpi = CostKpi('train_cost_kpi', 0.2, 0) 
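Two conventions show up in these `continuous_evaluation.py` hunks: the KPI class encodes the alarm direction (PATCH 25 earlier switched object_detection's `train_cost` from `AccKpi` to `CostKpi` for exactly this reason, since a cost should alarm when it rises, not when it falls), and the second constructor argument is the tolerated relative drift, widened here from 0.1 to 0.2. The `kpi` module itself lives outside this series (it is imported from `$ceroot`), so the following is only our reading of those semantics; the class bodies and the `is_ok` name are illustrative, not the real implementation:

```python
class CostKpi(object):
    """Lower is better: flag a regression when the value rises past the band (sketch)."""

    def __init__(self, name, ratio, skip=0):
        # `skip` mirrors the third constructor argument seen above;
        # its actual meaning is not visible in this series.
        self.name, self.ratio, self.skip = name, ratio, skip

    def is_ok(self, actual, baseline):
        # assumes a positive baseline, which holds for losses and durations
        return actual <= baseline * (1 + self.ratio)


class AccKpi(CostKpi):
    """Higher is better: flag a regression when the value falls below the band (sketch)."""

    def is_ok(self, actual, baseline):
        return actual >= baseline * (1 - self.ratio)
```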
tracking_kpis = [ diff --git a/lstm/continuous_evaluation.py b/lstm/continuous_evaluation.py index 660e137d..1a6a119a 100644 --- a/lstm/continuous_evaluation.py +++ b/lstm/continuous_evaluation.py @@ -8,8 +8,8 @@ from kpi import CostKpi from kpi import DurationKpi -imdb_32_train_speed_kpi = AccKpi('imdb_32_train_speed', 0.1, 0) -imdb_32_gpu_memory_kpi = DurationKpi('imdb_32_gpu_memory', 0.1, 0) +imdb_32_train_speed_kpi = AccKpi('imdb_32_train_speed', 0.2, 0) +imdb_32_gpu_memory_kpi = DurationKpi('imdb_32_gpu_memory', 0.2, 0) tracking_kpis = [ diff --git a/object_detection/continuous_evaluation.py b/object_detection/continuous_evaluation.py index 0f811ae2..47e904a3 100644 --- a/object_detection/continuous_evaluation.py +++ b/object_detection/continuous_evaluation.py @@ -3,8 +3,8 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_cost_kpi = CostKpi('train_cost', 0.1, 0) -train_speed_kpi = AccKpi('train_speed', 0.1, 0) +train_cost_kpi = CostKpi('train_cost', 0.2, 0) +train_speed_kpi = AccKpi('train_speed', 0.2, 0) tracking_kpis = [ diff --git a/object_detection/run.xsh b/object_detection/run.xsh index 33efde97..0e864a15 100755 --- a/object_detection/run.xsh +++ b/object_detection/run.xsh @@ -9,5 +9,5 @@ export CUDA_VISIBLE_DEVICES=$cudaid # mkdir -p data/pascalvoc # ./download.sh #fi -FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.9 python train.py --batch_size=64 --num_passes=5 +FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.9 python train.py --batch_size=64 --num_passes=2 diff --git a/ocr_recognition/continuous_evaluation.py b/ocr_recognition/continuous_evaluation.py index 15e4028f..8a75c496 100644 --- a/ocr_recognition/continuous_evaluation.py +++ b/ocr_recognition/continuous_evaluation.py @@ -3,8 +3,8 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_avg_loss_kpi = CostKpi('train_avg_loss', 0.1, 0) -train_seq_err_kpi = CostKpi('train_seq_err', 0.1, 0) +train_avg_loss_kpi = CostKpi('train_avg_loss', 0.2, 0) +train_seq_err_kpi = CostKpi('train_seq_err', 0.2, 0) tracking_kpis = [ diff --git a/ocr_recognition/ctc_train.py b/ocr_recognition/ctc_train.py index 3b3285e5..9d3d35bc 100644 --- a/ocr_recognition/ctc_train.py +++ b/ocr_recognition/ctc_train.py @@ -101,12 +101,12 @@ def train(args, data_reader=ctc_reader): total_loss / (batch_id * args.batch_size), total_seq_error / (batch_id * args.batch_size)) sys.stdout.flush() - if batch_id == args.iterations - 1: + if batch_id == args.iterations - 1: avg_seq_err = batch_seq_error[0] / args.batch_size - avg_loss = batch_loss[0] / args.batch_size - train_avg_loss_kpi.add_record(np.array(avg_loss, dtype='float32')) - train_seq_err_kpi.add_record(np.array(avg_seq_err, dtype='float32')) - break + avg_loss = batch_loss[0] / args.batch_size + train_avg_loss_kpi.add_record(np.array(avg_loss, dtype='float32')) + train_seq_err_kpi.add_record(np.array(avg_seq_err, dtype='float32')) + break # evaluate if batch_id % args.eval_period == 0: with model_average.apply(exe): diff --git a/ocr_recognition/run.xsh b/ocr_recognition/run.xsh index 73597253..ebbe41c7 100755 --- a/ocr_recognition/run.xsh +++ b/ocr_recognition/run.xsh @@ -5,4 +5,4 @@ export OMP_NUM_THREADS=1 cudaid=${ocr_recognition_cudaid:=0} # use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid -FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.9 python ctc_train.py --use_gpu=True --batch_size=128 --pass_num=1 --iterations=10000 +FLAGS_benchmark=true 
FLAGS_fraction_of_gpu_memory_to_use=0.9 python ctc_train.py --use_gpu=True --batch_size=128 --pass_num=1 --iterations=3000 diff --git a/resnet30/continuous_evaluation.py b/resnet30/continuous_evaluation.py index b33c7877..800645c2 100644 --- a/resnet30/continuous_evaluation.py +++ b/resnet30/continuous_evaluation.py @@ -3,8 +3,8 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi -train_cost_kpi = CostKpi('train_cost', 0.1) -train_duration_kpi = DurationKpi('train_duration', 0.1) +train_cost_kpi = CostKpi('train_cost', 0.5) +train_duration_kpi = DurationKpi('train_duration', 0.2) tracking_kpis = [ train_cost_kpi, diff --git a/resnet50/continuous_evaluation.py b/resnet50/continuous_evaluation.py index a7ef0852..d88bdda8 100644 --- a/resnet50/continuous_evaluation.py +++ b/resnet50/continuous_evaluation.py @@ -3,12 +3,12 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -cifar10_128_train_acc_kpi = AccKpi('cifar10_128_train_acc', 0.1, 0) -cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.1, 0) -cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.1, 0) +cifar10_128_train_acc_kpi = AccKpi('cifar10_128_train_acc', 0.2, 0) +cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.2, 0) +cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.2, 0) -flowers_64_train_speed_kpi = AccKpi('flowers_64_train_speed', 0.1, 0) -flowers_64_gpu_memory_kpi = DurationKpi('flowers_64_gpu_memory', 0.1, 0) +flowers_64_train_speed_kpi = AccKpi('flowers_64_train_speed', 0.2, 0) +flowers_64_gpu_memory_kpi = DurationKpi('flowers_64_gpu_memory', 0.2, 0) tracking_kpis = [ cifar10_128_train_acc_kpi, diff --git a/seq2seq/continuous_evaluation.py b/seq2seq/continuous_evaluation.py index 8b71cb9f..1bf21055 100644 --- a/seq2seq/continuous_evaluation.py +++ b/seq2seq/continuous_evaluation.py @@ -8,8 +8,8 @@ from kpi import CostKpi from kpi import DurationKpi -wmb_128_train_speed_kpi = AccKpi('wmb_128_train_speed', 0.1, 0) -wmb_128_gpu_memory_kpi = DurationKpi('wmb_128_gpu_memory', 0.1, 0) +wmb_128_train_speed_kpi = AccKpi('wmb_128_train_speed', 0.2, 0) +wmb_128_gpu_memory_kpi = DurationKpi('wmb_128_gpu_memory', 0.2, 0) tracking_kpis = [ diff --git a/transformer/continuous_evaluation.py b/transformer/continuous_evaluation.py index c4bb2a85..ff5423aa 100644 --- a/transformer/continuous_evaluation.py +++ b/transformer/continuous_evaluation.py @@ -3,8 +3,8 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_avg_ppl_kpi = CostKpi('train_avg_ppl_kpi', 0.1, 0) -train_pass_duration_kpi = DurationKpi('train_pass_duration_kpi', 0.1, 0) +train_avg_ppl_kpi = CostKpi('train_avg_ppl_kpi', 0.2, 0) +train_pass_duration_kpi = DurationKpi('train_pass_duration_kpi', 0.2, 0) tracking_kpis = [ diff --git a/vgg16/continuous_evaluation.py b/vgg16/continuous_evaluation.py index 3492a1db..d2b44d6f 100644 --- a/vgg16/continuous_evaluation.py +++ b/vgg16/continuous_evaluation.py @@ -8,11 +8,11 @@ from kpi import CostKpi from kpi import DurationKpi -cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.1, 0) -cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.1, 0) +cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.2, 0) +cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.2, 0) -flowers_32_train_speed_kpi = AccKpi('flowers_32_train_speed', 0.1, 0) -flowers_32_gpu_memory_kpi = DurationKpi('flowers_32_gpu_memory', 0.1, 0) 
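Concretely, widening a ratio from 0.1 to 0.2 doubles the accepted drift around each baseline record, and resnet30's `train_cost` band grows further still, to 0.5. Under the relative-band reading sketched above (our assumption, not the kpi module's documented behavior), the arithmetic for a cost record of about 2.45, in the range of the resnet30 baselines earlier in the series, works out to:

```python
baseline = 2.45  # roughly one resnet30 train_cost record
for ratio in (0.1, 0.2, 0.5):
    print("ratio %.1f accepts costs up to %.3f" % (ratio, baseline * (1 + ratio)))
# ratio 0.1 accepts costs up to 2.695
# ratio 0.2 accepts costs up to 2.940
# ratio 0.5 accepts costs up to 3.675
```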
+flowers_32_train_speed_kpi = AccKpi('flowers_32_train_speed', 0.2, 0) +flowers_32_gpu_memory_kpi = DurationKpi('flowers_32_gpu_memory', 0.2, 0) tracking_kpis = [ cifar10_128_train_speed_kpi, From 19d8124085338a0f078c37f87872e436b239f2ac Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Fri, 27 Apr 2018 12:11:28 +0800 Subject: [PATCH 29/67] change ocr_recognition/ctc_train.py --- ocr_recognition/ctc_train.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ocr_recognition/ctc_train.py b/ocr_recognition/ctc_train.py index 9d3d35bc..be6e02f7 100644 --- a/ocr_recognition/ctc_train.py +++ b/ocr_recognition/ctc_train.py @@ -101,12 +101,12 @@ def train(args, data_reader=ctc_reader): total_loss / (batch_id * args.batch_size), total_seq_error / (batch_id * args.batch_size)) sys.stdout.flush() - if batch_id == args.iterations - 1: + if batch_id == args.iterations - 1: avg_seq_err = batch_seq_error[0] / args.batch_size - avg_loss = batch_loss[0] / args.batch_size - train_avg_loss_kpi.add_record(np.array(avg_loss, dtype='float32')) - train_seq_err_kpi.add_record(np.array(avg_seq_err, dtype='float32')) - break + avg_loss = batch_loss[0] / args.batch_size + train_avg_loss_kpi.add_record(np.array(avg_loss, dtype='float32')) + train_seq_err_kpi.add_record(np.array(avg_seq_err, dtype='float32')) + break # evaluate if batch_id % args.eval_period == 0: with model_average.apply(exe): From 5938e7e1ea086183b43b4efaeb755936357d98c5 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Fri, 27 Apr 2018 13:23:12 +0800 Subject: [PATCH 30/67] disable model ocr_recognition --- .../continuous_evaluation.py | 0 .../crnn_ctc_model.py | 0 .../ctc_reader.py | 0 {ocr_recognition => __ocr_recognition}/ctc_train.py | 0 {ocr_recognition => __ocr_recognition}/eval.py | 0 .../images/demo.jpg | Bin .../images/train.jpg | Bin {ocr_recognition => __ocr_recognition}/inference.py | 0 .../latest_kpis/train_avg_loss_factor.txt | 0 .../latest_kpis/train_seq_err_factor.txt | 0 {ocr_recognition => __ocr_recognition}/run.xsh | 0 {ocr_recognition => __ocr_recognition}/utility.py | 0 12 files changed, 0 insertions(+), 0 deletions(-) rename {ocr_recognition => __ocr_recognition}/continuous_evaluation.py (100%) rename {ocr_recognition => __ocr_recognition}/crnn_ctc_model.py (100%) rename {ocr_recognition => __ocr_recognition}/ctc_reader.py (100%) rename {ocr_recognition => __ocr_recognition}/ctc_train.py (100%) rename {ocr_recognition => __ocr_recognition}/eval.py (100%) rename {ocr_recognition => __ocr_recognition}/images/demo.jpg (100%) rename {ocr_recognition => __ocr_recognition}/images/train.jpg (100%) rename {ocr_recognition => __ocr_recognition}/inference.py (100%) rename {ocr_recognition => __ocr_recognition}/latest_kpis/train_avg_loss_factor.txt (100%) rename {ocr_recognition => __ocr_recognition}/latest_kpis/train_seq_err_factor.txt (100%) rename {ocr_recognition => __ocr_recognition}/run.xsh (100%) rename {ocr_recognition => __ocr_recognition}/utility.py (100%) diff --git a/ocr_recognition/continuous_evaluation.py b/__ocr_recognition/continuous_evaluation.py similarity index 100% rename from ocr_recognition/continuous_evaluation.py rename to __ocr_recognition/continuous_evaluation.py diff --git a/ocr_recognition/crnn_ctc_model.py b/__ocr_recognition/crnn_ctc_model.py similarity index 100% rename from ocr_recognition/crnn_ctc_model.py rename to __ocr_recognition/crnn_ctc_model.py diff --git a/ocr_recognition/ctc_reader.py b/__ocr_recognition/ctc_reader.py similarity index 100% 
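PATCH 30 above disables the ocr_recognition model by renaming its directory to `__ocr_recognition` rather than deleting it, which keeps the code and its baselines in the tree while presumably hiding it from whatever walks the repository for runnable models. The runner is not part of this series, so the discovery logic below is purely hypothetical, a sketch of why a leading-underscore rename would be enough:

```python
import os

def discover_models(root="."):
    """Yield model directories that look runnable (hypothetical CE runner logic)."""
    for name in sorted(os.listdir(root)):
        if name.startswith("_"):  # skips __ocr_recognition after the rename
            continue
        if os.path.isfile(os.path.join(root, name, "run.xsh")):
            yield name
```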
rename from ocr_recognition/ctc_reader.py rename to __ocr_recognition/ctc_reader.py diff --git a/ocr_recognition/ctc_train.py b/__ocr_recognition/ctc_train.py similarity index 100% rename from ocr_recognition/ctc_train.py rename to __ocr_recognition/ctc_train.py diff --git a/ocr_recognition/eval.py b/__ocr_recognition/eval.py similarity index 100% rename from ocr_recognition/eval.py rename to __ocr_recognition/eval.py diff --git a/ocr_recognition/images/demo.jpg b/__ocr_recognition/images/demo.jpg similarity index 100% rename from ocr_recognition/images/demo.jpg rename to __ocr_recognition/images/demo.jpg diff --git a/ocr_recognition/images/train.jpg b/__ocr_recognition/images/train.jpg similarity index 100% rename from ocr_recognition/images/train.jpg rename to __ocr_recognition/images/train.jpg diff --git a/ocr_recognition/inference.py b/__ocr_recognition/inference.py similarity index 100% rename from ocr_recognition/inference.py rename to __ocr_recognition/inference.py diff --git a/ocr_recognition/latest_kpis/train_avg_loss_factor.txt b/__ocr_recognition/latest_kpis/train_avg_loss_factor.txt similarity index 100% rename from ocr_recognition/latest_kpis/train_avg_loss_factor.txt rename to __ocr_recognition/latest_kpis/train_avg_loss_factor.txt diff --git a/ocr_recognition/latest_kpis/train_seq_err_factor.txt b/__ocr_recognition/latest_kpis/train_seq_err_factor.txt similarity index 100% rename from ocr_recognition/latest_kpis/train_seq_err_factor.txt rename to __ocr_recognition/latest_kpis/train_seq_err_factor.txt diff --git a/ocr_recognition/run.xsh b/__ocr_recognition/run.xsh similarity index 100% rename from ocr_recognition/run.xsh rename to __ocr_recognition/run.xsh diff --git a/ocr_recognition/utility.py b/__ocr_recognition/utility.py similarity index 100% rename from ocr_recognition/utility.py rename to __ocr_recognition/utility.py From c6941e37592b69ccb74b6a9f1ffabf3e730b741c Mon Sep 17 00:00:00 2001 From: ceboot Date: Fri, 27 Apr 2018 12:12:27 +0000 Subject: [PATCH 31/67] evalute [01da25845e2c0a45d5ab6ece400c980c199d4412] --- .../latest_kpis/train_acc_top1_kpi_factor.txt | 2 +- .../latest_kpis/train_acc_top5_kpi_factor.txt | 2 +- .../latest_kpis/train_cost_kpi_factor.txt | 2 +- .../latest_kpis/imdb_32_gpu_memory_factor.txt | 2 +- .../imdb_32_train_speed_factor.txt | 2 +- .../latest_kpis/train_cost_factor.txt | 2 +- .../latest_kpis/train_speed_factor.txt | 2 +- resnet30/latest_kpis/train_cost_factor.txt | 20 +++++++++---------- .../latest_kpis/train_duration_factor.txt | 20 +++++++++---------- .../cifar10_128_gpu_memory_factor.txt | 2 +- .../cifar10_128_train_speed_factor.txt | 2 +- .../flowers_64_gpu_memory_factor.txt | 2 +- .../flowers_64_train_speed_factor.txt | 2 +- .../wmb_128_train_speed_factor.txt | 2 +- .../latest_kpis/train_avg_ppl_kpi_factor.txt | 2 +- .../train_pass_duration_kpi_factor.txt | 2 +- .../cifar10_128_gpu_memory_factor.txt | 2 +- .../cifar10_128_train_speed_factor.txt | 2 +- .../flowers_32_gpu_memory_factor.txt | 2 +- .../flowers_32_train_speed_factor.txt | 2 +- 20 files changed, 38 insertions(+), 38 deletions(-) diff --git a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt index 614ca030..dbbb970d 100644 --- a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt +++ b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt @@ -1 +1 @@ -[0.03615979313850403] +[0.23652850091457367] \ No newline at end of file diff --git 
a/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt index d46d55c2..e7112b7b 100644 --- a/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt +++ b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt @@ -1 +1 @@ -[0.0155798816680908] +[0.5419688820838928] \ No newline at end of file diff --git a/image_classification/latest_kpis/train_cost_kpi_factor.txt b/image_classification/latest_kpis/train_cost_kpi_factor.txt index c6e4bc36..384913f6 100644 --- a/image_classification/latest_kpis/train_cost_kpi_factor.txt +++ b/image_classification/latest_kpis/train_cost_kpi_factor.txt @@ -1 +1 @@ -[190.8537046909332275] +[2.975904941558838] \ No newline at end of file diff --git a/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt b/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt index 5e7028fb..4fb6348d 100644 --- a/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt +++ b/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt @@ -1 +1 @@ -[17172] +[1560] \ No newline at end of file diff --git a/lstm/latest_kpis/imdb_32_train_speed_factor.txt b/lstm/latest_kpis/imdb_32_train_speed_factor.txt index 069acdeb..7ab232fd 100644 --- a/lstm/latest_kpis/imdb_32_train_speed_factor.txt +++ b/lstm/latest_kpis/imdb_32_train_speed_factor.txt @@ -1 +1 @@ -[133.5032958984375] +[883.2451171875] \ No newline at end of file diff --git a/object_detection/latest_kpis/train_cost_factor.txt b/object_detection/latest_kpis/train_cost_factor.txt index a2c6ef46..9bfed99c 100644 --- a/object_detection/latest_kpis/train_cost_factor.txt +++ b/object_detection/latest_kpis/train_cost_factor.txt @@ -1 +1 @@ -[80.4810791015625] +[8.533137321472168] \ No newline at end of file diff --git a/object_detection/latest_kpis/train_speed_factor.txt b/object_detection/latest_kpis/train_speed_factor.txt index a2d8dc16..000d4641 100644 --- a/object_detection/latest_kpis/train_speed_factor.txt +++ b/object_detection/latest_kpis/train_speed_factor.txt @@ -1 +1 @@ -[12.96385064308009] +[73.42477785941152] \ No newline at end of file diff --git a/resnet30/latest_kpis/train_cost_factor.txt b/resnet30/latest_kpis/train_cost_factor.txt index 040e98ef..29d8c699 100644 --- a/resnet30/latest_kpis/train_cost_factor.txt +++ b/resnet30/latest_kpis/train_cost_factor.txt @@ -1,10 +1,10 @@ -[[100.0]] -[[100.0]] -[[100.0]] -[[100.0]] -[[100.0]] -[[100.0]] -[[100.0]] -[[100.0]] -[[100.0]] -[[100.0]] +[[3.121091365814209]] +[[2.9679136276245117]] +[[2.664355516433716]] +[[2.5711519718170166]] +[[2.484081745147705]] +[[2.44614839553833]] +[[2.416034460067749]] +[[2.4315545558929443]] +[[2.4579968452453613]] +[[2.449829578399658]] \ No newline at end of file diff --git a/resnet30/latest_kpis/train_duration_factor.txt b/resnet30/latest_kpis/train_duration_factor.txt index 13bd6f8b..4fdf5fe6 100644 --- a/resnet30/latest_kpis/train_duration_factor.txt +++ b/resnet30/latest_kpis/train_duration_factor.txt @@ -1,10 +1,10 @@ -[1000.0] -[1000.0] -[1000.0] -[1000.0] -[1000.0] -[1000.0] -[1000.0] -[1000.0] -[1000.0] -[1000.0] +[22.25151491165161] +[21.59505009651184] +[21.59479784965515] +[21.565481901168823] +[21.499217987060547] +[21.321773052215576] +[21.280965089797974] +[21.29200315475464] +[21.28358292579651] +[21.292808055877686] \ No newline at end of file diff --git a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt index 7078a541..ffe22e2d 100644 --- a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt +++ 
b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt @@ -1 +1 @@ -[5508] +[1394] \ No newline at end of file diff --git a/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt b/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt index 7fbefa39..dc2cb5e3 100644 --- a/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt +++ b/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt @@ -1 +1 @@ -[344.4730529785156] +[871.095703125] \ No newline at end of file diff --git a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt index 22992733..7c29b91f 100644 --- a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt +++ b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt @@ -1 +1 @@ -[41014] +[10352] \ No newline at end of file diff --git a/resnet50/latest_kpis/flowers_64_train_speed_factor.txt b/resnet50/latest_kpis/flowers_64_train_speed_factor.txt index 37c9f66b..4e3deed0 100644 --- a/resnet50/latest_kpis/flowers_64_train_speed_factor.txt +++ b/resnet50/latest_kpis/flowers_64_train_speed_factor.txt @@ -1 +1 @@ -[48.7945785522461] +[122.87747192382812] \ No newline at end of file diff --git a/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt b/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt index 9c67e0f0..8301f111 100644 --- a/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt +++ b/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt @@ -1 +1 @@ -[1408.184814453125] +[4430.63330078125] \ No newline at end of file diff --git a/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt b/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt index 71129a12..d7e63ce9 100644 --- a/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt +++ b/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt @@ -1 +1 @@ -[422.8649673461914] +[19.267375946044922] \ No newline at end of file diff --git a/transformer/latest_kpis/train_pass_duration_kpi_factor.txt b/transformer/latest_kpis/train_pass_duration_kpi_factor.txt index ec75001d..b9b85cf9 100644 --- a/transformer/latest_kpis/train_pass_duration_kpi_factor.txt +++ b/transformer/latest_kpis/train_pass_duration_kpi_factor.txt @@ -1 +1 @@ -[1004.0266849994659] +[56.41797208786011] \ No newline at end of file diff --git a/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt b/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt index 19f67e6e..aad9b5c0 100644 --- a/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt +++ b/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt @@ -1 +1 @@ -[6332] +[2198] \ No newline at end of file diff --git a/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt b/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt index 92d74724..042002c0 100644 --- a/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt +++ b/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt @@ -1 +1 @@ -[140.19189453125] +[736.5991821289062] \ No newline at end of file diff --git a/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt b/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt index 3267b65b..49d5609f 100644 --- a/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt +++ b/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt @@ -1 +1 @@ -[20606] +[8938] \ No newline at end of file diff --git a/vgg16/latest_kpis/flowers_32_train_speed_factor.txt b/vgg16/latest_kpis/flowers_32_train_speed_factor.txt index bf4c61dc..4a131d3d 100644 --- a/vgg16/latest_kpis/flowers_32_train_speed_factor.txt +++ b/vgg16/latest_kpis/flowers_32_train_speed_factor.txt @@ -1 +1 @@ -[16.19645690917969] 
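The PATCH 31 re-baselining above moves several GPU-memory records dramatically (lstm's imdb_32 from 17172 to 1560, resnet50's flowers_64 from 41014 to 10352), consistent with re-measuring peaks under the new `nvidia-smi ... -lms 1` sampler rather than with any model change; the units are MiB, as reported by `--query-compute-apps=used_memory --format=csv`. Since the `get_gpu_data.py` scripts take `max()` over whatever landed in `memory.txt`, a slightly defensive reader avoids crashes when the GPU was briefly idle or a row is malformed; a sketch of ours, same file format:

```python
def read_peak_memory(path="memory.txt"):
    """Peak used_memory in MiB from an `nvidia-smi --format=csv -lms` dump (sketch)."""
    peak = None
    with open(path) as f:
        for line in f:
            token = line.split()[0] if line.split() else ""
            # Skips the "used_memory [MiB]" header, blank rows, and values
            # such as "[Not Supported]" that some boards emit.
            if token.isdigit():
                value = int(token)
                peak = value if peak is None else max(peak, value)
    return peak  # None means the sampler captured nothing
```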
+[51.50917434692383] \ No newline at end of file From fd5ba6801bb57ef8a5604f47c28ffce6bdc26a54 Mon Sep 17 00:00:00 2001 From: kolinwei <331911734@qq.com> Date: Fri, 27 Apr 2018 21:52:00 +0800 Subject: [PATCH 32/67] add three NLP model to ce --- language_model/README.md | 148 +++++++++++++++ language_model/continuous_evaluation.py | 17 ++ language_model/infer.py | 65 +++++++ .../imikolov_20_avg_ppl_factor.txt | 1 + .../imikolov_20_pass_duration_factor.txt | 1 + language_model/run.xsh | 8 + language_model/train.py | 169 ++++++++++++++++++ language_model/utils.py | 40 +++++ sequence_tagging_for_ner/README.md | 120 +++++++++++++ .../continuous_evaluation.py | 17 ++ sequence_tagging_for_ner/data/target.txt | 9 + sequence_tagging_for_ner/data/test | 128 +++++++++++++ sequence_tagging_for_ner/data/train | 139 ++++++++++++++ sequence_tagging_for_ner/download.sh | 16 ++ .../imgs/convergence_curve.png | Bin 0 -> 134124 bytes sequence_tagging_for_ner/infer.py | 71 ++++++++ .../latest_kpis/pass_duration_factor.txt | 1 + .../latest_kpis/train_acc_factor.txt | 1 + sequence_tagging_for_ner/network_conf.py | 127 +++++++++++++ sequence_tagging_for_ner/reader.py | 66 +++++++ sequence_tagging_for_ner/run.xsh | 10 ++ sequence_tagging_for_ner/train.py | 127 +++++++++++++ sequence_tagging_for_ner/utils.py | 47 +++++ sequence_tagging_for_ner/utils_extend.py | 28 +++ text_classification/README.md | 112 ++++++++++++ text_classification/continuous_evaluation.py | 17 ++ text_classification/infer.py | 50 ++++++ .../latest_kpis/lstm_pass_duration_factor.txt | 1 + .../latest_kpis/lstm_train_cost_factor.txt | 1 + text_classification/nets.py | 124 +++++++++++++ text_classification/run.xsh | 9 + text_classification/train.py | 140 +++++++++++++++ text_classification/utils.py | 105 +++++++++++ 33 files changed, 1915 insertions(+) create mode 100644 language_model/README.md create mode 100644 language_model/continuous_evaluation.py create mode 100644 language_model/infer.py create mode 100644 language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt create mode 100644 language_model/latest_kpis/imikolov_20_pass_duration_factor.txt create mode 100755 language_model/run.xsh create mode 100644 language_model/train.py create mode 100644 language_model/utils.py create mode 100644 sequence_tagging_for_ner/README.md create mode 100644 sequence_tagging_for_ner/continuous_evaluation.py create mode 100644 sequence_tagging_for_ner/data/target.txt create mode 100644 sequence_tagging_for_ner/data/test create mode 100644 sequence_tagging_for_ner/data/train create mode 100644 sequence_tagging_for_ner/download.sh create mode 100644 sequence_tagging_for_ner/imgs/convergence_curve.png create mode 100644 sequence_tagging_for_ner/infer.py create mode 100644 sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt create mode 100644 sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt create mode 100644 sequence_tagging_for_ner/network_conf.py create mode 100644 sequence_tagging_for_ner/reader.py create mode 100755 sequence_tagging_for_ner/run.xsh create mode 100644 sequence_tagging_for_ner/train.py create mode 100644 sequence_tagging_for_ner/utils.py create mode 100644 sequence_tagging_for_ner/utils_extend.py create mode 100644 text_classification/README.md create mode 100644 text_classification/continuous_evaluation.py create mode 100644 text_classification/infer.py create mode 100644 text_classification/latest_kpis/lstm_pass_duration_factor.txt create mode 100644 text_classification/latest_kpis/lstm_train_cost_factor.txt create mode 
 create mode 100644 text_classification/nets.py
 create mode 100755 text_classification/run.xsh
 create mode 100644 text_classification/train.py
 create mode 100644 text_classification/utils.py

diff --git a/language_model/README.md b/language_model/README.md
new file mode 100644
index 00000000..91ce2d7f
--- /dev/null
+++ b/language_model/README.md
@@ -0,0 +1,148 @@
+# Language Model
+
+Below is a brief layout of this example's directory:
+
+```text
+.
+├── README.md            # this document
+├── train.py             # training script
+├── infer.py             # inference script
+└── utils.py             # shared helper functions
+```
+
+
+## Introduction
+
+For an introduction to recurrent neural network language models, see the paper [Recurrent Neural Network Regularization](https://arxiv.org/abs/1409.2329). This example implements a GRU-RNN language model.
+
+## Training
+
+Run `python train.py` to start training the model.
+```python
+python train.py
+```
+
+For the currently supported parameters, see the `train_net` function in [train.py](./train.py):
+```python
+vocab, train_reader, test_reader = utils.prepare_data(
+    batch_size=20, # batch size
+    buffer_size=1000, # buffer size, default value is OK
+    word_freq_threshold=0) # vocabulary related parameter, and words with frequency below this value will be filtered
+
+train(train_reader=train_reader,
+      vocab=vocab,
+      network=network,
+      hid_size=200, # embedding and hidden size
+      base_lr=1.0, # base learning rate
+      batch_size=20, # batch size, the same as that in prepare_data
+      pass_num=12, # the number of passes for training
+      use_cuda=True, # whether to use GPU card
+      parallel=False, # whether to be parallel
+      model_dir="model", # directory to save model
+      init_low_bound=-0.1, # uniform parameter initialization lower bound
+      init_high_bound=0.1) # uniform parameter initialization upper bound
+```
+
+## Customizing the network structure
+
+The network structure can be adjusted in the `network` function of [train.py](./train.py); the current structure is:
+```python
+emb = fluid.layers.embedding(input=src, size=[vocab_size, hid_size],
+    param_attr=fluid.ParamAttr(
+        initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
+        learning_rate=emb_lr_x),
+    is_sparse=True)
+
+fc0 = fluid.layers.fc(input=emb, size=hid_size * 3,
+    param_attr=fluid.ParamAttr(
+        initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
+        learning_rate=gru_lr_x))
+gru_h0 = fluid.layers.dynamic_gru(input=fc0, size=hid_size,
+    param_attr=fluid.ParamAttr(
+        initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
+        learning_rate=gru_lr_x))
+
+fc = fluid.layers.fc(input=gru_h0, size=vocab_size, act='softmax',
+    param_attr=fluid.ParamAttr(
+        initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
+        learning_rate=fc_lr_x))
+
+cost = fluid.layers.cross_entropy(input=fc, label=dst)
+```
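+
+Note: `fc0` is `hid_size * 3` wide because `fluid.layers.dynamic_gru` expects its input to already hold the three GRU gate pre-activations (update gate, reset gate, and candidate state). A rough shape sketch for a batch of N tokens with `hid_size=200` (N is illustrative, not from the code):
+
+```text
+emb:    [N, 200]
+fc0:    [N, 600]          # 3 * hid_size
+gru_h0: [N, 200]
+fc:     [N, vocab_size]   # softmax distribution over the next word
+```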
+
+## Sample training results
+
+Training log from a single Tesla K40m GPU card:
+```text
+epoch_1 start
+step:100 ppl:771.053
+step:200 ppl:449.597
+step:300 ppl:642.654
+step:400 ppl:458.128
+step:500 ppl:510.912
+step:600 ppl:451.545
+step:700 ppl:364.404
+step:800 ppl:324.272
+step:900 ppl:360.797
+step:1000 ppl:275.761
+step:1100 ppl:294.599
+step:1200 ppl:335.877
+step:1300 ppl:185.262
+step:1400 ppl:241.744
+step:1500 ppl:211.507
+step:1600 ppl:233.431
+step:1700 ppl:298.767
+step:1800 ppl:203.403
+step:1900 ppl:158.828
+step:2000 ppl:171.148
+step:2100 ppl:280.884
+epoch:1 num_steps:2104 time_cost(s):47.478780
+model saved in model/epoch_1
+epoch_2 start
+step:100 ppl:238.099
+step:200 ppl:136.527
+step:300 ppl:204.184
+step:400 ppl:252.886
+step:500 ppl:177.377
+step:600 ppl:197.688
+step:700 ppl:131.650
+step:800 ppl:223.906
+step:900 ppl:144.785
+step:1000 ppl:176.286
+step:1100 ppl:148.158
+step:1200 ppl:203.581
+step:1300 ppl:168.208
+step:1400 ppl:159.412
+step:1500 ppl:114.032
+step:1600 ppl:157.985
+step:1700 ppl:147.743
+step:1800 ppl:88.676
+step:1900 ppl:141.962
+step:2000 ppl:106.087
+step:2100 ppl:122.709
+epoch:2 num_steps:2104 time_cost(s):47.583789
+model saved in model/epoch_2
+...
+```
+
+## Inference
+Run `python infer.py model_dir start_epoch last_epoch(inclusive)` to start inference, where start_epoch is the first epoch to evaluate and last_epoch (inclusive) is the last, for example:
+```python
+python infer.py model 1 12 # prediction from epoch 1 to epoch 12
+```
+
+## Sample inference results
+```text
+model:model/epoch_1 ppl:254.540 time_cost(s):3.29
+model:model/epoch_2 ppl:177.671 time_cost(s):3.27
+model:model/epoch_3 ppl:156.251 time_cost(s):3.27
+model:model/epoch_4 ppl:139.036 time_cost(s):3.27
+model:model/epoch_5 ppl:132.661 time_cost(s):3.27
+model:model/epoch_6 ppl:130.092 time_cost(s):3.28
+model:model/epoch_7 ppl:128.751 time_cost(s):3.27
+model:model/epoch_8 ppl:125.411 time_cost(s):3.27
+model:model/epoch_9 ppl:124.604 time_cost(s):3.28
+model:model/epoch_10 ppl:124.754 time_cost(s):3.29
+model:model/epoch_11 ppl:125.421 time_cost(s):3.27
+model:model/epoch_12 ppl:125.676 time_cost(s):3.27
+```

diff --git a/language_model/continuous_evaluation.py b/language_model/continuous_evaluation.py
new file mode 100644
index 00000000..e12f2b28
--- /dev/null
+++ b/language_model/continuous_evaluation.py
@@ -0,0 +1,17 @@
+"""
+continuous_evaluation.py
+"""
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi
+from kpi import DurationKpi
+
+imikolov_20_avg_ppl_kpi = CostKpi('imikolov_20_avg_ppl', 0.2, 0)
+imikolov_20_pass_duration_kpi = DurationKpi('imikolov_20_pass_duration', 0.2, 0)
+
+
+tracking_kpis = [
+    imikolov_20_avg_ppl_kpi,
+    imikolov_20_pass_duration_kpi,
+]

diff --git a/language_model/infer.py b/language_model/infer.py
new file mode 100644
index 00000000..a183d548
--- /dev/null
+++ b/language_model/infer.py
@@ -0,0 +1,65 @@
+import sys
+import time
+import math
+import unittest
+import contextlib
+import numpy as np
+
+import paddle.fluid as fluid
+import paddle.v2 as paddle
+
+import utils
+
+
+def infer(test_reader, use_cuda, model_path):
+    """ inference function """
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    with fluid.scope_guard(fluid.core.Scope()):
+        infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model(
+            model_path, exe)
+
+        accum_cost = 0.0
+        accum_words = 0
+        t0 = time.time()
+        for data in test_reader():
+            src_wordseq = utils.to_lodtensor(map(lambda x: x[0], data), place)
+            dst_wordseq = utils.to_lodtensor(map(lambda x: x[1], data), place)
+            avg_cost = exe.run(
+                infer_program,
+                feed={"src_wordseq": src_wordseq,
+                      "dst_wordseq": dst_wordseq},
+                fetch_list=fetch_vars)
+
+            nwords = src_wordseq.lod()[0][-1]
+
+            cost = np.array(avg_cost) * nwords
+            accum_cost += cost
+            accum_words += nwords
+
+        ppl = math.exp(accum_cost / accum_words)
+        t1 = time.time()
+        print("model:%s ppl:%.3f time_cost(s):%.2f" %
+              (model_path, ppl, t1 - t0))
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 4:
+        print("Usage: %s model_dir start_epoch last_epoch(inclusive)" %
+              sys.argv[0])
+        exit(0)
+
+    model_dir = sys.argv[1]
+    try:
+        start_index = int(sys.argv[2])
+        last_index = int(sys.argv[3])
+    except:
+        print("Usage: %s model_dir start_epoch last_epoch(inclusive)" %
+              sys.argv[0])
+        exit(-1)
+
+    vocab, train_reader, test_reader = utils.prepare_data(
+        batch_size=20, buffer_size=1000, word_freq_threshold=0)
+
+    for epoch in xrange(start_index, last_index + 1):
+        epoch_path = model_dir + "/epoch_" + str(epoch)
+        infer(test_reader=test_reader, use_cuda=True, model_path=epoch_path)
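For reference, `infer.py` above computes corpus perplexity as the exponential of the token-weighted mean cross-entropy, so longer batches count proportionally. A minimal self-contained sketch of the same arithmetic (the `batches` values are made up, and no Paddle API is used):

```python
import math

# Each entry: (mean cross-entropy of a batch, number of tokens in that batch).
batches = [(5.1, 400), (4.8, 350), (5.3, 410)]  # hypothetical values

accum_cost = sum(cost * n for cost, n in batches)  # total log-loss
accum_words = sum(n for _, n in batches)           # total token count
ppl = math.exp(accum_cost / accum_words)           # corpus-level perplexity
```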
diff --git a/language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt b/language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt
new file mode 100644
index 00000000..586f51f0
--- /dev/null
+++ b/language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt
@@ -0,0 +1 @@
+[54.31165283314625]

diff --git a/language_model/latest_kpis/imikolov_20_pass_duration_factor.txt b/language_model/latest_kpis/imikolov_20_pass_duration_factor.txt
new file mode 100644
index 00000000..4bbac048
--- /dev/null
+++ b/language_model/latest_kpis/imikolov_20_pass_duration_factor.txt
@@ -0,0 +1 @@
+[40.58481127023697]

diff --git a/language_model/run.xsh b/language_model/run.xsh
new file mode 100755
index 00000000..5a40853d
--- /dev/null
+++ b/language_model/run.xsh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+export MKL_NUM_THREADS=1
+export OMP_NUM_THREADS=1
+cudaid=${language_model:=0} # use 0-th card as default
+export CUDA_VISIBLE_DEVICES=$cudaid
+
+FLAGS_benchmark=true python train.py

diff --git a/language_model/train.py b/language_model/train.py
new file mode 100644
index 00000000..2430af72
--- /dev/null
+++ b/language_model/train.py
@@ -0,0 +1,169 @@
+import sys
+import time
+
+import numpy as np
+import math
+
+import paddle.fluid as fluid
+import paddle
+
+import utils
+
+from continuous_evaluation import imikolov_20_avg_ppl_kpi, imikolov_20_pass_duration_kpi
+
+
+def network(src, dst, vocab_size, hid_size, init_low_bound, init_high_bound):
+    """ network definition """
+    emb_lr_x = 10.0
+    gru_lr_x = 1.0
+    fc_lr_x = 1.0
+    emb = fluid.layers.embedding(
+        input=src,
+        size=[vocab_size, hid_size],
+        param_attr=fluid.ParamAttr(
+            initializer=fluid.initializer.Uniform(
+                low=init_low_bound, high=init_high_bound),
+            learning_rate=emb_lr_x),
+        is_sparse=True)
+
+    fc0 = fluid.layers.fc(input=emb,
+                          size=hid_size * 3,
+                          param_attr=fluid.ParamAttr(
+                              initializer=fluid.initializer.Uniform(
+                                  low=init_low_bound, high=init_high_bound),
+                              learning_rate=gru_lr_x))
+    gru_h0 = fluid.layers.dynamic_gru(
+        input=fc0,
+        size=hid_size,
+        param_attr=fluid.ParamAttr(
+            initializer=fluid.initializer.Uniform(
+                low=init_low_bound, high=init_high_bound),
+            learning_rate=gru_lr_x))
+
+    fc = fluid.layers.fc(input=gru_h0,
+                         size=vocab_size,
+                         act='softmax',
+                         param_attr=fluid.ParamAttr(
+                             initializer=fluid.initializer.Uniform(
+                                 low=init_low_bound, high=init_high_bound),
+                             learning_rate=fc_lr_x))
+
+    cost = fluid.layers.cross_entropy(input=fc, label=dst)
+    return cost
+
+
+def train(train_reader,
+          vocab,
+          network,
+          hid_size,
+          base_lr,
+          batch_size,
+          pass_num,
+          use_cuda,
+          parallel,
+          model_dir,
+          init_low_bound=-0.04,
+          init_high_bound=0.04):
+    """ train network """
+    vocab_size = len(vocab)
+
+    src_wordseq = fluid.layers.data(
+        name="src_wordseq", shape=[1], dtype="int64", lod_level=1)
+    dst_wordseq = fluid.layers.data(
+        name="dst_wordseq", shape=[1], dtype="int64", lod_level=1)
+
+    avg_cost = None
+    if not parallel:
+        cost = network(src_wordseq, dst_wordseq, vocab_size, hid_size,
+                       init_low_bound, init_high_bound)
+        avg_cost = fluid.layers.mean(x=cost)
+    else:
+        places = fluid.layers.get_places()
+        pd = fluid.layers.ParallelDo(places)
+        with pd.do():
+            cost = network(
+                pd.read_input(src_wordseq),
+                pd.read_input(dst_wordseq), vocab_size, hid_size,
+                init_low_bound, init_high_bound)
+            pd.write_output(cost)
+
+        cost = pd()
+        avg_cost = fluid.layers.mean(x=cost)
+
+    sgd_optimizer = fluid.optimizer.SGD(
+        learning_rate=fluid.layers.exponential_decay(
+            learning_rate=base_lr,
+            decay_steps=2100 * 4,
+            decay_rate=0.5,
+            staircase=True))
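+    # Note: with batch_size=20 the PTB training set yields roughly 2100
+    # batches per pass (num_steps:2104 in the sample log), so decay_steps=
+    # 2100 * 4 with staircase=True halves the learning rate about every
+    # 4 passes: lr(step) = base_lr * 0.5 ** floor(step / 8400).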
+    sgd_optimizer.minimize(avg_cost)
+
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    exe.run(fluid.default_startup_program())
+    total_time = 0.0
+    for pass_idx in xrange(pass_num):
+        epoch_idx = pass_idx + 1
+        print "epoch_%d start" % epoch_idx
+
+        t0 = time.time()
+        i = 0
+        newest_ppl = 0
+        for data in train_reader():
+            i += 1
+            lod_src_wordseq = utils.to_lodtensor(
+                map(lambda x: x[0], data), place)
+            lod_dst_wordseq = utils.to_lodtensor(
+                map(lambda x: x[1], data), place)
+            ret_avg_cost = exe.run(fluid.default_main_program(),
+                                   feed={
+                                       "src_wordseq": lod_src_wordseq,
+                                       "dst_wordseq": lod_dst_wordseq
+                                   },
+                                   fetch_list=[avg_cost],
+                                   use_program_cache=True)
+            avg_ppl = math.exp(ret_avg_cost[0])
+            newest_ppl = avg_ppl
+            if i % 100 == 0:
+                print "step:%d ppl:%.3f" % (i, avg_ppl)
+
+        t1 = time.time()
+        total_time += t1 - t0
+        print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i,
+                                                         total_time / epoch_idx)
+
+        if pass_idx == pass_num - 1:
+            imikolov_20_pass_duration_kpi.add_record(total_time / epoch_idx)
+            imikolov_20_avg_ppl_kpi.add_record(newest_ppl)
+        save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
+        feed_var_names = ["src_wordseq", "dst_wordseq"]
+        fetch_vars = [avg_cost]
+        fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe)
+        print("model saved in %s" % save_dir)
+    imikolov_20_pass_duration_kpi.persist()
+    imikolov_20_avg_ppl_kpi.persist()
+    print("finish training")
+
+
+def train_net():
+    """ do training """
+    batch_size = 20
+    vocab, train_reader, test_reader = utils.prepare_data(
+        batch_size=batch_size, buffer_size=1000, word_freq_threshold=0)
+    train(
+        train_reader=train_reader,
+        vocab=vocab,
+        network=network,
+        hid_size=200,
+        base_lr=1.0,
+        batch_size=batch_size,
+        pass_num=12,
+        use_cuda=True,
+        parallel=True,
+        model_dir="model",
+        init_low_bound=-0.1,
+        init_high_bound=0.1)
+
+
+if __name__ == "__main__":
+    train_net()

diff --git a/language_model/utils.py b/language_model/utils.py
new file mode 100644
index 00000000..68ab1f5e
--- /dev/null
+++ b/language_model/utils.py
@@ -0,0 +1,40 @@
+import sys
+import time
+import numpy as np
+
+import paddle.fluid as fluid
+import paddle
+
+
+def to_lodtensor(data, place):
+    """ convert a batch of sequences to a LoDTensor """
+    seq_lens = [len(seq) for seq in data]
+    cur_len = 0
+    lod = [cur_len]
+    for l in seq_lens:
+        cur_len += l
+        lod.append(cur_len)
+    flattened_data = np.concatenate(data, axis=0).astype("int64")
+    flattened_data = flattened_data.reshape([len(flattened_data), 1])
+    res = fluid.LoDTensor()
+    res.set(flattened_data, place)
+    res.set_lod([lod])
+    return res
+
+
+def prepare_data(batch_size, buffer_size=1000, word_freq_threshold=0):
+    """ prepare the English Penn Treebank (PTB) data """
+    vocab = paddle.dataset.imikolov.build_dict(word_freq_threshold)
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.imikolov.train(
+                vocab,
+                buffer_size,
+                data_type=paddle.dataset.imikolov.DataType.SEQ),
+            buf_size=buffer_size),
+        batch_size)
+    test_reader = paddle.batch(
+        paddle.dataset.imikolov.test(
+            vocab, buffer_size, data_type=paddle.dataset.imikolov.DataType.SEQ),
+        batch_size)
+    return vocab, train_reader, test_reader
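For reference, `to_lodtensor` above flattens a batch of variable-length sequences into a single `[total_tokens, 1]` tensor plus cumulative offsets (Paddle's LoD format). A minimal trace with made-up values:

```python
# Hypothetical batch: two int64 sequences, lengths 3 and 2.
data = [[1, 2, 3], [4, 5]]

seq_lens = [3, 2]
lod = [0, 3, 5]  # cumulative offsets; sequence i spans rows lod[i]:lod[i+1]

# The flattened tensor has shape [5, 1]; lod[-1] == 5 is the total token
# count, which infer.py reads back as src_wordseq.lod()[0][-1].
```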
diff --git a/sequence_tagging_for_ner/README.md b/sequence_tagging_for_ner/README.md
new file mode 100644
index 00000000..1f634da4
--- /dev/null
+++ b/sequence_tagging_for_ner/README.md
@@ -0,0 +1,120 @@
+# Named Entity Recognition
+
+Below is a brief layout of this example's directory:
+
+```text
+.
+├── data             # data this example depends on, fetched externally
+├── network_conf.py  # model definition
+├── reader.py        # data reading interface, fetched externally
+├── README.md        # this document
+├── train.py         # training script
+├── infer.py         # inference script
+├── utils.py         # shared helper functions, fetched externally
+└── utils_extend.py  # extensions to utils.py
+```
+
+
+## Introduction and model details
+
+The PaddlePaddle v2 [Named Entity Recognition](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md) example describes the NER task in detail, so the description is not repeated here.
+The model keeps the v2 structure; the only difference is that we use an LSTM in place of the original RNN.
+
+## Getting the data
+
+Follow the data-acquisition steps in the PaddlePaddle v2 [Named Entity Recognition](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md) example: copy its data folder into this example's directory, then run the download.sh script inside it to fetch the training and test data.
+
+## Getting the shared scripts
+
+Copy the data-reading file [reader.py](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/reader.py) and the file providing shared utilities such as dictionary loading, [utils.py](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/utils.py), from the PaddlePaddle v2 [Named Entity Recognition](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md) example into this directory. Both scripts are used by this example.
+
+## Training
+
+1. Run `sh data/download.sh`.
+2. Edit the `main` function of `train.py` to point at your data:
+
+   ```python
+   main(
+       train_data_file="data/train",
+       test_data_file="data/test",
+       vocab_file="data/vocab.txt",
+       target_file="data/target.txt",
+       emb_file="data/wordVectors.txt",
+       model_save_dir="models",
+       num_passes=1000,
+       use_gpu=False,
+       parallel=False)
+   ```
+
+3. Run `python train.py`. **Note: running it directly uses the bundled sample data; replace it with real annotated data.** (A quick consistency check on the logged metrics follows this list.)
+
+   ```text
+   Pass 127, Batch 9525, Cost 4.0867705, Precision 0.3954984, Recall 0.37846154, F1_score0.38679245
+   Pass 127, Batch 9530, Cost 3.137265, Precision 0.42971888, Recall 0.38351256, F1_score0.405303
+   Pass 127, Batch 9535, Cost 3.6240938, Precision 0.4272152, Recall 0.41795665, F1_score0.4225352
+   Pass 127, Batch 9540, Cost 3.5352352, Precision 0.48464164, Recall 0.4536741, F1_score0.46864685
+   Pass 127, Batch 9545, Cost 4.1130385, Precision 0.40131578, Recall 0.3836478, F1_score0.39228293
+   Pass 127, Batch 9550, Cost 3.6826708, Precision 0.43333334, Recall 0.43730888, F1_score0.43531203
+   Pass 127, Batch 9555, Cost 3.6363933, Precision 0.42424244, Recall 0.3962264, F1_score0.4097561
+   Pass 127, Batch 9560, Cost 3.6101768, Precision 0.51363635, Recall 0.353125, F1_score0.41851854
+   Pass 127, Batch 9565, Cost 3.5935276, Precision 0.5152439, Recall 0.5, F1_score0.5075075
+   Pass 127, Batch 9570, Cost 3.4987144, Precision 0.5, Recall 0.4330218, F1_score0.46410686
+   Pass 127, Batch 9575, Cost 3.4659843, Precision 0.39864865, Recall 0.38064516, F1_score0.38943896
+   Pass 127, Batch 9580, Cost 3.1702557, Precision 0.5, Recall 0.4490446, F1_score0.47315437
+   Pass 127, Batch 9585, Cost 3.1587276, Precision 0.49377593, Recall 0.4089347, F1_score0.4473684
+   Pass 127, Batch 9590, Cost 3.5043538, Precision 0.4556962, Recall 0.4600639, F1_score0.45786962
+   Pass 127, Batch 9595, Cost 2.981989, Precision 0.44981414, Recall 0.45149255, F1_score0.4506518
+   [TrainSet] pass_id:127 pass_precision:[0.46023396] pass_recall:[0.43197003] pass_f1_score:[0.44565433]
+   [TestSet] pass_id:127 pass_precision:[0.4708409] pass_recall:[0.47971722] pass_f1_score:[0.4752376]
+   ```
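+
+As a quick consistency check on the log above: F1 is the harmonic mean of precision and recall, and plugging in the final [TrainSet] numbers reproduces the logged score (values copied from the log):
+
+```python
+p, r = 0.46023396, 0.43197003
+f1 = 2 * p * r / (p + r)  # 0.44565..., matching pass_f1_score [0.44565433]
+```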
+
+## Inference
+
+1. Edit the `infer` function of [infer.py](./infer.py) to set the path of the model to test, the test data, the vocabulary file, and the target-label file. The defaults are:
+
+   ```python
+   infer(
+       model_path="models/params_pass_0",
+       batch_size=6,
+       test_data_file="data/test",
+       vocab_file="data/vocab.txt",
+       target_file="data/target.txt",
+       use_gpu=False
+   )
+   ```
+
+2. Run `python infer.py` in a terminal to start testing. Output like the following appears (a partial prediction from a model trained for 70 passes):
+
+   ```text
+   leicestershire B-ORG B-LOC
+   extended O O
+   their O O
+   first O O
+   innings O O
+   by O O
+   DGDG O O
+   runs O O
+   before O O
+   being O O
+   bowled O O
+   out O O
+   for O O
+   296 O O
+   with O O
+   england B-LOC B-LOC
+   discard O O
+   andy B-PER B-PER
+   caddick I-PER I-PER
+   taking O O
+   three O O
+   for O O
+   DGDG O O
+   . O O
+   ```
+
+   The output has three columns separated by "\t": the first is the input word, the second the gold label, and the third the generated label. Multiple input sequences are separated by blank lines. (A minimal parsing sketch follows.)
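+
+A minimal way to read this format back (assuming the output was saved to a hypothetical file `result.txt`):
+
+```python
+for line in open("result.txt"):
+    line = line.rstrip("\n")
+    if not line:  # blank line marks a sequence boundary
+        continue
+    word, gold, pred = line.split("\t")  # input word, gold label, prediction
+```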
+
+## Sample results
+
+<p align="center">
+<img src="imgs/convergence_curve.png"/><br/>
+Figure 1. Learning curve: the x-axis is the number of training passes and the y-axis is the F1 score.
+</p>
diff --git a/sequence_tagging_for_ner/continuous_evaluation.py b/sequence_tagging_for_ner/continuous_evaluation.py new file mode 100644 index 00000000..426610d7 --- /dev/null +++ b/sequence_tagging_for_ner/continuous_evaluation.py @@ -0,0 +1,17 @@ +""" +continuous_evaluation.py +""" +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import AccKpi +from kpi import DurationKpi + +train_acc_kpi = AccKpi('train_acc', 0.2, 0) +pass_duration_kpi = DurationKpi('pass_duration', 0.2, 0) + + +tracking_kpis = [ + train_acc_kpi, + pass_duration_kpi, +] diff --git a/sequence_tagging_for_ner/data/target.txt b/sequence_tagging_for_ner/data/target.txt new file mode 100644 index 00000000..e0fa4d8f --- /dev/null +++ b/sequence_tagging_for_ner/data/target.txt @@ -0,0 +1,9 @@ +B-LOC +I-LOC +B-MISC +I-MISC +B-ORG +I-ORG +B-PER +I-PER +O diff --git a/sequence_tagging_for_ner/data/test b/sequence_tagging_for_ner/data/test new file mode 100644 index 00000000..66163e1a --- /dev/null +++ b/sequence_tagging_for_ner/data/test @@ -0,0 +1,128 @@ +CRICKET NNP I-NP O +- : O O +LEICESTERSHIRE NNP I-NP I-ORG +TAKE NNP I-NP O +OVER IN I-PP O +AT NNP I-NP O +TOP NNP I-NP O +AFTER NNP I-NP O +INNINGS NNP I-NP O +VICTORY NN I-NP O +. . O O + +LONDON NNP I-NP I-LOC +1996-08-30 CD I-NP O + +West NNP I-NP I-MISC +Indian NNP I-NP I-MISC +all-rounder NN I-NP O +Phil NNP I-NP I-PER +Simmons NNP I-NP I-PER +took VBD I-VP O +four CD I-NP O +for IN I-PP O +38 CD I-NP O +on IN I-PP O +Friday NNP I-NP O +as IN I-PP O +Leicestershire NNP I-NP I-ORG +beat VBD I-VP O +Somerset NNP I-NP I-ORG +by IN I-PP O +an DT I-NP O +innings NN I-NP O +and CC O O +39 CD I-NP O +runs NNS I-NP O +in IN I-PP O +two CD I-NP O +days NNS I-NP O +to TO I-VP O +take VB I-VP O +over IN I-PP O +at IN B-PP O +the DT I-NP O +head NN I-NP O +of IN I-PP O +the DT I-NP O +county NN I-NP O +championship NN I-NP O +. . O O + +Their PRP$ I-NP O +stay NN I-NP O +on IN I-PP O +top NN I-NP O +, , O O +though RB I-ADVP O +, , O O +may MD I-VP O +be VB I-VP O +short-lived JJ I-ADJP O +as IN I-PP O +title NN I-NP O +rivals NNS I-NP O +Essex NNP I-NP I-ORG +, , O O +Derbyshire NNP I-NP I-ORG +and CC I-NP O +Surrey NNP I-NP I-ORG +all DT O O +closed VBD I-VP O +in RP I-PRT O +on IN I-PP O +victory NN I-NP O +while IN I-SBAR O +Kent NNP I-NP I-ORG +made VBD I-VP O +up RP I-PRT O +for IN I-PP O +lost VBN I-NP O +time NN I-NP O +in IN I-PP O +their PRP$ I-NP O +rain-affected JJ I-NP O +match NN I-NP O +against IN I-PP O +Nottinghamshire NNP I-NP I-ORG +. . O O + +After IN I-PP O +bowling VBG I-NP O +Somerset NNP I-NP I-ORG +out RP I-PRT O +for IN I-PP O +83 CD I-NP O +on IN I-PP O +the DT I-NP O +opening NN I-NP O +morning NN I-NP O +at IN I-PP O +Grace NNP I-NP I-LOC +Road NNP I-NP I-LOC +, , O O +Leicestershire NNP I-NP I-ORG +extended VBD I-VP O +their PRP$ I-NP O +first JJ I-NP O +innings NN I-NP O +by IN I-PP O +94 CD I-NP O +runs VBZ I-VP O +before IN I-PP O +being VBG I-VP O +bowled VBD I-VP O +out RP I-PRT O +for IN I-PP O +296 CD I-NP O +with IN I-PP O +England NNP I-NP I-LOC +discard VBP I-VP O +Andy NNP I-NP I-PER +Caddick NNP I-NP I-PER +taking VBG I-VP O +three CD I-NP O +for IN I-PP O +83 CD I-NP O +. . 
O O + diff --git a/sequence_tagging_for_ner/data/train b/sequence_tagging_for_ner/data/train new file mode 100644 index 00000000..cbf3e678 --- /dev/null +++ b/sequence_tagging_for_ner/data/train @@ -0,0 +1,139 @@ +EU NNP I-NP I-ORG +rejects VBZ I-VP O +German JJ I-NP I-MISC +call NN I-NP O +to TO I-VP O +boycott VB I-VP O +British JJ I-NP I-MISC +lamb NN I-NP O +. . O O + +Peter NNP I-NP I-PER +Blackburn NNP I-NP I-PER + +BRUSSELS NNP I-NP I-LOC +1996-08-22 CD I-NP O + +The DT I-NP O +European NNP I-NP I-ORG +Commission NNP I-NP I-ORG +said VBD I-VP O +on IN I-PP O +Thursday NNP I-NP O +it PRP B-NP O +disagreed VBD I-VP O +with IN I-PP O +German JJ I-NP I-MISC +advice NN I-NP O +to TO I-PP O +consumers NNS I-NP O +to TO I-VP O +shun VB I-VP O +British JJ I-NP I-MISC +lamb NN I-NP O +until IN I-SBAR O +scientists NNS I-NP O +determine VBP I-VP O +whether IN I-SBAR O +mad JJ I-NP O +cow NN I-NP O +disease NN I-NP O +can MD I-VP O +be VB I-VP O +transmitted VBN I-VP O +to TO I-PP O +sheep NN I-NP O +. . O O + +Germany NNP I-NP I-LOC +'s POS B-NP O +representative NN I-NP O +to TO I-PP O +the DT I-NP O +European NNP I-NP I-ORG +Union NNP I-NP I-ORG +'s POS B-NP O +veterinary JJ I-NP O +committee NN I-NP O +Werner NNP I-NP I-PER +Zwingmann NNP I-NP I-PER +said VBD I-VP O +on IN I-PP O +Wednesday NNP I-NP O +consumers NNS I-NP O +should MD I-VP O +buy VB I-VP O +sheepmeat NN I-NP O +from IN I-PP O +countries NNS I-NP O +other JJ I-ADJP O +than IN I-PP O +Britain NNP I-NP I-LOC +until IN I-SBAR O +the DT I-NP O +scientific JJ I-NP O +advice NN I-NP O +was VBD I-VP O +clearer JJR I-ADJP O +. . O O + +" " O O +We PRP I-NP O +do VBP I-VP O +n't RB I-VP O +support VB I-VP O +any DT I-NP O +such JJ I-NP O +recommendation NN I-NP O +because IN I-SBAR O +we PRP I-NP O +do VBP I-VP O +n't RB I-VP O +see VB I-VP O +any DT I-NP O +grounds NNS I-NP O +for IN I-PP O +it PRP I-NP O +, , O O +" " O O +the DT I-NP O +Commission NNP I-NP I-ORG +'s POS B-NP O +chief JJ I-NP O +spokesman NN I-NP O +Nikolaus NNP I-NP I-PER +van NNP I-NP I-PER +der FW I-NP I-PER +Pas NNP I-NP I-PER +told VBD I-VP O +a DT I-NP O +news NN I-NP O +briefing NN I-NP O +. . O O + +He PRP I-NP O +said VBD I-VP O +further JJ I-NP O +scientific JJ I-NP O +study NN I-NP O +was VBD I-VP O +required VBN I-VP O +and CC O O +if IN I-SBAR O +it PRP I-NP O +was VBD I-VP O +found VBN I-VP O +that IN I-SBAR O +action NN I-NP O +was VBD I-VP O +needed VBN I-VP O +it PRP I-NP O +should MD I-VP O +be VB I-VP O +taken VBN I-VP O +by IN I-PP O +the DT I-NP O +European NNP I-NP I-ORG +Union NNP I-NP I-ORG +. . O O + diff --git a/sequence_tagging_for_ner/download.sh b/sequence_tagging_for_ner/download.sh new file mode 100644 index 00000000..99d81c1e --- /dev/null +++ b/sequence_tagging_for_ner/download.sh @@ -0,0 +1,16 @@ +if [ -f assignment2.zip ]; then + echo "data exist" +else + wget http://cs224d.stanford.edu/assignment2/assignment2.zip +fi + +if [ $? -eq 0 ];then + unzip assignment2.zip + cp assignment2_release/data/ner/wordVectors.txt ./data + cp assignment2_release/data/ner/vocab.txt ./data + rm -rf assignment2.zip assignment2_release +else + echo "download data error!" 
>> /dev/stderr
+    exit 1
+fi
+

diff --git a/sequence_tagging_for_ner/imgs/convergence_curve.png b/sequence_tagging_for_ner/imgs/convergence_curve.png
new file mode 100644
index 0000000000000000000000000000000000000000..6b862b751dd7ec0ef761dce78b9515769366d5f4
GIT binary patch
literal 134124
[base85-encoded binary data for imgs/convergence_curve.png omitted]
zxv;#)W2l@CS?jg%C_`a;D}FLqF9}3h!l?2DKi$HG zHgZ;x5A@YRLMuW7$x&0;S)gLLroh4s5u*RSICIqSixbv0WSx_3ULeTeN1-an(9qx% z0Umvng{E9lVmB+F5b0q(^#1whs1=`j4mQ!#(RDGQcK65x46v^4VTN!Y&>#R!X|O1l zzU=$@Y%EBvEZ8h;xNhYAHEkd2qFG&o>qWE2tZ56O96g_R+>D04B3B0A3>#h)_^Gg5 zG4RlPIlc&Z#I&(yydK&nKmp)IwLjmfA4u(Dk_n)3A8ypHAUnmns;VxET0VnxnkYjE zrR|rBIn37|Zq4_#fLaB-8h(V+I|JlhQ$`=x5S*4HNZwd@UlDih2-KZ=@JuuXMSVDy zmCTP9*AtB;MSjJHY+RP)@v^eYatN`Htf!NPL50kagMb*eWm4QNp*I9A6#19z16YZa zj-&YREIy_EZc`L99KtanC+=lvQd!+Kq!1GUumc$$C$o56cD?KNdHim8)I?$$e>t15 z>BkEyOgswX^|@eio=p?i`WX#M!_kDq%S+vCuW<6n`}&{Np(6Fj5=J;(>n2I$)fT2@ zfLzBf1$L*1`z)2VBORwqTxza(U`!1iwHBKqPGV{a4fv==m(jA%^sf#8=UQZMgM~$K zVWy#{$y|SsR3RmVjKLwD=m?4~<5H4r-A=My`f0))l7K`(BB-qg~+`85*nW2;IaTLu6A(C{t>4AAK#qT#ZPOxnm1skBcT{- zz&DHA-}7br2sK8?X@(Uyn@0B1wTo+1)BGR=&B+pz8h2Z1;R__gO*qqsN(BI9Dno6= zFWPZPP=F-&;CY)24=7MTu3_qxz&M~NI>j$kL$gq_1UcXGXLskkmQ8S^M0o&m3`|X( z0lT9V4{w}c6*7Jb@b~RQAA)wZllNgE-)dL^5jVu&g*8hEm)gr+po= zH~saMAe(u}jt$~-xM??EVec)yf!+S|S2f(I3up#5@o0?v*7&r9h&nIjS36IwRW;~N zAzwad2j;f>;&Pmu=RK$E3;DUwc{lbnJdhVIs;HW^TSar*)-G2;ge)55N|T*IEDCNv zlxla^IcD>96NSr4? zc`yeq8Jn5c+feE09De9Ng%FAXObuH)1cD&`y(INOqtHp4(sgt_++NaD%YQzmtOrS+sm*l><&{H?Ut zE-JltQ@}9zN?sRp9_jr#*bp4ZDdUyTIV(F}I@{c^RMt5@j-G^^X~vYq9M)+%2tcOd zhsoVThifx$Y3O(f4dj(?;9Eudenq0jnoVx{`AZTVlc5c}YwCDl>3IV+CD0v#3!9pJ z1;i9sr1%pi=)6YnkOY90k)wOSVS2(;TfFvNn z&=d?vm!vUMD?rueV0Ey@1tt66uVqwFA3O13%pjp3nvW9yCNgwZXYOynwa4N$DPjBvA zS>YE%Di1ZV2SnT{u>iUIUR%Lk@wRQ3+tAFI&+hgwpPz44ulfXTPex;z=gV~Un2TR6 z-R_aPTpDZfxa{dmA=Bz!?5W;Dqucwp;txFt70rJL`pm1RuAd49>{L_ljoqDfvzfBH zC_vv{1>biqM(aO*{P9!{%l53XnVC_Gmw;$ItXY_!-$VJMQ_l(@xs#zCAc~+Q=&h5v z9m8f3e%vl2c#TbI3Vf>*08DytsI7|7bW4i*)nd^qw0fuq+;;y92 z`21b8b)0k?qui>}z?&&Eq?g*#0o@n{Y?AXRn3GLK^Ss7TAJju(rFwe&&6rFmZ4WIi z?aNi;u(G=pK*3_}vza~3OY=$bd|>KTvIC< z|8S3L(gzqOi)tD|w`y^tg9UnK&x)k0SvC}b^yV}5@>)uB5JR+rRr7H~$m)+KB0%vl zIZT$4J#_H)9GVd&}m7#!rB=8(m zP>2?LUNsBW0vp47A7}Y221F8|_QY$|th^lc|HE{uhfa9kFab)wUYPw`x2+*%#vvc58Ea~3}{z83n#3sosF@Kmb zBTrG}(oua>e4vkl2{j&va5qzf2GBUg@~dBt34chHa(_~wpf8@r(Uv%tPiL-kzbs^> zZe5G*!2vAi?z~lh`0$)vPRV0xm*Q?i6@ji0fMhQNVcK2!ItC*Uw1TZgn7{#U{m{Ush8H+Ju1Dv|=I+9zA%p7DW9o95y`odIA@atIUP z0mrS8eBH~n2X=nfZB&I;noP~Q9>@bQoxy2Wx`S~iCQW>ZQN6W`tgW6et^aB z+~6TXz{P0u)U^p#mTBj?@i!OPo?g1dry%Z;zC+KGDnps9UXLSnIIz!@NEq zndUoCGI~0yHv}FowN_WzBs#5&m-adu^y09zzd4W0>Zo#BTha{ z)J_eaJ4jM%DjpXPeKl{tL1l}r22hX;di$Pt2@Q4G8g$)>*%EaYi;kJ zzt{}i--;pe$SVHbFRuNe#Gb~cB6GON`nyjup8^ziwD{w{(a7=bH2@dp>X#`^@aN2RC0idD zn>Sv#po`NeL`0)E^+u9Khy&2pfHRBqx^pKi&AsiSM9A)Ts<0kQADg*f7Jar!m9qn2 zntQbCUfBRR;c^?Ns~6?Qhlcs=*D;f|o>Ml~e+lEf1R8%>Ov8GQEEsUv4B+PSfwReW zOF*}YcIwiBng{B6<$$gk1&n0ph?7iJK%wN?aJ$e!!AatPZze}kHLi6CoUi2q>cTFM zS2ZCLql$aKn8>KmJ?q( zcg&*y^tDvWV<*nIXlte+*rscWX5w+Vmge>7r}PK!&pnjP2*y`fvzB<6j0^;9Eo*($aQXKokr_p| zloC_J#r8uWXS&&*wvR)=LI^Et930Oj-46&)MyLKyFNMtOMqFxUSlRBu&iMo7r-SM! 
zg3GX}_z*e!O=JU;vUBd6I=h*^{?8t}_Big+xzAY@LjVk3ss%5epsQz*N&4`i8bpT} z0#vWvfv*<}bm+*>g&s*Gzxx(yz$;=^5%t;NAS{=LwtpZ!=Xy#pQIfqS;~XSoPn827 zJ3W|a#CG?f9%|4S5=;iBdkJV|@>ceX-m?B1keInfyT+jcw`f#Yqtov*H8owT`hF$m z5>VS=U>E%|xbKjcW(&B6CF>|fTx5T|0x2J&hy-G#_z4MU1M*ydsG5FC;BOE*etllJ z)dA80&BH>|r=6t0IFHjg@S0f;OqtDV2bXy^i=e)-Dlop*J^os%TR0%DoQ45mGoUjm ziS^R!4@m#tm%=}Os$YB%WPoC;EgzQa1v<7m{b@}=l@D@}FD0&?TZOj|5xt^cP1aEsI5`133xZ<0ijq5FF-r@(0| z06;qk7#0h9&+6rBlmhBwxIU!sKATZX6vO+f8-jurfaBdV0kldFJRBC7zm{2zT1P$0pLFAl+yTyFoZ(?5D9HyXq||`*Zvv`R``WN1u<#EO`PMFQXR&UcG!7qB zx;&;we#`X0oy7EHsAr#N{aw@_xyBFTt&ywYif))mG;uyCHUfo0PQ{Qq?tuBMVWhVc zn-Xl%s7N`QkqF2>ObB_|aPa})(mkllb(0>~2L}T#T}rsi<~~qANtrEJ87z2zP@EC_ zfCzW3guxreglm4jTV474$y{8Jct93_J`9Vc9}!GdfOq&92D>WBtOVmy+fCaGZp zDZeAm95LU1hdV&J8>~I3Eao`) zEwW!1t&lDK_Fta2yY9R4yUmCVldiuC}N0fM%*%XbF+ zaYbJVB(+7rDQ?bun;W|Z$$jWR={+)azxx)Nr!*0@P@JCBZgkW-_)UV-j|3DODBn}# z-*Np2!^})PD73Cge(d~f?g_mM)ExK+rLX=qap?r=EKWO|T{D0EupmC51j@3=b^Z{| z{hffwp8<4mlze`8| z4cGsM>;GTFRjhCZ-TqfF_@?~Jn)iK)&{h>EOm^w=KxHLBIXu~xo++grtfF-uYO3FX z%>P`gQ~ExR4j&%8?-zP4u_c?GlhftKyHl0R*Jj4+({ zs6+d&3tVY}Heah-)*)?46rEK?FX39cbyZN!sS=v`O-0k#)@}oz=p!%k$1^1d09N|1 z)`kBsPyrHT2BpbtBvFctrcZ!^qPWayCQE#AE=4egheocpk4rq>gK_CK$3vT5X%5YD zNS{6K7!F{treBiOk$TSmB$_Z8zLG!K-vxw-a=Bp=W5F54L)S)N&bynCe61qqb<=pm z;S$ND>E{tH!90sYogoB1xHd{^&}XpwOk}IcWu~$lw!uGp3h!ky(g9htNit3SKia9^ z;7a~?Z>OmtEnQJPLd(92OHNqX%5Q}4!_OvWg>lSgX&m}+mC9AJZW`1(r>7>ji1Svq zvz4m^x`5C^fooJUT@zM!o|uLZl;Z^(nD7z_BjodmSBVTFUl}URDr^G%)*p4cTX`37 z`%#C<93XnTB9*Daf5;PWw3MtS!d>~jwpQMm+<$`HbHqE%Fm%_cBl)#J4{&QgLzmuK zY#?&IogX-YUZ_?I-Fci`yt8FJ_-Urfy1geW(ew1pfz9tupK0}L_L>ZB=mXWa3`i`r z*hyYvlliT_BJJJwY-mcb_RK}c+NYfoIL)#4?$$p1z{=Oxrv|OUNZ#9HHD3QA7&F^W z37VMLXLOs|+-74t&S%n^=Hsrz;)mMTEbSz4f)VoKO)h><>&#?->Y1?Xy)@Rf`-Hz_ zaT=W#uxCXW^q!n!YDkn86A=6^Twz>W`c>hZerrv`w+n^Cl~qAzRzI(}ViWh&f8dyS z?!nHpJvosTQbkbx875$5vt5qx>h2*>(CYlezzn?2g33$q*>!$xvl3yn@OccLyc%+^ zm!ivxi{8A$qbsynu5;nM3Y*PNeVmK}o;hEgMqdT}*%#t;|2L@imJ2_#PeFgO4Bm&_1Hh$tvs#NHd6aPGq%y68gvI1tIQttkG0=Zi_8Fexm9Bi zURZgzZS~loTieN`Np$d=X<7r<>%uJe$lkDUQNprF(#TdL7Tc(A6%RG8lh5x+7jp&o z-`w@y|23tox!z!K@45hCa55~IU2$V$ zLe5=-W;rLXR}d-#JM)DLt3PebwG%QSG}*>{Encd>N*Kc)0i^5m13&8jftW-8%fCbe zqkpANo3qz-p$%#*Q`vu8@0)MS=*h88YqqCn_6G?SwD|MuOQh-NJUCL2Ze(;{a=3ly zCFF{Q!frw6Vsk!@QUc#u?94BbFV%~pjr=u9x3OwVH*K-!6=voa1pnF@5SneQrjehp zq7vanad=@%c@ANkEl*Z7B;TifJsFadXd@fidew>TcoNx52q!m%B)TydO-I3R6yK7; ziee$JvIPw2CGJP3>Y~m2@M5Zgi<-;}amjGV-G1z;NddTQr2~h5&a)nvl%NI^2`&Yi zWo&_tR=WSmUx)q|=EyJVZRI5;C-gX!G>H}D%703?6GB0u9@2IT@6ylqb8nRAEb+K@ z3$ifgmf63UztQxjKRBR5(<}io$8lAz@(&rPCyyl~$={8^wdaAph9eJ8u{UAx59sT! 
zNa;?AdyGtlq8160K*z!E00~+x8sIcbr%pRMlSVHyz;OB{Jsqq5jP!N(S$VEqXo?uu zk7$H8V#&o#hX<#h;|MV&pW5Q|UPfMJINx~Hwg>0c;?>T;T_2*i(c3-gIatPo^A-$l zm#J`kw8A{Wb9poOWuIPR5-EfP*5g@s*ZUUHv?aLNMsML?ubnh3;X``$sf9#H2BHrm zYMdrVnM}QBF&gAD3~JUqM&kDDexp4Q!ZgL7vl*Pp_e_Zh1lXua>6QP0>pBHA7AHMv zRV%ml%D>tQ3qLr{FMzG9u8o2*5NZKKWp&_d89y7GQ>JJ5IH+l~0-<1GYGqZ?jCF=8 z>!I|hZIMo^lg5jyV+CaL9Bf#X(^+u*2|NfXs>@w8tx_q3xfR{F9$~3C5O{9LZV~Av z`R8*)U(hsw`uIl?_v>>z7VdMrlkRrwPN|jUx@Whj47VHb(o-I+4Bgvw#IRcBS${I& z;Z$+$DZXVqw6l_;+>%YkKl(@_XP#5UaB}U2?AuYl1m@yS zlGM=6l8PV1-)L@)s>=@}KXRUZgb=}dh6-~KL%mnch#8cr7dxN43twh_HvfOl0uXA_ zybA4d9pSB34g;D7N@+H&5tjeK`ph?=f##E)XZaaPNX{ zBUbsrLcyTS>^Dxo7Q5f_SzN7kj<8==3|OSYJZ6{Y^2nY3T5mqQZrGSye5WZT3JvTt z`M6S@yJ)Xy=bURzQn+Aj<5;7gXbTJpKI%Bgk3Ulbj7%S79(B#J{FP?O=mrX#;O7>@H|0vF<)Od=d-23gtOusZ+W@hhNiP)uxaMI6sL zJx?}BOe}WYOzswsj?aVK?HCjD1!xMDE=BlMl!M`GWZxf-p+y6BB6{q_o1+cdV^wvU zbJ@aho8G{=5i*}$%&yWKhY(9qhc1N?2>ja62Yxty>neG zvH^=di}(_w@sQENcskeeQN;m?Nh;|9i2!*QMtgA1qCL-Q@hG#I1E;rqtIe6)-i!r8 zLCSai(n2KfMaFTg>@-=f%nnYb4B__Akee?U_ZT7h%Hrddm3^b*xhw00X6bWx-cK7N zt&r0+Q_gpLD(`??^mU-?{S1r+w(fJV&BaWaLv%`30gGtA0~bvitkZ1W^s&+z2PJ$c3hjlkVNPH|G%C__IpSq;q!8{42x#e+jFO_3;ZFpQHY4 zc<4GWLNY0DV{q)Md>WXYvM(iG_p1pqD_q2*@l$4ds6b7)rssU2&VA*J(edxQ>vBiU zm5mU3y%6>5>qDvH!@?g2s&0*K+{c5b#w-0LF-UpOEAIZq>eOrPO{sNx+!NvjoEKr) z)!^4{F2a@!7O9#Lw&MnlGeDt;?k*)rB2>BgA_aK`84I~=gibHcHs-!?T-kVk&%$Et zE!jt7Bgy5C=L>> zFY|xdB0u$5#}CLmW|&4zYoVh0>gr>g?iJ@bS3j!pmZSf(0cJMq+@E%j_naW_F2eYB0|U?_F6XbSEaym>u4 z-Wd)e7vyafuFWaeLv8v1eH3=S2ijHl|_B4^!{xDNJxZg>SqV zmIz7zp&I_I!GV=Jxb2H9`zr*$@}7};8SE8NxWO2$2>;d&;9`Zo<($j8>v={E;YcrZ zF~0>)EVX?0IAdso*Rk-sc;^(CbG@2p@Yb`uM9`-NQVi!K8LWakT$3Sua+MIqU}UbH z8^ZFSQPD~*cWA}C-Y!GS!r@rl2urx?3UlFjVl;%YAEeSA*(W1HyP<34B@vvTp^;EZ z;+9lo?=YU+RWLI#+4ePUi1ry~UTEJK?Itdb|KYB8f3WegjB8~ptmBf3>;BbPVOMq- zYX9etE#D+!_ro+YNibRu=PwRgZ!93#Y~$7jx9S~x-sJqD6MWzvp#qzde;1a6%+!># zW(gIOt0HJeQwpDj)nh;N{(Q&r>Pa7#5dFEtg@DlgmWJVaz1J0a3L>YNa+<%R8qJsQ zmUzhuWQNLqWZEL*R#pr|PDgz<_h<(91Q z3@&mIx6tZe?Gie%_p4{_S~B!gu)2J1sQhmApC3&c>V!=4@~6ki{U1K{(R!_*gM1r5 z;a`q5@}qYkXFJwhO+e|A?JAl7i|tnVfOnH=XYx%EtC?;8=W1qYn{<7PSP1ya^06Dh zRv>1Ok@2}Rb{s=%XV2W*yt80p;kKv^tlTHV+7X3{J8D0;TvmJoIvVLGZ_tXx-gSJ&J<(X$38eer zZRNgbIk2-7_-Dz9qxHQdcMB{V_0&4A$1X?utk#YlO%2Y`E6!fdh99 z+x0tY+?bC;Yk?Hw9U3jAcvJx{zW6eg`A}7{AXKdAa*{=nupXf$+wzn)53>zjk|Zm* z@??Y(XuX8Q%DA)Z#IlpWSbRTWqvltJZnrcCqeKfj584}lRR}RazJgNcoKWecTlHMd zkvhekZLAPgAG>IQm!Kuogm3p5HO$drRzk1%lLxK8X#_)KlT&t$UeP1bwCjT^;@v0` zu0;VM-T5xMmN9yd^GQ^MW|3H|j5*Rf!aP0Y=V!3j?*2t$iO};Ji1}GJP&-veAMg^6tKE)bcq_o0^gudk8s1pbP$jb+2CMEM*R+jit)!lg-`} zLZM6NPllx&cnJfzJ3TyyqzoWu|hjYnR~yiQ#Kjw*zd7T z()Z_Zp<`s>vLTy~^Ei{$QXU#cIvb{!#;3WtvB4~&q(8LGeQ=P*){#pbEzf7+d!S9--r$$-@q%wU>so5YwnYQLU7BY` zH(;X1pgeaWISpSV&cvOZWJ5vUxnIweBLEbM(&j$-I~7^uuNqMxmwaw?iaYY`%j)6~ zUX9APHpv|$w7D5;RWF-1&84^B7b5*SdZ47T({QZ8|Nq`lDIW3Z;oe*+&g{h`92PJ#qxhsNe)kN9H7Cgv+T_ zN!o2f4!>rOv48V?T1Nbhm)46?d)0ds6IT5&+W-_xdal89{Oz^x6&o3E^YQsz-cXI@ z!CZ%*e0l`CH64os`ZUvJWumd~ zC5GHOp;YlsbcJmw(-{{*|GjB{wf*6?Ws~8}$wiD;V|>S$>v+idxngn3HqI=B?6n`a zos~|Ju<;*fjd_$MVbg>xTQXk>-u=~y)xF}KNk|HTe`1U^OKh2&R>>;kLdYzyd$=h_ z?Cmo$P#>QHidfUJBQ!_j-dITVdj3PklYPh$uTZJtQ_pAcuWm&Fy$b*IG!4#13lHVZ zrf3uuu1(HfzH>!);KM7_BX0e5HlGvF+9;*rg($ddu+B`<4XMGvUp-0cOF)uMukN@H zlWa9O_L38cQLCbgZ%lE-%Co?J>Sb!Kyic7M(!<7$;;E1aujlsi9^h}4GRH; zz#BW|pYie)zJZNuOF6!BTOZ{wBZN?**aaG2-*L;H&Mfo^7L+E($Edov6chiIwVzof zn>(+@6xN>bJA95B6`DZeDIH!TA`uTQEF4lCIP24UZaR|ly`kAf3qr+W6>9Q~mhchlogh!SPd=WZ&~zbRS$^e?Y6A6~5t|IUNm zpBzzyzfffe<=i~!(A{cyQPa~mMDB`bwKV_7W7Ss?2~A`<85?trP%Ba}L(MVU4>Af^3g%-i?Ko0N1KR>tv=Rp|S)2Zrz zFT`usf4_q!FXq)SoE2!VemGiYpn~608l0P+aqSs0OvtUZtx}EP8tVo 
z<@%eKUliURa;Fy(<}yD!ZuwWyy4moT@Kp`NqhbF+)xujh?PfMC{AE8 zb%t}3{7AB5yQ+RyPvzRIe0r&UJt483ojO!mrOUSPz&&2$*O^S{Bemz5*VCnNU58)r zI!ct4u;+FBE^J0mzhaD-zRSzQZtrJF>1zaEjP{BPw8OM_0pXcyWmJp)O5CD_IjEUEopWwMOB|=h3=YSdaA;R;q|?5klRZDsefPm z`V)HUZcq1>(a15iG+Vz%@9qydFBW?yflD~|Z%z>BN189#&cV84YouEYPtOL(1tYQ* zcsO3^gMPsz@1f#h{^6TBxqK_N6|RZ+0(}yZ#9blHo~>AX<3;GqK%~#)uln)aJ>N$V z)>utXI3AS&eHZHyBYOfbplWF?8nYKA$56y6IUX`&s=JV5Z@=^FHWbIaV+9AhuQPsV zD0~E;_f6;jGIz3N`TuU>fWnm~TIN5iCym`k$yozNM&Ix*NuJd5HWouuE1)B&J3x^e4>VAA1q4bQE$C(z(>>o8P3__3S$E9EqZ?Tbzt_m+ zi`L_J)QGy*zjSTM_0Z)gY&#Yxey)h;=s-($N5DjfQ52K&D^2l%8El2)_GuUu6a;V5i2oblK&c@Gtp^;2|G-GUwnK@9PVIdy%6{#X+7I0L$+)N zcrgD^4f-)k=D;xzZ9vD^yKac2$bz$K9G3&hAK0V?20HT(Hm_bSiVs`^PH!1)iydX^ z{JhQ^6Q^e*e{}KMy5EGrHHxpps;9M??rH$$BTcc1SUgxX-m_e;YkGHKBcYTUMpykR z!m8k8z1)=>>?rTz=~Zw2Sr60J!n;!wwTM^ppUaN-EqeJgUxH&FpJP{vE*K*=p>t{P z?DJK*BGQI(1B5Caq<-cncDrX2ncy(Bgkm7bWA4)uH=;eqQ;YXVyK|B*+YVs3?gdBd zf$r<06Lpz7d&Acn1Y3WEUgNHG-O#-7+2>~@|Jh#AR(DBD{qHY_!Xj+RW}S(58`nRI z$e0LK9dtuLwRl~vHd3WMmw;`&2w+R0QLWGN63{wZkmf&|Je;e^Nd`JumVlnla^Q%F zi5{uxGi(~L0BMX5a7Y5CEBW?iNgU9mb9=9UF8E2b@WD?Yo;K3B?&+ zml^C&VaOKg99=KX1N^-9cjR|^g%^;p^Y8^<9fet)A;Kr$2Gd8^x*SRO9Y3yk>#G=k z$K*RO?e51vM&|BHR$q@v=g{D}^N*g9{0SO=`W(>9LSEx_Wv7oTz=3JCgw8>|f&7zF~ z@mU&u(mX9u;sr6fcW$f5vS$N1cHxIB=8{EeMy$33uH2bq1+foni{&^zv}6;CGC0|| zMgMwT5+1ZMS|%YVYSqI*UZ7YvizGN1;VfcFIuPRl%dbg6XROR28je$yT&3EowLiAa z#eWS-+a2e>2lOa<>FU09xDFNv-HQf}`WQaw-#h3YG!LbC2hJoI7M7Is0>^mF8YYo{ z0f#Sm2QO7&Tzr~n**?@N${cNw$*ejhczRZ9c)W6frp8$yhoF7l{(fZS=-6l2&n_Qa zwIj}%lw4WyCAN$eu{>_W`3C~h_(!AXvGz3WL6RF4EUS2^dEHB8+2k#r^@MU5{#sI^ z@H~!P-<{1lE*)Xq@-nYS+?BFAt-i&ni7fD1${vWd5G9b!sZU0sTL=h=z zDFKm|mQ-3mK)M;EVPNPkK|s1YrE}aGy?cR;xz3#ZxG0@VuQL1g;ZDS}b9WjvCA#wJ#UA;LMj2DlQ9DMN z7QjdzMy$QJg#$J(-0IwM8Z^(#E~ds=J{*%cya3sx&1v5WV)&Lti9T#(mcVNMoN6q5 z^W48Cob^|qIF;JlCwY2GvB$XNblPM4uws6`cz98bpmg!Mzq?MrV5oOfFkCFlMlOS! 
zeCI{V^)($!5TT*+v7$c-w`Eo&W1O;`&t$(TR}tApq_sgJ*VLwlK??(A!Ye~f@{>^x z$_gAwQzgb|3a)pKn*HUiF8YYX*vp3sm6r14QZX7G8jkh_=_sm$NEK8j76m?8=b}1B zl{m$+G)`k_qo!7D2(u2Y6Q6`f(xw~g1g%Rl@L+P$#8DMR4QyQyws8Jeu=%SW+El~7 zO=*3dykJ)>jor;~d!f#u^l4d@O_l!v+Yh8jY`v`a$(V0DW$QFIa_DM)|!IvA*1K?3$~O{s?N8gsrQY`iAX+%z(&V<1lj(qz*Kn% z?1v>2-QtM$sr8({D_sNA-jIzD`5a8wlc6mMx<|BdA7Q3_dfjqAC(vfXp<5?%Rq!3v zx*t5WQGY2lYyKX8)?CJ@HhAOBYZ^o}>cwq8&AW(9Sr$4LPQp-Ef4EwiDzjbxGn?a+ zJ?OHPXi&d^Y^q^2Uv#VgqinOP0hM2dvT0sCXlqt>q8Jv5AE4@sXYO+>$LWESXr&qz zb>L3hr!_RIc{5mk3K-0!au7@UB*H9FU#Uv>6C9?d{jqv1wY-iXB+wtBtF}SA(?Wp0 z1x=xRZTcYhf#Bz-UuIYv4B_YM9Vmow1Bwn{)_Q_U(lS^#wf7N zU#;xm?Dl7g6;eH2D4QQtFIa8!*qoc zVZV$)%M&AJ{KC^djvg|=rOHe&ycR0mUNxDeSlmy^%M&~sSo>$LcT})kX)P6!4lq&u zF{Y@U=HD-X^Rfz%H8GJ%(Y~zlQF#X5`qr+NuUj6+jQl z{95jPkej#SX;gZ2Y&D;zw%BY`W#4mLCMF9ha=CV<$`Q|jjdjy%xIBvPi&PghJ|+ut zB?FU#J~GfX41;_TCGRy@9E$BTjUF}B!dZK6Qt1_zo z>)>NT8=fUj-D>&5|KR90^~_OCJ5DtGr7g*qB=SW*5Rkw+P9PivFVH_)>3rIAPPqKr zmP7HS_}WQe2h<;rX}5jGm5NTFN8g}wCX=*fBIv%lofo*&Q6El5ovv{;Dgmp@6^k2i zd9b0ph%u^)7?Kku;ewY*^lUpeW+Jh5Eb> zk)eYQYyI%ywEE{n^FA_g(Hcd8ljUa)5UBN;sekEmovm-vH6&ohOCMei`z1EHZ6`ZO zpl@?IsilZtv{D&kgB!$;3+ZJ7?k-QECQjzh8^6(TRo2?f&&}GfdVWgJUaaMC{ge0J zV|yic3WRsMmiXF#ClTsDCSzFLwa?nL){KVD0yt%i;eTuK##@q;uM5_fOUV zs^tj4crIT*|68-*uk+7nfIuawuy;JrAM0C9-F9AC)g*#~_pN|@AudHfiT1**r}Ok- z_&NzwYl*?@(v%XXdcGg*Q*b`ypkNeda*D0s-;FGsiFOAQAX3C0k{$~@av+AczrDR>P945 zyTq(w;-&?I@!~Br7R%nYcSAjtnJpY-&>6Rz>7b>#w_Fv-WcYCIld-?W2&vVUAL5pG zhTccoohLAkH-38p9RFpeSR7L_O+)HouG{mVBoBwwi`XVZ$0h0xa+;{n-=LomNRtp9 z+~PJAJ$yLX{^g!QByGE5aR0z9j7pEAy14C(c_MA1gRzYNN>r7|86K5Rp@L!={r)tp zEy$1u`Vz11?Rs$djLoX(t%Kf&D&Br$=(DZ^>-(VJQCWXDnLnl0(O;9Uo0ruZ<1Rp- z^KCQsz+a6>>EVOlH9n&)DVFmY@^`N1!kPnm>LQ^EFVa{B1ZQk}o5fpBn`FOZ!Pv`lt?6&B}y|``SvsC+tm3vpSM1@K$SaB1pD-!vf_PJlh zx%6S9G;KqLdwSeE^(IQWaYd+_98(jX-_;kWOe;U{T8Ww7TNgkpMuQIFpSbLe3)i(g_gt>l?hTrN zrqN$iSaMomsRezC3qqovv$$vm3sVNMI58?>rXVw6ijEI;PZWw zGi*f9VQ;Np{=r2X!#aZcUznKk4)TB*>v;V{44A zQ=Y>`aNQ!??ZRI->^--7Ag_A6v^v4Z`(;)NvKK;N0q47DcyYE`V20kfy%n$7Qq>7B zPkk2vN5txOdHvRDN3!3hro{1Ng5&je(@^{-d7vwxJOz&A?;>j0{x&*-$m5N&j+v z8#GIOLW0?8pY|)1IqetcgxHAqH~ZVKEuaB}p&~rHGrbu~&X3i#m#lc&C?>{BW=B7OTCUx$T;-jRCL?o7I#x%xg9B8k1%qYVyXQQvDaz@J0TbwwAp1Y+C1+ zzsr;g5@C*L_SDas56?}wPkV|_Q)TsrX?UbgL&vVv6kC^4# zQ_hPohjvV!LMas_esQ5&shT=XeMNn{fE>N%c|Y!AX`AJA+>pfxnLgPNO1*xDIu#Mi zZ7nTk&?h1&aMjJID!(;9VNMV*Qb6_D$pgY#VKy8#rXAqhxg{ZB{= zO|%#dYzX=SuC|xd4I@>b5&r9*i`BBriJ4o?71Lb)O&c8DLY{-wiUc9rFxwqNv_FEXX zCMmp{fF)O#2Rz5TKRvYP!bTcF3mZdUOj7uzY^Q3hZTp;Q5uXKA@ixL0M^&G)YsXB1 zT_QLZ(K2Smbk^A1(Q9vseSqy;FV*E&`@1k+mae*CRZ)W`Zr&wkCs16u{lk z?_0$-TE3cY@x#t7?*+}Q0`#uK7VF*dnAJ_>Gq{@-!a{M{Sx$6zJd?f$b%-7umGc=b zb?I|x)G(+Y&L#@vc6U3$F1nr7vKyG|Sf&z(c9^|t1f1$#y<2?f{@fIgg4^h##-ZS) zD0MFntf%c!^n*#YAbHDOqXjkIi^8nyKYYuFJ;%mAh~QsFg@e6eJPl3-e{mA3zrilM z#f1xgPpOhERSo_F))|cd^|PDi*A(~OUz{#Cs)TZZfzQ=!anNSE3|m62o_F<+{VR&9 zHcd-#S0c#Ei|0)Hn=rq^{=6EefYx?+O0D?T{@oG>5Kt`Us`jyz|5eLO0+~@kY52?b ziXVUsjUka;a;sfIoT-fRVshcMu~<=#s8#{G;;?Cf?{ONl6O8j(v&!VI^I2bX|M+B; z6p1ELXzx~QO95_*e>&G?r8h^x-4y3#Ld$j>)$CN-!zKRITP+W$JXs1AUJ1?2(jkgp zoo#kUQWmt-t4##8u~;>!+vO%f!e*K*o0Ap@jDos%b`25!_ZCDXo1h{LT+Dk{y6qnRl9p2$=3#orh6;VT7ULN#YAl|zGAh^N@S!#BFd&Q>+wH-lm zSIwh+HwN&zUG}w3vvIwP0P=t7H&?^$x5Eo}zPJ6pJb(H2Hwm2)sNpd1Tf%MXyB!(( z0VcX=`~fTzbS^Y9zwl4eG^n4X0a)L6tR$_MJ26oS9dt35hibO1lCTo}jvomp;1vT9 zmDVyz6q&|QR@p#Z-3@DluKf0m^+&H~h)lRBXhB;+Z(TicnmtX0@5#Qfm^A-X+bJhS zeHcOJ9@}|Svwb>Kdt4Ol@DBXuj(AFx)yJ%jP|pRS<~Z0YTenZwKV%bpR2OsT%!dZJ zoSmwBX(TGrMAI}kkHy>IN@Mvj#xnYTU8#o>aXUFqH$dlvAG*@Ge+=G)N5|L zn=~HC3o%$PeWy4x%hwR;H$!qX#NmII`o+a{t(tWmKi_Dd+q51jH1d~>{}&gC8vRQq 
ze~eRVo`)KPOZ_XznJu@v-<^3OnjjhN9FR++cCV%Hy+{;FM|=1$GZDJDe9mXvjxw)`#aJd27Da=&## z(%FR#Ow9UW@awTte^9%--65D5x3s3vlJ36tvUOdteP-kYXt(gOF)ko(=rnk_;oYK3 z$-n546cw^pAvzXRt^KWJPV!mP5?3=$s}n`zAJAK|C4_Or#~ZIqd<#spgqVpr-t$ck z-}!WhA*L^!I4aGSYOR9IaKw>f#^2?WJw7(uns+Xpa|3!&_|E@Co(ciP7fnb6(( z3pEsQJ2GTDL?H`!EVv8*clZ9=W;V2={Z042G#n~KI*vXVnp{dz?9Vw-*(M3GVTm2&cMdxrtHnc}sw;y2)8NdZ|`dzX;WrV6wXtNTv_p4A-^f_uV;1=bPye811Bh z2`6ROBZ}ufw>_ArOV{vS96DNUa5Juh6*$8@l9$`fy;Lx55sD>=m;Av+OP?JpluqWR zRd(96{s@g88jVJrLUYNU^${9QDNtLlfz6Q;p;#G;X}N^QwO~IH0qL-DE#;r{I!VRA zTtVBbNARz+&L(Aw%!gNl3Rm0SGE$nPk2Zx}IK$z~KeFY`MV^MBNc|5@dXkVgZZ z(G0e^da{}!^jPquvKt}RH<<#);rg{;m1b6Kjm-vli*WF_FXHMwtaXpMgzF76j6xJP z1noqCTclPwEB3?Xjrle5FV$wuwWO20&)JoiD;)PRjikESoamO4s?By4SEXOux5F5% z?TQG;X-RHgUW3n6mYfJ*Jsia)D0l}=gVbeMucPV`y-bP$2}`VubnnOU>_YNgoC#mV z3iF!rXm7`2JK)tv&;-NHUvi}q>VtezfYEypp1x6Y@Y(sJHsTRVTjI_tp!S_^5T;9;Gr*tbpfhkUJ7>eGeN;HyKUktmh%akvEak zupTDdDl{YLndfTWj1p!UH^aL0)o1zYurUzB3*337Ttt`ujPUJeDF>f_mDqpVq%?Fi zG>?p|MLsK2v>%24fig;AApPfYwUQ?Lqkgi1eg_FK*!#biHYyl6OTbWm+Tgn6%xUDl zY++27C8&KbG{89J8kNaX7UyMhVY8yG92_jIs|()5`Bh=X$t1a;-2Pi!PfqI)1p^@dGl3~w%K_w;)gm*~Yc$aN3A(iQU(P`X<CL3liRzScNv-Y#y`TIRAi+VAgv*Xn{ygY4bC z1?z{~%2&NNELwIpcJ1#i2}gF>{;a$xf2{{_>+9j+O4W|gJjR(Ua-D{0LPe|%JEarN z^34cVpu4IdM!K(vR=tl;#sKYGD`!>3jr+TJ)6^q27T%O{O5IQ4<}1brSRG z!lR?EzRXHRgKy^rwZFqW8h+Z{HTNQwSj!ppY?klg=8|v7PhN616kB}kB_~5l4Awp3 zIN4Wqe*Bnpu@QfgcZK6;$NdkHW8pdqnxVSc<+@cXr>>oEICN$UEf?BCk&nvETWqXX zIt3Y<0(u53&f-E}*Jz}7OEo0EYhSx1Gf<+hT(;@MjPKKtzIirWMYj$L%AQg<6j@oa z<~IW`r`HRpoVH3N{sz~Eiw^2c`Y-&DN%V{UPQ=Gm3ryuPYfRyGRx`ElV2n8Tt+}~A z1f?D)$Tn#%F0J9UV>Rvi5Ec%ECwSje#Z8V6(~L1G=qrQCsVd*Uw$^&lcR!jwXIkEB7ew~aL5r>az*qOS za()U-^;cnH(X*ygD&AHBc_@vK7;7)ViK?#c8a#N9G|7_j{uxe@17B5~uZ?=$bHN`VNNT>373nIdSyRr~B z`$+_+)BC>QhaE$G(qW8 zo_KWIt12?)d;y^fQ>Zgk>V&#Td`~0xh1nwSn1C6#12u~yrxfQ}D$}I#Qv?$hPc)ov zo*cWz;KKSjtC7oq@)~=Vs2M%Vz9BK<{BCPr=TbaC>TbM1LE|^!k+OPT8$_ka8Rw%c zRaN|sWH^2bFsBN4h^Qf5Adm;lE)Z!9uRD2yS)CgI9xb;3jd&<_q_FKvtvO|3H|2rWlO6O%b-Pov7PLfa$TW}^(F&RUmb;f zvt}#B&fmOz8r&)a&qbw(GqKmwfpPNbiM<0d*Fk($7S7=b6gJzL+HuYPhr%Kcv@Roj zEKLTSsWTu{Z5l7fGU000&-yaU@}JG$S57nnrsob78LGjbwgeCVBtC=j^1ajzPxk_O zFI|(fXGyR(tgi~(y#R2|3jogH`xgd08vf^60TI9QJliSfW)ac%$J`S_QFPqEe=9;3wJhS$3$70 z{3bQE+*j_i!}`h1G)f}6@^F8XR`*yff;+v zI6lhn?@nX%^_1|$s2eDrsZ{0QQR@+pG8l~|eKcLd()_Zz_jSa=`qomN+@TuYF&+fk zhT~w&(c7=LkeuDW<&AyCcFp>pL(gzsip~$#hySpH9WmlwI{)Tx9kmzilo2 zEFi#@s(jRaZEWR$kwX*dt}Whfp!d309JCh%r#;DYhuNo zy-~qyTu7e2o|CoiU8D;j7W>r3}pFD*<{Rcc%@Xm8V%YPPAIU6h(k@;R~wr*dRA zY-xYIUgG2Kj>TH&fjf2AndQdSD*H8L?OMG`oL~xzZUuW8y!N}B&GD7*!VF$xtMgT0 zm5j2D#xTFR5CwjiB%l?poAij=Q{M0jaOTI2JzuMNJ995mcWrul2WZP9-i)0v9gHR; zY_=-@^|0Alw>A3UCI)q5C~o}d+6wk^!yvs&t?@v-1X?2&6V8k`?H1Zv($tpSZWF%8<&HM2 zdz#a&I~{G;`Q}RH?iRm%jI57rMVnWK)&*+%bV!axD6kc05zj;3z2Fk;wSu!zEZ7lP zR5#&rn$>In@&wqc#c2HZ({A9K| z$kT5J&-0;sI8IoEwu?4OtMVij^^{MO0a!{68F-qQnoZEp7JxU9XXb_&4f7jl z+LIhS8r~+{Nt-p{()WzS$cE_+B+qvDJ~u8yw6$bU;1JKQsNN{yXoOX6upZvo26Kiu zhMfRHXz*0WITFequd+>A(jP_I&&0U|-zFs=YmJy|60*&)&h0A}Ivf`olaPuo&z{?h z2h3Dy=eeq1n+cHlnbCZ0SEwRm+v58--Qa(kTARp6ta!F<4WcjE*GA!X&|8DBalS); zO_4!Uj^HI11zh%|b*VT6O_~ID$U8c1Y1hT%3J&eCl@_0KqYNRC);8pRsdzg4VyDJ` znIXCt4_)N~K&_)%6WM39oxKUsp)f>?w2-hrjyW^xa~>AK7Y*q zpz{qwiqxUYRi4mD+O~QYHuvuPM$P@(HoGr%;h?RWRgW!4V-4F;6P~`udP7~zc+1c5 z7DWp?e?0Q}!NkXc)OquAtZG?o`8ri~gam@*QMU}s9qX24R^EseMmSr^glx#pNLW2Z z&zN=Klw|IOwCA1IT}dW)8;K<9b;YpK89PVwcUA2co^Yq*5Ibc>+HE7`dH1Y=#aGnw z{3*T=UuTs|yjT*433!_A;O_JxdWr7V9HB2i0)?)-&2eUUQ@JmsrB?A(q?NzjSOix& z_N>sMicXy26Z^8db$p!4%xTCNm(CmsTQTW~>)?)|Sla3+c2%|MNl7J$W6GyI4c|fr zy=$>#L3y!E^*@{8U(c{EjnFFK%@t<&Lu}l`CH`9VOS|=D!c9J2v};91+2WwwYl6ix 
zhILf(7^i#K&7Qh_ZHvyN(pin z?92vR8A=^~%syPUY0E;O_v2x=>7ZcVI7l}WXfKlZzC5I(*B>!_b9S6w>V`=QfIV~* zn$<;EBIMKGU2O7SQxWqNMpiU9Sv#dYQ5RjC+%_3`zXu`u!}X0&sjuWCnmQi-T|VFT z%P8oe?hAY`HO#!jUUP-K;d+7V&u41Gf=Rb_6{2|oQEv)>Q;t#0fH-^}+_Ny+kty14}U9;8J zL1B4k1l_rt)Jnm#%$1^3oe>Km&;pz6A*PLm#nU!#M>8T5fe7ORFLkN3*GgG8mv0)M zWbv~m_eos{DbePZBFsOUDlXY=XMJt*`bhUa886tDmg39j4T#ylYi)o3GpuMF+Gt0Q zDM(;Rdc$7;WY9JVnMmE?tj$?oM}SbQdY*K-nzX|cujxK&Py>1wRU$o;rK@YYwDsRx8QYtN0KU_DuX1RGw0{26{l8tgzX38NVSC= zhp7utarqGlOnwM`MfOVDWYBe9Ky^f9mi#@V2wLUae#`{FXhWQ9zlL`Pr)ClwpkrY2<$yVG zgrcOyRVI#Ynq7{kPL6QB{o4AWU`4&X-ON@S{cPVG#8t%Wk0rh%JImsXioaS4(senu z3Mfn&suvP8XL$LfYkxb+ssTLshgyFatq-9UDpA|qPcamphnYw0BIeXX%UFVN2tK zhjeh({@^XX&#ntsoQPu0Z9rIgweCb4(B#=R1B+K3#aIRvSQoqxaw}@W4j8vdr?Y=1 z9(v-_3|p`^{;2#CvVtJ`5De>z@(dzi$W@pFi9>W%mk)WQ7v_-}p zBKcd9&;+2#G8rE|?wafs*#UGb+E-p79u9LD%RPDm5Tj;3(;Czo>U zEgs2fEN6S<9`Kj&@$St7`Ohtoqnli48Q;I(!*U2c(r*4s!VaeSE(I8TPI4SDoClt* zJGt}UbSAq(Ep%(8ODI`eBMpAUeOTf|DCsEYYh!m&)MHUl#gRC6w>cPRsVjbwraLuQ zn%1Ml(@l%T1rD)z1jyUAG6ruh4M|eqe}3wwYN6g`MBY0;&Hkod4z>jHr2UjQF}~|O zukJe8{n&lH4C9o(S2(Z^J;QTvh z8xd8rfjTLYSS?(I3P=44F2e~;Zz;VKll02vF?(4v-9@s1E}2mffHgmWwb@!R;>#C? z-x71jd`pg^SJ_n@*Mc~8X9p;!Oo}i&x<$92HIkwIDXG_QH zka1>+zzAdi&y?BNdB!pRx8QYbbqC*vf^bSIFhAY5Wy{%`BtA(=p?Gd?G|JFVZ*zst zI=4Wk|EyMLGKN#5ZsA)`HLZDLXRPOwCr~=2ZR4i%L*~U&jQBQ{&@5Hr=SsP@itkc9 z$h3i#@zVk?X22%?EEW5x>E|hPgXb-4pgL2hghoqKdzJbZ+=w@)-3gK?)yY2ywV!>lFzy6(~6(w(ebZkP@%w=qI~(+TJ} z2ubyGnR`&&4jT(i<$lGqSC#$ImijBwcfeUaTL!Vz$HFW_<8|?8tS# zS3!ct*s<5WW|MSl;YzM+WJb9;)5ZTq($Lk3zBOKj2v5at?}%QY>n8u6?k_80xX;F3 zc*iHnPlw%xH0SJl>HjPk(fE~E9h;|YE9I^c2VoPaTQ`5Oo23z`H{d9_0peh~d1G=8 z4~v7!MAnU7S1ZX-GO#++=W$uTo^AADk26l2dyGwh%wIFkTN3Jt( z0!{4|&J76v9}6H1#j^zNZ9DuRdF(KSgIboYfJ&Q^%UKDLYdy{1Zj$)J?OajJqtKIqWbSEK6@Wv&^oLU zz+-aqtGUSZpF{MTB=C10q-L@yq41%9sw||fSW7OpBKSD;OQcYmti*~%27jmDcByHg z0jYFLE#q|4b%5;g53zt=0#-h;S@Go*zFDmSu0WHc6B&$Q^Tv0epK6KGzIIoMK5#AI z`nC7aVn^j>tbE1B!1H{g*@(m1+NO0!&@|Oe?C|OP zO^r$Ql1Hw0V3(_gXEn@9oV&7fxm8VFF$)Ab~>nQ*n$UdT_^H7N9)kR*F zd+!OKrgAg>J>ut+_wPs$jNw!oH|$3jc~`DtRF}l-X%!($IZ@Wgnb-m6UE7*qw5&*o zC+(Z3#AE%h(k{IMdRXLJU%sY}F0DN2mTqL8L>&Oo(KZKb?OLgT$_o7+b?LH8>6nSL z7Ic<se!nrRwr<9fpT5=yn$?4tQ82j9cS38N)Xy#NxK=jjOiZ9KexqvUV%1~=knQ` z+OOiQMB$BafF)H!`yXpUP;dX%kDJnc~|B*mdQI?D0kS_ z?Kf9xaiJk!;+QjI4p;E^+RP99Ev@q+&LGuT3RrTN+I4!roV#}euRy)YC*BmqB{W~Y z^=`*bl=Af(^8#6YOAxNy8=VpNDA8=1xUodX%N2>a3a)$MxaYFQ3#bG;R8gEFE_{$G zjN*wmPH2xI$z>5U3-6AagR;~Jark>6fj7R*zRtQa6T!$U|BO9M@izq8lg@ns_!DnT zdUtCiBQ>n;%yt%^(ce}XJFJU}i_5>f%4*W1QZa z)lPM+@0okeu=DKcz{@@L$7%T--U$rdPi!(JMl~WFU?tH^1t7l8YgRE`4sCZGV#~pMli`hib^6u`g*#OZ+Pg(Ww_n6F)6J7> z;Y53)@;q;LQ^3+I#@rR(O|mVPhx6B(rF5g;Ebpr-M6KRxYvx4A?YZugfD6cHT!e-; z%UMfxw7TD{=fK7|R~phJj7}Ye)?%C|={iVm@U0$&4Va2Z?dVo;46;#rhs`gAmO;!; zD+Y2-tG0DXSVZ$t#$rc;Uvd`eHo`wV+q!QyV|1o6GA2#vcSlYF3Rv3o|C$IXrCH#& zJ&7O}KfSAlD~Qqb;_iulyBS>L|D^TU6Pz!!oU3($6w(l7h-$zA4)(&>_iJ8InzBi3 z&*%m@%;(DVBz5`uN!B{LOO1vZu8X=hz{fv05374e9Oq;_89yIPJbK|yQKZ73mB5k2 zv3Xh@A(+IM{|J^$^l>E~e@a=X@S0E9Vg;ksF?D-r1)e;5dqb;HZb7x&NjkwX*jp3R zdUMJ5M%Ka6NBY+ke((dDtH+_^!#~6UdYTS6C9WFU#rL{${o4$pt^2~8CDQ&fAmMNP zFtm209fu7`-dHkwOA;{#Gxq)xp)3Dq-;G1#*)qy9i~`{&gHSp zWRHFODx=bFni-8Lk8zEOpV_AJxY2P};(LF{mT_f;)PfYyd}JY#?p*dd|> z-wq6^Jo5SdHETP#WHD?o4pk<<6_ZRwois`n=5fzhA~^%6uNot4J*M>SSu{eNfG)lg z0WtL}5C^X~B`aUOHC*&bAJzLd@m33ZFbIvrM)vZ9cE*CO!SWqjm#f?eS3E3X4d$v% zauuO?>#Dvu%d^Kuo2av?{Ak^FK-MyGBBFk+?=t$c+J#8yx5Xl1%konwrCw5Ush5Fb z7{7brZ|6Gp%{uxT7ahBH>PAKNba_eNG90{l)qSsL6;@ShCR4Is3`U2soY2rx!Vv+5 z$^YIie>DJG^h+)Qsd(p>+X$osNWABh7S;V;x-Ns%t3 z#Mkt)xEYwBf*E_pCr(r)PgH*J9n{gOsG1#<8WhS(1NC%;;n$WY>@R?CayKQCsgXV5 
zm{&f-4SU9*LXhG27qH&Tkr4eYT#hW@a;2e1`{g%LanU?Sp=Fs73%Qnyi;StZX*#|y zWy`H&>72$+;9dDd*3Ezh7TfCpcJ99B>@sFr#QMY(e*oMG4A6Ysr)?s%e)f^ z9kEn}``wa`#(L3n!0?qp4eHgE1vNy6CzfH>fxb{gtoAS4M>ZXuJ*zV#ZzBb$15`d! zT8kIg7$JN3Z5n)jo=wEoi>k~rsH$cL0@cANoyRyff+Qj@V>{nL1(K)4+Wx|afRjI` z^Ed}cD*pvCk3%nGspjzOU+dL43{8Ua~T5 zkIQ;Jv+F?Ysr34un{qAaimzTkTiP7tGK-MN+8+@@snI} zdX~!muS`$*5~=H8tFxvz zby#!Ixi-!Ui0u!x&V;!vE`w_S!vPh^`kcajpe-B!!^v+Q$J`pu>=;mUm}pYGNBXZD5xwIfTIFcasV+%@&QUw7x2hrSj-(sr;l@?f`m z^WJ2|%xnitNO*Dq!(ts+yY6Er5#PN!ggW^{^JE9|!)Sgt&+k`*z5|v?B4>2+aBL`< z%y|3Egz~ERmftdj6#a4li^|P4%vWI4dF-ZRBQyZOjBq z`gE4gajb87O%#;dhVkH&E$lHfQqEgQl}bNIa2QU7con@(W5^6!+)o9WY}Xz}l0${u zHc^W{yZJ*hz>3Nt4$cT0z09PtP$bXTQwk;XFFUs06>#%ka6mTOHpq`W+DEYIrE0PaI&mCH=Xafx>Tm2M zoOSJ~uSKnlUx*%78R;?Nv8wpQ+~3|7L+^quVxOSh(gA#Wmb@-hPPYN>(_WYIQKX?O zUrb9rAeIOLmj!N0o(Z7lZ`u3JyO{Gj7^pk#wUKnYA)KdwGEx8OZgmTGvzW87w6U;A zr57;6y1C=D#aCNIR>A!iE9ds{7IEhG>e2<(^=JDA10ldE^wG`BMAwWE2xw`3`?;cn zqtHyV!!$dU><1&|^C4_A(t+2kmIompPJZOKt9-mNGEeDk`P<|{&L?0<9q%0dgPgoP zV^(VHD}IQlpPx*8dMIs2|EEy&Bzho`Hd+S5iLpo8i2YKaUCIU9ra%JQUd=5VT@C!2 z@=br;-Fd<(e9CA`=Y@up+gbA;4aJekR=h5S#W5{`;sY&xCbQbN2%9gEw@Tc7-=5u7 zef-dH-s0Z*%=Q`QJlJ7MKa312T-4(vNp-z!vF8Ytj^Q%?yqn7hsMbo{dUjCo;?D){ zwE-$+R|s zH10gYB|=~1&#|A?H}I!S?sp%BzuouWaPT-5M(!p7&iX)GT=R@S26p1_jh{TZ`_}$s zokhI3UN4dZRJpEMt#=~)qS@miUpaymb2Ns4gXX~8_kw0 zbP%$2|9`P2l(hHB+PwOxE~qG<0BfBzo0u-XDKJ;*8Ndqw5LjBYxH+49R=Qd3?XdX$ zZz_*&rS3!i+yF?wS=5O~QA1VW;5gFPb2hK}o%EH`eFY#5HOa~Rr=uTtU`FLR`fe0`$;Crzka&!g;5kFxzknRDJ` zDtbmb5E9qBWa^qCdd%s1WOu9IE6(eSDYbZ4^Ke#Tz%r{^$Ri5zR2Fe;>w9B&!g7;{ zGlcgjnDFpz$jB%6#z^lm3Ps0K)VDVrw*>g~|2+<< z6ZvR~y1Of>pR|v9o&N);sp_lEfRqY@kx;ca$Pa%u;&A*C`3E;pUskx93~`IbFr<0G zlUH-?@vt7qUXHkfwn?{)=u%F;ER)kadGPa=IQ&15g_7&8*rvD>wKYb zD9~=k8AfWUlksT|etmGy8QJ7@k#09nr}g#J<07>Xv@4HByTHH#MSy3!D>=XCsV zfNh=vZoy5J$y)47`t|s)5#n6==>>h8&w{ z?yf#izQdj;U;BBXDtHmoppqfI^|O!7SEK6OTQVbP0Bg{R-{W(S+2=@4-<%9!49^DP zknD16*sZ~%sr7SPET)7`Tmx<+*-#GZ(o$%g+k5NflZdorFIT;uA&nIN9v{a7Vccj~ zwrdG<8;|3FtQ6+XGmK+O_gwunH|x!p6h=e$8XeffrEe+Re*g0P=;ig}63nzp1La(B zrRIz@W-=j)5IkA5h|Tol*_=u9)DxyA`NFX3LRgg+hPO#G1Zv@5S|JUBv>v#uk{stT^*W&; zXgd0JmfG)*bcB31KJ?L($an-z_B}E9sauTpz)VicLDnkx%hT>e1}SLo_CE5h_&!Yc#GGh+R6i+8klxCRO8a*TLh{f!><1PA@2U zuC^!FR^rjX?3+JJ^XnrM1c(}j()Zl?@P7?T_^_+=Ko1_24SYyfpH;-_jTbB)g7;a+B8K}#3?L?O}omIyf zTLeEbR7@xkPKU>Z;2O2Asll+a!?CGKF3!sCG? 
zKE(i&+F|eIq#TW15a2R74$DWUs82kkmaO0AC*U+t0gS|`SSjD8JLu9?^gwT9>5lJh zxd6CJ>yMZL!cwIa!;J@Hp$P}NJKKmp_{o)E*OQ3B79h`jgf<0pfh`0E4oZ zV^gs~tuHfxB=eS2y?p1oU$|;zNV1|mEx@g23Gyic5 zqD}O+WZLx98#Ha(cKB=zIN2L}4fG_3&1PYRE>tIVKQE6sjmnEiZ~?m6o&>8nW1ovd zGP4S(gd^3YP@QZ+xY!7Zy3H>vBoTU3yo!^HsU?I_ylSh76*3XFQs@{GJ%g7+zqhWL zWe$P>E%nQcZ$a=%3kVX8VA-x@-Yq8u_cQK6%igG?T6>A{gzfX`v=1K1q2Z%xG^HLo z+KP;{G``=QRBRmq|E6770MQNRYWc}MT~Rh<;8{{|8ma8?GaA5p5%!xz!ijf`Gj!U# zNz>=J5+F-?_k~;&jFwY$vJ7##POdAKk6w!T;?{(Ia^I`*oSJRtx0WZvL>e#5-lo%t za)$S*W_J2Xc!Od~>%$elww3QB>U)EJGQiH9iRAUH7v?Fw&0#t)=U915t4uzO-#X-_ zTEb0RNXH_pMarx6>O(_bQ1jKLaF<@{ji3^?Ayb%ysDM{hWf^PMtp+@9O{!k5!o?=3 zg>0JLs|#&5Vt#)w%S_*oeH4W^BVj0j_Jio&I|jm zJ#+Uy+O}_!1;k_l7YUBc;kHrr2s89Dqe{k&0O^U>~GkN zI5fax*z&3lT}t}~xBG#2!C;NfDjr;1>4NLyYEHlx>J=gz%*^Z| zdT4qc6XB%TsYrGV$DP6FMynA0vnS}g2h`2~y87bqVf*akj)Lz%`i8y@MzZTry_%}% zg>l2DXsr{H<>$IVX9aZP_Kyg0&ZV|KSus*`qh*ACS|(U7R^q$(|5JKT@cbhDTa2rZ z6#kHx@i7okn3%ki)#y(vyRGK@@lEVHtdK9Q0b*wMIbz3Kaqa3k0U0=?vv2QVe^3FF zGw(O(#fS2msu-q7&WCO_2f|U)L=k{Fn|(}WTD!#;E6ty|e28NZMWJ9XFHh5#pqahp z0Iey|qPl^t>8w_IV_9{Cns#NVs4zWZH`CQ#m z=iJZx56mxn)_1SH*5|q|Q94S=W-EckE8^FUtv}o-M-eT3S^ad_de^92VS8E(b+*A2 zTFaJuW>|hje=CtrByY7$KAliz;S_IUW|-B;O+WX3$WfaM zFepfdyM_CzQTfyLwurH@EAF@8csT*^GB`j);g{QtsZu9aeESFSWi3eG5BX7OJre>n zkmsYl`p`Rj1_pa_a&az#=M#C8%e3ai=+MLXRcjqW%9Ui<3e?kPEQm+5K4qT%8i&RYIuQ*4CN`lh`!OX$#gkB6NX~@hW=ZL|Zl=2S2u`>)94BmiXHp z)!b zY6UgNJbsbK&rgskW89t(e%cwNt0dC5`f=6wz?T{-^fO9 zrT|}CeJQ|R*T4-br1jUh`H0byK9#M-FB5B%!$(a$lMJWYQj|xml%WO0)+h3w)fKA{ z)X5u`gkMv@Jy^KpNwu9b-}(G(fh{VJ60tTx_-~^rg=UO|s|9IsRV0m1apGvjJ~J9> zcUvYVo~Ue)%lXqIZzJ4t~t6Jt8`YR)Xjc_!W-1F3A%G4k2nz5NN@QG3 z31yCjwIN98`#x#I`oC56IsQjCAa;@wqjvzYWORW>)_&0LK7;zwfc+EsA5}n$!|YI& zc+Nu0+`>slG0Gg-ZEmw_?iTk{Q?9t$#TZQ={LA6gVr%xa?nZs*+ecT{x52}B^(V;K zhTc4N+)yjf#Obh4buNhI$+q#wgVD#+UE!~`B1Y3O^mwZo3;3{7Uv_FKe|Rrn#nY7+ z=Br>q4($DgbcYK!5!0E%=71fwjZKSAFTrmL0wXqG2N^Q<)F~zSp&NfxHepk-4U@=m z=t)a?X5zo#yKG=2ZTw>~gDY>kWg@Q5WeLvsPDfeD~z1PekEotGw~i|FcBel4z%o$a9rt+KmvDeBR-Ti-<{ zqR#OtI5X(+19EAMU%7JYbiA^pLXYfQ>ZC{X+#4Ti$^Ha)VNsK)dU9}BWWeTvy@B(p z3mMQ4`rP<)c1Ai5&6J~#mGK%_al>eMr;g0K>W)7MH~m`n>~{E4-bbAFn&-`>9pUu3 zK98CdY!mr9Q0=c!A?wE6we#QW{M{phSW|c_`YU25i3@k-@Ox`KW+gzssY5u{7kqgd zPlq;7S;~_ab#0^6WrZ=@&Atzx>*wxi#eJ0mOaW!;bKSKr|EOdbp#xlx)u@aj=>5+O zInu3pVIs3wCR7q9B!^h!NTbD9#2)!{6?zCt!E}aVO7?ar z02Z2-fkF@3*ZzIlqC1w6`1`45w4oaKpGo2J*jOr>h_3|1m}~c<$yR~o*nx)zgujU5 zNDkWGRR44@mh)grUc+)&>=S{kIN_6E)GES|gQ_`3DL?bL4OUA1=%aNPprfvqQRS}B zutr3cd~wh181fD*DY!}$j>bQR5Q^co;!*<97aG1LgZS!tT6x*i!{h&0sZX2SS1QOT zmsRBn)>3ya8sIHDuJ9RN7KPgxT;~IEnFUAUk9WXSj>C2{*h6X^F;H=+UI<0lM8U7C ztI(AK(Y@XM0$PU;+lQL{=@zy(j{fcgRBUcVg$oLQ+dK%&@MrsW2lg!La7Yu7xV^rZ za8trCA)70F*FKDC@C~KdTAV52iC8p_&SLKX>-n*M$r-mnOM7MrdSR49d-kOfQFNf= z`svWD-?W0OEzjx`5Z5dOZ&~@;SNhD3Z$8Lvay6v}XGqB1MP1Yt43(AL{{5lt8nu+t z#D9Cw2W7_G^#6C@^9i5kku6j{2kMXwt&wjeZ+sO?L**7X-?sE#lyga=ggaDeX!wWj z*R2|58G34SftZcH1K#yz08ReR8~w*bTx8SFiO0A+H)~t-M8l!WDgrAFv}?htvj%d? zSSK7$&C^;zQOj=90|)o_TxV%*Dg5AkXkXDNpJ`hFYLc7!m($-KP`3*W?_Q6vq~&EP z3`{$(`ivV^qzT(5(hL4O!w3Im>!ZF?dJZYUThGVh4m!CbwCE|6r7*sb9@uEC2EAC3 zzaewzr%S0`3G|8KoIg3!3o!ER$*W6umfg@()(9Go%Z%j!HwVlH^Vp)6UncYR^{=o} z$fTkYCQ;vy^9cUFWAW|;uuw_#Mw8Z#S23YCArUVi)zHkH?{2GippXk#>j`v$o-AsD zGfTxH783F3cKSUz-_e9EqkFdgvz@PAbp?4pBz;MR`pmv;QW&Y`+mtGcr&$9E0+4gw zrTJRqFQlZ*;6LV&n5dgVE$w>Ltq6ruaIAO)D! 
z>TK$5*=gZP@6zPxi}alV3t}Pl$+Qusp2s+s7NVby`Om!Naue_elP9yzdH=uu{twNg z?!#n9P652i`NfQlek2TRbuv_()y;nt>QD2gnw!T*nMTcCY4OuYXHBWF@>1e2?}ZYb zPPf*cickU}V(?G;eT2fTbN1wB9h%Y5w3~#bZRYh4f5S`u8Z02~Ib5dc>a3Urp zZ>Q_xGHY6Na~sfU1!`GvNMCoBTH!w88x*kq46T%gpu)aZAXM5|WBisZjBHvzDtzRz zMHWy38j3R$s9&+S-xe~?$l=?9ZeUKw14;hDlcEOQ%w5sjszpwlBc zDU+OzCvrcAiOnJU}e z>cBR!jVt<@k`&_VV;gMUqi8L2)bvQ6# z18hRAelQLbqAp(=eoeO0@86UxV@0}`I_tZg=*Ucm7+c^iioGxQdrmVeyqy8EXaqQ{ z%kYQ&B9`7#^G|VRv)j#$rAo1ym=PJw&&MAht*jA>mu%BZMo>+e8l3SRSjU;lO^Z-Y zZ7eTb5PzHM9_;E7%Xr5CxoUVMCm6gKv*Nk3XhXbeda7}1smI+K$K1B3CY@q9)dl2O z9vRShab2Q6THsuvZ~ntL55st<`|Vsw*(rna+fR^DPnL z6jx<&O#eE|D{)Zy{PpS=hFhyAgi#r!I`eK{UCdf*i*YKV5H8R*uNVj`_QlB;lw%?j^1GA?Z6Twn{CT}C;|&Irwf?*qB?q3 zF8ag2$>8AdnWzrvivE#KRjBHv#&gdBr=;5!-{jfGj~3MFj9AVEIW==H^1L8Uiud(J z8KZE61M6%C(-#&dtG>;M7$#%S+^in%n`@jlR$X6pPqNoIrU3n4XE+EXd%B`E70F9u4Rgg zW60^308H0LmOP68c=>bh2`DOh0h(&b7#8^kOK3{|Z1h8N#gCs<5>ijSg9Pn5i3039 zfu_%0J9u+cUyHQMxgZRtq8pMMzNo9w`2d9{uFke5#XP%SS2ezGmL~DriLQsXXzzZz zEQCIvDovA5iF7nyO5a)grSAUjNTUNUXq%{E4TZV@TK{er3r?pdS7!2Q9e9bW+5uSF zyw;dSbmlQjO|gXMD$>{AFFjW7YBoBT1-|w2O;KgvG(ta&{+So zpN+fz%ML-?wl6xqWf6hrr`rXh=U0_+7TEW4uc=}eC+%MBX}A6f+xE8Zf6iCUY`DGf zL1e0SkV1u{mDce4$PmurPsR)OoiFH%uLG}akT}ol*l;) zIal5CC$CszFUXoUOYaWE6SpssUmHIJADm?xe1F7l{;8JjVu4TEta&fF8-vE7tSy!{ zu5fcjlJ1W+fxH1`#OF!A_SFu{IN6NUTfJ2kTj0`QqG|BFg4ok<>AGc`ufIR_EhYCd8o@$OuL)?B=(ONx)^ zC6*vVv%`>-ncf_{J8>I}UQGJEl{|hgq}w>KdKa|4+qyS5sB|ZSHO|YAXTACAHOZ8( z+7<8jpn_jdr#^06V`TIdevvlI05wUI(&T_FPCU0GGNc%Nk8A@Z6v9PfsuuFkqKj?- zwPx*BrybK(Y%e0Hu=C@fVvKk5@TslX>T{j;bP%yOLmQ&RP%a_KChpQD%LQ|yTOlqu$c6pDhPrl> zqFt(&5DHM=VyMG$-jZJm_VTR_pvFuc)vG=ilzFI%AN~!cU zkzA_oIBGm&)KSk8SG2`j|8T|LE#Wmh`Q{#(RAy`CinW;B&#RQ zJ;#e;P>cPUC$8Z!G>4dZ^qzu?F&g-o3}Z>_a$ z<6sqEtj>*=mUaqsbpI7`d?$$g0>VA=J2Lq9szxcQZJUnN4IRxAKg5B>GgJ!T@v z(-!Q#_DKKuLp3neB5Yf)dp(2v+3laWt8ZwVmGusE1RFw35@^17g>jQK`FIQlnn=Sj z&j=dHwtYX`VPtd(0wM);dmHg<2wCp>fK+ zCRP8up{piQQ~y#CmaI8(mR@!-K<<&oO?2~ z(9N~CNhzy_QP`_gS+-o(Kdv*~97+m8DuOK<&VjpQ$UN~=M}1Wab)UWSAQm7yA$Dc?@jLtI>dhlv;r`2rjGF;v_mQoGej3RL zo?6-U{)UPavU=l?m(X=y{rp|SJ(GA&jS^HLDbC*oT;GCL9tIPLi@oo-QIR!2^>fb} zY2&niKB2?ec!nh-AMN8|evMx5>l7EESA)sNMY{d9WqN-@mgg;t&}<~D!RRzwopdRF zT1MTmp5hJ#tzH9!t$Y2>)-ryZlEgDTz)YAURYW`)kis-XUuRYfL=fvm{3#J7?g9yw zSH%&M#QWpJGRBp8nRS8>Zu{o6@2UNdNGu<^8s5?8AERM}@CtFfIiB8RZKuPSmQH1kKTDOvNB9 z-euYPOGuI_U#W`J(tcza7_!haQZ79-%6ok<@m`bT+*k=H2`a8aX1BUXLs{B08Kiy0 zjda#r`+h|9Yoaw)1V5lBytR8jLq9XSRWhlj7 z@a%MYr1i^xlqA1%<#P|wUYZI7>>ns&SBL^@vmgt%2M=q{&;HKScLWYk3^b5bXcgt} zJtR6mH*tzUS;HQ+?7!iP$9R)8GVl%!iCf6N-5q~}@)p3t8WqBk{aTS(7#tMBLW`Lq zMSF}PN?6n=lZWB~nRX-CzI{SGM0p@OZP)Vk-A?ureDKU0ECa`wJj(#0NyOV$f$`De zLV3P5l3m?2(b}TrA4EzKQUiUYK%tnW!vbWaWdk z)jjKJJiiMPdXlG9)5657POpMxuaM}pZKf_Ow{&9orjP%JQ%jy_bP6+%BWe{EIdH)QF+}vTdSINdQ@!0U&J}FP7DcpL^;AT>%4>yXyok$XKQV55f5b9FAWNcyngB

HqVV$e^W(qa<^0GW4v5`dL**iFNcDQVC!L!vr0A> z!0{s|JErTh4RO~TPZtsA;XB*Ikf9U$A8E?tM~=0d)8)HjrJb>?7kO4o8XY=~2m7;j z8ksTAbxELn5A4|XIw!L|9jO=fzo4|=8VAqL`n2Y5Ep6D+gq~_ZdC9}VV}bM`mk;-U zd?tnNP+M~GOBx!W{JKcY)Bwsg%WwOLsWK%+%x#1wawrONjyi3S`;}mqr4;kYsfNA@ z$b+cN@-S&q_U!1}aX{n6_DSJD#xtf{;9a5bZASLdiP`=eB4b_Os%sA=Z)!dZ%^fd$ z6EC4s97Xjb=kFt1JL%5dBRgl2zduPzB3LCzV+Vnw|LZ`1Ka$CP!A-Vue_z00R)b|b1q$-(w%`kAFs3e`hV^Y{qF6Eh+nI2V-rBZG%0T|7W=VINy zYeRe>C{MY zVhoQ!H33A!_Xt7NAx#U=SeKO!F<;mseCmbZb=6VmoI$2@Etb{xDbCIKYiomE0aN;O zmjv#yHFp|FUl-m{KL4D!h<82$q;^s-_;H^79#@?ge(HN`M9=xkc92?33PD+DopT_f zH&%3wQk$wbI5avU|MYD;x3zqsv{!KBM<|#xQTtZOQ&Mls_KT(z+)?7#KU%9B_8T}6 z3LbGAudZJebpZJ)0`y4y+5nE7E4*ILet?DMWPYrxecpBBVdT$i-)|4>)0jY79{whJ zlWv_22hmxgXNzEF`iVWvUH>f+%HZYBf3*({1ZajHlnnv+M?7Y*u`Luu6xL6&Oj3KV zw5OqDw*&LVHDKNU#{j3o)^uQ1n$L69UEoZ%2Vb)N7@4zbTGeV5BIL(A5a~cVq3>i! zCmA7WfEA|WCbh`6Y4{T#yPI-#m)b=r=fGp;ee22q9ABuS}6P#Zr7 zBjmf`x@|@G-;c@rOMv^@s4?p$j);bRyZd4$e`PHS)sU)Bm4}N|tn!6*K4}*nTKjLk zy>FX8nq&20*QaaU>`$yc@!p>;QB}~8eIw#FE>=1j)SWJSNmpq2?L}8UiZ}bz_kzbt zt@1?j)Rb*MAPl3 z7LOg_z(c(&@~Xkq9UtpXB;Us14n}CxD-lSv zgx6U!>^oLLP(Vdi>4scy9XRpDy+bJfCmuey^YCqc7-uDLEdHqWwr*Yt{&aCJvL`iK zh?9OI?618GQI5^g|K#@A3g|W*RjB#RvSXOBl2eQ~s*?j(4AJlDtZ-T0Y3tUkV>P?F zPnUsxwdZCXI{W|>k9%+BhIKmnxJLP&y+JdtB{F1ia32qE-peL%-YbFOHRVsRC3m%b zdy}N!Fg^It{fvXZL}kD1RiCK{@h|*%$qWUFq1H@dHDHxm(XT5nWqq%N&EoCvk)ZW& zdmXb^Jn)WE!LIz{xIxP|cA>UzQ*7Ilf>aIz!Eb`uY|0q0YeTPpMW}NVVh2T>H^g)82fWn`;T(W=%8Dw3 zNraYE#5Vq;>@vmAn{;ee9totnlQFTY;uOI zwl=XTCps%~%sokdr=~b1_{R*7{AGU>g{jA(&`UeIi#t~g91(p4c=*D_;@CE_!0b6R zT}!t+cz7-Jgo6IoTOZ+f6GZS{7rLoBSgCo@Xn8b|NZY|nt{?6X(I8JZ)3@UTq7r|3 zeYUw#yC7*v1jMN1LKwmHfAg4--^E$a{u({FQ4-+1de85VYD!cFS6C~|4CDMx+?(_x zjxy6DBoS)`jZb5?QzyX~;!Sfd=vD+8ZSxGwZ0xIw7r^Uy3pcAkG_G6xfflRvlXrZz zY8k42!8gHz!J;m!qQRb53Gkx`zWur90Oj$b-7DP26d#i*yi<+Pu$fD48?978y zVZ^|yA%{SS>n?)roDx4_i{6@dLy{-jyDeS?yo}?W4yI@aH22RACZBI`KbvmZ{QQIe z&YCN(KMEdO+dJY20Shht&^?p;uda4U`7Ji*gS5KKweGOGe+D0T5nBgtk zcXQ%j-m`NpAp}y04WptLo!_#gW0|81V+P)IUWxoPYhnGrWjiM3Z?c0E zL#Lsg*3V&8&zcj-kQnqIz9*Fe|N4ks>W0_?a3Qzk)vEFK zTg?5v&~`Rbm)y(iCyRZIUPx}#1l?1kx2ygzsk3R+oJ-s{m7x4Ew0U^Az8qh5!gL}0B(`cXO`_>yl7?qS@NP+hz0Tur!(2XWqrv@F_BU*zI9fe9!uk6+Z8Q3MIMv970ZO` zRP}1#63S5A(s_Vzpp6Kpqn`Hw!B=#F*!*W z<<6e1zTYMf#ojexiK9$cr->NpbAKC*`P?4J0TFT^j5^gs&l1btI+vqUme9#rk|FJ zPQhVjte&YUzhD9zhtTIKV;OdG%p2t?5Ly3#h;`B_Fr{vuiv@fdY82J2BZ50}Ne>yv z#AZO>3`Qf=FgQL*VKvHXv{=kHp3-jWH~rH!SUNd zd?b%!0WsoD9W7DYbBj+Ov+ed89sY{Sf5rL@L&1Q2S`U9LbHXAUwn{m>9=TUw+dJGu zWt_6FMNaH8fk2^#sWe#2!NgqSB%VTDl28d7RJRxgvjX zaK(sqb}&X{o^uSes%@ki77q;nAm$ok5!cbp_<_;?L4@}8vhDky*heMp30y*p?Tdn> zW&?qYDZV7;@{4^(^&1f@sU_j9R9mfz5jUlRt_vth7VOD0q=_;i_)BfS{1TW^JVvfo znAy8h&rUt7(;D}ECvWP>{h4a5|;=PLgg`W+N!mllOnY;w<9CZmRZGILBhu_ooy?}p7c1@- z*ZgM97EY8tJIi%b@EF7FDn_H9M%wc2F)5=NT1ed_l}ZXQGW8U-`zV&;go>8T3N#vh zM143pgh3j}*pV;jPYF@NF)9Uh)`Rr_>RBj*e~)$i00T$5i6`6Z)vI9(EA7`?Pb?`t zdKYD-dR-Dt<6T~BXTwG2p<-Ta;#x`eU-W^P6?#o0e@xWA69qn;7W3F}c|aCOk5Y+1 zr#(B7n`Es-vFlip9LzhHcott41$)P(rJ-uMr-N9>SA^nPcWM}74L=LI9Qi+E9-m~MV=wZ zo)73mKG|!rBCgUzW@=>joBWI}4ESmM3c)vjJG3;;kCH{D3!9dO|56Ou_6!-;ai2NQ zXzI&}yA&(DsfA;E;Z1m9ufYnbW09n5s@v?ik4!D>Tm!b0?gj&bN0w=`$XYjS& z;&o`;8)JAsIk@O)=NfN0H})yRPB$ciZRanEH&2M3bmNkrmhU*CTf2V5q_8GR5eZ>h za-7r4cdXG%)62dTkrd&ieR^R$9RQ#&esU@WZ_sk2*|uHfyNRASSEnDR)Fo>Hb4VNe zn!wjJV2x|U->xa8VHb7%N zNwsE4BzN24oeo2_#d)^f>y_{?aZ42Kn4jK<7#7_j=`)%JYsN-j-4RTc&8E7&R2$`-#mK}W7eLif+4qDxF#ARAi^9gF$$@!==Qc92O zAjcF><6S9&E9+q5;C&o%No0MxQfIOB{*dLqWt zQKeQqK2`4&pu1+zn2f)kuM)p1A74r7DH~u?i1>cK8&t3jNa%k)@ScaHXzawuzkW=e z&WFS)*@ zIkwVQrB#k;#~>MxsZ58_aCIi`v=G(%t!YmL!QNlIj?u!g4f;g}RR2yNWgYu6eY5A; 
zMdB=<>P*F*q+F-Z5xTbbOP8I)4}G!sd8Za|B9O(_%Sz5hsGaDtzXDt_uBF$$V7IZ5 zd~@BC5Nn;Lm>9rcLVUkAGepn+zT-nCC!bFeV+yZ*vqd4-M)HnxJ|DVxHD^at`VO4X zgQ0U!E@Z+prci$WmT9fSl#+Z@od^tN9tZnlaTIr znmdB?{PY*@AJUOfO(c_vPaz4CaxQ6q(riHUvsTgobxS4PiuQ|bsn>zTwC{RblQ;Gv z_sp&=2C~m2j#gyfHBFmUSn#$udYtY0WSV(L2J`l+0he0^g0RlvqIF}3Do!M-E=2_% zkEwwQihj<9^?LlGT`zbnYMEufB)Yq+>et9*?vCuJm-7uhH_O50^$%xrb5_>VH8C-a zYTcoiFZApxQD#y6lko8}Tkcu6&kSak@^Y+fl+0Y=d2s)vyT{ou3fcVFBAM;50kI{d z{*r!bG5m>3yZ)U~F;3~xjI%S=S?R{Qg9+oc#Tkle+dLK$&?^3j;5*aX1^)Jn_FT_> z-sUm43)u24vc|LEz1yz4;wkh@*%=rsYH2Kv!2d|cE2rTem`w|m8c(|^W{fEn^)Ae{ zMf^+}5I}3@_JjPEp*K8RT1&`*7tzKFON?i}G|^=i$@$UWxx@;iAn*|YcptgPc|wvV z6(Li8UC1);&NcCll?2{ruP6;+tMCdFxm-51*2+E$!=VeF|2W|1QlpuFY;!U5Mp)ei zu{zk*>9m;Bj?$Rl<>GPGYzk2t{K zBZtuGL8+FEMSj?VtD6a%zJZ+Zjys2Aw%1sv6fTdq*&TE1{ z6^jm#-ncpjDeF9Hx7%+~cC)QCs9E@fVsmP$zW8P*Y_?_*zb%qw&<*H4VeDQ4+g>(0 za_P{~`3c()qXREkI!cb6dPvT%ycO^~9SU6vLQ3opbA3ScQ3cNne#{xsK>vpy3Au&@ zXyR|@LB$=*)U`T&c`A|~P-d#g6~|s2@f?0BkldSgvY?wKH8|iW=VFVfR_7?tbM<^E z{0>)Pu3FstNX&<185z~CM%g6l@P$Q)EM5I)K)Mh!>TCmkOcXob58QnqACy+gf)~{m zt@OsxH$jUM6H1XT`^l|n?+y*3Dga};RfsVSVyQ)w^$kJd2a`if? z``+*OZCU+w8L~9a%sM+t>s+o;B3#;B6Yt$Z;~mg{crRi)r-}Gt)R3}Uw5EMribO== z;kaoh6=m3F>{^hVPc=A}@(V8D#1oG3ZZAe4_LtSe`#JyDSZglCCUmv?q7yj z8^{!UE?od`Pm2Aoqq=Kc0#yuY_gei=E5E3W2y~;E@T(CEmwwi5Q5o0(f8YBVnz5^Q zeLRL{>_<<1(ELrV*Ic*1O$XdNp#HI8d~z9=g%#b5|i^ zW|5VIM<(q_#H^B=9yl@ZDkj4Eg(tFYALMFd8*hp@Vil8n+X2B4M`O{wObK1B89ljx zE6?#Ng5H|cB8J%lt__fcmW11F0qgiKW4p9Px;s=p`lbH45+y5mKW7OSLNt!Is&C57 z`?No@A~w=e1yh0C&bX@+r3ssZ`(lv4anR$Hu)d`+lH>vT^2!)sAC#T?-LZc&)V?Vk zoE+O?=5Dm$L#GFOi>IaEfP8LIuS@v7u@lg5#}8MrlAJD}lv4HT>%?uQQe*)*)}r*B zqvX<%g>#EYoWi%2M8rlwp}o%LlC^k-W?VTdM|!JT{gx8DE4r3rz#0P6QJ~BzOgIO9 zZ0T~TSZ@k4OS*{~JNPx?O+^noWvHWqg-Ak8V6bBSx&J)m8@SN8@-;NbiCZ&6<*sp( zQ*ZwoMF)60RW91|D!>du9m}E(rnZg=WE$SJ_Blk8KaJ|~e-U?h`fWB^BJO)S7%D=R zkL=qYGfm9)vCNE;*(Oma4I)*|m3L5i3%=#ucbESR2&$@Bk4XezQNxC9_Bo7`o?gzV zWxVL3v&`gp_OdxY|HLqPL>$ooHcy0vTi9P5%C+3R3DksCfb)zn!D1d|uYsOir>!aC zG@75R(Qg@Om~VUX1&d2=&qt6mphV z6CzkiRMvhRM;~k|r(Eo)1SA7Q#+Wrb9P-N-sNM|vXr*#{cv*}G^~aTCJ{apecEZ>6 z*QUG^?3=$f6p_Tcu0SPFY~)0 zU}cx5dV}n@9`Y}nsCW?uR2dGfR4ek2VmjfL~Yh<96 z5B7Q9<2!JH0kW66?Lc3s36Gxc{U(GQyblIue7Z@QS~<~w$g&m_aua?HCK0t%f35Ed zZ?yxD+)2`Vey#Bf!J5}g^G{m9u?|dmr7Aqjp;5>&=;X&6VW+n|_lCpJP+oJasYYP* zezl*_?L;ht<3N=@dAB3atb*$p7BS*_EQ%vLZ`%x_#P%WpGuGkUe;dqeDYx#oo! 
zm~;ysZt02&BA)U^In-N5T=;A9U-wfD_0IPqUfd@s#`0*8j$3RNI25ez(fPb5!Jn`+ zj$4c>r~lkL@MWXnSb6SNytlus7}U81YU8SFtJZ4#QtP=4Aq)sf7YZ7|w0Fqko;N2| zp112;+Og0)P7%dW;L-tRfSNguT6hI)A?T(Xgc%iC{~ zx11Mrgsvz#0P6oSfsX#^m7gt^k?QxKn=-tu`-DlgM?EmtQ7IDlA+YP<7Vq$k0xWjP zTA2mfWLNJ!BFpo!fkUj>RJj4DCR0E+2vc9VJ}`dG-0SOD^o?m;&qcF4JVN{rZGm-t z_Zj)RbIZ^$akI^`cYzU`V)Artt5CHBnnUD(^DC6S1=Ikf$%kgv7Ba`yUGv8SMX6h& zQYI`<8nM6|sD0A|LPAVCJQs17M-{GXaN%lSBua%O@cB*o2i0-$m6LYG$QmBu9&hL_ zaeMZS1l=Uo{U(fGYpVu4QjYpEKqAk}@P4@4uk2MyH5BCQ0j@KNC!UiwZ!~|d&Y{Nm z?eU91%=Wc(4CAJs6g&RIIn7-CV*ydL1xUjH+nbI3QvGAk11;d)3uuog*_xhhzis57 zG;^^^EmW3|ZQMuYE5X=OPN84>O5ZDT&np^@ImjMoSgJ+um0U-~p3iyJ#+nyymyCH* zlrV3sQ)wFk7}kAL??6#Sq@}F(Hh%*Mt~-=1OHn5ceOU@`opFRUecUIN_EycBx$n*X zI7Y6AX|)<)y;@9V+fSBMRG`D3M7=8y_EbO>eHZ5?C4|FE3Ajxqv?#k%AupFpi9cxW zCL$VKA^o@JBOg_K18C^gy->3bQc&srTaV5TQ1i0{er&PiBXO7nK@1sZ0p>rV+msCXM&Rk()rl@kN-5W^;zUv5)@U_g;rVr*ytIT zc5g$+GB;N5B0pWIEA0jzlwCc`9V)e2)%ACfb zgM2lX47lINYrZkzDZaXj4W-^Ctk=r41GwwPHNYJgevz%<4*aU9$}%wVQRl@vi0YQ} z@xNLPwHJ=tw>_WpNt+m)N>=R4OaRr>brLurYu;P}ZBJa{Icjewido_%{LeqQURc!c zar-3t^YC@*w+E7jMY~M-zOOUAE8)Hkb)<@_DkZ=0BER=1W%AG47iu|N60CJQ}{z$2?;0tYjGRG~N z+`MGX^5dpvo;~AJ3uRrqS`uXsGFS%iUQ*NGaG^rd8$`brT>DD-<(WsDq}+fxFAw}; zM~&~xD5=@~2~?NAeqnftY^(Wa(9(3~Q(QFjI36P>GeIXGEj3NhF;;wDWW64gdcXcn z3@?_Q-%^PVQW_(18YE_eBo3^>q-%L1^V$yWcW2_v&pvePycD-W&(NwOdPqtRa{n^{ zhs9&XwyOw_m^WTh$9pvTzx%geMlrUVd*fgkydYZZiS8T$`7QSp_m^pgQiQD$Oue7} z@nNOx$hQ9_MAL=UgnXJx*>|c*m|J)XlTW7$Rf#X{!_5q}@J3=>C^A8@{h2hY2LON9&l%UODb2^ zA`DY0G*b}Quco8s79@!B-5f(Ez>Pn+cBvlZICW6aaeaP{EMVf#^?7H%z{uHpgvxn> zjsnz>H-G9gx5M3YXjHE|V)=wP3&bRT8<`^aC*GjxIsagrEbuZsIGB+Ut|T_HM{a&= zm$=W!)>{Vowf@-gJfC+Q9Rs%RC|nr0Q&^LP#^S4A6t^YM^l92BsmU(7vlk5DPNYui z)fk@N2w8Uj{JcF?&y?Y~++_11cJigNQ||3&ee=lTnKdj@=7478B4(U*!{7~wQ}mMu zPVDtCq;o>d^P`YfRTf+si`2-I{jrMs6={JZ#jtA(;o5#OgiRq&t;`78MgxA`gL1jJ z|7tjDVC?gh@Hy??GYYk|nSc%YqZCEjj&Qc0N?6}4&b1(oP!o-HGlRO&7i-e?zbgY? zm3l`+Mm7Gv?C{n8Gp)#7=*<~yS58J^O_ks_?I3q`=9y`GzY~)!P%eP*Ug+*0J|F=L zR37W~?x5oogHB12sF*g{a^*r27G)?y?U&8z2-ZaIaRr}3e`SSEyu}vjeKTOd#Tg*n zXjM*}`U+D49?T1OMlW<88<`UFM5-f%uYYn`F@H0|(8Mm7z~-TRPKxwtsP> zp94ZM{b~1cqY0*9|??baA6Z(-voGd zw_i&Qz4C~Wt(5V&H+eYw*y9fPvcWbW-jEIG6e$A@a~tdxXBeps+~8|It!l`D)|8}R zhGZs@Pn*6Q*rYbs8S$gHha_+i;1ev7KHwb0NAVe+-O zrDrykwMI+gV-TpQgMIg`IQWjn( z(~A}&4AV6g)yiY;9$ZQx3h4<4$~)zezyG|?b>a4zE>_(phhBwTqoVw*sr;`#*L%W^ z&(b07{cJ|f6YV=c^C5HH%xl^fqx@Cn!_kAH&A?WGz;C!!sB=U8VBou9^e~B7yr{$L96BtAQB;l-ak&-S zA6Z(d$9~Q0G0u^t+|~>cjgb=gZ*}(7Q|vSnCLtjE?CYUwfjNDt#K}?x0EAokGWt~< zG*md?4qGzdwGmDsN!b)4^R^h^fo5BnIJl z*?O^fmu@zX4dBx&*^i^+ap(HJK_6ACC8aA;tg)hrG$K%*o7DiFs}x656R>lu4no*a zo!H%91-*$roMAct-OM|MTP(F4D=LFY661b$ayog2__?I-wyM;A7iH~C1VF0@#~RFw z9JlyWrpT($NO66G5QeB>O7gqNE5DCw(_@wFajz;F-V{Qgg&oe^6MsG$(2MlWu-e8{ zxGs*2at?brIa)sTGZ*t>(7^s#{-)?Wu)kbR7wY+>F)b!5 z>g%~kbr$5S#_$z`*0ZHI9J8M3mw4z{`R7MUt}OrDGD7j&^*RS>ODNkla*x#K*j? 
z)Xh8K%sx$kmipK&xW7}9OK^xcauLE=8D^kcA{oojl2hqyRj*pKzKA$B;9A9MbH-nUa?$}6CY&tfiK1iCUgN}X-cG-n;eQl#gDY_Poi^^`Gs|95ICyV* zAeBTwd*|^t6napbT13amnsp{&q)Du7)RHbSBQk3fWVX9L6?!1oz{Dqrr$qY2?+rSgANfyx5DP`bOjn@M-)s7VeOgR|fNobztq z>)NX)K6T&EeM>J_%Rs)Gk;_}4D$|}UM@wh-#DDP$oWqHoL*L1iPlt|L{o>u`h?EQq z^D{kE6kNPA>uaPb7K#%OIAT^eg=s(dV&^}IlisC7O)zKQGVpD*W$^rr<_l2X`l^5m zPkbgJSAWONez*Sj<~NbJI4a7x!8W<&;bLs!{`z?0_IzgoTl^o|Wn)uEg=Nh{T@G^m zVPfMUda1*w$$QLTUGsgRDRXwlHv(jYn(rT-(=69XZkW#o!nXfrwZ(NJh26k*eo+Dm zRhu}R%F~iPxcCo=L&N-bPFnZY*z8i8`K?wno1&umZcTWkv~Cq6=NaE6fP7~i(7A5Q zo^UXMKepr}dGI;I5a3wH;IM@G88P&^{g@v3GyEWq%~Ld~;r_Y-BhrO^u=D=Z#c{NG z3+rrREMe-MV2P$LU+-3yp4mjWsaRyEIc`ui~$>% z_|)<+XlJJB>j#ZK;XqYiI11H>Pt`>ym(ZiC-*gY5MvGlH7bDy}o~}I$=R|+yd|&pG zX@`wh%rKMjUmEw(_!CVJEsCxEgV?~u-HY3no4Z`V7mlS8KU9r7^z$coL)d%Mf^;|o zvU$~aT@PoNQqs8O%xN@~kc1*rt5}7JnjcoZpP9xLFHoxhN!_i#KSZpA95ckvwcCrM z_$(hVS>ESKz*``5;npfFPBVUUUv#gxy?UOLrbTkJ4!1*99eLK))-2Dr2li1G<3|$< zS)x~er8{K!*vh)#9k97@hf+Lc>-x(r!hke~J+uGDOy4jWJcq?KiMR_*aWA?r#Noko zsOMJdQP3R$f5&v0`5~X1<8YJu>-vVzp9b`YlT@ptVi@U1%`D>NWgOq2JbVj?EnW+q!ACQ{+Mr@$o=g1cV*%VgFcd_PR|2)~$NA-IuDAs1=O@%o* zK{>HtD;xP*xwk?vA+1EoL3Bz^shUViDo)?*r`Az%CD-dCWen`U^xzF_p4D8=d>JE#50#Y#T{c2&wO7P(Fm!JWxiEgwBj@?EoH3a zp-8xX$~r4cm|1++0N+kx6Zc;TzpMNx;^S5k(#V#-k_lJDEi$S?2hNOglS5e<-=BUa z2EL>bwLWV(0PEv^m2sudDDBuHaO86IJ2tRO-ME|d91p_4{;2H!u+oJza6MV%=Cv4o zeVIp#ERG4+!5t3y@BP>)QQ()$Jcy$piv42XaI4V6=?_a6rl(Nqq2HbHC_r5?q2u&Z@m`fyX1C}4u>J2G6Cef#9)@*?Egn&qAX zo)Wb1;Bxm{Iwf<3h#I$bKR#CHzb;lTs+E3g-l~bNY>(3V@EDCH8wjB7}xusQl+7Dj~lP8 z%mX~p{LZaVRQRo)dg5@bx<9dtxW#$gz4o;RD#&=sj({o!MgJzLPUxG@ZfX^BQmyc~ z2)CTQrW9vN8OBg4SIN7`cmB9%m(vv*Wvxx?=ThUWoB91`$LrD0m!~YmkK8;0DPfO_ zkXSQ1XuNQ$f|>NDDd>wb6hrTaoxceq67*)6i8?H>9*x0bToD#c*uI#x%_{L1w$zdM z9Ywt_@T-soM-+d3(tU_;hXapKYhPxGPE(cujs@ zmi1Ib-BBZqZ07gk_-iu?^jK<6NA5B9sywCIahBo33SyM|P9RD*&5WRdM!#l2AD9nZ zHC?l5(@G6@Ov*f4TLVSjHex`3e!fj3jIY+uF1Y7U*bK&-Kxt)HrVDraOJOr?Gm%FO zuV5;!jK8Mu8Y3xaCp<%^j4!qtX9GyuKHie8%yn|-N#*{;A&jAx7{dCM$qv1GWQ7z* zmsR;)x$l3L!&k<`Hq<%#G-O~_kw#C;VHz1H`di?Z_V{cOZ|;1J*2P8Cp|lE6w3;_C z8nl`-8^mQpolpg;Sq8ELb>@Na%QSN5CGn-YCca#x>l$(2F z|4&wOuT@9utxU-u8ULRbMFtoCk6Gsoa_&iQF~59PdiyKn;oX$)dXm6bYyY-<_>bji z>N}sethxef|BcYX{_hB_uPx;E;z_~>hVCe@vG=6ovL2fy4*;x7(lMZ2aBUI)pbwXm zVa&k`=L){>`USZgkK+BqD}GSrZ~5p+1;aTM>O6}A$@>}(O-bN%VyhGFkt+lek$fik zJiPN$6r$(?ZkiV_NnGz}v#U6(JX`aDnH;#*wzZY&R^02jyL(pXf03eW09s)y zLQsv)r$+={>r8!cs-cUbwK%H85g=}K-WiNQ3DM1G(TJI5wVn@Bwj7rHq?PbDq!w}t zOjf0x;d}F`!ZexyvoyMU!N0*stlryqGlIbpJBEtAm6T@9tJVyW`Dd zuT5cN9*}z>?~Rr+wGQ9n3xeCBn3u)D2>5^$m$ND|e~Nzn6co9BD&g`t97|CmGx0t9-n{g8LB!#HpK@c)t=xwyK{3V0C?Lqh> zCGG0p0PCD1`0BoVY^$E@r;j3h(Y4-iVIg%??NYzN3EmX{HnqhV|`vo7Iw%az$bhPbJ9ejtvay5Y-ul=Ktj@fEEfe}pJo78?-K zewjX4$gNx{JjGXFNz}SJ9S0)rZ%W_Kk{wq(RU`dC=GJi&TFA6=nw2YI8FydLdzcIo zDN36OHe9R@BxTBb+xs+BR#av{Nbq{ifG=?>$H@N|#mR#E@A`-dOuP-_MHu!7Ql1>T z(x!VbqX_@a%Q1c9kl^nX+Ta@Ya(U%^>i55!d2{pJjmJ(2`*`lc4J&&C00=L#;}IeMLV<57XOCve%mRPAQ;YGQ;*m z2FvmT=SPOL^C+~j@m5R1A!XR4x{v&- z=o7?YF&}jF13RmNR#cgqD*Ei5pVZ8&QX7(uCFH;Jy`$T&kA=q$U^l~KiTq3BllZTi zMBrB)?g%$M;CUAFx}Syw2DnKSt?tMB(%ukP-``jgV_YYRC*_ClZzvMBnse+r@I~AkF3k^Zt(Ci@RGQFX;gs^kO1z zt;@VT6CuU8%Hz-rm`|jh`ugGiP)3RccTZx9O&sg*mAaaC|JqbH7*g z_Sktl$G^1x&fGj>gULP9OY-jr@J2mAL*#qoPz9B1x3xI|gC;o;v#T;#-^ip`HvRKR0@LWC$x2 z>-n2W#pPk6`^VLC*i1E0f6d*+zMw0k~KwUm7hYs}=gd_6rq zxAy(vZ5tKS7g=C2pGN&UO9F#mp6lL48G?>r{Z_t*kd!w+aOmQs(IM?_hGH%Oe6f4K zYd{SpQ2Ohe$ky4Y7|?ZJ7-1>UK+Kq@(-)8~Y*f*5Vt+k!fHk{6bn@QWFLnPWPTUDQ z$uGNRl?C>-re-8s_VnXim{H0-5z}2km5>_HSH`paI3_}A{Vj)Ku5p)F2eh^W3{Twk{URm?w6O4Z-l^1Z}A5tV+;u|$Y(Y~`PLEdJ>qwcwc2mfrM}bJ1;am< z9&ZUSY2Z=_y8e9kY^rJl zQM+8*E?&A7-G*uHf8EY4sSEGuWX 
zW6s~*9?)@Ct32n1^h><)Xsa~igtn&43;3k?oA?%>%98r4kk#+#YMovxKwmm9NyY(D z_cN*nOADw^ef&k_H3-#Xp1^4b{Q4-@a}PU*cRact1@21rR`R90W-7gf^xPGs{c2P9@wzcuvgx(cwXx1M)2;Qzv&<}7>Y{umg2pN&jM^dju` zbDUfC>mGLPlOKYa%8*Kio2*6Mmni|3O%5Yx%{PS0Y21!k!p>= z2ZJ(?)bg5|lAG$VMyf9cGz47-`{VQQ)zW`HlVet=jd`{+(p$Cd&J&O2>Gv&nrOVz9 zG!Ma##%rce#(p7ST+co7tzZCw$o<;CY3_!t$A1U&?ON_nkh?9v7sh&f>x4(F7fl=4 z+1YzKub{z_l{i9oARaAlr$K8TnVBn0`Rgk#ji@P0u_M(?ff78_2M%grWf7p)^ zaF{Nk=s(G=*!ijtgq9gRt9x?Z2DLw7SAQC+N3;w`77G01o}=5mTI9b<0MI||d|>lC zI=PFLpwciwj74fc(GJ_OxAQgy6axtOCbi#3a~^wbW&4=gZ25NL;zvi7)J+{AE>< zT!GPQj(&fJ-^Kwuu(|@wcsEgQ8|`ncA2eWJUhV{cGa5OaAJlcs-rE&eZ&9Pnv%tW6 z%dVdfNT1iIpRW6^shD=vpVVM?oppQj@_RyqA)yNQvD}RF5H0OI#;4vK2(Y3e-Yah0 z#c?I(NZ6EHPEe4)e{zO1$6H&hrQ)h1GbQOZaZI=P>_*P{M#A;T`5FonuLN(nPm@U= zI|Q6mD9`=6(;9G@{?+Ue3r3pvUH;xKAv9f@PCONnb?Z6MrKJ6G+=RvJzjmQ_R7BWG zRQl?9mg@~YKW_p5LI z4Fk$~fKrZZCgJgovUSv17Egh?j?r*Mi;eNl=$9jTNBI3TEWFX2=TL` z&Mg#JK;x9X=V@I1_M(ns+0olJ92Y%Vtvh01zC7$acbWxWHA zs^h=pDqmUPQf)zX3{J()!?DSeezyDkZE?QHS+lesDibD$%Tf_N3&O#C`7#Nlt-uKr zaZ^EhAUQf{%KWsS)`N#;t=z4ar~_20`eaLaw+o~M+_`_asP4>)-kt&+qx*yts>duyZnwx+y}>*Wi^HaAI^_cWqD*01HmyLt_$E*+E16Edq+l9 zCb{Bn*Wa&U0oN??@dXM&NiRR}3VtQBBK3XTU;Z|j^Eb*z z!gLz*HpahS9Rr%SdygC7Rpxn8@4VNVg$;*9zx`Mqc6_`Z*cPab_ohv(f8&;8(rlg4 zdb2$qS$u!+MTvOH~1q#vNThcdC-?SRmK(mmzf*IypuwC z-*P+e_A}>1*3VzQdfr9%4E-g}i3G_8nd->K(=3ggMZid zrIDJkiTJpWo%f#ukA?Q}Ccz=vQ}$E3@E1X%&ubTpXOMZzcC2i)T2wJV526^7On64q z3*;E?QvLG*Q+M^itDpx?+Gh0*!QX79%l9)<4fH zXX;>$gbNDui4O8TP=H|-zKz)VSgx7kf#US!j)c#p-mjy>n-2NeLQ=xA4GDI565;gH zKua@qsp52s+b_9f+J=+7!_5jajqB}g<*Qu4=dQKhfY|#PMJ=lU{mPh6ZwaE}hYRIn z9n`vt35Iu><>4lt28sFw*s=rM>gvLUc~Mc+-Yd->EX5AZn`Y*swR*hkc^$+n^@7_= z88!ZprR{Ti!W~9$Pn9T3N7!1CdDp71cqi0I@#Lk1j9)mPsW}8j>iYAaxB}VsxJP+=@w0F<+5j;nw-_z66=keylHxz$5pyTPhMc2>IMguVU0w%R7xGb`JAmM{e$121Y;oST@kjs|BAqioV6K! zE{3Qhev`W8;b)ZY!g7^~V+BErkRBMW5_qjrpo%+VERR9eaZTa-cpMqL!$#5Va>xc) z%Ib9#hjy$Rd+O#8^#Nng>1UAQD&3o|$I2Ka7|yXP$By`zZDDh0ccDUkv-;NU>3R0$ zXvB2HUPomXA}c7ESKX)gO=&_(sSB`8kep5-ken2qE`cZqZ_Q8Bb}Qm zc(2tU|MavB8Z<^bui_c1f^av@IdB+GXHzMF+w*;Fbjd!|uLO+(QjGm~_xz3#iR7_J z3poolSdcR|uOe#G?A8w$Ps?`w`g2IBbg|J=;j86jV?c!?EB81WNXV{nV*feMZ#`0L z9Ti;+tM=L&LUg(~M|42+RM<;umhvqsORf(vn>tx;xy*re#QpvFUQkyNSh%xrx4T=i zN4c%6jFpQ9@Z%Yd43iVxZ>uYXP`RYTma+-fNwGS4vBD0iUV)=q8VVW$yurKIxM?v` zJnj8!hGY{zrX7^MJA`=Ts*Jf=KSKYwhH6ZWq+x635e+1+zS0m^|HLV{15}ZVHVZy0 zY9y4F3k5{f%=n1u9;jd^!YOnS{`(vR8^&gQ&`i|r^)F}Q)={jy_9>FvJLL~_F~-rb z(DLO;QNU{J@E@>mkWmvOw;~u20^L<9_NgCP9IJ623MpmkLR1tf7)(HCKW7s7LFz{5 z8+ra{>xf?3lm)X&V}%@c?+Ftfkh5InNB527$%K2dYnqrcN?nM;!i-ftw#VW6-Cw-> zb}KcGCiwC5hUciz|+@#HoOJ3=LoF$yG^EFl=mVFLl<7YLNTP`|J*u zY!L9>8*c<*x4d^WhljYMmxNQPD$?%iQMzL>Qe07cJcaJH&L7dh!E#y6o^K?SGu(Tw zyU7D6gs{+vhMCZq1`o9Wp9(o~^7!$mxdDH;lJSRPZ z(!socKzmBFPIQtA3(cx|d$eb>hJc22yA`U`nr3EDFQ`&vEg2mY=54XDoPxbChhnJV$%`Zt>RzFo?M_825>R9=NSbnIrZ_^_&O-*?e5>Gl!y z_5<&v_oZ*G(J^helygg?n2k{bG*O3a=(9^~VSxeT$evK4UQVBo;*nXB4-Xslh<6O( zTwV3cwKkgNo(~!?JIgi0i6SIP9KYI#rI5X+XFlcP(G6>HA}#gZWsht~-FPL-OZ+!? 
zJ611+$|c@`aX4Rro+NS6xC&W6%FWyo?aVquVfw5d}SQ1DX`>%B9KMap|( zr8}LD+W93aXo>x|MP-$(N`8-*Ih@w;%bX|CSu(#NYR}i(ak^J~sU{qg6DTec({@5s z^>r?-&q>YacG<|cZ-hsgIC@{=1WJBKCxI07M@ULqVh%6*?w2zky0T-U1~lgaoJ#Ty7|A z4m$D^f2f|wU_!AAw3|L+4FW7P&dy%VRLqF^R0pVM6!f24-`9NvPbKsaYWh1T3BsdV z1b&_EXH=VLVSUKL2ZqHCQs(YGsi)41B{sr!dwK0a231rmor2LHgJ0S6beIAs_z-;> zu##@S2-A=MKFc#1H44fV6l0xXb*KcoDeIp#x-lH@K3@wkY&+}<0}AglMzKnBK)grf zTdp@p!FX!Y0UW9?AfrM|bBxv$Mo+v<;a@0D4b+8&QaOT}v+t)1Y#9c_-CW5yvdj7m zy_(IvhnrJ3MrG_+$F$1I)24;xT6kcbhHXqXD=d*D_0dY*1y&U;0kliAMU61%!VP9Y z=UtiVZx&UU?Ms5P!Dl0$UwOgSqgWV6V4pJ$0s?}gX=TnY(a}r`>V|ce(f5}U zPpD8ci)1w&U=jb)){XhaGCI)@>ji==n&2+$9* zmcM)r_BcwCb00(Kl*$XjOMq>NFSuuCpav#ACAu~h1Dhrr<2AvBrLe(Cpz&#F=@*p0Ke0{on8gihyA^F5X6S5{Z# z!1ZV8x9cVmMYZQ#=fHpheMbfBW{heC!sL^{x`@9i?X6 zTu-fZLd6mZNxQO4*G1C$WI=N*!DUr801?{3q_TtcMsAhRvP&vj1FqiA~5nYvg!vZfjF*Ebaem0hF)5+kS~N%&Myka)tyr3h%Fl+XXWYa#tCi=rW-) z8`X$@i6b_rHsU!u()9 zvRieQ6Pmgve(qaTBcT6gdN(jy7@e5M$=9A|4($atYloQ*ttE#bs=#MB{Y=$%Pa4;V zBDlG*n7uF1xge>#cBYN#&vYdGIbLCj9jCZzg|WCY`HrNz)kVLo#p=5}DtDIhmVd7s zZtE*~Y!i&9_btTFGNwp4=NOhVg%f1t78^(~D2Z+PPT`&Sz`$MC3_>5~1_W2;Sx zo1rG^NPHt)pV$r?bt8gImdqpDET4qRM0-Uc#a<lJEGhY3H<9Uu@(D6QREmch;On zqvBH)FVaeSSh^bHiCWLGH%P%!&ZO}s+&R*wIhLD62r800(Uj=qMdzNgcp@@>5>%=J zy7dD9;Ri?GT-Xy7H+iU#4yVoiP zYHG3fC^~3~H3jeqg(fP(8V|releFcX7s*w{^0a>1sJ2uVMUB{Cd6>u+(LF`sqH$P8 zl6Ja@aCc>8<&kS4G;}(m=T%!FWsF@uuR{R%+33(f5RKzNYgM_Zh_=yNB8WBGWB}TOWDfFCvSrvRl~BhCD09wK~opT1q=C9C;3}3)0ms=VffjmeWd;bpL!hIiux4FHAe>~&RBcFJAs>kiPr}8mNMZ_(k zgctoqcY~$aUYGe!)E5$R#qIof|0Ek$MbAsaxZU96wA5jo$oJL2=YVs_L{uEsPPJ&W zFvL9t0i)H0>SR+D{{E#2H>yd`KT#qNjwrjbcWIU)RJ_YyGG4q0srDs`BeT6UmTx{t z=-&j#P8S2%t)V%(S=?X5YW6Gyf_dExmzzC*-4@j#nEa4Vd9}W)aWNM)XUGhgoqL-fxbRsnYJ4Adht4kcS7{_R+Frq#*wOKId0B z7*Ef$W`U^%+|SsKBrpb1R2g@)ep4v4Y2|D?sZ9|243JiFllR z?0*k|vQ{>KUh}=2C@m`!?$}id+{DCTV5mlPbBhsN5_jA< zV64pES6ndbzbXwL<7JQpTh|-&HOX?zIM*76`drgTn>TmX z$`bpz4x?$^14ePpEM|`x&I_{P?g=eS&vB>2UM)=7^((kXEEq-q1D09WNeR>ZslY*E zQpl6EScH_cCM^=0E~*fK#NfBK39F7FA06Z7r+;G=;kGyvGs}f|?%NwP*(z?+tzm zR8Ia@L0&m!VWE|Vksg+`L90>(Be*l{!tBy%FJ7Y4sjGCSb1#P@cbOzg4y(GlOezeT zD!*lYh_!IYvuU)cu;P*PS%|zb7WBEXzm9hFl}3(rJt+4`gJA0&M<;a<4!xXZAA%dH zXPCcmF}+1|1UgWDS4Xb(Xdn^M-;vH=_Tu$8GsX-_g7KI{6up$q&hsM09iDD7V~ev9 zSP9`1a}Db;J+ZIF*B5N7mik@^Ph;O69xnFR1PsAOpoD6mRTjeoet4(~X>noV!5*AaSFO%(^WwE&mKk;bI|uzaUuxX~p-E_U4Iw!EGAp)> zX^d%Wv;E>Z^G$E>BdW@Z9n6BR!O2WPWy5 zo9lDpI|@ovNy>-*1Ud*M3BGI;lb;RiJg=sp#1nLoRc6#bVJvblV`}Qb6QO9hts$Lc zHb#6QCA*@VcDRonUTVGdxG_Dt+%-VWwXig=8X)0Lx}VcwtP9_fB2fzh?T z?$(T?ldgHn$d@y8u(famIUjYnO&q#-I^B*>p8$@*ysye>ErvQc`_3 zcT$^oKuS(Z(Xj;0MonqFk@7`ly(?U*Lt1TkIi4{_t6`}1D8DKJo|)VIo}*@(aKXrl ze!vK;%fJIogN_E(+NmDSXPF$UYjn2WWzUBNP=C`*haK>{W(l) zdne^vB(`@8jD*I}7pABRyUH$YR%b{s0(AqJH}WmcRC%pUdg+VVw~p)>J-$!nc2vXj z-)=<_OGVk8Qh9RDy1H5xJ&U6JH%L(FR6>#(u+m|M$1AL~oz{F?A&#CPWkn(_^J&Bh zh;!uiNtsCqSY;~w5lfeI3~5Z3&pR0;*+5cF0OIu?6thv-B*l~rd{1}$WiOwO3R5zd z4byt!@|yc{75Qis~P7!)-4)v zB)3D7Q54yh$*zFSIFrsYG-v-7sUgEhAclIguR18}fiByr5`?1wX4hDMJ}MRJ1HRuN z9{c@%I!Z2(GWe$lD}`D=RuBUw14cr#kAUKR^IO!~w&Dzf0!vvT!vr|mqGDqo)% zGhXwlB+Ll>Mq>t0WcBn8C#k%#+g|oReqYYccIvw?JYawSGh``hz;r?86Rn$xY>P2P z>d{RcpUY3L|Dr|v56c9F?}s-VD+z#2F8r*8;am4}Y&iv(Y#!}_qV=|SfI$e&6G`wd z-O6J>-P35@s^USPfVK0{i?cS0;+yOR9Zp*dAWCC`!t#+)~A6SH#p=9*j&tvDF({O&-N@_xXQ?5~1UwA;y&L8L; zN??a28l1YV@$-&HJr%(2S>wAcW+b}%luUk49hahlvwSGFL(;)O);LPv-QdF|r>o=rd1dqi zop)-tnBI~F{;3q5GCEmpHX|_qIXXVth1cWusan}ar9?|5uea!i6BPPO?rNGZaOYUO z>-EPH*x#Do2Hh+{CN9xf@RYe^cck(+=K>EG+cx_N>nijKW;Nmy)yNgXnUKPx$E7}z z=CR0LD~~ z@)2m=tckoa>CKPAmXmGw$tnxWt6%l2KB;NiJR0s{xVXp!w)BjevhCr&)4)z-_&u@a 
z+UR3^fj^88u_EzkPGO>J=}I$5Qey}vtM{F<>O?dzw_pC1u|<6ht+(}ZjnlzfKyhbWL+_yv3a^PnVL-@778)WJ-m@)iv^lUN|t z{LpumIhkW-1ov*UU^$~kTnMVd@qzgLk;VH@1xD|UtG%FgiqQFs-(ICqR%?S zc#CEl_mJS+$!xyHVmWah_2`%wM$Hi3IIj@p|4KCrSA>lZhjR3fEU=*$Rd9johq-}R zEH^$m{`}AaVm?*RgxY@_D>goIOi;;~BXa8FCHkz3E)GSx-WP(scYN)=f95ry9XnNK z#I?-7SWJm$Q(Uj5GbG{@!nHLpg)%=cy9?vI6^vp!LQ*D$UQr2>4c$=1^JVXCvQCI> z7pVPreK(`78g{jEdxD$(UdVz*FpmhKvt#6@BbZ2fhMFJ>upRByl66!JS3zU_r5;7>4(j=UZTTgQ z@P^v=Z_$6E@-M@&m`}x>W?33#DY+&770Ujmj@CHV{Q6xAXi2j4_~44w%GGh$ZqDKo z@s6G?y7(^Y2&xQ^m~E?G1=3rw;h9;Bpun#J{0H~(Lg_3n+{oMmzO{wM35mH*c8=cC z$Y%?Hy(w2gTc6bY{${{tKD@misF;9lm}!D800HH>7mOQ9L5lFM5^%4=Dr#|^AkVL8 zzj)kzMvT(aL4iGOG8W0q_0NM`NIqsuS7a^I0f~y=p<5aq2Ha029<4P!8v8xUAI~k4 zJ*6FN;}gWDsOQ(8`O|yZ!V-qbbJAWy4pkyN)gr`Y1(_9QHuv|Q^A;zrNInenE6x{VeePU=+VzEV}DM$>mB$~HiPqRkfuxt1` zI~QUy$cZVo<&;b-lpo8{}&Zr{t=%@YQD+lqN_22rfejTRDcSNnUdvQjrH? z#}HsYw*Yb2XEO10jRpEz&Q^js>JejS!-1AdHFfZ6ss*&#P6u(hL#z9l;wz7UE~r~ zak~xH^CwHY;0I^q(4&utgVDP+GK9=KD8sr6iCL3iz%-)Ia7NptNG#7{=R0Heo}2?k zmyKJ$^4W7oK6M1`oWSMxPM#!+K~TY?hB?tq&C{6FD)vCOu-s_ThIOa2lib|J1LG#c4^<@M)qSFxW_?qn0@gd) zNfYyEtjVti0zdFiDvB@W-=}+7Qh;1f?o->U3=5b{r6HJSG@GNn1NHkxppKqg)#*-5 zBl&}eyI-15?K}kwiulCFE$e=ho3LnQgkd`7K&trDC$nUe#$&U9mX_4mdjO>?u zZv4W3t7QCLrT;N~ARt2xf=El%x`5tWep}MIoH|dBx>cI2``8j;9nkMxAY3Q+UwCT4 z`0(;y%iq@zV0;s_i}_-h7|h)7hm)Qi*Z>!rTd=CAv;I`MLl?39Hanl$8` z#Kf6HbDEuzOw14yV`K9Ex;k@tJzvC0w-l20aZF8}n588i4~RK`@@{#(M2IBlJ%bzjL; z7L)6{crQjfmu7Jsm|QsGF66VPaGf|SQDq4APx3Khndwhl)&1a*un`KfJkw&&x^ z6$~Y7i*nd?&vfGj{xpdJ5=BZ>J}o+ug`>8hHvjIj=pec0R5?0F4loN+v-rwc&!a3X z6-8t7P&oBrK|Txj$8j3}9Y9+5eWpL7JIgq4t5b&qy3WzHDAcbTjQ{zEFzGGUV;|zgmvRYs}_>&p0XRMC+^IbYe%w|sl4pGOu zAaOeNDa>1l=M-E@mN)PMwcwLD|N7{%8J|m`expkADmnQ(_uq+eJa%%h!(-2cw-qbi z3w@l$MUBaTFDipWU0+>OK))R&{hrp@?+`$L?*qTw zOj6yfo>S0ScYy>06o^kfpa<`qEr{TGZoxvZ4@mI5Y z^y&V(+j~m&@f1;a^l5m^j|QZ!$0Y8Pk&s zjb!w$8`#2+dbBfg;<=vZ6gm8A8x=#(`!T!Pb>BF^{8GF@bzzRweFbscAkz0oUU(zx z8};r-T*as=bGIGgbfGFf8@(mBriN77`Qw@YHMRM!4s^rsGlxjAm2sXj2oo>V~6E zO+oPzmdCYm49$O8LZ_}2XxBD+ZuoJI#7Z((07qSdp=k@ei}McE5qG0%wdJ^OP8@T| z&2(yPEwvEQJWpDo!g=5tjG=ChmR}yJo$q1sz9WO)#l#X3q>gDtIWpJ)UEYCO8A3&nlykl@ ziaqIH^*h42Iv5|Bk3kMKB+nDMLNY9|{5Dg!{hGSF&>vy@kO2BZaVOQ4r7`29n)mk` zYe|4{i^fffL!9nAB+_}u*lO@lw1Ql9kk8?TS!HEC-`pEfzrn#?5#3K`mfcF~q8Sdi zZ^dxJv1;sBW~!`}U|&Ew;V8y7YXWLAxe`2G=sF!lIomj}CN${Zg4&2RAHL>re0IZ6 z$!)4CdBKbF#@uoky`!*9#r) z2c@N^@D2boTb8b-+B~&}50z;fdBc}M_-ByLv~@c4lA@}9Jizy7%H~n?;LyeivbP@r ze>X8#8m76JIaApfP8jdCsEc~26WAT$(s7gPJs+jE+|I7jwPr8D9wPS#O{P`>SYv;3BxpJRJ9LGMDr(=dwWul_y+u08T zsK}^7q;V8FLc35x``WljEa8uMTLFfkP>dWb*JP!&OX|EpINfdn2~Y`5=f*HB%8yT< zcT{g-R2`djFQF=VIqfy=hCCPQ^PL6lq{i+nzmF?ou0dXv5L^=qUkoWC7DfIKUta+g zWw*8sLk$ulsFV`YAUU+OGIXPKt3ycl&>|u-ba%H5N_R_#v~<@XEg&H9KjVA8?>pyx z&-&M5u^wP8Wy)E_B-Yim<0^CX8>a#l0 zc61Mq1Bn?h4tBuW)KsE)3piW$aQ{y_jVt9iLDF-X=I2FGqc8xrkwvUB)}Jv%8asD;FV(wn+i z(VMa?A$j7o)s@IY=3dd3RPvfB403+q<(3Q&g;K9(m))k%zp>L2s0J8~^)x#g$Elyl zo9v6Q(}rLI^la9kP3;kqhp6t)OZ;4U_FPWzd_5%Fn;w5a)m4kflF$?Bif`%_E8 z!2L}TTV!eeB4ZakA}kCuTRWI?CYz z;5=ys-(OTHL;aE`mum1JJlCI_whkPwzY|rgF$4~CehUhz-j@}kd0E|)nsBewD*5HA z@UJkQ>o)w&^ywUl>s`5t@aF69*78s4>cRy^;~@D|OgZq&q@CX_xleSU8&t@O#ZHb* z?#*>^0Ek}pQ2U2QXDJ6aFAx0qL6!3-9W@;IdY{%Uh)!pcwU)l`sK)|;9jmGOatu?^ zs6jE^m&n?0I59Xgh>2VIhVAoBuSn0gur&sKH$U_b`izwR%?~8~XENbS$5r ze9~7iFzQV`Zjj^q*36-bOlRrxrcK7-cQU^duRt0wqRh(NF3p(R6}V4yCLMnz@*7?a zjqYRTZuS)Vh&o!quQ&77M3EMgJxhDMWZZk)RXf!xPWCfMIbB=QOibFE#^DM^^`!jH zA&#p<*ww0RU;!YZy`*R7>oZG@d;!LLV25G*?c?q=Xr%TII)d78&YnD^zoCm12@vxx zdzEg*2nO2Dp%{pfFQ1nmk+3pxK(P}YhF|eI;bjoeP@4w`x)JAM- z8X5rz)$z0|@^JT4n^o_c^k%OKy(Ti_Zm3;+7j?|*?`3}Q$y#U#&-ub}s`PN@I)eRf 
z#LiqiGV0Cf&zou+HJ#b6iF?HaWnv9FR|n0%x-5$l_zINnc6bv&#OuOBgb;}HRAXje zCKp0gD_b5E?sfF?REsB`=dX?};bInMoy{w&Z{iNSUklFFLL>5^@rCe^d*BZ}RbR%F z7>f4wD07-1tk^GM?G2N%M$40N`Ks_03xl_hns^wxo7qw8|p>Yz|rG~m^I~wJ3 z>YO$LeKFG_Z9F?#RJppv95?7nsy<_wnmdMYs(0Fs30ELm84N4$zCUF}D|>s0A@zu9 zx+k)C`_-Df@h_D*rz@xQb@7KPOvOGp)=8Zx=2MsMV2 z6KimvS|P*YJVsu>Mj&e%o>Osik)suQl>)m?W(6hxt%56@rmG}zj0N=rlX1V_&n@c~ zm2!LDy{d(jwujQnjaKV<&K7oRB@he#1epExU~^?UqwS_L+^i+ld#{-wCiEDH&D}tA z6%t$4mt^k0Z|$WMM~6BCp-Kss;{sE0D2Y>y05HbwRFV0rlzS8_z#6EF;>xdRLm1Wo zM6Q%-tAtLrA+h;eDHSqdr}6UdSr!!2b zv@%I8pRMArsKp!9bJX0qR_HTF8n|rK2`1%4?)!&UVj;XCkhd;12~>Dj^|Rq3MojM~ zb4BHwV#B049!qKv9;?9Nw&c`@IeaUP+U&1#&)(Q3ez7e$t;WIi16nVrf$4{`X8ho_ zm?>^8jlLjk9%8tx)!h}A--;`HyfK+;#ypM{zv3vql=IO!Vu2g$nM*mkzEgGy_a?8J z@%4I5R;%o@r}5H-(~Q>ZlIIN72+hL8!TlLEKiXG3hEtMAzHduAuS5w5<*FQSgv>O# zH~9t_hl zvXs8)NqILOhaflVHfXMd_KYh!!cdBUq>vai*MPmiP@Iz*ML^Q?(SP9UV-DL$Vz4%A z0E1j`X>vpIK4nx^R z?C!uX6!RsC_?$e*QlAXkiT&&%n%N|5BgHaIFvjBP?V{`qdxO+s`lvBC5u4NXT@Lyo zYx%%cj${2*I-!D@@lYvT@k}XUi2tj`Pn*dg@7T%h`7v6rtJkDduG-$3)tXqOIyWEv zUpml@7{KkBo+m)W;6xP)JGZ8o^E6alOx}reTOMeA^SL8;`%9bJ%Kc4_af;KOx7Z5e z5Cu<|%+g+eqs!Z2Gu$Qh1xp?fCyrFb@|#8KW=0$xAtIK2?(W{sFs+P%s%R6Wm?6

R{>hFyC(;qx&P zFr(_d*ro{N+GyZj9lgU^;ygtfwJZbsBcoyA?C-5xb8-$#viPdecXlo8jr7aiToZdD z8efH6{IrcI*)$fdI~Af#mpqF9s%X_<-c~p{Dg1?|+U3^7o*< z@6Y$nV=hVv295mg(|p@g7_V_KbNXQ|r2Bf2^}VOXv(x?$@HdE=m#h>30=lU&S_0YY zd;(eYF|?gjx|0}|zkhAuV?W)PYE^lOLr`$X_jB8ySr=oxR{*IxH=8%8Y< z>=j~z;-VvF{GRi0(4k^Us)YAobi2>8pYb;c%U1J;9K5NnBr)grPk~vU{xILOu9H0_ z6y?_fSV2Ni%Rm#Kl_d89w{yXCQ<{R#>iO{C>-P1em*+xO;vq9!7h}ueb)pkfoLR_mXB`k;yMl$^;T%}&cysA zznMqA1K%f4AuHG?7owL)i`iTiXymYO*>_>f#;QOB-G*o{w~$d?r|@Vg(WxTmVK5gC2!k1OAADU zOD!|mZ|B}hd4e7GNj;#k1%{FAU~P6xiX4urwM=&;nD9ZF=CGysgxw43sgPa$2w~jY zRCj7MsnfP2W)aI#p)6^)jOry=DWyLm0%`1RRmr*Lf&voqFPL-6chk4GEj6`HE+$jN z7OEWyQPRc@j%)G-nw!5UJ190w^{mp~;tWsV%=GKsylRpaSaF2j4Y^)RvrKuN_OjE; zgwd)y%mQWn{Uo<+e1}i@{ZBES8zWR(Rfpkb3BQ{3VYV1|w%AQ}wVC5lPnhrRKJK*D zBgc&&Y}O}7dQ1trkkzI04ghC}htMMf{$ zUTf3P9dBtGur>o`?~^=6Re^HpbXC%&8CT^opXht1<@7wx)}aFQ`uB0r2oyo=+B0l& zpU2Z{*hK)gaxFm0Ek0jt?LI&!q6mzU5#}@2HJazW*Cf|>;uhu_J?Q%|cIQV^`s`+7 z_Q1)gs=;>UBBPMXY9;{|cPX_k1Ud#E5{juQJ(XFkFBm77|GetU(&#og)5%M8tXbxw zN^3!}N*JfNNRYbVNgQ#pthJdb6gERV%fHjf5k6&+oGRj@Jg=;U$Bw z>d{R#_C9G#71BiHqQIMqRHO2jP!%WXd}+D}>;puPMUfN>Ne?%mO~(d;-yXxT-F795 ztM@0yzM+PeaNxy2qh3;izqr|@dknSU+TfI5O6^+C4~5N7r&e!9j`t|1w?~ITiUA(s z53B91vyeQ6-8mtWV&e5GQ+4;x`JV|yM#~uk7rwMbee+Hth7ZZNxq^e3)dMj#_ASVF zm9u__&zemUOnTHPIZ*dah*&*z&$pK z<`t#*YQbak@lGXGkhx=7zQU}@MEaI(vP|iNjH|s>riwftVCIuRsW7h$Qxia3(}Ctb z>BX=Fl7RvJ=>ZA@oUouo0Yy^>4{e{rQ}e*%qOsWtqJ(tho|kpR#no6tNxX7=m!+K% z-Bje*-lsVD*l6rTG$(hL<)}bWKmy@Rbr^(h8mYpdj1o{}sPz&}o|xG@jVoL>+E3m& z=pFUY&bACn`_RWe6#LbeG1@DN!=h!NGD5^B*L5x^-H_73cuvD*X@=B77{Kp`)u!^s z6Vz-o@;>`jwki7H_u=5-&qSyq@_`}YBVJMFt29XI1bcfW;HH;0%wh8mv9#5st7?=G<1 zs>rk5)0%a&DIl2-{%oyLW^EsJl7T@F_V=d-%|Ylw?QaW-1l$GSM`9$cT4J~JFdjOo zEScn$F8AC z`LjJkJKQQAwzI+!e6v+n?-Yn!wSxmix2|j&1;=MuhE%*VpHU(Dm>{U&h+M7s{?N`3 zos@oc{D#5KqDnip1Wm#n#r-hMsra?8)QA#p1zel2N9?LgpnKh+K^(z_XLX@)D&|13 z9NUzl#W{a|vSH?aTIN!`RL4bbp&`fK$81}4;emIi{H0LXwZtzq>C|lBprj(D=rH%h zJHK8!o3;z%10DWiFY?EUFMe5MKLX}sGL^B-eeB~EbQlQ$9rFe8KoeRBGo{1-0I}R9 z4>iLDly$1A3FXpW&)*>}U=Y4#3%vA8jpkm5L8=^Q!QHYE$szl**uEn~Jn>_?r)|&h znrPaZT-^`gQ>on##I)C{J|3?fC^?yJC3rw#W`swZgKh&P6J4$z^$%D(=NmlP zSX3hlPv#8b9rINxZYLZna?ckCeqQ=ouHg0MN0jFmhx1`AmR~@7GWPY1bJyeUZ`$@P z1+|y03{K`cqUsDKF!`*&?`=+0WR#u2M2WvMDEf=6o?oEIFe5fIbE^+I?8HzZ^$r*~ z6~LE+sy{?=j9WhK9c&SQ{Wl$_=y!< zQ|&3-G3P`-;=#jm=4ox&E=zsLOtm8};~ya-B4Uxe6!%28I2me1JmH?YZ)8BJa(O0* zP+}d0t2NP+-|#%20l+YLt;?t(OB1N(80#Hg#U)fV>2c2p-(M&5Um5odIc)EeERKpU zC1d1ap;nRgZMIva9qDqrf%fwn0`_j1Ux+R&g}#G3B|LARQ}Z?|livXtzX9M!Y68x|Blr2yxu znH~>yY_mkK-knLN<{RVM85*k$lTE$T>7OUzHbf%rSYnmDSM-Tj)Fgs=yI%DRjv%};({6l!3&QP59R$U@y zdrvzzx`soxL^wF=KGm0xu`2>H6z>^HN2lP7A8}T$l83c8<>--@5q=V1U=(B*Ttxfjuo^PQqy0Qct8e=1T^0V zq-J#%J`%X=K;y{6NRj=!tS%V+ZrNS8qx&h{>JI3VtTI*!w>ZO2*@BEv-IUA1MmbyU zRi&d|f{MwGD^9~XjDc0zR8IY7jT?RR_75Kz*}~b6&XC#>whILXk0i52PY%(e?ei6t z_&bzFFk03-+7Y#Ib7Yi6g|hc=Ac4~6#zd;P9~x^n{j_F*&5^PhiWfiB>|sNTaGVCS z>wf70mxh-sNCb^OE3e>Z8X=u}D@6(ZG2^E#erXN|X5`?r;{;xp{gDvQXdrv4*n6`< z_{d4IjhCs*A_mrbcaGGvEqQJKjXkVF^lTtqMy}c9N%F=|U~v4ZpdbGD1UHfjqqaUc z!PC=GPN32e%B|3682nJDIhx+w&bzOTafuyVnS00AQ*}~Xkg{R> zEfPb#Qya2P!>d~)Csc5*)RxSG19BHzDp$2W`rf6W%*Le$fGkgdSZ5J6DB`ri)JpU& z&X>URl{qaopp#OWqF{%`U=OIaoCpIsUF5xht&XFLQNnG1sU$EVaYXUWWlOsDe)Gv0 z%Jjz6THhO>6Zn{d6t3zFdfUsBI1%p(15cf2@Glz5@%IWn?4NT+l;vPA8XXhu# z{^m!e1HrNJ{Rfe_!0USAJ3Q<$yfiT0je@_xu7#U6>s5^6ZKDq*5YC18&kIp4(s;6S zY=J=MERkt>Y&VZH>A`ow0CZ@$&}(W}F%4JcxpLYOH?2fET;GwD?x{;W!(1+u$)Z(R zG)Q!ljUqVdj4I^t`&v?{lCdK*QYcJuVc*VPyQa}T5ALFsk-kxBGE!MkGQ^&~!Owjj zLj@Bq1I5rH_m*ERM6USdP;47|&O3KmBE8QI>msC~Q}>R&#?c-fY~Sl42;vij<%M!` z>YQX@TO!B|)0S~Re~+$oe;8{_Dd1c5ZVOf`SVo%RS6xqjmLB6IzoJ0f^wR+cc=2?6 
zWzEwUX*PbxsX|*D;TSr%nO{w_EpZt{VuOGLmo5?$lTf!0gD)B*fc+7GyI^Nme#s;f zfHgyQk?zXPMBXV+XMyV~;hYmm$yr)Z__GCEsbd1A2e(bMM5!@3zb<`Ly4$3XfP)EP@E+Bm^mN7V ztLeX3QsLN>*i^|_^JEe*rJ5Jv79|^A=bq|%VI4#9;%sHo*-AXJI+UxQq#RJ`BL#n( zu@U37in;R1k6yQn#y1@%A4JihMGu7kdz2(6UKLc7^lHF%9h1M zMdRVBgZcwpliVsw)2E2Ak&E@ z5cHKehFYj|E{`VI6lM7zJH_VfaC*nA1=2jOt$Ho^ zR_^B;?Ad-XYRqk?igZdd8FHT=<%q`A#`A^h%?~I$dqqx8`+AMjI?;{^X9Io1&43&I zZDo?{o#+Qb<5POvW#fV)24}a<4BO$x9ycR6eN6=bLC1?NFg7Mh;VmGJ(2?cMPsg{D zvK3h%(j4X|{mc21+-Yri2PS(2U_`*^m=rt6vPycXXJlEK&wA2)s3K})D-oY0O`c(l z()QJf9sXK82S@zgJ(#daTP!!Tgp{G++R2;TZ` z*(y}RR|((2mX$K_1$V}CxY=8X;km$;9`{jwl$lc=P3Kd;^@Z|rrvAzmhxK#|g>)T5 zyb6;=r*Y%GbY85Q-}NurzwFe@4dV?_gu&#Q#tFB;plOn&1(mvkHOf0#_)$_2@NlB=y^ev#4~Jso#AYbqk|twe^E)Riwwq^_v1i2le8 zBwxu5vnh^|O1705AHuX-^P*yEEpW)(nl znKgy`q%2GpxBoJ*qxefk*`rD%7lBgE;8u#XvLk=NSIiEUbOshK=}!nDCJ_BCl`Yu) zsRObzQLB5v@&<#&VYM3w6U{UdZW>I#G=i9R%Gd!{!}*we*xJMqcekQGvj&CJdbjJA ztO9|fO(2F2EP8ykP|F4ywx9j(yFgA5mXG09v^%At5&adD<59ZW7O>8x_ZU;&PWu{m zmHx4fWJ~EC>mEWqPax<&VSuV+`~{#ErwIL`TRkjbVR{UUN-_kACMPyAfp|_?lSjHf zPXmKnwLwZ(frHz;+4oV#6wj8XJn1W1@-*v-mZrKksy$Kl8OjyfwqeTm?LU=A>u8zB zywV{&T81&?laORRt(g?A(|NMUNfj%xp%fGLZXZ<+HS?yV9f@Zr{Cm#G-=p#nMlWF_ zCZK5%z({kQO5Yyw`GF7qoVW$9-MMS&SBu``wnl^TN3?5{fZs~QfIW+?Wcy0$s2zq~ zOQ|UQ^qBykbUNCnVPJRR1I3PCr&21Im?R%$fT|H|B}okmUmh{S|Mm3wOr+N_F#8Gw zJt1FnOFic-f7D|w8I=s*-98qw*VF6?q*M$a!Kv@qg%TFo=!<~7=vE+`(iXxz zehltsf_)-t2p@7z!(*C?Z=)KdoZUI7;?qzzG|Kvr8a(@s=LewGzyMn%xqu1IEsWYT z!tELoOCUZ$n=M+*ys-2BS5MY z7-hlYYMeZuP`s{#5s2m3i)eEEGXD=|Bgk45Bm&KhZ>!W+?T zA{xC8?<;C;HMPY_THs)Z3VMh14nfkf2K_f}OlEEs4hk+TEVaKK&+5pS*q?!QbXcja zD!uN##j^&)^b70&eb(rF;XkoOFSIpyb7hp17-@@6c?toOa7MqtWL>ULH%3R2;{PUK z`umC_Jy`obrd{&hLU?{Q;a4GOgjak0o+(p=(r8G3@JK^NskF~>6M13P@UE7DJFP~V zZJ0ICbSu8<%wq9Zg?q(0$W}!yyLMR=&Q0&~KFS&6s?Jxo>ZSfW1$-eekdg?UOBtWA#&DY^W%uq8thZH`&<8HW@?zGDF|^BU-I=~j|Y7+xGTepzI`1JjEtKqR82 zhyn6sX>C$kGT_{20$)kdnPC68AL+e)%T|ic6(ko<9GpK=OjPNcqaE5xAR?c6hzPfl zCyG|sq^c4^J}Y>=D{U*7Z1-EN>GZ_F*fm+d$@_&K;YcpQ6yS?a+>;&t$g<4l)c4$F zz>_~EL2$K*hS9=ssQ2hrXyEZS>A_S(=D6^_gfkJ$gYTrhv)f)66KRM#@h6$078-=# zw+-D9lQsq-L)Z5<5aC@(mlF}|wp3y+eR}Wcz6Gkm^~IY#rat*N=*o;(pBq)6L-Adf zavaceHUDEod?nlS+xV2)kVHLgOHb}Mf4y#%g1=uvp$b=z0N{?qKzm+RtJ?u3Qgr0l z#19|dny~n2%stcM2z>^WF*f)4lNRpnuL_3_%`kTb zIvs`FPb`r$^my?~yMJ3AL4RFO*%eAH6*YcvH{MPleKWDZF~b=PrtUkTJ2pBvMz|qz z|4al{xi?yoK6kVbhgzNDgcQj5)eXJ>MnHz`aQ5Zr6AI)K!lW&J|xLw{CuG6#8zSKe9JH3gplJQocZ-QJ<^QYrmJ7qy!Q& zs?g<3G!hF{Y@k8hgzO{~0nBU(3exxgiWP9~Eah)^H9W()$E=ML@POpAfud6;oi;|m z<3BkL_ri6hKoOk8^s? 
zMn)Hh1M3kxzc-Kx@0`cZsjyfm9}RuAbip4j|B7Gi?h7;!A#rCP&krOHjzgUXc-1PdI3YQRJu*(s;f( zZYE9B=ZQ1JCrgdIb0FDW|I^uaBHMUkVP)@m8uf%~vLfMR`GZ~yM=Y^g2T#Q~S}ndv z)uYB@Z_*x2pv3e6!y?S>ij#d5KgvnTe*u$Am#6d~!GI|XPz-cvn2#+CV-8t`=HFuA z1uBb%WDZjuaXW8hL;&CN2kdYelF-hO3Qi*ZMnBh1;oI?!YS?n>J?T-WtySgXHpTiN*YZ(U=|TP#!vpLDbge{| z8w&!~fuv6OAUZlG(;9t+JUXDc!4m-BR7yrhcm?q)3aaBflcwi2om8FLDRPY5G44(x zUK6gJpCIBXKQIl{MtP+Q<>eAGIVL9x3V)1P1#-{;GC5~Yk^f6znEMw+ZA=BIr4_U2AT*0i+obTUk2AjPh%K%C$a=uaYw0oucNxGK@r=oNRC2WEnq0iQ3B zz;(p@==0wB)tg41ocDty=4950kpqN;f&w6jFHC97h7S~ie=7{G?8_$QKk;R+5P!MPo5$W5#v)=lMW(lGo&%)3_X99# zCgJR!L-Of5lg~)dJzN1Ge1EtrJ8@Tl?|&9oL~`(CrP#at$>mS-zURu^2a5-y1VhAad1B4%jq!fo0i$&~dh0^ZcG z9sQ`2*hVM?R+&S0H0kKJKgA%u(%-@bI7y+OlDT|eN<6}}?td?L1ViM%`Syfxvbd6* z^(-|?tmFq9-CZEmO$Ixpk8rifmV{9M@AlF0sG33(e4yVbd0~V&CTP|QMqm$ZhR}z`-M=A~aPr@I}hQ4ZBuUHrX_7=to~i1}Q?< z_Yu&5GzA60w&Wcs(m1F1=a;GIJC*#shQ=qtS_t4}Z4$s?+$Hsgx$MS^I{}0Ff1(5~ zu=0+j=H?FNG7xU)JPN<40fN$;H8IscXO{x=r|!F`w)14V1IE>RK0`%DFP>HgJgZhY z+NeX1u5t`*2^L`aH31kROhNU?{g+xkM$5`dIX(yl$ZF{hS;}87S6SdB^zhw;qv`d@3cn5?!bVn$vwe4!^$nFtsY=Kqr_r;hk7-MQH&m6`{%hChrtuVZC>0;1n1LmC0VGDX02 z2uRu{;Z5qu95c1qG->4claMx1gVHD}v?;p_WN8tV`TW(tNtbZ<6w;rc4?g4vqA&*) zQ_=iMm$17>f2BK8nHyl2YJoz71trK^pxtu%KqC$Q{@e3V_;W9U0Yw2F%1AR3d3lUd zsEmJYBQVyqBg>se-c4bf;gDH+zGkcA*Y5x0t+_>qN4>Fyd*oarRZbqRxq=)AY;CZQ zz&|ZN<GT>I_RiBaO3hCxdN_-CE)FBtn@%&gzFZu{m zLC=vU_)g3-7xYH~{RNx;TC2u14(xy^00~Fg`$RJr`wlu`T*3b~o($v(_t&wsZ0;|& z0-oeA>HbT`xY(-omie*Yzkg3`kM(0~3;O;H+jxsW=vj=ZK6moYPXf(WilD&ut<))J zk?`LZ6^yb4a&0lMUbBG30m{j znL%$1-dol81K{45b<&8F7~UaWDCrleOgAvFGNG0;kaDTs6WQ?8zp}o6tqvIB5f*~B z{2Wz6T`t}c0Fmt^=-zwg2>F9)9(IvPLHsdQC27l_w zNWg4H(qP(szW)ND8qq}1J9kNNcD*Wt*MkR*THU)x=E$H5xm3%tOD zjmf2cmFv#rBh{DB@>|M$PfAcjPxiIZ525W$c<5URJ`q0@wbCVMczwJv$O8>-Yde1g z!n@q<+CnBmXDN*aWw(Fbe(jk*;x=t%If=;sd5d9bu|_k(e;;T)o|Pc0p;f3)=zrD6|{LH+olji!2f zG`tarce@y%h{-lhWLDc|o!#29q`ycOuo=&m4Fz%A+Sze=nBn2BFanw6g9j8xX>1Zd z-q^BA(2JpV$4?yE_)mKBo1-}_Kici{Pg{9Hwc0E9R&n%iUzEI4G5^7_rt~?JiG_pX z_KP7%GUS%FOx?uC@Kj8HVTTjs3tf72ntjfs-Yw?wxl+})Kb+sDu;LQADiXVY6zKMs zS^kgj>Lu8zlz(msyGJ-O9wIyD(gI9sCYsipf4$|p;JH~e)wJhoZE7mzv#n&JSu^i8 z`^+OY4NXCYs^39|I^nOp1v;NYka{gIvhMj?V=w)5)MBny6zs1vt;&tus00Pazdjct zpHk2R#6Z(M?FpUjXXME7;JQ`Y9tYS!JI>)-au_E-!cy#yA(Y;4hx!Dq~CeEV#uQZ)AsF7DR9mfAEmq+E)>l|30>duZ4MASKvm~?GYnqNU4@zxM#>vQ#z zfoozhDV=RPkTiKnd7H+CXech&KX+jQe@dKNjTnH{07rI9Pm98aKYx43opU)|R*$H< zUx>-_PlfG2HW;u6kwPHV$9yjh^Vw>x8jkuno*sJ)3QXN?4}Tz{$)gwyeTD%{$N;wY zNdSz$$UeWf`44l$y$aBjO+1w;lW;9t7873|W$=!vK)u2Wug@-f$T5zoQQBp=?mc0+M~kRkgn5%OY3W2R8Ou zW?h}0tDD>FSgL5ufZt>eqc(O3QLO^i=SD_GS#`#rP1~O7nwTth`i?*3`xTBr6kfnt zP@&O&XoH2m6?xp%@N z1g!3`;8}G7wP`DDThFykR7wQ#*LzW+JIqDL6N%+zx}KYA`|7(fyZ7e;b9O}~VZj?# zV_ih36a!yMpr&d%l&D!6mS$~Q`CLGmN>d!=4?mdl)4pA=3O>?g`0Q%g94|h>esgo# zcBb7_P7VJALI<@NAT}~#o?6|4Z&b;B*H_#n_P&s-vkY|{5>uvw(H@tFzWSD_^9{zM z>+bfJ!k~X1rE+?Q(`i@(?2g<&xe@RADp5 z6{5gSeL6C*-4?4{FNE)n9Ogy6JrcBlm+2p)f~2K||9PAv=`lTIVPI;7=h&DHvT}(y ztCWM3IlUw*V`LA+;r!*i$#%y2HpxhV*@Y&rdskP75q{b?hqO0YJ?d9z0;-9Li6qt` zUp9NC_{V1Jz+F5>#RUa&cN6Zt2Z!LJ+py>oMZLRGtr;I{)b_jGnf_Z(2qB_asHYJYIMaz$sk_7xx3k+>zq)=5Nw$r#%MC)7n{F z_LD|)?L7AtPkJ^as%+f>Qhwv6lKUscr}tTE90+B$4K*c`XlWTuT{|(5)t`R`3a~5JE$b(mMSR;^{fLN21)Q`IyniZ&VP?v{_WWv zJ5{&5heBnxEpU14TI1K!Ql_ZyZ{gkI!mh1$S^Aq1^8>#_a=)zW?Tl*$qT9Dix@o+W zn}B;lZ-s=7O(^>3uU(Hmq#hSn>x4*o)d9)fDNTO$q7;DyLAB_^2+>Gxba{rQHuq7E z1C3-X8;xk09D3q5R?Yt0AoD zGF8x=PhXG@%e}Q*K-Mw}b_lOX%zv`T)YpWytoQ#5*eG(!>QTk`zLGcR!z`y)Pu%Am z<(BVy%9{9|uU{`c>$aIfrq%kMAFY#JelH>dJ-N7;=-EoF7#Qm7fh6CJr!Zx5Onudi zc*_WNcd3m3;%_Z%gF zTb-T(_vYh=Rqp!>(FpOAaV-@33drT(L_%|!sGZP+5()Uv!V5GPyYJPG3HfCF>8;GF 
zd|T??>l(ky8kxICqsFxkS)8Kn#gVNeZ3o-SIj?K-ii}+=1)nuNcJmIwd_l@m4HBtW z9}VgA#93#3qMr8=Jg!nL$yMj7TLQ)@2XYhjHtbs17Nhc5+RmtLVFkH577(Ago=c1T z7sFNf9CW&H0@k)`JnWMB@i*eue1TlFr8?OVYS9Dec#DRHR8InKduKj7;;fm|{P2`! zZ1Hm8>|^`pa7I~=|5d2TZ<&*`GqrcXje=4>CnYA&fm^>{yc|H5lxP7lb4v061ram0 zX@NEP0WFnle8Q*6P{CVDV(d(gh(^WhxHT>F&&Zd!-^4Spg|WhO_ckD~Z)lZ>2Rn>8 zsA;-cz&OA3Ss24yrW2x)7A8{Vsr8KmP{a>^Sc&peHf5`Bb zLjBpt#@L%>+G}*m`)Mi9d+S@)+wP-O5$Ex$DdUp8rZZ!7j)Jq1)R;m|P5s=!Af^6C zdH=_i-&=B$nw4CxUS3|yVw727^vPnSi>K8!Y<#c%4gzT(*AwGaJ|4KLzv^S54tlT#^lU1D-&E$hd>eg=?5PXO0>-O_U8*8MD{k|8lqeeY60v2~q)zM_6ti>@B} znAco^a1wdK9xOJrBqyq{68qFWS8pD>Rv>5RvntG02V;#GyZBw}%dW`*Y#scdz9A^g zAh3SDiDQBx<#N1w{;F7sI#Y)e^?a9eNOFgr`flZ4NWXt`6zJp9LliI$=j`b?&kp7K z7^_j=RXvjYjsL6rH~z)y9g-JK*ryMK^Q(77va9zbRvyPd3SQeF;wio`;@ZG4`dB|e z>YZtfUTHs6&=JA3BY9Rjm-F0spa-a3i(?`=JNW10G;j)2xxS6Lj?oJKm(z(}z!NlQ zq35vh&k{pWgBnGq-`Sd->o6I&$guom`iNmS&ntEF&u+xogdi43(-P^6SL)OXlNgvo zDI~5Hxa^qaV9;|Z|M-b7wuZI4`tSa$1w)@8DG~tG9$2_!$b}CO`2Ydf7wP#1y96p~ zY9(2@qsFv_xKdbUXO1Q*%RM?ug3_`I_vntL1jhinI()#JtTtY-V4LBgbJRY0pd{5- zNEHo`>i=ys|N5~|6OGuhYI|W(eSLz2X&$~iEe8-+xx#^kLd~H*A$OEWyMwsalN0C?JaH>6YoHhj1jT}_vr1Q{@yM=Z!sR&-Ga)JVE6htM4bXD{{ z8}TNuvSYE?s;YP%2=P@r*ev{-t9a&qP(cnEig#=b;Wp=mq#heYt+2Ucwq?s3!s#0?B0106MwF zdTrO8V?BFPESpk_`r4{5BKzZ?`iZwEh%xshO5-pYs&GmAqa8RKhw{ zHL;s+g|JCPdhU~On^!Z+cFA^=IUE!ESj$z){tzgjGV#;&`@lAz)3=Jt0SIVcCy>hj ze%9X}*n^}zKiS90;KB2ut%5}Ei)|LDgovg@{TaqYu^jTjUG+`wEd_yxAr5wfmP5Ln z3OSl94hwmR`DV>>d~uFQ40qagn-&nk$qj5@wRNo+)oGTo&@h(ugaBU#3{CAJC4Ck5 zE8)46l+*|RM5FZ}h;sR&zurCEGJ)niTynOinTnbDVNb2B#?}vk*62%F| ztiaeFTG-L4k-0aYhOD2MpQtIWUwopbcedcXC$iaGppox)^`tuzAMrGxR`L_RW={9A zT96L2ls^V02pdd7_pb*8QtD(&u*?+{NbroZ_dI^t_o&PU&+oImy8-j~y`sD*Au`%E zD#;eg@Jl97L+Dfw>?r{@F8PPcX~gBG$H9wvCMkcZa*$)v@apEGaJ1_b^?&?(;B!pH z2*A*Y&1vrChBEyI5&QiS-_ab`@rf6~`4`C}*1kQvU9YseUa8)$>)`XH8+wFnM3@YI zV>hJZ@+TwzgglrM@&;jdeUNe;XgTkG!zK>B+lc%3&+wOu2JyB;D>|`WP^p1RG?g3^ zP^<-?wtDEea-76)>Q+wYAFAEYyV!i%5=hW_<*cu>>ZgjA;GBQ~aHSQ7gb3<Pk7^>rV4n|;GM8KW4?q*4QPKitw|Fhr!xAB~qZChUiGVUD<<`F_`R13K|3Tj`~ zuJZBqqh3LU6KhSK4bQ<1XTkxgpxGM!cUQ0NB`we2y`ORDnX4hx-(Uuo+NVHg*N+)8 zvd(YNCG$7Il2A~C-i}yU&~WT=J|4T;i2aJA+dU@H>lkn@Tq5*5?G9^mD@-k!?Deqa z_S8Y=xFUhc#X%#Fr@7}jxg zL+ypK8soRa`@O?CxWSDw>|b$GR((T^>C3DV@3DWIF(gM`uVF4n3^!64>sCor3Qh%M zxu@J`7Cg@wy_Kff!OXLVU2HMAd_R=NU6Rx|@jq3RgpLOV@(q+gb*|O~#@PrJUTyv&qa&N z!Sv+o&WYzhBQL;->_E^DDq2)m}+2*IB@DOh01wEa@LZQ^!yDp&pC|rpGu~LnKZKMG$Xg4m%~)+ zcV3Wyw`;0$x#4g#+7iU-TpugvW2C~?u$aGJYeG0R-udRBRwXpfi-C)W7WTZ(-fr!W zr7-=8vjWNJ9&T|)G*{F7Qx_g#wpmKz$=>_5gCfc*C=i0BM6?{h6UDyJ!#qiG*5fIS zbYlDh-qwoe#qcN3`KxktcEekal@$HIl3eT+|1J;KFH+a6x*>Q4 zmwjZqHARs9%nbM!&QEp~Fc*{DT8tf!KHPZ9`7@Rdkv~V56)82a*b+p?TVK1R&+v~; zkE9e|@WXw9oPS^}zq=_`R3|a8P{f3ADQi6?Iu9J-Fj>3{_i~1*vhdQPmu--CV=uD; zs#Hj1nvgSG66ea{C~`yv|B?=&5b< z#%V$OikwNWTY3M_kmb;VA)*Fl8{!Tk%^G`DPj$l1%jfJE#(g<+r-<+n+2C4JS2oVj zd3VFlB3!yge^V$6^9hTP_J;pCi2o-JMn*b{G!mCO*W*B0zdiB~;az7_+V6iR_?^ms zx0%dE%luXFoRIv(dBodZ4MA5M_LPf<5dHF#>=;+-V;JJkGhF`#F!EW){AW>$l6l-3uFD zL$A9~BM`FPh2oUgHd|=CI1Fw3o9%fufOs-dC-i6WEK?QXQzj&r|%OhbR1;%m&EjS*yDPm@PFZ zFq(;Pj`!C3p8JeO6O@lzU%N=~T6;e&5@EJ!V5Fyc!!%IOR}v`thJZ-rdu4mu{i|!e ztur$snRYGBe>;2|$NmF<9ozAO=%PtMU|W$`-Jj0!zn9@d71rVY(I-T&o*7+m03#kT z$bRPrQd#?uw!U)|UZ@sXy*^kp$i-=10TNq!^XXPEf!W6@P6J9Du}oZQZ$sR4h5oYS zr%LfS39qqSGo0g>cWVE4S8@vK1IJSJ;>_Hrp?ywpE+dfI$U6ZG_c($r(m&P zJ)w3dgGSqH?7@tmMY}iC4ZD57YZLzV`$34jp;uy#Aj|gp7A}1Q!Czl;_W$mq z2Nj=p%eg3wXcws2cQtDjnAVDAElvm_N<0>=UYT7o@L#}gOz$Un$|7s<2GzG34fcaATPSe7^%OUB{5I?>+cjXevW%Co?&%*b+{0@aQ!<~ zUB9XB`9T9o-(7v#sPn_6xs|Qqz6s@u?hdo>;Ya`?2&!4-O3 
zV#6pKzbnhNb5d`gZh4CTv5*Q*a4J8F5A@NCmMlIqd+zYK^-7Q=}f%P$bm6})dT;A9*^{v#|H8)4(=Lqia$IH|5c)rfu_o(^9=iQKl z+sH|5`z9&ATKGPK^u1kjN2_{K>iOW;F_pUzR$JW=;*<3SG^o#o= z^f!}dNVSul+RT}HimwO#$r`l1?tpvC)}7jXL2ST5-D9g8 z`StrIAG2icgvx*Y+21cVeEeIX>aYmYarQ4XQ~EdXXu>|@Z^FIwtO2XXyVSd-=iEOK z4*zrVU5|Vq{U9xiLPY=nib#f>D$M?+B{??y7GaWAvMO_fbng_7|4!U45o|!I1yX#9 z$>6$7(7}J6^gmkrOOHJC=)ojR_!p(1WRtg7Cu!CzQ|FJaOidBfm$dEQj&Cj=l&4Xk zu`C~aU-GNs|N9rT@FChj{+7p&S#RV?lePkknEm0-Fk+Qb_YOkt{lam&6A5su#i2 z7-)Y>(_m@PZ`6mSTkUhtX@Z_P@}lx-IJ;!deqe@ERMuJB6P_#_U+mFW=g)mimGW&(Jy?I|^sBsA z{Qua<|LFOjUHJoeuOAAJJ6ut{;U89c{iyfqB}s~8jHS{psPT2Mp;p*iG;3X_S6wtU z(PP@HeGP79Rj)tAm{I=ofPa-0Pb_$W0E?S>;f8u*qKn?ayY9SU?F$*QV5+0Y-OD?K z&@oa;K_t;ct%kpU((`Lr_tP$k|4b%~4cM<6IAW+Ly&#P3UoJQ`fFCugtqrw~nku15 zq{Nz)WV)NOOhYG9cj7{5nPh(ELW@A51xL_AR(2x!(lImv>mkSw6#ZsR3(+|PQRIUnHJ^C*CvJ8 zjw#fdPK$Zm**domdQ3BOiOz3&_(2=3#8kn$d^BN#C{X$g(VJO84r}p;&NNA1)5*>h za^6$sj8Y_kdk{ORR5~+W#g}1E;`|)tI$N|o?`@L$nrpQ)Xz=rB4`nKPzs|n9pT4oc zx=sZnW7mTp@f;mRldr;*kctGp6y9OWenLY=Y>8u2jP4DOB1J9_iJExlU!BTNe-RXg z5)JdYm6a8~ldKwAT3VKGGbw_c&SjhWF9gDWSB5hJSQmd3U9Xp|f#PF#YN8DCG&^P^ znDvAA!P^Se`ROtLRq(G;Lf0^XgDh8^*UX&c?VHYm*KkJm92JpJz5G`)_N&=Je?kA? z6C8U_=Rcz zXf8H56MwT0z4XQuo%sVW5PTldNS&8M6TXCU`#~iw;to<3jr0-dQq5$M*WKOSRd_`X zHc&2mMW$D8Fqtg9ZY4p>W@e}sDR|W7Bjsr#h|bd5?f5(Wxo?LP?ff@ceMZQ}!Y1UA z%5ZNj^M(8gV_@0flQWfoImnQjnpzno*2i47TyO6yOM^H&QM%YSur}9711g1jZ!$8d zS;g%3gF8Lz{S8af)A>p#Tv|VOfSU=MdU`G-EYu7SaP++6ExA!yN1zBgXZ7S0F1>$Q zpX=I6bnSn4m-+|8NQx7vHff`d)y;y%t_>=Gk&&}2T@*rJ?NU4!X)8<7Wl{i0@ zXi;0ua-_V{Hb@!!ehuh<7Ytko#Ehdcs`*5XEY-ajNC036W$;~kl5UUBzlzle{I zoOWINQC+oIzPXP4;ZnZZtFEUPrv16iU|}PiPp`5o-#WMBooYhqTkOO%pu~=(3KQ0o zZ@i^JAW9#FpLBA(q@wNKW5Bk&sU9zspG;Ek7f5kwqWQ=0e_hpevY?!XsRM`YAa z>#R?2ZcK~U9{R}z{U7mt!d9?*OdM3b@Z-aerLn%Kig+@43I&Ut+sVpNbX+Pg&sr+t z>pOETENa`q6e5HG>3dSN9VN|$85#Ew0=S1Xr>XkMHTz_YGi5c0*L@p3Xp@1vMkyUf z8-mii3o8eu?e}@P*%)>uwykx*@bc#Z@syMR4(DUtvHEwr;@k)Ht}@^qhS!+J{xtW?$TD z&uDX@0FIE5HP`S?#AUCPUf{UpgvVsQwv8rU_&ea{@x8I@|HQ7dG1_W2wc~C-4(qV7 zP-w%n;5pj9W91Acr8I4mbi2VWsX+C}?CP+M^3LCC7uuMn*Wz)^_AkBwI7UGcO|VQ~ zBQW0)Tmd8=sxvp?B*qE7iHHbz+;>lCv|orggZ^G7j4_&n~>$ z=EP>?xeCV2j-;bod4|NN-ehK~X%mUW$7wJ5HfxPG=rRCb12i|5&0(He9oIA2AM@6# zUwE)EvPqqFWzP%0=5Oq%M|cP~tAB`>eWexgiR}8W%H~K= zzb8yhQJ*{aXs=v^fKlvZQIjz##A|JcB+_kLXs`rs<_txZO<&>K=h=^1^r2The^kyx z)5ziDA(Ifq0s-`DG%CJ4#8#?P>y0C=o})g_jJRtxZIvEmQr_9^Cw-yRoHf_-)Qs#% znOXFz-S77yB&7IE8I6Z{5sL=pLYB#0rMJI4pNU;8Nw(+coJ&-SFmD9ciH0H?UFgV* zu%`24ZAWXKH@K7lGXK#PIYoxBj7Vc-cRQkRu2fI|rfq%uiN%523XPq^^(S#6>-QI4 ziw~rANYlj{X17V>IQYmnd^AUVzt9&>tzuHfqhYK8{L?P-Cv<^QKR(b`iIp$cBf9|$ z-KD{p1lZ598T4lI98RqoyQ+l=^V;CL+PfsYB8_hNJ2zrtgzo`XQE${jCpYBGh;;-S z&zVP2gE^aqEmAqgNEIioF-&3Eh`I4w>|=F(B}-UE5zg~zz&^rUXJcx;fGT2@Mj=+!P9w8IgTNTnh7zaM{9P~g=8&L~Q7H9fVHlheRc zS!#;;dA(eF8p(+)TGT9?wxOGu1am2PijTnk0jlNHaI?7t-A=OzpYZVTi>ylnG+xY8??z+q z0v?JF6@L-u=)#vrQu+_0^*WsSFI6YUdAj3XFd4cYh0ePH5MLa?=k(&)oAHJX97`2@ zFIiYvKrxpEAkm5<3{mrKLtCO85wGrUYI@q&=L*h- z)2lK_5}i*ne&qgeIysZkeFxX&;RsoVATPozzslmi`3&R-DM}f|n~iuOr?FP#Yfq>R zU`0J{Q@_CG=E7K~MO=Msh+n97cVQsmoON3NTJ*yt-8H=Bk5Rp@A-ZA zRMAi{(n5Oqi`uN6%MtRo*r6s_8Wk^^wGbZB)h_EyA2#Q&CdL|9GWr8=1!t|MwkCZ9 zL8EeVP-C>10NZ1GdwaFL2^Mvc2UVq(zu(tqkyRU;l zAY6`6i#hKQ?XJNo@#G6TvgxA9$0vun!y*P1Ha-svDj@OlOg;qqS9}JzvlAtA4JIRX zQQ3SL=PoM~5cv=hSdmrvYAKR}kEO^oXpHeWrFq)%3$e}icwOLBuJWQDm;Hgd(VT!Y zjq@?^*a(BsxAKkg@@9&%(HKWnOKWSYtq3~TTm^?6Yo+{h?wmbWwA8{^Q{pNV>zA+M z>$TEu_;APF7dBwG4O#<-l<)*l0&)D>x*{G#^h}R)!T`H1?j0||qbRCgV@5)ktb{Hk zI3u>H%wgO}aKFo1K6qx4eSb6ULUbu=sj5ZKOJ=n|{M9SbBf^V(D0^|ZU{H{sJmJA~ zvebM^mh)s+$u!4g<9)z}9!_@zdh-+TTCOFY%)x%>Rl7e(?>4^37Z^TbXJtjfvHG>T 
z(z)A~iNAbp7HK~B7L0p(T%MnA?~7EEAill}P6eN>gGnoSwLmj?)IPZ6h@u$vr#y3S zfN#lxLl!>|m1@h}*N+bn4^86DRw4mBTXwgm2h?j(BjNUNiRRFaZ-6LAQP}5hJ+Z3z z{+s@LLR*@)U*r7|kpW*;yUzu4;7(!l#v#>Mw~?rsaidL%7~o$)CV1UzVWN}RXxwPm zS0XM48ru!5+_MUv7jZLVyzj+IwP$d2()DVzQCVrkBQ2{(MDBz$HBU*#Qo2RCx8W;|( zv6;y$R^PP11Cidd+=Np%-tEV61zYHH1!EGX4*Ui4b4_VNa9uh3}S_OQy5 zFX49Ow_U~E&7C4M?N2lJnky(Cko!Sg*Q|9?#q%prr%&B;E7hpUUpxW1Sd@vWR8zBw;xc1A@gz zBJU3PFi{hi3Gahg#mu|?!$-g+ut8z?exHZ zVRPv@2qijx;A(fCwFwL9ZgXj(=L`B0+CWuWkF1OW_Z*ejTsmL@gR&YY&j zL|9jFvUt&n1#1Ntg42W--6z^v%X zJ7po6$P0j$M9eUDo$e(HQ^Ovh|_-p;cuHQMF#DiS}gHo_*9v|#a zh`4|7{!e}7U!qMO^=~sSQ_P+}st=iTD%p}tX|T5?>~0hZajTAhv>ycJ+J7_>UmAl$2`Nm5!n zje|K6n7PSR=FbgiDwZE#XQ9em1A*JoiL%wHM`bl0fNOnmnr3ulc{ENsruy^(%X9l7`UsM#_lw1?iLqo1SVgs*c5@WH7(f)ql#km}SX8Xz~g~CDW{+GiaDkK+D7_MWTKB2AU7EysIAX zzUrWucctXfFP`UrEOz(=C>?#sJy>XgG&NAsDF@sR9y5i}VU5fG^RfSZvG`l$Q$W5= zAP#g{YeX#S_sw2A1m5E6Mv&(afVY5m8Dwcj{NB6lKyQ#iP*rC6hWo+@z=?83$J^7P zl=`!wTC^x%Ye$Dg=4vcyVIfHX%&4yw9&MCbwP_d?aq8;o5>7Uo=L6P3{My`!aoJ5y zj!jE1h-K0^3io@*DDQ)l-~beU#`Aejd-PM`xgFP9q}MfzMqXM&=Up)l*W7_XyenKl zfH>&ZVxVE!eK*%{=)EiX2GmRmktbjJSh`J}*(~R-4D~XIcG-;=?sf13)FSUNxZsZ< zAm}joffNL=J3)}KZ1@OitEi|5o$KSZv$OM@yH#dFwKqttz8rt%yJ=F zuszHrAK zNf=Itz{!o(>2ejfGFGvkyR!b_?fl;zL6|;jlZwS2H@Hz}OoC21YT%S+7D1SQlIAif zCylm-7+wU$vb)pA1*)CZ<7%D+lQlc|)e+7g9sc*9{0o38IAm#M)wy8)&_(z?0SbHP z8l|OsT!(^QN#x51JC1k{g-A=GV(oP_WDuzy6WP_i=d9bk0U+fEoR1Gc zJI~$+y&jE=gg4<73L&SbzXg@&nadK0oj8`|Z|4g7($&o5wVCJw0oOrF!3=IiHP<*P z6{Z2=W&+|v8oX4iMI6dGQryuTKrI5bKd7D50dR8Y%%$~u%sVm8>F)asff-lUKg#|A zh(1ey{@a6Hm9+xpF%7=K3X2Pc<)v$T#2+36OWt8$6yl6LRIqcoy^gSCc<0XG;)`H= z`M(Jy|LE!epmxt0ENyI*bBaH#(tT8px!~Q9u>^RICT;SyNG4F&(WZ|>7(DD~Jqcs# zfEc3#*f+aF`#ZJ13ilpxI`=B+fsA8%9f7Vcf&cfXArRqbZopkqvk0r9JfM&qLFDHY z>59c;&uY8`^wQtE5#z6*99fmby736p>thCeF(cUMp`JNuX0$RCPBHregC7`^2t^~d z`Au~M->#Pr7XmX*aT;S>WGG?R$*F!D6#kAqVrCu-knK5yr?R z?v!-}BOm@210!wvR4NeI+&(o;SCRy;MCNKxdsPx#W-~yL&mT5ZSR{QIRnSW+=2U(0 z)A8}MQ2tuSWpx>IWC`&G_8n z3IujMRn|#sO`=yA>v-+VudnpeU*hF}k)C|OQ6t>jCUmn=!)@n}F}jk(ZA94!eJOJN zC^^>jSba`{k{}uf=X7N_Z^#HrguhujL}`>%%+>ngwn9hy2Z14jHL<2+uv>QbETCtP zF0#C64$dG0v6jjt3ma2lhNur#_{w4U-6uzOEb90mYhM*)C`@3bba7@1H1^aNsOWEhj(E8SI+=a9ogok2x8w$7L4>cT)`)XaAQg* z_-zyT;IjGtCQF8`A2LvR^Y~^m=*_r)_ERSOukHIlu}`6zBIuHvwu}A6^du3i>BD-k za}usUhBFhiH~DTX{hkmqd}*l>!hgx^L_0yLZ9x`=Lqq{M9nrVoj9uu3=M0`7Lk8|lo% z7#N(B0En07jf9txv))E(F0i|f_A;;*?zKnigf0J0d!1@TZlsYvM*tpIO zvo(;e1i957f;|{y;u-#^j;=@whH7sd>jcFrz5bPU5OvdXsm3t(EjPIMiZosjd8?Cs zOU{-X%zuf(+w-;{@)M6vhlXGhC*mSt-yXoYjR>jd2bPxb8)`Ih{LXQzWwg^ zXNn7Hns!V+77FFdE@W{CE7hg0c`r(@oA_`P24c8YZq+8na~EU&C5QdlUF+`#g>O#) zk5?Y`J3?pZPD~VZoBe+G6N`nnX$V3T!-9gUHc*(Dm!I#04Uv*yipDR^{ozl!{TLiH6-5!VP zM3HptTcTyk)LwR1PK-Pz{kl0frWZjTWa+ol<6^Q-)kexN4jZyvipM`QqT6(wnfOof z-tK6P{}ii#!&wi2()&U=E^2u$Zu5c-KshuSR#Biw-EZ1+u^7M=onm37=IU?a9cs~{jep)6v1Ekg(DjkBkw+!J~s=Ua!=IC z>i1B!1}&${i`>eQPt*HbTf-0nBVYcm(VIq3Oirp;cxCFpyVr!vOt|r6l;qf}UVGBk?swFIi9q{rgq1B8^d(n5_weEf|NizB z%=sFBgd70oA$JEKbB7N#1wNL!vKv&|8gB1YToqS)dG^$3KwT_CQm??g{^i8-WmO2e zW8~M5w7V6DTA=U)6^qL+YnLZLwsgJ8u=S_XJ6m3QoOCn$jSIOBO*w>-i<_erGX<05 zj^S|8504tUy}f<*NF)eCpFx$#v>WMHY0485PflCQ4Qq_t%?q1P%Pf0YIA!dkAr0l^ zpgOS+%Ik}O;Z}tJ47I7VlO;iB5p=K%e>IhV#}tIhEM0hFdq&sy{#a{@_&C4kM#^>8 zcaQMJkwKXw?`fv$9_<_?G<9VJn)<=sa_n)hA z4VD}mFXX@2+Y&0xBdH^)u`svLZO_4_78}C{8rCTzj6B`kZ%%A3$8CP;6X*#2YK*ki zNP4Yr)c+S{`gfrNz@FqE-&d%;rdSQwC8nLi=y768a&gI<1WASvCiMGmGq zIHc85HGst-+{Cb8m4FSQ1_^mpMRm;>?l`}Qt|0F1NUbq!kj@+{qwxrDYisMKcyMyQ z1fIMdwzJQe&$A(ipJbi`fCmKz`r7ta*??^(x z?_LFRzi16Qo;!lL(ozJ{#bxx!*z9m^uG#Se3KpzaF)?>5DF*zH2Vd@YU!U)&dVW%A 
zg)ySd&BA3Pv2OgfLPXnrmi@5tP8B`GbO2>6VjmV2KS%x$YnCrk4H!bg$@0sd9mRbw zY)3yb8P&x`bYD@_o-#(AZFhf9xEC!v^lRhRPiKWOGejJT0}iS)1593+npmy~?Y1vR zpw84hks5o0-NP6{z}fG>eML_lJYZ8%g)tD?igr0ip2LdbDnyoW-&k8V0Sp=9V);Us zb=6o~T46T$CGJWUvEHz<&1WI^PqenTFQnyy(#PEn0~epIDxZhaB4Vn1b0f=bZltVm z1tSgW(F4JfQ>J}be=cR0bU7>js4T|8Yj0|rfSBAuOm!G6Ene$&Es7jnK$J9%7=r3He1 z%4d08OTqEzo9VSaJ^*-Zj?PA7pd8{0dAuWT-=CSRugnuIz^J5Vz!>Y9UcQGTOw_Oi z78oJL>%+2Pk8D9Y^ak~X`P#j$`ZmOfqn#Y+X>NW_{hu=MUEo^00T5>! z9Sb~ucM5X=~&U{|3VFzKk;hhYn{_= z=lCUVOLklP-nQP*+-h9c%k5a@oa$FV5>3%?qYMJFTH9&@Rqsk5;d4_SC^tPeRgpBu zhi7X`k6=MOtvd(nS`L8}1@rBL6LQ!vLGrMIK~NoGce1QDtV9A60Q$r;NNpqG8D-ZZ z)U?e{yg0?V67{X*qd<$T1YHcZ0W1$m@CjK$9M)eliwJt14AP0rmLo4=olOk`?Yi>a zzCqAxmoCIcq3V|be-6m+#5ZbuVEalM37$e1t{8OG22PC*JcR!Z(D9;rcdFfXtfssB z!zWzH@OCiUsV&vD;({=49^G~ijgf|&KCm*q9thH~hnj6=DaY;YyU$T5jFMl2t0aKA z6d>*9KCk5vh36B-(#IUwMy@Zxim)k?@spsbm#+n}qYfSlJp>R7%(vE$<^TbceBar0 z2-b?Qr#P7rFLArNK&sj$+`H?8K5Pc3p0Xt#{jxg3*jTDycDCqyJmCqP3U-oV0mO=M zFX+zeDNAvT_K0HbzL_8?I{?emLJM(1Z*d}{yY)q`*n2|ES@=edH!=Vn_2#l23eN!T zKB2jVjH~jac^a?>jLa~Ute>@*WOWq4vmG!=FvHQmr0D&5^Zyd=KJ^C5rmIPjY9Hi)MgSC$n$H1HHbKD2iy%4Bsc?=1U)*b@_ek_hO=Or;xygTGbD$wffm_giTy(hMNiZ&g1ygZ?0Pn)&AZ+g&Ami2ssv6TFFaBbaKN-19 z0;IFMqul}$W1f<=NNzzYXr99F_b2BS=pFKkaUQb^&DV{hx6d$CfELg!<3yEsdIQ+4 zBD2+&O&=N9*SCfzjHT}0^J=>=fd^=9hqE%T#C6c!&MkOgo3&qr)}G}dyHRJEP1wzK zkqLeuLg(cU(CfPUsS~^qZ$f@SkPRV(xAXx2L$S>=G{7=W6wPHeV-BMc*wDy5C|J2X zjR7u%tQ{fr9IIRlV-l>2_ zun}nCmGS^oy}(W|?iP{Qc^lqN)o*}LUQzaQ!iB20!?Ch(6!lR9ZyW5J?sq53fKvD7JY3RHA$SPZwg__$SsM=q`v8 zN~Y*4S)fV6e8wz`_j|X2WF~h zXaKL*1ONg=e}6E++{g#Y>L$SC=~vm=LNf{oDQca!CRoR|Z27qz{yfcM^)*$L)M`Vn#gttrbApK!P7;@Y1w;FZa=Gth>`ytR4cK$x#vZ-2JE-WncgV5Rwq$XKe_eW5k-g^U}a_0xU z=_}i`jo9qo4Ts+a>;ylIOP_deyLDzMqtrrnh!1EaIGT@<8!oCN+=&7v`bY}iRj9`U zj{%?oaTc_JC~8+b!d07xFbu9Iba z5oZIX_uuX{Jdx!E;3OhPO>R{wq8rbeYn6w<*4lmw{A;MnZ#*{o86|^R)D%#!AR-vB z5_^2{9JlP|kBTx0WM zVZo_$?!9VPS9{VJ&5c3!-@k)Q;=Z$@q7aU8l_kT0j@b#Fup29a?f{s8_mxnt;Ugr_ z43KMP-Tw9q_~q{%De552^yGu^yX`Tl?kfw1r0mm-#ob?T;5wc(krDnUY zmZK5FR=VlA>ibSo%zRuD2IP8kVdqW!3o#3Y3%IQ=E@AA@Lp&9|Yax$G^hUP;b zzx{9k>W{j_@RU91xO-vgLf4rBqU3amh%!bZ->A3R$V1Be!h1qtnNvho2YZWY_YL&# zV zm~&g@H#4y@M*>bw%`#r>_jU-x@R8|fNOX7$60JI*pjhj%StY$DyVSqA-jCD+ZE8-? 
zCM)Q@ci}OWf_8v>#84g!J?xW@)BXd99+g86o>U6|exw0(&gPl{Am~dh&G7d+k;eTH zBpv1X{UH@UbRbNrvkP45jg7PfJ61`UPXeAB!0=ChPp)jq>Qr&==q@$8^7RE$I?tIE z;Aal&9PvPbVfB6-n@iP1cSz<{KdI%V9w{}d9P6UFy6YG8OwR{-yxpuGY7EARbD zYa^xpXs|VxK5$DN1hubzpMKCc_{bCR4Ob9GV*!ym$SezFipAwkSaC4u#e2EW?$a$_ z8Vslcw6G(dOH~}c5O=&}++dm#I1b_tWZ3)>Hqfbf0D!`3Uu_F}3y4;oIcA+65XW)h zg2dyTbdhS{o*5xVUfpnfKeVeWp}lso_NqfmEIv`agB#Ya5*c_vrCsaJ(2x%mlW#45 zF>f8{b!lyF^0y(<}xl_U1jX0;XlwhvF(_ zF%93jwdFaH2y&ESW8lZ*q9PJ6w5DLwy9X6IbUYwpH>F7YI^E3L60h^8LH79LD?u5I zL8``#+hm;2p*Y*}PvdL~Wkuy%$TwHlct9aQ{!g>mNfykG9Qe$H=L|^J z?nP2Z;fDpOKEY+i4`dF4#5=%0DlKKHj-aoG`#tR)QUMyEnIJ9c8(a(wKa8n3yG>HR z8%pXcf)9X%ZV!BVBJ+2O?qQmM*U|#mN8~nNcp4AomtO(awBsgxMjZA6`ee(&P-ZqL zv;31hcXyG8=}OjBmp@F^#wWjU1OpW7PeBeag6@xjf_=^#bp$!zczRfex$neZpu&Cg zyjH^Qr0p<*zdmel4<3CQU<&5jeIB5vmLXgln``vgYk3{Xt#;Soo8`?;Yv}r9qw=qO5rEL89*EGr?AuFCpj!RFwoub7X9=M zWOhA5*@WxBW0PUGNuhT?rw2Q$Lc$Z|u73z275q8KQ~FBGUVxFfEwPJoYkN0?ee#Ep z#-HjgG%^@u*M6y|shqfl4y?esb53yx@p|iV>k*z}|0Vd%O$*4aK!X!~kt$eVaeuhG zP3f?Y1h zTj(g+HfPl!bXGkbtqsh8eA51HeDrI`N7FWjGGk<>;Wyy}nOW$`k&EBKP|ohN03Y4v zEw4at$r)=OWd{M5GVq!_0p>blcDQ8%qYm2!xfI_*E?Jj_v9elZyS7=xZaPM=v3BU2 z|Cy7aj|`5z@E0Pw6J8vg4v?9Cp0i=W=q7ec0K5>rjT!-aUt;GBFcApj{zEO^Oq9GH z01SKAHjA->7V}|_9Q!eWK61x4uBixeP2Ul233#ASWmxOm*j$*}Wf+`lcx36yB|6%Z z4DyLe2VbPl(DGj3 z{_zju8PMKOC%ioAEvBdX_!j$m>w7Wlv5nlFou@_{?t>u-81bMT=wbeNL3i<|g{J~k zMZy#W{6xN~{RXi3D@M*LA!kq!*!$yZYTnuz5ncZ5e@pg%3a6{iK+lgUcILxc++is_ z((phU79p>gWmIP_J@O$4eD>)&1?&O5=cP(SQVk36F z3&+>)?2Lo;hQ{%`R}7gAti+6GWi&|k7F`GH1kI6~-#)_57plmkAe9JiaSB$Js-iD@&jYV zu^e6+@bm-PNJI1?_vKM0UY(~*7y6McDo%LA`#(OO+Q0y;gb0XAR~kJRa>v8fMrxtV zi_~6Nb`ENGZ>zT}8t@$>WvTE4uosA!d%;3&k^VA`x(fZE-N>q%%|%&*wzTU`TH@y7 z=7y2sj&-P09W`OfQLA^kA7r3bV9>{9fY1)hE!g%*Kn;HGHJY){ndBP56=XfL7ctp+ z&F+oJmfo&Ubg*jm)sDCD0RtLPNZfF^-Zh4dSkDzA5kPDz5dPynrWyB=8bNA8g3Hp9 zqeLu{AUY>%r6n#l@9VhXA`S_f%#t9WW)i@sL~v)uOXQK3{ENj7xd&Jf1{NRtVUzs$ zI5NbkKyU$$iNsu;$lUq`&|BIvRXiLpnP_G3wZ@&iM|y%b=4PvSyIh=SZ$s% z-H8JzqgAMis8!*L3}2ch3qpOgssVN@HF=?9859%;W&z|AIz0(c)ZTlUivYvD-wE9( zgamZm0{zU+hoNdx;s#(S4{^EC%QN@mE<~n5Eg@=85Ie!qVieZmBVSffYIeW%OoNiv zxP!fStBdgImEn4JHw}=W$6qU%TR6L3v|IUhepz}hfU}W+x#JQjO&N8=Yfdp&kp+m0@w zg*ibVaw^8xxeXF&L;Tf|o%n~68}e^nTdtJYO7%{Z7YqA^s^aoGkIlinneXLqU(`JB zb%6mZewHDVr_mdfq}PHg0Bruec<+VQ4Xfh9>9U<5$& zkR8UcL45gZyPi$9eA(3V#23hAv55TVb_EU$ebc$tqps1m)-XtF#kYNyhY;sSD`WTj z%BWFd1H$nbN-UOB0>|wNF?}-xC4z0|>cysA+i&qsoNL#?;-;#UPM^v@73lfBCgja_ zas(nazP#If;g&QLV6ZCE^TkT~fd0PkX}kuwyq45Noj&{t_aBTf!U6xJF1%$~O5gTD zBWM(36TZ2FdoCx6>{xDmb|>1gnwzJ8+>TDF5})jYhcFerI0$gz)6L^Cvbau(mCO&k z!PqPMMF*CMQt_0Mlqa|XY+400VOm{r05{EPMFc6EAm{y~)#@(mDu_i0!M1^*O&AHc z>qY|qnFVmggaj%&(X1cNH-W(~YI^QaA*jtP+P2`P6RwV4S0Q3~LP8YQvFK6&D{%+( zv-D5UrIOmotlVG@)b#h<1TB(d>|D&JkBMt`b%blNH}=9Zs$}*;;)Qx3tsWf^{k++F zDmmsk@UrDWtGQrYoC!z%vLKwz9(mT&8&yxy;uu4-&d;P zPr}=~-LEmTdCkN>qORGTg5bnFKp;-;UmFoSg_{f(dJUmMd0@0oBB)6v18!uZEVcOU z9x2a3`nb;>j~QFQ_=c{cVQ2^$ZoXYFZPN}64j7XFd)&y$^R38bpUxLooy4M5>mp6+ z2uXtGu8Gp%4 zWgampI#?E9UgN2ATL%tO$f08A-q+xQ%H)-4D>V<@Y#!8zNL)%oh*RH>V3+F=DvNTX zl>}m+tXCX7j9$t_xM_mSn?ZmQSMx!|BhLzY?$w?Jl?&u_>0%PW6FABaRRMQ=Kr_U& z&I9i4EaC}p1Lwe<%-rcEvo%(u7#H@!hO1}N&)ONLu^g<9_fBNH3PmRfp5z)&7#~N= z*(8fzK}UPg{(w5+88#tODzZ&WdzddAkX3twg*ZTmvlW1Zm2xc_8>kXl#r!jhqn-MD zi(;qYaX3!FbuxrNAV2jfU2e#*K6X%`sILW#P)yG3zA@G#>`wMf2e!j;Cz&i)Q?4D( zZ=xuod&-%RC@H@`mD-1c`snJzLUSh3Sa?tyf#AZCt?LvN#a8A`C` zMJr;fnoId+*%GX7I0X-Yn0z&`EU&rjtb5W$@REDB5eah3uH5}{2(Ni@jojNTTsZ(z zey;Yl)xShYw$0P?wgm101TL~tvKL;L4=QGvfCw$-PAY+cTd|2&$hgMswh%QW#v4XU zP0tIwOr~s-$c7FFK<2n1D}3R z%KwXm3dfT?M(S9s7ARFO#;KvVaq@e1o_vK;FltRQBlUFVuB>&yZ}%N&;z4K=EIRih 
zSp{&7OYnQfxNFN>dg2EY`8F$`IOYY~JvN0O8Y;^NsVzAZCDyyNS|Oln(GHUEbrC)en z5NgWCRH2YO2Y#^Lt3yCY)tZHO!OqFfd}D^xvo5CtW7Eu#e<++X zJKU=OnLF?Yzb^RXcMFhUP-w&;(n7op-~~e)DFuBaZ@NdN5Ry;;d{crIG9ngsQ)9NKO@* z8ty1vs41{ zh8_n7aEO?e|2K8`m&A_XQ=y(0xKX2B_MBrJWP40 zco~|+-!h*h;fPO=CjvAuc5Gy*AR1wy`+=iDG3!9>7r)w*{ zZRJ<04HtVU(ydSaUM1)OTGzHT!x!v}Mb!tY8@iOpP>jq=l>~s~m0r!lE3j%MN2}Q6 zeBoN2fB*wv&fEa4%j~?P`LggIcx?CU4EXBh8CCUANyS7#dIL6?S+Ob}t^Y zrM`NE*F7R*&u|PtwZMSlM5@ue`=hP-QqHP3v)(WO|K@ysZ) z?m2mYJ38C`(bLiWl~Vh=&e4@FoepGIN={PHGyX<}odyF^$5?dDUTCGZ)N|dTio?n~ zO)i>`tC<_FO@fK~C6inb%;DZn9(M(@erQH@x04=U;Q0hW_n{!M*n;zWOgJhnXctUr z7tr$SZNmFTZ__TkVj?E+R=-au?C?hljeL3dF|y)o81nrsWn-JGIG#8kN8K{V!-}nw zZj;Wdh7(osAKo8p_>fbP@RYD|I3TiS@MicB;iLGe0QsB2RpJNG*-cxY{2H3`LF2D~ z;r(>&B^VE4PAzfKww9pkgAZ~2a6HwI-Uq);s=iM-q@sdJL9wGl{>}B%A8)rb0#NY! zDA)#h61K2ZVgLbzXF}i2%>Hy(>sj%%oiw|n?fZFf>GjRthTZT8!9HFi^sCc>Wq>Nz zH6~}YCa=zHaCr$f3Wl;G=G008a5K}kK8&D1ArO*{h3WzY)QMFAaDF)6#BTQ6!|91EW<0ffc~+GC!rSoRkqU})|ceK6~S2nM#3s+E7gmA0g!BSn@AHLJP-1k(Ay&04Lv4@=d`((wR6^$pt_3Y zeQ2IMF$xTxEn@Cl`XrVF{nZMZF$+dAsN%I2XQ^LYJ;`TSaz4F$V`bS>b$FWQ(ve45 zG$vyER)zMhRwQg(J?!l$yl=HB011r57~gFzhhv_*{kPE>3vmgGo-_z+tQ6<#7Y{l< zwou|+j1pKdQ6DruXmRVNvBqVjSCG82*$>`Y2hw0n*hgT%12Mi8I)trHPF%S;$g}k? zq{>^oFJ2xKEFp8?)Aw8+w)zyv643~CJ+~X=S>o&QE3N<@Wc`fA0cAJS$If^|^WBd- zOF%f?6#^8t~Jtg3h(SVU*(lQ=ExlLW;oLJ4YMX z5S5mh68(xNkorq{LDy-6dtXjkfu`s>s(7s1A{fT^3^d7y7W8|rBEd+NBmv;atVZfy ziNAhznbR4zX?-V|=0P2k)2u2TqH6BVq0Mfin260~R=c93z2JYX#?uQ`!37SO41-x0 z*vJFStbbggen_EXYEyyLHf%8NXCUU^cHkrKc(w8OlH3daa1jK+`~*OCm7w-&*FUGt zzaK;u-zazj*-IR6g}@$%oPs0}VEb(_NbG>$8vA2GM)3b%-v80u|L;gIlpY)+q1_c- zySX@l-)Rdt&jlv^W!C_2rfO(A>r z-LD%jiqkJ(HDtPM7=DySko6{I+7(U@XDhqt_ezd2#q$ zFdKQDLFZN6vyfXoU@(%W#M(?%3p6Q)1ZHfAm9Dg9vdjC1SFb@6APhhun=hjJ;mut0 zM5y+e4!HNW9mRlg-BJvO6O*7puj!E6wUA!DS!UZE9z2s|3~VKR&?13#YxO@tzXnuZEs zi(i2r(E=u##Em`*ajP?xNJCc&galU1|4}F%Oe%jj`hG7*40vdHxQ7#{z9vFf4k2bLcJXyQ zCI{0dJlw&|jy7kEj?SJ&5TEm*2B?fKLz9f4$xD$OnmwALowq|T7?e(!2`GW)CrfL5 zKpu-%{QeI2^Pf=P0lh%UK`jcT3tgaIKWDuT#N^eznqQJ@D_qO8b)f;Dy_*tlC|I)j zIQtKOE72vI$0;hzU zL+K}x5inEd`5>kmaBw?~tJe=Ould+||K_^U6XfC8AKi#P zjZf=qL^ZFKrJ%zjq+`>(wfP%BH;`+R%1>w2&E>gQhd%J zVX=bbVYf6Pv*jRP;BwUl2nDw$Em2~VtZri^Z(|E;rM3XWpfh7T}2fbhLnn z7w{q}bBSvA?qTlL`SGC2kMdno>5a?bFmF~`+dD7OZx`jY#*-~%2d<&)VudJQd2oa~ z1=AzpAi>MG&fBkp1DX%h>ar~4pA5nU3)cc1!V^^i?r2M3pJX0770?3;a@TiEQtF1c z0#v@WL9ry48wZsZI&I^)J;>M?qo272ovQ+6LCvcIJE}!OE6dMHe(j^hPR+8;3k!|s zQGy{jOv*Rg!#NJO@e7^h$-lObP^ft!N;?#38~l@$qdhO86fybH9g$eBu~d6A*j>zU z+qERS-YCPhMN@MtqRccyS&T8>@sQ%5Zd9FJUt2~SNtT2>+((h=z;<>B^k}{j{v?-l z1{*ele1g(GUkVP4HmHKBN=X0&mBrPF;On$*bjGFaM^WVNp&E-%2bPW(`apQBO_d1UH1(JD78 z_%Y#w(@0FCU)CT8HaIHr6T(3MnTFaeP^QMj}8IK$%n&t&W)E!y*39+Zvj-2ZfAuG3N z()4SW<-6U~SC%h~XLYf!rWrgN{c&>r)6;IZ4DS@kX{=gbd-iS9g1D3ppe7U%y6?Bi z>Lz4iy-I|F-I1lst8r@!sf}8Vdtvm+pu#VJl+lw1?i)1ViA-wWhwUGQ$TpntACPb{ zqO62Rp?lIY(j9i&m5ar|x=?muomradk?QZjwNPNFJp&K+jpqc>G#AA4a;x(csy<6Z zg5%WO*|3^kqMQ!8_m)Wut0{Kn zA|0U8ORy{Ssri^S$Bn^$1nNFSn!?SxVwUStx>(+^gQXz7gt{6fHsPXFD~EiIn9mR> zMO5gO88KB6eRW<6d3D+&_l`ez6~WCuHIS4GI$o3^Aca7+y7Am$rKEz5%pNcuJox28 zu$$6`EVB7YAvq$i56vW-FV|w(mCBod(^OhmKTBJv5p3*oQVqRRN=+2F-w*4Wdq;@% z+SQ;1W?1+z!1*e1QNmN_OcCcD16pc#iB@a5@v~j*^VRJ~JlqDfShn<9oKMK-#K1F- z*vOlqgFRGY57>J9=x8F%_oaFRHx4&ju&jZfeN8qW^1O`Czu=RVb{ahJFBt8$kMRU> z(FBLIJKV7v6G^BXC~-wsgPX{n9ajPGQQNFgT$6*Or8;7xq|`TY_=(ZF4q$;m+GEJU z1`43_Ln!C{49XJ-aDI-7kETqU`j$+@a>GW1KPbtib@oT2zX;dkdPQ?wTc2!7HGvmb%d0uN* zvNfe`3gfHPOjh0zhEp9(Xv?Vq2`Nq{In*BzbrE*3Z(k&kJey;qzOZkpF`&>$58wpn z*~agoZu`oxMg0CELY6ESn4xy2^nyJbPFj!Lo{ThS&P6C#=!mJ7plGV`epBm5+rAG> 
z@zqPX*b7Y%Ei3ePsLcVRq2OQ{B0KEo*S^0A9NRHEa-pxg?^7BEy=$`+{RcA%aTNaw zr!D(Nul{u?4~Np{U);=Tnlg3Iy*hHqp+uCyEjNwmECx%sQgq3j8SWc0KzBgwmEsUs zX`fO9v-x6Pbr`v}Y38lG7k;F!M_Gv;*(-XDLc^wXAI-%?4}OZJkWBj5%Yb%W_9pib z@iNPiugm%NQ4#Ifwc)Uo)WKY**uPAnh96NB%}g16Wy=h-KLuVRVE?ya@fQ5czFV8`V-+8QX8KEQv#asYa1irf z6Rz=q){wG`0vaE?!%c{^EQ*aD`*x=GkSyYA)Y0{uC}xtLvgS_ayg#y9 zC4BoB&oUX8`7$`LZhzkQ;|OHOw@mrkJ*e5-lA+4uM-#fq4mMOjcNUsv$hd~oE(3Wo zVK$F;#qQp5!@)h3p+#CGix02CQn|`nH(2y-TbhGes;akH^YxKS0_1IP3vk?n*zUK0 zn>2~@I(OzA=R{dw+~n$KDP8yEg-&4!Dc+qkQ2c~PXXd4VI}4qcKpE3!Lg&`@;%zSXt`m~?v#r`dgO(}3ZW<)cF%JC0+=VsJ<$JB1s$c1sc; zpBs>05B*v`PUOx;rd0R60~Snq7w4?Wrk5Wp4Cr7c8R(F%buM(4dG5-ch274W-d0~! ztjQffT=TG{A2H+bIDHuqHV&}VQ>TT3IJ0pR?xVA|Rb*Zue|8OkJI{4g6wLW(;ji19 zBzKU+V9aH&0KdxJ#RruQ>@B}WzqLT{RoOq|K9IN=->fz~Suw1$;*8W}IBeF#7qCkh zP&`k1k=GpCr(lEFAOrBBl#60MO(EMkKS_k_u49*NiBR2m1F9+w3mv}n(?f{vP4W4~ zmHa&G@VAW=J!H>>%hva-Bmwr$x@#R1%Gx+F2SP#a#yDq~fD@>HhUy>Ym6W4m6ztv= zqt5#p>I?esw*IfvZSt|i)LZlPh0dVEOG`z%+PY(2tGWERQ=-?@KAP$LdB5Fv)-M-a zlhFC5*8p_`i5GTE9XJ1sL)62k8~R_%XDd?!k2{j@p7j?{4tr>3dd-q3v8y)sffFo~c*>jUhu%C! zF1ZQp7{cnkHs}5lC~nhCEq7yO72X@>BW?o1YE3+|TrW{)xS#!Q4CA6Ue_Kc8iLZ>T za$lKhx$i=!a#8DTJXks6a6jz;sz{23tg}}hp8E;YYFa(6locyPgkdWDPJq_uha@mI z^$fLvttVp?ooAS&qoK(?Zh0`)%IenkFegGiM@&2LBI1)mw;SOU$E#O#r#@+NRUu`Z zFVVnI_2UmoxbzmVP$+G(!u@?z&8j?k)lGE>;s1a)|~Om4)}-g#vhJoVE;R6#9diACJeg)#ROL-%V91*)HpB z3^2^#@UyaU{QY5d(&Jfl^qAyTw2auddQP}6{WWh2QTu0PA||j+-Egj98_W%)P8upk z7(;?)@&9CeOs0^;Q5F53iE3w3%%=VMIqalmg084cKwY?VSkuq?DbsU5HA@%o1^*>_1>Zq+_lk# zlxnP9!S#&tR~G9hnkl5Cge3R5^?5e^Vm24+o)6tTYoxoDBk8)45e`!)*2b^y6-!P~ z5oOG9hZ=&;S!xJQlPm^L;3z7$JGBH*Apfc_aH^IGnT|F%1 zC?A+Cp3G7Py%`J`Cr}3-hNAFth-?;3wjpFgZIGICDxvcn+-1m|W$Cytfyr~qk zX;PrpAlxJ;Q61`HE;(vx>Ej#SUSH3E@3N~r5+m}1D=`6SGoawMg{R}v$zr%8N)BT@ zSJ6p=*90i@0cFM@37KPFSyBs;|2M$=%8$Q$LgU2@7Mv?O)!5xzkT>;7w#NK7$o9Dh z8W-`iZjpIm>*!HODgJ(RX9kM)~S_glNUKwnIW0GXX_ zImdzffbIm3VwcJXFx;}iw7}M9lhq6*IkQZazY98Q&oYJ}<#Gw*s% zR`*2l_pArU1mROBIGPikSs6Ro3GQt{E9L+GDf-?1H3!IF$k>1$*m~wHhfsUn=!fL| z@J_m_S03g>ZHBN-pst@Fd8P_^s0gBsa`K*I48~vNhR!z~(v;r< zkz1`DF*_gT{c|>MZ*MwimZilrpe6=oWx{vrlM>)zY`qZhwRj+XAbK7JQI0na?J6r{ z<{yrL!n8=3CE?keh3>vF4M1Yg+WK@pKIw=`0iNx-D%psd%96y-K0|O@skuO%A^7K& zu7rq-j$E0W!A2qA2vPCL0~_Zd*f`+%nU^kPvZudNVA9RZNmq~w2Naca;%M-?LetmSt5aBPTx$Z!2h^E)rrznQ;1ruDJQ?YDFB|thU0cZ z+!XCmtdJu7?J;KwK_&7_wrY+Xhk!|+A=~D6JR@4HgN~J8_jBoz72|F6r@5ctFy7ul zarzP)a=Y*I|G5b)%G>wn1AFRq{@|-q_TeXK>A0BDqr42LA+7()qXemjTy+`;BEfx!gZK#$ z{?p?A@$Jp7o*R|l6SAwvvJABxsc#Kn>#>+-7shB4Zul)1=K3@y9#7-SQVAV?9rZ4!t zwtLVB#y`r^rrSy=}8FDz{xmBwzd?FC3PQrQ3;^JvLfYDzylhwC6Q?vcH?0gq% z7r0Q>3k7}>%fixD*t36vw!Id~CH}J^D zqRMNwaz-^H&Z-Zc<>ZF=WM#hs-mbix`qpgUJ1>#7i|w1hFaxgr6Dr8<=dP;l(Nir| zr+hoe7lSv+9(d#)q1!5cPYy{RDS`cbZr3XAh8N!ggR(j>rNiUAuE!bW7mRHu)<1jW zLhQn)@MQ^;DgHo~)zUKx=21P#(xGcFlzanq=V{^EflfAM%LHbmWVbzHm(|(;BSNiH zMYOCegovq)#15@Gsz`y>V$QZKzym`B&sl+#+Hj}8gtCe2#@%`*LxY8#A!i1}4!$_Y z)-J3a0fR7m`f9@ zZ%<@cUejIZyxYi(1Xm}bLNNcG9mPG?olf|Gkz_Au?P!Z|9*{mT+ET%)maz2C87Olo zfX^Wd^*s2%=Fp{8?1~YxdDsYX?`5drpn_f__-_r&riosQn?x5J4c{N|Ej=LGp*7@s zu?udkgkI6c$20j@$54Tjc_ape{-h;e*%Xgon4Hz#>9RF&zf^f%csGO>EGpVM`Q{pI zKrQ0GpUY2wiOo&9>uv4Tap;UE+MrtDXs%lBdDRE-n{|otxl(^G1szea$c_CTxltX7 zib3Xg*nE}=XI6~}7c2z>D})9KCmMTUHc^5B^y%l8XV#a9q^hmp=kx3JluECzG)&m|03$n1iLGk*P+PZ)QbQY zcFD@JEj`U=a-hbC#2NM(mG8WoWv%(e*gJNTx<#B5J00>`8Wggc1s(7f&8B=i#s_lW zq;x?C;?J&^N_fj0rB2y-tH?*{rH=U>X`?dX@%KkA!5IdlZlh2^G$z!j5zZ1FZK?aV zz24uLT@mwDhRTkcPsj`K-Fy-QGcT0h!KFe52U#UjlcNc{gI5=f@|D3x}uXrE8a? 
zeTpPp6he9chphia75HxTuseN3u0A=UNX-vgk%Y{w| z>%L@)j{OM@QKIZwjbl+p2ON*lE10{dK`KT^wOn`6Mo63EF6!#jqaTX8lp73FBVj=R z(Ml>?1n%dr(o|oAk-WAA{?|UJ+6bvf*7*{(hP*B62t#1PCGvgR%G#b>8;VrY>YJuU zOCOQe>k{)0Q|c}P^Ln%D>?prF2K+__@j%Qsjj=V!d0nC&r~uzlNI3J7L%!Xuq<{m? z#s;9+aK4%Y3G@D?owr5av~xZ(U}>osH(J>2A9~=OKM0#e^vgj1^Y{(shJh_e+0;>6 zf>PEkGg7G{nyyV(V9h-Q0Z)e;>Q55VyoCEGD&0;eLYXnTbAT%3K_oNVrgN{W+L|DF}~>9f0~#sKgOT9|Bs7(?rdZ+?6UVG}KweAA!*2r(kSo2diU4Q z2gtA&l=g-KtRj5`@ZYMNV?~;}j4-V)BxO8>tM|j!Pv9z#t6Jw#1F(;T6@v2wT(Lbv zvA6PXu6Y`9cOf1ZU%I`=gY-P@i+d1|$MlDoT}%PB$hxap4)=nRUaw6;s@0qxe?EVd z4E(JKQSwVa?ISNejgJ_7IcjWt1}f{9ULOuG?wJXo$1Wi!Dv_A?-VOYMa8SFZ1f+p*gmM=vO6hA$tUHDij^?J#l*GJK}Zbp>N%H z0R~jJcWl9U`6V%hk%Xzrf1{X&bq~lp#0Og^)F%Xdc99kW1Cde2ie||97{NlK9@b-< z=o>EQctZOgq!!Cb0kzGrA5AP6){@4DqvqWyMYhzcF=*_1Knt<3e2HBVgK=Csn7*0T z;KXqJxik@Q@kZ$(5KE5X)Hhl0e3nAg=N+`r- zLbc@D+;UlDxP2;(cs9@Kj@EaBZ+!)+4lg0WS0OQ*q^#2edeS6AJ|+J)@olKyR<+3s z@Zi5fYtmCovA%goQ(OlMj?hwE^K7dtmA!+KyL>$(s_nZDYX2*g_BjzcsG6aHk%gKY z+AalxD1)Y-swS94fpeEio29RnfancSz30_}eqxE9?L;6E*ELm@%{6u-CF z6YPGve3oR#SpWPolC9>go)DlhAxO-1_SLldgu?{t6eV$IPuLlOend%D)!?eb(t$qN zUPM^@d}!<`j{I43^22-W(d;UFX{{!0+1(~m*!v>`Q~Uz$&{#Sw5dGA_uH0~Bptx^s z17Pz5$#JK1idwzAW;;bM5|qJssOj~F(LYyC?vcV^W}6N@yN}-Ew3!@unm0SfM?Oz# zf-h(FV<6s+X>f{~K78lV!39;bOXJBDNd#-=n|-g^m&l#1bHi4~&S9?GigWS0bzQZD zpsPOsd-H-=#lbt zbc6gRyh9ux+#~^1Ttfp`Z40Qc`~L7t7HNfiHfmBQM3ux|Csvp>UnIUPSD+E@${?6?7=msBeVTa)q}gYp;g`X?=H1TMcKS)9-BG@uX$6V_Ek& zACO~OZ69xB#eS=t>9_eg2W;+&JL6O8Vb3vw$7C8)SeMS}1zE}B%A-^*;H}$0vE4GM z-`rhnX$x(Qt~C|guhflJ;Rb2mCFh_#Dwq@Tdu(aX8#nvrPx+w$fv(gJIfA5p+yBRI z`c**iPHpkaqx>IjsV5emfe%C$ojlcd(D-sWV4RNY%I2~MR$n6?gQrgOJIydLHPK;B{ypq`70O}U?BBfLHa zC4(#2T4%lkxMZ4BioFlGnhaSi&%y>3k|uuzI{bS$;{44Ekgz)h(1Not@5Bx}c5Dl< zUI}{4@+bveB?qGTrSu=5l}B!dH2~Pe2+B8a2>EEwPvsw?OS>uQD?6qDN{W+X`S;+0I7!w(p<0V3O4WRZN)qxQnFPBN zXCZ9Y^RyK@Tq_YQGw~vzXME{*_HA3xlz9>%yU;P^N+_0wKckVxK&nUP2nYqAB;`t= zy{%i6Nnqd&-bSVHsB5bI{UQLbu68NUnt4B)1biWl5kmv%*_Z%u#eS?{Oo`}>tn@Zi z$BB`I8hcMzgpP85zbQptxo}OTIn(rDh8G3Go+ABgP0WqL$LtMnB7xBzT$B> z9WYT;H=j88fpSxX=JY2-Bl^Wz)Lv*gBR5b}F;HiFw_-PPMHQ~#qqkGBJEhgYv1FN4 z`ih;2un*`n+Qcu@Ymbz;5ScHM1Q8)hI_;d}RAmPeQVP%hLOlXA0{>HEl8tOm^c3-={CpW6Nz)urR*Y+2D-P)3 zbz(OdY7JbwEs59aLJzq`77b9YGWJ(+&CamA1c4~V1 zrvRJV8GcP3f)(@qvPj~bk(RI!+l?h>1#wb0rTPee%Z*J>+*TV(n9hU%8|w;1cRbBe zqjDW9v=@2!Sf72TZ2_VJaRuik-|Of1QT5~^G_p`v=v9)CNpe47p*9jI35Y6<#Xe~4 z!)XG(%m}f+PGhq$$HFw@C=y^VH3?Nlj_~tY0saoryt{}GLHTX6xW>I8*X>10wISP? 
[git base85 binary patch data omitted]

diff --git a/sequence_tagging_for_ner/infer.py b/sequence_tagging_for_ner/infer.py
new file mode 100644
index 00000000..2d0bd949
--- /dev/null
+++ b/sequence_tagging_for_ner/infer.py
@@ -0,0 +1,71 @@
+import numpy as np
+
+import paddle.fluid as fluid
+import paddle.v2 as paddle
+
+from network_conf import ner_net
+import reader
+from utils import load_dict, load_reverse_dict
+from utils_extend import to_lodtensor
+
+
+def infer(model_path, batch_size, test_data_file, vocab_file, target_file,
+          use_gpu):
+    """
+    Use the model under model_path to predict on the test data; the results are printed to the screen.
+
+    Returns nothing.
+    """
+    word_dict = load_dict(vocab_file)
+    word_reverse_dict = load_reverse_dict(vocab_file)
+
+    label_dict = load_dict(target_file)
+    label_reverse_dict = load_reverse_dict(target_file)
+
+    test_data = paddle.batch(
+        reader.data_reader(test_data_file, word_dict, label_dict),
+        batch_size=batch_size)
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(model_path, exe)
+        for data in test_data():
+            word = to_lodtensor(map(lambda x: x[0], data), place)
+            mark = to_lodtensor(map(lambda x: x[1], data), place)
+            target = to_lodtensor(map(lambda x: x[2], data), place)
+            crf_decode = exe.run(
+                inference_program,
+                feed={"word": word,
+                      "mark": mark,
+                      "target": target},
+                fetch_list=fetch_targets,
+                return_numpy=False)
+            lod_info = (crf_decode[0].lod())[0]
+            np_data = np.array(crf_decode[0])
+            assert len(data) == len(lod_info) - 1
+            for sen_index in xrange(len(data)):
+                assert len(data[sen_index][0]) == lod_info[
+                    sen_index + 1] - lod_info[sen_index]
+                word_index = 0
+                for tag_index in xrange(lod_info[sen_index],
+                                        lod_info[sen_index + 1]):
+                    word = word_reverse_dict[data[sen_index][0][word_index]]
+                    gold_tag = label_reverse_dict[data[sen_index][2][
+                        word_index]]
+                    tag = label_reverse_dict[np_data[tag_index][0]]
+                    print word + "\t" + gold_tag + "\t" + tag
+                    word_index += 1
+            print ""
+
+
+if __name__ == "__main__":
+    infer(
+        model_path="models/params_pass_0",
+        batch_size=6,
+        test_data_file="data/test",
+        vocab_file="data/vocab.txt",
+        target_file="data/target.txt",
+        use_gpu=False)
diff --git a/sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt b/sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt
new file mode 100644
index 00000000..9117fc7f
--- /dev/null
+++ b/sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt
@@ -0,0 +1 @@
+[0.045416602259096893] diff --git a/sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt b/sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt new file mode 100644 index 00000000..79b2f4f2 --- /dev/null +++ b/sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt @@ -0,0 +1 @@ +[0.07692307978868484] \ No newline at end of file diff --git a/sequence_tagging_for_ner/network_conf.py b/sequence_tagging_for_ner/network_conf.py new file mode 100644 index 00000000..5eaa704f --- /dev/null +++ b/sequence_tagging_for_ner/network_conf.py @@ -0,0 +1,127 @@ +import math + +import paddle.fluid as fluid +from paddle.fluid.initializer import NormalInitializer + +from utils import logger, load_dict, get_embedding + + +def ner_net(word_dict_len, label_dict_len, parallel, stack_num=2): + mark_dict_len = 2 + word_dim = 50 + mark_dim = 5 + hidden_dim = 300 + IS_SPARSE = True + embedding_name = 'emb' + + def _net_conf(word, mark, target): + word_embedding = fluid.layers.embedding( + input=word, + size=[word_dict_len, word_dim], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr=fluid.ParamAttr( + name=embedding_name, trainable=False)) + + mark_embedding = fluid.layers.embedding( + input=mark, + size=[mark_dict_len, mark_dim], + dtype='float32', + is_sparse=IS_SPARSE) + + word_caps_vector = fluid.layers.concat( + input=[word_embedding, mark_embedding], axis=1) + mix_hidden_lr = 1 + + rnn_para_attr = fluid.ParamAttr( + initializer=NormalInitializer( + loc=0.0, scale=0.0), + learning_rate=mix_hidden_lr) + hidden_para_attr = fluid.ParamAttr( + initializer=NormalInitializer( + loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3)), + learning_rate=mix_hidden_lr) + + hidden = fluid.layers.fc( + input=word_caps_vector, + name="__hidden00__", + size=hidden_dim, + act="tanh", + bias_attr=fluid.ParamAttr(initializer=NormalInitializer( + loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3))), + param_attr=fluid.ParamAttr(initializer=NormalInitializer( + loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3)))) + fea = [] + for direction in ["fwd", "bwd"]: + for i in range(stack_num): + if i != 0: + hidden = fluid.layers.fc( + name="__hidden%02d_%s__" % (i, direction), + size=hidden_dim, + act="stanh", + bias_attr=fluid.ParamAttr(initializer=NormalInitializer( + loc=0.0, scale=1.0)), + input=[hidden, rnn[0], rnn[1]], + param_attr=[ + hidden_para_attr, rnn_para_attr, rnn_para_attr + ]) + rnn = fluid.layers.dynamic_lstm( + name="__rnn%02d_%s__" % (i, direction), + input=hidden, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid', + bias_attr=fluid.ParamAttr(initializer=NormalInitializer( + loc=0.0, scale=1.0)), + is_reverse=(i % 2) if direction == "fwd" else not i % 2, + param_attr=rnn_para_attr) + fea += [hidden, rnn[0], rnn[1]] + + rnn_fea = fluid.layers.fc( + size=hidden_dim, + bias_attr=fluid.ParamAttr(initializer=NormalInitializer( + loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3))), + act="stanh", + input=fea, + param_attr=[hidden_para_attr, rnn_para_attr, rnn_para_attr] * 2) + + emission = fluid.layers.fc( + size=label_dict_len, + input=rnn_fea, + param_attr=fluid.ParamAttr(initializer=NormalInitializer( + loc=0.0, scale=(1. / math.sqrt(hidden_dim) / 3)))) + + crf_cost = fluid.layers.linear_chain_crf( + input=emission, + label=target, + param_attr=fluid.ParamAttr( + name='crfw', + initializer=NormalInitializer( + loc=0.0, scale=(1. 
/ math.sqrt(hidden_dim) / 3)),
+                learning_rate=mix_hidden_lr))
+        avg_cost = fluid.layers.mean(x=crf_cost)
+        return avg_cost, emission
+
+    word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
+    mark = fluid.layers.data(name='mark', shape=[1], dtype='int64', lod_level=1)
+    target = fluid.layers.data(
+        name="target", shape=[1], dtype='int64', lod_level=1)
+
+    if parallel:
+        places = fluid.layers.get_places()
+        pd = fluid.layers.ParallelDo(places)
+        with pd.do():
+            word_ = pd.read_input(word)
+            mark_ = pd.read_input(mark)
+            target_ = pd.read_input(target)
+            avg_cost, emission_base = _net_conf(word_, mark_, target_)
+            pd.write_output(avg_cost)
+            pd.write_output(emission_base)
+        avg_cost_list, emission = pd()
+        avg_cost = fluid.layers.mean(x=avg_cost_list)
+        emission.stop_gradient = True
+    else:
+        avg_cost, emission = _net_conf(word, mark, target)
+
+    return avg_cost, emission, word, mark, target
diff --git a/sequence_tagging_for_ner/reader.py b/sequence_tagging_for_ner/reader.py
new file mode 100644
index 00000000..5050d0bf
--- /dev/null
+++ b/sequence_tagging_for_ner/reader.py
@@ -0,0 +1,66 @@
+"""
+Conll03 dataset.
+"""
+
+from utils import *
+
+__all__ = ["data_reader"]
+
+
+def canonicalize_digits(word):
+    if any([c.isalpha() for c in word]): return word
+    word = re.sub("\d", "DG", word)
+    if word.startswith("DG"):
+        word = word.replace(",", "")  # remove thousands separator
+    return word
+
+
+def canonicalize_word(word, wordset=None, digits=True):
+    word = word.lower()
+    if digits:
+        if (wordset != None) and (word in wordset): return word
+        word = canonicalize_digits(word)  # try to canonicalize numbers
+    if (wordset == None) or (word in wordset): return word
+    else: return "UUUNKKK"  # unknown token
+
+
+def data_reader(data_file, word_dict, label_dict):
+    """
+    The dataset can be obtained from http://www.clips.uantwerpen.be/conll2003/ner/.
+    It returns a reader creator; each sample in the reader contains a
+    word id sequence, a capitalization-mark sequence and a label id sequence.
+ + :return: reader creator + :rtype: callable + """ + + def reader(): + UNK_IDX = word_dict["UUUNKKK"] + + sentence = [] + labels = [] + with open(data_file, "r") as f: + for line in f: + if len(line.strip()) == 0: + if len(sentence) > 0: + word_idx = [ + word_dict.get( + canonicalize_word(w, word_dict), UNK_IDX) + for w in sentence + ] + mark = [1 if w[0].isupper() else 0 for w in sentence] + label_idx = [label_dict[l] for l in labels] + yield word_idx, mark, label_idx + sentence = [] + labels = [] + else: + segs = line.strip().split() + sentence.append(segs[0]) + # transform I-TYPE to BIO schema + if segs[-1] != "O" and (len(labels) == 0 or + labels[-1][1:] != segs[-1][1:]): + labels.append("B" + segs[-1][1:]) + else: + labels.append(segs[-1]) + + return reader diff --git a/sequence_tagging_for_ner/run.xsh b/sequence_tagging_for_ner/run.xsh new file mode 100755 index 00000000..fdebda79 --- /dev/null +++ b/sequence_tagging_for_ner/run.xsh @@ -0,0 +1,10 @@ +#!/bin/bash + +export MKL_NUM_THREADS=1 +export OMP_NUM_THREADS=1 +cudaid=${sequence_tagging:=0} # use 0-th card as default +export CUDA_VISIBLE_DEVICES=$cudaid + +#pass_num 2200 +sh download.sh +FLAGS_benchmark=true python train.py diff --git a/sequence_tagging_for_ner/train.py b/sequence_tagging_for_ner/train.py new file mode 100644 index 00000000..e4df9c35 --- /dev/null +++ b/sequence_tagging_for_ner/train.py @@ -0,0 +1,127 @@ +import os +import time +import math +import numpy as np + +import paddle +import paddle.fluid as fluid + +import reader +from network_conf import ner_net +from utils import logger, load_dict +from utils_extend import to_lodtensor, get_embedding +from continuous_evaluation import train_acc_kpi, pass_duration_kpi + +def test(exe, chunk_evaluator, inference_program, test_data, place): + chunk_evaluator.reset(exe) + for data in test_data(): + word = to_lodtensor(map(lambda x: x[0], data), place) + mark = to_lodtensor(map(lambda x: x[1], data), place) + target = to_lodtensor(map(lambda x: x[2], data), place) + acc = exe.run(inference_program, + feed={"word": word, + "mark": mark, + "target": target}) + return chunk_evaluator.eval(exe) + + +def main(train_data_file, test_data_file, vocab_file, target_file, emb_file, + model_save_dir, num_passes, use_gpu, parallel): + if not os.path.exists(model_save_dir): + os.mkdir(model_save_dir) + + BATCH_SIZE = 200 + word_dict = load_dict(vocab_file) + label_dict = load_dict(target_file) + + word_vector_values = get_embedding(emb_file) + + word_dict_len = len(word_dict) + label_dict_len = len(label_dict) + + avg_cost, feature_out, word, mark, target = ner_net( + word_dict_len, label_dict_len, parallel) + + sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3) + sgd_optimizer.minimize(avg_cost) + + crf_decode = fluid.layers.crf_decoding( + input=feature_out, param_attr=fluid.ParamAttr(name='crfw')) + + chunk_evaluator = fluid.evaluator.ChunkEvaluator( + input=crf_decode, + label=target, + chunk_scheme="IOB", + num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0))) + + inference_program = fluid.default_main_program().clone() + with fluid.program_guard(inference_program): + test_target = chunk_evaluator.metrics + chunk_evaluator.states + inference_program = fluid.io.get_inference_program(test_target) + + train_reader = paddle.batch( + paddle.reader.shuffle( + reader.data_reader(train_data_file, word_dict, label_dict), + buf_size=20000), + batch_size=BATCH_SIZE) + test_reader = paddle.batch( + paddle.reader.shuffle( + reader.data_reader(test_data_file, word_dict, 
label_dict),
+            buf_size=20000),
+        batch_size=BATCH_SIZE)
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    feeder = fluid.DataFeeder(feed_list=[word, mark, target], place=place)
+    exe = fluid.Executor(place)
+
+    exe.run(fluid.default_startup_program())
+
+    embedding_name = 'emb'
+    embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor()
+    embedding_param.set(word_vector_values, place)
+
+    batch_id = 0
+    total_time = 0.0
+    for pass_id in xrange(num_passes):
+        chunk_evaluator.reset(exe)
+        start_time = time.time()
+        for data in train_reader():
+            cost, batch_precision, batch_recall, batch_f1_score = exe.run(
+                fluid.default_main_program(),
+                feed=feeder.feed(data),
+                fetch_list=[avg_cost] + chunk_evaluator.metrics)
+            batch_id = batch_id + 1
+        t1 = time.time()
+        total_time += t1 - start_time
+        pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(exe)
+        if pass_id == num_passes - 1:
+            train_acc_kpi.add_record(pass_precision)
+            pass_duration_kpi.add_record(total_time / num_passes)
+        if pass_id % 100 == 0:
+            print("[TrainSet] pass_id:" + str(pass_id) + " pass_precision:" + str(
+                pass_precision) + " pass_recall:" + str(pass_recall) +
+                " pass_f1_score:" + str(pass_f1_score))
+        pass_precision, pass_recall, pass_f1_score = test(
+            exe, chunk_evaluator, inference_program, test_reader, place)
+        if pass_id % 100 == 0:
+            print("[TestSet] pass_id:" + str(pass_id) + " pass_precision:" + str(
+                pass_precision) + " pass_recall:" + str(pass_recall) +
+                " pass_f1_score:" + str(pass_f1_score))
+
+        save_dirname = os.path.join(model_save_dir, "params_pass_%d" % pass_id)
+        fluid.io.save_inference_model(save_dirname, ['word', 'mark', 'target'],
+                                      [crf_decode], exe)
+    train_acc_kpi.persist()
+    pass_duration_kpi.persist()
+
+if __name__ == "__main__":
+    main(
+        train_data_file="data/train",
+        test_data_file="data/test",
+        vocab_file="data/vocab.txt",
+        target_file="data/target.txt",
+        emb_file="data/wordVectors.txt",
+        model_save_dir="models",
+        num_passes=2300,
+        use_gpu=True,
+        parallel=True)
diff --git a/sequence_tagging_for_ner/utils.py b/sequence_tagging_for_ner/utils.py
new file mode 100644
index 00000000..f40f1bb1
--- /dev/null
+++ b/sequence_tagging_for_ner/utils.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import logging
+import os
+import re
+import argparse
+import numpy as np
+from collections import defaultdict
+
+logger = logging.getLogger("paddle")
+logger.setLevel(logging.INFO)
+
+
+def get_embedding(emb_file='data/wordVectors.txt'):
+    """
+    Get the trained word vector.
+    """
+    return np.loadtxt(emb_file, dtype=float)
+
+
+def load_dict(dict_path):
+    """
+    Load the word dictionary from the given file.
+    Each line of the given file is a word, which can include multiple columns
+    separated by tab.
+
+    This function takes the first column (columns in a line are separated by
+    tab) as the key and the line number (the index of the word in the
+    dictionary) as the value.
+    """
+
+    return dict((line.strip().split("\t")[0], idx)
+                for idx, line in enumerate(open(dict_path, "r").readlines()))
+
+
+def load_reverse_dict(dict_path):
+    """
+    Load the word dictionary from the given file.
+    Each line of the given file is a word, which can include multiple columns
+    separated by tab.
+
+    This function takes the line number (the index of the word in the
+    dictionary) as the key and the first column (columns in a line are
+    separated by tab) as the value.
+    """
+    return dict((idx, line.strip().split("\t")[0])
+                for idx, line in enumerate(open(dict_path, "r").readlines()))
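As a quick illustration of the two loaders just defined, here is a round-trip on a toy dictionary file; the file name and its three lines are invented for this example:

    # toy_target.txt (hypothetical) contains three lines: B-PER, I-PER, O
    from utils import load_dict, load_reverse_dict

    label_dict = load_dict("toy_target.txt")
    # -> {"B-PER": 0, "I-PER": 1, "O": 2}
    label_reverse_dict = load_reverse_dict("toy_target.txt")
    # -> {0: "B-PER", 1: "I-PER", 2: "O"}

    # The two mappings invert each other, which is what infer.py relies on
    # when it maps predicted label ids back to tag names.
    assert all(label_reverse_dict[idx] == word
               for word, idx in label_dict.items())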
+ """ + return dict((idx, line.strip().split("\t")[0]) + for idx, line in enumerate(open(dict_path, "r").readlines())) diff --git a/sequence_tagging_for_ner/utils_extend.py b/sequence_tagging_for_ner/utils_extend.py new file mode 100644 index 00000000..03e7e62f --- /dev/null +++ b/sequence_tagging_for_ner/utils_extend.py @@ -0,0 +1,28 @@ +import numpy as np + +import paddle.fluid as fluid + + +def get_embedding(emb_file='data/wordVectors.txt'): + """ + Get the trained word vector. + """ + return np.loadtxt(emb_file, dtype='float32') + + +def to_lodtensor(data, place): + """ + convert data to lodtensor + """ + seq_lens = [len(seq) for seq in data] + cur_len = 0 + lod = [cur_len] + for l in seq_lens: + cur_len += l + lod.append(cur_len) + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + res = fluid.LoDTensor() + res.set(flattened_data, place) + res.set_lod([lod]) + return res diff --git a/text_classification/README.md b/text_classification/README.md new file mode 100644 index 00000000..43c15934 --- /dev/null +++ b/text_classification/README.md @@ -0,0 +1,112 @@ +# 文本分类 + +以下是本例的简要目录结构及说明: + +```text +. +├── nets.py # 模型定义 +├── README.md # 文档 +├── train.py # 训练脚本 +├── infer.py # 预测脚本 +└── utils.py # 定义通用函数,从外部获取 +``` + + +## 简介,模型详解 + +在PaddlePaddle v2版本[文本分类](https://github.com/PaddlePaddle/models/blob/develop/text/README.md)中对于文本分类任务有较详细的介绍,在本例中不再重复介绍。 +在模型上,我们采用了bow, cnn, lstm, gru四种常见的文本分类模型。 + +## 训练 + +1. 运行命令 `python train.py bow` 开始训练模型。 + ```python + python train.py bow # bow指定网络结构,可替换成cnn, lstm, gru + ``` + +2. (可选)想自定义网络结构,需在[nets.py](./nets.py)中自行添加,并设置[train.py](./train.py)中的相应参数。 + ```python + def train(train_reader, # 训练数据 + word_dict, # 数据字典 + network, # 模型配置 + use_cuda, # 是否用GPU + parallel, # 是否并行 + save_dirname, # 保存模型路径 + lr=0.2, # 学习率大小 + batch_size=128, # 每个batch的样本数 + pass_num=30): # 训练的轮数 + ``` + +## 训练结果示例 +```text + pass_id: 0, avg_acc: 0.848040, avg_cost: 0.354073 + pass_id: 1, avg_acc: 0.914200, avg_cost: 0.217945 + pass_id: 2, avg_acc: 0.929800, avg_cost: 0.184302 + pass_id: 3, avg_acc: 0.938680, avg_cost: 0.164240 + pass_id: 4, avg_acc: 0.945120, avg_cost: 0.149150 + pass_id: 5, avg_acc: 0.951280, avg_cost: 0.137117 + pass_id: 6, avg_acc: 0.955360, avg_cost: 0.126434 + pass_id: 7, avg_acc: 0.961400, avg_cost: 0.117405 + pass_id: 8, avg_acc: 0.963560, avg_cost: 0.110070 + pass_id: 9, avg_acc: 0.965840, avg_cost: 0.103273 + pass_id: 10, avg_acc: 0.969800, avg_cost: 0.096314 + pass_id: 11, avg_acc: 0.971720, avg_cost: 0.090206 + pass_id: 12, avg_acc: 0.974800, avg_cost: 0.084970 + pass_id: 13, avg_acc: 0.977400, avg_cost: 0.078981 + pass_id: 14, avg_acc: 0.980000, avg_cost: 0.073685 + pass_id: 15, avg_acc: 0.981080, avg_cost: 0.069898 + pass_id: 16, avg_acc: 0.982080, avg_cost: 0.064923 + pass_id: 17, avg_acc: 0.984680, avg_cost: 0.060861 + pass_id: 18, avg_acc: 0.985840, avg_cost: 0.057095 + pass_id: 19, avg_acc: 0.988080, avg_cost: 0.052424 + pass_id: 20, avg_acc: 0.989160, avg_cost: 0.049059 + pass_id: 21, avg_acc: 0.990120, avg_cost: 0.045882 + pass_id: 22, avg_acc: 0.992080, avg_cost: 0.042140 + pass_id: 23, avg_acc: 0.992280, avg_cost: 0.039722 + pass_id: 24, avg_acc: 0.992840, avg_cost: 0.036607 + pass_id: 25, avg_acc: 0.994440, avg_cost: 0.034040 + pass_id: 26, avg_acc: 0.995000, avg_cost: 0.031501 + pass_id: 27, avg_acc: 0.995440, avg_cost: 0.028988 + pass_id: 28, avg_acc: 0.996240, avg_cost: 0.026639 + pass_id: 29, avg_acc: 0.996960, avg_cost: 0.024186 +``` + +## 预测 +1. 
+
+## Training result example
+```text
+    pass_id: 0, avg_acc: 0.848040, avg_cost: 0.354073
+    pass_id: 1, avg_acc: 0.914200, avg_cost: 0.217945
+    pass_id: 2, avg_acc: 0.929800, avg_cost: 0.184302
+    pass_id: 3, avg_acc: 0.938680, avg_cost: 0.164240
+    pass_id: 4, avg_acc: 0.945120, avg_cost: 0.149150
+    pass_id: 5, avg_acc: 0.951280, avg_cost: 0.137117
+    pass_id: 6, avg_acc: 0.955360, avg_cost: 0.126434
+    pass_id: 7, avg_acc: 0.961400, avg_cost: 0.117405
+    pass_id: 8, avg_acc: 0.963560, avg_cost: 0.110070
+    pass_id: 9, avg_acc: 0.965840, avg_cost: 0.103273
+    pass_id: 10, avg_acc: 0.969800, avg_cost: 0.096314
+    pass_id: 11, avg_acc: 0.971720, avg_cost: 0.090206
+    pass_id: 12, avg_acc: 0.974800, avg_cost: 0.084970
+    pass_id: 13, avg_acc: 0.977400, avg_cost: 0.078981
+    pass_id: 14, avg_acc: 0.980000, avg_cost: 0.073685
+    pass_id: 15, avg_acc: 0.981080, avg_cost: 0.069898
+    pass_id: 16, avg_acc: 0.982080, avg_cost: 0.064923
+    pass_id: 17, avg_acc: 0.984680, avg_cost: 0.060861
+    pass_id: 18, avg_acc: 0.985840, avg_cost: 0.057095
+    pass_id: 19, avg_acc: 0.988080, avg_cost: 0.052424
+    pass_id: 20, avg_acc: 0.989160, avg_cost: 0.049059
+    pass_id: 21, avg_acc: 0.990120, avg_cost: 0.045882
+    pass_id: 22, avg_acc: 0.992080, avg_cost: 0.042140
+    pass_id: 23, avg_acc: 0.992280, avg_cost: 0.039722
+    pass_id: 24, avg_acc: 0.992840, avg_cost: 0.036607
+    pass_id: 25, avg_acc: 0.994440, avg_cost: 0.034040
+    pass_id: 26, avg_acc: 0.995000, avg_cost: 0.031501
+    pass_id: 27, avg_acc: 0.995440, avg_cost: 0.028988
+    pass_id: 28, avg_acc: 0.996240, avg_cost: 0.026639
+    pass_id: 29, avg_acc: 0.996960, avg_cost: 0.024186
+```
+
+## Inference
+1. Run `python infer.py bow_model` to start inference.
+    ```python
+    python infer.py bow_model    # bow_model is the directory of the model to load
+    ```
+## Inference result example
+```text
+    model_path: bow_model/epoch0, avg_acc: 0.882800
+    model_path: bow_model/epoch1, avg_acc: 0.882360
+    model_path: bow_model/epoch2, avg_acc: 0.881400
+    model_path: bow_model/epoch3, avg_acc: 0.877800
+    model_path: bow_model/epoch4, avg_acc: 0.872920
+    model_path: bow_model/epoch5, avg_acc: 0.872640
+    model_path: bow_model/epoch6, avg_acc: 0.869960
+    model_path: bow_model/epoch7, avg_acc: 0.865160
+    model_path: bow_model/epoch8, avg_acc: 0.863680
+    model_path: bow_model/epoch9, avg_acc: 0.861200
+    model_path: bow_model/epoch10, avg_acc: 0.853520
+    model_path: bow_model/epoch11, avg_acc: 0.850400
+    model_path: bow_model/epoch12, avg_acc: 0.855960
+    model_path: bow_model/epoch13, avg_acc: 0.853480
+    model_path: bow_model/epoch14, avg_acc: 0.855960
+    model_path: bow_model/epoch15, avg_acc: 0.854120
+    model_path: bow_model/epoch16, avg_acc: 0.854160
+    model_path: bow_model/epoch17, avg_acc: 0.852240
+    model_path: bow_model/epoch18, avg_acc: 0.852320
+    model_path: bow_model/epoch19, avg_acc: 0.850280
+    model_path: bow_model/epoch20, avg_acc: 0.849760
+    model_path: bow_model/epoch21, avg_acc: 0.850160
+    model_path: bow_model/epoch22, avg_acc: 0.846800
+    model_path: bow_model/epoch23, avg_acc: 0.845440
+    model_path: bow_model/epoch24, avg_acc: 0.845640
+    model_path: bow_model/epoch25, avg_acc: 0.846200
+    model_path: bow_model/epoch26, avg_acc: 0.845880
+    model_path: bow_model/epoch27, avg_acc: 0.844880
+    model_path: bow_model/epoch28, avg_acc: 0.844680
+    model_path: bow_model/epoch29, avg_acc: 0.844960
+```
+Note: avg_acc keeps decreasing because the model overfits; please ignore this.
diff --git a/text_classification/continuous_evaluation.py b/text_classification/continuous_evaluation.py
new file mode 100644
index 00000000..39977ea4
--- /dev/null
+++ b/text_classification/continuous_evaluation.py
@@ -0,0 +1,17 @@
+"""
+continuous_evaluation.py
+"""
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi
+from kpi import DurationKpi
+
+lstm_train_cost_kpi = CostKpi('lstm_train_cost', 0.2, 0)
+lstm_pass_duration_kpi = DurationKpi('lstm_pass_duration', 0.2, 0)
+
+
+tracking_kpis = [
+    lstm_train_cost_kpi,
+    lstm_pass_duration_kpi
+]
diff --git a/text_classification/infer.py b/text_classification/infer.py
new file mode 100644
index 00000000..d2a0363d
--- /dev/null
+++ b/text_classification/infer.py
@@ -0,0 +1,50 @@
+import sys
+import time
+import unittest
+import contextlib
+import numpy as np
+
+import paddle.fluid as fluid
+import paddle.v2 as paddle
+
+import utils
+
+
+def infer(test_reader, use_cuda, model_path=None):
+    """
+    inference function
+    """
+    if model_path is None:
+        print(str(model_path) + " cannot be found")
+        return
+
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(model_path, exe)
+
+        total_acc = 0.0
+        total_count = 0
+        for data in test_reader():
+            acc = exe.run(inference_program,
+                          feed=utils.data2tensor(data, place),
+                          fetch_list=fetch_targets,
+                          return_numpy=True)
+            total_acc += acc[0] * len(data)
+            total_count += len(data)
+
+        avg_acc = total_acc / total_count
+        print("model_path: %s, avg_acc: %f" % (model_path, avg_acc))
+
+
+if __name__ == "__main__":
+    word_dict, train_reader, test_reader = utils.prepare_data(
+        "imdb", self_dict=False, batch_size=128,
buf_size=50000) + + model_path = sys.argv[1] + for i in range(30): + epoch_path = model_path + "/" + "epoch" + str(i) + infer(test_reader, use_cuda=False, model_path=epoch_path) diff --git a/text_classification/latest_kpis/lstm_pass_duration_factor.txt b/text_classification/latest_kpis/lstm_pass_duration_factor.txt new file mode 100644 index 00000000..460e12f6 --- /dev/null +++ b/text_classification/latest_kpis/lstm_pass_duration_factor.txt @@ -0,0 +1 @@ +[29.35670002301534] diff --git a/text_classification/latest_kpis/lstm_train_cost_factor.txt b/text_classification/latest_kpis/lstm_train_cost_factor.txt new file mode 100644 index 00000000..92d0fbf2 --- /dev/null +++ b/text_classification/latest_kpis/lstm_train_cost_factor.txt @@ -0,0 +1 @@ +[0.032780374381691217] diff --git a/text_classification/nets.py b/text_classification/nets.py new file mode 100644 index 00000000..cd572c72 --- /dev/null +++ b/text_classification/nets.py @@ -0,0 +1,124 @@ +import sys +import time +import numpy as np + +import paddle.fluid as fluid +import paddle + + +def bow_net(data, + label, + dict_dim, + emb_dim=128, + hid_dim=128, + hid_dim2=96, + class_dim=2): + """ + bow net + """ + emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim]) + bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') + bow_tanh = fluid.layers.tanh(bow) + fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh") + fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh") + prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + acc = fluid.layers.accuracy(input=prediction, label=label) + + return avg_cost, acc, prediction + + +def cnn_net(data, + label, + dict_dim, + emb_dim=128, + hid_dim=128, + hid_dim2=96, + class_dim=2, + win_size=3): + """ + conv net + """ + emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim]) + + conv_3 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=win_size, + act="tanh", + pool_type="max") + + fc_1 = fluid.layers.fc(input=[conv_3], size=hid_dim2) + + prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + acc = fluid.layers.accuracy(input=prediction, label=label) + + return avg_cost, acc, prediction + + +def lstm_net(data, + label, + dict_dim, + emb_dim=128, + hid_dim=128, + hid_dim2=96, + class_dim=2, + emb_lr=30.0): + """ + lstm net + """ + emb = fluid.layers.embedding( + input=data, + size=[dict_dim, emb_dim], + param_attr=fluid.ParamAttr(learning_rate=emb_lr)) + + fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4, act='tanh') + + lstm_h, c = fluid.layers.dynamic_lstm( + input=fc0, size=hid_dim * 4, is_reverse=False) + + lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') + lstm_max_tanh = fluid.layers.tanh(lstm_max) + + fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') + + prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') + + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + acc = fluid.layers.accuracy(input=prediction, label=label) + + return avg_cost, acc, prediction + + +def gru_net(data, + label, + dict_dim, + emb_dim=128, + hid_dim=128, + hid_dim2=96, + class_dim=2, + emb_lr=400.0): + """ + gru net + """ + emb = fluid.layers.embedding( + input=data, + size=[dict_dim, 
emb_dim], + param_attr=fluid.ParamAttr(learning_rate=emb_lr)) + + fc0 = fluid.layers.fc(input=emb, size=hid_dim * 3) + gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False) + gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max') + gru_max_tanh = fluid.layers.tanh(gru_max) + fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh') + prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') + + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + acc = fluid.layers.accuracy(input=prediction, label=label) + + return avg_cost, acc, prediction diff --git a/text_classification/run.xsh b/text_classification/run.xsh new file mode 100755 index 00000000..9f93ed3d --- /dev/null +++ b/text_classification/run.xsh @@ -0,0 +1,9 @@ +#!/bin/bash + +export MKL_NUM_THREADS=1 +export OMP_NUM_THREADS=1 +cudaid=${text_classification:=0} # use 0-th card as default +export CUDA_VISIBLE_DEVICES=$cudaid + +#LSTM pass_num 15 +FLAGS_benchmark=true python train.py lstm diff --git a/text_classification/train.py b/text_classification/train.py new file mode 100644 index 00000000..fced1c09 --- /dev/null +++ b/text_classification/train.py @@ -0,0 +1,140 @@ +import sys +import time +import unittest +import contextlib + +import paddle.fluid as fluid +import paddle + +import utils +from nets import bow_net +from nets import cnn_net +from nets import lstm_net +from nets import gru_net +from continuous_evaluation import lstm_train_cost_kpi, lstm_pass_duration_kpi + +def train(train_reader, + word_dict, + network, + use_cuda, + parallel, + save_dirname, + lr=0.2, + batch_size=128, + pass_num=30): + """ + train network + """ + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + + if not parallel: + cost, acc, prediction = network(data, label, len(word_dict)) + else: + places = fluid.layers.get_places(device_count=2) + pd = fluid.layers.ParallelDo(places) + with pd.do(): + cost, acc, prediction = network( + pd.read_input(data), pd.read_input(label), len(word_dict)) + + pd.write_output(cost) + pd.write_output(acc) + + cost, acc = pd() + cost = fluid.layers.mean(cost) + acc = fluid.layers.mean(acc) + + sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=lr) + sgd_optimizer.minimize(cost) + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + feeder = fluid.DataFeeder(feed_list=[data, label], place=place) + + exe.run(fluid.default_startup_program()) + total_time = 0.0 + newest_avg_cost = 0.0 + for pass_id in xrange(pass_num): + start_time = time.time() + data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 + for data in train_reader(): + avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[cost, acc]) + data_size = len(data) + total_acc += data_size * avg_acc_np + total_cost += data_size * avg_cost_np + data_count += data_size + avg_cost = total_cost / data_count + newest_avg_cost = avg_cost + t1 = time.time() + total_time += t1 - start_time + avg_acc = total_acc / data_count + print("pass_id: %d, avg_acc: %f, avg_cost: %f" % + (pass_id, avg_acc, avg_cost)) + if pass_id == pass_num - 1: + lstm_train_cost_kpi.add_record(newest_avg_cost) + lstm_pass_duration_kpi.add_record(total_time / pass_num) + epoch_model = save_dirname + "/" + "epoch" + str(pass_id) + fluid.io.save_inference_model(epoch_model, ["words", "label"], acc, 
exe)
+    lstm_train_cost_kpi.persist()
+    lstm_pass_duration_kpi.persist()
+
+
+def train_net():
+    word_dict, train_reader, test_reader = utils.prepare_data(
+        "imdb", self_dict=False, batch_size=128, buf_size=50000)
+
+    if sys.argv[1] == "bow":
+        train(
+            train_reader,
+            word_dict,
+            bow_net,
+            use_cuda=False,
+            parallel=False,
+            save_dirname="bow_model",
+            lr=0.002,
+            pass_num=30,
+            batch_size=128)
+    elif sys.argv[1] == "cnn":
+        train(
+            train_reader,
+            word_dict,
+            cnn_net,
+            use_cuda=True,
+            parallel=False,
+            save_dirname="cnn_model",
+            lr=0.01,
+            pass_num=30,
+            batch_size=4)
+    elif sys.argv[1] == "lstm":
+        train(
+            train_reader,
+            word_dict,
+            lstm_net,
+            use_cuda=True,
+            parallel=False,
+            save_dirname="lstm_model",
+            lr=0.05,
+            pass_num=15,
+            batch_size=4)
+    elif sys.argv[1] == "gru":
+        train(
+            train_reader,
+            word_dict,
+            gru_net,
+            use_cuda=True,
+            parallel=False,
+            save_dirname="gru_model",
+            lr=0.05,
+            pass_num=30,
+            batch_size=128)
+    else:
+        print("network name cannot be found!")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    train_net()
diff --git a/text_classification/utils.py b/text_classification/utils.py
new file mode 100644
index 00000000..bda95656
--- /dev/null
+++ b/text_classification/utils.py
@@ -0,0 +1,105 @@
+import sys
+import time
+import numpy as np
+
+import paddle.fluid as fluid
+import paddle
+
+
+def to_lodtensor(data, place):
+    """
+    convert data to a LoDTensor
+    """
+    seq_lens = [len(seq) for seq in data]
+    cur_len = 0
+    lod = [cur_len]
+    for l in seq_lens:
+        cur_len += l
+        lod.append(cur_len)
+    flattened_data = np.concatenate(data, axis=0).astype("int64")
+    flattened_data = flattened_data.reshape([len(flattened_data), 1])
+    res = fluid.LoDTensor()
+    res.set(flattened_data, place)
+    res.set_lod([lod])
+    return res
+
+
+def load_vocab(filename):
+    """
+    load imdb vocabulary
+    """
+    vocab = {}
+    with open(filename) as f:
+        wid = 0
+        for line in f:
+            vocab[line.strip()] = wid
+            wid += 1
+    vocab["<unk>"] = len(vocab)
+    return vocab
+
+
+def data2tensor(data, place):
+    """
+    convert a batch of (sequence, label) pairs to feed tensors
+    """
+    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
+    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+    y_data = y_data.reshape([-1, 1])
+    return {"words": input_seq, "label": y_data}
+
+
+def prepare_data(data_type="imdb",
+                 self_dict=False,
+                 batch_size=128,
+                 buf_size=50000):
+    """
+    prepare data
+    """
+    if self_dict:
+        word_dict = load_vocab(data_type + ".vocab")
+    else:
+        if data_type == "imdb":
+            word_dict = paddle.dataset.imdb.word_dict()
+        elif data_type == "light_imdb":
+            word_dict = light_imdb.word_dict()
+        elif data_type == "tiny_imdb":
+            word_dict = tiny_imdb.word_dict()
+        else:
+            raise RuntimeError("No such dataset")
+
+    if data_type == "imdb":
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.imdb.train(word_dict), buf_size=buf_size),
+            batch_size=batch_size)
+
+        test_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.imdb.test(word_dict), buf_size=buf_size),
+            batch_size=batch_size)
+
+    elif data_type == "light_imdb":
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                light_imdb.train(word_dict), buf_size=buf_size),
+            batch_size=batch_size)
+
+        test_reader = paddle.batch(
+            paddle.reader.shuffle(
+                light_imdb.test(word_dict), buf_size=buf_size),
+            batch_size=batch_size)
+
+    elif data_type == "tiny_imdb":
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                tiny_imdb.train(word_dict), buf_size=buf_size),
+            batch_size=batch_size)
+
+        test_reader = paddle.batch(
+            paddle.reader.shuffle(
+                tiny_imdb.test(word_dict), buf_size=buf_size),
+            batch_size=batch_size)
+    else:
+        raise RuntimeError("no such dataset")
+
+    return word_dict, train_reader, test_reader
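To make the LoD bookkeeping in to_lodtensor above concrete, here is a small sketch that reproduces just the offset computation; the three toy sequences are invented for illustration and no Paddle runtime is needed:

    import numpy as np

    # three token-id sequences of lengths 3, 1 and 2
    data = [np.array([4, 7, 1]), np.array([9]), np.array([2, 2])]

    lod = [0]
    for seq in data:
        lod.append(lod[-1] + len(seq))  # cumulative offsets, as in to_lodtensor
    flattened = np.concatenate(data, axis=0).reshape([-1, 1])

    print(lod)              # [0, 3, 4, 6]: sequence i spans rows lod[i]..lod[i+1]
    print(flattened.shape)  # (6, 1): all tokens stacked into one int column

Paddle can recover every sequence boundary from the offset list alone, which is why the flattened tensor needs no padding.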
From e72f46cbf155e3ab2e539dda0fb17b4619160c01 Mon Sep 17 00:00:00 2001
From: ceboot
Date: Fri, 27 Apr 2018 16:41:51 +0000
Subject: [PATCH 33/67] evaluate [6e0b47b38c653a383ac2e7d16453536205e15f2d]

---
 language_model/latest_kpis/imikolov_20_pass_duration_factor.txt | 2 +-
 sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt   | 2 +-
 sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt       | 2 +-
 text_classification/latest_kpis/lstm_pass_duration_factor.txt   | 2 +-
 text_classification/latest_kpis/lstm_train_cost_factor.txt      | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/language_model/latest_kpis/imikolov_20_pass_duration_factor.txt b/language_model/latest_kpis/imikolov_20_pass_duration_factor.txt
index 4bbac048..40b5cde3 100644
--- a/language_model/latest_kpis/imikolov_20_pass_duration_factor.txt
+++ b/language_model/latest_kpis/imikolov_20_pass_duration_factor.txt
@@ -1 +1 @@
-[40.58481127023697]
+[17.121339857578278]
\ No newline at end of file
diff --git a/sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt b/sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt
index 9117fc7f..fc6ca0bd 100644
--- a/sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt
+++ b/sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt
@@ -1 +1 @@
-[0.045416602259096893]
+[0.020249680643496307]
\ No newline at end of file
diff --git a/sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt b/sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt
index 79b2f4f2..e7a19a6e 100644
--- a/sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt
+++ b/sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt
@@ -1 +1 @@
-[0.07692307978868484]
\ No newline at end of file
+[1.0]
\ No newline at end of file
diff --git a/text_classification/latest_kpis/lstm_pass_duration_factor.txt b/text_classification/latest_kpis/lstm_pass_duration_factor.txt
index 460e12f6..9adbc5ec 100644
--- a/text_classification/latest_kpis/lstm_pass_duration_factor.txt
+++ b/text_classification/latest_kpis/lstm_pass_duration_factor.txt
@@ -1 +1 @@
-[29.35670002301534]
+[14.64635027249654]
\ No newline at end of file
diff --git a/text_classification/latest_kpis/lstm_train_cost_factor.txt b/text_classification/latest_kpis/lstm_train_cost_factor.txt
index 92d0fbf2..6ee69322 100644
--- a/text_classification/latest_kpis/lstm_train_cost_factor.txt
+++ b/text_classification/latest_kpis/lstm_train_cost_factor.txt
@@ -1 +1 @@
-[0.032780374381691217]
+[0.000792166159953922]
\ No newline at end of file

From 50f18e04e2fc43bde5de1bbe520798576a192a59 Mon Sep 17 00:00:00 2001
From: kolinwei <331911734@qq.com>
Date: Sat, 28 Apr 2018 12:53:03 +0800
Subject: [PATCH 34/67] update text_classification diff ratio

---
 text_classification/continuous_evaluation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text_classification/continuous_evaluation.py b/text_classification/continuous_evaluation.py
index 39977ea4..8c1fa2b4 100644
--- a/text_classification/continuous_evaluation.py
+++ b/text_classification/continuous_evaluation.py
@@ -7,7 +7,7 @@ from kpi import CostKpi
 from kpi import DurationKpi
 
-lstm_train_cost_kpi = CostKpi('lstm_train_cost', 0.2, 0)
+lstm_train_cost_kpi = CostKpi('lstm_train_cost', 5, 0)
 lstm_pass_duration_kpi = DurationKpi('lstm_pass_duration', 0.2, 0)
 
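The diff ratio changed above is the tolerance the CE system allows between a run's KPI value and the stored baseline. The kpi module itself is not part of these patches, so the following check is only an assumed sketch of its semantics:

    def within_tolerance(baseline, actual, diff_ratio):
        # For a cost KPI, a value above the baseline is a regression;
        # accept it if the relative increase stays within diff_ratio.
        return (actual - baseline) / abs(baseline) <= diff_ratio

    # Against the lstm_train_cost baseline of 0.032780374381691217 recorded
    # earlier, a hypothetical noisy run at 0.045 fails the old 0.2 ratio
    # but passes the new ratio of 5:
    print(within_tolerance(0.032780374381691217, 0.045, 0.2))  # False
    print(within_tolerance(0.032780374381691217, 0.045, 5))    # True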
From 6e8eef4e9bc78f963616b5bb99a20c4d697bae08 Mon Sep 17 00:00:00 2001
From: ceboot
Date: Sat, 28 Apr 2018 08:08:11 +0000
Subject: [PATCH 35/67] evaluate [a338c7d82a21fcce22af3e03fe6d7c33fe34d9e8]

---
 image_classification/latest_kpis/train_acc_top1_kpi_factor.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt
index dbbb970d..bbdc30d0 100644
--- a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt
+++ b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt
@@ -1 +1 @@
-[0.23652850091457367]
\ No newline at end of file
+[0.33659327030181885]
\ No newline at end of file

From 81253b9ef01e5e6bbb6664def4e25931ab8fbf8b Mon Sep 17 00:00:00 2001
From: ceboot
Date: Sat, 28 Apr 2018 09:24:42 +0000
Subject: [PATCH 36/67] evaluate [c93a624b32b9d07298a04fd480686296a6d1229d]

---
 language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt b/language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt
index 586f51f0..47024cc1 100644
--- a/language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt
+++ b/language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt
@@ -1 +1 @@
-[54.31165283314625]
+[32.465272032979705]
\ No newline at end of file

From 913eb61192e116fe34e4b2f67047e3089aed47e8 Mon Sep 17 00:00:00 2001
From: Xi Chen
Date: Tue, 15 May 2018 15:37:32 -0700
Subject: [PATCH 37/67] add vgg16_aws_dist

---
 vgg16_aws_dist/run.xsh | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100755 vgg16_aws_dist/run.xsh

diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh
new file mode 100755
index 00000000..15cfc5fb
--- /dev/null
+++ b/vgg16_aws_dist/run.xsh
@@ -0,0 +1,42 @@
+#!/usr/bin/env xonsh
+import os
+
+workspace = os.path.dirname(os.path.realpath(__file__))
+pjoin = os.path.join
+normpath = os.path.normpath
+paddle_build_path = normpath(pjoin(workspace, '../../build'))
+paddle_docker_hub_tag = "paddlepaddlece/paddle:latest"
+vgg16_test_dockerhub_tag = "paddlepaddlece/vgg16_dist:latest"
+
+# loginto docker hub
+docker login -u $DOCKER_HUB_USERNAME -p $DOCKER_HUB_PASSWORD
+
+# for aws client
+AWS_SHARED_CREDENTIALS_FILE = "/workspace/dist_files/.aws"
+
+# create paddle docker image
+docker build -t @paddle_docker_hub_tag @(paddle_build_path)
+docker push @paddle_docker_hub_tag
+
+# build test docker image
+git clone https://github.com/putcn/vgg16_dist_test.git
+docker build -t @(vgg16_test_dockerhub_tag) ./vgg16_dist_test
+docker push @(vgg16_test_dockerhub_tag)
+docker logout
+
+# fetch runner and install dependencies
+git clone https://github.com/putcn/aws_runner.git
+pip install -r aws_runner/client/requirements.txt
+
+# start aws testing
+python aws_runner/client/ce_runner.py \
+--key_name aws_benchmark_us_east \
+--pem_path /workspace/dist_files/aws_benchmark_us_east.pem \
+--security_group_id sg-cecaeba5 \
+--online_mode yes \
+--trainer_count 2 \
+--pserver_count 2 \
+--pserver_command local:no,batch_size:128,num_passes:1 \
+--trainer_command local:no,batch_size:128,num_passes:1 \
+--docker_image @(vgg16_test_dockerhub_tag)

From 4e6525c7a7d7ec4592c26df926be3d6d429c1b1d Mon Sep 17 00:00:00 2001
From: Xi Chen
Date: Tue, 15 May 2018 20:21:15 -0700
Subject: [PATCH 38/67] update run.xsh

---
 vgg16_aws_dist/run.xsh | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git
a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index 15cfc5fb..e7781ee2 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -4,39 +4,36 @@ import os workspace = os.path.dirname(os.path.realpath(__file__)) pjoin = os.path.join normpath = os.path.normpath -paddle_build_path = normpath(pjoin(workspace, '../../build')) +paddle_build_path = normpath(pjoin(workspace, '../../../build')) paddle_docker_hub_tag = "paddlepaddlece/paddle:latest" vgg16_test_dockerhub_tag = "paddlepaddlece/vgg16_dist:latest" +# clean up docker +docker system prune -f + # loginto docker hub docker login -u $DOCKER_HUB_USERNAME -p $DOCKER_HUB_PASSWORD -# for aws client -AWS_SHARED_CREDENTIALS_FILE = "/workspace/dist_files/.aws" - # create paddle docker image -docker build -t @paddle_docker_hub_tag @(paddle_build_path) -docker push @paddle_docker_hub_tag +docker build -t @(paddle_docker_hub_tag) @(paddle_build_path) +docker push @(paddle_docker_hub_tag) # build test docker image +rm -rf vgg16_dist_test git clone https://github.com/putcn/vgg16_dist_test.git docker build -t @(vgg16_test_dockerhub_tag) ./vgg16_dist_test docker push @(vgg16_test_dockerhub_tag) docker logout # fetch runner and install dependencies +rm -rf aws_runner git clone https://github.com/putcn/aws_runner.git pip install -r aws_runner/client/requirements.txt -# start aws testing -python aws_runner/client/ce_runner.py \ ---key_name aws_benchmark_us_east \ ---pem_path /workspace/dist_files/aws_benchmark_us_east.pem \ ---security_group_id sg-cecaeba5 \ ---online_mode yes \ ---trainer_count 2 \ ---pserver_count 2 \ ---pserver_command local:no,batch_size:128,num_passes:1 \ ---trainer_command local:no,batch_size:128,num_passes:1 \ ---docker_image @(vgg16_test_dockerhub_tag) +# start aws testingr +python aws_runner/client/ce_runner.py --key_name aws_benchmark_us_east --pem_patt +h /workspace/dist_files/aws_benchmark_us_east.pem --security_group_id sg-cecaebaa +5 --online_mode yes --trainer_count 2 --pserver_count 2 --pserver_command local:: +no,batch_size:128,num_passes:1 --trainer_command local:no,batch_size:128,num_pass +ses:1 --docker_image @(vgg16_test_dockerhub_tag) From 6803d391ee9880dbc2722e59f29f4104e4079a94 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Mon, 21 May 2018 16:06:51 -0700 Subject: [PATCH 39/67] update format and ag --- vgg16_aws_dist/run.xsh | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index e7781ee2..5b012e2d 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -31,9 +31,13 @@ git clone https://github.com/putcn/aws_runner.git pip install -r aws_runner/client/requirements.txt # start aws testingr -python aws_runner/client/ce_runner.py --key_name aws_benchmark_us_east --pem_patt -h /workspace/dist_files/aws_benchmark_us_east.pem --security_group_id sg-cecaebaa -5 --online_mode yes --trainer_count 2 --pserver_count 2 --pserver_command local:: -no,batch_size:128,num_passes:1 --trainer_command local:no,batch_size:128,num_pass -ses:1 --docker_image @(vgg16_test_dockerhub_tag) - +python aws_runner/client/ce_runner.py + --key_name aws_benchmark_us_east \ + --pem_path /workspace/dist_files/aws_benchmark_us_east.pem \ + --security_group_id sg-95539dff \ + --online_mode yes \ + --trainer_count 2 \ + --pserver_count 2 \ + --pserver_command local:no,batch_size:128,num_passes:1 \ + --trainer_command local:no,batch_size:128,num_passes:1 \ + --docker_image @(vgg16_test_dockerhub_tag) \ No newline at end of file From 
772013ce8b18aed82d36394605c779057ca41495 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Mon, 21 May 2018 16:08:03 -0700 Subject: [PATCH 40/67] format update --- vgg16_aws_dist/run.xsh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index 5b012e2d..0299af52 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -31,7 +31,7 @@ git clone https://github.com/putcn/aws_runner.git pip install -r aws_runner/client/requirements.txt # start aws testingr -python aws_runner/client/ce_runner.py +python aws_runner/client/ce_runner.py \ --key_name aws_benchmark_us_east \ --pem_path /workspace/dist_files/aws_benchmark_us_east.pem \ --security_group_id sg-95539dff \ From 657b1f5bc0a2eb21533d055ad73d2de3c8b607c9 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Mon, 21 May 2018 17:08:09 -0700 Subject: [PATCH 41/67] format update --- vgg16_aws_dist/run.xsh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index 0299af52..a8e2e312 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -7,6 +7,7 @@ normpath = os.path.normpath paddle_build_path = normpath(pjoin(workspace, '../../../build')) paddle_docker_hub_tag = "paddlepaddlece/paddle:latest" vgg16_test_dockerhub_tag = "paddlepaddlece/vgg16_dist:latest" +training_command = "local:no,batch_size:128,num_passes:1" # clean up docker docker system prune -f @@ -33,11 +34,10 @@ pip install -r aws_runner/client/requirements.txt # start aws testingr python aws_runner/client/ce_runner.py \ --key_name aws_benchmark_us_east \ - --pem_path /workspace/dist_files/aws_benchmark_us_east.pem \ - --security_group_id sg-95539dff \ + --security_group_id sg-95539dff \ --online_mode yes \ --trainer_count 2 \ --pserver_count 2 \ - --pserver_command local:no,batch_size:128,num_passes:1 \ - --trainer_command local:no,batch_size:128,num_passes:1 \ + --pserver_command @(training_command) \ + --trainer_command @(training_command) \ --docker_image @(vgg16_test_dockerhub_tag) \ No newline at end of file From 35277e19827f330b1887489a4c876a405edad5a0 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Thu, 24 May 2018 11:53:39 -0700 Subject: [PATCH 42/67] add source dir existence check and more log --- vgg16_aws_dist/run.xsh | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index a8e2e312..3fabe877 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -16,21 +16,42 @@ docker system prune -f docker login -u $DOCKER_HUB_USERNAME -p $DOCKER_HUB_PASSWORD # create paddle docker image +echo "going to build and push paddle production image" docker build -t @(paddle_docker_hub_tag) @(paddle_build_path) docker push @(paddle_docker_hub_tag) # build test docker image -rm -rf vgg16_dist_test -git clone https://github.com/putcn/vgg16_dist_test.git +echo "going to prepare and build vgg16_dist_test" +if [ ! -d "vgg16_dist_test" ]; then + echo "No vgg16_dist_test repo found, going to clone one" + git clone https://github.com/putcn/vgg16_dist_test.git +fi +cd vgg16_dist_test +if [ -d "~/.cache/paddle/dataset/cifar" ]; then + echo "host cifar cache found, copying it to docker root" + mkdir -p .cache/paddle/dataset/ + cp -r -f ~/.cache/paddle/dataset/cifar .cache/paddle/dataset/ +fi +git pull +cd .. 
+echo "going to build vgg16_dist_test docker image and push it" docker build -t @(vgg16_test_dockerhub_tag) ./vgg16_dist_test docker push @(vgg16_test_dockerhub_tag) docker logout # fetch runner and install dependencies -rm -rf aws_runner -git clone https://github.com/putcn/aws_runner.git +echo "going to work with aws_runner" +if [ ! -d "aws_runner" ]; then + echo "no aws_runner found, cloning one" + git clone https://github.com/putcn/aws_runner.git +fi +cd aws_runner +git pull +cd .. +echo "going to install aws_runner dependencies" pip install -r aws_runner/client/requirements.txt +echo "going to start testing" # start aws testingr python aws_runner/client/ce_runner.py \ --key_name aws_benchmark_us_east \ From 16466703263f69ef2a8a9dc4e3cdcba51d032455 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Thu, 24 May 2018 15:33:10 -0700 Subject: [PATCH 43/67] switch to regualar bash script --- vgg16_aws_dist/run.xsh | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index 3fabe877..b8a03f0e 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -1,13 +1,9 @@ -#!/usr/bin/env xonsh -import os +#!/bin/bash -workspace = os.path.dirname(os.path.realpath(__file__)) -pjoin = os.path.join -normpath = os.path.normpath -paddle_build_path = normpath(pjoin(workspace, '../../../build')) -paddle_docker_hub_tag = "paddlepaddlece/paddle:latest" -vgg16_test_dockerhub_tag = "paddlepaddlece/vgg16_dist:latest" -training_command = "local:no,batch_size:128,num_passes:1" +paddle_build_path=$PADDLE_PATH/build +paddle_docker_hub_tag="paddlepaddlece/paddle:latest" +vgg16_test_dockerhub_tag="paddlepaddlece/vgg16_dist:latest" +training_command="local:no,batch_size:128,num_passes:1" # clean up docker docker system prune -f @@ -17,17 +13,17 @@ docker login -u $DOCKER_HUB_USERNAME -p $DOCKER_HUB_PASSWORD # create paddle docker image echo "going to build and push paddle production image" -docker build -t @(paddle_docker_hub_tag) @(paddle_build_path) -docker push @(paddle_docker_hub_tag) +docker build -t $paddle_docker_hub_tag $paddle_build_path +docker push $paddle_docker_hub_tag # build test docker image echo "going to prepare and build vgg16_dist_test" -if [ ! -d "vgg16_dist_test" ]; then +if [ ! -d vgg16_dist_test ]; then echo "No vgg16_dist_test repo found, going to clone one" git clone https://github.com/putcn/vgg16_dist_test.git fi cd vgg16_dist_test -if [ -d "~/.cache/paddle/dataset/cifar" ]; then +if [ -d ~/.cache/paddle/dataset/cifar ]; then echo "host cifar cache found, copying it to docker root" mkdir -p .cache/paddle/dataset/ cp -r -f ~/.cache/paddle/dataset/cifar .cache/paddle/dataset/ @@ -35,13 +31,13 @@ fi git pull cd .. echo "going to build vgg16_dist_test docker image and push it" -docker build -t @(vgg16_test_dockerhub_tag) ./vgg16_dist_test -docker push @(vgg16_test_dockerhub_tag) +docker build -t $vgg16_test_dockerhub_tag ./vgg16_dist_test +docker push $vgg16_test_dockerhub_tag docker logout # fetch runner and install dependencies echo "going to work with aws_runner" -if [ ! -d "aws_runner" ]; then +if [ ! 
-d aws_runner ]; then echo "no aws_runner found, cloning one" git clone https://github.com/putcn/aws_runner.git fi @@ -59,6 +55,6 @@ python aws_runner/client/ce_runner.py \ --online_mode yes \ --trainer_count 2 \ --pserver_count 2 \ - --pserver_command @(training_command) \ - --trainer_command @(training_command) \ - --docker_image @(vgg16_test_dockerhub_tag) \ No newline at end of file + --pserver_command $training_command \ + --trainer_command $training_command \ + --docker_image $vgg16_test_dockerhub_tag \ No newline at end of file From 78c58a5932d9b209d9967711ea6192fbf90f6df0 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Thu, 24 May 2018 16:51:57 -0700 Subject: [PATCH 44/67] moving ce_runner to here --- vgg16_aws_dist/ce_runner.py | 221 ++++++++++++++++++++++++ vgg16_aws_dist/continuous_evaluation.py | 10 ++ vgg16_aws_dist/run.xsh | 1 + 3 files changed, 232 insertions(+) create mode 100644 vgg16_aws_dist/ce_runner.py create mode 100644 vgg16_aws_dist/continuous_evaluation.py diff --git a/vgg16_aws_dist/ce_runner.py b/vgg16_aws_dist/ce_runner.py new file mode 100644 index 00000000..0d6f8332 --- /dev/null +++ b/vgg16_aws_dist/ce_runner.py @@ -0,0 +1,221 @@ +import argparse +import logging +import sys, os +import numpy as np +import threading +import copy +from aws_runner.client.train_command import TrainCommand + +# for ce env ONLY + +sys.path.append(os.environ['ceroot']) +from kpi import LessWorseKpi + +from aws_runner.client.abclient import Abclient + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def print_arguments(): + print('----------- Configuration Arguments -----------') + for arg, value in sorted(vars(args).iteritems()): + print('%s: %s' % (arg, value)) + +parser = argparse.ArgumentParser(description=__doc__) + +parser.add_argument( + '--key_name', type=str, default="", help="required, key pair name") +parser.add_argument( + '--security_group_id', + type=str, + default="", + help="required, the security group id associated with your VPC") + +parser.add_argument( + '--vpc_id', + type=str, + default="", + help="The VPC in which you wish to run test") +parser.add_argument( + '--subnet_id', + type=str, + default="", + help="The Subnet_id in which you wish to run test") + +parser.add_argument( + '--pserver_instance_type', + type=str, + default="c5.2xlarge", + help="your pserver instance type, c5.2xlarge by default") +parser.add_argument( + '--trainer_instance_type', + type=str, + default="p2.8xlarge", + help="your trainer instance type, p2.8xlarge by default") + +parser.add_argument( + '--task_name', + type=str, + default="", + help="the name you want to identify your job") +parser.add_argument( + '--pserver_image_id', + type=str, + default="ami-da2c1cbf", + help="ami id for system image, default one has nvidia-docker ready, \ + use ami-1ae93962 for us-east-2") + +parser.add_argument( + '--pserver_command', + type=str, + default="", + help="pserver start command, format example: python,vgg.py,batch_size:128,is_local:yes" +) + +parser.add_argument( + '--trainer_image_id', + type=str, + default="ami-da2c1cbf", + help="ami id for system image, default one has nvidia-docker ready, \ + use ami-1ae93962 for us-west-2") + +parser.add_argument( + '--trainer_command', + type=str, + default="", + help="trainer start command, format example: python,vgg.py,batch_size:128,is_local:yes" +) + +parser.add_argument( + 
'--availability_zone', + type=str, + default="us-east-2a", + help="aws zone id to place ec2 instances") + +parser.add_argument( + '--trainer_count', type=int, default=1, help="Trainer count") + +parser.add_argument( + '--pserver_count', type=int, default=1, help="Pserver count") + +parser.add_argument( + '--action', type=str, default="create", help="create|cleanup|status") + +parser.add_argument('--pem_path', type=str, help="private key file") + +parser.add_argument( + '--pserver_port', type=str, default="5436", help="pserver port") + +parser.add_argument( + '--docker_image', type=str, default="busybox", help="training docker image") + +parser.add_argument( + '--master_server_port', type=int, default=5436, help="master server port") + +parser.add_argument( + '--master_server_public_ip', type=str, help="master server public ip") + +parser.add_argument( + '--master_docker_image', + type=str, + default="putcn/paddle_aws_master:latest", + help="master docker image id") + +parser.add_argument( + '--no_clean_up', + type=str2bool, + default=False, + help="whether to clean up after training") + +parser.add_argument( + '--online_mode', + type=str2bool, + default=False, + help="is client activly stays online") + +args = parser.parse_args() +logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') + +train_speed_kpi = LessWorseKpi('train_speed', 0.01) +kpis_to_track = {} + +def save_to_kpi(name, val): + val = float(val) + if name in kpis_to_track: + kpi_to_track = kpis_to_track[name] + else: + kpi_to_track = LessWorseKpi(name, 0.01) + kpi_to_track.add_record(np.array(val, dtype='float32')) + +class DataCollector(object): + def __init__(self): + self.store = [] + self.metric_data_identifier = "**metrics_data: " + def log_processor(self, msg): + if (msg.startswith(self.metric_data_identifier)): + str_msg = msg.replace(self.metric_data_identifier, "") + metrics_raw = str_msg.split(",") + for metric in metrics_raw: + metric_data = metric.split("=") + if metric_data[0].strip() == "train_speed": + self.save(metric_data[1]) + def save(self, val): + self.store.append(float(val)) + def avg(self): + return np.average(self.store) + +solo_data_collector = DataCollector() +def train_without_pserver(args, lock): + def log_handler(source, id): + for line in iter(source.readline, ""): + logging.info("without pserver:") + logging.info(line) + solo_data_collector.log_processor(line) + + args.pserver_count = 0 + args.trainer_count = 1 + trainer_command = TrainCommand(args.trainer_command) + trainer_command.update({"local":"yes"}) + args.trainer_command = trainer_command.unparse() + logging.info(args) + abclient = Abclient(args, log_handler, lock) + abclient.create() + +cluster_data_collector = DataCollector() +def train_with_pserver(args, lock): + def log_handler(source, id): + for line in iter(source.readline, ""): + logging.info("with pserver:") + logging.info(line) + cluster_data_collector.log_processor(line) + + logging.info(args) + abclient = Abclient(args, log_handler, lock) + abclient.create() + +if __name__ == "__main__": + print_arguments() + if args.action == "create": + lock = threading.Lock() + thread_no_pserver = threading.Thread( + target=train_without_pserver, + args=(copy.copy(args), lock,) + ) + thread_with_pserver = threading.Thread( + target=train_with_pserver, + args=(copy.copy(args), lock, ) + ) + thread_no_pserver.start() + thread_with_pserver.start() + thread_no_pserver.join() + thread_with_pserver.join() + + speedup_rate = cluster_data_collector.avg()/solo_data_collector.avg() + 
logging.info("speed up rate is "+ str(speedup_rate)) + + save_to_kpi("speedup_rate", speedup_rate.item()) \ No newline at end of file diff --git a/vgg16_aws_dist/continuous_evaluation.py b/vgg16_aws_dist/continuous_evaluation.py new file mode 100644 index 00000000..87b88c48 --- /dev/null +++ b/vgg16_aws_dist/continuous_evaluation.py @@ -0,0 +1,10 @@ +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import LessWorseKpi + +speedup_rate_kpi = LessWorseKpi('speedup_rate', 0.01) + +tracking_kpis = [ + speedup_rate_kpi, +] diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index b8a03f0e..2c79a823 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -1,5 +1,6 @@ #!/bin/bash +PADDLE_PATH=../../../ paddle_build_path=$PADDLE_PATH/build paddle_docker_hub_tag="paddlepaddlece/paddle:latest" vgg16_test_dockerhub_tag="paddlepaddlece/vgg16_dist:latest" From f60156781b895103ce006a6018d37af3fb9dfb75 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Thu, 24 May 2018 16:53:13 -0700 Subject: [PATCH 45/67] adding base kpi --- vgg16_aws_dist/latest_kpis/speedup_rate_factor.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 vgg16_aws_dist/latest_kpis/speedup_rate_factor.txt diff --git a/vgg16_aws_dist/latest_kpis/speedup_rate_factor.txt b/vgg16_aws_dist/latest_kpis/speedup_rate_factor.txt new file mode 100644 index 00000000..edf5775a --- /dev/null +++ b/vgg16_aws_dist/latest_kpis/speedup_rate_factor.txt @@ -0,0 +1 @@ +[0.5] \ No newline at end of file From fc313eef14a49b7fd0f9c82bdd823a3895f4d751 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Thu, 24 May 2018 16:54:45 -0700 Subject: [PATCH 46/67] update runner path --- vgg16_aws_dist/run.xsh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index 2c79a823..af6e0bce 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -50,7 +50,7 @@ pip install -r aws_runner/client/requirements.txt echo "going to start testing" # start aws testingr -python aws_runner/client/ce_runner.py \ +python ce_runner.py \ --key_name aws_benchmark_us_east \ --security_group_id sg-95539dff \ --online_mode yes \ From 4e700ae24e4b3ca858b63027a58b1d71e9e5e71b Mon Sep 17 00:00:00 2001 From: guochaorong <32069604+guochaorong@users.noreply.github.com> Date: Fri, 25 May 2018 15:09:11 +0800 Subject: [PATCH 47/67] Update ce_runner.py --- vgg16_aws_dist/ce_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vgg16_aws_dist/ce_runner.py b/vgg16_aws_dist/ce_runner.py index 0d6f8332..7091393d 100644 --- a/vgg16_aws_dist/ce_runner.py +++ b/vgg16_aws_dist/ce_runner.py @@ -63,6 +63,7 @@ def print_arguments(): type=str, default="", help="the name you want to identify your job") + parser.add_argument( '--pserver_image_id', type=str, @@ -218,4 +219,4 @@ def log_handler(source, id): speedup_rate = cluster_data_collector.avg()/solo_data_collector.avg() logging.info("speed up rate is "+ str(speedup_rate)) - save_to_kpi("speedup_rate", speedup_rate.item()) \ No newline at end of file + save_to_kpi("speedup_rate", speedup_rate.item()) From a1acf8af5290d5b94d16fb033f9562ab0c9e970b Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Fri, 25 May 2018 14:00:21 -0700 Subject: [PATCH 48/67] find paddle path by current bash file path --- vgg16_aws_dist/run.xsh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index af6e0bce..cc0e0a0d 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -1,6 +1,8 @@ 
#!/bin/bash -PADDLE_PATH=../../../ + +CURRENT_FILE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PADDLE_PATH=$CURRENT_FILE_DIR../../../ paddle_build_path=$PADDLE_PATH/build paddle_docker_hub_tag="paddlepaddlece/paddle:latest" vgg16_test_dockerhub_tag="paddlepaddlece/vgg16_dist:latest" From 4d0db6c4a8427b01464114474e85b90c122beb0d Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Sat, 26 May 2018 13:14:18 -0700 Subject: [PATCH 49/67] update paddle path --- vgg16_aws_dist/run.xsh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index cc0e0a0d..2f9e7b5e 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -2,7 +2,7 @@ CURRENT_FILE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -PADDLE_PATH=$CURRENT_FILE_DIR../../../ +PADDLE_PATH=$CURRENT_FILE_DIR/../../.. paddle_build_path=$PADDLE_PATH/build paddle_docker_hub_tag="paddlepaddlece/paddle:latest" vgg16_test_dockerhub_tag="paddlepaddlece/vgg16_dist:latest" From c31d604a53053412e6eae4fbcb7f3900aa96f73c Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Sat, 26 May 2018 15:11:05 -0700 Subject: [PATCH 50/67] force start from current folder --- vgg16_aws_dist/run.xsh | 1 + 1 file changed, 1 insertion(+) diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index 2f9e7b5e..87c77cbf 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -21,6 +21,7 @@ docker push $paddle_docker_hub_tag # build test docker image echo "going to prepare and build vgg16_dist_test" +cd $CURRENT_FILE_DIR if [ ! -d vgg16_dist_test ]; then echo "No vgg16_dist_test repo found, going to clone one" git clone https://github.com/putcn/vgg16_dist_test.git From 6b8c1222af79879d10a37a0bf22d001a77185410 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Mon, 28 May 2018 10:05:14 +0800 Subject: [PATCH 51/67] update all to paddle master (#28) * code formatted * update transformer model train * edit text_classification/README.md * update resnet50/vgg16 baseline * active model lstm * active model language model * active model text_classification * active sequence_tagging_for_ner * active model object_detection * add model object_detection four gpu card kpi * add CPU model * Update README.md * add last kpi record for CPU model * add multi card for 4 models * fix nan digital for CPU * Revert "fix nan digital for CPU" * fix nan digital for CPU * change transform change api * remove resnet30 for CPU, reuse its function in resnet50 * image_classification add four card kpi * evaluate [d0a62bfcf2067f9eab487b4662ebc3b3a0fc07c8] * fix some code style error * fix speed bug * evaluate [8e3e65ff93718efbe3fa7f01dc52132f560e8bfc] * Update run.xsh * add multi card for sequence_tagging_for_ner to test PR * not save models * update run.xsh --- .pre-commit-config.yaml | 13 ++ README.md | 3 + __ocr_recognition/continuous_evaluation.py | 1 - __ocr_recognition/ctc_reader.py | 3 +- __ocr_recognition/ctc_train.py | 10 +- __ocr_recognition/eval.py | 3 +- {resnet30 => __resnet30}/__init__.py | 0 __resnet30/continuous_evaluation.py | 19 ++ .../history/train_cost_factor.txt | 2 +- .../history/train_duration_factor.txt | 2 +- __resnet30/latest_kpis/test_acc_factor.txt | 1 + __resnet30/latest_kpis/train_acc_factor.txt | 1 + .../latest_kpis/train_cost_factor.txt | 2 +- .../latest_kpis/train_duration_factor.txt | 2 +- __resnet30/latest_kpis/train_speed_factor.txt | 1 + {resnet30 => __resnet30}/model.py | 133 +++++++++----- {resnet30 => __resnet30}/run.xsh | 2 +- image_classification/continuous_evaluation.py | 31 +++-
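This patch activates most KPIs (actived=True) and tightens their thresholds. The KPI classes themselves live in the CE framework's kpi module, which is not part of this series; the sketch below is only an assumed reading of what a ratio threshold like CostKpi('train_cost', 0.05, ...) implies, not the framework's actual code:

    def kpi_passes(baseline, current, diff_thre, less_is_better):
        # assumed semantics: fail the run if the new value regresses
        # against the saved baseline factor by more than diff_thre (e.g. 5%)
        if less_is_better:
            return current <= baseline * (1.0 + diff_thre)
        return current >= baseline * (1.0 - diff_thre)

    kpi_passes(baseline=2.87, current=2.95, diff_thre=0.05, less_is_better=True)    # True
    kpi_passes(baseline=101.3, current=90.0, diff_thre=0.05, less_is_better=False)  # False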
.../four_card_train_speed_kpi_factor.txt | 1 + .../latest_kpis/train_acc_top1_kpi_factor.txt | 2 +- .../latest_kpis/train_acc_top5_kpi_factor.txt | 2 +- .../latest_kpis/train_cost_kpi_factor.txt | 2 +- .../latest_kpis/train_speed_kpi_factor.txt | 1 + image_classification/mobilenet.py | 153 ++++++++++++++++ image_classification/reader.py | 12 +- image_classification/run.xsh | 9 +- image_classification/se_resnext.py | 51 ++++-- image_classification/train.py | 171 ++++++++++++++---- language_model/continuous_evaluation.py | 4 +- .../imikolov_20_avg_ppl_factor.txt | 2 +- .../imikolov_20_pass_duration_factor.txt | 2 +- language_model/train.py | 12 +- language_model/utils.py | 3 +- lstm/continuous_evaluation.py | 5 +- lstm/get_gpu_data.py | 9 +- .../latest_kpis/imdb_32_gpu_memory_factor.txt | 2 +- .../imdb_32_train_speed_factor.txt | 2 +- lstm/model.py | 18 +- mnist/continuous_evaluation.py | 8 +- mnist/latest_kpis/test_acc_factor.txt | 2 +- mnist/latest_kpis/train_acc_factor.txt | 2 +- mnist/latest_kpis/train_cost_factor.txt | 5 + mnist/latest_kpis/train_duration_factor.txt | 2 +- mnist/model.py | 39 ++-- mnist/run.xsh | 1 - object_detection/continuous_evaluation.py | 11 +- object_detection/labels/create_list.py | 6 +- .../four_card_train_speed_factor.txt | 1 + .../latest_kpis/train_cost_factor.txt | 2 +- .../latest_kpis/train_speed_factor.txt | 2 +- object_detection/reader.py | 19 +- object_detection/run.xsh | 5 +- object_detection/train.py | 37 ++-- resnet30/continuous_evaluation.py | 12 -- resnet50/continuous_evaluation.py | 10 +- resnet50/get_gpu_data.py | 9 +- .../cifar10_128_gpu_memory_factor.txt | 2 +- .../cifar10_128_train_acc_factor.txt | 2 +- .../cifar10_128_train_speed_factor.txt | 2 +- .../flowers_64_gpu_memory_factor.txt | 2 +- .../flowers_64_train_speed_factor.txt | 2 +- resnet50/model.py | 33 ++-- seq2seq/continuous_evaluation.py | 1 - seq2seq/get_gpu_data.py | 9 +- .../wmb_128_train_speed_factor.txt | 2 +- seq2seq/model.py | 19 +- .../continuous_evaluation.py | 7 +- sequence_tagging_for_ner/download.sh | 1 - .../pass_duration_card4_factor.txt | 1 + .../latest_kpis/pass_duration_factor.txt | 2 +- .../latest_kpis/train_acc_card4_factor.txt | 1 + .../latest_kpis/train_acc_factor.txt | 2 +- sequence_tagging_for_ner/network_conf.py | 11 +- sequence_tagging_for_ner/run.xsh | 8 +- sequence_tagging_for_ner/train.py | 56 ++++-- text_classification/README.md | 1 + text_classification/continuous_evaluation.py | 8 +- .../latest_kpis/lstm_pass_duration_factor.txt | 2 +- .../latest_kpis/lstm_train_cost_factor.txt | 2 +- text_classification/train.py | 6 +- transformer/continuous_evaluation.py | 1 - transformer/infer.py | 4 +- .../latest_kpis/train_avg_ppl_kpi_factor.txt | 2 +- .../train_pass_duration_kpi_factor.txt | 2 +- transformer/train.py | 21 ++- transformer/transformer_config.py | 2 - vgg16/continuous_evaluation.py | 8 +- vgg16/get_gpu_data.py | 9 +- .../cifar10_128_gpu_memory_factor.txt | 2 +- .../cifar10_128_train_speed_factor.txt | 2 +- .../flowers_32_gpu_memory_factor.txt | 2 +- .../flowers_32_train_speed_factor.txt | 2 +- vgg16/model.py | 29 +-- 93 files changed, 778 insertions(+), 355 deletions(-) create mode 100644 .pre-commit-config.yaml rename {resnet30 => __resnet30}/__init__.py (100%) create mode 100644 __resnet30/continuous_evaluation.py rename {resnet30 => __resnet30}/history/train_cost_factor.txt (79%) rename {resnet30 => __resnet30}/history/train_duration_factor.txt (80%) create mode 100644 __resnet30/latest_kpis/test_acc_factor.txt create mode 100644 
__resnet30/latest_kpis/train_acc_factor.txt rename {resnet30 => __resnet30}/latest_kpis/train_cost_factor.txt (90%) rename {resnet30 => __resnet30}/latest_kpis/train_duration_factor.txt (89%) create mode 100644 __resnet30/latest_kpis/train_speed_factor.txt rename {resnet30 => __resnet30}/model.py (57%) rename {resnet30 => __resnet30}/run.xsh (81%) create mode 100644 image_classification/latest_kpis/four_card_train_speed_kpi_factor.txt create mode 100644 image_classification/latest_kpis/train_speed_kpi_factor.txt create mode 100644 image_classification/mobilenet.py create mode 100644 mnist/latest_kpis/train_cost_factor.txt create mode 100644 object_detection/latest_kpis/four_card_train_speed_factor.txt delete mode 100644 resnet30/continuous_evaluation.py create mode 100644 sequence_tagging_for_ner/latest_kpis/pass_duration_card4_factor.txt create mode 100644 sequence_tagging_for_ner/latest_kpis/train_acc_card4_factor.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..7684eed1 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: +- repo: https://github.com/PaddlePaddle/mirrors-yapf.git + sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37 + hooks: + - id: yapf + files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ +- repo: https://github.com/pre-commit/pre-commit-hooks + sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0 + hooks: + - id: check-added-large-files + - id: check-merge-conflict + - id: check-symlinks + - id: end-of-file-fixer diff --git a/README.md b/README.md index 63388821..7fb29933 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,9 @@ ## Howtos +### Contribute +- Run `pre-commit run -a` before your PR, this will help to format code automatically + ### Add New Evaluation Task Reference [mnist task](https://github.com/Superjomn/paddle-ce-latest-kpis/tree/master/mnist), diff --git a/__ocr_recognition/continuous_evaluation.py b/__ocr_recognition/continuous_evaluation.py index 8a75c496..a4da1f67 100644 --- a/__ocr_recognition/continuous_evaluation.py +++ b/__ocr_recognition/continuous_evaluation.py @@ -6,7 +6,6 @@ train_avg_loss_kpi = CostKpi('train_avg_loss', 0.2, 0) train_seq_err_kpi = CostKpi('train_seq_err', 0.2, 0) - tracking_kpis = [ train_avg_loss_kpi, train_seq_err_kpi, diff --git a/__ocr_recognition/ctc_reader.py b/__ocr_recognition/ctc_reader.py index 0076ca17..5e65ef42 100644 --- a/__ocr_recognition/ctc_reader.py +++ b/__ocr_recognition/ctc_reader.py @@ -167,7 +167,8 @@ def train(batch_size, train_images_dir=None, train_list_file=None): train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME) if train_list_file is None: train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME) - return generator.train_reader(train_images_dir, train_list_file, batch_size) + return generator.train_reader(train_images_dir, train_list_file, + batch_size) def test(batch_size=1, test_images_dir=None, test_list_file=None): diff --git a/__ocr_recognition/ctc_train.py b/__ocr_recognition/ctc_train.py index be6e02f7..43fcd13e 100644 --- a/__ocr_recognition/ctc_train.py +++ b/__ocr_recognition/ctc_train.py @@ -104,8 +104,12 @@ def train(args, data_reader=ctc_reader): if batch_id == args.iterations - 1: avg_seq_err = batch_seq_error[0] / args.batch_size avg_loss = batch_loss[0] / args.batch_size - train_avg_loss_kpi.add_record(np.array(avg_loss, dtype='float32')) - train_seq_err_kpi.add_record(np.array(avg_seq_err, dtype='float32')) + train_avg_loss_kpi.add_record( + np.array( + avg_loss, dtype='float32')) + train_seq_err_kpi.add_record( + 
np.array( + avg_seq_err, dtype='float32')) break # evaluate if batch_id % args.eval_period == 0: @@ -121,7 +125,7 @@ def train(args, data_reader=ctc_reader): batch_id += 1 train_avg_loss_kpi.persist() - train_seq_err_kpi.persist() + train_seq_err_kpi.persist() def main(): diff --git a/__ocr_recognition/eval.py b/__ocr_recognition/eval.py index be0a0438..1c33ff36 100644 --- a/__ocr_recognition/eval.py +++ b/__ocr_recognition/eval.py @@ -54,7 +54,8 @@ def evaluate(args, eval=ctc_eval, data_reader=ctc_reader): count = 0 for data in test_reader(): count += 1 - exe.run(fluid.default_main_program(), feed=get_feeder_data(data, place)) + exe.run(fluid.default_main_program(), + feed=get_feeder_data(data, place)) avg_distance, avg_seq_error = evaluator.eval(exe) print "Read %d samples; avg_distance: %s; avg_seq_error: %s" % ( count, avg_distance, avg_seq_error) diff --git a/resnet30/__init__.py b/__resnet30/__init__.py similarity index 100% rename from resnet30/__init__.py rename to __resnet30/__init__.py diff --git a/__resnet30/continuous_evaluation.py b/__resnet30/continuous_evaluation.py new file mode 100644 index 00000000..0ac5f0b4 --- /dev/null +++ b/__resnet30/continuous_evaluation.py @@ -0,0 +1,19 @@ +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi, AccKpi, DurationKpi + +train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True) +train_acc_kpi = AccKpi('train_acc', 0.02, 0, actived=True) +test_acc_kpi = AccKpi('test_acc', 0.05, 0, actived=True) +train_speed_kpi = AccKpi('train_speed', 0.01, 0, actived=True) +train_duration_kpi = DurationKpi('train_duration', 0.02, 0, actived=True) + + +tracking_kpis = [ + train_cost_kpi, + train_acc_kpi, + test_acc_kpi, + train_speed_kpi, + train_duration_kpi, +] diff --git a/resnet30/history/train_cost_factor.txt b/__resnet30/history/train_cost_factor.txt similarity index 79% rename from resnet30/history/train_cost_factor.txt rename to __resnet30/history/train_cost_factor.txt index cf262792..c46c47b0 100644 --- a/resnet30/history/train_cost_factor.txt +++ b/__resnet30/history/train_cost_factor.txt @@ -2,4 +2,4 @@ [[2.744691848754883]] [[2.5916006565093994]] [[2.459857225418091]] -[[2.3514037132263184]] \ No newline at end of file +[[2.3514037132263184]] diff --git a/resnet30/history/train_duration_factor.txt b/__resnet30/history/train_duration_factor.txt similarity index 80% rename from resnet30/history/train_duration_factor.txt rename to __resnet30/history/train_duration_factor.txt index f5c4f294..ebc0b54b 100644 --- a/resnet30/history/train_duration_factor.txt +++ b/__resnet30/history/train_duration_factor.txt @@ -2,4 +2,4 @@ [10.211545944213867] [10.223276853561401] [10.213245153427124] -[10.241420984268188] \ No newline at end of file +[10.241420984268188] diff --git a/__resnet30/latest_kpis/test_acc_factor.txt b/__resnet30/latest_kpis/test_acc_factor.txt new file mode 100644 index 00000000..d7046f5b --- /dev/null +++ b/__resnet30/latest_kpis/test_acc_factor.txt @@ -0,0 +1 @@ +[0.459300000667572] diff --git a/__resnet30/latest_kpis/train_acc_factor.txt b/__resnet30/latest_kpis/train_acc_factor.txt new file mode 100644 index 00000000..bab1e137 --- /dev/null +++ b/__resnet30/latest_kpis/train_acc_factor.txt @@ -0,0 +1 @@ +[0.56150390625] diff --git a/resnet30/latest_kpis/train_cost_factor.txt b/__resnet30/latest_kpis/train_cost_factor.txt similarity index 90% rename from resnet30/latest_kpis/train_cost_factor.txt rename to __resnet30/latest_kpis/train_cost_factor.txt index 29d8c699..c1286410 100644 --- 
a/resnet30/latest_kpis/train_cost_factor.txt +++ b/__resnet30/latest_kpis/train_cost_factor.txt @@ -7,4 +7,4 @@ [[2.416034460067749]] [[2.4315545558929443]] [[2.4579968452453613]] -[[2.449829578399658]] \ No newline at end of file +[[2.449829578399658]] diff --git a/resnet30/latest_kpis/train_duration_factor.txt b/__resnet30/latest_kpis/train_duration_factor.txt similarity index 89% rename from resnet30/latest_kpis/train_duration_factor.txt rename to __resnet30/latest_kpis/train_duration_factor.txt index 4fdf5fe6..6007f902 100644 --- a/resnet30/latest_kpis/train_duration_factor.txt +++ b/__resnet30/latest_kpis/train_duration_factor.txt @@ -7,4 +7,4 @@ [21.280965089797974] [21.29200315475464] [21.28358292579651] -[21.292808055877686] \ No newline at end of file +[21.292808055877686] diff --git a/__resnet30/latest_kpis/train_speed_factor.txt b/__resnet30/latest_kpis/train_speed_factor.txt new file mode 100644 index 00000000..6c50ba81 --- /dev/null +++ b/__resnet30/latest_kpis/train_speed_factor.txt @@ -0,0 +1 @@ +[55.21354293823242] diff --git a/resnet30/model.py b/__resnet30/model.py similarity index 57% rename from resnet30/model.py rename to __resnet30/model.py index 0140ab72..710d70cb 100644 --- a/resnet30/model.py +++ b/__resnet30/model.py @@ -11,9 +11,7 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core - -from continuous_evaluation import (train_cost_kpi, train_duration_kpi, - tracking_kpis) +from continuous_evaluation import * logger = logging.getLogger(__name__) @@ -85,86 +83,123 @@ def train(batch_size, device, pass_num, iterations): input = fluid.layers.data(name='data', shape=dshape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') + # Train program predict = resnet_cifar10(input, class_dim) cost = fluid.layers.cross_entropy(input=predict, label=label) avg_cost = fluid.layers.mean(x=cost) - optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) - opts = optimizer.minimize(avg_cost) - # accuracy = fluid.evaluator.Evaluator(input=predict, label=label) + + # Evaluator + #accuracy = fluid.evaluator.Evaluator(input=predict, label=label) + + batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_acc = fluid.layers.accuracy( + input=predict, label=label, total=batch_size_tensor) + accuracy = fluid.average.WeightedAverage() # inference program inference_program = fluid.default_main_program().clone() with fluid.program_guard(inference_program): # test_target = accuracy.metrics + accuracy.states - test_target = [predict, avg_cost] - inference_program = fluid.io.get_inference_program(test_target) + target_vars=[batch_acc, batch_size_tensor] + inference_program = fluid.io.get_inference_program(target_vars) + # Optimization + optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) + opts = optimizer.minimize(avg_cost) fluid.memory_optimize(fluid.default_main_program()) train_reader = paddle.batch( - paddle.dataset.cifar.train10(), - batch_size=batch_size) + paddle.dataset.cifar.train10(), batch_size=batch_size) test_reader = paddle.batch( paddle.dataset.cifar.test10(), batch_size=batch_size) + # Initialize executor + place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0) + exe = fluid.Executor(place) + + # Parameter initialization + exe.run(fluid.default_startup_program()) + def test(exe): - # accuracy.reset(exe) + test_accuracy = fluid.average.WeightedAverage() for batch_id, data in enumerate(test_reader()): img_data = np.array(map(lambda x: x[0].reshape(dshape), 
data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = y_data.reshape([-1, 1]) - # print('image_data', img_data) - # print('y_data', y_data) - - predict_, avg_cost_ = exe.run( - inference_program, - feed={ - "data": img_data, - "label": y_data - }, - fetch_list=[predict, avg_cost]) - return avg_cost + acc, weight = exe.run(inference_program, + feed={"data": img_data, + "label": y_data}, + fetch_list=[batch_acc, batch_size_tensor]) + test_accuracy.add(value=acc, weight=weight) - # return accuracy.eval(exe) - - place = core.CPUPlace() if device == 'CPU' else core.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) + return test_accuracy.eval() - for pass_id in range(1): - logger.warning('Pass {}'.format(pass_id)) - # accuracy.reset(exe) + im_num = 0 + total_train_time = 0.0 + for pass_id in range(args.pass_num): iter = 0 + every_pass_loss = [] + accuracy.reset() + pass_duration = 0.0 for batch_id, data in enumerate(train_reader()): logger.warning('Batch {}'.format(batch_id)) batch_start = time.time() if iter == iterations: break - image = np.array(map(lambda x: x[0].reshape(dshape), - data)).astype('float32') + image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype( + 'float32') label = np.array(map(lambda x: x[1], data)).astype('int64') label = label.reshape([-1, 1]) - avg_cost_ = exe.run( + + loss, acc, weight = exe.run( fluid.default_main_program(), - feed={ - 'data': image, - 'label': label - }, - fetch_list=[avg_cost]) + feed={'data': image, + 'label': label}, + fetch_list=[avg_cost, batch_acc, batch_size_tensor]) + batch_end = time.time() - print('avg_cost', np.array(avg_cost_, dtype='float32')) - train_cost_kpi.add_record(np.array(avg_cost_, dtype='float32')) - train_duration_kpi.add_record(batch_end - batch_start) + every_pass_loss.append(loss) + accuracy.add(value=acc, weight=weight) + + + if iter >= args.skip_batch_num or pass_id != 0: + batch_duration = time.time() - batch_start + pass_duration += batch_duration + im_num += label.shape[0] iter += 1 - # test_start = time.time() - # test(exe) - # test_end = time.time() - # valid_tracker.add(test_end - test_start, pass_test_acc) + print( + "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" % + (pass_id, iter, loss, acc)) + pass_train_acc = accuracy.eval() + pass_test_acc = test(exe) + + total_train_time += pass_duration + pass_train_loss = np.mean(every_pass_loss) + print( + "Pass:%d, Loss:%f, Train Accuracy:%f, Test Accuracy:%f, Handle Images Duration: %f\n" + % (pass_id, pass_train_loss, pass_train_acc, + pass_test_acc, pass_duration)) + if pass_id == args.pass_num - 1: + train_cost_kpi.add_record(np.array(pass_train_loss, dtype='float32')) + train_cost_kpi.persist() + train_acc_kpi.add_record(np.array(pass_train_acc, dtype='float32')) + train_acc_kpi.persist() + test_acc_kpi.add_record(np.array(pass_test_acc, dtype='float32')) + test_acc_kpi.persist() + train_duration_kpi.add_record(batch_end - batch_start) + train_duration_kpi.persist() + + if total_train_time > 0.0: + examples_per_sec = im_num / total_train_time + sec_per_batch = total_train_time / \ + (iter * args.pass_num - args.skip_batch_num) + train_speed_kpi.add_record(np.array(examples_per_sec, dtype='float32')) + train_speed_kpi.persist() def parse_args(): @@ -172,6 +207,14 @@ def parse_args(): parser.add_argument('--batch_size', type=int) parser.add_argument('--device', type=str, choices=('CPU', 'GPU')) parser.add_argument('--iters', type=int) + parser.add_argument( + '--pass_num',
type=int, default=3, help='The number of passes.') + parser.add_argument( + '--skip_batch_num', + type=int, + default=5, + help='The first num of minibatch num to skip, for better performance test' + ) args = parser.parse_args() return args diff --git a/resnet30/run.xsh b/__resnet30/run.xsh similarity index 81% rename from resnet30/run.xsh rename to __resnet30/run.xsh index 11393550..33644395 100755 --- a/resnet30/run.xsh +++ b/__resnet30/run.xsh @@ -9,4 +9,4 @@ import sys model_file = 'model.py' -python @(model_file) --batch_size 1000 --iters 10 --device CPU +python @(model_file) --batch_size 128 --pass_num 5 --iters 80 --device CPU diff --git a/image_classification/continuous_evaluation.py b/image_classification/continuous_evaluation.py index 4033e7c2..163251bf 100644 --- a/image_classification/continuous_evaluation.py +++ b/image_classification/continuous_evaluation.py @@ -3,13 +3,26 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_acc_top1_kpi = AccKpi('train_acc_top1_kpi', 0.2, 0) -train_acc_top5_kpi = AccKpi('train_acc_top5_kpi', 0.2, 0) -train_cost_kpi = CostKpi('train_cost_kpi', 0.2, 0) +train_acc_top1_kpi = AccKpi('train_acc_top1_kpi', 0.05, 0, + actived=True, + desc='TOP1 ACC') +train_acc_top5_kpi = AccKpi('train_acc_top5_kpi', 0.05, 0, + actived=True, + desc='TOP5 ACC') +train_cost_kpi = CostKpi('train_cost_kpi', 0.05, 0, + actived=True, + desc='train cost') +train_speed_kpi = AccKpi('train_speed_kpi', 0.05, 0, + actived=True, + unit_repr='images/s', + desc='train speed in one GPU card') +four_card_train_speed_kpi = AccKpi('four_card_train_speed_kpi', 0.05, 0, + actived=True, + unit_repr='images/s', + desc='train speed in four GPU card') - -tracking_kpis = [ - train_acc_top1_kpi, - train_acc_top5_kpi, - train_cost_kpi -] +tracking_kpis = [train_acc_top1_kpi, + train_acc_top5_kpi, + train_cost_kpi, + train_speed_kpi, + four_card_train_speed_kpi] diff --git a/image_classification/latest_kpis/four_card_train_speed_kpi_factor.txt b/image_classification/latest_kpis/four_card_train_speed_kpi_factor.txt new file mode 100644 index 00000000..f2ce4eee --- /dev/null +++ b/image_classification/latest_kpis/four_card_train_speed_kpi_factor.txt @@ -0,0 +1 @@ +[174.80782203734947] \ No newline at end of file diff --git a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt index bbdc30d0..351fea16 100644 --- a/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt +++ b/image_classification/latest_kpis/train_acc_top1_kpi_factor.txt @@ -1 +1 @@ -[0.33659327030181885] \ No newline at end of file +[0.3767074942588806] \ No newline at end of file diff --git a/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt index e7112b7b..9e1b075c 100644 --- a/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt +++ b/image_classification/latest_kpis/train_acc_top5_kpi_factor.txt @@ -1 +1 @@ -[0.5419688820838928] \ No newline at end of file +[0.5719688820838928] diff --git a/image_classification/latest_kpis/train_cost_kpi_factor.txt b/image_classification/latest_kpis/train_cost_kpi_factor.txt index 384913f6..c896d6ff 100644 --- a/image_classification/latest_kpis/train_cost_kpi_factor.txt +++ b/image_classification/latest_kpis/train_cost_kpi_factor.txt @@ -1 +1 @@ -[2.975904941558838] \ No newline at end of file +[2.875904941558838] diff --git 
a/image_classification/latest_kpis/train_speed_kpi_factor.txt b/image_classification/latest_kpis/train_speed_kpi_factor.txt new file mode 100644 index 00000000..6a664608 --- /dev/null +++ b/image_classification/latest_kpis/train_speed_kpi_factor.txt @@ -0,0 +1 @@ +[101.29667191639184] \ No newline at end of file diff --git a/image_classification/mobilenet.py b/image_classification/mobilenet.py new file mode 100644 index 00000000..0a8197f1 --- /dev/null +++ b/image_classification/mobilenet.py @@ -0,0 +1,153 @@ +import os + +import paddle +import paddle.fluid as fluid +from paddle.fluid.initializer import MSRA +from paddle.fluid.param_attr import ParamAttr + +parameter_attr = ParamAttr(initializer=MSRA()) + + +def conv_bn_layer(input, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + act='relu', + use_cudnn=True): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=parameter_attr, + bias_attr=False) + return fluid.layers.batch_norm(input=conv, act=act) + + +def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride, + scale): + depthwise_conv = conv_bn_layer( + input=input, + filter_size=3, + num_filters=int(num_filters1 * scale), + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + use_cudnn=False) + + pointwise_conv = conv_bn_layer( + input=depthwise_conv, + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0) + return pointwise_conv + + +def mobile_net(img, class_dim, scale=1.0): + + # conv1: 112x112 + tmp = conv_bn_layer( + img, + filter_size=3, + channels=3, + num_filters=int(32 * scale), + stride=2, + padding=1) + + # 56x56 + tmp = depthwise_separable( + tmp, + num_filters1=32, + num_filters2=64, + num_groups=32, + stride=1, + scale=scale) + + tmp = depthwise_separable( + tmp, + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=2, + scale=scale) + + # 28x28 + tmp = depthwise_separable( + tmp, + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=scale) + + tmp = depthwise_separable( + tmp, + num_filters1=128, + num_filters2=256, + num_groups=128, + stride=2, + scale=scale) + + # 14x14 + tmp = depthwise_separable( + tmp, + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=scale) + + tmp = depthwise_separable( + tmp, + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=2, + scale=scale) + + # 14x14 + for i in range(5): + tmp = depthwise_separable( + tmp, + num_filters1=512, + num_filters2=512, + num_groups=512, + stride=1, + scale=scale) + # 7x7 + tmp = depthwise_separable( + tmp, + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=2, + scale=scale) + + tmp = depthwise_separable( + tmp, + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=1, + scale=scale) + + tmp = fluid.layers.pool2d( + input=tmp, + pool_size=0, + pool_stride=1, + pool_type='avg', + global_pooling=True) + + tmp = fluid.layers.fc(input=tmp, + size=class_dim, + act='softmax', + param_attr=parameter_attr) + return tmp diff --git a/image_classification/reader.py b/image_classification/reader.py index 50bea712..4061d1d3 100644 --- a/image_classification/reader.py +++ b/image_classification/reader.py @@ -3,7 +3,7 @@ import random import functools import numpy as np -import paddle as paddle +import paddle from PIL import Image, ImageEnhance random.seed(0) @@ -123,7 
+123,7 @@ def process_image(sample, mode, color_jitter, rotate): if mode == 'train' or mode == 'test': return img, sample[1] elif mode == 'infer': - return img + return [img] def _reader_creator(file_list, @@ -151,13 +151,13 @@ def reader(): return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE) -def train(): +def train(file_list=TRAIN_LIST): return _reader_creator( - TRAIN_LIST, 'train', shuffle=True, color_jitter=True, rotate=True) + file_list, 'train', shuffle=True, color_jitter=False, rotate=False) -def test(): - return _reader_creator(TEST_LIST, 'test', shuffle=False) +def test(file_list=TEST_LIST): + return _reader_creator(file_list, 'test', shuffle=False) def infer(file_list): diff --git a/image_classification/run.xsh b/image_classification/run.xsh index d6c0aace..f003feb2 100755 --- a/image_classification/run.xsh +++ b/image_classification/run.xsh @@ -2,8 +2,13 @@ export MKL_NUM_THREADS=1 export OMP_NUM_THREADS=1 -cudaid=${se_resnext_cudaid:=0} # use 0-th card as default +cudaid=${multi_se_resnext_cudaid:=0,1,2,3} # use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid -FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.9 python train.py --batch_size=32 +FLAGS_benchmark=true python train.py --batch_size=64 +mv train_speed_kpi_factor.txt four_card_train_speed_kpi_factor.txt + +cudaid=${se_resnext_cudaid:=0} # use 0-th card as default +export CUDA_VISIBLE_DEVICES=$cudaid +FLAGS_benchmark=true python train.py --batch_size=64 diff --git a/image_classification/se_resnext.py b/image_classification/se_resnext.py index 6b448d40..ad533c75 100644 --- a/image_classification/se_resnext.py +++ b/image_classification/se_resnext.py @@ -1,5 +1,14 @@ -import paddle as paddle +import os +import numpy as np +import time +import sys +import paddle import paddle.fluid as fluid +import reader +import paddle.fluid.layers.control_flow as control_flow +import paddle.fluid.layers.nn as nn +import paddle.fluid.layers.tensor as tensor +import math def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1, @@ -19,23 +28,28 @@ def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1, def squeeze_excitation(input, num_channels, reduction_ratio): pool = fluid.layers.pool2d( input=input, pool_size=0, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) squeeze = fluid.layers.fc(input=pool, size=num_channels / reduction_ratio, - act='relu') + act='relu', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, + stdv))) + stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) excitation = fluid.layers.fc(input=squeeze, size=num_channels, - act='sigmoid') + act='sigmoid', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform( + -stdv, stdv))) scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) return scale def shortcut(input, ch_out, stride): ch_in = input.shape[1] - if ch_in != ch_out: - if stride == 1: - filter_size = 1 - else: - filter_size = 3 + if ch_in != ch_out or stride != 1: + filter_size = 1 return conv_bn_layer(input, ch_out, filter_size, stride) else: return input @@ -66,8 +80,8 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio): def SE_ResNeXt(input, class_dim, infer=False, layers=50): supported_layers = [50, 152] if layers not in supported_layers: - print("supported layers are", supported_layers, "but input layer is", - layers) + print("supported layers are", supported_layers, \ + "but input layer is ", layers) exit() if 
layers == 50: cardinality = 32 @@ -96,10 +110,7 @@ def SE_ResNeXt(input, class_dim, infer=False, layers=50): conv = conv_bn_layer( input=conv, num_filters=128, filter_size=3, stride=1, act='relu') conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, + input=conv, pool_size=3, pool_stride=2, pool_padding=1, \ pool_type='max') for block in range(len(depth)): @@ -112,10 +123,16 @@ def SE_ResNeXt(input, class_dim, infer=False, layers=50): reduction_ratio=reduction_ratio) pool = fluid.layers.pool2d( - input=conv, pool_size=0, pool_type='avg', global_pooling=True) + input=conv, pool_size=7, pool_type='avg', global_pooling=True) if not infer: - drop = fluid.layers.dropout(x=pool, dropout_prob=0.2) + drop = fluid.layers.dropout(x=pool, dropout_prob=0.5) else: drop = pool - out = fluid.layers.fc(input=drop, size=class_dim, act='softmax') + stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0) + out = fluid.layers.fc(input=drop, + size=class_dim, + act='softmax', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, + stdv))) return out diff --git a/image_classification/train.py b/image_classification/train.py index ef5b53d4..c7a4fb43 100644 --- a/image_classification/train.py +++ b/image_classification/train.py @@ -2,25 +2,47 @@ import numpy as np import time import sys -import paddle as paddle +import paddle import paddle.fluid as fluid from se_resnext import SE_ResNeXt +from mobilenet import mobile_net import paddle.dataset.flowers as flowers import reader import argparse import functools +import paddle.fluid.layers.ops as ops from utility import add_arguments, print_arguments +from paddle.fluid.initializer import init_on_cpu +from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter +import math -from continuous_evaluation import (train_acc_top1_kpi, train_acc_top5_kpi, train_cost_kpi) +from continuous_evaluation import (train_acc_top1_kpi, train_acc_top5_kpi, + train_cost_kpi, train_speed_kpi) parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable add_arg('batch_size', int, 256, "Minibatch size.") add_arg('num_layers', int, 50, "How many layers for SE-ResNeXt model.") add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.") add_arg('parallel_exe', bool, True, "Whether to use ParallelExecutor to train or not.") -# yapf: enable +add_arg('init_model', str, None, "Whether to use initialized model.") +add_arg('pretrained_model', str, None, "Whether to use pretrained model.") +add_arg('lr_strategy', str, "cosine_decay", + "Set the learning rate decay strategy.") +add_arg('model', str, "se_resnext", "Set the network to use.") + + +def cosine_decay(learning_rate, step_each_epoch, epochs=120): + """Applies cosine decay to the learning rate. 
+ lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1) + """ + global_step = _decay_step_counter() + + with init_on_cpu(): + epoch = ops.floor(global_step / step_each_epoch) + decayed_lr = learning_rate * \ + (ops.cos(epoch * (math.pi / epochs)) + 1)/2 + return decayed_lr def train_parallel_do(args, @@ -28,6 +50,7 @@ def train_parallel_do(args, batch_size, num_passes, init_model=None, + pretrained_model=None, model_save_dir='model', parallel=True, use_nccl=True, @@ -46,7 +69,12 @@ def train_parallel_do(args, with pd.do(): image_ = pd.read_input(image) label_ = pd.read_input(label) - out = SE_ResNeXt(input=image_, class_dim=class_dim, layers=layers) + if args.model is 'se_resnext': + out = SE_ResNeXt( + input=image_, class_dim=class_dim, layers=layers) + else: + out = mobile_net(img=image_, class_dim=class_dim) + cost = fluid.layers.cross_entropy(input=out, label=label_) avg_cost = fluid.layers.mean(x=cost) acc_top1 = fluid.layers.accuracy(input=out, label=label_, k=1) @@ -60,7 +88,11 @@ def train_parallel_do(args, acc_top1 = fluid.layers.mean(x=acc_top1) acc_top5 = fluid.layers.mean(x=acc_top5) else: - out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers) + if args.model is 'se_resnext': + out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers) + else: + out = mobile_net(img=image, class_dim=class_dim) + cost = fluid.layers.cross_entropy(input=out, label=label) avg_cost = fluid.layers.mean(x=cost) acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) @@ -68,17 +100,27 @@ def train_parallel_do(args, inference_program = fluid.default_main_program().clone(for_test=True) - if lr_strategy is None: + if "piecewise_decay" in lr_strategy: + bd = lr_strategy["piecewise_decay"]["bd"] + lr = lr_strategy["piecewise_decay"]["lr"] optimizer = fluid.optimizer.Momentum( - learning_rate=learning_rate, + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr), + momentum=0.9, + regularization=fluid.regularizer.L2Decay(1e-4)) + elif "cosine_decay" in lr_strategy: + step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"] + epochs = lr_strategy["cosine_decay"]["epochs"] + optimizer = fluid.optimizer.Momentum( + learning_rate=cosine_decay( + learning_rate=learning_rate, + step_each_epoch=step_each_epoch, + epochs=epochs), momentum=0.9, regularization=fluid.regularizer.L2Decay(1e-4)) else: - bd = lr_strategy["bd"] - lr = lr_strategy["lr"] optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr), + learning_rate=learning_rate, momentum=0.9, regularization=fluid.regularizer.L2Decay(1e-4)) @@ -93,8 +135,15 @@ def train_parallel_do(args, if init_model is not None: fluid.io.load_persistables(exe, init_model) - train_reader = paddle.batch(flowers.train(), batch_size=batch_size) - test_reader = paddle.batch(flowers.test(), batch_size=batch_size) + if pretrained_model: + + def if_exist(var): + return os.path.exists(os.path.join(pretrained_model, var.name)) + + fluid.io.load_vars(exe, pretrained_model, predicate=if_exist) + + train_reader = paddle.batch(reader.train(), batch_size=batch_size) + test_reader = paddle.batch(reader.test(), batch_size=batch_size) feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) for pass_id in range(num_passes): @@ -152,7 +201,8 @@ def train_parallel_do(args, test_acc5)) sys.stdout.flush() - model_path = os.path.join(model_save_dir, str(pass_id)) + model_path = os.path.join(model_save_dir + '/' + args.model, + str(pass_id)) if not os.path.isdir(model_path): 
os.makedirs(model_path) fluid.io.save_persistables(exe, model_path) @@ -163,6 +213,7 @@ def train_parallel_exe(args, batch_size, num_passes, init_model=None, + pretrained_model=None, model_save_dir='model', parallel=True, use_nccl=True, @@ -173,7 +224,11 @@ def train_parallel_exe(args, image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') - out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers) + if args.model is 'se_resnext': + out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers) + else: + out = mobile_net(img=image, class_dim=class_dim) + cost = fluid.layers.cross_entropy(input=out, label=label) acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) @@ -181,17 +236,27 @@ def train_parallel_exe(args, test_program = fluid.default_main_program().clone(for_test=True) - if lr_strategy is None: + if "piecewise_decay" in lr_strategy: + bd = lr_strategy["piecewise_decay"]["bd"] + lr = lr_strategy["piecewise_decay"]["lr"] optimizer = fluid.optimizer.Momentum( - learning_rate=learning_rate, + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr), + momentum=0.9, + regularization=fluid.regularizer.L2Decay(1e-4)) + elif "cosine_decay" in lr_strategy: + step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"] + epochs = lr_strategy["cosine_decay"]["epochs"] + optimizer = fluid.optimizer.Momentum( + learning_rate=cosine_decay( + learning_rate=learning_rate, + step_each_epoch=step_each_epoch, + epochs=epochs), momentum=0.9, regularization=fluid.regularizer.L2Decay(1e-4)) else: - bd = lr_strategy["bd"] - lr = lr_strategy["lr"] optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr), + learning_rate=learning_rate, momentum=0.9, regularization=fluid.regularizer.L2Decay(1e-4)) @@ -208,6 +273,13 @@ def train_parallel_exe(args, if init_model is not None: fluid.io.load_persistables(exe, init_model) + if pretrained_model: + + def if_exist(var): + return os.path.exists(os.path.join(pretrained_model, var.name)) + + fluid.io.load_vars(exe, pretrained_model, predicate=if_exist) + train_reader = paddle.batch(flowers.train(), batch_size=batch_size) test_reader = paddle.batch(flowers.test(), batch_size=batch_size) feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) @@ -217,16 +289,21 @@ def train_parallel_exe(args, use_cuda=True, main_program=test_program, share_vars_from=train_exe) fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name] - + train_speed = [] for pass_id in range(num_passes): train_info = [[], [], []] test_info = [[], [], []] + pass_time = 0 + pass_num = 0 + pass_speed = 0.0 for batch_id, data in enumerate(train_reader()): t1 = time.time() loss, acc1, acc5 = train_exe.run(fetch_list, feed=feeder.feed(data)) t2 = time.time() period = t2 - t1 + pass_time += period + pass_num += len(data) loss = np.mean(np.array(loss)) acc1 = np.mean(np.array(acc1)) acc5 = np.mean(np.array(acc5)) @@ -244,14 +321,17 @@ def train_parallel_exe(args, train_loss = np.array(train_info[0]).mean() train_acc1 = np.array(train_info[1]).mean() train_acc5 = np.array(train_info[2]).mean() - if pass_id == num_passes - 1: - train_acc_top1_kpi.add_record(train_acc1) - train_acc_top5_kpi.add_record(train_acc5) - train_cost_kpi.add_record(train_loss) + pass_speed = pass_num / pass_time + train_speed.append(pass_speed) + if pass_id == num_passes - 1: + 
train_acc_top1_kpi.add_record(train_acc1) + train_acc_top5_kpi.add_record(train_acc5) + train_cost_kpi.add_record(train_loss) + mean_pass_speed = np.array(pass_speed).mean() + train_speed_kpi.add_record(mean_pass_speed) for data in test_reader(): t1 = time.time() - loss, acc1, acc5 = test_exe.run(fetch_list, - feed=feeder.feed(data)) + loss, acc1, acc5 = test_exe.run(fetch_list, feed=feeder.feed(data)) t2 = time.time() period = t2 - t1 loss = np.mean(np.array(loss)) @@ -273,39 +353,54 @@ def train_parallel_exe(args, test_acc5 = np.array(test_info[2]).mean() print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \ - test_loss {4}, test_acc1 {5}, test_acc5 {6}" + test_loss {4}, test_acc1 {5}, test_acc5 {6}, pass_time {7}, train_speed {8}" .format(pass_id, \ train_loss, train_acc1, train_acc5, test_loss, test_acc1, \ - test_acc5)) + test_acc5, pass_time, pass_num / pass_time)) sys.stdout.flush() train_acc_top1_kpi.persist() train_acc_top5_kpi.persist() train_cost_kpi.persist() + train_speed_kpi.persist() if __name__ == '__main__': args = parser.parse_args() print_arguments(args) - epoch_points = [30, 60, 90] total_images = 1281167 batch_size = args.batch_size step = int(total_images / batch_size + 1) - bd = [e * step for e in epoch_points] - lr = [0.1, 0.01, 0.001, 0.0001] - - lr_strategy = {"bd": bd, "lr": lr} + num_epochs = 5 + + learning_rate_mode = args.lr_strategy + lr_strategy = {} + if learning_rate_mode == "piecewise_decay": + epoch_points = [30, 60, 90] + bd = [e * step for e in epoch_points] + lr = [0.1, 0.01, 0.001, 0.0001] + lr_strategy[learning_rate_mode] = {"bd": bd, "lr": lr} + elif learning_rate_mode == "cosine_decay": + lr_strategy[learning_rate_mode] = { + "step_each_epoch": step, + "epochs": num_epochs + } + else: + lr_strategy = None use_nccl = True # layers: 50, 152 layers = args.num_layers method = train_parallel_exe if args.parallel_exe else train_parallel_do + init_model = args.init_model if args.init_model else None + pretrained_model = args.pretrained_model if args.pretrained_model else None method( args, learning_rate=0.1, batch_size=batch_size, - num_passes=5, - init_model=None, + num_passes=num_epochs, + init_model=init_model, + pretrained_model=pretrained_model, parallel=True, use_nccl=True, lr_strategy=lr_strategy, diff --git a/language_model/continuous_evaluation.py b/language_model/continuous_evaluation.py index e12f2b28..a7bc53bd 100644 --- a/language_model/continuous_evaluation.py +++ b/language_model/continuous_evaluation.py @@ -8,8 +8,8 @@ from kpi import DurationKpi imikolov_20_avg_ppl_kpi = CostKpi('imikolov_20_avg_ppl', 0.2, 0) -imikolov_20_pass_duration_kpi = DurationKpi('imikolov_20_pass_duration', 0.2, 0) - +imikolov_20_pass_duration_kpi = DurationKpi('imikolov_20_pass_duration', 0.02, + 0, actived=True) tracking_kpis = [ imikolov_20_avg_ppl_kpi, diff --git a/language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt b/language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt index 47024cc1..b570c294 100644 --- a/language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt +++ b/language_model/latest_kpis/imikolov_20_avg_ppl_factor.txt @@ -1 +1 @@ -[32.465272032979705] \ No newline at end of file +[32.465272032979705] diff --git a/language_model/latest_kpis/imikolov_20_pass_duration_factor.txt b/language_model/latest_kpis/imikolov_20_pass_duration_factor.txt index 40b5cde3..c9ab10b4 100644 --- a/language_model/latest_kpis/imikolov_20_pass_duration_factor.txt +++ b/language_model/latest_kpis/imikolov_20_pass_duration_factor.txt @@ -1 +1 @@ 
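The cosine_decay helper added to image_classification/train.py above evaluates lr(epoch) = base_lr * (cos(epoch * pi / epochs) + 1) / 2, with epoch derived from the global step counter. A plain-Python restatement to sanity-check the endpoints (illustrative only; the committed version builds the same expression from fluid ops so it runs in-graph):

    import math

    def cosine_decay(base_lr, epoch, epochs):
        return base_lr * (math.cos(epoch * math.pi / epochs) + 1) / 2

    cosine_decay(0.1, 0, 5)  # 0.1: cos(0) = 1, the schedule starts at the full rate
    cosine_decay(0.1, 2, 5)  # ~0.0655: partway through the five training passes
    cosine_decay(0.1, 5, 5)  # 0.0: cos(pi) = -1, the rate reaches zero after the last pass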
-[17.121339857578278] \ No newline at end of file +[29.741339857578278] diff --git a/language_model/train.py b/language_model/train.py index 2430af72..773c7431 100644 --- a/language_model/train.py +++ b/language_model/train.py @@ -5,12 +5,13 @@ import math import paddle.fluid as fluid -import paddle +import paddle import utils from continuous_evaluation import imikolov_20_avg_ppl_kpi, imikolov_20_pass_duration_kpi + def network(src, dst, vocab_size, hid_size, init_low_bound, init_high_bound): """ network definition """ emb_lr_x = 10.0 @@ -129,16 +130,17 @@ def train(train_reader, t1 = time.time() total_time += t1 - t0 - print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i, - total_time / epoch_idx) + print "epoch:%d num_steps:%d time_cost(s):%f" % ( + epoch_idx, i, total_time / epoch_idx) if pass_idx == pass_num - 1: imikolov_20_pass_duration_kpi.add_record(total_time / epoch_idx) - imikolov_20_avg_ppl_kpi.add_record(newest_ppl) + imikolov_20_avg_ppl_kpi.add_record(newest_ppl) save_dir = "%s/epoch_%d" % (model_dir, epoch_idx) feed_var_names = ["src_wordseq", "dst_wordseq"] fetch_vars = [avg_cost] - fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe) + fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, + exe) print("model saved in %s" % save_dir) imikolov_20_pass_duration_kpi.persist() imikolov_20_avg_ppl_kpi.persist() diff --git a/language_model/utils.py b/language_model/utils.py index 68ab1f5e..9ca0ef4d 100644 --- a/language_model/utils.py +++ b/language_model/utils.py @@ -35,6 +35,7 @@ def prepare_data(batch_size, buffer_size=1000, word_freq_threshold=0): batch_size) test_reader = paddle.batch( paddle.dataset.imikolov.test( - vocab, buffer_size, data_type=paddle.dataset.imikolov.DataType.SEQ), + vocab, buffer_size, + data_type=paddle.dataset.imikolov.DataType.SEQ), batch_size) return vocab, train_reader, test_reader diff --git a/lstm/continuous_evaluation.py b/lstm/continuous_evaluation.py index 1a6a119a..e7efb66a 100644 --- a/lstm/continuous_evaluation.py +++ b/lstm/continuous_evaluation.py @@ -8,9 +8,8 @@ from kpi import CostKpi from kpi import DurationKpi -imdb_32_train_speed_kpi = AccKpi('imdb_32_train_speed', 0.2, 0) -imdb_32_gpu_memory_kpi = DurationKpi('imdb_32_gpu_memory', 0.2, 0) - +imdb_32_train_speed_kpi = AccKpi('imdb_32_train_speed', 0.03, 0, actived=True) +imdb_32_gpu_memory_kpi = DurationKpi('imdb_32_gpu_memory', 0.05, 0, actived=True) tracking_kpis = [ imdb_32_train_speed_kpi, diff --git a/lstm/get_gpu_data.py b/lstm/get_gpu_data.py index 7f06dd9e..7afb3fdb 100644 --- a/lstm/get_gpu_data.py +++ b/lstm/get_gpu_data.py @@ -5,7 +5,6 @@ # Copyright (c) 2018 Baidu.com, Inc. 
All Rights Reserved # ######################################################################## - """ File: get_gpu_data.py Author: paddle(paddle@baidu.com) @@ -14,7 +13,6 @@ import argparse from continuous_evaluation import tracking_kpis - parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( '--batch_size', type=int, default=128, help="Batch size for training.") @@ -25,20 +23,21 @@ help='Optional dataset for benchmark.') args = parser.parse_args() + def save_gpu_data(): mem_list = [] with open('memory.txt', 'r') as f: - for i , data in enumerate(f.readlines()): + for i, data in enumerate(f.readlines()): if i == 0: continue mem_list.append(int(data.split("\n")[0].split(" ")[0])) gpu_memory_factor = None - for kpi in tracking_kpis: + for kpi in tracking_kpis: if kpi.name == '%s_%s_gpu_memory' % (args.data_set, args.batch_size): gpu_memory_kpi = kpi gpu_memory_kpi.add_record(max(mem_list)) gpu_memory_kpi.persist() + if __name__ == "__main__": save_gpu_data() - diff --git a/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt b/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt index 4fb6348d..7a9fb042 100644 --- a/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt +++ b/lstm/latest_kpis/imdb_32_gpu_memory_factor.txt @@ -1 +1 @@ -[1560] \ No newline at end of file +[1560] diff --git a/lstm/latest_kpis/imdb_32_train_speed_factor.txt b/lstm/latest_kpis/imdb_32_train_speed_factor.txt index 7ab232fd..1f3013cb 100644 --- a/lstm/latest_kpis/imdb_32_train_speed_factor.txt +++ b/lstm/latest_kpis/imdb_32_train_speed_factor.txt @@ -1 +1 @@ -[883.2451171875] \ No newline at end of file +[779.2451171875] diff --git a/lstm/model.py b/lstm/model.py index 0f442ebf..418a79b6 100644 --- a/lstm/model.py +++ b/lstm/model.py @@ -24,6 +24,7 @@ from continuous_evaluation import tracking_kpis + def parse_args(): parser = argparse.ArgumentParser("Understand Sentiment by Dynamic RNN.") parser.add_argument( @@ -38,7 +39,10 @@ def parse_args(): help='The first num of minibatch num to skip, for better performance test' ) parser.add_argument( - '--iterations', type=int, default=80, help='The number of minibatches.') + '--iterations', + type=int, + default=80, + help='The number of minibatches.') parser.add_argument( '--emb_dim', type=int, @@ -173,7 +177,7 @@ def gate_common( crop_sentence(imdb.train(word_dict), args.crop_size), buf_size=25000), batch_size=args.batch_size) - + train_acc_kpi = None for kpi in tracking_kpis: if kpi.name == 'imdb_%s_train_acc' % (args.batch_size): @@ -247,19 +251,19 @@ def collect_gpu_memory_data(alive): global is_alive status, output = commands.getstatusoutput('rm -rf memory.txt') if status == 0: - print('del memory.txt') + print('del memory.txt') command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv -lms 1 > memory.txt" % args.gpu_id p = subprocess.Popen(command, shell=True) if p.pid < 0: - print('Get GPU memory data error') - while(is_alive): + print('Get GPU memory data error') + while (is_alive): time.sleep(1) p.kill() def save_gpu_data(mem_list): gpu_memory_kpi = None - for kpi in tracking_kpis: + for kpi in tracking_kpis: if kpi.name == 'imdb_%s_gpu_memory' % (args.batch_size): gpu_memory_kpi = kpi gpu_memory_kpi.add_record(max(mem_list)) @@ -272,7 +276,7 @@ def save_gpu_data(mem_list): global is_alive is_alive = True collect_memory_thread = threading.Thread( - target=collect_gpu_memory_data, args=(is_alive,)) + target=collect_gpu_memory_data, args=(is_alive, )) collect_memory_thread.setDaemon(True) collect_memory_thread.start() main() diff --git 
a/mnist/continuous_evaluation.py b/mnist/continuous_evaluation.py index 0698887b..25047026 100644 --- a/mnist/continuous_evaluation.py +++ b/mnist/continuous_evaluation.py @@ -3,12 +3,14 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_acc_kpi = AccKpi('train_acc', 0.05) -test_acc_kpi = AccKpi('test_acc', 0.05) -train_duration_kpi = DurationKpi('train_duration', 0.1) +train_cost_kpi = CostKpi('train_cost', 0.02, actived=True) +test_acc_kpi = AccKpi('test_acc', 0.005, actived=True) +train_duration_kpi = DurationKpi('train_duration', 0.02, actived=True) +train_acc_kpi = AccKpi('train_acc', 0.005, actived=True) tracking_kpis = [ train_acc_kpi, + train_cost_kpi, test_acc_kpi, train_duration_kpi, ] diff --git a/mnist/latest_kpis/test_acc_factor.txt b/mnist/latest_kpis/test_acc_factor.txt index 62e7af85..bdfcae70 100644 --- a/mnist/latest_kpis/test_acc_factor.txt +++ b/mnist/latest_kpis/test_acc_factor.txt @@ -2,4 +2,4 @@ [0.9839000105857849] [0.9868000149726868] [0.9866999983787537] -[0.9879000186920166] \ No newline at end of file +[0.9879000186920166] diff --git a/mnist/latest_kpis/train_acc_factor.txt b/mnist/latest_kpis/train_acc_factor.txt index a51eb966..856ba0ff 100644 --- a/mnist/latest_kpis/train_acc_factor.txt +++ b/mnist/latest_kpis/train_acc_factor.txt @@ -2,4 +2,4 @@ [0.9831333160400391] [0.9886166453361511] [0.9915000200271606] -[0.9929666519165039] \ No newline at end of file +[0.9929666519165039] diff --git a/mnist/latest_kpis/train_cost_factor.txt b/mnist/latest_kpis/train_cost_factor.txt new file mode 100644 index 00000000..7d7bd861 --- /dev/null +++ b/mnist/latest_kpis/train_cost_factor.txt @@ -0,0 +1,5 @@ +[0.05625442788004875] +[0.0373283299320031] +[0.0393865630030632] +[0.029800457879900932] +[0.02382788062095642] diff --git a/mnist/latest_kpis/train_duration_factor.txt b/mnist/latest_kpis/train_duration_factor.txt index ce48626b..ee89488a 100644 --- a/mnist/latest_kpis/train_duration_factor.txt +++ b/mnist/latest_kpis/train_duration_factor.txt @@ -2,4 +2,4 @@ [36.04332995414734] [36.20732808113098] [36.188393115997314] -[35.95417380332947] \ No newline at end of file +[35.95417380332947] diff --git a/mnist/model.py b/mnist/model.py index f180f3b5..6e167345 100644 --- a/mnist/model.py +++ b/mnist/model.py @@ -10,7 +10,7 @@ import paddle.fluid as fluid import paddle.fluid.profiler as profiler -from continuous_evaluation import (train_acc_kpi, test_acc_kpi, +from continuous_evaluation import (train_acc_kpi, train_cost_kpi, test_acc_kpi, train_duration_kpi, tracking_kpis) SEED = 1 DTYPE = "float32" @@ -49,8 +49,8 @@ def parse_args(): def print_arguments(args): - vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] - and vars(args)['device'] == 'GPU') + vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and + vars(args)['device'] == 'GPU') print('----------- Configuration Arguments -----------') for arg, value in sorted(vars(args).iteritems()): print('%s: %s' % (arg, value)) @@ -99,13 +99,10 @@ def eval_test(exe, batch_acc, batch_size_tensor, inference_program): y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = y_data.reshape([len(y_data), 1]) - acc, weight = exe.run( - inference_program, - feed={ - "pixel": img_data, - "label": y_data - }, - fetch_list=[batch_acc, batch_size_tensor]) + acc, weight = exe.run(inference_program, + feed={"pixel": img_data, + "label": y_data}, + fetch_list=[batch_acc, batch_size_tensor]) test_pass_acc.add(value=acc, weight=weight) pass_acc = test_pass_acc.eval() return pass_acc @@ 
-158,6 +155,7 @@ def run_benchmark(model, args): for pass_id in range(args.pass_num): accuracy.reset() pass_start = time.time() + every_pass_loss = [] for batch_id, data in enumerate(train_reader()): img_data = np.array( map(lambda x: x[0].reshape([1, 28, 28]), data)).astype(DTYPE) @@ -165,29 +163,30 @@ def run_benchmark(model, args): y_data = y_data.reshape([len(y_data), 1]) start = time.time() - outs = exe.run( + loss, acc, weight = exe.run( fluid.default_main_program(), - feed={ - "pixel": img_data, - "label": y_data - }, + feed={"pixel": img_data, + "label": y_data}, fetch_list=[avg_cost, batch_acc, batch_size_tensor] ) # The accuracy is the accumulation of batches, but not the current batch. - accuracy.add(value=outs[1], weight=outs[2]) end = time.time() - loss = np.array(outs[0]) - acc = np.array(outs[1]) + accuracy.add(value=acc, weight=weight) + every_pass_loss.append(loss) + print ("Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" % + (pass_id, batch_id, loss, acc)) pass_end = time.time() train_avg_acc = accuracy.eval() + train_avg_loss = np.mean(every_pass_loss) test_avg_acc = eval_test(exe, batch_acc, batch_size_tensor, inference_program) - print("pass=%d, train_avg_acc=%f, test_avg_acc=%f, elapse=%f" % - (pass_id, train_avg_acc, test_avg_acc, (pass_end - pass_start))) + print("pass=%d, train_avg_acc=%f,train_avg_loss=%f, test_avg_acc=%f, elapse=%f" % + (pass_id, train_avg_acc, train_avg_loss, test_avg_acc, (pass_end - pass_start))) train_acc_kpi.add_record(np.array(train_avg_acc, dtype='float32')) + train_cost_kpi.add_record(np.array(train_avg_loss, dtype='float32')) test_acc_kpi.add_record(np.array(test_avg_acc, dtype='float32')) train_duration_kpi.add_record(pass_end - pass_start) diff --git a/mnist/run.xsh b/mnist/run.xsh index 65f75488..504d3c63 100755 --- a/mnist/run.xsh +++ b/mnist/run.xsh @@ -2,5 +2,4 @@ import sys model_file = 'model.py' - python @(model_file) --batch_size 128 --pass_num 5 --device CPU diff --git a/object_detection/continuous_evaluation.py b/object_detection/continuous_evaluation.py index 47e904a3..465a7985 100644 --- a/object_detection/continuous_evaluation.py +++ b/object_detection/continuous_evaluation.py @@ -3,11 +3,8 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -train_cost_kpi = CostKpi('train_cost', 0.2, 0) -train_speed_kpi = AccKpi('train_speed', 0.2, 0) +train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True) +train_speed_kpi = AccKpi('train_speed', 0.02, 0, actived=True) +four_card_speed_kpi = AccKpi('four_card_train_speed', 0.02, 0, actived=True) - -tracking_kpis = [ - train_cost_kpi, - train_speed_kpi -] +tracking_kpis = [train_cost_kpi, train_speed_kpi, four_card_speed_kpi] diff --git a/object_detection/labels/create_list.py b/object_detection/labels/create_list.py index 1f53b182..8b472aac 100644 --- a/object_detection/labels/create_list.py +++ b/object_detection/labels/create_list.py @@ -36,8 +36,10 @@ def walk_dir(devkit_dir, year): added.add(name_prefix) ann_path = osp.join(annotation_dir, name_prefix + '.xml') img_path = osp.join(img_dir, name_prefix + '.jpg') - assert os.path.isfile(ann_path), 'file %s not found.' % ann_path - assert os.path.isfile(img_path), 'file %s not found.' % img_path + assert os.path.isfile( + ann_path), 'file %s not found.' % ann_path + assert os.path.isfile( + img_path), 'file %s not found.' 
% img_path img_ann_list.append((img_path, ann_path)) return trainval_list, test_list diff --git a/object_detection/latest_kpis/four_card_train_speed_factor.txt b/object_detection/latest_kpis/four_card_train_speed_factor.txt new file mode 100644 index 00000000..8ba14137 --- /dev/null +++ b/object_detection/latest_kpis/four_card_train_speed_factor.txt @@ -0,0 +1 @@ +[143.63855412820158] \ No newline at end of file diff --git a/object_detection/latest_kpis/train_cost_factor.txt b/object_detection/latest_kpis/train_cost_factor.txt index 9bfed99c..5b830a56 100644 --- a/object_detection/latest_kpis/train_cost_factor.txt +++ b/object_detection/latest_kpis/train_cost_factor.txt @@ -1 +1 @@ -[8.533137321472168] \ No newline at end of file +[8.613137321472168] diff --git a/object_detection/latest_kpis/train_speed_factor.txt b/object_detection/latest_kpis/train_speed_factor.txt index 000d4641..11510145 100644 --- a/object_detection/latest_kpis/train_speed_factor.txt +++ b/object_detection/latest_kpis/train_speed_factor.txt @@ -1 +1 @@ -[73.42477785941152] \ No newline at end of file +[71.42477785941152] diff --git a/object_detection/reader.py b/object_detection/reader.py index 78efcc4a..4ccf3326 100644 --- a/object_detection/reader.py +++ b/object_detection/reader.py @@ -48,8 +48,8 @@ def __init__(self, self._apply_expand = apply_expand self._resize_height = resize_h self._resize_width = resize_w - self._img_mean = np.array(mean_value)[:, np.newaxis, np.newaxis].astype( - 'float32') + self._img_mean = np.array(mean_value)[:, np.newaxis, + np.newaxis].astype('float32') self._expand_prob = 0.5 self._expand_max_ratio = 4 self._hue_prob = 0.5 @@ -173,8 +173,10 @@ def coco(settings, file_list, mode, shuffle): category_names = [item['name'] for item in coco.loadCats(category_ids)] if not settings.toy == 0: - images = images[:settings.toy] if len(images) > settings.toy else images - print("{} on {} with {} images".format(mode, settings.dataset, len(images))) + images = images[:settings.toy] if len( + images) > settings.toy else images + print("{} on {} with {} images".format(mode, settings.dataset, len( + images))) def reader(): if mode == 'train' and shuffle: @@ -224,8 +226,10 @@ def pascalvoc(settings, file_list, mode, shuffle): flist = open(file_list) images = [line.strip() for line in flist] if not settings.toy == 0: - images = images[:settings.toy] if len(images) > settings.toy else images - print("{} on {} with {} images".format(mode, settings.dataset, len(images))) + images = images[:settings.toy] if len( + images) > settings.toy else images + print("{} on {} with {} images".format(mode, settings.dataset, len( + images))) def reader(): if mode == 'train' and shuffle: @@ -247,7 +251,8 @@ def reader(): bbox_sample = [] # start from 1 bbox_sample.append( - float(settings.label_list.index(object.find('name').text))) + float( + settings.label_list.index(object.find('name').text))) bbox = object.find('bndbox') difficult = float(object.find('difficult').text) bbox_sample.append(float(bbox.find('xmin').text) / im_width) diff --git a/object_detection/run.xsh b/object_detection/run.xsh index 0e864a15..8783d96f 100755 --- a/object_detection/run.xsh +++ b/object_detection/run.xsh @@ -9,5 +9,8 @@ export CUDA_VISIBLE_DEVICES=$cudaid # mkdir -p data/pascalvoc # ./download.sh #fi -FLAGS_benchmark=true FLAGS_fraction_of_gpu_memory_to_use=0.9 python train.py --batch_size=64 --num_passes=2 +FLAGS_benchmark=true python train.py --batch_size=64 --num_passes=2 +cudaid=${object_detection_multi_cudaid:=0,1,2,3} # use 0-th 
card as default
+export CUDA_VISIBLE_DEVICES=$cudaid
+FLAGS_benchmark=true python train.py --batch_size=64 --num_passes=2 --gpu_card_num=4
diff --git a/object_detection/train.py b/object_detection/train.py
index d5ebfa30..7846cbe8 100644
--- a/object_detection/train.py
+++ b/object_detection/train.py
@@ -11,7 +11,7 @@
 from mobilenet_ssd import mobile_net
 from utility import add_arguments, print_arguments
-from continuous_evaluation import train_cost_kpi, train_speed_kpi
+from continuous_evaluation import train_cost_kpi, train_speed_kpi, four_card_speed_kpi
 
 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
@@ -21,6 +21,7 @@
 add_arg('num_passes', int, 120, "Epoch number.")
 add_arg('iterations', int, 120, "mini batchs.")
 add_arg('skip_batch_num', int, 5, "the num of minibatch to skip.")
+add_arg('gpu_card_num', int, 1, "the number of GPU cards.")
 add_arg('parallel', bool, True, "Whether use parallel training.")
 add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
 add_arg('use_nccl', bool, True, "Whether to use NCCL or not.")
@@ -158,7 +159,8 @@ def test(pass_id):
             end_time = time.time()
             if batch_id % 20 == 0:
                 print("Pass {0}, batch {1}, loss {2}, time {3}".format(
-                    pass_id, batch_id, loss_v[0], start_time - prev_start_time))
+                    pass_id, batch_id, loss_v[0],
+                    start_time - prev_start_time))
         test(pass_id)
 
         if pass_id % 10 == 0 or pass_id == num_passes - 1:
@@ -281,13 +283,13 @@ def test(pass_id, best_map):
     total_train_time = 0.0
     total_iters = 0
     for pass_id in range(num_passes):
-        every_pass_loss = []
-        iter = 0
+        every_pass_loss = []
+        iter = 0
         pass_duration = 0.0
         for batch_id, data in enumerate(train_reader()):
             batch_start = time.time()
             if iter == args.iterations:
-                break
+                break
             if len(data) < devices_num: continue
             if args.parallel:
                 loss_v, = train_exe.run(fetch_list=[loss.name],
@@ -307,15 +309,24 @@ def test(pass_id, best_map):
             every_pass_loss.append(loss_v)
             iter += 1
             total_iters += 1
-        #test(pass_id, best_map)
-        total_train_time += pass_duration
-        print("Pass:%d, Loss:%f, Handle Images Duration: %f\n" % (pass_id, np.mean(every_pass_loss), pass_duration))
+        #test(pass_id, best_map)
+        total_train_time += pass_duration
+        print("Pass:%d, Loss:%f, Handle Images Duration: %f\n" %
+              (pass_id, np.mean(every_pass_loss), pass_duration))
         if pass_id == num_passes - 1:
-            examples_per_sec = train_num / total_train_time
-            train_cost_kpi.add_record(np.mean(every_pass_loss))
-            train_speed_kpi.add_record(np.array(examples_per_sec, dtype='float'))
-            train_cost_kpi.persist()
-            train_speed_kpi.persist()
+            examples_per_sec = train_num / total_train_time
+            train_cost_kpi.add_record(np.mean(every_pass_loss))
+            train_speed_kpi.add_record(
+                np.array(
+                    examples_per_sec, dtype='float'))
+            four_card_speed_kpi.add_record(
+                np.array(
+                    examples_per_sec, dtype='float'))
+            if args.gpu_card_num == 1:
+                train_cost_kpi.persist()
+                train_speed_kpi.persist()
+            else:
+                four_card_speed_kpi.persist()
 
     print("Best test map {0}".format(best_map))
 
diff --git a/resnet30/continuous_evaluation.py b/resnet30/continuous_evaluation.py
deleted file mode 100644
index 800645c2..00000000
--- a/resnet30/continuous_evaluation.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import os
-import sys
-sys.path.append(os.environ['ceroot'])
-from kpi import CostKpi, DurationKpi
-
-train_cost_kpi = CostKpi('train_cost', 0.5)
-train_duration_kpi = DurationKpi('train_duration', 0.2)
-
-tracking_kpis = [
-    train_cost_kpi,
-    train_duration_kpi,
-]
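Note the recording flow in the train.py hunk above: the measured speed is added to both the single-card and the four-card KPI, but only the KPI matching --gpu_card_num is persisted; assuming persist() is what writes the latest_kpis factor file, the record left in the other KPI is simply never written out. A minimal table-driven sketch of the same dispatch (the dict and helper names here are hypothetical, not part of this patch):

    import numpy as np
    from continuous_evaluation import train_speed_kpi, four_card_speed_kpi

    # hypothetical rewrite of the persist branch in object_detection/train.py
    SPEED_KPI_BY_CARDS = {1: train_speed_kpi, 4: four_card_speed_kpi}

    def record_speed(gpu_card_num, examples_per_sec):
        kpi = SPEED_KPI_BY_CARDS[gpu_card_num]  # KPI owned by this cluster size
        kpi.add_record(np.array(examples_per_sec, dtype='float'))
        kpi.persist()  # writes latest_kpis/<name>_factor.txt

diff --git a/resnet50/continuous_evaluation.py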
b/resnet50/continuous_evaluation.py index d88bdda8..b4eed6d2 100644 --- a/resnet50/continuous_evaluation.py +++ b/resnet50/continuous_evaluation.py @@ -3,12 +3,12 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi -cifar10_128_train_acc_kpi = AccKpi('cifar10_128_train_acc', 0.2, 0) -cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.2, 0) -cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.2, 0) +cifar10_128_train_acc_kpi = AccKpi('cifar10_128_train_acc', 0.03, 0, actived=True) +cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.06, 0, actived=True) +cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.1, 0, actived=True) -flowers_64_train_speed_kpi = AccKpi('flowers_64_train_speed', 0.2, 0) -flowers_64_gpu_memory_kpi = DurationKpi('flowers_64_gpu_memory', 0.2, 0) +flowers_64_train_speed_kpi = AccKpi('flowers_64_train_speed', 0.05, 0, actived=True) +flowers_64_gpu_memory_kpi = DurationKpi('flowers_64_gpu_memory', 0.1, 0, actived=True) tracking_kpis = [ cifar10_128_train_acc_kpi, diff --git a/resnet50/get_gpu_data.py b/resnet50/get_gpu_data.py index 831c9eed..1e391253 100644 --- a/resnet50/get_gpu_data.py +++ b/resnet50/get_gpu_data.py @@ -5,7 +5,6 @@ # Copyright (c) 2018 Baidu.com, Inc. All Rights Reserved # ######################################################################## - """ File: get_gpu_data.py Author: paddle(paddle@baidu.com) @@ -14,7 +13,6 @@ import argparse from continuous_evaluation import tracking_kpis - parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( '--batch_size', type=int, default=128, help="Batch size for training.") @@ -26,20 +24,21 @@ help='Optional dataset for benchmark.') args = parser.parse_args() + def save_gpu_data(): mem_list = [] with open('memory.txt', 'r') as f: - for i , data in enumerate(f.readlines()): + for i, data in enumerate(f.readlines()): if i == 0: continue mem_list.append(int(data.split("\n")[0].split(" ")[0])) gpu_memory_factor = None - for kpi in tracking_kpis: + for kpi in tracking_kpis: if kpi.name == '%s_%s_gpu_memory' % (args.data_set, args.batch_size): gpu_memory_kpi = kpi gpu_memory_kpi.add_record(max(mem_list)) gpu_memory_kpi.persist() + if __name__ == "__main__": save_gpu_data() - diff --git a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt index ffe22e2d..5ebe01c2 100644 --- a/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt +++ b/resnet50/latest_kpis/cifar10_128_gpu_memory_factor.txt @@ -1 +1 @@ -[1394] \ No newline at end of file +[1394] diff --git a/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt b/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt index 736ea74e..83208824 100644 --- a/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt +++ b/resnet50/latest_kpis/cifar10_128_train_acc_factor.txt @@ -1 +1 @@ -[0.91755859375] +[0.93755859375] diff --git a/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt b/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt index dc2cb5e3..f37998fd 100644 --- a/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt +++ b/resnet50/latest_kpis/cifar10_128_train_speed_factor.txt @@ -1 +1 @@ -[871.095703125] \ No newline at end of file +[738.095703125] diff --git a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt index 7c29b91f..2799deaf 100644 --- a/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt +++ 
b/resnet50/latest_kpis/flowers_64_gpu_memory_factor.txt
@@ -1 +1 @@
-[10352]
\ No newline at end of file
+[10352]
diff --git a/resnet50/latest_kpis/flowers_64_train_speed_factor.txt b/resnet50/latest_kpis/flowers_64_train_speed_factor.txt
index 4e3deed0..4938eeb8 100644
--- a/resnet50/latest_kpis/flowers_64_train_speed_factor.txt
+++ b/resnet50/latest_kpis/flowers_64_train_speed_factor.txt
@@ -1 +1 @@
-[122.87747192382812]
\ No newline at end of file
+[106.87747192382812]
diff --git a/resnet50/model.py b/resnet50/model.py
index 4c9088fb..f4e7beea 100644
--- a/resnet50/model.py
+++ b/resnet50/model.py
@@ -91,8 +91,8 @@ def parse_args():
 
 def print_arguments(args):
-    vars(args)['use_nvprof'] = (vars(args)['use_nvprof']
-                                and vars(args)['device'] == 'GPU')
+    vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
+                                vars(args)['device'] == 'GPU')
     print('----------- Configuration Arguments -----------')
     for arg, value in sorted(vars(args).iteritems()):
         print('%s: %s' % (arg, value))
@@ -283,14 +283,15 @@ def test(exe):
             if iter == args.iterations:
                 break
             if not args.use_fake_data:
-                image = np.array(
-                    map(lambda x: x[0].reshape(dshape), data)).astype('float32')
+                image = np.array(map(lambda x: x[0].reshape(dshape),
+                                     data)).astype('float32')
                 label = np.array(map(lambda x: x[1], data)).astype('int64')
                 label = label.reshape([-1, 1])
             loss, acc, weight = exe.run(
-                fluid.default_main_program(), feed={
-                    'data': image, 'label': label}, fetch_list=[
-                    avg_cost, batch_acc, batch_size_tensor])
+                fluid.default_main_program(),
+                feed={'data': image,
+                      'label': label},
+                fetch_list=[avg_cost, batch_acc, batch_size_tensor])
             accuracy.add(value=acc, weight=weight)
             if iter >= args.skip_batch_num or pass_id != 0:
                 batch_duration = time.time() - batch_start
@@ -306,8 +307,9 @@ def test(exe):
         pass_train_acc = accuracy.eval()
         pass_test_acc = test(exe)
         print(
-            "Pass:%d, Loss:%f, Train Accuray:%f, Test Accuray:%f, Handle Images Duration: %f\n" %
-            (pass_id, np.mean(every_pass_loss), pass_train_acc, pass_test_acc, pass_duration))
+            "Pass:%d, Loss:%f, Train Accuracy:%f, Test Accuracy:%f, Handle Images Duration: %f\n"
+            % (pass_id, np.mean(every_pass_loss), pass_train_acc,
+               pass_test_acc, pass_duration))
         if pass_id == args.pass_num - 1 and args.data_set == 'cifar10':
             train_acc_kpi.add_record(np.array(pass_train_acc, dtype='float32'))
             train_acc_kpi.persist()
@@ -318,9 +320,8 @@ def test(exe):
     train_speed_kpi.add_record(np.array(examples_per_sec, dtype='float32'))
     train_speed_kpi.persist()
-    print(
-        '\nTotal examples: %d, total time: %.5f' %
-        (im_num, total_train_time))
+    print('\nTotal examples: %d, total time: %.5f' %
+          (im_num, total_train_time))
     print('%.5f examples/sec, %.5f sec/batch \n' %
           (examples_per_sec, sec_per_batch))
 
@@ -340,19 +341,19 @@ def collect_gpu_memory_data(alive):
     global is_alive
     status, output = commands.getstatusoutput('rm -rf memory.txt')
     if status == 0:
-        print('del memory.txt')
+        print('del memory.txt')
     command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv -lms 1 > memory.txt" % args.gpu_id
     p = subprocess.Popen(command, shell=True)
     if p.pid < 0:
-        print('Get GPU memory data error')
-    while(is_alive):
+        print('Get GPU memory data error')
+    while (is_alive):
         time.sleep(1)
     p.kill()
 
 def save_gpu_data(mem_list):
     gpu_memory_kpi = None
-    for kpi in tracking_kpis:
+    for kpi in tracking_kpis:
         if kpi.name == '%s_%s_gpu_memory' % (args.data_set, args.batch_size):
             gpu_memory_kpi = kpi
     gpu_memory_kpi.add_record(max(mem_list))
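For context on where the gpu_memory KPI values above come from: collect_gpu_memory_data samples nvidia-smi roughly once per millisecond into memory.txt, and the peak sample becomes the KPI record. A minimal sketch of that reduction, assuming memory.txt begins with the csv header line and each sample reads like "1394 MiB" (this mirrors save_gpu_data in resnet50/get_gpu_data.py rather than adding new behavior):

    # reduce the nvidia-smi csv log to its peak used_memory sample
    def peak_gpu_memory(path='memory.txt'):
        with open(path) as f:
            samples = [int(line.split(' ')[0]) for line in f.readlines()[1:]]
        return max(samples)  # the peak is what gets recorded into the KPI

diff --git a/seq2seq/continuous_evaluation.py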
b/seq2seq/continuous_evaluation.py index 1bf21055..191f2c63 100644 --- a/seq2seq/continuous_evaluation.py +++ b/seq2seq/continuous_evaluation.py @@ -11,7 +11,6 @@ wmb_128_train_speed_kpi = AccKpi('wmb_128_train_speed', 0.2, 0) wmb_128_gpu_memory_kpi = DurationKpi('wmb_128_gpu_memory', 0.2, 0) - tracking_kpis = [ wmb_128_train_speed_kpi, wmb_128_gpu_memory_kpi, diff --git a/seq2seq/get_gpu_data.py b/seq2seq/get_gpu_data.py index f5c43404..c852351d 100644 --- a/seq2seq/get_gpu_data.py +++ b/seq2seq/get_gpu_data.py @@ -5,7 +5,6 @@ # Copyright (c) 2018 Baidu.com, Inc. All Rights Reserved # ######################################################################## - """ File: get_gpu_data.py Author: paddle(paddle@baidu.com) @@ -14,7 +13,6 @@ import argparse from continuous_evaluation import tracking_kpis - parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( '--batch_size', type=int, default=128, help="Batch size for training.") @@ -25,20 +23,21 @@ help='Optional dataset for benchmark.') args = parser.parse_args() + def save_gpu_data(): mem_list = [] with open('memory.txt', 'r') as f: - for i , data in enumerate(f.readlines()): + for i, data in enumerate(f.readlines()): if i == 0: continue mem_list.append(int(data.split("\n")[0].split(" ")[0])) gpu_memory_factor = None - for kpi in tracking_kpis: + for kpi in tracking_kpis: if kpi.name == '%s_%s_gpu_memory' % (args.data_set, args.batch_size): gpu_memory_kpi = kpi gpu_memory_kpi.add_record(max(mem_list)) gpu_memory_kpi.persist() + if __name__ == "__main__": save_gpu_data() - diff --git a/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt b/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt index 8301f111..f845312b 100644 --- a/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt +++ b/seq2seq/latest_kpis/wmb_128_train_speed_factor.txt @@ -1 +1 @@ -[4430.63330078125] \ No newline at end of file +[4430.63330078125] diff --git a/seq2seq/model.py b/seq2seq/model.py index c8534e5f..7c0db4e0 100644 --- a/seq2seq/model.py +++ b/seq2seq/model.py @@ -165,8 +165,8 @@ def bi_lstm_encoder(input_seq, gate_size): bias_attr=False, act='tanh') - def lstm_decoder_with_attention(target_embedding, encoder_vec, encoder_proj, - decoder_boot, decoder_size): + def lstm_decoder_with_attention(target_embedding, encoder_vec, + encoder_proj, decoder_boot, decoder_size): def simple_attention(encoder_vec, encoder_proj, decoder_state): decoder_state_proj = fluid.layers.fc(input=decoder_state, size=decoder_size, @@ -206,7 +206,8 @@ def simple_attention(encoder_vec, encoder_proj, decoder_state): context = simple_attention(encoder_vec, encoder_proj, hidden_mem) decoder_inputs = fluid.layers.concat( input=[context, current_word], axis=1) - h, c = lstm_step(decoder_inputs, hidden_mem, cell_mem, decoder_size) + h, c = lstm_step(decoder_inputs, hidden_mem, cell_mem, + decoder_size) rnn.update_memory(hidden_mem, h) rnn.update_memory(cell_mem, c) out = fluid.layers.fc(input=h, @@ -315,7 +316,7 @@ def do_validation(): count += 1 return total_loss / count - + train_acc_kpi = None for kpi in tracking_kpis: if kpi.name == 'wmb_%s_train_acc' % (args.batch_size): @@ -385,19 +386,19 @@ def collect_gpu_memory_data(alive): global is_alive status, output = commands.getstatusoutput('rm -rf memory.txt') if status == 0: - print('del memory.txt') + print('del memory.txt') command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv -lms 1 > memory.txt" % args.gpu_id p = subprocess.Popen(command, shell=True) if p.pid < 0: - print('Get GPU memory data error') - 
while(is_alive): + print('Get GPU memory data error') + while (is_alive): time.sleep(1) p.kill() def save_gpu_data(mem_list): gpu_memory_kpi = None - for kpi in tracking_kpis: + for kpi in tracking_kpis: if kpi.name == 'wmb_%s_gpu_memory' % (args.batch_size): gpu_memory_kpi = kpi gpu_memory_kpi.add_record(max(mem_list)) @@ -410,7 +411,7 @@ def save_gpu_data(mem_list): global is_alive is_alive = True collect_memory_thread = threading.Thread( - target=collect_gpu_memory_data, args=(is_alive,)) + target=collect_gpu_memory_data, args=(is_alive, )) collect_memory_thread.setDaemon(True) collect_memory_thread.start() if args.infer_only: diff --git a/sequence_tagging_for_ner/continuous_evaluation.py b/sequence_tagging_for_ner/continuous_evaluation.py index 426610d7..e8e4ccd0 100644 --- a/sequence_tagging_for_ner/continuous_evaluation.py +++ b/sequence_tagging_for_ner/continuous_evaluation.py @@ -8,10 +8,13 @@ from kpi import DurationKpi train_acc_kpi = AccKpi('train_acc', 0.2, 0) -pass_duration_kpi = DurationKpi('pass_duration', 0.2, 0) - +pass_duration_kpi = DurationKpi('pass_duration', 0.02, 0, actived=True) +train_acc_kpi_card4 = AccKpi('train_acc_card4', 0.2, 0) +pass_duration_kpi_card4 = DurationKpi('pass_duration_card4', 0.02, 0, actived=True) tracking_kpis = [ train_acc_kpi, pass_duration_kpi, + train_acc_kpi_card4, + pass_duration_kpi_card4, ] diff --git a/sequence_tagging_for_ner/download.sh b/sequence_tagging_for_ner/download.sh index 99d81c1e..861f943e 100644 --- a/sequence_tagging_for_ner/download.sh +++ b/sequence_tagging_for_ner/download.sh @@ -13,4 +13,3 @@ else echo "download data error!" >> /dev/stderr exit 1 fi - diff --git a/sequence_tagging_for_ner/latest_kpis/pass_duration_card4_factor.txt b/sequence_tagging_for_ner/latest_kpis/pass_duration_card4_factor.txt new file mode 100644 index 00000000..bbcc1bf4 --- /dev/null +++ b/sequence_tagging_for_ner/latest_kpis/pass_duration_card4_factor.txt @@ -0,0 +1 @@ +[0.04497942033021347] \ No newline at end of file diff --git a/sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt b/sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt index fc6ca0bd..683e1d69 100644 --- a/sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt +++ b/sequence_tagging_for_ner/latest_kpis/pass_duration_factor.txt @@ -1 +1 @@ -[0.020249680643496307] \ No newline at end of file +[0.021749680643496307] diff --git a/sequence_tagging_for_ner/latest_kpis/train_acc_card4_factor.txt b/sequence_tagging_for_ner/latest_kpis/train_acc_card4_factor.txt new file mode 100644 index 00000000..e7a19a6e --- /dev/null +++ b/sequence_tagging_for_ner/latest_kpis/train_acc_card4_factor.txt @@ -0,0 +1 @@ +[1.0] \ No newline at end of file diff --git a/sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt b/sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt index e7a19a6e..0ea64a68 100644 --- a/sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt +++ b/sequence_tagging_for_ner/latest_kpis/train_acc_factor.txt @@ -1 +1 @@ -[1.0] \ No newline at end of file +[1.0] diff --git a/sequence_tagging_for_ner/network_conf.py b/sequence_tagging_for_ner/network_conf.py index 5eaa704f..3611d7b7 100644 --- a/sequence_tagging_for_ner/network_conf.py +++ b/sequence_tagging_for_ner/network_conf.py @@ -59,8 +59,9 @@ def _net_conf(word, mark, target): name="__hidden%02d_%s__" % (i, direction), size=hidden_dim, act="stanh", - bias_attr=fluid.ParamAttr(initializer=NormalInitializer( - loc=0.0, scale=1.0)), + bias_attr=fluid.ParamAttr( + 
initializer=NormalInitializer( + loc=0.0, scale=1.0)), input=[hidden, rnn[0], rnn[1]], param_attr=[ hidden_para_attr, rnn_para_attr, rnn_para_attr @@ -103,8 +104,10 @@ def _net_conf(word, mark, target): avg_cost = fluid.layers.mean(x=crf_cost) return avg_cost, emission - word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1) - mark = fluid.layers.data(name='mark', shape=[1], dtype='int64', lod_level=1) + word = fluid.layers.data( + name='word', shape=[1], dtype='int64', lod_level=1) + mark = fluid.layers.data( + name='mark', shape=[1], dtype='int64', lod_level=1) target = fluid.layers.data( name="target", shape=[1], dtype='int64', lod_level=1) diff --git a/sequence_tagging_for_ner/run.xsh b/sequence_tagging_for_ner/run.xsh index fdebda79..9fda2d21 100755 --- a/sequence_tagging_for_ner/run.xsh +++ b/sequence_tagging_for_ner/run.xsh @@ -2,9 +2,15 @@ export MKL_NUM_THREADS=1 export OMP_NUM_THREADS=1 + cudaid=${sequence_tagging:=0} # use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid - #pass_num 2200 sh download.sh FLAGS_benchmark=true python train.py + +cudaid=${sequence_tagging_m:=0,1,2,3} # use multi card as default +export CUDA_VISIBLE_DEVICES=$cudaid +#pass_num 2200 +sh download.sh +FLAGS_benchmark=true python train.py --gpu_card_num 4 diff --git a/sequence_tagging_for_ner/train.py b/sequence_tagging_for_ner/train.py index e4df9c35..a80e75b1 100644 --- a/sequence_tagging_for_ner/train.py +++ b/sequence_tagging_for_ner/train.py @@ -5,12 +5,20 @@ import paddle import paddle.fluid as fluid - +import argparse import reader from network_conf import ner_net from utils import logger, load_dict from utils_extend import to_lodtensor, get_embedding -from continuous_evaluation import train_acc_kpi, pass_duration_kpi +from continuous_evaluation import * + +def parse_args(): + parser = argparse.ArgumentParser("sequence_tagging_for_ner model benchmark.") + parser.add_argument( + '--gpu_card_num', type=int, default=1, help='gpu card num used.') + + args = parser.parse_args() + return args def test(exe, chunk_evaluator, inference_program, test_data, place): chunk_evaluator.reset(exe) @@ -27,6 +35,8 @@ def test(exe, chunk_evaluator, inference_program, test_data, place): def main(train_data_file, test_data_file, vocab_file, target_file, emb_file, model_save_dir, num_passes, use_gpu, parallel): + + args = parse_args() if not os.path.exists(model_save_dir): os.mkdir(model_save_dir) @@ -77,7 +87,8 @@ def main(train_data_file, test_data_file, vocab_file, target_file, emb_file, exe.run(fluid.default_startup_program()) embedding_name = 'emb' - embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor() + embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor( + ) embedding_param.set(word_vector_values, place) batch_id = 0 @@ -95,24 +106,35 @@ def main(train_data_file, test_data_file, vocab_file, target_file, emb_file, total_time += t1 - start_time pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(exe) if pass_id == num_passes - 1: - train_acc_kpi.add_record(pass_precision) - pass_duration_kpi.add_record(total_time / num_passes) + if args.gpu_card_num == 1: + train_acc_kpi.add_record(pass_precision) + pass_duration_kpi.add_record(total_time / num_passes) + else: + train_acc_kpi_card4.add_record(pass_precision) + pass_duration_kpi_card4.add_record(total_time / num_passes) + if pass_id % 100 == 0: - print("[TrainSet] pass_id:" + str(pass_id) + " pass_precision:" + str( - pass_precision) + " pass_recall:" + str(pass_recall) + - " 
pass_f1_score:" + str(pass_f1_score)) + print("[TrainSet] pass_id:" + str(pass_id) + " pass_precision:" + + str(pass_precision) + " pass_recall:" + str( + pass_recall) + " pass_f1_score:" + str(pass_f1_score)) pass_precision, pass_recall, pass_f1_score = test( exe, chunk_evaluator, inference_program, test_reader, place) if pass_id % 100 == 0: - print("[TestSet] pass_id:" + str(pass_id) + " pass_precision:" + str( - pass_precision) + " pass_recall:" + str(pass_recall) + - " pass_f1_score:" + str(pass_f1_score)) - - save_dirname = os.path.join(model_save_dir, "params_pass_%d" % pass_id) - fluid.io.save_inference_model(save_dirname, ['word', 'mark', 'target'], - [crf_decode], exe) - train_acc_kpi.persist() - pass_duration_kpi.persist() + print("[TestSet] pass_id:" + str(pass_id) + " pass_precision:" + + str(pass_precision) + " pass_recall:" + str( + pass_recall) + " pass_f1_score:" + str(pass_f1_score)) + + #save_dirname = os.path.join(model_save_dir, "params_pass_%d" % pass_id) + #fluid.io.save_inference_model( + # save_dirname, ['word', 'mark', 'target'], [crf_decode], exe) + + if args.gpu_card_num == 1: + train_acc_kpi.persist() + pass_duration_kpi.persist() + else: + train_acc_kpi_card4.persist() + pass_duration_kpi_card4.persist() + if __name__ == "__main__": main( diff --git a/text_classification/README.md b/text_classification/README.md index 43c15934..7855f6c2 100644 --- a/text_classification/README.md +++ b/text_classification/README.md @@ -109,4 +109,5 @@ model_path: bow_model/epoch28, avg_acc: 0.844680 model_path: bow_model/epoch29, avg_acc: 0.844960 ``` + 注:过拟合导致acc持续下降,请忽略 diff --git a/text_classification/continuous_evaluation.py b/text_classification/continuous_evaluation.py index 8c1fa2b4..133a0d35 100644 --- a/text_classification/continuous_evaluation.py +++ b/text_classification/continuous_evaluation.py @@ -8,10 +8,6 @@ from kpi import DurationKpi lstm_train_cost_kpi = CostKpi('lstm_train_cost', 5, 0) -lstm_pass_duration_kpi = DurationKpi('lstm_pass_duration', 0.2, 0) +lstm_pass_duration_kpi = DurationKpi('lstm_pass_duration', 0.02, 0, actived=True) - -tracking_kpis = [ - lstm_train_cost_kpi, - lstm_pass_duration_kpi -] +tracking_kpis = [lstm_train_cost_kpi, lstm_pass_duration_kpi] diff --git a/text_classification/latest_kpis/lstm_pass_duration_factor.txt b/text_classification/latest_kpis/lstm_pass_duration_factor.txt index 9adbc5ec..60ab6882 100644 --- a/text_classification/latest_kpis/lstm_pass_duration_factor.txt +++ b/text_classification/latest_kpis/lstm_pass_duration_factor.txt @@ -1 +1 @@ -[14.64635027249654] \ No newline at end of file +[15.24635027249654] diff --git a/text_classification/latest_kpis/lstm_train_cost_factor.txt b/text_classification/latest_kpis/lstm_train_cost_factor.txt index 6ee69322..1224335d 100644 --- a/text_classification/latest_kpis/lstm_train_cost_factor.txt +++ b/text_classification/latest_kpis/lstm_train_cost_factor.txt @@ -1 +1 @@ -[0.000792166159953922] \ No newline at end of file +[0.000792166159953922] diff --git a/text_classification/train.py b/text_classification/train.py index fced1c09..b22001ea 100644 --- a/text_classification/train.py +++ b/text_classification/train.py @@ -13,6 +13,7 @@ from nets import gru_net from continuous_evaluation import lstm_train_cost_kpi, lstm_pass_duration_kpi + def train(train_reader, word_dict, network, @@ -70,7 +71,7 @@ def train(train_reader, avg_cost = total_cost / data_count newest_avg_cost = avg_cost t1 = time.time() - total_time += t1 - start_time + total_time += t1 - start_time avg_acc = total_acc 
/ data_count print("pass_id: %d, avg_acc: %f, avg_cost: %f" % (pass_id, avg_acc, avg_cost)) @@ -78,7 +79,8 @@ def train(train_reader, lstm_train_cost_kpi.add_record(newest_avg_cost) lstm_pass_duration_kpi.add_record(total_time / pass_num) epoch_model = save_dirname + "/" + "epoch" + str(pass_id) - fluid.io.save_inference_model(epoch_model, ["words", "label"], acc, exe) + fluid.io.save_inference_model(epoch_model, ["words", "label"], acc, + exe) lstm_train_cost_kpi.persist() lstm_pass_duration_kpi.persist() diff --git a/transformer/continuous_evaluation.py b/transformer/continuous_evaluation.py index ff5423aa..7a39755e 100644 --- a/transformer/continuous_evaluation.py +++ b/transformer/continuous_evaluation.py @@ -6,7 +6,6 @@ train_avg_ppl_kpi = CostKpi('train_avg_ppl_kpi', 0.2, 0) train_pass_duration_kpi = DurationKpi('train_pass_duration_kpi', 0.2, 0) - tracking_kpis = [ train_avg_ppl_kpi, train_pass_duration_kpi, diff --git a/transformer/infer.py b/transformer/infer.py index 432183cd..7d0c9776 100644 --- a/transformer/infer.py +++ b/transformer/infer.py @@ -291,7 +291,6 @@ def main(): decoder_program = fluid.io.get_inference_program( target_vars=[predict], main_program=decoder_program) - test_data = paddle.batch( paddle.dataset.wmt16.test(ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size), @@ -328,7 +327,8 @@ def post_process_seq(seq, encoder_data_input_fields + encoder_util_input_fields, [enc_output.name], decoder_program, - decoder_data_input_fields[:-1] + decoder_util_input_fields + (decoder_data_input_fields[-1],), + decoder_data_input_fields[:-1] + decoder_util_input_fields + + (decoder_data_input_fields[-1], ), [predict.name], InferTaskConfig.beam_size, InferTaskConfig.max_length, diff --git a/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt b/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt index d7e63ce9..4075807a 100644 --- a/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt +++ b/transformer/latest_kpis/train_avg_ppl_kpi_factor.txt @@ -1 +1 @@ -[19.267375946044922] \ No newline at end of file +[19.267375946044922] diff --git a/transformer/latest_kpis/train_pass_duration_kpi_factor.txt b/transformer/latest_kpis/train_pass_duration_kpi_factor.txt index b9b85cf9..3cf0a471 100644 --- a/transformer/latest_kpis/train_pass_duration_kpi_factor.txt +++ b/transformer/latest_kpis/train_pass_duration_kpi_factor.txt @@ -1 +1 @@ -[56.41797208786011] \ No newline at end of file +[56.41797208786011] diff --git a/transformer/train.py b/transformer/train.py index 5d10fbf3..f1b3bfe1 100644 --- a/transformer/train.py +++ b/transformer/train.py @@ -185,16 +185,15 @@ def test(exe): for batch_id, data in enumerate(test_data()): feed_list = [] for place_id, data_buffer in enumerate(data): - data_input_dict, util_input_dict,_ = prepare_batch_input( + data_input_dict, util_input_dict, _ = prepare_batch_input( data_buffer, data_input_names, util_input_names, ModelHyperParams.eos_idx, ModelHyperParams.eos_idx, ModelHyperParams.n_head, ModelHyperParams.d_model) feed_list.append( dict(data_input_dict.items() + util_input_dict.items())) - outs = exe.run( - feed=feed_list, - fetch_list=[sum_cost.name, token_num.name]) + outs = exe.run(feed=feed_list, + fetch_list=[sum_cost.name, token_num.name]) sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[1]) test_total_cost += sum_cost_val.sum() test_total_token += token_num_val.sum() @@ -214,11 +213,13 @@ def test(exe): util_input_names = encoder_util_input_fields + decoder_util_input_fields train_exe = fluid.ParallelExecutor( - 
use_cuda=TrainTaskConfig.use_gpu, loss_name=sum_cost.name, customize_loss_grad=True) + use_cuda=TrainTaskConfig.use_gpu, + loss_name=sum_cost.name) test_exe = fluid.ParallelExecutor( - use_cuda=TrainTaskConfig.use_gpu, main_program=test_program, share_vars_from=train_exe) - + use_cuda=TrainTaskConfig.use_gpu, + main_program=test_program, + share_vars_from=train_exe) init = False train_data = read_multiple(reader=train_data, count=dev_count) @@ -267,9 +268,9 @@ def test(exe): time_consumed = pass_end_time - pass_start_time print("pass_id = " + str(pass_id) + " time_consumed = " + str( time_consumed)) - if pass_id == TrainTaskConfig.pass_num - 1: - train_avg_ppl_kpi.add_record(np.array(val_ppl, dtype='float32')) - train_pass_duration_kpi.add_record(time_consumed) + if pass_id == TrainTaskConfig.pass_num - 1: + train_avg_ppl_kpi.add_record(np.array(val_ppl, dtype='float32')) + train_pass_duration_kpi.add_record(time_consumed) train_avg_ppl_kpi.persist() train_pass_duration_kpi.persist() diff --git a/transformer/transformer_config.py b/transformer/transformer_config.py index 4cd16924..d37636d9 100644 --- a/transformer/transformer_config.py +++ b/transformer/transformer_config.py @@ -154,7 +154,6 @@ class ModelHyperParams(object): "lbl_weight": [(1 * (ModelHyperParams.max_length + 1), 1L), "float32"], } - # Names of position encoding table which will be initialized externally. pos_enc_param_names = ( "src_pos_enc_table", @@ -183,4 +182,3 @@ class ModelHyperParams(object): label_data_input_fields = ( "lbl_word", "lbl_weight", ) - diff --git a/vgg16/continuous_evaluation.py b/vgg16/continuous_evaluation.py index d2b44d6f..24b09ddd 100644 --- a/vgg16/continuous_evaluation.py +++ b/vgg16/continuous_evaluation.py @@ -8,11 +8,11 @@ from kpi import CostKpi from kpi import DurationKpi -cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.2, 0) -cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.2, 0) +cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.02, 0, actived=True) +cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.1, 0, actived=True) -flowers_32_train_speed_kpi = AccKpi('flowers_32_train_speed', 0.2, 0) -flowers_32_gpu_memory_kpi = DurationKpi('flowers_32_gpu_memory', 0.2, 0) +flowers_32_train_speed_kpi = AccKpi('flowers_32_train_speed', 0.02, 0, actived=True) +flowers_32_gpu_memory_kpi = DurationKpi('flowers_32_gpu_memory', 0.1, 0, actived=True) tracking_kpis = [ cifar10_128_train_speed_kpi, diff --git a/vgg16/get_gpu_data.py b/vgg16/get_gpu_data.py index 831c9eed..1e391253 100644 --- a/vgg16/get_gpu_data.py +++ b/vgg16/get_gpu_data.py @@ -5,7 +5,6 @@ # Copyright (c) 2018 Baidu.com, Inc. 
All Rights Reserved # ######################################################################## - """ File: get_gpu_data.py Author: paddle(paddle@baidu.com) @@ -14,7 +13,6 @@ import argparse from continuous_evaluation import tracking_kpis - parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( '--batch_size', type=int, default=128, help="Batch size for training.") @@ -26,20 +24,21 @@ help='Optional dataset for benchmark.') args = parser.parse_args() + def save_gpu_data(): mem_list = [] with open('memory.txt', 'r') as f: - for i , data in enumerate(f.readlines()): + for i, data in enumerate(f.readlines()): if i == 0: continue mem_list.append(int(data.split("\n")[0].split(" ")[0])) gpu_memory_factor = None - for kpi in tracking_kpis: + for kpi in tracking_kpis: if kpi.name == '%s_%s_gpu_memory' % (args.data_set, args.batch_size): gpu_memory_kpi = kpi gpu_memory_kpi.add_record(max(mem_list)) gpu_memory_kpi.persist() + if __name__ == "__main__": save_gpu_data() - diff --git a/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt b/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt index aad9b5c0..e6827254 100644 --- a/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt +++ b/vgg16/latest_kpis/cifar10_128_gpu_memory_factor.txt @@ -1 +1 @@ -[2198] \ No newline at end of file +[2198] diff --git a/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt b/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt index 042002c0..49da8670 100644 --- a/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt +++ b/vgg16/latest_kpis/cifar10_128_train_speed_factor.txt @@ -1 +1 @@ -[736.5991821289062] \ No newline at end of file +[735.5991821289062] diff --git a/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt b/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt index 49d5609f..590bed9c 100644 --- a/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt +++ b/vgg16/latest_kpis/flowers_32_gpu_memory_factor.txt @@ -1 +1 @@ -[8938] \ No newline at end of file +[8938] diff --git a/vgg16/latest_kpis/flowers_32_train_speed_factor.txt b/vgg16/latest_kpis/flowers_32_train_speed_factor.txt index 4a131d3d..b767a1cd 100644 --- a/vgg16/latest_kpis/flowers_32_train_speed_factor.txt +++ b/vgg16/latest_kpis/flowers_32_train_speed_factor.txt @@ -1 +1 @@ -[51.50917434692383] \ No newline at end of file +[51.00917434692383] diff --git a/vgg16/model.py b/vgg16/model.py index 25b5719b..fa5c25d5 100644 --- a/vgg16/model.py +++ b/vgg16/model.py @@ -67,6 +67,7 @@ def vgg16_bn_drop(input): """ vgg16_bn_drop """ + def conv_block(input, num_filter, groups, dropouts): """ conv_block @@ -166,8 +167,9 @@ def test(exe): """ test_accuracy = fluid.average.WeightedAverage() for batch_id, data in enumerate(test_reader()): - img_data = np.array(map(lambda x: x[0].reshape(data_shape), - data)).astype("float32") + img_data = np.array( + map(lambda x: x[0].reshape(data_shape), data)).astype( + "float32") y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = y_data.reshape([-1, 1]) @@ -186,7 +188,7 @@ def test(exe): for kpi in tracking_kpis: if kpi.name == '%s_%s_train_speed' % (args.data_set, args.batch_size): train_speed_kpi = kpi - + iters, num_samples, start_time = 0, 0, time.time() accuracy = fluid.average.WeightedAverage() for pass_id in range(args.pass_num): @@ -199,8 +201,9 @@ def test(exe): num_samples = 0 if iters == args.iterations: break - img_data = np.array(map(lambda x: x[0].reshape(data_shape), - data)).astype("float32") + img_data = np.array( + map(lambda x: x[0].reshape(data_shape), data)).astype( + 
"float32") y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = y_data.reshape([-1, 1]) @@ -233,7 +236,8 @@ def test(exe): if args.with_test: pass_test_acc = test(exe) break - #train_acc_kpi.persist() + +#train_acc_kpi.persist() train_speed_kpi.persist() @@ -254,30 +258,31 @@ def collect_gpu_memory_data(alive): global is_alive status, output = commands.getstatusoutput('rm -rf memory.txt') if status == 0: - print('del memory.txt') + print('del memory.txt') command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv -lms 1 > memory.txt" % args.gpu_id p = subprocess.Popen(command, shell=True) if p.pid < 0: - print('Get GPU memory data error') - while(is_alive): + print('Get GPU memory data error') + while (is_alive): time.sleep(1) p.kill() def save_gpu_data(mem_list): gpu_memory_kpi = None - for kpi in tracking_kpis: + for kpi in tracking_kpis: if kpi.name == '%s_%s_gpu_memory' % (args.data_set, args.batch_size): gpu_memory_kpi = kpi gpu_memory_kpi.add_record(max(mem_list)) gpu_memory_kpi.persist() - + if __name__ == "__main__": print_arguments() global is_alive is_alive = True - collect_memory_thread = threading.Thread(target=collect_gpu_memory_data, args=(is_alive,)) + collect_memory_thread = threading.Thread( + target=collect_gpu_memory_data, args=(is_alive, )) collect_memory_thread.setDaemon(True) collect_memory_thread.start() main() From 0e2ba0638eb590096b9ac98320b70b2056f29d0e Mon Sep 17 00:00:00 2001 From: Paddle CI Date: Tue, 29 May 2018 13:03:53 +0800 Subject: [PATCH 52/67] add multi card for text_classification --- text_classification/continuous_evaluation.py | 8 +++- .../lstm_pass_duration_card4_factor.txt | 1 + .../lstm_train_cost_card4_factor.txt | 1 + text_classification/run.xsh | 9 +++- text_classification/train.py | 46 +++++++++++++------ text_classification/utils.py | 18 +++----- 6 files changed, 55 insertions(+), 28 deletions(-) create mode 100644 text_classification/latest_kpis/lstm_pass_duration_card4_factor.txt create mode 100644 text_classification/latest_kpis/lstm_train_cost_card4_factor.txt diff --git a/text_classification/continuous_evaluation.py b/text_classification/continuous_evaluation.py index 133a0d35..9d9c9240 100644 --- a/text_classification/continuous_evaluation.py +++ b/text_classification/continuous_evaluation.py @@ -10,4 +10,10 @@ lstm_train_cost_kpi = CostKpi('lstm_train_cost', 5, 0) lstm_pass_duration_kpi = DurationKpi('lstm_pass_duration', 0.02, 0, actived=True) -tracking_kpis = [lstm_train_cost_kpi, lstm_pass_duration_kpi] +lstm_train_cost_kpi_card4 = CostKpi('lstm_train_cost_card4', 0.2, 0) +lstm_pass_duration_kpi_card4 = DurationKpi('lstm_pass_duration_card4', 0.02, 0, actived=True) + +tracking_kpis = [ + lstm_train_cost_kpi, lstm_pass_duration_kpi, + lstm_train_cost_kpi_card4, lstm_pass_duration_kpi_card4, + ] diff --git a/text_classification/latest_kpis/lstm_pass_duration_card4_factor.txt b/text_classification/latest_kpis/lstm_pass_duration_card4_factor.txt new file mode 100644 index 00000000..bfd66206 --- /dev/null +++ b/text_classification/latest_kpis/lstm_pass_duration_card4_factor.txt @@ -0,0 +1 @@ +[17.750867716471355] \ No newline at end of file diff --git a/text_classification/latest_kpis/lstm_train_cost_card4_factor.txt b/text_classification/latest_kpis/lstm_train_cost_card4_factor.txt new file mode 100644 index 00000000..f8d4e66e --- /dev/null +++ b/text_classification/latest_kpis/lstm_train_cost_card4_factor.txt @@ -0,0 +1 @@ +[0.0030332264248281717] diff --git a/text_classification/run.xsh 
b/text_classification/run.xsh
index 9f93ed3d..29c8faab 100755
--- a/text_classification/run.xsh
+++ b/text_classification/run.xsh
@@ -2,8 +2,13 @@
 export MKL_NUM_THREADS=1
 export OMP_NUM_THREADS=1
-cudaid=${text_classification:=0} # use 0-th card as default
+
+cudaid=${text_classification:=0}
+export CUDA_VISIBLE_DEVICES=$cudaid
+FLAGS_benchmark=true python train.py --model lstm
+
+cudaid=${text_classification_m:=0,1,2,3} # use 4 cards as default
 export CUDA_VISIBLE_DEVICES=$cudaid
 #LSTM pass_num 15
-FLAGS_benchmark=true python train.py lstm
+FLAGS_benchmark=true python train.py --model lstm --gpu_card_num 4
diff --git a/text_classification/train.py b/text_classification/train.py
index b22001ea..dfb3f877 100644
--- a/text_classification/train.py
+++ b/text_classification/train.py
@@ -5,14 +5,24 @@
 import paddle.fluid as fluid
 import paddle
-
+import argparse
 import utils
 from nets import bow_net
 from nets import cnn_net
 from nets import lstm_net
 from nets import gru_net
-from continuous_evaluation import lstm_train_cost_kpi, lstm_pass_duration_kpi
+from continuous_evaluation import *
+fluid.default_startup_program().random_seed = 99
+
+def parse_args():
+    parser = argparse.ArgumentParser("text_classification model benchmark.")
+    parser.add_argument(
+        '--model', type=str, default="lstm", help='model to run.')
+    parser.add_argument(
+        '--gpu_card_num', type=int, default=1, help='number of GPU cards used.')
+    args = parser.parse_args()
+    return args
 
 def train(train_reader,
           word_dict,
@@ -26,6 +36,7 @@ def train(train_reader,
     """
     train network
     """
+    args = parse_args()
     data = fluid.layers.data(
         name="words", shape=[1], dtype="int64", lod_level=1)
@@ -34,7 +45,7 @@ def train(train_reader,
     if not parallel:
         cost, acc, prediction = network(data, label, len(word_dict))
     else:
-        places = fluid.layers.get_places(device_count=2)
+        places = fluid.layers.get_places()
         pd = fluid.layers.ParallelDo(places)
         with pd.do():
             cost, acc, prediction = network(
@@ -76,20 +87,29 @@ def train(train_reader,
         print("pass_id: %d, avg_acc: %f, avg_cost: %f" %
               (pass_id, avg_acc, avg_cost))
         if pass_id == pass_num - 1:
-            lstm_train_cost_kpi.add_record(newest_avg_cost)
-            lstm_pass_duration_kpi.add_record(total_time / pass_num)
+            if args.gpu_card_num == 1:
+                lstm_train_cost_kpi.add_record(newest_avg_cost)
+                lstm_pass_duration_kpi.add_record(total_time / pass_num)
+            else:
+                lstm_train_cost_kpi_card4.add_record(newest_avg_cost)
+                lstm_pass_duration_kpi_card4.add_record(total_time / pass_num)
+
         epoch_model = save_dirname + "/" + "epoch" + str(pass_id)
         fluid.io.save_inference_model(epoch_model, ["words", "label"], acc,
                                       exe)
-    lstm_train_cost_kpi.persist()
-    lstm_pass_duration_kpi.persist()
-
+    if args.gpu_card_num == 1:
+        lstm_train_cost_kpi.persist()
+        lstm_pass_duration_kpi.persist()
+    else:
+        lstm_train_cost_kpi_card4.persist()
+        lstm_pass_duration_kpi_card4.persist()
 
 def train_net():
+    args = parse_args()
     word_dict, train_reader, test_reader = utils.prepare_data(
         "imdb", self_dict=False, batch_size=128, buf_size=50000)
 
-    if sys.argv[1] == "bow":
+    if args.model == "bow":
         train(
             train_reader,
             word_dict,
             bow_net,
             use_cuda=True,
             parallel=False,
             save_dirname="bow_model",
             lr=0.002,
             pass_num=30,
             batch_size=128)
-    elif sys.argv[1] == "cnn":
+    elif args.model == "cnn":
         train(
             train_reader,
             word_dict,
             cnn_net,
             use_cuda=True,
             parallel=False,
             save_dirname="cnn_model",
             lr=0.01,
             pass_num=30,
             batch_size=4)
-    elif sys.argv[1] == "lstm":
+    elif args.model == "lstm":
         train(
             train_reader,
             word_dict,
             lstm_net,
             use_cuda=True,
-            parallel=False,
+            parallel=True,
             save_dirname="lstm_model",
             lr=0.05,
pass_num=15, batch_size=4) - elif sys.argv[1] == "gru": + elif args.model == "gru": train( train_reader, word_dict, diff --git a/text_classification/utils.py b/text_classification/utils.py index bda95656..bff77d11 100644 --- a/text_classification/utils.py +++ b/text_classification/utils.py @@ -69,35 +69,29 @@ def prepare_data(data_type="imdb", if data_type == "imdb": train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=buf_size), + paddle.dataset.imdb.train(word_dict), batch_size=batch_size) test_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.test(word_dict), buf_size=buf_size), + paddle.dataset.imdb.test(word_dict), batch_size=batch_size) elif data_type == "light_imdb": train_reader = paddle.batch( - paddle.reader.shuffle( - light_imdb.train(word_dict), buf_size=buf_size), + light_imdb.train(word_dict), batch_size=batch_size) test_reader = paddle.batch( - paddle.reader.shuffle( - light_imdb.test(word_dict), buf_size=buf_size), + light_imdb.test(word_dict), batch_size=batch_size) elif data_type == "tiny_imdb": train_reader = paddle.batch( - paddle.reader.shuffle( - tiny_imdb.train(word_dict), buf_size=buf_size), + tiny_imdb.train(word_dict), batch_size=batch_size) test_reader = paddle.batch( - paddle.reader.shuffle( - tiny_imdb.test(word_dict), buf_size=buf_size), + tiny_imdb.test(word_dict), batch_size=batch_size) else: raise RuntimeError("no such dataset") From 2d97d55951a65db0412f725d0e3eeedb027b6617 Mon Sep 17 00:00:00 2001 From: guochaorong <32069604+guochaorong@users.noreply.github.com> Date: Tue, 29 May 2018 14:44:18 +0800 Subject: [PATCH 53/67] Update continuous_evaluation.py --- image_classification/continuous_evaluation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/image_classification/continuous_evaluation.py b/image_classification/continuous_evaluation.py index 163251bf..21f3ea06 100644 --- a/image_classification/continuous_evaluation.py +++ b/image_classification/continuous_evaluation.py @@ -4,7 +4,6 @@ from kpi import CostKpi, DurationKpi, AccKpi train_acc_top1_kpi = AccKpi('train_acc_top1_kpi', 0.05, 0, - actived=True, desc='TOP1 ACC') train_acc_top5_kpi = AccKpi('train_acc_top5_kpi', 0.05, 0, actived=True, From d94b7c191dbe8c04142fe38f7428b797eca4fe74 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Tue, 29 May 2018 18:25:16 -0700 Subject: [PATCH 54/67] add cluster spec support --- vgg16_aws_dist/ce_runner.py | 169 ++++++++++++++---------- vgg16_aws_dist/continuous_evaluation.py | 32 ++++- vgg16_aws_dist/run.xsh | 4 +- 3 files changed, 130 insertions(+), 75 deletions(-) diff --git a/vgg16_aws_dist/ce_runner.py b/vgg16_aws_dist/ce_runner.py index 7091393d..d45d7da9 100644 --- a/vgg16_aws_dist/ce_runner.py +++ b/vgg16_aws_dist/ce_runner.py @@ -4,12 +4,13 @@ import numpy as np import threading import copy +import csv from aws_runner.client.train_command import TrainCommand # for ce env ONLY sys.path.append(os.environ['ceroot']) -from kpi import LessWorseKpi +from continuous_evaluation import cluster_specs, kpis_map, generate_kpi_id, generate_cluster_id from aws_runner.client.abclient import Abclient @@ -98,12 +99,6 @@ def print_arguments(): default="us-east-2a", help="aws zone id to place ec2 instances") -parser.add_argument( - '--trainer_count', type=int, default=1, help="Trainer count") - -parser.add_argument( - '--pserver_count', type=int, default=1, help="Pserver count") - parser.add_argument( '--action', type=str, default="create", help="create|cleanup|status") @@ -142,81 +137,119 @@ def 
print_arguments(): args = parser.parse_args() logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') -train_speed_kpi = LessWorseKpi('train_speed', 0.01) -kpis_to_track = {} - -def save_to_kpi(name, val): - val = float(val) - if name in kpis_to_track: - kpi_to_track = kpis_to_track[name] - else: - kpi_to_track = LessWorseKpi(name, 0.01) - kpi_to_track.add_record(np.array(val, dtype='float32')) - class DataCollector(object): - def __init__(self): - self.store = [] + _instance_store = {} + @classmethod + def get_instance_by_spec(cls, cluster_spec): + cluster_id = generate_cluster_id(cluster_spec) + if cluster_id not in cls._instance_store: + cls._instance_store[cluster_id] = cls(cluster_spec) + return cls._instance_store[cluster_id] + @classmethod + def persist_all(cls): + for _, collector in cls._instance_store.iteritems(): + collector.persist() + @classmethod + def generate_csv(cls): + with open("report.csv", "w") as csvfile: + fieldnames = [] + rows = [] + for cluster_id, collector in cls._instance_store.iteritems(): + row = { + "cluster_spec": cluster_id + } + for metric_name, _ in collector.store.iteritems(): + if metric_name not in fieldnames: + fieldnames.append(metric_name) + row[metric_name] = collector.avg(metric_name) + rows.append(row) + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for row in rows: + writer.writerow(row) + def __init__(self, cluster_spec): + self.store = {} self.metric_data_identifier = "**metrics_data: " + self.cluster_spec = cluster_spec + self.cluster_id = generate_cluster_id(cluster_spec) def log_processor(self, msg): if (msg.startswith(self.metric_data_identifier)): str_msg = msg.replace(self.metric_data_identifier, "") metrics_raw = str_msg.split(",") for metric in metrics_raw: metric_data = metric.split("=") - if metric_data[0].strip() == "train_speed": - self.save(metric_data[1]) - def save(self, val): - self.store.append(float(val)) - def avg(self): - return np.average(self.store) - -solo_data_collector = DataCollector() -def train_without_pserver(args, lock): - def log_handler(source, id): - for line in iter(source.readline, ""): - logging.info("without pserver:") - logging.info(line) - solo_data_collector.log_processor(line) - - args.pserver_count = 0 - args.trainer_count = 1 + self.save(metric_data[0], metric_data[1]) + def save(self, key, val): + if (key not in self.store): + self.store[key] = [] + logging.info("going to save " + key + "=" + str(val) + "from " + self.cluster_id) + self.store[key].append(float(val)) + def get(self, key): + if (key in self.store): + return self.store[key] + return None + def avg(self, key): + vals = self.store[key] + return sum(vals)/float(len(vals)) + def persist(self): + for metric_name in self.store.iteritems(): + kpi_id = generate_kpi_id(metric_name, self.cluster_spec) + logging.info("going to persist kpi " + kpi_id) + if kpi_id in kpis_map: + kpi_instance = kpis_map[kpi_id] + kpi_instance.add_record(np.array(self.avg(metric_name), dtype='float32')) + kpi_instance.persist() + logging.info("done persisting kpi " + kpi_id) + else: + logging.info("no such kpi id found in map!!!") + logging.info(kpi_id) + +def train_with_spec(spec, args, lock): + batch_size = spec[0] + args.trainer_count = spec[1] + # gpus_per_trainer_count = spec[2] + args.pserver_count = spec[3] trainer_command = TrainCommand(args.trainer_command) - trainer_command.update({"local":"yes"}) + if args.pserver_count == 0: + trainer_command.update({"local":"yes"}) + 
trainer_command.update({"batch_size":str(batch_size)}) args.trainer_command = trainer_command.unparse() - logging.info(args) - abclient = Abclient(args, log_handler, lock) - abclient.create() + args.pserver_command = args.trainer_command -cluster_data_collector = DataCollector() -def train_with_pserver(args, lock): - def log_handler(source, id): - for line in iter(source.readline, ""): - logging.info("with pserver:") - logging.info(line) - cluster_data_collector.log_processor(line) - - logging.info(args) - abclient = Abclient(args, log_handler, lock) + data_collector = DataCollector.get_instance_by_spec(spec) + + abclient = Abclient(args, data_collector.log_processor, lock) abclient.create() if __name__ == "__main__": print_arguments() if args.action == "create": lock = threading.Lock() - thread_no_pserver = threading.Thread( - target=train_without_pserver, - args=(copy.copy(args), lock,) - ) - thread_with_pserver = threading.Thread( - target=train_with_pserver, - args=(copy.copy(args), lock, ) - ) - thread_no_pserver.start() - thread_with_pserver.start() - thread_no_pserver.join() - thread_with_pserver.join() - - speedup_rate = cluster_data_collector.avg()/solo_data_collector.avg() - logging.info("speed up rate is "+ str(speedup_rate)) - - save_to_kpi("speedup_rate", speedup_rate.item()) + testing_threads = [] + for cluster_spec in cluster_specs: + thread = threading.Thread( + target=train_with_spec, + args=(cluster_spec, copy.copy(args), lock,) + ) + testing_threads.append(thread) + + for testing_thread in testing_threads: + testing_thread.start() + + for testing_thread in testing_threads: + testing_thread.join() + + # generate speedup rate + # 0 spec is the baseline + def get_speed_and_collector_by_spec(spec): + data_collector = DataCollector.get_instance_by_spec(spec) + return data_collector.avg("train_speed"), data_collector + + base_speed, _ = get_speed_and_collector_by_spec(cluster_specs[0]) + for cluster_spec in cluster_specs[1:]: + speed, data_collector = get_speed_and_collector_by_spec(cluster_spec) + data_collector.save("speedup", base_speed/speed) + + DataCollector.persist_all() + # DataCollector.generate_csv() + diff --git a/vgg16_aws_dist/continuous_evaluation.py b/vgg16_aws_dist/continuous_evaluation.py index 87b88c48..ac896a57 100644 --- a/vgg16_aws_dist/continuous_evaluation.py +++ b/vgg16_aws_dist/continuous_evaluation.py @@ -1,10 +1,34 @@ import os import sys sys.path.append(os.environ['ceroot']) -from kpi import LessWorseKpi +from kpi import LessWorseKpi, GreaterWorseKpi -speedup_rate_kpi = LessWorseKpi('speedup_rate', 0.01) +kpis_specs = { + "speedup": [LessWorseKpi, 0.01], + "train_speed":[LessWorseKpi, 0.01], + "converge_speed":[GreaterWorseKpi, 0.01], + "gpu_memory":[GreaterWorseKpi, 0.01], + "acc_4passes":[GreaterWorseKpi, 0.01], +} -tracking_kpis = [ - speedup_rate_kpi, +# each row represets a cluster setting with the following columns +# batch_size, trainer_count, gpus_per_trainer_count, pserver_count +cluster_specs = [ + [64, 1, 1, 0], + [64, 8, 1, 8], + [64, 16, 1, 8], + [64, 32, 1, 8], ] + +kpis_map = {} + +def generate_cluster_id(cluster_spec): + return "_".join(map(str, cluster_spec)) +def generate_kpi_id(kpi_name, cluster_spec): + return kpi_name + "_" + generate_cluster_id(cluster_spec) + +for kpi_type_name, (Kpi_class, diff_thre) in kpis_specs.iteritems(): + for cluster_spec in cluster_specs: + kpi_id = generate_kpi_id(kpi_type_name, cluster_spec) + the_kpi = Kpi_class(kpi_id, diff_thre) + kpis_map[kpi_id] = the_kpi \ No newline at end of file diff --git 
a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index 87c77cbf..7910c581 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -6,7 +6,7 @@ PADDLE_PATH=$CURRENT_FILE_DIR/../../.. paddle_build_path=$PADDLE_PATH/build paddle_docker_hub_tag="paddlepaddlece/paddle:latest" vgg16_test_dockerhub_tag="paddlepaddlece/vgg16_dist:latest" -training_command="local:no,batch_size:128,num_passes:1" +training_command="local:no,batch_size:128,num_passes:10,acc_target:0.6" # clean up docker docker system prune -f @@ -57,8 +57,6 @@ python ce_runner.py \ --key_name aws_benchmark_us_east \ --security_group_id sg-95539dff \ --online_mode yes \ - --trainer_count 2 \ - --pserver_count 2 \ --pserver_command $training_command \ --trainer_command $training_command \ --docker_image $vgg16_test_dockerhub_tag \ No newline at end of file From b2a7afe6cbe190d1c3cb7ee5108c78e2c85f51cc Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Wed, 30 May 2018 00:29:52 -0700 Subject: [PATCH 55/67] fixed log_processer; more logs; removed docker login --- vgg16_aws_dist/ce_runner.py | 26 +++++++++++++++++-------- vgg16_aws_dist/continuous_evaluation.py | 12 +++++++++--- vgg16_aws_dist/run.xsh | 5 +++-- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/vgg16_aws_dist/ce_runner.py b/vgg16_aws_dist/ce_runner.py index d45d7da9..a1da18b1 100644 --- a/vgg16_aws_dist/ce_runner.py +++ b/vgg16_aws_dist/ce_runner.py @@ -172,13 +172,17 @@ def __init__(self, cluster_spec): self.metric_data_identifier = "**metrics_data: " self.cluster_spec = cluster_spec self.cluster_id = generate_cluster_id(cluster_spec) - def log_processor(self, msg): - if (msg.startswith(self.metric_data_identifier)): - str_msg = msg.replace(self.metric_data_identifier, "") - metrics_raw = str_msg.split(",") - for metric in metrics_raw: - metric_data = metric.split("=") - self.save(metric_data[0], metric_data[1]) + def log_processor(self, source, log_type): + for msg in iter(source.readline, ""): + logging.info(self.cluster_id) + logging.info(msg) + if (msg.startswith(self.metric_data_identifier)): + logging.info("metric data found, parse and save it") + str_msg = msg.replace(self.metric_data_identifier, "") + metrics_raw = str_msg.split(",") + for metric in metrics_raw: + metric_data = metric.split("=") + self.save(metric_data[0], metric_data[1]) def save(self, key, val): if (key not in self.store): self.store[key] = [] @@ -205,6 +209,7 @@ def persist(self): logging.info(kpi_id) def train_with_spec(spec, args, lock): + logging.info("updating cluster config and starting client") batch_size = spec[0] args.trainer_count = spec[1] # gpus_per_trainer_count = spec[2] @@ -217,7 +222,8 @@ def train_with_spec(spec, args, lock): args.pserver_command = args.trainer_command data_collector = DataCollector.get_instance_by_spec(spec) - + + logging.info(args) abclient = Abclient(args, data_collector.log_processor, lock) abclient.create() @@ -227,6 +233,8 @@ def train_with_spec(spec, args, lock): lock = threading.Lock() testing_threads = [] for cluster_spec in cluster_specs: + logging.info("creating cluster thread with spec") + logging.info(cluster_spec) thread = threading.Thread( target=train_with_spec, args=(cluster_spec, copy.copy(args), lock,) @@ -239,6 +247,8 @@ def train_with_spec(spec, args, lock): for testing_thread in testing_threads: testing_thread.join() + logging.info("all thread joined") + # generate speedup rate # 0 spec is the baseline def get_speed_and_collector_by_spec(spec): diff --git a/vgg16_aws_dist/continuous_evaluation.py 
b/vgg16_aws_dist/continuous_evaluation.py index ac896a57..1dc7cb9b 100644 --- a/vgg16_aws_dist/continuous_evaluation.py +++ b/vgg16_aws_dist/continuous_evaluation.py @@ -13,11 +13,17 @@ # each row represets a cluster setting with the following columns # batch_size, trainer_count, gpus_per_trainer_count, pserver_count +# disable production cluster config for now +# cluster_specs = [ +# [64, 1, 1, 0], +# [64, 8, 1, 8], +# [64, 16, 1, 8], +# [64, 32, 1, 8], +# ] + cluster_specs = [ [64, 1, 1, 0], - [64, 8, 1, 8], - [64, 16, 1, 8], - [64, 32, 1, 8], + [64, 4, 1, 4], ] kpis_map = {} diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index 7910c581..22cd0344 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -1,5 +1,6 @@ #!/bin/bash +set -xe CURRENT_FILE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" PADDLE_PATH=$CURRENT_FILE_DIR/../../.. @@ -12,7 +13,8 @@ training_command="local:no,batch_size:128,num_passes:10,acc_target:0.6" docker system prune -f # loginto docker hub -docker login -u $DOCKER_HUB_USERNAME -p $DOCKER_HUB_PASSWORD +# login is now performed in teamcity +# docker login -u $DOCKER_HUB_USERNAME -p $DOCKER_HUB_PASSWORD # create paddle docker image echo "going to build and push paddle production image" @@ -37,7 +39,6 @@ cd .. echo "going to build vgg16_dist_test docker image and push it" docker build -t $vgg16_test_dockerhub_tag ./vgg16_dist_test docker push $vgg16_test_dockerhub_tag -docker logout # fetch runner and install dependencies echo "going to work with aws_runner" From ada36a87af96de7e0ad9ab85d059568b965cf3df Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Wed, 30 May 2018 16:32:36 -0700 Subject: [PATCH 56/67] move testing py to this repo; added chunk exec; --- vgg16_aws_dist/ce_runner.py | 111 ++++-- vgg16_aws_dist/run.xsh | 6 +- vgg16_aws_dist/vgg16_dist_test/Dockerfile | 7 + vgg16_aws_dist/vgg16_dist_test/vgg16_fluid.py | 329 ++++++++++++++++++ 4 files changed, 426 insertions(+), 27 deletions(-) create mode 100644 vgg16_aws_dist/vgg16_dist_test/Dockerfile create mode 100644 vgg16_aws_dist/vgg16_dist_test/vgg16_fluid.py diff --git a/vgg16_aws_dist/ce_runner.py b/vgg16_aws_dist/ce_runner.py index a1da18b1..c46fc483 100644 --- a/vgg16_aws_dist/ce_runner.py +++ b/vgg16_aws_dist/ce_runner.py @@ -184,6 +184,9 @@ def log_processor(self, source, log_type): metric_data = metric.split("=") self.save(metric_data[0], metric_data[1]) def save(self, key, val): + key = key.strip() + if isinstance(val, str): + val = val.strip() if (key not in self.store): self.store[key] = [] logging.info("going to save " + key + "=" + str(val) + "from " + self.cluster_id) @@ -193,10 +196,12 @@ def get(self, key): return self.store[key] return None def avg(self, key): - vals = self.store[key] + vals = self.get(key) + if vals is None: + return None return sum(vals)/float(len(vals)) def persist(self): - for metric_name in self.store.iteritems(): + for metric_name, _ in self.store.iteritems(): kpi_id = generate_kpi_id(metric_name, self.cluster_spec) logging.info("going to persist kpi " + kpi_id) if kpi_id in kpis_map: @@ -227,27 +232,84 @@ def train_with_spec(spec, args, lock): abclient = Abclient(args, data_collector.log_processor, lock) abclient.create() +''' +ClusterIterator relies on spec structure as follows + batch_size, trainer_count, gpus_per_trainer_count, pserver_count + cluster_specs = [ + [64, 1, 1, 0], + [64, 8, 1, 8], + [64, 16, 1, 8], + [64, 32, 1, 8], + ] + it will sequentially distribute specs into chunks and make sure each chunk + does not exceeds trainer 
and pserver count limit
+  the specs above will be distributed into 2 chunks
+[[64, 1, 1, 0], [64, 8, 1, 8]]
+and
+[[64, 16, 1, 8]]
+
+[64, 32, 1, 8] itself does not fit in a single chunk, thus gets discarded
+
+'''
+class ClusterIterator:
+    def __init__(self, specs, trainer_count_threshold = 20, pserver_count_threshold = 10):
+        self.specs = specs
+        self.trainer_count_threshold = trainer_count_threshold
+        self.pserver_count_threshold = pserver_count_threshold
+        self.bad_specs = []
+    def __iter__(self):
+        return self
+    def spec_can_not_fit(self, trainer_count, pserver_count):
+        return (trainer_count > self.trainer_count_threshold or pserver_count > self.pserver_count_threshold)
+    def next(self):
+        specs_to_ret = []
+        trainer_count = 0
+        pserver_count = 0
+        if len(self.specs) == 0:
+            raise StopIteration()
+        else:
+            while len(self.specs) != 0:
+                next_spec = self.specs[0]
+                # when a single spec can't even fit, move it to the bad spec list
+                if self.spec_can_not_fit(next_spec[1], next_spec[3]):
+                    self.bad_specs.append(self.specs.pop(0))
+                    continue
+                trainer_count += next_spec[1]
+                pserver_count += next_spec[3]
+                if self.spec_can_not_fit(trainer_count, pserver_count):
+                    break
+                specs_to_ret.append(self.specs.pop(0))
+        if len(specs_to_ret) == 0:
+            if len(self.bad_specs) != 0:
+                logging.info("%d specs could not fit in any test chunk" % len(self.bad_specs))
+            raise StopIteration()
+        return specs_to_ret
+
 if __name__ == "__main__":
     print_arguments()
     if args.action == "create":
         lock = threading.Lock()
-        testing_threads = []
-        for cluster_spec in cluster_specs:
-            logging.info("creating cluster thread with spec")
-            logging.info(cluster_spec)
-            thread = threading.Thread(
-                target=train_with_spec,
-                args=(cluster_spec, copy.copy(args), lock,)
-            )
-            testing_threads.append(thread)
-
-        for testing_thread in testing_threads:
-            testing_thread.start()
-
-        for testing_thread in testing_threads:
-            testing_thread.join()
+        cluster_specs_origin = copy.copy(cluster_specs)
+        for specs in ClusterIterator(cluster_specs):
+            logging.info("starting a new chunk of test")
+            testing_threads = []
+            for cluster_spec in specs:
+                logging.info("creating cluster thread with spec")
+                logging.info(cluster_spec)
+                thread = threading.Thread(
+                    target=train_with_spec,
+                    args=(cluster_spec, copy.copy(args), lock,)
+                )
+                testing_threads.append(thread)
+
+            for testing_thread in testing_threads:
+                testing_thread.start()
+
+            for testing_thread in testing_threads:
+                testing_thread.join()
+            logging.info("testing chunk ended")

-        logging.info("all thread joined")
+        logging.info("all testing ended")

         # generate speedup rate
         # 0 spec is the baseline
@@ -255,10 +317,15 @@ def get_speed_and_collector_by_spec(spec):
             data_collector = DataCollector.get_instance_by_spec(spec)
             return data_collector.avg("train_speed"), data_collector

-        base_speed, _ = get_speed_and_collector_by_spec(cluster_specs[0])
-        for cluster_spec in cluster_specs[1:]:
-            speed, data_collector = get_speed_and_collector_by_spec(cluster_spec)
-            data_collector.save("speedup", base_speed/speed)
+        logging.info("generating speedup")
+
+        base_speed, _ = get_speed_and_collector_by_spec(cluster_specs_origin[0])
+        logging.info("base speed is %s" % base_speed)
+        if base_speed is not None:
+            for cluster_spec in cluster_specs_origin:
+                speed, data_collector = get_speed_and_collector_by_spec(cluster_spec)
+                if speed is not None:
+                    data_collector.save("speedup", speed*cluster_spec[1]*cluster_spec[2]/base_speed)

         DataCollector.persist_all()
         # DataCollector.generate_csv()
diff --git a/vgg16_aws_dist/run.xsh 
b/vgg16_aws_dist/run.xsh index 22cd0344..e63fd887 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -24,17 +24,13 @@ docker push $paddle_docker_hub_tag # build test docker image echo "going to prepare and build vgg16_dist_test" cd $CURRENT_FILE_DIR -if [ ! -d vgg16_dist_test ]; then - echo "No vgg16_dist_test repo found, going to clone one" - git clone https://github.com/putcn/vgg16_dist_test.git -fi + cd vgg16_dist_test if [ -d ~/.cache/paddle/dataset/cifar ]; then echo "host cifar cache found, copying it to docker root" mkdir -p .cache/paddle/dataset/ cp -r -f ~/.cache/paddle/dataset/cifar .cache/paddle/dataset/ fi -git pull cd .. echo "going to build vgg16_dist_test docker image and push it" docker build -t $vgg16_test_dockerhub_tag ./vgg16_dist_test diff --git a/vgg16_aws_dist/vgg16_dist_test/Dockerfile b/vgg16_aws_dist/vgg16_dist_test/Dockerfile new file mode 100644 index 00000000..467fdbaa --- /dev/null +++ b/vgg16_aws_dist/vgg16_dist_test/Dockerfile @@ -0,0 +1,7 @@ +FROM paddlepaddlece/paddle:latest + +ENV HOME /root +COPY ./ /root/ +WORKDIR /root +RUN apt install -y python-opencv +ENTRYPOINT ["python", "vgg16_fluid.py"] diff --git a/vgg16_aws_dist/vgg16_dist_test/vgg16_fluid.py b/vgg16_aws_dist/vgg16_dist_test/vgg16_fluid.py new file mode 100644 index 00000000..3fd28575 --- /dev/null +++ b/vgg16_aws_dist/vgg16_dist_test/vgg16_fluid.py @@ -0,0 +1,329 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""VGG16 benchmark in Fluid""" +from __future__ import print_function + +import sys +import time +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.fluid.profiler as profiler +import argparse +import functools +import os +from paddle.fluid import debuger + + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + '--batch_size', type=int, default=128, help="Batch size for training.") +parser.add_argument( + '--learning_rate', + type=float, + default=1e-3, + help="Learning rate for training.") +parser.add_argument('--num_passes', type=int, default=50, help="No. 
of passes.") +parser.add_argument( + '--device', + type=str, + default='CPU', + choices=['CPU', 'GPU'], + help="The device type.") +parser.add_argument('--device_id', type=int, default=0, help="The device id.") +parser.add_argument( + '--data_format', + type=str, + default='NCHW', + choices=['NCHW', 'NHWC'], + help='The data order, now only support NCHW.') +parser.add_argument( + '--data_set', + type=str, + default='cifar10', + choices=['cifar10', 'flowers'], + help='Optional dataset for benchmark.') +parser.add_argument( + '--local', + type=str2bool, + default=True, + help='Whether to run as local mode.') + +parser.add_argument( + "--ps_hosts", + type=str, + default=os.getenv("PSERVER_HOSTS"), + help="Comma-separated list of hostname:port pairs") +parser.add_argument( + "--trainer_hosts", + type=str, + default="", + help="Comma-separated list of hostname:port pairs") +parser.add_argument( + "--profile", action='store_true', help="If set, profile a few steps.") + +parser.add_argument( + "--acc_target", default=None, help="trianing will be terminated when acc_target reaches") + +# Flags for defining the tf.train.Server +parser.add_argument( + "--task_index", type=int, default=os.getenv("TRAINER_INDEX"), help="Index of task within the job") +args = parser.parse_args() + + +def vgg16_bn_drop(input): + def conv_block(input, num_filter, groups, dropouts): + return fluid.nets.img_conv_group( + input=input, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act='relu', + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type='max') + + conv1 = conv_block(input, 64, 2, [0.3, 0]) + conv2 = conv_block(conv1, 128, 2, [0.4, 0]) + conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) + conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) + conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) + + drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5) + fc1 = fluid.layers.fc(input=drop, size=4096, act=None) + bn = fluid.layers.batch_norm(input=fc1, act='relu') + drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5) + fc2 = fluid.layers.fc(input=drop2, size=4096, act=None) + return fc2 + + +def main(): + if args.data_set == "cifar10": + classdim = 10 + if args.data_format == 'NCHW': + data_shape = [3, 32, 32] + else: + data_shape = [32, 32, 3] + else: + classdim = 102 + if args.data_format == 'NCHW': + data_shape = [3, 224, 224] + else: + data_shape = [224, 224, 3] + + # Input data + images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + # Train program + net = vgg16_bn_drop(images) + predict = fluid.layers.fc(input=net, size=classdim, act='softmax') + cost = fluid.layers.cross_entropy(input=predict, label=label) + avg_cost = fluid.layers.mean(x=cost) + + # Evaluator + batch_size = fluid.layers.create_tensor(dtype='int64') + batch_acc = fluid.layers.accuracy( + input=predict, label=label, total=batch_size) + + # inference program + inference_program = fluid.default_main_program().clone() + with fluid.program_guard(inference_program): + inference_program = fluid.io.get_inference_program(batch_acc) + + # Optimization + optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) + optimize_ops, params_grads = optimizer.minimize(avg_cost) + + # Initialize executor + place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace( + args.device_id) + exe = fluid.Executor(place) + + # test + def test(exe): + test_pass_acc = 
fluid.average.WeightedAverage() + for batch_id, data in enumerate(test_reader()): + img_data = np.array(map(lambda x: x[0].reshape(data_shape), + data)).astype("float32") + y_data = np.array(map(lambda x: x[1], data)).astype("int64") + y_data = y_data.reshape([-1, 1]) + + outs = exe.run(inference_program, + feed={"pixel": img_data, + "label": y_data}, + fetch_list=[batch_acc, batch_size]) + test_pass_acc.add(value=np.array(outs[0]), weight=np.array(outs[1])) + + return test_pass_acc.eval() + + def train_loop(exe, trainer_prog): + iters = 0 + ts = time.time() + train_pass_acc = fluid.average.WeightedAverage() + acc_4passes = None + converge_speed = None + for pass_id in range(args.num_passes): + # train + start_time = time.time() + num_samples = 0 + train_pass_acc.reset() + + def run_step(batch_id, data): + img_data = np.array( + map(lambda x: x[0].reshape(data_shape), data)).astype( + "float32") + y_data = np.array(map(lambda x: x[1], data)).astype("int64") + y_data = y_data.reshape([-1, 1]) + + loss, acc, b_size = exe.run( + trainer_prog, + feed={"pixel": img_data, + "label": y_data}, + fetch_list=[avg_cost, batch_acc, batch_size]) + return loss, acc, b_size + + if args.profile and args.task_index == 0: + # warmup. + for batch_id, data in enumerate(train_reader()): + if batch_id > 5: break + run_step(batch_id, data) + with profiler.profiler('All', 'total', '/tmp/profile_vgg'): + for batch_id, data in enumerate(train_reader()): + if batch_id > 5: break + run_step(batch_id, data) + + for batch_id, data in enumerate(train_reader()): + ts = time.time() + loss, acc, b_size = run_step(batch_id, data) + iters += 1 + num_samples += len(data) + train_pass_acc.add(value=acc, weight=b_size) + print( + "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, " + "Speed = %.2f img/s" % (pass_id, iters, loss, acc, + len(data) / (time.time() - ts)) + ) # The accuracy is the accumulation of batches, but not the current batch. 
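+                # Editor's note: a worked example of the weighted accumulation
+                # above (numbers are illustrative): batches with acc 0.50
+                # (weight 100) and 0.70 (weight 300) give a pass accuracy of
+                # (0.50*100 + 0.70*300) / (100 + 300) = 0.65, which is what
+                # train_pass_acc.eval() reports at the end of the pass.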
+                # terminate training when acc_target is reached; record only
+                # the first time it is hit (acc_target arrives as a string)
+                if converge_speed is None and args.acc_target and acc >= float(args.acc_target):
+                    converge_speed = time.time() - start_time
+                    print("converge_speed set with %f" % converge_speed)
+
+            pass_elapsed = time.time() - start_time
+            pass_train_acc = train_pass_acc.eval()
+            pass_test_acc = test(exe)
+
+            if pass_id == 4:
+                print("acc_4passes set with %f" % pass_train_acc)
+                acc_4passes = float(pass_train_acc)
+
+            msgs = []
+            msgs.append("pass = %d" % pass_id)
+            msgs.append("train_speed = %f" % float(num_samples / pass_elapsed))
+            msgs.append("train_accuracy = %f" % pass_train_acc)
+            msgs.append("test_accuracy = %f" % pass_test_acc)
+
+            if isinstance(acc_4passes, float):
+                msgs.append("acc_4passes = %f" % acc_4passes)
+            if isinstance(converge_speed, float):
+                msgs.append("converge_speed = %f" % converge_speed)
+
+            print("**metrics_data: " + ", ".join(msgs))
+
+    if args.local:
+        # Parameter initialization
+        exe.run(fluid.default_startup_program())
+
+        # data reader
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.cifar.train10() if args.data_set == 'cifar10'
+                else paddle.dataset.flowers.train(),
+                buf_size=5120),
+            batch_size=args.batch_size)
+        test_reader = paddle.batch(
+            paddle.dataset.cifar.test10()
+            if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
+            batch_size=args.batch_size)
+        train_loop(exe, fluid.default_main_program())
+    else:
+        trainers = int(os.getenv("TRAINERS"))  # total trainer count
+        print("trainers total: ", trainers)
+
+        training_role = os.getenv(
+            "TRAINING_ROLE",
+            "TRAINER")  # get the training role: trainer/pserver
+
+        t = fluid.DistributeTranspiler()
+        t.transpile(
+            trainer_id=args.task_index,
+            pservers=args.ps_hosts,
+            trainers=trainers)
+
+        if training_role == "PSERVER":
+            current_endpoint = os.getenv("SERVER_ENDPOINT")
+            if not current_endpoint:
+                print("need env SERVER_ENDPOINT")
+                exit(1)
+            pserver_prog = t.get_pserver_program(current_endpoint)
+            pserver_startup = t.get_startup_program(current_endpoint,
+                                                    pserver_prog)
+            exe.run(pserver_startup)
+            exe.run(pserver_prog)
+        elif training_role == "TRAINER":
+            # Parameter initialization
+            exe.run(fluid.default_startup_program())
+
+            # data reader
+            train_reader = paddle.batch(
+                paddle.reader.shuffle(
+                    paddle.dataset.cifar.train10() if args.data_set == 'cifar10'
+                    else paddle.dataset.flowers.train(),
+                    buf_size=5120),
+                batch_size=args.batch_size)
+            test_reader = paddle.batch(
+                paddle.dataset.cifar.test10() if args.data_set == 'cifar10' else
+                paddle.dataset.flowers.test(),
+                batch_size=args.batch_size)
+
+            trainer_prog = t.get_trainer_program()
+            feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
+            # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver
+            exe.run(fluid.default_startup_program())
+            train_loop(exe, trainer_prog)
+        else:
+            print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
+
+
+def print_arguments():
+    print('----------- Configuration Arguments -----------')
+    for arg, value in sorted(vars(args).iteritems()):
+        print('%s: %s' % (arg, value))
+    print('------------------------------------------------')
+
+
+if __name__ == "__main__":
+    print_arguments()
+    main()
From b93e4c8c767250fe76afc99d399f13a74b13a6cf Mon Sep 17 00:00:00 2001
From: Xi Chen
Date: Wed, 30 May 2018 17:57:53 -0700
Subject: [PATCH 57/67] update cluster spec due to aws limit

---
 vgg16_aws_dist/ce_runner.py             | 2 +-
 vgg16_aws_dist/continuous_evaluation.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git 
a/vgg16_aws_dist/ce_runner.py b/vgg16_aws_dist/ce_runner.py index c46fc483..a4a78c22 100644 --- a/vgg16_aws_dist/ce_runner.py +++ b/vgg16_aws_dist/ce_runner.py @@ -252,7 +252,7 @@ def train_with_spec(spec, args, lock): ''' class ClusterIterator: - def __init__(self, specs, trainer_count_threshold = 20, pserver_count_threshold = 10): + def __init__(self, specs, trainer_count_threshold = 7, pserver_count_threshold = 10): self.specs = specs self.trainer_count_threshold = trainer_count_threshold self.pserver_count_threshold = pserver_count_threshold diff --git a/vgg16_aws_dist/continuous_evaluation.py b/vgg16_aws_dist/continuous_evaluation.py index 1dc7cb9b..4822788e 100644 --- a/vgg16_aws_dist/continuous_evaluation.py +++ b/vgg16_aws_dist/continuous_evaluation.py @@ -24,6 +24,7 @@ cluster_specs = [ [64, 1, 1, 0], [64, 4, 1, 4], + [64, 7, 1, 7], ] kpis_map = {} From 34ca85e0b85ecb3da9d2bd0ac292cf3cb3697ee6 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Thu, 31 May 2018 10:56:38 -0700 Subject: [PATCH 58/67] add __init__ and tracking_kpis for CE --- vgg16_aws_dist/__init__.py | 0 vgg16_aws_dist/continuous_evaluation.py | 3 +++ 2 files changed, 3 insertions(+) create mode 100644 vgg16_aws_dist/__init__.py diff --git a/vgg16_aws_dist/__init__.py b/vgg16_aws_dist/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vgg16_aws_dist/continuous_evaluation.py b/vgg16_aws_dist/continuous_evaluation.py index 4822788e..77a31911 100644 --- a/vgg16_aws_dist/continuous_evaluation.py +++ b/vgg16_aws_dist/continuous_evaluation.py @@ -29,6 +29,8 @@ kpis_map = {} +tracking_kpis = [] + def generate_cluster_id(cluster_spec): return "_".join(map(str, cluster_spec)) def generate_kpi_id(kpi_name, cluster_spec): @@ -38,4 +40,5 @@ def generate_kpi_id(kpi_name, cluster_spec): for cluster_spec in cluster_specs: kpi_id = generate_kpi_id(kpi_type_name, cluster_spec) the_kpi = Kpi_class(kpi_id, diff_thre) + tracking_kpis.append(the_kpi) kpis_map[kpi_id] = the_kpi \ No newline at end of file From 689538488c3783e90810948ba69e692e0d1d6cd9 Mon Sep 17 00:00:00 2001 From: guochaorong <32069604+guochaorong@users.noreply.github.com> Date: Fri, 1 Jun 2018 22:59:22 +0800 Subject: [PATCH 59/67] Update model.py --- mnist/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mnist/model.py b/mnist/model.py index 6e167345..1ae83922 100644 --- a/mnist/model.py +++ b/mnist/model.py @@ -12,11 +12,11 @@ from continuous_evaluation import (train_acc_kpi, train_cost_kpi, test_acc_kpi, train_duration_kpi, tracking_kpis) -SEED = 1 +SEED = 90 DTYPE = "float32" # random seed must set before configuring the network. 
-# fluid.default_startup_program().random_seed = SEED +fluid.default_startup_program().random_seed = SEED def parse_args(): From b7923391c331b60b9dfc6f3c1ef189c9a58c5f3b Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Fri, 1 Jun 2018 13:30:17 -0700 Subject: [PATCH 60/67] switch to fluid_benchmark; add multi gpu support --- vgg16_aws_dist/ce_runner.py | 35 +- vgg16_aws_dist/continuous_evaluation.py | 16 +- .../fluid_benchmark_for_aws/Dockerfile | 7 + .../fluid_benchmark_for_aws/README.md | 73 +++ .../fluid_benchmark.py | 461 ++++++++++++++++++ .../fluid_benchmark_for_aws/kube_gen_job.py | 191 ++++++++ .../kube_templates/__init__.py | 66 +++ .../kube_templates/pserver.py | 58 +++ .../kube_templates/trainer.py | 70 +++ .../models/__init__.py | 17 + .../models/machine_translation.py | 232 +++++++++ .../fluid_benchmark_for_aws/models/mnist.py | 94 ++++ .../fluid_benchmark_for_aws/models/resnet.py | 161 ++++++ .../models/stacked_dynamic_lstm.py | 139 ++++++ .../fluid_benchmark_for_aws/models/vgg.py | 104 ++++ vgg16_aws_dist/fluid_benchmark_for_aws/run.sh | 105 ++++ vgg16_aws_dist/run.xsh | 20 +- 17 files changed, 1816 insertions(+), 33 deletions(-) create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/Dockerfile create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/README.md create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/fluid_benchmark.py create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/kube_gen_job.py create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/__init__.py create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/pserver.py create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/trainer.py create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/models/__init__.py create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/models/machine_translation.py create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/models/mnist.py create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/models/resnet.py create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/models/stacked_dynamic_lstm.py create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/models/vgg.py create mode 100644 vgg16_aws_dist/fluid_benchmark_for_aws/run.sh diff --git a/vgg16_aws_dist/ce_runner.py b/vgg16_aws_dist/ce_runner.py index a4a78c22..ace00080 100644 --- a/vgg16_aws_dist/ce_runner.py +++ b/vgg16_aws_dist/ce_runner.py @@ -215,14 +215,27 @@ def persist(self): def train_with_spec(spec, args, lock): logging.info("updating cluster config and starting client") - batch_size = spec[0] - args.trainer_count = spec[1] - # gpus_per_trainer_count = spec[2] - args.pserver_count = spec[3] + test_name = spec[0] + batch_size = spec[1] + args.trainer_count = spec[2] + gpus_per_trainer_count = spec[3] + args.pserver_count = spec[4] trainer_command = TrainCommand(args.trainer_command) - if args.pserver_count == 0: - trainer_command.update({"local":"yes"}) - trainer_command.update({"batch_size":str(batch_size)}) + + command_to_update = { + "model": test_name, + "batch_size": str(batch_size), + "gpus": str(gpus_per_trainer_count), + } + + if args.pserver_count == 0 and args.trainer_count == 1: + command_to_update["update_method"] = "local" + ''' not yet supported because aws runner can't provide PADDLE_TRAINER_IPS + if args.pserver_count == 0 and args.trainer_count > 1: + command_to_update["update_method"] = "nccl2" + ''' + + trainer_command.update(command_to_update) args.trainer_command = trainer_command.unparse() args.pserver_command = args.trainer_command @@ 
-271,11 +284,11 @@ def next(self):
         while len(self.specs) != 0:
             next_spec = self.specs[0]
             # when a single spec can't even fit, move it to the bad spec list
-            if self.spec_can_not_fit(next_spec[1], next_spec[3]):
+            if self.spec_can_not_fit(next_spec[2], next_spec[4]):
                 self.bad_specs.append(self.specs.pop(0))
                 continue
-            trainer_count += next_spec[1]
-            pserver_count += next_spec[3]
+            trainer_count += next_spec[2]
+            pserver_count += next_spec[4]
             if self.spec_can_not_fit(trainer_count, pserver_count):
                 break
             specs_to_ret.append(self.specs.pop(0))
@@ -325,7 +338,7 @@ def get_speed_and_collector_by_spec(spec):
             for cluster_spec in cluster_specs_origin:
                 speed, data_collector = get_speed_and_collector_by_spec(cluster_spec)
                 if speed is not None:
-                    data_collector.save("speedup", speed*cluster_spec[1]*cluster_spec[2]/base_speed)
+                    data_collector.save("speedup", speed*cluster_spec[2]*cluster_spec[3]/base_speed)

         DataCollector.persist_all()
         # DataCollector.generate_csv()
diff --git a/vgg16_aws_dist/continuous_evaluation.py b/vgg16_aws_dist/continuous_evaluation.py
index 77a31911..a0676ef1 100644
--- a/vgg16_aws_dist/continuous_evaluation.py
+++ b/vgg16_aws_dist/continuous_evaluation.py
@@ -12,19 +12,19 @@
 }

 # each row represents a cluster setting with the following columns
-# batch_size, trainer_count, gpus_per_trainer_count, pserver_count
+# test_name, batch_size, trainer_count, gpus_per_trainer_count, pserver_count

 # disable production cluster config for now
 # cluster_specs = [
-#     [64, 1, 1, 0],
-#     [64, 8, 1, 8],
-#     [64, 16, 1, 8],
-#     [64, 32, 1, 8],
+#     ["mnist", 64, 1, 1, 0],
+#     ["mnist", 64, 8, 1, 8],
+#     ["mnist", 64, 16, 1, 8],
+#     ["mnist", 64, 32, 1, 8],
 # ]

 cluster_specs = [
-    [64, 1, 1, 0],
-    [64, 4, 1, 4],
-    [64, 7, 1, 7],
+    ["mnist", 64, 1, 1, 0],
+    ["mnist", 64, 4, 4, 4],
+    ["mnist", 64, 7, 8, 7],
 ]

 kpis_map = {}
diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/Dockerfile b/vgg16_aws_dist/fluid_benchmark_for_aws/Dockerfile
new file mode 100644
index 00000000..bef80bb6
--- /dev/null
+++ b/vgg16_aws_dist/fluid_benchmark_for_aws/Dockerfile
@@ -0,0 +1,7 @@
+FROM paddlepaddlece/paddle:latest
+
+ENV HOME /root
+COPY ./ /root/
+WORKDIR /root
+RUN apt install -y python-opencv
+ENTRYPOINT ["python", "fluid_benchmark.py"]
\ No newline at end of file
diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/README.md b/vgg16_aws_dist/fluid_benchmark_for_aws/README.md
new file mode 100644
index 00000000..357ce932
--- /dev/null
+++ b/vgg16_aws_dist/fluid_benchmark_for_aws/README.md
@@ -0,0 +1,73 @@
+# Fluid Benchmark
+
+Originally from https://github.com/PaddlePaddle/Paddle/tree/develop/benchmark/fluid
+
+This directory contains several model configurations and the tools used to run
+Fluid benchmarks for local and distributed training.
+
+
+## Run the Benchmark
+
+To start, run the following command to get the full help message:
+
+```bash
+python fluid_benchmark.py --help
+```
+
+Currently supported `--model` arguments include:
+
+* mnist
+* resnet
+  * you can choose to use a different dataset using `--data_set cifar10` or
+    `--data_set flowers`.
+* vgg
+* stacked_dynamic_lstm
+* machine_translation
+
+* Run the following command to start a benchmark job locally:
+  ```bash
+  python fluid_benchmark.py --model mnist --device GPU
+  ```
+  You can choose to use GPU/CPU training. With GPU training, you can specify
+  `--gpus <gpu_num>` to run multi GPU training.
+* Run distributed training with parameter servers: + * start parameter servers: + ```bash + PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver + ``` + * start trainers: + ```bash + PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver + ``` +* Run distributed training using NCCL2 + ```bash + PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method nccl2 + ``` + +## Run Distributed Benchmark on Kubernetes Cluster + +We provide a script `kube_gen_job.py` to generate Kubernetes yaml files to submit +distributed benchmark jobs to your cluster. To generate a job yaml, just run: + +```bash +python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver " --disttype pserver +``` + +Then the yaml files are generated under directory `myjob`, you can run: + +```bash +kubectl create -f myjob/ +``` + +The job shall start. + + +## Notes for Run Fluid Distributed with NCCL2 and RDMA + +Before running NCCL2 distributed jobs, please check that whether your node has multiple network +interfaces, try to add the environment variable `export NCCL_SOCKET_IFNAME=eth0` to use your actual +network device. + +To run high-performance distributed training, you must prepare your hardware environment to be +able to run RDMA enabled network communication, please check out [this](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/howto/cluster/nccl2_rdma_training.md) +note for details. diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/fluid_benchmark.py b/vgg16_aws_dist/fluid_benchmark_for_aws/fluid_benchmark.py new file mode 100644 index 00000000..005c62e0 --- /dev/null +++ b/vgg16_aws_dist/fluid_benchmark_for_aws/fluid_benchmark.py @@ -0,0 +1,461 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
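+
+# Editor's note (not in the original file): per the README above, hosts with
+# multiple network interfaces may need the NCCL device pinned before an
+# --update_method nccl2 run, e.g.:
+#   export NCCL_SOCKET_IFNAME=eth0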
+
+import argparse
+import cProfile
+import time
+import os
+
+import numpy as np
+
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+import paddle.fluid.profiler as profiler
+import paddle.fluid.transpiler.distribute_transpiler as distribute_transpiler
+
+BENCHMARK_MODELS = [
+    "machine_translation", "resnet", "vgg", "mnist", "stacked_dynamic_lstm"
+]
+
+
+def parse_args():
+    parser = argparse.ArgumentParser('Fluid model benchmarks.')
+    parser.add_argument(
+        '--model',
+        type=str,
+        choices=BENCHMARK_MODELS,
+        default='resnet',
+        help='The model to run benchmark with.')
+    parser.add_argument(
+        '--batch_size', type=int, default=32, help='The minibatch size.')
+    parser.add_argument(
+        '--learning_rate',
+        type=float,
+        default=0.001,
+        help='The learning rate.')
+    # TODO(wuyi): add "--use_fake_data" option back.
+    parser.add_argument(
+        '--skip_batch_num',
+        type=int,
+        default=5,
+        help='The number of initial minibatches to skip, for a better performance test'
+    )
+    parser.add_argument(
+        '--iterations', type=int, default=80, help='The number of minibatches.')
+    parser.add_argument(
+        '--pass_num', type=int, default=100, help='The number of passes.')
+    parser.add_argument(
+        '--data_format',
+        type=str,
+        default='NCHW',
+        choices=['NCHW', 'NHWC'],
+        help='The data format; now only NCHW is supported.')
+    parser.add_argument(
+        '--device',
+        type=str,
+        default='GPU',
+        choices=['CPU', 'GPU'],
+        help='The device type.')
+    parser.add_argument(
+        '--gpus',
+        type=int,
+        default=1,
+        help='If gpus > 1, will use ParallelExecutor to run, else use Executor.')
+    parser.add_argument(
+        '--data_set',
+        type=str,
+        default='flowers',
+        choices=['cifar10', 'flowers'],
+        help='Optional dataset for benchmark.')
+    parser.add_argument(
+        '--infer_only', action='store_true', help='If set, run forward only.')
+    parser.add_argument(
+        '--use_cprof', action='store_true', help='If set, use cProfile.')
+    parser.add_argument(
+        '--use_nvprof',
+        action='store_true',
+        help='If set, use nvprof for CUDA.')
+    parser.add_argument(
+        '--no_test',
+        action='store_false',
+        help='If set, test the testset during training.')
+    parser.add_argument(
+        '--memory_optimize',
+        action='store_true',
+        help='If set, optimize runtime memory before start.')
+    parser.add_argument(
+        '--use_fake_data',
+        action='store_true',
+        help='If set, omit the actual read-data operators.')
+    parser.add_argument(
+        '--profile', action='store_true', help='If set, profile a few steps.')
+    parser.add_argument(
+        '--update_method',
+        type=str,
+        default='local',
+        choices=['local', 'pserver', 'nccl2'],
+        help='Choose parameter update method, can be local, pserver, nccl2.')
+
+    parser.add_argument(
+        "--acc_target", default=0.6, type=float, help="training will be terminated when acc_target is reached")
+
+    args = parser.parse_args()
+    return args
+
+
+def append_nccl2_prepare(trainer_id):
+    if trainer_id >= 0:
+        # append gen_nccl_id at the end of startup program
+        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
+        port = os.getenv("PADDLE_PSERVER_PORT")
+        worker_ips = os.getenv("PADDLE_TRAINER_IPS")
+        worker_endpoints = []
+        for ip in worker_ips.split(","):
+            worker_endpoints.append(':'.join([ip, port]))
+        num_trainers = len(worker_endpoints)
+        current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port
+        worker_endpoints.remove(current_endpoint)
+
+        nccl_id_var = fluid.default_startup_program().global_block().create_var(
+            name="NCCLID",
+            persistable=True,
+            type=fluid.core.VarDesc.VarType.RAW)
+        
fluid.default_startup_program().global_block().append_op( + type="gen_nccl_id", + inputs={}, + outputs={"NCCLID": nccl_id_var}, + attrs={ + "endpoint": current_endpoint, + "endpoint_list": worker_endpoints, + "trainer_id": trainer_id + }) + return nccl_id_var, num_trainers, trainer_id + else: + raise Exception("must set positive PADDLE_TRAINER_ID env variables for " + "nccl-based dist train.") + + +def dist_transpile(trainer_id): + if trainer_id < 0: + return None, None + + # the port of all pservers, needed by both trainer and pserver + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + # comma separated ips of all pservers, needed by trainer and + # pserver + pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "") + eplist = [] + for ip in pserver_ips.split(","): + eplist.append(':'.join([ip, port])) + pserver_endpoints = ",".join(eplist) + # total number of workers/trainers in the job, needed by + # trainer and pserver + trainers = int(os.getenv("PADDLE_TRAINERS")) + # the IP of the local machine, needed by pserver only + current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port + # the role, should be either PSERVER or TRAINER + training_role = os.getenv("PADDLE_TRAINING_ROLE") + + t = distribute_transpiler.DistributeTranspiler() + t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) + if training_role == "PSERVER": + pserver_program = t.get_pserver_program(current_endpoint) + pserver_startup_program = t.get_startup_program(current_endpoint, + pserver_program) + return pserver_program, pserver_startup_program + elif training_role == "TRAINER": + train_program = t.get_trainer_program() + return train_program, fluid.default_startup_program() + else: + raise ValueError( + 'TRAINING_ROLE environment variable must be either TRAINER or PSERVER' + ) + + +def test(exe, inference_program, test_reader, feeder, batch_acc): + accuracy_evaluator = fluid.metrics.Accuracy() + for batch_id, data in enumerate(test_reader()): + acc = exe.run(inference_program, + feed=feeder.feed(data), + fetch_list=[batch_acc]) + accuracy_evaluator.update(value=np.array(acc), weight=len(data)) + + return accuracy_evaluator.eval() + + +# TODO(wuyi): replace train, train_parallel, test functions with new trainer +# API once it is ready. 
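+
+# Editor's note: a minimal sketch of the environment dist_transpile() above
+# expects (the variable names are the ones read by this file; the values are
+# illustrative, taken from the single-host example in the README):
+#
+#   PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 \
+#   PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 \
+#   PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 \
+#   python fluid_benchmark.py --model mnist --device GPU --update_method pserver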
+def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, batch_size_tensor, + args, train_prog, startup_prog): + if os.getenv("PADDLE_TRAINING_ROLE") == "PSERVER": + place = core.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_prog) + exe.run(train_prog) + return + + if args.use_fake_data: + raise Exception( + "fake data is not supported in single GPU test for now.") + + place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(startup_prog) + feed_var_list = [ + var for var in train_prog.global_block().vars.itervalues() + if var.is_data + ] + feeder = fluid.DataFeeder(feed_var_list, place) + + acc_4passes = None + converge_speed = None + train_pass_acc = fluid.average.WeightedAverage() + fetch_list = [avg_loss] + if batch_acc is not None: + fetch_list.append(batch_acc) + + iters, num_samples, start_time = 0, 0, time.time() + for pass_id in range(args.pass_num): + train_losses = [] + train_pass_acc.reset() + for batch_id, data in enumerate(train_reader()): + if iters == args.skip_batch_num: + start_time = time.time() + num_samples = 0 + if iters == args.iterations: + break + outs = exe.run(train_prog, + feed=feeder.feed(data), + fetch_list=fetch_list) + iters += 1 + num_samples += len(data) + loss = outs[0] + if batch_acc is not None: + acc = outs[1] + train_pass_acc.add(value=acc, weight=len(data)) + else: + acc = None + train_losses.append(loss) + print("Pass: %d, Iter: %d, Loss: %f, acc %s\n" % + (pass_id, iters, np.mean(train_losses), str(acc))) + if converge_speed is None and args.acc_target and acc >= args.acc_target: + converge_speed = time.time() - start_time + print("converge_speed set with %f" % converge_speed) + train_elapsed = time.time() - start_time + examples_per_sec = num_samples / train_elapsed + if batch_acc is not None: + pass_train_acc = train_pass_acc.eval() + else: + pass_train_acc = None + + if pass_id == 4 and batch_acc is not None: + print("acc_4passes set with %f" % pass_train_acc) + acc_4passes = float(pass_train_acc) + + output_metric_data(pass_id, examples_per_sec, pass_train_acc, acc_4passes, converge_speed) + + # evaluation + if not args.no_test and batch_acc != None: + pass_test_acc = test(exe, infer_prog, test_reader, feeder, + batch_acc) + print(", Test Accuracy: %f" % pass_test_acc) + print("\n") + # TODO(wuyi): add warmup passes to get better perf data. + exit(0) + + +# TODO(wuyi): replace train, train_parallel, test functions with new trainer +# API once it is ready. 
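+
+# Editor's note: the "**metrics_data: " line printed by output_metric_data()
+# below is what DataCollector.log_processor in ce_runner.py parses. A small
+# self-contained sketch of that consumer side (parse_metrics_line is a
+# hypothetical helper, shown here only to document the line format):
+#
+#   def parse_metrics_line(line, prefix="**metrics_data: "):
+#       if not line.startswith(prefix):
+#           return {}
+#       pairs = (kv.split("=") for kv in line[len(prefix):].split(","))
+#       return dict((k.strip(), v.strip()) for k, v in pairs)
+#
+#   parse_metrics_line("**metrics_data: pass = 4, train_speed = 123.4")
+#   # => {"pass": "4", "train_speed": "123.4"}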
+def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, + batch_acc, batch_size_tensor, args, train_prog, startup_prog, nccl_id_var, + num_trainers, trainer_id): + feed_var_list = [ + var for var in train_prog.global_block().vars.itervalues() + if var.is_data + ] + # generate fake: + if args.use_fake_data: + for var in feed_var_list: + v = startup_prog.global_block().clone_variable(var) + var.persistable = True + v.persistable = True + + real_shape = list(var.shape) + real_shape[0] = args.batch_size / args.gpus + startup_prog.global_block().append_op( + outputs={"Out": v}, + type="fill_constant", + attrs={"shape": real_shape, + "value": 1.0, + "dtype": var.dtype}) + + place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0) + if nccl_id_var and trainer_id == 0: + #FIXME(wuyi): wait other trainer to start listening + time.sleep(30) + + startup_exe = fluid.Executor(place) + startup_exe.run(startup_prog) + strategy = fluid.ExecutionStrategy() + strategy.num_threads = 1 + strategy.allow_op_delay = False + exe = fluid.ParallelExecutor( + True, + avg_loss.name, + exec_strategy=strategy, + num_trainers=num_trainers, + trainer_id=trainer_id) + + feeder = fluid.DataFeeder(feed_var_list, place) + acc_4passes = None + converge_speed = None + accuracy_evaluator = fluid.metrics.Accuracy() + fetch_list = [avg_loss.name] + if batch_acc is not None: + fetch_list.append(batch_acc.name) + start_time = time.time() + + for pass_id in range(args.pass_num): + num_samples = 0 + iters = 0 + pass_start_time = time.time() + accuracy_evaluator.reset() + for batch_id, data in enumerate(train_reader()): + if args.profile and pass_id == 0 and batch_id == 5: + profiler.start_profiler("All") + elif args.profile and pass_id == 0 and batch_id == 10: + profiler.stop_profiler("total", "/tmp/profile_%d" % trainer_id) + + if iters == args.skip_batch_num: + start_time = time.time() + num_samples = 0 + if iters == args.iterations: + break + if args.use_fake_data: + outs = exe.run(fetch_list) + else: + outs = exe.run(fetch_list, feed=feeder.feed(data)) + + if args.update_method == "pserver": + exe.bcast_params() + num_samples += len(data) + iters += 1 + + if batch_acc is not None: + acc = np.mean(outs[1]).item() + accuracy_evaluator.update(value=acc, weight=len(data)) + else: + acc = None + + if batch_id % 1 == 0: + print("Pass %d, batch %d, loss %s, acc %s" % + (pass_id, batch_id, np.mean(outs[0]), str(acc))) + if converge_speed is None and args.acc_target and acc >= args.acc_target: + converge_speed = time.time() - start_time + print("converge_speed set with %f" % converge_speed) + + pass_elapsed = time.time() - pass_start_time + examples_per_sec = num_samples / pass_elapsed + if batch_acc is not None: + pass_train_acc = accuracy_evaluator.eval() + else: + pass_train_acc = None + + if pass_id == 4 and batch_acc is not None: + print("acc_4passes set with %f" % pass_train_acc) + acc_4passes = float(pass_train_acc) + + output_metric_data(pass_id, examples_per_sec, pass_train_acc, acc_4passes, converge_speed) + + if not args.no_test and batch_acc != None: + test_acc = test(startup_exe, infer_prog, test_reader, feeder, + batch_acc) + print("Pass: %d, Test Accuracy: %f\n" % (pass_id, test_acc)) + exit(0) + +def output_metric_data(pass_id, examples_per_sec, pass_train_acc, acc_4passes, converge_speed): + msgs = [] + msgs.append("pass = %d" % pass_id) + msgs.append("train_speed = %f" % float(examples_per_sec)) + if isinstance(pass_train_acc, float): + msgs.append("train_accuracy = %f" % 
pass_train_acc) + if isinstance(acc_4passes, float): + msgs.append("acc_4passes = %f" % acc_4passes) + if isinstance(converge_speed, float): + msgs.append("converge_speed = %f" % converge_speed) + print("**metrics_data: " + ", ".join(msgs)) + +def print_arguments(args): + vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and + vars(args)['device'] == 'GPU') + print('----------- resnet Configuration Arguments -----------') + for arg, value in sorted(vars(args).iteritems()): + print('%s: %s' % (arg, value)) + print('------------------------------------------------') + + +def main(): + args = parse_args() + print_arguments(args) + + # the unique trainer id, starting from 0, needed by trainer + # only + nccl_id_var, num_trainers, trainer_id = ( + None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1"))) + + if args.use_cprof: + pr = cProfile.Profile() + pr.enable() + model_def = __import__("models.%s" % args.model, fromlist=["models"]) + train_args = list(model_def.get_model(args)) + train_args.append(args) + # Run optimizer.minimize(avg_loss) + train_args[2].minimize(train_args[0]) + if args.memory_optimize: + fluid.memory_optimize(fluid.default_main_program()) + + if args.update_method == "pserver": + train_prog, startup_prog = dist_transpile(trainer_id) + if not train_prog: + raise Exception( + "Must configure correct environments to run dist train.") + train_args.extend([train_prog, startup_prog]) + if args.gpus > 1 and os.getenv("PADDLE_TRAINING_ROLE") == "TRAINER": + train_args.extend([nccl_id_var, num_trainers, trainer_id]) + train_parallel(*train_args) + train(*train_args) + exit(0) + + # for other update methods, use default programs + train_args.append(fluid.default_main_program()) + train_args.append(fluid.default_startup_program()) + + if args.update_method == "nccl2": + nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare(trainer_id) + if args.gpus == 1: + # NOTE: parallel executor use profiler interanlly + if args.use_nvprof and args.device == 'GPU': + with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: + train(*train_args) + else: + train(*train_args) + else: + if args.device == "CPU": + raise Exception("Only support GPU perf with parallel exe") + train_args.extend([nccl_id_var, num_trainers, trainer_id]) + train_parallel(*train_args) + + +if __name__ == "__main__": + main() diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/kube_gen_job.py b/vgg16_aws_dist/fluid_benchmark_for_aws/kube_gen_job.py new file mode 100644 index 00000000..39ba207f --- /dev/null +++ b/vgg16_aws_dist/fluid_benchmark_for_aws/kube_gen_job.py @@ -0,0 +1,191 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
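+
+# Usage sketch (this exact command appears in the README in this directory):
+# generate the yamls under ./myjob, then submit with `kubectl create -f myjob/`:
+#
+#   python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 \
+#       --psmemory 20 --memory 40 --pservers 4 --trainers 4 \
+#       --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver " \
+#       --disttype pserver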
+
+import yaml
+import copy
+import argparse
+import random
+import os
+from kube_templates import pserver, trainer, envs
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Generate dist job yamls.')
+
+    parser.add_argument(
+        '--jobname', default="paddlejob", help='unique job name')
+    parser.add_argument(
+        '--cpu', default=1, type=int, help='CPU cores per trainer node')
+    parser.add_argument(
+        '--pscpu', default=1, type=int, help='CPU cores per pserver node')
+    parser.add_argument(
+        '--gpu', default=0, type=int, help='num of GPUs per node')
+    parser.add_argument(
+        '--image',
+        default="bootstrapper:5000/fluid_benchmark:gpu",
+        help='docker image to run the job with')
+    parser.add_argument(
+        '--pservers', default=1, type=int, help='num of pservers')
+    parser.add_argument(
+        '--trainers', default=1, type=int, help='num of trainers')
+    parser.add_argument('--memory', default=1, type=int, help='trainer memory')
+    parser.add_argument(
+        '--psmemory', default=1, type=int, help='pserver memory')
+    parser.add_argument(
+        '--port', default=30236, type=int, help='pserver service port')
+    parser.add_argument(
+        '--entry', default="python train.py", help='command to run')
+    parser.add_argument(
+        '--fluid', default=1, type=int, help='whether is fluid job')
+    parser.add_argument(
+        '--rdma', action='store_true', help='whether mount rdma libs')
+    parser.add_argument(
+        '--disttype',
+        default="pserver",
+        type=str,
+        choices=['pserver', 'nccl2', 'local'],
+        help='pserver or nccl2 or local')
+
+    args = parser.parse_args()
+    return args
+
+
+def gen_job():
+    ps = pserver
+    tn = trainer
+    args = parse_args()
+
+    ps_container = ps["spec"]["template"]["spec"]["containers"][0]
+    tn_container = tn["spec"]["template"]["spec"]["containers"][0]
+
+    if args.fluid == 1:
+        ps_container["command"] = \
+            ["paddle_k8s", "start_fluid"]
+        tn_container["command"] = \
+            ["paddle_k8s", "start_fluid"]
+    ps["metadata"]["name"] = args.jobname + "-pserver"
+    ps["spec"]["template"]["metadata"]["labels"][
+        "paddle-job-pserver"] = args.jobname
+    tn["metadata"]["name"] = args.jobname + "-trainer"
+    tn["spec"]["template"]["metadata"]["labels"]["paddle-job"] = args.jobname
+
+    ps_container["image"] = args.image
+    tn_container["image"] = args.image
+
+    ps_container["resources"]["requests"]["cpu"] = str(args.pscpu)
+    ps_container["resources"]["requests"]["memory"] = str(args.psmemory) + "Gi"
+    ps_container["resources"]["limits"]["cpu"] = str(args.pscpu)
+    ps_container["resources"]["limits"]["memory"] = str(args.psmemory) + "Gi"
+
+    tn_container["resources"]["requests"]["cpu"] = str(args.cpu)
+    tn_container["resources"]["requests"]["memory"] = str(args.memory) + "Gi"
+    tn_container["resources"]["limits"]["cpu"] = str(args.cpu)
+    tn_container["resources"]["limits"]["memory"] = str(args.memory) + "Gi"
+    if args.gpu > 0:
+        tn_container["resources"]["requests"][
+            "alpha.kubernetes.io/nvidia-gpu"] = str(args.gpu)
+        tn_container["resources"]["limits"][
+            "alpha.kubernetes.io/nvidia-gpu"] = str(args.gpu)
+
+    ps["spec"]["replicas"] = int(args.pservers)
+    tn["spec"]["parallelism"] = int(args.trainers)
+    tn["spec"]["completions"] = int(args.trainers)
+    ps_container["ports"][0]["name"] = "jobport-" + str(args.port)
+    ps_container["ports"][0]["containerPort"] = args.port
+    spreadport = random.randint(40000, 60000)
+    tn_container["ports"][0]["name"] = "spr-" + str(spreadport)
+    tn_container["ports"][0]["containerPort"] = spreadport
+
+    envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname})
+    envs.append({"name": "TRAINERS", 
"value": str(args.trainers)}) + envs.append({"name": "PSERVERS", "value": str(args.pservers)}) + envs.append({"name": "ENTRY", "value": args.entry}) + envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)}) + envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)}) + # NOTE: these directories below are cluster specific, please modify + # this settings before you run on your own cluster. + envs.append({ + "name": "LD_LIBRARY_PATH", + "value": + "/usr/local/lib:/usr/local/nvidia/lib64:/usr/local/rdma/lib64:/usr/lib64/mlnx_ofed/valgrind" + }) + + volumes = [{ + "name": "nvidia-driver", + "hostPath": { + "path": "/usr/local/nvidia/lib64" + } + }] + volumeMounts = [{ + "mountPath": "/usr/local/nvidia/lib64", + "name": "nvidia-driver" + }] + + if args.rdma: + volumes.extend([{ + "name": "ibetc", + "hostPath": { + "path": "/etc/libibverbs.d" + } + }, { + "name": "iblibs", + "hostPath": { + "path": "/usr/local/rdma" + } + }, { + "name": "valgrind", + "hostPath": { + "path": "/usr/lib64/mlnx_ofed/valgrind" + } + }]) + volumeMounts.extend([{ + "mountPath": "/etc/libibverbs.d", + "name": "ibetc" + }, { + "mountPath": "/usr/local/rdma", + "name": "iblibs" + }, { + "mountPath": "/usr/lib64/mlnx_ofed/valgrind", + "name": "valgrind" + }]) + # append shm for NCCL2 + volumes.append({"name": "dshm", "emptyDir": {"medium": "Memory"}}) + volumeMounts.append({"mountPath": "/dev/shm", "name": "dshm"}) + + tn["spec"]["template"]["spec"]["volumes"] = volumes + tn_container["volumeMounts"] = volumeMounts + + ps_container["env"] = envs + ps_container["env"].append({"name": "TRAINING_ROLE", "value": "PSERVER"}) + tn_container["env"] = envs + if args.disttype == "pserver": + tn_container["env"].append({ + "name": "TRAINING_ROLE", + "value": "TRAINER" + }) + elif args.disttype == "nccl2" or args.disttype == "local": + # NCCL2 have no training role, set to plain WORKER + tn_container["env"].append({"name": "TRAINING_ROLE", "value": "WORKER"}) + + os.mkdir(args.jobname) + if args.disttype == "pserver": + with open("%s/pserver.yaml" % args.jobname, "w") as fn: + yaml.dump(ps, fn) + + with open("%s/trainer.yaml" % args.jobname, "w") as fn: + yaml.dump(tn, fn) + + +if __name__ == "__main__": + gen_job() diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/__init__.py b/vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/__init__.py new file mode 100644 index 00000000..2d09d940 --- /dev/null +++ b/vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/__init__.py @@ -0,0 +1,66 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from pserver import pserver +from trainer import trainer + +__all__ = ["pserver", "trainer", "envs"] + +envs = [ + # envs that don't need to change + { + "name": "GLOG_v", + "value": "0" + }, + { + "name": "GLOG_logtostderr", + "value": "1" + }, + { + "name": "TOPOLOGY", + "value": "" + }, + { + "name": "TRAINER_PACKAGE", + "value": "/workspace" + }, + { + "name": "PADDLE_INIT_NICS", + "value": "eth2" + }, + { + "name": "NAMESPACE", + "valueFrom": { + "fieldRef": { + "fieldPath": "metadata.namespace" + } + } + }, + { + "name": "POD_IP", + "valueFrom": { + "fieldRef": { + "fieldPath": "status.podIP" + } + } + }, + { + "name": "PADDLE_CURRENT_IP", + "valueFrom": { + "fieldRef": { + "fieldPath": "status.podIP" + } + } + } +] diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/pserver.py b/vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/pserver.py new file mode 100644 index 00000000..b54982c8 --- /dev/null +++ b/vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/pserver.py @@ -0,0 +1,58 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +pserver = { + "apiVersion": "extensions/v1beta1", + "kind": "ReplicaSet", + "metadata": { + "name": "jobname-pserver" + }, + "spec": { + "replicas": 1, + "template": { + "metadata": { + "labels": { + "paddle-job-pserver": "jobname" + } + }, + "spec": { + "hostNetwork": True, + "imagePullSecrets": [{ + "name": "job-registry-secret" + }], + "containers": [{ + "name": "pserver", + "image": "", + "imagePullPolicy": "Always", + "ports": [{ + "name": "jobport-1", + "containerPort": 1 + }], + "env": [], + "command": ["paddle_k8s", "start_pserver"], + "resources": { + "requests": { + "memory": "10Gi", + "cpu": "4" + }, + "limits": { + "memory": "10Gi", + "cpu": "4" + } + } + }] + } + } + } +} diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/trainer.py b/vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/trainer.py new file mode 100644 index 00000000..b915d31e --- /dev/null +++ b/vgg16_aws_dist/fluid_benchmark_for_aws/kube_templates/trainer.py @@ -0,0 +1,70 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
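+
+# Editor's note: as with the pserver template above, the placeholder values
+# below ("jobname-pserver", the port, cpu/memory requests) are overwritten by
+# kube_gen_job.py before the yaml is written out.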
+
+trainer = {
+    "apiVersion": "batch/v1",
+    "kind": "Job",
+    "metadata": {
+        "name": "jobname-trainer"
+    },
+    "spec": {
+        "parallelism": 4,
+        "completions": 4,
+        "template": {
+            "metadata": {
+                "labels": {
+                    "paddle-job": "jobname"
+                }
+            },
+            "spec": {
+                "hostNetwork": True,
+                "imagePullSecrets": [{
+                    "name": "job-registry-secret"
+                }],
+                "restartPolicy": "Never",
+                "containers": [{
+                    "name": "trainer",
+                    "image": "",
+                    "imagePullPolicy": "Always",
+                    # to let the container set rlimit
+                    "securityContext": {
+                        "privileged": True
+                        # TODO(wuyi): use the specific cap below instead of
+                        # privileged; privileged makes all GPU devices
+                        # visible inside the container.
+                        # "capabilities": {
+                        #     "add": ["SYS_RESOURCE"]
+                        # }
+                    },
+                    "ports": [{
+                        "name": "jobport-1",
+                        "containerPort": 1
+                    }],
+                    "env": [],
+                    "command": ["paddle_k8s", "start_trainer", "v2"],
+                    "resources": {
+                        "requests": {
+                            "memory": "10Gi",
+                            "cpu": "4",
+                        },
+                        "limits": {
+                            "memory": "10Gi",
+                            "cpu": "4",
+                        }
+                    }
+                }]
+            }
+        }
+    }
+}
diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/models/__init__.py b/vgg16_aws_dist/fluid_benchmark_for_aws/models/__init__.py
new file mode 100644
index 00000000..1c3fcac8
--- /dev/null
+++ b/vgg16_aws_dist/fluid_benchmark_for_aws/models/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = [
+    "machine_translation", "resnet", "vgg", "mnist", "stacked_dynamic_lstm"
+]
diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/models/machine_translation.py b/vgg16_aws_dist/fluid_benchmark_for_aws/models/machine_translation.py
new file mode 100644
index 00000000..122a66c9
--- /dev/null
+++ b/vgg16_aws_dist/fluid_benchmark_for_aws/models/machine_translation.py
@@ -0,0 +1,232 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
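+
+# NOTE: each model module in this package exposes get_model(args) with the
+# same contract, returning (avg_cost, inference_program, optimizer,
+# train_reader, test_reader, batch_acc, batch_size_tensor); slots a model
+# cannot provide (e.g. accuracy for this seq2seq model) are returned as
+# None.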
+"""seq2seq model for fluid.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import argparse +import time +import distutils.util + +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.fluid.framework as framework +from paddle.fluid.executor import Executor + + +def lstm_step(x_t, hidden_t_prev, cell_t_prev, size): + def linear(inputs): + return fluid.layers.fc(input=inputs, size=size, bias_attr=True) + + forget_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t])) + input_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t])) + output_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t])) + cell_tilde = fluid.layers.tanh(x=linear([hidden_t_prev, x_t])) + + cell_t = fluid.layers.sums(input=[ + fluid.layers.elementwise_mul( + x=forget_gate, y=cell_t_prev), fluid.layers.elementwise_mul( + x=input_gate, y=cell_tilde) + ]) + + hidden_t = fluid.layers.elementwise_mul( + x=output_gate, y=fluid.layers.tanh(x=cell_t)) + + return hidden_t, cell_t + + +def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim, + target_dict_dim, is_generating, beam_size, max_length): + """Construct a seq2seq network.""" + + def bi_lstm_encoder(input_seq, gate_size): + # Linear transformation part for input gate, output gate, forget gate + # and cell activation vectors need be done outside of dynamic_lstm. + # So the output size is 4 times of gate_size. + input_forward_proj = fluid.layers.fc(input=input_seq, + size=gate_size * 4, + act=None, + bias_attr=False) + forward, _ = fluid.layers.dynamic_lstm( + input=input_forward_proj, size=gate_size * 4, use_peepholes=False) + input_reversed_proj = fluid.layers.fc(input=input_seq, + size=gate_size * 4, + act=None, + bias_attr=False) + reversed, _ = fluid.layers.dynamic_lstm( + input=input_reversed_proj, + size=gate_size * 4, + is_reverse=True, + use_peepholes=False) + return forward, reversed + + src_word_idx = fluid.layers.data( + name='source_sequence', shape=[1], dtype='int64', lod_level=1) + + src_embedding = fluid.layers.embedding( + input=src_word_idx, + size=[source_dict_dim, embedding_dim], + dtype='float32') + + src_forward, src_reversed = bi_lstm_encoder( + input_seq=src_embedding, gate_size=encoder_size) + + encoded_vector = fluid.layers.concat( + input=[src_forward, src_reversed], axis=1) + + encoded_proj = fluid.layers.fc(input=encoded_vector, + size=decoder_size, + bias_attr=False) + + backward_first = fluid.layers.sequence_pool( + input=src_reversed, pool_type='first') + + decoder_boot = fluid.layers.fc(input=backward_first, + size=decoder_size, + bias_attr=False, + act='tanh') + + def lstm_decoder_with_attention(target_embedding, encoder_vec, encoder_proj, + decoder_boot, decoder_size): + def simple_attention(encoder_vec, encoder_proj, decoder_state): + decoder_state_proj = fluid.layers.fc(input=decoder_state, + size=decoder_size, + bias_attr=False) + decoder_state_expand = fluid.layers.sequence_expand( + x=decoder_state_proj, y=encoder_proj) + concated = fluid.layers.concat( + input=[encoder_proj, decoder_state_expand], axis=1) + attention_weights = fluid.layers.fc(input=concated, + size=1, + act='tanh', + bias_attr=False) + attention_weights = fluid.layers.sequence_softmax( + input=attention_weights) + weigths_reshape = fluid.layers.reshape( + x=attention_weights, shape=[-1]) + scaled = fluid.layers.elementwise_mul( + x=encoder_vec, y=weigths_reshape, axis=0) + context = 
fluid.layers.sequence_pool(input=scaled, pool_type='sum') + return context + + rnn = fluid.layers.DynamicRNN() + + cell_init = fluid.layers.fill_constant_batch_size_like( + input=decoder_boot, + value=0.0, + shape=[-1, decoder_size], + dtype='float32') + cell_init.stop_gradient = False + + with rnn.block(): + current_word = rnn.step_input(target_embedding) + encoder_vec = rnn.static_input(encoder_vec) + encoder_proj = rnn.static_input(encoder_proj) + hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True) + cell_mem = rnn.memory(init=cell_init) + context = simple_attention(encoder_vec, encoder_proj, hidden_mem) + decoder_inputs = fluid.layers.concat( + input=[context, current_word], axis=1) + h, c = lstm_step(decoder_inputs, hidden_mem, cell_mem, decoder_size) + rnn.update_memory(hidden_mem, h) + rnn.update_memory(cell_mem, c) + out = fluid.layers.fc(input=h, + size=target_dict_dim, + bias_attr=True, + act='softmax') + rnn.output(out) + return rnn() + + if not is_generating: + trg_word_idx = fluid.layers.data( + name='target_sequence', shape=[1], dtype='int64', lod_level=1) + + trg_embedding = fluid.layers.embedding( + input=trg_word_idx, + size=[target_dict_dim, embedding_dim], + dtype='float32') + + prediction = lstm_decoder_with_attention(trg_embedding, encoded_vector, + encoded_proj, decoder_boot, + decoder_size) + label = fluid.layers.data( + name='label_sequence', shape=[1], dtype='int64', lod_level=1) + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + + feeding_list = ["source_sequence", "target_sequence", "label_sequence"] + + return avg_cost, feeding_list + + +def to_lodtensor(data, place): + seq_lens = [len(seq) for seq in data] + cur_len = 0 + lod = [cur_len] + for l in seq_lens: + cur_len += l + lod.append(cur_len) + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + lod_t = core.LoDTensor() + lod_t.set(flattened_data, place) + lod_t.set_lod([lod]) + return lod_t, lod[-1] + + +def lodtensor_to_ndarray(lod_tensor): + dims = lod_tensor.get_dims() + ndarray = np.zeros(shape=dims).astype('float32') + for i in xrange(np.product(dims)): + ndarray.ravel()[i] = lod_tensor.get_float_element(i) + return ndarray + + +def get_model(args): + embedding_dim = 512 + encoder_size = 512 + decoder_size = 512 + dict_size = 30000 + beam_size = 3 + max_length = 250 + avg_cost, feeding_list = seq_to_seq_net( + embedding_dim, + encoder_size, + decoder_size, + dict_size, + dict_size, + False, + beam_size=beam_size, + max_length=max_length) + + # clone from default main program + inference_program = fluid.default_main_program().clone() + + optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) + + train_batch_generator = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=1000), + batch_size=args.batch_size) + + test_batch_generator = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.wmt14.test(dict_size), buf_size=1000), + batch_size=args.batch_size) + + return avg_cost, inference_program, optimizer, train_batch_generator, \ + test_batch_generator, None, None diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/models/mnist.py b/vgg16_aws_dist/fluid_benchmark_for_aws/models/mnist.py new file mode 100644 index 00000000..9606304b --- /dev/null +++ b/vgg16_aws_dist/fluid_benchmark_for_aws/models/mnist.py @@ -0,0 +1,94 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import argparse
+import time
+import cProfile
+from functools import reduce  # built in under Python 2, needed on Python 3
+
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.profiler as profiler
+
+SEED = 1
+DTYPE = "float32"
+
+# The random seed must be set before configuring the network.
+# fluid.default_startup_program().random_seed = SEED
+
+
+def cnn_model(data):
+    conv_pool_1 = fluid.nets.simple_img_conv_pool(
+        input=data,
+        filter_size=5,
+        num_filters=20,
+        pool_size=2,
+        pool_stride=2,
+        act="relu")
+    conv_pool_2 = fluid.nets.simple_img_conv_pool(
+        input=conv_pool_1,
+        filter_size=5,
+        num_filters=50,
+        pool_size=2,
+        pool_stride=2,
+        act="relu")
+
+    # TODO(dzhwinter): refine the initializer and random seed setting
+    SIZE = 10
+    input_shape = conv_pool_2.shape
+    param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
+    scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5
+
+    predict = fluid.layers.fc(
+        input=conv_pool_2,
+        size=SIZE,
+        act="softmax",
+        param_attr=fluid.param_attr.ParamAttr(
+            initializer=fluid.initializer.NormalInitializer(
+                loc=0.0, scale=scale)))
+    return predict
+
+
+def get_model(args):
+    # Input data
+    images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+
+    # Train program
+    predict = cnn_model(images)
+    cost = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+    # Evaluator
+    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
+    batch_acc = fluid.layers.accuracy(
+        input=predict, label=label, total=batch_size_tensor)
+
+    # inference program
+    inference_program = fluid.default_main_program().clone()
+
+    # Optimization
+    opt = fluid.optimizer.AdamOptimizer(
+        learning_rate=0.001, beta1=0.9, beta2=0.999)
+
+    # Reader
+    train_reader = paddle.batch(
+        paddle.dataset.mnist.train(), batch_size=args.batch_size)
+    test_reader = paddle.batch(
+        paddle.dataset.mnist.test(), batch_size=args.batch_size)
+    return avg_cost, inference_program, opt, train_reader, test_reader, batch_acc, batch_size_tensor
diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/models/resnet.py b/vgg16_aws_dist/fluid_benchmark_for_aws/models/resnet.py
new file mode 100644
index 00000000..34748e37
--- /dev/null
+++ b/vgg16_aws_dist/fluid_benchmark_for_aws/models/resnet.py
@@ -0,0 +1,161 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import numpy as np +import time + +import cProfile, pstats, StringIO + +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.fluid.profiler as profiler + + +def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'): + conv1 = fluid.layers.conv2d( + input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=False) + return fluid.layers.batch_norm(input=conv1, act=act) + + +def shortcut(input, ch_out, stride): + ch_in = input.shape[1] # if args.data_format == 'NCHW' else input.shape[-1] + if ch_in != ch_out: + return conv_bn_layer(input, ch_out, 1, stride, 0, None) + else: + return input + + +def basicblock(input, ch_out, stride): + short = shortcut(input, ch_out, stride) + conv1 = conv_bn_layer(input, ch_out, 3, stride, 1) + conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, act=None) + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + +def bottleneck(input, ch_out, stride): + short = shortcut(input, ch_out * 4, stride) + conv1 = conv_bn_layer(input, ch_out, 1, stride, 0) + conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1) + conv3 = conv_bn_layer(conv2, ch_out * 4, 1, 1, 0, act=None) + return fluid.layers.elementwise_add(x=short, y=conv3, act='relu') + + +def layer_warp(block_func, input, ch_out, count, stride): + res_out = block_func(input, ch_out, stride) + for i in range(1, count): + res_out = block_func(res_out, ch_out, 1) + return res_out + + +def resnet_imagenet(input, class_dim, depth=50, data_format='NCHW'): + + cfg = { + 18: ([2, 2, 2, 1], basicblock), + 34: ([3, 4, 6, 3], basicblock), + 50: ([3, 4, 6, 3], bottleneck), + 101: ([3, 4, 23, 3], bottleneck), + 152: ([3, 8, 36, 3], bottleneck) + } + stages, block_func = cfg[depth] + conv1 = conv_bn_layer(input, ch_out=64, filter_size=7, stride=2, padding=3) + pool1 = fluid.layers.pool2d( + input=conv1, pool_type='avg', pool_size=3, pool_stride=2) + res1 = layer_warp(block_func, pool1, 64, stages[0], 1) + res2 = layer_warp(block_func, res1, 128, stages[1], 2) + res3 = layer_warp(block_func, res2, 256, stages[2], 2) + res4 = layer_warp(block_func, res3, 512, stages[3], 2) + pool2 = fluid.layers.pool2d( + input=res4, + pool_size=7, + pool_type='avg', + pool_stride=1, + global_pooling=True) + out = fluid.layers.fc(input=pool2, size=class_dim, act='softmax') + return out + + +def resnet_cifar10(input, class_dim, depth=32, data_format='NCHW'): + assert (depth - 2) % 6 == 0 + + n = (depth - 2) // 6 + + conv1 = conv_bn_layer( + input=input, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, n, 1) + res2 = layer_warp(basicblock, res1, 32, n, 2) + res3 = layer_warp(basicblock, res2, 64, n, 2) + pool = fluid.layers.pool2d( + input=res3, pool_size=8, pool_type='avg', pool_stride=1) + out = fluid.layers.fc(input=pool, size=class_dim, act='softmax') + return out + + +def get_model(args): + model = resnet_cifar10 + if args.data_set == "cifar10": + class_dim = 10 + if args.data_format == 'NCHW': + dshape = [3, 32, 32] + else: + dshape = [32, 32, 3] + model = resnet_cifar10 + else: + class_dim = 102 + if args.data_format == 'NCHW': + dshape = [3, 224, 224] + else: + dshape = [224, 224, 3] + model = resnet_imagenet + + input = 
fluid.layers.data(name='data', shape=dshape, dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + predict = model(input, class_dim) + cost = fluid.layers.cross_entropy(input=predict, label=label) + avg_cost = fluid.layers.mean(x=cost) + + batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_acc = fluid.layers.accuracy( + input=predict, label=label, total=batch_size_tensor) + + inference_program = fluid.default_main_program().clone() + with fluid.program_guard(inference_program): + inference_program = fluid.io.get_inference_program( + target_vars=[batch_acc, batch_size_tensor]) + + optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.cifar.train10() + if args.data_set == 'cifar10' else paddle.dataset.flowers.train(), + buf_size=5120), + batch_size=args.batch_size) + test_reader = paddle.batch( + paddle.dataset.cifar.test10() + if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), + batch_size=args.batch_size) + + return avg_cost, inference_program, optimizer, train_reader, test_reader, batch_acc, batch_size_tensor diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/models/stacked_dynamic_lstm.py b/vgg16_aws_dist/fluid_benchmark_for_aws/models/stacked_dynamic_lstm.py new file mode 100644 index 00000000..bd44a607 --- /dev/null +++ b/vgg16_aws_dist/fluid_benchmark_for_aws/models/stacked_dynamic_lstm.py @@ -0,0 +1,139 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
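+
+# Variable-length IMDB sentences are fed to Fluid as LoDTensors: a batch is
+# flattened into one (N, 1) int64 tensor plus cumulative length offsets,
+# e.g. two sequences of lengths 3 and 2 give lod [[0, 3, 5]]; see
+# to_lodtensor() below for the construction.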
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import cPickle
+import os
+import random
+import time
+
+import numpy
+import paddle
+import paddle.dataset.imdb as imdb
+import paddle.fluid as fluid
+import paddle.batch as batch
+import paddle.fluid.profiler as profiler
+
+word_dict = imdb.word_dict()
+
+
+def crop_sentence(reader, crop_size):
+    # '<unk>' is the out-of-vocabulary token in the imdb word dict
+    unk_value = word_dict['<unk>']
+
+    def __impl__():
+        for item in reader():
+            if len([x for x in item[0] if x != unk_value]) < crop_size:
+                yield item
+
+    return __impl__
+
+
+def get_model(args):
+    lstm_size = 512
+    emb_dim = 512
+    crop_size = 1500
+
+    data = fluid.layers.data(
+        name="words", shape=[1], lod_level=1, dtype='int64')
+    sentence = fluid.layers.embedding(
+        input=data, size=[len(word_dict), emb_dim])
+
+    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')
+
+    rnn = fluid.layers.DynamicRNN()
+    with rnn.block():
+        word = rnn.step_input(sentence)
+        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
+        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])
+
+        def gate_common(ipt, hidden, size):
+            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
+            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
+            gate = fluid.layers.sums(input=[gate0, gate1])
+            return gate
+
+        forget_gate = fluid.layers.sigmoid(
+            x=gate_common(word, prev_hidden, lstm_size))
+        input_gate = fluid.layers.sigmoid(
+            x=gate_common(word, prev_hidden, lstm_size))
+        output_gate = fluid.layers.sigmoid(
+            x=gate_common(word, prev_hidden, lstm_size))
+        cell_gate = fluid.layers.tanh(
+            x=gate_common(word, prev_hidden, lstm_size))
+
+        cell = fluid.layers.sums(input=[
+            fluid.layers.elementwise_mul(
+                x=forget_gate, y=prev_cell), fluid.layers.elementwise_mul(
+                    x=input_gate, y=cell_gate)
+        ])
+
+        hidden = fluid.layers.elementwise_mul(
+            x=output_gate, y=fluid.layers.tanh(x=cell))
+
+        rnn.update_memory(prev_cell, cell)
+        rnn.update_memory(prev_hidden, hidden)
+        rnn.output(hidden)
+
+    last = fluid.layers.sequence_pool(rnn(), 'last')
+    logit = fluid.layers.fc(input=last, size=2, act='softmax')
+    loss = fluid.layers.cross_entropy(
+        input=logit,
+        label=fluid.layers.data(
+            name='label', shape=[1], dtype='int64'))
+    loss = fluid.layers.mean(x=loss)
+
+    # add acc
+    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
+    batch_acc = fluid.layers.accuracy(input=logit, label=fluid.layers.data(name='label', \
+        shape=[1], dtype='int64'), total=batch_size_tensor)
+
+    inference_program = fluid.default_main_program().clone()
+    with fluid.program_guard(inference_program):
+        inference_program = fluid.io.get_inference_program(
+            target_vars=[batch_acc, batch_size_tensor])
+
+    adam = fluid.optimizer.Adam()
+
+    train_reader = batch(
+        paddle.reader.shuffle(
+            crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000),
+        batch_size=args.batch_size)
+    test_reader = batch(
+        paddle.reader.shuffle(
+            crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000),
+        batch_size=args.batch_size)
+
+    return loss, inference_program, adam, train_reader, test_reader, batch_acc, batch_size_tensor
+
+
+def to_lodtensor(data, place):
+    seq_lens = [len(seq) for seq in data]
+    cur_len = 0
+    lod = [cur_len]
+    for l in seq_lens:
+        cur_len += l
+        lod.append(cur_len)
+    flattened_data = numpy.concatenate(data, axis=0).astype("int64")
+    flattened_data = flattened_data.reshape([len(flattened_data), 1])
+    res = fluid.LoDTensor()
+    res.set(flattened_data, place)
+
res.set_lod([lod]) + return res diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/models/vgg.py b/vgg16_aws_dist/fluid_benchmark_for_aws/models/vgg.py new file mode 100644 index 00000000..6571bbf6 --- /dev/null +++ b/vgg16_aws_dist/fluid_benchmark_for_aws/models/vgg.py @@ -0,0 +1,104 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""VGG16 benchmark in Fluid""" +from __future__ import print_function + +import sys +import time +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +import argparse +import functools + + +def vgg16_bn_drop(input): + def conv_block(input, num_filter, groups, dropouts): + return fluid.nets.img_conv_group( + input=input, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act='relu', + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type='max') + + conv1 = conv_block(input, 64, 2, [0.3, 0]) + conv2 = conv_block(conv1, 128, 2, [0.4, 0]) + conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) + conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) + conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) + + drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5) + fc1 = fluid.layers.fc(input=drop, size=512, act=None) + bn = fluid.layers.batch_norm(input=fc1, act='relu') + drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5) + fc2 = fluid.layers.fc(input=drop2, size=512, act=None) + return fc2 + + +def get_model(args): + if args.data_set == "cifar10": + classdim = 10 + if args.data_format == 'NCHW': + data_shape = [3, 32, 32] + else: + data_shape = [32, 32, 3] + else: + classdim = 102 + if args.data_format == 'NCHW': + data_shape = [3, 224, 224] + else: + data_shape = [224, 224, 3] + + # Input data + images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + # Train program + net = vgg16_bn_drop(images) + predict = fluid.layers.fc(input=net, size=classdim, act='softmax') + cost = fluid.layers.cross_entropy(input=predict, label=label) + avg_cost = fluid.layers.mean(x=cost) + + # Evaluator + batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_acc = fluid.layers.accuracy( + input=predict, label=label, total=batch_size_tensor) + + # inference program + inference_program = fluid.default_main_program().clone() + with fluid.program_guard(inference_program): + inference_program = fluid.io.get_inference_program( + target_vars=[batch_acc, batch_size_tensor]) + + # Optimization + optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) + + # data reader + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.cifar.train10() + if args.data_set == 'cifar10' else paddle.dataset.flowers.train(), + buf_size=5120), + batch_size=args.batch_size) + test_reader = paddle.batch( + paddle.dataset.cifar.test10() + if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), + 
batch_size=args.batch_size) + + return avg_cost, inference_program, optimizer, train_reader, test_reader, batch_acc, batch_size_tensor diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/run.sh b/vgg16_aws_dist/fluid_benchmark_for_aws/run.sh new file mode 100644 index 00000000..f6dfd20b --- /dev/null +++ b/vgg16_aws_dist/fluid_benchmark_for_aws/run.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# This script benchmarking the PaddlePaddle Fluid on +# single thread single GPU. + +#export FLAGS_fraction_of_gpu_memory_to_use=0.0 +export CUDNN_PATH=/paddle/cudnn_v5 + +# disable openmp and mkl parallel +#https://github.com/PaddlePaddle/Paddle/issues/7199 +export MKL_NUM_THREADS=1 +export OMP_NUM_THREADS=1 +ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs` +if [ $ht -eq 1 ]; then # HT is OFF + if [ -z "$KMP_AFFINITY" ]; then + export KMP_AFFINITY="granularity=fine,compact,0,0" + fi + if [ -z "$OMP_DYNAMIC" ]; then + export OMP_DYNAMIC="FALSE" + fi +else # HT is ON + if [ -z "$KMP_AFFINITY" ]; then + export KMP_AFFINITY="granularity=fine,compact,1,0" + fi +fi +# disable multi-gpu if have more than one +export CUDA_VISIBLE_DEVICES=0 +export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=$CUDNN_PATH:$LD_LIBRARY_PATH + +# only query the gpu used +nohup stdbuf -oL nvidia-smi \ + --id=${CUDA_VISIBLE_DEVICES} \ + --query-gpu=timestamp \ + --query-compute-apps=pid,process_name,used_memory \ + --format=csv \ + --filename=mem.log \ + -l 1 & +# mnist +# mnist gpu mnist 128 +FLAGS_benchmark=true stdbuf -oL python fluid/mnist.py \ + --device=GPU \ + --batch_size=128 \ + --skip_batch_num=5 \ + --iterations=500 \ + 2>&1 | tee -a mnist_gpu_128.log + +# vgg16 +# gpu cifar10 128 +FLAGS_benchmark=true stdbuf -oL python fluid/vgg16.py \ + --device=GPU \ + --batch_size=128 \ + --skip_batch_num=5 \ + --iterations=30 \ + 2>&1 | tee -a vgg16_gpu_128.log + +# flowers gpu 128 +FLAGS_benchmark=true stdbuf -oL python fluid/vgg16.py \ + --device=GPU \ + --batch_size=32 \ + --data_set=flowers \ + --skip_batch_num=5 \ + --iterations=30 \ + 2>&1 | tee -a vgg16_gpu_flowers_32.log + +# resnet50 +# resnet50 gpu cifar10 128 +FLAGS_benchmark=true stdbuf -oL python fluid/resnet50.py \ + --device=GPU \ + --batch_size=128 \ + --data_set=cifar10 \ + --model=resnet_cifar10 \ + --skip_batch_num=5 \ + --iterations=30 \ + 2>&1 | tee -a resnet50_gpu_128.log + +# resnet50 gpu flowers 64 +FLAGS_benchmark=true stdbuf -oL python fluid/resnet50.py \ + --device=GPU \ + --batch_size=64 \ + --data_set=flowers \ + --model=resnet_imagenet \ + --skip_batch_num=5 \ + --iterations=30 \ + 2>&1 | tee -a resnet50_gpu_flowers_64.log + +# lstm +# lstm gpu imdb 32 # tensorflow only support batch=32 +FLAGS_benchmark=true stdbuf -oL python fluid/stacked_dynamic_lstm.py \ + --device=GPU \ + --batch_size=32 \ + --skip_batch_num=5 \ + --iterations=30 \ + --hidden_dim=512 \ + --emb_dim=512 \ + --crop_size=1500 \ + 2>&1 | tee -a lstm_gpu_32.log + +# seq2seq +# seq2seq gpu wmb 128 +FLAGS_benchmark=true stdbuf -oL python fluid/machine_translation.py \ + --device=GPU \ + --batch_size=128 \ + --skip_batch_num=5 \ + --iterations=30 \ + 2>&1 | tee -a lstm_gpu_128.log diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index e63fd887..51976589 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -6,8 +6,8 @@ CURRENT_FILE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" PADDLE_PATH=$CURRENT_FILE_DIR/../../.. 
paddle_build_path=$PADDLE_PATH/build paddle_docker_hub_tag="paddlepaddlece/paddle:latest" -vgg16_test_dockerhub_tag="paddlepaddlece/vgg16_dist:latest" -training_command="local:no,batch_size:128,num_passes:10,acc_target:0.6" +fluid_benchmark_dockerhub_tag="paddlepaddlece/fluid_benchmark:latest" +training_command="update_method:pserver,pass_num:10,acc_target:0.6" # clean up docker docker system prune -f @@ -22,19 +22,11 @@ docker build -t $paddle_docker_hub_tag $paddle_build_path docker push $paddle_docker_hub_tag # build test docker image -echo "going to prepare and build vgg16_dist_test" cd $CURRENT_FILE_DIR -cd vgg16_dist_test -if [ -d ~/.cache/paddle/dataset/cifar ]; then - echo "host cifar cache found, copying it to docker root" - mkdir -p .cache/paddle/dataset/ - cp -r -f ~/.cache/paddle/dataset/cifar .cache/paddle/dataset/ -fi -cd .. -echo "going to build vgg16_dist_test docker image and push it" -docker build -t $vgg16_test_dockerhub_tag ./vgg16_dist_test -docker push $vgg16_test_dockerhub_tag +echo "going to build fluid_benchmark_for_aws docker image and push it" +docker build -t $fluid_benchmark_dockerhub_tag ./fluid_benchmark_for_aws +docker push $fluid_benchmark_dockerhub_tag # fetch runner and install dependencies echo "going to work with aws_runner" @@ -56,4 +48,4 @@ python ce_runner.py \ --online_mode yes \ --pserver_command $training_command \ --trainer_command $training_command \ - --docker_image $vgg16_test_dockerhub_tag \ No newline at end of file + --docker_image $fluid_benchmark_dockerhub_tag \ No newline at end of file From 6c4fc0a79678f9849ce67546373761366651cb87 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Fri, 1 Jun 2018 14:06:12 -0700 Subject: [PATCH 61/67] change model to resnet; update trainer count limit --- vgg16_aws_dist/ce_runner.py | 2 +- vgg16_aws_dist/continuous_evaluation.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vgg16_aws_dist/ce_runner.py b/vgg16_aws_dist/ce_runner.py index ace00080..d0f9fcb4 100644 --- a/vgg16_aws_dist/ce_runner.py +++ b/vgg16_aws_dist/ce_runner.py @@ -265,7 +265,7 @@ def train_with_spec(spec, args, lock): ''' class ClusterIterator: - def __init__(self, specs, trainer_count_threshold = 7, pserver_count_threshold = 10): + def __init__(self, specs, trainer_count_threshold = 32, pserver_count_threshold = 10): self.specs = specs self.trainer_count_threshold = trainer_count_threshold self.pserver_count_threshold = pserver_count_threshold diff --git a/vgg16_aws_dist/continuous_evaluation.py b/vgg16_aws_dist/continuous_evaluation.py index a0676ef1..d3770b34 100644 --- a/vgg16_aws_dist/continuous_evaluation.py +++ b/vgg16_aws_dist/continuous_evaluation.py @@ -22,9 +22,9 @@ # ] cluster_specs = [ - ["mnist", 64, 1, 1, 0], - ["mnist", 64, 4, 4, 4], - ["mnist", 64, 7, 8, 7], + ["resnet", 64, 1, 1, 0], + ["resnet", 64, 4, 4, 4], + ["resnet", 64, 7, 8, 7], ] kpis_map = {} From 15be6270d0741f67e264e7254916afb95035cde7 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Fri, 1 Jun 2018 14:48:52 -0700 Subject: [PATCH 62/67] add base speed exception handling; switch to mnist --- vgg16_aws_dist/ce_runner.py | 15 ++++++++++----- vgg16_aws_dist/continuous_evaluation.py | 6 +++--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/vgg16_aws_dist/ce_runner.py b/vgg16_aws_dist/ce_runner.py index d0f9fcb4..c6f3d8ef 100644 --- a/vgg16_aws_dist/ce_runner.py +++ b/vgg16_aws_dist/ce_runner.py @@ -332,13 +332,18 @@ def get_speed_and_collector_by_spec(spec): logging.info("generating speedup") + # base_speed supposed to be one 
trainer, one gpu, local mode base_speed, _ = get_speed_and_collector_by_spec(cluster_specs_origin[0]) - logging.info("base speed is %f" % base_speed) if base_speed is not None: - for cluster_spec in cluster_specs_origin: - speed, data_collector = get_speed_and_collector_by_spec(cluster_spec) - if speed is not None: - data_collector.save("speedup", speed*cluster_spec[2]*cluster_spec[3]/base_speed) + logging.info("base speed is %f" % base_speed) + if base_speed is not None: + for cluster_spec in cluster_specs_origin: + speed, data_collector = get_speed_and_collector_by_spec(cluster_spec) + if speed is not None: + # speed * trainer_count * gpus_per_trainer_count / base_speed + data_collector.save("speedup", speed*cluster_spec[2]*cluster_spec[3]/base_speed) + else: + logging.info("base speed is not available") DataCollector.persist_all() # DataCollector.generate_csv() diff --git a/vgg16_aws_dist/continuous_evaluation.py b/vgg16_aws_dist/continuous_evaluation.py index d3770b34..a0676ef1 100644 --- a/vgg16_aws_dist/continuous_evaluation.py +++ b/vgg16_aws_dist/continuous_evaluation.py @@ -22,9 +22,9 @@ # ] cluster_specs = [ - ["resnet", 64, 1, 1, 0], - ["resnet", 64, 4, 4, 4], - ["resnet", 64, 7, 8, 7], + ["mnist", 64, 1, 1, 0], + ["mnist", 64, 4, 4, 4], + ["mnist", 64, 7, 8, 7], ] kpis_map = {} From 7dd4b14b70389a06191a694410c4a07b4de91a4a Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Fri, 1 Jun 2018 17:30:34 -0700 Subject: [PATCH 63/67] change test to vgg; update acc log handling --- vgg16_aws_dist/continuous_evaluation.py | 6 +++--- vgg16_aws_dist/fluid_benchmark_for_aws/fluid_benchmark.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vgg16_aws_dist/continuous_evaluation.py b/vgg16_aws_dist/continuous_evaluation.py index a0676ef1..76ca4db3 100644 --- a/vgg16_aws_dist/continuous_evaluation.py +++ b/vgg16_aws_dist/continuous_evaluation.py @@ -22,9 +22,9 @@ # ] cluster_specs = [ - ["mnist", 64, 1, 1, 0], - ["mnist", 64, 4, 4, 4], - ["mnist", 64, 7, 8, 7], + ["vgg", 16, 1, 1, 0], + ["vgg", 16, 4, 4, 4], + ["vgg", 16, 7, 8, 7], ] kpis_map = {} diff --git a/vgg16_aws_dist/fluid_benchmark_for_aws/fluid_benchmark.py b/vgg16_aws_dist/fluid_benchmark_for_aws/fluid_benchmark.py index 005c62e0..0f780a49 100644 --- a/vgg16_aws_dist/fluid_benchmark_for_aws/fluid_benchmark.py +++ b/vgg16_aws_dist/fluid_benchmark_for_aws/fluid_benchmark.py @@ -242,7 +242,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, num_samples += len(data) loss = outs[0] if batch_acc is not None: - acc = outs[1] + acc = np.mean(outs[1]).item() train_pass_acc.add(value=acc, weight=len(data)) else: acc = None From 38b066cbd98ce68522aad052894bcfcd9aa23203 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Fri, 1 Jun 2018 17:49:59 -0700 Subject: [PATCH 64/67] add cache back --- vgg16_aws_dist/run.xsh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index 51976589..8d8dfce2 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -24,6 +24,15 @@ docker push $paddle_docker_hub_tag # build test docker image cd $CURRENT_FILE_DIR +cd fluid_benchmark_for_aws +if [ -d ~/.cache/paddle/dataset/cifar ]; then + echo "host cifar cache found, copying it to docker root" + mkdir -p .cache/paddle/dataset/ + cp -r -f ~/.cache/paddle/dataset/cifar .cache/paddle/dataset/ + cp -r -f ~/.cache/paddle/dataset/flowers .cache/paddle/dataset/ +fi +cd .. 
+ echo "going to build fluid_benchmark_for_aws docker image and push it" docker build -t $fluid_benchmark_dockerhub_tag ./fluid_benchmark_for_aws docker push $fluid_benchmark_dockerhub_tag From a918d789bc2408b3c823757e7b9a29c1b0ef492b Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Fri, 1 Jun 2018 23:36:40 -0700 Subject: [PATCH 65/67] update speedup formula; update training config --- vgg16_aws_dist/ce_runner.py | 4 ++-- vgg16_aws_dist/run.xsh | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/vgg16_aws_dist/ce_runner.py b/vgg16_aws_dist/ce_runner.py index c6f3d8ef..bbc19351 100644 --- a/vgg16_aws_dist/ce_runner.py +++ b/vgg16_aws_dist/ce_runner.py @@ -340,8 +340,8 @@ def get_speed_and_collector_by_spec(spec): for cluster_spec in cluster_specs_origin: speed, data_collector = get_speed_and_collector_by_spec(cluster_spec) if speed is not None: - # speed * trainer_count * gpus_per_trainer_count / base_speed - data_collector.save("speedup", speed*cluster_spec[2]*cluster_spec[3]/base_speed) + # speed * trainer_count / base_speed + data_collector.save("speedup", speed*cluster_spec[2]/base_speed) else: logging.info("base speed is not available") diff --git a/vgg16_aws_dist/run.xsh b/vgg16_aws_dist/run.xsh index 8d8dfce2..be239834 100755 --- a/vgg16_aws_dist/run.xsh +++ b/vgg16_aws_dist/run.xsh @@ -7,7 +7,7 @@ PADDLE_PATH=$CURRENT_FILE_DIR/../../.. paddle_build_path=$PADDLE_PATH/build paddle_docker_hub_tag="paddlepaddlece/paddle:latest" fluid_benchmark_dockerhub_tag="paddlepaddlece/fluid_benchmark:latest" -training_command="update_method:pserver,pass_num:10,acc_target:0.6" +training_command="update_method:pserver,acc_target:0.6,iterations:100,pass_num:1" # clean up docker docker system prune -f @@ -26,11 +26,17 @@ cd $CURRENT_FILE_DIR cd fluid_benchmark_for_aws if [ -d ~/.cache/paddle/dataset/cifar ]; then - echo "host cifar cache found, copying it to docker root" + echo "host cifar dataset cache found, copying it to docker root" mkdir -p .cache/paddle/dataset/ cp -r -f ~/.cache/paddle/dataset/cifar .cache/paddle/dataset/ +fi + +if [ -d ~/.cache/paddle/dataset/flowers ]; then + echo "host flower dataset cache found, copying it to docker root" + mkdir -p .cache/paddle/dataset/ cp -r -f ~/.cache/paddle/dataset/flowers .cache/paddle/dataset/ fi + cd .. 
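+
+# NOTE: seeding the dataset cache into the build context lets the image
+# ship with the cifar/flowers data, so the benchmark containers should not
+# have to re-download it on startup (assuming the Dockerfile copies .cache
+# into the image).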
echo "going to build fluid_benchmark_for_aws docker image and push it" From 813409a01844c1bdc1869a6de64a6bc0d2c39a8d Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Sat, 2 Jun 2018 00:58:49 -0700 Subject: [PATCH 66/67] make continous_eva python 3 complied --- vgg16_aws_dist/continuous_evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vgg16_aws_dist/continuous_evaluation.py b/vgg16_aws_dist/continuous_evaluation.py index 76ca4db3..584d73db 100644 --- a/vgg16_aws_dist/continuous_evaluation.py +++ b/vgg16_aws_dist/continuous_evaluation.py @@ -36,7 +36,7 @@ def generate_cluster_id(cluster_spec): def generate_kpi_id(kpi_name, cluster_spec): return kpi_name + "_" + generate_cluster_id(cluster_spec) -for kpi_type_name, (Kpi_class, diff_thre) in kpis_specs.iteritems(): +for kpi_type_name, (Kpi_class, diff_thre) in kpis_specs.items(): for cluster_spec in cluster_specs: kpi_id = generate_kpi_id(kpi_type_name, cluster_spec) the_kpi = Kpi_class(kpi_id, diff_thre) From 0b079301dd9b06ebc8323e9f2355d751c074915b Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Sat, 2 Jun 2018 14:09:40 -0700 Subject: [PATCH 67/67] remove some kpi; add history data; remove unused model; --- vgg16_aws_dist/continuous_evaluation.py | 6 +- .../speedup_vgg_16_1_1_0_factor.txt | 1 + .../speedup_vgg_16_4_4_4_factor.txt | 1 + .../speedup_vgg_16_7_8_7_factor.txt | 1 + .../train_speed_vgg_16_1_1_0_factor.txt | 1 + .../train_speed_vgg_16_4_4_4_factor.txt | 1 + .../train_speed_vgg_16_7_8_7_factor.txt | 1 + vgg16_aws_dist/vgg16_dist_test/Dockerfile | 7 - vgg16_aws_dist/vgg16_dist_test/vgg16_fluid.py | 329 ------------------ 9 files changed, 9 insertions(+), 339 deletions(-) create mode 100644 vgg16_aws_dist/speedup_vgg_16_1_1_0_factor.txt create mode 100644 vgg16_aws_dist/speedup_vgg_16_4_4_4_factor.txt create mode 100644 vgg16_aws_dist/speedup_vgg_16_7_8_7_factor.txt create mode 100644 vgg16_aws_dist/train_speed_vgg_16_1_1_0_factor.txt create mode 100644 vgg16_aws_dist/train_speed_vgg_16_4_4_4_factor.txt create mode 100644 vgg16_aws_dist/train_speed_vgg_16_7_8_7_factor.txt delete mode 100644 vgg16_aws_dist/vgg16_dist_test/Dockerfile delete mode 100644 vgg16_aws_dist/vgg16_dist_test/vgg16_fluid.py diff --git a/vgg16_aws_dist/continuous_evaluation.py b/vgg16_aws_dist/continuous_evaluation.py index 584d73db..dea1aa0e 100644 --- a/vgg16_aws_dist/continuous_evaluation.py +++ b/vgg16_aws_dist/continuous_evaluation.py @@ -6,9 +6,9 @@ kpis_specs = { "speedup": [LessWorseKpi, 0.01], "train_speed":[LessWorseKpi, 0.01], - "converge_speed":[GreaterWorseKpi, 0.01], - "gpu_memory":[GreaterWorseKpi, 0.01], - "acc_4passes":[GreaterWorseKpi, 0.01], + # "converge_speed":[GreaterWorseKpi, 0.01], + # "gpu_memory":[GreaterWorseKpi, 0.01], + # "acc_4passes":[GreaterWorseKpi, 0.01], } # each row represets a cluster setting with the following columns diff --git a/vgg16_aws_dist/speedup_vgg_16_1_1_0_factor.txt b/vgg16_aws_dist/speedup_vgg_16_1_1_0_factor.txt new file mode 100644 index 00000000..e7a19a6e --- /dev/null +++ b/vgg16_aws_dist/speedup_vgg_16_1_1_0_factor.txt @@ -0,0 +1 @@ +[1.0] \ No newline at end of file diff --git a/vgg16_aws_dist/speedup_vgg_16_4_4_4_factor.txt b/vgg16_aws_dist/speedup_vgg_16_4_4_4_factor.txt new file mode 100644 index 00000000..3ea09272 --- /dev/null +++ b/vgg16_aws_dist/speedup_vgg_16_4_4_4_factor.txt @@ -0,0 +1 @@ +[10.233551979064941] \ No newline at end of file diff --git a/vgg16_aws_dist/speedup_vgg_16_7_8_7_factor.txt b/vgg16_aws_dist/speedup_vgg_16_7_8_7_factor.txt new file mode 100644 index 
00000000..c3f822e5 --- /dev/null +++ b/vgg16_aws_dist/speedup_vgg_16_7_8_7_factor.txt @@ -0,0 +1 @@ +[11.316923141479492] \ No newline at end of file diff --git a/vgg16_aws_dist/train_speed_vgg_16_1_1_0_factor.txt b/vgg16_aws_dist/train_speed_vgg_16_1_1_0_factor.txt new file mode 100644 index 00000000..55d41345 --- /dev/null +++ b/vgg16_aws_dist/train_speed_vgg_16_1_1_0_factor.txt @@ -0,0 +1 @@ +[11.437457084655762] \ No newline at end of file diff --git a/vgg16_aws_dist/train_speed_vgg_16_4_4_4_factor.txt b/vgg16_aws_dist/train_speed_vgg_16_4_4_4_factor.txt new file mode 100644 index 00000000..c133cf2f --- /dev/null +++ b/vgg16_aws_dist/train_speed_vgg_16_4_4_4_factor.txt @@ -0,0 +1 @@ +[29.26145362854004] \ No newline at end of file diff --git a/vgg16_aws_dist/train_speed_vgg_16_7_8_7_factor.txt b/vgg16_aws_dist/train_speed_vgg_16_7_8_7_factor.txt new file mode 100644 index 00000000..b0991782 --- /dev/null +++ b/vgg16_aws_dist/train_speed_vgg_16_7_8_7_factor.txt @@ -0,0 +1 @@ +[18.49097442626953] \ No newline at end of file diff --git a/vgg16_aws_dist/vgg16_dist_test/Dockerfile b/vgg16_aws_dist/vgg16_dist_test/Dockerfile deleted file mode 100644 index 467fdbaa..00000000 --- a/vgg16_aws_dist/vgg16_dist_test/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM paddlepaddlece/paddle:latest - -ENV HOME /root -COPY ./ /root/ -WORKDIR /root -RUN apt install -y python-opencv -ENTRYPOINT ["python", "vgg16_fluid.py"] diff --git a/vgg16_aws_dist/vgg16_dist_test/vgg16_fluid.py b/vgg16_aws_dist/vgg16_dist_test/vgg16_fluid.py deleted file mode 100644 index 3fd28575..00000000 --- a/vgg16_aws_dist/vgg16_dist_test/vgg16_fluid.py +++ /dev/null @@ -1,329 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""VGG16 benchmark in Fluid""" -from __future__ import print_function - -import sys -import time -import numpy as np -import paddle -import paddle.fluid as fluid -import paddle.fluid.core as core -import paddle.fluid.profiler as profiler -import argparse -import functools -import os -from paddle.fluid import debuger - - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - '--batch_size', type=int, default=128, help="Batch size for training.") -parser.add_argument( - '--learning_rate', - type=float, - default=1e-3, - help="Learning rate for training.") -parser.add_argument('--num_passes', type=int, default=50, help="No. 
of passes.") -parser.add_argument( - '--device', - type=str, - default='CPU', - choices=['CPU', 'GPU'], - help="The device type.") -parser.add_argument('--device_id', type=int, default=0, help="The device id.") -parser.add_argument( - '--data_format', - type=str, - default='NCHW', - choices=['NCHW', 'NHWC'], - help='The data order, now only support NCHW.') -parser.add_argument( - '--data_set', - type=str, - default='cifar10', - choices=['cifar10', 'flowers'], - help='Optional dataset for benchmark.') -parser.add_argument( - '--local', - type=str2bool, - default=True, - help='Whether to run as local mode.') - -parser.add_argument( - "--ps_hosts", - type=str, - default=os.getenv("PSERVER_HOSTS"), - help="Comma-separated list of hostname:port pairs") -parser.add_argument( - "--trainer_hosts", - type=str, - default="", - help="Comma-separated list of hostname:port pairs") -parser.add_argument( - "--profile", action='store_true', help="If set, profile a few steps.") - -parser.add_argument( - "--acc_target", default=None, help="trianing will be terminated when acc_target reaches") - -# Flags for defining the tf.train.Server -parser.add_argument( - "--task_index", type=int, default=os.getenv("TRAINER_INDEX"), help="Index of task within the job") -args = parser.parse_args() - - -def vgg16_bn_drop(input): - def conv_block(input, num_filter, groups, dropouts): - return fluid.nets.img_conv_group( - input=input, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act='relu', - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type='max') - - conv1 = conv_block(input, 64, 2, [0.3, 0]) - conv2 = conv_block(conv1, 128, 2, [0.4, 0]) - conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) - conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) - conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) - - drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5) - fc1 = fluid.layers.fc(input=drop, size=4096, act=None) - bn = fluid.layers.batch_norm(input=fc1, act='relu') - drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5) - fc2 = fluid.layers.fc(input=drop2, size=4096, act=None) - return fc2 - - -def main(): - if args.data_set == "cifar10": - classdim = 10 - if args.data_format == 'NCHW': - data_shape = [3, 32, 32] - else: - data_shape = [32, 32, 3] - else: - classdim = 102 - if args.data_format == 'NCHW': - data_shape = [3, 224, 224] - else: - data_shape = [224, 224, 3] - - # Input data - images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - - # Train program - net = vgg16_bn_drop(images) - predict = fluid.layers.fc(input=net, size=classdim, act='softmax') - cost = fluid.layers.cross_entropy(input=predict, label=label) - avg_cost = fluid.layers.mean(x=cost) - - # Evaluator - batch_size = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size) - - # inference program - inference_program = fluid.default_main_program().clone() - with fluid.program_guard(inference_program): - inference_program = fluid.io.get_inference_program(batch_acc) - - # Optimization - optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) - optimize_ops, params_grads = optimizer.minimize(avg_cost) - - # Initialize executor - place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace( - args.device_id) - exe = fluid.Executor(place) - - # test - def test(exe): - test_pass_acc = 
fluid.average.WeightedAverage() - for batch_id, data in enumerate(test_reader()): - img_data = np.array(map(lambda x: x[0].reshape(data_shape), - data)).astype("float32") - y_data = np.array(map(lambda x: x[1], data)).astype("int64") - y_data = y_data.reshape([-1, 1]) - - outs = exe.run(inference_program, - feed={"pixel": img_data, - "label": y_data}, - fetch_list=[batch_acc, batch_size]) - test_pass_acc.add(value=np.array(outs[0]), weight=np.array(outs[1])) - - return test_pass_acc.eval() - - def train_loop(exe, trainer_prog): - iters = 0 - ts = time.time() - train_pass_acc = fluid.average.WeightedAverage() - acc_4passes = None - converge_speed = None - for pass_id in range(args.num_passes): - # train - start_time = time.time() - num_samples = 0 - train_pass_acc.reset() - - def run_step(batch_id, data): - img_data = np.array( - map(lambda x: x[0].reshape(data_shape), data)).astype( - "float32") - y_data = np.array(map(lambda x: x[1], data)).astype("int64") - y_data = y_data.reshape([-1, 1]) - - loss, acc, b_size = exe.run( - trainer_prog, - feed={"pixel": img_data, - "label": y_data}, - fetch_list=[avg_cost, batch_acc, batch_size]) - return loss, acc, b_size - - if args.profile and args.task_index == 0: - # warmup. - for batch_id, data in enumerate(train_reader()): - if batch_id > 5: break - run_step(batch_id, data) - with profiler.profiler('All', 'total', '/tmp/profile_vgg'): - for batch_id, data in enumerate(train_reader()): - if batch_id > 5: break - run_step(batch_id, data) - - for batch_id, data in enumerate(train_reader()): - ts = time.time() - loss, acc, b_size = run_step(batch_id, data) - iters += 1 - num_samples += len(data) - train_pass_acc.add(value=acc, weight=b_size) - print( - "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, " - "Speed = %.2f img/s" % (pass_id, iters, loss, acc, - len(data) / (time.time() - ts)) - ) # The accuracy is the accumulation of batches, but not the current batch. 
- # terminate training when acc_target reaches - if args.acc_target and acc >= args.acc_target: - converge_speed = time.time() - start_time - print("converge_speed set with %d" % converge_speed) - - pass_elapsed = time.time() - start_time - pass_train_acc = train_pass_acc.eval() - pass_test_acc = test(exe) - - if pass_id == 4: - print("acc_4passes set with %f" % pass_train_acc) - acc_4passes = float(pass_train_acc) - - msgs = [] - msgs.append("pass = %d" % pass_id) - msgs.append("train_speed = %f" % float(num_samples / pass_elapsed)) - msgs.append("train_accuracy = %f" % pass_train_acc) - msgs.append("test_accuracy = %f" % pass_test_acc) - - if isinstance(acc_4passes, float): - msgs.append("acc_4passes = %f" % acc_4passes) - if isinstance(converge_speed, int): - msgs.append("converge_speed = %d" % converge_speed) - - print("**metrics_data: " + ", ".join(msgs)) - - if args.local: - # Parameter initialization - exe.run(fluid.default_startup_program()) - - # data reader - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10() if args.data_set == 'cifar10' - else paddle.dataset.flowers.train(), - buf_size=5120), - batch_size=args.batch_size) - test_reader = paddle.batch( - paddle.dataset.cifar.test10() - if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), - batch_size=args.batch_size) - train_loop(exe, fluid.default_main_program()) - else: - trainers = int(os.getenv("TRAINERS")) # total trainer count - print("trainers total: ", trainers) - - training_role = os.getenv( - "TRAINING_ROLE", - "TRAINER") # get the training role: trainer/pserver - - t = fluid.DistributeTranspiler() - t.transpile( - trainer_id=args.task_index, - pservers=args.ps_hosts, - trainers=trainers) - - if training_role == "PSERVER": - current_endpoint = os.getenv("SERVER_ENDPOINT") - if not current_endpoint: - print("need env SERVER_ENDPOINT") - exit(1) - pserver_prog = t.get_pserver_program(current_endpoint) - pserver_startup = t.get_startup_program(current_endpoint, - pserver_prog) - exe.run(pserver_startup) - exe.run(pserver_prog) - elif training_role == "TRAINER": - # Parameter initialization - exe.run(fluid.default_startup_program()) - - # data reader - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10() if args.data_set == 'cifar10' - else paddle.dataset.flowers.train(), - buf_size=5120), - batch_size=args.batch_size) - test_reader = paddle.batch( - paddle.dataset.cifar.test10() if args.data_set == 'cifar10' else - paddle.dataset.flowers.test(), - batch_size=args.batch_size) - - trainer_prog = t.get_trainer_program() - feeder = fluid.DataFeeder(feed_list=[images, label], place=place) - # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver - exe.run(fluid.default_startup_program()) - train_loop(exe, trainer_prog) - else: - print("environment var TRAINER_ROLE should be TRAINER os PSERVER") - - -def print_arguments(): - print('----------- Configuration Arguments -----------') - for arg, value in sorted(vars(args).iteritems()): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -if __name__ == "__main__": - print_arguments() - main()