Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Commit

Permalink
update all to paddle master (#28)
Browse files Browse the repository at this point in the history
* code formatted

* update transformer model train 

* edit text_classification/README.md

* update resnet50/vgg16 baseline

* active model lstm

* active model language model

* active model text_classification

* active sequence_tagging_for_ner

* active model object_detection

* add model object_detection four gpu card kpi

* add CPU model

* Update README.md

* add last kpi record for CPU model

* add multi card for 4 models

* fix nan digital for CPU

* Revert "fix nan digital for CPU"

* fix nan digital for CPU

* change transform change api

* remove resnet30 for CPU, reuse its function in resnet50

* image_classification add four card kpi

* evaluate [d0a62bfcf2067f9eab487b4662ebc3b3a0fc07c8]

* fix some code style error

* fix speed bug

* evaluate [8e3e65ff93718efbe3fa7f01dc52132f560e8bfc]

* Update run.xsh

* add mul card for sequence_tagging_for_ner to test pr

* not save models

* update run.xsh
  • Loading branch information
Superjomn committed May 28, 2018
1 parent 81253b9 commit 6b8c122
Show file tree
Hide file tree
Showing 93 changed files with 778 additions and 355 deletions.
13 changes: 13 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Pre-commit hook configuration for this repository.
# The `sha` keys pin each hook repository to a fixed revision — presumably to
# keep formatting/check behavior reproducible across machines (standard
# pre-commit convention; confirm against the pre-commit docs).
repos:
# yapf: auto-formats Python/Bazel sources matched by the `files` regex.
-   repo: https://github.com/PaddlePaddle/mirrors-yapf.git
    sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
    hooks:
    -   id: yapf
        files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
# Generic sanity checks: large files, merge-conflict markers, broken
# symlinks, and a trailing-newline fixer.
-   repo: https://github.com/pre-commit/pre-commit-hooks
    sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0
    hooks:
    -   id: check-added-large-files
    -   id: check-merge-conflict
    -   id: check-symlinks
    -   id: end-of-file-fixer
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## Howtos

### Contribute
- Run `pre-commit run -a` before your PR, this will help to format code automatically

### Add New Evaluation Task

Reference [mnist task](https://github.com/Superjomn/paddle-ce-latest-kpis/tree/master/mnist),
Expand Down
1 change: 0 additions & 1 deletion __ocr_recognition/continuous_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
train_avg_loss_kpi = CostKpi('train_avg_loss', 0.2, 0)
train_seq_err_kpi = CostKpi('train_seq_err', 0.2, 0)


tracking_kpis = [
train_avg_loss_kpi,
train_seq_err_kpi,
Expand Down
3 changes: 2 additions & 1 deletion __ocr_recognition/ctc_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ def train(batch_size, train_images_dir=None, train_list_file=None):
train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME)
if train_list_file is None:
train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME)
return generator.train_reader(train_images_dir, train_list_file, batch_size)
return generator.train_reader(train_images_dir, train_list_file,
batch_size)


def test(batch_size=1, test_images_dir=None, test_list_file=None):
Expand Down
10 changes: 7 additions & 3 deletions __ocr_recognition/ctc_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,12 @@ def train(args, data_reader=ctc_reader):
if batch_id == args.iterations - 1:
avg_seq_err = batch_seq_error[0] / args.batch_size
avg_loss = batch_loss[0] / args.batch_size
train_avg_loss_kpi.add_record(np.array(avg_loss, dtype='float32'))
train_seq_err_kpi.add_record(np.array(avg_seq_err, dtype='float32'))
train_avg_loss_kpi.add_record(
np.array(
avg_loss, dtype='float32'))
train_seq_err_kpi.add_record(
np.array(
avg_seq_err, dtype='float32'))
break
# evaluate
if batch_id % args.eval_period == 0:
Expand All @@ -121,7 +125,7 @@ def train(args, data_reader=ctc_reader):

batch_id += 1
train_avg_loss_kpi.persist()
train_seq_err_kpi.persist()
train_seq_err_kpi.persist()


def main():
Expand Down
3 changes: 2 additions & 1 deletion __ocr_recognition/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def evaluate(args, eval=ctc_eval, data_reader=ctc_reader):
count = 0
for data in test_reader():
count += 1
exe.run(fluid.default_main_program(), feed=get_feeder_data(data, place))
exe.run(fluid.default_main_program(),
feed=get_feeder_data(data, place))
avg_distance, avg_seq_error = evaluator.eval(exe)
print "Read %d samples; avg_distance: %s; avg_seq_error: %s" % (
count, avg_distance, avg_seq_error)
Expand Down
File renamed without changes.
19 changes: 19 additions & 0 deletions __resnet30/continuous_evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import os
import sys

# The continuous-evaluation framework lives outside this repo; its location
# is supplied through the `ceroot` environment variable. Make it importable
# before pulling in the `kpi` module.
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, AccKpi, DurationKpi

# KPI trackers for the resnet30 CPU job. The numeric arguments are the
# tolerance settings interpreted by the kpi module (see `kpi` for exact
# semantics); `actived=True` enables tracking for the KPI.
train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True)
train_acc_kpi = AccKpi('train_acc', 0.02, 0, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.05, 0, actived=True)
train_speed_kpi = AccKpi('train_speed', 0.01, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.02, 0, actived=True)

# Every KPI the CE framework should record for this model.
tracking_kpis = [
    train_cost_kpi, train_acc_kpi, test_acc_kpi, train_speed_kpi,
    train_duration_kpi
]
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
[[2.744691848754883]]
[[2.5916006565093994]]
[[2.459857225418091]]
[[2.3514037132263184]]
[[2.3514037132263184]]
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
[10.211545944213867]
[10.223276853561401]
[10.213245153427124]
[10.241420984268188]
[10.241420984268188]
1 change: 1 addition & 0 deletions __resnet30/latest_kpis/test_acc_factor.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[0.459300000667572]
1 change: 1 addition & 0 deletions __resnet30/latest_kpis/train_acc_factor.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[0.56150390625]
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
[[2.416034460067749]]
[[2.4315545558929443]]
[[2.4579968452453613]]
[[2.449829578399658]]
[[2.449829578399658]]
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
[21.280965089797974]
[21.29200315475464]
[21.28358292579651]
[21.292808055877686]
[21.292808055877686]
1 change: 1 addition & 0 deletions __resnet30/latest_kpis/train_speed_factor.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[55.21354293823242]
133 changes: 88 additions & 45 deletions resnet30/model.py → __resnet30/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core

from continuous_evaluation import (train_cost_kpi, train_duration_kpi,
tracking_kpis)
from continuous_evaluation import *

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -85,93 +83,138 @@ def train(batch_size, device, pass_num, iterations):
input = fluid.layers.data(name='data', shape=dshape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')

# Train program
predict = resnet_cifar10(input, class_dim)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
opts = optimizer.minimize(avg_cost)
# accuracy = fluid.evaluator.Evaluator(input=predict, label=label)

# Evaluator
#accuracy = fluid.evaluator.Evaluator(input=predict, label=label)

batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
batch_acc = fluid.layers.accuracy(
input=predict, label=label, total=batch_size_tensor)
accuracy = fluid.average.WeightedAverage()

# inference program
inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
# test_target = accuracy.metrics + accuracy.states
test_target = [predict, avg_cost]
inference_program = fluid.io.get_inference_program(test_target)
target_vars=[batch_acc, batch_size_tensor]
inference_program = fluid.io.get_inference_program(target_vars)

# Optimization
optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
opts = optimizer.minimize(avg_cost)
fluid.memory_optimize(fluid.default_main_program())

train_reader = paddle.batch(
paddle.dataset.cifar.train10(),
batch_size=batch_size)
paddle.dataset.cifar.train10(), batch_size=batch_size)

test_reader = paddle.batch(
paddle.dataset.cifar.test10(), batch_size=batch_size)

# Initialize executor
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place)

# Parameter initialization
exe.run(fluid.default_startup_program())

def test(exe):
# accuracy.reset(exe)
test_accuracy = fluid.average.WeightedAverage()
for batch_id, data in enumerate(test_reader()):
img_data = np.array(map(lambda x: x[0].reshape(dshape),
data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1])

# print('image_data', img_data)
# print('y_data', y_data)

predict_, avg_cost_ = exe.run(
inference_program,
feed={
"data": img_data,
"label": y_data
},
fetch_list=[predict, avg_cost])
return avg_cost
acc, weight = exe.run(inference_program,
feed={"data": img_data,
"label": y_data},
fetch_list=[batch_acc, batch_size_tensor])
test_accuracy.add(value=acc, weight=weight)

# return accuracy.eval(exe)

place = core.CPUPlace() if device == 'CPU' else core.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
return test_accuracy.eval()

for pass_id in range(1):
logger.warning('Pass {}'.format(pass_id))
# accuracy.reset(exe)
im_num = 0
total_train_time = 0.0
for pass_id in range(args.pass_num):
iter = 0
every_pass_loss = []
accuracy.reset()
pass_duration = 0.0
for batch_id, data in enumerate(train_reader()):
logger.warning('Batch {}'.format(batch_id))
batch_start = time.time()
if iter == iterations:
break
image = np.array(map(lambda x: x[0].reshape(dshape),
data)).astype('float32')
image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype(
'float32')
label = np.array(map(lambda x: x[1], data)).astype('int64')
label = label.reshape([-1, 1])
avg_cost_ = exe.run(

loss, acc, weight = exe.run(
fluid.default_main_program(),
feed={
'data': image,
'label': label
},
fetch_list=[avg_cost])
feed={'data': image,
'label': label},
fetch_list=[avg_cost, batch_acc, batch_size_tensor])

batch_end = time.time()
print('avg_cost', np.array(avg_cost_, dtype='float32'))
train_cost_kpi.add_record(np.array(avg_cost_, dtype='float32'))
train_duration_kpi.add_record(batch_end - batch_start)
every_pass_loss.append(loss)
accuracy.add(value=acc, weight=weight)


if iter >= args.skip_batch_num or pass_id != 0:
batch_duration = time.time() - batch_start
pass_duration += batch_duration
im_num += label.shape[0]

iter += 1

# test_start = time.time()
# test(exe)
# test_end = time.time()
# valid_tracker.add(test_end - test_start, pass_test_acc)
print(
"Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
(pass_id, iter, loss, acc))
pass_train_acc = accuracy.eval()
pass_test_acc = test(exe)

total_train_time += pass_duration
pass_train_loss = np.mean(every_pass_loss)
print(
"Pass:%d, Loss:%f, Train Accuray:%f, Test Accuray:%f, Handle Images Duration: %f\n"
% (pass_id, pass_train_loss, pass_train_acc,
pass_test_acc, pass_duration))
if pass_id == args.pass_num - 1:
train_cost_kpi.add_record(np.array(pass_train_loss, dtype='float32'))
train_cost_kpi.persist()
train_acc_kpi.add_record(np.array(pass_train_acc, dtype='float32'))
train_acc_kpi.persist()
test_acc_kpi.add_record(np.array(pass_test_acc, dtype='float32'))
test_acc_kpi.persist()
train_duration_kpi.add_record(batch_end - batch_start)
train_duration_kpi.persist()

if total_train_time > 0.0:
examples_per_sec = im_num / total_train_time
sec_per_batch = total_train_time / \
(iter * args.pass_num - args.skip_batch_num)
train_speed_kpi.add_record(np.array(examples_per_sec, dtype='float32'))
train_speed_kpi.persist()


def parse_args():
    """Parse and return the command-line options for the training script.

    Returns:
        argparse.Namespace with: batch_size (int), device ('CPU' or 'GPU'),
        iters (int), pass_num (int, default 3), skip_batch_num (int,
        default 5).
    """
    p = argparse.ArgumentParser('model')
    p.add_argument('--batch_size', type=int)
    p.add_argument('--device', type=str, choices=('CPU', 'GPU'))
    p.add_argument('--iters', type=int)
    p.add_argument(
        '--pass_num', type=int, default=3, help='The number of passes.')
    # Early minibatches are skipped when timing so warm-up does not skew
    # the measured speed.
    p.add_argument(
        '--skip_batch_num',
        type=int,
        default=5,
        help='The first num of minibatch num to skip, for better performance test'
    )
    return p.parse_args()

Expand Down
2 changes: 1 addition & 1 deletion resnet30/run.xsh → __resnet30/run.xsh
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ import sys

model_file = 'model.py'

python @(model_file) --batch_size 1000 --iters 10 --device CPU
python @(model_file) --batch_size 128 --pass_num 5 --iters 80 --device CPU
31 changes: 22 additions & 9 deletions image_classification/continuous_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,26 @@
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi

train_acc_top1_kpi = AccKpi('train_acc_top1_kpi', 0.2, 0)
train_acc_top5_kpi = AccKpi('train_acc_top5_kpi', 0.2, 0)
train_cost_kpi = CostKpi('train_cost_kpi', 0.2, 0)
train_acc_top1_kpi = AccKpi('train_acc_top1_kpi', 0.05, 0,
actived=True,
desc='TOP1 ACC')
train_acc_top5_kpi = AccKpi('train_acc_top5_kpi', 0.05, 0,
actived=True,
desc='TOP5 ACC')
train_cost_kpi = CostKpi('train_cost_kpi', 0.05, 0,
actived=True,
desc='train cost')
train_speed_kpi = AccKpi('train_speed_kpi', 0.05, 0,
actived=True,
unit_repr='images/s',
desc='train speed in one GPU card')
four_card_train_speed_kpi = AccKpi('four_card_train_speed_kpi', 0.05, 0,
actived=True,
unit_repr='images/s',
desc='train speed in four GPU card')


tracking_kpis = [
train_acc_top1_kpi,
train_acc_top5_kpi,
train_cost_kpi
]
tracking_kpis = [train_acc_top1_kpi,
train_acc_top5_kpi,
train_cost_kpi,
train_speed_kpi,
four_card_train_speed_kpi]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[174.80782203734947]
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[0.33659327030181885]
[0.3767074942588806]
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[0.5419688820838928]
[0.5719688820838928]
2 changes: 1 addition & 1 deletion image_classification/latest_kpis/train_cost_kpi_factor.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[2.975904941558838]
[2.875904941558838]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[101.29667191639184]
Loading

0 comments on commit 6b8c122

Please sign in to comment.