
Update trainer api #10653

Closed
20 changes: 13 additions & 7 deletions python/paddle/fluid/inferencer.py
@@ -13,29 +13,35 @@
# limitations under the License.

import core
import framework

import executor
import framework
import io
import unique_name
from trainer import check_and_get_place

__all__ = ['Inferencer', ]


class Inferencer(object):
def __init__(self, param_path, place=None):
def __init__(self, infer_func, param_path, place=None):
"""
:param param_path: the path where the inference model is saved by fluid.io.save_inference_model
:param infer_func: a function that builds and returns the predict Variable
:param param_path: the path where the model parameters were saved by fluid.io.save_params
:param place: place to do the inference
"""
self.param_path = param_path
self.scope = core.Scope()

self.inference_program = framework.Program()
with framework.program_guard(self.inference_program):
with unique_name.guard():
self.predict_var = infer_func()

self.exe = executor.Executor(check_and_get_place(place))
with executor.scope_guard(self.scope):
# load params from param_path into scope
[self.inference_program, _,
self.fetch_targets] = io.load_inference_model(
executor=self.exe, dirname=param_path)
io.load_params(self.exe, param_path, self.inference_program)

def infer(self, inputs, return_numpy=True):
"""
@@ -51,7 +57,7 @@ def infer(self, inputs, return_numpy=True):
with executor.scope_guard(self.scope):
results = self.exe.run(self.inference_program,
feed=inputs,
fetch_list=self.fetch_targets,
fetch_list=[self.predict_var],
return_numpy=return_numpy)

return results
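For orientation, here is a minimal usage sketch of the reworked Inferencer, modeled on the fit_a_line test updated below; the single-fc network and the directory name are illustrative stand-ins, not part of this file's diff.

```python
import numpy
import paddle.fluid as fluid


def inference_program():
    # Illustrative fit_a_line-style network: 13 input features -> 1 prediction.
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    return y_predict


# The program-building function is now passed in directly; only the parameters
# previously saved with trainer.save_params() are loaded from param_path.
inferencer = fluid.Inferencer(
    infer_func=inference_program,
    param_path="fit_a_line.inference.model",
    place=fluid.CPUPlace())

tensor_x = numpy.random.uniform(0, 10, [10, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
print(results[0])
```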
@@ -48,12 +48,11 @@ def linear():
return avg_loss


def train(use_cuda, save_dirname):
def train(use_cuda, train_program, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

trainer = fluid.Trainer(
train_func=linear,
infer_func=inference_program,
train_func=train_program,
place=place,
optimizer=fluid.optimizer.SGD(learning_rate=0.001))

@@ -72,11 +71,7 @@ def event_handler(event):
'''
if float(test_metrics[0]) < 20.0:
if save_dirname is not None:
# NOT clear yet
# fluid.io.save_inference_model(save_dirname, ['x'], [y_predict])
# trainer.save_params(save_dirname)
# https://github.com/PaddlePaddle/Paddle/pull/10445
trainer.save_inference_model(save_dirname)
trainer.save_params(save_dirname)
Contributor:
I noticed we switched from trainer.save_inference_model(save_dirname) back to trainer.save_params(save_dirname). What is the difference between them? We discussed this earlier; to me, save_inference_model looks more reasonable.

Member Author (@jacquesqiao, May 15, 2018):
#10648
@wangkuiyi I think we only need to save the parameters and do not need to save the inference program; the inference program should be provided by a Python function. (See the sketch after this file's diff for a side-by-side of the two approaches.)

return

trainer.train(
@@ -87,12 +82,13 @@ def event_handler(event):


# infer
def infer(use_cuda, save_dirname=None):
def infer(use_cuda, inference_program, save_dirname=None):
if save_dirname is None:
return

place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(param_path=save_dirname, place=place)
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)

batch_size = 10
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
@@ -108,8 +104,8 @@ def main(use_cuda):
# Directory for saving the trained model
save_dirname = "fit_a_line.inference.model"

train(use_cuda, save_dirname)
infer(use_cuda, save_dirname)
train(use_cuda, linear, save_dirname)
infer(use_cuda, inference_program, save_dirname)


class TestFitALine(unittest.TestCase):
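On the save_params versus save_inference_model question raised in the thread above, the following is a rough sketch of the two persistence paths using the fluid.io APIs referenced in the discussion; the tiny network and directory names are illustrative.

```python
import paddle.fluid as fluid

# A tiny network so there are parameters to persist.
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

# Path taken by this PR: persist only the parameters. The inference program is
# rebuilt later from the Python function handed to Inferencer(infer_func=...).
fluid.io.save_params(executor=exe, dirname="params_only")

# Previous approach: serialize a pruned inference program together with the
# parameters, so loading does not require the Python program definition.
fluid.io.save_inference_model(
    dirname="program_and_params",
    feeded_var_names=['x'],
    target_vars=[y_predict],
    executor=exe)
```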
@@ -53,48 +53,39 @@ def train_program():
predict = inference_program()
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
# acc = fluid.layers.accuracy(input=predict, label=label)
# return avg_cost, acc
return avg_cost
acc = fluid.layers.accuracy(input=predict, label=label)
return [avg_cost, acc]


def train(use_cuda, save_dirname):
def train(use_cuda, train_program, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)

trainer = fluid.Trainer(
train_func=train_program,
infer_func=inference_program,
place=place,
optimizer=optimizer)
train_func=train_program, place=place, optimizer=optimizer)

def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
# if (event.epoch + 1) % 10 == 0:
# trainer.save_params(save_dirname)
trainer.save_inference_model(save_dirname)

# TODO: Uncomment this part once we are sure that .train is working
# test_reader = paddle.batch(
# paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
# test_metrics = trainer.test(reader=test_reader)
# avg_cost_set = test_metrics[0]
# acc_set = test_metrics[1]
#
# # get test acc and loss
# acc = numpy.array(acc_set).mean()
# avg_cost = numpy.array(avg_cost_set).mean()
#
# print("avg_cost: %s" % avg_cost)
# print("acc : %s" % acc)
#
# if float(acc) > 0.2: # Smaller value to increase CI speed
# trainer.save_params(save_dirname)
# else:
# print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
# event.epoch + 1, float(avg_cost), float(acc)))
# if math.isnan(float(avg_cost)):
# sys.exit("got NaN loss, training failed.")
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
test_metrics = trainer.test(reader=test_reader)
avg_cost_set = test_metrics[0]
acc_set = test_metrics[1]

# get test acc and loss
acc = numpy.array(acc_set).mean()
avg_cost = numpy.array(avg_cost_set).mean()

print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)

if float(acc) > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, float(avg_cost), float(acc)))
if math.isnan(float(avg_cost)):
sys.exit("got NaN loss, training failed.")

train_reader = paddle.batch(
paddle.reader.shuffle(
@@ -108,10 +99,11 @@ def event_handler(event):
feed_order=['img', 'label'])
Contributor:
test_recognize_digits_conv fails on this line.



def infer(use_cuda, save_dirname=None):
def infer(use_cuda, inference_program, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

inferencer = fluid.Inferencer(param_path=save_dirname, place=place)
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)

batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
@@ -126,8 +118,14 @@ def main(use_cuda):
save_dirname = "recognize_digits_conv.inference.model"

# call train() with is_local argument to run distributed train
train(use_cuda=use_cuda, save_dirname=save_dirname)
infer(use_cuda=use_cuda, save_dirname=save_dirname)
train(
use_cuda=use_cuda,
train_program=train_program,
save_dirname=save_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
save_dirname=save_dirname)


if __name__ == '__main__':
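To make the Trainer contract used above easier to follow in isolation, here is a condensed, self-contained sketch of the recognize_digits flow, with a single fc layer standing in for the real conv/mlp network; it assumes the metrics returned by trainer.test() come back in the same order as the list returned by train_program, which is how the tests above read them.

```python
import numpy
import paddle
import paddle.fluid as fluid

BATCH_SIZE = 64


def train_program():
    # Same contract as the updated tests: return [avg_cost, acc] so that
    # trainer.test() metrics can be read back positionally.
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    predict = fluid.layers.fc(input=img, size=10, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(cost)
    acc = fluid.layers.accuracy(input=predict, label=label)
    return [avg_cost, acc]


trainer = fluid.Trainer(
    train_func=train_program,
    place=fluid.CPUPlace(),
    optimizer=fluid.optimizer.Adam(learning_rate=0.001))


def event_handler(event):
    if isinstance(event, fluid.EndEpochEvent):
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
        test_metrics = trainer.test(
            reader=test_reader, feed_order=['img', 'label'])
        # Index 0 collects avg_cost values, index 1 collects accuracy values.
        avg_cost = numpy.array(test_metrics[0]).mean()
        acc = numpy.array(test_metrics[1]).mean()
        print("avg_cost: %s, acc: %s" % (avg_cost, acc))


train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=500),
    batch_size=BATCH_SIZE)

trainer.train(
    num_epochs=1,
    event_handler=event_handler,
    reader=train_reader,
    feed_order=['img', 'label'])
```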
@@ -40,47 +40,40 @@ def train_program():
predict = inference_program()
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
# acc = fluid.layers.accuracy(input=predict, label=label)
# return avg_cost, acc
return avg_cost
acc = fluid.layers.accuracy(input=predict, label=label)
return [avg_cost, acc]


def train(use_cuda, save_dirname):
def train(use_cuda, train_program, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)

trainer = fluid.Trainer(
train_func=train_program,
infer_func=inference_program,
place=place,
optimizer=optimizer)
train_func=train_program, place=place, optimizer=optimizer)

def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
# if (event.epoch + 1) % 10 == 0:
trainer.save_inference_model(save_dirname)

# TODO: Uncomment this part once we are sure that .train is working
# test_reader = paddle.batch(
# paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
# test_metrics = trainer.test(reader=test_reader)
# avg_cost_set = test_metrics[0]
# acc_set = test_metrics[1]
#
# # get test acc and loss
# acc = numpy.array(acc_set).mean()
# avg_cost = numpy.array(avg_cost_set).mean()
#
# print("avg_cost: %s" % avg_cost)
# print("acc : %s" % acc)
#
# if float(acc) > 0.2: # Smaller value to increase CI speed
# trainer.save_params(save_dirname)
# else:
# print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
# event.epoch + 1, float(avg_cost), float(acc)))
# if math.isnan(float(avg_cost)):
# sys.exit("got NaN loss, training failed.")
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
test_metrics = trainer.test(
reader=test_reader, feed_order=['img', 'label'])
avg_cost_set = test_metrics[0]
acc_set = test_metrics[1]

# get test acc and loss
acc = numpy.array(acc_set).mean()
avg_cost = numpy.array(avg_cost_set).mean()

print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)

if float(acc) > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, float(avg_cost), float(acc)))
if math.isnan(float(avg_cost)):
sys.exit("got NaN loss, training failed.")

train_reader = paddle.batch(
paddle.reader.shuffle(
@@ -94,10 +87,11 @@ def event_handler(event):
feed_order=['img', 'label'])


def infer(use_cuda, save_dirname=None):
def infer(use_cuda, inference_program, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

inferencer = fluid.Inferencer(param_path=save_dirname, place=place)
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)

batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
@@ -112,8 +106,14 @@ def main(use_cuda):
save_dirname = "recognize_digits_mlp.inference.model"

# call train() with is_local argument to run distributed train
train(use_cuda=use_cuda, save_dirname=save_dirname)
infer(use_cuda=use_cuda, save_dirname=save_dirname)
train(
use_cuda=use_cuda,
train_program=train_program,
save_dirname=save_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
save_dirname=save_dirname)


if __name__ == '__main__':
@@ -90,7 +90,7 @@ def train_program(is_sparse):
return avg_cost


def train(use_cuda, is_sparse, save_path):
def train(use_cuda, train_program, save_path):
train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
test_reader = paddle.batch(
@@ -105,23 +105,21 @@ def event_handler(event):
print("loss= ", avg_cost)

if avg_cost < 5.0:
trainer.save_inference_model(save_path)
trainer.save_params(save_path)
return
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")

trainer = fluid.Trainer(
partial(train_program, is_sparse),
partial(inference_program, is_sparse),
fluid.optimizer.SGD(learning_rate=0.001),
place=place)
train_program, fluid.optimizer.SGD(learning_rate=0.001), place=place)
trainer.train(
reader=train_reader, num_epochs=1, event_handler=event_handler)


def infer(use_cuda, is_sparse, save_path):
def infer(use_cuda, inference_program, save_path):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(param_path=save_path, place=place)
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_path, place=place)

lod = [0, 1]
first_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
@@ -144,9 +142,9 @@ def main(use_cuda, is_sparse):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return

save_path = "word2vec.inference.model"
train(use_cuda, is_sparse, save_path)
infer(use_cuda, is_sparse, save_path)
save_path = "word2vec.params"
train(use_cuda, partial(train_program, is_sparse), save_path)
infer(use_cuda, partial(inference_program, is_sparse), save_path)


if __name__ == '__main__':
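The word2vec test now fixes is_sparse with functools.partial before passing the program builders to train() and infer(); a minimal sketch of that pattern follows, with hypothetical stand-in builders rather than the real word2vec programs.

```python
from functools import partial


def train_program(is_sparse):
    # Hypothetical stand-in for the real word2vec train_program(is_sparse).
    print("building train program, is_sparse=%s" % is_sparse)


def inference_program(is_sparse):
    # Hypothetical stand-in for the real word2vec inference_program(is_sparse).
    print("building inference program, is_sparse=%s" % is_sparse)


# partial() pins is_sparse so the result is a zero-argument callable, which is
# what fluid.Trainer(train_func=...) and fluid.Inferencer(infer_func=...) expect.
train_fn = partial(train_program, True)
infer_fn = partial(inference_program, True)

train_fn()   # -> building train program, is_sparse=True
infer_fn()   # -> building inference program, is_sparse=True
```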