-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Inference example and inference-framework unit-test for NMT model #8314
Changes from all commits
9b7bc7b
75b4ec9
2938c79
46a0089
abc35c1
da9e124
ed7b3b4
ab1bd3a
f8d536b
f7d6069
1401f99
a23190c
a14daf7
730c7cf
fc49ecf
760e3cc
e1844c7
07838ef
75dca38
63309cd
c183ffa
877841c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#include <gtest/gtest.h> | ||
#include "gflags/gflags.h" | ||
#include "paddle/fluid/inference/tests/test_helper.h" | ||
|
||
DEFINE_string(dirname, "", "Directory of the inference model."); | ||
|
||
// Test for testing inference using "decode_main" in python | ||
// Test for testing inference using "decode_main" in python
TEST(inference, machine_translation_decode) {
  if (FLAGS_dirname.empty()) {
    LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
  }

  LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
  std::string dirname = FLAGS_dirname;

  // 0. Call `paddle::framework::InitDevices()` initialize all the devices
  // In unittests, this is done in paddle/testing/paddle_gtest_main.cc

  // Setup init_ids and init_scores: a single initial beam entry seeded with
  // word id 1 and score 1, with a two-level LoD of [[0, 1], [0, 1]].
  paddle::framework::LoDTensor init_ids, init_scores;
  paddle::framework::LoD init_lod{{0, 1}};
  init_lod.push_back({0, 1});

  std::vector<int64_t> init_ids_data = {1};
  SetupLoDTensor<int64_t>(init_ids, {1, 1}, init_lod, init_ids_data);

  std::vector<float> init_scores_data = {1};
  SetupLoDTensor<float>(init_scores, {1, 1}, init_lod, init_scores_data);

  // Setup an input sequence of 5 words. The ids are an arbitrary sample;
  // they only need to lie within the source vocabulary range.
  paddle::framework::LoDTensor input_sequence;
  std::vector<int64_t> inp_data = {100, 50, 8, 94, 122};
  SetupLoDTensor(input_sequence, {5, 1}, {{0, 5}}, inp_data);

  std::vector<paddle::framework::LoDTensor*> cpu_feeds;
  cpu_feeds.push_back(&input_sequence);
  cpu_feeds.push_back(&init_ids);
  cpu_feeds.push_back(&init_scores);

  paddle::framework::LoDTensor result_ids1, result_scores1;
  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
  cpu_fetchs1.push_back(&result_ids1);
  cpu_fetchs1.push_back(&result_scores1);

  // Run inference on CPU
  TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
  LOG(INFO) << result_ids1.lod();
  LOG(INFO) << result_ids1.dims();
  LOG(INFO) << result_scores1.lod();
  LOG(INFO) << result_scores1.dims();

#ifdef PADDLE_WITH_CUDA
  // Beam search isn't supported on GPU yet, so there is no CUDA run (and no
  // CPU/GPU CheckError comparison) for this test for now.
  LOG(INFO) << "Beam search isn't supported on gpu yet";
#endif
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#include <gtest/gtest.h> | ||
#include "gflags/gflags.h" | ||
#include "paddle/fluid/inference/tests/test_helper.h" | ||
|
||
DEFINE_string(dirname, "", "Directory of the inference model."); | ||
|
||
// Test for testing inference using "train_main" in python | ||
// Test for testing inference using "train_main" in python
TEST(inference, machine_translation_train) {
  if (FLAGS_dirname.empty()) {
    LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
  }

  LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
  std::string dirname = FLAGS_dirname;
  // 0. Call `paddle::framework::InitDevices()` initialize all the devices
  // In unittests, this is done in paddle/testing/paddle_gtest_main.cc

  int64_t dict_size = 30000;  // Hard-coded number of unique tokens

  // Setup the source and target word-id sequences: both are random
  // sequences drawn from [0, dict_size - 1] with LoD {{0, 5}}.
  paddle::framework::LoDTensor input_sequence, target_sequence;
  paddle::framework::LoD lod{{0, 5}};

  SetupLoDTensor(input_sequence, lod, static_cast<int64_t>(0), dict_size - 1);
  SetupLoDTensor(target_sequence, lod, static_cast<int64_t>(0), dict_size - 1);

  std::vector<paddle::framework::LoDTensor*> cpu_feeds;
  cpu_feeds.push_back(&input_sequence);
  cpu_feeds.push_back(&target_sequence);

  paddle::framework::LoDTensor output1;
  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
  cpu_fetchs1.push_back(&output1);

  // Run inference on CPU
  TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
  LOG(INFO) << output1.lod();
  LOG(INFO) << output1.dims();

#ifdef PADDLE_WITH_CUDA
  paddle::framework::LoDTensor output2;
  std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
  cpu_fetchs2.push_back(&output2);

  // Run inference on CUDA GPU
  TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
  LOG(INFO) << output2.lod();
  LOG(INFO) << output2.dims();

  // CPU and GPU results must agree within tolerance.
  CheckError<float>(output1, output2);
#endif
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,7 +29,7 @@ | |
max_length = 8 | ||
topk_size = 50 | ||
trg_dic_size = 10000 | ||
beam_size = 2 | ||
beam_size = 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why beam_size =1 is better than 2? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I just changed it for debugging purposes. Thanks for pointing it out, will fix. |
||
|
||
decoder_size = hidden_dim | ||
|
||
|
@@ -168,9 +168,7 @@ def to_lodtensor(data, place): | |
return res | ||
|
||
|
||
def train_main(use_cuda, is_sparse): | ||
if use_cuda and not fluid.core.is_compiled_with_cuda(): | ||
return | ||
def train_main_and_save_model(use_cuda, is_sparse, save_dirname): | ||
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() | ||
|
||
context = encoder(is_sparse) | ||
|
@@ -209,13 +207,15 @@ def train_main(use_cuda, is_sparse): | |
print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) + | ||
" avg_cost=" + str(avg_cost_val)) | ||
if batch_id > 3: | ||
break | ||
# Save the trained model in 'save_dirname' | ||
fluid.io.save_inference_model( | ||
save_dirname, ['src_word_id', 'target_language_word'], | ||
[rnn_out], exe) | ||
return | ||
batch_id += 1 | ||
|
||
|
||
def decode_main(use_cuda, is_sparse): | ||
if use_cuda and not fluid.core.is_compiled_with_cuda(): | ||
return | ||
def decode_main_and_save_model(use_cuda, is_sparse, save_dirname): | ||
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() | ||
|
||
context = encoder(is_sparse) | ||
|
@@ -241,7 +241,6 @@ def decode_main(use_cuda, is_sparse): | |
init_scores = set_init_lod(init_scores_data, init_lod, place) | ||
|
||
src_word_data = to_lodtensor(map(lambda x: x[0], data), place) | ||
|
||
result_ids, result_scores = exe.run( | ||
framework.default_main_program(), | ||
feed={ | ||
|
@@ -251,10 +250,132 @@ def decode_main(use_cuda, is_sparse): | |
}, | ||
fetch_list=[translation_ids, translation_scores], | ||
return_numpy=False) | ||
print result_ids.lod() | ||
fluid.io.save_inference_model( | ||
save_dirname, ['src_word_id', 'init_ids', 'init_scores'], | ||
[translation_ids, translation_scores], exe) | ||
break | ||
|
||
|
||
def create_random_lodtensor(lod, place, low, high):
    """Build a LoDTensor of shape [lod[-1], 1] filled with random int64 ids
    drawn uniformly from the inclusive range [low, high].

    Args:
        lod: a single LoD level, e.g. [0, 10]; lod[-1] is the total length.
        place: the fluid Place the tensor data is set on.
        low/high: inclusive bounds for the generated word ids.
    """
    # np.random.random_integers is deprecated (and removed in modern NumPy);
    # np.random.randint has an exclusive upper bound, hence high + 1 to keep
    # the original inclusive range.
    data = np.random.randint(low, high + 1, [lod[-1], 1]).astype("int64")
    res = fluid.LoDTensor()
    res.set(data, place)
    res.set_lod([lod])
    return res
|
||
|
||
# This function tests for loading a model using fluid.io.save_inference_model | ||
# after training (calling train_main). This is just for checking a general flow | ||
# of training, saving a model and loading it successfully, and doesn't | ||
# simulate a real inference setting as we don't have access to target_sequence | ||
# while doing inference. | ||
# This function tests loading a model saved with fluid.io.save_inference_model
# after training (calling train_main). It only checks the general flow of
# training, saving a model, and loading it back successfully; it does not
# simulate a real inference setting, because target_sequence is not available
# while doing inference.
def infer_for_train_main(use_cuda, save_dirname=None):
    if save_dirname is None:
        return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Load the inference program desc along with the feed_target_names (the
    # names of variables to feed data into) and the fetch_targets (variables
    # whose data we want to fetch back).
    [inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)

    # Random source and target word-id sequences, 10 tokens each.
    lod = [0, 10]
    inp_sequence = create_random_lodtensor(
        lod, place, low=0, high=dict_size - 1)
    out_sequence = create_random_lodtensor(
        lod, place, low=0, high=dict_size - 1)

    # Feed is a dictionary of {feed_target_name: feed_target_data}; results
    # is a list of data corresponding to fetch_targets.
    assert feed_target_names[0] == 'src_word_id'
    assert feed_target_names[1] == 'target_language_word'

    feed_dict = {
        feed_target_names[0]: inp_sequence,
        feed_target_names[1]: out_sequence,
    }
    results = exe.run(inference_program,
                      feed=feed_dict,
                      fetch_list=fetch_targets,
                      return_numpy=False)
    print(results[0].lod())
    np_data = np.array(results[0])
    print("Inference shape: ", np_data.shape)
|
||
|
||
# This function tests for loading a model using fluid.io.save_inference_model | ||
# after calling decode_main. This is just for checking a general flow of saving | ||
# a model which has the beam_search op and loading it successfully. | ||
# This function tests loading a model saved with fluid.io.save_inference_model
# after calling decode_main. It only checks the general flow of saving a model
# that contains the beam_search op and loading it back successfully.
def infer_for_decode_main(use_cuda, save_dirname=None):
    if save_dirname is None:
        return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Load the inference program desc along with the feed_target_names (the
    # names of variables to feed data into) and the fetch_targets (variables
    # whose data we want to fetch back).
    [inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)

    # A single initial beam entry: id 1 with score 1.0 and a two-level LoD.
    init_ids_data = np.ones((1, 1), dtype='int64')
    init_scores_data = np.ones((1, 1), dtype='float32')
    level_lod = [0, 1]
    init_lod = [level_lod, level_lod]

    init_ids = set_init_lod(init_ids_data, init_lod, place)
    init_scores = set_init_lod(init_scores_data, init_lod, place)

    # Setup a random test input sequence of 10 words.
    lod = [0, 10]
    inp_sequence = create_random_lodtensor(
        lod, place, low=0, high=dict_size - 1)

    # Feed is a dictionary of {feed_target_name: feed_target_data}; results
    # correspond to fetch_targets.
    assert feed_target_names[0] == 'src_word_id'
    assert feed_target_names[1] == 'init_ids'
    assert feed_target_names[2] == 'init_scores'

    feed_dict = {
        feed_target_names[0]: inp_sequence,
        feed_target_names[1]: init_ids,
        feed_target_names[2]: init_scores
    }
    result_ids, result_scores = exe.run(inference_program,
                                        feed=feed_dict,
                                        fetch_list=fetch_targets,
                                        return_numpy=False)
    print(result_ids.lod())
    np_data = np.array(result_ids)
    print("Inference shape: ", np_data.shape)
    print("Inference results: ", np_data)
|
||
|
||
def train_main(use_cuda, is_sparse):
    """Train the NMT model, save it for inference, then load and run it."""
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return  # CUDA requested but this build has no GPU support.
    model_dir = "machine_translation_train.inference.model"
    train_main_and_save_model(use_cuda, is_sparse, model_dir)
    infer_for_train_main(use_cuda, model_dir)
|
||
|
||
def decode_main(use_cuda, is_sparse):
    """Run beam-search decoding, save the model, then load and run it."""
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return  # CUDA requested but this build has no GPU support.
    model_dir = "machine_translation_decode.inference.model"
    decode_main_and_save_model(use_cuda, is_sparse, model_dir)
    infer_for_decode_main(use_cuda, model_dir)
|
||
|
||
# Empty placeholder TestCase. NOTE(review): no test methods are defined here;
# presumably tests are attached to this class elsewhere in the file (e.g. via
# an inference_test helper or explicit statements) — confirm against the full
# file.
class TestMachineTranslation(unittest.TestCase):
    pass
|
||
|
Reviewer comment (collapsed): Please try to use the `inference_test` helper here, and try to simplify the unittest.

Author reply (collapsed): I couldn't get both of the tests to be expressed with the `inference_test` function, so I added explicit statements instead. Do you suggest modifying the `inference_test` function to accommodate this?