-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add variant of new load and save ops for storing model params in a single file #7909
Changes from 10 commits
7781287
6adc36e
99d1d07
a24fca3
11e3181
53cc3e3
b1cc306
6a3710f
98d2aba
c7c2125
57bcdbb
3d0f724
383acc7
511f472
d9de9ea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
#include <fstream> | ||
|
||
#include "paddle/framework/op_registry.h" | ||
#include "paddle/platform/device_context.h" | ||
|
||
namespace paddle { | ||
namespace operators { | ||
|
||
class LoadCombineOp : public framework::OperatorBase { | ||
public: | ||
LoadCombineOp(const std::string &type, | ||
const framework::VariableNameMap &inputs, | ||
const framework::VariableNameMap &outputs, | ||
const framework::AttributeMap &attrs) | ||
: OperatorBase(type, inputs, outputs, attrs) {} | ||
void Run(const framework::Scope &scope, | ||
const platform::Place &place) const override { | ||
auto filename = Attr<std::string>("file_path"); | ||
|
||
std::ifstream fin(filename); | ||
PADDLE_ENFORCE(static_cast<bool>(fin), | ||
"Cannot open file %s for load_combine op", filename); | ||
|
||
auto out_var_names = Outputs("Out"); | ||
PADDLE_ENFORCE_GT( | ||
static_cast<int>(out_var_names.size()), 0, | ||
"The number of output variables should be greater than 0"); | ||
|
||
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); | ||
auto &dev_ctx = *pool.Get(place); | ||
|
||
for (size_t i = 0; i < out_var_names.size(); i++) { | ||
auto *out_var = scope.FindVar(out_var_names[i]); | ||
|
||
PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found", | ||
out_var_names[i]); | ||
|
||
auto *tensor = out_var->GetMutable<framework::LoDTensor>(); | ||
|
||
// Error checking | ||
PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot read more from file %s", | ||
filename); | ||
|
||
// Get data from fin to tensor | ||
DeserializeFromStream(fin, tensor, dev_ctx); | ||
|
||
if (platform::is_gpu_place(place)) { | ||
// copy CPU to GPU | ||
framework::LoDTensor cpu_tensor; | ||
cpu_tensor.ShareDataWith(*tensor); | ||
cpu_tensor.set_lod(tensor->lod()); | ||
|
||
// reset tensor | ||
out_var->Clear(); | ||
tensor = out_var->GetMutable<framework::LoDTensor>(); | ||
tensor->set_lod(cpu_tensor.lod()); | ||
Copy(cpu_tensor, place, dev_ctx, tensor); | ||
} | ||
} | ||
} | ||
}; | ||
|
||
// Declares the interface of the load_combine operator: a duplicable "Out"
// output, a non-empty "file_path" string attribute, and the operator's
// user-facing documentation.
class LoadCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  LoadCombineOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddOutput("Out", "(vector) The tensors that need to be loaded")
        .AsDuplicable();
    AddAttr<std::string>("file_path",
                         "(string) "
                         "Variables will be loaded from \"file_path\".")
        // An empty path can never be opened, so reject it up front.
        .AddCustomChecker(
            [](const std::string &path) { return !path.empty(); });
    AddComment(R"DOC(
LoadCombine Operator.

LoadCombine operator loads various tensor variables from a single file.

)DOC");
  }
};
} // namespace operators | ||
} // namespace paddle | ||
namespace ops = paddle::operators; | ||
|
||
REGISTER_OPERATOR(load_combine, ops::LoadCombineOp, | ||
ops::LoadCombineOpProtoMaker); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#include <stdint.h> | ||
#include <sys/stat.h> | ||
#include <fstream> | ||
#include <numeric> | ||
#include <sstream> | ||
#include "paddle/framework/data_type.h" | ||
#include "paddle/framework/framework.pb.h" | ||
#include "paddle/framework/lod_tensor.h" | ||
#include "paddle/framework/op_registry.h" | ||
#include "paddle/platform/device_context.h" | ||
|
||
namespace paddle { | ||
namespace operators { | ||
|
||
// TODO(sidgoyal78): These functions are needed by other files (save_op); move | ||
// them to the paddle::filesystem namespace (as noted by yuyang18 in save_op). | ||
constexpr char kSEP = '/'; | ||
// Returns true when stat() succeeds on `filepath`, i.e. when the path names
// an existing filesystem entry; returns false otherwise.
static bool FileExists(const std::string &filepath) {
  struct stat file_info;
  const int status = stat(filepath.c_str(), &file_info);
  return status == 0;
}
|
||
static std::string DirName(const std::string &filepath) { | ||
auto pos = filepath.rfind(kSEP); | ||
if (pos == std::string::npos) { | ||
return ""; | ||
} | ||
return filepath.substr(0, pos); | ||
} | ||
|
||
static void MkDir(const char *path) { | ||
if (mkdir(path, 0755)) { | ||
PADDLE_ENFORCE_EQ(errno, EEXIST, "%s mkdir failed!", path); | ||
} | ||
} | ||
|
||
// Creates `fullpath` together with any missing parent directories.
// Recursion stops at an empty path or at the first path that already exists.
static void MkDirRecursively(const char *fullpath) {
  const bool is_empty = (*fullpath == '\0');
  if (is_empty || FileExists(fullpath)) {
    return;
  }

  // Parents first, so that the final MkDir only has to add the last component.
  const std::string parent = DirName(fullpath);
  MkDirRecursively(parent.c_str());
  MkDir(fullpath);
}
|
||
class SaveCombineOp : public framework::OperatorBase { | ||
public: | ||
SaveCombineOp(const std::string &type, | ||
const framework::VariableNameMap &inputs, | ||
const framework::VariableNameMap &outputs, | ||
const framework::AttributeMap &attrs) | ||
: OperatorBase(type, inputs, outputs, attrs) {} | ||
void Run(const framework::Scope &scope, | ||
const platform::Place &place) const override { | ||
auto filename = Attr<std::string>("file_path"); | ||
auto overwrite = Attr<bool>("overwrite"); | ||
|
||
bool is_present = FileExists(filename); | ||
if (is_present && !overwrite) { | ||
PADDLE_THROW("%s exists!, cannot save_combine to it when overwrite=false", | ||
filename, overwrite); | ||
} | ||
|
||
MkDirRecursively(DirName(filename).c_str()); | ||
std::ofstream fout(filename); | ||
PADDLE_ENFORCE(static_cast<bool>(fout), "Cannot open %s to write", | ||
filename); | ||
|
||
auto inames = Inputs("X"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. inames -> in_names There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Modified, to make it consistent with variable name in load_combine: I modified it to |
||
PADDLE_ENFORCE_GT( | ||
static_cast<int>(inames.size()), 0, | ||
"The number of output variables should be greater than 0"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. output -> input There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
// get device context from pool | ||
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); | ||
auto &dev_ctx = *pool.Get(place); | ||
|
||
for (size_t i = 0; i < inames.size(); i++) { | ||
auto *var = scope.FindVar(inames[i]); | ||
|
||
PADDLE_ENFORCE(var != nullptr, | ||
"Cannot find variable %s for save_combine_op", inames[i]); | ||
PADDLE_ENFORCE(var->IsType<framework::LoDTensor>(), | ||
"SaveCombineOp only support LoDTensor, %s has wrong type", | ||
inames[i]); | ||
|
||
auto &tensor = var->Get<framework::LoDTensor>(); | ||
// Serialize tensor | ||
framework::SerializeToStream(fout, tensor, dev_ctx); | ||
} | ||
fout.close(); | ||
} | ||
}; | ||
|
||
// Declares the interface of the save_combine operator: a duplicable "X"
// input, a boolean "overwrite" attribute (default true), a non-empty
// "file_path" string attribute, and the operator's user-facing documentation.
class SaveCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SaveCombineOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "(vector) Input LoDTensors to be save_combined")
        .AsDuplicable();
    AddComment(R"DOC(
Save_combine operator

This operator will serialize and write a list of input tensor variables
to a file on disk.
)DOC");
    // Adjacent string literals are concatenated verbatim, so the separating
    // space after the type tag has to be written explicitly.
    AddAttr<bool>("overwrite",
                  "(boolean, default true) "
                  "Overwrite the output file if it exists")
        .SetDefault(true);
    AddAttr<std::string>(
        "file_path",
        "(string) "
        "The \"file_path\" where the variable will be save_combined.")
        // An empty path can never be opened, so reject it up front.
        .AddCustomChecker(
            [](const std::string &path) { return !path.empty(); });
  }
};
|
||
} // namespace operators | ||
} // namespace paddle | ||
|
||
namespace ops = paddle::operators; | ||
|
||
REGISTER_OPERATOR(save_combine, ops::SaveCombineOp, | ||
ops::SaveCombineOpProtoMaker); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#include <iostream> | ||
#include <string> | ||
#include <vector> | ||
#include "gtest/gtest.h" | ||
#include "paddle/framework/op_registry.h" | ||
|
||
USE_NO_KERNEL_OP(save_combine); | ||
USE_NO_KERNEL_OP(load_combine); | ||
|
||
int* create_for_save_combine_op(int x, int y, const std::vector<int>& lod_info, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. create_for_save_combine_op -> CreateSaveCombineOp There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
std::string var_name, | ||
paddle::platform::CPUPlace& place, | ||
paddle::framework::Scope& scope, | ||
paddle::framework::LoD& expect_lod) { | ||
auto var = scope.Var(var_name); | ||
auto tensor = var->GetMutable<paddle::framework::LoDTensor>(); | ||
tensor->Resize({x, y}); | ||
expect_lod.resize(1); | ||
for (size_t i = 0; i < lod_info.size(); i++) { | ||
expect_lod[0].push_back(lod_info[i]); | ||
} | ||
tensor->set_lod(expect_lod); | ||
int* expect = tensor->mutable_data<int>(place); | ||
for (int64_t i = 0; i < tensor->numel(); ++i) { | ||
expect[i] = static_cast<int>(i); | ||
} | ||
return expect; | ||
} | ||
|
||
// Creates variable `out_var_name` in `scope` and returns its LoDTensor, to be
// used as the destination of a later load.
paddle::framework::LoDTensor* generate_placeholder_before_load(
    const std::string out_var_name, paddle::framework::Scope& scope) {
  auto* placeholder_var = scope.Var(out_var_name);
  return placeholder_var->GetMutable<paddle::framework::LoDTensor>();
}
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a blank line here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
// Returns the int data buffer of `target` and copies the tensor's LoD into
// `actual_lod`. `scope` is not used by this helper.
int* get_values_after_load_combine_op(paddle::framework::LoDTensor* target,
                                      paddle::framework::Scope& scope,
                                      paddle::framework::LoD& actual_lod) {
  int* const loaded_values = target->data<int>();
  actual_lod = target->lod();
  return loaded_values;
}
|
||
void check_values(int* expect, int* actual, paddle::framework::LoD expect_lod, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check_values -> CheckValues There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
paddle::framework::LoD actual_lod, const int& numel) { | ||
for (int64_t i = 0; i < numel; ++i) { | ||
EXPECT_EQ(expect[i], actual[i]); | ||
} | ||
EXPECT_EQ(expect_lod.size(), actual_lod.size()); | ||
for (size_t i = 0; i < expect_lod.size(); ++i) { | ||
for (size_t j = 0; j < expect_lod[i].size(); ++j) { | ||
EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]); | ||
} | ||
} | ||
} | ||
|
||
// Here, we create 4 LoDTensors and use save_combine_op to first save these | ||
// in a single file. Then, we use load_combine_op to load these sequentially | ||
// Round trip on CPU: four LoDTensors are written to one file by save_combine
// and read back, in the same order, by load_combine; the values and LoD of
// every loaded tensor must match the tensor it was saved from.
TEST(SaveLoadCombineOp, CPU) {
  namespace fw = paddle::framework;

  fw::Scope scope;
  paddle::platform::CPUPlace place;

  // Source tensors; each numelN equals the product of the dimensions passed
  // to the helper on the following lines.
  std::vector<int> lod1 = {0, 1, 2, 3};
  int numel1 = 100;
  fw::LoD expect_lod1;
  int* src_values1 = create_for_save_combine_op(10, 10, lod1, "test_var1",
                                                place, scope, expect_lod1);

  std::vector<int> lod2 = {0, 2, 5};
  int numel2 = 200;
  fw::LoD expect_lod2;
  int* src_values2 = create_for_save_combine_op(10, 20, lod2, "test_var2",
                                                place, scope, expect_lod2);

  std::vector<int> lod3 = {0, 2, 3};
  int numel3 = 4000;
  fw::LoD expect_lod3;
  int* src_values3 = create_for_save_combine_op(200, 20, lod3, "test_var3",
                                                place, scope, expect_lod3);

  std::vector<int> lod4 = {0, 1};
  int numel4 = 1000;
  fw::LoD expect_lod4;
  int* src_values4 = create_for_save_combine_op(50, 20, lod4, "test_var4",
                                                place, scope, expect_lod4);

  // Both operators share the same attribute map (only "file_path" is set).
  std::string filename = "check_tensor.ls";
  fw::AttributeMap attrs;
  attrs.insert({"file_path", std::string(filename)});

  // Save all four tensors into the single file.
  auto save_combine_op = fw::OpRegistry::CreateOp(
      "save_combine",
      {{"X", {"test_var1", "test_var2", "test_var3", "test_var4"}}}, {}, attrs);
  save_combine_op->Run(scope, place);

  // Destination variables for the load.
  auto loaded1 = generate_placeholder_before_load("out_var1", scope);
  auto loaded2 = generate_placeholder_before_load("out_var2", scope);
  auto loaded3 = generate_placeholder_before_load("out_var3", scope);
  auto loaded4 = generate_placeholder_before_load("out_var4", scope);

  // Load them back in the order they were written.
  auto load_combine_op = fw::OpRegistry::CreateOp(
      "load_combine", {},
      {{"Out", {"out_var1", "out_var2", "out_var3", "out_var4"}}}, attrs);
  load_combine_op->Run(scope, place);

  fw::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4;
  int* got_values1 =
      get_values_after_load_combine_op(loaded1, scope, actual_lod1);
  int* got_values2 =
      get_values_after_load_combine_op(loaded2, scope, actual_lod2);
  int* got_values3 =
      get_values_after_load_combine_op(loaded3, scope, actual_lod3);
  int* got_values4 =
      get_values_after_load_combine_op(loaded4, scope, actual_lod4);

  check_values(src_values1, got_values1, expect_lod1, actual_lod1, numel1);
  check_values(src_values2, got_values2, expect_lod2, actual_lod2, numel2);
  check_values(src_values3, got_values3, expect_lod3, actual_lod3, numel3);
  check_values(src_values4, got_values4, expect_lod4, actual_lod4, numel4);
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(LoDTensor) The tensor need to be load_combined -> (vector) The tensors need to be loaded
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks.