[BugFix]fix bug of opt tool #8590

Merged 1 commit on Mar 14, 2022
21 changes: 14 additions & 7 deletions lite/core/optimizer/mir/static_kernel_pick_pass.cc
@@ -41,10 +41,14 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
CHECK(graph) << "graph not valid";

// sort kernels by the factors.
VLOG(4) << "graph->mutable_nodes().size():" << graph->mutable_nodes().size();
VLOG(2) << "graph block_idx: " << graph->blockIdx();
VLOG(2) << "graph->mutable_nodes().size(): " << graph->mutable_nodes().size();
size_t idx = 0;
for (auto& node : graph->mutable_nodes()) {
if (!node.IsStmt()) continue;
auto& instruct = node.AsStmt();
VLOG(2) << "pick kernel for op : " << instruct.op_type() << ", in block "
<< graph->blockIdx() << ", idx : " << idx++;

std::map<std::string, PrecisionType> in_types;
std::map<std::string, PrecisionType> out_types;
@@ -66,17 +70,19 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
std::vector<std::pair<float, std::unique_ptr<KernelBase>>> scored;
CHECK(!instruct.kernels().empty()) << "No kernels found for "
<< instruct.op_type();
VLOG(4) << "instruct.kernels().size():" << instruct.kernels().size();

VLOG(2) << "candidate kernels size:" << instruct.kernels().size();
for (auto&& kernel : instruct.kernels()) {
float score = KernelGrade(instruct,
VLOG(2) << "current candidate kernel is: " << kernel->summary();
VLOG(2) << "valid_places size is: " << graph->valid_places().size();
float score = KernelGrade(&node,
*kernel,
graph->valid_places(),
in_types,
out_types,
instruct.op_info()->input_names(),
instruct.op_info()->output_names());
VLOG(4) << "kernel->summary():" << kernel->summary()
<< " score:" << score;

scored.emplace_back(score, std::move(kernel));
}
std::stable_sort(scored.begin(), scored.end(), KernelScoreCmp);
@@ -87,7 +93,8 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
// Just keep a single best kernel.
// TODO(Superjomn) reconsider this.
instruct.kernels().emplace_back(std::move(scored.front().second));
VLOG(2) << "pick " << instruct.kernels().front()->summary() << "\n\n";
VLOG(2) << "the final pick kernel is "
<< instruct.kernels().front()->summary() << "\n\n";

} else {
bool out_type_int8 = true;
@@ -137,7 +144,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
instruct.ResetOp(update_desc, graph->valid_places());
scored.clear();
for (auto&& kernel : instruct.kernels()) {
float score = KernelGrade(instruct,
float score = KernelGrade(&node,
*kernel,
graph->valid_places(),
in_types,
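Both call sites in this file now pass the graph node to KernelGrade instead of the bare statement. A minimal sketch of why (type shapes simplified and hypothetical, not the real headers): a Stmt only describes the op and its candidate kernels, while the Node also carries graph connectivity, which the new datatype rule in the header below walks via node->inlinks.

```cpp
#include <string>
#include <vector>

struct Stmt { /* op_info(), kernels(), ... as before */ };

struct Node {
  std::vector<Node*> inlinks;  // argument nodes feeding this statement
  Stmt stmt;                   // what KernelGrade received before this PR
  Stmt& AsStmt() { return stmt; }
};
```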
67 changes: 56 additions & 11 deletions lite/core/optimizer/mir/static_kernel_pick_pass.h
@@ -50,13 +50,14 @@ class StaticKernelPickPass : public mir::StmtPass {

private:
// Score the kernel.
size_t KernelGrade(const lite::mir::Node::Stmt& instruct,
size_t KernelGrade(lite::mir::Node* node,
const lite::KernelBase& kernel,
const std::vector<Place>& places,
const std::map<std::string, PrecisionType>& in_types,
const std::map<std::string, PrecisionType>& out_types,
const std::vector<std::string>& in_names,
const std::vector<std::string>& out_names) {
const auto& instruct = node->AsStmt();
CHECK_GT(places.size(), static_cast<size_t>(0)) << "valid_places is empty.";
float final_score{-1.};
Place winner_place{places[0]};
@@ -76,14 +77,19 @@
for (size_t i = 0; i < place_size; ++i) {
const auto& place = places[i];
float weight = static_cast<float>(place_size - i) / place_size;
VLOG(4) << "current place is " << place.DebugString() << ", idx : " << i
<< ", weight : " << weight;
size_t score{};

// The more important factor comes first
if (kernel_pick_factors_.IsTargetConsidered() &&
(place.target == kernel.target() || kernel.target() == TARGET(kAny) ||
place.target == TARGET(kAny))) {
score += kMax /
static_cast<int>(core::KernelPickFactor::Factor::TargetFirst);
size_t target_score =
kMax /
static_cast<int>(core::KernelPickFactor::Factor::TargetFirst);
score += target_score;
VLOG(4) << "[TargetConsidered score]:" << target_score;
}
VLOG(4) << "[score s1]:" << score;
if (kernel_pick_factors_.IsPrecisionConsidered() &&
@@ -93,17 +99,23 @@
// Skip the score if the kernel is int8 but the op is not int8.
if (!(kernel.precision() == PRECISION(kInt8) &&
!instruct.op_info()->HasAttr("enable_int8"))) {
score += kMax / static_cast<int>(
core::KernelPickFactor::Factor::PrecisionFirst);
size_t precision_score =
kMax /
static_cast<int>(core::KernelPickFactor::Factor::PrecisionFirst);
score += precision_score;
VLOG(4) << "[PrecisionConsidered score]:" << precision_score;
}
}
VLOG(4) << "[score s2]:" << score;
if (kernel_pick_factors_.IsDataLayoutConsidered() &&
(place.layout == kernel.layout() ||
kernel.layout() == DATALAYOUT(kAny) ||
place.layout == DATALAYOUT(kAny))) {
score += kMax / static_cast<int>(
core::KernelPickFactor::Factor::DataLayoutFirst);
size_t datalayout_score =
kMax /
static_cast<int>(core::KernelPickFactor::Factor::DataLayoutFirst);
score += datalayout_score;
VLOG(4) << "[DataLayoutConsidered score]:" << datalayout_score;
}
VLOG(4) << "[score s3]:" << score;

@@ -138,10 +150,44 @@
}
if (type_match) {
score *= 2;
VLOG(4) << "[Input precision compatible]: *2";
}
VLOG(4) << "[score s4]:" << score;
}

// New rule for datatypes: when the input datatypes are consistent with
// the kernel's declared input types, prefer that kernel.
if (instruct.op_info()->Type() != "conditional_block" &&
instruct.op_info()->Type() != "while" &&
instruct.op_info()->Type() != "subgraph") {
bool datatype_match = true;
for (auto* in : node->inlinks) {
if (!in->IsArg()) continue;
if (in->AsArg().name == "feed" || in->AsArg().is_persist) continue;
std::string argname;
instruct.op_info()->GetInputArgname(in->AsArg().name, &argname);
VLOG(5) << "intput var name : " << in->AsArg().name;
// The type pointer is non-null only when the datatype is LOD_TENSOR,
// LOD_TENSOR_ARRAY, or STEP_SCOPES.
if (in->AsArg().type) {
VLOG(5) << "input datatype : "
<< static_cast<int>(in->AsArg().type->id());
VLOG(5) << "kernel bind datatype : "
<< static_cast<int>(kernel.GetInputDeclType(argname)->id());
Review comment (Collaborator):
This part of the implementation is risky, e.g.:

op {float in1, int in2, float out}
place <CPU\NCHW\float32>
Candidate kernels:
Kernel1 <CPU\NCHW\float32> {float in1, float in2, float out}
Kernel2 <CPU\NCHW\float32> {float in1, int in2, float out}

Kernel2 is the better match, yet Kernel1 is actually selected.
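For illustration only (not part of this PR, all names invented), a per-input variant of the rule that would rank the reviewer's example as expected: count how many input datatypes match instead of granting a single all-or-nothing *2 bonus.

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Count per-input datatype matches. In the example above, Kernel2 scores
// 2/2 and Kernel1 only 1/2, so Kernel2 outranks Kernel1.
size_t DatatypeMatchCount(const std::vector<int>& op_input_type_ids,
                          const std::vector<int>& kernel_decl_type_ids) {
  size_t matches = 0;
  const size_t n =
      std::min(op_input_type_ids.size(), kernel_decl_type_ids.size());
  for (size_t i = 0; i < n; ++i) {
    if (op_input_type_ids[i] == kernel_decl_type_ids[i]) ++matches;
  }
  return matches;  // e.g. scale the score by (1.0 + matches / n)
}
```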

if (static_cast<int>(in->AsArg().type->id()) !=
static_cast<int>(kernel.GetInputDeclType(argname)->id()))
datatype_match = false;
} else {
datatype_match = false;
}
}
if (datatype_match) {
score *= 2;
VLOG(4) << "[Input datatype compatible]: *2";
}
VLOG(4) << "[score s5]:" << score;
}

if (weight * score > final_score) {
final_score = weight * score;
winner_place = place;
@@ -191,9 +237,8 @@ }
}
}

VLOG(4) << "[score(final)]:" << final_score;
VLOG(2) << "-------- pick summary for " << instruct.op_type()
<< " --------";
VLOG(2) << "-------- score summary for candidate kernel : "
<< kernel.summary() << " --------";
VLOG(2) << " ===> winner_place():" << PrecisionToStr(winner_place.precision)
<< " " << DataLayoutToStr(winner_place.layout) << " "
<< TargetToStr(winner_place.target);
Expand All @@ -203,8 +248,8 @@ class StaticKernelPickPass : public mir::StmtPass {
<< TargetToStr(kernel.place().target);
VLOG(4) << "kernel.op_type():" << kernel.op_type();
VLOG(4) << "kernel picker factors:" << kernel_pick_factors_;
VLOG(4) << "kernel place:" << kernel.place().DebugString();
VLOG(4) << "winner_picker place:" << winner_place.DebugString();
VLOG(4) << "[score(final)]:" << final_score;
VLOG(4) << "------------------------------";

// The data layout is not considered, for the input and output arguments
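Taken together, the hunks above instrument an additive scheme. A compact, self-contained sketch of how the logged s1..s5 values combine; the factor ranks are assumed for illustration, the real ones live in core::KernelPickFactor:

```cpp
#include <cstdint>
#include <iostream>

// Assumed ranks mirroring the priority order: a smaller divisor yields a
// larger contribution, so target > precision > data layout.
enum class Factor : int { TargetFirst = 1, PrecisionFirst = 2, DataLayoutFirst = 4 };
constexpr int64_t kMax = 1000000000;  // stand-in for the pass's kMax

int main() {
  int64_t score = 0;
  score += kMax / static_cast<int>(Factor::TargetFirst);      // [score s1]
  score += kMax / static_cast<int>(Factor::PrecisionFirst);   // [score s2]
  score += kMax / static_cast<int>(Factor::DataLayoutFirst);  // [score s3]
  score *= 2;  // [score s4] input precisions compatible
  score *= 2;  // [score s5] input datatypes compatible (the rule added here)
  // The winner is chosen by weight * score across places, where
  // weight = (place_size - i) / place_size decays with the place index.
  std::cout << "raw score: " << score << "\n";
}
```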
2 changes: 2 additions & 0 deletions lite/core/program.cc
@@ -606,6 +606,8 @@ void Program::PrepareWorkspace(
} else if (var_type == lite::VarDescAPI::Type::LOD_TENSOR_ARRAY) {
var_type_map_[var_name] = LiteType::GetTensorListTy(
TARGET(kUnk), PRECISION(kUnk), DATALAYOUT(kUnk));
auto* tensor_array = var->GetMutable<std::vector<lite::Tensor>>();
tensor_array->resize(0);
} else if (var_type == lite::VarDescAPI::Type::STEP_SCOPES) {
var->GetMutable<std::vector<lite::Scope*>>();
}
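The two added lines reset every LOD_TENSOR_ARRAY variable to an empty vector while the workspace is prepared. A short sketch of the pattern this enforces (helper and variable name hypothetical, not from this PR): a scope reused across runs would otherwise hand ops an array still holding elements appended by the previous run.

```cpp
#include <string>
#include <vector>

#include "lite/core/scope.h"
#include "lite/core/tensor.h"

// Hypothetical helper: reset one LOD_TENSOR_ARRAY variable so a reused
// scope starts the next run with an empty array.
void ClearTensorArray(lite::Scope* scope, const std::string& var_name) {
  auto* tensor_array =
      scope->Var(var_name)->GetMutable<std::vector<lite::Tensor>>();
  tensor_array->resize(0);  // ops in this run append from scratch
}
```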
58 changes: 15 additions & 43 deletions lite/core/type_system.h
@@ -60,15 +60,11 @@ namespace lite {
// We use Types to declare the definition of a kernel, each inputs' and outputs'
// arguments have a specific Types.
//
// REGISTER_LITE_KERNEL(mul, kHost, kFloat,
// paddle::lite::kernels::host::MulCompute, def)
// .BindInput("X", {paddle::lite::Type::Get<paddle::lite::TensorFp32NCHWTy>(
// TARGET(kHost))})
// .BindInput("Y", {paddle::lite::Type::Get<paddle::lite::TensorFp32NCHWTy>(
// TARGET(kHost))})
// .BindOutput("Out",
// {paddle::lite::Type::Get<paddle::lite::TensorFp32NCHWTy>(TARGET(kHost))})
// .Finalize();
// REGISTER_LITE_KERNEL(mul, kARM, kInt8, kNCHW, Mul_int8_f32, def)
// .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
// .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
// .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
// .Finalize();
//
// The above definition will be used in MIR by type inference and
// incompatible-type checks.
@@ -116,13 +112,13 @@ class DataType {
};

/*
* Datatype with device info considered.
* NOTE A Type with different device is treated as different DeviceDataType.
* Datatype with Place info considered.
* NOTE A Type with different Place info is treated as different Type.
*/
class Type : public DataType {
public:
// Can cast to another type. This is heavily used in MIR, by determine whether
// is is possible to add a statement to transform a type to another.
// it is possible to add a statement to transform a type to another.
virtual bool TypeCastable(const Type& type) const { return id_ == type.id(); }

/// Get a Tensor type.
@@ -258,30 +254,6 @@ struct ParamType {
std::string DebugString() const { return type->name(); }
};

/*
* The data types of kernel parameters. It is used to track the type of kernel's
* inputs and outputs.
*/
struct ParamTypeRecorder {
std::map<std::string, ParamType> inputs;
std::map<std::string, ParamType> outputs;

void RegisterInputType(const std::string& arg_name, const ParamType& type) {
Register(&inputs, arg_name, type);
}

void RegisterOutputType(const std::string& arg_name, const ParamType& type) {
Register(&outputs, arg_name, type);
}

private:
void Register(std::map<std::string, ParamType>* ts,
const std::string& arg_name,
ParamType type) {
(*ts)[arg_name] = type;
}
};

/*
* The ParamTypeRegistry help register the input and output data types for all
* the kernels. It is made singleton so that all the objects of the same kernel
@@ -296,19 +268,19 @@ struct ParamTypeRecorder {
class ParamTypeRegistry {
public:
enum class IO : int { kInvalid = 0, kInput, kOutput };

template <TargetType target,
PrecisionType precision,
DataLayoutType layout = DataLayoutType::kNCHW>
/*
* Helper class for registering a ParamType for a Kernel.
* Usage:
*
* NewInstance<TARGET(kHost), PRECISION(kFloat)>("fc")
* .BindInput(0, {typeid(Tensor).hash_code(), {TARGET(kHost)})
* .BindInput(1, {typeid(Tensor).hash_code(), {TARGET(kHost),
* PRECISION(kFloat)});
* .BindInput("Input_0", {Type::GetTensorTy(TARGET(kHost),
* PRECISION(kInt64))})
* .BindInput("Input_1", {Type::GetTensorTy(TARGET(kHost),
* PRECISION(kInt64))});
*/
template <TargetType target,
PrecisionType precision,
DataLayoutType layout = DataLayoutType::kNCHW>
struct NewInstance {
explicit NewInstance(const std::string& kernel_type)
: kernel_type_(kernel_type) {}