From 3ae781eb2bc139a946b7f195183e31304af49822 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Wed, 27 Dec 2017 15:45:13 +0800 Subject: [PATCH 1/3] Executor check nan --- paddle/framework/executor.cc | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index 997773c1689ef..9ee2ddb7c3e8d 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -14,18 +14,17 @@ limitations under the License. */ #include "paddle/framework/executor.h" -#include -#include -#include #include -#include +#include "gflags/gflags.h" #include "paddle/framework/feed_fetch_type.h" #include "paddle/framework/lod_rank_table.h" -#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/scope.h" + +DEFINE_bool(check_nan_inf, false, + "Checking whether operator produce NAN/INF or not. It will be " + "extremely slow so please use this flag wisely."); namespace paddle { namespace framework { @@ -58,6 +57,19 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { } } +static void CheckTensorNANOrInf(const std::string& name, + const framework::Tensor& tensor) { + if (tensor.type().hash_code() != typeid(float).hash_code() && + tensor.type().hash_code() != typeid(double).hash_code()) { + return; + } + if (tensor.memory_size() == 0) { + return; + } + PADDLE_ENFORCE(!framework::HasInf(tensor), "Tensor %s has Inf", name); + PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN", name); +} + void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, bool create_local_scope, bool create_vars) { // TODO(tonyyang-svail): @@ -101,6 +113,15 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); VLOG(3) << op->DebugString(); op->Run(*local_scope, place_); + if (FLAGS_check_nan_inf) { + for (auto& vname : op->OutputVars(true)) { + auto* var = local_scope->FindVar(vname); + if (var == nullptr) continue; + if (var->IsType()) { + CheckTensorNANOrInf(vname, var->Get()); + } + } + } } if (create_local_scope) { scope->DeleteScope(local_scope); From 5162c41a9209da9daf5c440396ac3fbd516f16e7 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Wed, 27 Dec 2017 16:02:28 +0800 Subject: [PATCH 2/3] Add gflags --- python/paddle/v2/fluid/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py index c72b5730695db..225b41c5043b5 100644 --- a/python/paddle/v2/fluid/__init__.py +++ b/python/paddle/v2/fluid/__init__.py @@ -36,7 +36,7 @@ def __read_gflags_from_env__(): """ import sys import core - read_env_flags = ['use_pinned_memory'] + read_env_flags = ['use_pinned_memory', 'check_nan_inf'] if core.is_compile_gpu(): read_env_flags.append('fraction_of_gpu_memory_to_use') core.init_gflags([sys.argv[0]] + From 5139e6c740f9829234de3cc4ed5a3fcd56e2331c Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Fri, 29 Dec 2017 12:57:57 +0800 Subject: [PATCH 3/3] Follow comments --- paddle/framework/executor.cc | 6 +++--- paddle/framework/tensor_util.h | 4 ++-- paddle/framework/tensor_util_test.cc | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index de4d3395eb1bf..bf1f0471ccbfc 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -59,11 +59,11 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { static void CheckTensorNANOrInf(const std::string& name, const framework::Tensor& tensor) { - if (tensor.type().hash_code() != typeid(float).hash_code() && - tensor.type().hash_code() != typeid(double).hash_code()) { + if (tensor.memory_size() == 0) { return; } - if (tensor.memory_size() == 0) { + if (tensor.type().hash_code() != typeid(float).hash_code() && + tensor.type().hash_code() != typeid(double).hash_code()) { return; } PADDLE_ENFORCE(!framework::HasInf(tensor), "Tensor %s has Inf", name); diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h index a86fab29250fd..6a21f8db1e396 100644 --- a/paddle/framework/tensor_util.h +++ b/paddle/framework/tensor_util.h @@ -210,10 +210,10 @@ inline void CopyToVector(const Tensor& src, std::vector* dst) { } // Returns true if a tensor contains NAN, i.e., Not A Number. -extern bool HasNAN(const framework::Tensor& tensor); +bool HasNAN(const framework::Tensor& tensor); // Returns true if a tensor contains Inf, i.e., Infinity. -extern bool HasInf(const framework::Tensor& tensor); +bool HasInf(const framework::Tensor& tensor); inline void SerializeToStream(std::ostream& os, const Tensor& tensor, const platform::DeviceContext& dev_ctx) { diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc index f00ce795488bd..0dc5166fcabf7 100644 --- a/paddle/framework/tensor_util_test.cc +++ b/paddle/framework/tensor_util_test.cc @@ -231,7 +231,7 @@ TEST(CopyToVector, Tensor) { #endif } -TEST(IsNAN, CPU) { +TEST(HasNAN, CPU) { using namespace paddle::framework; using namespace paddle::platform; Tensor src; @@ -243,7 +243,7 @@ TEST(IsNAN, CPU) { ASSERT_TRUE(HasNAN(src)); } -TEST(IsInf, CPU) { +TEST(HasInf, CPU) { using namespace paddle::framework; using namespace paddle::platform; Tensor src;