diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc
index 31749743a5883..bf1f0471ccbfc 100644
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -14,18 +14,17 @@ limitations under the License. */
 
 #include "paddle/framework/executor.h"
 
-#include
-#include
-#include
 #include
-#include
+#include "gflags/gflags.h"
 
 #include "paddle/framework/feed_fetch_type.h"
 #include "paddle/framework/lod_rank_table.h"
-#include "paddle/framework/lod_tensor.h"
 #include "paddle/framework/lod_tensor_array.h"
 #include "paddle/framework/op_registry.h"
-#include "paddle/framework/scope.h"
+
+DEFINE_bool(check_nan_inf, false,
+            "Checking whether operator produce NAN/INF or not. It will be "
+            "extremely slow so please use this flag wisely.");
 
 namespace paddle {
 namespace framework {
@@ -58,6 +57,19 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) {
   }
 }
 
+static void CheckTensorNANOrInf(const std::string& name,
+                                const framework::Tensor& tensor) {
+  if (tensor.memory_size() == 0) {
+    return;
+  }
+  if (tensor.type().hash_code() != typeid(float).hash_code() &&
+      tensor.type().hash_code() != typeid(double).hash_code()) {
+    return;
+  }
+  PADDLE_ENFORCE(!framework::HasInf(tensor), "Tensor %s has Inf", name);
+  PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN", name);
+}
+
 void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
                    bool create_local_scope, bool create_vars) {
   // TODO(tonyyang-svail):
@@ -101,6 +113,15 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
     auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
     VLOG(3) << op->DebugString();
     op->Run(*local_scope, place_);
+    if (FLAGS_check_nan_inf) {
+      for (auto& vname : op->OutputVars(true)) {
+        auto* var = local_scope->FindVar(vname);
+        if (var == nullptr) continue;
+        if (var->IsType<framework::LoDTensor>()) {
+          CheckTensorNANOrInf(vname, var->Get<framework::LoDTensor>());
+        }
+      }
+    }
   }
   if (create_vars && create_local_scope) {
     scope->DeleteScope(local_scope);
diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h
index a86fab29250fd..6a21f8db1e396 100644
--- a/paddle/framework/tensor_util.h
+++ b/paddle/framework/tensor_util.h
@@ -210,10 +210,10 @@ inline void CopyToVector(const Tensor& src, std::vector<T>* dst) {
 }
 
 // Returns true if a tensor contains NAN, i.e., Not A Number.
-extern bool HasNAN(const framework::Tensor& tensor);
+bool HasNAN(const framework::Tensor& tensor);
 
 // Returns true if a tensor contains Inf, i.e., Infinity.
-extern bool HasInf(const framework::Tensor& tensor);
+bool HasInf(const framework::Tensor& tensor);
 
 inline void SerializeToStream(std::ostream& os, const Tensor& tensor,
                               const platform::DeviceContext& dev_ctx) {
diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc
index f00ce795488bd..0dc5166fcabf7 100644
--- a/paddle/framework/tensor_util_test.cc
+++ b/paddle/framework/tensor_util_test.cc
@@ -231,7 +231,7 @@ TEST(CopyToVector, Tensor) {
 #endif
 }
 
-TEST(IsNAN, CPU) {
+TEST(HasNAN, CPU) {
   using namespace paddle::framework;
   using namespace paddle::platform;
   Tensor src;
@@ -243,7 +243,7 @@
   ASSERT_TRUE(HasNAN(src));
 }
 
-TEST(IsInf, CPU) {
+TEST(HasInf, CPU) {
   using namespace paddle::framework;
   using namespace paddle::platform;
   Tensor src;
diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py
index c72b5730695db..225b41c5043b5 100644
--- a/python/paddle/v2/fluid/__init__.py
+++ b/python/paddle/v2/fluid/__init__.py
@@ -36,7 +36,7 @@ def __read_gflags_from_env__():
     """
     import sys
     import core
-    read_env_flags = ['use_pinned_memory']
+    read_env_flags = ['use_pinned_memory', 'check_nan_inf']
     if core.is_compile_gpu():
         read_env_flags.append('fraction_of_gpu_memory_to_use')
     core.init_gflags([sys.argv[0]] +
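
Note (illustrative addition, not part of the diff): the new CheckTensorNANOrInf helper only inspects non-empty float/double tensors before delegating to framework::HasNAN and framework::HasInf, whose declarations above merely drop the redundant "extern". As a rough sketch of what such a check amounts to on a plain CPU buffer (Paddle's real implementations operate on framework::Tensor and are not shown in this diff), one could write:

    // Minimal sketch only: scan a raw float buffer for NaN / Inf values,
    // analogous in spirit to framework::HasNAN / framework::HasInf.
    #include <cmath>
    #include <cstddef>

    static bool BufferHasNAN(const float* data, size_t n) {
      for (size_t i = 0; i < n; ++i) {
        if (std::isnan(data[i])) return true;  // true for any NaN payload
      }
      return false;
    }

    static bool BufferHasInf(const float* data, size_t n) {
      for (size_t i = 0; i < n; ++i) {
        if (std::isinf(data[i])) return true;  // true for +inf and -inf
      }
      return false;
    }

Because the scan touches every output of every operator, the flag defaults to false; with this diff it can be turned on via the standard gflags command-line syntax for C++ binaries (e.g. --check_nan_inf=true), and the Python side whitelists 'check_nan_inf' in read_env_flags so the fluid package can pick it up when initializing gflags.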