Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Too lengthy implementation of TensorContainsNan and TensorContainsINF #11949

Closed
wangkuiyi opened this issue Jul 3, 2018 · 1 comment
Closed

Comments

@wangkuiyi
Copy link
Collaborator

The implementation contains three parts:

  1. VisitPlace (35 lines)

    // Adapts a device-place visitor for boost::apply_visitor dispatch.
    // CPU places are always forwarded; CUDA and CUDA-pinned places are
    // forwarded only in a PADDLE_WITH_CUDA build, otherwise PADDLE_THROW
    // reports the unsupported device.
    template <typename Visitor>
    struct PlaceVisitorWrapper
        : public boost::static_visitor<typename Visitor::result_type> {
      const Visitor &visitor_;

      explicit PlaceVisitorWrapper(const Visitor &visitor) : visitor_(visitor) {}

      typename Visitor::result_type operator()(const CPUPlace &cpu_place) const {
        return visitor_(cpu_place);
      }

      typename Visitor::result_type operator()(const CUDAPlace &cuda_place) const {
    #ifdef PADDLE_WITH_CUDA
        return visitor_(cuda_place);
    #else
        PADDLE_THROW("Paddle is not compiled with CUDA. Cannot visit cuda device");
        // Unreachable after the throw; satisfies the return-type requirement.
        return typename Visitor::result_type();
    #endif
      }

      typename Visitor::result_type operator()(
          const CUDAPinnedPlace &pinned_place) const {
    #ifdef PADDLE_WITH_CUDA
        return visitor_(pinned_place);
    #else
        PADDLE_THROW("Paddle is not compiled with CUDA. Cannot visit cuda_pinned");
        return typename Visitor::result_type();
    #endif
      }
    };
    template <typename Visitor>
    typename Visitor::result_type VisitPlace(const Place &place,
    const Visitor &visitor) {
    return boost::apply_visitor(PlaceVisitorWrapper<Visitor>(visitor), place);
    }

  2. The CPU implementation of TensorContainsNaN (~100 lines)

    // Data-type visitor: evaluates `predicate_` element-wise over `tensor_`
    // (with the element type T chosen by VisitDataType), reduces with
    // Eigen's any(), and writes the single boolean result into `out_` on the
    // device owned by `ctx_`.
    template <typename Predicate, typename DevCtx>
    struct AnyDTypeVisitor {
      Predicate predicate_;
      const Tensor& tensor_;
      const DevCtx& ctx_;
      Tensor* out_;

      AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx,
                      Tensor* out)
          : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {}

      template <typename T>
      void operator()() const {
        auto flat = EigenVector<T>::Flatten(tensor_);
        auto result = EigenScalar<bool>::From(*out_);
        // True iff the predicate holds for at least one element.
        result.device(*ctx_.eigen_device()) = predicate_(flat).any();
      }
    };
    // Dispatches on the tensor's runtime dtype and runs AnyDTypeVisitor,
    // leaving a one-element bool tensor in `out`.
    template <typename Predicate, typename DevCtx>
    inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
                        const DevCtx& ctx, framework::Tensor* out) {
      AnyDTypeVisitor<Predicate, DevCtx> dtype_visitor(predicate, tensor, ctx, out);
      VisitDataType(ToDataType(tensor.type()), dtype_visitor);
    }
    // Place visitor answering "does predicate_ hold for any element of
    // tensor_?".  The reduction runs on the tensor's own device; GetResult
    // overloads then bring the boolean flag back to the host.
    template <typename Predicate>
    struct AnyVisitor : public boost::static_visitor<bool> {
      const framework::Tensor& tensor_;
      Predicate predicate_;

      AnyVisitor(const framework::Tensor& tensor, Predicate predicate)
          : tensor_(tensor), predicate_(std::move(predicate)) {}

      template <typename Place>
      bool operator()(const Place& place) const {
        // One-element bool tensor allocated on the same place as the input.
        framework::Tensor flag;
        flag.Resize({1});
        flag.mutable_data<bool>(place);
        auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
        AnyImpl(predicate_, tensor_, *ctx, &flag);
        return this->GetResult(flag, place);
      }

      // CPU result: directly readable from host memory.
      bool GetResult(const framework::Tensor& out,
                     const platform::CPUPlace& cpu) const {
        return *out.data<bool>();
      }

      // CUDA-pinned result: host-addressable, read it like CPU memory.
      bool GetResult(const framework::Tensor& out,
                     const platform::CUDAPinnedPlace& cpu) const {
        return *out.data<bool>();
      }

      // GPU result: copy the flag to the host, then read it there.
      bool GetResult(const framework::Tensor& out,
                     const platform::CUDAPlace& gpu) const {
        platform::CPUPlace cpu;
        framework::Tensor host_flag;
        host_flag.Resize({1});
        host_flag.mutable_data<bool>(cpu);
        auto gpuctx = platform::DeviceContextPool::Instance().Get(gpu);
        // Wait before and after the copy so the asynchronous reduction and
        // the device-to-host transfer both complete before the read.
        gpuctx->Wait();
        TensorCopy(out, cpu, *gpuctx, &host_flag);
        gpuctx->Wait();
        return GetResult(host_flag, cpu);
      }
    };
    // True iff `predicate` holds for at least one element of `tensor`,
    // dispatching the reduction to the tensor's place (CPU or GPU).
    template <typename Predicate>
    inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
      const auto place = tensor.place();
      AnyVisitor<Predicate> visitor(tensor, predicate);
      return platform::VisitPlace(place, visitor);
    }
    // Element-wise NaN test over an Eigen vector expression.
    struct ContainsNANPredicate {
      template <typename T>
      auto operator()(const T& eigen_vec) const
          -> decltype(std::declval<T>().isnan()) {
        // Maps the vector to booleans: true where the element is NaN.
        // (Original comment said "true if is inf" — copy-paste error from
        // ContainsInfPredicate.)
        return eigen_vec.isnan();
      }
    };
    // Returns true iff `tensor` contains at least one NaN element.
    bool TensorContainsNAN(const framework::Tensor& tensor) {
      return Any(tensor, ContainsNANPredicate{});
    }
    // Element-wise infinity test over an Eigen vector expression.
    struct ContainsInfPredicate {
      template <typename T>
      auto operator()(const T& eigen_vec) const
          -> decltype(std::declval<T>().isinf()) {
        // Maps the vector to booleans: true where the element is +/-Inf.
        return eigen_vec.isinf();
      }
    };
    // Returns true iff `tensor` contains at least one Inf element.
    bool TensorContainsInf(const framework::Tensor& tensor) {
      return Any(tensor, ContainsInfPredicate{});
    }

  3. The CUDA implementation (~100 lines)

// Data-type visitor (CUDA-build variant): evaluates `predicate_`
// element-wise over `tensor_` for the element type T chosen by
// VisitDataType, reduces with Eigen's any(), and writes the single boolean
// result into `out_` on the device owned by `ctx_`.
template <typename Predicate, typename DevCtx>
struct AnyDTypeVisitor {
  Predicate predicate_;
  const Tensor& tensor_;
  const DevCtx& ctx_;
  Tensor* out_;

  AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx,
                  Tensor* out)
      : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {}

  template <typename T>
  void operator()() const {
    auto flat = EigenVector<T>::Flatten(tensor_);
    auto result = EigenScalar<bool>::From(*out_);
    // True iff the predicate holds for at least one element.
    result.device(*ctx_.eigen_device()) = predicate_(flat).any();
  }
};
// Dispatches on the tensor's runtime dtype and runs AnyDTypeVisitor,
// leaving a one-element bool tensor in `out`.
template <typename Predicate, typename DevCtx>
inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
                    const DevCtx& ctx, framework::Tensor* out) {
  AnyDTypeVisitor<Predicate, DevCtx> dtype_visitor(predicate, tensor, ctx, out);
  VisitDataType(ToDataType(tensor.type()), dtype_visitor);
}
// Place visitor answering "does predicate_ hold for any element of
// tensor_?" (CUDA-build variant).  The reduction runs on the tensor's own
// device; GetResult overloads then bring the boolean flag back to the host.
template <typename Predicate>
struct AnyVisitor : public boost::static_visitor<bool> {
  const framework::Tensor& tensor_;
  Predicate predicate_;

  AnyVisitor(const framework::Tensor& tensor, Predicate predicate)
      : tensor_(tensor), predicate_(std::move(predicate)) {}

  template <typename Place>
  bool operator()(const Place& place) const {
    // One-element bool tensor allocated on the same place as the input.
    framework::Tensor out;
    out.Resize({1});
    out.mutable_data<bool>(place);
    auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
    AnyImpl(predicate_, tensor_, *ctx, &out);
    return this->GetResult(out, place);
  }

  // GPU result: synchronize, copy the flag to the host, then read it.
  bool GetResult(const framework::Tensor& out,
                 const platform::CUDAPlace& gpu) const {
    platform::CPUPlace cpu;
    framework::Tensor tmp;
    tmp.Resize({1});
    tmp.mutable_data<bool>(cpu);
    auto gpuctx = platform::DeviceContextPool::Instance().Get(gpu);
    // Wait before and after so the async reduction and the device-to-host
    // transfer both complete before the read.
    gpuctx->Wait();
    // NOTE(review): the CPU-side variant of this class calls TensorCopy
    // here; this version calls Copy — presumably the older name in this
    // file's revision.  Kept as-is; confirm against this file's headers.
    Copy(out, cpu, *gpuctx, &tmp);
    gpuctx->Wait();
    return GetResult(tmp, cpu);
  }

  // CPU result: directly readable from host memory.
  bool GetResult(const framework::Tensor& out,
                 const platform::CPUPlace& cpu) const {
    return *out.data<bool>();
  }

  // BUGFIX: this overload was missing although VisitPlace can dispatch a
  // CUDAPinnedPlace, so operator()<CUDAPinnedPlace> could not compile.
  // Pinned memory is host-addressable; read it like CPU memory (mirrors
  // the CPU-side AnyVisitor).
  bool GetResult(const framework::Tensor& out,
                 const platform::CUDAPinnedPlace& cuda_pinned) const {
    return *out.data<bool>();
  }
};
// True iff `predicate` holds for at least one element of `tensor`,
// dispatching the reduction to the tensor's place (CPU or GPU).
template <typename Predicate>
inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
  const auto place = tensor.place();
  AnyVisitor<Predicate> visitor(tensor, predicate);
  return platform::VisitPlace(place, visitor);
}
// Element-wise NaN test over an Eigen vector expression.
struct HasNANPredicate {
  template <typename T>
  auto operator()(const T& eigen_vec) const
      -> decltype(std::declval<T>().isnan()) {
    // Maps the vector to booleans: true where the element is NaN.
    // (Original comment said "true if is inf" — copy-paste error from
    // HasInfPredicate.)
    return eigen_vec.isnan();
  }
};
// Returns true iff `tensor` contains at least one NaN element.
bool HasNAN(const framework::Tensor& tensor) {
  return Any(tensor, HasNANPredicate{});
}
// Element-wise infinity test over an Eigen vector expression.
struct HasInfPredicate {
  template <typename T>
  auto operator()(const T& eigen_vec) const
      -> decltype(std::declval<T>().isinf()) {
    // Maps the vector to booleans: true where the element is +/-Inf.
    return eigen_vec.isinf();
  }
};
// Returns true iff `tensor` contains at least one Inf element.
bool HasInf(const framework::Tensor& tensor) {
  return Any(tensor, HasInfPredicate{});
}

It takes about 235 lines of C++ code just to test whether a tensor contains NaN or Inf!

@lucywsq
Copy link

lucywsq commented Aug 28, 2018

Hello, this issue has not been updated in the past month, so we will close it today for the sake of other users' experience. If you still need to follow up on this question after it is closed, please feel free to reopen it; in that case, we will get back to you within 24 hours. We apologize for any inconvenience caused by the closure, and thank you so much for your support of the PaddlePaddle Group!

@lucywsq lucywsq closed this as completed Aug 28, 2018
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants