diff --git a/cub/cub/warp/warp_reduce.cuh b/cub/cub/warp/warp_reduce.cuh
index b1d6c04c3ca..77c0005e5cc 100644
--- a/cub/cub/warp/warp_reduce.cuh
+++ b/cub/cub/warp/warp_reduce.cuh
@@ -504,7 +504,7 @@ public:
   [[nodiscard]] _CCCL_DEVICE _CCCL_FORCEINLINE T Reduce(const InputType& input, ReductionOp reduction_op)
   {
     auto thread_reduction = cub::ThreadReduce(input, reduction_op);
-    return WarpReduce::Reduce(thread_reduction, LogicalWarpThreads, reduction_op);
+    return WarpReduce::Reduce(thread_reduction, reduction_op);
   }
   //! @rst
   //! Computes a partially-full warp-wide reduction in the calling warp using the specified binary