Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 5 additions & 13 deletions example/12_reduce/reduce_blockwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <initializer_list>
#include <cstdlib>
#include <getopt.h>
#include <half.hpp>

#include "check_err.hpp"
#include "config.hpp"
Expand All @@ -27,10 +26,6 @@ using InDataType = ck::half_t;
using OutDataType = ck::half_t;
using AccDataType = float;

using HostInDataType = half_float::half;
using HostOutDataType = half_float::half;
using HostAccDataType = float;

constexpr int Rank = 4;
constexpr int NumReduceDim = 3;

Expand Down Expand Up @@ -306,21 +301,18 @@ int main(int argc, char* argv[])

if(args.do_verification)
{
ReductionHost<HostInDataType,
HostAccDataType,
HostOutDataType,
ReductionHost<InDataType,
AccDataType,
OutDataType,
ReduceOpId,
Rank,
NumReduceDim,
PropagateNan,
NeedIndices>
hostReduce(in.mDesc, out_ref.mDesc, invariantDims, reduceDims);

hostReduce.Run(alpha,
reinterpret_cast<const HostInDataType*>(in.mData.data()),
beta,
reinterpret_cast<HostOutDataType*>(out_ref.mData.data()),
out_indices_ref.mData.data());
hostReduce.Run(
alpha, in.mData.data(), beta, out_ref.mData.data(), out_indices_ref.mData.data());
};

const auto i_inLengths = to_int_vector(args.inLengths);
Expand Down
56 changes: 53 additions & 3 deletions include/ck/utility/math_v2.hpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,64 @@
#ifndef CK_MATH_V2_HPP
#define CK_MATH_V2_HPP

#include <cmath>
#include "data_type.hpp"
#include "half.hpp"

namespace ck {
namespace math {

static inline __device__ half_t abs(half_t x) { return __habs(x); };
static inline __device__ half_t sqrtf(half_t x) { return hsqrt(x); };
static inline __device__ bool isnan(half_t x) { return __hisnan(x); };
static inline __host__ float abs(float x) { return std::abs(x); };

static inline __host__ double abs(double x) { return std::abs(x); };

static inline __host__ int8_t abs(int8_t x)
{
int8_t sgn = x >> (8 - 1);

return (x ^ sgn) - sgn;
};

static inline __host__ int32_t abs(int32_t x)
{
int32_t sgn = x >> (32 - 1);

return (x ^ sgn) - sgn;
};

static inline __host__ half_t abs(half_t x)
{
half_float::half xx = *reinterpret_cast<half_float::half*>(&x);

half_float::half abs_xx = half_float::abs(xx);

half_t abs_x = *reinterpret_cast<half_t*>(&abs_xx);

return abs_x;
};

static inline __host__ float isnan(float x) { return std::isnan(x); };

static inline __host__ double isnan(double x) { return std::isnan(x); };

static inline __host__ int8_t isnan(int8_t x)
{
(void)x;
return false;
};

static inline __host__ int32_t isnan(int32_t x)
{
(void)x;
return false;
};

static inline __host__ bool isnan(half_t x)
{
half_float::half xx = *reinterpret_cast<half_float::half*>(&x);

return half_float::isnan(xx);
};

} // namespace math
} // namespace ck
Expand Down
4 changes: 2 additions & 2 deletions include/ck/utility/reduction_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace ck {
struct float_equal_one
{
template <class T>
__device__ inline bool operator()(T x)
__host__ __device__ inline bool operator()(T x)
{
return x <= static_cast<T>(1.0f) and x >= static_cast<T>(1.0f);
};
Expand All @@ -42,7 +42,7 @@ struct float_equal_one
struct float_equal_zero
{
template <class T>
__device__ inline bool operator()(T x)
__host__ __device__ inline bool operator()(T x)
{
return x <= static_cast<T>(0.0f) and x >= static_cast<T>(0.0f);
};
Expand Down
37 changes: 7 additions & 30 deletions library/include/ck/library/host_tensor/host_reduce_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,15 @@
#ifndef GUARD_HOST_REDUCE_UTIL_HPP
#define GUARD_HOST_REDUCE_UTIL_HPP

#include <half.hpp>
#include <limits>
#include <cmath>
#include <cassert>
#include <stdexcept>
#include <string>

#include "reduction_enums.hpp"
#include "data_type.hpp"
#include "math_v2.hpp"

namespace ck {

Expand All @@ -42,34 +43,10 @@ namespace host_reduce {
using ck::NanPropagation;
using ck::ReduceTensorOp;

template <typename T>
static inline bool float_equal_one(T);

static inline bool float_equal_one(float x) { return x == 1.0f; };

static inline bool float_equal_one(double x) { return x == 1.0; };

static inline bool float_equal_one(half_float::half x)
{
return x == static_cast<half_float::half>(1.0f);
};

template <typename T>
static inline bool float_equal_zero(T x);

static inline bool float_equal_zero(float x) { return x == 0.0f; };

static inline bool float_equal_zero(double x) { return x == 0.0; };

static inline bool float_equal_zero(half_float::half x)
{
return x == static_cast<half_float::half>(0.0f);
};

template <typename AccDataType, ReduceTensorOp ReduceOpId>
__host__ static inline std::function<void(AccDataType&)> PreUnaryOpFn(int)
{
using std::abs;
using ck::math::abs;

if constexpr(ReduceOpId == ReduceTensorOp::NORM1)
{
Expand Down Expand Up @@ -196,11 +173,11 @@ __host__ static inline AccDataType ReduceOpZeroVal()
}
else if constexpr(ReduceOpId == ReduceTensorOp::MIN)
{
return (std::numeric_limits<AccDataType>::max());
return (ck::NumericLimits<AccDataType>::Max());
}
else if constexpr(ReduceOpId == ReduceTensorOp::MAX)
{
return (std::numeric_limits<AccDataType>::lowest());
return (ck::NumericLimits<AccDataType>::Lowest());
}
else if constexpr(ReduceOpId == ReduceTensorOp::AMAX)
{
Expand All @@ -222,7 +199,7 @@ binop_with_nan_check(std::function<void(AccDataType&, AccDataType)> opReduce,
AccDataType& accuVal,
AccDataType currVal)
{
using std::isnan;
using ck::math::isnan;

if constexpr(!PropagateNan)
{
Expand All @@ -245,7 +222,7 @@ binop_with_nan_check2(std::function<void(AccDataType&, AccDataType, bool&)> opRe
int& accuIndex,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this tensor index? If so, index_t should be used.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, need correction

int currIndex)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

{
using std::isnan;
using ck::math::isnan;

if constexpr(!PropagateNan)
{
Expand Down
25 changes: 13 additions & 12 deletions library/include/ck/library/host_tensor/host_reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <functional>

#include "reduction_enums.hpp"
#include "reduction_common.hpp"
#include "host_reduce_util.hpp"
#include "host_tensor.hpp"
#include "data_type.hpp"
Expand Down Expand Up @@ -196,10 +197,10 @@ struct ReductionHost
OutDataType* out_data,
IndexDataType* out_indices)
{
using ck::float_equal_one;
using ck::float_equal_zero;
using ck::type_convert;
using ck::host_reduce::binop_with_nan_check2;
using ck::host_reduce::float_equal_one;
using ck::host_reduce::float_equal_zero;
using ck::host_reduce::ReduceOpFn2;
using ck::host_reduce::ReduceOpZeroVal;

Expand Down Expand Up @@ -227,10 +228,10 @@ struct ReductionHost

posUnaryOp(accuVal);

if(!float_equal_one(alpha))
if(!float_equal_one{}(alpha))
accuVal *= type_convert<AccDataType>(alpha);

if(!float_equal_zero(beta))
if(!float_equal_zero{}(beta))
accuVal += type_convert<AccDataType>(out_data[0]) * type_convert<AccDataType>(beta);

out_data[0] = type_convert<OutDataType>(accuVal);
Expand Down Expand Up @@ -263,13 +264,13 @@ struct ReductionHost

posUnaryOp(accuVal);

if(!float_equal_one(alpha))
if(!float_equal_one{}(alpha))
accuVal *= type_convert<AccDataType>(alpha);

auto dst_offset =
get_offset_from_index<NumInvariantDim>(outStrides, invariant_index);

if(!float_equal_zero(beta))
if(!float_equal_zero{}(beta))
accuVal += type_convert<AccDataType>(out_data[dst_offset]) *
type_convert<AccDataType>(beta);

Expand Down Expand Up @@ -303,10 +304,10 @@ struct ReductionHost

void RunImpl_no_index(float alpha, const InDataType* in_data, float beta, OutDataType* out_data)
{
using ck::float_equal_one;
using ck::float_equal_zero;
using ck::type_convert;
using ck::host_reduce::binop_with_nan_check;
using ck::host_reduce::float_equal_one;
using ck::host_reduce::float_equal_zero;
using ck::host_reduce::ReduceOpFn;
using ck::host_reduce::ReduceOpZeroVal;

Expand All @@ -330,10 +331,10 @@ struct ReductionHost

posUnaryOp(accuVal);

if(!float_equal_one(alpha))
if(!float_equal_one{}(alpha))
accuVal *= type_convert<AccDataType>(alpha);

if(!float_equal_zero(beta))
if(!float_equal_zero{}(beta))
accuVal += type_convert<AccDataType>(out_data[0]) * type_convert<AccDataType>(beta);

out_data[0] = type_convert<OutDataType>(accuVal);
Expand Down Expand Up @@ -361,13 +362,13 @@ struct ReductionHost

posUnaryOp(accuVal);

if(!float_equal_one(alpha))
if(!float_equal_one{}(alpha))
accuVal *= type_convert<AccDataType>(alpha);

auto dst_offset =
get_offset_from_index<NumInvariantDim>(outStrides, invariant_index);

if(!float_equal_zero(beta))
if(!float_equal_zero{}(beta))
accuVal += type_convert<AccDataType>(out_data[dst_offset]) *
type_convert<AccDataType>(beta);

Expand Down
17 changes: 5 additions & 12 deletions profiler/include/profile_reduce_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,25 +380,18 @@ void profile_reduce_impl_impl(bool do_verification,

if(do_verification)
{
using HostInDataType = typename type_mapping<InDataType>::OutType;
using HostOutDataType = typename type_mapping<OutDataType>::OutType;
using HostAccDataType = typename type_mapping<AccDataType>::OutType;

ReductionHost<HostInDataType,
HostAccDataType,
HostOutDataType,
ReductionHost<InDataType,
AccDataType,
OutDataType,
ReduceOpId,
Rank,
NumReduceDim,
PropagateNan,
NeedIndices>
hostReduce(in.mDesc, out_ref.mDesc, invariantDims, reduceDims);

hostReduce.Run(alpha,
reinterpret_cast<const HostInDataType*>(in.mData.data()),
beta,
reinterpret_cast<HostOutDataType*>(out_ref.mData.data()),
out_indices_ref.mData.data());
hostReduce.Run(
alpha, in.mData.data(), beta, out_ref.mData.data(), out_indices_ref.mData.data());
};

const auto i_inLengths = to_int_vector(inLengths);
Expand Down
29 changes: 4 additions & 25 deletions test/reduce/reduce_no_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,6 @@ static inline std::vector<int> get_invariant_dims(const std::vector<int>& reduce
return invariantDims;
};

// map the data type used by the GPU kernels to the corresponding type used by the host codes
template <typename InType>
struct type_mapping
{
using OutType = InType;
};

template <>
struct type_mapping<ck::half_t>
{
using OutType = half_float::half;
};

constexpr int Rank = 4;

constexpr ReduceTensorOp ReduceOpId = ReduceTensorOp::AVG;
Expand Down Expand Up @@ -226,25 +213,17 @@ bool test_reduce_no_index_impl(int init_method,

bool result = true;

using HostInDataType = typename type_mapping<InDataType>::OutType;
using HostOutDataType = typename type_mapping<OutDataType>::OutType;
using HostAccDataType = typename type_mapping<AccDataType>::OutType;

ReductionHost<HostInDataType,
HostAccDataType,
HostOutDataType,
ReductionHost<InDataType,
AccDataType,
OutDataType,
ReduceOpId,
Rank,
NumReduceDim,
PropagateNan,
NeedIndices>
hostReduce(in.mDesc, out_ref.mDesc, invariantDims, reduceDims);

hostReduce.Run(alpha,
reinterpret_cast<const HostInDataType*>(in.mData.data()),
beta,
reinterpret_cast<HostOutDataType*>(out_ref.mData.data()),
nullptr);
hostReduce.Run(alpha, in.mData.data(), beta, out_ref.mData.data(), nullptr);

const auto i_inLengths = to_int_vector(inLengths);
const auto i_inStrides = to_int_vector(inStrides);
Expand Down
Loading