Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ struct GridwiseReduction_xy_to_x_blockwise
const auto zeroVal = opReduce::GetReductionZeroVal();

const auto src_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>{}(zeroVal));
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>(zeroVal));
auto dst_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_dst_global, dst1dDesc.GetElementSpaceSize());

Expand Down Expand Up @@ -178,11 +178,11 @@ struct GridwiseReduction_xy_to_x_blockwise
if(thread_local_id == 0)
{
if(!float_equal_one{}(alpha))
accuValue_buf(I0) *= type_convert<compType>{}(alpha);
accuValue_buf(I0) *= type_convert<compType>(alpha);

StaticBuffer<AddressSpaceEnum_t::Vgpr, dstDataType, 1, true> dstValue_buf;

dstValue_buf(I0) = type_convert<dstDataType>{}(accuValue_buf[I0]);
dstValue_buf(I0) = type_convert<dstDataType>(accuValue_buf[I0]);

if(!float_equal_zero{}(beta))
{
Expand Down Expand Up @@ -246,7 +246,7 @@ struct GridwiseReduction_xy_to_x_blockwise
const auto zeroVal = opReduce::GetReductionZeroVal();

const auto src_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>{}(zeroVal));
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>(zeroVal));
auto dst_global_val_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_dst_global, dst1dDesc.GetElementSpaceSize());
auto dst_global_idx_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
Expand Down Expand Up @@ -347,11 +347,11 @@ struct GridwiseReduction_xy_to_x_blockwise
if(thread_local_id == 0)
{
if(!float_equal_one{}(alpha))
accuValue_buf(I0) *= type_convert<compType>{}(alpha);
accuValue_buf(I0) *= type_convert<compType>(alpha);

StaticBuffer<AddressSpaceEnum_t::Vgpr, dstDataType, 1, true> dstValue_buf;

dstValue_buf(I0) = type_convert<dstDataType>{}(accuValue_buf[I0]);
dstValue_buf(I0) = type_convert<dstDataType>(accuValue_buf[I0]);

if(!float_equal_zero{}(beta))
{
Expand Down Expand Up @@ -433,10 +433,8 @@ struct GridwiseReduction_xy_to_x_blockwise

const auto zeroVal = opReduce::GetReductionZeroVal();

const auto src_global_val_buf =
make_dynamic_buffer<AddressSpaceEnum_t::Global>(ws_values_global,
src2dDesc.GetElementSpaceSize(),
type_convert<srcDataType>{}(zeroVal));
const auto src_global_val_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
ws_values_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>(zeroVal));
const auto src_global_idx_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
ws_indices_global, src2dDesc.GetElementSpaceSize());
auto dst_global_val_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
Expand Down Expand Up @@ -553,11 +551,11 @@ struct GridwiseReduction_xy_to_x_blockwise
if(thread_local_id == 0)
{
if(!float_equal_one{}(alpha))
accuValue_buf(I0) *= type_convert<compType>{}(alpha);
accuValue_buf(I0) *= type_convert<compType>(alpha);

StaticBuffer<AddressSpaceEnum_t::Vgpr, dstDataType, 1, true> dstValue_buf;

dstValue_buf(I0) = type_convert<dstDataType>{}(accuValue_buf[I0]);
dstValue_buf(I0) = type_convert<dstDataType>(accuValue_buf[I0]);

if(!float_equal_zero{}(beta))
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct GridwiseReduction_xy_to_x_direct_threadwise
const auto zeroVal = opReduce::GetReductionZeroVal();

const auto src_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>{}(zeroVal));
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>(zeroVal));
auto dst_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_dst_global, dst1dDesc.GetElementSpaceSize());

Expand Down Expand Up @@ -145,11 +145,11 @@ struct GridwiseReduction_xy_to_x_direct_threadwise
make_naive_tensor_descriptor_packed(make_tuple(Number<1>{}));

if(!float_equal_one{}(alpha))
accuValue_buf(I0) *= type_convert<compType>{}(alpha);
accuValue_buf(I0) *= type_convert<compType>(alpha);

StaticBuffer<AddressSpaceEnum_t::Vgpr, dstDataType, 1, true> dstValue_buf;

dstValue_buf(I0) = type_convert<dstDataType>{}(accuValue_buf[I0]);
dstValue_buf(I0) = type_convert<dstDataType>(accuValue_buf[I0]);

if(!float_equal_zero{}(beta))
{
Expand Down Expand Up @@ -207,7 +207,7 @@ struct GridwiseReduction_xy_to_x_direct_threadwise
const auto zeroVal = opReduce::GetReductionZeroVal();

const auto src_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>{}(zeroVal));
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>(zeroVal));
auto dst_global_val_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_dst_global, dst1dDesc.GetElementSpaceSize());
auto dst_global_idx_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
Expand Down Expand Up @@ -273,11 +273,11 @@ struct GridwiseReduction_xy_to_x_direct_threadwise
make_naive_tensor_descriptor_packed(make_tuple(Number<1>{}));

if(!float_equal_one{}(alpha))
accuValue_buf(I0) *= type_convert<compType>{}(alpha);
accuValue_buf(I0) *= type_convert<compType>(alpha);

StaticBuffer<AddressSpaceEnum_t::Vgpr, dstDataType, 1, true> dstValue_buf;

dstValue_buf(I0) = type_convert<dstDataType>{}(accuValue_buf[I0]);
dstValue_buf(I0) = type_convert<dstDataType>(accuValue_buf[I0]);

if(!float_equal_zero{}(beta))
{
Expand Down Expand Up @@ -350,10 +350,8 @@ struct GridwiseReduction_xy_to_x_direct_threadwise

const auto zeroVal = opReduce::GetReductionZeroVal();

const auto src_global_val_buf =
make_dynamic_buffer<AddressSpaceEnum_t::Global>(ws_values_global,
src2dDesc.GetElementSpaceSize(),
type_convert<srcDataType>{}(zeroVal));
const auto src_global_val_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
ws_values_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>(zeroVal));
const auto src_global_idx_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
ws_indices_global, src2dDesc.GetElementSpaceSize());
auto dst_global_val_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
Expand Down Expand Up @@ -436,11 +434,11 @@ struct GridwiseReduction_xy_to_x_direct_threadwise
make_naive_tensor_descriptor_packed(make_tuple(Number<1>{}));

if(!float_equal_one{}(alpha))
accuValue_buf(I0) *= type_convert<compType>{}(alpha);
accuValue_buf(I0) *= type_convert<compType>(alpha);

StaticBuffer<AddressSpaceEnum_t::Vgpr, dstDataType, 1, true> dstValue_buf;

dstValue_buf(I0) = type_convert<dstDataType>{}(accuValue_buf[I0]);
dstValue_buf(I0) = type_convert<dstDataType>(accuValue_buf[I0]);

if(!float_equal_zero{}(beta))
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct GridwiseReduction_xy_to_x_direct_warpwise
const auto zeroVal = opReduce::GetReductionZeroVal();

const auto src_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>{}(zeroVal));
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>(zeroVal));
auto dst_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_dst_global, dst1dDesc.GetElementSpaceSize());

Expand Down Expand Up @@ -154,11 +154,11 @@ struct GridwiseReduction_xy_to_x_direct_warpwise
if(thread_inwarp_id == 0)
{
if(!float_equal_one{}(alpha))
accuValue_buf(I0) *= type_convert<compType>{}(alpha);
accuValue_buf(I0) *= type_convert<compType>(alpha);

StaticBuffer<AddressSpaceEnum_t::Vgpr, dstDataType, 1, true> dstValue_buf;

dstValue_buf(I0) = type_convert<dstDataType>{}(accuValue_buf[I0]);
dstValue_buf(I0) = type_convert<dstDataType>(accuValue_buf[I0]);

if(!float_equal_zero{}(beta))
{
Expand Down Expand Up @@ -218,7 +218,7 @@ struct GridwiseReduction_xy_to_x_direct_warpwise
const auto zeroVal = opReduce::GetReductionZeroVal();

const auto src_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>{}(zeroVal));
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>(zeroVal));
auto dst_global_val_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_dst_global, dst1dDesc.GetElementSpaceSize());
auto dst_global_idx_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
Expand Down Expand Up @@ -293,11 +293,11 @@ struct GridwiseReduction_xy_to_x_direct_warpwise
if(thread_inwarp_id == 0)
{
if(!float_equal_one{}(alpha))
accuValue_buf(I0) *= type_convert<compType>{}(alpha);
accuValue_buf(I0) *= type_convert<compType>(alpha);

StaticBuffer<AddressSpaceEnum_t::Vgpr, dstDataType, 1, true> dstValue_buf;

dstValue_buf(I0) = type_convert<dstDataType>{}(accuValue_buf[I0]);
dstValue_buf(I0) = type_convert<dstDataType>(accuValue_buf[I0]);

if(!float_equal_zero{}(beta))
{
Expand Down Expand Up @@ -375,10 +375,8 @@ struct GridwiseReduction_xy_to_x_direct_warpwise

const auto zeroVal = opReduce::GetReductionZeroVal();

const auto src_global_val_buf =
make_dynamic_buffer<AddressSpaceEnum_t::Global>(ws_values_global,
src2dDesc.GetElementSpaceSize(),
type_convert<srcDataType>{}(zeroVal));
const auto src_global_val_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
ws_values_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>(zeroVal));
const auto src_global_idx_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
ws_indices_global, src2dDesc.GetElementSpaceSize());
auto dst_global_val_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
Expand Down Expand Up @@ -472,11 +470,11 @@ struct GridwiseReduction_xy_to_x_direct_warpwise
if(thread_inwarp_id == 0)
{
if(!float_equal_one{}(alpha))
accuValue_buf(I0) *= type_convert<compType>{}(alpha);
accuValue_buf(I0) *= type_convert<compType>(alpha);

StaticBuffer<AddressSpaceEnum_t::Vgpr, dstDataType, 1, true> dstValue_buf;

dstValue_buf(I0) = type_convert<dstDataType>{}(accuValue_buf[I0]);
dstValue_buf(I0) = type_convert<dstDataType>(accuValue_buf[I0]);

if(!float_equal_zero{}(beta))
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ struct GridwiseReduction_xy_to_x_multiblock
__shared__ compType p_in_block_buffer[BlockBufferSize];

const auto src_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>{}(zeroVal));
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>(zeroVal));
auto workspace_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
ws_values_global, dst1dDesc.GetLength(I0) * BlkGroupSize);

Expand Down Expand Up @@ -223,7 +223,7 @@ struct GridwiseReduction_xy_to_x_multiblock
__shared__ int p_in_block_indices_buffer[BlockBufferSize];

const auto src_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>{}(zeroVal));
p_src_global, src2dDesc.GetElementSpaceSize(), type_convert<srcDataType>(zeroVal));
auto workspace_global_val_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
ws_values_global, dst1dDesc.GetLength(I0) * BlkGroupSize);
auto workspace_global_idx_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ struct BlockwiseReduction_2d_block_buffer
offset = blockIsOneRow
? buffer2dDesc.CalculateOffset(make_tuple(otherDimInd, thread_local_id))
: buffer2dDesc.CalculateOffset(make_tuple(thread_local_id, otherDimInd));
compType opData = type_convert<compType>{}(block_buffer[offset]);
compType opData = type_convert<compType>(block_buffer[offset]);

binop::calculate(lAccuData, opData);
}
Expand All @@ -89,18 +89,18 @@ struct BlockwiseReduction_2d_block_buffer
? buffer2dDesc.CalculateOffset(make_tuple(0, thread_local_id + indOffset))
: buffer2dDesc.CalculateOffset(make_tuple(thread_local_id + indOffset, 0));

compType opData1 = type_convert<compType>{}(block_buffer[offset1]);
compType opData2 = type_convert<compType>{}(block_buffer[offset2]);
compType opData1 = type_convert<compType>(block_buffer[offset1]);
compType opData2 = type_convert<compType>(block_buffer[offset2]);
binop::calculate(opData1, opData2);
block_buffer(offset1) = type_convert<compType>{}(opData1);
block_buffer(offset1) = type_convert<compType>(opData1);
}

__syncthreads();
}

if(thread_local_id == 0)
{
compType tmpVal = type_convert<compType>{}(block_buffer[0]);
compType tmpVal = type_convert<compType>(block_buffer[0]);

binop::calculate(accuData, tmpVal);
}
Expand Down Expand Up @@ -131,13 +131,13 @@ struct BlockwiseReduction_2d_block_buffer
index_t offset2 = buffer2dDesc.CalculateOffset(
make_tuple(otherDimInd, thread_local_id + indOffset));

compType currVal1 = type_convert<compType>{}(block_buffer[offset1]);
compType currVal2 = type_convert<compType>{}(block_buffer[offset2]);
compType currVal1 = type_convert<compType>(block_buffer[offset1]);
compType currVal2 = type_convert<compType>(block_buffer[offset2]);
int currIndex1 = block_indices_buffer[offset1];
int currIndex2 = block_indices_buffer[offset2];

binop::calculate(currVal1, currVal2, currIndex1, currIndex2);
block_buffer(offset1) = type_convert<compType>{}(currVal1);
block_buffer(offset1) = type_convert<compType>(currVal1);
block_indices_buffer(offset1) = currIndex1;
}
__syncthreads();
Expand All @@ -150,7 +150,7 @@ struct BlockwiseReduction_2d_block_buffer
{
index_t offset = buffer2dDesc.CalculateOffset(make_tuple(otherDimInd, 0));

compType tmpVal = type_convert<compType>{}(block_buffer[offset]);
compType tmpVal = type_convert<compType>(block_buffer[offset]);
int tmpIndex = block_indices_buffer[offset];

binop::calculate(lAccuData, tmpVal, lAccuIndex, tmpIndex);
Expand All @@ -166,7 +166,7 @@ struct BlockwiseReduction_2d_block_buffer
for(index_t otherDimInd = 0; otherDimInd < toReduceBlocks; otherDimInd++)
{
offset = buffer2dDesc.CalculateOffset(make_tuple(thread_local_id, otherDimInd));
compType currVal = type_convert<compType>{}(block_buffer[offset]);
compType currVal = type_convert<compType>(block_buffer[offset]);
int currIndex = block_indices_buffer[offset];

binop::calculate(lAccuData, currVal, lAccuIndex, currIndex);
Expand All @@ -187,13 +187,13 @@ struct BlockwiseReduction_2d_block_buffer
index_t offset2 =
buffer2dDesc.CalculateOffset(make_tuple(thread_local_id + indOffset, 0));

compType currVal1 = type_convert<compType>{}(block_buffer[offset1]);
compType currVal2 = type_convert<compType>{}(block_buffer[offset2]);
compType currVal1 = type_convert<compType>(block_buffer[offset1]);
compType currVal2 = type_convert<compType>(block_buffer[offset2]);
int currIndex1 = block_indices_buffer[offset1];
int currIndex2 = block_indices_buffer[offset2];

binop::calculate(currVal1, currVal2, currIndex1, currIndex2);
block_buffer(offset1) = type_convert<compType>{}(currVal1);
block_buffer(offset1) = type_convert<compType>(currVal1);
block_indices_buffer(offset1) = currIndex1;
}

Expand All @@ -202,7 +202,7 @@ struct BlockwiseReduction_2d_block_buffer

if(thread_local_id == 0)
{
compType tmpVal = type_convert<compType>{}(block_buffer[0]);
compType tmpVal = type_convert<compType>(block_buffer[0]);
int tmpIndex = block_indices_buffer[0];

binop::calculate(accuData, tmpVal, accuIndex, tmpIndex);
Expand All @@ -227,9 +227,9 @@ struct BlockwiseReduction_2d_block_buffer
}
};

// Initialize the block-wise indices buffer, the index for each element in the block-wise data
// buffer
// is calculated according to its position in the buffer and the global starting index
// Initialize the block-wise indices buffer, the index for each element in the block-wise
// data buffer is calculated according to its position in the buffer and the global starting
// index
template <typename IdxBufferType>
__device__ static void init_buffer_indices(IdxBufferType& block_indices_buffer, int indexStart)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ struct ThreadwiseTensorSliceTransfer_v1r3
src_slice_origin_idx + dst_data_idx + i * dst_scalar_step_in_vector);

dst_vector.template AsType<DstData>()(i) =
type_convert<DstData>{}(src_buf[Number<src_offset>{}]);
type_convert<DstData>(src_buf[Number<src_offset>{}]);
});

const bool is_dst_valid =
Expand Down Expand Up @@ -983,7 +983,7 @@ struct ThreadwiseTensorSliceTransfer_v3
buffer_desc_.CalculateOffset(dst_data_idx + i * dst_scalar_step_in_vector);

dst_tmp_vector.template AsType<DstData>()(i) =
type_convert<DstData>{}(buffer_[Number<buffer_offset>{}]);
type_convert<DstData>(buffer_[Number<buffer_offset>{}]);
});

using dst_vector_t = typename decltype(dst_tmp_vector)::type;
Expand Down Expand Up @@ -1403,7 +1403,7 @@ struct ThreadwiseTensorSliceTransfer_v4
// TODO: if SrcData and DstData are vetor type, then static_cast may not compile
static_for<0, SrcScalarPerVector, 1>{}([&](auto i) {
dst_tmp_vector.template AsType<DstData>()(i) =
type_convert<DstData>{}(src_tmp_vector.template AsType<SrcData>()[i]);
type_convert<DstData>(src_tmp_vector.template AsType<SrcData>()[i]);
});

// copy data from dst_tmp_vector into dst_buf
Expand Down
Loading