Skip to content

Commit

Permalink
Launch bounds doubles occupancy.
Browse files Browse the repository at this point in the history
  • Loading branch information
RAMitchell committed Jun 23, 2022
1 parent 7e3f548 commit 9a3eb14
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/tree/gpu_hist/evaluate_splits.cu
Expand Up @@ -199,7 +199,7 @@ __device__ void EvaluateFeature(
}

template <int BLOCK_THREADS, typename GradientSumT>
__global__ void EvaluateSplitsKernel(bst_feature_t number_active_features,common::Span<const EvaluateSplitInputs> d_inputs,
__global__ __launch_bounds__(BLOCK_THREADS) void EvaluateSplitsKernel(bst_feature_t number_active_features,common::Span<const EvaluateSplitInputs> d_inputs,
const EvaluateSplitSharedInputs shared_inputs,
common::Span<bst_feature_t> sorted_idx,
TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
Expand Down Expand Up @@ -233,6 +233,7 @@ __global__ void EvaluateSplitsKernel(bst_feature_t number_active_features,common
const auto input_idx = blockIdx.x / number_active_features;
const EvaluateSplitInputs &inputs = d_inputs[input_idx];
// One block for each feature. Features are sampled, so fidx != blockIdx.x

int fidx = inputs.feature_set[blockIdx.x % number_active_features];

if (common::IsCat(shared_inputs.feature_types, fidx)) {
Expand Down

0 comments on commit 9a3eb14

Please sign in to comment.