[c++][fix] Support Quantized Training with Categorical Features on CPU (#6301)

* support quantized training with categorical features on cpu

* remove white spaces

* add tests for quantized training with categorical features

* skip tests for cuda version

* fix cases when only 1 data block in row-wise quantized histogram construction with 8 inner bits

* remove useless capture

* fix compilation warnings

* revert useless changes

* revert useless change

* separate functions in feature histogram into cpp file

* add feature_histogram.o in Makevars
shiyu1994 committed Feb 23, 2024
1 parent 8b61a15 commit 776c5c3
Showing 10 changed files with 818 additions and 301 deletions.
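
For context, gradient quantization in LightGBM is enabled through the use_quantized_grad and num_grad_quant_bins parameters, and categorical features are declared via categorical_feature; this commit makes that combination usable on CPU. A minimal Python sketch of the kind of scenario the new tests cover follows (the synthetic dataset and parameter values are illustrative, not taken from this commit):

import numpy as np
import lightgbm as lgb

# Synthetic data with one categorical column (integer codes in column 0).
rng = np.random.default_rng(42)
X = rng.random((1000, 5))
X[:, 0] = rng.integers(0, 10, size=1000)
y = rng.random(1000)

train_set = lgb.Dataset(X, label=y, categorical_feature=[0])
params = {
    "objective": "regression",
    "device_type": "cpu",
    "use_quantized_grad": True,   # enable quantized (low-bit) gradient training
    "num_grad_quant_bins": 4,     # number of bins used to discretize gradients
    "verbose": -1,
}
booster = lgb.train(params, train_set, num_boost_round=10)
print(booster.predict(X)[:5])
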
1 change: 1 addition & 0 deletions R-package/src/Makevars.in
@@ -46,6 +46,7 @@ OBJECTS = \
     network/linkers_socket.o \
     network/network.o \
     treelearner/data_parallel_tree_learner.o \
+    treelearner/feature_histogram.o \
     treelearner/feature_parallel_tree_learner.o \
     treelearner/gpu_tree_learner.o \
     treelearner/gradient_discretizer.o \
1 change: 1 addition & 0 deletions R-package/src/Makevars.win.in
@@ -47,6 +47,7 @@ OBJECTS = \
     network/linkers_socket.o \
     network/network.o \
     treelearner/data_parallel_tree_learner.o \
+    treelearner/feature_histogram.o \
     treelearner/feature_parallel_tree_learner.o \
     treelearner/gpu_tree_learner.o \
     treelearner/gradient_discretizer.o \
8 changes: 5 additions & 3 deletions src/io/train_share_states.cpp
@@ -62,15 +62,17 @@ void MultiValBinWrapper::HistMove(const std::vector<hist_t,
         reinterpret_cast<int64_t*>(origin_hist_data_) + hist_move_dest_[i] / 2);
     }
   } else if (HIST_BITS == 16) {
-    const int32_t* src = reinterpret_cast<const int32_t*>(hist_buf.data()) + hist_buf.size() / 2 -
-      static_cast<size_t>(num_bin_aligned_);
     if (is_use_subcol_) {
+      const int32_t* src = reinterpret_cast<const int32_t*>(hist_buf.data()) + hist_buf.size() / 2 -
+        static_cast<size_t>(num_bin_aligned_);
       #pragma omp parallel for schedule(static) num_threads(num_threads_)
       for (int i = 0; i < static_cast<int>(hist_move_src_.size()); ++i) {
         std::copy_n(src + hist_move_src_[i] / 2, hist_move_size_[i] / 2,
                     reinterpret_cast<int32_t*>(origin_hist_data_) + hist_move_dest_[i] / 2);
       }
     } else {
+      CHECK_EQ(INNER_HIST_BITS, 8);
+      const int32_t* src = reinterpret_cast<const int32_t*>(hist_buf.data()) + hist_buf.size() / 2;
       int32_t* orig_ptr = reinterpret_cast<int32_t*>(origin_hist_data_);
       #pragma omp parallel for schedule(static) num_threads(num_threads_)
       for (int i = 0; i < num_bin_; ++i) {
@@ -148,7 +150,7 @@ void MultiValBinWrapper::HistMerge(std::vector<hist_t,
       }
     }
   } else if (HIST_BITS == 16 && INNER_HIST_BITS == 8) {
-    int32_t* dst = reinterpret_cast<int32_t*>(hist_buf->data()) + hist_buf->size() / 2 - static_cast<size_t>(num_bin_aligned_);
+    int32_t* dst = reinterpret_cast<int32_t*>(hist_buf->data()) + hist_buf->size() / 2;
     std::memset(reinterpret_cast<void*>(dst), 0, num_bin_ * kInt16HistBufferEntrySize);
     #pragma omp parallel for schedule(static, 1) num_threads(num_threads_)
     for (int t = 0; t < n_bin_block; ++t) {
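
The train_share_states.cpp change above appears to move the merged 16-bit histogram's offset within the shared buffer (from hist_buf size/2 minus num_bin_aligned_ to size/2) for the 8-inner-bit path, matching HistMerge and HistMove, which the commit message says previously broke when row-wise quantized histogram construction used only one data block. A rough Python sketch of a run intended to exercise that path; force_row_wise and the tiny dataset are illustrative choices meant to keep the data in a single block, not values taken from this commit's tests:

import numpy as np
import lightgbm as lgb

# Very small dataset so row-wise histogram construction likely uses a single data block.
rng = np.random.default_rng(0)
X = rng.random((200, 3))
X[:, 0] = rng.integers(0, 4, size=200)   # categorical codes
y = (X[:, 1] > 0.5).astype(float)

params = {
    "objective": "binary",
    "device_type": "cpu",
    "force_row_wise": True,        # force row-wise (multi-value bin) histogram construction
    "use_quantized_grad": True,    # quantized gradients -> low-bit histogram buffers
    "num_grad_quant_bins": 4,
    "min_data_in_leaf": 5,
    "verbose": -1,
}
train_set = lgb.Dataset(X, label=y, categorical_feature=[0])
booster = lgb.train(params, train_set, num_boost_round=5)
assert np.all(np.isfinite(booster.predict(X)))
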
