Skip to content

Commit

Permalink
Fix external memory, gpu_hist and subsampling combination bug. (dmlc#…
Browse files Browse the repository at this point in the history
…7476)

- The error happens because when reading from external memory the batch is
  reassembled for every new iteration. The variable `original_page_` is
  initialized from the first batch, when the constructor of `GradiendBasedSample`
  is called. After iterating through the batches the original memory is not
  accessible, so when trying to access the memory pointed by `original_page_`
  causes an error.

- The solution is instead of accessing data from the `original_page_`, to access
  the data from the first page of the available batch.

fix dmlc#7476
  • Loading branch information
GinkoBalboa committed Nov 23, 2021
1 parent b124a27 commit e2e63db
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
24 changes: 13 additions & 11 deletions src/tree/gpu_hist/gradient_based_sampler.cu
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,7 @@ ExternalMemoryUniformSampling::ExternalMemoryUniformSampling(EllpackPageImpl con
size_t n_rows,
BatchParam batch_param,
float subsample)
: original_page_(page),
batch_param_(std::move(batch_param)),
: batch_param_(std::move(batch_param)),
subsample_(subsample),
sample_row_index_(n_rows) {}

Expand Down Expand Up @@ -218,15 +217,17 @@ GradientBasedSample ExternalMemoryUniformSampling::Sample(common::Span<GradientP
sample_row_index_.begin(),
ClearEmptyRows());

auto batch_iterator = dmat->GetBatches<EllpackPage>(batch_param_);
auto first_page = (*batch_iterator.begin()).Impl();
// Create a new ELLPACK page with empty rows.
page_.reset(); // Release the device memory first before reallocating
page_.reset(new EllpackPageImpl(
batch_param_.gpu_id, original_page_->Cuts(), original_page_->is_dense,
original_page_->row_stride, sample_rows));
batch_param_.gpu_id, first_page->Cuts(), first_page->is_dense,
first_page->row_stride, sample_rows));

// Compact the ELLPACK pages into the single sample page.
thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
for (auto& batch : dmat->GetBatches<EllpackPage>(batch_param_)) {
for (auto& batch : batch_iterator) {
page_->Compact(batch_param_.gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
}

Expand Down Expand Up @@ -263,8 +264,7 @@ ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling(
size_t n_rows,
BatchParam batch_param,
float subsample)
: original_page_(page),
batch_param_(std::move(batch_param)),
: batch_param_(std::move(batch_param)),
subsample_(subsample),
threshold_(n_rows + 1, 0.0f),
grad_sum_(n_rows, 0.0f),
Expand Down Expand Up @@ -300,15 +300,17 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(common::Span<Gra
sample_row_index_.begin(),
ClearEmptyRows());

auto batch_iterator = dmat->GetBatches<EllpackPage>(batch_param_);
auto first_page = (*batch_iterator.begin()).Impl();
// Create a new ELLPACK page with empty rows.
page_.reset(); // Release the device memory first before reallocating
page_.reset(new EllpackPageImpl(batch_param_.gpu_id, original_page_->Cuts(),
original_page_->is_dense,
original_page_->row_stride, sample_rows));
page_.reset(new EllpackPageImpl(batch_param_.gpu_id, first_page->Cuts(),
first_page->is_dense,
first_page->row_stride, sample_rows));

// Compact the ELLPACK pages into the single sample page.
thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
for (auto& batch : dmat->GetBatches<EllpackPage>(batch_param_)) {
for (auto& batch : batch_iterator) {
page_->Compact(batch_param_.gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
}

Expand Down
2 changes: 0 additions & 2 deletions src/tree/gpu_hist/gradient_based_sampler.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ class ExternalMemoryUniformSampling : public SamplingStrategy {
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;

private:
EllpackPageImpl const* original_page_;
BatchParam batch_param_;
float subsample_;
std::unique_ptr<EllpackPageImpl> page_;
Expand Down Expand Up @@ -107,7 +106,6 @@ class ExternalMemoryGradientBasedSampling : public SamplingStrategy {
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;

private:
EllpackPageImpl const* original_page_;
BatchParam batch_param_;
float subsample_;
dh::caching_device_vector<float> threshold_;
Expand Down

0 comments on commit e2e63db

Please sign in to comment.