Commit 02a9a5b: Address reviewer comments dmlc#2

RAMitchell committed Sep 30, 2018
1 parent 7078d36

Showing 8 changed files with 40 additions and 11 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
@@ -3,6 +3,10 @@ XGBoost Change Log
 
 This file records the changes in xgboost library in reverse chronological order.
 
+## Master (2018.09.30)
+* BREAKING CHANGES
+  - External memory page files have changed, breaking backwards compatibility for temporary storage used during external memory training. This only affects external memory users upgrading their xgboost version - we recommend clearing all *.page files before resuming training. Model serialization is unaffected.
+
 ## v0.80 (2018.08.13)
 * **JVM packages received a major upgrade**: To consolidate the APIs and improve the user experience, we refactored the design of XGBoost4J-Spark in a significant manner. (#3387)
   - Consolidated APIs: It is now much easier to integrate XGBoost models into a Spark ML pipeline. Users can control behaviors like output leaf prediction results by setting corresponding column names. Training is now more consistent with other Estimators in Spark MLLIB: there is now one single method `fit()` to train decision trees.
23 changes: 19 additions & 4 deletions include/xgboost/data.h
@@ -309,19 +309,34 @@ class BatchIterator {
  public:
   using iterator_category = std::forward_iterator_tag;
   explicit BatchIterator(BatchIteratorImpl* impl) { impl_.reset(impl); }
 
   BatchIterator(const BatchIterator& other) {
     if (other.impl_) {
       impl_.reset(other.impl_->Clone());
     } else {
       impl_.reset();
     }
   }
-  void operator++() { ++(*impl_); }
-  const SparsePage& operator*() const { return *(*impl_); }
 
-  bool operator!=(const BatchIterator& rhs) const { return !impl_->AtEnd(); }
-  bool AtEnd() const { return impl_->AtEnd(); }
+  void operator++() {
+    CHECK(impl_ != nullptr);
+    ++(*impl_);
+  }
+
+  const SparsePage& operator*() const {
+    CHECK(impl_ != nullptr);
+    return *(*impl_);
+  }
+
+  bool operator!=(const BatchIterator& rhs) const {
+    CHECK(impl_ != nullptr);
+    return !impl_->AtEnd();
+  }
+
+  bool AtEnd() const {
+    CHECK(impl_ != nullptr);
+    return impl_->AtEnd();
+  }
 
  private:
   std::unique_ptr<BatchIteratorImpl> impl_;
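
For context, these guards change the failure mode of an empty iterator from a silent null dereference to an immediate CHECK failure. A minimal sketch of the new behavior (illustrative only, not part of this commit; assumes the declarations above and that BatchIterator lives in namespace xgboost as declared in include/xgboost/data.h):

#include <xgboost/data.h>  // BatchIterator, BatchIteratorImpl

// An iterator constructed without an implementation is now rejected loudly:
// every use path runs CHECK(impl_ != nullptr) before touching impl_.
void EmptyIteratorSketch() {
  xgboost::BatchIterator it(nullptr);  // impl_ stays empty
  // ++it;        // aborts via CHECK(impl_ != nullptr)
  // *it;         // likewise guarded
  // it.AtEnd();  // likewise guarded
}
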
5 changes: 4 additions & 1 deletion src/data/simple_dmatrix.cc
@@ -32,7 +32,10 @@ float SimpleDMatrix::GetColDensity(size_t cidx) {
 class SimpleBatchIteratorImpl : public BatchIteratorImpl {
  public:
   explicit SimpleBatchIteratorImpl(SparsePage* page) : page_(page) {}
-  const SparsePage& operator*() const override { return *page_; }
+  const SparsePage& operator*() const override {
+    CHECK(page_ != nullptr);
+    return *page_;
+  }
   void operator++() override { page_ = nullptr; }
   bool AtEnd() const override { return page_ == nullptr; }
   SimpleBatchIteratorImpl* Clone() override {
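
SimpleBatchIteratorImpl is a one-shot iterator: the first increment nulls page_, which AtEnd() then reports. A short sketch of that lifecycle (illustrative, not part of this commit; names as used inside src/data/simple_dmatrix.cc, with `page` assumed to be a valid SparsePage*):

// Illustrative lifecycle of the single-page iterator.
void SingleBatchSketch(SparsePage* page) {
  SimpleBatchIteratorImpl it(page);
  const SparsePage& batch = *it;  // ok: CHECK(page_ != nullptr) passes
  ++it;                           // page_ = nullptr, iterator exhausted
  CHECK(it.AtEnd());              // the single page has been consumed
  // *it;                         // would now fail the CHECK in operator*
  (void)batch;                    // placeholder for real work on the batch
}
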
5 changes: 3 additions & 2 deletions src/data/sparse_page_dmatrix.cc
@@ -26,8 +26,9 @@ const MetaInfo& SparsePageDMatrix::Info() const {
 
 class SparseBatchIteratorImpl : public BatchIteratorImpl {
  public:
-  explicit SparseBatchIteratorImpl(SparsePageSource* source)
-      : source_(source) {}
+  explicit SparseBatchIteratorImpl(SparsePageSource* source) : source_(source) {
+    CHECK(source_ != nullptr);
+  }
   const SparsePage& operator*() const override { return source_->Value(); }
   void operator++() override { at_end_ = !source_->Next(); }
   bool AtEnd() const override { return at_end_; }
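
This impl adapts the page source's Value()/Next() protocol to the iterator interface: dereferencing returns the source's current page, incrementing advances the source and records exhaustion in at_end_. A rough sketch of the loop this implies (illustrative, not part of this commit; names as used inside src/data/sparse_page_dmatrix.cc, with `source` assumed to be a live, positioned SparsePageSource*):

// Illustrative drain loop over an external-memory page source.
void DrainPagesSketch(SparsePageSource* source) {
  SparseBatchIteratorImpl it(source);  // constructor CHECKs source != nullptr
  while (!it.AtEnd()) {
    const SparsePage& page = *it;  // returns source_->Value()
    (void)page;                    // placeholder for real work on the page
    ++it;                          // at_end_ = !source_->Next()
  }
}
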
1 change: 1 addition & 0 deletions src/tree/updater_colmaker.cc
@@ -144,6 +144,7 @@ class ColMaker: public TreeUpdater {
     {
       // setup position
       position_.resize(gpair.size());
+      CHECK_EQ(fmat.Info().num_row_, position_.size());
       if (root_index.size() == 0) {
         std::fill(position_.begin(), position_.end(), 0);
       } else {
3 changes: 3 additions & 0 deletions tests/cpp/data/test_simple_csr_source.cc
@@ -18,6 +18,9 @@ TEST(SimpleCSRSource, SaveLoadBinary) {
   EXPECT_EQ(dmat->Info().num_col_, dmat_read->Info().num_col_);
   EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);
 
+  // Test we have non-empty batch
+  EXPECT_EQ(dmat->GetRowBatches().begin().AtEnd(), false);
+
   auto row_iter = dmat->GetRowBatches().begin();
   auto row_iter_read = dmat_read->GetRowBatches().begin();
   // Test the data read into the first row
7 changes: 5 additions & 2 deletions tests/cpp/data/test_simple_dmatrix.cc
@@ -25,8 +25,7 @@ TEST(SimpleDMatrix, RowAccess) {
 
   // Loop over the batches and count the records
   long row_count = 0;
-  for(auto &batch:dmat->GetRowBatches())
-  {
+  for (auto &batch : dmat->GetRowBatches()) {
     row_count += batch.Size();
   }
   EXPECT_EQ(row_count, dmat->Info().num_row_);
@@ -56,6 +55,10 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
     num_col_batch += 1;
     EXPECT_EQ(batch.Size(), dmat->Info().num_col_)
         << "Expected batch size = number of cells as #batches is 1.";
+    for (size_t i = 0; i < batch.Size(); ++i) {
+      EXPECT_EQ(batch[i].size(), dmat->Info().num_row_)
+          << "Expected length of each colbatch = num_row as #batches is 1.";
+    }
   }
   EXPECT_EQ(num_col_batch, 1) << "Expected number of batches to be 1";
   delete dmat;
3 changes: 1 addition & 2 deletions tests/cpp/data/test_sparse_page_dmatrix.cc
@@ -35,8 +35,7 @@ TEST(SparsePageDMatrix, RowAccess) {
 
   // Loop over the batches and count the records
   long row_count = 0;
-  for(auto &batch:dmat->GetRowBatches())
-  {
+  for (auto &batch : dmat->GetRowBatches()) {
     row_count += batch.Size();
   }
   EXPECT_EQ(row_count, dmat->Info().num_row_);
