Skip to content

Commit

Permalink
Add a num_segment threshold when loading multiple big files (#2067)
Browse files Browse the repository at this point in the history
Until vertical compaction is available to alleviate compaction memory usage,
a threshold on the number of segment files written by a single load is necessary.
As a temporary solution, the threshold reuses config::tablet_max_versions.
  • Loading branch information
chaoyli committed Dec 13, 2021
1 parent 55997a0 commit 3a2b3b6
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 0 deletions.
2 changes: 2 additions & 0 deletions be/src/storage/rowset/beta_rowset_writer.h
Expand Up @@ -73,6 +73,8 @@ class BetaRowsetWriter : public RowsetWriter {

// Returns the row counter kept in _num_rows_written.
int64_t num_rows() override { return _num_rows_written; }

// Returns the segment counter kept in _num_segment. MemTable::flush() compares
// this value against config::tablet_max_versions to reject loads that produce
// too many segment files.
int num_segment() override { return _num_segment; }

// Returns the accumulated size kept in _total_data_size.
// NOTE(review): unit is presumably bytes -- confirm against where the member is updated.
int64_t total_data_size() override { return _total_data_size; }

// Returns the rowset id stored in this writer's _context.
RowsetId rowset_id() override { return _context.rowset_id; }
Expand Down
2 changes: 2 additions & 0 deletions be/src/storage/rowset/rowset_writer.h
Expand Up @@ -113,6 +113,8 @@ class RowsetWriter {

// Row count reported by the writer; pure virtual, so every concrete writer must provide it.
virtual int64_t num_rows() = 0;

// Segment count reported by the writer; pure virtual, so every concrete writer
// must provide it. MemTable::flush() checks this value against
// config::tablet_max_versions after flushing.
virtual int num_segment() = 0;

// Total data size reported by the writer; pure virtual, so every concrete writer must provide it.
virtual int64_t total_data_size() = 0;

// Id of the rowset associated with the writer; pure virtual, so every concrete writer must provide it.
virtual RowsetId rowset_id() = 0;
Expand Down
2 changes: 2 additions & 0 deletions be/src/storage/rowset/vectorized/rowset_writer_adapter.h
Expand Up @@ -54,6 +54,8 @@ class RowsetWriterAdapter : public RowsetWriter {

// Forwards to the wrapped writer: returns _writer->num_rows().
int64_t num_rows() override { return _writer->num_rows(); }

// Forwards to the wrapped writer: returns _writer->num_segment().
int num_segment() override { return _writer->num_segment(); }

// Forwards to the wrapped writer: returns _writer->total_data_size().
int64_t total_data_size() override { return _writer->total_data_size(); }

// Forwards to the wrapped writer: returns _writer->rowset_id().
RowsetId rowset_id() override { return _writer->rowset_id(); }
Expand Down
6 changes: 6 additions & 0 deletions be/src/storage/vectorized/memtable.cpp
Expand Up @@ -203,6 +203,12 @@ OLAPStatus MemTable::flush() {
RETURN_NOT_OK(_rowset_writer->flush_chunk_with_deletes(*_result_chunk, *_deletes));
}
}
if (_rowset_writer->num_segment() > config::tablet_max_versions) {
LOG(WARNING) << "Too many segment files in one load. tablet=" << _tablet_id
<< ", segment_count=" << _rowset_writer->num_segment()
<< ", limit=" << config::tablet_max_versions;
return OLAP_ERR_OTHER_ERROR;
}
StarRocksMetrics::instance()->memtable_flush_total.increment(1);
StarRocksMetrics::instance()->memtable_flush_duration_us.increment(duration_ns / 1000);
VLOG(1) << "memtable flush: " << duration_ns / 1000 << "us";
Expand Down
2 changes: 2 additions & 0 deletions be/test/storage/vectorized/rowset_merger_test.cpp
Expand Up @@ -53,6 +53,8 @@ class TestRowsetWriter : public RowsetWriter {

// Reports the number of entries currently held in all_pks.
int64_t num_rows() override { return all_pks->size(); }

// Test stub: always reports 0 segments.
int num_segment() override { return 0; }

// Test stub: always reports a data size of 0.
int64_t total_data_size() override { return 0; }

// Test stub: returns a default-constructed RowsetId.
RowsetId rowset_id() override { return RowsetId(); }
Expand Down

0 comments on commit 3a2b3b6

Please sign in to comment.