From 3a2b3b66bd3bef7cd7494e3ff341fe054ae1e613 Mon Sep 17 00:00:00 2001 From: lichaoyong Date: Mon, 13 Dec 2021 19:51:01 +0800 Subject: [PATCH] Add a num_segment threshold when loading multiple big files (#2067) Until vertical compaction is in use, a threshold on the number of segments is necessary to alleviate compaction memory usage. The threshold reuses config::tablet_max_versions because this is a temporary solution. --- be/src/storage/rowset/beta_rowset_writer.h | 2 ++ be/src/storage/rowset/rowset_writer.h | 2 ++ be/src/storage/rowset/vectorized/rowset_writer_adapter.h | 2 ++ be/src/storage/vectorized/memtable.cpp | 6 ++++++ be/test/storage/vectorized/rowset_merger_test.cpp | 2 ++ 5 files changed, 14 insertions(+) diff --git a/be/src/storage/rowset/beta_rowset_writer.h b/be/src/storage/rowset/beta_rowset_writer.h index b03dfb5804156..551a56efe0c94 100644 --- a/be/src/storage/rowset/beta_rowset_writer.h +++ b/be/src/storage/rowset/beta_rowset_writer.h @@ -73,6 +73,8 @@ class BetaRowsetWriter : public RowsetWriter { int64_t num_rows() override { return _num_rows_written; } + int num_segment() override { return _num_segment; } + int64_t total_data_size() override { return _total_data_size; } RowsetId rowset_id() override { return _context.rowset_id; } diff --git a/be/src/storage/rowset/rowset_writer.h b/be/src/storage/rowset/rowset_writer.h index 61524550ad106..700993aa082c1 100644 --- a/be/src/storage/rowset/rowset_writer.h +++ b/be/src/storage/rowset/rowset_writer.h @@ -113,6 +113,8 @@ class RowsetWriter { virtual int64_t num_rows() = 0; + virtual int num_segment() = 0; + virtual int64_t total_data_size() = 0; virtual RowsetId rowset_id() = 0; diff --git a/be/src/storage/rowset/vectorized/rowset_writer_adapter.h b/be/src/storage/rowset/vectorized/rowset_writer_adapter.h index 3daed2016c29d..be509ca7795dd 100644 --- a/be/src/storage/rowset/vectorized/rowset_writer_adapter.h +++ b/be/src/storage/rowset/vectorized/rowset_writer_adapter.h @@ -54,6 +54,8 @@ class RowsetWriterAdapter : public 
RowsetWriter { int64_t num_rows() override { return _writer->num_rows(); } + int num_segment() override { return _writer->num_segment(); } + int64_t total_data_size() override { return _writer->total_data_size(); } RowsetId rowset_id() override { return _writer->rowset_id(); } diff --git a/be/src/storage/vectorized/memtable.cpp b/be/src/storage/vectorized/memtable.cpp index 5b773c9507381..955bf1591417a 100644 --- a/be/src/storage/vectorized/memtable.cpp +++ b/be/src/storage/vectorized/memtable.cpp @@ -203,6 +203,12 @@ OLAPStatus MemTable::flush() { RETURN_NOT_OK(_rowset_writer->flush_chunk_with_deletes(*_result_chunk, *_deletes)); } } + if (_rowset_writer->num_segment() > config::tablet_max_versions) { + LOG(WARNING) << "Too many segment files in one load. tablet=" << _tablet_id + << ", segment_count=" << _rowset_writer->num_segment() + << ", limit=" << config::tablet_max_versions; + return OLAP_ERR_OTHER_ERROR; + } StarRocksMetrics::instance()->memtable_flush_total.increment(1); StarRocksMetrics::instance()->memtable_flush_duration_us.increment(duration_ns / 1000); VLOG(1) << "memtable flush: " << duration_ns / 1000 << "us"; diff --git a/be/test/storage/vectorized/rowset_merger_test.cpp b/be/test/storage/vectorized/rowset_merger_test.cpp index 98bfe4bdc23e6..073f3b6731d3d 100644 --- a/be/test/storage/vectorized/rowset_merger_test.cpp +++ b/be/test/storage/vectorized/rowset_merger_test.cpp @@ -53,6 +53,8 @@ class TestRowsetWriter : public RowsetWriter { int64_t num_rows() override { return all_pks->size(); } + int num_segment() override { return 0; } + int64_t total_data_size() override { return 0; } RowsetId rowset_id() override { return RowsetId(); }