Skip to content

Commit

Permalink
[Enhancement] make lake pk compaction upper threshold configurable
Browse files Browse the repository at this point in the history
Signed-off-by: luohaha <18810541851@163.com>
  • Loading branch information
luohaha committed Nov 15, 2023
1 parent 852240f commit 50e1eda
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
2 changes: 2 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,8 @@ CONF_mInt64(update_compaction_size_threshold, "268435456");
CONF_mInt64(update_compaction_result_bytes, "1073741824");
// This config controls the io amp ratio of delvec files.
CONF_mInt32(update_compaction_delvec_file_io_amp_ratio, "2");
// This config defines the maximum ratio (a fraction, e.g. 0.5 = half) of a tablet's data size allowed per compaction
CONF_mDouble(update_compaction_ratio_threshold, "0.5");

CONF_mInt32(repair_compaction_interval_seconds, "600"); // 10 min
CONF_Int32(manual_compaction_threads, "4");
Expand Down
9 changes: 6 additions & 3 deletions be/src/storage/lake/compaction_policy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,8 @@ StatusOr<std::vector<RowsetPtr>> PrimaryCompactionPolicy::pick_rowsets(
std::priority_queue<RowsetCandidate> rowset_queue;
const auto tablet_id = tablet_metadata->id();
const auto tablet_version = tablet_metadata->version();
const auto tablet_data_size = _get_data_size(tablet_metadata);
const int64_t compaction_data_size_threhold =
std::static_cast<int64_t>(_get_data_size(tablet_metadata) * config::update_compaction_ratio_threshold);
for (const auto& rowset_pb : tablet_metadata->rowsets()) {
RowsetStat stat;
stat.num_rows = rowset_pb.num_rows();
Expand All @@ -168,7 +169,8 @@ StatusOr<std::vector<RowsetPtr>> PrimaryCompactionPolicy::pick_rowsets(
const auto& rowset_candidate = rowset_queue.top();
cur_compaction_result_bytes += rowset_candidate.read_bytes();
if (input_rowsets.size() > 0 &&
cur_compaction_result_bytes > std::max(config::update_compaction_result_bytes * 2, tablet_data_size / 2)) {
cur_compaction_result_bytes >
std::max(config::update_compaction_result_bytes * 2, compaction_data_size_threhold)) {
break;
}
input_rowsets.emplace_back(std::make_shared<Rowset>(tablet, std::move(rowset_candidate.rowset_meta_ptr)));
Expand All @@ -178,7 +180,8 @@ StatusOr<std::vector<RowsetPtr>> PrimaryCompactionPolicy::pick_rowsets(
input_infos << input_rowsets.back()->id() << "|";

// Allow merging up to the configured fraction (update_compaction_ratio_threshold) of this tablet's data
if (cur_compaction_result_bytes > std::max(config::update_compaction_result_bytes, tablet_data_size / 2) ||
if (cur_compaction_result_bytes >
std::max(config::update_compaction_result_bytes, compaction_data_size_threhold) ||
input_rowsets.size() >= config::max_update_compaction_num_singleton_deltas) {
break;
}
Expand Down

0 comments on commit 50e1eda

Please sign in to comment.