Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement] make lake pk compaction upper threshold configurable #35129

Merged
merged 1 commit into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,8 @@ CONF_mInt64(update_compaction_size_threshold, "268435456");
CONF_mInt64(update_compaction_result_bytes, "1073741824");
// This config controls the io amp ratio of delvec files.
CONF_mInt32(update_compaction_delvec_file_io_amp_ratio, "2");
// This config defines the maximum fraction of a tablet's total data size
// (e.g. 0.5 means half of the tablet) that a single compaction is allowed to pick.
CONF_mDouble(update_compaction_ratio_threshold, "0.5");

CONF_mInt32(repair_compaction_interval_seconds, "600"); // 10 min
CONF_Int32(manual_compaction_threads, "4");
Expand Down
11 changes: 4 additions & 7 deletions be/src/storage/lake/compaction_policy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,8 @@ StatusOr<std::vector<RowsetPtr>> PrimaryCompactionPolicy::pick_rowsets(
std::priority_queue<RowsetCandidate> rowset_queue;
const auto tablet_id = tablet_metadata->id();
const auto tablet_version = tablet_metadata->version();
const auto tablet_data_size = _get_data_size(tablet_metadata);
const int64_t compaction_data_size_threshold =
static_cast<int64_t>((double)_get_data_size(tablet_metadata) * config::update_compaction_ratio_threshold);
for (const auto& rowset_pb : tablet_metadata->rowsets()) {
RowsetStat stat;
stat.num_rows = rowset_pb.num_rows();
Expand All @@ -167,18 +168,14 @@ StatusOr<std::vector<RowsetPtr>> PrimaryCompactionPolicy::pick_rowsets(
while (!rowset_queue.empty()) {
const auto& rowset_candidate = rowset_queue.top();
cur_compaction_result_bytes += rowset_candidate.read_bytes();
if (input_rowsets.size() > 0 &&
cur_compaction_result_bytes > std::max(config::update_compaction_result_bytes * 2, tablet_data_size / 2)) {
break;
}
input_rowsets.emplace_back(std::make_shared<Rowset>(tablet, std::move(rowset_candidate.rowset_meta_ptr)));
if (has_dels != nullptr) {
has_dels->push_back(rowset_candidate.delete_bytes() > 0);
}
input_infos << input_rowsets.back()->id() << "|";

// Allow merging up to a configurable fraction of this tablet's data
// (see config::update_compaction_ratio_threshold).
if (cur_compaction_result_bytes > std::max(config::update_compaction_result_bytes, tablet_data_size / 2) ||
if (cur_compaction_result_bytes >
std::max(config::update_compaction_result_bytes, compaction_data_size_threshold) ||
input_rowsets.size() >= config::max_update_compaction_num_singleton_deltas) {
break;
}
Expand Down