Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions src/Storages/MergeTree/MergeTreeReadPool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,19 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
auto [required_columns, required_pre_columns, should_reorder] =
getReadTaskColumns(data, metadata_snapshot, part.data_part, column_names, prewhere_info, check_columns);

if (predict_block_size_bytes)
{
const auto & required_column_names = required_columns.getNames();
const auto & required_pre_column_names = required_pre_columns.getNames();
NameSet complete_column_names(required_column_names.begin(), required_column_names.end());
complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end());

per_part_size_predictor.emplace_back(std::make_unique<MergeTreeBlockSizePredictor>(
part.data_part, Names(complete_column_names.begin(), complete_column_names.end()), sample_block));
}
else
per_part_size_predictor.emplace_back(nullptr);

/// will be used to distinguish between PREWHERE and WHERE columns when applying filter
const auto & required_column_names = required_columns.getNames();
per_part_column_name_set.emplace_back(required_column_names.begin(), required_column_names.end());
Expand All @@ -240,14 +253,6 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
per_part_should_reorder.push_back(should_reorder);

parts_with_idx.push_back({ part.data_part, part.part_index_in_query });

if (predict_block_size_bytes)
{
per_part_size_predictor.emplace_back(std::make_unique<MergeTreeBlockSizePredictor>(
part.data_part, column_names, sample_block));
}
else
per_part_size_predictor.emplace_back(nullptr);
}

return per_part_sum_marks;
Expand Down
14 changes: 11 additions & 3 deletions src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,17 @@ try
MarkRanges mark_ranges_for_task = { all_mark_ranges.back() };
all_mark_ranges.pop_back();

auto size_predictor = (preferred_block_size_bytes == 0)
? nullptr
: std::make_unique<MergeTreeBlockSizePredictor>(data_part, ordered_names, metadata_snapshot->getSampleBlock());
std::unique_ptr<MergeTreeBlockSizePredictor> size_predictor;
if (preferred_block_size_bytes)
{
const auto & required_column_names = task_columns.columns.getNames();
const auto & required_pre_column_names = task_columns.pre_columns.getNames();
NameSet complete_column_names(required_column_names.begin(), required_column_names.end());
complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end());

size_predictor = std::make_unique<MergeTreeBlockSizePredictor>(
data_part, Names(complete_column_names.begin(), complete_column_names.end()), metadata_snapshot->getSampleBlock());
}

task = std::make_unique<MergeTreeReadTask>(
data_part, mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set,
Expand Down
14 changes: 11 additions & 3 deletions src/Storages/MergeTree/MergeTreeSelectProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,17 @@ try
storage, metadata_snapshot, data_part,
required_columns, prewhere_info, check_columns);

auto size_predictor = (preferred_block_size_bytes == 0)
? nullptr
: std::make_unique<MergeTreeBlockSizePredictor>(data_part, ordered_names, metadata_snapshot->getSampleBlock());
std::unique_ptr<MergeTreeBlockSizePredictor> size_predictor;
if (preferred_block_size_bytes)
{
const auto & required_column_names = task_columns.columns.getNames();
const auto & required_pre_column_names = task_columns.pre_columns.getNames();
NameSet complete_column_names(required_column_names.begin(), required_column_names.end());
complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end());

size_predictor = std::make_unique<MergeTreeBlockSizePredictor>(
data_part, Names(complete_column_names.begin(), complete_column_names.end()), metadata_snapshot->getSampleBlock());
}

/// will be used to distinguish between PREWHERE and WHERE columns when applying filter
const auto & column_names = task_columns.columns.getNames();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
8
4
4
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Fixture table: `str` is a regular String column, while `arr` is an ALIAS column
-- whose value is computed from `str` by extractAllGroupsHorizontal with a key=value regexp.
CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=("[^"]*?"|[^",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple();

-- Insert 500000 rows into `str` only. Each row is a comma-joined list of "<key>=<val>" pairs,
-- with keys '0Id'..'7Id' (built from range(8)) and each val computed as rand() % 8.
INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000);

-- Add `15Id` after the data has been inserted. Its DEFAULT expression depends on the ALIAS column `arr`:
-- it takes the first element of arr[2] whose paired arr[1] element equals '4Id' and converts it with toUInt16OrNull.
-- NOTE(review): presumably the already-written part does not store `15Id`, so reading it has to
-- evaluate this default from `str` -- confirm against the MergeTree ALTER semantics.
ALTER TABLE test_extract ADD COLUMN `15Id` Nullable(UInt16) DEFAULT toUInt16OrNull(arrayFirst((v, k) -> (k = '4Id'), arr[2], arr[1]));

-- The same aggregate is run three ways (no filter, PREWHERE, WHERE), each single-threaded and
-- with max_memory_usage=100000000.
-- NOTE(review): the memory cap looks intended to fail the query if the read block size is
-- mis-estimated for columns needed only to compute defaults -- confirm with the accompanying change.
SELECT uniq(15Id) FROM test_extract SETTINGS max_threads=1, max_memory_usage=100000000;

-- Same query with the `15Id < 4` condition applied through PREWHERE.
SELECT uniq(15Id) FROM test_extract PREWHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000;

-- Same condition applied through WHERE instead of PREWHERE.
SELECT uniq(15Id) FROM test_extract WHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000;