Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert sparse to full in CreateSetAndFilterOnTheFlyStep #53000

Merged
merged 3 commits into from Aug 7, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -8,6 +8,7 @@
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnSparse.h>
#include <Core/ColumnWithTypeAndName.h>
#include <base/types.h>

Expand Down Expand Up @@ -35,7 +36,11 @@ Columns getColumnsByIndices(const Chunk & chunk, const std::vector<size_t> & ind
Columns columns;
const Columns & all_cols = chunk.getColumns();
for (const auto & index : indices)
columns.push_back(all_cols.at(index));
{
auto col = recursiveRemoveSparse(all_cols.at(index));
columns.push_back(std::move(col));
}

return columns;
}

Expand Down Expand Up @@ -149,7 +154,7 @@ IProcessor::Status FilterBySetOnTheFlyTransform::prepare()
LOG_DEBUG(log, "Finished {} by [{}]: consumed {} rows in total, {} rows bypassed, result {} rows, {:.2f}% filtered",
Poco::toLower(getDescription()), fmt::join(column_names, ", "),
stat.consumed_rows, stat.consumed_rows_before_set, stat.result_rows,
100 - 100.0 * stat.result_rows / stat.consumed_rows);
stat.consumed_rows > 0 ? (100 - 100.0 * stat.result_rows / stat.consumed_rows) : 0);
}
else
{
Expand Down
@@ -0,0 +1,2 @@
3428033
3428033
19 changes: 19 additions & 0 deletions tests/queries/0_stateless/02841_join_filter_set_sparse.sql
@@ -0,0 +1,19 @@

-- Regression test: JOIN on String keys of two MergeTree tables created with
-- ratio_of_defaults_for_sparse_serialization = 0.5. The same query is run twice:
-- first with the session's current settings, then with join_algorithm = 'full_sorting_merge'
-- and max_rows_in_set_to_optimize_join = 100_000. Both runs are expected to print the same count.

DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;

-- NOTE(review): with the 0.5 ratio and mostly-empty values inserted below, column `s`
-- is presumably stored with sparse serialization — confirm against the setting's documentation.
CREATE TABLE t1 (s String) ENGINE = MergeTree ORDER BY s
SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5;

-- 2000 rows: toString(number) when number is a multiple of 13, '' for every other row.
INSERT INTO t1 SELECT if (number % 13 = 0, toString(number), '') FROM numbers(2000);

-- Same layout as t1, but the non-empty values are the multiples of 14.
CREATE TABLE t2 (s String) ENGINE = MergeTree ORDER BY s
SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5;

INSERT INTO t2 SELECT if (number % 14 = 0, toString(number), '') FROM numbers(2000);

-- Baseline run with whatever join settings are already in effect; ignore(*) references every joined column.
SELECT countIf(ignore(*) == 0) FROM t1 JOIN t2 ON t1.s = t2.s;

-- Switch the join algorithm; NOTE(review): max_rows_in_set_to_optimize_join presumably
-- enables the filter-by-set step that this test targets — confirm.
SET join_algorithm = 'full_sorting_merge', max_rows_in_set_to_optimize_join = 100_000;

-- Same query as above; its result must match the baseline run.
SELECT countIf(ignore(*) == 0) FROM t1 JOIN t2 ON t1.s = t2.s;
alexey-milovidov marked this conversation as resolved.
Show resolved Hide resolved