Skip to content

Commit

Permalink
Merge pull request #53000 from ClickHouse/vdimir/join_filter_set_sparse
Browse files Browse the repository at this point in the history
  • Loading branch information
vdimir committed Aug 7, 2023
2 parents b1f0cb8 + e89bd29 commit 490dfc9
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 2 deletions.
Expand Up @@ -8,6 +8,7 @@
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnSparse.h>
#include <Core/ColumnWithTypeAndName.h>
#include <base/types.h>

Expand Down Expand Up @@ -35,7 +36,11 @@ Columns getColumnsByIndices(const Chunk & chunk, const std::vector<size_t> & ind
Columns columns;
const Columns & all_cols = chunk.getColumns();
for (const auto & index : indices)
columns.push_back(all_cols.at(index));
{
auto col = recursiveRemoveSparse(all_cols.at(index));
columns.push_back(std::move(col));
}

return columns;
}

Expand Down Expand Up @@ -149,7 +154,7 @@ IProcessor::Status FilterBySetOnTheFlyTransform::prepare()
LOG_DEBUG(log, "Finished {} by [{}]: consumed {} rows in total, {} rows bypassed, result {} rows, {:.2f}% filtered",
Poco::toLower(getDescription()), fmt::join(column_names, ", "),
stat.consumed_rows, stat.consumed_rows_before_set, stat.result_rows,
100 - 100.0 * stat.result_rows / stat.consumed_rows);
stat.consumed_rows > 0 ? (100 - 100.0 * stat.result_rows / stat.consumed_rows) : 0);
}
else
{
Expand Down
@@ -0,0 +1,2 @@
3428033
3428033
22 changes: 22 additions & 0 deletions tests/queries/0_stateless/02841_join_filter_set_sparse.sql
@@ -0,0 +1,22 @@

-- Regression test: JOIN two MergeTree tables on a String key column that is
-- mostly empty strings, first with the default join settings and then with
-- join_algorithm = 'full_sorting_merge'. Both queries must return the same count.

-- Start from a clean state in case a previous run left the tables behind.
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;

-- NOTE(review): the 0.5 ratio is presumably chosen so that the mostly-default
-- column `s` gets sparse serialization -- confirm against the MergeTree settings docs.
CREATE TABLE t1 (s String) ENGINE = MergeTree ORDER BY s
SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5;

-- 2000 rows: multiples of 13 (154 rows) keep their number as text, the other 1846 rows are ''.
INSERT INTO t1 SELECT if (number % 13 = 0, toString(number), '') FROM numbers(2000);

CREATE TABLE t2 (s String) ENGINE = MergeTree ORDER BY s
SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5;

-- 2000 rows: multiples of 14 (143 rows) keep their number as text, the other 1857 rows are ''.
INSERT INTO t2 SELECT if (number % 14 = 0, toString(number), '') FROM numbers(2000);

-- Baseline with the default join settings.
-- NOTE(review): ignore(*) presumably touches every joined column while always
-- yielding 0, so the countIf counts every joined row -- confirm.
-- Expected count: 1846 * 1857 pairs of empty strings + 11 matching non-empty keys
-- (multiples of 13 * 14 = 182 below 2000) = 3428033.
SELECT countIf(ignore(*) == 0) FROM t1 JOIN t2 ON t1.s = t2.s;

-- Switch to the sorting-merge join and set the row limit for the join set
-- optimization to 100000 before repeating the query.
-- NOTE(review): the limit presumably enables the filter-by-set optimization
-- for these 2000-row tables -- confirm.
SET join_algorithm = 'full_sorting_merge', max_rows_in_set_to_optimize_join = 100_000;

-- Same query again; the result must match the baseline above.
SELECT countIf(ignore(*) == 0) FROM t1 JOIN t2 ON t1.s = t2.s;

-- Clean up.
DROP TABLE t1;
DROP TABLE t2;

0 comments on commit 490dfc9

Please sign in to comment.