Skip to content

Commit

Permalink
RFC: Fix filtering by virtual columns with OR expression
Browse files Browse the repository at this point in the history
Virtual columns did not supports queries with OR, for example query like
this (here `m` is the `Merge` table, see the test):

    select key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1');

Will always leads to:

    Cannot find column `value` in source stream, there are only columns ...

The reason for this is that it actually executes the following queries:

    SELECT key, value FROM default.d1 WHERE ((value = 10) AND ('v1' = 'v1')) OR ((value = 20) AND ('v1' = 'v1'));
    SELECT key FROM default.d2 WHERE 0;

And this kind of filtering is used not only for `Merge` table but also:
- `_table` for `Merge` (already mentioned)
- `_file` for `File`
- `_idx` for `S3`
- and as well as filtering `system.*` tables by `database`/`table`/...

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
  • Loading branch information
azat committed Jul 27, 2023
1 parent 5611b2f commit 68aed0d
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 9 deletions.
36 changes: 27 additions & 9 deletions src/Storages/VirtualColumnUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <Storages/VirtualColumnUtils.h>
#include <IO/WriteHelpers.h>
#include <Common/typeid_cast.h>
#include <Parsers/makeASTForLogicalFunction.h>
#include <Columns/ColumnSet.h>
#include <Functions/FunctionHelpers.h>
#include <Interpreters/ActionsVisitor.h>
Expand Down Expand Up @@ -63,14 +64,31 @@ bool isValidFunction(const ASTPtr & expression, const std::function<bool(const A
bool extractFunctions(const ASTPtr & expression, const std::function<bool(const ASTPtr &)> & is_constant, ASTs & result)
{
const auto * function = expression->as<ASTFunction>();
if (function && (function->name == "and" || function->name == "indexHint"))

if (function)
{
bool ret = true;
for (const auto & child : function->arguments->children)
ret &= extractFunctions(child, is_constant, result);
return ret;
if (function->name == "and" || function->name == "indexHint")
{
bool ret = true;
for (const auto & child : function->arguments->children)
ret &= extractFunctions(child, is_constant, result);
return ret;
}
else if (function->name == "or")
{
bool ret = true;
ASTs or_args;
for (const auto & child : function->arguments->children)
ret &= extractFunctions(child, is_constant, or_args);
/// We can keep condition only if it still OR condition (i.e. we
/// have dependent conditions for columns at both sides)
if (or_args.size() == 2)
result.push_back(makeASTForLogicalOr(std::move(or_args)));
return ret;
}
}
else if (isValidFunction(expression, is_constant))

if (isValidFunction(expression, is_constant))
{
result.push_back(expression->clone());
return true;
Expand All @@ -80,13 +98,13 @@ bool extractFunctions(const ASTPtr & expression, const std::function<bool(const
}

/// Construct a conjunction from given functions
ASTPtr buildWhereExpression(const ASTs & functions)
ASTPtr buildWhereExpression(ASTs && functions)
{
if (functions.empty())
return nullptr;
if (functions.size() == 1)
return functions[0];
return makeASTFunction("and", functions);
return makeASTForLogicalAnd(std::move(functions));
}

}
Expand Down Expand Up @@ -171,7 +189,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block
if (select.prewhere())
unmodified &= extractFunctions(select.prewhere(), is_constant, functions);

expression_ast = buildWhereExpression(functions);
expression_ast = buildWhereExpression(std::move(functions));
return unmodified;
}

Expand Down
38 changes: 38 additions & 0 deletions tests/queries/0_stateless/02840_merge__table_or_filter.reference
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- { echoOn }

select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=0, convert_query_to_cnf=0;
v1 1
v1 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=0, convert_query_to_cnf=0;
v1 1
v2 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=0;
v1 1
select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=0;
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=0, convert_query_to_cnf=1;
v1 1
v1 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=0, convert_query_to_cnf=1;
v1 1
v2 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=1;
v1 1
select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=1;
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=1, convert_query_to_cnf=0;
v1 1
v1 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=1, convert_query_to_cnf=0;
v1 1
v2 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=0;
v1 1
select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=0;
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=1, convert_query_to_cnf=1;
v1 1
v1 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=1, convert_query_to_cnf=1;
v1 1
v2 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=1;
v1 1
select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=1;
34 changes: 34 additions & 0 deletions tests/queries/0_stateless/02840_merge__table_or_filter.sql.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
drop table if exists m;
drop view if exists v1;
drop view if exists v2;
drop table if exists d1;
drop table if exists d2;

create table d1 (key Int, value Int) engine=Memory();
create table d2 (key Int, value Int) engine=Memory();

insert into d1 values (1, 10);
insert into d1 values (2, 20);

insert into d2 values (1, 10);
insert into d2 values (2, 20);

create view v1 as select * from d1;
create view v2 as select * from d2;

create table m as v1 engine=Merge(currentDatabase(), '^(v1|v2)$');

-- avoid reorder
set max_threads=1;
-- { echoOn }
{% for settings in [
'allow_experimental_analyzer=0, convert_query_to_cnf=0',
'allow_experimental_analyzer=0, convert_query_to_cnf=1',
'allow_experimental_analyzer=1, convert_query_to_cnf=0',
'allow_experimental_analyzer=1, convert_query_to_cnf=1'
] %}
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings {{ settings }};
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings {{ settings }};
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings {{ settings }};
select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings {{ settings }};
{% endfor %}

0 comments on commit 68aed0d

Please sign in to comment.