Skip to content

Commit

Permalink
HIVE-26722: HiveFilterSetOpTransposeRule incorrectly prunes UNION ALL operands. (apache#3748). (Alessandro Solimando, reviewed by Ayush Saxena, Simhadri Govindappa)
Browse files (browse the repository at this point in the history)
  • Loading branch information
asolimando committed Nov 17, 2022
1 parent 5cb1b10 commit 310cdd8
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 22 deletions.
Expand Up @@ -42,8 +42,6 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;

import com.google.common.collect.ImmutableList;

public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule {

public static final HiveFilterSetOpTransposeRule INSTANCE =
Expand All @@ -66,8 +64,8 @@ public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule {
* Op1 Op2
*
*
* It additionally can remove branch(es) of filter if its able to determine
* that they are going to generate empty result set.
* It additionally can remove branch(es) of filter if it's able to determine
* that they are going to generate an empty result set.
*/
private HiveFilterSetOpTransposeRule(RelBuilderFactory relBuilderFactory) {
super(relBuilderFactory);
Expand Down Expand Up @@ -111,18 +109,14 @@ public void onMatch(RelOptRuleCall call) {
final RelMetadataQuery mq = call.getMetadataQuery();
final RelOptPredicateList predicates = mq.getPulledUpPredicates(input);
if (predicates != null) {
ImmutableList.Builder<RexNode> listBuilder = ImmutableList.builder();
listBuilder.addAll(predicates.pulledUpPredicates);
listBuilder.add(newCondition);
RexExecutor executor =
final RexExecutor executor =
Util.first(filterRel.getCluster().getPlanner().getExecutor(), RexUtil.EXECUTOR);
final RexSimplify simplify = new RexSimplify(rexBuilder, RelOptPredicateList.EMPTY, executor);
final RexNode cond = RexUtil.composeConjunction(rexBuilder, listBuilder.build());
final RexNode x = simplify.simplifyUnknownAs(cond, RexUnknownAs.FALSE);
final RexSimplify simplify = new RexSimplify(rexBuilder, predicates, executor);
final RexNode x = simplify.simplifyUnknownAs(newCondition, RexUnknownAs.FALSE);
if (x.isAlwaysFalse()) {
// this is the last branch, and it is always false
// We assume alwaysFalse filter will get pushed down to TS so this
// branch so it won't read any data.
// branch won't read any data.
if (index == setOp.getInputs().size() - 1) {
lastInput = relBuilder.push(input).filter(newCondition).build();
}
Expand Down
32 changes: 32 additions & 0 deletions ql/src/test/queries/clientpositive/union_all_filter_transpose.q
@@ -0,0 +1,32 @@
# Excluding ReduceExpressionsRule(Project) is needed to avoid the simplification of CAST(NULL AS string) into NULL
set hive.cbo.rule.exclusion.regex=ReduceExpressionsRule\(Project\);

CREATE EXTERNAL TABLE t (a string, b string);

INSERT INTO t VALUES ('1000', 'b1');
INSERT INTO t VALUES ('2000', 'b2');

SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000;

EXPLAIN CBO
SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000;
Expand Up @@ -46,11 +46,10 @@ POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@v1
CBO PLAN:
HiveProject(col0=[CAST(10):INTEGER])
HiveAggregate(group=[{0}])
HiveProject($f0=[true])
HiveFilter(condition=[=($0, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveAggregate(group=[{0}])
HiveProject($f0=[CAST(10):INTEGER])
HiveFilter(condition=[=($0, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: create view v1 as
select sub.* from (select * from t1 where col0 > 2 union select * from t1 where col0 = 0) sub
Expand Down Expand Up @@ -80,11 +79,10 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@v1
#### A masked pattern was here ####
CBO PLAN:
HiveProject(col0=[CAST(10):INTEGER])
HiveAggregate(group=[{0}])
HiveProject($f0=[true])
HiveFilter(condition=[=($0, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveAggregate(group=[{0}])
HiveProject($f0=[CAST(10):INTEGER])
HiveFilter(condition=[=($0, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: select * from v1
PREHOOK: type: QUERY
Expand Down
@@ -0,0 +1,99 @@
PREHOOK: query: CREATE EXTERNAL TABLE t (a string, b string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t
POSTHOOK: query: CREATE EXTERNAL TABLE t (a string, b string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t
PREHOOK: query: INSERT INTO t VALUES ('1000', 'b1')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@t
POSTHOOK: query: INSERT INTO t VALUES ('1000', 'b1')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@t
POSTHOOK: Lineage: t.a SCRIPT []
POSTHOOK: Lineage: t.b SCRIPT []
PREHOOK: query: INSERT INTO t VALUES ('2000', 'b2')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@t
POSTHOOK: query: INSERT INTO t VALUES ('2000', 'b2')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@t
POSTHOOK: Lineage: t.a SCRIPT []
POSTHOOK: Lineage: t.b SCRIPT []
PREHOOK: query: SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000
PREHOOK: type: QUERY
PREHOOK: Input: default@t
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t
#### A masked pattern was here ####
1000 b1
1000 NULL
PREHOOK: query: EXPLAIN CBO
SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000
PREHOOK: type: QUERY
PREHOOK: Input: default@t
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN CBO
SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t
#### A masked pattern was here ####
Excluded rules: ReduceExpressionsRule\(Project\)

CBO PLAN:
HiveUnion(all=[true])
HiveProject(a=[$0], b=[$1])
HiveFilter(condition=[=(CAST($0):DOUBLE, 1000)])
HiveTableScan(table=[[default, t]], table:alias=[t])
HiveProject(a=[$0], _o__c1=[null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE"])
HiveFilter(condition=[=(CAST($0):DOUBLE, 1000)])
HiveTableScan(table=[[default, t]], table:alias=[t])

0 comments on commit 310cdd8

Please sign in to comment.