diff --git a/src/dataflow/src/render/constant.rs b/src/dataflow/src/render/constant.rs new file mode 100644 index 000000000000..4bd0459a3f6e --- /dev/null +++ b/src/dataflow/src/render/constant.rs @@ -0,0 +1,55 @@ +// Copyright Materialize, Inc. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use differential_dataflow::lattice::Lattice; +use differential_dataflow::{AsCollection, Collection}; + +use timely::dataflow::operators::map::Map; +use timely::dataflow::operators::to_stream::ToStream; +use timely::dataflow::Scope; +use timely::progress::{timestamp::Refines, Timestamp}; + +use expr::RelationExpr; +use repr::Row; + +use crate::operator::CollectionExt; +use crate::render::context::Context; + +impl Context +where + G: Scope, + G::Timestamp: Lattice + Refines, + T: Timestamp + Lattice, +{ + pub fn render_constant( + &mut self, + relation_expr: &RelationExpr, + scope: &mut G, + worker_index: usize, + ) { + // The constant collection is instantiated only on worker zero. + if let RelationExpr::Constant { rows, .. } = relation_expr { + let rows = if worker_index == 0 { + rows.clone() + } else { + vec![] + }; + + let collection = rows + .to_stream(scope) + .map(|(x, diff)| (x, timely::progress::Timestamp::minimum(), diff)) + .as_collection(); + + let err_collection = Collection::empty(scope); + + self.collections + .insert(relation_expr.clone(), (collection, err_collection)); + } + } +} diff --git a/src/dataflow/src/render/filter.rs b/src/dataflow/src/render/filter.rs new file mode 100644 index 000000000000..e7538475da8c --- /dev/null +++ b/src/dataflow/src/render/filter.rs @@ -0,0 +1,59 @@ +// Copyright Materialize, Inc. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use differential_dataflow::lattice::Lattice; +use differential_dataflow::Collection; + +use timely::dataflow::Scope; +use timely::progress::{timestamp::Refines, Timestamp}; + +use dataflow_types::DataflowError; +use expr::{RelationExpr, ScalarExpr}; +use repr::{Datum, Row}; + +use crate::operator::CollectionExt; +use crate::render::context::Context; + +impl Context +where + G: Scope, + G::Timestamp: Lattice + Refines, + T: Timestamp + Lattice, +{ + pub fn render_filter( + &mut self, + input: &RelationExpr, + predicates: Vec, + ) -> (Collection, Collection) { + let (ok_collection, err_collection) = self.collection(input).unwrap(); + let (ok_collection, new_err_collection) = render_filter_inner(ok_collection, predicates); + let err_collection = err_collection.concat(&new_err_collection); + (ok_collection, err_collection) + } +} + +pub fn render_filter_inner( + collection: Collection, + predicates: Vec, +) -> (Collection, Collection) +where + G: Scope, + G::Timestamp: Lattice, +{ + let temp_storage = repr::RowArena::new(); + collection.filter_fallible(move |input_row| { + let datums = input_row.unpack(); + for p in &predicates { + if p.eval(&datums, &temp_storage)? != Datum::True { + return Ok(false); + } + } + Ok::<_, DataflowError>(true) + }) +} diff --git a/src/dataflow/src/render/join.rs b/src/dataflow/src/render/join.rs index a71599702522..baa08b15f2bb 100644 --- a/src/dataflow/src/render/join.rs +++ b/src/dataflow/src/render/join.rs @@ -9,7 +9,7 @@ use differential_dataflow::lattice::Lattice; use differential_dataflow::operators::arrange::arrangement::Arrange; -use differential_dataflow::operators::join::JoinCore; +use differential_dataflow::operators::join::{Join, JoinCore}; use differential_dataflow::trace::implementations::ord::OrdValSpine; use differential_dataflow::Collection; use timely::dataflow::Scope; @@ -17,7 +17,7 @@ use timely::progress::{timestamp::Refines, Timestamp}; use dataflow_types::*; use expr::{RelationExpr, ScalarExpr}; -use repr::{Datum, Row, RowArena}; +use repr::{Datum, RelationType, Row, RowArena}; use crate::operator::CollectionExt; use crate::render::context::{ArrangementFlavor, Context}; @@ -277,4 +277,73 @@ where panic!("render_join called on invalid expression.") } } + + pub fn render_semi_join( + &mut self, + input: &RelationExpr, + keys: &[ScalarExpr], + scope: &mut G, + worker_index: usize, + ) { + if let RelationExpr::Join { + inputs, + equivalences, + implementation: expr::JoinImplementation::Semijoin, + .. + } = input + { + // Extract the constant to semijoin against from the equivalence classes + let temp_storage = RowArena::new(); + let constant_row = keys + .iter() + .map(|k| { + let eval_result = equivalences.iter().find_map(|e| { + if e.contains(k) { + return e.iter().find_map(|s| match s { + ScalarExpr::Literal(_, _) => Some(s.eval(&[], &temp_storage)), + _ => None, + }); + } + None + }); + match eval_result { + Some(Ok(datum)) => datum, + Some(Err(_)) => { + unreachable!("Error occurred when evaluating semijoin constant") + } + None => unreachable!("No constant for semijoin key"), + } + }) + .collect::>(); + + // Create a RelationExpr representing the constant + // Having the RelationType is technically not an accurate + // representation of the expression, but relation type doesn't + // matter when it comes to rendering. + let mut row_packer = repr::RowPacker::new(); + let constant_row_expr = RelationExpr::Constant { + rows: vec![(row_packer.pack(constant_row), 1)], + typ: RelationType::empty(), + }; + + self.render_constant(&constant_row_expr, scope, worker_index); + + let (constant_collection, errs) = self.collection(&constant_row_expr).unwrap(); + match self.arrangement(&inputs[0], keys) { + Some(ArrangementFlavor::Local(oks, es)) => { + let result = oks.semijoin(&constant_collection).arrange_named("Semijoin"); + let es = errs.concat(&es.as_collection(|k, _v| k.clone())).arrange(); + self.set_local(input, keys, (result, es)); + } + Some(ArrangementFlavor::Trace(_gid, oks, es)) => { + let result = oks.semijoin(&constant_collection).arrange_named("Semijoin"); + let es = errs.concat(&es.as_collection(|k, _v| k.clone())).arrange(); + self.set_local(input, keys, (result, es)); + } + None => { + panic!("Arrangement alarmingly absent!"); + } + }; + } + } } diff --git a/src/dataflow/src/render/mod.rs b/src/dataflow/src/render/mod.rs index 6666b7acfde4..68ffafc28ec4 100644 --- a/src/dataflow/src/render/mod.rs +++ b/src/dataflow/src/render/mod.rs @@ -110,7 +110,6 @@ use differential_dataflow::operators::arrange::upsert::arrange_from_upsert; use differential_dataflow::{AsCollection, Collection}; use timely::communication::Allocate; use timely::dataflow::operators::generic::operator; -use timely::dataflow::operators::to_stream::ToStream; use timely::dataflow::operators::unordered_input::UnorderedInput; use timely::dataflow::operators::Map; use timely::dataflow::Scope; @@ -139,8 +138,10 @@ use crate::server::{TimestampDataUpdates, TimestampMetadataUpdates}; use crate::source::{FileSourceInfo, KafkaSourceInfo}; mod arrange_by; +mod constant; mod context; mod delta_join; +mod filter; mod join; mod reduce; mod threshold; @@ -737,23 +738,8 @@ where // a collection or an arrangement. In either case, we associate the result with // the `relation_expr` argument in the context. match relation_expr { - // The constant collection is instantiated only on worker zero. - RelationExpr::Constant { rows, .. } => { - let rows = if worker_index == 0 { - rows.clone() - } else { - vec![] - }; - - let collection = rows - .to_stream(scope) - .map(|(x, diff)| (x, timely::progress::Timestamp::minimum(), diff)) - .as_collection(); - - let err_collection = Collection::empty(scope); - - self.collections - .insert(relation_expr.clone(), (collection, err_collection)); + RelationExpr::Constant { .. } => { + self.render_constant(relation_expr, scope, worker_index); } // A get should have been loaded into the context, and it is surprising to @@ -908,6 +894,10 @@ where .render_delta_join(input, predicates, scope, worker_index, |t| { t.saturating_sub(1) }), + expr::JoinImplementation::Semijoin => { + self.ensure_rendered(input, scope, worker_index); + self.render_filter(input, predicates.clone()) + } expr::JoinImplementation::Unimplemented => { panic!("Attempt to render unimplemented join"); } @@ -915,21 +905,7 @@ where (ok_collection, err_collection.map(Into::into)) } else { self.ensure_rendered(input, scope, worker_index); - let temp_storage = RowArena::new(); - let predicates = predicates.clone(); - let (ok_collection, err_collection) = self.collection(input).unwrap(); - let (ok_collection, new_err_collection) = - ok_collection.filter_fallible(move |input_row| { - let datums = input_row.unpack(); - for p in &predicates { - if p.eval(&datums, &temp_storage)? != Datum::True { - return Ok(false); - } - } - Ok::<_, DataflowError>(true) - }); - let err_collection = err_collection.concat(&new_err_collection); - (ok_collection, err_collection) + self.render_filter(input, predicates.clone()) }; self.collections.insert(relation_expr.clone(), collections); } @@ -957,6 +933,11 @@ where ); self.collections.insert(relation_expr.clone(), collection); } + expr::JoinImplementation::Semijoin => { + if let RelationExpr::ArrangeBy { keys, .. } = &inputs[0] { + self.render_semi_join(relation_expr, &keys[0], scope, worker_index); + } + } expr::JoinImplementation::Unimplemented => { panic!("Attempt to render unimplemented join"); } diff --git a/src/expr/src/explain.rs b/src/expr/src/explain.rs index 1a0b9659e337..cb551472265d 100644 --- a/src/expr/src/explain.rs +++ b/src/expr/src/explain.rs @@ -451,6 +451,7 @@ impl JoinImplementation { .collect() ) ), + Semijoin => "Semijoin".to_owned(), Unimplemented => "Unimplemented".to_owned(), } } diff --git a/src/expr/src/relation/mod.rs b/src/expr/src/relation/mod.rs index 7faa40bc995a..717c43c0e624 100644 --- a/src/expr/src/relation/mod.rs +++ b/src/expr/src/relation/mod.rs @@ -1152,6 +1152,14 @@ pub enum JoinImplementation { /// Each plan starts from the corresponding index, and then in sequence joins /// against collections identified by index and with the specified arrangement key. DeltaQuery(Vec)>>), + /// Perform a semi join of a single input to a constant literal + /// + /// Technically, both differential and delta query implementations will + /// also perform semijoins if the semijoin would shrink the size of the + /// arranged inputs. A join is designated as having this implementation + /// in the single input case where neither `Differential` nor `DeltaQuery` + /// make sense. + Semijoin, /// No implementation yet selected. Unimplemented, } diff --git a/src/transform/src/join_implementation.rs b/src/transform/src/join_implementation.rs index 69842d921a5c..6ef98024a428 100644 --- a/src/transform/src/join_implementation.rs +++ b/src/transform/src/join_implementation.rs @@ -75,6 +75,11 @@ impl JoinImplementation { /// Determines the join implementation for join operators. pub fn action(&self, relation: &mut RelationExpr, indexes: &HashMap>>) { if let RelationExpr::Join { inputs, .. } = relation { + if inputs.len() == 1 { + *relation = semijoin::plan(relation); + return; + } + // Common information of broad utility. // TODO: Figure out how to package this up for everyone who uses it. let types = inputs.iter().map(|i| i.typ()).collect::>(); @@ -352,6 +357,76 @@ mod differential { } } +mod semijoin { + + use expr::{JoinImplementation, RelationExpr, ScalarExpr}; + + /// Creates a semijoin plan and lifts any predicates that don't belong. + pub fn plan(join: &RelationExpr) -> RelationExpr { + let mut new_join = join.clone(); + + if let RelationExpr::Join { + inputs, + equivalences, + demand: _, + implementation, + } = &mut new_join + { + let mut input = &mut inputs[0]; + while let RelationExpr::Filter { + input: inner, + predicates: _, + } = input + { + input = inner; + } + if let RelationExpr::ArrangeBy { input: _, keys } = input { + let available_arrangements = vec![keys.clone()]; + let order = vec![(0, keys[0].clone())]; + let mut lifted = Vec::new(); + super::implement_arrangements( + inputs, + &available_arrangements, + &[0], + order.iter(), + &mut lifted, + ); + *implementation = JoinImplementation::Semijoin; + if !lifted.is_empty() { + new_join = new_join.filter(lifted); + } + new_join + } else { + // Currently, this branch should never be traversed because + // we assume that join_elision has done its job of removing + // spurious single-input joins and has only left behind ones + // resulting from filters on indexed columns -> join transforms + // but if there is no index, the proper thing to do would be + // to convert the join back into a filter. + let filters = equivalences.iter_mut().flat_map(|equivalence| { + let last = equivalence.pop(); + let mut filters = vec![]; + if let Some(last) = last { + let mut expr = equivalence.pop(); + while let Some(e) = expr { + filters.push(ScalarExpr::CallBinary { + func: expr::BinaryFunc::Eq, + expr1: Box::new(last.clone()), + expr2: Box::new(e), + }); + expr = equivalence.pop(); + } + } + filters + }); + inputs[0].take_dangerous().filter(filters) + } + } else { + panic!("semijoin::plan call on non-join expression.") + } + } +} + /// Modify `inputs` to ensure specified arrangements are available. /// /// Lift filter predicates when all needed arrangements are otherwise available. diff --git a/src/transform/src/lib.rs b/src/transform/src/lib.rs index 0e02190c9843..7d59435c201d 100644 --- a/src/transform/src/lib.rs +++ b/src/transform/src/lib.rs @@ -48,7 +48,7 @@ pub mod redundant_join; pub mod split_predicates; pub mod topk_elision; pub mod update_let; -// pub mod use_indexes; +pub mod use_indexes; pub mod dataflow; pub use dataflow::optimize_dataflow; @@ -210,6 +210,7 @@ impl Default for Optimizer { Box::new(crate::predicate_pushdown::PredicatePushdown), Box::new(crate::fusion::join::Join), Box::new(crate::fusion::filter::Filter), + Box::new(crate::use_indexes::FilterEqualLiteral), Box::new(crate::fusion::project::Project), Box::new(crate::fusion::map::Map), Box::new(crate::empty_map::EmptyMap), diff --git a/src/transform/src/predicate_pushdown.rs b/src/transform/src/predicate_pushdown.rs index a8d0eb681b30..dd708aa3cd46 100644 --- a/src/transform/src/predicate_pushdown.rs +++ b/src/transform/src/predicate_pushdown.rs @@ -187,10 +187,7 @@ impl PredicatePushdown { let mut pushed = false; // Attempt to push down each predicate to each input. for (index, push_down) in push_downs.iter_mut().enumerate() { - if let RelationExpr::ArrangeBy { .. } = inputs[index] { - // do nothing. We do not want to push down a filter and block - // usage of an index - } else if let Some(localized) = localize_predicate( + if let Some(localized) = localize_predicate( &predicate, index, &input_relation[..], diff --git a/src/transform/src/use_indexes.rs b/src/transform/src/use_indexes.rs index 3d7f3a290b3e..18da9edf93d0 100644 --- a/src/transform/src/use_indexes.rs +++ b/src/transform/src/use_indexes.rs @@ -18,20 +18,16 @@ use std::collections::HashMap; -use repr::RelationType; - -use crate::{GlobalId, Id, RelationExpr, ScalarExpr, TransformArgs, TransformError}; -use expr::BinaryFunc; +use crate::TransformArgs; +use expr::{BinaryFunc, GlobalId, Id, RelationExpr, ScalarExpr}; /// Replaces filters of the form ScalarExpr::Column(i) == ScalarExpr::Literal, where i is a column for /// which an index exists, with a /// Join{ -/// variables: [(0, i), (1,0)], +/// equivalences: [(0, i), (1,0)], /// ArrangeBy{input, keys: [ScalarExpr::Column(i)]}, /// /// } -/// TODO (wangandi): materialize#616 consider a general case when there exists in an index on an -/// expression of column i #[derive(Debug)] pub struct FilterEqualLiteral; @@ -47,13 +43,20 @@ impl crate::Transform for FilterEqualLiteral { } impl FilterEqualLiteral { + /// Replaces filters of the form ScalarExpr::Column(i) == ScalarExpr::Literal, where i is a column for + /// which an index exists, with a + /// Join{ + /// equivalences: [(0, i), (1,0)], + /// ArrangeBy{input, keys: [ScalarExpr::Column(i)]}, + /// + /// } pub fn transform(&self, relation: &mut RelationExpr, args: TransformArgs) { relation.visit_mut(&mut |e| { self.action(e, args.indexes); }); } - pub fn action( + fn action( &self, relation: &mut RelationExpr, indexes: &HashMap>>, @@ -63,75 +66,58 @@ impl FilterEqualLiteral { id: Id::Global(id), .. } = &mut **input { - // gather predicates of the form CallBinary{Binaryfunc::Eq, Column, Literal} - let (columns, predinfo): (Vec<_>, Vec<_>) = predicates - .iter() - .enumerate() - .filter_map(|(i, p)| { + if indexes.contains_key(id) { + let mut predicates_by_column: HashMap = + HashMap::new(); + // gather predicates of the form CallBinary{Binaryfunc::Eq, + // Column, Literal} + // TODO (wangandi): materialize#616 relax the requirement + // `Column` to be any arbitrary ScalarExpr + for (predicate_num, predicate) in predicates.iter().enumerate() { if let ScalarExpr::CallBinary { func: BinaryFunc::Eq, expr1, expr2, - } = p + } = predicate { match (&**expr1, &**expr2) { - (ScalarExpr::Literal(litrow, littyp), ScalarExpr::Column(c)) => { - Some((*c, (litrow.clone(), littyp.clone(), i))) + (ScalarExpr::Literal(_, _), ScalarExpr::Column(_)) => { + predicates_by_column.insert( + (**expr2).clone(), + ((**expr1).clone(), predicate_num), + ); } - (ScalarExpr::Column(c), ScalarExpr::Literal(litrow, littyp)) => { - Some((*c, (litrow.clone(), littyp.clone(), i))) + (ScalarExpr::Column(_), ScalarExpr::Literal(_, _)) => { + predicates_by_column.insert( + (**expr1).clone(), + ((**expr2).clone(), predicate_num), + ); } - _ => None, + _ => {} } - } else { - None } - }) - .unzip(); - if !columns.is_empty() { - let key_set = &indexes[id]; - // find set of keys of the largest size that is a subset of columns - let best_index = key_set - .iter() - .filter(|ks| { - ks.iter().all(|k| match k { - ScalarExpr::Column(c) => columns.contains(c), - _ => false, - }) - }) - .max_by_key(|ks| ks.len()); - if let Some(keys) = best_index { - let column_order = keys + } + if !predicates_by_column.is_empty() { + let key_set = &indexes[id]; + // find set of keys of the largest size that is a subset of columns + let best_index = key_set .iter() - .map(|k| match k { - ScalarExpr::Column(c) => { - columns.iter().position(|d| c == d).unwrap() - } - _ => unreachable!(), - }) - .collect::>(); - let mut constant_row = Vec::new(); - let mut constant_col_types = Vec::new(); - let mut variables = Vec::new(); - for (new_idx, old_idx) in column_order.into_iter().enumerate() { - variables.push(vec![(0, columns[old_idx]), (1, new_idx)]); - constant_row.extend(predinfo[old_idx].0.unpack()); - constant_col_types.push(predinfo[old_idx].1.clone()); - } - let mut constant_type = RelationType::new(constant_col_types); - for i in 0..keys.len() { - constant_type = constant_type.add_keys(vec![i]); + .filter(|ks| ks.iter().all(|k| predicates_by_column.contains_key(k))) + .max_by_key(|ks| ks.len()); + if let Some(keys) = best_index { + let mut equivalences = Vec::new(); + for key in keys { + equivalences.push(vec![ + key.clone(), + predicates_by_column.remove(key).unwrap().0, + ]); + } + let converted_join = RelationExpr::join_scalars( + vec![input.take_dangerous().arrange_by(&[keys.clone()])], + equivalences, + ); + *input = Box::new(converted_join); } - let arity = input.arity(); - let converted_join = RelationExpr::join( - vec![ - input.take_dangerous().arrange_by(&[keys.clone()]), - RelationExpr::constant(vec![constant_row], constant_type), - ], - variables, - ) - .project((0..arity).collect::>()); - *input = Box::new(converted_join); } } } diff --git a/test/sqllogictest/index_planning.slt b/test/sqllogictest/index_planning.slt deleted file mode 100644 index b18999d771de..000000000000 --- a/test/sqllogictest/index_planning.slt +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright Materialize, Inc. All rights reserved. -# -# Use of this software is governed by the Business Source License -# included in the LICENSE file at the root of this repository. -# -# As of the Change Date specified in that file, in accordance with -# the Business Source License, use of this software will be governed -# by the Apache License, Version 2.0. - -statement ok -create table foo ( - a int NOT NULL, - b varchar, - c decimal(13, 1) -) - -statement ok -INSERT INTO foo (a, b, c) VALUES (5, 'this', -4.4), (3, 'just', 11.3), (-45, 'it', 10.2) - -statement ok -create table bar ( - a int NOT NULL, - d varchar, - e decimal(13, 1) -) - -statement ok -INSERT INTO bar (a, d, e) VALUES (-45, 'our', 3.14), (5, 'still', -0.0), (-3, 'is', -1.0) - -query T multiline -explain plan for select b, c from foo where a = 5 ----- -%0 = -| Get materialize.public.foo (u1) -| Filter (#0 = 5) -| Project (#1, #2) - -EOF - -query TR -select b, c from foo where a = 5 ----- -this --4.400 - -statement ok -create index foo_idx on foo(a); - -#TODO (wangandi) (materialize#616): uncomment these tests when FilterEqualLiteral is enabled - -#query T multiline -#explain plan for select b, c from foo where a = 5 -#---- -#Project { -# outputs: [1, 2], -# Join { -# variables: [[(0, 0), (1, 0)]], -# ArrangeBy { keys: [[#0]], Get { foo (u1) } }, -# Constant [[5]] -# } -#} - -query TR -select b, c from foo where a = 5 ----- -this --4.400 - -#query T multiline -#explain plan for select b, c from foo where a = 5 and b = 'this' -#---- -#Project { -# outputs: [1, 2], -# Filter { -# predicates: [#1 = "this"], -# Join { -# variables: [[(0, 0), (1, 0)]], -# ArrangeBy { keys: [[#0]], Get { foo (u1) } }, -# Constant [[5]], -# } -# } -#} - -query TR -select b, c from foo where a = 5 ----- -this --4.400 - -statement ok -create index foo_idx2 on foo(b, a); - -statement ok -create index foo_idx3 on foo(b); - -#query T multiline -#explain plan for select b, c from foo where a = 5 and b = 'this' -#---- -#Project { -# outputs: [1, 2], -# Join { -# variables: [[(0, 1), (1, 0)], [(0, 0), (1, 1)]], -# ArrangeBy { keys: [[#1, #0]], Get { foo (u1) } }, -# Constant [["this", 5]], -# } -#} - -#TODO (wangandi) (materialize#616): uncomment these tests when FilterEqualLiteral is enabled -#think about what the desired plan is when -#statement ok -#create index bar_idx on bar(a) -# -#query T multiline -#explain plan for select foo.a, b, c, d, e from foo, bar where foo.a = bar.a and b = 'this' -#---- - -query ITRTR -select foo.a, b, c, d, e from foo, bar where foo.a = bar.a and b = 'this' ----- -5 -this --4.400 -still -0.000 diff --git a/test/sqllogictest/transform/index_planning.slt b/test/sqllogictest/transform/index_planning.slt new file mode 100644 index 000000000000..1675de88c834 --- /dev/null +++ b/test/sqllogictest/transform/index_planning.slt @@ -0,0 +1,768 @@ +# Copyright Materialize, Inc. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. + +# Tests of planning in light of the existence of indexes. +# Annotate each explain plan with what indexes exist to make +# verifying the correctness of the plan easier + +statement ok +create table foo ( + a int NOT NULL, + b varchar, + c int +) + +## Plain filter tests ## + +### Single equality filter tests ### + +statement ok +INSERT INTO foo (a, b, c) VALUES (5, 'this', -4), (3, 'just', 11), (5, 'it', 5), (5, null, 12) + +statement ok +create table bar ( + a int NOT NULL, + d varchar, + e int +) + +statement ok +INSERT INTO bar (a, d, e) VALUES (-45, 'our', 3), (5, 'still', 12), (3, 'is', -1) + +# no indexes +query T multiline +EXPLAIN PLAN FOR SELECT b, c FROM foo WHERE a = 5 +---- +%0 = +| Get materialize.public.foo (u1) +| Filter (#0 = 5) +| Project (#1, #2) + +EOF + +query TR +SELECT b, c FROM foo WHERE a = 5 +---- +NULL +12 +it +5 +this +-4 + +statement ok +create index foo_idx on foo(a); + +#index foo(a) exists +query T multiline +EXPLAIN PLAN FOR SELECT b, c FROM foo WHERE a = 5 +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0) + +%1 = +| Join %0 (= #0 5) +| | implementation = Semijoin +| | demand = (#1, #2) +| Project (#1, #2) + +EOF + +query TR +SELECT b, c FROM foo WHERE a = 5 +---- +NULL +12 +it +5 +this +-4 + +### Tests involving two equality filters ### + +#index on foo(a) exists +query T multiline +EXPLAIN PLAN FOR SELECT b, c FROM foo WHERE a = 5 and b = 'this' +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0) + +%1 = +| Join %0 (= #0 5) +| | implementation = Semijoin +| | demand = (#1, #2) +| Filter (#1 = "this") +| Project (#1, #2) + +EOF + +query TR +SELECT b, c FROM foo WHERE a = 5 and b = 'this' +---- +this +-4 + +statement ok +create index foo_idx2 on foo(b, a); + +statement ok +create index foo_idx3 on foo(b); + +#indexes on foo(a), foo(b, a), and foo(b) exist +query T multiline +EXPLAIN PLAN FOR SELECT b, c FROM foo WHERE a = 5 and b = 'this' +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#1, #0) + +%1 = +| Join %0 (= #1 "this") (= #0 5) +| | implementation = Semijoin +| | demand = (#1, #2) +| Project (#1, #2) + +EOF + +query TR +SELECT b, c FROM foo WHERE a = 5 and b = 'this' +---- +this +-4 + +### Test of two equality filters, both containing the same literal ### + +#indexes on foo(a), foo(b, a), and foo(b) exist +query T multiline +EXPLAIN PLAN FOR SELECT b FROM foo WHERE a = 5 and c = 5 +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0) + +%1 = +| Join %0 (= #0 5) +| | implementation = Semijoin +| | demand = (#1, #2) +| Filter (#2 = 5) +| Project (#1) + +EOF + +statement ok +create index foo_idx4 on foo(a, c) + +#indexes on foo(a), foo(b, a), foo(b), and foo(a,c) exist +query T multiline +EXPLAIN PLAN FOR SELECT b FROM foo WHERE a = 5 and c = 5 +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0, #2) + +%1 = +| Join %0 (= #0 #2 5) +| | implementation = Semijoin +| | demand = (#1) +| Project (#1) + +EOF + +query T +SELECT b FROM foo WHERE a = 5 and c = 5 +---- +it + +### Test of one equality filter + one inequality filter ### + +#indexes on foo(a), foo(b, a), foo(b), and foo(a,c) exist +query T multiline +EXPLAIN PLAN FOR SELECT b FROM foo WHERE a = 5 and c < 10 +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0) + +%1 = +| Join %0 (= #0 5) +| | implementation = Semijoin +| | demand = (#1, #2) +| Filter (#2 < 10) +| Project (#1) + +EOF + +query T +SELECT b FROM foo WHERE a = 5 and c < 10 +---- +it +this + +statement ok +drop index foo_idx4 + +#indexes on foo(a), foo(b, a), and foo(b) exist +query T multiline +EXPLAIN PLAN FOR SELECT b FROM foo WHERE a = 5 and c < 10 +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0) + +%1 = +| Join %0 (= #0 5) +| | implementation = Semijoin +| | demand = (#1, #2) +| Filter (#2 < 10) +| Project (#1) + +EOF + +query T +SELECT b FROM foo WHERE a = 5 and c < 10 +---- +it +this + +## Join Interaction Tests ## + +### tests of join + filter for equality on a column that is not a column being joined on ### + +statement ok +create index bar_idx on bar(a) + +#indexes on bar(a), foo(a), foo(b, a), and foo(b) exist +query T multiline +EXPLAIN PLAN FOR SELECT foo.a, b, c, d, e FROM foo, bar WHERE bar.a = foo.a and b = 'this' +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0) + +%1 = +| Get materialize.public.bar (u3) +| ArrangeBy (#0) + +%2 = +| Join %0 %1 (= #1 "this") (= #0 #3) +| | implementation = DeltaQuery %0 %1.(#0) | %1 %0.(#0) +| | demand = (#0..#2, #4, #5) +| Project (#0..#2, #4, #5) + +EOF + +query ITRTR +SELECT foo.a, b, c, d, e FROM foo, bar WHERE bar.a = foo.a and b = 'this' +---- +5 +this +-4 +still +12 + +statement ok +drop index foo_idx2; + +#indexes on bar(a), foo(a), and foo(b) exist +query T multiline +EXPLAIN PLAN FOR SELECT foo.a, b, c, d, e FROM foo, bar WHERE bar.a = foo.a and b = 'this' +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0) + +%1 = +| Get materialize.public.bar (u3) +| ArrangeBy (#0) + +%2 = +| Join %0 %1 (= #1 "this") (= #0 #3) +| | implementation = DeltaQuery %0 %1.(#0) | %1 %0.(#0) +| | demand = (#0..#2, #4, #5) +| Project (#0..#2, #4, #5) + +EOF + +query ITRTR +SELECT foo.a, b, c, d, e FROM foo, bar WHERE bar.a = foo.a and b = 'this' +---- +5 +this +-4 +still +12 + +statement ok +drop index foo_idx3; + +#indexes on bar(a) and foo(a) exist +query T multiline +EXPLAIN PLAN FOR SELECT foo.a, b, c, d, e FROM foo, bar WHERE bar.a = foo.a and b = 'this' +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0) + +%1 = +| Get materialize.public.bar (u3) +| ArrangeBy (#0) + +%2 = +| Join %0 %1 (= #0 #3) +| | implementation = DeltaQuery %0 %1.(#0) | %1 %0.(#0) +| | demand = (#0..#2, #4, #5) +| Filter (#1 = "this") +| Project (#0..#2, #4, #5) + +EOF + +query ITRTR +SELECT foo.a, b, c, d, e FROM foo, bar WHERE bar.a = foo.a and b = 'this' +---- +5 +this +-4 +still +12 + +statement ok +drop index foo_idx; + +statement ok +create index foo_idx3 on foo(b); + +# indexes on bar(a) and foo(b) exist +query T multiline +EXPLAIN PLAN FOR SELECT foo.a, b, c, d, e FROM foo, bar WHERE bar.a = foo.a and b = 'this' +---- +%0 = +| Get materialize.public.foo (u1) + +%1 = +| Get materialize.public.bar (u3) +| ArrangeBy (#0) + +%2 = +| Join %0 %1 (= #0 #3) (= #1 "this") +| | implementation = Differential %0 %1.(#0) +| | demand = (#0..#2, #4, #5) +| Project (#0..#2, #4, #5) + +EOF + +# TODO: new plan after filters on indexes works with join planning should look like this: +#%0 = +#| Get materialize.public.foo (u1) + +#%1 = +#| Join %0 (= #1 \"this\") +#| | implementation = Semijoin +#| | demand = (#0..#2) + +#%2 = +#| Get materialize.public.bar (u3) +#| ArrangeBy (#0) + +#%3 = +#| Join %1 %2 (= #0 #3) +#| | implementation = Differential %1 %2.(#0) +#| | demand = (#0..#2, #4, #5) +#| Project (#0..#2, #4, #5) + +query ITRTR +SELECT foo.a, b, c, d, e FROM foo, bar WHERE bar.a = foo.a and b = 'this' +---- +5 +this +-4 +still +12 + +### tests of join + filter for equality on a column that is also being joined on ### + +# Note: this is a simplified version of a test FROM sqlite/SELECT5.test + +statement ok +CREATE TABLE t36( + a36 INTEGER PRIMARY KEY, + b36 INTEGER, + x36 VARCHAR(40) +) + +statement ok +INSERT INTO t36 VALUES(1,7,'table t36 row 1') + +statement ok +INSERT INTO t36 VALUES(2,8,'table t36 row 2') + +statement ok +INSERT INTO t36 VALUES(3,6,'table t36 row 3') + +statement ok +INSERT INTO t36 VALUES(4,4,'table t36 row 4') + +statement ok +INSERT INTO t36 VALUES(5,9,'table t36 row 5') + +statement ok +INSERT INTO t36 VALUES(6,1,'table t36 row 6') + +statement ok +INSERT INTO t36 VALUES(7,2,'table t36 row 7') + +statement ok +INSERT INTO t36 VALUES(8,3,'table t36 row 8') + +statement ok +INSERT INTO t36 VALUES(9,5,'table t36 row 9') + +statement ok +INSERT INTO t36 VALUES(10,10,'table t36 row 10') + +statement ok +CREATE TABLE t57( + a57 INTEGER PRIMARY KEY, + b57 INTEGER, + x57 VARCHAR(40) +) + +statement ok +INSERT INTO t57 VALUES(1,9,'table t57 row 1') + +statement ok +INSERT INTO t57 VALUES(2,5,'table t57 row 2') + +statement ok +INSERT INTO t57 VALUES(3,4,'table t57 row 3') + +statement ok +INSERT INTO t57 VALUES(4,8,'table t57 row 4') + +statement ok +INSERT INTO t57 VALUES(5,2,'table t57 row 5') + +statement ok +INSERT INTO t57 VALUES(6,3,'table t57 row 6') + +statement ok +INSERT INTO t57 VALUES(7,10,'table t57 row 7') + +statement ok +INSERT INTO t57 VALUES(8,7,'table t57 row 8') + +statement ok +INSERT INTO t57 VALUES(9,1,'table t57 row 9') + +statement ok +INSERT INTO t57 VALUES(10,6,'table t57 row 10') + +# indices exist on t36(a) and t57(a) +query T multiline +EXPLAIN PLAN FOR +SELECT x36, x57 FROM t36, t57 WHERE a36=3 AND a36=b57 +---- +%0 = +| Get materialize.public.t36 (u31) +| ArrangeBy (#0) + +%1 = +| Get materialize.public.t57 (u33) + +%2 = +| Join %0 %1 (= #0 #4 3) +| | implementation = Differential %1 %0.(#0) +| | demand = (#2, #5) +| Project (#2, #5) + +EOF + +query TT valuesort +SELECT x36, x57 FROM t36, t57 WHERE a36=3 AND a36=b57 +---- +table t36 row 3 +table t57 row 6 + +statement ok +create index b57 on t57(b57) + +# indices exist on t36(a57), t57(a57), and t57(b57) +query T multiline +EXPLAIN PLAN FOR +SELECT x36, x57 FROM t36, t57 WHERE a36=3 AND a36=b57 +---- +%0 = +| Get materialize.public.t36 (u31) +| ArrangeBy (#0) + +%1 = +| Get materialize.public.t57 (u33) +| ArrangeBy (#1) + +%2 = +| Join %0 %1 (= #0 #4 3) +| | implementation = DeltaQuery %0 %1.(#1) | %1 %0.(#0) +| | demand = (#2, #5) +| Project (#2, #5) + +EOF + +query TT valuesort +SELECT x36, x57 FROM t36, t57 WHERE a36=3 AND a36=b57 +---- +table t36 row 3 +table t57 row 6 + +statement ok +drop index b57 + +statement ok +create index b36 on t36(b36) + +statement ok +drop index t36_primary_idx + +# indices exist on t36(b36), t57(a57) +query T multiline +EXPLAIN PLAN FOR +SELECT x36, x57 FROM t36, t57 WHERE a36=3 AND a36=b57 +---- +%0 = +| Get materialize.public.t36 (u31) +| Filter (#0 = 3) +| ArrangeBy (#0) + +%1 = +| Get materialize.public.t57 (u33) + +%2 = +| Join %0 %1 (= #0 #4) +| | implementation = Differential %1 %0.(#0) +| | demand = (#2, #5) +| Project (#2, #5) + +EOF + +query TT valuesort +SELECT x36, x57 FROM t36, t57 WHERE a36=3 AND a36=b57 +---- +table t36 row 3 +table t57 row 6 + +### test of join + filter on a join column + filter not on a join column but ### +### with same literal as other filter ### + +statement ok +create index foo_idx5 on foo(c) + +# indexes exist on t36(b36), foo(b), foo(c) +query T multiline +EXPLAIN PLAN FOR +SELECT x36, foo.b FROM t36, foo WHERE foo.a = 5 AND foo.c = 5 AND a36 = foo.a +---- +%0 = +| Get materialize.public.t36 (u31) +| ArrangeBy (#0) + +%1 = +| Get materialize.public.foo (u1) +| ArrangeBy (#2) +| Filter (#0 = 5) + +%2 = +| Join %0 %1 (= #0 #3) (= #5 5) +| | implementation = Differential %1 %0.(#0) +| | demand = (#2, #4) +| Project (#2, #4) + +EOF + +# TODO: new plan after filters on indexes works with join planning should look like this: + +#%0 = +#| Get materialize.public.t36 (u31) +#| ArrangeBy (#0) + +#%1 = +#| Get materialize.public.foo (u1) + +#%2 = +#| Join %1 (= #2 5) +#| | implementation = Semijoin +#| | demand = (#0, #1) + +#%3 = +#| Join %0 %2 (= #0 #3 5) +#| | implementation = Differential %2 %0.(#0) +#| | demand = (#2, #4) +#| Project (#2, #4) + +query TT valuesort +SELECT x36, foo.b FROM t36, foo WHERE foo.a = 5 AND foo.c = 5 AND a36 = foo.a +---- +it +table t36 row 5 + +statement ok +create index foo_idx4 on foo(a, c) + +# indexes exist on t36(b36), foo(b), foo(c), foo(a, c) +query T multiline +EXPLAIN PLAN FOR +SELECT x36, foo.b FROM t36, foo WHERE foo.a = 5 AND foo.c = 5 AND a36 = foo.a +---- +%0 = +| Get materialize.public.t36 (u31) +| ArrangeBy (#0) + +%1 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0, #2) + +%2 = +| Join %0 %1 (= #0 #3 #5 5) +| | implementation = DeltaQuery %0 %1.(#0, #2) | %1 %0.(#0) +| | demand = (#2, #4) +| Project (#2, #4) + +EOF + +query TT valuesort +SELECT x36, foo.b FROM t36, foo WHERE foo.a = 5 AND foo.c = 5 AND a36 = foo.a +---- +it +table t36 row 5 + +statement ok +create index foo_idx on foo(a) + +# indexes exist on t36(b36), foo(a), foo(b), foo(c), foo(a, c) +query T multiline +EXPLAIN PLAN FOR +SELECT x36, foo.b FROM t36, foo WHERE foo.a = 5 AND foo.c = 5 AND a36 = foo.a +---- +%0 = +| Get materialize.public.t36 (u31) +| ArrangeBy (#0) + +%1 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0, #2) + +%2 = +| Join %0 %1 (= #0 #3 #5 5) +| | implementation = DeltaQuery %0 %1.(#0, #2) | %1 %0.(#0) +| | demand = (#2, #4) +| Project (#2, #4) + +EOF + +query TT valuesort +SELECT x36, foo.b FROM t36, foo WHERE foo.a = 5 AND foo.c = 5 AND a36 = foo.a +---- +it +table t36 row 5 + +### tests of delta query WHERE there are two arrangements on an input that is being filtered ### + +# arrangements in delta query do not overlap + +statement ok +create index t36_primary_idx on t36(a36) + +statement ok +create index b57 on t57(b57) + +# indexes exist on t36(a36), t36(b36), foo(a), foo(b), foo(c), foo(a, c), t57(a57) +query T multiline +EXPLAIN PLAN FOR +SELECT x36, x57, foo.b FROM foo, t36, t57 WHERE a36 = 5 AND foo.a = a36 AND b36 = b57 +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0) + +%1 = +| Get materialize.public.t36 (u31) +| ArrangeBy (#0) (#1) + +%2 = +| Get materialize.public.t57 (u33) +| ArrangeBy (#1) + +%3 = +| Join %0 %1 %2 (= #0 #3 5) (= #4 #7) +| | implementation = DeltaQuery %0 %1.(#0) %2.(#1) | %1 %0.(#0) %2.(#1) | %2 %1.(#1) %0.(#0) +| | demand = (#1, #4, #5, #8) +| Filter !(isnull(#4)) +| Project (#5, #8, #1) + +EOF + +query TTT +SELECT x36, x57, foo.b FROM foo, t36, t57 WHERE a36 = 5 AND foo.a = a36 AND b36 = b57 +---- +table t36 row 5 +table t57 row 1 +NULL +table t36 row 5 +table t57 row 1 +it +table t36 row 5 +table t57 row 1 +this + +# arrangements in delta query do overlap + +statement ok +create index bar_idx2 on bar(a, e) + +# indexes exist on t36(a36), t36(b36), foo(a), foo(b), foo(c), foo(a, c), bar(a), bar(a,e) +query T multiline +EXPLAIN PLAN FOR +SELECT x36, bar.d, foo.b +FROM foo, bar, t36 +WHERE foo.a = bar.a + AND foo.a = a36 + AND foo.c = bar.e + AND foo.a = 5 +---- +%0 = +| Get materialize.public.foo (u1) +| ArrangeBy (#0) (#0, #2) + +%1 = +| Get materialize.public.bar (u3) +| ArrangeBy (#0, #2) + +%2 = +| Get materialize.public.t36 (u31) +| ArrangeBy (#0) + +%3 = +| Join %0 %1 %2 (= #0 #3 #6 5) (= #2 #5) +| | implementation = DeltaQuery %0 %2.(#0) %1.(#0, #2) | %1 %2.(#0) %0.(#0, #2) | %2 %0.(#0) %1.(#0, #2) +| | demand = (#1, #2, #4, #8) +| Filter !(isnull(#2)) +| Project (#8, #4, #1) + +EOF + +query TTT +SELECT x36, bar.d, foo.b +FROM foo, bar, t36 +WHERE foo.a = bar.a + AND foo.a = a36 + AND foo.c = bar.e + AND foo.a = 5 +---- +table t36 row 5 +still +NULL diff --git a/test/sqllogictest/planning.slt b/test/sqllogictest/transform/topk.slt similarity index 92% rename from test/sqllogictest/planning.slt rename to test/sqllogictest/transform/topk.slt index 1eea7758da0e..aceae732b1b5 100644 --- a/test/sqllogictest/planning.slt +++ b/test/sqllogictest/transform/topk.slt @@ -8,6 +8,9 @@ # the Business Source License, use of this software will be governed # by the Apache License, Version 2.0. +# This file contains tests for the TopK elision transform +# and interaction of transforms with the TopK operator. + statement ok CREATE TABLE test1(a int, b int, c int, d int)