Skip to content

Commit

Permalink
Merge pull request #10509 from aalexandrov/qgm-outer-to-inner-join
Browse files Browse the repository at this point in the history
qgm: implement `SimplifyOuterJoins` rule in QGM
  • Loading branch information
wangandi committed Feb 14, 2022
2 parents f6333e8 + eaf5442 commit e49579c
Show file tree
Hide file tree
Showing 6 changed files with 1,005 additions and 51 deletions.
18 changes: 14 additions & 4 deletions src/sql/src/query_model/model/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,17 @@ impl From<Select> for BoxType {
}
}

impl Select {
pub fn new(predicates: Vec<BoxScalarExpr>) -> Select {
Select {
predicates,
order_key: None,
limit: None,
offset: None,
}
}
}

#[derive(Debug, Default)]
pub(crate) struct TableFunction {
pub parameters: Vec<BoxScalarExpr>,
Expand Down Expand Up @@ -414,11 +425,10 @@ impl Model {
}

/// Get a mutable reference to the quantifier identified by `quantifier_id` bound to this [`Model`].
#[allow(dead_code)]
pub(crate) fn get_mut_quantifier(
&mut self,
pub(crate) fn get_mut_quantifier<'a>(
&'a mut self,
quantifier_id: QuantifierId,
) -> BoundRefMut<'_, Quantifier> {
) -> BoundRefMut<'a, Quantifier> {
let model_ptr = self as *mut Self;
unsafe {
let reference = (*model_ptr)
Expand Down
7 changes: 6 additions & 1 deletion src/sql/src/query_model/rewrite/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
//!
//! The public interface consists of the [`Model::optimize`] method.

mod rule;

use std::collections::HashSet;

use super::attribute::core::{
Expand Down Expand Up @@ -124,7 +126,10 @@ impl Default for VisitOrder {

/// Apply all available rewrite rules to the model.
pub fn rewrite_model(model: &mut Model) {
let rules: Vec<Box<dyn ApplyRule>> = vec![];
let rules: Vec<Box<dyn ApplyRule>> = vec![
// simplify outer joins first
Box::new(rule::simplify_outer_joins::SimplifyOuterJoins),
];
apply_rules_to_model(model, rules);
model.garbage_collect();

Expand Down
18 changes: 18 additions & 0 deletions src/sql/src/query_model/rewrite/rule/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

//! Implements outerjoin simplification as a variant of Algorithm A in the seminal
//! paper by Rosenthal and Galindo-Legaria[^1].
//!
//! [^1]: [Galindo-Legaria, Cesar, and Arnon Rosenthal.
//! "Outerjoin simplification and reordering for query optimization."
//! ACM Transactions on Database Systems (TODS) 22.1 (1997): 43-74.
//! ](https://www.academia.edu/26160408/Outerjoin_simplification_and_reordering_for_query_optimization)

pub(crate) mod simplify_outer_joins;
224 changes: 224 additions & 0 deletions src/sql/src/query_model/rewrite/rule/simplify_outer_joins.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

//! Implements outerjoin simplification.
//!
//! For each outer join box, check to see if any ancestor rejects nulls
//! on its output.

use std::cell::RefCell;
use std::collections::HashSet;

use crate::query_model::attribute::core::Attribute;
use crate::query_model::attribute::propagated_nulls::PropagatedNulls;
use crate::query_model::attribute::rejected_nulls::RejectedNulls;
use crate::query_model::model::{
BoundRef, BoxType, ColumnReference, QuantifierType, QueryBox, Select,
};
use crate::query_model::rewrite::ApplyStrategy;
use crate::query_model::rewrite::Rule;
use crate::query_model::rewrite::VisitOrder;
use crate::query_model::{BoxId, Model, QuantifierId};

/// Rewrite rule that simplifies outer joins.
///
/// For an outer join box, the rule looks at the nulls rejected by the
/// ancestors of the box and, where they allow it, changes input quantifiers
/// of the box from [`QuantifierType::PreservedForeach`] to
/// [`QuantifierType::Foreach`]. Once all input quantifiers of the box are of
/// type [`QuantifierType::Foreach`], the box itself is converted to a
/// [`Select`] box.
pub(crate) struct SimplifyOuterJoins;

impl Rule for SimplifyOuterJoins {
    /// The id of the matched outer join box, paired with a (non-empty)
    /// sequence of ids of its input quantifiers whose type should be changed
    /// from [QuantifierType::PreservedForeach] to [QuantifierType::Foreach].
    type Match = (BoxId, Vec<QuantifierId>);

    /// The name of this rule.
    fn name(&self) -> &'static str {
        "SimplifyOuterJoins"
    }

    /// The rule is applied to all boxes, using [`VisitOrder::Pre`].
    fn strategy(&self) -> ApplyStrategy {
        ApplyStrategy::AllBoxes(VisitOrder::Pre)
    }

    /// The rule reads the [`PropagatedNulls`] and [`RejectedNulls`]
    /// attributes of the boxes it inspects.
    fn required_attributes(&self) -> HashSet<Box<dyn Attribute>> {
        HashSet::from([
            Box::new(PropagatedNulls) as Box<dyn Attribute>,
            Box::new(RejectedNulls) as Box<dyn Attribute>,
        ])
    }

    /// Checks whether the box identified by `box_id_to_check` is an outer
    /// join with at least one input quantifier that can be changed from
    /// [QuantifierType::PreservedForeach] to [QuantifierType::Foreach].
    ///
    /// Returns `None` if the box is not an outer join, if no unique ancestor
    /// chain leads to it, or if none of its quantifiers can be changed.
    ///
    /// # Panics
    ///
    /// Panics if the outer join box does not have exactly two input
    /// quantifiers.
    fn check(&self, model: &Model, box_id_to_check: BoxId) -> Option<Self::Match> {
        let box_to_check = model.get_box(box_id_to_check);
        if !matches!(box_to_check.box_type, BoxType::OuterJoin(..)) {
            return None;
        }
        assert_eq!(box_to_check.input_quantifiers().count(), 2);

        // We currently only apply the rule to an outer join if a unique
        // ancestor chain can be found going from the top box to the box to
        // check. We might remove this constraint in the future.
        // See [this comment](https://github.com/MaterializeInc/materialize/issues/10239#issuecomment-1030123237)
        // and the preceding discussion for more details.
        let ancestor_chain = unique_ancestor_chain(model, box_id_to_check)?;

        // Collect all the rejected nulls from all the ancestors in the chain.
        let mut ancestral_rejected_nulls = HashSet::new();

        // As we move down the chain, going from parent to child, we want to
        // translate the rejected nulls accumulated so far to be in terms of
        // the inputs of the current box. Then we want to add the nulls that
        // the current box itself rejects.
        for ancestor_id in ancestor_chain {
            let ancestor_box = model.get_box(ancestor_id);
            map_nulls_to_inputs(&mut ancestral_rejected_nulls, &ancestor_box);
            ancestral_rejected_nulls.extend(
                ancestor_box
                    .attributes
                    .get::<RejectedNulls>()
                    .into_iter()
                    .cloned(),
            );
        }
        // Finally, express the rejected nulls in terms of the inputs of the
        // outer join box itself.
        map_nulls_to_inputs(&mut ancestral_rejected_nulls, &box_to_check);

        let mut quantifiers = box_to_check.input_quantifiers();
        let lhs = quantifiers
            .next()
            .expect("outer join box has exactly two input quantifiers");
        let rhs = quantifiers
            .next()
            .expect("outer join box has exactly two input quantifiers");

        let rej_lhs = ancestral_rejected_nulls
            .iter()
            .any(|c| c.quantifier_id == lhs.id);
        let rej_rhs = ancestral_rejected_nulls
            .iter()
            .any(|c| c.quantifier_id == rhs.id);

        let mut quantifiers_to_change = vec![];
        // If null rows are rejected from LHS, and RHS is a PreservedForeach
        // quantifier, change the RHS to a Foreach quantifier.
        if rej_lhs && rhs.quantifier_type == QuantifierType::PreservedForeach {
            quantifiers_to_change.push(rhs.id);
        }
        // And vice versa.
        if rej_rhs && lhs.quantifier_type == QuantifierType::PreservedForeach {
            quantifiers_to_change.push(lhs.id);
        }

        if quantifiers_to_change.is_empty() {
            // If there are no quantifiers to change, there is no match.
            None
        } else {
            Some((box_id_to_check, quantifiers_to_change))
        }
    }

    /// Applies a match produced by `check`: changes the matched quantifiers
    /// to type [QuantifierType::Foreach] and, if all input quantifiers of the
    /// box are of that type afterwards, converts the box to a [`Select`] box.
    fn rewrite(&self, model: &mut Model, mat: Self::Match) {
        let (box_id, q_ids) = mat;

        // Change the specified quantifiers to type Foreach.
        for q_id in q_ids {
            let mut q = model.get_mut_quantifier(q_id);
            q.quantifier_type = QuantifierType::Foreach;
        }

        // If all the quantifiers in the box are type Foreach,
        // convert the box to type Select.
        let mut r#box = model.get_mut_box(box_id);
        if r#box
            .input_quantifiers()
            .all(|q| q.quantifier_type == QuantifierType::Foreach)
        {
            r#box.box_type = match &mut r#box.box_type {
                BoxType::OuterJoin(outer_join) => {
                    // Move the predicates into the new box; the old outer
                    // join payload is dropped right after this assignment.
                    Select::new(std::mem::take(&mut outer_join.predicates)).into()
                }
                _ => Select::default().into(),
            };
        }
    }
}

/// Searches for the unique chain of ancestors leading from the top box of
/// `model` down to the `target` box.
///
/// On success the returned chain starts at the top box and ends at the parent
/// of `target`; `target` itself is not part of it. Returns `None` if `target`
/// is not reached. The traversal does not descend below (or match) a box that
/// has more than one ranging quantifier.
fn unique_ancestor_chain(model: &Model, target: BoxId) -> Option<Vec<BoxId>> {
    // Stack of the boxes entered so far. It is frozen as soon as `target`
    // has been reached.
    let path = RefCell::new(Vec::new());
    let target_reached = RefCell::new(false);

    // Walk the graph depth-first, starting from the top box, until `target`
    // is reached.
    let _: Result<(), ()> = mz_ore::graph::try_nonrecursive_dft(
        model,
        model.top_box,
        &mut |model, box_id| {
            let current = model.get_box(*box_id);

            // Once the target has been reached there is nothing left to do.
            if *target_reached.borrow() {
                return Ok(vec![]);
            }

            // Record that this box has been entered.
            path.borrow_mut().push(*box_id);

            // A box with more than one parent is a dead end: we neither
            // descend into it nor check whether it is the target.
            if current.ranging_quantifiers().count() > 1 {
                return Ok(vec![]);
            }

            if *box_id == target {
                *target_reached.borrow_mut() = true;
                return Ok(vec![]);
            }

            Ok(current.input_quantifiers().map(|q| q.input_box).collect())
        },
        &mut |_, _| {
            // Unwind the stack only while the target is still being sought.
            if !*target_reached.borrow() {
                path.borrow_mut().pop();
            }
            Ok(())
        },
    );

    if !target_reached.into_inner() {
        return None;
    }

    // The last entry is `target` itself, which is not part of the chain.
    let mut chain = path.into_inner();
    chain.pop();
    Some(chain)
}

/// Translates the rejected nulls collected from the ancestors of `box` into
/// column references over the inputs of `box`.
///
/// On entry, `ancestral_rejected_nulls` holds column references from inputs
/// of the parent of `box`. On exit, it holds column references from inputs of
/// `box`: only the references that go through the first ranging quantifier of
/// `box` are considered, and each of them is replaced by the set of nulls
/// that the corresponding output column of `box` propagates from its inputs,
/// as recorded in the [`PropagatedNulls`] attribute.
///
/// Does nothing if `ancestral_rejected_nulls` is empty.
///
/// # Panics
///
/// Panics if `ancestral_rejected_nulls` is non-empty and `box` has no ranging
/// quantifiers.
fn map_nulls_to_inputs(
    ancestral_rejected_nulls: &mut HashSet<ColumnReference>,
    r#box: &BoundRef<'_, QueryBox>,
) {
    if ancestral_rejected_nulls.is_empty() {
        return;
    }

    // Get the ID of the quantifier that connects this box with its parent.
    let quantifier_id = r#box
        .ranging_quantifiers()
        .next()
        .expect("a box with rejected nulls from an ancestor must have a ranging quantifier")
        .id;

    let propagated_nulls = r#box.attributes.get::<PropagatedNulls>();
    *ancestral_rejected_nulls = ancestral_rejected_nulls
        .iter()
        // Keep only the columns that refer to the output of this box.
        .filter(|c| c.quantifier_id == quantifier_id)
        // Replace each column with the set of nulls that the column
        // propagates from its input; columns without an entry contribute
        // nothing.
        .filter_map(|c| propagated_nulls.get(c.position))
        .flat_map(|set| set.iter().cloned())
        .collect();
}
Loading

0 comments on commit e49579c

Please sign in to comment.