Skip to content

Commit

Permalink
Count non-leaves instead of leaves.
Browse files Browse the repository at this point in the history
This allows distinguishing between the complexity of
CallUnary{CallUnary{Column}} and CallUnary{Column}.

Constants are considered to be less complex than any other type of
MirScalarExpr.
  • Loading branch information
Andi Wang committed Aug 23, 2021
1 parent c2fafa1 commit 828dae6
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 38 deletions.
28 changes: 16 additions & 12 deletions src/expr/src/relation/canonicalize.rs
Expand Up @@ -18,7 +18,7 @@ use repr::{Datum, RelationType, ScalarType};
/// This function:
/// * ensures the same expression appears in only one equivalence class.
/// * ensures the equivalence classes are sorted and dedupped.
/// * simplifies expressions to involve the least number of leaves.
/// * simplifies expressions to involve the least number of non-leaves.
///
/// ```rust
/// use expr::MirScalarExpr;
Expand All @@ -40,13 +40,13 @@ use repr::{Datum, RelationType, ScalarType};
/// assert_eq!(expected, equivalences)
/// ````
pub fn canonicalize_equivalences(equivalences: &mut Vec<Vec<MirScalarExpr>>) {
// Calculate the number of leaves for each expression.
// Calculate the number of non-leaves for each expression.
let mut to_reduce = equivalences
.drain(..)
.filter_map(|mut cls| {
let mut result = cls
.drain(..)
.map(|expr| (count_leaves(&expr), expr))
.map(|expr| (rank_complexity(&expr), expr))
.collect::<Vec<_>>();
result.sort();
result.dedup();
Expand Down Expand Up @@ -83,15 +83,15 @@ pub fn canonicalize_equivalences(equivalences: &mut Vec<Vec<MirScalarExpr>>) {
expressions_rewritten = true;
}
});
new_equivalence.push((count_leaves(&popped_expr), popped_expr));
new_equivalence.push((rank_complexity(&popped_expr), popped_expr));
}
new_equivalence.sort();
new_equivalence.dedup();
to_reduce[i] = new_equivalence;
}
}

// Map away the leaf count.
// Map away the complexity rating.
*equivalences = to_reduce
.drain(..)
.map(|mut cls| cls.drain(..).map(|(_, expr)| expr).collect::<Vec<_>>())
Expand All @@ -109,26 +109,30 @@ pub fn canonicalize_equivalences(equivalences: &mut Vec<Vec<MirScalarExpr>>) {
}
}
}

for equivalence in equivalences.iter_mut() {
equivalence.sort();
equivalence.dedup();
}

equivalences.retain(|es| es.len() > 1);
equivalences.sort();
}

fn count_leaves(expr: &MirScalarExpr) -> usize {
let mut leaf_count = 0;
fn rank_complexity(expr: &MirScalarExpr) -> usize {
if expr.is_literal() {
// literals are the least complex
return 0;
}
// the number of non-leaves determines the complexity of all other expressions.
let mut non_leaf_count = 1;
expr.visit(&mut |e: &MirScalarExpr| {
if e.is_literal() {
leaf_count += 1
} else if let MirScalarExpr::Column(_) = e {
leaf_count += 1
} else if let MirScalarExpr::CallNullary(_) = e {
} else {
non_leaf_count += 1
}
});
leaf_count
non_leaf_count
}

/// Canonicalize predicates of a filter.
Expand Down
68 changes: 42 additions & 26 deletions src/expr/tests/testdata/reduce
Expand Up @@ -331,32 +331,7 @@ canonicalize
!(isnull(#1))
(!(isnull(#3)) || (#0 < #2))

canonicalize-join
[[#0 #0 #3] [(call_binary add_int32 #0 #0) (call_binary add_int32 #3 #3)]]
----
[#0 #3]

canonicalize-join
[[#0 #3 #3] [(call_binary add_int32 #0 #0) #1] [(call_binary add_int32 #3 #3) #2]]
----
[#0 #3]
[#1 #2 (#0 + #0)]

canonicalize-join
[
[#0 #3]
[#1
(call_binary add_int32 (call_binary add_int32 #0 #0) #0)
(call_binary add_int32 (call_binary add_int32 #2 #2) #1)
(call_binary add_int32
(call_binary add_int32 #2 #2)
(call_binary add_int32 (call_binary add_int32 #3 #3) #3))
(call_binary add_int32 (call_binary add_int32 #3 #3) #3)
]
]
----
[#0 #3]
[#1 ((#0 + #0) + #0) ((#2 + #2) + #1)]
# expressions in equivalence classes only become simpler.

canonicalize-join
[[
Expand Down Expand Up @@ -399,6 +374,9 @@ canonicalize-join
[#1 ((#0 + #0) + #0) ((#2 + #2) + #1)]
[(#2 + #2) (#4 * #5)]

# replacing expressions with simpler equivalent ones can result in the
# collapsing of equivalence classes.

canonicalize-join
[
[#0 #3]
Expand All @@ -416,3 +394,41 @@ canonicalize-join
----
[#0 #3]
[#1 (#0 + #0) (#1 + #0) (#1 + #1) ((#2 + #2) + #1) (#1 - #1) (#4 * #5)]

canonicalize-join
[[#0 #3 #3] [(call_binary add_int32 #0 #0) #1] [(call_binary add_int32 #3 #3) #2]]
----
[#0 #3]
[#1 #2 (#0 + #0)]

# replacing expressions with simpler equivalent ones can result in the
# removal of redundant equivalence classes.

canonicalize-join
[[#0 #0 #3] [(call_binary add_int32 #0 #0) (call_binary add_int32 #3 #3)]]
----
[#0 #3]

# test an equivalence class where the number of leaves is the same but the
# number of non-leaves is not.

canonicalize-join
[[
(call_unary cast_int16_to_int32 #0)
(call_unary neg_int32 (call_unary cast_int16_to_int32 #0))
(call_unary neg_int32 (call_unary neg_int32 (call_unary cast_int16_to_int32
#0)))
]]
----
[-(i16toi32(#0)) i16toi32(#0)]

# literals don't get overwritten with equivalent expressions

canonicalize-join
[[
#0
(4 int32)
(call_binary add_int32 #1 (4 int32))
]]
----
[#0 4 (#1 + 4)]

0 comments on commit 828dae6

Please sign in to comment.