ci: Test output consistency between constant-folding vs. rendered query evaluations #18812

nrainer-materialize · 2023-04-18T06:59:59Z

This fixes #17767.

TODO:

✅ compare results
✅ create issues where necessary and link them (all issues already exist, no new problems found)

test/sqllogictest/transform/fold_constants.slt

philip-stoev · 2023-04-18T07:15:18Z

@nrainer-materialize did you write those queries by hand or you used a script to generate them?

nrainer-materialize · 2023-04-18T07:18:08Z

@nrainer-materialize did you write those queries by hand or you used a script to generate them?

Queries are handwritten (sometimes with Regex replacement magic), results are generated.

def-

[2023-04-18T08:59:54Z] lint: error: copyright: test/sqllogictest/transform/fold_vs_dataflow/1_numbers_dataflow.slt is missing copyright header

You can copy the copyright header from another file

vmarcos

Thanks for working on these tests! Not sure how much time you are willing to invest into refining these, but here are a few more items that would be good to validate:

Top-K computation, both with LATERAL JOIN (i.e., with groups) or not.
More coverage of set operations, e.g., EXCEPT, EXCEPT ALL.
More coverage of joins.
If possible, filters.

The constant-folding transformation has arms to handle all of the above:

materialize/src/transform/src/fold_constants.rs

Lines 85 to 402 in 8147c0b

    
           MirRelationExpr::Constant { .. } => { /* handled after match */ } 
        
           MirRelationExpr::Get { .. } => {} 
        
           MirRelationExpr::Let { .. } | MirRelationExpr::LetRec { .. } => { 
        
               // Constant propagation through bindings is currently handled by in NormalizeLets. 
        
               // Maybe we should move it / replicate it here (see #18180 for context)? 
        
           } 
        
           MirRelationExpr::Reduce { 
        
               input, 
        
               group_key, 
        
               aggregates, 
        
               monotonic: _, 
        
               expected_group_size: _, 
        
           } => { 
        
               let input_typ = input_types.next().unwrap(); 
        
               // Reduce expressions to their simplest form. 
        
               for key in group_key.iter_mut() { 
        
                   key.reduce(input_typ); 
        
               } 
        
               for aggregate in aggregates.iter_mut() { 
        
                   aggregate.expr.reduce(input_typ); 
        
               } 
        
               // Guard against evaluating an expression that may contain 
        
               // unmaterializable functions. 
        
               if group_key.iter().any(|e| e.contains_unmaterializable()) 
        
                   || aggregates 
        
                       .iter() 
        
                       .any(|a| a.expr.contains_unmaterializable()) 
        
               { 
        
                   return Ok(()); 
        
               } 
        
               if let Some((rows, ..)) = (**input).as_const() { 
        
                   let new_rows = match rows { 
        
                       Ok(rows) => { 
        
                           if let Some(rows) = 
        
                               Self::fold_reduce_constant(group_key, aggregates, rows, self.limit) 
        
                           { 
        
                               rows 
        
                           } else { 
        
                               return Ok(()); 
        
                           } 
        
                       } 
        
                       Err(e) => Err(e.clone()), 
        
                   }; 
        
                   *relation = MirRelationExpr::Constant { 
        
                       rows: new_rows, 
        
                       typ: relation_type.clone(), 
        
                   }; 
        
               } 
        
           } 
        
           MirRelationExpr::TopK { 
        
               input, 
        
               group_key, 
        
               order_key, 
        
               limit, 
        
               offset, 
        
               .. 
        
           } => { 
        
               if let Some((rows, ..)) = (**input).as_const_mut() { 
        
                   if let Ok(rows) = rows { 
        
                       Self::fold_topk_constant(group_key, order_key, limit, offset, rows); 
        
                   } 
        
                   *relation = input.take_dangerous(); 
        
               } 
        
           } 
        
           MirRelationExpr::Negate { input } => { 
        
               if let Some((rows, ..)) = (**input).as_const_mut() { 
        
                   if let Ok(rows) = rows { 
        
                       for (_row, diff) in rows { 
        
                           *diff *= -1; 
        
                       } 
        
                   } 
        
                   *relation = input.take_dangerous(); 
        
               } 
        
           } 
        
           MirRelationExpr::Threshold { input } => { 
        
               if let Some((rows, ..)) = (**input).as_const_mut() { 
        
                   if let Ok(rows) = rows { 
        
                       rows.retain(|(_, diff)| *diff > 0); 
        
                   } 
        
                   *relation = input.take_dangerous(); 
        
               } 
        
           } 
        
           MirRelationExpr::Map { input, scalars } => { 
        
               // Before reducing the scalar expressions, we need to form an appropriate 
        
               // RelationType to provide to each. Each expression needs a different 
        
               // relation type; although we could in principle use `relation_type` here, 
        
               // we shouldn't rely on `reduce` not looking at its cardinality to assess 
        
               // the number of columns. 
        
               let input_arity = input_types.next().unwrap().len(); 
        
               for (index, scalar) in scalars.iter_mut().enumerate() { 
        
                   scalar.reduce(&relation_type.column_types[..(input_arity + index)]); 
        
               } 
        
               // Guard against evaluating expression that may contain 
        
               // unmaterializable functions. 
        
               if scalars.iter().any(|e| e.contains_unmaterializable()) { 
        
                   return Ok(()); 
        
               } 
        
               if let Some((rows, ..)) = (**input).as_const() { 
        
                   let new_rows = match rows { 
        
                       Ok(rows) => rows 
        
                           .iter() 
        
                           .cloned() 
        
                           .map(|(input_row, diff)| { 
        
                               // TODO: reduce allocations to zero. 
        
                               let mut unpacked = input_row.unpack(); 
        
                               let temp_storage = RowArena::new(); 
        
                               for scalar in scalars.iter() { 
        
                                   unpacked.push(scalar.eval(&unpacked, &temp_storage)?) 
        
                               } 
        
                               Ok::<_, EvalError>((Row::pack_slice(&unpacked), diff)) 
        
                           }) 
        
                           .collect::<Result<_, _>>(), 
        
                       Err(e) => Err(e.clone()), 
        
                   }; 
        
                   *relation = MirRelationExpr::Constant { 
        
                       rows: new_rows, 
        
                       typ: relation_type.clone(), 
        
                   }; 
        
               } 
        
           } 
        
           MirRelationExpr::FlatMap { input, func, exprs } => { 
        
               let input_typ = input_types.next().unwrap(); 
        
               for expr in exprs.iter_mut() { 
        
                   expr.reduce(input_typ); 
        
               } 
        
               // Guard against evaluating expression that may contain unmaterializable functions. 
        
               if exprs.iter().any(|e| e.contains_unmaterializable()) { 
        
                   return Ok(()); 
        
               } 
        
               if let Some((rows, ..)) = (**input).as_const() { 
        
                   let new_rows = match rows { 
        
                       Ok(rows) => Self::fold_flat_map_constant(func, exprs, rows, self.limit), 
        
                       Err(e) => Err(e.clone()), 
        
                   }; 
        
                   match new_rows { 
        
                       Ok(None) => {} 
        
                       Ok(Some(rows)) => { 
        
                           *relation = MirRelationExpr::Constant { 
        
                               rows: Ok(rows), 
        
                               typ: relation_type.clone(), 
        
                           }; 
        
                       } 
        
                       Err(err) => { 
        
                           *relation = MirRelationExpr::Constant { 
        
                               rows: Err(err), 
        
                               typ: relation_type.clone(), 
        
                           }; 
        
                       } 
        
                   }; 
        
               } 
        
           } 
        
           MirRelationExpr::Filter { input, predicates } => { 
        
               let input_typ = input_types.next().unwrap(); 
        
               for predicate in predicates.iter_mut() { 
        
                   predicate.reduce(input_typ); 
        
               } 
        
               predicates.retain(|p| !p.is_literal_true()); 
        
               // Guard against evaluating expression that may contain 
        
               // unmaterializable function calls. 
        
               if predicates.iter().any(|e| e.contains_unmaterializable()) { 
        
                   return Ok(()); 
        
               } 
        
               // If any predicate is false, reduce to the empty collection. 
        
               if predicates 
        
                   .iter() 
        
                   .any(|p| p.is_literal_false() || p.is_literal_null()) 
        
               { 
        
                   relation.take_safely(); 
        
               } else if let Some((rows, ..)) = (**input).as_const() { 
        
                   // Evaluate errors last, to reduce risk of spurious errors. 
        
                   predicates.sort_by_key(|p| p.is_literal_err()); 
        
                   let new_rows = match rows { 
        
                       Ok(rows) => Self::fold_filter_constant(predicates, rows), 
        
                       Err(e) => Err(e.clone()), 
        
                   }; 
        
                   *relation = MirRelationExpr::Constant { 
        
                       rows: new_rows, 
        
                       typ: relation_type.clone(), 
        
                   }; 
        
               } 
        
           } 
        
           MirRelationExpr::Project { input, outputs } => { 
        
               if let Some((rows, ..)) = (**input).as_const() { 
        
                   let mut row_buf = Row::default(); 
        
                   let new_rows = match rows { 
        
                       Ok(rows) => Ok(rows 
        
                           .iter() 
        
                           .map(|(input_row, diff)| { 
        
                               // TODO: reduce allocations to zero. 
        
                               let datums = input_row.unpack(); 
        
                               row_buf.packer().extend(outputs.iter().map(|i| &datums[*i])); 
        
                               (row_buf.clone(), *diff) 
        
                           }) 
        
                           .collect()), 
        
                       Err(e) => Err(e.clone()), 
        
                   }; 
        
                   *relation = MirRelationExpr::Constant { 
        
                       rows: new_rows, 
        
                       typ: relation_type.clone(), 
        
                   }; 
        
               } 
        
           } 
        
           MirRelationExpr::Join { 
        
               inputs, 
        
               equivalences, 
        
               .. 
        
           } => { 
        
               if inputs.iter().any(|e| e.is_empty()) { 
        
                   relation.take_safely(); 
        
               } else if let Some(e) = inputs.iter().find_map(|i| i.as_const_err()) { 
        
                   *relation = MirRelationExpr::Constant { 
        
                       rows: Err(e.clone()), 
        
                       typ: relation_type.clone(), 
        
                   }; 
        
               } else if inputs 
        
                   .iter() 
        
                   .all(|i| matches!(i.as_const(), Some((Ok(_), ..)))) 
        
               { 
        
                   // Guard against evaluating expression that may contain unmaterializable functions. 
        
                   if equivalences 
        
                       .iter() 
        
                       .any(|equiv| equiv.iter().any(|e| e.contains_unmaterializable())) 
        
                   { 
        
                       return Ok(()); 
        
                   } 
        
                   // We can fold all constant inputs together, but must apply the constraints to restrict them. 
        
                   // We start with a single 0-ary row. 
        
                   let mut old_rows = vec![(Row::pack::<_, Datum>(None), 1)]; 
        
                   let mut row_buf = Row::default(); 
        
                   for input in inputs.iter() { 
        
                       if let Some((Ok(rows), ..)) = input.as_const() { 
        
                           if let Some(limit) = self.limit { 
        
                               if old_rows.len() * rows.len() > limit { 
        
                                   // Bail out if we have produced too many rows. 
        
                                   // TODO: progressively apply equivalences to narrow this count 
        
                                   // as we go, rather than at the end. 
        
                                   return Ok(()); 
        
                               } 
        
                           } 
        
                           let mut next_rows = Vec::new(); 
        
                           for (old_row, old_count) in old_rows { 
        
                               for (new_row, new_count) in rows.iter() { 
        
                                   row_buf 
        
                                       .packer() 
        
                                       .extend(old_row.iter().chain(new_row.iter())); 
        
                                   next_rows.push((row_buf.clone(), old_count * *new_count)); 
        
                               } 
        
                           } 
        
                           old_rows = next_rows; 
        
                       } 
        
                   } 
        
                   // Now throw away anything that doesn't satisfy the requisite constraints. 
        
                   let mut datum_vec = mz_repr::DatumVec::new(); 
        
                   old_rows.retain(|(row, _count)| { 
        
                       let datums = datum_vec.borrow_with(row); 
        
                       let temp_storage = RowArena::new(); 
        
                       equivalences.iter().all(|equivalence| { 
        
                           let mut values = 
        
                               equivalence.iter().map(|e| e.eval(&datums, &temp_storage)); 
        
                           if let Some(value) = values.next() { 
        
                               values.all(|v| v == value) 
        
                           } else { 
        
                               true 
        
                           } 
        
                       }) 
        
                   }); 
        
                   *relation = MirRelationExpr::Constant { 
        
                       rows: Ok(old_rows), 
        
                       typ: relation_type.clone(), 
        
                   }; 
        
               } 
        
               // TODO: General constant folding for all constant inputs. 
        
           } 
        
           MirRelationExpr::Union { base, inputs } => { 
        
               if let Some(e) = iter::once(&mut **base) 
        
                   .chain(&mut *inputs) 
        
                   .find_map(|i| i.as_const_err()) 
        
               { 
        
                   *relation = MirRelationExpr::Constant { 
        
                       rows: Err(e.clone()), 
        
                       typ: relation_type.clone(), 
        
                   }; 
        
               } else { 
        
                   let mut rows = vec![]; 
        
                   let mut new_inputs = vec![]; 
        
                   for input in iter::once(&mut **base).chain(&mut *inputs) { 
        
                       if let Some((Ok(rs), ..)) = input.as_const() { 
        
                           rows.extend(rs.clone()); 
        
                       } else { 
        
                           new_inputs.push(input.clone()) 
        
                       } 
        
                   } 
        
                   if !rows.is_empty() { 
        
                       new_inputs.push(MirRelationExpr::Constant { 
        
                           rows: Ok(rows), 
        
                           typ: relation_type.clone(), 
        
                       }); 
        
                   } 
        
                   *relation = MirRelationExpr::union_many(new_inputs, relation_type.clone()); 
        
               } 
        
           } 
        
           MirRelationExpr::ArrangeBy { .. } => { 
        
               // Don't fold ArrangeBys, because that could result in unarranged Delta join inputs. 
        
               // See also the comment on `MirRelationExpr::Constant`. 
        
           }

.

A complementary approach to having a fixed test suite that runs as part as our fast SQL logic tests is to also include a set of constant relations (created as views the way you do in the tests in this PR) in our fuzzing nightlies with SQLsmith. Would this be feasible?

vmarcos · 2023-04-18T10:39:29Z

test/sqllogictest/transform/fold_vs_dataflow/3_number_aggfns_dataflow.slt

+ SUM(real1), SUM(double1), SUM(numeric1),
+ SUM(real1 + real1), SUM(double1 + double1), SUM(numeric1 + numeric1),
+ MIN(real1), MIN(double1), MIN(numeric1),
+ MIN(real1 + real1), MIN(double1 + double1), MIN(numeric1 + numeric1),
+ MAX(real1), MAX(double1), MAX(numeric1),
+ MAX(real1 + real1), MAX(double1 + double1), MAX(numeric1 + numeric1),
+ AVG(real1), AVG(double1), AVG(numeric1),
+ AVG(real1 + real1), AVG(double1 + double1), AVG(numeric1 + numeric1)


Should we have expressions here also with the integer columns?

test/sqllogictest/transform/fold_vs_dataflow/5_repeat_row_dataflow.slt

def- · 2023-04-18T11:42:49Z

A complementary approach to having a fixed test suite that runs as part as our fast SQL logic tests is to also include a set of constant relations (created as views the way you do in the tests in this PR) in our fuzzing nightlies with SQLsmith. Would this be feasible?

SQLsmith might be difficult. I think a new oracle for SQLancer would be possible, but some effort and probably on my plate.

philip-stoev

I would defer to @vmarcos so say if more testing is needed as part of this task -- if this is the case, I think some sort of scripting should be used in order to exercise the various combinations more exhaustively.

vmarcos · 2023-04-18T12:19:30Z

I would defer to @vmarcos so say if more testing is needed as part of this task -- if this is the case, I think some sort of scripting should be used in order to exercise the various combinations more exhaustively.

I think that to uncover potential new issues, a scripting approach would be ideal, yes. That way, we'd get more coverage of the constant-folding code, increasing confidence. This PR, however, already improves on our current state of affairs. So would you be OK with saying this PR advances #17767, but does not close it? That way, we can take on follow-up work separately and merge this now.

nrainer-materialize · 2023-04-18T14:19:14Z

I would defer to @vmarcos so say if more testing is needed as part of this task -- if this is the case, I think some sort of scripting should be used in order to exercise the various combinations more exhaustively.

I took the time to briefly sketch a small Python script to generate such code for mathematical operations. An excerpt of the output is (untested draft):


# -------------------------------
# BEGIN TYPE UINT2 USING VIEW (constant folding)
# -------------------------------

statement ok
CREATE VIEW v_data_UINT2 (zero_value, max_value, smallest_value) AS SELECT 0::UINT2, 65535::UINT2, 1::UINT2, 1::UINT2;

query III
SELECT SUM(max_value), MIN(max_value), MAX(max_value) FROM v_data_UINT2;
----
<to be generated with --write-results>

query III
SELECT SUM(max_value + max_value), MIN(max_value + max_value), MAX(max_value + max_value) FROM v_data_UINT2;
----
<to be generated with --write-results>

@philip-stoev: Is this something I should proceed with or would you like me to continue on other issues with higher priorities?

nrainer-materialize · 2023-04-19T09:23:26Z

As discussed with @philip-stoev,

I will merge this PR
I will come up with a small plan how to 1) automatically generate further numeric validations with additional operators and number types (keeping complexity and runtime in mind) and 2) address further ideas brought up by @vmarcos
we will decide which parts of the plan are feasible with reasonable efforts and shall be implemented

vmarcos

Sounds good to me! It would be great also if you would either create another issue for the follow-up work or not close the original one, whichever you prefer.

nrainer-materialize · 2023-04-20T07:14:37Z

Sounds good to me! It would be great also if you would either create another issue for the follow-up work or not close the original one, whichever you prefer.

To be addressed with #18858.

nrainer-materialize added the T-testing Theme: tests or test infrastructure label Apr 18, 2023

nrainer-materialize self-assigned this Apr 18, 2023

nrainer-materialize force-pushed the feature/17767-evaluation-consistency branch from efce847 to e8099ae Compare April 18, 2023 07:01

nrainer-materialize commented Apr 18, 2023

View reviewed changes

test/sqllogictest/transform/fold_constants.slt Outdated Show resolved Hide resolved

test/sqllogictest/transform/fold_constants.slt Outdated Show resolved Hide resolved

nrainer-materialize requested review from philip-stoev, vmarcos and def- April 18, 2023 07:04

Consistency test

e0d4701

nrainer-materialize force-pushed the feature/17767-evaluation-consistency branch from e8099ae to e0d4701 Compare April 18, 2023 08:00

nrainer-materialize added 5 commits April 18, 2023 10:26

Move tests to separate files

b74508b

Extract bytea stuff

3679940

Split text stuff

bf96939

Rename files

056d2bf

Formatting adjustments to allow file comparisons

46b1291

def- requested changes Apr 18, 2023

View reviewed changes

nrainer-materialize added 5 commits April 18, 2023 11:45

Add copyright headers

629a5b5

Link issues

443f3d3

Add comments

bf757a1

Change mode for text and merge statements

274bc9c

Add comments

3d983e6

nrainer-materialize requested a review from def- April 18, 2023 10:49

vmarcos reviewed Apr 18, 2023

View reviewed changes

Fix

0feca19

philip-stoev approved these changes Apr 18, 2023

View reviewed changes

def- approved these changes Apr 19, 2023

View reviewed changes

vmarcos approved these changes Apr 19, 2023

View reviewed changes

nrainer-materialize mentioned this pull request Apr 20, 2023

Output consistency test framework (data-flow rendering vs. constant folding): V1 + V2 #18858

Closed

nrainer-materialize merged commit b8874da into MaterializeInc:main Apr 20, 2023

nrainer-materialize deleted the feature/17767-evaluation-consistency branch April 20, 2023 07:15

materialize-bot mentioned this pull request Apr 21, 2023

release: v0.52.0 required reviews #18907

Closed

12 tasks

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ci: Test output consistency between constant-folding vs. rendered query evaluations #18812

ci: Test output consistency between constant-folding vs. rendered query evaluations #18812

nrainer-materialize commented Apr 18, 2023 •

edited

Loading

philip-stoev commented Apr 18, 2023

nrainer-materialize commented Apr 18, 2023 •

edited

Loading

def- left a comment

vmarcos left a comment

vmarcos Apr 18, 2023

def- commented Apr 18, 2023

philip-stoev left a comment

vmarcos commented Apr 18, 2023

nrainer-materialize commented Apr 18, 2023 •

edited

Loading

nrainer-materialize commented Apr 19, 2023

vmarcos left a comment

nrainer-materialize commented Apr 20, 2023

	MirRelationExpr::Constant { .. } => { /* handled after match */ }
	MirRelationExpr::Get { .. } => {}
	MirRelationExpr::Let { .. } \| MirRelationExpr::LetRec { .. } => {
	// Constant propagation through bindings is currently handled by in NormalizeLets.
	// Maybe we should move it / replicate it here (see #18180 for context)?
	}
	MirRelationExpr::Reduce {
	input,
	group_key,
	aggregates,
	monotonic: _,
	expected_group_size: _,
	} => {
	let input_typ = input_types.next().unwrap();
	// Reduce expressions to their simplest form.
	for key in group_key.iter_mut() {
	key.reduce(input_typ);
	}
	for aggregate in aggregates.iter_mut() {
	aggregate.expr.reduce(input_typ);
	}

	// Guard against evaluating an expression that may contain
	// unmaterializable functions.
	if group_key.iter().any(\|e\| e.contains_unmaterializable())
	\|\| aggregates
	.iter()
	.any(\|a\| a.expr.contains_unmaterializable())
	{
	return Ok(());
	}

	if let Some((rows, ..)) = (**input).as_const() {
	let new_rows = match rows {
	Ok(rows) => {
	if let Some(rows) =
	Self::fold_reduce_constant(group_key, aggregates, rows, self.limit)
	{
	rows
	} else {
	return Ok(());
	}
	}
	Err(e) => Err(e.clone()),
	};
	*relation = MirRelationExpr::Constant {
	rows: new_rows,
	typ: relation_type.clone(),
	};
	}
	}
	MirRelationExpr::TopK {
	input,
	group_key,
	order_key,
	limit,
	offset,
	..
	} => {
	if let Some((rows, ..)) = (**input).as_const_mut() {
	if let Ok(rows) = rows {
	Self::fold_topk_constant(group_key, order_key, limit, offset, rows);
	}
	*relation = input.take_dangerous();
	}
	}
	MirRelationExpr::Negate { input } => {
	if let Some((rows, ..)) = (**input).as_const_mut() {
	if let Ok(rows) = rows {
	for (_row, diff) in rows {
	diff = -1;
	}
	}
	*relation = input.take_dangerous();
	}
	}
	MirRelationExpr::Threshold { input } => {
	if let Some((rows, ..)) = (**input).as_const_mut() {
	if let Ok(rows) = rows {
	rows.retain(\|(_, diff)\| *diff > 0);
	}
	*relation = input.take_dangerous();
	}
	}
	MirRelationExpr::Map { input, scalars } => {
	// Before reducing the scalar expressions, we need to form an appropriate
	// RelationType to provide to each. Each expression needs a different
	// relation type; although we could in principle use `relation_type` here,
	// we shouldn't rely on `reduce` not looking at its cardinality to assess
	// the number of columns.
	let input_arity = input_types.next().unwrap().len();
	for (index, scalar) in scalars.iter_mut().enumerate() {
	scalar.reduce(&relation_type.column_types[..(input_arity + index)]);
	}

	// Guard against evaluating expression that may contain
	// unmaterializable functions.
	if scalars.iter().any(\|e\| e.contains_unmaterializable()) {
	return Ok(());
	}

	if let Some((rows, ..)) = (**input).as_const() {
	let new_rows = match rows {
	Ok(rows) => rows
	.iter()
	.cloned()
	.map(\|(input_row, diff)\| {
	// TODO: reduce allocations to zero.
	let mut unpacked = input_row.unpack();
	let temp_storage = RowArena::new();
	for scalar in scalars.iter() {
	unpacked.push(scalar.eval(&unpacked, &temp_storage)?)
	}
	Ok::<_, EvalError>((Row::pack_slice(&unpacked), diff))
	})
	.collect::<Result<_, _>>(),
	Err(e) => Err(e.clone()),
	};
	*relation = MirRelationExpr::Constant {
	rows: new_rows,
	typ: relation_type.clone(),
	};
	}
	}
	MirRelationExpr::FlatMap { input, func, exprs } => {
	let input_typ = input_types.next().unwrap();
	for expr in exprs.iter_mut() {
	expr.reduce(input_typ);
	}

	// Guard against evaluating expression that may contain unmaterializable functions.
	if exprs.iter().any(\|e\| e.contains_unmaterializable()) {
	return Ok(());
	}

	if let Some((rows, ..)) = (**input).as_const() {
	let new_rows = match rows {
	Ok(rows) => Self::fold_flat_map_constant(func, exprs, rows, self.limit),
	Err(e) => Err(e.clone()),
	};
	match new_rows {
	Ok(None) => {}
	Ok(Some(rows)) => {
	*relation = MirRelationExpr::Constant {
	rows: Ok(rows),
	typ: relation_type.clone(),
	};
	}
	Err(err) => {
	*relation = MirRelationExpr::Constant {
	rows: Err(err),
	typ: relation_type.clone(),
	};
	}
	};
	}
	}
	MirRelationExpr::Filter { input, predicates } => {
	let input_typ = input_types.next().unwrap();
	for predicate in predicates.iter_mut() {
	predicate.reduce(input_typ);
	}
	predicates.retain(\|p\| !p.is_literal_true());

	// Guard against evaluating expression that may contain
	// unmaterializable function calls.
	if predicates.iter().any(\|e\| e.contains_unmaterializable()) {
	return Ok(());
	}

	// If any predicate is false, reduce to the empty collection.
	if predicates
	.iter()
	.any(\|p\| p.is_literal_false() \|\| p.is_literal_null())
	{
	relation.take_safely();
	} else if let Some((rows, ..)) = (**input).as_const() {
	// Evaluate errors last, to reduce risk of spurious errors.
	predicates.sort_by_key(\|p\| p.is_literal_err());
	let new_rows = match rows {
	Ok(rows) => Self::fold_filter_constant(predicates, rows),
	Err(e) => Err(e.clone()),
	};
	*relation = MirRelationExpr::Constant {
	rows: new_rows,
	typ: relation_type.clone(),
	};
	}
	}
	MirRelationExpr::Project { input, outputs } => {
	if let Some((rows, ..)) = (**input).as_const() {
	let mut row_buf = Row::default();
	let new_rows = match rows {
	Ok(rows) => Ok(rows
	.iter()
	.map(\|(input_row, diff)\| {
	// TODO: reduce allocations to zero.
	let datums = input_row.unpack();
	row_buf.packer().extend(outputs.iter().map(\|i\| &datums[*i]));
	(row_buf.clone(), *diff)
	})
	.collect()),
	Err(e) => Err(e.clone()),
	};
	*relation = MirRelationExpr::Constant {
	rows: new_rows,
	typ: relation_type.clone(),
	};
	}
	}
	MirRelationExpr::Join {
	inputs,
	equivalences,
	..
	} => {
	if inputs.iter().any(\|e\| e.is_empty()) {
	relation.take_safely();
	} else if let Some(e) = inputs.iter().find_map(\|i\| i.as_const_err()) {
	*relation = MirRelationExpr::Constant {
	rows: Err(e.clone()),
	typ: relation_type.clone(),
	};
	} else if inputs
	.iter()
	.all(\|i\| matches!(i.as_const(), Some((Ok(_), ..))))
	{
	// Guard against evaluating expression that may contain unmaterializable functions.
	if equivalences
	.iter()
	.any(\|equiv\| equiv.iter().any(\|e\| e.contains_unmaterializable()))
	{
	return Ok(());
	}

	// We can fold all constant inputs together, but must apply the constraints to restrict them.
	// We start with a single 0-ary row.
	let mut old_rows = vec![(Row::pack::<_, Datum>(None), 1)];
	let mut row_buf = Row::default();
	for input in inputs.iter() {
	if let Some((Ok(rows), ..)) = input.as_const() {
	if let Some(limit) = self.limit {
	if old_rows.len() * rows.len() > limit {
	// Bail out if we have produced too many rows.
	// TODO: progressively apply equivalences to narrow this count
	// as we go, rather than at the end.
	return Ok(());
	}
	}
	let mut next_rows = Vec::new();
	for (old_row, old_count) in old_rows {
	for (new_row, new_count) in rows.iter() {
	row_buf
	.packer()
	.extend(old_row.iter().chain(new_row.iter()));
	next_rows.push((row_buf.clone(), old_count * *new_count));
	}
	}
	old_rows = next_rows;
	}
	}

	// Now throw away anything that doesn't satisfy the requisite constraints.
	let mut datum_vec = mz_repr::DatumVec::new();
	old_rows.retain(\|(row, _count)\| {
	let datums = datum_vec.borrow_with(row);
	let temp_storage = RowArena::new();
	equivalences.iter().all(\|equivalence\| {
	let mut values =
	equivalence.iter().map(\|e\| e.eval(&datums, &temp_storage));
	if let Some(value) = values.next() {
	values.all(\|v\| v == value)
	} else {
	true
	}
	})
	});

	*relation = MirRelationExpr::Constant {
	rows: Ok(old_rows),
	typ: relation_type.clone(),
	};
	}
	// TODO: General constant folding for all constant inputs.
	}
	MirRelationExpr::Union { base, inputs } => {
	if let Some(e) = iter::once(&mut **base)
	.chain(&mut *inputs)
	.find_map(\|i\| i.as_const_err())
	{
	*relation = MirRelationExpr::Constant {
	rows: Err(e.clone()),
	typ: relation_type.clone(),
	};
	} else {
	let mut rows = vec![];
	let mut new_inputs = vec![];

	for input in iter::once(&mut *base).chain(&mut inputs) {
	if let Some((Ok(rs), ..)) = input.as_const() {
	rows.extend(rs.clone());
	} else {
	new_inputs.push(input.clone())
	}
	}
	if !rows.is_empty() {
	new_inputs.push(MirRelationExpr::Constant {
	rows: Ok(rows),
	typ: relation_type.clone(),
	});
	}

	*relation = MirRelationExpr::union_many(new_inputs, relation_type.clone());
	}
	}
	MirRelationExpr::ArrangeBy { .. } => {
	// Don't fold ArrangeBys, because that could result in unarranged Delta join inputs.
	// See also the comment on `MirRelationExpr::Constant`.
	}

ci: Test output consistency between constant-folding vs. rendered query evaluations #18812

ci: Test output consistency between constant-folding vs. rendered query evaluations #18812

Conversation

nrainer-materialize commented Apr 18, 2023 • edited Loading

philip-stoev commented Apr 18, 2023

nrainer-materialize commented Apr 18, 2023 • edited Loading

def- left a comment

Choose a reason for hiding this comment

vmarcos left a comment

Choose a reason for hiding this comment

vmarcos Apr 18, 2023

Choose a reason for hiding this comment

def- commented Apr 18, 2023

philip-stoev left a comment

Choose a reason for hiding this comment

vmarcos commented Apr 18, 2023

nrainer-materialize commented Apr 18, 2023 • edited Loading

nrainer-materialize commented Apr 19, 2023

vmarcos left a comment

Choose a reason for hiding this comment

nrainer-materialize commented Apr 20, 2023

nrainer-materialize commented Apr 18, 2023 •

edited

Loading

nrainer-materialize commented Apr 18, 2023 •

edited

Loading

nrainer-materialize commented Apr 18, 2023 •

edited

Loading