Skip to content

Commit 0e95ab4

Browse files
committedNov 12, 2023
feat(optimizer): Implement LIKE operator rule for query optimization
This commit introduces a new optimizer rule for the SQL LIKE operator. In certain cases (a column compared against a constant pattern that ends with `%`, such as `'abc%'`), the LIKE expression is rewritten into a range check built from the binary operators GtEq and Lt, which improves query performance. Additionally, new tests for the character-increment helper have been added, and `LikeRewrite` has been added to the optimizer rules in the rule set.
1 parent 43787a7 commit 0e95ab4

File tree

3 files changed

+122
-13
lines changed

3 files changed

+122
-13
lines changed
 

‎src/db.rs

+14-4
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,10 @@ impl<S: Storage> Database<S> {
5454
/// Limit(1)
5555
/// Project(a,b)
5656
let source_plan = binder.bind(&stmts[0])?;
57-
// println!("source_plan plan: {:#?}", source_plan);
57+
//println!("source_plan plan: {:#?}", source_plan);
5858

5959
let best_plan = Self::default_optimizer(source_plan).find_best()?;
60-
// println!("best_plan plan: {:#?}", best_plan);
60+
//println!("best_plan plan: {:#?}", best_plan);
6161

6262
let transaction = RefCell::new(transaction);
6363
let mut stream = build(best_plan, &transaction);
@@ -78,10 +78,10 @@ impl<S: Storage> Database<S> {
7878
.batch(
7979
"Simplify Filter".to_string(),
8080
HepBatchStrategy::fix_point_topdown(10),
81-
vec![RuleImpl::SimplifyFilter, RuleImpl::ConstantCalculation],
81+
vec![RuleImpl::LikeRewrite, RuleImpl::SimplifyFilter, RuleImpl::ConstantCalculation],
8282
)
8383
.batch(
84-
"Predicate Pushdown".to_string(),
84+
"Predicate Pushdown".to_string(),
8585
HepBatchStrategy::fix_point_topdown(10),
8686
vec![
8787
RuleImpl::PushPredicateThroughJoin,
@@ -206,6 +206,12 @@ mod test {
206206
let _ = kipsql
207207
.run("insert into t3 (a, b) values (4, 4444), (5, 5222), (6, 1.00)")
208208
.await?;
209+
let _ = kipsql
210+
.run("create table t4 (a int primary key, b varchar(100))")
211+
.await?;
212+
let _ = kipsql
213+
.run("insert into t4 (a, b) values (1, 'abc'), (2, 'abdc'), (3, 'abcd'), (4, 'ddabc')")
214+
.await?;
209215

210216
println!("show tables:");
211217
let tuples_show_tables = kipsql.run("show tables").await?;
@@ -371,6 +377,10 @@ mod test {
371377
let tuples_decimal = kipsql.run("select * from t3").await?;
372378
println!("{}", create_table(&tuples_decimal));
373379

380+
println!("like rewrite:");
381+
let tuples_like_rewrite = kipsql.run("select * from t4 where b like 'abc%'").await?;
382+
println!("{}", create_table(&tuples_like_rewrite));
383+
374384
Ok(())
375385
}
376386
}

‎src/optimizer/rule/mod.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::optimizer::rule::pushdown_limit::{
99
};
1010
use crate::optimizer::rule::pushdown_predicates::PushPredicateIntoScan;
1111
use crate::optimizer::rule::pushdown_predicates::PushPredicateThroughJoin;
12-
use crate::optimizer::rule::simplification::ConstantCalculation;
12+
use crate::optimizer::rule::simplification::{ConstantCalculation, LikeRewrite};
1313
use crate::optimizer::rule::simplification::SimplifyFilter;
1414
use crate::optimizer::OptimizerError;
1515

@@ -37,6 +37,7 @@ pub enum RuleImpl {
3737
// Simplification
3838
SimplifyFilter,
3939
ConstantCalculation,
40+
LikeRewrite,
4041
}
4142

4243
impl Rule for RuleImpl {
@@ -53,6 +54,7 @@ impl Rule for RuleImpl {
5354
RuleImpl::PushPredicateIntoScan => PushPredicateIntoScan.pattern(),
5455
RuleImpl::SimplifyFilter => SimplifyFilter.pattern(),
5556
RuleImpl::ConstantCalculation => ConstantCalculation.pattern(),
57+
RuleImpl::LikeRewrite =>LikeRewrite.pattern(),
5658
}
5759
}
5860

@@ -69,6 +71,7 @@ impl Rule for RuleImpl {
6971
RuleImpl::SimplifyFilter => SimplifyFilter.apply(node_id, graph),
7072
RuleImpl::PushPredicateIntoScan => PushPredicateIntoScan.apply(node_id, graph),
7173
RuleImpl::ConstantCalculation => ConstantCalculation.apply(node_id, graph),
74+
RuleImpl::LikeRewrite => LikeRewrite.apply(node_id, graph),
7275
}
7376
}
7477
}

‎src/optimizer/rule/simplification.rs

+104-8
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,15 @@ use crate::optimizer::OptimizerError;
55
use crate::planner::operator::join::JoinCondition;
66
use crate::planner::operator::Operator;
77
use lazy_static::lazy_static;
8+
use crate::expression::{BinaryOperator, ScalarExpression};
9+
use crate::types::value::{DataValue, ValueRef};
810
lazy_static! {
11+
static ref LIKE_REWRITE_RULE: Pattern = {
12+
Pattern {
13+
predicate: |op| matches!(op, Operator::Filter(_)),
14+
children: PatternChildrenPredicate::None,
15+
}
16+
};
917
static ref CONSTANT_CALCULATION_RULE: Pattern = {
1018
Pattern {
1119
predicate: |_| true,
@@ -109,6 +117,84 @@ impl Rule for SimplifyFilter {
109117
}
110118
}
111119

120+
pub struct LikeRewrite;
121+
122+
impl Rule for LikeRewrite {
123+
fn pattern(&self) -> &Pattern {
124+
&LIKE_REWRITE_RULE
125+
}
126+
127+
fn apply(&self, node_id: HepNodeId, graph: &mut HepGraph) -> Result<(), OptimizerError> {
128+
if let Operator::Filter(mut filter_op) = graph.operator(node_id).clone() {
129+
// if is like expression
130+
if let ScalarExpression::Binary {
131+
op: BinaryOperator::Like,
132+
left_expr,
133+
right_expr,
134+
ty,
135+
} = &mut filter_op.predicate
136+
{
137+
// if left is column and right is constant
138+
if let ScalarExpression::ColumnRef(_) = left_expr.as_ref() {
139+
if let ScalarExpression::Constant(value) = right_expr.as_ref() {
140+
match value.as_ref() {
141+
DataValue::Utf8(val_str) => {
142+
let mut value = val_str.clone().unwrap_or_else(|| "".to_string());
143+
144+
if value.ends_with('%') {
145+
value.pop(); // remove '%'
146+
if let Some(last_char) = value.clone().pop() {
147+
if let Some(next_char) = increment_char(last_char) {
148+
let mut new_value = value.clone();
149+
new_value.pop();
150+
new_value.push(next_char);
151+
152+
let new_expr = ScalarExpression::Binary {
153+
op: BinaryOperator::And,
154+
left_expr: Box::new(ScalarExpression::Binary {
155+
op: BinaryOperator::GtEq,
156+
left_expr: left_expr.clone(),
157+
right_expr: Box::new(ScalarExpression::Constant(ValueRef::from(DataValue::Utf8(Some(value))))),
158+
ty: ty.clone(),
159+
}),
160+
right_expr: Box::new(ScalarExpression::Binary {
161+
op: BinaryOperator::Lt,
162+
left_expr: left_expr.clone(),
163+
right_expr: Box::new(ScalarExpression::Constant(ValueRef::from(DataValue::Utf8(Some(new_value))))),
164+
ty: ty.clone(),
165+
}),
166+
ty: ty.clone(),
167+
};
168+
filter_op.predicate = new_expr;
169+
}
170+
}
171+
}
172+
}
173+
_ => {
174+
graph.version += 1;
175+
return Ok(());
176+
}
177+
}
178+
}
179+
}
180+
}
181+
graph.replace_node(node_id, Operator::Filter(filter_op))
182+
}
183+
// mark changed to skip this rule batch
184+
graph.version += 1;
185+
Ok(())
186+
}
187+
}
188+
189+
/// Returns the Unicode scalar value that immediately follows `v`.
///
/// Used to build the exclusive upper bound of a prefix range: the last
/// character of the prefix is replaced by its successor.
///
/// Returns `None` when:
/// - `v` is `'z'` or `'Z'` (behaviour pinned by the existing unit test;
///   NOTE(review): the reason for excluding these two letters is not stated
///   in this file);
/// - `v` is `char::MAX`, which has no successor.
fn increment_char(v: char) -> Option<char> {
    match v {
        'z' | 'Z' => None,
        // U+D800..=U+DFFF are surrogates, not valid `char`s: step over the gap
        // instead of giving up on the character just below it.
        '\u{D7FF}' => Some('\u{E000}'),
        // `char::MAX + 1` is out of range, so `from_u32` yields `None` there.
        _ => char::from_u32(u32::from(v) + 1),
    }
}
196+
197+
112198
#[cfg(test)]
113199
mod test {
114200
use crate::binder::test::select_sql_run;
@@ -126,6 +212,15 @@ mod test {
126212
use crate::types::LogicalType;
127213
use std::collections::Bound;
128214
use std::sync::Arc;
215+
use crate::optimizer::rule::simplification::increment_char;
216+
217+
218+
/// Checks `increment_char` on ordinary characters and on its `None` cases.
#[test]
fn test_increment_char() {
    // Ordinary characters advance to the next code point.
    assert_eq!(increment_char('a'), Some('b'));
    assert_eq!(increment_char('A'), Some('B'));
    assert_eq!(increment_char('9'), Some(':'));

    // Both explicitly excluded letters yield no successor.
    assert_eq!(increment_char('z'), None);
    assert_eq!(increment_char('Z'), None);

    // The largest scalar value has no successor.
    assert_eq!(increment_char(char::MAX), None);
}
129224

130225
#[tokio::test]
131226
async fn test_constant_calculation_omitted() -> Result<(), DatabaseError> {
@@ -302,6 +397,7 @@ mod test {
302397
Ok(())
303398
}
304399

400+
305401
#[tokio::test]
306402
async fn test_simplify_filter_multiple_column() -> Result<(), DatabaseError> {
307403
// c1 + 1 < -1 => c1 < -2
@@ -343,7 +439,7 @@ mod test {
343439
cb_1_c1,
344440
Some(ConstantBinary::Scope {
345441
min: Bound::Unbounded,
346-
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2))))
442+
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))),
347443
})
348444
);
349445

@@ -353,7 +449,7 @@ mod test {
353449
cb_1_c2,
354450
Some(ConstantBinary::Scope {
355451
min: Bound::Excluded(Arc::new(DataValue::Int32(Some(2)))),
356-
max: Bound::Unbounded
452+
max: Bound::Unbounded,
357453
})
358454
);
359455

@@ -363,7 +459,7 @@ mod test {
363459
cb_2_c1,
364460
Some(ConstantBinary::Scope {
365461
min: Bound::Excluded(Arc::new(DataValue::Int32(Some(2)))),
366-
max: Bound::Unbounded
462+
max: Bound::Unbounded,
367463
})
368464
);
369465

@@ -373,7 +469,7 @@ mod test {
373469
cb_1_c1,
374470
Some(ConstantBinary::Scope {
375471
min: Bound::Unbounded,
376-
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2))))
472+
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))),
377473
})
378474
);
379475

@@ -383,7 +479,7 @@ mod test {
383479
cb_3_c1,
384480
Some(ConstantBinary::Scope {
385481
min: Bound::Unbounded,
386-
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1))))
482+
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))),
387483
})
388484
);
389485

@@ -393,7 +489,7 @@ mod test {
393489
cb_3_c2,
394490
Some(ConstantBinary::Scope {
395491
min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))),
396-
max: Bound::Unbounded
492+
max: Bound::Unbounded,
397493
})
398494
);
399495

@@ -403,7 +499,7 @@ mod test {
403499
cb_4_c1,
404500
Some(ConstantBinary::Scope {
405501
min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))),
406-
max: Bound::Unbounded
502+
max: Bound::Unbounded,
407503
})
408504
);
409505

@@ -413,7 +509,7 @@ mod test {
413509
cb_4_c2,
414510
Some(ConstantBinary::Scope {
415511
min: Bound::Unbounded,
416-
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1))))
512+
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))),
417513
})
418514
);
419515

0 commit comments

Comments
 (0)
Failed to load comments.