From 1dc1fa89ee906a23f4e1e382cf245b4d950e657a Mon Sep 17 00:00:00 2001 From: Kould <2435992353@qq.com> Date: Fri, 9 Feb 2024 18:08:36 +0800 Subject: [PATCH] feat: support `SUBSTRING()` (#134) * feat: support `SUBSTRING()` * docs: fix tupes -> tuples --- README.md | 2 +- src/binder/aggregate.rs | 27 ++++++++++++++ src/binder/expr.rs | 21 +++++++++++ src/expression/evaluator.rs | 35 ++++++++++++++++++ src/expression/mod.rs | 66 ++++++++++++++++++++-------------- src/expression/simplify.rs | 6 ++-- tests/slt/basic_test.slt | 32 ++++++++--------- tests/slt/sql_2016/E021_06.slt | 25 +++++++------ tests/slt/substring | 26 ++++++++++++++ 9 files changed, 182 insertions(+), 58 deletions(-) create mode 100644 tests/slt/substring diff --git a/README.md b/README.md index d8d0608c..af9a3ed3 100755 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ then use `psql` to enter sql Using FnckSQL in code ```rust let fnck_sql = Database::with_kipdb("./data").await?; -let tupes = fnck_sql.run("select * from t1").await?; +let tuples = fnck_sql.run("select * from t1").await?; ``` Storage Support: - KipDB diff --git a/src/binder/aggregate.rs b/src/binder/aggregate.rs index 80bd8a6e..22bdec6a 100644 --- a/src/binder/aggregate.rs +++ b/src/binder/aggregate.rs @@ -120,6 +120,19 @@ impl<'a, T: Transaction> Binder<'a, T> { self.visit_column_agg_expr(left_expr)?; self.visit_column_agg_expr(right_expr)?; } + ScalarExpression::SubString { + expr, + for_expr, + from_expr, + } => { + self.visit_column_agg_expr(expr)?; + if let Some(expr) = for_expr { + self.visit_column_agg_expr(expr)?; + } + if let Some(expr) = from_expr { + self.visit_column_agg_expr(expr)?; + } + } ScalarExpression::Constant(_) | ScalarExpression::ColumnRef { .. } => {} } @@ -278,6 +291,20 @@ impl<'a, T: Transaction> Binder<'a, T> { self.validate_having_orderby(right_expr)?; Ok(()) } + ScalarExpression::SubString { + expr, + for_expr, + from_expr, + } => { + self.validate_having_orderby(expr)?; + if let Some(expr) = for_expr { + self.validate_having_orderby(expr)?; + } + if let Some(expr) = from_expr { + self.validate_having_orderby(expr)?; + } + Ok(()) + } ScalarExpression::Constant(_) => Ok(()), } } diff --git a/src/binder/expr.rs b/src/binder/expr.rs index adf49ca3..06a94635 100644 --- a/src/binder/expr.rs +++ b/src/binder/expr.rs @@ -68,6 +68,27 @@ impl<'a, T: Transaction> Binder<'a, T> { left_expr: Box::new(self.bind_expr(low)?), right_expr: Box::new(self.bind_expr(high)?), }), + Expr::Substring { + expr, + substring_for, + substring_from, + } => { + let mut for_expr = None; + let mut from_expr = None; + + if let Some(expr) = substring_for { + for_expr = Some(Box::new(self.bind_expr(expr)?)) + } + if let Some(expr) = substring_from { + from_expr = Some(Box::new(self.bind_expr(expr)?)) + } + + Ok(ScalarExpression::SubString { + expr: Box::new(self.bind_expr(expr)?), + for_expr, + from_expr, + }) + } _ => { todo!() } diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs index 53cc8bef..9980b9b2 100644 --- a/src/expression/evaluator.rs +++ b/src/expression/evaluator.rs @@ -4,6 +4,7 @@ use crate::expression::value_compute::{binary_op, unary_op}; use crate::expression::ScalarExpression; use crate::types::tuple::Tuple; use crate::types::value::{DataValue, ValueRef}; +use crate::types::LogicalType; use itertools::Itertools; use lazy_static::lazy_static; use std::cmp::Ordering; @@ -13,6 +14,19 @@ lazy_static! { static ref NULL_VALUE: ValueRef = Arc::new(DataValue::Null); } +macro_rules! eval_to_num { + ($num_expr:expr, $tuple:expr) => { + if let Some(num_i32) = DataValue::clone($num_expr.eval($tuple)?.as_ref()) + .cast(&LogicalType::Integer)? + .i32() + { + num_i32 as usize + } else { + return Ok(Arc::new(DataValue::Utf8(None))); + } + }; +} + impl ScalarExpression { pub fn eval(&self, tuple: &Tuple) -> Result { if let Some(value) = Self::eval_with_summary(tuple, self.output_column().summary()) { @@ -124,6 +138,27 @@ impl ScalarExpression { } Ok(Arc::new(DataValue::Boolean(Some(is_between)))) } + ScalarExpression::SubString { + expr, + for_expr, + from_expr, + } => { + if let Some(mut string) = DataValue::clone(expr.eval(tuple)?.as_ref()) + .cast(&LogicalType::Varchar(None))? + .utf8() + { + if let Some(from_expr) = from_expr { + string = string.split_off(eval_to_num!(from_expr, tuple).saturating_sub(1)); + } + if let Some(for_expr) = for_expr { + let _ = string.split_off(eval_to_num!(for_expr, tuple)); + } + + Ok(Arc::new(DataValue::Utf8(Some(string)))) + } else { + Ok(Arc::new(DataValue::Utf8(None))) + } + } } } diff --git a/src/expression/mod.rs b/src/expression/mod.rs index d40b91d2..45a3e664 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -64,6 +64,11 @@ pub enum ScalarExpression { left_expr: Box, right_expr: Box, }, + SubString { + expr: Box, + for_expr: Option>, + from_expr: Option>, + }, } impl ScalarExpression { @@ -91,32 +96,6 @@ impl ScalarExpression { } } - pub fn nullable(&self) -> bool { - match self { - ScalarExpression::Constant(_) => false, - ScalarExpression::ColumnRef(col) => col.nullable, - ScalarExpression::Alias { expr, .. } => expr.nullable(), - ScalarExpression::TypeCast { expr, .. } => expr.nullable(), - ScalarExpression::IsNull { expr, .. } => expr.nullable(), - ScalarExpression::Unary { expr, .. } => expr.nullable(), - ScalarExpression::Binary { - left_expr, - right_expr, - .. - } => left_expr.nullable() && right_expr.nullable(), - ScalarExpression::In { expr, args, .. } => { - expr.nullable() && args.iter().all(ScalarExpression::nullable) - } - ScalarExpression::AggCall { args, .. } => args.iter().all(ScalarExpression::nullable), - ScalarExpression::Between { - expr, - left_expr, - right_expr, - .. - } => expr.nullable() && left_expr.nullable() && right_expr.nullable(), - } - } - pub fn return_type(&self) -> LogicalType { match self { Self::Constant(v) => v.logical_type(), @@ -136,6 +115,7 @@ impl ScalarExpression { Self::IsNull { .. } | Self::In { .. } | ScalarExpression::Between { .. } => { LogicalType::Boolean } + Self::SubString { .. } => LogicalType::Varchar(None), Self::Alias { expr, .. } => expr.return_type(), } } @@ -214,6 +194,21 @@ impl ScalarExpression { right_expr, .. } => expr.has_agg_call() || left_expr.has_agg_call() || right_expr.has_agg_call(), + ScalarExpression::SubString { + expr, + for_expr, + from_expr, + } => { + expr.has_agg_call() + || matches!( + for_expr.as_ref().map(|expr| expr.has_agg_call()), + Some(true) + ) + || matches!( + from_expr.as_ref().map(|expr| expr.has_agg_call()), + Some(true) + ) + } } } @@ -287,6 +282,25 @@ impl ScalarExpression { right_expr.output_name() ) } + ScalarExpression::SubString { + expr, + for_expr, + from_expr, + } => { + let op = |tag: &str, num_expr: &Option>| { + num_expr + .as_ref() + .map(|expr| format!(", {}: {}", tag, expr.output_name())) + .unwrap_or_default() + }; + + format!( + "substring({}{}{})", + expr.output_name(), + op("from", from_expr), + op("for", for_expr), + ) + } } } diff --git a/src/expression/simplify.rs b/src/expression/simplify.rs index a0cc5de3..2a7207e2 100644 --- a/src/expression/simplify.rs +++ b/src/expression/simplify.rs @@ -917,7 +917,8 @@ impl ScalarExpression { | ScalarExpression::TypeCast { expr, .. } | ScalarExpression::Unary { expr, .. } | ScalarExpression::In { expr, .. } - | ScalarExpression::Between { expr, .. } => expr.convert_binary(col_id), + | ScalarExpression::Between { expr, .. } + | ScalarExpression::SubString { expr, .. } => expr.convert_binary(col_id), ScalarExpression::IsNull { expr, negated, .. } => match expr.as_ref() { ScalarExpression::ColumnRef(column) => { Ok(column.id().is_some_and(|id| col_id == &id).then(|| { @@ -936,7 +937,8 @@ impl ScalarExpression { | ScalarExpression::Binary { .. } | ScalarExpression::AggCall { .. } | ScalarExpression::In { .. } - | ScalarExpression::Between { .. } => expr.convert_binary(col_id), + | ScalarExpression::Between { .. } + | ScalarExpression::SubString { .. } => expr.convert_binary(col_id), }, ScalarExpression::Constant(_) | ScalarExpression::ColumnRef(_) diff --git a/tests/slt/basic_test.slt b/tests/slt/basic_test.slt index 2ab998d0..683ea9bd 100644 --- a/tests/slt/basic_test.slt +++ b/tests/slt/basic_test.slt @@ -1,7 +1,7 @@ -# query I -# select 1 -# ---- -# 1 +query I +select 1 +---- +1 # query R # select 10000.00::FLOAT + 234.567::FLOAT @@ -13,20 +13,20 @@ # ---- # 12.5 -# query B -# select 2>1 -# ---- -# true +query B +select 2>1 +---- +true -# query B -# select 3>4 -# ---- -# false +query B +select 3>4 +---- +false -# query T -# select DATE '2001-02-16' -# ---- -# 2001-02-16 +query T +select DATE '2001-02-16' +---- +2001-02-16 subtest NullType diff --git a/tests/slt/sql_2016/E021_06.slt b/tests/slt/sql_2016/E021_06.slt index f148a000..9c86747b 100644 --- a/tests/slt/sql_2016/E021_06.slt +++ b/tests/slt/sql_2016/E021_06.slt @@ -1,28 +1,27 @@ # E021-06: SUBSTRING function -# TODO: SUBSTRING() +query T +SELECT SUBSTRING ( 'foo' FROM 1 ) +---- +foo -# query T -# SELECT SUBSTRING ( 'foo' FROM 1 ) -# ---- -# 'foo' - -# query T -# SELECT SUBSTRING ( 'foo' FROM 1 FOR 2 ) -# ---- -# 'fo' +query T +SELECT SUBSTRING ( 'foo' FROM 1 FOR 2 ) +---- +fo +# sqlparser-rs unsupported # query I # SELECT SUBSTRING ( 'foo' FROM 1 FOR 2 USING CHARACTERS ) - +# sqlparser-rs unsupported # query I # SELECT SUBSTRING ( 'foo' FROM 1 FOR 2 USING OCTETS ) - +# sqlparser-rs unsupported # query I # SELECT SUBSTRING ( 'foo' FROM 1 USING CHARACTERS ) - +# sqlparser-rs unsupported # query I # SELECT SUBSTRING ( 'foo' FROM 1 USING OCTETS ) diff --git a/tests/slt/substring b/tests/slt/substring new file mode 100644 index 00000000..b886b7ab --- /dev/null +++ b/tests/slt/substring @@ -0,0 +1,26 @@ +query T +select substring('pineapple' from 5 for 10 ) +---- +app + +query T +select substring('pineapple' for 4 ) +---- +pine + +query T +select substring('pineapple' from 5 ) +---- +apple + +query T +select substring('pineapple' from 1 for null ) +---- + +query T +select substring('pineapple' from null for 4 ) +---- + +query T +select substring(null from 1 for 4 ) +---- \ No newline at end of file