diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4c689c48a..9f0280826 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -659,14 +659,20 @@ impl<'a> Parser<'a> { Ok(expr) } + /// Parse Range clause with format `RANGE [ Duration literal | (INTERVAL [interval expr]) ] FILL [ NULL | PREV .....]` fn parse_range_expr(&mut self, expr: Expr) -> Result { let index = self.index; let range = if self.parse_keyword(Keyword::RANGE) { - // Make sure Range followed by a value, or it will be confused with window function syntax + // Make sure Range followed by a value or interval expr, or it will be confused with window function syntax // e.g. `COUNT(*) OVER (ORDER BY a RANGE BETWEEN INTERVAL '1 DAY' PRECEDING AND INTERVAL '1 DAY' FOLLOWING)` - if let Ok(value) = self.parse_value() { + if self.consume_token(&Token::LParen) { + self.expect_keyword(Keyword::INTERVAL)?; + let interval = self.parse_interval()?; + self.expect_token(&Token::RParen)?; + interval + } else if let Ok(value) = self.parse_value() { value.verify_duration()?; - value + Expr::Value(value) } else { self.index = index; return Ok(expr); @@ -694,7 +700,7 @@ impl<'a> Parser<'a> { if matches!(e, Expr::Function(..)) { let args = vec![ FunctionArg::Unnamed(FunctionArgExpr::Expr(e.clone())), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(range.clone()))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(range.clone())), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(fill.clone()))), ]; let range_func = Function { @@ -5757,8 +5763,8 @@ impl<'a> Parser<'a> { } else { vec![] }; - - let mut align: Option<(Value, Vec)> = None; + // triple means (align duration, to, by) + let mut align: Option<(Expr, Expr, Vec)> = None; let mut fill: Option = None; for _ in 0..2 { if self.parse_keyword(Keyword::ALIGN) { @@ -5767,8 +5773,27 @@ impl<'a> Parser<'a> { "Duplicate ALIGN keyword detected in SELECT clause.".into(), )); } - let value = self.parse_value()?; - value.verify_duration()?; + // Must use parentheses in interval, otherwise it will cause syntax conflicts. + // `INTERVAL '1-1' YEAR TO MONTH` are conflict with + // `ALIGN INTERVAL '1' day TO '1970-01-01T00:00:00+08:00'` + let value = if self.consume_token(&Token::LParen) { + self.expect_keyword(Keyword::INTERVAL)?; + let interval = self.parse_interval()?; + self.expect_token(&Token::RParen)?; + interval + } else { + let value = self.parse_value()?; + value.verify_duration()?; + Expr::Value(value) + }; + let to = if self.parse_keyword(Keyword::TO) { + let value = self.next_token().to_string(); + Expr::Value(Value::SingleQuotedString( + value.trim_matches(|x| x == '\'' || x == '"').to_string(), + )) + } else { + Expr::Value(Value::SingleQuotedString(String::new())) + }; let by = if self.parse_keyword(Keyword::BY) { self.expect_token(&Token::LParen)?; if self.consume_token(&Token::RParen) { @@ -5793,7 +5818,7 @@ impl<'a> Parser<'a> { } else { vec![] }; - align = Some((value, by)); + align = Some((value, to, by)); } if self.parse_keyword(Keyword::FILL) { if fill.is_some() { @@ -5809,7 +5834,7 @@ impl<'a> Parser<'a> { "ALIGN argument cannot be omitted in the range select query".into(), )); } - let projection = if let Some((align, by)) = align { + let projection = if let Some((align, to, by)) = align { let fill = fill.unwrap_or_default(); let by_num = FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( Value::SingleQuotedString(by.len().to_string()), @@ -5822,7 +5847,7 @@ impl<'a> Parser<'a> { FunctionArg::Unnamed(FunctionArgExpr::Expr(x)) }) .collect::>(); - // range_fn(func, range, fill, byc, [byv], align) + // range_fn(func, range, fill, byc, [byv], align, to) // byc are length of variadic arguments [byv] let mut rewrite_count = 0; let mut align_fill_rewrite = @@ -5850,7 +5875,10 @@ impl<'a> Parser<'a> { range_func.args.push(by_num.clone()); range_func.args.extend(by.clone()); range_func.args.push(FunctionArg::Unnamed( - FunctionArgExpr::Expr(Expr::Value(align.clone())), + FunctionArgExpr::Expr(align.clone()), + )); + range_func.args.push(FunctionArg::Unnamed( + FunctionArgExpr::Expr(to.clone()), )); rewrite_count += 1; return Ok(Some(Expr::Function(range_func))); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 995db4ca6..a497b5f38 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -541,8 +541,7 @@ impl<'a> Tokenizer<'a> { pub fn tokenize(&mut self) -> Result, TokenizerError> { let twl = self.tokenize_with_location()?; - let mut tokens: Vec = vec![]; - tokens.reserve(twl.len()); + let mut tokens: Vec = Vec::with_capacity(twl.len()); for token_with_location in twl { tokens.push(token_with_location.token); } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 8970b8f53..6a7e86a98 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7406,59 +7406,59 @@ fn assert_sql_err(s: &'static str, result: &'static str) { #[test] fn parse_range_select() { - // rewrite format `range_fn(func_name, argc, [argv], range, fill, byc, [byv], align)` + // rewrite format `range_fn(func_name, argc, [argv], range, fill, byc, [byv], align, to)` // regular without by assert_sql("SELECT rate(metrics) RANGE '5m', sum(metrics) RANGE '10m' FILL MAX, sum(metrics) RANGE '10m' FROM t ALIGN '1h' FILL NULL;", - "SELECT range_fn(rate(metrics), '5m', 'NULL', '0', '1h'), range_fn(sum(metrics), '10m', 'MAX', '0', '1h'), range_fn(sum(metrics), '10m', 'NULL', '0', '1h') FROM t"); + "SELECT range_fn(rate(metrics), '5m', 'NULL', '0', '1h', ''), range_fn(sum(metrics), '10m', 'MAX', '0', '1h', ''), range_fn(sum(metrics), '10m', 'NULL', '0', '1h', '') FROM t"); // regular with by assert_sql("SELECT rate(metrics) RANGE '5m', sum(metrics) RANGE '10m' FILL MAX, sum(metrics) RANGE '10m' FROM t ALIGN '1h' by ((a+1)/2, b) FILL NULL;", - "SELECT range_fn(rate(metrics), '5m', 'NULL', '2', (a + 1) / 2, b, '1h'), range_fn(sum(metrics), '10m', 'MAX', '2', (a + 1) / 2, b, '1h'), range_fn(sum(metrics), '10m', 'NULL', '2', (a + 1) / 2, b, '1h') FROM t GROUP BY a, b"); + "SELECT range_fn(rate(metrics), '5m', 'NULL', '2', (a + 1) / 2, b, '1h', ''), range_fn(sum(metrics), '10m', 'MAX', '2', (a + 1) / 2, b, '1h', ''), range_fn(sum(metrics), '10m', 'NULL', '2', (a + 1) / 2, b, '1h', '') FROM t GROUP BY a, b"); // explicit empty by assert_sql("SELECT rate(metrics) RANGE '5m', sum(metrics) RANGE '10m' FILL MAX, sum(metrics) RANGE '10m' FROM t ALIGN '1h' by () FILL NULL;", - "SELECT range_fn(rate(metrics), '5m', 'NULL', '1', 1, '1h'), range_fn(sum(metrics), '10m', 'MAX', '1', 1, '1h'), range_fn(sum(metrics), '10m', 'NULL', '1', 1, '1h') FROM t"); + "SELECT range_fn(rate(metrics), '5m', 'NULL', '1', 1, '1h', ''), range_fn(sum(metrics), '10m', 'MAX', '1', 1, '1h', ''), range_fn(sum(metrics), '10m', 'NULL', '1', 1, '1h', '') FROM t"); // expression1 assert_sql( "SELECT avg(a/2 + 1) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;", - "SELECT range_fn(avg(a / 2 + 1), '5m', 'NULL', '0', '1h') FROM t", + "SELECT range_fn(avg(a / 2 + 1), '5m', 'NULL', '0', '1h', '') FROM t", ); // expression2 assert_sql( "SELECT avg(a) RANGE '5m' FILL NULL + 1 FROM t ALIGN '1h' FILL NULL;", - "SELECT range_fn(avg(a), '5m', 'NULL', '0', '1h') + 1 FROM t", + "SELECT range_fn(avg(a), '5m', 'NULL', '0', '1h', '') + 1 FROM t", ); // expression3 assert_sql( "SELECT ((avg(a) + sum(b))/2) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;", - "SELECT ((range_fn(avg(a), '5m', 'NULL', '0', '1h') + range_fn(sum(b), '5m', 'NULL', '0', '1h')) / 2) FROM t", + "SELECT ((range_fn(avg(a), '5m', 'NULL', '0', '1h', '') + range_fn(sum(b), '5m', 'NULL', '0', '1h', '')) / 2) FROM t", ); // expression4 assert_sql( "SELECT covariance(a, b) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;", - "SELECT range_fn(covariance(a, b), '5m', 'NULL', '0', '1h') FROM t", + "SELECT range_fn(covariance(a, b), '5m', 'NULL', '0', '1h', '') FROM t", ); // expression5 assert_sql( "SELECT covariance(cos(a), sin(b)) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;", - "SELECT range_fn(covariance(cos(a), sin(b)), '5m', 'NULL', '0', '1h') FROM t", + "SELECT range_fn(covariance(cos(a), sin(b)), '5m', 'NULL', '0', '1h', '') FROM t", ); // expression6 assert_sql( "SELECT ((covariance(a+1, b/2) + sum(b))/2) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;", - "SELECT ((range_fn(covariance(a + 1, b / 2), '5m', 'NULL', '0', '1h') + range_fn(sum(b), '5m', 'NULL', '0', '1h')) / 2) FROM t", + "SELECT ((range_fn(covariance(a + 1, b / 2), '5m', 'NULL', '0', '1h', '') + range_fn(sum(b), '5m', 'NULL', '0', '1h', '')) / 2) FROM t", ); // FILL... ALIGN... assert_sql( "SELECT sum(metrics) RANGE '10m' FROM t FILL NULL ALIGN '1h';", - "SELECT range_fn(sum(metrics), '10m', 'NULL', '0', '1h') FROM t", + "SELECT range_fn(sum(metrics), '10m', 'NULL', '0', '1h', '') FROM t", ); // FILL ... FILL ... @@ -7518,57 +7518,57 @@ fn parse_range_in_expr() { // use range in expr assert_sql( "SELECT rate(a) RANGE '6m' + 1 FROM t ALIGN '1h' FILL NULL;", - "SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h') + 1 FROM t", + "SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + 1 FROM t", ); assert_sql( "SELECT sin(rate(a) RANGE '6m' + 1) FROM t ALIGN '1h' FILL NULL;", - "SELECT sin(range_fn(rate(a), '6m', 'NULL', '0', '1h') + 1) FROM t", + "SELECT sin(range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + 1) FROM t", ); assert_sql( "SELECT sin(first_value(a ORDER BY b ASC NULLS LAST) RANGE '6m' + 1) FROM t ALIGN '1h' by (tag0, tag1) FILL NULL;", - "SELECT sin(range_fn(first_value(a ORDER BY b ASC NULLS LAST), '6m', 'NULL', '2', tag0, tag1, '1h') + 1) FROM t GROUP BY tag0, tag1", + "SELECT sin(range_fn(first_value(a ORDER BY b ASC NULLS LAST), '6m', 'NULL', '2', tag0, tag1, '1h', '') + 1) FROM t GROUP BY tag0, tag1", ); assert_sql( "SELECT sin(count(distinct a) RANGE '6m' + 1) FROM t ALIGN '1h' by (tag0, tag1) FILL NULL;", - "SELECT sin(range_fn(count(DISTINCT a), '6m', 'NULL', '2', tag0, tag1, '1h') + 1) FROM t GROUP BY tag0, tag1", + "SELECT sin(range_fn(count(DISTINCT a), '6m', 'NULL', '2', tag0, tag1, '1h', '') + 1) FROM t GROUP BY tag0, tag1", ); assert_sql( "SELECT sin(rank() OVER (PARTITION BY a ORDER BY b DESC) RANGE '6m' + 1) FROM t ALIGN '1h' by (tag0, tag1) FILL NULL;", - "SELECT sin(range_fn(rank() OVER (PARTITION BY a ORDER BY b DESC), '6m', 'NULL', '2', tag0, tag1, '1h') + 1) FROM t GROUP BY tag0, tag1", + "SELECT sin(range_fn(rank() OVER (PARTITION BY a ORDER BY b DESC), '6m', 'NULL', '2', tag0, tag1, '1h', '') + 1) FROM t GROUP BY tag0, tag1", ); assert_sql( "SELECT sin(cos(round(sin(avg(a + b) RANGE '5m' + 1)))) FROM test ALIGN '1h' by (tag_0,tag_1);", - "SELECT sin(cos(round(sin(range_fn(avg(a + b), '5m', '', '2', tag_0, tag_1, '1h') + 1)))) FROM test GROUP BY tag_0, tag_1", + "SELECT sin(cos(round(sin(range_fn(avg(a + b), '5m', '', '2', tag_0, tag_1, '1h', '') + 1)))) FROM test GROUP BY tag_0, tag_1", ); assert_sql("SELECT rate(a) RANGE '6m' + rate(a) RANGE '5m' FROM t ALIGN '1h' FILL NULL;", - "SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h') + range_fn(rate(a), '5m', 'NULL', '0', '1h') FROM t"); + "SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + range_fn(rate(a), '5m', 'NULL', '0', '1h', '') FROM t"); assert_sql("SELECT (rate(a) RANGE '6m' + rate(a) RANGE '5m')/b + b * rate(a) RANGE '5m' FROM t ALIGN '1h' FILL NULL;", - "SELECT (range_fn(rate(a), '6m', 'NULL', '0', '1h') + range_fn(rate(a), '5m', 'NULL', '0', '1h')) / b + b * range_fn(rate(a), '5m', 'NULL', '0', '1h') FROM t GROUP BY b"); + "SELECT (range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + range_fn(rate(a), '5m', 'NULL', '0', '1h', '')) / b + b * range_fn(rate(a), '5m', 'NULL', '0', '1h', '') FROM t GROUP BY b"); assert_sql("SELECT round(max(a+1) Range '5m' FILL NULL), sin((max(a) + 1) Range '5m' FILL NULL) from t ALIGN '1h' by (b) FILL NULL;", - "SELECT round(range_fn(max(a + 1), '5m', 'NULL', '1', b, '1h')), sin((range_fn(max(a), '5m', 'NULL', '1', b, '1h') + 1)) FROM t GROUP BY b"); + "SELECT round(range_fn(max(a + 1), '5m', 'NULL', '1', b, '1h', '')), sin((range_fn(max(a), '5m', 'NULL', '1', b, '1h', '') + 1)) FROM t GROUP BY b"); assert_sql( "SELECT floor(ceil((min(a * 2) + max(a *2)) RANGE '20s' + 1.0)) FROM t ALIGN '1h';", - "SELECT FLOOR(CEIL((range_fn(min(a * 2), '20s', '', '0', '1h') + range_fn(max(a * 2), '20s', '', '0', '1h')) + 1.0)) FROM t", + "SELECT FLOOR(CEIL((range_fn(min(a * 2), '20s', '', '0', '1h', '') + range_fn(max(a * 2), '20s', '', '0', '1h', '')) + 1.0)) FROM t", ); assert_sql( "SELECT gcd(CAST(max(a + 1) Range '5m' FILL NULL AS Int64), CAST(b AS Int64)) + round(max(c+1) Range '6m' FILL NULL + 1) + max(d+3) Range '10m' FILL NULL * CAST(e AS Float64) + 1 FROM test ALIGN '1h' by (f, g);", - "SELECT gcd(CAST(range_fn(max(a + 1), '5m', 'NULL', '2', f, g, '1h') AS Int64), CAST(b AS Int64)) + round(range_fn(max(c + 1), '6m', 'NULL', '2', f, g, '1h') + 1) + range_fn(max(d + 3), '10m', 'NULL', '2', f, g, '1h') * CAST(e AS Float64) + 1 FROM test GROUP BY b, e, f, g", + "SELECT gcd(CAST(range_fn(max(a + 1), '5m', 'NULL', '2', f, g, '1h', '') AS Int64), CAST(b AS Int64)) + round(range_fn(max(c + 1), '6m', 'NULL', '2', f, g, '1h', '') + 1) + range_fn(max(d + 3), '10m', 'NULL', '2', f, g, '1h', '') * CAST(e AS Float64) + 1 FROM test GROUP BY b, e, f, g", ); // Legal syntax but illegal semantic, nested range semantics are problematic, leave semantic problem to greptimedb assert_sql( "SELECT rate(max(a) RANGE '6m') RANGE '6m' + 1 FROM t ALIGN '1h' FILL NULL;", - "SELECT range_fn(rate(range_fn(max(a), '6m', '')), '6m', 'NULL', '0', '1h') + 1 FROM t", + "SELECT range_fn(rate(range_fn(max(a), '6m', '')), '6m', 'NULL', '0', '1h', '') + 1 FROM t", ); assert_sql_err( @@ -7586,3 +7586,43 @@ fn parse_range_in_expr() { "sql parser error: Can't use the RANGE keyword in Expr 1 without function", ); } + +#[test] +fn parse_range_interval() { + assert_sql( + "SELECT rate(a) RANGE (INTERVAL '1 year 2 hours 3 minutes') FROM t ALIGN (INTERVAL '1 year 2 hours 3 minutes') FILL NULL;", + "SELECT range_fn(rate(a), INTERVAL '1 year 2 hours 3 minutes', 'NULL', '0', INTERVAL '1 year 2 hours 3 minutes', '') FROM t", + ); + assert_sql( + "SELECT rate(a) RANGE (INTERVAL '1' YEAR) FROM t ALIGN (INTERVAL '1' YEAR) FILL NULL;", + "SELECT range_fn(rate(a), INTERVAL '1' YEAR, 'NULL', '0', INTERVAL '1' YEAR, '') FROM t", + ); + assert_sql( + "SELECT sin(count(distinct a) RANGE (INTERVAL '1 year 2 hours 3 minutes') + 1) FROM t ALIGN (INTERVAL '1 year 2 hours 3 minutes') FILL NULL;", + "SELECT sin(range_fn(count(DISTINCT a), INTERVAL '1 year 2 hours 3 minutes', 'NULL', '0', INTERVAL '1 year 2 hours 3 minutes', '') + 1) FROM t", + ); + assert_sql( + "SELECT rate(a) RANGE (INTERVAL '1' YEAR) FROM t ALIGN (INTERVAL '1' YEAR) TO '1970-01-01T00:00:00+08:00' BY (b, c) FILL NULL;", + "SELECT range_fn(rate(a), INTERVAL '1' YEAR, 'NULL', '2', b, c, INTERVAL '1' YEAR, '1970-01-01T00:00:00+08:00') FROM t GROUP BY b, c", + ); + assert_sql_err( + "SELECT rate(a) RANGE INTERVAL '1 year 2 hours 3 minutes' FROM t ALIGN '1h' FILL NULL;", + "sql parser error: Expected end of statement, found: RANGE at Line: 1, Column 16", + ); +} + +#[test] +fn parse_range_to() { + assert_sql( + "SELECT rate(a) RANGE '6m' FROM t ALIGN '1h' TO NOW FILL NULL;", + "SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', 'NOW') FROM t", + ); + assert_sql( + "SELECT rate(a) RANGE '6m' FROM t ALIGN '1h' TO CALENDAR FILL NULL;", + "SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', 'CALENDAR') FROM t", + ); + assert_sql( + "SELECT rate(a) RANGE '6m' FROM t ALIGN '1h' TO '2021-07-01 00:00:00' FILL NULL;", + "SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', '2021-07-01 00:00:00') FROM t", + ); +}