Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 199 additions & 2 deletions prqlc/prqlc/src/sql/dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,41 @@ use strum::VariantNames;

use crate::{Error, Result};

/// Convert a chrono format `Item` back to its strftime string representation.
///
/// `Item::Numeric` values are rendered as `%`, an optional padding flag and a
/// specifier letter. The explicitly listed `Item::Fixed` variants are rendered
/// as their strftime specifier. Every other item is rendered with its `Debug`
/// representation, and a numeric variant without an explicit letter uses its
/// `Debug` name in place of the letter.
fn chrono_item_to_strftime(item: &Item) -> String {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is used in the BigQuery error message to make it easy to see exactly which part of the format is breaking the query:

    Error:
       ╭─[ :6:33 ]
       │
     6 │       d_str = (d | date.to_text "%-m/%d/%Y")
       │                                 ─────┬─────
       │                                      ╰─────── format specifier `%-m` is not supported for BigQuery
    ───╯

otherwise it wouldn't be clear if the culprit was %-m, %d, or %Y.

I have only added this to the BigQuery error message to keep the changes in this PR to a minimum. More than happy to make this change for all dialects if you're ok with this.

// Padding flag placed between `%` and the specifier letter; zero padding
// is rendered without a flag.
let pad_char = |pad: &Pad| match pad {
Pad::None => "-",
Pad::Zero => "",
Pad::Space => "_",
};
// Specifier letter for a numeric field. Variants without an explicit letter
// return early from the closure with their `Debug` name instead.
let numeric_char = |num: &Numeric| -> String {
match num {
Numeric::Year => "Y",
Numeric::YearMod100 => "y",
Numeric::Month => "m",
Numeric::Day => "d",
Numeric::Hour => "H",
Numeric::Hour12 => "I",
Numeric::Minute => "M",
Numeric::Second => "S",
Numeric::Nanosecond => "f",
_ => return format!("{num:?}"),
}
.to_string()
};
match item {
Item::Numeric(num, pad) => format!("%{}{}", pad_char(pad), numeric_char(num)),
Item::Fixed(Fixed::ShortMonthName) => "%b".to_string(),
Item::Fixed(Fixed::LongMonthName) => "%B".to_string(),
Item::Fixed(Fixed::ShortWeekdayName) => "%a".to_string(),
Item::Fixed(Fixed::LongWeekdayName) => "%A".to_string(),
Item::Fixed(Fixed::UpperAmPm) => "%p".to_string(),
Item::Fixed(Fixed::LowerAmPm) => "%P".to_string(),
Item::Fixed(Fixed::RFC3339) => "%+".to_string(),
// Anything without an arm above (other fixed items, literals, whitespace,
// ...) is rendered with its `Debug` output.
_ => format!("{item:?}"),
}
}

/// SQL dialect.
///
/// This only changes the output for a relatively small subset of features.
Expand Down Expand Up @@ -594,6 +629,34 @@ impl DialectHandler for BigQueryDialect {
fn prefers_subquery_parentheses_shorthand(&self) -> bool {
true
}

// https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
fn translate_chrono_item<'a>(&self, item: Item) -> Result<String> {
Ok(match item {
Item::Numeric(Numeric::Year, Pad::Zero) => "%Y".to_string(),
Item::Numeric(Numeric::YearMod100, Pad::Zero) => "%y".to_string(),
Item::Numeric(Numeric::Month, Pad::Zero) => "%m".to_string(),
Item::Numeric(Numeric::Day, Pad::Zero) => "%d".to_string(),
Item::Numeric(Numeric::Hour, Pad::Zero) => "%H".to_string(),
Item::Numeric(Numeric::Hour12, Pad::Zero) => "%I".to_string(),
Item::Numeric(Numeric::Minute, Pad::Zero) => "%M".to_string(),
Item::Numeric(Numeric::Second, Pad::Zero) => "%S".to_string(),
Item::Fixed(Fixed::ShortMonthName) => "%b".to_string(),
Item::Fixed(Fixed::LongMonthName) => "%B".to_string(),
Item::Fixed(Fixed::ShortWeekdayName) => "%a".to_string(),
Item::Fixed(Fixed::LongWeekdayName) => "%A".to_string(),
Item::Fixed(Fixed::UpperAmPm) => "%p".to_string(),
Item::Fixed(Fixed::RFC3339) => "%Y-%m-%dT%H:%M:%S%Ez".to_string(),
Item::Literal(literal) => literal.replace('\'', "''").replace('%', "%%"),
Item::Space(spaces) => spaces.to_string(),
item => {
return Err(Error::new_simple(format!(
"format specifier `{}` is not supported for BigQuery",
chrono_item_to_strftime(&item),
)))
}
})
}
}

impl DialectHandler for SnowflakeDialect {
Expand Down Expand Up @@ -677,9 +740,10 @@ impl DialectHandler for DuckDbDialect {
mod tests {
use std::str::FromStr;

use insta::assert_debug_snapshot;
use chrono::format::{Fixed, Item, Numeric, Pad};
use insta::{assert_debug_snapshot, assert_snapshot};

use super::Dialect;
use super::{chrono_item_to_strftime, BigQueryDialect, Dialect, DialectHandler};

#[test]
fn test_dialect_from_str() {
Expand All @@ -695,6 +759,139 @@ mod tests {
)
");
}

// -- chrono_item_to_strftime tests --

// Zero-padded numerics are expected to render as `%` plus the specifier
// letter, with no padding flag in between.
#[test]
fn chrono_item_to_strftime_numerics_zero_pad() {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not convinced these tests add a lot of value and personally I would drop 90% of them and just leave a few in as spot checks.

However the coverage tool wants high coverage, so I've just gone ahead and covered every code path. Happy to drop this if that's ok.

assert_snapshot!(chrono_item_to_strftime(&Item::Numeric(Numeric::Year, Pad::Zero)), @"%Y");
assert_snapshot!(chrono_item_to_strftime(&Item::Numeric(Numeric::YearMod100, Pad::Zero)), @"%y");
assert_snapshot!(chrono_item_to_strftime(&Item::Numeric(Numeric::Month, Pad::Zero)), @"%m");
assert_snapshot!(chrono_item_to_strftime(&Item::Numeric(Numeric::Day, Pad::Zero)), @"%d");
assert_snapshot!(chrono_item_to_strftime(&Item::Numeric(Numeric::Hour, Pad::Zero)), @"%H");
assert_snapshot!(chrono_item_to_strftime(&Item::Numeric(Numeric::Hour12, Pad::Zero)), @"%I");
assert_snapshot!(chrono_item_to_strftime(&Item::Numeric(Numeric::Minute, Pad::Zero)), @"%M");
assert_snapshot!(chrono_item_to_strftime(&Item::Numeric(Numeric::Second, Pad::Zero)), @"%S");
assert_snapshot!(chrono_item_to_strftime(&Item::Numeric(Numeric::Nanosecond, Pad::Zero)), @"%f");
}

/// Unpadded numerics are rendered with the `-` flag.
#[test]
fn chrono_item_to_strftime_numerics_no_pad() {
    let unpadded = |field: Numeric| chrono_item_to_strftime(&Item::Numeric(field, Pad::None));

    assert_snapshot!(unpadded(Numeric::Day), @"%-d");
    assert_snapshot!(unpadded(Numeric::Month), @"%-m");
}

/// Space-padded numerics are rendered with the `_` flag.
#[test]
fn chrono_item_to_strftime_numerics_space_pad() {
    let space_padded = |field: Numeric| chrono_item_to_strftime(&Item::Numeric(field, Pad::Space));

    assert_snapshot!(space_padded(Numeric::Day), @"%_d");
    assert_snapshot!(space_padded(Numeric::Hour), @"%_H");
}

/// A numeric variant without an explicit specifier letter falls back to its
/// `Debug` name.
#[test]
fn chrono_item_to_strftime_numeric_unknown() {
    let unmapped = Item::Numeric(Numeric::Ordinal, Pad::Zero);
    let rendered = chrono_item_to_strftime(&unmapped);

    assert!(rendered.contains("Ordinal"), "got: {rendered}");
}

/// Every explicitly handled fixed item renders as its strftime specifier.
#[test]
fn chrono_item_to_strftime_fixed() {
    let render = |fixed: Fixed| chrono_item_to_strftime(&Item::Fixed(fixed));

    assert_snapshot!(render(Fixed::ShortMonthName), @"%b");
    assert_snapshot!(render(Fixed::LongMonthName), @"%B");
    assert_snapshot!(render(Fixed::ShortWeekdayName), @"%a");
    assert_snapshot!(render(Fixed::LongWeekdayName), @"%A");
    assert_snapshot!(render(Fixed::UpperAmPm), @"%p");
    assert_snapshot!(render(Fixed::LowerAmPm), @"%P");
    assert_snapshot!(render(Fixed::RFC3339), @"%+");
}

/// A fixed variant without an explicit mapping falls back to `Debug` output.
#[test]
fn chrono_item_to_strftime_fixed_unknown() {
    let unmapped = Item::Fixed(Fixed::TimezoneOffsetColon);
    let rendered = chrono_item_to_strftime(&unmapped);

    assert!(rendered.contains("TimezoneOffsetColon"), "got: {rendered}");
}

/// Literal and whitespace items have no strftime mapping and are rendered
/// with their `Debug` output, which contains the variant name.
#[test]
fn chrono_item_to_strftime_non_numeric_non_fixed() {
    let cases = [(Item::Literal("-"), "Literal"), (Item::Space(" "), "Space")];

    for (item, variant_name) in cases {
        let rendered = chrono_item_to_strftime(&item);
        assert!(rendered.contains(variant_name), "got: {rendered}");
    }
}

// -- BigQueryDialect::translate_chrono_item tests --

/// Zero-padded numeric fields translate to the matching BigQuery element.
#[test]
fn bigquery_translate_numeric_specifiers() {
    let translate = |field: Numeric| {
        BigQueryDialect
            .translate_chrono_item(Item::Numeric(field, Pad::Zero))
            .unwrap()
    };

    assert_snapshot!(translate(Numeric::Year), @"%Y");
    assert_snapshot!(translate(Numeric::YearMod100), @"%y");
    assert_snapshot!(translate(Numeric::Month), @"%m");
    assert_snapshot!(translate(Numeric::Day), @"%d");
    assert_snapshot!(translate(Numeric::Hour), @"%H");
    assert_snapshot!(translate(Numeric::Hour12), @"%I");
    assert_snapshot!(translate(Numeric::Minute), @"%M");
    assert_snapshot!(translate(Numeric::Second), @"%S");
}

/// Supported fixed items translate to the matching BigQuery element;
/// RFC 3339 expands to an explicit element sequence.
#[test]
fn bigquery_translate_fixed_specifiers() {
    let translate = |fixed: Fixed| {
        BigQueryDialect
            .translate_chrono_item(Item::Fixed(fixed))
            .unwrap()
    };

    assert_snapshot!(translate(Fixed::ShortMonthName), @"%b");
    assert_snapshot!(translate(Fixed::LongMonthName), @"%B");
    assert_snapshot!(translate(Fixed::ShortWeekdayName), @"%a");
    assert_snapshot!(translate(Fixed::LongWeekdayName), @"%A");
    assert_snapshot!(translate(Fixed::UpperAmPm), @"%p");
    assert_snapshot!(translate(Fixed::RFC3339), @"%Y-%m-%dT%H:%M:%S%Ez");
}

/// Literal text is passed through, with `'` and `%` escaped by doubling.
#[test]
fn bigquery_translate_literal() {
    let translate = |text: &'static str| {
        BigQueryDialect
            .translate_chrono_item(Item::Literal(text))
            .unwrap()
    };

    // Plain separators are untouched.
    assert_snapshot!(translate("-"), @"-");
    assert_snapshot!(translate("/"), @"/");
    // A single quote becomes two single quotes.
    assert_snapshot!(translate("'"), @"''");
    // A percent sign becomes two percent signs.
    assert_snapshot!(translate("%"), @"%%");
}

/// Whitespace items are passed through unchanged.
#[test]
fn bigquery_translate_space() {
    let translate = |blank: &'static str| {
        BigQueryDialect
            .translate_chrono_item(Item::Space(blank))
            .unwrap()
    };

    assert_snapshot!(translate(" "), @" ");
    assert_snapshot!(translate(" "), @" ");
}

/// Items BigQuery cannot express are rejected, and the error message names
/// the offending strftime specifier.
#[test]
fn bigquery_translate_unsupported_specifier() {
    let rejection = |item: Item| {
        BigQueryDialect
            .translate_chrono_item(item)
            .unwrap_err()
            .to_string()
    };

    // Nanosecond (%f) has no BigQuery mapping here.
    assert_snapshot!(
        rejection(Item::Numeric(Numeric::Nanosecond, Pad::Zero)),
        @r#"Error { kind: Error, span: None, reason: Simple("format specifier `%f` is not supported for BigQuery"), hints: [], code: None }"#
    );

    // Only zero padding is accepted for numerics.
    assert_snapshot!(
        rejection(Item::Numeric(Numeric::Day, Pad::None)),
        @r#"Error { kind: Error, span: None, reason: Simple("format specifier `%-d` is not supported for BigQuery"), hints: [], code: None }"#
    );
    assert_snapshot!(
        rejection(Item::Numeric(Numeric::Month, Pad::None)),
        @r#"Error { kind: Error, span: None, reason: Simple("format specifier `%-m` is not supported for BigQuery"), hints: [], code: None }"#
    );

    // LowerAmPm (%P) has no BigQuery mapping here.
    assert_snapshot!(
        rejection(Item::Fixed(Fixed::LowerAmPm)),
        @r#"Error { kind: Error, span: None, reason: Simple("format specifier `%P` is not supported for BigQuery"), hints: [], code: None }"#
    );
}
}

/*
Expand Down
4 changes: 4 additions & 0 deletions prqlc/prqlc/src/sql/std.sql.prql
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,10 @@ module bigquery {
let radians = column -> s"({column:0} * PI() / 180)"
}

module date {
# Renders `column` as text using `format` via FORMAT_TIMESTAMP; the column is
# cast to TIMESTAMP before formatting.
let to_text = format column -> s"FORMAT_TIMESTAMP({format:0}, CAST({column:0} AS TIMESTAMP))"
}

let regex_search = text pattern -> s"REGEXP_CONTAINS({text:0}, {pattern:0})"
}

Expand Down
19 changes: 0 additions & 19 deletions prqlc/prqlc/tests/integration/error_messages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,25 +262,6 @@ fn date_to_text_generic() {
"#);
}

/// Compiling `date.to_text` for the BigQuery target is expected to fail with
/// the "not yet supported for this dialect" diagnostic.
#[test]
fn date_to_text_not_supported_dialect() {
    let query = r#"
prql target:sql.bigquery

from [{d = @2021-01-01}]
derive {
d_str = (d | date.to_text "%Y/%m/%d")
}"#;

    let diagnostics = compile(query).unwrap_err();

    assert_snapshot!(diagnostics, @r#"
Error:
╭─[ :6:31 ]
6 │ d_str = (d | date.to_text "%Y/%m/%d")
│ ─────┬────
│ ╰────── Date formatting is not yet supported for this dialect
───╯
"#);
}

#[test]
fn date_to_text_with_column_format() {
assert_snapshot!(compile(r#"
Expand Down
25 changes: 25 additions & 0 deletions prqlc/prqlc/tests/integration/sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,10 @@ fn test_quoting_style(#[case] dialect: sql::Dialect, #[case] expected_sql: &'sta
#[case::postgres(sql::Dialect::Postgres, "TO_CHAR(invoice_date, 'DD/MM/YYYY')")]
#[case::mssql(sql::Dialect::MsSql, "FORMAT(invoice_date, 'dd/MM/yyyy')")]
#[case::mysql(sql::Dialect::MySql, "DATE_FORMAT(invoice_date, '%d/%m/%Y')")]
#[case::bigquery(
sql::Dialect::BigQuery,
"FORMAT_TIMESTAMP('%d/%m/%Y', CAST(invoice_date AS TIMESTAMP))"
)]
fn date_to_text_operator(
#[case] dialect: sql::Dialect,
#[case] expected_date_to_text: &'static str,
Expand All @@ -284,6 +288,27 @@ FROM
)
}

/// The `%+` (RFC 3339) specifier compiles for BigQuery into an explicit
/// `FORMAT_TIMESTAMP` element sequence.
#[test]
fn date_to_text_bigquery_rfc3339() {
    let query = r#"
prql target:sql.bigquery

from [{d = @2021-01-01}]
derive {
d_str = (d | date.to_text "%+")
}"#;

    let sql = compile(query).unwrap();

    assert_snapshot!(sql, @"
WITH table_0 AS (
SELECT
DATE '2021-01-01' AS d
)
SELECT
d,
FORMAT_TIMESTAMP('%Y-%m-%dT%H:%M:%S%Ez', CAST(d AS TIMESTAMP)) AS d_str
FROM
table_0
");
}

#[test]
fn json_of_test() {
let pl = prqlc::prql_to_pl("from employees | take 10").unwrap();
Expand Down
4 changes: 2 additions & 2 deletions web/book/src/reference/stdlib/date.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ how the date or timestamp should be structured.

<!-- prettier-ignore -->
> [!NOTE]
> For now the supported DBs are: Clickhouse, DuckDB, MySQL, MSSQL and
> Postgres.
> For now the supported DBs are: BigQuery, ClickHouse, DuckDB, MySQL, MSSQL
> and Postgres.

```prql
prql target:sql.duckdb
Expand Down
Loading