From 5027967e2bbbeb46d3629a9fd4d73d7bf7459ec4 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 30 Aug 2024 20:55:30 +0200 Subject: [PATCH 01/15] Update chsql_extension.cpp --- src/chsql_extension.cpp | 170 ++++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/src/chsql_extension.cpp b/src/chsql_extension.cpp index baa8acc..f738691 100644 --- a/src/chsql_extension.cpp +++ b/src/chsql_extension.cpp @@ -29,102 +29,102 @@ namespace duckdb { static DefaultMacro chsql_macros[] = { // -- Type conversion macros - {DEFAULT_SCHEMA, "toString", {"x", nullptr}, R"(CAST(x AS VARCHAR))"}, - {DEFAULT_SCHEMA, "toInt8", {"x", nullptr}, R"(CAST(x AS INT8))"}, - {DEFAULT_SCHEMA, "toInt16", {"x", nullptr}, R"(CAST(x AS INT16))"}, - {DEFAULT_SCHEMA, "toInt32", {"x", nullptr}, R"(CAST(x AS INT32))"}, - {DEFAULT_SCHEMA, "toInt64", {"x", nullptr}, R"(CAST(x AS INT64))"}, - {DEFAULT_SCHEMA, "toInt128", {"x", nullptr}, R"(CAST(x AS INT128))"}, - {DEFAULT_SCHEMA, "toInt256", {"x", nullptr}, R"(CAST(x AS HUGEINT))"}, - {DEFAULT_SCHEMA, "toInt8OrZero", {"x", nullptr}, R"(CASE WHEN TRY_CAST(x AS INT8) IS NOT NULL THEN CAST(x AS INT8) ELSE 0 END)"}, - {DEFAULT_SCHEMA, "toInt16OrZero", {"x", nullptr}, R"(CASE WHEN TRY_CAST(x AS INT16) IS NOT NULL THEN CAST(x AS INT16) ELSE 0 END)"}, - {DEFAULT_SCHEMA, "toInt32OrZero", {"x", nullptr}, R"(CASE WHEN TRY_CAST(x AS INT32) IS NOT NULL THEN CAST(x AS INT32) ELSE 0 END)"}, - {DEFAULT_SCHEMA, "toInt64OrZero", {"x", nullptr}, R"(CASE WHEN TRY_CAST(x AS INT64) IS NOT NULL THEN CAST(x AS INT64) ELSE 0 END)"}, - {DEFAULT_SCHEMA, "toInt128OrZero", {"x", nullptr}, R"(CASE WHEN TRY_CAST(x AS INT128) IS NOT NULL THEN CAST(x AS INT128) ELSE 0 END)"}, - {DEFAULT_SCHEMA, "toInt256OrZero", {"x", nullptr}, R"(CASE WHEN TRY_CAST(x AS HUGEINT) IS NOT NULL THEN CAST(x AS HUGEINT) ELSE 0 END)"}, - {DEFAULT_SCHEMA, "toInt8OrNull", {"x", nullptr}, R"(TRY_CAST(x AS INT8))"}, - {DEFAULT_SCHEMA, "toInt16OrNull", {"x", nullptr}, R"(TRY_CAST(x AS INT16))"}, - {DEFAULT_SCHEMA, "toInt32OrNull", {"x", nullptr}, R"(TRY_CAST(x AS INT32))"}, - {DEFAULT_SCHEMA, "toInt64OrNull", {"x", nullptr}, R"(TRY_CAST(x AS INT64))"}, - {DEFAULT_SCHEMA, "toInt128OrNull", {"x", nullptr}, R"(TRY_CAST(x AS INT128))"}, - {DEFAULT_SCHEMA, "toInt256OrNull", {"x", nullptr}, R"(TRY_CAST(x AS HUGEINT))"}, + {DEFAULT_SCHEMA, "toString", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS VARCHAR))"}, + {DEFAULT_SCHEMA, "toInt8", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS INT8))"}, + {DEFAULT_SCHEMA, "toInt16", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS INT16))"}, + {DEFAULT_SCHEMA, "toInt32", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS INT32))"}, + {DEFAULT_SCHEMA, "toInt64", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS INT64))"}, + {DEFAULT_SCHEMA, "toInt128", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS INT128))"}, + {DEFAULT_SCHEMA, "toInt256", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS HUGEINT))"}, + {DEFAULT_SCHEMA, "toInt8OrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS INT8) IS NOT NULL THEN CAST(x AS INT8) ELSE 0 END)"}, + {DEFAULT_SCHEMA, "toInt16OrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS INT16) IS NOT NULL THEN CAST(x AS INT16) ELSE 0 END)"}, + {DEFAULT_SCHEMA, "toInt32OrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS INT32) IS NOT NULL THEN CAST(x AS INT32) ELSE 0 END)"}, + {DEFAULT_SCHEMA, "toInt64OrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS INT64) IS NOT NULL THEN CAST(x AS INT64) ELSE 0 END)"}, + {DEFAULT_SCHEMA, "toInt128OrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS INT128) IS NOT NULL THEN CAST(x AS INT128) ELSE 0 END)"}, + {DEFAULT_SCHEMA, "toInt256OrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS HUGEINT) IS NOT NULL THEN CAST(x AS HUGEINT) ELSE 0 END)"}, + {DEFAULT_SCHEMA, "toInt8OrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS INT8))"}, + {DEFAULT_SCHEMA, "toInt16OrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS INT16))"}, + {DEFAULT_SCHEMA, "toInt32OrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS INT32))"}, + {DEFAULT_SCHEMA, "toInt64OrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS INT64))"}, + {DEFAULT_SCHEMA, "toInt128OrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS INT128))"}, + {DEFAULT_SCHEMA, "toInt256OrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS HUGEINT))"}, // -- Unsigned integer conversion macros - {DEFAULT_SCHEMA, "toUInt8", {"x", nullptr}, R"(CAST(x AS UTINYINT))"}, - {DEFAULT_SCHEMA, "toUInt16", {"x", nullptr}, R"(CAST(x AS USMALLINT))"}, - {DEFAULT_SCHEMA, "toUInt32", {"x", nullptr}, R"(CAST(x AS UINTEGER))"}, - {DEFAULT_SCHEMA, "toUInt64", {"x", nullptr}, R"(CAST(x AS UBIGINT))"}, - {DEFAULT_SCHEMA, "toUInt8OrZero", {"x", nullptr}, R"(CASE WHEN TRY_CAST(x AS UTINYINT) IS NOT NULL THEN CAST(x AS UTINYINT) ELSE 0 END)"}, - {DEFAULT_SCHEMA, "toUInt16OrZero", {"x", nullptr}, R"(CASE WHEN TRY_CAST(x AS USMALLINT) IS NOT NULL THEN CAST(x AS USMALLINT) ELSE 0 END)"}, - {DEFAULT_SCHEMA, "toUInt32OrZero", {"x", nullptr}, R"(CASE WHEN TRY_CAST(x AS UINTEGER) IS NOT NULL THEN CAST(x AS UINTEGER) ELSE 0 END)"}, - {DEFAULT_SCHEMA, "toUInt64OrZero", {"x", nullptr}, R"(CASE WHEN TRY_CAST(x AS UBIGINT) IS NOT NULL THEN CAST(x AS UBIGINT) ELSE 0 END)"}, - {DEFAULT_SCHEMA, "toUInt8OrNull", {"x", nullptr}, R"(TRY_CAST(x AS UTINYINT))"}, // Fixed comma here - {DEFAULT_SCHEMA, "toUInt16OrNull", {"x", nullptr}, R"(TRY_CAST(x AS USMALLINT))"}, // And here - {DEFAULT_SCHEMA, "toUInt32OrNull", {"x", nullptr}, R"(TRY_CAST(x AS UINTEGER))"}, // Also here - {DEFAULT_SCHEMA, "toUInt64OrNull", {"x", nullptr}, R"(TRY_CAST(x AS UBIGINT))"}, // And here + {DEFAULT_SCHEMA, "toUInt8", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS UTINYINT))"}, + {DEFAULT_SCHEMA, "toUInt16", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS USMALLINT))"}, + {DEFAULT_SCHEMA, "toUInt32", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS UINTEGER))"}, + {DEFAULT_SCHEMA, "toUInt64", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS UBIGINT))"}, + {DEFAULT_SCHEMA, "toUInt8OrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS UTINYINT) IS NOT NULL THEN CAST(x AS UTINYINT) ELSE 0 END)"}, + {DEFAULT_SCHEMA, "toUInt16OrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS USMALLINT) IS NOT NULL THEN CAST(x AS USMALLINT) ELSE 0 END)"}, + {DEFAULT_SCHEMA, "toUInt32OrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS UINTEGER) IS NOT NULL THEN CAST(x AS UINTEGER) ELSE 0 END)"}, + {DEFAULT_SCHEMA, "toUInt64OrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS UBIGINT) IS NOT NULL THEN CAST(x AS UBIGINT) ELSE 0 END)"}, + {DEFAULT_SCHEMA, "toUInt8OrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS UTINYINT))"}, // Fixed comma here + {DEFAULT_SCHEMA, "toUInt16OrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS USMALLINT))"}, // And here + {DEFAULT_SCHEMA, "toUInt32OrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS UINTEGER))"}, // Also here + {DEFAULT_SCHEMA, "toUInt64OrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS UBIGINT))"}, // And here // -- Floating-point conversion macros - {DEFAULT_SCHEMA, "toFloat", {"x", nullptr}, R"(CAST(x AS DOUBLE))"}, - {DEFAULT_SCHEMA, "toFloatOrNull", {"x", nullptr}, R"(TRY_CAST(x AS DOUBLE))"}, - {DEFAULT_SCHEMA, "toFloatOrZero", {"x", nullptr}, R"(CASE WHEN TRY_CAST(x AS DOUBLE) IS NOT NULL THEN CAST(x AS DOUBLE) ELSE 0 END)"}, + {DEFAULT_SCHEMA, "toFloat", {"x", nullptr}, {{nullptr, nullptr}}, R"(CAST(x AS DOUBLE))"}, + {DEFAULT_SCHEMA, "toFloatOrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS DOUBLE))"}, + {DEFAULT_SCHEMA, "toFloatOrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS DOUBLE) IS NOT NULL THEN CAST(x AS DOUBLE) ELSE 0 END)"}, // -- Arithmetic macros - {DEFAULT_SCHEMA, "intDiv", {"a", "b"}, R"((CAST(a AS BIGINT) // CAST(b AS BIGINT)))"}, - {DEFAULT_SCHEMA, "intDivOrNull", {"a", "b"}, R"(TRY_CAST((TRY_CAST(a AS BIGINT) // TRY_CAST(b AS BIGINT)) AS BIGINT))"}, - {DEFAULT_SCHEMA, "intDivOZero", {"x", nullptr}, R"(COALESCE((TRY_CAST((TRY_CAST(a AS BIGINT) // TRY_CAST(b AS BIGINT)) AS BIGINT)),0))"}, - {DEFAULT_SCHEMA, "plus", {"a", "b"}, R"(add(a, b))"}, - {DEFAULT_SCHEMA, "minus", {"a", "b"}, R"(subtract(a, b))"}, - {DEFAULT_SCHEMA, "modulo", {"a", "b"}, R"(CAST(a AS BIGINT) % CAST(b AS BIGINT))"}, - {DEFAULT_SCHEMA, "moduloOrZero", {"a", "b"}, R"(COALESCE(((TRY_CAST(a AS BIGINT) % TRY_CAST(b AS BIGINT))),0))"}, + {DEFAULT_SCHEMA, "intDiv", {"a", "b"}, {{nullptr, nullptr}}, R"((CAST(a AS BIGINT) // CAST(b AS BIGINT)))"}, + {DEFAULT_SCHEMA, "intDivOrNull", {"a", "b"}, {{nullptr, nullptr}}, R"(TRY_CAST((TRY_CAST(a AS BIGINT) // TRY_CAST(b AS BIGINT)) AS BIGINT))"}, + {DEFAULT_SCHEMA, "intDivOZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(COALESCE((TRY_CAST((TRY_CAST(a AS BIGINT) // TRY_CAST(b AS BIGINT)) AS BIGINT)),0))"}, + {DEFAULT_SCHEMA, "plus", {"a", "b"}, {{nullptr, nullptr}}, R"(add(a, b))"}, + {DEFAULT_SCHEMA, "minus", {"a", "b"}, {{nullptr, nullptr}}, R"(subtract(a, b))"}, + {DEFAULT_SCHEMA, "modulo", {"a", "b"}, {{nullptr, nullptr}}, R"(CAST(a AS BIGINT) % CAST(b AS BIGINT))"}, + {DEFAULT_SCHEMA, "moduloOrZero", {"a", "b"}, {{nullptr, nullptr}}, R"(COALESCE(((TRY_CAST(a AS BIGINT) % TRY_CAST(b AS BIGINT))),0))"}, // -- Tuple macros - {DEFAULT_SCHEMA, "tupleIntDiv", {"a", "b"}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] // CAST(x AS BIGINT)))"}, - {DEFAULT_SCHEMA, "tupleIntDivByNumber", {"a", "b"}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) // CAST(x AS BIGINT)))"}, - {DEFAULT_SCHEMA, "tupleDivide", {"a", "b"}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] / CAST(x AS BIGINT)))"}, - {DEFAULT_SCHEMA, "tupleMultiply", {"a", "b"}, R"(apply(a, (x,i) -> CAST(apply(b, x -> CAST(x AS BIGINT))[i] as BIGINT) * CAST(x AS BIGINT)))"}, - {DEFAULT_SCHEMA, "tupleMinus", {"a", "b"}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] - CAST(x AS BIGINT)))"}, - {DEFAULT_SCHEMA, "tuplePlus", {"a", "b"}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] + CAST(x AS BIGINT)))"}, - {DEFAULT_SCHEMA, "tupleMultiplyByNumber", {"a", "b"}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) * CAST(x AS BIGINT)))"}, - {DEFAULT_SCHEMA, "tupleDivideByNumber", {"a", "b"}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) / CAST(x AS BIGINT)))"}, - {DEFAULT_SCHEMA, "tupleModulo", {"a", "b"}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"}, - {DEFAULT_SCHEMA, "tupleModuloByNumber", {"a", "b"}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"}, - {DEFAULT_SCHEMA, "tupleConcat", {"a", "b"}, R"(list_concat(a, b))"}, + {DEFAULT_SCHEMA, "tupleIntDiv", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] // CAST(x AS BIGINT)))"}, + {DEFAULT_SCHEMA, "tupleIntDivByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) // CAST(x AS BIGINT)))"}, + {DEFAULT_SCHEMA, "tupleDivide", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] / CAST(x AS BIGINT)))"}, + {DEFAULT_SCHEMA, "tupleMultiply", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> CAST(apply(b, x -> CAST(x AS BIGINT))[i] as BIGINT) * CAST(x AS BIGINT)))"}, + {DEFAULT_SCHEMA, "tupleMinus", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] - CAST(x AS BIGINT)))"}, + {DEFAULT_SCHEMA, "tuplePlus", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] + CAST(x AS BIGINT)))"}, + {DEFAULT_SCHEMA, "tupleMultiplyByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) * CAST(x AS BIGINT)))"}, + {DEFAULT_SCHEMA, "tupleDivideByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) / CAST(x AS BIGINT)))"}, + {DEFAULT_SCHEMA, "tupleModulo", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"}, + {DEFAULT_SCHEMA, "tupleModuloByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"}, + {DEFAULT_SCHEMA, "tupleConcat", {"a", "b"}, {{nullptr, nullptr}}, R"(list_concat(a, b))"}, // -- String matching macros - {DEFAULT_SCHEMA, "match", {"string", "token"}, R"(string LIKE token)"}, + {DEFAULT_SCHEMA, "match", {"string", "token"}, {{nullptr, nullptr}}, R"(string LIKE token)"}, // -- Array macros - {DEFAULT_SCHEMA, "arrayExists", {"needle", "haystack"}, R"(haystack @> ARRAY[needle])"}, - {DEFAULT_SCHEMA, "arrayMap", {"e", "arr"}, R"(array_transform(arr, e -> (e * e)))"}, + {DEFAULT_SCHEMA, "arrayExists", {"needle", "haystack"}, {{nullptr, nullptr}}, R"(haystack @> ARRAY[needle])"}, + {DEFAULT_SCHEMA, "arrayMap", {"e", "arr"}, {{nullptr, nullptr}}, R"(array_transform(arr, e -> (e * e)))"}, // Date and Time Functions - {DEFAULT_SCHEMA, "toYear", {"date_expression", nullptr}, R"(EXTRACT(YEAR FROM date_expression))"}, - {DEFAULT_SCHEMA, "toMonth", {"date_expression", nullptr}, R"(EXTRACT(MONTH FROM date_expression))"}, - {DEFAULT_SCHEMA, "toDayOfMonth", {"date_expression", nullptr}, R"(EXTRACT(DAY FROM date_expression))"}, - {DEFAULT_SCHEMA, "toHour", {"date_expression", nullptr}, R"(EXTRACT(HOUR FROM date_expression))"}, - {DEFAULT_SCHEMA, "toMinute", {"date_expression", nullptr}, R"(EXTRACT(MINUTE FROM date_expression))"}, - {DEFAULT_SCHEMA, "toSecond", {"date_expression", nullptr}, R"(EXTRACT(SECOND FROM date_expression))"}, - {DEFAULT_SCHEMA, "toYYYYMM", {"date_expression", nullptr}, R"(DATE_FORMAT(date_expression, '%Y%m'))"}, - {DEFAULT_SCHEMA, "toYYYYMMDD", {"date_expression", nullptr}, R"(DATE_FORMAT(date_expression, '%Y%m%d'))"}, - {DEFAULT_SCHEMA, "toYYYYMMDDhhmmss", {"date_expression", nullptr}, R"(DATE_FORMAT(date_expression, '%Y%m%d%H%M%S'))"}, - {DEFAULT_SCHEMA, "formatDateTime", {"time", "format", "timezone", nullptr}, R"(CASE WHEN timezone IS NULL THEN strftime(time, format) ELSE strftime(time AT TIME ZONE timezone, format) END)"}, + {DEFAULT_SCHEMA, "toYear", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(EXTRACT(YEAR FROM date_expression))"}, + {DEFAULT_SCHEMA, "toMonth", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(EXTRACT(MONTH FROM date_expression))"}, + {DEFAULT_SCHEMA, "toDayOfMonth", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(EXTRACT(DAY FROM date_expression))"}, + {DEFAULT_SCHEMA, "toHour", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(EXTRACT(HOUR FROM date_expression))"}, + {DEFAULT_SCHEMA, "toMinute", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(EXTRACT(MINUTE FROM date_expression))"}, + {DEFAULT_SCHEMA, "toSecond", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(EXTRACT(SECOND FROM date_expression))"}, + {DEFAULT_SCHEMA, "toYYYYMM", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(DATE_FORMAT(date_expression, '%Y%m'))"}, + {DEFAULT_SCHEMA, "toYYYYMMDD", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(DATE_FORMAT(date_expression, '%Y%m%d'))"}, + {DEFAULT_SCHEMA, "toYYYYMMDDhhmmss", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(DATE_FORMAT(date_expression, '%Y%m%d%H%M%S'))"}, + {DEFAULT_SCHEMA, "formatDateTime", {"time", "format", "timezone", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN timezone IS NULL THEN strftime(time, format) ELSE strftime(time AT TIME ZONE timezone, format) END)"}, // String Functions - {DEFAULT_SCHEMA, "empty", {"str", nullptr}, R"(LENGTH(str) = 0)"}, - {DEFAULT_SCHEMA, "notEmpty", {"str", nullptr}, R"(LENGTH(str) > 0)"}, - {DEFAULT_SCHEMA, "lengthUTF8", {"str", nullptr}, R"(LENGTH(str))"}, - {DEFAULT_SCHEMA, "leftPad", {"str", "length", "pad_str", nullptr}, R"(LPAD(str, length, pad_str))"}, - {DEFAULT_SCHEMA, "rightPad", {"str", "length", "pad_str", nullptr}, R"(RPAD(str, length, pad_str))"}, - {DEFAULT_SCHEMA, "extractAllGroups", {"text", "pattern", nullptr}, R"(regexp_extract_all(text, pattern))"}, - {DEFAULT_SCHEMA, "toFixedString", {"str", "length", nullptr}, R"(RPAD(LEFT(str, length), length, '\0'))"}, - {DEFAULT_SCHEMA, "ifNull", {"x", "y", nullptr}, R"(COALESCE(x, y))"}, - {DEFAULT_SCHEMA, "arrayJoin", {"arr", nullptr}, R"(UNNEST(arr))"}, - {DEFAULT_SCHEMA, "splitByChar", {"separator", "str", nullptr}, R"(string_split(str, separator))"}, + {DEFAULT_SCHEMA, "empty", {"str", nullptr}, {{nullptr, nullptr}}, R"(LENGTH(str) = 0)"}, + {DEFAULT_SCHEMA, "notEmpty", {"str", nullptr}, {{nullptr, nullptr}}, R"(LENGTH(str) > 0)"}, + {DEFAULT_SCHEMA, "lengthUTF8", {"str", nullptr}, {{nullptr, nullptr}}, R"(LENGTH(str))"}, + {DEFAULT_SCHEMA, "leftPad", {"str", "length", "pad_str", nullptr}, {{nullptr, nullptr}}, R"(LPAD(str, length, pad_str))"}, + {DEFAULT_SCHEMA, "rightPad", {"str", "length", "pad_str", nullptr}, {{nullptr, nullptr}}, R"(RPAD(str, length, pad_str))"}, + {DEFAULT_SCHEMA, "extractAllGroups", {"text", "pattern", nullptr}, {{nullptr, nullptr}}, R"(regexp_extract_all(text, pattern))"}, + {DEFAULT_SCHEMA, "toFixedString", {"str", "length", nullptr}, {{nullptr, nullptr}}, R"(RPAD(LEFT(str, length), length, '\0'))"}, + {DEFAULT_SCHEMA, "ifNull", {"x", "y", nullptr}, {{nullptr, nullptr}}, R"(COALESCE(x, y))"}, + {DEFAULT_SCHEMA, "arrayJoin", {"arr", nullptr}, {{nullptr, nullptr}}, R"(UNNEST(arr))"}, + {DEFAULT_SCHEMA, "splitByChar", {"separator", "str", nullptr}, {{nullptr, nullptr}}, R"(string_split(str, separator))"}, // URL Functions - {DEFAULT_SCHEMA, "protocol", {"url", nullptr}, R"(REGEXP_EXTRACT(url, '^(\w+)://', 1))"}, - {DEFAULT_SCHEMA, "domain", {"url", nullptr}, R"(REGEXP_EXTRACT(url, '://([^/]+)', 1))"}, - {DEFAULT_SCHEMA, "topLevelDomain", {"url", nullptr}, R"(REGEXP_EXTRACT(url, '\.([^./:]+)([:/]|$)', 1))"}, - {DEFAULT_SCHEMA, "path", {"url", nullptr}, R"(REGEXP_EXTRACT(url, '://[^/]+(/.*)', 1))"}, + {DEFAULT_SCHEMA, "protocol", {"url", nullptr}, {{nullptr, nullptr}}, R"(REGEXP_EXTRACT(url, '^(\w+)://', 1))"}, + {DEFAULT_SCHEMA, "domain", {"url", nullptr}, {{nullptr, nullptr}}, R"(REGEXP_EXTRACT(url, '://([^/]+)', 1))"}, + {DEFAULT_SCHEMA, "topLevelDomain", {"url", nullptr}, {{nullptr, nullptr}}, R"(REGEXP_EXTRACT(url, '\.([^./:]+)([:/]|$)', 1))"}, + {DEFAULT_SCHEMA, "path", {"url", nullptr}, {{nullptr, nullptr}}, R"(REGEXP_EXTRACT(url, '://[^/]+(/.*)', 1))"}, // IP Address Functions - {DEFAULT_SCHEMA, "IPv4NumToString", {"num", nullptr}, R"(CONCAT(CAST((num >> 24) & 255 AS VARCHAR), '.', CAST((num >> 16) & 255 AS VARCHAR), '.', CAST((num >> 8) & 255 AS VARCHAR), '.', CAST(num & 255 AS VARCHAR)))"}, - {DEFAULT_SCHEMA, "IPv4StringToNum", {"ip", nullptr}, R"(CAST(SPLIT_PART(ip, '.', 1) AS INTEGER) * 256 * 256 * 256 + CAST(SPLIT_PART(ip, '.', 2) AS INTEGER) * 256 * 256 + CAST(SPLIT_PART(ip, '.', 3) AS INTEGER) * 256 + CAST(SPLIT_PART(ip, '.', 4) AS INTEGER))"}, + {DEFAULT_SCHEMA, "IPv4NumToString", {"num", nullptr}, {{nullptr, nullptr}}, R"(CONCAT(CAST((num >> 24) & 255 AS VARCHAR), '.', CAST((num >> 16) & 255 AS VARCHAR), '.', CAST((num >> 8) & 255 AS VARCHAR), '.', CAST(num & 255 AS VARCHAR)))"}, + {DEFAULT_SCHEMA, "IPv4StringToNum", {"ip", nullptr}, {{nullptr, nullptr}}, R"(CAST(SPLIT_PART(ip, '.', 1) AS INTEGER) * 256 * 256 * 256 + CAST(SPLIT_PART(ip, '.', 2) AS INTEGER) * 256 * 256 + CAST(SPLIT_PART(ip, '.', 3) AS INTEGER) * 256 + CAST(SPLIT_PART(ip, '.', 4) AS INTEGER))"}, // -- Misc macros - {DEFAULT_SCHEMA, "generateUUIDv4", {nullptr}, R"(toString(uuid()))"}, - {DEFAULT_SCHEMA, "parseURL", {"url", "part", nullptr}, R"(CASE part WHEN 'protocol' THEN REGEXP_EXTRACT(url, '^(\w+)://') WHEN 'domain' THEN REGEXP_EXTRACT(url, '://([^/:]+)') WHEN 'port' THEN REGEXP_EXTRACT(url, ':(\d+)') WHEN 'path' THEN REGEXP_EXTRACT(url, '://[^/]+(/.+?)(\?|#|$)') WHEN 'query' THEN REGEXP_EXTRACT(url, '\?([^#]+)') WHEN 'fragment' THEN REGEXP_EXTRACT(url, '#(.+)$') END)"}, - {DEFAULT_SCHEMA, "bitCount", {"num", nullptr}, R"(BIT_COUNT(num))"}, - {nullptr, nullptr, {nullptr}, nullptr}}; + {DEFAULT_SCHEMA, "generateUUIDv4", {nullptr}, {{nullptr, nullptr}}, R"(toString(uuid()))"}, + {DEFAULT_SCHEMA, "parseURL", {"url", "part", nullptr}, {{nullptr, nullptr}}, R"(CASE part WHEN 'protocol' THEN REGEXP_EXTRACT(url, '^(\w+)://') WHEN 'domain' THEN REGEXP_EXTRACT(url, '://([^/:]+)') WHEN 'port' THEN REGEXP_EXTRACT(url, ':(\d+)') WHEN 'path' THEN REGEXP_EXTRACT(url, '://[^/]+(/.+?)(\?|#|$)') WHEN 'query' THEN REGEXP_EXTRACT(url, '\?([^#]+)') WHEN 'fragment' THEN REGEXP_EXTRACT(url, '#(.+)$') END)"}, + {DEFAULT_SCHEMA, "bitCount", {"num", nullptr}, {{nullptr, nullptr}}, R"(BIT_COUNT(num))"}, + {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr}}; // To add a new table SQL macro, add a new macro to this array! // Copy and paste the top item in the array into the From 5cc9511909a5804c95e27b943c5b4044f05191bf Mon Sep 17 00:00:00 2001 From: lmangani Date: Fri, 30 Aug 2024 19:06:40 +0000 Subject: [PATCH 02/15] update submodules --- duckdb | 2 +- extension-ci-tools | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/duckdb b/duckdb index 1f98600..2be970d 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 1f98600c2cf8722a6d2f2d805bb4af5e701319fc +Subproject commit 2be970dda0e5047b1075f938691455d63ba63a67 diff --git a/extension-ci-tools b/extension-ci-tools index c60db58..638a972 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit c60db58eabacf6746fe1972d6874ae44d4d17e9e +Subproject commit 638a97210d162f6133fea31c6b524c516d10e515 From ffcf27edbe39c250428744903263f2dba94643a9 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 30 Aug 2024 22:02:09 +0200 Subject: [PATCH 03/15] Update default_table_functions.cpp --- src/default_table_functions.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/default_table_functions.cpp b/src/default_table_functions.cpp index dd0e60c..b0755c8 100644 --- a/src/default_table_functions.cpp +++ b/src/default_table_functions.cpp @@ -1,4 +1,4 @@ -#include "default_table_functions.hpp" +#include "duckdb/catalog/default/default_table_functions.hpp" #include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp" #include "duckdb/parser/parser.hpp" #include "duckdb/parser/parsed_data/create_macro_info.hpp" @@ -95,7 +95,7 @@ DefaultTableFunctionGenerator::CreateInternalTableMacroInfo(const DefaultTableMa bind_info->name = default_macro.name; bind_info->temporary = true; bind_info->internal = true; - bind_info->function = std::move(function); + bind_info->macros.push_back(std::move(function)); return bind_info; } From f8882addb9db670990ca180e82e8d5a070cf7ede Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 30 Aug 2024 22:07:09 +0200 Subject: [PATCH 04/15] Update default_table_functions.cpp --- src/default_table_functions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/default_table_functions.cpp b/src/default_table_functions.cpp index b0755c8..0cd271b 100644 --- a/src/default_table_functions.cpp +++ b/src/default_table_functions.cpp @@ -1,4 +1,4 @@ -#include "duckdb/catalog/default/default_table_functions.hpp" +#include "default_table_functions.hpp" #include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp" #include "duckdb/parser/parser.hpp" #include "duckdb/parser/parsed_data/create_macro_info.hpp" From af1a037961cc9945c282e02b99e4568d24e68bec Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 2 Sep 2024 18:04:01 +0200 Subject: [PATCH 05/15] Fix indentation --- src/chsql_extension.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/chsql_extension.cpp b/src/chsql_extension.cpp index f738691..5f71cf3 100644 --- a/src/chsql_extension.cpp +++ b/src/chsql_extension.cpp @@ -124,7 +124,8 @@ static DefaultMacro chsql_macros[] = { {DEFAULT_SCHEMA, "generateUUIDv4", {nullptr}, {{nullptr, nullptr}}, R"(toString(uuid()))"}, {DEFAULT_SCHEMA, "parseURL", {"url", "part", nullptr}, {{nullptr, nullptr}}, R"(CASE part WHEN 'protocol' THEN REGEXP_EXTRACT(url, '^(\w+)://') WHEN 'domain' THEN REGEXP_EXTRACT(url, '://([^/:]+)') WHEN 'port' THEN REGEXP_EXTRACT(url, ':(\d+)') WHEN 'path' THEN REGEXP_EXTRACT(url, '://[^/]+(/.+?)(\?|#|$)') WHEN 'query' THEN REGEXP_EXTRACT(url, '\?([^#]+)') WHEN 'fragment' THEN REGEXP_EXTRACT(url, '#(.+)$') END)"}, {DEFAULT_SCHEMA, "bitCount", {"num", nullptr}, {{nullptr, nullptr}}, R"(BIT_COUNT(num))"}, - {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr}}; + {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr} + }; // To add a new table SQL macro, add a new macro to this array! // Copy and paste the top item in the array into the From 4a8c69f2f7df50d418d9376dddd1f5db1f823096 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Wed, 4 Sep 2024 14:08:38 +0200 Subject: [PATCH 06/15] Update default_functions.hpp --- src/include/default_functions.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/include/default_functions.hpp b/src/include/default_functions.hpp index 3466585..79c125c 100644 --- a/src/include/default_functions.hpp +++ b/src/include/default_functions.hpp @@ -10,6 +10,8 @@ #include "duckdb/catalog/default/default_generator.hpp" #include "duckdb/parser/parsed_data/create_macro_info.hpp" +#include "duckdb/common/array_ptr.hpp" +#include "duckdb/catalog/default/default_table_functions.hpp" namespace duckdb { class SchemaCatalogEntry; @@ -18,6 +20,7 @@ struct DefaultMacro { const char *schema; const char *name; const char *parameters[8]; + DefaultNamedParameter named_parameters[8]; const char *macro; }; @@ -28,14 +31,11 @@ class DefaultFunctionGenerator : public DefaultGenerator { SchemaCatalogEntry &schema; DUCKDB_API static unique_ptr CreateInternalMacroInfo(const DefaultMacro &default_macro); + DUCKDB_API static unique_ptr CreateInternalMacroInfo(array_ptr macro); public: unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; vector GetDefaultEntries() override; - -private: - static unique_ptr CreateInternalMacroInfo(const DefaultMacro &default_macro, - unique_ptr function); }; } // namespace duckdb From f867a49fac2721cb58b42211a8aa74eeb8c4b090 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Wed, 4 Sep 2024 14:09:14 +0200 Subject: [PATCH 07/15] Update default_table_functions.hpp From b61cebcd1cfb3fc4dca30b33149cfc8f4b633e4a Mon Sep 17 00:00:00 2001 From: lmangani Date: Thu, 5 Sep 2024 10:52:03 +0000 Subject: [PATCH 08/15] revert changes --- src/chsql_extension.cpp | 3 +-- src/include/default_functions.hpp | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/chsql_extension.cpp b/src/chsql_extension.cpp index 5f71cf3..f738691 100644 --- a/src/chsql_extension.cpp +++ b/src/chsql_extension.cpp @@ -124,8 +124,7 @@ static DefaultMacro chsql_macros[] = { {DEFAULT_SCHEMA, "generateUUIDv4", {nullptr}, {{nullptr, nullptr}}, R"(toString(uuid()))"}, {DEFAULT_SCHEMA, "parseURL", {"url", "part", nullptr}, {{nullptr, nullptr}}, R"(CASE part WHEN 'protocol' THEN REGEXP_EXTRACT(url, '^(\w+)://') WHEN 'domain' THEN REGEXP_EXTRACT(url, '://([^/:]+)') WHEN 'port' THEN REGEXP_EXTRACT(url, ':(\d+)') WHEN 'path' THEN REGEXP_EXTRACT(url, '://[^/]+(/.+?)(\?|#|$)') WHEN 'query' THEN REGEXP_EXTRACT(url, '\?([^#]+)') WHEN 'fragment' THEN REGEXP_EXTRACT(url, '#(.+)$') END)"}, {DEFAULT_SCHEMA, "bitCount", {"num", nullptr}, {{nullptr, nullptr}}, R"(BIT_COUNT(num))"}, - {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr} - }; + {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr}}; // To add a new table SQL macro, add a new macro to this array! // Copy and paste the top item in the array into the diff --git a/src/include/default_functions.hpp b/src/include/default_functions.hpp index 79c125c..3466585 100644 --- a/src/include/default_functions.hpp +++ b/src/include/default_functions.hpp @@ -10,8 +10,6 @@ #include "duckdb/catalog/default/default_generator.hpp" #include "duckdb/parser/parsed_data/create_macro_info.hpp" -#include "duckdb/common/array_ptr.hpp" -#include "duckdb/catalog/default/default_table_functions.hpp" namespace duckdb { class SchemaCatalogEntry; @@ -20,7 +18,6 @@ struct DefaultMacro { const char *schema; const char *name; const char *parameters[8]; - DefaultNamedParameter named_parameters[8]; const char *macro; }; @@ -31,11 +28,14 @@ class DefaultFunctionGenerator : public DefaultGenerator { SchemaCatalogEntry &schema; DUCKDB_API static unique_ptr CreateInternalMacroInfo(const DefaultMacro &default_macro); - DUCKDB_API static unique_ptr CreateInternalMacroInfo(array_ptr macro); public: unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; vector GetDefaultEntries() override; + +private: + static unique_ptr CreateInternalMacroInfo(const DefaultMacro &default_macro, + unique_ptr function); }; } // namespace duckdb From 3f41860835d758413eac488b437df599fbc0ef80 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Thu, 5 Sep 2024 12:53:26 +0200 Subject: [PATCH 09/15] Update description.yml --- description.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/description.yml b/description.yml index 5cb4a51..8cb664e 100644 --- a/description.yml +++ b/description.yml @@ -11,6 +11,7 @@ extension: repo: github: lmangani/duckdb-extension-clickhouse-sql ref: 3a81f48b9ea4262eaaa5c40076ad4e6202065472 + ref_next: b61cebcd1cfb3fc4dca30b33149cfc8f4b633e4a docs: hello_world: | From 5c11e953fcf65524198f6ab39e4550d11966c2ab Mon Sep 17 00:00:00 2001 From: lmangani Date: Mon, 9 Sep 2024 16:38:10 +0000 Subject: [PATCH 10/15] duckdb 1.1.0 --- duckdb | 2 +- extension-ci-tools | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/duckdb b/duckdb index 2be970d..fa5c2fe 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 2be970dda0e5047b1075f938691455d63ba63a67 +Subproject commit fa5c2fe15f3da5f32397b009196c0895fce60820 diff --git a/extension-ci-tools b/extension-ci-tools index 638a972..2f99e2c 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 638a97210d162f6133fea31c6b524c516d10e515 +Subproject commit 2f99e2c15aa5120b6dae8ffe5e4e29fd54dd9eb8 From fa6b3302470bd046b23622c658103f345c3bc372 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 9 Sep 2024 18:42:48 +0200 Subject: [PATCH 11/15] Switch to 1.1.0 headers --- src/chsql_extension.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/chsql_extension.cpp b/src/chsql_extension.cpp index f738691..9a7e868 100644 --- a/src/chsql_extension.cpp +++ b/src/chsql_extension.cpp @@ -7,13 +7,12 @@ #include "duckdb/function/scalar_function.hpp" #include "duckdb/main/extension_util.hpp" #include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/catalog/default/default_functions.hpp" +#include "duckdb/catalog/default/default_table_functions.hpp" // OpenSSL linked through vcpkg #include -#include "default_functions.hpp" -#include "default_table_functions.hpp" - namespace duckdb { // To add a new scalar SQL macro, add a new macro to this array! From c7afdc4798f7045ace6ae1c15af528ac1638e68d Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 9 Sep 2024 18:54:14 +0200 Subject: [PATCH 12/15] Switch action to main --- .github/workflows/MainDistributionPipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 2a95f31..683f245 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -17,7 +17,7 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - duckdb_version: v1.0.0 + duckdb_version: main extension_name: chsql duckdb-stable-deploy: @@ -26,7 +26,7 @@ jobs: uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main secrets: inherit with: - duckdb_version: v1.0.0 + duckdb_version: main extension_name: chsql deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} deploy_versioned: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} From 5880237fa93ff3853d902469aca37c843a914f2d Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 9 Sep 2024 19:01:57 +0200 Subject: [PATCH 13/15] Delete src/default_table_functions.cpp --- src/default_table_functions.cpp | 148 -------------------------------- 1 file changed, 148 deletions(-) delete mode 100644 src/default_table_functions.cpp diff --git a/src/default_table_functions.cpp b/src/default_table_functions.cpp deleted file mode 100644 index 0cd271b..0000000 --- a/src/default_table_functions.cpp +++ /dev/null @@ -1,148 +0,0 @@ -#include "default_table_functions.hpp" -#include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp" -#include "duckdb/parser/parser.hpp" -#include "duckdb/parser/parsed_data/create_macro_info.hpp" -#include "duckdb/parser/statement/select_statement.hpp" -#include "duckdb/function/table_macro_function.hpp" - -namespace duckdb { - -// clang-format off -static const DefaultTableMacro internal_table_macros[] = { - {DEFAULT_SCHEMA, "histogram_values", {"source", "col_name", nullptr}, {{"bin_count", "10"}, {"technique", "'auto'"}, {nullptr, nullptr}}, R"( -WITH bins AS ( - SELECT - CASE - WHEN (NOT (can_cast_implicitly(MIN(col_name), NULL::BIGINT) OR - can_cast_implicitly(MIN(col_name), NULL::DOUBLE) OR - can_cast_implicitly(MIN(col_name), NULL::TIMESTAMP)) AND technique='auto') - OR technique='sample' - THEN - approx_top_k(col_name, bin_count) - WHEN technique='equi-height' - THEN - quantile(col_name, [x / bin_count::DOUBLE for x in generate_series(1, bin_count)]) - WHEN technique='equi-width' - THEN - equi_width_bins(MIN(col_name), MAX(col_name), bin_count, false) - WHEN technique='equi-width-nice' OR technique='auto' - THEN - equi_width_bins(MIN(col_name), MAX(col_name), bin_count, true) - ELSE - error(concat('Unrecognized technique ', technique)) - END AS bins - FROM query_table(source::VARCHAR) - ) -SELECT UNNEST(map_keys(histogram)) AS bin, UNNEST(map_values(histogram)) AS count -FROM ( - SELECT CASE - WHEN (NOT (can_cast_implicitly(MIN(col_name), NULL::BIGINT) OR - can_cast_implicitly(MIN(col_name), NULL::DOUBLE) OR - can_cast_implicitly(MIN(col_name), NULL::TIMESTAMP)) AND technique='auto') - OR technique='sample' - THEN - histogram_exact(col_name, bins) - ELSE - histogram(col_name, bins) - END AS histogram - FROM query_table(source::VARCHAR), bins -); -)"}, - {DEFAULT_SCHEMA, "histogram", {"source", "col_name", nullptr}, {{"bin_count", "10"}, {"technique", "'auto'"}, {nullptr, nullptr}}, R"( -SELECT - CASE - WHEN is_histogram_other_bin(bin) - THEN '(other values)' - WHEN (NOT (can_cast_implicitly(bin, NULL::BIGINT) OR - can_cast_implicitly(bin, NULL::DOUBLE) OR - can_cast_implicitly(bin, NULL::TIMESTAMP)) AND technique='auto') - OR technique='sample' - THEN bin::VARCHAR - WHEN row_number() over () = 1 - THEN concat('x <= ', bin::VARCHAR) - ELSE concat(lag(bin::VARCHAR) over (), ' < x <= ', bin::VARCHAR) - END AS bin, - count, - bar(count, 0, max(count) over ()) AS bar -FROM histogram_values(source, col_name, bin_count := bin_count, technique := technique); -)"}, - {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr} - }; -// clang-format on - -DefaultTableFunctionGenerator::DefaultTableFunctionGenerator(Catalog &catalog, SchemaCatalogEntry &schema) - : DefaultGenerator(catalog), schema(schema) { -} - -unique_ptr -DefaultTableFunctionGenerator::CreateInternalTableMacroInfo(const DefaultTableMacro &default_macro, - unique_ptr function) { - for (idx_t param_idx = 0; default_macro.parameters[param_idx] != nullptr; param_idx++) { - function->parameters.push_back(make_uniq(default_macro.parameters[param_idx])); - } - for (idx_t named_idx = 0; default_macro.named_parameters[named_idx].name != nullptr; named_idx++) { - auto expr_list = Parser::ParseExpressionList(default_macro.named_parameters[named_idx].default_value); - if (expr_list.size() != 1) { - throw InternalException("Expected a single expression"); - } - function->default_parameters.insert( - make_pair(default_macro.named_parameters[named_idx].name, std::move(expr_list[0]))); - } - - auto type = CatalogType::TABLE_MACRO_ENTRY; - auto bind_info = make_uniq(type); - bind_info->schema = default_macro.schema; - bind_info->name = default_macro.name; - bind_info->temporary = true; - bind_info->internal = true; - bind_info->macros.push_back(std::move(function)); - return bind_info; -} - -unique_ptr -DefaultTableFunctionGenerator::CreateTableMacroInfo(const DefaultTableMacro &default_macro) { - Parser parser; - parser.ParseQuery(default_macro.macro); - if (parser.statements.size() != 1 || parser.statements[0]->type != StatementType::SELECT_STATEMENT) { - throw InternalException("Expected a single select statement in CreateTableMacroInfo internal"); - } - auto node = std::move(parser.statements[0]->Cast().node); - - auto result = make_uniq(std::move(node)); - return CreateInternalTableMacroInfo(default_macro, std::move(result)); -} - -static unique_ptr GetDefaultTableFunction(const string &input_schema, const string &input_name) { - auto schema = StringUtil::Lower(input_schema); - auto name = StringUtil::Lower(input_name); - for (idx_t index = 0; internal_table_macros[index].name != nullptr; index++) { - if (internal_table_macros[index].schema == schema && internal_table_macros[index].name == name) { - return DefaultTableFunctionGenerator::CreateTableMacroInfo(internal_table_macros[index]); - } - } - return nullptr; -} - -unique_ptr DefaultTableFunctionGenerator::CreateDefaultEntry(ClientContext &context, - const string &entry_name) { - auto info = GetDefaultTableFunction(schema.name, entry_name); - if (info) { - return make_uniq_base(catalog, schema, info->Cast()); - } - return nullptr; -} - -vector DefaultTableFunctionGenerator::GetDefaultEntries() { - vector result; - for (idx_t index = 0; internal_table_macros[index].name != nullptr; index++) { - if (StringUtil::Lower(internal_table_macros[index].name) != internal_table_macros[index].name) { - throw InternalException("Default macro name %s should be lowercase", internal_table_macros[index].name); - } - if (internal_table_macros[index].schema == schema.name) { - result.emplace_back(internal_table_macros[index].name); - } - } - return result; -} - -} // namespace duckdb From 03ddd68d9754a986224578a35f881d43476a6aac Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 9 Sep 2024 19:08:32 +0200 Subject: [PATCH 14/15] 1.0.3 --- description.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/description.yml b/description.yml index 8cb664e..7c87176 100644 --- a/description.yml +++ b/description.yml @@ -1,7 +1,7 @@ extension: name: chsql description: Clickhouse SQL Macros for DuckDB - version: 1.0.2 + version: 1.0.3 language: C++ build: cmake license: MIT @@ -10,11 +10,11 @@ extension: repo: github: lmangani/duckdb-extension-clickhouse-sql - ref: 3a81f48b9ea4262eaaa5c40076ad4e6202065472 - ref_next: b61cebcd1cfb3fc4dca30b33149cfc8f4b633e4a + ref: main docs: hello_world: | SELECT toString('world') as hello, toInt8OrZero('world') as zero; extended_description: | - This extension provides a growing number of Clickhouse SQL Macros for DuckDB. + This extension provides a growing number of ClickHouse SQL Macros for DuckDB. + For a list of supported functions, please refer to [latest release notes](https://github.com/lmangani/duckdb-extension-clickhouse-sql/releases). From 410939b1190dbca17c8718f8f54e410707c7657b Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 9 Sep 2024 19:14:27 +0200 Subject: [PATCH 15/15] Create default_table_functions.cpp --- src/default_table_functions.cpp | 148 ++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 src/default_table_functions.cpp diff --git a/src/default_table_functions.cpp b/src/default_table_functions.cpp new file mode 100644 index 0000000..b0755c8 --- /dev/null +++ b/src/default_table_functions.cpp @@ -0,0 +1,148 @@ +#include "duckdb/catalog/default/default_table_functions.hpp" +#include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp" +#include "duckdb/parser/parser.hpp" +#include "duckdb/parser/parsed_data/create_macro_info.hpp" +#include "duckdb/parser/statement/select_statement.hpp" +#include "duckdb/function/table_macro_function.hpp" + +namespace duckdb { + +// clang-format off +static const DefaultTableMacro internal_table_macros[] = { + {DEFAULT_SCHEMA, "histogram_values", {"source", "col_name", nullptr}, {{"bin_count", "10"}, {"technique", "'auto'"}, {nullptr, nullptr}}, R"( +WITH bins AS ( + SELECT + CASE + WHEN (NOT (can_cast_implicitly(MIN(col_name), NULL::BIGINT) OR + can_cast_implicitly(MIN(col_name), NULL::DOUBLE) OR + can_cast_implicitly(MIN(col_name), NULL::TIMESTAMP)) AND technique='auto') + OR technique='sample' + THEN + approx_top_k(col_name, bin_count) + WHEN technique='equi-height' + THEN + quantile(col_name, [x / bin_count::DOUBLE for x in generate_series(1, bin_count)]) + WHEN technique='equi-width' + THEN + equi_width_bins(MIN(col_name), MAX(col_name), bin_count, false) + WHEN technique='equi-width-nice' OR technique='auto' + THEN + equi_width_bins(MIN(col_name), MAX(col_name), bin_count, true) + ELSE + error(concat('Unrecognized technique ', technique)) + END AS bins + FROM query_table(source::VARCHAR) + ) +SELECT UNNEST(map_keys(histogram)) AS bin, UNNEST(map_values(histogram)) AS count +FROM ( + SELECT CASE + WHEN (NOT (can_cast_implicitly(MIN(col_name), NULL::BIGINT) OR + can_cast_implicitly(MIN(col_name), NULL::DOUBLE) OR + can_cast_implicitly(MIN(col_name), NULL::TIMESTAMP)) AND technique='auto') + OR technique='sample' + THEN + histogram_exact(col_name, bins) + ELSE + histogram(col_name, bins) + END AS histogram + FROM query_table(source::VARCHAR), bins +); +)"}, + {DEFAULT_SCHEMA, "histogram", {"source", "col_name", nullptr}, {{"bin_count", "10"}, {"technique", "'auto'"}, {nullptr, nullptr}}, R"( +SELECT + CASE + WHEN is_histogram_other_bin(bin) + THEN '(other values)' + WHEN (NOT (can_cast_implicitly(bin, NULL::BIGINT) OR + can_cast_implicitly(bin, NULL::DOUBLE) OR + can_cast_implicitly(bin, NULL::TIMESTAMP)) AND technique='auto') + OR technique='sample' + THEN bin::VARCHAR + WHEN row_number() over () = 1 + THEN concat('x <= ', bin::VARCHAR) + ELSE concat(lag(bin::VARCHAR) over (), ' < x <= ', bin::VARCHAR) + END AS bin, + count, + bar(count, 0, max(count) over ()) AS bar +FROM histogram_values(source, col_name, bin_count := bin_count, technique := technique); +)"}, + {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr} + }; +// clang-format on + +DefaultTableFunctionGenerator::DefaultTableFunctionGenerator(Catalog &catalog, SchemaCatalogEntry &schema) + : DefaultGenerator(catalog), schema(schema) { +} + +unique_ptr +DefaultTableFunctionGenerator::CreateInternalTableMacroInfo(const DefaultTableMacro &default_macro, + unique_ptr function) { + for (idx_t param_idx = 0; default_macro.parameters[param_idx] != nullptr; param_idx++) { + function->parameters.push_back(make_uniq(default_macro.parameters[param_idx])); + } + for (idx_t named_idx = 0; default_macro.named_parameters[named_idx].name != nullptr; named_idx++) { + auto expr_list = Parser::ParseExpressionList(default_macro.named_parameters[named_idx].default_value); + if (expr_list.size() != 1) { + throw InternalException("Expected a single expression"); + } + function->default_parameters.insert( + make_pair(default_macro.named_parameters[named_idx].name, std::move(expr_list[0]))); + } + + auto type = CatalogType::TABLE_MACRO_ENTRY; + auto bind_info = make_uniq(type); + bind_info->schema = default_macro.schema; + bind_info->name = default_macro.name; + bind_info->temporary = true; + bind_info->internal = true; + bind_info->macros.push_back(std::move(function)); + return bind_info; +} + +unique_ptr +DefaultTableFunctionGenerator::CreateTableMacroInfo(const DefaultTableMacro &default_macro) { + Parser parser; + parser.ParseQuery(default_macro.macro); + if (parser.statements.size() != 1 || parser.statements[0]->type != StatementType::SELECT_STATEMENT) { + throw InternalException("Expected a single select statement in CreateTableMacroInfo internal"); + } + auto node = std::move(parser.statements[0]->Cast().node); + + auto result = make_uniq(std::move(node)); + return CreateInternalTableMacroInfo(default_macro, std::move(result)); +} + +static unique_ptr GetDefaultTableFunction(const string &input_schema, const string &input_name) { + auto schema = StringUtil::Lower(input_schema); + auto name = StringUtil::Lower(input_name); + for (idx_t index = 0; internal_table_macros[index].name != nullptr; index++) { + if (internal_table_macros[index].schema == schema && internal_table_macros[index].name == name) { + return DefaultTableFunctionGenerator::CreateTableMacroInfo(internal_table_macros[index]); + } + } + return nullptr; +} + +unique_ptr DefaultTableFunctionGenerator::CreateDefaultEntry(ClientContext &context, + const string &entry_name) { + auto info = GetDefaultTableFunction(schema.name, entry_name); + if (info) { + return make_uniq_base(catalog, schema, info->Cast()); + } + return nullptr; +} + +vector DefaultTableFunctionGenerator::GetDefaultEntries() { + vector result; + for (idx_t index = 0; internal_table_macros[index].name != nullptr; index++) { + if (StringUtil::Lower(internal_table_macros[index].name) != internal_table_macros[index].name) { + throw InternalException("Default macro name %s should be lowercase", internal_table_macros[index].name); + } + if (internal_table_macros[index].schema == schema.name) { + result.emplace_back(internal_table_macros[index].name); + } + } + return result; +} + +} // namespace duckdb