From fec28995ef91fad3eb75a23715c9deacbc0f55c6 Mon Sep 17 00:00:00 2001 From: Piyush Singariya Date: Fri, 26 Sep 2025 14:31:13 +0530 Subject: [PATCH 1/2] feat: adding type hint parsing in JSON column type --- parser/ast.go | 54 +++++++++++++++++-- parser/parser_column.go | 46 +++++++++++++++- .../ddl/create_table_json_typehints.sql | 6 +++ .../format/create_table_json_typehints.sql | 12 +++++ 4 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 parser/testdata/ddl/create_table_json_typehints.sql create mode 100644 parser/testdata/ddl/format/create_table_json_typehints.sql diff --git a/parser/ast.go b/parser/ast.go index e5a906a..cd2eb11 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -4169,11 +4169,18 @@ func (j *JSONPath) String() string { return builder.String() } +type JSONTypeHint struct { + Path *JSONPath + Type ColumnType +} + type JSONOption struct { SkipPath *JSONPath SkipRegex *StringLiteral MaxDynamicPaths *NumberLiteral MaxDynamicTypes *NumberLiteral + // Type hint for specific JSON subcolumn path, e.g., "message String" or "a.b UInt64" + Column *JSONTypeHint } func (j *JSONOption) String() string { @@ -4196,6 +4203,16 @@ func (j *JSONOption) String() string { builder.WriteByte('=') builder.WriteString(j.MaxDynamicTypes.String()) } + if j.Column != nil && j.Column.Path != nil && j.Column.Type != nil { + // add a leading space if there is already content + if builder.Len() > 0 { + builder.WriteByte(' ') + } + builder.WriteString(j.Column.Path.String()) + builder.WriteByte(' ') + builder.WriteString(j.Column.Type.String()) + } + return builder.String() } @@ -4216,12 +4233,41 @@ func (j *JSONOptions) End() Pos { func (j *JSONOptions) String() string { var builder strings.Builder builder.WriteByte('(') - for i, item := range j.Items { - if i > 0 { - builder.WriteString(", ") + // Ensure stable, readable ordering: + // 1) numeric options (max_dynamic_*), 2) type-hint items, 3) skip options (SKIP, SKIP REGEXP) + // Preserve original relative 
order within each group. + numericOptionItems := make([]*JSONOption, 0, len(j.Items)) + columnItems := make([]*JSONOption, 0, len(j.Items)) + skipOptionItems := make([]*JSONOption, 0, len(j.Items)) + for _, item := range j.Items { + if item.MaxDynamicPaths != nil || item.MaxDynamicTypes != nil { + numericOptionItems = append(numericOptionItems, item) + continue + } + if item.Column != nil { + columnItems = append(columnItems, item) + continue + } + if item.SkipPath != nil || item.SkipRegex != nil { + skipOptionItems = append(skipOptionItems, item) + continue + } + // Fallback: treat as numeric option to avoid dropping unknown future fields + numericOptionItems = append(numericOptionItems, item) + } + + writeItems := func(items []*JSONOption) { + for _, item := range items { + if builder.Len() > 1 { // account for the initial '(' + builder.WriteString(", ") + } + builder.WriteString(item.String()) } - builder.WriteString(item.String()) } + + writeItems(numericOptionItems) + writeItems(columnItems) + writeItems(skipOptionItems) builder.WriteByte(')') return builder.String() } diff --git a/parser/parser_column.go b/parser/parser_column.go index 119658b..5312319 100644 --- a/parser/parser_column.go +++ b/parser/parser_column.go @@ -1018,7 +1018,51 @@ func (p *Parser) parseJSONOption() (*JSONOption, error) { SkipPath: jsonPath, }, nil case p.matchTokenKind(TokenKindIdent): - return p.parseJSONMaxDynamicOptions(p.Pos()) + // Could be max_dynamic_* option OR a type hint like: a.b String + // A max_dynamic_* option is an identifier followed by '='; + // a type hint is a (possibly dotted) path followed by a ColumnType. + // We'll parse a JSONPath first, then decide. + // The path is consumed directly; no lexer state is saved or restored.
+ // Rule: single ident followed by '=' -> max_dynamic_*; otherwise treat as path + type + + // Parsing the path before checking for '=' differs from the previous code, which + // delegated straight to parseJSONMaxDynamicOptions for a single ident. + // Capturing the whole path first supports dotted paths: if '=' follows, it's an option; otherwise parse the type. + path, err := p.parseJSONPath() + if err != nil { + return nil, err + } + if p.tryConsumeTokenKind(TokenKindSingleEQ) != nil { + // This is a max_dynamic_* option; only valid when path is a single ident of that name + // Reconstruct handling similar to parseJSONMaxDynamicOptions but we already consumed ident and '=' + // Determine which option based on the first ident name + if len(path.Idents) != 1 { + return nil, fmt.Errorf("unexpected token kind: %s", p.lastTokenKind()) + } + name := path.Idents[0].Name + switch name { + case "max_dynamic_types": + number, err := p.parseNumber(p.Pos()) + if err != nil { + return nil, err + } + return &JSONOption{MaxDynamicTypes: number}, nil + case "max_dynamic_paths": + number, err := p.parseNumber(p.Pos()) + if err != nil { + return nil, err + } + return &JSONOption{MaxDynamicPaths: number}, nil + default: + return nil, fmt.Errorf("unexpected token kind: %s", p.lastTokenKind()) + } + } + // Otherwise, expect a ColumnType as a type hint for the JSON subpath + colType, err := p.parseColumnType(p.Pos()) + if err != nil { + return nil, err + } + return &JSONOption{Column: &JSONTypeHint{Path: path, Type: colType}}, nil default: return nil, fmt.Errorf("unexpected token kind: %s", p.lastTokenKind()) } diff --git a/parser/testdata/ddl/create_table_json_typehints.sql b/parser/testdata/ddl/create_table_json_typehints.sql new file mode 100644 index 0000000..68c605e --- /dev/null +++ b/parser/testdata/ddl/create_table_json_typehints.sql @@ -0,0 +1,6 @@ +CREATE TABLE t ( + j JSON(message String, a.b UInt64, max_dynamic_paths=0, SKIP x, SKIP REGEXP 're') +) ENGINE = MergeTree
+ORDER BY tuple(); + + diff --git a/parser/testdata/ddl/format/create_table_json_typehints.sql b/parser/testdata/ddl/format/create_table_json_typehints.sql new file mode 100644 index 0000000..e2095c1 --- /dev/null +++ b/parser/testdata/ddl/format/create_table_json_typehints.sql @@ -0,0 +1,12 @@ +-- Origin SQL: +CREATE TABLE t ( + j JSON(message String, a.b UInt64, max_dynamic_paths=0, SKIP x, SKIP REGEXP 're') +) ENGINE = MergeTree +ORDER BY tuple(); + + + +-- Format SQL: +CREATE TABLE t (j JSON(max_dynamic_paths=0, message String, a.b UInt64, SKIP x, SKIP REGEXP 're')) ENGINE = MergeTree ORDER BY tuple(); + + From df1462c124daf3e18eeebdf44a0891ae6332208c Mon Sep 17 00:00:00 2001 From: Piyush Singariya Date: Mon, 29 Sep 2025 12:32:36 +0530 Subject: [PATCH 2/2] test: run make update_test --- .../format/create_table_json_typehints.sql | 3 +- .../output/create_table_basic.sql.golden.json | 19 +- ...reate_table_json_typehints.sql.golden.json | 204 ++++++++++++++++++ 3 files changed, 217 insertions(+), 9 deletions(-) create mode 100644 parser/testdata/ddl/output/create_table_json_typehints.sql.golden.json diff --git a/parser/testdata/ddl/format/create_table_json_typehints.sql b/parser/testdata/ddl/format/create_table_json_typehints.sql index e2095c1..8b014f1 100644 --- a/parser/testdata/ddl/format/create_table_json_typehints.sql +++ b/parser/testdata/ddl/format/create_table_json_typehints.sql @@ -6,7 +6,6 @@ ORDER BY tuple(); + -- Format SQL: CREATE TABLE t (j JSON(max_dynamic_paths=0, message String, a.b UInt64, SKIP x, SKIP REGEXP 're')) ENGINE = MergeTree ORDER BY tuple(); - - diff --git a/parser/testdata/ddl/output/create_table_basic.sql.golden.json b/parser/testdata/ddl/output/create_table_basic.sql.golden.json index 5cb87b3..fc79f87 100644 --- a/parser/testdata/ddl/output/create_table_basic.sql.golden.json +++ b/parser/testdata/ddl/output/create_table_basic.sql.golden.json @@ -685,22 +685,24 @@ "SkipRegex": null, "MaxDynamicPaths": null, "MaxDynamicTypes": { - 
"NumPos": 571, + "NumPos": 589, "NumEnd": 591, "Literal": "10", "Base": 10 - } + }, + "Column": null }, { "SkipPath": null, "SkipRegex": null, "MaxDynamicPaths": { - "NumPos": 593, + "NumPos": 611, "NumEnd": 612, "Literal": "3", "Base": 10 }, - "MaxDynamicTypes": null + "MaxDynamicTypes": null, + "Column": null }, { "SkipPath": { @@ -715,7 +717,8 @@ }, "SkipRegex": null, "MaxDynamicPaths": null, - "MaxDynamicTypes": null + "MaxDynamicTypes": null, + "Column": null }, { "SkipPath": { @@ -742,7 +745,8 @@ }, "SkipRegex": null, "MaxDynamicPaths": null, - "MaxDynamicTypes": null + "MaxDynamicTypes": null, + "Column": null }, { "SkipPath": null, @@ -752,7 +756,8 @@ "Literal": "hello" }, "MaxDynamicPaths": null, - "MaxDynamicTypes": null + "MaxDynamicTypes": null, + "Column": null } ] } diff --git a/parser/testdata/ddl/output/create_table_json_typehints.sql.golden.json b/parser/testdata/ddl/output/create_table_json_typehints.sql.golden.json new file mode 100644 index 0000000..9d2644c --- /dev/null +++ b/parser/testdata/ddl/output/create_table_json_typehints.sql.golden.json @@ -0,0 +1,204 @@ +[ + { + "CreatePos": 0, + "StatementEnd": 139, + "OrReplace": false, + "Name": { + "Database": null, + "Table": { + "Name": "t", + "QuoteType": 1, + "NamePos": 13, + "NameEnd": 14 + } + }, + "IfNotExists": false, + "UUID": null, + "OnCluster": null, + "TableSchema": { + "SchemaPos": 15, + "SchemaEnd": 103, + "Columns": [ + { + "NamePos": 21, + "ColumnEnd": 101, + "Name": { + "Ident": { + "Name": "j", + "QuoteType": 1, + "NamePos": 21, + "NameEnd": 22 + }, + "DotIdent": null + }, + "Type": { + "Name": { + "Name": "JSON", + "QuoteType": 1, + "NamePos": 23, + "NameEnd": 27 + }, + "Options": { + "LParen": 28, + "RParen": 101, + "Items": [ + { + "SkipPath": null, + "SkipRegex": null, + "MaxDynamicPaths": null, + "MaxDynamicTypes": null, + "Column": { + "Path": { + "Idents": [ + { + "Name": "message", + "QuoteType": 1, + "NamePos": 28, + "NameEnd": 35 + } + ] + }, + "Type": { + "Name": { + 
"Name": "String", + "QuoteType": 1, + "NamePos": 36, + "NameEnd": 42 + } + } + } + }, + { + "SkipPath": null, + "SkipRegex": null, + "MaxDynamicPaths": null, + "MaxDynamicTypes": null, + "Column": { + "Path": { + "Idents": [ + { + "Name": "a", + "QuoteType": 1, + "NamePos": 44, + "NameEnd": 45 + }, + { + "Name": "b", + "QuoteType": 1, + "NamePos": 46, + "NameEnd": 47 + } + ] + }, + "Type": { + "Name": { + "Name": "UInt64", + "QuoteType": 1, + "NamePos": 48, + "NameEnd": 54 + } + } + } + }, + { + "SkipPath": null, + "SkipRegex": null, + "MaxDynamicPaths": { + "NumPos": 74, + "NumEnd": 75, + "Literal": "0", + "Base": 10 + }, + "MaxDynamicTypes": null, + "Column": null + }, + { + "SkipPath": { + "Idents": [ + { + "Name": "x", + "QuoteType": 1, + "NamePos": 82, + "NameEnd": 83 + } + ] + }, + "SkipRegex": null, + "MaxDynamicPaths": null, + "MaxDynamicTypes": null, + "Column": null + }, + { + "SkipPath": null, + "SkipRegex": { + "LiteralPos": 98, + "LiteralEnd": 100, + "Literal": "re" + }, + "MaxDynamicPaths": null, + "MaxDynamicTypes": null, + "Column": null + } + ] + } + }, + "NotNull": null, + "Nullable": null, + "DefaultExpr": null, + "MaterializedExpr": null, + "AliasExpr": null, + "Codec": null, + "TTL": null, + "Comment": null, + "CompressionCodec": null + } + ], + "AliasTable": null, + "TableFunction": null + }, + "Engine": { + "EnginePos": 105, + "EngineEnd": 139, + "Name": "MergeTree", + "Params": null, + "PrimaryKey": null, + "PartitionBy": null, + "SampleBy": null, + "TTL": null, + "Settings": null, + "OrderBy": { + "OrderPos": 124, + "ListEnd": 139, + "Items": [ + { + "OrderPos": 124, + "Expr": { + "Name": { + "Name": "tuple", + "QuoteType": 1, + "NamePos": 133, + "NameEnd": 138 + }, + "Params": { + "LeftParenPos": 138, + "RightParenPos": 139, + "Items": { + "ListPos": 139, + "ListEnd": 139, + "HasDistinct": false, + "Items": [] + }, + "ColumnArgList": null + } + }, + "Alias": null, + "Direction": "" + } + ] + } + }, + "SubQuery": null, + "HasTemporary": 
false, + "Comment": null + } +] \ No newline at end of file