Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 57 additions & 22 deletions parser/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -1770,19 +1770,19 @@ func (c *CreateDatabase) Accept(visitor ASTVisitor) error {
}

type CreateTable struct {
CreatePos Pos // position of CREATE|ATTACH keyword
StatementEnd Pos
OrReplace bool
Name *TableIdentifier
IfNotExists bool
UUID *UUID
OnCluster *ClusterClause
TableSchema *TableSchemaClause
Engine *EngineExpr
SubQuery *SubQuery
TableFunction *TableFunctionExpr
HasTemporary bool
Comment *StringLiteral
CreatePos Pos // position of CREATE|ATTACH keyword
StatementEnd Pos
OrReplace bool
Name *TableIdentifier
IfNotExists bool
UUID *UUID
OnCluster *ClusterClause
TableSchema *TableSchemaClause
Engine *EngineExpr
SubQuery *SubQuery
TableFunction *TableFunctionExpr
HasTemporary bool
Comment *StringLiteral
}

func (c *CreateTable) Pos() Pos {
Expand Down Expand Up @@ -7633,36 +7633,71 @@ func (g *GlobalInOperation) Accept(visitor ASTVisitor) error {
return visitor.VisitGlobalInExpr(g)
}

// IntervalFrom is the AST node for an `<interval> FROM <expr>` fragment.
// Its String method renders it as the interval identifier, the literal
// " FROM ", and the expression, in that order.
type IntervalFrom struct {
	// Interval is the identifier written before FROM; Pos() reports its NamePos.
	Interval *Ident
	// FromPos is a source position associated with FROM.
	// NOTE(review): presumably the position of the FROM keyword itself — confirm against the parser.
	FromPos Pos
	// FromExpr is the expression written after FROM; End() delegates to it.
	FromExpr Expr
}

// Pos reports where the node begins, which is the NamePos of its interval
// identifier.
func (i *IntervalFrom) Pos() Pos {
	start := i.Interval.NamePos
	return start
}

// End reports where the node stops by delegating to the End of the expression
// that follows FROM.
func (i *IntervalFrom) End() Pos {
	from := i.FromExpr
	return from.End()
}

// String renders the node as "<interval> FROM <expr>".
func (i *IntervalFrom) String() string {
	// Operands are evaluated left to right, so the interval is rendered
	// before the FROM expression.
	return i.Interval.String() + " FROM " + i.FromExpr.String()
}

// Accept walks the node with the given visitor. It calls Enter, visits the
// FROM expression, and then visits the node itself via VisitIntervalFrom;
// Leave is deferred so it runs on every return path. The first error
// encountered is returned.
func (i *IntervalFrom) Accept(visitor ASTVisitor) error {
	visitor.Enter(i)
	defer visitor.Leave(i)

	err := i.FromExpr.Accept(visitor)
	if err == nil {
		// Only visit the node itself once its child was visited cleanly.
		err = visitor.VisitIntervalFrom(i)
	}
	return err
}

type ExtractExpr struct {
ExtractPos Pos
Interval *Ident
FromPos Pos
FromExpr Expr
ExtractEnd Pos
Parameters []Expr
}

func (e *ExtractExpr) Pos() Pos {
return e.ExtractPos
}

func (e *ExtractExpr) End() Pos {
return e.FromExpr.End()
return e.ExtractEnd
}

func (e *ExtractExpr) String() string {
var builder strings.Builder
builder.WriteString("EXTRACT(")
builder.WriteString(e.Interval.String())
builder.WriteString(" FROM ")
builder.WriteString(e.FromExpr.String())
for i, param := range e.Parameters {
if i > 0 {
builder.WriteString(", ")
}
builder.WriteString(param.String())
}
builder.WriteByte(')')
return builder.String()
}

func (e *ExtractExpr) Accept(visitor ASTVisitor) error {
visitor.Enter(e)
defer visitor.Leave(e)
if err := e.FromExpr.Accept(visitor); err != nil {
return err
for _, param := range e.Parameters {
if err := param.Accept(visitor); err != nil {
return err
}
}
return visitor.VisitExtractExpr(e)
}
Expand Down
8 changes: 8 additions & 0 deletions parser/ast_visitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ type ASTVisitor interface {
VisitNegateExpr(expr *NegateExpr) error
VisitGlobalInExpr(expr *GlobalInOperation) error
VisitExtractExpr(expr *ExtractExpr) error
VisitIntervalFrom(expr *IntervalFrom) error
VisitDropDatabase(expr *DropDatabase) error
VisitDropStmt(expr *DropStmt) error
VisitDropUserOrRole(expr *DropUserOrRole) error
Expand Down Expand Up @@ -1318,6 +1319,13 @@ func (v *DefaultASTVisitor) VisitExtractExpr(expr *ExtractExpr) error {
return nil
}

// VisitIntervalFrom forwards the node to the Visit callback when one is set,
// and otherwise does nothing and returns nil.
func (v *DefaultASTVisitor) VisitIntervalFrom(expr *IntervalFrom) error {
	if v.Visit == nil {
		return nil
	}
	return v.Visit(expr)
}

func (v *DefaultASTVisitor) VisitDropDatabase(expr *DropDatabase) error {
if v.Visit != nil {
return v.Visit(expr)
Expand Down
64 changes: 47 additions & 17 deletions parser/parser_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,40 +261,70 @@ func (p *Parser) parseTernaryExpr(condition Expr) (*TernaryOperation, error) {
}, nil
}

func (p *Parser) parseColumnExtractExpr(pos Pos) (*ExtractExpr, error) {
if err := p.expectKeyword(KeywordExtract); err != nil {
func (p *Parser) parseExtractFrom(ident *Ident) (*IntervalFrom, error) {
fromPos := p.Pos()
if err := p.expectKeyword(KeywordFrom); err != nil {
return nil, err
}
if err := p.expectTokenKind(TokenKindLParen); err != nil {

expr, err := p.parseExpr(p.Pos())
if err != nil {
return nil, err
}
return &IntervalFrom{
Interval: ident,
FromPos: fromPos,
FromExpr: expr,
}, nil
}

// parse interval
ident, err := p.parseIdent()
if err != nil {
func (p *Parser) parseColumnExtractExpr(pos Pos) (*ExtractExpr, error) {
if err := p.expectKeyword(KeywordExtract); err != nil {
return nil, err
}
if !intervalUnits.Contains(strings.ToUpper(ident.Name)) {
return nil, fmt.Errorf("unknown interval type: <%q>", ident.Name)
if err := p.expectTokenKind(TokenKindLParen); err != nil {
return nil, err
}

fromPos := p.Pos()
if err := p.expectKeyword(KeywordFrom); err != nil {
return nil, err
parameters := make([]Expr, 0)
for !p.lexer.isEOF() {
Copy link

Copilot AI Dec 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The loop condition for !p.lexer.isEOF() is too broad and could lead to an infinite loop if tryConsumeTokenKind(TokenKindComma) at line 311 doesn't consume any tokens and EOF is never reached. The loop should have a more specific exit condition, such as checking for the closing parenthesis first: for !p.matchTokenKind(TokenKindRParen) && !p.lexer.isEOF().

Suggested change
for !p.lexer.isEOF() {
for !p.matchTokenKind(TokenKindRParen) && !p.lexer.isEOF() {

Copilot uses AI. Check for mistakes.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's unnecessary to check TokenKindRParen here.

expr, err := p.parseExpr(p.Pos())
if err != nil {
return nil, err
}

var param Expr
if ident, ok := expr.(*Ident); ok {
if intervalUnits.Contains(strings.ToUpper(ident.Name)) && p.matchKeyword(KeywordFrom) {
param, err = p.parseExtractFrom(ident)
if err != nil {
return nil, err
}
parameters = append(parameters, param)
} else {
parameters = append(parameters, expr)
}
} else {
parameters = append(parameters, expr)
}

if p.tryConsumeTokenKind(TokenKindComma) == nil {
break
}
}

expr, err := p.parseExpr(p.Pos())
if err != nil {
return nil, err
if len(parameters) == 0 {
return nil, fmt.Errorf("EXTRACT requires at least one parameter")
}

extractEnd := p.Pos()
if err := p.expectTokenKind(TokenKindRParen); err != nil {
return nil, err
}
return &ExtractExpr{
ExtractPos: pos,
Interval: ident,
FromPos: fromPos,
FromExpr: expr,
ExtractEnd: extractEnd,
Parameters: parameters,
}, nil
}

Expand Down
27 changes: 27 additions & 0 deletions parser/testdata/query/format/select_extract_with_regex.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Origin SQL:
SELECT
COUNT(1), SRC_TYPE, NODE_CLASS, PORT, CLIENT_PORT
FROM
test.table
WHERE
app_id = 999118646
AND toUnixTimestamp(timestamp) >= 1740366695
AND toUnixTimestamp(timestamp) <= 1740377495
GROUP BY
CASE
WHEN length(extract(instance, '((\\d+\\.){3}\\d+)')) > 0 THEN instance
ELSE '空'
END,
CASE
WHEN length(extract(client_ip, '((\\d+\\.){3}\\d+)')) > 0 THEN client_ip
ELSE '空'
END,
src_type,
node_class,
port,
client_port
LIMIT 10000


-- Format SQL:
SELECT COUNT(1), SRC_TYPE, NODE_CLASS, PORT, CLIENT_PORT FROM test.table WHERE app_id = 999118646 AND toUnixTimestamp(timestamp) >= 1740366695 AND toUnixTimestamp(timestamp) <= 1740377495 GROUP BY CASE WHEN length(EXTRACT(instance, '((\\d+\\.){3}\\d+)')) > 0 THEN instance ELSE '空' END, CASE WHEN length(EXTRACT(client_ip, '((\\d+\\.){3}\\d+)')) > 0 THEN client_ip ELSE '空' END, src_type, node_class, port, client_port LIMIT 10000;
Loading