Skip to content

feat(parsing): Overhaul query parser for extended OpenCypher syntax and block parsing #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 14, 2025
Merged
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ __debug*
build/

.DS_Store

**ast*.json
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/OpenDgraph/Otter
go 1.24.0

require (
github.com/alecthomas/participle/v2 v2.1.4
github.com/dgraph-io/dgo/v240 v240.2.0
github.com/gorilla/websocket v1.5.3
github.com/hypermodeinc/dgraph/v24 v24.1.2
Expand Down
6 changes: 4 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,10 @@ github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXY
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
github.com/alecthomas/assert/v2 v2.10.0 h1:jjRCHsj6hBJhkmhznrCzoNpbA3zqy0fYiUcYZP/GkPY=
github.com/alecthomas/assert/v2 v2.10.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0=
github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
github.com/alecthomas/participle/v2 v2.1.4 h1:W/H79S8Sat/krZ3el6sQMvMaahJ+XcM9WSI2naI7w2U=
github.com/alecthomas/participle/v2 v2.1.4/go.mod h1:8tqVbpTX20Ru4NfYQgZf4mP18eXPTBViyMWiArNEgGI=
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
Expand Down
138 changes: 138 additions & 0 deletions internal/parsing/cpAST.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package parsing

import (
"github.com/alecthomas/participle/v2"
"github.com/alecthomas/participle/v2/lexer"
)

// ==================
// AST
// ==================

// Query is the grammar root for a complete statement. The fields are matched
// in declaration order: a mandatory MATCH clause, an optional CREATE clause,
// an optional WHERE clause, and a mandatory RETURN clause. Each keyword is
// consumed here, so the clause types themselves describe only what follows
// their keyword.
type Query struct {
Match *MatchClause `parser:"\"MATCH\" @@"`
Create *CreateClause `parser:"[ \"CREATE\" @@ ]"`
Where *WhereClause `parser:"[ \"WHERE\" @@ ]"`
Return *ReturnClause `parser:"\"RETURN\" @@"`
}

// ==================
// CREATE
// ==================

// CreateClause is the body of a CREATE clause: one or more comma-separated
// patterns. The grammar here does not include the CREATE keyword itself.
type CreateClause struct {
Patterns []*Pattern `parser:"@@ { \",\" @@ }"`
}

// ==================
// MATCH
// ==================

// MatchClause is the body of a MATCH clause: one or more comma-separated
// patterns. The grammar here does not include the MATCH keyword itself.
type MatchClause struct {
Patterns []*Pattern `parser:"@@ { \",\" @@ }"`
}

// Pattern is a path: a parenthesised start node followed by zero or more
// relationship/node segments, e.g. (a)-[r:KNOWS]->(b).
type Pattern struct {
StartNode *NodePattern `parser:"\"(\" @@ \")\""` // The pattern MUST start with a node
Segments []*PathSegment `parser:" { @@ } "` // Subsequent relationship/node segments
}

// NodePattern is the content between the parentheses of a node: a mandatory
// variable, an optional ":Label" and an optional property map. The label may
// be an Ident or a Keyword token, so labels that collide with reserved words
// are accepted.
type NodePattern struct {
Variable string `parser:"@Ident"`
Label string `parser:"[ \":\" @(Ident | Keyword) ]"`
Properties *Properties `parser:"[ @@ ]"` // Properties are optional and defined in their own struct
}

// PathSegment is one hop of a path: a relationship followed by the
// parenthesised node it leads to.
type PathSegment struct {
Relationship *RelationshipPattern `parser:"@@"` // Relationship details (arrows, type, alias)
EndNode *NodePattern `parser:"\"(\" @@ \")\""` // The node at the end of this segment
}

// RelationshipPattern is the connector between two nodes, e.g. -[r:KNOWS]->
// or <-[r:KNOWS]-.
//
// Both ends are restricted to an arrow token or the literal "-". Matching the
// whole Punct token class here would also accept ",", ":", "." and brackets
// as a relationship end, which is never valid.
type RelationshipPattern struct {
	// LeftArrow holds "<-" or "-".
	LeftArrow string `parser:"(@ArrowL | @\"-\")"`

	// Edge holds the details written inside the square brackets.
	Edge *EdgePattern `parser:"\"[\" @@ \"]\""`

	// RightArrow holds "->" or "-".
	RightArrow string `parser:"(@ArrowR | @\"-\")"`
}

// EdgePattern is the content between the square brackets of a relationship:
// an optional alias, an optional ":TYPE" and an optional property map.
type EdgePattern struct {
	// Variable is the optional alias (e.g. 'r' in [r:KNOWS]).
	Variable string `parser:"@Ident?"`

	// Type is the relationship type (e.g. 'KNOWS'). Like node labels, it may
	// be an Ident or a Keyword token so that types spelled like a reserved
	// word (e.g. :IN, :WITH) are accepted.
	Type string `parser:"[ \":\" @(Ident | Keyword) ]"`

	// Properties is the optional {key: 'value', ...} map.
	Properties *Properties `parser:"[ @@ ]"`
}

// Properties is a brace-delimited property map with at least one entry,
// e.g. {name: 'Alice', city: 'Lisbon'}. An empty map "{}" does not match this
// grammar because the first entry is mandatory.
type Properties struct {
Entries []*Property `parser:"\"{\" @@ { \",\" @@ } \"}\""`
}

// Property is a single key/value entry of a property map, written key: 'value'.
type Property struct {
Key string `parser:"@Ident \":\""`
Value string `parser:"@String"` // For now, string values only. Could be extended.
}

// ==================
// WHERE
// ==================

// WhereClause is the body of a WHERE clause: a single condition. The grammar
// here does not include the WHERE keyword itself.
type WhereClause struct {
Cond *Condition `parser:"@@"`
}

// Condition is a single comparison of the form object.field <op> 'literal'.
type Condition struct {
Left *PropertyAccess `parser:"@@"`
Operator string `parser:"@Operator"`
Right string `parser:"@String"` // Only string literals for now; other value types could be added
}

// PropertyAccess is a dotted property reference such as n.name.
type PropertyAccess struct {
	// Object is the variable being dereferenced (the "n" in n.name).
	Object string `parser:"@Ident"`

	// Dot captures the "." separator. It is matched as a literal rather than
	// as the whole Punct token class, which would also accept inputs such as
	// "n:name" or "n-name".
	Dot string `parser:"@\".\""`

	// Field is the property name (the "name" in n.name).
	Field string `parser:"@Ident"`
}

// ==================
// RETURN
// ==================
// ReturnClause is the body of a RETURN clause: one or more comma-separated
// identifiers. The grammar here does not include the RETURN keyword itself,
// and only bare identifiers are accepted (not dotted property accesses).
type ReturnClause struct {
Fields []string `parser:"@Ident { \",\" @Ident }"`
}

// ==================
// Custom Lexer
// ==================

// myLexer is the token definition shared by every parser in this package.
//
// Rule order matters: Keyword is listed before Ident so reserved words are
// not lexed as identifiers, and the two-character arrows are listed before
// Operator and Punct so "<-" and "->" are not split into single characters.
// The \b anchors keep a keyword from matching inside a longer identifier.
var myLexer = lexer.MustSimple([]lexer.SimpleRule{
	{Name: "Keyword", Pattern: `(?i)\b(MATCH|RETURN|WHERE|AND|OR|NOT|NULL|TRUE|FALSE|IN|IS|AS|WITH|UNWIND|OPTIONAL|DETACH|DELETE|SET|CREATE|MERGE|ON|CASE|WHEN|THEN|ELSE|DISTINCT|ORDER|BY|SKIP|LIMIT|ASC|DESC)\b`},
	{Name: "ArrowL", Pattern: `<-`},
	{Name: "ArrowR", Pattern: `->`},
	{Name: "Ident", Pattern: `[a-zA-Z_][a-zA-Z0-9_]*`},
	// Single- or double-quoted strings; escape sequences are not recognised.
	{Name: "String", Pattern: `'[^']*'|"[^"]*"`},
	{Name: "Operator", Pattern: `<>|<=|>=|=|<|>`},
	{Name: "Punct", Pattern: `[-:\[\]\(\),\{\}.]`},
	{Name: "Whitespace", Pattern: `\s+`},
	// (?s) lets "." match newlines so a block comment may span several lines;
	// without it a multi-line /* ... */ comment is a lexing error.
	{Name: "comment", Pattern: `(?s)/\*.*?\*/`},
	{Name: "line_comment", Pattern: `//[^\n]*`},
})

// BuildParser constructs a participle parser for the grammar type T using the
// package lexer and the package's standard option set: String tokens are
// unquoted, whitespace and comments are elided, Keyword tokens match
// case-insensitively, and lookahead is set to 2.
//
// Caller-supplied options are appended after the defaults. The parser is
// built with participle.MustBuild.
func BuildParser[T any](options ...participle.Option) *participle.Parser[T] {
	opts := make([]participle.Option, 0, 5+len(options))
	opts = append(opts,
		participle.Lexer(myLexer),
		participle.Unquote("String"),
		participle.Elide("Whitespace", "comment", "line_comment"),
		participle.CaseInsensitive("Keyword"),
		participle.UseLookahead(2),
	)
	opts = append(opts, options...)

	return participle.MustBuild[T](opts...)
}

// BuildQueryParser constructs the parser for a complete Query.
//
// It delegates to BuildParser so that the full-query parser and the
// per-clause parsers always share one lexer and one default option set
// instead of maintaining two copies that can drift apart. Caller-supplied
// options are appended after the defaults.
func BuildQueryParser(options ...participle.Option) *participle.Parser[Query] {
	return BuildParser[Query](options...)
}
147 changes: 147 additions & 0 deletions internal/parsing/neo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package parsing

import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"testing"

"github.com/alecthomas/participle/v2"
)

// Clause parsers are built once at package initialisation and reused by the
// Parse*Clause helpers.

// matchParser parses the body of a MATCH clause.
var matchParser = mustBuildParser[MatchClause]()

// whereParser parses the body of a WHERE clause.
var whereParser = mustBuildParser[WhereClause]()

// returnParser parses the body of a RETURN clause.
var returnParser = mustBuildParser[ReturnClause]()

// createParser parses the body of a CREATE clause.
var createParser = mustBuildParser[CreateClause]()

// mustBuildParser builds a parser for grammar type T with the package's
// default options and no extras.
func mustBuildParser[T any]() *participle.Parser[T] {
	parser := BuildParser[T]()
	return parser
}

// ParseMatchClause parses src as the body of a MATCH clause (the text after
// the MATCH keyword) and returns the resulting AST or the parse error.
func ParseMatchClause(src string) (*MatchClause, error) {
	clause, err := matchParser.ParseString("", src)
	return clause, err
}

// ParseWhereClause parses src as the body of a WHERE clause (the text after
// the WHERE keyword) and returns the resulting AST or the parse error.
func ParseWhereClause(src string) (*WhereClause, error) {
	clause, err := whereParser.ParseString("", src)
	return clause, err
}

// ParseReturnClause parses src as the body of a RETURN clause (the text after
// the RETURN keyword) and returns the resulting AST or the parse error.
func ParseReturnClause(src string) (*ReturnClause, error) {
	clause, err := returnParser.ParseString("", src)
	return clause, err
}

// ParseCreateClause parses src as the body of a CREATE clause (the text after
// the CREATE keyword) and returns the resulting AST or the parse error.
func ParseCreateClause(src string) (*CreateClause, error) {
	clause, err := createParser.ParseString("", src)
	return clause, err
}

// ParseQueryParts splits a query into its MATCH, WHERE and RETURN parts and
// parses each part with the corresponding clause parser.
//
// The query must begin (after whitespace and comments) with MATCH and must
// contain RETURN; WHERE is optional and, when present, must precede RETURN.
// Keywords are located case-insensitively, only on identifier boundaries, and
// never inside quoted strings or comments, so inputs such as
// `match (n) where n.name = 'RETURN' return n` are split correctly.
//
// A query that begins with CREATE is parsed with ParseCreateClause.
// NOTE(review): the return signature has no slot for a *CreateClause, so the
// outcome of a successful CREATE parse is still reported through the error
// value, exactly as before. Callers that need the CREATE AST should call
// ParseCreateClause directly.
//
// On any error all three clause results are nil.
func ParseQueryParts(query string) (*MatchClause, *WhereClause, *ReturnClause, error) {
	query = strings.TrimSpace(query)
	start := skipQueryTrivia(query, 0)

	if queryKeywordAt(query, start, "CREATE") {
		// The CREATE grammar starts at the first pattern, so the keyword
		// itself must be stripped before parsing.
		createClause, err := ParseCreateClause(query[start+len("CREATE"):])
		if err != nil {
			return nil, nil, nil, fmt.Errorf("create parse error: %w", err)
		}
		// To return CREATE as a dedicated result the signature would have to
		// change; until then the parsed clause is surfaced via the error.
		return nil, nil, nil, fmt.Errorf("CREATE clause parsed: %+v", createClause)
	}

	if !queryKeywordAt(query, start, "MATCH") {
		return nil, nil, nil, fmt.Errorf("invalid query: must start with MATCH")
	}
	matchStart := start + len("MATCH")

	// Searching only after MATCH guarantees every slice below is in order.
	whereIndex := queryKeywordIndex(query, "WHERE", matchStart)
	returnIndex := queryKeywordIndex(query, "RETURN", matchStart)

	if returnIndex == -1 {
		return nil, nil, nil, fmt.Errorf("invalid query: must contain RETURN")
	}
	if whereIndex > returnIndex {
		return nil, nil, nil, fmt.Errorf("invalid query: RETURN cannot come before WHERE")
	}

	matchEnd := returnIndex
	if whereIndex != -1 {
		matchEnd = whereIndex
	}

	matchClause, err := ParseMatchClause(query[matchStart:matchEnd])
	if err != nil {
		return nil, nil, nil, fmt.Errorf("match parse error: %w", err)
	}

	var whereClause *WhereClause
	if whereIndex != -1 {
		whereClause, err = ParseWhereClause(query[whereIndex+len("WHERE") : returnIndex])
		if err != nil {
			return nil, nil, nil, fmt.Errorf("where parse error: %w", err)
		}
	}

	returnClause, err := ParseReturnClause(query[returnIndex+len("RETURN"):])
	if err != nil {
		return nil, nil, nil, fmt.Errorf("return parse error: %w", err)
	}

	return matchClause, whereClause, returnClause, nil
}

// queryKeywordIndex returns the byte offset of the first occurrence of keyword
// in query at or after from, skipping quoted strings and comments, or -1 when
// there is none.
func queryKeywordIndex(query, keyword string, from int) int {
	for i := from; i < len(query); {
		if next := skipQueryComment(query, i); next > i {
			i = next
			continue
		}
		if next := skipQueryString(query, i); next > i {
			i = next
			continue
		}
		if queryKeywordAt(query, i, keyword) {
			return i
		}
		i++
	}
	return -1
}

// queryKeywordAt reports whether keyword occurs at query[i:], compared
// case-insensitively and delimited by non-identifier bytes on both sides.
func queryKeywordAt(query string, i int, keyword string) bool {
	end := i + len(keyword)
	if end > len(query) || !strings.EqualFold(query[i:end], keyword) {
		return false
	}
	if i > 0 && isQueryIdentByte(query[i-1]) {
		return false
	}
	return end == len(query) || !isQueryIdentByte(query[end])
}

// skipQueryTrivia returns the index of the first byte at or after i that is
// neither whitespace nor part of a comment.
func skipQueryTrivia(query string, i int) int {
	for i < len(query) {
		if next := skipQueryComment(query, i); next > i {
			i = next
			continue
		}
		switch query[i] {
		case ' ', '\t', '\n', '\r', '\f':
			i++
		default:
			return i
		}
	}
	return i
}

// skipQueryComment returns the index just past the // or /* */ comment that
// starts at query[i], or i when no comment starts there. An unterminated
// comment extends to the end of the query.
func skipQueryComment(query string, i int) int {
	rest := query[i:]
	switch {
	case strings.HasPrefix(rest, "//"):
		if nl := strings.IndexByte(rest, '\n'); nl >= 0 {
			return i + nl + 1
		}
		return len(query)
	case strings.HasPrefix(rest, "/*"):
		if end := strings.Index(rest[2:], "*/"); end >= 0 {
			return i + 2 + end + 2
		}
		return len(query)
	}
	return i
}

// skipQueryString returns the index just past the quoted string that starts
// at query[i], or i when no string starts there. Like the lexer's String
// rule it knows no escape sequences; an unterminated string extends to the
// end of the query.
func skipQueryString(query string, i int) int {
	quote := query[i]
	if quote != '\'' && quote != '"' {
		return i
	}
	if end := strings.IndexByte(query[i+1:], quote); end >= 0 {
		return i + 1 + end + 1
	}
	return len(query)
}

// isQueryIdentByte reports whether c may appear inside an identifier.
func isQueryIdentByte(c byte) bool {
	return c == '_' ||
		(c >= 'a' && c <= 'z') ||
		(c >= 'A' && c <= 'Z') ||
		(c >= '0' && c <= '9')
}

// ASTSnapshot is one recorded parse result: the name of the test that
// produced it, the input that was parsed, and the resulting AST.
type ASTSnapshot struct {
	Test  string `json:"test"`
	Input string `json:"input"`
	AST   any    `json:"ast"`
}

// saveASTSnapshot appends snapshot to the JSON array stored at path, creating
// the parent directory and the file as needed.
//
// Loading the existing file is best-effort: a missing or empty file starts a
// fresh list, and an unreadable or malformed file is reported through t.Logf
// rather than silently ignored, after which the file is rewritten. Failing to
// create the directory, marshal the list, or write the file is fatal to the
// test.
func saveASTSnapshot(t *testing.T, snapshot ASTSnapshot, path string) {
	t.Helper()

	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0755); err != nil {
		t.Fatalf("failed to create directory %s: %v", dir, err)
	}

	var snapshots []ASTSnapshot
	existing, err := os.ReadFile(path)
	switch {
	case err == nil:
		if len(existing) > 0 {
			if err := json.Unmarshal(existing, &snapshots); err != nil {
				t.Logf("ignoring malformed snapshot file %s: %v", path, err)
			}
		}
	case !os.IsNotExist(err):
		t.Logf("could not read snapshot file %s: %v", path, err)
	}

	snapshots = append(snapshots, snapshot)

	data, err := json.MarshalIndent(snapshots, "", " ")
	if err != nil {
		t.Fatalf("failed to marshal snapshot: %v", err)
	}

	if err := os.WriteFile(path, data, 0644); err != nil {
		t.Fatalf("failed to write snapshot file: %v", err)
	}
}
Loading