forked from crestonbunch/godata
-
Notifications
You must be signed in to change notification settings - Fork 9
/
expression_parser.go
373 lines (347 loc) · 19.2 KB
/
expression_parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
package godata
import (
"context"
"strings"
)
// tokenDurationRe is a regex for a token of type duration.
// The token value is set to the ISO 8601 string inside the single quotes
// For example, if the input data is duration'PT2H', then the token value is set to PT2H without quotes.
const tokenDurationRe = `^(duration)?'(?P<subtoken>-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))))'`
// Addressing properties.
// Addressing items within a collection:
// ABNF: entityColNavigationProperty [ collectionNavigation ]
// collectionNavigation = [ "/" qualifiedEntityTypeName ] [ collectionNavPath ]
// Description: OData identifier, optionally followed by collection navigation.
//
// propertyPath = entityColNavigationProperty [ collectionNavigation ]
// / entityNavigationProperty [ singleNavigation ]
// / complexColProperty [ collectionPath ]
// / complexProperty [ complexPath ]
// / primitiveColProperty [ collectionPath ]
// / primitiveProperty [ singlePath ]
// / streamProperty [ boundOperation ]
type ExpressionTokenType int
func (e ExpressionTokenType) Value() int {
return (int)(e)
}
const (
ExpressionTokenOpenParen ExpressionTokenType = iota // Open parenthesis - parenthesis expression, list expression, or path segment selector.
ExpressionTokenCloseParen // Close parenthesis
ExpressionTokenWhitespace // white space token
ExpressionTokenNav // Property navigation
ExpressionTokenColon // Function arg separator for 'any(v:boolExpr)' and 'all(v:boolExpr)' lambda operators
ExpressionTokenComma // [5] List delimiter and function argument delimiter.
ExpressionTokenLogical // eq|ne|gt|ge|lt|le|and|or|not|has|in
ExpressionTokenOp // add|sub|mul|divby|div|mod
ExpressionTokenFunc // Function, e.g. contains, substring...
ExpressionTokenLambdaNav // "/" token when used in lambda expression, e.g. tags/any()
ExpressionTokenLambda // [10] any(), all() lambda functions
ExpressionTokenCase // A case() statement. See https://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part2-url-conventions.html#sec_case
ExpressionTokenCasePair // A case statement expression pair [ <boolean expression> : <value expression> ]
ExpressionTokenNull //
ExpressionTokenIt // The '$it' token
ExpressionTokenRoot // [15] The '$root' token
ExpressionTokenFloat // A floating point value.
ExpressionTokenInteger // An integer value
ExpressionTokenString // SQUOTE *( SQUOTE-in-string / pchar-no-SQUOTE ) SQUOTE
ExpressionTokenDate // A date value
ExpressionTokenTime // [20] A time value
ExpressionTokenDateTime // A date-time value
ExpressionTokenBoolean // A literal boolean value
ExpressionTokenLiteral // A literal non-boolean value
ExpressionTokenDuration // duration = [ "duration" ] SQUOTE durationValue SQUOTE
ExpressionTokenGuid // [25] A 128-bit GUID
ExpressionTokenAssignement // The '=' assignement for function arguments.
ExpressionTokenGeographyPolygon //
ExpressionTokenGeometryPolygon //
expressionTokenLast
)
func (e ExpressionTokenType) String() string {
return [...]string{
"ExpressionTokenOpenParen",
"ExpressionTokenCloseParen",
"ExpressionTokenWhitespace",
"ExpressionTokenNav",
"ExpressionTokenColon",
"ExpressionTokenComma",
"ExpressionTokenLogical",
"ExpressionTokenOp",
"ExpressionTokenFunc",
"ExpressionTokenLambdaNav",
"ExpressionTokenLambda",
"ExpressionTokenCase",
"ExpressionTokenCasePair",
"ExpressionTokenNull",
"ExpressionTokenIt",
"ExpressionTokenRoot",
"ExpressionTokenFloat",
"ExpressionTokenInteger",
"ExpressionTokenString",
"ExpressionTokenDate",
"ExpressionTokenTime",
"ExpressionTokenDateTime",
"ExpressionTokenBoolean",
"ExpressionTokenLiteral",
"ExpressionTokenDuration",
"ExpressionTokenGuid",
"ExpressionTokenAssignement",
"ExpressionTokenGeographyPolygon",
"ExpressionTokenGeometryPolygon",
"expressionTokenLast",
}[e]
}
// ExpressionParser is a ODATA expression parser.
type ExpressionParser struct {
*Parser
ExpectBoolExpr bool // Request expression to validate it is a boolean expression.
tokenizer *Tokenizer // The expression tokenizer.
}
// ParseExpressionString converts a ODATA expression input string into a parse
// tree that can be used by providers to create a response.
// Expressions can be used within $filter and $orderby query options.
func (p *ExpressionParser) ParseExpressionString(ctx context.Context, expression string) (*GoDataExpression, error) {
tokens, err := p.tokenizer.Tokenize(ctx, expression)
if err != nil {
return nil, err
}
// TODO: can we do this in one fell swoop?
postfix, err := p.InfixToPostfix(ctx, tokens)
if err != nil {
return nil, err
}
tree, err := p.PostfixToTree(ctx, postfix)
if err != nil {
return nil, err
}
if tree == nil || tree.Token == nil {
return nil, BadRequestError("Expression cannot be nil")
}
if p.ExpectBoolExpr && !p.isBooleanExpression(tree.Token) {
return nil, BadRequestError("Expression does not return a boolean value")
}
return &GoDataExpression{tree, expression}, nil
}
var GlobalExpressionTokenizer *Tokenizer
var GlobalExpressionParser *ExpressionParser
// init constructs single instances of Tokenizer and ExpressionParser and initializes their
// respective packages variables.
func init() {
p := NewExpressionParser()
t := p.tokenizer // use the Tokenizer instance created by
GlobalExpressionTokenizer = t
GlobalExpressionParser = p
GlobalFilterTokenizer = t
GlobalFilterParser = p
}
// ExpressionTokenizer creates a tokenizer capable of tokenizing ODATA expressions.
// 4.01 Services MUST support case-insensitive operator names.
// See https://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part2-url-conventions.html#_Toc31360955
func NewExpressionTokenizer() *Tokenizer {
t := Tokenizer{}
// guidValue = 8HEXDIG "-" 4HEXDIG "-" 4HEXDIG "-" 4HEXDIG "-" 12HEXDIG
t.Add(`^[[:xdigit:]]{8}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{12}`, ExpressionTokenGuid)
// duration = [ "duration" ] SQUOTE durationValue SQUOTE
// durationValue = [ SIGN ] "P" [ 1*DIGIT "D" ] [ "T" [ 1*DIGIT "H" ] [ 1*DIGIT "M" ] [ 1*DIGIT [ "." 1*DIGIT ] "S" ] ]
// Duration literals in OData 4.0 required prefixing with “duration”.
// In OData 4.01, services MUST support duration and enumeration literals with or without the type prefix.
// OData clients that want to operate across OData 4.0 and OData 4.01 services should always include the prefix for duration and enumeration types.
t.Add(tokenDurationRe, ExpressionTokenDuration)
t.Add("^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}T[0-9]{2,2}:[0-9]{2,2}(:[0-9]{2,2}(.[0-9]+)?)?(Z|[+-][0-9]{2,2}:[0-9]{2,2})", ExpressionTokenDateTime)
t.Add("^-?[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}", ExpressionTokenDate)
t.Add("^[0-9]{2,2}:[0-9]{2,2}(:[0-9]{2,2}(.[0-9]+)?)?", ExpressionTokenTime)
t.Add("^\\(", ExpressionTokenOpenParen)
t.Add("^\\)", ExpressionTokenCloseParen)
t.Add("^(?P<token>/)(?i)(any|all)", ExpressionTokenLambdaNav) // '/' as a token between a collection expression and a lambda function any() or all()
t.Add("^/", ExpressionTokenNav) // '/' as a token for property navigation.
t.Add("^=", ExpressionTokenAssignement) // '=' as a token for function argument assignment.
t.AddWithSubstituteFunc("^:", ExpressionTokenColon, func(in string) string { return "," }) // Function arg separator for lambda functions (any, all)
t.Add("^,", ExpressionTokenComma) // Default arg separator for functions
// Per ODATA ABNF grammar, functions must be followed by a open parenthesis.
// This implementation is a bit more lenient and allows space character between
// the function name and the open parenthesis.
// TODO: If we remove the optional space character, the function token will be
// mistakenly interpreted as a literal.
// E.g. ABNF for 'geo.distance':
// distanceMethodCallExpr = "geo.distance" OPEN BWS commonExpr BWS COMMA BWS commonExpr BWS CLOSE
t.Add("(?i)^(?P<token>(geo.distance|geo.intersects|geo.length))[\\s(]", ExpressionTokenFunc)
// geographyPolygon = geographyPrefix SQUOTE fullPolygonLiteral SQUOTE
// fullPolygonLiteral = sridLiteral polygonLiteral
// sridLiteral = "SRID" EQ 1*5DIGIT SEMI
// polygonLiteral = "Polygon" polygonData
// polygonData = OPEN ringLiteral *( COMMA ringLiteral ) CLOSE
// Example: geography'SRID=0;Polygon((-122.031577 47.578581, -122.031577 47.678581, -122.131577 47.678581))'
t.Add(`^geography'SRID=[0-9]{1,5};Polygon\(\((-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)(,\s-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)*\)\)'`, ExpressionTokenGeographyPolygon)
// geometryPolygon = geometryPrefix SQUOTE fullPolygonLiteral SQUOTE
t.Add(`^geometry'SRID=[0-9]{1,5};Polygon\(\((-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)(,\s-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)*\)\)'`, ExpressionTokenGeometryPolygon)
// According to ODATA ABNF notation, functions must be followed by a open parenthesis with no space
// between the function name and the open parenthesis.
// However, we are leniently allowing space characters between the function and the open parenthesis.
// TODO make leniency configurable.
// E.g. ABNF for 'indexof':
// indexOfMethodCallExpr = "indexof" OPEN BWS commonExpr BWS COMMA BWS commonExpr BWS CLOSE
t.Add("(?i)^(?P<token>(substringof|substring|length|indexof|exists|"+
"contains|endswith|startswith|tolower|toupper|trim|concat|year|month|day|"+
"hour|minute|second|fractionalseconds|date|time|totaloffsetminutes|now|"+
"maxdatetime|mindatetime|totalseconds|round|floor|ceiling|isof|cast))[\\s(]", ExpressionTokenFunc)
// Logical operators must be followed by a space character.
// However, in practice user have written requests such as not(City eq 'Seattle')
// We are leniently allowing space characters between the operator name and the open parenthesis.
// TODO make leniency configurable.
// Example:
// notExpr = "not" RWS boolCommonExpr
t.Add("(?i)^(?P<token>(eq|ne|gt|ge|lt|le|and|or|not|has|in))[\\s(]", ExpressionTokenLogical)
// Arithmetic operators must be followed by a space character.
t.Add("(?i)^(?P<token>(add|sub|mul|divby|div|mod))\\s", ExpressionTokenOp)
// anyExpr = "any" OPEN BWS [ lambdaVariableExpr BWS COLON BWS lambdaPredicateExpr ] BWS CLOSE
// allExpr = "all" OPEN BWS lambdaVariableExpr BWS COLON BWS lambdaPredicateExpr BWS CLOSE
t.Add("(?i)^(?P<token>(any|all))[\\s(]", ExpressionTokenLambda)
t.Add("(?i)^(?P<token>(case))[\\s(]", ExpressionTokenCase)
t.Add("^null", ExpressionTokenNull)
t.Add("^\\$it", ExpressionTokenIt)
t.Add("^\\$root", ExpressionTokenRoot)
t.Add("^-?[0-9]+\\.[0-9]+", ExpressionTokenFloat)
t.Add("^-?[0-9]+", ExpressionTokenInteger)
t.AddWithSubstituteFunc("^'(''|[^'])*'", ExpressionTokenString, unescapeTokenString)
t.Add("^(true|false)", ExpressionTokenBoolean)
t.AddWithSubstituteFunc("^@*[a-zA-Z][a-zA-Z0-9_.]*",
ExpressionTokenLiteral, unescapeUtfEncoding) // The optional '@' character is used to identify parameter aliases
t.Ignore("^ ", ExpressionTokenWhitespace)
return &t
}
// unescapeTokenString unescapes the input string according to the ODATA ABNF rules
// and returns the unescaped string.
// In ODATA ABNF, strings are encoded according to the following rules:
// string = SQUOTE *( SQUOTE-in-string / pchar-no-SQUOTE ) SQUOTE
// SQUOTE-in-string = SQUOTE SQUOTE ; two consecutive single quotes represent one within a string literal
// pchar-no-SQUOTE = unreserved / pct-encoded-no-SQUOTE / other-delims / "$" / "&" / "=" / ":" / "@"
// pct-encoded-no-SQUOTE = "%" ( "0" / "1" / "3" / "4" / "5" / "6" / "8" / "9" / A-to-F ) HEXDIG
// / "%" "2" ( "0" / "1" / "2" / "3" / "4" / "5" / "6" / "8" / "9" / A-to-F )
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
//
// See http://docs.oasis-open.org/odata/odata/v4.01/csprd03/abnf/odata-abnf-construction-rules.txt
func unescapeTokenString(in string) string {
// The call to ReplaceAll() implements
// SQUOTE-in-string = SQUOTE SQUOTE ; two consecutive single quotes represent one within a string literal
if in == "''" {
return in
}
return strings.ReplaceAll(in, "''", "'")
}
// TODO: should we make this configurable?
func unescapeUtfEncoding(in string) string {
return strings.ReplaceAll(in, "_x0020_", " ")
}
func NewExpressionParser() *ExpressionParser {
parser := &ExpressionParser{
Parser: EmptyParser().WithLiteralToken(ExpressionTokenLiteral),
ExpectBoolExpr: false,
tokenizer: NewExpressionTokenizer(),
}
parser.DefineOperator("/", 2, OpAssociationLeft, 8) // Note: '/' is used as a property navigator and between a collExpr and lambda function.
parser.DefineOperator("has", 2, OpAssociationLeft, 8)
// 'in' operator takes a literal list.
// City in ('Seattle') needs to be interpreted as a list expression, not a paren expression.
parser.DefineOperator("in", 2, OpAssociationLeft, 8).WithListExprPreference(true)
parser.DefineOperator("-", 1, OpAssociationNone, 7)
parser.DefineOperator("not", 1, OpAssociationRight, 7)
parser.DefineOperator("cast", 2, OpAssociationNone, 7)
parser.DefineOperator("mul", 2, OpAssociationNone, 6)
parser.DefineOperator("div", 2, OpAssociationNone, 6) // Division
parser.DefineOperator("divby", 2, OpAssociationNone, 6) // Decimal Division
parser.DefineOperator("mod", 2, OpAssociationNone, 6)
parser.DefineOperator("add", 2, OpAssociationNone, 5)
parser.DefineOperator("sub", 2, OpAssociationNone, 5)
parser.DefineOperator("gt", 2, OpAssociationLeft, 4)
parser.DefineOperator("ge", 2, OpAssociationLeft, 4)
parser.DefineOperator("lt", 2, OpAssociationLeft, 4)
parser.DefineOperator("le", 2, OpAssociationLeft, 4)
parser.DefineOperator("eq", 2, OpAssociationLeft, 3)
parser.DefineOperator("ne", 2, OpAssociationLeft, 3)
parser.DefineOperator("and", 2, OpAssociationLeft, 2)
parser.DefineOperator("or", 2, OpAssociationLeft, 1)
parser.DefineOperator("=", 2, OpAssociationRight, 0) // Function argument assignment. E.g. MyFunc(Arg1='abc')
parser.DefineFunction("contains", []int{2}, true)
parser.DefineFunction("endswith", []int{2}, true)
parser.DefineFunction("startswith", []int{2}, true)
parser.DefineFunction("exists", []int{2}, true)
parser.DefineFunction("length", []int{1}, false)
parser.DefineFunction("indexof", []int{2}, false)
parser.DefineFunction("substring", []int{2, 3}, false)
parser.DefineFunction("substringof", []int{2}, false)
parser.DefineFunction("tolower", []int{1}, false)
parser.DefineFunction("toupper", []int{1}, false)
parser.DefineFunction("trim", []int{1}, false)
parser.DefineFunction("concat", []int{2}, false)
parser.DefineFunction("year", []int{1}, false)
parser.DefineFunction("month", []int{1}, false)
parser.DefineFunction("day", []int{1}, false)
parser.DefineFunction("hour", []int{1}, false)
parser.DefineFunction("minute", []int{1}, false)
parser.DefineFunction("second", []int{1}, false)
parser.DefineFunction("fractionalseconds", []int{1}, false)
parser.DefineFunction("date", []int{1}, false)
parser.DefineFunction("time", []int{1}, false)
parser.DefineFunction("totaloffsetminutes", []int{1}, false)
parser.DefineFunction("now", []int{0}, false)
parser.DefineFunction("maxdatetime", []int{0}, false)
parser.DefineFunction("mindatetime", []int{0}, false)
parser.DefineFunction("totalseconds", []int{1}, false)
parser.DefineFunction("round", []int{1}, false)
parser.DefineFunction("floor", []int{1}, false)
parser.DefineFunction("ceiling", []int{1}, false)
parser.DefineFunction("isof", []int{1, 2}, true) // isof function can take one or two arguments.
parser.DefineFunction("cast", []int{2}, false)
parser.DefineFunction("geo.distance", []int{2}, false)
// The geo.intersects function has the following signatures:
// Edm.Boolean geo.intersects(Edm.GeographyPoint,Edm.GeographyPolygon)
// Edm.Boolean geo.intersects(Edm.GeometryPoint,Edm.GeometryPolygon)
// The geo.intersects function returns true if the specified point lies within the interior
// or on the boundary of the specified polygon, otherwise it returns false.
parser.DefineFunction("geo.intersects", []int{2}, false)
// The geo.length function has the following signatures:
// Edm.Double geo.length(Edm.GeographyLineString)
// Edm.Double geo.length(Edm.GeometryLineString)
// The geo.length function returns the total length of its line string parameter
// in the coordinate reference system signified by its SRID.
parser.DefineFunction("geo.length", []int{1}, false)
// 'any' can take either zero or two arguments with the later having the form any(d:d/Prop eq 1).
// Godata interprets the colon as an argument delimiter and considers the function to have two arguments.
parser.DefineFunction("any", []int{0, 2}, true)
// 'all' requires two arguments of a form similar to 'any'.
parser.DefineFunction("all", []int{2}, true)
// Define 'case' as a function accepting 1-10 arguments. Each argument is a pair of expressions separated by a colon.
// See https://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part2-url-conventions.html#sec_case
parser.DefineFunction("case", []int{1,2,3,4,5,6,7,8,9,10}, true)
return parser
}
func (p *ExpressionParser) SemanticizeExpression(
expression *GoDataExpression,
service *GoDataService,
entity *GoDataEntityType,
) error {
if expression == nil || expression.Tree == nil {
return nil
}
var semanticizeExpressionNode func(node *ParseNode) error
semanticizeExpressionNode = func(node *ParseNode) error {
if node.Token.Type == ExpressionTokenLiteral {
prop, ok := service.PropertyLookup[entity][node.Token.Value]
if !ok {
return BadRequestError("No property found " + node.Token.Value + " on entity " + entity.Name)
}
node.Token.SemanticType = SemanticTypeProperty
node.Token.SemanticReference = prop
} else {
node.Token.SemanticType = SemanticTypePropertyValue
node.Token.SemanticReference = &node.Token.Value
}
for _, child := range node.Children {
err := semanticizeExpressionNode(child)
if err != nil {
return err
}
}
return nil
}
return semanticizeExpressionNode(expression.Tree)
}