Permalink
Fetching contributors…
Cannot retrieve contributors at this time
558 lines (447 sloc) 13 KB
// Precedence and associativity declarations. Lemon ranks symbols by
// declaration order: a symbol declared earlier has lower precedence than one
// declared later. Several of these names are used further down as [NAME]
// precedence tags on rules.
%left LOWEST.
%left TILDE.
%left TAGLIST.
%left QUOTE.
%left COLON.
%left MINUS.
%left NUMBER.
%left STOPWORD.
%left TERMLIST.
%left TERM.
%left PREFIX.
%left PERCENT.
%left ATTRIBUTE.
%right LP.
%left RP.
// MODIFIER needs a higher precedence than LP/RP, so it is declared after them
%left MODIFIER.
%left AND.
%left OR.
%left ORX.
%left ARROW.
// Every terminal carries a QueryToken as its value
%token_type {QueryToken}
// Runs when the parser hits a token it cannot accept. Marks the parse as
// failed and stores a heap-allocated message in ctx->errorMsg (NULL if the
// message could not be allocated).
%syntax_error {
  // The token length comes from the query text, so the message is formatted
  // into a heap buffer instead of a variable-length array on the stack.
  int len = TOKEN.len + 100;
  char *buf = malloc(len);
  if (buf) {
    snprintf(buf, len, "Syntax error at offset %d near '%.*s'", TOKEN.pos, TOKEN.len, TOKEN.s);
  }
  ctx->ok = 0;
  ctx->errorMsg = buf;
}
%include {
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "parse.h"
#include "../util/arr.h"
#include "../rmutil/vector.h"
#include "../query_node.h"
// Returns a newly allocated, NUL-terminated, lowercased copy of at most `len`
// bytes of `s` (stopping early at a NUL byte), built in a single pass.
// A backslash immediately followed by a punctuation or whitespace character
// is dropped, which unescapes the character after it.
// The caller owns the result and must free() it. Returns NULL if the
// allocation fails.
char *strdupcase(const char *s, size_t len) {
  char *ret = malloc(len + 1);
  if (!ret) {
    return NULL;
  }
  char *dst = ret;
  for (size_t i = 0; i < len && s[i]; ++i) {
    // <ctype.h> functions need a value representable as unsigned char;
    // passing a plain char is undefined for bytes >= 0x80 where char is signed.
    const unsigned char cur = (unsigned char)s[i];
    if (cur == '\\' && i + 1 < len) {
      const unsigned char next = (unsigned char)s[i + 1];
      if (ispunct(next) || isspace(next)) {
        // Drop the backslash; the following byte is handled by the next iteration.
        continue;
      }
    }
    *dst++ = (char)tolower(cur);
  }
  *dst = '\0';
  return ret;
}
// Unescapes the first `sz` bytes of `s` in place and returns the new length,
// which may be shorter than `sz`. The buffer does not have to be
// NUL-terminated, and no terminator is written. A backslash is removed only
// when the byte after it (still inside the buffer) is a punctuation or
// whitespace character.
size_t unescapen(char *s, size_t sz) {
  char *dst = s;
  const char *src = s;
  const char *end = s + sz;
  while (src < end) {
    if (*src == '\\' && src + 1 < end) {
      // <ctype.h> functions need a value representable as unsigned char;
      // passing a plain char is undefined for bytes >= 0x80 where char is signed.
      const unsigned char next = (unsigned char)src[1];
      if (ispunct(next) || isspace(next)) {
        ++src;
        continue;
      }
    }
    *dst++ = *src++;
  }
  return (size_t)(dst - s);
}
#define NODENN_BOTH_VALID 0
#define NODENN_BOTH_INVALID -1
#define NODENN_ONE_NULL 1
// Classifies a pair of pointers by how many of them are NULL:
//   NODENN_BOTH_VALID   ( 0): neither is NULL; `*out` is not written
//   NODENN_BOTH_INVALID (-1): both are NULL; `*out` is not written
//   NODENN_ONE_NULL     ( 1): exactly one is NULL; the other one is stored in `*out`
static int one_not_null(void *a, void *b, void **out) {
  const int have_a = (a != NULL);
  const int have_b = (b != NULL);
  if (have_a == have_b) {
    return have_a ? NODENN_BOTH_VALID : NODENN_BOTH_INVALID;
  }
  *out = have_a ? a : b;
  return NODENN_ONE_NULL;
}
} // END %include
// The parse context handed to the parser; it is visible as `ctx` inside every action.
%extra_argument { QueryParseCtx *ctx }
// Symbols without an explicit %type below carry a QueryToken and need no cleanup.
%default_type { QueryToken }
%default_destructor { }
// Value types of the non-terminals, each followed by the destructor that
// releases a value of that type.
%type expr { QueryNode * }
%destructor expr { QueryNode_Free($$); }
// Only an attribute's value is a heap copy (see the `attribute` rule), so only the value is freed.
%type attribute { QueryAttribute }
%destructor attribute { free((char*)$$.value); }
// Releases the array through array_free_ex(), freeing each attribute's value on the way.
%type attribute_list {QueryAttribute *}
%destructor attribute_list { array_free_ex($$, free((char*)((QueryAttribute*)ptr )->value)); }
%type prefix { QueryNode * }
%destructor prefix { QueryNode_Free($$); }
%type termlist { QueryNode * }
%destructor termlist { QueryNode_Free($$); }
%type union { QueryNode *}
%destructor union { QueryNode_Free($$); }
%type fuzzy { QueryNode *}
%destructor fuzzy { QueryNode_Free($$); }
%type tag_list { QueryNode *}
%destructor tag_list { QueryNode_Free($$); }
%type geo_filter { GeoFilter *}
%destructor geo_filter { GeoFilter_Free($$); }
// A modifier list is a Vector of heap-allocated field names: free every name,
// then the vector itself.
%type modifierlist { Vector* }
%destructor modifierlist {
for (size_t i = 0; i < Vector_Size($$); i++) {
char *s;
Vector_Get($$, i, &s);
free(s);
}
Vector_Free($$);
}
// A number plus its inclusive/exclusive flag; held by value, so no destructor.
%type num { RangeNumber }
%type numeric_range { NumericFilter * }
%destructor numeric_range {
NumericFilter_Free($$);
}
// Top-level rules: each one stores the result of the parse in ctx->root.
query ::= expr(A) . {
/* The expression tree is stored as the root unchanged; it is NULL when the expression reduced to nothing. */
ctx->root = A;
}
// An empty query has no root.
query ::= . {
ctx->root = NULL;
}
// A lone STAR token produces a wildcard node as the root.
query ::= STAR . {
ctx->root = NewWildcardNode();
}
/////////////////////////////////////////////////////////////////
// AND Clause / Phrase
/////////////////////////////////////////////////////////////////
// Two adjacent expressions are combined into a non-exact phrase node.
// NULL operands are dropped: if exactly one operand is non-NULL it becomes the
// result, and if both are NULL the result is NULL.
expr(A) ::= expr(B) expr(C) . [AND] {
  switch (one_not_null(B, C, &A)) {
    case NODENN_BOTH_INVALID:
      A = NULL;
      break;
    case NODENN_ONE_NULL:
      // one_not_null() has already stored the surviving operand as the result
      break;
    default: {
      // Extend the left operand when it is already a non-exact phrase that is
      // not restricted to specific fields; otherwise wrap it in a new phrase.
      const int reusable = B->type == QN_PHRASE && !B->pn.exact &&
                           B->opts.fieldMask == RS_FIELDMASK_ALL;
      if (!reusable) {
        A = NewPhraseNode(0);
        QueryPhraseNode_AddChild(A, B);
      } else {
        A = B;
      }
      QueryPhraseNode_AddChild(A, C);
      break;
    }
  }
}
/////////////////////////////////////////////////////////////////
// Unions
/////////////////////////////////////////////////////////////////
// A finished union is itself an expression; the node is passed through unchanged.
expr(A) ::= union(B) . [ORX] {
A = B;
}
// Starts a union from two OR-ed expressions. NULL operands are dropped: if
// exactly one operand is non-NULL it becomes the result, and if both are NULL
// the result is NULL.
union(A) ::= expr(B) OR expr(C) . [OR] {
  const int status = one_not_null(B, C, &A);
  if (status == NODENN_ONE_NULL) {
    // one_not_null() has already stored the surviving operand as the result
  } else if (status == NODENN_BOTH_INVALID) {
    A = NULL;
  } else {
    // Extend the left operand when it is already a union that is not
    // restricted to specific fields; otherwise wrap it in a new union node.
    const int left_is_open_union =
        (B->type == QN_UNION) && (B->opts.fieldMask == RS_FIELDMASK_ALL);
    if (!left_is_open_union) {
      A = NewUnionNode();
      QueryUnionNode_AddChild(A, B);
      A->opts.fieldMask |= B->opts.fieldMask;
    } else {
      A = B;
    }
    // Add the right operand and fold its field mask into the union's mask
    QueryUnionNode_AddChild(A, C);
    A->opts.fieldMask |= C->opts.fieldMask;
    QueryNode_SetFieldMask(A, A->opts.fieldMask);
  }
}
// Appends one more alternative to an existing union. A NULL right-hand
// operand is ignored and the union is passed through unchanged.
union(A) ::= union(B) OR expr(C). [ORX] {
A = B;
if (C) {
QueryUnionNode_AddChild(A, C);
A->opts.fieldMask |= C->opts.fieldMask;
// NOTE(review): the two-operand union rule applies the combined mask to the
// union node itself, while this rule applies it to the newly added child -
// confirm the difference is intentional.
QueryNode_SetFieldMask(C, A->opts.fieldMask);
}
}
/////////////////////////////////////////////////////////////////
// Text Field Filters
/////////////////////////////////////////////////////////////////
// Restricts an expression to the single field named by the modifier. The
// expression node is passed through (NULL stays NULL); its field mask is only
// narrowed when an index spec is available to resolve the field name.
expr(A) ::= modifier(B) COLON expr(C) . [MODIFIER] {
  if (C != NULL && ctx->sctx->spec) {
    QueryNode_SetFieldMask(C, IndexSpec_GetFieldBit(ctx->sctx->spec, B.s, B.len));
  }
  A = C;
}
// Restricts an expression to the union of several fields. This action owns
// the modifier list: every field name in it and the vector itself are
// released on all paths, including when the expression is NULL or when there
// is no index spec to resolve names against.
expr(A) ::= modifierlist(B) COLON expr(C) . [MODIFIER] {
  t_fieldMask mask = 0;
  const size_t nfields = Vector_Size(B);
  for (size_t i = 0; i < nfields; i++) {
    char *field = NULL;
    Vector_Get(B, i, &field);
    // Field names are only resolved when there is an expression to restrict
    // and a spec to look them up in; the name is freed either way.
    if (field && C != NULL && ctx->sctx->spec) {
      mask |= IndexSpec_GetFieldBit(ctx->sctx->spec, field, strlen(field));
    }
    free(field);
  }
  Vector_Free(B);
  if (C != NULL) {
    QueryNode_SetFieldMask(C, mask);
  }
  A = C;
}
// Parentheses only group: the inner expression is passed through unchanged.
expr(A) ::= LP expr(B) RP . {
A = B;
}
/////////////////////////////////////////////////////////////////
// Attributes
/////////////////////////////////////////////////////////////////
// A single name/value attribute. The name keeps pointing at the token's own
// text (it is not copied), while the value is a heap copy made with strndup()
// that must eventually be freed.
attribute(A) ::= ATTRIBUTE(B) COLON term(C). {
A = (QueryAttribute){ .name = B.s, .namelen = B.len, .value = strndup(C.s, C.len), .vallen = C.len };
}
// The first attribute starts a new array with an initial capacity of 2.
attribute_list(A) ::= attribute(B) . {
A = array_new(QueryAttribute, 2);
A = array_append(A, B);
}
// Each further attribute, separated by SEMICOLON, is appended to the array.
attribute_list(A) ::= attribute_list(B) SEMICOLON attribute(C) . {
A = array_append(B, C);
}
// A trailing SEMICOLON is accepted and changes nothing.
attribute_list(A) ::= attribute_list(B) SEMICOLON . {
A = B;
}
// An empty attribute list is represented by NULL.
attribute_list(A) ::= . {
A = NULL;
}
// Applies an attribute list to the expression on its left. When both the
// expression and the list are non-NULL and applying fails, the parse is marked
// as failed with the error reported by QueryNode_ApplyAttributes(). The list
// is always released here and the expression is passed through.
expr(A) ::= expr(B) ARROW LB attribute_list(C) RB . {
  char *err = NULL;
  if (B != NULL && C != NULL && !QueryNode_ApplyAttributes(B, C, array_len(C), &err)) {
    ctx->ok = 0;
    ctx->errorMsg = err;
  }
  array_free_ex(C, free((char*)((QueryAttribute*)ptr )->value));
  A = B;
}
/////////////////////////////////////////////////////////////////
// Term Lists
/////////////////////////////////////////////////////////////////
// A term list between QUOTE tokens: the phrase node is marked as exact and
// flagged verbatim, then passed through.
expr(A) ::= QUOTE termlist(B) QUOTE. [TERMLIST] {
B->pn.exact =1;
B->opts.flags |= QueryNode_Verbatim;
A = B;
}
// A single term between QUOTE tokens: a token node built from the lowercased,
// unescaped term text and flagged verbatim.
expr(A) ::= QUOTE term(B) QUOTE. [TERMLIST] {
  char *text = strdupcase(B.s, B.len);
  A = NewTokenNode(ctx, text, -1);
  A->opts.flags |= QueryNode_Verbatim;
}
// A bare term: a token node built from the lowercased, unescaped term text.
expr(A) ::= term(B) . [LOWEST] {
  char *text = strdupcase(B.s, B.len);
  A = NewTokenNode(ctx, text, -1);
}
// A prefix node is an expression as-is.
expr(A) ::= prefix(B) . [PREFIX] {
A= B;
}
// A term list is an expression as-is.
expr(A) ::= termlist(B) . [TERMLIST] {
A = B;
}
// A stopword contributes no node; rules that combine expressions handle the NULL.
expr(A) ::= STOPWORD . [STOPWORD] {
A = NULL;
}
// Two consecutive terms start a non-exact phrase holding one token node per
// term, each built from the lowercased, unescaped term text.
termlist(A) ::= term(B) term(C). [TERMLIST] {
  A = NewPhraseNode(0);
  QueryNode *first = NewTokenNode(ctx, strdupcase(B.s, B.len), -1);
  QueryPhraseNode_AddChild(A, first);
  QueryNode *second = NewTokenNode(ctx, strdupcase(C.s, C.len), -1);
  QueryPhraseNode_AddChild(A, second);
}
// Each further term is appended to the phrase as another token node.
termlist(A) ::= termlist(B) term(C) . [TERMLIST] {
  QueryNode *next = NewTokenNode(ctx, strdupcase(C.s, C.len), -1);
  A = B;
  QueryPhraseNode_AddChild(A, next);
}
// A stopword inside a term list is skipped.
termlist(A) ::= termlist(B) STOPWORD . [TERMLIST] {
  A = B;
}
/////////////////////////////////////////////////////////////////
// Negative Clause
/////////////////////////////////////////////////////////////////
// Negation: wraps a non-NULL expression in a NOT node; a NULL operand stays NULL.
expr(A) ::= MINUS expr(B) . {
  A = B ? NewNotNode(B) : NULL;
}
/////////////////////////////////////////////////////////////////
// Optional Clause
/////////////////////////////////////////////////////////////////
// Optional clause: wraps a non-NULL expression in an optional node; a NULL
// operand stays NULL.
expr(A) ::= TILDE expr(B) . {
  A = B ? NewOptionalNode(B) : NULL;
}
/////////////////////////////////////////////////////////////////
// Prefix expressions
/////////////////////////////////////////////////////////////////
// Builds a prefix node from the lowercased, unescaped token text. The length
// is taken from the processed copy because unescaping may have shortened it.
prefix(A) ::= PREFIX(B) . [PREFIX] {
  char *lowered = strdupcase(B.s, B.len);
  A = NewPrefixNode(ctx, lowered, strlen(lowered));
}
/////////////////////////////////////////////////////////////////
// Fuzzy terms
/////////////////////////////////////////////////////////////////
// Fuzzy terms: a TERM wrapped in one, two or three PERCENT tokens on each
// side. The number of PERCENT tokens per side is passed as the last argument
// of NewFuzzyNode(); the text is lowercased and unescaped first.
expr(A) ::= PERCENT TERM(B) PERCENT. [PREFIX] {
  char *lowered = strdupcase(B.s, B.len);
  A = NewFuzzyNode(ctx, lowered, strlen(lowered), 1);
}
expr(A) ::= PERCENT PERCENT TERM(B) PERCENT PERCENT. [PREFIX] {
  char *lowered = strdupcase(B.s, B.len);
  A = NewFuzzyNode(ctx, lowered, strlen(lowered), 2);
}
expr(A) ::= PERCENT PERCENT PERCENT TERM(B) PERCENT PERCENT PERCENT. [PREFIX] {
  char *lowered = strdupcase(B.s, B.len);
  A = NewFuzzyNode(ctx, lowered, strlen(lowered), 3);
}
/////////////////////////////////////////////////////////////////
// Field Modifiers
/////////////////////////////////////////////////////////////////
// Unescapes the modifier text in place - this writes into the buffer the
// token points at - and shortens the token length to match. The token is
// otherwise passed through unchanged.
modifier(A) ::= MODIFIER(B) . {
B.len = unescapen((char*)B.s, B.len);
A = B;
}
// Starts a list of field names from a modifier and a term joined by OR. The
// list is a Vector of heap-allocated, lowercased, unescaped strings.
modifierlist(A) ::= modifier(B) OR term(C). {
  A = NewVector(char *, 2);
  char *first = strdupcase(B.s, B.len);
  char *second = strdupcase(C.s, C.len);
  Vector_Push(A, first);
  Vector_Push(A, second);
}
// Each further OR-ed term is appended to the list.
modifierlist(A) ::= modifierlist(B) OR term(C). {
  char *name = strdupcase(C.s, C.len);
  Vector_Push(B, name);
  A = B;
}
/////////////////////////////////////////////////////////////////
// Tag Lists - curly braces separated lists of words
/////////////////////////////////////////////////////////////////
// A tag filter: builds a tag node named after the modifier and moves the
// children collected by the tag_list into it. The temporary list node is
// released afterwards; a NULL tag_list yields NULL.
expr(A) ::= modifier(B) COLON tag_list(C) . {
if (!C) {
A= NULL;
} else {
// Tag field names must be case sensitive, so we can't use strdupcase
char *s = strndup(B.s, B.len);
size_t slen = unescapen((char*)s, B.len);
A = NewTagNode(s, slen);
QueryTagNode_AddChildren(A, C->pn.children, C->pn.numChildren);
// The children now belong to the tag node: zero the count on the temporary
// list node so that freeing it does not free them recursively
C->pn.numChildren = 0;
QueryNode_Free(C);
}
}
// A tag list is collected in a phrase node used as a container. It is opened
// by LB followed by its first element: a term (lowercased, unescaped token
// node), a prefix node, or a term list.
tag_list(A) ::= LB term(B) . [TAGLIST] {
A = NewPhraseNode(0);
QueryPhraseNode_AddChild(A, NewTokenNode(ctx, strdupcase(B.s, B.len), -1));
}
tag_list(A) ::= LB prefix(B) . [TAGLIST] {
A = NewPhraseNode(0);
QueryPhraseNode_AddChild(A, B);
}
tag_list(A) ::= LB termlist(B) . [TAGLIST] {
A = NewPhraseNode(0);
QueryPhraseNode_AddChild(A, B);
}
// Further elements are separated by OR and appended to the container.
tag_list(A) ::= tag_list(B) OR term(C) . [TAGLIST] {
QueryPhraseNode_AddChild(B, NewTokenNode(ctx, strdupcase(C.s, C.len), -1));
A = B;
}
tag_list(A) ::= tag_list(B) OR prefix(C) . [TAGLIST] {
QueryPhraseNode_AddChild(B, C);
A = B;
}
tag_list(A) ::= tag_list(B) OR termlist(C) . [TAGLIST] {
QueryPhraseNode_AddChild(B, C);
A = B;
}
// RB closes the list; the container is passed through unchanged.
tag_list(A) ::= tag_list(B) RB . [TAGLIST] {
A = B;
}
/////////////////////////////////////////////////////////////////
// Numeric Ranges
/////////////////////////////////////////////////////////////////
// A numeric filter on the field named by the modifier. The field name is
// stored on the filter as a heap copy, and the filter is wrapped in a numeric node.
expr(A) ::= modifier(B) COLON numeric_range(C). {
// we keep the capitalization as is
C->fieldName = strndup(B.s, B.len);
A = NewNumericNode(C);
}
// Two numbers between LSQB and RSQB. Each number carries its own inclusive
// flag, and both values and both flags are passed to NewNumericFilter().
numeric_range(A) ::= LSQB num(B) num(C) RSQB. [NUMBER] {
A = NewNumericFilter(B.num, C.num, B.inclusive, C.inclusive);
}
/////////////////////////////////////////////////////////////////
// Geo Filters
/////////////////////////////////////////////////////////////////
// A geo filter on the field named by the modifier. The field name is stored
// on the filter as a heap copy, and the filter is wrapped in a geo filter node.
expr(A) ::= modifier(B) COLON geo_filter(C). {
// we keep the capitalization as is
C->property = strndup(B.s, B.len);
A = NewGeofilterNode(C);
}
// Three numbers and a TERM between LSQB and RSQB. The TERM is lowercased
// before being handed to NewGeoFilter(). If validation fails, the parse is
// marked as failed with a copy of the validator's message; the filter is
// still returned as the value of this rule.
geo_filter(A) ::= LSQB num(B) num(C) num(D) TERM(E) RSQB. [NUMBER] {
  char *lowered = strdupcase(E.s, E.len);
  A = NewGeoFilter(B.num, C.num, D.num, lowered);
  char *err = NULL;
  const int valid = GeoFilter_IsValid(A, &err);
  if (!valid) {
    ctx->ok = 0;
    ctx->errorMsg = strdup(err);
  }
}
/////////////////////////////////////////////////////////////////
// Primitives - numbers and strings
/////////////////////////////////////////////////////////////////
// A plain number is an inclusive bound by default.
num(A) ::= NUMBER(B). {
  A.inclusive = 1;
  A.num = B.numval;
}
// A number preceded by LP is an exclusive bound.
num(A) ::= LP num(B). {
  RangeNumber bound = B;
  bound.inclusive = 0;
  A = bound;
}
// A leading MINUS negates the value and keeps the inclusive flag.
num(A) ::= MINUS num(B). {
  A = B;
  A.num = -A.num;
}
// A term is either a TERM or a NUMBER token; the token is passed through
// unchanged so that rules can read its text.
term(A) ::= TERM(B) . {
A = B;
}
term(A) ::= NUMBER(B) . {
A = B;
}