<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -19,6 +19,8 @@ TODO
   - Add a globally accessable, threadsafe symbol table. This will be very
     useful for storing field names so that no objects need to strdup the
     field-names but can just store the symbol representative instead.
+    + this has been done but it can be improved using actual Symbol structs
+      instead of plain char*
   - Make threading optional at compile time
   - to_json should limit output to prevent memory overflow on large indexes.
     Perhaps we could use some type of buffered read for this.
@@ -33,7 +35,6 @@ TODO
     results by
   - Auto-loading of documents during search. ie actual documents get returned
     instead of document numbers.
-  - update benchmark suite to use getrusage.u
 
 * Ruby bindings
   - argument checking for every method. We need a new api for argument checking
@@ -105,4 +106,4 @@ Done
 * Fix:
   + Working Query:  field1:value1 AND NOT field2:value2
   + Failing Query:    field1:value1 AND ( NOT field2:value2 )
-
+* update benchmark suite to use getrusage</diff>
      <filename>TODO</filename>
    </modified>
    <modified>
      <diff>@@ -3,6 +3,4 @@
 * add stopAnalyzer to bindings
 
 benchmarks
-* string actions when length is known
 * standard tokenizer
-* writevint</diff>
      <filename>c/TODO</filename>
    </modified>
    <modified>
      <diff>@@ -98,7 +98,7 @@
 
 
 /* Copy the first part of user declarations.  */
-#line 1 &quot;src/q_parser.y&quot;
+#line 3 &quot;src/q_parser.y&quot;
 
 #include &lt;string.h&gt;
 #include &lt;ctype.h&gt;
@@ -148,7 +148,7 @@ int qp_default_fuzzy_pre_len = 0;
 
 #if ! defined YYSTYPE &amp;&amp; ! defined YYSTYPE_IS_DECLARED
 typedef union YYSTYPE
-#line 29 &quot;src/q_parser.y&quot;
+#line 31 &quot;src/q_parser.y&quot;
 {
     Query *query;
     BooleanClause *bcls;
@@ -168,7 +168,7 @@ typedef union YYSTYPE
 
 
 /* Copy the second part of user declarations.  */
-#line 37 &quot;src/q_parser.y&quot;
+#line 39 &quot;src/q_parser.y&quot;
 
 static int yylex(YYSTYPE *lvalp, QParser *qp);
 static int yyerror(QParser *qp, char const *msg);
@@ -548,12 +548,12 @@ static const yytype_int8 yyrhs[] =
 /* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
 static const yytype_uint8 yyrline[] =
 {
-       0,   131,   131,   132,   134,   135,   136,   137,   139,   140,
-     141,   143,   144,   146,   147,   148,   149,   150,   151,   152,
-     154,   155,   156,   158,   160,   160,   162,   162,   162,   165,
-     166,   168,   169,   170,   171,   173,   174,   175,   176,   177,
-     179,   180,   181,   182,   183,   184,   185,   186,   187,   188,
-     189,   190
+       0,   133,   133,   134,   136,   137,   138,   139,   141,   142,
+     143,   145,   146,   148,   149,   150,   151,   152,   153,   154,
+     156,   157,   158,   160,   162,   162,   164,   164,   164,   167,
+     168,   170,   171,   172,   173,   175,   176,   177,   178,   179,
+     181,   182,   183,   184,   185,   186,   187,   188,   189,   190,
+     191,   192
 };
 #endif
 
@@ -1212,57 +1212,57 @@ yydestruct (yymsg, yytype, yyvaluep, qp)
   switch (yytype)
     {
       case 27: /* &quot;bool_q&quot; */
-#line 126 &quot;src/q_parser.y&quot;
+#line 128 &quot;src/q_parser.y&quot;
 	{ if ((yyvaluep-&gt;query) &amp;&amp; qp-&gt;destruct) q_deref((yyvaluep-&gt;query)); };
 #line 1218 &quot;src/q_parser.c&quot;
 	break;
       case 28: /* &quot;bool_clss&quot; */
-#line 128 &quot;src/q_parser.y&quot;
+#line 130 &quot;src/q_parser.y&quot;
 	{ if ((yyvaluep-&gt;bclss) &amp;&amp; qp-&gt;destruct) bca_destroy((yyvaluep-&gt;bclss)); };
 #line 1223 &quot;src/q_parser.c&quot;
 	break;
       case 29: /* &quot;bool_cls&quot; */
-#line 127 &quot;src/q_parser.y&quot;
+#line 129 &quot;src/q_parser.y&quot;
 	{ if ((yyvaluep-&gt;bcls) &amp;&amp; qp-&gt;destruct) bc_deref((yyvaluep-&gt;bcls)); };
 #line 1228 &quot;src/q_parser.c&quot;
 	break;
       case 30: /* &quot;boosted_q&quot; */
-#line 126 &quot;src/q_parser.y&quot;
+#line 128 &quot;src/q_parser.y&quot;
 	{ if ((yyvaluep-&gt;query) &amp;&amp; qp-&gt;destruct) q_deref((yyvaluep-&gt;query)); };
 #line 1233 &quot;src/q_parser.c&quot;
 	break;
       case 31: /* &quot;q&quot; */
-#line 126 &quot;src/q_parser.y&quot;
+#line 128 &quot;src/q_parser.y&quot;
 	{ if ((yyvaluep-&gt;query) &amp;&amp; qp-&gt;destruct) q_deref((yyvaluep-&gt;query)); };
 #line 1238 &quot;src/q_parser.c&quot;
 	break;
       case 32: /* &quot;term_q&quot; */
-#line 126 &quot;src/q_parser.y&quot;
+#line 128 &quot;src/q_parser.y&quot;
 	{ if ((yyvaluep-&gt;query) &amp;&amp; qp-&gt;destruct) q_deref((yyvaluep-&gt;query)); };
 #line 1243 &quot;src/q_parser.c&quot;
 	break;
       case 33: /* &quot;wild_q&quot; */
-#line 126 &quot;src/q_parser.y&quot;
+#line 128 &quot;src/q_parser.y&quot;
 	{ if ((yyvaluep-&gt;query) &amp;&amp; qp-&gt;destruct) q_deref((yyvaluep-&gt;query)); };
 #line 1248 &quot;src/q_parser.c&quot;
 	break;
       case 34: /* &quot;field_q&quot; */
-#line 126 &quot;src/q_parser.y&quot;
+#line 128 &quot;src/q_parser.y&quot;
 	{ if ((yyvaluep-&gt;query) &amp;&amp; qp-&gt;destruct) q_deref((yyvaluep-&gt;query)); };
 #line 1253 &quot;src/q_parser.c&quot;
 	break;
       case 39: /* &quot;phrase_q&quot; */
-#line 126 &quot;src/q_parser.y&quot;
+#line 128 &quot;src/q_parser.y&quot;
 	{ if ((yyvaluep-&gt;query) &amp;&amp; qp-&gt;destruct) q_deref((yyvaluep-&gt;query)); };
 #line 1258 &quot;src/q_parser.c&quot;
 	break;
       case 40: /* &quot;ph_words&quot; */
-#line 129 &quot;src/q_parser.y&quot;
+#line 131 &quot;src/q_parser.y&quot;
 	{ if ((yyvaluep-&gt;phrase) &amp;&amp; qp-&gt;destruct) ph_destroy((yyvaluep-&gt;phrase)); };
 #line 1263 &quot;src/q_parser.c&quot;
 	break;
       case 41: /* &quot;range_q&quot; */
-#line 126 &quot;src/q_parser.y&quot;
+#line 128 &quot;src/q_parser.y&quot;
 	{ if ((yyvaluep-&gt;query) &amp;&amp; qp-&gt;destruct) q_deref((yyvaluep-&gt;query)); };
 #line 1268 &quot;src/q_parser.c&quot;
 	break;
@@ -1573,222 +1573,222 @@ yyreduce:
   switch (yyn)
     {
         case 2:
-#line 131 &quot;src/q_parser.y&quot;
+#line 133 &quot;src/q_parser.y&quot;
     {   qp-&gt;result = (yyval.query) = NULL; }
     break;
 
   case 3:
-#line 132 &quot;src/q_parser.y&quot;
+#line 134 &quot;src/q_parser.y&quot;
     { T qp-&gt;result = (yyval.query) = get_bool_q((yyvsp[(1) - (1)].bclss)); E }
     break;
 
   case 4:
-#line 134 &quot;src/q_parser.y&quot;
+#line 136 &quot;src/q_parser.y&quot;
     { T (yyval.bclss) = first_cls((yyvsp[(1) - (1)].bcls)); E }
     break;
 
   case 5:
-#line 135 &quot;src/q_parser.y&quot;
+#line 137 &quot;src/q_parser.y&quot;
     { T (yyval.bclss) = add_and_cls((yyvsp[(1) - (3)].bclss), (yyvsp[(3) - (3)].bcls)); E }
     break;
 
   case 6:
-#line 136 &quot;src/q_parser.y&quot;
+#line 138 &quot;src/q_parser.y&quot;
     { T (yyval.bclss) = add_or_cls((yyvsp[(1) - (3)].bclss), (yyvsp[(3) - (3)].bcls)); E }
     break;
 
   case 7:
-#line 137 &quot;src/q_parser.y&quot;
+#line 139 &quot;src/q_parser.y&quot;
     { T (yyval.bclss) = add_default_cls(qp, (yyvsp[(1) - (2)].bclss), (yyvsp[(2) - (2)].bcls)); E }
     break;
 
   case 8:
-#line 139 &quot;src/q_parser.y&quot;
+#line 141 &quot;src/q_parser.y&quot;
     { T (yyval.bcls) = get_bool_cls((yyvsp[(2) - (2)].query), BC_MUST); E }
     break;
 
   case 9:
-#line 140 &quot;src/q_parser.y&quot;
+#line 142 &quot;src/q_parser.y&quot;
     { T (yyval.bcls) = get_bool_cls((yyvsp[(2) - (2)].query), BC_MUST_NOT); E }
     break;
 
   case 10:
-#line 141 &quot;src/q_parser.y&quot;
+#line 143 &quot;src/q_parser.y&quot;
     { T (yyval.bcls) = get_bool_cls((yyvsp[(1) - (1)].query), BC_SHOULD); E }
     break;
 
   case 12:
-#line 144 &quot;src/q_parser.y&quot;
+#line 146 &quot;src/q_parser.y&quot;
     { T if ((yyvsp[(1) - (3)].query)) sscanf((yyvsp[(3) - (3)].str),&quot;%f&quot;,&amp;((yyvsp[(1) - (3)].query)-&gt;boost));  (yyval.query)=(yyvsp[(1) - (3)].query); E }
     break;
 
   case 14:
-#line 147 &quot;src/q_parser.y&quot;
+#line 149 &quot;src/q_parser.y&quot;
     { T (yyval.query) = bq_new_max(true, qp-&gt;max_clauses); E }
     break;
 
   case 15:
-#line 148 &quot;src/q_parser.y&quot;
+#line 150 &quot;src/q_parser.y&quot;
     { T (yyval.query) = get_bool_q((yyvsp[(2) - (3)].bclss)); E }
     break;
 
   case 20:
-#line 154 &quot;src/q_parser.y&quot;
+#line 156 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_term_q(qp, field, (yyvsp[(1) - (1)].str))); Y}
     break;
 
   case 21:
-#line 155 &quot;src/q_parser.y&quot;
+#line 157 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[(1) - (3)].str), (yyvsp[(3) - (3)].str))); Y}
     break;
 
   case 22:
-#line 156 &quot;src/q_parser.y&quot;
+#line 158 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[(1) - (2)].str), NULL)); Y}
     break;
 
   case 23:
-#line 158 &quot;src/q_parser.y&quot;
+#line 160 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[(1) - (1)].str))); Y}
     break;
 
   case 24:
-#line 160 &quot;src/q_parser.y&quot;
+#line 162 &quot;src/q_parser.y&quot;
     { qp-&gt;fields = qp-&gt;def_fields; }
     break;
 
   case 25:
-#line 161 &quot;src/q_parser.y&quot;
+#line 163 &quot;src/q_parser.y&quot;
     { (yyval.query) = (yyvsp[(3) - (4)].query); }
     break;
 
   case 26:
-#line 162 &quot;src/q_parser.y&quot;
+#line 164 &quot;src/q_parser.y&quot;
     { qp-&gt;fields = qp-&gt;all_fields; }
     break;
 
   case 27:
-#line 162 &quot;src/q_parser.y&quot;
+#line 164 &quot;src/q_parser.y&quot;
     {qp-&gt;fields = qp-&gt;def_fields;}
     break;
 
   case 28:
-#line 163 &quot;src/q_parser.y&quot;
+#line 165 &quot;src/q_parser.y&quot;
     { (yyval.query) = (yyvsp[(4) - (5)].query); }
     break;
 
   case 29:
-#line 165 &quot;src/q_parser.y&quot;
+#line 167 &quot;src/q_parser.y&quot;
     { (yyval.hashset) = first_field(qp, (yyvsp[(1) - (1)].str)); }
     break;
 
   case 30:
-#line 166 &quot;src/q_parser.y&quot;
+#line 168 &quot;src/q_parser.y&quot;
     { (yyval.hashset) = add_field(qp, (yyvsp[(3) - (3)].str));}
     break;
 
   case 31:
-#line 168 &quot;src/q_parser.y&quot;
+#line 170 &quot;src/q_parser.y&quot;
     { (yyval.query) = get_phrase_q(qp, (yyvsp[(2) - (3)].phrase), NULL); }
     break;
 
   case 32:
-#line 169 &quot;src/q_parser.y&quot;
+#line 171 &quot;src/q_parser.y&quot;
     { (yyval.query) = get_phrase_q(qp, (yyvsp[(2) - (5)].phrase), (yyvsp[(5) - (5)].str)); }
     break;
 
   case 33:
-#line 170 &quot;src/q_parser.y&quot;
+#line 172 &quot;src/q_parser.y&quot;
     { (yyval.query) = NULL; }
     break;
 
   case 34:
-#line 171 &quot;src/q_parser.y&quot;
+#line 173 &quot;src/q_parser.y&quot;
     { (yyval.query) = NULL; (void)(yyvsp[(4) - (4)].str);}
     break;
 
   case 35:
-#line 173 &quot;src/q_parser.y&quot;
+#line 175 &quot;src/q_parser.y&quot;
     { (yyval.phrase) = ph_first_word((yyvsp[(1) - (1)].str)); }
     break;
 
   case 36:
-#line 174 &quot;src/q_parser.y&quot;
+#line 176 &quot;src/q_parser.y&quot;
     { (yyval.phrase) = ph_first_word(NULL); }
     break;
 
   case 37:
-#line 175 &quot;src/q_parser.y&quot;
+#line 177 &quot;src/q_parser.y&quot;
     { (yyval.phrase) = ph_add_word((yyvsp[(1) - (2)].phrase), (yyvsp[(2) - (2)].str)); }
     break;
 
   case 38:
-#line 176 &quot;src/q_parser.y&quot;
+#line 178 &quot;src/q_parser.y&quot;
     { (yyval.phrase) = ph_add_word((yyvsp[(1) - (3)].phrase), NULL); }
     break;
 
   case 39:
-#line 177 &quot;src/q_parser.y&quot;
+#line 179 &quot;src/q_parser.y&quot;
     { (yyval.phrase) = ph_add_multi_word((yyvsp[(1) - (3)].phrase), (yyvsp[(3) - (3)].str));  }
     break;
 
   case 40:
-#line 179 &quot;src/q_parser.y&quot;
+#line 181 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str),  (yyvsp[(3) - (4)].str),  true,  true)); Y}
     break;
 
   case 41:
-#line 180 &quot;src/q_parser.y&quot;
+#line 182 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str),  (yyvsp[(3) - (4)].str),  true,  false)); Y}
     break;
 
   case 42:
-#line 181 &quot;src/q_parser.y&quot;
+#line 183 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str),  (yyvsp[(3) - (4)].str),  false, true)); Y}
     break;
 
   case 43:
-#line 182 &quot;src/q_parser.y&quot;
+#line 184 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str),  (yyvsp[(3) - (4)].str),  false, false)); Y}
     break;
 
   case 44:
-#line 183 &quot;src/q_parser.y&quot;
+#line 185 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (3)].str),  false, false)); Y}
     break;
 
   case 45:
-#line 184 &quot;src/q_parser.y&quot;
+#line 186 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (3)].str),  false, true)); Y}
     break;
 
   case 46:
-#line 185 &quot;src/q_parser.y&quot;
+#line 187 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (3)].str),  NULL,true,  false)); Y}
     break;
 
   case 47:
-#line 186 &quot;src/q_parser.y&quot;
+#line 188 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (3)].str),  NULL,false, false)); Y}
     break;
 
   case 48:
-#line 187 &quot;src/q_parser.y&quot;
+#line 189 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (2)].str),  false, false)); Y}
     break;
 
   case 49:
-#line 188 &quot;src/q_parser.y&quot;
+#line 190 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(3) - (3)].str),  false, true)); Y}
     break;
 
   case 50:
-#line 189 &quot;src/q_parser.y&quot;
+#line 191 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(3) - (3)].str),  NULL,true,  false)); Y}
     break;
 
   case 51:
-#line 190 &quot;src/q_parser.y&quot;
+#line 192 &quot;src/q_parser.y&quot;
     { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (2)].str),  NULL,false, false)); Y}
     break;
 
@@ -2008,7 +2008,7 @@ yyreturn:
 }
 
 
-#line 192 &quot;src/q_parser.y&quot;
+#line 194 &quot;src/q_parser.y&quot;
 
 
 static const char *special_char = &quot;&amp;:()[]{}!\&quot;~^|&lt;&gt;=*?+-&quot;;</diff>
      <filename>c/src/q_parser.c</filename>
    </modified>
    <modified>
      <diff>@@ -1,3 +1,57 @@
+/*****************************************************************************
+ * QueryParser
+ * ===========
+ *
+ * Synopsis
+ * --------
+ *
+ * === qp_parse
+ *
+ * The main QueryParser method is +qp_parse+. It gets called with the query
+ * string. The first thing it does is to clean the query string if
+ * ((QueryParser *)self)-&gt;clean_str is set to true. The cleaning is done by
+ * qp_clean_str.
+ * 
+ * It then calls the yacc parser which will set self-&gt;result to the parsed
+ * query. If parsing fails in any way, self-&gt;result should be set to NULL, in
+ * which case qp_parse does one of two things, depending on the value of
+ * self-&gt;handle_parse_errors. If it is set to true, qp_parse attempts to do a
+ * very basic parsing of the query by ignoring all special characters and
+ * parsing the query as a plain boolean query. If it is set to false, qp_parse
+ * will raise a PARSE_ERROR.
+ * 
+ * === The Lexer
+ *
+ * yylex is the lexing method called by the QueryParser. It breaks the query
+ * up into special characters ( &quot;&amp;:()[]{}!\&quot;~^|&lt;&gt;=*?+-&quot; ) and tokens (QWRD,
+ * WILD_STR, AND['AND', '&amp;&amp;'], OR['OR', '||'], REQ['REQ', '+'], NOT['NOT',
+ * '-', '~']). QWRD tokens are query word tokens which are made up of
+ * characters other than the special characters. They can also contain special
+ * characters when escaped with a backslash '\'. WILD_STR is the same as QWRD
+ * except that it may also contain '?' and '*' characters.
+ *
+ * === The Parser
+ *
+ * For a better understanding of how the query parser works, it is a good
+ * idea to study the Ferret Query Language (FQL) described below. Once you
+ * understand FQL the one tricky part that needs to be mentioned is how fields
+ * are handled. The QueryParser knows about two sets of fields, the default
+ * search fields and the set of all fields in the index. When no fields are
+ * specified then the default fields are used. The '*:' field specifier will
+ * search all fields contained in the all_fields set. Otherwise all fields
+ * specified in the field descriptor separated by '|' will be searched. For
+ * example 'title|content:' will search the title and content fields. When
+ * fields are specified like this, the parser will push the fields onto a
+ * stack and all queries modified by the field specifier will be applied to
+ * the fields on top of the stack. This is where the FLDS macro comes into
+ * play. It takes the current query building function in the parser and calls
+ * it for all fields on top of the stack.
+ * 
+ * Ferret Query Language (FQL)
+ * ===========================
+ *
+ * FIXME to be continued...
+ *****************************************************************************/
 %{
 #include &lt;string.h&gt;
 #include &lt;ctype.h&gt;
@@ -877,6 +931,14 @@ static void str_insert(char *str, int len, char chr)
     *str = chr;
 }
 
+/*****************************************************************************
+ * The qp_clean_str method scans the query string and ensures that all open
+ * and close parentheses '()' and quotes '&quot;' are balanced. It does this by
+ * inserting or appending extra parentheses or quotes. This is not
+ * necessarily going to be exactly what the user wanted, but it will help
+ * prevent the parser from failing, so it's the best we can do at this stage.
+ * It also checks 
+ *****************************************************************************/
 char *qp_clean_str(char *str)
 {
     int b, pb = -1;</diff>
      <filename>c/src/q_parser.y</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>8c94483694f5774d39acb4072885476c45d9b571</id>
    </parent>
  </parents>
  <author>
    <name>dave</name>
    <email>dave@06fd6eb0-0002-0410-a719-e5602cce40bc</email>
  </author>
  <url>http://github.com/dbalmain/ferret/commit/cafd8d281bdf982e2ac0b84d57dfbf9fed46603c</url>
  <id>cafd8d281bdf982e2ac0b84d57dfbf9fed46603c</id>
  <committed-date>2008-04-21T07:10:15-07:00</committed-date>
  <authored-date>2008-04-21T07:10:15-07:00</authored-date>
  <message>Started detailed documentation of the QueryParser

In writing the documentation I've spotted a pretty serious bug in the
QueryParser. Fields need to be pushed onto a stack as they are specified,
otherwise the parser is quite broken for multi-level, multi-field queries. I'll
fix this first thing tomorrow before finishing this documentation. Comments on
the documentation are welcome.


git-svn-id: svn+ssh://davebalmain.com/home/dave/repos/ferret/trunk@1031 06fd6eb0-0002-0410-a719-e5602cce40bc</message>
  <tree>14ba0326debd7b3d8abec4d35d860d086ec5c5ac</tree>
  <committer>
    <name>dave</name>
    <email>dave@06fd6eb0-0002-0410-a719-e5602cce40bc</email>
  </committer>
</commit>
