Skip to content
This repository
Newer
Older
100644 539 lines (479 sloc) 16.425 kb
fccc6851 » MLstate
2011-06-21 Initial open-source release
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (*
19 @author Adam Koprowski
20 **)
21
22 (* FIXME, proper trace output for --multiple option *)
23 (* FIXME, in --analyze-memo stop parsing with given settings when it takes longer than the best result so far (timeout) *)
24
25 module T = Tgrammar
26 module P = Tgrammar.PreGrammar
da6c620f » Raja
2011-06-22 [cleanup] teerex: remove open base
27 module List = BaseList
28 module Char = BaseChar
fccc6851 » MLstate
2011-06-21 Initial open-source release
29
30 let pr = Printf.sprintf
31
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
32 (* TODO: change this printf *)
33 let log fmt =
34 Printf.eprintf (fmt^^"\n")
35
fccc6851 » MLstate
2011-06-21 Initial open-source release
36 let grammar_to_string g =
37 pr "Start: {%s}\nProductions:\n%s\n" g.T.start
38 (T.def_map_to_string (StringMap.map fst g.T.grammar))
39
40 let errorHandling = ref true
41 let grammarFile = ref None
42 let inputFile = ref None
43 let startProd = ref None
44 let cmd = ref `parsing
45 let mode = ref `parseOne
46 let memo_default = ref None
47
48 let get_val_or_fail v err =
49 match !v with
50 | Some s -> s
51 | _ -> failwith err
52
53 let get_grammar_fn () = get_val_or_fail grammarFile "Unspecified grammar!"
54
55 let get_input_fn () = get_val_or_fail inputFile "Unspecified input file!"
56
57 let measureTime f =
58 let startT = Sys.time () in
59 let res = f () in
60 let endT = Sys.time () in
61 res, endT -. startT
62
63 (***************************************************************)
64 (************************** Error handling *********************)
65 (***************************************************************)
66
67 open Trx_runtime
68
69 (***************************************************************)
70 (************************** Parsing trace **********************)
71 (***************************************************************)
72
73 type traceParseResult =
74 | Failure
75 | Success of int (* number of consumed characters *)
76
77 type traceType =
78 | CallBeg of pos * (traceParseResult * Trx_runtime.parseError) ref * bool ref (* result of memoization? *)
79 | CallEnd
80
81 type tagInfo =
82 { def : string
83 ; level : int
84 }
85
86 type traceEntry = tagInfo * traceType
87
88 let traceFile = ref None
89
90 let trace = MutableList.create ()
91
92 let tracingEnabled () =
93 match !traceFile with
94 | Some _ -> true
95 | None -> false
96
97 let lev = ref 1
98
99 let enableTracing s =
100 traceFile := Some s
101
102 let rec indent = function
103 | 0 -> ""
104 | n -> " " ^ indent (n-1)
105
106 let write_trace input fn =
107 let max_indent = ref 0 in
108 let write_tag (ti, tt) =
109 match tt with
110 | CallBeg (pos, tr, memo) ->
111 let pos_str =
112 let line, col = FilePos.get_pos_no_cache input pos in
113 pr "%d/%d" line col
114 in
115 let memo_str = if !memo then " memoized=\"true\"" else "" in
116 let res_str, len_str =
117 match fst !tr with
118 | Failure -> "fail", ""
119 | Success i -> "ok", pr " consumed=\"%d\"" i
120 in
121 (* let err_str = Trx_runtime.show_parse_error input (snd !tr) in*)
122 max_indent := max !max_indent ti.level;
123 Printf.fprintf fn "%s<%s pos=\"%s\" result=\"%s\" %s%s >\n" (indent ti.level) ti.def pos_str res_str len_str memo_str;
124 (* Printf.fprintf fn "%s<![CDATA[%s]]>\n" (indent ti.level) err_str*)
125 | CallEnd ->
126 Printf.fprintf fn "%s</%s>\n" (indent ti.level) ti.def
127 in
128 Printf.fprintf fn "<trx_interpreter grammar=\"%s\" input=\"%s\">\n"
129 (get_grammar_fn ()) (get_input_fn ());
130 MutableList.iter write_tag trace;
131 Printf.fprintf fn "</trx_interpreter>\n";
132 Printf.eprintf "Max nesting: %d\n" !max_indent
133
134 (***************************************************************)
135 (************************* TRX interpreter *********************)
136 (***************************************************************)
137
138 let option_to_res_no_err res pos =
139 match res with
140 | Some res -> Ok (res, emptyError pos)
141 | None -> Fail (emptyError pos)
142
143 let addErrorInfo err res =
144 if !errorHandling then
145 Trx_runtime.addErrorInfo err res
146 else
147 res
148
149 let parse peg input =
150
151 let _len = String.length input in
152 let _get_char = String.unsafe_get input in
153 let memo = Hashtbl.create 128 in
154
155 let process_literal literal case pos =
156 let literal_len = String.length literal in
157 let eq = if case then (=) else Char.equal_insensitive in
158 let rec aux i =
159 if i = literal_len then
160 true
161 else
162 eq (_get_char (pos + i)) (String.unsafe_get literal i) && aux (i + 1)
163 in
164 let res =
165 if pos + literal_len <= _len && aux 0 then
166 Some (pos + literal_len, ())
167 else
168 None
169 in
170 if !errorHandling then
171 option_to_res_msg res pos ("\"" ^ literal ^ "\"")
172 else
173 option_to_res_no_err res pos
174 in
175
176 let process_range ranges pos =
177 let encode = function
178 | T.Any -> `Any
179 | T.One c -> `One c
180 | T.Range (c1, c2) -> `Range (c1, c2)
181 in
182 let res =
183 if pos < _len then begin
184 let c = _get_char pos in
185 let rec aux = function
186 | [] -> false
187 | T.Any::_ -> true
188 | T.One c'::cs -> c = c' || aux cs
189 | T.Range (c1, c2)::cs -> (c >= c1 && c <= c2) || aux cs
190 in
191 if aux ranges then
192 Some (pos + 1, ())
193 else
194 None
195 end else
196 None
197 in
198 if !errorHandling then
199 let err = range_to_error (List.map encode ranges) in
200 option_to_res_err res pos err
201 else
202 option_to_res_no_err res pos
203 in
204
205 let get_def id =
206 try
207 StringMap.find id peg.T.grammar
208 with
209 Not_found ->
34d11e64 » Raja
2011-06-23 [cleanup] Base: remove log_error
210 log "Could not find a definition for the production: %s" id;
fccc6851 » MLstate
2011-06-21 Initial open-source release
211 raise Not_found
212 in
213
214 let rec parse_definition id pos =
215 let d, def_annots = get_def id in
216 let do_parse () =
217 let res, memoized =
218 try
219 Hashtbl.find memo (id, pos), true
220 with
221 Not_found ->
222 parse_expression pos d.P.expression, false
223 in
224 let res =
225 match def_annots.T.rule_name with
226 | T.NoName -> res (* decorateConstruct res pos [Expected id] *) (* uncommenting turns on use of rule ids (not only names) for error reporting *)
227 | T.PrimaryName def_name -> setMainConstruct res pos (Expected def_name)
228 | T.SecondaryName def_name -> decorateConstruct res pos [Expected def_name]
229 in
230 let success =
231 match res with
232 | Ok _ -> true
233 | Fail _ -> false
234 in
235 let do_memo =
236 match def_annots.T.rule_memo with
237 | T.MemoNone -> false
238 | T.MemoFail -> not success
239 | T.MemoSuccess -> success
240 | T.MemoFull -> true
241 | T.MemoNoInfo -> assert false
242 in
243 if do_memo then
244 Hashtbl.add memo (id, pos) res;
245 res, memoized
246 in
247 if not (tracingEnabled ()) then
248 fst (do_parse ())
249 else
250 begin
251 let tagInfo =
252 { def = id
253 ; level = !lev
254 }
255 in
256 let tr = ref (Failure, Trx_runtime.emptyError 0) in
257 let memo_mark = ref false in
258 lev := !lev + 1;
259 MutableList.add trace (tagInfo, CallBeg (pos, tr, memo_mark));
260 let res, memoized = do_parse () in
261 if memoized then
262 memo_mark := true;
263 tr :=
264 begin match res with
265 | Fail e -> Failure, e
266 | Ok ((pos', _), e) -> Success (pos' - pos), e
267 end;
268 lev := !lev - 1;
269 MutableList.add trace (tagInfo, CallEnd);
270 res
271 end
272
273 and parse_expression pos = function
274 | P.App _ -> failwith "Unexpected App"
275 | P.Expr [] -> Fail (emptyError pos)
276 | P.Expr (s::ss) ->
277 let (items, _, prod) = s in
278 match prod with
279 | Some (_, _, _, true) ->
280 (* Printf.eprintf "WARNING! Skipping sequence with production {| ... |}\n";*)
281 Fail (emptyError pos)
282 | _ ->
283 match parse_sequence pos items with
284 | Fail e ->
285 let res = parse_expression pos (P.Expr ss) in
286 addErrorInfo e res
287 | res -> res
288
289 and parse_sequence pos = function
290 | [] -> Ok ((pos, ()), emptyError pos)
291 | i::is ->
292 match parse_item pos i with
293 | Fail e -> Fail e
294 | Ok ((pos', _), e) ->
295 let res = parse_sequence pos' is in
296 addErrorInfo e res
297
298 and parse_item pos (prefix, prim, suffix) =
299 let rec loop pos required =
300 match parse_primary pos prim with
301 | Ok ((pos', _), e) ->
302 let res = loop pos' false in
303 addErrorInfo e res
304 | Fail e ->
305 if required then
306 Fail e
307 else
308 Ok ((pos, ()), e)
309 in
310 let suffixr =
311 match suffix with
312 | `NORMAL -> parse_primary pos prim
313 | `STAR -> loop pos false
314 | `PLUS -> loop pos true
315 | `QUESTION ->
316 match parse_primary pos prim with
317 | Fail e -> Ok ((pos, ()), e)
318 | res -> res
319 in
320 (* FIXME, Handle errors *)
321 match prefix, suffixr with
322 | `NORMAL, res -> res
323 | `AND, Fail e -> Fail e
324 | `AND, Ok (_, e) -> Ok ((pos, ()), e)
325 | `NOT, Fail e -> Ok ((pos, ()), e)
326 | `NOT, Ok (_, e) -> Fail e
327
328 and parse_primary pos = function
329 | P.Paren e ->
330 parse_expression pos e
331 | P.Literal (l, case) ->
332 process_literal l case pos
333 | P.Class ranges ->
334 process_range ranges pos
335 | P.Ident id ->
336 parse_definition id pos
337 in
338
339 (* FIXME, at the moment if -start switch is not provided then all productions
340 * marked with '+' are tried.
341 *)
342 let rec try_parse print_success = function
343 | [] -> None
344 | s::ss ->
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
345 (*jlog ~level:2 (pr "Trying to parse with the start production: %s" s);*)
fccc6851 » MLstate
2011-06-21 Initial open-source release
346 match parse_definition s 0 with
347 | Fail e ->
348 let err_str = show_parse_error (FilePos.get_pos_no_cache input) e in
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
349 log "Production %s gives syntax error: %s" s err_str;
fccc6851 » MLstate
2011-06-21 Initial open-source release
350 try_parse print_success ss
351 | Ok ((pos, _), _e) ->
352 if print_success then
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
353 log "Success with: %s" s;
fccc6851 » MLstate
2011-06-21 Initial open-source release
354 Some pos
355 in
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
356 (*jlog ~level:3 (pr "Parsing with the following grammar:\n%s\n======\n" (grammar_to_string peg));*)
fccc6851 » MLstate
2011-06-21 Initial open-source release
357 match !startProd with
358 | None -> try_parse true (Pgrammar.start_definitions peg.T.grammar)
359 | Some s -> try_parse false [s]
360
361 (**************************************************************)
362 (************************* Main front-end *********************)
363 (**************************************************************)
364
365 let str2memo = function
366 | "none" -> T.MemoNone
367 | "fail" -> T.MemoFail
368 | "success" -> T.MemoSuccess
369 | "full" -> T.MemoFull
370 | s -> failwith (pr "Unknown memoization option: '%s' (should be: 'none', 'fail', 'success' or 'full')" s)
371
9a4a8eed » Mathieu Baudet
2011-12-19 [enhance] help/manpages: global pass for improving help messages of O…
372 let usage_msg = Printf.sprintf "%s: parser interpreter for the Opa project\nUsage: %s [options] syntax_file.[trx|prx]\n" Sys.argv.(0) Sys.argv.(0)
373
fccc6851 » MLstate
2011-06-21 Initial open-source release
374 let parse_args () =
375 let anon_fun s =
376 if !grammarFile = None then
377 grammarFile := Some s
378 else if !inputFile = None then
379 inputFile := Some s
380 else
381 failwith (pr "Don't know what to do with <%s> argument" s)
382 in
383 Arg.parse (Arg.align [
384 ("--no-error-handling",
385 Arg.Unit (fun () -> errorHandling := false),
386 " turns off error handling (faster, but no error msgs)");
387
388 ("--multiple",
389 Arg.Unit (fun () -> mode := `parseMany),
390 " parses a number of files; the input file contains a list of files to parse, one per line");
391
392 ("--trace",
393 Arg.String (fun s -> enableTracing s),
394 " file.[xml] produces an XML trace of parsing in <file>");
395
396 ("--start",
397 Arg.String (fun s -> startProd := Some s),
398 " prod starting production to be used (if not provided all marked with + will be tried)");
399
400 ("--verbose",
401 Arg.Unit (fun () -> ()),
402 " deprecated");
403
404 ("--more-verbose",
405 Arg.Unit (fun () -> ()),
406 " deprecated");
407
408 ("--analyze-memo",
409 Arg.Unit (fun () -> cmd := `analyze_memo),
410 " instead of parsing peform memoization analysis with given input");
411
412 ("--memoization",
413 Arg.String (fun s -> memo_default := Some (str2memo s)),
414 " default memoization level for rules with no annotations [none | fail | success | full]");
415
416 ]) anon_fun
9a4a8eed » Mathieu Baudet
2011-12-19 [enhance] help/manpages: global pass for improving help messages of O…
417 (usage_msg^"Options:")
fccc6851 » MLstate
2011-06-21 Initial open-source release
418
419 let load_grammar grammarFn =
420 let peg, _ = Pgrammar.read_grammar ?memo_default:!memo_default ~verbose:true None grammarFn in
421 peg
422
423 let parse_file peg inputFn =
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
424 log "Parsing <%s>..." inputFn;
fccc6851 » MLstate
2011-06-21 Initial open-source release
425 let input = File.content inputFn in
426 let all = String.length input in
427 let go () = parse peg input in
428 let res, t = measureTime go in
429 begin match res with
430 | Some pos ->
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
431 log "Parsing successful [%d/%d] in %4.2fsec." pos all t
fccc6851 » MLstate
2011-06-21 Initial open-source release
432 | None ->
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
433 log "Parsing failed"
fccc6851 » MLstate
2011-06-21 Initial open-source release
434 end;
435 begin match !traceFile with
436 | None -> ()
437 | Some fn ->
438 write_trace input (open_out fn);
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
439 log "Parsing trace written to <%s>" fn
fccc6851 » MLstate
2011-06-21 Initial open-source release
440 end
441
442 let parse_files peg inputFn =
443 let inc = open_in inputFn in
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
444 log "Parsing all files from <%s>" inputFn;
fccc6851 » MLstate
2011-06-21 Initial open-source release
445 let n = ref 0 in
446 let go () =
447 try
448 while true do
449 let fn = input_line inc in
450 parse_file peg fn;
451 incr n
452 done
453 with
454 End_of_file ->
455 close_in inc
456 in
457 let _, t = measureTime go in
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
458 log "Total time of parsing %d files from <%s>: %4.2fsec." !n inputFn t
fccc6851 » MLstate
2011-06-21 Initial open-source release
459
460 let parsing peg =
461 let inputFn = get_input_fn () in
462 match !mode with
463 | `parseOne ->
464 parse_file peg inputFn
465 | `parseMany ->
466 parse_files peg inputFn
467
468 let analyze_memo peg =
469 let i = ref 0 in
470 let progress () =
471 let rules = StringMap.size peg.T.grammar in
472 let total = float_of_int (rules * 3) in
473 incr i;
474 Printf.eprintf "\rProgress: %3.2f%%...%!" (float_of_int (100 * !i) /. total)
475 in
476 let parseWithPeg peg =
477 let res = measureTime (fun () -> parsing peg) in
478 res
479 in
480 let memo2str = function
481 | T.MemoNone -> "none"
482 | T.MemoFail -> "fail"
483 | T.MemoSuccess -> "success"
484 | T.MemoFull -> "full"
485 | T.MemoNoInfo -> "noInfo"
486 in
487 let apply_move move peg =
488 let def_name, memo_opt = move in
489 let (def, annots) = StringMap.find def_name peg.T.grammar in
490 let new_annots = { annots with T.rule_memo = memo_opt } in
491 let new_entry = def, new_annots in
492 { peg with T.grammar = StringMap.add def_name new_entry peg.T.grammar }
493 in
494 let try_to_improve_on peg def_name (best_move, best) memo_opt =
495 let move = def_name, memo_opt in
496 let peg' = apply_move move peg in
497 let _, t = parseWithPeg peg' in
498 progress ();
da6c620f » Raja
2011-06-22 [cleanup] teerex: remove open base
499 (*if false then
500 jlog ~level:2 (pr "Trying to change memo option for def. <%s> to <%s> gives time: %4.3fsec." def_name (memo2str memo_opt) t);*)
fccc6851 » MLstate
2011-06-21 Initial open-source release
501 if t < best then
502 (Some move, t)
503 else
504 (best_move, best)
505 in
506 let try_to_improve peg def_name (_, def_annots) (best_move, best) =
507 let opts = [T.MemoNone; T.MemoFail; T.MemoSuccess; T.MemoFull] in
508 let opts = List.remove_first def_annots.T.rule_memo opts in
509 List.fold_left (try_to_improve_on peg def_name) (best_move, best) opts
510 in
511 let rec improve (best, peg) =
512 i := 0;
513 let res, best' = StringMap.fold (try_to_improve peg) peg.T.grammar (None, best) in
514 match res with
515 | None ->
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
516 log "\nNo improvement..."
fccc6851 » MLstate
2011-06-21 Initial open-source release
517 | Some move ->
518 let impr = best -. best' in
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
519 log "\nConsider changing memoization option for definition <%s> to <%s>.\nIt resulted in time %4.3fsec. (%4.3fsec./%1.3f%% improvement)"
520 (fst move) (memo2str (snd move)) best' impr (100.0 *. impr /. best);
fccc6851 » MLstate
2011-06-21 Initial open-source release
521 improve (best', apply_move move peg)
522 in
523 let _, best = parseWithPeg peg in
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
524 log "Initial grammar gives timing: %4.3fsec. [no. of rules: %d]" best (StringMap.size peg.T.grammar);
fccc6851 » MLstate
2011-06-21 Initial open-source release
525 improve (best, peg)
526
527 let _ =
528 parse_args ();
529 let grammarFn = get_grammar_fn () in
530 let inputFn = get_input_fn () in
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
531 log "Loading grammar from {%s} and then parsing {%s}\n" grammarFn inputFn;
fccc6851 » MLstate
2011-06-21 Initial open-source release
532 try
533 let peg = load_grammar grammarFn in
534 match !cmd with
535 | `parsing -> parsing peg
536 | `analyze_memo -> analyze_memo peg
537 with
538 | Pgrammar.GrammarParse _err ->
34d11e64 » Raja
2011-06-23 [cleanup] Base: remove log_error
539 log "Failed while parsing the input grammar: {%s}!\n" grammarFn
Something went wrong with that request. Please try again.