Skip to content
Newer
Older
100644 538 lines (478 sloc) 15.9 KB
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (*
19 @author Adam Koprowski
20 **)
21
22 (* FIXME, proper trace output for --multiple option *)
23 (* FIXME, in --analyze-memo stop parsing with given settings when it takes longer than the best result so far (timeout) *)
24
25 module T = Tgrammar
26 module P = Tgrammar.PreGrammar
da6c620 [cleanup] teerex: remove open base
Raja authored Jun 22, 2011
27 module List = BaseList
28 module Char = BaseChar
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
29
30 let pr = Printf.sprintf
31
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
32 (* TODO: change this printf *)
33 let log fmt =
34 Printf.eprintf (fmt^^"\n")
35
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
36 let grammar_to_string g =
37 pr "Start: {%s}\nProductions:\n%s\n" g.T.start
38 (T.def_map_to_string (StringMap.map fst g.T.grammar))
39
40 let errorHandling = ref true
41 let grammarFile = ref None
42 let inputFile = ref None
43 let startProd = ref None
44 let cmd = ref `parsing
45 let mode = ref `parseOne
46 let memo_default = ref None
47
48 let get_val_or_fail v err =
49 match !v with
50 | Some s -> s
51 | _ -> failwith err
52
53 let get_grammar_fn () = get_val_or_fail grammarFile "Unspecified grammar!"
54
55 let get_input_fn () = get_val_or_fail inputFile "Unspecified input file!"
56
57 let measureTime f =
58 let startT = Sys.time () in
59 let res = f () in
60 let endT = Sys.time () in
61 res, endT -. startT
62
63 (***************************************************************)
64 (************************** Error handling *********************)
65 (***************************************************************)
66
67 open Trx_runtime
68
69 (***************************************************************)
70 (************************** Parsing trace **********************)
71 (***************************************************************)
72
73 type traceParseResult =
74 | Failure
75 | Success of int (* number of consumed characters *)
76
77 type traceType =
78 | CallBeg of pos * (traceParseResult * Trx_runtime.parseError) ref * bool ref (* result of memoization? *)
79 | CallEnd
80
81 type tagInfo =
82 { def : string
83 ; level : int
84 }
85
86 type traceEntry = tagInfo * traceType
87
88 let traceFile = ref None
89
90 let trace = MutableList.create ()
91
92 let tracingEnabled () =
93 match !traceFile with
94 | Some _ -> true
95 | None -> false
96
97 let lev = ref 1
98
99 let enableTracing s =
100 traceFile := Some s
101
102 let rec indent = function
103 | 0 -> ""
104 | n -> " " ^ indent (n-1)
105
106 let write_trace input fn =
107 let max_indent = ref 0 in
108 let write_tag (ti, tt) =
109 match tt with
110 | CallBeg (pos, tr, memo) ->
111 let pos_str =
112 let line, col = FilePos.get_pos_no_cache input pos in
113 pr "%d/%d" line col
114 in
115 let memo_str = if !memo then " memoized=\"true\"" else "" in
116 let res_str, len_str =
117 match fst !tr with
118 | Failure -> "fail", ""
119 | Success i -> "ok", pr " consumed=\"%d\"" i
120 in
121 (* let err_str = Trx_runtime.show_parse_error input (snd !tr) in*)
122 max_indent := max !max_indent ti.level;
123 Printf.fprintf fn "%s<%s pos=\"%s\" result=\"%s\" %s%s >\n" (indent ti.level) ti.def pos_str res_str len_str memo_str;
124 (* Printf.fprintf fn "%s<![CDATA[%s]]>\n" (indent ti.level) err_str*)
125 | CallEnd ->
126 Printf.fprintf fn "%s</%s>\n" (indent ti.level) ti.def
127 in
128 Printf.fprintf fn "<trx_interpreter grammar=\"%s\" input=\"%s\">\n"
129 (get_grammar_fn ()) (get_input_fn ());
130 MutableList.iter write_tag trace;
131 Printf.fprintf fn "</trx_interpreter>\n";
132 Printf.eprintf "Max nesting: %d\n" !max_indent
133
134 (***************************************************************)
135 (************************* TRX interpreter *********************)
136 (***************************************************************)
137
138 let option_to_res_no_err res pos =
139 match res with
140 | Some res -> Ok (res, emptyError pos)
141 | None -> Fail (emptyError pos)
142
143 let addErrorInfo err res =
144 if !errorHandling then
145 Trx_runtime.addErrorInfo err res
146 else
147 res
148
149 let parse peg input =
150
151 let _len = String.length input in
152 let _get_char = String.unsafe_get input in
153 let memo = Hashtbl.create 128 in
154
155 let process_literal literal case pos =
156 let literal_len = String.length literal in
157 let eq = if case then (=) else Char.equal_insensitive in
158 let rec aux i =
159 if i = literal_len then
160 true
161 else
162 eq (_get_char (pos + i)) (String.unsafe_get literal i) && aux (i + 1)
163 in
164 let res =
165 if pos + literal_len <= _len && aux 0 then
166 Some (pos + literal_len, ())
167 else
168 None
169 in
170 if !errorHandling then
171 option_to_res_msg res pos ("\"" ^ literal ^ "\"")
172 else
173 option_to_res_no_err res pos
174 in
175
176 let process_range ranges pos =
177 let encode = function
178 | T.Any -> `Any
179 | T.One c -> `One c
180 | T.Range (c1, c2) -> `Range (c1, c2)
181 in
182 let res =
183 if pos < _len then begin
184 let c = _get_char pos in
185 let rec aux = function
186 | [] -> false
187 | T.Any::_ -> true
188 | T.One c'::cs -> c = c' || aux cs
189 | T.Range (c1, c2)::cs -> (c >= c1 && c <= c2) || aux cs
190 in
191 if aux ranges then
192 Some (pos + 1, ())
193 else
194 None
195 end else
196 None
197 in
198 if !errorHandling then
199 let err = range_to_error (List.map encode ranges) in
200 option_to_res_err res pos err
201 else
202 option_to_res_no_err res pos
203 in
204
205 let get_def id =
206 try
207 StringMap.find id peg.T.grammar
208 with
209 Not_found ->
34d11e6 [cleanup] Base: remove log_error
Raja authored Jun 23, 2011
210 log "Could not find a definition for the production: %s" id;
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
211 raise Not_found
212 in
213
214 let rec parse_definition id pos =
215 let d, def_annots = get_def id in
216 let do_parse () =
217 let res, memoized =
218 try
219 Hashtbl.find memo (id, pos), true
220 with
221 Not_found ->
222 parse_expression pos d.P.expression, false
223 in
224 let res =
225 match def_annots.T.rule_name with
226 | T.NoName -> res (* decorateConstruct res pos [Expected id] *) (* uncommenting turns on use of rule ids (not only names) for error reporting *)
227 | T.PrimaryName def_name -> setMainConstruct res pos (Expected def_name)
228 | T.SecondaryName def_name -> decorateConstruct res pos [Expected def_name]
229 in
230 let success =
231 match res with
232 | Ok _ -> true
233 | Fail _ -> false
234 in
235 let do_memo =
236 match def_annots.T.rule_memo with
237 | T.MemoNone -> false
238 | T.MemoFail -> not success
239 | T.MemoSuccess -> success
240 | T.MemoFull -> true
241 | T.MemoNoInfo -> assert false
242 in
243 if do_memo then
244 Hashtbl.add memo (id, pos) res;
245 res, memoized
246 in
247 if not (tracingEnabled ()) then
248 fst (do_parse ())
249 else
250 begin
251 let tagInfo =
252 { def = id
253 ; level = !lev
254 }
255 in
256 let tr = ref (Failure, Trx_runtime.emptyError 0) in
257 let memo_mark = ref false in
258 lev := !lev + 1;
259 MutableList.add trace (tagInfo, CallBeg (pos, tr, memo_mark));
260 let res, memoized = do_parse () in
261 if memoized then
262 memo_mark := true;
263 tr :=
264 begin match res with
265 | Fail e -> Failure, e
266 | Ok ((pos', _), e) -> Success (pos' - pos), e
267 end;
268 lev := !lev - 1;
269 MutableList.add trace (tagInfo, CallEnd);
270 res
271 end
272
273 and parse_expression pos = function
274 | P.App _ -> failwith "Unexpected App"
275 | P.Expr [] -> Fail (emptyError pos)
276 | P.Expr (s::ss) ->
277 let (items, _, prod) = s in
278 match prod with
279 | Some (_, _, _, true) ->
280 (* Printf.eprintf "WARNING! Skipping sequence with production {| ... |}\n";*)
281 Fail (emptyError pos)
282 | _ ->
283 match parse_sequence pos items with
284 | Fail e ->
285 let res = parse_expression pos (P.Expr ss) in
286 addErrorInfo e res
287 | res -> res
288
289 and parse_sequence pos = function
290 | [] -> Ok ((pos, ()), emptyError pos)
291 | i::is ->
292 match parse_item pos i with
293 | Fail e -> Fail e
294 | Ok ((pos', _), e) ->
295 let res = parse_sequence pos' is in
296 addErrorInfo e res
297
298 and parse_item pos (prefix, prim, suffix) =
299 let rec loop pos required =
300 match parse_primary pos prim with
301 | Ok ((pos', _), e) ->
302 let res = loop pos' false in
303 addErrorInfo e res
304 | Fail e ->
305 if required then
306 Fail e
307 else
308 Ok ((pos, ()), e)
309 in
310 let suffixr =
311 match suffix with
312 | `NORMAL -> parse_primary pos prim
313 | `STAR -> loop pos false
314 | `PLUS -> loop pos true
315 | `QUESTION ->
316 match parse_primary pos prim with
317 | Fail e -> Ok ((pos, ()), e)
318 | res -> res
319 in
320 (* FIXME, Handle errors *)
321 match prefix, suffixr with
322 | `NORMAL, res -> res
323 | `AND, Fail e -> Fail e
324 | `AND, Ok (_, e) -> Ok ((pos, ()), e)
325 | `NOT, Fail e -> Ok ((pos, ()), e)
326 | `NOT, Ok (_, e) -> Fail e
327
328 and parse_primary pos = function
329 | P.Paren e ->
330 parse_expression pos e
331 | P.Literal (l, case) ->
332 process_literal l case pos
333 | P.Class ranges ->
334 process_range ranges pos
335 | P.Ident id ->
336 parse_definition id pos
337 in
338
339 (* FIXME, at the moment if -start switch is not provided then all productions
340 * marked with '+' are tried.
341 *)
342 let rec try_parse print_success = function
343 | [] -> None
344 | s::ss ->
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
345 (*jlog ~level:2 (pr "Trying to parse with the start production: %s" s);*)
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
346 match parse_definition s 0 with
347 | Fail e ->
348 let err_str = show_parse_error (FilePos.get_pos_no_cache input) e in
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
349 log "Production %s gives syntax error: %s" s err_str;
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
350 try_parse print_success ss
351 | Ok ((pos, _), _e) ->
352 if print_success then
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
353 log "Success with: %s" s;
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
354 Some pos
355 in
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
356 (*jlog ~level:3 (pr "Parsing with the following grammar:\n%s\n======\n" (grammar_to_string peg));*)
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
357 match !startProd with
358 | None -> try_parse true (Pgrammar.start_definitions peg.T.grammar)
359 | Some s -> try_parse false [s]
360
361 (**************************************************************)
362 (************************* Main front-end *********************)
363 (**************************************************************)
364
365 let str2memo = function
366 | "none" -> T.MemoNone
367 | "fail" -> T.MemoFail
368 | "success" -> T.MemoSuccess
369 | "full" -> T.MemoFull
370 | s -> failwith (pr "Unknown memoization option: '%s' (should be: 'none', 'fail', 'success' or 'full')" s)
371
372 let parse_args () =
373 let anon_fun s =
374 if !grammarFile = None then
375 grammarFile := Some s
376 else if !inputFile = None then
377 inputFile := Some s
378 else
379 failwith (pr "Don't know what to do with <%s> argument" s)
380 in
381 Arg.parse (Arg.align [
382 ("--no-error-handling",
383 Arg.Unit (fun () -> errorHandling := false),
384 " turns off error handling (faster, but no error msgs)");
385
386 ("--multiple",
387 Arg.Unit (fun () -> mode := `parseMany),
388 " parses a number of files; the input file contains a list of files to parse, one per line");
389
390 ("--trace",
391 Arg.String (fun s -> enableTracing s),
392 " file.[xml] produces an XML trace of parsing in <file>");
393
394 ("--start",
395 Arg.String (fun s -> startProd := Some s),
396 " prod starting production to be used (if not provided all marked with + will be tried)");
397
398 ("--verbose",
399 Arg.Unit (fun () -> ()),
400 " deprecated");
401
402 ("--more-verbose",
403 Arg.Unit (fun () -> ()),
404 " deprecated");
405
406 ("--analyze-memo",
407 Arg.Unit (fun () -> cmd := `analyze_memo),
408 " instead of parsing peform memoization analysis with given input");
409
410 ("--memoization",
411 Arg.String (fun s -> memo_default := Some (str2memo s)),
412 " default memoization level for rules with no annotations [none | fail | success | full]");
413
414 ]) anon_fun
da6c620 [cleanup] teerex: remove open base
Raja authored Jun 22, 2011
415 (Printf.sprintf "%s <options> syntax_file.[trx|prx] input_file" Sys.argv.(0))
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
416
417 let load_grammar grammarFn =
418 let peg, _ = Pgrammar.read_grammar ?memo_default:!memo_default ~verbose:true None grammarFn in
419 peg
420
421 let parse_file peg inputFn =
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
422 log "Parsing <%s>..." inputFn;
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
423 let input = File.content inputFn in
424 let all = String.length input in
425 let go () = parse peg input in
426 let res, t = measureTime go in
427 begin match res with
428 | Some pos ->
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
429 log "Parsing successful [%d/%d] in %4.2fsec." pos all t
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
430 | None ->
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
431 log "Parsing failed"
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
432 end;
433 begin match !traceFile with
434 | None -> ()
435 | Some fn ->
436 write_trace input (open_out fn);
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
437 log "Parsing trace written to <%s>" fn
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
438 end
439
440 let parse_files peg inputFn =
441 let inc = open_in inputFn in
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
442 log "Parsing all files from <%s>" inputFn;
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
443 let n = ref 0 in
444 let go () =
445 try
446 while true do
447 let fn = input_line inc in
448 parse_file peg fn;
449 incr n
450 done
451 with
452 End_of_file ->
453 close_in inc
454 in
455 let _, t = measureTime go in
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
456 log "Total time of parsing %d files from <%s>: %4.2fsec." !n inputFn t
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
457
458 let parsing peg =
459 let inputFn = get_input_fn () in
460 match !mode with
461 | `parseOne ->
462 parse_file peg inputFn
463 | `parseMany ->
464 parse_files peg inputFn
465
466 let analyze_memo peg =
467 let i = ref 0 in
468 let progress () =
469 let rules = StringMap.size peg.T.grammar in
470 let total = float_of_int (rules * 3) in
471 incr i;
472 Printf.eprintf "\rProgress: %3.2f%%...%!" (float_of_int (100 * !i) /. total)
473 in
474 let parseWithPeg peg =
475 let res = measureTime (fun () -> parsing peg) in
476 res
477 in
478 let memo2str = function
479 | T.MemoNone -> "none"
480 | T.MemoFail -> "fail"
481 | T.MemoSuccess -> "success"
482 | T.MemoFull -> "full"
483 | T.MemoNoInfo -> "noInfo"
484 in
485 let apply_move move peg =
486 let def_name, memo_opt = move in
487 let (def, annots) = StringMap.find def_name peg.T.grammar in
488 let new_annots = { annots with T.rule_memo = memo_opt } in
489 let new_entry = def, new_annots in
490 { peg with T.grammar = StringMap.add def_name new_entry peg.T.grammar }
491 in
492 let try_to_improve_on peg def_name (best_move, best) memo_opt =
493 let move = def_name, memo_opt in
494 let peg' = apply_move move peg in
495 let _, t = parseWithPeg peg' in
496 progress ();
da6c620 [cleanup] teerex: remove open base
Raja authored Jun 22, 2011
497 (*if false then
498 jlog ~level:2 (pr "Trying to change memo option for def. <%s> to <%s> gives time: %4.3fsec." def_name (memo2str memo_opt) t);*)
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
499 if t < best then
500 (Some move, t)
501 else
502 (best_move, best)
503 in
504 let try_to_improve peg def_name (_, def_annots) (best_move, best) =
505 let opts = [T.MemoNone; T.MemoFail; T.MemoSuccess; T.MemoFull] in
506 let opts = List.remove_first def_annots.T.rule_memo opts in
507 List.fold_left (try_to_improve_on peg def_name) (best_move, best) opts
508 in
509 let rec improve (best, peg) =
510 i := 0;
511 let res, best' = StringMap.fold (try_to_improve peg) peg.T.grammar (None, best) in
512 match res with
513 | None ->
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
514 log "\nNo improvement..."
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
515 | Some move ->
516 let impr = best -. best' in
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
517 log "\nConsider changing memoization option for definition <%s> to <%s>.\nIt resulted in time %4.3fsec. (%4.3fsec./%1.3f%% improvement)"
518 (fst move) (memo2str (snd move)) best' impr (100.0 *. impr /. best);
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
519 improve (best', apply_move move peg)
520 in
521 let _, best = parseWithPeg peg in
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
522 log "Initial grammar gives timing: %4.3fsec. [no. of rules: %d]" best (StringMap.size peg.T.grammar);
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
523 improve (best, peg)
524
525 let _ =
526 parse_args ();
527 let grammarFn = get_grammar_fn () in
528 let inputFn = get_input_fn () in
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored Jun 22, 2011
529 log "Loading grammar from {%s} and then parsing {%s}\n" grammarFn inputFn;
fccc685 Initial open-source release
MLstate authored Jun 21, 2011
530 try
531 let peg = load_grammar grammarFn in
532 match !cmd with
533 | `parsing -> parsing peg
534 | `analyze_memo -> analyze_memo peg
535 with
536 | Pgrammar.GrammarParse _err ->
34d11e6 [cleanup] Base: remove log_error
Raja authored Jun 23, 2011
537 log "Failed while parsing the input grammar: {%s}!\n" grammarFn
Something went wrong with that request. Please try again.