Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 514 lines (456 sloc) 18.939 kb
fccc685 Initial open-source release
MLstate authored
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (*
19 @author Henri Binsztok
20 @author Adam Koprowski
21 *)
22
23 (* FIXME, This module seroiusly needs a clean-up... I hope to have time one day... *)
24
25 (* depends*)
26 module List = BaseList
27 module String = BaseString
28
29 (* alias *)
30 module B = Base
31 module T = Tgrammar
32 module P = T.PreGrammar
33
34 (**)
35
36 exception GrammarParse of string
37 exception GrammarCheck of string
38
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored
39 (* TODO: change this printf *)
40 let log fmt =
41 Printf.eprintf (fmt^^"\n")
42
fccc685 Initial open-source release
MLstate authored
43
44 (* FIXME: déséquilibre file / input *)
45 (* FIXME: les extra des include sont ajoutés !!! *)
46 (* FIXME: il faudrait établir une map : definition -> file pour afficher la liste des définitions dont la redéfinition n'a pas été possible (dans les include) : on paie ici le choix d'un namespace unique... *)
47
48 (* FIXME: add optional path *)
49 let find_file f =
50 File.content f
51
52 let module_name_of_name n =
53 let chop s = File.chop_extension s in
54 String.capitalize (chop (Filename.basename n))
55
56 let add_globals pg cur_mod new_mod =
57 List.fold_left (
58 fun pg x ->
59 { pg with
60 P.defs = StringMap.add ((module_name_of_name new_mod) ^ "_" ^ x)
61 (StringMap.find ((module_name_of_name cur_mod) ^ "_" ^ x) pg.P.defs) pg.P.defs
62 }
63 ) pg
64
65 (**
66 |parse_pre_grammar|
67
68
69 The trx module system is the following:
70 1. each file is a module (by default "file.trx" -> "File")
71 2. each file contains rules
72 3. each file opens itself and opened modules
73 4. there is only one module level
74 5. the search order is first itself, then opened modules by order
75
76
77 Currently,
78 [
79 Identifier <- (
80 / Module [.] Name {{ __1 ^ "_" ^ __3 }}
81 / Name {{ module_name ^ "_" ^ __1 }}
82 ) Spacing {{ __1 }}
83 ]
84
85 What to do when a rulename is defined in multiple modules?
86 Highest priority = module itself, then opened modules by order of definition (including recursives open ?)
87
88 *)
89 let parse_pre_grammar ?(name="Main") ?(stoppable=false) ~verbose input =
90 let already_read = ref StringSet.empty in
91 let rec load pg input name module_name =
92 FilePos.add_file name input;
93 if StringSet.mem name !already_read then
94 pg
95 else begin
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored
96 if verbose then log "parsing %s (stoppable:%b)" name stoppable;
fccc685 Initial open-source release
MLstate authored
97 let input_len = String.length input in
98 let result =
99 try
100 let lastp, pg = Trxparse.parse_trxparse_grammar pg name (module_name_of_name module_name) stoppable input in
101 if lastp = input_len then begin
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored
102 if verbose then log "(%s) read %d/%d bytes" name lastp input_len;
fccc685 Initial open-source release
MLstate authored
103 StringMap.fold (
104 fun x include_def pg ->
105 let pg = { pg with P.incl = StringMap.remove x pg.P.incl } in
106 let new_module = match include_def.P.it with P.Read -> x | P.Incl -> name in
107 let pg = add_globals pg module_name new_module include_def.P.gl in
108 already_read := StringSet.add name !already_read;
109 let old_header = pg.P.pheader in
110 let peg = load { pg with P.pheader = [] } (find_file x) x new_module in
111 { peg with P.pheader = old_header @ peg.P.pheader }
112 ) pg.P.incl pg
113 end else
114 (* FIXME, Adam, this should be handled in the grammar *)
a9f8d34 [cleanup] Base: remove sprintf
Raja authored
115 raise (GrammarParse (Printf.sprintf "error parsing '%s': only %d out of %d bytes processed" name lastp input_len))
fccc685 Initial open-source release
MLstate authored
116 with
117 | Trx_runtime.SyntaxError (pos, err) ->
a9f8d34 [cleanup] Base: remove sprintf
Raja authored
118 raise (GrammarParse (Printf.sprintf "error parsing '%s': %s" name (Trx_runtime.show_error input pos err)))
fccc685 Initial open-source release
MLstate authored
119 in
120 FilePos.uncache name;
121 result
122 end
123 in
124 let pg = load T.empty_pre_grammar input name name in
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored
125 (*log "parse_pre_grammar: end" ;*)
fccc685 Initial open-source release
MLstate authored
126 pg
127
128 let read_pre_grammar ?stoppable ~verbose name = parse_pre_grammar ?stoppable ~verbose ~name (File.content name)
129
130 let rewrite_funs pg =
131 (* TODO: recursive functions *)
132 let all_functions = pg.P.funs in
133 let rec rewrite_fun functions bindings expr =
134 (* substitution *)
135 let rec aux_expr = function
136 | P.Expr sl -> P.Expr (aux_seql sl)
137 | P.App (f, vars) ->
138 begin match StringMap.find_opt f functions with
139 | None ->
140 if StringMap.mem f all_functions then
a9f8d34 [cleanup] Base: remove sprintf
Raja authored
141 failwith (Printf.sprintf "function %s is recursive" f)
fccc685 Initial open-source release
MLstate authored
142 else
a9f8d34 [cleanup] Base: remove sprintf
Raja authored
143 failwith (Printf.sprintf "function %s is undefined" f)
fccc685 Initial open-source release
MLstate authored
144 | Some (fdef, _) ->
145 let functions = StringMap.remove f functions in
146 let expected_arity = List.length fdef.P.vars in
147 if expected_arity = List.length vars then
148 let bindings = List.fold_left2 (fun acc idfun expra -> StringMap.add idfun (aux_seql expra) acc) bindings fdef.P.vars vars in
149 rewrite_fun functions bindings fdef.P.expr
a9f8d34 [cleanup] Base: remove sprintf
Raja authored
150 else failwith (Printf.sprintf "function %s is of arity %d" f expected_arity)
fccc685 Initial open-source release
MLstate authored
151 end
152 and aux_seql sl = List.map aux_seq sl
153 and aux_seq (il, map, code) = List.map aux_item il, map, code
154 and aux_item (pre, pri, suf) = pre, aux_pri pri, suf
155 and aux_pri = function
156 | P.Paren e -> P.Paren (aux_expr e)
157 | P.Ident id as pri ->
158 begin match StringMap.find_opt id bindings with
159 | None -> pri
160 | Some e -> P.Paren (P.Expr e) (** There's no need to apply aux_expr once again here since it's done just above, in let bindings = ... aux_seql ... AND it wouldn't be the right binding environment !! **)
161 end
162 | pri -> pri
163 in aux_expr expr
164 in
165 { pg with
166 P.defs = StringMap.map (fun (def, msg_error) -> { def with P.expression = rewrite_fun all_functions StringMap.empty def.P.expression }, msg_error) pg.P.defs
167 }
168
169 let dependencies pg =
170 (* Printf.eprintf "All definitions: %s" (String.concat_map ", " (fun s -> s) (StringMap.keys pg));*)
171 (** ajoute les dépendances de l'expression à set *)
172 let rec dep_of_expression set = function
173 | [] -> raise (GrammarCheck "empty expression!")
174 | expr -> List.fold_left (
175 fun acc (pl, _, _) ->
176 List.fold_left (
177 fun acc (_, primary, _) ->
178 match primary with
179 | P.Ident s ->
180 if StringMap.mem s pg then StringSet.add s acc
a9f8d34 [cleanup] Base: remove sprintf
Raja authored
181 else raise (GrammarCheck (Printf.sprintf "definition '%s' missing!" s))
fccc685 Initial open-source release
MLstate authored
182 | P.Paren (P.Expr e) -> dep_of_expression acc e
183 | P.Paren _ -> assert false
184 | _ -> acc
185 ) acc pl
186 ) set expr in
187 StringMap.map (fun (def,_msg_error) -> dep_of_expression StringSet.empty (T.get_expression def)) pg
188
189 let grammar_error s =
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored
190 log "%s" s;
fccc685 Initial open-source release
MLstate authored
191 exit 2
192
193 let grammar_analysis pg =
194
195 (*
196 let prop2str = function
197 | `Empty -> "Empty"
198 | `NonEmpty -> "NonEmpty"
199 | `Fail -> "Fail"
200 | `Success -> "Success"
201 in
202 *)
203
204 let rec analyze_def prop def =
205 (* jlog (Printf.sprintf "Analyze def: %s, property: %s" def (prop2str prop));*)
206 let def, _ = StringMap.find def pg in
207 analyze_exp prop def.P.expression
208
209 and analyze_exp prop exp =
210 (* jlog (Printf.sprintf "Analyze exp %s, property: %s" (expr_to_string exp) (prop2str prop));*)
211 match prop with
212 | `Success -> analyze_exp `Empty exp || analyze_exp `NonEmpty exp
213 | _ ->
214 match exp with
215 | P.App _
216 | P.Expr [] -> assert false
217 | P.Expr [x] -> analyze_seq prop x
218 | P.Expr (x::xs) ->
219 match prop with
220 | `Fail ->
221 analyze_seq `Fail x && analyze_exp `Fail (P.Expr xs)
222 | `Empty
223 | `NonEmpty ->
224 analyze_seq prop x || (analyze_seq `Fail x && analyze_exp prop (P.Expr xs))
225 | `Success -> assert false
226
227 and analyze_seq prop ((seq, q1, q2) as seqf) =
228 (* jlog (Printf.sprintf "Analyze seq %s, property: %s" (if seq = [] then "" else seq_to_string seqf) (prop2str prop));*)
229 match seq, prop with
230 | _, `Success -> analyze_seq `Empty seqf || analyze_seq `NonEmpty seqf
231 | [], `Empty -> true
232 | [], `Fail
233 | [], `NonEmpty -> false
234 | x::xs, `Empty -> analyze_item `Empty x && analyze_seq `Empty (xs, q1, q2)
235 | x::xs, `Fail -> analyze_item `Fail x || (analyze_item `Success x && analyze_seq `Fail (xs, q1, q2))
236 | x::xs, `NonEmpty ->
237 (analyze_item `NonEmpty x && analyze_seq `Success (xs, q1, q2)) ||
238 (analyze_item `Success x && analyze_seq `NonEmpty (xs, q1, q2))
239
240 and analyze_item prop ((prefix, primary, suffix) as item) =
241 (* jlog (Printf.sprintf "Analyze item %s, property: %s" (item_to_string item) (prop2str prop));*)
242 if prefix = `NORMAL then
243 analyze_suffix prop (primary, suffix)
244 else
245 match prop with
246 | `NonEmpty -> false
247 | `Success -> analyze_item `Empty item || analyze_item `NonEmpty item
248 | `Empty
249 | `Fail ->
250 let p =
251 match prefix, prop with
252 | `AND, `Empty -> `Success
253 | `NOT, `Empty -> `Fail
254 | `AND, `Fail -> `Fail
255 | `NOT, `Fail -> `Success
256 | _ -> assert false
257 in
258 analyze_item p (`NORMAL, primary, suffix)
259
260 and analyze_suffix prop (primary, suffix) =
261 match suffix with
262 | `NORMAL -> analyze_primary prop primary
263 | `QUESTION -> (* e? := e / empty *)
264 let e = `NORMAL, primary, `NORMAL in
265 let empty = `NORMAL, P.Literal ("", false), `NORMAL in
266 let make_option i = i, StringMap.empty, None in
267 analyze_exp prop (P.Expr ([make_option [e]; make_option [empty]]))
268 | `STAR ->
269 begin match prop with
270 | `Empty -> analyze_suffix `Fail (primary, `NORMAL)
271 | `NonEmpty -> analyze_suffix `NonEmpty (primary, `NORMAL)
272 | `Success -> true
273 | `Fail -> false
274 end
275 | `PLUS -> (* e+ := e; e* *)
276 analyze_seq prop ([(`NORMAL, primary, `NORMAL); (`NORMAL, primary, `STAR)], StringMap.empty, None)
277
278 and analyze_primary prop = function
279 (* jlog (Printf.sprintf "Analyze primary %s, property: %s" (primary_to_string primary) (prop2str prop));*)
280 | P.Paren e -> analyze_exp prop e
281 | P.Ident id -> analyze_def prop id
282 | P.Literal (l, _) ->
283 begin match prop with
284 | `Empty -> String.length l = 0
285 | `NonEmpty -> String.length l > 0
286 | `Success -> true
287 | `Fail -> true
288 end
289 | P.Class _ -> (* character range has the same characteristics as a literal of length 1 *)
290 analyze_primary prop (P.Literal ("X", false))
291 in
292 analyze_def, analyze_exp, analyze_seq, analyze_item, analyze_primary
293
294 let analyze_def pg =
295 let (analyze_def, _, _, _, _) = grammar_analysis pg in analyze_def
296
297 let analyze_exp pg =
298 let (_, analyze_exp, _, _, _) = grammar_analysis pg in analyze_exp
299
300 let analyze_seq pg =
301 let (_, _, analyze_seq, _, _) = grammar_analysis pg in analyze_seq
302
303 let analyze_item pg =
304 let (_, _, _, analyze_item, _) = grammar_analysis pg in analyze_item
305
306 let analyze_primary pg =
307 let (_, _, _, _, analyze_primary) = grammar_analysis pg in analyze_primary
308
309 let check_wf pg name =
310
311 let rec check_wf_def stack name =
312 let def, _ = StringMap.find name pg in
313 if List.mem name stack then
314 grammar_error (Printf.sprintf "Grammmar contains forbidden left-recursion: %s"
315 (String.concat_map " -> " (fun i -> i) (List.rev (name::stack)))
316 );
317 check_wf_exp (name::stack) def.P.expression
318
319 and check_wf_exp stack = function
320 | P.App _ -> assert false
321 | P.Expr [] -> ()
322 | P.Expr ((x, _, _)::xs) ->
323 check_wf_seq stack x;
324 check_wf_exp stack (P.Expr xs)
325
326 and check_wf_seq stack = function
327 | [] -> ()
328 | x::xs ->
329 check_wf_item stack x;
330 if analyze_item pg `Empty x then
331 check_wf_seq stack xs
332
333 and check_wf_item stack (_prefix, prim, suffix) =
334 check_wf_primary stack prim;
335 match suffix with
336 | `STAR
337 | `PLUS ->
338 if analyze_primary pg `Empty prim then
339 grammar_error
340 (Printf.sprintf "The expression <%s> in rule <%s> admits empty string, while it is marked with a %s. This would result in a looping parser and hence is forbidden."
341 (T.primary_to_string prim)
342 (List.hd stack)
343 (match suffix with `STAR -> "star (*)" | `PLUS -> "plus (+)" | _ -> assert false)
344 )
345 | _ -> ()
346
347 and check_wf_primary stack = function
348 | P.Paren e -> check_wf_exp stack e
349 | P.Ident id -> check_wf_def stack id
350 | P.Literal _
351 | P.Class _ -> ()
352
353 in
354 check_wf_def [] name
355
356 let check_grammar pg =
357 let dep = dependencies pg in
358 let rec add_definition name set =
359 if StringSet.mem name set then set
360 else
361 let name_dep = StringMap.find name dep in
362 StringSet.fold add_definition name_dep (StringSet.add name set)
363 in
364 (** définitions initiales *)
365 let starts = StringMap.fold (fun name (def,_msg_error) acc -> if def.P.mark then name::acc else acc) pg [] in
366 (** liste des définitions utilisées *)
367 let def_used = List.fold_left (fun acc x -> add_definition x acc) StringSet.empty starts in
368 let is_loop name = check_wf pg name in
369 StringSet.iter is_loop def_used;
370 def_used
371
372 let start_definitions g =
373 StringMap.fold (
374 fun name (def,_) acc ->
375 if def.P.mark then name::acc else acc
376 ) g []
377
378 let unfold_star_and_plus peg =
379 let star_rule_id = ref 0 in
380 let new_rules = ref StringMap.empty in
381 let rule_for_unfolded_star rule_id primary suffix =
382 let item_from_primary e = `NORMAL, e, `NORMAL in
383 let make_sequence seq code = List.map item_from_primary seq, StringMap.empty, Some (false, code, None, false) in
384 let non_empty = make_sequence [primary; P.Ident rule_id] "__1::__2" in
385 let empty =
386 match suffix with
387 | `STAR -> make_sequence [P.Literal ("", true)] "[]"
388 | `PLUS -> make_sequence [primary] "[__1]"
389 in
390 let rule_def =
391 { P.expression = P.Expr [non_empty; empty]
392 ; P.debug = false
393 ; P.mark = false
394 ; P.retain_cache = false
395 ; P.rule_type = None
396 ; P.origin = None
397 }
398 in
399 let rule_annots = { T.rule_name = T.NoName; T.rule_memo = T.MemoNoInfo } in
400 rule_def, rule_annots
401 in
402 let unfold_star_plus prefix primary suffix =
403 let rule_id = incr star_rule_id; !star_rule_id in
404 let rule_name = Printf.sprintf "_starplus_unfolding_%d" rule_id in
405 let new_primary = P.Ident rule_name in
406 new_rules := StringMap.add rule_name (rule_for_unfolded_star rule_name primary suffix) !new_rules;
407 prefix, new_primary, `NORMAL
408 in
409 let rec process_primary = function
410 | P.Paren e -> P.Paren (process_expression e)
411 | e -> e
412 and process_item (prefix, primary, suffix) =
413 let primary = process_primary primary in
414 match suffix with
415 | (`STAR | `PLUS) as suffix -> unfold_star_plus prefix primary suffix
416 | _ -> prefix, primary, suffix
417 and process_sequence (items, vars, code) = (List.map process_item items, vars, code)
418 and process_expression = function
419 | P.App _ -> failwith "pgrammar::unfold_star_and_plus: unexpected [App] in the grammar"
420 | P.Expr ss -> P.Expr (List.map process_sequence ss)
421 in
422 let process_definition def = { def with P.expression = process_expression def.P.expression } in
423 let process_rule (def, annot) = (process_definition def, annot) in
424 let new_grammar = StringMap.map process_rule peg.T.grammar in
425 let merge_rule _ _ = assert false (* new_rules should have IDs disjoint from the grammar *) in
426 { peg with T.grammar = StringMap.merge merge_rule new_grammar !new_rules }
427
428 let infer_memoization_options ?(memo_default=T.MemoFull) peg =
429 let infer_memoization = function
430 | T.MemoNoInfo -> memo_default
431 | T.MemoNone
432 | T.MemoFail
433 | T.MemoSuccess
434 | T.MemoFull as memo -> memo
435 in
436 let process_rule (def, annot) =
437 let new_annot = { annot with T.rule_memo = infer_memoization annot.T.rule_memo } in
438 def, new_annot
439 in
440 { peg with T.grammar = StringMap.map process_rule peg.T.grammar }
441
442 (* FIXME: détection d'erreurs ici ou teds suffit ? *)
443 (* try ... *)
444 (* with *)
445 (* e -> *)
446 (* let _, _, _, last_ok = positions ... *)
447 (* raise e *)
448 let grammar_of_pre_grammar ~memo_default ~unfold_starplus start_opt pg =
449 let pg = rewrite_funs pg in
450 let used = check_grammar pg.P.defs in
451 (* FIXME: ne conserver que used dans pg, renvoyer juste pg en type abstrait ! *)
452 let start = match start_opt with
453 | Some s -> s
454 | _ ->
455 let stdefs = start_definitions pg.P.defs in
456 if stdefs = [] then raise (GrammarCheck "no start definition")
457 else List.hd stdefs
458 in
459 let g =
460 { T.start = start
461 ; T.grammar = pg.P.defs
462 ; T.extra = pg.P.pextra
463 ; T.options = pg.P.poptions
464 ; T.header = pg.P.pheader
465 }
466 in
467 let grammar_memo_default () =
468 let check_memo memo (opt, v) =
469 if opt = "memoization" then
470 Some (T.str2memo_type v)
471 else
472 memo
473 in
474 List.fold_left check_memo None g.T.options
475 in
476 let memo_default =
477 match memo_default with
478 | None -> grammar_memo_default ()
479 | Some _ -> memo_default
480 in
481 let g = if unfold_starplus then unfold_star_and_plus g else g in
482 let g = infer_memoization_options ?memo_default g in
483 g, used
484
485 let read_grammar ?stoppable ?memo_default ?(unfold_starplus=true) ~verbose start name =
486 grammar_of_pre_grammar ?memo_default ~unfold_starplus start (read_pre_grammar ?stoppable ~verbose name)
487
488 let parse_grammar ?(name="Main") ?stoppable ?memo_default ?(unfold_starplus=true) ~verbose start text =
489 grammar_of_pre_grammar ?memo_default ~unfold_starplus start (parse_pre_grammar ~name ?stoppable ~verbose text)
490
491 let list_start ~verbose name =
492 let pg = read_pre_grammar ~verbose name in
493 let stdefs = start_definitions pg.P.defs in
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored
494 List.iter (fun s -> log "%s" s) stdefs
fccc685 Initial open-source release
MLstate authored
495
496 (* FIXME: only the grammar *)
497 let output_binary_grammar ~verbose ?(input="trxparse.trx") ?start output_file =
498 let g, _ = read_grammar ~verbose start input in
499 let oc = open_out output_file in
500 output_value oc (g:'a T.grammar) ;
501 close_out oc
502
503 let input_binary_grammar input_file =
504 let ic = open_in input_file in
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored
505 log "input_binary_grammar: begin" ;
fccc685 Initial open-source release
MLstate authored
506 let (grammar:'a T.grammar) = input_value ic in
da5de87 [cleanup] jlog: remove Base.jlog
Raja authored
507 log "input_binary_grammar: end" ;
fccc685 Initial open-source release
MLstate authored
508 close_in ic ;
509 grammar
510
511 (* 2/11/2010 Adam: removing dead, deprecated code: old Henri's TRX interpreter &
512 some un-used (non-working?) functions for incremental parsing. Dig deep in
513 git history if you think you may need that... *)
Something went wrong with that request. Please try again.