Skip to content
This repository
tag: v1887
Fetching contributors…

Cannot retrieve contributors at this time

file 549 lines (489 sloc) 19.714 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
(*
Copyright © 2011 MLstate

This file is part of OPA.

OPA is free software: you can redistribute it and/or modify it under the
terms of the GNU Affero General Public License, version 3, as published by
the Free Software Foundation.

OPA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
more details.

You should have received a copy of the GNU Affero General Public License
along with OPA. If not, see <http://www.gnu.org/licenses/>.
*)
(*
@author Henri Binsztok
@author Adam Koprowski
*)

(* FIXME, This module seroiusly needs a clean-up... I hope to have time one day... *)

(* depends*)
module List = BaseList
module String = BaseString

(* alias *)
module B = Base
module T = Tgrammar
module P = T.PreGrammar

(**)

exception GrammarParse of string
exception GrammarCheck of string

(* TODO: change this printf *)
let log fmt =
  Printf.eprintf (fmt^^"\n")


(* FIXME: déséquilibre file / input *)
(* FIXME: les extra des include sont ajoutés !!! *)
(* FIXME: il faudrait établir une map : definition -> file pour afficher la liste des définitions dont la redéfinition n'a pas été possible (dans les include) : on paie ici le choix d'un namespace unique... *)

(* FIXME: add optional path *)
let find_file f =
  File.content f

let module_name_of_name n =
  let chop s = File.chop_extension s in
  String.capitalize (chop (Filename.basename n))

let add_globals pg cur_mod new_mod =
  List.fold_left (
    fun pg x ->
      { pg with
          P.defs = StringMap.add ((module_name_of_name new_mod) ^ "_" ^ x)
          (StringMap.find ((module_name_of_name cur_mod) ^ "_" ^ x) pg.P.defs) pg.P.defs
      }
  ) pg

(**
|parse_pre_grammar|


The trx module system is the following:
1. each file is a module (by default "file.trx" -> "File")
2. each file contains rules
3. each file opens itself and opened modules
4. there is only one module level
5. the search order is first itself, then opened modules by order


Currently,
[
Identifier <- (
/ Module [.] Name {{ __1 ^ "_" ^ __3 }}
/ Name {{ module_name ^ "_" ^ __1 }}
) Spacing {{ __1 }}
]

What to do when a rulename is defined in multiple modules?
Highest priority = module itself, then opened modules by order of definition (including recursives open ?)

*)
let parse_pre_grammar ?(name="Main") ?(stoppable=false) ~verbose input =
  let already_read = ref StringSet.empty in
  let rec load pg input name module_name =
    FilePos.add_file name input;
    if StringSet.mem name !already_read then
      pg
    else begin
      if verbose then log "parsing %s (stoppable:%b)" name stoppable;
      let input_len = String.length input in
      let result =
        try
          let lastp, pg = Trxparse.parse_trxparse_grammar pg name (module_name_of_name module_name) stoppable input in
          if lastp = input_len then begin
            if verbose then log "(%s) read %d/%d bytes" name lastp input_len;
            StringMap.fold (
              fun x include_def pg ->
                let pg = { pg with P.incl = StringMap.remove x pg.P.incl } in
                let new_module = match include_def.P.it with P.Read -> x | P.Incl -> name in
                let pg = add_globals pg module_name new_module include_def.P.gl in
                already_read := StringSet.add name !already_read;
                let old_header = pg.P.pheader in
                let peg = load { pg with P.pheader = [] } (find_file x) x new_module in
                { peg with P.pheader = old_header @ peg.P.pheader }
            ) pg.P.incl pg
          end else
            (* FIXME, Adam, this should be handled in the grammar *)
            raise (GrammarParse (Printf.sprintf "error parsing '%s': only %d out of %d bytes processed" name lastp input_len))
        with
        | Trx_runtime.SyntaxError (pos, err) ->
            raise (GrammarParse (Printf.sprintf "error parsing '%s': %s" name (Trx_runtime.show_error input pos err)))
      in
      FilePos.uncache name;
      result
    end
  in
  let pg = load T.empty_pre_grammar input name name in
  (*log "parse_pre_grammar: end" ;*)
  pg

let read_pre_grammar ?stoppable ~verbose name = parse_pre_grammar ?stoppable ~verbose ~name (File.content name)

let rewrite_funs pg =
  (* TODO: recursive functions *)
  let all_functions = pg.P.funs in
  let rec rewrite_fun functions bindings expr =
    (* substitution *)
    let rec aux_expr = function
      | P.Expr sl -> P.Expr (aux_seql sl)
      | P.App (f, vars) ->
          begin match StringMap.find_opt f functions with
          | None ->
              if StringMap.mem f all_functions then
                failwith (Printf.sprintf "function %s is recursive" f)
              else
                failwith (Printf.sprintf "function %s is undefined" f)
          | Some (fdef, _) ->
              let functions = StringMap.remove f functions in
              let expected_arity = List.length fdef.P.vars in
              if expected_arity = List.length vars then
                let bindings = List.fold_left2 (fun acc idfun expra -> StringMap.add idfun (aux_seql expra) acc) bindings fdef.P.vars vars in
                rewrite_fun functions bindings fdef.P.expr
              else failwith (Printf.sprintf "function %s is of arity %d" f expected_arity)
          end
    and aux_seql sl = List.map aux_seq sl
    and aux_seq (il, map, code) = List.map aux_item il, map, code
    and aux_item (pre, pri, suf) = pre, aux_pri pri, suf
    and aux_pri = function
      | P.Paren e -> P.Paren (aux_expr e)
      | P.Ident id as pri ->
          begin match StringMap.find_opt id bindings with
          | None -> pri
          | Some e -> P.Paren (P.Expr e) (** There's no need to apply aux_expr once again here since it's done just above, in let bindings = ... aux_seql ... AND it wouldn't be the right binding environment !! **)
          end
      | pri -> pri
    in aux_expr expr
  in
  { pg with
      P.defs = StringMap.map (fun (def, msg_error) -> { def with P.expression = rewrite_fun all_functions StringMap.empty def.P.expression }, msg_error) pg.P.defs
  }

let dependencies pg =
(* Printf.eprintf "All definitions: %s" (String.concat_map ", " (fun s -> s) (StringMap.keys pg));*)
  (** ajoute les dépendances de l'expression à set *)
  let rec dep_of_expression set = function
    | [] -> raise (GrammarCheck "empty expression!")
    | expr -> List.fold_left (
        fun acc (pl, _, _) ->
          List.fold_left (
            fun acc (_, primary, _) ->
              match primary with
              | P.Ident s ->
                  if StringMap.mem s pg then StringSet.add s acc
                  else raise (GrammarCheck (Printf.sprintf "definition '%s' missing!" s))
              | P.Paren (P.Expr e) -> dep_of_expression acc e
              | P.Paren _ -> assert false
              | _ -> acc
          ) acc pl
      ) set expr in
  StringMap.map (fun (def,_msg_error) -> dep_of_expression StringSet.empty (T.get_expression def)) pg

let grammar_error s =
  log "%s" s;
  exit 2

exception Looping

let grammar_analysis pg =

  let rec analyze_def s prop def_name =
    let def, _ = StringMap.find def_name pg in
    if List.mem (prop, def_name) s then
      raise Looping
    else
      analyze_exp ((prop, def_name)::s) prop def.P.expression

  and analyze_exp s prop exp =
    match prop with
    | `Success -> analyze_exp s `Empty exp || analyze_exp s `NonEmpty exp
    | _ ->
        match exp with
        | P.App _
        | P.Expr [] -> assert false
        | P.Expr [x] -> analyze_seq s prop x
        | P.Expr (x::xs) ->
            match prop with
            | `Fail ->
                analyze_seq s `Fail x && analyze_exp s `Fail (P.Expr xs)
            | `Empty
            | `NonEmpty ->
                analyze_seq s prop x || (analyze_seq s `Fail x && analyze_exp s prop (P.Expr xs))
            | `Success -> assert false

  and analyze_seq s prop ((seq, q1, q2) as seqf) =
    let backtraceable =
      match q2 with
      | None -> false
      | Some code ->
        let (_, _, _, b) = code in
        b
    in
    match seq, prop with
    | _, `Success -> analyze_seq s `Empty seqf || analyze_seq s `NonEmpty seqf
    | [], `Empty -> true
    | [], `Fail
    | [], `NonEmpty -> false
    | x::xs, `Empty ->
        if backtraceable then
          (* WARNING, we assume that a backtraceable expression will not admit an empty expression;
this may not be true and hence it can lead to a non-terminating parser... *)
          false
        else
          analyze_item s `Empty x && analyze_seq s `Empty (xs, q1, q2)
    | x::xs, `Fail -> analyze_item s `Fail x || (analyze_item s `Success x && analyze_seq s `Fail (xs, q1, q2))
    | x::xs, `NonEmpty ->
        (analyze_item s `NonEmpty x && analyze_seq s `Success (xs, q1, q2)) ||
          (analyze_item s `Success x && analyze_seq s `NonEmpty (xs, q1, q2))

  and analyze_item s prop ((prefix, primary, suffix) as item) =
    if prefix = `NORMAL then
      analyze_suffix s prop (primary, suffix)
    else
      match prop with
      | `NonEmpty -> false
      | `Success -> analyze_item s `Empty item || analyze_item s `NonEmpty item
      | `Empty
      | `Fail ->
          let p =
            match prefix, prop with
            | `AND, `Empty -> `Success
            | `NOT, `Empty -> `Fail
            | `AND, `Fail -> `Fail
            | `NOT, `Fail -> `Success
            | _ -> assert false
          in
          analyze_item s p (`NORMAL, primary, suffix)

  and analyze_suffix s prop (primary, suffix) =
    match suffix with
    | `NORMAL -> analyze_primary s prop primary
    | `QUESTION -> (* e? := e / empty *)
        let e = `NORMAL, primary, `NORMAL in
        let empty = `NORMAL, P.Literal ("", false), `NORMAL in
        let make_option i = i, StringMap.empty, None in
        analyze_exp s prop (P.Expr ([make_option [e]; make_option [empty]]))
    | `STAR ->
        begin match prop with
        | `Empty -> analyze_suffix s `Fail (primary, `NORMAL)
        | `NonEmpty -> analyze_suffix s `NonEmpty (primary, `NORMAL)
        | `Success -> true
        | `Fail -> false
        end
    | `PLUS -> (* e+ := e; e* *)
        analyze_seq s prop ([(`NORMAL, primary, `NORMAL); (`NORMAL, primary, `STAR)], StringMap.empty, None)

  and analyze_primary s prop = function
  | P.Paren e -> analyze_exp s prop e
  | P.Ident id -> analyze_def s prop id
  | P.Literal (l, _) ->
      begin match prop with
      | `Empty -> String.length l = 0
      | `NonEmpty -> String.length l > 0
      | `Success -> true
      | `Fail -> true
      end
  | P.Class _ -> (* character range has the same characteristics as a literal of length 1 *)
      analyze_primary s prop (P.Literal ("X", false))
  in
  analyze_def [], analyze_exp [], analyze_seq [], analyze_item [], analyze_primary []

let analyze_def pg =
  let (analyze_def, _, _, _, _) = grammar_analysis pg in analyze_def

let analyze_exp pg =
  let (_, analyze_exp, _, _, _) = grammar_analysis pg in analyze_exp

let analyze_seq pg =
  let (_, _, analyze_seq, _, _) = grammar_analysis pg in analyze_seq

let analyze_item pg =
  let (_, _, _, analyze_item, _) = grammar_analysis pg in analyze_item

let analyze_primary pg =
  let (_, _, _, _, analyze_primary) = grammar_analysis pg in analyze_primary

let grammar_used_defs pg =
  let dep = dependencies pg in
  let rec add_definition name set =
    if StringSet.mem name set then set
    else
      let name_dep = StringMap.find name dep in
      StringSet.fold add_definition name_dep (StringSet.add name set)
  in
  let starts = StringMap.fold (fun name (def,_msg_error) acc -> if def.P.mark then name::acc else acc) pg [] in
  List.fold_left (fun acc x -> add_definition x acc) StringSet.empty starts

let analyze_grammar peg =
  let defs = grammar_used_defs peg in
  let check_def def_name =
    let check prop symbol =
      try
        if analyze_def peg prop def_name then
          symbol
        else
          " "
      with
        Looping -> "??"
    in
    Printf.printf "%60s: %s %s %s\n%!" def_name (check `Empty "=0") (check `NonEmpty ">0") (check `Fail "F ")
  in
  StringSet.iter check_def defs

let check_wf ~analyze pg name =

  let analyze () =
    if analyze then begin
      Printf.eprintf "Complete grammar analysis:\n%!";
      try
        analyze_grammar pg
      with
        Looping ->
          Printf.eprintf "Couldn't continue the analysis due to a loop...\n"
    end
  in

  let rec check_wf_def stack name =
    let def, _ = StringMap.find name pg in
    if List.mem name stack then begin
      analyze ();
      grammar_error (Printf.sprintf "Grammmar contains forbidden left-recursion: %s"
        (String.concat_map " -> " (fun i -> i) (List.rev (name::stack)))
      )
    end;
    check_wf_exp (name::stack) def.P.expression

  and check_wf_exp stack = function
    | P.App _ -> assert false
    | P.Expr [] -> ()
    | P.Expr ((x, _, _)::xs) ->
        check_wf_seq stack x;
        check_wf_exp stack (P.Expr xs)

  and check_wf_seq stack = function
    | [] -> ()
    | x::xs ->
        check_wf_item stack x;
        if analyze_item pg `Empty x then
          check_wf_seq stack xs

  and check_wf_item stack (_prefix, prim, suffix) =
    check_wf_primary stack prim;
    match suffix with
    | `STAR
    | `PLUS ->
        if analyze_primary pg `Empty prim then begin
          analyze ();
          grammar_error
            (Printf.sprintf "The expression <%s> in rule <%s> admits empty string, while it is marked with a %s. This would result in a looping parser and hence is forbidden."
              (T.primary_to_string prim)
              (List.hd stack)
              (match suffix with `STAR -> "star (*)" | `PLUS -> "plus (+)" | _ -> assert false)
            )
        end
    | _ -> ()

  and check_wf_primary stack = function
    | P.Paren e -> check_wf_exp stack e
    | P.Ident id -> check_wf_def stack id
    | P.Literal _
    | P.Class _ -> ()

  in
  check_wf_def [] name

let check_grammar ~analyze pg =
  let def_used = grammar_used_defs pg in
  let is_loop name = check_wf ~analyze pg name in
  StringSet.iter is_loop def_used;
  def_used

let start_definitions g =
  StringMap.fold (
    fun name (def,_) acc ->
      if def.P.mark then name::acc else acc
  ) g []

let unfold_star_and_plus peg =
  let star_rule_id = ref 0 in
  let new_rules = ref StringMap.empty in
  let rule_for_unfolded_star rule_id primary suffix =
    let item_from_primary e = `NORMAL, e, `NORMAL in
    let make_sequence seq code = List.map item_from_primary seq, StringMap.empty, Some (false, code, None, false) in
    let non_empty = make_sequence [primary; P.Ident rule_id] "__1::__2" in
    let empty =
      match suffix with
      | `STAR -> make_sequence [P.Literal ("", true)] "[]"
      | `PLUS -> make_sequence [primary] "[__1]"
    in
    let rule_def =
      { P.expression = P.Expr [non_empty; empty]
      ; P.debug = false
      ; P.mark = false
      ; P.retain_cache = false
      ; P.rule_type = None
      ; P.origin = None
      }
    in
    let rule_annots = { T.rule_name = T.NoName; T.rule_memo = T.MemoNoInfo } in
    rule_def, rule_annots
  in
  let unfold_star_plus prefix primary suffix =
    let rule_id = incr star_rule_id; !star_rule_id in
    let rule_name = Printf.sprintf "_starplus_unfolding_%d" rule_id in
    let new_primary = P.Ident rule_name in
    new_rules := StringMap.add rule_name (rule_for_unfolded_star rule_name primary suffix) !new_rules;
    prefix, new_primary, `NORMAL
  in
  let rec process_primary = function
    | P.Paren e -> P.Paren (process_expression e)
    | e -> e
  and process_item (prefix, primary, suffix) =
    let primary = process_primary primary in
    match suffix with
    | (`STAR | `PLUS) as suffix -> unfold_star_plus prefix primary suffix
    | _ -> prefix, primary, suffix
  and process_sequence (items, vars, code) = (List.map process_item items, vars, code)
  and process_expression = function
    | P.App _ -> failwith "pgrammar::unfold_star_and_plus: unexpected [App] in the grammar"
    | P.Expr ss -> P.Expr (List.map process_sequence ss)
  in
  let process_definition def = { def with P.expression = process_expression def.P.expression } in
  let process_rule (def, annot) = (process_definition def, annot) in
  let new_grammar = StringMap.map process_rule peg.T.grammar in
  let merge_rule _ _ = assert false (* new_rules should have IDs disjoint from the grammar *) in
  { peg with T.grammar = StringMap.merge merge_rule new_grammar !new_rules }

let infer_memoization_options ?(memo_default=T.MemoFull) peg =
  let infer_memoization = function
    | T.MemoNoInfo -> memo_default
    | T.MemoNone
    | T.MemoFail
    | T.MemoSuccess
    | T.MemoFull as memo -> memo
  in
  let process_rule (def, annot) =
    let new_annot = { annot with T.rule_memo = infer_memoization annot.T.rule_memo } in
    def, new_annot
  in
  { peg with T.grammar = StringMap.map process_rule peg.T.grammar }

(* FIXME: détection d'erreurs ici ou teds suffit ? *)
(* try ... *)
(* with *)
(* e -> *)
(* let _, _, _, last_ok = positions ... *)
(* raise e *)
let grammar_of_pre_grammar ~analyze ~memo_default ~unfold_starplus start_opt pg =
  let pg = rewrite_funs pg in
  let used = check_grammar ~analyze pg.P.defs in
  (* FIXME: ne conserver que used dans pg, renvoyer juste pg en type abstrait ! *)
  let start = match start_opt with
  | Some s -> s
  | _ ->
      let stdefs = start_definitions pg.P.defs in
      if stdefs = [] then raise (GrammarCheck "no start definition")
      else List.hd stdefs
  in
  let g =
    { T.start = start
    ; T.grammar = pg.P.defs
    ; T.extra = pg.P.pextra
    ; T.options = pg.P.poptions
    ; T.header = pg.P.pheader
    }
  in
  let grammar_memo_default () =
    let check_memo memo (opt, v) =
      if opt = "memoization" then
        Some (T.str2memo_type v)
      else
        memo
    in
    List.fold_left check_memo None g.T.options
  in
  let memo_default =
    match memo_default with
    | None -> grammar_memo_default ()
    | Some _ -> memo_default
  in
  let g = if unfold_starplus then unfold_star_and_plus g else g in
  let g = infer_memoization_options ?memo_default g in
  g, used

let read_grammar ?stoppable ?memo_default ?(analyze=false) ?(unfold_starplus=true) ~verbose start name =
  grammar_of_pre_grammar ?memo_default ~analyze ~unfold_starplus start (read_pre_grammar ?stoppable ~verbose name)

let parse_grammar ?(name="Main") ?stoppable ?memo_default ?(analyze=false) ?(unfold_starplus=true) ~verbose start text =
  grammar_of_pre_grammar ?memo_default ~analyze ~unfold_starplus start (parse_pre_grammar ~name ?stoppable ~verbose text)

let list_start ~verbose name =
  let pg = read_pre_grammar ~verbose name in
  let stdefs = start_definitions pg.P.defs in
  List.iter (fun s -> log "%s" s) stdefs

(* FIXME: only the grammar *)
let output_binary_grammar ~verbose ?(input="trxparse.trx") ?start output_file =
  let g, _ = read_grammar ~verbose start input in
  let oc = open_out output_file in
  output_value oc (g:'a T.grammar) ;
  close_out oc

let input_binary_grammar input_file =
  let ic = open_in input_file in
  log "input_binary_grammar: begin" ;
  let (grammar:'a T.grammar) = input_value ic in
  log "input_binary_grammar: end" ;
  close_in ic ;
  grammar

(* 2/11/2010 Adam: removing dead, deprecated code: old Henri's TRX interpreter &
some un-used (non-working?) functions for incremental parsing. Dig deep in
git history if you think you may need that... *)
Something went wrong with that request. Please try again.