Skip to content
This repository
Newer
Older
100644 366 lines (326 sloc) 12.937 kb
fccc6851 » MLstate
2011-06-21 Initial open-source release
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (*
19 @author Henri Binsztok
20 @author Adam Koprowski
21 *)
22
23 (* depends*)
24 module List = BaseList
25 module String = BaseString
26
27 (* alias *)
28 module B = Base
29 (* FIXME, This module seroiusly needs a clean-up... I hope to have time one day... *)
30
31 (* TODO
32 * autoriser une expression vide (on peut bien écrire !"é#ù£" {{ ... }})
33 * ajouter de gardes une fois traduit en Qml
34 * syntaxe pour un au plus parmi [[toto truc muche]]
35 * syntaxe extensible = générer les opérateurs infixes au minimum, voire générer le parser en cours de route...
36 *)
37
38 (* FIXME, Any should become a separate construct; it does not belong to range *)
39 type range = One of char | Range of char * char | Any
40
41 type rule_name = NoName
42 | PrimaryName of string
43 | SecondaryName of string
44
45 type memo_type = MemoNone (* no memoization *)
46 | MemoFail (* memoization of failures *)
47 | MemoSuccess (* memoization of success *)
48 | MemoFull (* memoization of both failure and success *)
49 | MemoNoInfo (* no memoization information for rule *)
50
51 type rule_annots =
52 { rule_name : rule_name
53 ; rule_memo : memo_type
54 }
55
56 type location =
57 { file_name : string
58 ; line_number : int
59 }
60
61 (* type code = string (\* FIXME: Qml.expression *\) *)
62 type ident = string (* FIXME: abstrait *)
63 type fun_ident = ident (* FIXME: abstrait *)
64 type code_ident = ident (* FIXME: abstrait *)
65 type code_type = string
66 type filename = string
67
68 module Grammar =
69 struct
70 type 'a expression = 'a sequence list
71 and 'a sequence =
72 'a item list (* FIXME * [`LIST | `ONEOFEACH] *) * (string * bool) StringMap.t (* labeled items *)
73 * (bool * 'a (* code *) * location option * bool (* if true production gives option with none indicating that we should backtrack*)) option (* bool = ajouter les positions... *)
74 and 'a item = [`AND | `NOT | `NORMAL] * 'a primary * [`QUESTION | `STAR | `PLUS | `NORMAL]
75 and 'a primary =
76 | Ident of ident
77 | Paren of 'a expression
78 | Literal of string * bool (* case agnostic *)
79 | Class of range list
80 type 'a grammar =
81 { start : string
82 ; grammar : 'a expression StringMap.t
83 }
84 end
85
86 module PreGrammar =
87 struct
88 type 'a definition =
89 { expression : 'a expression
90 ; debug : bool
91 ; mark : bool (* FIXME: le "+" comme "debug" devraient être dans la StringMap : pas ici *)
92 ; retain_cache: bool (* Only relevant for incremental parsing: if true then cache for this rule will be
93 kept in the next parsing of the input; otherwise it will be cleared (only
94 few top-level rules should be marked for retaining cache, since it has to be
95 updated and this can be quite costly) *)
96 ; rule_type : string option
97 ; origin : location option (* location from which this rule originated (if available) *)
98 }
99 and 'a expression =
100 | Expr of 'a sequence list
101 | App of fun_ident * 'a sequence list list (* application de fonction, ne doit plus rester dans une grammaire typecheckée *)
102 and 'a sequence = 'a item list (* FIXME * [`LIST | `ONEOFEACH] *) * (string * bool) StringMap.t *
103 (bool (* ajouter les positions... *) * 'a (* code *) * location option * bool (* if true then production gives option with none indicating that we should backtrack *)) option
104 and 'a item = [`AND | `NOT | `NORMAL] * 'a primary * [`QUESTION | `STAR | `PLUS | `NORMAL]
105 and 'a primary =
106 | Ident of ident
107 | Paren of 'a expression
108 | Literal of string * bool (* case agnostic *)
109 | Class of range list
110
111 type include_type = Incl | Read
112 type include_def =
113 { it : include_type
114 ; gl : string list (* liste des définitions "globales", i.e. fonctions de l'include ou open *)
115 }
116 type 'a pre_grammar =
117 { pheader : 'a header list
118 ; poptions : (code_ident * code_ident) list
119 ; pextra : (code_ident * code_type) list
120 ; incl : include_def StringMap.t (* stringset *)
121 ; funs : ('a gfun * rule_annots) StringMap.t (* fun_ident_map *)
122 ; defs : ('a definition * rule_annots) StringMap.t }
123 and 'a gfun =
124 { vars : ident list (* liste des idents bindés *)
125 ; expr : 'a expression }
126 and 'a header_code =
127 [ `inside of 'a
128 | `normal of 'a
129 | `types of 'a
130 | `decls of 'a
131 | `file of filename (* utile ? *)
132 ]
133 and 'a header = 'a header_code * location option
134 end
135
136 module G = Grammar
137 module P = PreGrammar
138
139 let rec def_map_to_string ?cf dm =
140 StringMap.fold (
141 fun x y acc ->
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
142 Printf.sprintf "%s%s%s%s%s%s <- %s" acc (if y.P.mark & (not y.P.debug) & (not (acc = "")) then ";" else "") (if acc = "" then "" else "\n\n") (if y.P.debug then "%" else "") (if y.P.mark then "+" else "") x (def_to_string ?cf y)
fccc6851 » MLstate
2011-06-21 Initial open-source release
143 ) dm ""
144 and def_to_string ?cf d =
145 expr_to_string ?cf d.P.expression
146 and expr_to_string ?cf = function
147 | P.Expr sl -> seq_list_to_string ?cf sl
148 | _ -> assert false
149 and seq_list_to_string ?cf = function
150 | [hd] -> seq_to_string ?cf hd
151 | (hd :: tl) -> seq_to_string ?cf hd ^ " / " ^ seq_list_to_string ?cf tl
152 | _ -> assert false
153 and seq_to_string ?cf = function
154 | il, map, None -> item_list_to_string ?cf ~map:map ~i:1 il
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
155 | il, map, Some (b, code, _loc, c) -> Printf.sprintf "%s %s" (item_list_to_string ?cf ~map:map ~i:1 il) (match cf with None -> "CODE" | Some f -> f b c code)
fccc6851 » MLstate
2011-06-21 Initial open-source release
156 and item_list_to_string ?cf ?map ?i = function
157 | [hd] -> (item_to_string ?cf ?map ?i hd)
158 | (hd :: tl) ->
159 begin
160 match i with
161 | None -> (item_to_string ?cf ?map ?i hd) ^ " " ^ (item_list_to_string ?cf ?map ?i tl)
162 | Some l -> (item_to_string ?cf ?map ?i hd) ^ " " ^ (item_list_to_string ?cf ?map ~i:(l+1) tl)
163 end
164 | _ -> assert false
165 and item_to_string ?cf ?map ?i (pre, content, post) =
166 let option = match map with
167 | None -> None
168 | Some m ->
169 StringMap.fold (
170 fun key (ind,b) acc ->
171 match i with
172 | None -> acc
173 | Some l -> if (ind = string_of_int (l)) then Some (key,b) else acc
174 ) m None in
175 match option with
176 | None -> pre_to_string pre ^ primary_to_string ?cf content ^ post_to_string post
177 | Some (name,bool) -> let tmp = if bool then " :_ " else " : " in pre_to_string pre ^ primary_to_string ?cf content ^ post_to_string post ^ tmp ^ name ^ " "
178 and pre_to_string = function
179 | `AND -> "&"
180 | `NOT -> "!"
181 | _ -> ""
182 and post_to_string = function
183 | `QUESTION -> "?"
184 | `STAR -> "*"
185 | `PLUS -> "+"
186 | `NORMAL -> ""
187 and primary_to_string ?cf = function
188 | P.Ident i -> i
189 | P.Paren e -> "(" ^ expr_to_string ?cf e ^ ")"
190 | P.Literal (s, b) ->
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
191 Printf.sprintf "\"%s\"%s" (String.escaped s) (if b then "~" else "")
192 | P.Class rl -> Printf.sprintf "%s" (range_list_to_string rl)
fccc6851 » MLstate
2011-06-21 Initial open-source release
193 and range_list_to_string = function
194 | [Any] -> "."
195 | liste ->
196 let rec aux bool = function
197 | [hd] -> range_to_string bool hd
198 | hd :: tl -> range_to_string true hd ^ aux true tl
199 | _ -> assert false
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
200 in Printf.sprintf "[%s]" (aux false liste)
fccc6851 » MLstate
2011-06-21 Initial open-source release
201 and range_to_string b = function
202 | One c ->
203 begin
204 match c with
205 | '\n' -> "\\n"
206 | '\r' -> "\\r"
207 | '\t' -> "\\t"
208 | ']' -> "\\]"
209 | '[' -> "\\["
210 | '\'' -> "\\'"
211 | '\\' -> "\\\\"
212 | c ->
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
213 if (b && ((c = '-') || (c = '"'))) then Printf.sprintf "\\%c" c
214 else Printf.sprintf "%c" c
fccc6851 » MLstate
2011-06-21 Initial open-source release
215 end
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
216 | Range (c1, c2) -> Printf.sprintf "%c-%c" c1 c2
fccc6851 » MLstate
2011-06-21 Initial open-source release
217 | Any -> assert false
218
219 let make_const name =
220 (([`NORMAL, P.Literal (name, false), `NORMAL], StringMap.empty, None) : 'a P.sequence)
221
222 type 'a grammar =
223 { start : string
224 ; grammar : ('a P.definition * rule_annots) StringMap.t
225 ; header : 'a P.header list
226 ; extra : (code_ident * code_type) list
227 ; options : (code_ident * code_ident) list
228 }
229
230 let rec map_grammar (f:'a -> 'b) g =
231 let rec aux_definition (def,msg_error) =
232 { def with P.expression = aux_expression def.P.expression }, msg_error
233 and aux_expression = function
234 | P.Expr sl -> P.Expr (List.map aux_sequence sl)
235 | P.App _ -> raise (B.NotImplemented "map_grammar1")
236 and aux_sequence = function
237 | (il, map, None) -> List.map aux_item il, map, None
238 | (il, map, Some (b, code, loc, c)) -> List.map aux_item il, map, Some (b, f code, loc, c)
239 and aux_item (pre, p, suf) = pre, aux_primary p, suf
240 and aux_primary = function
241 | P.Ident i -> P.Ident i
242 | P.Paren e -> P.Paren (aux_expression e)
243 | P.Literal (s, b) -> P.Literal (s, b)
244 | P.Class rl -> P.Class rl
245 and aux_header_code = function
246 | `normal code -> `normal (f code)
247 | `inside code -> `inside (f code)
248 | `types code -> `types (f code)
249 | `decls code -> `decls (f code)
250 | `file f -> `file f
251 and aux_header = function
252 | code, loc -> aux_header_code code, loc
253 in
254 { start = g.start
255 ; grammar = StringMap.map aux_definition g.grammar
256 ; header = List.map aux_header g.header
257 ; extra = g.extra
258 ; options = g.options
259 }
260
261 let empty_pre_grammar =
262 { P.pheader = []
263 ; P.pextra = []
264 ; P.poptions = []
265 ; P.incl = StringMap.empty
266 ; P.funs = StringMap.empty
267 ; P.defs = StringMap.empty }
268
269 (* pour le traitement des includes, on n'ajoute que si nécessaire *)
270 let add_definition preg (name, def) =
271 if StringMap.mem name preg.P.defs then
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
272 ((* Printf.eprintf "definition %s exists, skipping\n" name ;*) preg)
fccc6851 » MLstate
2011-06-21 Initial open-source release
273 else
274 { preg with P.defs = StringMap.add name def preg.P.defs }
275
276 (* FIXME: factoriser *)
277 let add_function preg (name, def) =
278 if StringMap.mem name preg.P.funs then
da5de875 » Raja
2011-06-22 [cleanup] jlog: remove Base.jlog
279 ((* Printf.eprintf "function %s exists, skipping\n" name ;*) preg)
fccc6851 » MLstate
2011-06-21 Initial open-source release
280 else
281 { preg with P.funs = StringMap.add name def preg.P.funs }
282
283 let get_expression def =
284 match def.P.expression with
285 | P.Expr e -> e
286 | _ -> assert false
287
288 let ml_identifier = B.String.map (function '.' -> '_' | c -> c)
289
290 let string_of_chars = B.String.of_chars
291 let int_of_chars l =
292 int_of_string (string_of_chars l)
293
294 let while_primary is_plus =
295 fun f -> fun _pos ->
296 let rec aux acc pos =
297 match f pos with
298 | Some (np, nr) -> aux (nr::acc) np
299 | None -> pos, acc in
300 match aux [] _pos with
301 | (_, []) as r -> if is_plus (*option=`PLUS*) then None else Some r
302 | (np, r) -> Some (np, List.rev r)
303
304 let match_char c l =
305 let rec aux = function
306 | [] -> false
307 | Any::_ -> true
308 | (One c1)::tl -> c1 = c or aux tl
309 | (Range (c1, c2))::tl -> (c >= c1 && c <= c2) or aux tl
310 in aux l
311
312 let str2memo_type = function
313 | "none" -> MemoNone
314 | "fail" -> MemoFail
315 | "success" -> MemoSuccess
316 | "full" -> MemoFull
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
317 | s -> failwith (Printf.sprintf "Unknown memoization option: '%s' (should be: 'none', 'fail', 'success' or 'full')" s)
fccc6851 » MLstate
2011-06-21 Initial open-source release
318
319 let prefix2str = function
320 | `AND -> "&"
321 | `NOT -> "!"
322 | `NORMAL -> ""
323
324 let suffix2str = function
325 | `QUESTION -> "?"
326 | `STAR -> "*"
327 | `PLUS -> "+"
328 | `NORMAL -> ""
329
330 let rangeChar2str = function
331 | '"' -> "\\\""
332 | '-' -> "\\-"
333 | c -> Char.escaped c
334
335 let range2str = function
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
336 | One c -> Printf.sprintf "%s" (rangeChar2str c)
337 | Range (c1, c2) -> Printf.sprintf "%s-%s" (rangeChar2str c1) (rangeChar2str c2)
fccc6851 » MLstate
2011-06-21 Initial open-source release
338 | Any -> failwith "Unexpected Any in a non-trivial range"
339
340 let rec primary2str = function
341 | P.Ident i -> i
342 | P.Paren exp -> "(" ^ expr2str exp ^ ")"
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
343 | P.Literal (l, cs) -> Printf.sprintf "\"%s\"%s" (String.escaped l) (if cs then "" else "~")
fccc6851 » MLstate
2011-06-21 Initial open-source release
344 | P.Class [Any] -> "."
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
345 | P.Class cs -> Printf.sprintf "[%s]" (List.to_string range2str cs)
fccc6851 » MLstate
2011-06-21 Initial open-source release
346
347 and item2str (prefix, primary, suffix) =
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
348 Printf.sprintf "%s%s%s" (prefix2str prefix) (primary2str primary) (suffix2str suffix)
fccc6851 » MLstate
2011-06-21 Initial open-source release
349
350 (* FIXME add support for printing code *)
351 (* FIXME add support for printing named items *)
352 and sequence2str (items, labels, _code) =
353 if not (StringMap.is_empty labels) then
354 failwith "item labels not supported in printing yet"
355 else
356 String.concat_map " " item2str items
357
358 and expr2str = function
359 | P.Expr es -> String.concat_map " / " sequence2str es
360 | P.App _ -> failwith "tgrammar:expr2str -> App"
361
362 let rule2str (name, (exp, _)) =
a9f8d34c » Raja
2011-06-28 [cleanup] Base: remove sprintf
363 Printf.sprintf "%s <- %s ;\n\n" name (expr2str exp.P.expression)
fccc6851 » MLstate
2011-06-21 Initial open-source release
364
365 let grammar2str peg =
366 List.to_string rule2str (StringMap.to_list peg.grammar)
Something went wrong with that request. Please try again.