Skip to content
Newer
Older
100644 342 lines (298 sloc) 12.3 KB
fccc685 Initial open-source release
MLstate authored
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (*
19 @author Adam Koprowski
20 **)
21
22 (* HACK : please, clean-up in opa lang *)
23 module Parser_utils = OpaParserUtils
24
25 open Base
26 open Trx_ast
27 open SurfaceAst
28 open SurfaceAstHelper
29
30 let opt_compile_ranges = true
31
32 (* ================= naming conventions =============== *)
33
34 let runtime_module = "Parser_private"
35 let input_name = "__input__"
36 let partial_flag_name = "__partial__"
37 let range_type = Printf.sprintf "%s.range" runtime_module
38
39 let position_var_name = sprintf "pos__%s"
40
41 let seq_it_name =
42 let r = ref 0 in
43 fun i ->
44 Pervasives.incr r;
45 Printf.sprintf "__seq_%d_%d" i !r
46
47 let choice_fun_name =
48 let r = ref 0 in
49 fun () ->
50 Pervasives.incr r;
51 Printf.sprintf "__choice_%d" !r
52
53 (* ================== AST manipulation ================ *)
54
55 module C = SurfaceAstCons.StringCons
56 let iterator_res () = C.T.name ~tyl:[C.T.name "itextrator"; C.T.fresh ()] Opacapi.Types.tuple_2
57 let option_iterator_res () = C.T.name ~tyl:[iterator_res ()] Opacapi.Types.option
58 let coerce_as_option_iterator_res e = C.E.coerce e (option_iterator_res ())
59 let none () = coerce_as_option_iterator_res (C.E.simple_record "none")
60 let some e = coerce_as_option_iterator_res (C.E.record1 "some" e)
61 let match_opt e pe1 pe2 = C.E.match_opt ~ty:(iterator_res ()) e pe1 pe2
62 let match_option e pe1 pe2 = C.E.match_option ~ty:(iterator_res ()) e pe1 pe2
63
64 let (!) = C.E.ident
65 let (&) = C.E.applys
66 let (<.>) = C.E.dot
67
68 (* FIXME, this should go to SurfaceAstCons or similar module *)
69 let opa_plus e1 e2 = (!"Int" <.> "add") & [e1; e2]
70 let opa_eq e1 e2 = (!"Int" <.> "==") & [e1; e2]
71 let opa_gt e1 e2 = (!"Int" <.> ">") & [e1; e2]
72 let opa_ge e1 e2 = (!"Int" <.> ">=") & [e1; e2]
73 let opa_and e1 e2 = (!"&&") & [e1; e2]
74 let opa_or e1 e2 = (!"||") & [e1; e2]
75 let opa_string_length src = (!"String" <.> "length") & [src]
76
77 let itextrator_next e = (!"Itextrator" <.> "next") & [e]
78
79 (* ============== TRX values manipulation ============= *)
80
81 let success_exp it v = some (C.E.tuple_2 it v)
82 let failure_exp () = none ()
83
84 let success_pat_gen it p = C.P.some (C.P.tuple_2 (C.P.var it) p)
85 let success_pat it v = success_pat_gen it (C.P.var v)
86 let success_wild_pat () = C.P.some (C.P.any ())
87 let failure_pat () = C.P.none ()
88
89 (* ================ auxiliary functions =============== *)
90
91 let call_runtime_fun f args = (!runtime_module <.> f) & args
92
93 let rec exists_expr p ((Expr seqs, _): _ Trx_ast.expr) = List.exists (exists_seq p) seqs
94 and exists_seq p ((seq_node, _) : _ Trx_ast.seq) =
95 List.exists (exists_item p) seq_node.seq_items ||
96 Option.exists p seq_node.seq_code
97 and exists_item p (({item_primary=(primary_node,_); _}, _): _ Trx_ast.item) =
98 match primary_node with
99 | Parens e -> exists_expr p e
100 | Class _
101 | Any
102 | Literal _ -> false
103 | Code e
104 | Rule e
105 | DynamicLiteral e -> p e
106
107 (* TODO, can we somehow improve this to also ignore results (if not needed) in sub-parsers? *)
108 let is_name_used_in_production_or_following_subrules code_opt subrules argName =
109 let predicate = OpaWalk.Expr.appears_str argName in
110 (* when there is no code associated with TRX expression, we will just take a
111 substring of the input, which does not use any intermediate results *)
112 Option.exists predicate code_opt || List.exists (exists_item predicate) subrules
113
114 (* ==================== translation =================== *)
115
116 let translate_class cl =
117 let make_range r = C.T.coerce_name r range_type in
118 let translate_range = function
119 | `ONE c -> make_range (C.E.record1 "one" (C.E.int c))
120 | `RANGE (c1, c2) -> make_range (C.E.record ["from", C.E.int c1; "to", C.E.int c2])
121 in
122 cl |> List.map translate_range |> C.E.list
123
124 let compile_class input cl =
125 let char = "char" in
126 let new_it = "new_it" in
127 let condition_for = function
128 | `ONE c -> opa_eq !char (C.E.int c)
129 | `RANGE (c1, c2) -> opa_and (opa_ge !char (C.E.int c1)) (opa_ge (C.E.int c2) !char)
130 in
131 let rec match_class = function
132 | [] -> C.E.false_ ()
133 | [x] -> condition_for x
134 | x::xs -> opa_or (condition_for x) (match_class xs)
135 in
136 let check_class =
137 let new_it = C.T.coerce_name !new_it Opacapi.Types.itextrator in
138 C.E.if_ (match_class cl) (success_exp new_it !char) (failure_exp ())
139 in
140 match_opt (itextrator_next input)
141 (C.P.none (), failure_exp ())
142 (C.P.some (C.P.tuple_2 (C.P.var new_it) (C.P.var char)), check_class)
143
144 let rec effective_seq_suffix is =
145 let effective_item i =
146 match i.item_prefix with
147 | `NORMAL -> true
148 | `AND | `NOT -> false
149 in
150 match is with
151 | [] -> false
152 | (x, _loc)::xs -> effective_item x || effective_seq_suffix xs
153
154 let rec translate_expression ~nested ~res_needed input (Expr es, loc) =
155
156 let rec aux = function
157 | [] -> [], failure_exp ()
158 | [s] -> [], translate_seq ~nested ~res_needed input s
159 | s::ss ->
160 let funs, last_exp = aux ss in
161 let n = choice_fun_name () in
162 let this_exp =
163 match_option (translate_seq ~nested ~res_needed input s)
164 (!n & []) (fun s -> some !s)
165 in
166 funs @ [n, last_exp], this_exp
167 in
168 let generate () =
169 let funs, exp = aux es in
170 let make_choice_fun (n, exp) = n, C.E.lambda [] exp in
171 C.E.letins (List.map make_choice_fun funs) exp
172 in
173 SurfaceAstCons.with_label loc generate
174
175 and translate_seq ~nested ~res_needed org_input (seq, loc) =
176 let rec aux input i used_names res_needed = function
177 | [] ->
178 let result =
179 if res_needed then
180 let get_substring = (!"Text" <.> "itsub") & [org_input; input] in
181 Option.default get_substring seq.seq_code
182 else
183 C.E.void ()
184 in
185 let return_val = success_exp input result in
186 if nested then
187 return_val
188 else
189 call_runtime_fun "check_partial" [!partial_flag_name; return_val]
190
191 | (x, loc)::xs ->
192 let item_name = x.item_name in
193 let it_name = seq_it_name i in
194 (* we need this item's result if it's default or derived name is used in the production *)
195 let item_needed = res_needed && Option.default_map false (is_name_used_in_production_or_following_subrules seq.seq_code xs) item_name in
196 let translated_item = translate_item ~nested:(nested || effective_seq_suffix xs) ~res_needed:item_needed input (x, loc) in
197 let generate () =
198 let provide_position_var result name =
199 let var_name = position_var_name name in
200 if is_name_used_in_production_or_following_subrules seq.seq_code xs var_name then
201 let pos = (!"Itextrator" <.> "pos") & [input] in
202 C.E.letin var_name pos result
203 else
204 result
205 in
206 let item_name_pattern =
207 match item_name with
208 | None -> C.P.any ()
209 | Some v -> C.P.var v in
210 let result =
211 (* used names are extended with the optional explicit item name *)
212 let use_name n =
213 if StringSet.mem n used_names then
214 error (Printf.sprintf "Use of the name %s ambiguous in a parser!" n);
215 StringSet.add n used_names
216 in
217 let used_names = Option.default_map used_names use_name item_name in
218 (* we compute the result *)
219 let res = aux !it_name (i+1) used_names res_needed xs in
220 (* if production uses [pos__name], where [name] is the explicit rule name,
221 then we make this variable available *)
222 let res = Option.default_map res (provide_position_var res) item_name in
223 res
224 in
225 match_opt translated_item
226 (failure_pat (), failure_exp ())
227 (success_pat_gen it_name item_name_pattern, result)
228 in
229 SurfaceAstCons.with_label loc generate
230 in
231 (* A small improvement to the heuristic that does not produce result when it is not needed.
232 We evaluate user provided productions even if it seems we don't need the result, as they
233 may contain side-effects and it would be baffling for the user if they were not executed *)
234 let res_needed' = res_needed || seq.seq_code <> None in
235 let generate () = aux org_input 1 StringSet.empty res_needed' seq.seq_items in
236 SurfaceAstCons.with_label loc generate
237
238 and translate_item ~nested ~res_needed input ((i, loc) as item) =
239 let r = translate_suffix ~nested:(nested || i.item_prefix <> `NORMAL) ~res_needed input item in
240 let generate () =
241 let void = C.E.void () in
242 match i.item_prefix with
243 | `AND -> match_opt r (failure_pat (), failure_exp ()) (success_pat "_it" "r", success_exp input !"r")
244 | `NOT -> match_opt r (failure_pat (), success_exp input void) (success_wild_pat (), failure_exp ())
245 | `NORMAL -> r
246 in
247 SurfaceAstCons.with_label loc generate
248
249 and translate_suffix ~nested ~res_needed input (i, loc) =
250 let this_nested =
251 match i.item_suffix with
252 | `NORMAL | `QUESTION -> false
253 | `PLUS | `STAR -> true
254 in
255 let is_nested = nested || this_nested in
256 let primary input = translate_primary ~nested:is_nested ~res_needed input i.item_primary in
257 let generate () =
258 match i.item_suffix with
259 | `NORMAL -> primary input
260 | `QUESTION ->
261 let it_name = "it" in
262 let res_name = "r" in
263 match_opt (primary input)
264 (failure_pat (), success_exp input (C.E.none ()))
265 (success_pat it_name res_name, success_exp !it_name (C.E.some !res_name))
266 | `PLUS | `STAR ->
267 let input_var = "input_it" in
268 let f = C.E.lambda_var input_var (primary !input_var) in
269 let fun_name = "primary_list" ^ if res_needed then "" else "_no_res" in
270 call_runtime_fun fun_name [C.E.bool (i.item_suffix = `PLUS); f; input]
271 in
272 SurfaceAstCons.with_label loc generate
273
274 and translate_primary ~nested ~res_needed input (p, loc) =
275 let rec generate p =
276 match p with
277 | Parens e ->
278 translate_expression ~nested ~res_needed input e
279
280 | DynamicLiteral e ->
281 let funName = "parse_literal" in
282 call_runtime_fun funName [input; e]
283
284 | Literal (l, cs) ->
285 (*
286 (* we treat 1-character literals "x" as a class [x] *)
287 if Cactutf.length l = 1 then
288 let cl = Class [`ONE (Cactutf.get l 0)] in
289 translate_primary ~res_needed input (cl, loc)
290 else
291 *)
292 let funName = if cs then "parse_literal" else "parse_literal_case_insensitive" in
293 call_runtime_fun funName [input; C.E.string l]
294
295 | Any ->
296 itextrator_next (C.T.coerce_name input Opacapi.Types.itextrator)
297
298 | Class cl ->
299 if opt_compile_ranges then
300 SurfaceAstCons.with_label loc (fun () -> compile_class input cl)
301 else
302 call_runtime_fun "parse_range" [input; translate_class cl]
303
304 (* TODO do we really need distinction between Rule & Code tags? Consider
305 implementing Rule by means of Code (possibly renaming it) *)
306 | Rule id ->
307 let partial =
308 if nested then
309 C.E.true_ ()
310 else
311 !partial_flag_name
312 in
313 let coerced_id = C.T.coerce_name id Opacapi.Types.Parser.general_parser in
314 coerced_id & [partial; input]
315
316 | Code code ->
317 let user_parser_var = Parser_utils.fresh_name () in
318 let user_parser = C.T.coerce_name code Opacapi.Types.Parser.general_parser in
319 let partial =
320 if nested then
321 C.E.true_ ()
322 else
323 !partial_flag_name
324 in
325 let call_parser = !user_parser_var & [partial; input] in
326 C.E.letin user_parser_var user_parser call_parser
327
328 in
329 SurfaceAstCons.with_label' loc generate p
330
331 (* --- main entry point --- *)
332 let translate_rule e =
333 let rule_exp = translate_expression ~nested:false ~res_needed:true !input_name e in
334 let args = List.map C.P.ident [partial_flag_name; input_name] in
335 let parser_code = C.E.lambda args rule_exp in
336 let coerced_parser_code = C.T.coerce_name parser_code Opacapi.Types.Parser.general_parser in
337 let () =
338 #<If:SA_TRX>
339 Format.eprintf "TRX <<\n%a\n>>@." OpaPrint.string#expr coerced_parser_code
340 #<End>
341 in coerced_parser_code
Something went wrong with that request. Please try again.