-
Notifications
You must be signed in to change notification settings - Fork 125
/
trx_ocaml.ml
1626 lines (1499 loc) · 56.7 KB
/
trx_ocaml.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(*
Copyright © 2011 MLstate
This file is part of OPA.
OPA is free software: you can redistribute it and/or modify it under the
terms of the GNU Affero General Public License, version 3, as published by
the Free Software Foundation.
OPA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
more details.
You should have received a copy of the GNU Affero General Public License
along with OPA. If not, see <http://www.gnu.org/licenses/>.
*)
(*
@author Adam Koprowski
**)
module T = Tgrammar
module P = Tgrammar.PreGrammar
module OcamlG = Ocaml.Cons
module List = BaseList
module String = BaseString
let pr = Printf.sprintf
let prErr = Printf.eprintf
let log fmt = Printf.eprintf (fmt^^"\n")
let error fmt = Printf.eprintf ("[31m"^^fmt^^"[0m\n")
(* we count parts of the sequence starting from 1 (they can be accessed in semantic
actions via __1, __2 etc. variables *)
let first_pos = 1
let argValue i = pr "__%d" i
(* FIXME, start using those constants consistenly!!! *)
let main_param_cache = "cache"
let main_param_pos_update = "pos_update"
let main_param_filename = "_filename"
let main_param_text = "_text"
let main_param_start = "_start"
let trx_runtime fn = ["Trx_runtime"; fn]
(* TODO, Line number directives #... in generated files; for generated parts
put appropriate directives; otherwise they refer to wrong parts
of .trx files *)
(* TODO, incremental parsing: return updated cache also for failed parses...
(possibly by boxing all exceptions; even better not returning this value but
doing in-place modifications in cache arguments) *)
(* TODO, grammar optimizations: choice re-ordering etc. *)
(* TODO, handle left-recursion *)
(* TODO, rules that produce no result don't need memoization of the result, only yes/no - use it *)
(* TODO, switch to camlp4 for Ocaml AST/pretty-printing? *)
(* TODO, optimization: get rid of string_of_chars for X+ productions *)
(* TODO, think about many different back-ends, we may want TRX-ocaml, TRX-QML, TRX-JS, ... TRX should be properly designed for that. *)
(* =========================================================================================================== *)
(* =========================================== CMD LINE arguments: =========================================== *)
(* =========================================================================================================== *)
let default_imperative_errors = false
let default_opt_errors = true
let default_opt_gen_res = true
let default_opt_inline_literals = true
let default_opt_inline_ranges = true
let default_opt_unfold_starplus = false
let grammarFn = ref None
let imperative_errors = ref default_imperative_errors
let opt_errors = ref default_opt_errors
let opt_gen_res = ref default_opt_gen_res
let opt_inline_literals = ref default_opt_inline_literals
let opt_inline_ranges = ref default_opt_inline_ranges
let opt_unfold_starplus = ref default_opt_unfold_starplus
let map_location_to_trx_file = ref true
let pp_grammar = ref false
let functorize = ref false
let binary = ref None
let incremental = ref false
let auto_ast = ref false
let rule_deps = ref false
let list_start = ref false
let memo_default = ref None
let debug_mode = ref false
let main = ref None
let basename = ref None
let no_mli = ref false
let analyze_grammar = ref false
(* =========================================================================================================== *)
(* ============================================= Results/patterns ============================================ *)
(* =========================================================================================================== *)
type parsingContext =
{ peg : string Tgrammar.grammar
; input : string
; gen_err : bool
; gen_res : bool
}
let var = Ocaml.make_Var
let pvar v = Ocaml.PatVar (Ident.source v)
let vars = Ocaml.make_Varl
let pany = Ocaml.PatAny
let fail_id = List.map Ident.source (trx_runtime "Fail")
let ok_id = List.map Ident.source (trx_runtime "Ok")
let pair p1 p2 = OcamlG.tuple [p1; p2]
let pat_pair p1 p2 = OcamlG.pat_tuple [p1; p2]
let fail gen_err err lai =
let result =
if gen_err then
Ocaml.Constructor (fail_id, [err])
else
OcamlG.none
in
if !incremental then
pair result lai
else
result
let ok gen_err pos res err lai =
let pos_res = OcamlG.tuple [pos; res] in
let result =
if gen_err then
Ocaml.Constructor (ok_id, [pos_res; err])
else
OcamlG.some pos_res
in
if !incremental then
pair result lai
else
result
let pat_fail gen_err err lai =
let result =
if gen_err then
Ocaml.PatConstructor (fail_id, [err])
else
OcamlG.pat_none
in
if !incremental then
pat_pair result lai
else
result
let pat_ok gen_err pos res err lai =
let pos_res = OcamlG.pat_tuple [pos; res] in
let result =
if gen_err then
Ocaml.PatConstructor (ok_id, [OcamlG.pat_tuple [pos_res; err]])
else
OcamlG.pat_some pos_res
in
if !incremental then
pat_pair result lai
else
result
let map_return_value gen_err value return =
OcamlG.make_match value
[ pat_ok gen_err (pvar "pos") (pvar "res") (pvar "err") (pvar "lai"),
None (* guard *),
ok gen_err (var "pos") (return "res") (var "err") (var "lai")
; pat_fail gen_err (pvar "err") (pvar "lai"),
None (* guard *),
fail gen_err (var "err") (var "lai")
]
let map_lai_value gen_err value return =
OcamlG.make_match value
[ pat_ok gen_err (pvar "pos") (pvar "res") (pvar "err") (pvar "lai"),
None (* guard *),
ok gen_err (var "pos") (var "res") (var "err") (return "lai")
; pat_fail gen_err (pvar "err") (pvar "lai"),
None (* guard *),
fail gen_err (var "err") (var "lai")
]
let map_lai_value_to_max ctx value lai =
map_lai_value ctx value
(fun old_lai -> OcamlG.app3 (var "max") (var old_lai) lai)
(* =========================================================================================================== *)
(* =========================== Graph for computing dependencies between productions ========================== *)
(* =========================================================================================================== *)
module V : Graph.Sig.COMPARABLE with type t = string =
struct
type t = string
let equal u v = (u = v)
let hash = Hashtbl.hash
let compare = Pervasives.compare
end
(** We will compute a directed graph where nodes are strings, which are names of functions for parsing
productions. If opt_no_err is turned off then there is exactly one parsing function for every production
and it has the prefix "try_" followed by production name (including module names etc.). With this
optimization turned on, on top of this function there is another one with suffix "_no_err".
The arrows correspond to dependencies, i.e. if there is arrow from A to B, then it means that parsing
function A may call B. We use this dependency analysis to output only required functions (and to avoid
unused variable warnings). *)
module G = Graph.Imperative.Digraph.ConcreteBidirectional (V)
module Neigh = Graph.Oper.Neighbourhood (G)
module SCC = Graph.Components.Make (G)
module Dot = Graph.Graphviz.Dot (
struct
include G
let graph_attributes _g = []
let default_vertex_attributes _g = []
let vertex_name v = v
let vertex_attributes _v = []
let get_subgraph _v = None
let default_edge_attributes _g = []
let edge_attributes _edge = []
end
)
(* parsing functions dependency graph *)
let dep_g = G.create ()
(** given: a dependency graph [g], a list of starting productions [all_prods] and a list of all generated
production functions [all_prods], returns [all_prods] filtered to those functions that are really
required in the final parser (as computed from dependencies). *)
let compute_needed_prods g start_names all_prods =
let rec compute_reqs to_visit reqs =
match to_visit with
| [] -> reqs
| x::xs ->
if StringSet.mem x reqs then
compute_reqs xs reqs
else
let n = G.succ g x in
compute_reqs (xs @ n) (StringSet.add x reqs)
in
let required = compute_reqs start_names StringSet.empty in
let req_fun (_pn, fn, _) = StringSet.mem fn required in
List.filter req_fun all_prods
let partition_definitions g start_names all_prods =
let needed = compute_needed_prods g start_names all_prods in
let prod_for_name v (_, fn, _) = fn = v in
let remove_unneeded v =
if not (List.exists (prod_for_name v) needed) then
G.remove_vertex dep_g v
in
G.iter_vertex remove_unneeded dep_g;
let sccs = Array.to_list (SCC.scc_array dep_g) in
let find_prod fn =
match List.find_all (prod_for_name fn) needed with
| [x] -> x
| _ -> assert false
in
List.map (List.map find_prod) sccs
(* =========================================================================================================== *)
(* ========================================== Interface generation =========================================== *)
(* =========================================================================================================== *)
let find_start_prods peg =
let prods = StringMap.to_list peg.T.grammar in
let filter_start_prods (fn, (def, _)) =
if def.P.mark then
Some fn
else
None
in
List.filter_map filter_start_prods prods
let parse_function_name fn = Ident.source ("parse_" ^ String.lowercase fn)
let trx_annotate_location loc code =
if !map_location_to_trx_file then
match loc with
| None -> code
| Some r -> Ocaml.LineAnnot (r.T.line_number, r.T.file_name, code)
else
code
let generate_interface grammarFn peg =
let interface_comment = Printf.sprintf "
*******************
*** DO NOT EDIT ***
*******************
This file was automatically generated by TRX from the grammar: %s
" grammarFn
in
let reader_module_name = "R" in
let generate_signature fn =
let parens s = "(" ^ s ^ ")" in
let trx_pos_type = Ocaml.TypeName ([], ["Trx_runtime"; "pos"]) in
let type_string = Ocaml.TypeConst Ocaml.TypeString in
let filename_arg = Ocaml.TypeLabel (true, main_param_filename, type_string) in
let start_arg = Ocaml.TypeLabel (true, main_param_start, trx_pos_type) in
let input_arg =
if !functorize then
Ocaml.TypeName ([Ocaml.TypeVar "'input_a"; Ocaml.TypeVar "'input_b"], [reader_module_name; "input"])
else
type_string
in
let rule_def, _ = StringMap.find fn peg.T.grammar in
let extra_args =
if !auto_ast then
[]
else
List.map (fun (_name, t) -> Ocaml.TypeVerbatim (parens t)) peg.T.extra
in
let result_type =
let final_type =
if !auto_ast then
Ocaml.TypeName ([], ["Trx_auto_ast"; "auto_ast"])
else
match rule_def.P.rule_type with
| None -> failwith ("Unknown type for rule: " ^ fn ^ "; please specify the rule type in the grammar")
| Some t -> Ocaml.TypeVerbatim (parens t)
in
Ocaml.TypeTuple [trx_pos_type; final_type]
in
let args = extra_args @ [filename_arg; start_arg; input_arg; result_type] in
Ocaml.Val (parse_function_name fn, OcamlG.type_arrows args)
in
let start_prods = find_start_prods peg in
let interface_funs = List.map generate_signature start_prods in
let declarations =
if !functorize then
(* FIXME This is not good; use AST instead, but no appropriate construct for now in ocamllang/ocaml.ml *)
[ Ocaml.Verbatim (Printf.sprintf "module Make : functor (%s : Reader.S) -> sig" reader_module_name)] @
interface_funs @
[ Ocaml.Verbatim "end" ]
else
interface_funs
in
let user_code =
let gen_user_code acc (h, loc) =
match h with
| `types c | `decls c -> acc @ [trx_annotate_location loc (Ocaml.Verbatim c)]
| `normal _ | `file _ | `inside _ -> acc
in
List.fold_left gen_user_code [] peg.T.header
in
Ocaml.Comment (interface_comment) :: user_code @ declarations
(* =========================================================================================================== *)
(* ========================================== Actual parsing logic =========================================== *)
(* =========================================================================================================== *)
let grammar_extras peg =
if !auto_ast then
[]
else
List.map fst peg.T.extra
let add_flag_suffix gen_err id =
let id = if gen_err then id else id ^ "_noerr" in
id
let call_fun args = OcamlG.app_list args
let call_runtime fn args =
OcamlG.app_list (vars (trx_runtime fn) :: args)
let call_string_sub p1 p2 = OcamlG.app_list [var "_get_sub"; p1; p2]
let call_emptyError pos = call_runtime "emptyError" [pos]
let call_option_to_res_msg res pos err = call_runtime "option_to_res_msg" [res; pos; err]
let call_option_to_res_err res pos err = call_runtime "option_to_res_err" [res; pos; err]
let call_range_to_error range = call_runtime "range_to_error" [range]
let call_while_primary ctx plus f pos =
let fn = add_flag_suffix ctx.gen_err "while_primary" in
let fn =
if !opt_gen_res && not ctx.gen_res then
fn ^ "_nores"
else
fn
in
call_runtime fn [plus; f; pos]
let call_process_literal pos literal case = call_runtime "process_literal" [var "_get_char"; var "_len"; pos; literal; case]
let call_process_range pos cl = call_runtime "process_range" [var "_get_char"; var "_len"; pos; cl]
let call_gen_syntax_error err =
let input_string =
if !functorize then
call_fun [var "_get_sub"; OcamlG.int 0; var "_len"]
else
var main_param_text
in
let pos2loc () = call_fun [vars ["FilePos"; "get_pos_no_cache"]; input_string] in
call_runtime "gen_syntax_error" [pos2loc (); err]
let call_setMainConstruct res pos err = call_runtime "setMainConstruct" [res; pos; err]
let call_decorateConstruct res pos err = call_runtime "decorateConstruct" [res; pos; err]
let call_update_cache cache update_pos = call_runtime "update_memoization_cache" [cache; update_pos]
let call_gatherErrors gen_err res =
if !imperative_errors && gen_err then
call_runtime "gatherErrors" [res]
else
res
let call_addErrorInfo err res =
if !imperative_errors then
Ocaml.Sequence (
call_runtime "push_errInfo" [err],
res
)
else
call_runtime "addErrorInfo" [err; res]
exception Final of Ocaml.expr
let default_code_for_seq items =
let rec aux pos res = function
| [] ->
begin match res with
| Some pos ->
(* there is only one argument that can be used as a result of a
sequence, so we use it *)
Some (false, var (argValue pos), false, pos)
| None -> None
end
| i::is ->
match i with
| `NORMAL, _, _ -> (* this is a candidate for a productive argument *)
begin match res with
| None -> aux (pos + 1) (Some pos) is
| Some _ -> None
end
| _ ->
aux (pos + 1) res is
in
aux first_pos None items
let update_indent v =
let indent_var = var "indent" in
Ocaml.SetRef (indent_var, OcamlG.plus (Ocaml.GetRef indent_var) (OcamlG.int v))
let optional_rule_name annots =
match annots.T.rule_name with
| T.NoName -> OcamlG.none
| T.PrimaryName def_name
| T.SecondaryName def_name -> OcamlG.some (OcamlG.string def_name)
let grammar_rules peg =
let prods = StringMap.to_list peg.T.grammar in
let prod_sig (name, (_def, annots)) =
let full_name = optional_rule_name annots in
OcamlG.tuple [OcamlG.string name; full_name]
in
OcamlG.list (List.map prod_sig prods)
(* counter for position counters *)
let inputCnt = ref 0
(* name of the actually processed parsing function *)
let generated_prod = ref ""
let newInputVar () =
let res = pr "input_%d" !inputCnt in
incr inputCnt;
res
let parsing_function gen_err id = add_flag_suffix gen_err ("try_" ^ id)
let memoization_table (id, err) = "memo_" ^ id ^ (if err then "_err" else "")
(* ############################## Ranges ############################## *)
let compile_class ctx cls =
let current_char_var = "c" in
let current_char = var current_char_var in
let encode_range = function
| T.Any -> OcamlG.true_
| T.One c -> OcamlG.equal current_char (OcamlG.char c)
| T.Range (c1, c2) ->
OcamlG.band
(OcamlG.ge current_char (OcamlG.char c1))
(OcamlG.le current_char (OcamlG.char c2))
in
let rec encode_class = function
| [] -> OcamlG.false_
| [x] -> encode_range x
| x::xs -> OcamlG.bor (encode_range x) (encode_class xs)
in
let trivial_class =
let rec aux = function
| [] -> false
| T.Any::_xs -> true
| _x::xs -> aux xs
in
aux cls
in
let class_cnd = encode_class cls in
let check_eof = OcamlG.lt (var ctx.input) (var "_len") in
let return x = (* TODO: refactor this pattern *)
if !auto_ast then
Ocaml.ConstructorPV ([Ident.source "Class"], [x])
else
x
in
let process_class =
let new_pos = call_fun [var "succ"; var ctx.input] in
let res = OcamlG.tuple [new_pos; return current_char] in
if trivial_class then
OcamlG.some res
else
Ocaml.Cond (class_cnd, OcamlG.some res, OcamlG.none)
in
let expected s = Ocaml.Constructor (List.map Ident.source (trx_runtime "Expected"), [OcamlG.string s]) in
let err =
let rec aux = function
| [] -> []
| T.Any :: _ -> raise (Final (expected "any character"))
| T.One c::cs -> expected (pr "'%c'" c)::aux cs
| T.Range (c1, c2)::cs -> expected (pr "['%c'-'%c']" c1 c2) :: aux cs
in
let l =
try
List.sort Pervasives.compare (aux cls)
with
Final c -> [c]
in
OcamlG.list l
in
let process =
let get_char = call_fun [var "_get_char"; var ctx.input] in
Ocaml.make_Letin (Ocaml.pf current_char_var) get_char process_class
in
let res = Ocaml.Cond (check_eof, process, OcamlG.none) in
if ctx.gen_err then
call_option_to_res_err res (var ctx.input) err
else
res
let recognize_class ctx cls =
let range id cs = Ocaml.ConstructorPV ([Ident.source "Tgrammar"; Ident.source id], List.map OcamlG.char cs) in
let encode_range = function
| T.Any -> range "Any" []
| T.One c -> range "One" [c]
| T.Range (c1, c2) -> range "Range" [c1; c2]
in
let range_enc = OcamlG.list (List.map encode_range cls) in
let range_var = "range" in
let res = call_process_range (var ctx.input) (var range_var) in
let res =
if !auto_ast then
let return res = Ocaml.ConstructorPV ([Ident.source "Class"], [var res]) in
map_return_value ctx.gen_err res return
else
res
in
let err = call_range_to_error (var range_var) in
let result =
if ctx.gen_err then
call_option_to_res_err res (var ctx.input) err
else
res
in
Ocaml.make_Letin (Ocaml.pf range_var) range_enc result
(* ############################## Literals ############################## *)
let match_literal ctx literal case offset success failure =
let n = String.length literal in
let end_offset = OcamlG.plus (var ctx.input) (OcamlG.int (n + offset)) in
let within_input = OcamlG.le end_offset (var "_len") in
let cmp_equal =
let rec aux i =
if i = String.length literal then
OcamlG.true_
else
let this_pos = OcamlG.plus (var ctx.input) (OcamlG.int (i + offset)) in
let input_char = call_fun [var "_get_char"; this_pos] in
let literal_char = OcamlG.char (String.get literal i) in
let cmp_this_char =
if case then
OcamlG.equal input_char literal_char
else
call_fun [vars ["Char"; "equal_insensitive"]; input_char; literal_char]
in
OcamlG.band cmp_this_char (aux (i + 1))
in
aux 0
in
Ocaml.Cond (OcamlG.band within_input cmp_equal, success end_offset, failure)
let compile_literal ctx literal case =
let literalStr = OcamlG.string (if case then literal else String.lowercase literal) in
let return x =
if !auto_ast then
Ocaml.ConstructorPV ([Ident.source "Literal"], [var ctx.input; x])
else
x
in
let produce_result end_offset = OcamlG.some (OcamlG.tuple [end_offset; return literalStr]) in
match_literal ctx literal case 0 produce_result OcamlG.none
let recognize_literal ctx literal case =
let res = call_process_literal (var ctx.input) (OcamlG.string literal) (OcamlG.bool case) in
if !auto_ast then
let return res = Ocaml.ConstructorPV ([Ident.source "Literal"], [var ctx.input; var res]) in
map_return_value ctx.gen_err res return
else
res
module CharOrder : (OrderedTypeSig.S with type t = char) =
struct
type t = char
let compare = Char.compare
end
module CharMap = BaseMap.Make (CharOrder)
let literals_choice ctx lits =
let prefixes (x, _) (y, _) = String.is_prefix y x in
let rec check_lit = function
| [] -> ()
| x::xs ->
match List.filter (prefixes x) xs with
| [] -> check_lit xs
| y::_ -> failwith (Printf.sprintf "Literal %s shadows %s; please re-order them!" (fst y) (fst x))
in
let rec make_choice def pos matched options =
let result s code pos =
let pos_code = OcamlG.plus (var ctx.input) (OcamlG.int pos) in
let posArg = argValue first_pos in
let ss = OcamlG.string s in
let v =
if !auto_ast then
Ocaml.ConstructorPV ([Ident.source "Literal"], [pos_code; ss])
else
match code with
| None -> ss
| Some c ->
let argNeeded = String.is_contained posArg c in
let res = Ocaml.Verbatim ("(" ^ c ^ ")") in
if argNeeded then
Ocaml.make_Letin (Ocaml.pf posArg) ss res
else
res
in
OcamlG.some (OcamlG.tuple [pos_code; v])
in
let build_match_map s code map =
if s = "" then
map
else
let c = s.[0] in
let rest = String.right s (-1) in
let cset = Option.default StringMap.empty (CharMap.find_opt c map) in
CharMap.add c (StringMap.add rest code cset) map
in
let rec next_branch str options old_def =
let accepting = StringMap.find_opt "" options in
let def =
match accepting with
| Some code -> result matched code (pos + String.length str)
| None -> old_def
in
(* Printf.eprintf "<%s> --- <%s> --- %s (def = %s)\n" matched str (String.concat_map ", " (fun s -> "<" ^ s ^ ">") (StringMap.keys options)) (if def = OcamlG.none then "-" else "X");*)
if str <> "" && accepting <> None then
old_def, `MatchString (str, options)
else if StringMap.is_empty options || (accepting <> None && StringMap.size options = 1) then
def, `Terminal
else
let match_map = StringMap.fold build_match_map options CharMap.empty in
if CharMap.size match_map > 1 then
if String.length str > 0 then
def, `MatchString (str, options)
else
def, `Branch match_map
else
let c, opts = CharMap.random match_map in
next_branch (str ^ String.make 1 c) opts def
in
(* Printf.eprintf "---------\n";*)
match next_branch "" options def with
| def, `Terminal ->
(* Printf.eprintf "TERMINAL\n";*)
def
| def, `MatchString (str, options) ->
(* Printf.eprintf "MATCH_STRING (%s, ...)\n" str;*)
if str = "" then
def
else
let rest = make_choice def (pos + String.length str) (matched ^ str) options in
match_literal ctx str true pos (fun _ -> rest) def
| def, `Branch match_map ->
(* Printf.eprintf "BRANCH: {%s}\n" (String.concat_map ", " (String.make 1) (CharMap.keys match_map));*)
let end_offset = OcamlG.plus (var ctx.input) (OcamlG.int pos) in
let within_input = OcamlG.lt end_offset (var "_len") in
let do_match =
let build_match_case c vs cases =
let pattern = Ocaml.PatConst (Ocaml.Char c) in
let new_matched = matched ^ String.make 1 c in
let value = make_choice def (pos + 1) new_matched vs in
let guard = None in
(pattern, guard, value) :: cases
in
let input_char = call_fun [var "_get_char"; end_offset] in
let default_case = Ocaml.PatAny, None, def in
let cases = CharMap.fold build_match_case match_map [] @ [default_case] in
OcamlG.make_match input_char cases
in
Ocaml.Cond (within_input, do_match, def)
in
check_lit lits;
let res = make_choice OcamlG.none 0 "" (StringMap.from_list lits) in
if ctx.gen_err then
let err_msg = String.concat_map " or " (fun l -> "\"" ^ fst l ^ "\"") lits in
call_option_to_res_msg res (var ctx.input) (OcamlG.string err_msg)
else
res
(* ############################## Choice ############################## *)
let rec generate_exp ctx = function
| P.App _ -> failwith "Unexpected App"
| P.Expr [] -> assert false
| P.Expr [s] ->
let res = generate_seq ctx s in
call_gatherErrors ctx.gen_err res
| P.Expr (s::ss) ->
let get_literals lits e =
match lits with
| None -> None
| Some ls ->
match e with
| [_, P.Literal (l, true), _], _, c ->
begin match c with
| Some (false, ".sub", _, _)
| None -> Some ((l, None) :: ls)
| Some (false, c, _, false) -> Some ((l, Some c)::ls)
| _ -> None
end
| _ -> None
in
match List.fold_left get_literals (Some []) (s::ss) with
| None ->
let res = generate_exp ctx (P.Expr ss) in
let res =
if ctx.gen_err then
call_addErrorInfo (var "err") res
else
res
in
let return x =
if !auto_ast then
let decorate res = Ocaml.ConstructorPV ([Ident.source "Choice"], [res]) in
map_return_value ctx.gen_err x (fun res -> decorate (var res))
else
x
in
let ge = ctx.gen_err in
OcamlG.make_match (generate_seq ctx s) [
pat_fail ge (pvar "err") (pvar "lai"),
None,
map_lai_value_to_max ge res (var "lai")
;
pvar "ok",
None,
call_gatherErrors ge (return (var "ok"))
]
| Some lits ->
literals_choice ctx lits
(* ############################## Sequence ############################## *)
and generate_seq ctx (items, map, code) =
(* if there is no provided [code] and there is only one part of the sequence that
can be used as a result - use it *)
(* jlog ~color:`green ~level:3 (pr "generate_seq [gen_err: %b]" ctx.gen_err);*)
(* We substitute proper variables into [_pos_beg] and [_pos_end] variables,
* substitute for positional variables (named parts of a sequence and catch
* exceptions thrown from productions' code.
*)
let code_uses_arg argNo =
match code with
| None ->
begin match default_code_for_seq items with
| None -> false
| Some (_, _, _, a) -> a == argNo
end
| Some (_, c, _, _) ->
let used_in_label _key (i, _) b = b || (int_of_string i = argNo) in
String.is_contained (argValue argNo) c || StringMap.fold used_in_label map false
in
let code_transform input =
if !auto_ast then begin
let make_arg i = var (argValue (first_pos + i)) in
let args = List.init (List.length items) make_arg in
let node = Ocaml.ConstructorPV ([Ident.source "Seq"], [var ctx.input; var input; OcamlG.list args]) in
Some (false, node, false)
end else
match code with
| None ->
Option.map (fun (r, c, b, _) -> (r, c, b)) (default_code_for_seq items)
| Some (range, ".sub", _, _) ->
let get_substring = call_string_sub (var ctx.input) (OcamlG.minus (var input) (var ctx.input)) in
Some (range, get_substring, false)
| Some (range, c, loc, backtraceable) ->
let subst_vars = function
| "_pos_beg" -> ctx.input
| "_pos_end" -> input
| s -> s
in
let tokens = snd (Tokenizer.parse_tokenizer_tokens c) in
(* substitute into _pos_beg and _pos_end variables *)
let tokens = List.map subst_vars tokens in
let code = String.concat "" tokens in
(* substitute into named variables *)
let is_var_used v = List.mem v tokens in
let map = StringMap.filter_keys is_var_used map in
let gen_label (key, (i, b)) =
if b then
failwith "TRX: unsupported type of labels"
else
let argNo = int_of_string i in
Ocaml.pf key, var (argValue argNo)
in
let user_code =
let code = OcamlG.verbatim (pr "( %s )" code) in
trx_annotate_location loc code
in
let user_code =
if StringMap.is_empty map then
user_code
else
let labels = List.map gen_label (StringMap.to_list map) in
Ocaml.Letin (labels, user_code)
in
Some (range, user_code, backtraceable)
in
let rec aux input argNo = function
| [] ->
let err = call_emptyError (var input) in
let res_code, backtraceable =
match code_transform input with
| None -> OcamlG.unit, false
| Some (range, base, backtraceable) ->
let c =
if range then
OcamlG.tuple [OcamlG.tuple [var ctx.input; var input]; base]
else
base
in
c, backtraceable
in
let ge = ctx.gen_err in
if backtraceable then
OcamlG.make_match res_code [
OcamlG.pat_none,
None,
fail ge err (var input)
;
OcamlG.pat_some (pvar "res"),
None,
ok ge (var input) (var "res") err (var input)
]
else
let res = ok ge (var input) res_code err (var input) in
call_gatherErrors ge res
| i::is ->
let argResult = argValue argNo in
let inputVar = newInputVar () in
let res = aux inputVar (argNo+1) is in
let res =
if ctx.gen_err then
call_addErrorInfo (var "err") res
else
res
in
let genArg_res =
match ctx.gen_res with
| false -> false
| true ->
if !auto_ast then
true
else if !opt_gen_res then
code_uses_arg argNo
else
true
in
let ge = ctx.gen_err in
let item_ctx = { ctx with input=input; gen_res=genArg_res } in
OcamlG.make_match (generate_item item_ctx i) [
pat_fail ge (pvar "err") (pvar "lai"),
None,
call_gatherErrors ge (fail ge (var "err") (var "lai"))
;
pat_ok ge (pvar inputVar) (pvar argResult) (pvar "err") (pvar "lai"),
None,
map_lai_value_to_max ge res (var "lai")
]
in
aux ctx.input 1 items
(* ############################## Item ############################## *)
and generate_item ctx (prefix, primary, suffix) =
let predicate_res pred =
let res =
if !auto_ast then
Ocaml.ConstructorPV ([Ident.source pred], [])
else
OcamlG.unit
in
res
in
let ge = ctx.gen_err in
match prefix with
| `NORMAL -> generate_suffix ctx (primary, suffix)
| `AND ->
let r = generate_suffix ctx (primary, suffix) in
OcamlG.make_match r [
pat_fail ge (pvar "err") (pvar "lai"),
None,
fail ge (var "err") (var "lai")
;
pat_ok ge pany pany (pvar "err") (pvar "lai"),
None,
ok ge (var ctx.input) (predicate_res "And") (var "err") (var "lai")
]
| `NOT ->
let r = generate_suffix ctx (primary, suffix) in
OcamlG.make_match r [
pat_fail ge (pvar "err") (pvar "lai"),
None,
ok ge (var ctx.input) (predicate_res "Not") (var "err") (var "lai")
;
pat_ok ge pany pany (pvar "err") (pvar "lai"),
None,
fail ge (var "err") (var "lai")
]
(* ############################## Suffix ############################## *)
and generate_suffix ctx (primary, suffix) =
match suffix with
| `NORMAL ->
generate_primary ctx primary
| `QUESTION ->
let r = generate_primary ctx primary in
let inputVar = newInputVar () in
let return v =
let ret =
if !auto_ast then
Ocaml.ConstructorPV ([Ident.source "Option"], [v])
else
v
in
ret
in
let ge = ctx.gen_err in
OcamlG.make_match r [
pat_fail ge (pvar "err") (pvar "lai"),
None,
ok ge (var ctx.input) (return OcamlG.none) (var "err") (var "lai")
;
pat_ok ge (pvar inputVar) (pvar "r") (pvar "err") (pvar "lai"),
None,
ok ge (var inputVar) (return (OcamlG.some (var "r"))) (var "err") (var "lai")
]
| `STAR | `PLUS ->
let inputVar = newInputVar () in
let f = OcamlG.lambda (Ident.source inputVar) (generate_primary { ctx with input = inputVar } primary) in
let res = call_while_primary ctx (OcamlG.bool (suffix = `PLUS)) f (var ctx.input) in
if !auto_ast then
let return v =
let cons =
match suffix with
| `STAR -> "Star"
| `PLUS -> "Plus"
| _ -> assert false
in
Ocaml.ConstructorPV ([Ident.source cons], [var v])
in
map_return_value ctx.gen_err res return
else
res
(* ############################## Primary ############################## *)
and generate_primary ctx = function
| P.Ident id ->
let pf = parsing_function ctx.gen_err id in
G.add_edge dep_g !generated_prod pf;
(*jlog ~color:`green ~level:3 (pr "Function dependency: %s --> %s" !generated_prod pf);*)
call_fun (List.map var (pf :: grammar_extras ctx.peg @ [main_param_filename; main_param_text; ctx.input]))
| P.Paren exp ->
generate_exp ctx exp
| P.Class cl ->
if !opt_inline_ranges then
compile_class ctx cl
else
recognize_class ctx cl
| P.Literal (literal, case) ->
let res =