-
Notifications
You must be signed in to change notification settings - Fork 125
/
parser_private.opa
257 lines (234 loc) · 8.37 KB
/
parser_private.opa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
/*
Copyright © 2011 MLstate
This file is part of OPA.
OPA is free software: you can redistribute it and/or modify it under the
terms of the GNU Affero General Public License, version 3, as published by
the Free Software Foundation.
OPA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
more details.
You should have received a copy of the GNU Affero General Public License
along with OPA. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @author Corentin Gallet, October 2009 (now using text iterators)
* @author Adam Koprowski, February 2010 (clean-up, extensions, documentation, ...)
* @category parsing
* @destination private
*/
/**
* {1 About this module}
*
* This module contains some low-level functions for parsing, as used by TRX.
* We probably do not want to expose this interface to end-users.
*
* {1 Where should I start?}
*
* {1 What if I need more?}
*/
/**
* {1 Types defined in this module}
*/
/**
* [Parser.general_parser('a)] is a type of a parser that returns a value of type ['a].
* Such a parser needs to be given a boolean (indicating whether we want a partial
* parsing, where partial parsing means that there may be some input remaining after
* parsing, whereas with non-partial parsing all input must be consumed) and a text
* iterator and it returns either [none] if parsing failed or a pair [some((i, v))]
* where [i] is a new text iterator (at the new position in the input) and [v] is
* the value associated with the parsed construct.
*/
type Parser.private_general_parser('a) = bool, itextrator -> option((itextrator, 'a))
@opacapi
type Parser.general_parser('a) = Parser.private_general_parser('a)
/**
* [Parser_private.range] represents a range of characters.
*/
type Parser_private.range = { one : Unicode.character }
/** a single character [one] */
/ { from : Unicode.character ; to : Unicode.character }
/** a range of characters between [from] and [to] */
/ { any }
/** an arbitrary character */
/**
* {1 Interface}
*/
Parser_private =
{{
check_partial(partial, result) =
if partial then
result
else
match result : option with
| {none} -> none
| {some=(it, _res)} ->
match Itextrator.next(it) with
| {none} -> result
/* parsing is non-partial and there is some input left, so parsing fails */
| {some=_} -> none
/**
* [char_equal(case_sensitive, c1, c2)] is true iff characters [c1] and [c2] are
* equal. If [case_sensitive=true] then the comparison should be case-sensitive.
*/
char_equal(case_sensitive, c1, c2) =
c1 == c2 ||
(not(case_sensitive) &&
Text.lowercase(Text.from_character(c1)) ==
Text.lowercase(Text.from_character(c2)))
/**
* [match_char(c, r)] returns true iff character [c] matches
* any of the ranges in [r].
*/
match_char(c, l)=
rec aux(range) = match range : list(Parser_private.range) with
| [] -> false
| [{any} | _] -> true
| [{~one} | tl] -> one == c || aux(tl)
| [{~from ~to} | tl] -> (c >= from && c <= to) || aux(tl)
aux(l)
/**
* [primary_list] repeatedly applies a given parsing function on the input
* for as long as it succeeds. The result of [primary_list] is a list of
* results produced by the parsing function. If [is_plus] then the function
* must succeed at least once, or else [primary_list] fails.
*/
primary_list(is_plus, f, init_pos) =
rec aux(acc, pos) =
match f(pos) : option with
| {some = (newpos, res)} ->
aux(res +> acc, newpos)
| _ ->
(pos, List.rev(acc))
res = aux([], init_pos);
if is_plus && List.is_empty(res.f2) then
none
else
some(res)
/**
* As [primary_list] but we do not care about the result.
*/
primary_list_no_res(is_plus, f, init_pos) =
rec aux(pos, input_needed) =
match f(pos) : option with
| {some = (newpos, _)} ->
aux(newpos, false)
| _ ->
if input_needed then
none
else
some((pos, void))
aux(init_pos, is_plus)
/*
* [parse_literal(it, literal)] checks whether the text contained by
* the iterator [it] corresponds to a string [literal]. If this is the
* case then the function returns [some(it', literal)], where [it'] is [it]
* after consuming its [literal] prefix; otherwise the function returns [none].
*/
parse_literal(it, literal) =
itpos = Itextrator.pos(it)
ittxt = Itextrator.txt(it)
if itpos + String.length(literal) > String.length(ittxt) then
none
else
if String.check_substring(ittxt, itpos, literal) then
new_it = Itextrator.forward(it, String.length(literal))
some((new_it, literal))
else
none
/**
* As [parse_literal_case_sensitive] but the comparison of characters is
* *not* case-sensitive.
*/
parse_literal_case_insensitive(it, literal) =
rec aux(input : itextrator, literal : itextrator) =
match Itextrator.next(literal) with
| {none} -> some(input)
| {some = (new_literal, c1)} ->
match Itextrator.next(input) with
| {none} -> none
| {some = (new_input, c2)} ->
if char_equal(false, c1, c2) then
aux(new_input, new_literal)
else
none
literal_it = Itextrator.make(literal)
match aux(it, literal_it) with
| {none} -> none
| {some = new_it} -> some((new_it, literal))
/**
* [parse_range(it, l)] takes a text iterator [it] and a list of ranges [l]
* and returns [none] if none of the elements represented in [l] conforms
* to the first character in [it], and otherwise it returns [some(it',c)]
* where [it'] is [it.next()] and [c] is the first character of [it].
*/
parse_range(it : itextrator, l) =
res = Itextrator.next(it) : option
match res with
| {none} ->
none
| {some = (_, c)} ->
if match_char(c, l) then
res
else
none
/**
* Generic function for testing that parsing and pretty-printing functions
* are inverses of each other. We cannot simply check for:
*
* [to_string(of_string(s)) == s]
*
* as the input string will typically have some stuff that will be thrown
* away in its data representation types (think: white space). But instead
* we perform this test with its normalized version i.e. with:
*
* [s_normalized = to_string(of_string(s))]
*
* we check that:
*
* [to_string(of_string(s_normalized)) == s_normalized]
*
* So in a sense we check for the fixed point of the initial condition.
*
* @param to_string a function converting from this data type to string
* @param of_string a function converting from string to some data type
* @param s a string on which to test whether parsing and pretty printing
* are inverses in the above sense.
* @return a boolean value indicating whether the test was successful
*/
test_parsing(to_string, of_string, s) =
process = compose(to_string,of_string)
s_normalized = process(s)
process(s_normalized) == s_normalized
/**
* As {!test_parsing} only now the [of_string] function gives an
* option type (i.e. parsing may fail). Though, the test succeeds
* only if it does not (on given inputs).
* @param correct indicates whether the given input string is correct,
* if not then the test succeeds if the parsing fails.
*/
test_try_parsing(to_string, of_string, s, correct) =
process(s) = Option.map(to_string, of_string(s))
if correct then
match process(s) with
| {none} -> false
| {some = v1} ->
match process(v1) with
| {none} -> false
| {some = v2} -> v1 == v2
else
process(s) == none
/**
* As {!test_try_parsing} but we start from a data element and not from
* a string and check that the value after printing and parsing is the same
* one that we started with.
*/
test_try_parsing_with_data(to_string, of_string, data, correct) =
dp = of_string(to_string(data))
if correct then
match dp with
| {none} -> false
| {some = new_data} -> data == new_data
else
dp == none
}}