Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 345 lines (309 sloc) 16.981 kb
f5e912f Jakub Higersberger Plurarize combinators in system/package name
authored
1 (in-package :parser-combinators)
0ead384 Jakub Higersberger Add greedy version of basic combinators.
authored
2
3 ;;; greedy version of repetition combinators
4
a1bddb9 Jakub Higersberger Change uses of MDO to NAMED-SEQ?/* as appropriate.
authored
5 (defun seq-list* (&rest parsers)
6 "Non-backtracking parser: Return a list of result of PARSERS."
7 (assert parsers)
8 (let ((parsers (map 'vector #'ensure-parser parsers)))
9 (define-oneshot-result inp is-unread
10 (iter (for parser in-vector parsers)
11 (for inp-prime initially inp then (suffix-of result))
12 (for result = (funcall (funcall parser inp-prime)))
13 (while result)
14 (collect result into results)
15 (finally (return
16 (when result
17 (make-instance 'parser-possibility
18 :tree (mapcar #'tree-of results)
19 :suffix (suffix-of result)))))))))
20
21 (defmacro named-seq* (&rest parser-descriptions)
22 "Non-backtracking parser: This is similar to MDO, except that constructed parsers cannot depend on
23 the results of previous ones and the final form is not used as a parser, but is automatically used
24 to construct the result. All names bound using the (<- name parser) construct are only available in
25 that final form.
26
27 This parser generator is useful when full generality of MDO is not necessary, as it is implemented
28 non-recursively and has better memory performance."
29 `(%named-seq? seq-list* ,@parser-descriptions))
30
a91adb7 Kosyrev Serge Add MDO*: like NAMED-SEQ*, but with MDO syntax: the last element must be...
deepfire authored
31 (defmacro mdo* (&body spec)
32 "Like NAMED-SEQ*, but with MDO syntax: the last element must be a parser."
33 (with-gensyms (ret)
34 `(named-seq*
35 ,@(butlast spec)
36 (<- ,ret ,(lastcar spec))
37 ,ret)))
38
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
39 (defun between* (parser min max &optional (result-type 'list))
40 "Non-backtracking parser: find the first, longest chain of expression accepted by parser of length between min and max"
41 (assert (or (null min)
42 (null max)
43 (>= max min)))
44 ;; min=zero or nil means accept zero width results
45 (assert (or (null min)
46 (zerop min)
47 (plusp min)))
48 ;; can't have 0-0 parser
49 (assert (or (null max)
50 (plusp max)))
a1c55d9 Jakub Higersberger Make vectors and characters accepted as parsers.
authored
51 (with-parsers (parser)
52 (define-oneshot-result inp is-unread
53 (iter (for count from 0)
54 (for result next (funcall (funcall parser inp-prime)))
55 (while (and result
56 (or (null max)
57 (< count max))))
174c0cb Jakub Higersberger Update package, use make-context-at-position, add between* infinite loop...
authored
58 (for inp-prime initially inp then
59 (if (eql inp-prime (suffix-of result))
60 (error "Subparser in repetition parser didn't advance the input.")
61 (suffix-of result)))
a1c55d9 Jakub Higersberger Make vectors and characters accepted as parsers.
authored
62 (collect result into results)
63 (finally (return
64 (when (or (null min)
65 (>= count min))
66 (make-instance 'parser-possibility
67 :tree (map result-type #'tree-of results)
68 :suffix inp-prime))))))))
2769794 Jakub Higersberger Define iterative version of many*
authored
69
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
70 (defun many* (parser)
71 "Non-backtracking parser: collect as many of first result of parser as possible"
72 (between* parser nil nil))
0ead384 Jakub Higersberger Add greedy version of basic combinators.
authored
73
74 (defun many1* (parser)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
75 "Non-backtracking parser: accept as many as possible, and at least one, of parser"
76 (between* parser 1 nil))
0ead384 Jakub Higersberger Add greedy version of basic combinators.
authored
77
28e2775 Jakub Higersberger Add some more repetition parsers.
authored
78 (defun atleast* (parser count)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
79 "Non-backtracking parser: accept as many as possible and at least count of parser"
80 (between* parser count nil))
28e2775 Jakub Higersberger Add some more repetition parsers.
authored
81
82 (defun atmost* (parser count)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
83 "Non-backtracking parser: accept as many as possible but at most count of parser"
84 (between* parser nil count))
28e2775 Jakub Higersberger Add some more repetition parsers.
authored
85
0ead384 Jakub Higersberger Add greedy version of basic combinators.
authored
86 (defun sepby1* (parser-item parser-separator)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
87 "Non-backtracking parser: accept as many as possible of parser-item separated by parser-separator, but at least one."
d1852c2 Jakub Higersberger Remove obviously redundant uses of WITH-PARSERS
authored
88 (named-seq* (<- x parser-item)
89 (<- xs (many* (named-seq* parser-separator
90 (<- y parser-item)
91 y)))
92 (cons x xs)))
0ead384 Jakub Higersberger Add greedy version of basic combinators.
authored
93
94 (defun sepby* (parser-item parser-separator)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
95 "Non-backtracking parser: accept as many as possible of parser-item separated by parser-separator."
d1852c2 Jakub Higersberger Remove obviously redundant uses of WITH-PARSERS
authored
96 (choice1 (sepby1* parser-item parser-separator)
97 (result nil)))
0ead384 Jakub Higersberger Add greedy version of basic combinators.
authored
98
99 (defun chainl1* (p op)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
100 "Non-backtracking parser: accept as many as possible, but at least one of p, reduced by result of op with left associativity"
a1c55d9 Jakub Higersberger Make vectors and characters accepted as parsers.
authored
101 (with-parsers (p op)
102 (labels ((rest-chain (init-x)
103 (define-oneshot-result inp is-unread
104 (let ((final-result (iter (for f-result next (funcall (funcall op p-inp)))
105 (while f-result)
106 (for f-inp next (suffix-of f-result))
107 (for p-result next (funcall (funcall p f-inp)))
108 (while p-result)
109 (for p-inp initially inp then (suffix-of p-result))
110 (for f = (tree-of f-result))
111 (for x initially init-x then tree)
112 (for y = (tree-of p-result))
113 (for tree next (funcall f x y))
114 (finally (return (list tree p-inp))))))
115 (if (car final-result)
116 (make-instance 'parser-possibility
117 :tree (car final-result)
118 :suffix (cadr final-result))
119 (make-instance 'parser-possibility
120 :tree init-x :suffix inp))))))
121 (bind p #'rest-chain))))
0ead384 Jakub Higersberger Add greedy version of basic combinators.
authored
122
bdf0843 Jakub Higersberger Make whitespace? and word? backtracking, export new symbols.
authored
123 (def-cached-arg-parser whitespace* (&key (result-type nil) (accept-empty nil))
124 "Non-backtracking parser: accept a sequence of whitespace characters."
125 (gather-if* (rcurry #'member '(#\Space #\Newline #\ ))
126 :result-type result-type
127 :accept-empty accept-empty))
128
129 (def-cached-parser word*
eaafdd4 Jakub Higersberger Close #3. Change word? and word* to accept alphanumeric characters. Chan...
authored
130 "Parser: accept a string of alphanumeric characters"
131 (gather-if* #'alphanumericp :result-type 'string))
bdf0843 Jakub Higersberger Make whitespace? and word? backtracking, export new symbols.
authored
132
3f5e33b Jakub Higersberger Add pure-word?/* which accepts only alphabetic characters.
authored
133 (def-cached-parser pure-word*
134 "Parser: accept a string of alphabetic characters"
135 (gather-if* #'alpha-char-p :result-type 'string))
136
90ae404 Jakub Higersberger Add radix not natural number parser and make backtracking one actually b...
authored
137 (defun nat* (&optional (radix 10))
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
138 "Non-backtracking parser: accept natural number, consuming as many digits as possible"
90ae404 Jakub Higersberger Add radix not natural number parser and make backtracking one actually b...
authored
139 (named-seq* (<- number (gather-if* (rcurry #'digit-char-p radix) :result-type 'string))
140 (parse-integer number :radix radix)))
0ead384 Jakub Higersberger Add greedy version of basic combinators.
authored
141
8d09e99 Jakub Higersberger Fix integer parsers.
authored
142 (defun int* (&optional (radix 10))
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
143 "Non-backtracking parser: accept integer, consuming as many digits as possible"
8d09e99 Jakub Higersberger Fix integer parsers.
authored
144 (named-seq* (<- sign (choices1 #\+ #\- (result #\+)))
145 (<- n (nat* radix))
146 (* (if (eql sign #\+) 1 -1) n)))
b5831ea Jakub Higersberger Add greedy integers
authored
147
0ead384 Jakub Higersberger Add greedy version of basic combinators.
authored
148 (defun chainr1* (p op)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
149 "Non-backtracking parser: accept as many as possible, but at least one of p, reduced by result of op with right associativity"
a1c55d9 Jakub Higersberger Make vectors and characters accepted as parsers.
authored
150 (with-parsers (p op)
151 (bind p
152 #'(lambda (init-x)
153 (define-oneshot-result inp is-unread
154 (let ((final-result
155 (iter (for f-result next (funcall (funcall op p-inp)))
156 (while f-result)
157 (for f-inp next (suffix-of f-result))
158 (for p-result next (funcall (funcall p f-inp)))
159 (while p-result)
160 (for p-inp initially inp then (suffix-of p-result))
161 (for f = (tree-of f-result))
162 (for y = (tree-of p-result))
163 (collect f into function-list)
164 (collect y into y-list)
165 (finally (let ((rev-y-list (nreverse (cons init-x y-list))))
166 (return (list (iter (for x in (cdr rev-y-list))
167 (for f in function-list)
168 (for tree next (if (first-iteration-p)
169 (funcall f x (car rev-y-list))
170 (funcall f x tree)))
171 (finally (return tree)))
172 p-inp)))))))
173 (if (car final-result)
174 (make-instance 'parser-possibility
175 :tree (car final-result)
176 :suffix (cadr final-result))
177 (make-instance 'parser-possibility
178 :tree init-x :suffix inp))))))))
0ead384 Jakub Higersberger Add greedy version of basic combinators.
authored
179
180 (defun chainl* (p op v)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
181 "Non-backtracking parser: like chainl1*, but will return v if no p can be parsed"
d1852c2 Jakub Higersberger Remove obviously redundant uses of WITH-PARSERS
authored
182 (choice1
183 (chainl1* p op)
184 (result v)))
0ead384 Jakub Higersberger Add greedy version of basic combinators.
authored
185
186 (defun chainr* (p op v)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
187 "Non-backtracking parser: like chainr1*, but will return v if no p can be parsed"
d1852c2 Jakub Higersberger Remove obviously redundant uses of WITH-PARSERS
authored
188 (choice1
189 (chainr1* p op)
190 (result v)))
4a18050 Jakub Higersberger Change string? to non-recursive, and add times*.
authored
191
192 (def-cached-arg-parser times* (parser count)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
193 "Non-backtracking parser: accept exactly count expressions accepted by parser, without backtracking."
194 (between* parser count count))
4a18050 Jakub Higersberger Change string? to non-recursive, and add times*.
authored
195
72f1b52 Jakub Higersberger Fix find-after?/*
authored
196 (defun find-after* (p q)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
197 "Non-backtracking parser: Find first q after some sequence of p."
a1c55d9 Jakub Higersberger Make vectors and characters accepted as parsers.
authored
198 (with-parsers (p q)
199 (define-oneshot-result inp is-unread
200 (iter (for p-result next (funcall (funcall p inp-prime)))
201 (for q-result next (funcall (funcall q inp-prime)))
202 (while (and p-result (null q-result)))
203 (for inp-prime initially inp then (suffix-of p-result))
204 (finally (return
205 (when q-result
206 (make-instance 'parser-possibility
207 :tree (tree-of q-result)
208 :suffix (suffix-of q-result)))))))))
025c025 Jakub Higersberger Add first-after-collect?/*, make non-backtracking version find q as soon...
authored
209
31d36fa Jakub Higersberger Add parameter to gathering functions to allow zero width matches, also a...
authored
210 (defgeneric gather-if-not*-using-context (input predicate accept-end accept-empty)
8fc91f5 Jakub Higersberger Add gather-if-not*-using-context
authored
211 (:documentation "Parser gather-if-not* specialized on context type")
31d36fa Jakub Higersberger Add parameter to gathering functions to allow zero width matches, also a...
authored
212 (:method ((input end-context) predicate accept-end accept-empty)
213 (if (and accept-end accept-empty)
bd039c5 Jakub Higersberger Add end-context version for gather-if-not*-using-context
authored
214 (values nil input)
215 (values nil nil)))
31d36fa Jakub Higersberger Add parameter to gathering functions to allow zero width matches, also a...
authored
216 (:method ((input context) predicate accept-end accept-empty)
f84cb6a Jakub Higersberger Add more parsers, trying to optimize certain common cases. Also add sequ...
authored
217 (iter (until (or (end-context-p inp-prime)
cb103f4 Jakub Higersberger Generalize gather-before-token* parser.
authored
218 (funcall predicate (context-peek inp-prime))))
8fc91f5 Jakub Higersberger Add gather-if-not*-using-context
authored
219 (for inp-prime initially input then (context-next inp-prime))
f84cb6a Jakub Higersberger Add more parsers, trying to optimize certain common cases. Also add sequ...
authored
220 (collect (context-peek inp-prime) into results)
221 (finally (return
222 (when (and results
223 (or (and accept-end (end-context-p inp-prime))
cb103f4 Jakub Higersberger Generalize gather-before-token* parser.
authored
224 (funcall predicate (context-peek inp-prime))))
8fc91f5 Jakub Higersberger Add gather-if-not*-using-context
authored
225 (values results inp-prime))))))
31d36fa Jakub Higersberger Add parameter to gathering functions to allow zero width matches, also a...
authored
226 (:method ((input vector-context) predicate accept-end accept-empty)
8fc91f5 Jakub Higersberger Add gather-if-not*-using-context
authored
227 (let ((input-vector (storage-of input)))
31d36fa Jakub Higersberger Add parameter to gathering functions to allow zero width matches, also a...
authored
228 (check-type input-vector vector)
8fc91f5 Jakub Higersberger Add gather-if-not*-using-context
authored
229 (let ((end-position (position-if predicate input-vector :start (position-of input))))
230 (cond ((and accept-end (null end-position))
231 (values (subseq input-vector (position-of input))
174c0cb Jakub Higersberger Update package, use make-context-at-position, add between* infinite loop...
authored
232 (make-context-at-position input (length input-vector))))
233 ((and end-position (or accept-empty (> end-position (position-of input))))
8fc91f5 Jakub Higersberger Add gather-if-not*-using-context
authored
234 (values (subseq input-vector (position-of input) end-position)
174c0cb Jakub Higersberger Update package, use make-context-at-position, add between* infinite loop...
authored
235 (make-context-at-position input end-position)))
8fc91f5 Jakub Higersberger Add gather-if-not*-using-context
authored
236 (t (values nil nil)))))))
237
31d36fa Jakub Higersberger Add parameter to gathering functions to allow zero width matches, also a...
authored
238 (defun gather-if-not* (predicate &key (result-type 'list) (accept-end nil) (accept-empty nil))
8fc91f5 Jakub Higersberger Add gather-if-not*-using-context
authored
239 "Non-backtracking parser: Find a sequence of tokens terminated by one for which predicate returns true, which is not consumed."
240 (define-oneshot-result inp is-unread
31d36fa Jakub Higersberger Add parameter to gathering functions to allow zero width matches, also a...
authored
241 (multiple-value-bind (result new-input) (gather-if-not*-using-context inp predicate accept-end accept-empty)
8fc91f5 Jakub Higersberger Add gather-if-not*-using-context
authored
242 (when new-input
243 (make-instance 'parser-possibility
e5375cf Jakub Higersberger Allow discarding results in gather-if-not*.
authored
244 :tree (when result-type (coerce result result-type))
8fc91f5 Jakub Higersberger Add gather-if-not*-using-context
authored
245 :suffix new-input)))))
f84cb6a Jakub Higersberger Add more parsers, trying to optimize certain common cases. Also add sequ...
authored
246
d01cbee Jakub Higersberger Add gather-if*.
authored
247 (defun gather-if* (predicate &key (result-type 'list) (accept-end t) (accept-empty nil))
248 "Non-backtracking parser: Find a sequence of tokens for which predicate returns true."
249 (gather-if-not* (complement predicate)
250 :result-type result-type
251 :accept-end accept-end
252 :accept-empty accept-empty))
253
31d36fa Jakub Higersberger Add parameter to gathering functions to allow zero width matches, also a...
authored
254 (defun gather-before-token* (token &key (result-type 'list) (test #'eql) (accept-end nil) (accept-empty nil))
cb103f4 Jakub Higersberger Generalize gather-before-token* parser.
authored
255 "Non-backtracking parser: Find a sequence of tokens terminated by single token, which is not consumed."
256 (gather-if-not* #'(lambda (input-token)
257 (funcall test input-token token))
258 :result-type result-type
31d36fa Jakub Higersberger Add parameter to gathering functions to allow zero width matches, also a...
authored
259 :accept-end accept-end
260 :accept-empty accept-empty))
cb103f4 Jakub Higersberger Generalize gather-before-token* parser.
authored
261
f84cb6a Jakub Higersberger Add more parsers, trying to optimize certain common cases. Also add sequ...
authored
262 (defun find-before-token* (p token &key (result-type 'list) (test #'eql))
263 "Non-backtracking parser: Find a sequence of p terminated by single token q, which is not consumed."
264 (with-parsers (p)
265 (define-oneshot-result inp is-unread
266 (iter (for p-result next (funcall (funcall p inp-prime)))
267 (while (and p-result (not (funcall test (context-peek inp-prime) token))))
268 (for inp-prime initially inp then (suffix-of p-result))
269 (collect (tree-of p-result) into p-results)
270 (finally (return
271 (when (funcall test (context-peek inp-prime) token)
272 (make-instance 'parser-possibility
273 :tree (coerce p-results result-type)
274 :suffix inp-prime))))))))
275
276
a7956d4 Jakub Higersberger Add (find-before* ...) and test.
authored
277 (defun find-before* (p q &optional (result-type 'list))
278 "Non-backtracking parser: Find a sequence of p terminated by q, doesn't consume q."
279 (with-parsers (p q)
280 (define-oneshot-result inp is-unread
281 (iter (for p-result next (funcall (funcall p inp-prime)))
282 (for q-result next (funcall (funcall q inp-prime)))
283 (while (and p-result (null q-result)))
284 (for inp-prime initially inp then (suffix-of p-result))
285 (collect (tree-of p-result) into p-results)
286 (finally (return
287 (when q-result
288 (make-instance 'parser-possibility
289 :tree (coerce p-results result-type)
290 :suffix inp-prime))))))))
291
025c025 Jakub Higersberger Add first-after-collect?/*, make non-backtracking version find q as soon...
authored
292 (defun find-after-collect* (p q &optional (result-type 'list))
293 "Non-backtracking parser: Find first q after some sequence of p. Return cons of list of p-results and q"
a1c55d9 Jakub Higersberger Make vectors and characters accepted as parsers.
authored
294 (with-parsers (p q)
295 (define-oneshot-result inp is-unread
296 (iter (for p-result next (funcall (funcall p inp-prime)))
297 (for q-result next (funcall (funcall q inp-prime)))
298 (while (and p-result (null q-result)))
299 (collect p-result into p-results)
300 (for inp-prime initially inp then (suffix-of p-result))
301 (finally (return
302 (when q-result
303 (make-instance 'parser-possibility
304 :tree (cons (map result-type #'tree-of p-results)
305 (tree-of q-result))
306 :suffix (suffix-of q-result)))))))))
025c025 Jakub Higersberger Add first-after-collect?/*, make non-backtracking version find q as soon...
authored
307
c1d16ab Kosyrev Serge Add BEFORE*: find a p before q, doesn't consume q.
deepfire authored
308 (defun before* (p q)
309 "Non-backtracking parser: Find a p before q, doesn't consume q."
310 (with-parsers (p q)
311 (define-oneshot-result inp is-unread
312 (let ((p-result (funcall (funcall p inp))))
313 (when p-result
314 (let* ((p-suffix (suffix-of p-result))
315 (q-result (funcall (funcall q p-suffix))))
316 (when (and p-result q-result)
317 (make-instance 'parser-possibility :tree (tree-of p-result) :suffix p-suffix))))))))
318
72f1b52 Jakub Higersberger Fix find-after?/*
authored
319 (defun find* (q)
3a66e1e Jakub Higersberger Reimplement non-backtracking repetition parsers in terms of between*
authored
320 "Non-backtracking parser: Find first q"
72f1b52 Jakub Higersberger Fix find-after?/*
authored
321 (find-after* (item) q))
3bfbf50 Jakub Higersberger Add non-backtracking version of expression parser.
authored
322
ab99892 Jakub Higersberger Add OPT?/*.
authored
323 (defun opt* (p)
324 "Non-backtracking parser: result of p or nil"
325 (choice1 p (result nil)))
326
3bfbf50 Jakub Higersberger Add non-backtracking version of expression parser.
authored
327 (defun expression* (term operators &optional (bracket-left nil) (bracket-right nil))
328 "Non-backtracking parser: Reduce a sequence of terms with unary/binary operators with precedence.
329 OPERATORS is a list of (op-parser :left/:right/:unary), where OP-PARSER is a parser consuming
330 an operator and returning a reduction function. Highest precedence first."
a1c55d9 Jakub Higersberger Make vectors and characters accepted as parsers.
authored
331 (with-parsers (term bracket-left bracket-right)
8dfc8fd Jakub Higersberger Use named? in expression?/*
authored
332 (named? expr-parser
333 (iter (for (op assoc) in operators)
334 (for base initially (choice1 (bracket? bracket-left expr-parser bracket-right)
335 term)
336 then (ecase assoc
337 (:left (chainl1* base op))
338 (:right (chainr1* base op))
339 (:unary (choice1
340 (named-seq* (<- op-fun op)
341 (<- subexpr base)
342 (funcall op-fun subexpr))
343 base))))
344 (finally (return base))))))
Something went wrong with that request. Please try again.