Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 314 lines (271 sloc) 10.467 kB
3e9b98a @mfp Added copyright notices and LICENSE.
authored
1 (* Copyright (C) 2009 Mauricio Fernandez <mfp@acm.org> *)
fed8a48 @mfp Initial commit.
authored
2 open Printf
3 open ExtString
4 open ExtList
5
298c05b @mfp Use Sexplib for Simple_markup pretty-printing.
authored
6 TYPE_CONV_PATH "Simple_markup"
7
fed8a48 @mfp Initial commit.
authored
8 type ref = { src : string; desc : string }
9
10 type paragraph =
11 Normal of par_text
12 | Pre of string * string option
13 | Heading of int * par_text
14 | Quote of paragraph list
15 | Ulist of paragraph list * paragraph list list
16 | Olist of paragraph list * paragraph list list
17
18 and par_text = text list
19
20 and text =
21 Text of string
22 | Emph of string
23 | Bold of string
24 | Struck of par_text
25 | Code of string
26 | Link of href
2d2aa60 @mfp Simple_markup, Simple_markup__html: support internal links.
authored
27 | Anchor of string
fed8a48 @mfp Initial commit.
authored
28 | Image of img_ref
29
298c05b @mfp Use Sexplib for Simple_markup pretty-printing.
authored
30 and href = { href_target : string; href_desc : string; }
fed8a48 @mfp Initial commit.
authored
31
298c05b @mfp Use Sexplib for Simple_markup pretty-printing.
authored
32 and img_ref = { img_src : string; img_alt : string; }
fed8a48 @mfp Initial commit.
authored
33
298c05b @mfp Use Sexplib for Simple_markup pretty-printing.
authored
34 and par_list = paragraph list with sexp
fed8a48 @mfp Initial commit.
authored
35
298c05b @mfp Use Sexplib for Simple_markup pretty-printing.
authored
36 class fold = Camlp4Filters.GenerateFold.generated
fed8a48 @mfp Initial commit.
authored
37
298c05b @mfp Use Sexplib for Simple_markup pretty-printing.
authored
38 type parse_state = { max : int; current : Buffer.t; fragments : text list; }
fed8a48 @mfp Initial commit.
authored
39
298c05b @mfp Use Sexplib for Simple_markup pretty-printing.
authored
40 let string_of_paragraph p = Sexplib.Sexp.to_string_hum (sexp_of_paragraph p)
41 let string_of_paragraphs ps = Sexplib.Sexp.to_string_hum (sexp_of_par_list ps)
fed8a48 @mfp Initial commit.
authored
42
80923b4 @mfp Simple_markup: \t equivalent to 8 spaces as far as indentation is con…
authored
43 let indentation ?(ts=8) s =
44 let rec loop n indent max =
45 if n >= max then indent
fed8a48 @mfp Initial commit.
authored
46 else match s.[n] with
80923b4 @mfp Simple_markup: \t equivalent to 8 spaces as far as indentation is con…
authored
47 ' ' -> loop (n + 1) (indent + 1) max
48 | '\t' -> loop (n + 1) (indent + 8) max
49 | _ -> indent
50 in loop 0 0 (String.length s)
fed8a48 @mfp Initial commit.
authored
51
52 let unescape s =
53 let b = Buffer.create (String.length s) in
54 let len = String.length s in
55 let rec loop i =
56 if i >= len then Buffer.contents b
57 else match s.[i] with
58 '\\' when i < len - 1 -> Buffer.add_char b s.[i+1]; loop (i + 2)
59 | c -> Buffer.add_char b c; loop (i + 1)
60 in loop 0
61
6a84559 @mfp Further refactoring in simple_markup.ml.
authored
62 let unescape_slice s ~first ~last =
63 unescape (String.strip (String.slice ~first ~last s))
64
fed8a48 @mfp Initial commit.
authored
65 let snd_is s c = String.length s > 1 && s.[1] = c
ae859c0 @mfp Simple_markup: allow use of tabs after *, # and >.
authored
66 let snd_is_space s = snd_is s ' ' || snd_is s '\t'
fed8a48 @mfp Initial commit.
authored
67
68 let collect f x =
69 let rec loop acc = match f x with
70 None -> List.rev acc
71 | Some y -> loop (y :: acc)
72 in loop []
73
74 let push_remainder ?(first=2) indent s e =
75 let s = String.slice ~first s in
76 let s' = String.strip s in
be6f921 @mfp Simple_markup: fix list parsing ("inner indentation" after '*' or '#').
authored
77 Enum.push e (indent + first + indentation s, s', s' = "")
fed8a48 @mfp Initial commit.
authored
78
6a84559 @mfp Further refactoring in simple_markup.ml.
authored
79 let adds = Buffer.add_string
80
81 let addc = Buffer.add_char
82
83 let new_fragment () = Buffer.create 8
84
e4eedb7 @mfp Simple_markup: minor refactoring.
authored
85 let push_current st =
86 if Buffer.length st.current > 0 then
87 Text (Buffer.contents st.current) :: st.fragments
88 else st.fragments
6a84559 @mfp Further refactoring in simple_markup.ml.
authored
89
fed8a48 @mfp Initial commit.
authored
90 let rec read_paragraph ?(skip_blank=true) indent e = match Enum.peek e with
91 None -> None
92 | Some (indentation, line, isblank) -> match isblank with
93 true ->
94 Enum.junk e;
95 if skip_blank then read_paragraph indent e else None
96 | false ->
97 if indentation < indent then
98 None
99 else begin
100 Enum.junk e;
101 read_nonempty indentation e line
102 end
103
104 and skip_blank_line e = match Enum.peek e with
105 None | Some (_, _, false) -> ()
106 | Some (_, _, true) -> Enum.junk e; skip_blank_line e
107
108 and read_nonempty indent e s = match s.[0] with
109 '!' -> read_heading s
ae859c0 @mfp Simple_markup: allow use of tabs after *, # and >.
authored
110 | '*' when snd_is_space s -> push_remainder indent s e; read_ul indent e
111 | '#' when snd_is_space s -> push_remainder indent s e; read_ol indent e
fed8a48 @mfp Initial commit.
authored
112 | '{' when snd_is s '{' -> read_pre (String.slice s ~first:2) e
ae859c0 @mfp Simple_markup: allow use of tabs after *, # and >.
authored
113 | '>' when snd_is_space s || s = ">" ->
fed8a48 @mfp Initial commit.
authored
114 (* last check needed because "> " becomes ">" *)
115 Enum.push e (indent, s, false); read_quote indent e
116 | _ -> Enum.push e (indent, s, false); read_normal e
117
118 and read_heading s =
119 let s' = String.strip ~chars:"!" s in
120 let level = String.length s - String.length s' in
121 Some (Heading (level, parse_text s'))
122
123 and read_ul indent e =
124 read_list
125 (fun fst others -> Ulist (fst, others))
ae859c0 @mfp Simple_markup: allow use of tabs after *, # and >.
authored
126 (fun s -> snd_is_space s && s.[0] = '*')
fed8a48 @mfp Initial commit.
authored
127 indent e
128
129 and read_ol indent e =
130 read_list
131 (fun fst others -> Olist (fst, others))
ae859c0 @mfp Simple_markup: allow use of tabs after *, # and >.
authored
132 (fun s -> snd_is_space s && s.[0] = '#')
fed8a48 @mfp Initial commit.
authored
133 indent e
134
135 and read_list f is_item indent e =
136 let read_item indent ps = collect (read_paragraph (indent + 1)) e in
137 let rec read_all fst others =
138 skip_blank_line e;
139 match Enum.peek e with
140 | Some (indentation, s, _) when indentation >= indent && is_item s ->
141 Enum.junk e;
142 push_remainder indentation s e;
143 read_all fst (read_item indentation [] :: others)
144 | None | Some _ -> f fst (List.rev others)
145 in Some (read_all (read_item indent []) [])
146
147 and read_pre kind e =
148 let kind = match kind with "" -> None | s -> Some s in
c85767d @mfp Simple_markup: unescape \}} lines (in general, \\+}}$) in pre.
authored
149 let re = Str.regexp "^\\\\+}}$" in
150 let unescape = function
151 s when Str.string_match re s 0 -> String.slice ~first:1 s
152 | s -> s in
721feae @mfp Simple_markup: refactored leading space removal in preformatted text.
authored
153 (* don't forget the last \n *)
154 let ret ls = Some (Pre (String.concat "\n" (List.rev ("" :: ls)), kind)) in
155 let rec read_until_end fstindent ls = match Enum.get e with
156 None | Some (_, "}}", _) -> ret ls
fed8a48 @mfp Initial commit.
authored
157 | Some (indentation, s, _) ->
721feae @mfp Simple_markup: refactored leading space removal in preformatted text.
authored
158 let spaces = String.make (max 0 (indentation - fstindent)) ' ' in
c85767d @mfp Simple_markup: unescape \}} lines (in general, \\+}}$) in pre.
authored
159 read_until_end fstindent ((spaces ^ unescape s) :: ls)
721feae @mfp Simple_markup: refactored leading space removal in preformatted text.
authored
160 in match Enum.get e with
161 None | Some (_, "}}", _) -> ret []
162 | Some (indentation, s, _) -> read_until_end indentation [s]
87b751c @mfp Simple_markup: remove leading spaces in pre according to fst line ind…
authored
163
fed8a48 @mfp Initial commit.
authored
164 and read_quote indent e =
165 let push_and_finish e elm = Enum.push e elm; raise Enum.No_more_elements in
166 let next_without_lt e = function
167 | (_, _, true) as line -> push_and_finish e line
168 | (n, s, false) as line ->
169 if n < indent || s.[0] <> '>' then
170 push_and_finish e line
171 else
172 let s = String.slice ~first:1 s in
173 let s' = String.strip s in
174 (String.length s - String.length s', s', s' = "")
175
176 in match collect (read_paragraph 0) (Enum.map (next_without_lt e) e) with
177 [] -> None
178 | ps -> Some (Quote ps)
179
180 and read_normal e =
181 let rec gettxt ls =
182 let return () = String.concat " " (List.rev ls) in
183 match Enum.peek e with
184 None | Some (_, _, true) -> return ()
185 | Some (_, l, _) -> match l.[0] with
ae859c0 @mfp Simple_markup: allow use of tabs after *, # and >.
authored
186 '!' | '*' | '#' | '>' when snd_is_space l -> return ()
fed8a48 @mfp Initial commit.
authored
187 | '{' when snd_is l '{' -> return ()
188 | _ -> Enum.junk e; gettxt (l :: ls) in
189 let txt = gettxt [] in
190 Some (Normal (parse_text txt))
191
192 and parse_text s =
6a84559 @mfp Further refactoring in simple_markup.ml.
authored
193 scan
194 s
195 { max = String.length s;
196 fragments = [];
197 current = new_fragment (); }
198 0
fed8a48 @mfp Initial commit.
authored
199
200 (* scan s starting from n, upto max (exclusive) *)
6a84559 @mfp Further refactoring in simple_markup.ml.
authored
201 and scan s st n =
202 let max = st.max in
e4eedb7 @mfp Simple_markup: minor refactoring.
authored
203 if n >= max then List.rev (push_current st)
6a84559 @mfp Further refactoring in simple_markup.ml.
authored
204
205 else match s.[n] with
206 | '`' ->
207 delimited (fun ~first ~last -> Code (unescape_slice s ~first ~last)) "`"
208 s st n
209 | '*' ->
210 delimited (fun ~first ~last -> Bold (unescape_slice s ~first ~last)) "*"
211 s st n
212 | '_' ->
e2a83f9 @mfp Simple_markup: delimit emph with "__", not "_".
authored
213 delimited (fun ~first ~last -> Emph (unescape_slice s ~first ~last)) "__"
6a84559 @mfp Further refactoring in simple_markup.ml.
authored
214 s st n
215 | '=' ->
216 delimited
217 (fun ~first ~last ->
218 Struck (scan s
219 { max = last; fragments = []; current = new_fragment (); }
220 first))
221 "==" s st n
222 | '!' when matches_at s ~max n "![" ->
223 maybe_link
224 "![" (fun ref -> Image { img_src = ref.src; img_alt = ref.desc })
225 s st (n + 2)
226 | '[' ->
227 maybe_link "["
1d473f6 @mfp Simple_markup: allow links with auto target: [http://foo.com]().
authored
228 (fun ref -> match ref.src, ref.desc with
229 "", "" -> Text ""
230 | "", desc -> Link { href_target = desc; href_desc = desc }
2d2aa60 @mfp Simple_markup, Simple_markup__html: support internal links.
authored
231 | src, "" when src.[0] = '#' -> Anchor (String.slice ~first:1 src)
1d473f6 @mfp Simple_markup: allow links with auto target: [http://foo.com]().
authored
232 | src, desc -> Link { href_target = ref.src; href_desc = ref.desc})
6a84559 @mfp Further refactoring in simple_markup.ml.
authored
233 s st (n + 1)
234 | '\\' when (n + 1) < max -> addc st.current s.[n+1]; scan s st (n + 2)
235 | c -> addc st.current c; scan s st (n + 1)
236
237 (* [delimited f delim first] tries to match [delim] starting from [first],
238 * returns Some (offset of char after closing delim) or None *)
239 and delimited f delim s st first =
240 let max = st.max in
241 let delim_len = String.length delim in
242 let scan_from_next_char () =
243 addc st.current s.[first];
244 scan s st (first + 1)
fed8a48 @mfp Initial commit.
authored
245 in
6a84559 @mfp Further refactoring in simple_markup.ml.
authored
246 if not (matches_at s ~max first delim) then scan_from_next_char ()
247 else match scan_past ~delim s ~max (first + String.length delim) with
248 Some n ->
249 let chunk = f ~first:(first + delim_len)
250 ~last:(n - String.length delim)
251 in scan s
e4eedb7 @mfp Simple_markup: minor refactoring.
authored
252 { st with fragments = chunk :: push_current st;
6a84559 @mfp Further refactoring in simple_markup.ml.
authored
253 current = new_fragment () }
254 n
255 | None -> scan_from_next_char ()
256
257 and maybe_link delim f s st n = match scan_link s ~max:st.max n with
258 None -> adds st.current delim; scan s st n
259 | Some (ref, n) ->
260 scan s
e4eedb7 @mfp Simple_markup: minor refactoring.
authored
261 { st with fragments = f ref :: push_current st;
6a84559 @mfp Further refactoring in simple_markup.ml.
authored
262 current = (new_fragment ()) }
263 n
264
265 (* return None if delim not found, else Some (offset of char *after* delim) *)
266 and scan_past ~delim s ~max n =
267 let re = Str.regexp (Str.quote delim) in
268 let rec loop m ~max =
269 if m >= max then None else
270 match (try Some (Str.search_forward re s m) with Not_found -> None) with
271 | Some m when m < max && s.[m-1] <> '\\' -> Some (m + String.length delim)
272 | Some m when m < max -> loop (m + 1) ~max
273 | _ -> None (* no match or >= max *)
274 in loop n ~max
275
276 (* returns None or offset of char after the reference
277 * (i.e. after closing ')'). *)
278 and scan_link s ~max n = match scan_past ~delim:"]" s ~max n with
279 None -> None
280 | Some end_of_desc ->
281 if end_of_desc >= max then None
282 else match s.[end_of_desc] with
283 '(' ->
284 begin match scan_past ~delim:")" s ~max (end_of_desc + 1) with
285 None -> None
286 | Some end_of_uri ->
287 let ref =
288 {
289 desc = unescape_slice s ~first:n ~last:(end_of_desc - 1);
290 src = unescape_slice s
291 ~first:(end_of_desc + 1)
292 ~last:(end_of_uri - 1)
293 }
294 in Some (ref, end_of_uri)
295 end
296 | _ -> None
297
298 and matches_at s ~max n delim =
299 let len = String.length delim in
300 if n + len > max then false
301 else
302 let rec loop n m k =
303 if k = 0 then true
304 else if s.[n] = delim.[m] then loop (n + 1) (m + 1) (k - 1)
305 else false
306 in loop n 0 len
fed8a48 @mfp Initial commit.
authored
307
7c7076d @mfp Added Simple_markup.parse_enum and parse_lines.
authored
308 let parse_enum e =
309 collect (read_paragraph 0)
310 (Enum.map (fun l -> let l' = String.strip l in (indentation l, l', l' = "")) e)
311
312 let parse_lines ls = parse_enum (List.enum ls)
313 let parse_text s = parse_lines ((Str.split (Str.regexp "\n") s))
Something went wrong with that request. Please try again.