Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 69 lines (60 sloc) 3.107 kb
fccc685 Initial open-source release
MLstate authored
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (* depends *)
19 module String = BaseString
20
21 (* -- *)
22
23 let except_html_char = "\"&<>" (* normal characters that *should* be escaped in html *)
24 let allowed_special_char = "\t\n\r" (* special characters that *should not* be escaped in html *)
25
26 let string_of_int i =
27 let r1 i = Char.chr (((i mod 64) + 128)) in
28 let r2 i = Char.chr (((i / 64) mod 64) + 128) in
29 if i < 128 then String.make 1 (Char.chr i)
30 else if i < 2048 then
31 let t = [| Char.chr ((i / 64) + 192); r1 i |] in
32 String.init 2 (fun n -> t.(n))
33 else if i < 65536 then
34 let t = [| Char.chr (((i / 4096) mod 16) + 224); r2 i; r1 i |] in
35 String.init 3 (fun n -> t.(n))
36 else if i < 2097152 then
37 let t = [| Char.chr (((i / 262144) mod 8) + 240); Char.chr (((i / 4096) mod 64) + 128); r2 i; r1 i |] in
38 String.init 4 (fun n -> t.(n))
39 else assert false
40
41 (* takes any utf8 string and converts it into html entities (escape *all* characters, which is not avised in general; please perform checks before calling it) *)
42 let htmlentities src =
43 let rec pow n p = if p = 0 then 1 else (pow n (p - 1)) * n in
44 let len = String.length src in
45 let soi = Pervasives.string_of_int in
46 let rec aux nbr pos i lst =
47 if i = len then lst
48 else
49 let chr = Char.code src.[i] in
50 if pos = (-1) then
51 if chr >= 240 then aux ((chr mod 16) * 262144) 2 (i + 1) lst
52 else if chr >= 224 then aux ((chr mod 32) * 4096) 1 (i + 1) lst
53 else if chr >= 192 then aux ((chr mod 64) * 64) 0 (i + 1) lst
54 else if (chr < 128 && (chr >= 32 || String.contains allowed_special_char src.[i])) || String.contains except_html_char src.[i]
55 then aux 0 (-1) (i + 1) (chr::lst)
56 else
57 (* between 128 and 192: malformed UTF-8; we should absolutely not fail, but return the usual black question mark for invalid symbols *)
58 (* between 0 and 31, except allowed_special_char, the entities seem to be illegal, so we project again to the question mark *)
59 begin
60 (* Journal.Interface.warning (Printf.sprintf "Warning: htmlentities: invalid UTF-8: in string %s at position %d on character of code %d" src i chr); *)
61 aux 0 (-1) (i + 1) (65533::lst)
62 end
63 else
64 let nbr = nbr + ((chr mod 64) * (pow 64 pos)) in
65 if pos = 0 then aux 0 (-1) (i + 1) (nbr::lst)
66 else aux nbr (pos - 1) (i + 1) lst
67 in
68 List.fold_right (fun item acc -> acc^"&#"^(soi item)^";") (aux 0 (-1) 0 []) ""
Something went wrong with that request. Please try again.