Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 103 lines (89 sloc) 3.223 kb
fccc685 Initial open-source release
MLstate authored
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (**)
19
20 type uchar = int
21 type ustring = uchar array
22
23 exception Uchar of int
24 let ulength = Array.length
25
26 let uchar s pos =
27 let uchar_aux pos =
28 let c = int_of_char s.[pos] in
29 if c >= 128 && c < 192 then c land 0b111111
30 else raise (Uchar pos)
31 in
32 let c = int_of_char s.[pos] in
33 if c < 128 then c, 1
34 else if c >= 192 && c < 224 then
35 ((c land 0b11111) lsl 6) lor (uchar_aux (succ pos)), 2
36 else if c >= 224 && c < 240 then
37 ((c land 0b1111) lsl 12) lor (uchar_aux (pos + 1) lsl 6) lor (uchar_aux (pos + 2)), 3
38 else if c >= 240 && c <= 247 then
39 ((c land 0b111) lsl 18) lor (uchar_aux (pos + 1) lsl 12) lor (uchar_aux (pos + 2) lsl 6) lor (uchar_aux (pos + 3)), 4
40 else raise (Uchar pos)
41
42 (** benchmark vs FUNC *)
43 let ulength_of_string s =
44 let l = ref 0 in
45 String.iter (fun x -> if x < '\128' or (x >= '\192' && x < '\248') then incr l) s ;
46 !l
47
48 let length_of_ustring us =
49 let l = ref 0 in
50 Array.iteri (
51 fun i x ->
52 if x < 128 then incr l
53 else l := !l + (
54 if x < 2048 then 2
55 else if x < 65536 then 3
56 else if x < 2097152 then 4
57 else raise (Uchar i)
58 )
59 ) us ;
60 !l
61
62 let ustring s =
63 let l = ulength_of_string s in
64 let us = Array.create l 0 in
65 let f = uchar s in
66 let rec aux pos spos =
67 if pos = l then us
68 else
69 let uc, nb = f spos in
70 Array.unsafe_set us pos uc ;
71 aux (succ pos) (spos + nb)
72 in aux 0 0
73
74 let chars_of_uchar c =
75 if c < 128 then [char_of_int c]
76 else if c < 2048 then
77 [ char_of_int (((c land 0b11111000000) lsr 6) lor 0b11000000)
78 ; char_of_int ((c land 0b00000111111) lor 0b10000000) ]
79 else if c < 65536 then
80 [ char_of_int (((c land 0b1111000000000000) lsr 12) lor 0b11100000)
81 ; char_of_int (((c land 0b0000111111000000) lsr 6) lor 0b10000000)
82 ; char_of_int ((c land 0b0000000000111111) lor 0b10000000) ]
83 else (
84 assert (c < 2097152) ;
85 [ char_of_int (((c land 0b111000000000000000000) lsr 18) lor 0b11110000)
86 ; char_of_int (((c land 0b000111111000000000000) lsr 12) lor 0b10000000)
87 ; char_of_int (((c land 0b000000000111111000000) lsr 6) lor 0b10000000)
88 ; char_of_int ((c land 0b000000000000000111111) lor 0b10000000) ]
89 )
90
91 (* FIXME: unsafe_set/get *)
92 let of_ustring us =
93 let l = length_of_ustring us in
94 let s = String.create l in
95 let rec aux pos spos =
96 if pos = Array.length us then s
97 else
98 let cl = chars_of_uchar us.(pos) in
99 let nb = List.fold_left (fun i c -> s.[spos + i] <- c ; succ i) 0 cl in
100 aux (succ pos) (spos + nb)
101 in aux 0 0
102
Something went wrong with that request. Please try again.