Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 229 lines (203 sloc) 8.102 kb
fccc685 Initial open-source release
MLstate authored
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (** Encodings:
19 Some routines to allow fast encode/decode of strings.
20
21 Intended as replacements for cases where trx-generated parsers aren't fast
22 enough. Only use these routines if speed is your utmost priority.
23 *)
24 module List = Base.List
25 module String = Base.String
26 module Char = Base.Char
27
28 let us = String.unsafe_sub
29
30 (* HTTP encodings *)
31
32 (* %<hex><hex> -> <char> *)
33 let re1 = Str.regexp "\\(%[0-9a-fA-F][0-9a-fA-F]\\)"
34 let pchxhx =
35 ((fun s i l -> i < l && s.[i] = '%'),
36 re1,
37 (fun s d -> Buffer.add_char d (Char.chr (Charf.c2h s.[1] s.[2]))))
38
39 let decode_string convlst s =
40 let l = String.length s in
41 let b = Buffer.create l in
42 let rec aux i =
43 if i < l
44 then
45 let (i,found) =
46 List.fold_left (fun (i,found) (tr,re,fn) ->
47 if not found && tr s i l && Str.string_match re s i
48 then
49 let mtch = Str.matched_group 1 s in
50 fn mtch b;
51 (i+String.length mtch,true)
52 else (i,found)) (i,false) convlst in
53 if found
54 then aux i
55 else (Buffer.add_char b s.[i]; aux (i+1))
56 else ()
57 in
58 aux 0;
59 Buffer.contents b
60
61 let http_unencode s =
62 let l = String.length s in
63 let r = String.copy s in
64 let rec aux i j =
65 if i < l
66 then
67 (match s.[i] with
68 | '%' ->
69 let k = i + 1 in
70 if k + 1 < l
71 then
72 (match s.[k],s.[k+1] with
73 | (ch1,ch2) when (Charf.is_hexf ch1 && Charf.is_hexf ch2) ->
74 (r.[j] <- Char.chr (Charf.c2h s.[k] s.[k+1]); aux (i+3) (j+1))
75 | _ -> (r.[j] <- '%'; aux (i+1) (j+1)))
76 else (r.[j] <- '%'; aux (i+1) (j+1))
77 | ch -> (r.[j] <- ch; aux (i+1) (j+1)))
78 else j
79 in
80 us r 0 (aux 0 0)
81
82 (* Encode a string according to a character predicate and an arbitrary encode function. *)
83 let encode_chars_filter ?(hint=(fun l -> (l + (l asr 4)))) is_char encode_char s =
84 let l = String.length s in
85 let b = Buffer.create (hint l) in
86 let rec aux i j =
87 if i < l
88 then
89 if is_char s.[i]
90 then (Buffer.add_char b s.[i]; aux (i+1) (j+1))
91 else
92 let code = encode_char s.[i] in
93 let clen = String.length code in
94 Buffer.add_string b code; aux (i+1) (j+clen)
95 else j
96 in
97 Buffer.sub b 0 (aux 0 0)
98
99 (* Encode all characters according to selective encode function *)
100 let encode_chars ?(hint=(fun l -> (l + (l asr 4)))) encode_char s =
101 let l = String.length s in
102 let b = Buffer.create (hint l) in
103 let rec aux i j =
104 if i < l
105 then
106 let code = encode_char s.[i] in
107 let clen = String.length code in
108 Buffer.add_string b code; aux (i+1) (j+clen)
109 else j
110 in
111 Buffer.sub b 0 (aux 0 0)
112
113 (* Build a char map from a list of (char,string) pairs. *)
114 let encode_list_to_map encode_list =
115 let chmap = Array.init 256 (fun i -> String.make 1 (Char.chr i)) in
116 List.iter (fun (ch,code) -> chmap.(Char.code ch) <- code) encode_list;
117 chmap
118
119 (* Generic URL encode. *)
a9f8d34 [cleanup] Base: remove sprintf
Raja authored
120 let chhxmp = Array.init 256 (fun i -> Printf.sprintf "%02X" i)
fccc685 Initial open-source release
MLstate authored
121 let pc_encode ch = "%"^(chhxmp.(Char.code ch))
122
123 (* RFC 1738 but excluding , / ? : @ & = + $ #
124 * See encodeURIComponent() in http://www.w3schools.com/jsref/jsref_encodeURIComponent.asp
125 *)
126 let encode_uri_component = (fun s -> encode_chars_filter Charf.is_urlxf pc_encode s)
127
128 let encode_aws_uri = (fun s -> encode_chars_filter Charf.is_awsf pc_encode s)
129
130 let decode_uri_component = decode_string [ pchxhx ]
131
c65e357 Quentin Bourgerie [feature] libbase: Encodings simple generator of encode/decode function
BourgerieQuentin authored
132 let pc_encode_string is_char s =
133 encode_chars_filter (function '%' -> false | c -> is_char c) pc_encode s
134 let pc_decode_string = decode_string [ pchxhx ]
135
fccc685 Initial open-source release
MLstate authored
136 let escaped1 = [ "'","&#39;"; "&","&amp;"; ]
137 let escaped2 = (* ' redefined here for revert operation *)
138 [ ">","&gt;"; "<","&lt;"; "\"","&quot;"; "'","&apos;";
139 "\194\163","&pound;"; "\194\176","&deg;"; "\194\167","&sect;"
140 ]
141
142 (* Generic, more optimised HTTP encode *)
143 let http_encode s =
144 String.multi_replace (String.multi_replace s escaped1) escaped2
145
146 (* Fixme:
147 There is already http_decode and http_unencode, but none are
148 human-readable, so I add a function that revert http_encode
149 *)
150 let revert_http_encode s =
151 let unescaped = List.map (fun (a,b) -> (b,a)) (escaped1 @ escaped2) in
152 String.multi_replace s unescaped
153
154 exception HttpBodyRewriteError of int * string
155
156 let http_body_rewrite s =
157 let l = String.length s in
158 let skip is i = let rec aux i = if i >= l then i else if is s.[i] then aux (i+1) else i in aux i in
159 let skip_space = skip Charf.is_spacef in
160 let get_name i = let ii = skip Charf.is_namef i in (ii,String.sub s i (ii-i)) in
161 let b = Buffer.create l in
162 let get_value i =
163 Buffer.clear b;
164 let rec aux i =
165 if i >= l
166 then (i,Buffer.contents b)
167 else
168 match s.[i] with
169 | '+' -> Buffer.add_char b ' '; aux (i+1)
170 | '%' ->
171 let i = i + 1 in
172 if i >= l then raise (HttpBodyRewriteError (i,"expected 'u' or <hex>")) else
173 (match s.[i] with
174 | 'u' ->
175 let i = i + 1 in
176 if i + 4 > l then raise (HttpBodyRewriteError (i,"expected <hex><hex><hex><hex>")) else
177 (match s.[i],s.[i+1],s.[i+2],s.[i+3] with
178 | (ch1,ch2,ch3,ch4) when (Charf.is_hexf ch1 && Charf.is_hexf ch2 && Charf.is_hexf ch3 && Charf.is_hexf ch4) ->
179 Buffer.add_string b (Charf.c4u ch1 ch2 ch3 ch4); aux (i+4)
180 | _ ->
181 raise (HttpBodyRewriteError (i,"expected <hex><hex><hex><hex>")))
182 | _ ->
183 if i + 2 > l then raise (HttpBodyRewriteError (i,"expected <hex><hex>")) else
184 (match s.[i],s.[i+1] with
185 | (ch1,ch2) when (Charf.is_hexf ch1 && Charf.is_hexf ch2) ->
186 Buffer.add_char b (Char.chr (Charf.c2h ch1 ch2)); aux (i+2)
187 | _ ->
188 raise (HttpBodyRewriteError (i,"expected <hex><hex>"))))
189 | ch ->
190 if Charf.is_charf ch
191 then (i,Buffer.contents b)
192 else (Buffer.add_char b ch; aux (i+1))
193 in
194 aux i
195 in
196 let rec aux i es =
197 let i = skip_space i in
198 if i >= l then (i,es) else
199 let i,n = get_name i in
200 if i >= l || n = "" then raise (HttpBodyRewriteError (i,"expected <name>")) else
201 let i = skip_space i in
202 if i >= l || s.[i] <> '=' then raise (HttpBodyRewriteError (i,"expected '='")) else
203 let i,v =
204 if i + 1 >= l
205 then (i,"")
206 else
207 let i = skip_space (i+1) in
208 if i >= l then raise (HttpBodyRewriteError (i,"expected <value>")) else
209 if s.[i] = '&'
210 then (i,"")
211 else
212 let i,v = get_value i in
213 if v = "" then raise (HttpBodyRewriteError (i,"expected <value>")) else
214 i,v
215 in
216 let es = ((n,v)::es) in
217 if i >= l then (i,es) else
218 let i = skip_space i in
219 if i + 1 >= l || s.[i] <> '&' then (i,es) else
220 let i = skip_space (i+1) in
221 if i + 1 >= l then raise (HttpBodyRewriteError (i,"expected <name>=<value>")) else
222 aux i es
223 in
224 let i, es = aux 0 [] in
225 (i,List.rev es)
226
227 (* End of file encodings.ml *)
228
Something went wrong with that request. Please try again.