Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 225 lines (200 sloc) 7.945 kB
fccc685 Initial open-source release
MLstate authored
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (** Encodings:
19 Some routines to allow fast encode/decode of strings.
20
21 Intended as replacements for cases where trx-generated parsers aren't fast
22 enough. Only use these routines if speed is your utmost priority.
23 *)
24 module List = Base.List
25 module String = Base.String
26 module Char = Base.Char
27
28 let us = String.unsafe_sub
29
30 (* HTTP encodings *)
31
32 (* %<hex><hex> -> <char> *)
33 let re1 = Str.regexp "\\(%[0-9a-fA-F][0-9a-fA-F]\\)"
34 let pchxhx =
35 ((fun s i l -> i < l && s.[i] = '%'),
36 re1,
37 (fun s d -> Buffer.add_char d (Char.chr (Charf.c2h s.[1] s.[2]))))
38
39 let decode_string convlst s =
40 let l = String.length s in
41 let b = Buffer.create l in
42 let rec aux i =
43 if i < l
44 then
45 let (i,found) =
46 List.fold_left (fun (i,found) (tr,re,fn) ->
47 if not found && tr s i l && Str.string_match re s i
48 then
49 let mtch = Str.matched_group 1 s in
50 fn mtch b;
51 (i+String.length mtch,true)
52 else (i,found)) (i,false) convlst in
53 if found
54 then aux i
55 else (Buffer.add_char b s.[i]; aux (i+1))
56 else ()
57 in
58 aux 0;
59 Buffer.contents b
60
61 let http_unencode s =
62 let l = String.length s in
63 let r = String.copy s in
64 let rec aux i j =
65 if i < l
66 then
67 (match s.[i] with
68 | '%' ->
69 let k = i + 1 in
70 if k + 1 < l
71 then
72 (match s.[k],s.[k+1] with
73 | (ch1,ch2) when (Charf.is_hexf ch1 && Charf.is_hexf ch2) ->
74 (r.[j] <- Char.chr (Charf.c2h s.[k] s.[k+1]); aux (i+3) (j+1))
75 | _ -> (r.[j] <- '%'; aux (i+1) (j+1)))
76 else (r.[j] <- '%'; aux (i+1) (j+1))
77 | ch -> (r.[j] <- ch; aux (i+1) (j+1)))
78 else j
79 in
80 us r 0 (aux 0 0)
81
82 (* Encode a string according to a character predicate and an arbitrary encode function. *)
83 let encode_chars_filter ?(hint=(fun l -> (l + (l asr 4)))) is_char encode_char s =
84 let l = String.length s in
85 let b = Buffer.create (hint l) in
86 let rec aux i j =
87 if i < l
88 then
89 if is_char s.[i]
90 then (Buffer.add_char b s.[i]; aux (i+1) (j+1))
91 else
92 let code = encode_char s.[i] in
93 let clen = String.length code in
94 Buffer.add_string b code; aux (i+1) (j+clen)
95 else j
96 in
97 Buffer.sub b 0 (aux 0 0)
98
99 (* Encode all characters according to selective encode function *)
100 let encode_chars ?(hint=(fun l -> (l + (l asr 4)))) encode_char s =
101 let l = String.length s in
102 let b = Buffer.create (hint l) in
103 let rec aux i j =
104 if i < l
105 then
106 let code = encode_char s.[i] in
107 let clen = String.length code in
108 Buffer.add_string b code; aux (i+1) (j+clen)
109 else j
110 in
111 Buffer.sub b 0 (aux 0 0)
112
113 (* Build a char map from a list of (char,string) pairs. *)
114 let encode_list_to_map encode_list =
115 let chmap = Array.init 256 (fun i -> String.make 1 (Char.chr i)) in
116 List.iter (fun (ch,code) -> chmap.(Char.code ch) <- code) encode_list;
117 chmap
118
119 (* Generic URL encode. *)
a9f8d34 [cleanup] Base: remove sprintf
Raja authored
120 let chhxmp = Array.init 256 (fun i -> Printf.sprintf "%02X" i)
fccc685 Initial open-source release
MLstate authored
121 let pc_encode ch = "%"^(chhxmp.(Char.code ch))
122
123 (* RFC 1738 but excluding , / ? : @ & = + $ #
124 * See encodeURIComponent() in http://www.w3schools.com/jsref/jsref_encodeURIComponent.asp
125 *)
126 let encode_uri_component = (fun s -> encode_chars_filter Charf.is_urlxf pc_encode s)
127
128 let encode_aws_uri = (fun s -> encode_chars_filter Charf.is_awsf pc_encode s)
129
130 let decode_uri_component = decode_string [ pchxhx ]
131
132 let escaped1 = [ "'","&#39;"; "&","&amp;"; ]
133 let escaped2 = (* ' redefined here for revert operation *)
134 [ ">","&gt;"; "<","&lt;"; "\"","&quot;"; "'","&apos;";
135 "\194\163","&pound;"; "\194\176","&deg;"; "\194\167","&sect;"
136 ]
137
138 (* Generic, more optimised HTTP encode *)
139 let http_encode s =
140 String.multi_replace (String.multi_replace s escaped1) escaped2
141
142 (* Fixme:
143 There is already http_decode and http_unencode, but none are
144 human-readable, so I add a function that revert http_encode
145 *)
146 let revert_http_encode s =
147 let unescaped = List.map (fun (a,b) -> (b,a)) (escaped1 @ escaped2) in
148 String.multi_replace s unescaped
149
150 exception HttpBodyRewriteError of int * string
151
152 let http_body_rewrite s =
153 let l = String.length s in
154 let skip is i = let rec aux i = if i >= l then i else if is s.[i] then aux (i+1) else i in aux i in
155 let skip_space = skip Charf.is_spacef in
156 let get_name i = let ii = skip Charf.is_namef i in (ii,String.sub s i (ii-i)) in
157 let b = Buffer.create l in
158 let get_value i =
159 Buffer.clear b;
160 let rec aux i =
161 if i >= l
162 then (i,Buffer.contents b)
163 else
164 match s.[i] with
165 | '+' -> Buffer.add_char b ' '; aux (i+1)
166 | '%' ->
167 let i = i + 1 in
168 if i >= l then raise (HttpBodyRewriteError (i,"expected 'u' or <hex>")) else
169 (match s.[i] with
170 | 'u' ->
171 let i = i + 1 in
172 if i + 4 > l then raise (HttpBodyRewriteError (i,"expected <hex><hex><hex><hex>")) else
173 (match s.[i],s.[i+1],s.[i+2],s.[i+3] with
174 | (ch1,ch2,ch3,ch4) when (Charf.is_hexf ch1 && Charf.is_hexf ch2 && Charf.is_hexf ch3 && Charf.is_hexf ch4) ->
175 Buffer.add_string b (Charf.c4u ch1 ch2 ch3 ch4); aux (i+4)
176 | _ ->
177 raise (HttpBodyRewriteError (i,"expected <hex><hex><hex><hex>")))
178 | _ ->
179 if i + 2 > l then raise (HttpBodyRewriteError (i,"expected <hex><hex>")) else
180 (match s.[i],s.[i+1] with
181 | (ch1,ch2) when (Charf.is_hexf ch1 && Charf.is_hexf ch2) ->
182 Buffer.add_char b (Char.chr (Charf.c2h ch1 ch2)); aux (i+2)
183 | _ ->
184 raise (HttpBodyRewriteError (i,"expected <hex><hex>"))))
185 | ch ->
186 if Charf.is_charf ch
187 then (i,Buffer.contents b)
188 else (Buffer.add_char b ch; aux (i+1))
189 in
190 aux i
191 in
192 let rec aux i es =
193 let i = skip_space i in
194 if i >= l then (i,es) else
195 let i,n = get_name i in
196 if i >= l || n = "" then raise (HttpBodyRewriteError (i,"expected <name>")) else
197 let i = skip_space i in
198 if i >= l || s.[i] <> '=' then raise (HttpBodyRewriteError (i,"expected '='")) else
199 let i,v =
200 if i + 1 >= l
201 then (i,"")
202 else
203 let i = skip_space (i+1) in
204 if i >= l then raise (HttpBodyRewriteError (i,"expected <value>")) else
205 if s.[i] = '&'
206 then (i,"")
207 else
208 let i,v = get_value i in
209 if v = "" then raise (HttpBodyRewriteError (i,"expected <value>")) else
210 i,v
211 in
212 let es = ((n,v)::es) in
213 if i >= l then (i,es) else
214 let i = skip_space i in
215 if i + 1 >= l || s.[i] <> '&' then (i,es) else
216 let i = skip_space (i+1) in
217 if i + 1 >= l then raise (HttpBodyRewriteError (i,"expected <name>=<value>")) else
218 aux i es
219 in
220 let i, es = aux 0 [] in
221 (i,List.rev es)
222
223 (* End of file encodings.ml *)
224
Something went wrong with that request. Please try again.