Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 502 lines (410 sloc) 14.699 kB
fccc685 Initial open-source release
MLstate authored
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 module StringMap = Map.Make (struct type t = string let compare = Pervasives.compare end)
19 module String = BaseString
20 exception MimeType_NotFound
21 exception Malformed
22 exception Open of string
23
24
25 type match_type =
26 | String
27 | Host16
28 | Host32
29 | Big16
30 | Big32
31 | Little16
32 | Little32
33 | Byte
34
35 type offset =
36 | Range of int * int
37 | Single of int
38
39 type magic =
40 { m_type : match_type;
41 m_value : string ;
42 m_offset : offset;
43 m_mask : string option;
44 m_imbrik : magic list;
45 }
46
47
48 type pattern =
49 | Suffix of string
50 | Other of string
51
52 type all_infos =
53 { ai_mimetype : string;
54 ai_patterns : pattern list;
55 ai_magics : magic list;
56 ai_human_readable : string option;
57 ai_subclassof : string option;
58 }
59
60 type mimetype_database = all_infos list
61
62 (*TO STRING *)
63
64 let string_of_pattern = function
65 | Suffix x -> "*"^x
66 | Other x -> x
67
68 let string_of_offset = function
69 | Range (a,b) -> Printf.sprintf "%d:%d" a b
70 | Single s -> string_of_int s
71
72 let string_of_match_type = function
73 | String -> "string"
74 | Host16 -> "host16"
75 | Host32 -> "host32"
76 | Big16 -> "big16"
77 | Big32 -> "big32"
78 | Little16-> "little16"
79 | Little32-> "little32"
80 | Byte -> "byte"
81
82 let string_of_magic m =
83 Printf.sprintf "%s / %s / %s / %s"
84 (string_of_match_type m.m_type)
85 m.m_value
86 (string_of_offset m.m_offset)
87 (match m.m_mask with | Some x -> x | None -> "-")
88
89
90 let string_of_all_infos ai =
91 let bf = Buffer.create 100 in
92 let add = Buffer.add_string bf in
93 let addn () = add "\n" in
94 add "";
95 add "Mime-type : ";
96 add ai.ai_mimetype;
97 addn ();
98 add "Patterns : ";
99 List.iter (fun x ->
100 add (string_of_pattern x);
101 add " - ") ai.ai_patterns;
102 addn ();
103 add "Magics : ";
104 List.iter (fun x ->
105 add (string_of_magic x);
106 add " \n\t ")
107 ai.ai_magics;
108 addn();
109 add "Humanreadable : ";
110 add (match ai.ai_human_readable with | None -> "" | Some s -> s);
111 addn ();
112 add "Sub class of : ";
113 add (match ai.ai_subclassof with None -> "" | Some s -> s);
114 add "";
115 addn ();
116 Buffer.contents bf
117
118
119 let pp fmt =
120 Printf.printf (fmt^^"\n%!")
121
122 let ipp fmt =
123 Printf.ifprintf stdout (fmt^^"\n%!")
124
125
126 (*TO STRING (end) *)
127
128
129 (*FROM STRING *)
130 let string_to_pattern s =
131 if String.length s> 1 && s.[0] = '*' then
132 Suffix (String.sub s 1 (String.length s -1))
133 else
134 Other s
135
136 let string_to_offset x =
137 let beg,eend = String.split_char ':' x in
138 if eend = "" then
139 Single (int_of_string beg)
140 else
141 Range (int_of_string beg, int_of_string eend)
142
143
144 let string_to_match_type s =
145 match s with
146 | "string" -> String
147 | "host16" -> Host16
148 | "host32" -> Host32
149 | "big16" -> Big16
150 | "big32" -> Big32
151 | "little16"-> Little16
152 | "little32"-> Little32
153 | "byte" -> Byte
154 | _ -> raise Malformed
155
156
157
158 (*FROM STRING (end) *)
159
160 (*UTILS *)
161 let replace avant apres =
162 Str.global_replace (Str.regexp avant) apres
163
164
165 let chaine_de_remplacement s =
166 let rmx s =
167 let hexa = Str.regexp "\\\\x\\([0-9a-eA-E][0-9a-eA-E]\\)" in
168 Str.global_substitute hexa
169 (fun x ->
170 let recup = "0x"^(Str.matched_group 1 x) in
171 let valeur = int_of_string recup in
172 let rempl = String.make 1 (Char.chr valeur) in
173 rempl) s
174 in
175
176 let rmo s =
177 let hexa = Str.regexp "\\\\\\([0-8][0-8][0-8]\\)" in
178 Str.global_substitute hexa
179 (fun x ->
180 let recup = "0o"^(Str.matched_group 1 x) in
181 let valeur = int_of_string recup in
182 let rempl = String.make 1 (Char.chr valeur) in
183 rempl) s
184 in
185
186 let last_modif s = rmx (rmo s) in
187
188 let chaine = [
189 ("&lt;", "<");
190 ("&gt;", ">");
191 ("&quot;", "\"");
192 ] in
193
194 last_modif (List.fold_left (fun acc (av,ap) -> replace av ap acc) s chaine)
195 ;;
196
197
198 let petit_boutiste value =
199 let length= String.length value in
200 if length < 2 || length mod 2 != 0 then value
201 else
202 (let rec to_list x acc =
203 if x >= length then
204 acc
205 else
206 to_list (x+2) ((String.sub value x 2)::acc)
207 in
208 let maliste =
209 if String.is_prefix "0x" value then
210 "0x"::(to_list 2 [])
211 else to_list 0 [] in
212 let result = String.concat "" maliste in
213 result)
214
215
216
217 let mise_enforme v = function
218 | String -> chaine_de_remplacement v
219 | Little16
220 | Little32 -> chaine_de_remplacement (petit_boutiste v)
221 (* | Host16 | Host32 -> failwith "refuse"*)
222 | _ -> v
223
224
225
226 let list_find_opt f l = try Some (List.find f l) with Not_found -> None
227 let is_some = function Some _ -> true | None -> false
228
229 (*UTILS (end) *)
230
231 (*CREATE FUNCTIONS *)
232 let create_all_infos mt patterns magics hr sbc=
233 { ai_mimetype = mt;
234 ai_patterns = patterns;
235 ai_magics = magics;
236 ai_human_readable = hr;
237 ai_subclassof = sbc;
238 }
239
240 let create_magic ?mask ttype value offset imbrik =
241 { m_type = ttype;
242 m_value = mise_enforme value ttype;
243 m_offset = offset;
244 m_mask = mask;
245 m_imbrik = imbrik;
246 }
247
248 (*CREATE FUNCTIONS (end) *)
249
250 let get_mimetype_aux filename database =
251 let file =
252 try open_in_bin filename
253 with e -> raise (Open (Printexc.to_string e)) in
254 let length = in_channel_length file in
255 let content = String.make length ' ' in
256 really_input file content 0 length;
257
258 let rec check_mime_list accumulator mimelist =
259 match mimelist with
260 | [] -> accumulator
261 | mime::mime_rest ->
262 (let rec check_magic_list magiclist =
263 match magiclist with
264 | [] -> false
265 | magic::magic_rest ->
266 (let value = magic.m_value in
267
268 let relance cond =
269 if cond then
270 (match magic.m_imbrik with
271 | [] -> true
272 | _ -> check_magic_list magic.m_imbrik)
273 else
274 check_magic_list magic_rest
275 in
276
277 let aux () =
278 match magic.m_offset with
279 | Range (debut, fin) ->
280 (if length <= debut && length < fin then check_magic_list magic_rest
281 else
282 (let check_string =
283 is_some
284 (String.is_contained_from_until value content debut fin) in
285 relance check_string)
286 )
287
288 | Single x ->
289 (let debut, a_lire = x, (String.length value) in
290
291 (*TODO a optimiser *)
292 if length <= debut && length < (debut + a_lire) then check_magic_list magic_rest
293 else
294 (let fin = debut + a_lire in
295 let rec check_single_string i =
296 if i >= fin then true
297 else
298 (let r = content.[i] = value.[i-debut] in
299 if r then check_single_string (succ i)
300 else false)
301 in
302
303 relance (check_single_string debut))
304 )
305 in aux ()
306 ) in
307
308 if check_magic_list mime.ai_magics then check_mime_list (mime::accumulator) mime_rest
309 else check_mime_list accumulator mime_rest
310
311 ) in
312
313 let checkpatt pattern =
314 match pattern with
315 | Suffix suff ->
316 String.is_suffix suff filename
317 | Other pattern ->
318 (match String.findi '*' pattern with
319 | None -> filename = pattern
320 | Some index ->
321 (let length_pattern = String.length pattern in
322 if index = String.length pattern -1 then
323 String.is_prefix
324 (String.sub pattern 0 (length_pattern - 2)) filename
325 else failwith (Printf.sprintf "je ne sais pas traite ce genre de pattern %s" pattern)
326 )
327 )
328 in
329
330
331 let second_try mimelist =
332 let res =
333 list_find_opt
334 (fun mime ->
335 match mime.ai_patterns with
336 | [] -> false
337 | _ ->
338 let rec aux = function
339 | [] -> false
340 | x::y ->
341 let r = checkpatt x in
342 if r then true
343 else aux y
344 in aux mime.ai_patterns)
345 mimelist
346 in
347 match res with
348 | Some x -> x
349 | None -> raise MimeType_NotFound
350 in
351
352
353 let accumulator = check_mime_list [] database in
354
355 match accumulator with
356 | [] -> second_try database
357 | [x] -> x
358 | _ ->
359 try List.find (fun x -> is_some (list_find_opt (fun y -> checkpatt y) x.ai_patterns)) accumulator
360 with Not_found -> second_try database
361
362
363 let get_mimetype filename database =
364 (get_mimetype_aux filename database).ai_mimetype
365
366
367 let path_database mlstatedir =
368 PathTransform.string_to_mysys ~relative_position:(PathTransform.of_string (Lazy.force mlstatedir)) "share/opa/mimetype_database.xml"
369
370 let build_mimetype_database database =
371 let ic = open_in database in
372 let inic = Xmlm.make_input ~enc:(Some `UTF_8) ~strip:true (`Channel ic) in
373 let input () = try Xmlm.input inic with (Xmlm.Error ((p1,p2), error)) as e -> (pp "%d:%d : %s" p1 p2 (Xmlm.error_message error); raise e) in
374 let is_end = function `El_end -> true | _ -> false in
375 let assert_end e = assert (is_end e) in
376 let check s tag attr = match s with `El_start (("", t), lst) -> assert (t = tag && lst = attr) | _ -> raise Malformed in
377 let n t = ("",t) in
378
379 let signal = input () in
380 let _ = match signal with `Dtd None -> () | _ -> raise Malformed in
381
382 let signal = input () in
383 check signal "mimetype-database" [ (n "version"), "1.0"];
384
385 let rec continue mimetypelist =
386 match input () with
387 | `El_end -> mimetypelist
388 | `El_start (("", "mimetype"), (((("", "type"), ai_mimetype))::[])) ->
389 (let signal = input () in
390 check signal "patterns" [];
391
392 (* on a au moins 1 pattern *)
393 let rec aux acc =
394 match input () with
395 | `El_end -> acc
396 | `El_start (("","patt"), [(("", "type"), "suffix"); (("", "value"), value) ]) ->
397 (assert_end (input ());
398 aux ((Suffix value) :: acc))
399 | `El_start (("","patt"), [(("", "type"), "pattern"); (("", "value"), value) ]) ->
400 (assert_end (input ());
401 aux ((Suffix value) :: acc))
402 | _ -> raise Malformed in
403
404 let ai_patterns = aux [] in
405
406 let signal = input () in
407 let ai_human_readable =
408 check signal "human-readable" [];
409 match input () with
410 | `El_end -> None
411 | `Data value -> (assert_end (input ()); Some value)
412 | _ -> raise Malformed
413 in
414
415 let signal = input () in
416 let ai_subclassof =
417 check signal "subclass-of" [];
418 match input () with
419 | `El_end -> None
420 | `Data value -> (assert_end (input ()); Some value)
421 | _ -> raise Malformed
422 in
423
424 let signal = input () in
425 check signal "magics" [];
426
427 let rec aux acc =
428 match input () with
429 | `El_end -> acc
430 | `El_start (("","magic-number"), ((("","type"), m_type) :: ((("","offset")), m_offset) :: ((("","value")), m_value) :: y)) ->
431 let m_type = string_to_match_type m_type in
432 let m_offset = string_to_offset m_offset in
433 let m_imbrik = aux [] in
434 let m_mask = match y with [] -> None | [((("","mask")), m)] -> Some m | _ -> raise Malformed in
435
436 (* replace escaped hars by them real values *)
437 let m_value =
438 Str.global_substitute (Str.regexp "\\\\\\([0-9][0-9][0-9]\\)")
439 (fun x -> String.make 1 (Char.chr (int_of_string (Str.matched_group 1 x)))) m_value in
440
441 let magic = { m_type; m_value; m_offset; m_mask; m_imbrik } in
442 aux (magic :: acc)
443 | _ -> raise Malformed
444 in
445 let ai_magics = aux [] in
446
447 let signal = input () in
448 assert_end signal;
449
450 let mimetype = { ai_mimetype; ai_patterns; ai_magics; ai_human_readable; ai_subclassof } in
451
452 continue (mimetype::mimetypelist))
453 | _ -> raise Malformed
454 in
455 let db = continue [] in
456
457 (* let mimetypes_by_hierarchie =
458 let premium, others = List.partition (fun x -> match x.ai_subclassof with None -> true | _ -> false) db in
459 let themap = List.fold_left (fun acc elem -> StringMap.add elem.ai_mimetype 0 acc) StringMap.empty premium in
460 let rec aux (themap: int StringMap.t) thelist therest last =
461 match thelist with
462 | [] ->
463 (match therest with
464 | [] -> themap
465 | _ ->
466 (if last = List.length therest then
467 (pp "probleme, pas de parents pour : %s" (String.concat "; " (List.map (fun x -> x.ai_mimetype) therest)); raise Malformed)
468 else
469 (aux themap therest [] (List.length therest))))
470 | elem::rest ->
471 (try
472 let prof = StringMap.find (Option.get elem.ai_subclassof) themap in
473 let themap = StringMap.add elem.ai_mimetype (succ prof) themap in
474 aux themap rest therest last
475 with Not_found -> aux themap rest (elem::therest) last)
476 in aux themap others [] 0
477 in
478
479 let database =
480 List.sort
481 (fun elem1 elem2 ->
482 let prof1 = StringMap.find elem1.ai_mimetype mimetypes_by_hierarchie in
483 let prof2 = StringMap.find elem2.ai_mimetype mimetypes_by_hierarchie in
484 (compare prof2 prof1)) db
485 in database
486 *)
487
488 db
489
490 let get_mimetype_database =
491 let h = Hashtbl.create 3 in
492 fun database ->
493 try
494 Hashtbl.find h database
495 with Not_found ->
496 let mdb = build_mimetype_database database in
497 Hashtbl.add h database mdb;
498 mdb
499
500
501 let mimetype_database mlstatedir = get_mimetype_database (path_database mlstatedir)
Something went wrong with that request. Please try again.