Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 533 lines (435 sloc) 15.594 kB
fccc685 Initial open-source release
MLstate authored
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
18
19
20 #<Debugvar:MIMETYPE_DEBUG>
21
fccc685 Initial open-source release
MLstate authored
22 module StringMap = Map.Make (struct type t = string let compare = Pervasives.compare end)
23 module String = BaseString
24 exception MimeType_NotFound
25 exception Malformed
26 exception Open of string
27
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
28 (* -- *)
fccc685 Initial open-source release
MLstate authored
29
30 type match_type =
31 | String
32 | Host16
33 | Host32
34 | Big16
35 | Big32
36 | Little16
37 | Little32
38 | Byte
39
40 type offset =
41 | Range of int * int
42 | Single of int
43
44 type magic =
45 { m_type : match_type;
46 m_value : string ;
47 m_offset : offset;
48 m_mask : string option;
49 m_imbrik : magic list;
50 }
51
52
53 type pattern =
54 | Suffix of string
55 | Other of string
56
57 type all_infos =
58 { ai_mimetype : string;
59 ai_patterns : pattern list;
60 ai_magics : magic list;
61 ai_human_readable : string option;
62 ai_subclassof : string option;
63 }
64
65 type mimetype_database = all_infos list
66
67 (*TO STRING *)
68
69 let string_of_pattern = function
70 | Suffix x -> "*"^x
71 | Other x -> x
72
73 let string_of_offset = function
74 | Range (a,b) -> Printf.sprintf "%d:%d" a b
75 | Single s -> string_of_int s
76
77 let string_of_match_type = function
78 | String -> "string"
79 | Host16 -> "host16"
80 | Host32 -> "host32"
81 | Big16 -> "big16"
82 | Big32 -> "big32"
83 | Little16-> "little16"
84 | Little32-> "little32"
85 | Byte -> "byte"
86
87 let string_of_magic m =
88 Printf.sprintf "%s / %s / %s / %s"
89 (string_of_match_type m.m_type)
90 m.m_value
91 (string_of_offset m.m_offset)
92 (match m.m_mask with | Some x -> x | None -> "-")
93
94
95 let string_of_all_infos ai =
96 let bf = Buffer.create 100 in
97 let add = Buffer.add_string bf in
98 let addn () = add "\n" in
99 add "";
100 add "Mime-type : ";
101 add ai.ai_mimetype;
102 addn ();
103 add "Patterns : ";
104 List.iter (fun x ->
105 add (string_of_pattern x);
106 add " - ") ai.ai_patterns;
107 addn ();
108 add "Magics : ";
109 List.iter (fun x ->
110 add (string_of_magic x);
111 add " \n\t ")
112 ai.ai_magics;
113 addn();
114 add "Humanreadable : ";
115 add (match ai.ai_human_readable with | None -> "" | Some s -> s);
116 addn ();
117 add "Sub class of : ";
118 add (match ai.ai_subclassof with None -> "" | Some s -> s);
119 add "";
120 addn ();
121 Buffer.contents bf
122
123
124 let pp fmt =
125 Printf.printf (fmt^^"\n%!")
126
127 let ipp fmt =
128 Printf.ifprintf stdout (fmt^^"\n%!")
129
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
130 let debug ?(level=0) fmt =
131 let level = succ level in
132 #<If$minlevel level> Printf.printf ("[MimeType] "^^fmt^^"\n%!")
133 #<Else> Printf.ifprintf stdout fmt #<End>
134
fccc685 Initial open-source release
MLstate authored
135
136 (*TO STRING (end) *)
137
138
139 (*FROM STRING *)
140 let string_to_pattern s =
141 if String.length s> 1 && s.[0] = '*' then
142 Suffix (String.sub s 1 (String.length s -1))
143 else
144 Other s
145
146 let string_to_offset x =
147 let beg,eend = String.split_char ':' x in
148 if eend = "" then
149 Single (int_of_string beg)
150 else
151 Range (int_of_string beg, int_of_string eend)
152
153
154 let string_to_match_type s =
155 match s with
156 | "string" -> String
157 | "host16" -> Host16
158 | "host32" -> Host32
159 | "big16" -> Big16
160 | "big32" -> Big32
161 | "little16"-> Little16
162 | "little32"-> Little32
163 | "byte" -> Byte
164 | _ -> raise Malformed
165
166
167
168 (*FROM STRING (end) *)
169
170 (*UTILS *)
171 let replace avant apres =
172 Str.global_replace (Str.regexp avant) apres
173
174
175 let chaine_de_remplacement s =
176 let rmx s =
177 let hexa = Str.regexp "\\\\x\\([0-9a-eA-E][0-9a-eA-E]\\)" in
178 Str.global_substitute hexa
179 (fun x ->
180 let recup = "0x"^(Str.matched_group 1 x) in
181 let valeur = int_of_string recup in
182 let rempl = String.make 1 (Char.chr valeur) in
183 rempl) s
184 in
185
186 let rmo s =
187 let hexa = Str.regexp "\\\\\\([0-8][0-8][0-8]\\)" in
188 Str.global_substitute hexa
189 (fun x ->
190 let recup = "0o"^(Str.matched_group 1 x) in
191 let valeur = int_of_string recup in
192 let rempl = String.make 1 (Char.chr valeur) in
193 rempl) s
194 in
195
196 let last_modif s = rmx (rmo s) in
197
198 let chaine = [
199 ("&lt;", "<");
200 ("&gt;", ">");
201 ("&quot;", "\"");
202 ] in
203
204 last_modif (List.fold_left (fun acc (av,ap) -> replace av ap acc) s chaine)
205 ;;
206
207
208 let petit_boutiste value =
209 let length= String.length value in
210 if length < 2 || length mod 2 != 0 then value
211 else
212 (let rec to_list x acc =
213 if x >= length then
214 acc
215 else
216 to_list (x+2) ((String.sub value x 2)::acc)
217 in
218 let maliste =
219 if String.is_prefix "0x" value then
220 "0x"::(to_list 2 [])
221 else to_list 0 [] in
222 let result = String.concat "" maliste in
223 result)
224
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
225 let to_byte s =
226 String.make 1 (Char.chr (int_of_string s))
227
228
fccc685 Initial open-source release
MLstate authored
229
230
231 let mise_enforme v = function
232 | String -> chaine_de_remplacement v
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
233 | Byte -> to_byte v
fccc685 Initial open-source release
MLstate authored
234 | Little16
235 | Little32 -> chaine_de_remplacement (petit_boutiste v)
236 (* | Host16 | Host32 -> failwith "refuse"*)
237 | _ -> v
238
239
240
241 let list_find_opt f l = try Some (List.find f l) with Not_found -> None
242 let is_some = function Some _ -> true | None -> false
243
244 (*UTILS (end) *)
245
246 (*CREATE FUNCTIONS *)
247 let create_all_infos mt patterns magics hr sbc=
248 { ai_mimetype = mt;
249 ai_patterns = patterns;
250 ai_magics = magics;
251 ai_human_readable = hr;
252 ai_subclassof = sbc;
253 }
254
255 let create_magic ?mask ttype value offset imbrik =
256 { m_type = ttype;
257 m_value = mise_enforme value ttype;
258 m_offset = offset;
259 m_mask = mask;
260 m_imbrik = imbrik;
261 }
262
263 (*CREATE FUNCTIONS (end) *)
264
265 let get_mimetype_aux filename database =
266 let file =
267 try open_in_bin filename
268 with e -> raise (Open (Printexc.to_string e)) in
269 let length = in_channel_length file in
270 let content = String.make length ' ' in
271 really_input file content 0 length;
272
273 let rec check_mime_list accumulator mimelist =
274 match mimelist with
275 | [] -> accumulator
276 | mime::mime_rest ->
277 (let rec check_magic_list magiclist =
278 match magiclist with
279 | [] -> false
280 | magic::magic_rest ->
281 (let value = magic.m_value in
282
283 let relance cond =
284 if cond then
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
285 let c =
286 match magic.m_imbrik with
fccc685 Initial open-source release
MLstate authored
287 | [] -> true
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
288 | _ -> check_magic_list magic.m_imbrik
289 in
290 if c then true
291 else check_magic_list magic_rest
fccc685 Initial open-source release
MLstate authored
292 else
293 check_magic_list magic_rest
294 in
295
296 let aux () =
297 match magic.m_offset with
298 | Range (debut, fin) ->
299 (if length <= debut && length < fin then check_magic_list magic_rest
300 else
301 (let check_string =
302 is_some
303 (String.is_contained_from_until value content debut fin) in
304 relance check_string)
305 )
306
307 | Single x ->
308 (let debut, a_lire = x, (String.length value) in
309
310 (*TODO a optimiser *)
311 if length <= debut && length < (debut + a_lire) then check_magic_list magic_rest
312 else
313 (let fin = debut + a_lire in
314 let rec check_single_string i =
315 if i >= fin then true
316 else
317 (let r = content.[i] = value.[i-debut] in
318 if r then check_single_string (succ i)
319 else false)
320 in
321
322 relance (check_single_string debut))
323 )
324 in aux ()
325 ) in
326
327 if check_magic_list mime.ai_magics then check_mime_list (mime::accumulator) mime_rest
328 else check_mime_list accumulator mime_rest
329
330 ) in
331
332 let checkpatt pattern =
333 match pattern with
334 | Suffix suff ->
335 String.is_suffix suff filename
336 | Other pattern ->
337 (match String.findi '*' pattern with
338 | None -> filename = pattern
339 | Some index ->
340 (let length_pattern = String.length pattern in
341 if index = String.length pattern -1 then
342 String.is_prefix
343 (String.sub pattern 0 (length_pattern - 2)) filename
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
344 else failwith (Printf.sprintf "je ne sais pas traiter ce genre de pattern %s" pattern)
fccc685 Initial open-source release
MLstate authored
345 )
346 )
347 in
348
349
350 let second_try mimelist =
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
351 debug "no result : check_pattern";
fccc685 Initial open-source release
MLstate authored
352 let res =
353 list_find_opt
354 (fun mime ->
355 match mime.ai_patterns with
356 | [] -> false
357 | _ ->
358 let rec aux = function
359 | [] -> false
360 | x::y ->
361 let r = checkpatt x in
362 if r then true
363 else aux y
364 in aux mime.ai_patterns)
365 mimelist
366 in
367 match res with
368 | Some x -> x
369 | None -> raise MimeType_NotFound
370 in
371
372
373 let accumulator = check_mime_list [] database in
374
375 match accumulator with
376 | [] -> second_try database
377 | [x] -> x
378 | _ ->
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
379 let _ = debug "more than one result : %s" (BaseList.print string_of_all_infos accumulator) in
fccc685 Initial open-source release
MLstate authored
380 try List.find (fun x -> is_some (list_find_opt (fun y -> checkpatt y) x.ai_patterns)) accumulator
381 with Not_found -> second_try database
382
383
384 let get_mimetype filename database =
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
385 debug "";
386 debug "Check mimetype of %s" filename;
387 try (get_mimetype_aux filename database).ai_mimetype
388 with Sys_error s -> debug "problem !"; failwith ("Sys_error in mymetype detection : "^s)
fccc685 Initial open-source release
MLstate authored
389
390
391 let path_database mlstatedir =
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
392 let p = PathTransform.string_to_mysys
393 ~relative_position:(PathTransform.of_string (Lazy.force mlstatedir))
394 "share/opa/mimetype_database.xml" in
395 debug "Use database at : %s" p;
396 p
fccc685 Initial open-source release
MLstate authored
397
398 let build_mimetype_database database =
399 let ic = open_in database in
400 let inic = Xmlm.make_input ~enc:(Some `UTF_8) ~strip:true (`Channel ic) in
401 let input () = try Xmlm.input inic with (Xmlm.Error ((p1,p2), error)) as e -> (pp "%d:%d : %s" p1 p2 (Xmlm.error_message error); raise e) in
402 let is_end = function `El_end -> true | _ -> false in
403 let assert_end e = assert (is_end e) in
404 let check s tag attr = match s with `El_start (("", t), lst) -> assert (t = tag && lst = attr) | _ -> raise Malformed in
405 let n t = ("",t) in
406
407 let signal = input () in
408 let _ = match signal with `Dtd None -> () | _ -> raise Malformed in
409
410 let signal = input () in
411 check signal "mimetype-database" [ (n "version"), "1.0"];
412
413 let rec continue mimetypelist =
414 match input () with
415 | `El_end -> mimetypelist
416 | `El_start (("", "mimetype"), (((("", "type"), ai_mimetype))::[])) ->
417 (let signal = input () in
418 check signal "patterns" [];
419
420 (* on a au moins 1 pattern *)
421 let rec aux acc =
422 match input () with
423 | `El_end -> acc
424 | `El_start (("","patt"), [(("", "type"), "suffix"); (("", "value"), value) ]) ->
425 (assert_end (input ());
426 aux ((Suffix value) :: acc))
427 | `El_start (("","patt"), [(("", "type"), "pattern"); (("", "value"), value) ]) ->
428 (assert_end (input ());
429 aux ((Suffix value) :: acc))
430 | _ -> raise Malformed in
431
432 let ai_patterns = aux [] in
433
434 let signal = input () in
435 let ai_human_readable =
436 check signal "human-readable" [];
437 match input () with
438 | `El_end -> None
439 | `Data value -> (assert_end (input ()); Some value)
440 | _ -> raise Malformed
441 in
442
443 let signal = input () in
444 let ai_subclassof =
445 check signal "subclass-of" [];
446 match input () with
447 | `El_end -> None
448 | `Data value -> (assert_end (input ()); Some value)
449 | _ -> raise Malformed
450 in
451
452 let signal = input () in
453 check signal "magics" [];
454
455 let rec aux acc =
456 match input () with
457 | `El_end -> acc
458 | `El_start (("","magic-number"), ((("","type"), m_type) :: ((("","offset")), m_offset) :: ((("","value")), m_value) :: y)) ->
459 let m_type = string_to_match_type m_type in
460 let m_offset = string_to_offset m_offset in
461 let m_imbrik = aux [] in
462 let m_mask = match y with [] -> None | [((("","mask")), m)] -> Some m | _ -> raise Malformed in
463
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
464 (* replace escaped chars by them real values *)
fccc685 Initial open-source release
MLstate authored
465 let m_value =
1831471 [debug] mimetype: add debugVariable MIMETYPE_DEBUG and some debug
Raja authored
466 let res =
467 Str.global_substitute (Str.regexp "\\\\\\([0-9][0-9][0-9]\\)")
468 (fun x -> String.make 1 (Char.chr (int_of_string (Str.matched_group 1 x)))) m_value
469 in mise_enforme res m_type
470 in
fccc685 Initial open-source release
MLstate authored
471
472 let magic = { m_type; m_value; m_offset; m_mask; m_imbrik } in
473 aux (magic :: acc)
474 | _ -> raise Malformed
475 in
476 let ai_magics = aux [] in
477
478 let signal = input () in
479 assert_end signal;
480
481 let mimetype = { ai_mimetype; ai_patterns; ai_magics; ai_human_readable; ai_subclassof } in
482
483 continue (mimetype::mimetypelist))
484 | _ -> raise Malformed
485 in
486 let db = continue [] in
487
488 (* let mimetypes_by_hierarchie =
489 let premium, others = List.partition (fun x -> match x.ai_subclassof with None -> true | _ -> false) db in
490 let themap = List.fold_left (fun acc elem -> StringMap.add elem.ai_mimetype 0 acc) StringMap.empty premium in
491 let rec aux (themap: int StringMap.t) thelist therest last =
492 match thelist with
493 | [] ->
494 (match therest with
495 | [] -> themap
496 | _ ->
497 (if last = List.length therest then
498 (pp "probleme, pas de parents pour : %s" (String.concat "; " (List.map (fun x -> x.ai_mimetype) therest)); raise Malformed)
499 else
500 (aux themap therest [] (List.length therest))))
501 | elem::rest ->
502 (try
503 let prof = StringMap.find (Option.get elem.ai_subclassof) themap in
504 let themap = StringMap.add elem.ai_mimetype (succ prof) themap in
505 aux themap rest therest last
506 with Not_found -> aux themap rest (elem::therest) last)
507 in aux themap others [] 0
508 in
509
510 let database =
511 List.sort
512 (fun elem1 elem2 ->
513 let prof1 = StringMap.find elem1.ai_mimetype mimetypes_by_hierarchie in
514 let prof2 = StringMap.find elem2.ai_mimetype mimetypes_by_hierarchie in
515 (compare prof2 prof1)) db
516 in database
517 *)
518
519 db
520
521 let get_mimetype_database =
522 let h = Hashtbl.create 3 in
523 fun database ->
524 try
525 Hashtbl.find h database
526 with Not_found ->
527 let mdb = build_mimetype_database database in
528 Hashtbl.add h database mdb;
529 mdb
530
531
532 let mimetype_database mlstatedir = get_mimetype_database (path_database mlstatedir)
Something went wrong with that request. Please try again.