Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 480 lines (463 sloc) 18.965 kB
fccc685 Initial open-source release
MLstate authored
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (*
19 @author Louis Gesbert
20 **)
21
22 module D = Badop.Dialog
23 module G = Dbgraph
24 module X = Xmlm
25
26 (* reverse of the function escape_string in Xml_dump *)
27 let unescape_string s =
28 let slen = String.length s in
29 let rec aux b i =
30 if i >= slen then b else
31 let c = s.[i] in
32 let next () = aux (FBuffer.add b (String.make 1 c)) (i+1) in
33 if c = '&' && i < slen - 1 then
34 try
35 let colon = String.index_from s (i+1) ';' in
36 let elen = colon - i - 1 in
37 if 0 < elen && elen <= 6 then
38 match String.sub s (i+1) elen with
39 | "amp" -> aux (FBuffer.add b "&") (colon+1)
40 | "lt" -> aux (FBuffer.add b "<") (colon+1)
41 | "gt" -> aux (FBuffer.add b ">")(colon+1)
42 | "quot" -> aux (FBuffer.add b "\"") (colon+1)
43 | "apos" -> aux (FBuffer.add b "'") (colon+1)
44 | e when e.[0] = '#' ->
45 (try
46 e.[0] <- '0'; (* convert XML-hex to ocaml-hex (#xB414 becomes 0xB414). No effect on decimal *)
47 let c = int_of_string e in
48 if c = 0 then next ()
49 else aux (FBuffer.add b (Cactutf.cons c)) (colon+1)
50 with Failure "int_of_string" -> next ())
51 | _ -> next ()
52 else next ()
53 with Not_found -> next ()
54 else next ()
55 in
56 FBuffer.contents (aux (FBuffer.create slen) 0)
57
58 let print_pos (li,col) = Printf.sprintf "line %d, column %d" li col
59 let warn fmt = Logger.warning ("XML import: "^^fmt)
60 let error fmt = Logger.error ("XML import: "^^fmt)
61
62 module F (B: Badop.S) (C: Xml_dump.SigCpsBackend) = struct
63
64 (* our tiny CPS lib... *)
65 let mkcont = C.mkcont
66 let (@>) f k = f k
67 let (|>) = C.return
68 let rec fold_list f acc l k = match l with
69 | [] -> acc |> k
70 | hd::tl -> f acc hd @> mkcont k @> fun acc -> fold_list f acc tl @> k
71
72 let commit_transaction_step =
73 #<If:BADOP_XML_IMPORT_COMMIT_TRANSACTION_STEP$minlevel 1>
74 int_of_string (Option.get DebugVariables.badop_xml_import_commit_transaction_step)
75 #<Else>
76 100
77 #<End>
78
79 let path_schema = Badop.Path.of_list [ Badop.Key.IntKey 2; Badop.Key.IntKey 0 ]
80 let path_schema_version = Badop.Path.of_list [ Badop.Key.IntKey 2; Badop.Key.IntKey (-1) ]
81 (* Path /2/0 to schema defined in dbGen_private.ml (/2 is config_keys, config key 0 is schema) *)
82 let path_root = Badop.Path.of_list [ Badop.Key.IntKey 1 ]
83
84 let from_input db t input k =
85 let parsed_nodes = ref 0 in
86 let written_nodes = ref 0 in
87 let recursive_nodes = Hashtbl.create 16 in
88 let do_link = ref (fun _ _ _ -> assert false) in
89 let emergency_cont =
90 mkcont k
91 @> fun _ ->
92 error "fatal error, stopping now (after %d nodes read, %d values written)"
93 (!parsed_nodes) (!written_nodes);
94 () |> k
95 in
96 let tr_init = None, commit_transaction_step
97 in
98 let tr_close (tropt,_n) k =
99 match tropt with
100 | None -> () |> k
101 | Some tr ->
102 B.Tr.prepare tr
103 @> fun (tr, _ok) ->
104 B.Tr.commit tr
105 @> fun ok ->
106 if not ok then
107 warn "commit of a transaction failed. Your XML import may be incomplete.";
108 () |> k
109 in
110 let write (tropt,n) path write_op k =
3321f32 [enhance] database: fatal database errors now trigger the fail-transa…
Louis Gesbert authored
111 (fun k -> match tropt with
112 | Some tr -> tr |> k
113 | None ->
114 B.Tr.start db
115 (fun err -> error "database error: %s" (Printexc.to_string err) |> emergency_cont)
116 @> fun tr -> tr |> k)
fccc685 Initial open-source release
MLstate authored
117 @> mkcont k
118 @> fun tr ->
119 B.write tr path write_op
120 @> fun resp ->
121 incr written_nodes;
122 let n = n-1 in
123 let trn = (Some (Badop.Aux.result_transaction resp), n) in
124 if n > 0 then trn |> k
125 else tr_close trn @> mkcont k @> fun () -> tr_init |> k
126 in
127 let write_schema trn t k =
128 let g = G.from_tree t in
129 write trn path_schema_version (Badop.Set (D.query (Badop.Data.Int (G.version)))) @> mkcont k
130 @> fun trn ->
131 write trn path_schema (Badop.Set (D.query (Badop.Data.Binary (G.export_schema g))))
132 @> k
133 in
134 let catch_read_error trn f x k =
135 let r = try Some (f x) with
136 | X.Error (pos, e) ->
137 warn "parsing of XML input failed at %s:\n%s"
138 (print_pos pos) (X.error_message e);
139 None
140 in
141 match r with
142 | Some r -> r |> k
143 | None -> tr_close trn @> emergency_cont
144 in
145 let debug_print_signal = fun _ -> () (* function *)
146 (* | `Dtd _ -> Printf.eprintf ">> Read: Dtd\n" *)
147 (* | `El_start ((_ns,tag),_attr) -> Printf.eprintf ">> Read: Tag <%s>\n" tag *)
148 (* | `El_end -> Printf.eprintf ">> Read: Tag end\n" *)
149 (* | `Data d -> Printf.eprintf ">> Read: Data (\"%s\")\n" d *)
150 in
151 (* reads while catching errors from Xmlm, trimming `Data and skipping empty `Data *)
152 let rec read trn k =
153 catch_read_error trn X.input input @> mkcont k
154 @> function
155 | `Data d ->
156 let d = Base.String.trim d in
157 if d = "" then read trn @> k
158 else
159 let s = `Data d in
160 incr parsed_nodes; debug_print_signal s; s |> k
161 | s -> incr parsed_nodes; debug_print_signal s; s |> k
162 in
163 (* only applies f if there's something else than `El_end to read. Trims and
164 skips `Data like read *)
165 let rec read_content trn dflt f k =
166 let pos = X.pos input in
167 catch_read_error trn X.peek input @> mkcont k
168 @> fun s -> match s with
169 | `El_end -> dflt |> k
170 | `Data d ->
171 let d = Base.String.trim d in
172 ignore (X.input input);
173 incr parsed_nodes;
174 if d = "" then read_content trn dflt f k
175 else
176 let s = `Data d in
177 debug_print_signal s;
178 f s pos @> k
179 | _ ->
180 ignore (X.input input);
181 incr parsed_nodes;
182 debug_print_signal s;
183 f s pos @> k
184 in
185 let rec read_tag_end ?ign ?(silent=false) trn k =
186 let pos = X.pos input in
187 read trn @> mkcont k
188 @> function
189 | `El_end ->
190 if not silent then
191 (match ign with None -> () | Some ign_pos ->
192 warn "ignored unexpected content from %s to %s"
193 (print_pos ign_pos) (print_pos pos));
194 () |> k
195 | `El_start _ ->
196 read_tag_end ~silent:true trn @> mkcont k
197 @> fun () -> read_tag_end ~ign:(Option.default pos ign) ~silent trn @> k
198 | `Data d when Base.String.trim d = "" ->
199 read_tag_end ?ign ~silent trn @> k
200 | _ ->
201 read_tag_end ~ign:(Option.default pos ign) ~silent trn @> k
202 in
203 (* Call f on all tags that can be read as direct children of the current
204 tag; f is supposed to consume all contents of the tag. trn is always passed
205 along. *)
206 let rec read_tags trn f k =
207 read_content trn trn
208 (fun x pos k -> match x with
209 | `El_start (("",tag), attrlist) ->
210 f trn tag attrlist pos @> mkcont k
211 @> fun trn ->
212 read_tag_end trn @> mkcont k
213 @> fun () -> read_tags trn f @> k
214 | `Data _ ->
215 let pos' = X.pos input in
216 warn "ignored unexpected string data from %s to %s"
217 (print_pos pos) (print_pos pos');
218 read_tags trn f @> k
219 | _ -> assert false)
220 @> k
221 in
222 let rec read_tag trn expected_tag dflt f k =
223 read_content trn dflt
224 (fun s pos k -> match s with
225 | `El_start (("",tag), attrs) when tag = expected_tag ->
226 f attrs pos @> k
227 | `El_start ((_,tag), _) ->
228 warn "ignored tag <%s>, was expecting <%s> at %s"
229 tag expected_tag (print_pos pos);
230 read_tag_end ~ign:pos trn @> mkcont k
231 @> fun () -> read_tag trn expected_tag dflt f @> k
232 | `Data _ ->
233 warn "ignored text data, was expecting <%s> at %s"
234 expected_tag (print_pos pos);
235 read_tag trn expected_tag dflt f @> k
236 | _ -> assert false)
237 @> k
238 in
239 let read_data trn dflt f k =
240 read_content trn dflt
241 (fun x pos k ->
242 match x with
243 | `Data str ->
244 f (unescape_string (Base.String.strip_quotes str)) pos @> k
245 | `El_start (("","base64"),[]) ->
246 read_content trn dflt
247 (fun x pos k -> match x with
248 | `Data str ->
249 let dat =
250 try Some (Base.String.base64decode str)
251 with Invalid_argument "base64decode" ->
252 warn "skipping invalid base64 encoding at %s" (print_pos pos);
253 None
254 in
255 (match dat with
256 | Some dat -> f dat pos @> k
257 | None -> dflt |> k)
258 | `El_start _ ->
259 read_tag_end ~ign:pos trn @> mkcont k
260 @> fun () -> dflt |> k
261 | _ -> assert false)
262 @> mkcont k
263 @> fun res ->
264 read_tag_end trn @> mkcont k (* </base64> *)
265 @> fun () -> res |> k
266 | _ -> assert false)
267 @> k
268 in
269 let warn_nonempty_attrs attrlist pos =
270 if attrlist <> [] then
271 warn "ignored unexpected tag attributes at %s" (print_pos pos);
272 in
273 let import_leaf trn path leaf k =
274 read_data trn trn
275 (fun dat pos k ->
276 let dat =
277 try
278 match leaf with
279 | G.Leaf_int -> Some (Badop.Data.Int (int_of_string dat))
280 | G.Leaf_float -> Some (Badop.Data.Float (float_of_string dat))
281 | G.Leaf_text -> Some (Badop.Data.Text dat)
282 | G.Leaf_binary -> Some (Badop.Data.Binary dat)
283 with
284 | Failure "int_of_string" ->
285 warn "ignored invalid integer \"%s\" at %s" dat (print_pos pos);
286 None
287 | Failure "float_of_string" ->
288 warn "ignored invalid float \"%s\" at %s" dat (print_pos pos);
289 None
290 in
291 match dat with
292 | Some d ->
293 write trn path (Badop.Set (D.query d)) @> k
294 | None -> trn |> k)
295 @> k
296 in
297 let rec import_record_aux path edges trn tag attrlist pos k =
298 warn_nonempty_attrs attrlist pos;
299 match List.filter (function G.Field (tag',_), _ when tag' = tag -> true | _ -> false) edges with
300 | [ G.Field (_tag, id), child ] ->
301 (* Printf.eprintf "Inside record field %s\n%!" tag; *)
302 import_node trn child (Badop.Path.add path (Badop.Key.IntKey id))
303 @> k
304 | [] ->
305 warn "ignored unexpected tag <%s> at %s"
306 tag (print_pos pos);
307 trn |> k
308 | _ -> assert false
309 and import_record trn path edges k =
310 read_tags trn (import_record_aux path edges) @> k
311 and import_map trn path t kind k =
312 let rec import_entries trn k =
313 read_tag trn "entry" trn
314 (fun attrlist pos_entry k ->
315 warn_nonempty_attrs attrlist pos_entry;
316 read_tag trn "key" None
317 (fun attrlist pos k ->
318 warn_nonempty_attrs attrlist pos;
319 read_data trn None
320 (fun raw_key _pos k ->
321 let key_opt = match kind with
322 | G.Kint ->
323 (try Some (Badop.Key.IntKey (int_of_string raw_key))
324 with Failure "int_of_string" -> None)
325 | G.Kstring -> Some (Badop.Key.StringKey (unescape_string raw_key))
326 | G.Kfields _ ->
327 error "set keys unsupported yet";
328 None
329 in
330 key_opt |> k)
331 @> mkcont k
332 @> fun key_opt -> read_tag_end trn @> mkcont k (* </key> *)
333 @> fun () -> key_opt |> k)
334 @> mkcont k
335 @> function
336 | None ->
337 warn "ignored invalid %smap entry with bad key at %s"
338 (match kind with G.Kint -> "int" | G.Kstring -> "string" | G.Kfields _ -> "record")
339 (print_pos pos_entry);
340 read_tag_end ~ign:pos_entry trn @> mkcont k (* </entry> *)
341 @> fun () -> import_entries trn @> k
342 | Some key ->
343 read_tag trn "value" trn
344 (fun attrlist pos k ->
345 warn_nonempty_attrs attrlist pos;
346 import_node trn t (Badop.Path.add path key) @> mkcont k
347 @> fun trn -> read_tag_end trn @> mkcont k (* </value> *)
348 @> fun () -> trn |> k)
349 @> mkcont k
350 @> fun trn -> read_tag_end trn @> mkcont k (* </entry> *)
351 @> fun () -> import_entries trn @> k)
352 @> k
353 in
354 read_tag trn "map" trn
355 (fun attrlist pos k ->
356 warn_nonempty_attrs attrlist pos;
357 import_entries trn @> mkcont k
358 @> fun trn -> read_tag_end trn @> mkcont k (* </map> *)
359 @> fun () -> trn |> k)
360 @> k
361 and import_recursive trn id path (_e,t) trec k =
88a48f5 [fix] db/xml-import: fixed bug with multiple recursive nodes (OPA-706)
Louis Gesbert authored
362 let key,id0 =
fccc685 Initial open-source release
MLstate authored
363 match t with
88a48f5 [fix] db/xml-import: fixed bug with multiple recursive nodes (OPA-706)
Louis Gesbert authored
364 | G.Tnode (id0, _, _) ->
fccc685 Initial open-source release
MLstate authored
365 let key = try
88a48f5 [fix] db/xml-import: fixed bug with multiple recursive nodes (OPA-706)
Louis Gesbert authored
366 let _, _, key = Hashtbl.find recursive_nodes id0 in key+1
fccc685 Initial open-source release
MLstate authored
367 with Not_found -> 0 in
88a48f5 [fix] db/xml-import: fixed bug with multiple recursive nodes (OPA-706)
Louis Gesbert authored
368 Hashtbl.replace recursive_nodes id0 (path, trec, key);
369 key,id0
fccc685 Initial open-source release
MLstate authored
370 | _ ->
371 error "Unexpected schema structure at recursive node %s" id;
372 assert false
373 in
374 let next_path = Badop.Path.add path (Badop.Key.IntKey key) in
375 if key = 0 then
376 write trn path (Badop.Clear (D.query ())) @> mkcont k
88a48f5 [fix] db/xml-import: fixed bug with multiple recursive nodes (OPA-706)
Louis Gesbert authored
377 @> fun trn -> import_node trn t next_path @> mkcont k
378 @> fun trn -> Hashtbl.remove recursive_nodes id0; trn |> k
fccc685 Initial open-source release
MLstate authored
379 else
88a48f5 [fix] db/xml-import: fixed bug with multiple recursive nodes (OPA-706)
Louis Gesbert authored
380 let lnk = !do_link in
381 do_link := (fun _ _ _ -> error "Rectype linking error" |> emergency_cont);
382 lnk trn next_path @> mkcont k
fccc685 Initial open-source release
MLstate authored
383 @> fun trn -> import_node trn t next_path @> k
384 and import_sum trn path el k =
385 read_tags trn
386 (fun trn tag attrlist pos k ->
387 (* Find path of sum case *)
388 match
389 List.find_all
390 (function
391 | G.SumCase _, G.Tnode (_, G.Product, el) ->
392 List.exists (function G.Field (tag', _id), _ when tag' = tag -> true | _ -> false) el
393 | _ -> false)
394 el
395 with
396 | [G.SumCase id, G.Tnode (_, G.Product, el)] ->
397 write trn path (Badop.Set (D.query (Badop.Data.Int id)))
398 @> mkcont k
399 @> fun trn ->
400 let path = Badop.Path.add path (Badop.Key.IntKey id) in
401 import_record_aux path el trn tag attrlist pos @> k
402 | _::_ ->
403 (* we don't handle sum types with a conflicting first field (that would need look-ahead) *)
404 warn "Ignored tag <%s> which has several matching cases in the database schema at %s"
405 tag (print_pos pos);
406 trn |> k
407 | [] ->
408 warn "Ignored tag <%s> which has no matching cases in the database schema at %s"
409 tag (print_pos pos);
410 trn |> k
411 ) @> k
412 and import_link trn id path k =
413 do_link :=
414 (fun trn p k -> write trn path (Badop.Link (D.query p)) @> k);
415 let recpath, trec, _ = Hashtbl.find recursive_nodes id in
416 import_node trn trec recpath @> k
417 and import_node trn t path k = match t with
418 | G.Tnode (_id, G.Product, []) -> write trn path (Badop.Set (D.query (Badop.Data.Unit))) @> k
419 | G.Tnode (_id, G.Product, el) -> import_record trn path el @> k
420 | G.Tnode (_id, G.Multi, [G.Multi_edge kind, t]) -> import_map trn path t kind @> k
421 | G.Tnode (id, G.Hidden, [e]) -> import_recursive trn id path e t @> k
422 | G.Tnode (_id, G.Sum, el) -> import_sum trn path el @> k
423 | G.Tnode (_id, G.Leaf lf, []) -> import_leaf trn path lf @> k
424 | G.Tnode (_id, _, _) -> assert false (* invalid graph *)
425 | G.Tlink id -> import_link trn id path @> k
426
427 in
428 read tr_init @> mkcont k
429 @> fun _dtd ->
430 read tr_init @> mkcont k
431 @> function
432 | `El_start (("","opa_database_root"), attr) ->
433 let version =
434 try Some (List.assoc ("","version") attr) with
435 | Not_found -> error "could not read version number from database XML input"; None
436 in
437 if version = Some Xml_dump.xml_dump_version then
438 write_schema tr_init t @> mkcont k
439 @> fun trn ->
440 import_node trn t path_root @> mkcont k
441 @> fun trn ->
442 read_tag_end trn @> mkcont k
443 @> fun () ->
444 tr_close trn @> mkcont k
445 @> fun () ->
446 let pos = X.pos input in
447 if try not (X.eoi input) with X.Error _ -> true then
448 warn "trailing garbage at end of file (after %s)" (print_pos pos);
449 Logger.info
450 "Database XML import finished. %d nodes read, %d values written." (!parsed_nodes) (!written_nodes);
451 () |> k
452 else
453 (error "this XML database dump seems to be of an unknown version: %s (expected %s)"
454 (Option.default "<no version number>" version) Xml_dump.xml_dump_version;
455 () |> k)
456 | _ ->
457 error "can't read XML root node";
458 () |> k
459
460 let from_channel db t ch k =
461 let input = X.make_input (`Channel ch) in
462 from_input db t input @> k
463
464 let from_function db t f k =
465 let input = X.make_input (`Fun f) in
466 from_input db t input @> k
467
468 let from_file db t file k =
469 let ch = try Some (open_in file) with Sys_error _ -> None in
470 match ch with
471 | Some ch ->
472 from_channel db t ch @> mkcont k
473 @> fun () ->
474 close_in ch;
475 () |> k
476 | None ->
477 error "could not open file %s for reading" file;
478 () |> k
479 end
Something went wrong with that request. Please try again.