Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 766 lines (677 sloc) 25.864 kB
fccc685 Initial open-source release
MLstate authored
1 (*
2 Copyright © 2011 MLstate
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 (*
19 @author1 Henri Binsztok,
20 @author2 Gregoire Makridis
21 **)
22
23 (* depends *)
24 module List = BaseList
25
26 (* alias *)
27
28 (* -- *)
29
30 exception UnqualifiedPath
31 exception Merge
32
33 type index = ((Path.t * float) list) StringMap.t
34
35 type node_map = Node.t UidMap.t
36
37 type t = {
38 rev : Revision.t ;
39 tcount : Eid.t ;
40 next_uid : Uid.t ;
41 uid_of_eid : (Uid.t RevisionMap.t) EidMap.t ;
42 node_of_uid : node_map ;
43 index : index ;
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
44 tmp_uid_of_eid : Uid.t Eid.Map.t ;
20bbd87 [enhance] db3: Use TabMap instead of Map for uids
Raja authored
45 tmp_node_of_uid : Node.t Uid.Map.t ;
fccc685 Initial open-source release
MLstate authored
46 }
47
48 (*
49 - field rev is the current revision of the DB
50 - field tcount is the number of nodes in the DB
51 - field next_uid is the new uid available for node creation
52 - field uid_of_eid is the map (eid -> rev -> uid)
53 - field node_of_uid is the map (uid -> Node.t)
54 - field index is the map (string -> Path.t list) that for a word gives the list
55 of paths where the word can be found and the pf_idf score for this word on
56 each path
57 - fields tmp_uid_of_eid and tmp_node_of_uid are current version of
58 eid -> uid and uid -> Node.t, usefull to know which nodes have been created
59 and must been writen on disk.
60 *)
61
62 (*
63 the nodes are stored either in an IntMap (mostly when the db is used for debuging).
64 *)
65
66 (* the root of the database *)
67 let root_eid = Eid.make 0
68 let start = root_eid
69
70
71 (******************)
72 (* screen display *)
73 (******************)
74
75 let print_tmp_node_map db =
20bbd87 [enhance] db3: Use TabMap instead of Map for uids
Raja authored
76 if Uid.Map.is_empty db.tmp_node_of_uid
fccc685 Initial open-source release
MLstate authored
77 then "Empty"
20bbd87 [enhance] db3: Use TabMap instead of Map for uids
Raja authored
78 else Uid.Map.fold (
fccc685 Initial open-source release
MLstate authored
79 fun uid node acc -> Printf.sprintf "%s\t%d -> %s\n"
80 acc (Uid.value uid) (Node.to_string node)
81 ) db.tmp_node_of_uid "\n"
82
83 let print_tmp_uid_map db =
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
84 if Eid.Map.is_empty db.tmp_uid_of_eid
fccc685 Initial open-source release
MLstate authored
85 then "Empty"
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
86 else Eid.Map.fold (
fccc685 Initial open-source release
MLstate authored
87 fun eid uid acc -> Printf.sprintf "%s\t%d -> %s\n"
88 acc (Eid.value eid) (Uid.to_string uid)
89 ) db.tmp_uid_of_eid "\n"
90
91 let print_uid_map_bis map =
92 if map = RevisionMap.empty then "Empty"
93 else
94 fst (RevisionMap.fold(
95 fun rev uid (acc, sep) ->
96 let res = Printf.sprintf "%s%s%d -> %s\n"
97 acc sep (Revision.value rev) (Uid.to_string uid) in
98 res, " "
99 ) map ("", ""))
100
101 let print_uid_map db =
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
102 if EidMap.is_empty db.uid_of_eid then "Empty"
fccc685 Initial open-source release
MLstate authored
103 else
104 EidMap.fold (
105 fun eid map acc -> Printf.sprintf "%s%d -> %s"
106 acc (Eid.value eid) (print_uid_map_bis map)
107 ) db.uid_of_eid "\n"
108
109 let print_node_map db =
110 UidMap.fold (
111 fun uid node acc -> Printf.sprintf "%s\t%d -> %s\n"
112 acc (Uid.value uid) (Node.to_string node)
113 ) db.node_of_uid "\n"
114
115 let print_index db =
116 let index = db.index in
117 if StringMap.is_empty index then "Empty"
118 else
119 StringMap.fold (
120 fun name path_list acc ->
121 Printf.sprintf "%s%s : %s\n" acc name
122 (Base.List.to_string (
123 fun (p, _) -> Printf.sprintf "%s " (Path.to_string p)
124 ) path_list)
125 ) index ""
126
127 let print_db db =
128 let rev =
129 Printf.sprintf "current_revision = %s"
130 (Revision.to_string db.rev)
131 in
132 let tcount = Printf.sprintf "tcount = %s" (Eid.to_string db.tcount) in
133 let next_uid = Printf.sprintf "next_uid = %s" (Uid.to_string db.next_uid) in
134 let uid_map = Printf.sprintf "uid_map = %s" (print_uid_map db) in
135 let node_map = Printf.sprintf "node_map = %s" (print_node_map db) in
136 let tmp_uid_map = Printf.sprintf "tmp_uid_map = %s" (print_tmp_uid_map db) in
137 let tmp_node_map = Printf.sprintf "tmp_node_map = %s" (print_tmp_node_map db) in
138 let index = Printf.sprintf "index = %s" (print_index db) in
139 Printf.sprintf "db : \n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s"
140 rev tcount next_uid uid_map node_map tmp_uid_map
141 tmp_node_map index
142
143
144 (**********************)
145 (* db fields accessors*)
146 (**********************)
147
148 let get_rev db = db.rev
149 let get_tcount db = db.tcount
150 let get_next_uid db = db.next_uid
151 let is_empty db = (Eid.value db.tcount = 0)
152
153 let get_uid_map db = db.uid_of_eid
154 let get_node_map db = db.node_of_uid
155 let get_last_nodes db = db.tmp_node_of_uid
156 let get_index db = db.index
157
158
159 (*****************************)
160 (* navigation through the db *)
161 (*****************************)
162
163 (* Raise Not_found if the eid is not in the map *)
164 (* Raise Not_found if the revision looked for is the smallest one *)
165 let _get_uid_of_eid db rev eid =
166 let map = EidMap.find eid db.uid_of_eid in
8a8d2b5 [enhance] db3: remove a double check
Raja authored
167 snd (RevisionMap.find_inf rev map)
fccc685 Initial open-source release
MLstate authored
168
169 (* may raise Not_found from _get_uid_of_eid *)
170 let get_uid_of_eid db rev eid =
846231a [enhance] db3: add logs & update some
Raja authored
171 #<If:DEBUG_DB$minlevel 1000>
172 Logger.log ~color:`green
173 "DB : get_uide_of_eid eid(%s) rev(%s)"
174 (Eid.to_string eid)
175 (Revision.to_string rev)
176 #<End>;
177
fccc685 Initial open-source release
MLstate authored
178 if rev < db.rev then _get_uid_of_eid db rev eid
179 else
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
180 try Eid.Map.find eid db.tmp_uid_of_eid
fccc685 Initial open-source release
MLstate authored
181 with Not_found -> _get_uid_of_eid db rev eid
182
183 (* looking for an uid from an eid into the different maps of the db,
184 or reading from disk if no found in all the maps. *)
185 (* Raise Not_found id the uid is not in the temporary or full nodemap *)
186 let get_node_of_uid db uid =
846231a [enhance] db3: add logs & update some
Raja authored
187 #<If:DEBUG_DB$minlevel 1000>
188 Logger.log ~color:`green
189 "DB : get_node_of_uid uid(%s)"
190 (Uid.to_string uid)
191 #<End>;
192
20bbd87 [enhance] db3: Use TabMap instead of Map for uids
Raja authored
193 match Uid.Map.find_opt uid db.tmp_node_of_uid with
fccc685 Initial open-source release
MLstate authored
194 | Some node -> node
195 | None -> UidMap.find uid db.node_of_uid
196
197 let get_node_of_eid db rev eid =
846231a [enhance] db3: add logs & update some
Raja authored
198 #<If:DEBUG_DB$minlevel 1000>
199 Logger.log ~color:`green
200 "DB : get_node_of_eid eid(%s) rev(%s)"
201 (Eid.to_string eid)
202 (Revision.to_string rev)
203 #<End>;
204
372208f [enhance] db3: remove unusefull old_rev field from node structure
Raja authored
205 let uid = get_uid_of_eid db rev eid in
fccc685 Initial open-source release
MLstate authored
206 get_node_of_uid db uid
207
208 (* raise UnqualifiedPath if unable to find a node from its eid and the given
209 revision, or if the current key of the path is not found in the map of the
210 current node. *)
211 let rec get_eid_of_path db rev path =
846231a [enhance] db3: add logs & update some
Raja authored
212 #<If:DEBUG_DB$minlevel 1000>
213 Logger.log ~color:`green
214 "DB : get_eid_of_path rev(%s) path(%s)"
215 (Revision.to_string rev)
216 (Path.to_string path)
217 #<End>;
218
fccc685 Initial open-source release
MLstate authored
219 let rec aux db pos rev cur_path path =
220 match path with
221 | [] -> pos, rev
222 | k :: tl ->
223 try
224 let node = get_node_of_eid db rev pos in
225 match Node.get_content ~f:(get_node_of_uid db) node with
226 | Datas.Link l
227 | Datas.Copy (None, l) ->
228 get_eid_of_path db rev (Path.concat l (Path.of_list path))
229 | Datas.Copy (Some r, p) ->
230 get_eid_of_path db r (Path.concat p (Path.of_list path))
231 | _ ->
232 let neid = Node.next_eid ~f:(get_node_of_uid db) k node in
233 aux db neid rev (Path.add cur_path k) tl
234 with Not_found -> raise UnqualifiedPath
235 in aux db start rev Path.root (Path.to_list path)
236
237 (* raise UnqualifiedPath if unable to find the eid from the path or unable to
238 find the node associated with the eid if it has been found *)
239 (* TODO: the [rev] result does not seem used a lot; use or simplify *)
240 let get_node_of_path db rev path =
846231a [enhance] db3: add logs & update some
Raja authored
241 #<If:DEBUG_DB$minlevel 1000>
242 Logger.log ~color:`green
243 "DB : get_node_of_path rev(%s) path(%s)"
244 (Revision.to_string rev)
245 (Path.to_string path)
246 #<End>;
247
fccc685 Initial open-source release
MLstate authored
248 let eid, rev = get_eid_of_path db rev path in
249 try get_node_of_eid db rev eid, rev
250 with Not_found -> raise UnqualifiedPath
251
20bbd87 [enhance] db3: Use TabMap instead of Map for uids
Raja authored
252 let is_new_uid db uid = Uid.Map.mem uid db.tmp_node_of_uid
fccc685 Initial open-source release
MLstate authored
253
254 (* cleaning the temporary maps (mostly when the current revision has been
255 writen on disk. *)
256 let clean_tmp_maps db =
257 { db with
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
258 tmp_uid_of_eid = Eid.Map.empty ;
20bbd87 [enhance] db3: Use TabMap instead of Map for uids
Raja authored
259 tmp_node_of_uid = Uid.Map.empty ;
fccc685 Initial open-source release
MLstate authored
260 }
261
262 (************************************)
263 (* database creation and rebuilding *)
264 (************************************)
265
266 (* parameter 'weak' is a function that for an uid reads the disk to find the
267 associated node. *)
268 let make ?weak () =
269 ignore weak;
270 let rev = Revision.make 0 in
271 let t = Node.create rev in
272 let uid_0 = Uid.make 0 in
273 let uid_1 = Uid.make 1 in
274 let uid_of_eid =
275 let eid_0 = Eid.make 0 in
276 let rev_0 = Revision.make 0 in
277 let content = RevisionMap.add rev_0 uid_0 RevisionMap.empty in
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
278 EidMap.add eid_0 content (EidMap.empty ()) in
20bbd87 [enhance] db3: Use TabMap instead of Map for uids
Raja authored
279 let node_of_uid, tmp_node_of_uid = (UidMap.add uid_0 t (UidMap.empty ())), Uid.Map.empty in
fccc685 Initial open-source release
MLstate authored
280
281 let tcount = Eid.make 0 in
282 let next_uid = uid_1 in
283 let index = StringMap.empty in
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
284 let tmp_uid_of_eid = Eid.Map.empty in
fccc685 Initial open-source release
MLstate authored
285 { rev;
286 tcount;
287 next_uid;
288 uid_of_eid;
289 node_of_uid;
290 index;
291 tmp_uid_of_eid;
292 tmp_node_of_uid;
293 }
294
295
296 let restart ?index rev tcount next_uid uid_of_eid node_of_uid =
297 let index = Option.default StringMap.empty index in
298 { rev;
299 tcount;
300 next_uid;
301 uid_of_eid;
302 node_of_uid;
303 index;
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
304 tmp_uid_of_eid = Eid.Map.empty;
20bbd87 [enhance] db3: Use TabMap instead of Map for uids
Raja authored
305 tmp_node_of_uid = Uid.Map.empty;
fccc685 Initial open-source release
MLstate authored
306 }
307
308 let set_rev db rev = {db with rev = rev}
309
310
311 (******************)
312 (* basic DB reads *)
313 (******************)
314
315 (* may raise UnqualifiedPath *)
316 let rec get db rev path =
317 let node, rev = get_node_of_path db rev path in
318 match (Node.get_content ~f:(get_node_of_uid db) node) with
319 | Datas.Data d -> d
320 | Datas.Link l
321 | Datas.Copy (None, l) -> get db rev l
322 | Datas.Copy (Some r, p) -> get db r p
323 | Datas.UnsetData -> DataImpl.empty
324
325 let get_data db node =
326 match (Node.get_content ~f:(get_node_of_uid db) node) with
327 | Datas.Data d -> d
328 | Datas.UnsetData -> DataImpl.empty
329 | _ -> assert false
330
331 let _get_children db rev node range path =
332 let foo = fun k _eid acc -> (Path.add path k, rev) :: acc in
333 match range with
334 | Some range ->
335 let l = Node.fold_range range ~f:(get_node_of_uid db) foo node [] in
336 if snd range < 0 then l else List.rev l
337 | None ->
338 List.rev (Node.fold ~f:(get_node_of_uid db) foo node [])
339
340 (* may raise UnqualifiedPath *)
341 let rec get_children db rev range path =
342 let node, rev = get_node_of_path db rev path in
343 match Node.get_content ~f:(get_node_of_uid db) node with
344 | Datas.Link p
345 | Datas.Copy (None, p) ->
346 get_children db rev range p
347 | Datas.Copy (Some r, p) ->
348 get_children db r range p
349 | _ -> _get_children db rev node range path
350
351 (* may raise UnqualifiedPath *)
352 let get_last_rev_of_path db rev path =
353 let node, _rev = get_node_of_path db rev path in
354 Node.get_cur_rev node
355
356 (* may raise UnqualifiedPath *)
357 let _get_all_rev_of_path db path =
358 let rec aux eid acc path =
359 let map =
360 match EidMap.find_opt eid db.uid_of_eid with
361 | Some m -> m
362 | None -> raise UnqualifiedPath
363 in
364 match path with
365 | k :: path ->
366 RevisionMap.fold (
367 fun _old_rev uid acc ->
368 let node = get_node_of_uid db uid in
369 match Node.find_opt ~f:(get_node_of_uid db) k node with
370 | Some eid2 -> aux eid2 acc path
371 | None -> acc
372 ) map acc
373 | [] ->
374 RevisionMap.fold (
375 fun _old_rev uid acc ->
376 let node = get_node_of_uid db uid in
377 let rev = Node.get_cur_rev node in
378 if List.mem rev acc then acc
379 else rev :: acc
380 ) map acc
381 in aux start [] (Path.to_list path)
382
383 (* explores all past revisions, even if the path does not exist
384 for the current revision; more expensive in this particular case;
385 raises UnqualifiedPath if the path does not exist for any revision *)
386 let get_all_rev_of_path db path =
387 try
388 let cur_rev = db.rev in
389 let eid, _= get_eid_of_path db cur_rev path in
390 let revisions = RevisionMap.keys (EidMap.find eid db.uid_of_eid) in
391 let revisions =
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
392 if Option.is_some (Eid.Map.find_opt eid db.tmp_uid_of_eid)
fccc685 Initial open-source release
MLstate authored
393 then cur_rev::revisions else revisions in
394 revisions
395 with Not_found | UnqualifiedPath ->
396 List.rev (_get_all_rev_of_path db path)
397
398 (* returns the sub-tree of the given path *)
399 (* may raise UnqualifiedPath *)
400 (* TODO rewrite *)
401 let get_descendants db path =
402 let f = get_node_of_uid db in
403 let rec aux accu path node =
404 let accu =
405 match Node.get_content ~f node with
406 | Datas.Data d -> (path, d) :: accu
407 | _ -> accu in
408 let chld = get_children db db.rev None path in
409 if chld = [] then accu
410 else
411 List.fold_left(
412 fun accu (path, _r) ->
413 try
414 let node, _rev = get_node_of_path db db.rev path in
415 match Node.get_content ~f node with
416 | Datas.Data _d -> aux accu path node
417 | _ -> accu
418 with UnqualifiedPath -> accu
419 ) accu chld
420 in
421 let node, _rev = get_node_of_path db db.rev path in
422 aux [] path node
423
424
425 (********************)
426 (* basics DB writes *)
427 (********************)
428
429 let update_uid_of_eid db uid_list rev =
430 List.fold_left (
431 fun (acc1, acc2) (eid, uid) ->
5641e07 [enhance] db: remove obsolete improvement
Raja authored
432 try
433 let map = EidMap.find eid db.uid_of_eid in
434 let new_map = RevisionMap.add rev uid map in
435 EidMap.add eid new_map acc1,
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
436 Eid.Map.add eid uid acc2
5641e07 [enhance] db: remove obsolete improvement
Raja authored
437 with Not_found ->
438 EidMap.add eid (RevisionMap.add rev uid RevisionMap.empty) acc1,
5929a5e [enhance] db3: Use TabMap instead of Map for eids
Raja authored
439 Eid.Map.add eid uid acc2
5641e07 [enhance] db: remove obsolete improvement
Raja authored
440 ) (db.uid_of_eid, db.tmp_uid_of_eid) uid_list
fccc685 Initial open-source release
MLstate authored
441
442 let update_node_of_uid db node_list =
443 List.fold_left (
444 fun (acc1, acc2) (uid, node) ->
20bbd87 [enhance] db3: Use TabMap instead of Map for uids
Raja authored
445 (UidMap.add uid node acc1), Uid.Map.add uid node acc2
fccc685 Initial open-source release
MLstate authored
446 ) (db.node_of_uid, db.tmp_node_of_uid) node_list
447
448 let print_uid_list l =
449 Base.List.to_string
450 (fun (eid, uid) -> Printf.sprintf "%s -> %s \n\t\t"
451 (Eid.to_string eid) (Uid.to_string uid))
452 l
453
454 let print_node_list l =
455 Base.List.to_string
456 (fun (uid, node) -> Printf.sprintf "%s -> %s \n\t\t"
457 (Uid.to_string uid)(Node.to_string node))
458 l
459
460 let update_db db rev uid_list node_list =
461 #<If:DEBUG_DB$minlevel 10>
462 Logger.log ~color:`black
463 "update_db( %s , %s )"
464 (print_uid_list uid_list)
465 (print_node_list node_list)
466 #<End>;
467 let new_uid_of_eid, tmp_map_1 = update_uid_of_eid db uid_list rev in
468 let new_tcount = fst (EidMap.max new_uid_of_eid) in
469 let new_node_map, tmp_map_2 = update_node_of_uid db node_list in
470 let next_uid =
471 try
20bbd87 [enhance] db3: Use TabMap instead of Map for uids
Raja authored
472 let uid1 = fst (Uid.Map.max tmp_map_2) in
fccc685 Initial open-source release
MLstate authored
473 let uid = Uid.make (succ (Uid.value uid1)) in
474 Uid.max uid db.next_uid
475 with Not_found -> db.next_uid
476 in
477 let new_tcount = Eid.max new_tcount db.tcount in
478 { rev = db.rev
479 ; tcount = new_tcount
480 ; next_uid = next_uid
481 ; uid_of_eid = new_uid_of_eid
482 ; node_of_uid = new_node_map
483 ; index = db.index
484 ; tmp_uid_of_eid = tmp_map_1
485 ; tmp_node_of_uid = tmp_map_2
486 }
487
488 let remove db rev path key =
489 #<If:DEBUG_DB$minlevel 10>
490 Logger.log ~color:`yellow
491 "DB : Hldb.remove child %s at %s"
492 (Keys.to_string key)
493 (Path.to_string path)
494 #<End>;
495 let f = get_node_of_uid db in
496 let aux uid eid node =
20bbd87 [enhance] db3: Use TabMap instead of Map for uids
Raja authored
497 let replace = Uid.Map.mem uid db.tmp_node_of_uid in
fccc685 Initial open-source release
MLstate authored
498 (* FIXME replace db.next_uid by the next one? *)
499 let nuid, _next_uid =
500 if replace
501 then
502 uid, db.next_uid
503 else
504 let uid = Uid.succ db.next_uid in
505 db.next_uid, uid
506 in
507 let new_father = Node.remove_child ~f rev node key in
508 let uid_list = [(eid, nuid)] in
509 let node_list = [(nuid, new_father)] in
510 update_db db rev uid_list node_list
511 in
512 let eid_father, _ = get_eid_of_path db db.rev path in
513 let uid_father = get_uid_of_eid db db.rev eid_father in
514 let father = get_node_of_uid db uid_father in
515 aux uid_father eid_father father
516
517 (* index management *)
518
519 let index_path = Path.add Path.root (Keys.unsafe_make (Keys.IntKey 1001))
520
521 let update_index db update_list =
522 let new_index =
523 List.fold_left
524 (fun acc (path, data) ->
525 let map = DataImpl.index_fun data in
526 let count = StringMap.fold (fun _k v acc -> acc + v) map 0 in
527 StringMap.fold
528 (fun name score acc ->
529 let score = (float_of_int score) /. (float_of_int count) in
530 let new_path_list =
531 match StringMap.find_opt name acc with
532 | Some pl -> (path, score) :: pl
533 | None -> [path, score]
534 in
535 StringMap.add name new_path_list acc
536 ) map acc
537 ) db.index update_list
538 in
539 {db with index = new_index}
540
541 let remove_from_index db remove_list =
542 let new_index =
543 List.fold_left
544 (fun index (path, data) ->
545 let map = DataImpl.index_fun data in
546 StringMap.fold
547 (fun str _ index ->
548 let new_list =
549 match StringMap.find_opt str index with
550 | Some l -> List.remove_assoc path l
551 | None -> []
552 in
553 match new_list with
554 | [] -> StringMap.remove str index
555 | _ -> StringMap.add str new_list index
556 ) map index
557 ) db.index remove_list
558 in
559 {db with index = new_index}
560
561
562
563 (******************************************************)
564 (* full search managment (only for current revision) *)
565 (******************************************************)
566
567 (** Takes a list of decreasing-relevance lists of results; merges them to turn
568 individual searches to an AND search, ordered by decreasing minimal
569 rank. Lists should not contain duplicates. *)
570 let merge_search_results ll =
571 let n = List.length ll in
572 let occur = Hashtbl.create 23 in
978b7c4 @akoprow [fix] typo: occurence->occurrence, occured->occurred
akoprow authored
573 (* table from key to number of occurrences. When that number equals n, we got a result *)
fccc685 Initial open-source release
MLstate authored
574 let results = ref [] in
575 let add key =
576 let nb_occur = try Hashtbl.find occur key + 1 with Not_found -> 1 in
577 if nb_occur < n then Hashtbl.replace occur key nb_occur else
578 (results := key::!results; Hashtbl.remove occur key)
579 in
580 let rec aux ll =
581 let nempty, ll = Base.List.fold_left_map
582 (fun nempty -> function key::r -> add key; nempty, r | [] -> nempty+1, [])
583 0 ll in
584 if nempty < n then aux ll
585 in
586 aux ll;
587 List.rev !results
588
589 let full_search db words path =
590 let (|>) a f = f a in
591 let results =
592 Base.List.filter_map
593 (fun word ->
594 StringMap.find_opt word db.index
595 |> Option.map
596 (Base.List.filter_map
597 (fun (p,r) -> Path.remaining path p |> Option.map (fun p -> List.hd p, r))))
598 words
599 in
600 let results =
601 List.tail_map
602 (fun l -> l
603 |> List.sort
604 (fun (k1, r1) (k2, r2) -> let c = compare r1 r2 in if c <> 0 then - c else - Keys.compare k1 k2)
605 |> List.tail_map fst
606 |> Base.List.uniq)
607 results
608 in
609 merge_search_results results
610
611
612 (* Stuff from the old hldb.ml (above everything is from the old lldb.ml *)
613
614 let make ?weak () =
615 let db = make ?weak () in
616 let rev = get_rev db in
617 let eid0 = Eid.make 0 and eid1 = Eid.make 1 in
618 let uid0 = Uid.make 0 and uid1 = Uid.make 1 in
619 let k = Keys.unsafe_make (Keys.IntKey 0) in
620 let child = (k, eid1) in
621 let root = Node.create rev in
622 let root, _ = Node.update ~f:(fun _ -> assert false) uid0 root rev ~child false in
623 let dns = Node.create rev in
624 let uid_list = [(eid0, uid0); (eid1,uid1)] in
625 let node_list = [(uid0, root); (uid1, dns)] in
626 update_db db rev uid_list node_list
627
628 (* Links *)
629 let set_link db rev path link =
630 (* Note: do not check if the [link] path exists: it may come into
631 existence later in the same transaction or it might have just vanished.
632 It's supposed to be a dynamic description of a node,
633 dependent on the current and future state of db. *)
634 let db_rev = get_rev db in
635 let old_eid, _ = get_eid_of_path db db_rev path in
636 let old_uid = get_uid_of_eid db db_rev old_eid in
637 let old_node = get_node_of_uid db old_uid in
638 let rev_node = Node.get_cur_rev old_node in
639 let nuid = if rev_node = rev then old_uid else get_next_uid db in
640 let delta = false in
641 let content = Datas.Link link in
642 let new_node, _=
643 Node.update
644 ~f:(get_node_of_uid db) old_uid old_node rev ~content delta in
645 let uid_list = [old_eid, nuid] in
646 let node_list = [nuid, new_node] in
647 update_db db rev uid_list node_list
648
649 (* Copies *)
650 let set_copy db rev path ?copy_rev link =
651 let lrev =
652 match copy_rev with
653 | Some rev -> rev | None -> rev
654 in
655 let _ =
656 (* Check if link is dangling.
657 TODO: this is an overkill, probably, because it checks recursively
658 and not only copies, but links as well. *)
659 try get db lrev link
660 with UnqualifiedPath ->
661 let db = clean_tmp_maps db in
662 get db lrev link (* Reraise the exception. *)
663 in
664 let db_rev = get_rev db in
665 let old_eid, _ = get_eid_of_path db db_rev path in
666 let old_uid = get_uid_of_eid db db_rev old_eid in
667 let old_node = get_node_of_uid db old_uid in
668 let rev_node = Node.get_cur_rev old_node in
669 let nuid = if rev_node = rev then old_uid else get_next_uid db in
670 let content = Datas.Copy (Some lrev, link) in
671 let new_node = Node.create ~content rev in
672 let uid_list = [old_eid, nuid] in
673 let node_list = [nuid, new_node] in
674 update_db db rev uid_list node_list
675
676 (* See the .mli. *)
677 let rec follow_path db rev node path_end =
678 #<If:DEBUG_DB$minlevel 10>
679 Logger.log ~color:`green
680 "DB : low-level following path at rev %s; remaining: %s"
681 (Revision.to_string rev)
682 (Path.to_string (Path.of_list path_end))
683 #<End>;
684 match path_end with
685 | [] -> ([], node)
686 | k :: rest ->
687 try
688 match Node.get_content ~f:(get_node_of_uid db) node with
689 | Datas.Link _
690 | Datas.Copy _ -> (path_end, node)
691 | _ ->
692 let neid = Node.next_eid ~f:(get_node_of_uid db) k node in
693 let node = get_node_of_eid db rev neid in
694 follow_path db rev node rest
695 with Not_found -> raise UnqualifiedPath
696
697 let follow_link db original_rev path =
846231a [enhance] db3: add logs & update some
Raja authored
698 #<If:DEBUG_DB$minlevel 10>
699 Logger.log ~color:`green
700 "DB : following link %s at rev %s"
701 (Revision.to_string original_rev)
702 (Path.to_string path)
703 #<End>;
fccc685 Initial open-source release
MLstate authored
704 let rec aux db rev path =
705 let path_end = Path.to_list path in
706 let root = get_node_of_eid db rev root_eid in
707 let (path_end, node) = follow_path db rev root path_end in
708 match Node.get_content ~f:(get_node_of_uid db) node with
709 | Datas.Link l ->
710 (* Links possible both on [l] and [path_end], hence the [concat]. *)
711 let new_path = Path.concat l (Path.of_list path_end) in
712 aux db original_rev new_path
713 | Datas.Copy (Some copy_rev, l) ->
714 let new_path = Path.concat l (Path.of_list path_end) in
715 aux db copy_rev new_path
716 | Datas.Copy (None, _) ->
717 assert false (* We are at [original_rev]! *)
718 | _ ->
719 assert (path_end = []);
720 (path, node)
721 in
722 aux db original_rev path
723
724 (* TODO: use the 2 functions below, instead of above, in the future,
725 because we unwind paths elsewhere. *)
726 (* raise UnqualifiedPath if unable to find a node from its eid and the given
727 revision, or if the current key of the path is not found in the map of the
728 current node. *)
729 let rec get_eid_of_path db rev path =
730 let rec aux db pos rev cur_path path =
731 match path with
732 | [] -> pos, rev
733 | k :: tl ->
734 try
735 let node = get_node_of_eid db rev pos in
736 let neid = Node.next_eid ~f:(get_node_of_uid db) k node in
737 aux db neid rev (Path.add cur_path k) tl
738 with Not_found -> raise UnqualifiedPath
739 in aux db start rev Path.root (Path.to_list path)
740
741 (* raise UnqualifiedPath if unable to find the eid from the path or unable to
742 find the node associated with the eid if it has been found *)
743 (* TODO: the [rev] result does not seem used a lot; use or simplify *)
744 let get_node_of_path db rev path =
745 let eid, rev = get_eid_of_path db rev path in
746 try get_node_of_eid db rev eid, rev
747 with Not_found -> raise UnqualifiedPath
7d965b9 [fix] db3: abort of a transaction
Raja authored
748
749 let update_aborted db trdb =
750 UidMap.resize db.node_of_uid (Uid.value db.next_uid);
751 EidMap.resize db.uid_of_eid (succ (Eid.value db.tcount));
752 let rec filter_rev max lst =
753 if Revision.compare (fst (RevisionMap.max lst)) max <= 0 then
754 lst
755 else
756 filter_rev max (RevisionMap.remove_last lst)
757 in
758
759
760 Eid.Map.iter (fun k _ ->
761 if Eid.compare k db.tcount <= 0 then
762 ignore (EidMap.add k (filter_rev db.rev (EidMap.find k db.uid_of_eid)) db.uid_of_eid)
763 ) trdb.tmp_uid_of_eid;
764
765 db
Something went wrong with that request. Please try again.