Permalink
Browse files

initial import into git repo

  • Loading branch information...
0 parents commit 3b4e63f8dea78b067d839c67bb251938feca49a5 romain committed Dec 7, 2011
Showing with 542 additions and 0 deletions.
  1. +5 −0 _tags
  2. +42 −0 adler.ml
  3. +180 −0 algo.ml
  4. +20 −0 evil.ml
  5. +16 −0 io.ml
  6. +4 −0 profile_test.ml
  7. +118 −0 signature.ml
  8. +39 −0 syncer.ml
  9. +69 −0 test.ml
  10. BIN tests/new/big.bmp
  11. +25 −0 tests/new/fischer.txt
  12. +1 −0 tests/new/small.txt
  13. BIN tests/old/big.bmp
  14. +22 −0 tests/old/fischer.txt
  15. +1 −0 tests/old/small.txt
@@ -0,0 +1,5 @@
+true: annot
+true: debug
+#true: pp(camlp4 pa_o.cmo str.cma pa_ocamlviz.cmo pr_o.cmo)
+true: package(unix)
+
@@ -0,0 +1,42 @@
+type t = {mutable a: int; mutable b: int; mutable c: int}
+
+let padler = 65521
+
+let make () = {a = 1 ;b = 0; c = 0}
+
+let from buf offset length =
+
+ let too_far = offset + length in
+ let rec loop a b i=
+ if i = too_far
+ then {a; b; c = length}
+ else (* slooow modulo can be lifted with a bit of math *)
+ let a' = (a + Char.code(buf.[i])) mod padler in
+ let b' = (b + a') mod padler in
+ loop a' b' (i+1)
+ in
+ loop 1 0 offset
+
+let reset t = t.a <- 1;t.b <- 0; t.c <- 0
+
+let digest t = (t.b lsl 16) lor t.a
+
+let rotate t c1 cn =
+ let x1 = Char.code c1 in
+ let xn = Char.code cn in
+ t.a <- (t.a - x1 + xn) mod padler;
+ t.b <- (t.b - t.c * x1 + t.a -1) mod padler
+
+let update t buf offset length =
+ let too_far = offset + length in
+ let rec loop i =
+ if i = too_far then ()
+ else
+ let x = Char.code buf.[i] in
+ let () = t.a <- (t.a + x) mod padler in
+ let () = t.b <- (t.b + t.a) mod padler in
+ let () = t.c <- t.c + 1 in
+ loop (i +1)
+ in
+ loop offset
+
@@ -0,0 +1,180 @@
+
+module Weak = Adler
+module Strong = Digest
+
+type action =
+ | Match of int * int
+ | Miss of Buffer.t
+ | Start of int
+ | Stop
+
+let output_action oc = function
+ | Match (b,e) ->
+ Io.write_byte oc 1;
+ Io.write_int oc b;
+ Io.write_int oc e
+ | Miss b ->
+ Io.write_byte oc 2;
+ Io.write_string oc (Buffer.contents b)
+ | Start s ->
+ Io.write_byte oc 3;
+ Io.write_int oc s
+ | Stop -> Io.write_byte oc 4
+
+let input_action ic =
+ match input_byte ic with
+ | 1 ->
+ let b = Io.read_int ic in
+ let e = Io.read_int ic in
+ Match (b,e)
+ | 2 ->
+ let len = Io.read_int ic in
+ let b = Buffer.create len in
+ let () = Buffer.add_channel b ic len in
+ Miss b
+ | 3 ->
+ let s = Io.read_int ic in
+ Start s
+ | 4 -> Stop
+ | _ -> failwith "unknown action"
+
+open Signature
+
+
+class delta_emitter signature new_fn handler =
+ let bs = signature.bs in
+ let buffer_size = 8 * bs in
+ let buffer = String.create buffer_size in
+object(self)
+ val mutable _read = 0
+ val mutable _first_free = 0
+ val mutable _n_free = buffer_size
+ val mutable _first_todo = 0
+ val mutable _previous_action = Start bs
+ val mutable _finished = false
+ val mutable _weak_ok = false
+ val _weak = Weak.make ()
+
+ method _examine_block buffer offset length =
+ let wd = Weak.digest _weak in
+ match lookup_weak signature wd with
+ | None -> None
+ | Some block ->
+ let strong = Strong.substring buffer offset length in
+ if strong = block.strong
+ then Some block.index
+ else None
+
+ method _miss char =
+ match _previous_action with
+ | Miss b when Buffer.length b < bs -> Buffer.add_char b char
+ | other ->
+ let () = handler _previous_action in
+ let b = Buffer.create bs in
+ let () = Buffer.add_char b char in
+ _previous_action <- Miss b
+
+ method _match index =
+ match _previous_action with
+ | Match (b,e) when e + 1 = index -> _previous_action <- Match(b,index)
+ | other -> let () = handler _previous_action in
+ _previous_action <- Match(index,index)
+
+ method run () =
+ let ic = open_in new_fn in
+ while not _finished do
+ begin
+ let read = input ic buffer _first_free _n_free in
+ if read = 0 then
+ _finished <- true
+ else
+ let () = _first_free <- _first_free + read in
+ let () = _n_free <- _n_free - read in
+ ()
+ end;
+ while _first_todo + bs < _first_free do
+ if not _weak_ok then
+ begin
+ Weak.update _weak buffer _first_todo bs;
+ _weak_ok <- true
+ end;
+ begin
+ match self # _examine_block buffer _first_todo bs with
+ | None ->
+ self # _miss buffer.[_first_todo];
+ Weak.rotate _weak buffer.[_first_todo] buffer.[_first_todo + bs];
+ _first_todo <- _first_todo + 1
+ | Some i ->
+ self # _match i;
+ _first_todo <- _first_todo + bs;
+ _weak_ok <- false;
+ Weak.reset _weak
+ end
+ done;
+ if _first_todo + bs >= _first_free
+ then
+ begin
+ let length = _first_free - _first_todo in
+ String.blit buffer _first_todo buffer 0 length;
+ _first_todo <- 0;
+ _first_free <- length;
+ _n_free <- buffer_size - length
+ end
+ done;
+ let rec loop i =
+ if i = _first_free
+ then ()
+ else
+ let () = self # _miss (buffer.[i]) in
+ loop (i+1)
+ in
+ loop _first_todo;
+ handler _previous_action;
+ handler Stop;
+ close_in ic
+end
+
+
+class delta_applier ic (old_fn:string) oc =
+ let fd = Unix.openfile old_fn [Unix.O_RDONLY] 0o640 in
+ let really_read buffer bs =
+ let rec loop pos todo =
+ if todo = 0 then ()
+ else
+ let read = Unix.read fd buffer pos todo in
+ loop (pos+ read) (todo - read)
+ in
+ loop 0 bs
+ in
+object(self)
+ method run () =
+ let bs = match input_action ic with
+ | Start s -> s
+ | _ -> failwith "start @ beginning"
+ in
+ let rec loop () =
+ let action = input_action ic in
+ match action with
+ | Match(b,e) -> self # apply_match bs b e ; loop ()
+ | Miss b -> self # apply_miss b ; loop ()
+ | Start s -> failwith "can't restart"
+ | Stop -> ()
+ in
+ loop ();
+ Unix.close fd
+
+ method apply_match bs b e =
+ let buffer = String.create bs in
+ let n = e + 1 - b in
+ let rec loop i =
+ if i = 0 then ()
+ else
+ let () = really_read buffer bs in
+ let () = output_string oc buffer in
+ loop (i-1)
+ in
+ let _ = Unix.lseek fd (bs * b) Unix.SEEK_SET in
+ loop n
+
+ method apply_miss b = output_string oc (Buffer.contents b)
+end
@@ -0,0 +1,20 @@
+let fn = "/tmp/evil.bin"
+
+let v = 3910028790
+
+let write () =
+ let oc = open_out fn in
+ output_binary_int oc v;
+ close_out oc
+
+let read () =
+ let ic = open_in fn in
+ let v2 = input_binary_int ic in
+ let () = close_in ic in
+ v2
+
+
+let () =
+ let () = write () in
+ let v2 = read () in
+ Printf.printf "%08x %08x\n" v v2
@@ -0,0 +1,16 @@
+
+let write_byte = output_byte
+
+let write_int oc i = Marshal.to_channel oc i []
+let read_int ic = Marshal.from_channel ic
+
+let read_string ic =
+ let ss = read_int ic in
+ let s = String.create ss in
+ let () = really_input ic s 0 ss in
+ s
+
+let write_string oc s =
+ write_int oc (String.length s);
+ output_string oc s
+
@@ -0,0 +1,4 @@
+let () =
+ Ocamlviz.init();
+ Ocamlviz.wait_for_connected_clients 1;
+ Test.test_correctness();;
@@ -0,0 +1,118 @@
+module Weak = Adler
+module Strong = Digest
+
+type block_signature = {weak:int; strong:string; index:int}
+
+let compare_weak ba bb = compare ba.weak bb.weak
+let compare_index ba bb = compare ba.index bb.index
+
+type t = {len:int; bs: int; blocks: block_signature array;}
+
+let length t = Array.length t.blocks
+
+let lookup_weak t w =
+ let rec find min max =
+ let mid = (min + max) / 2 in
+ let block = t.blocks.(mid) in
+ let weak = block.weak in
+ if w = weak then Some block
+ else if min > max then None
+ else if w > weak then find (mid+1) max
+ else find min (mid -1)
+ in
+ let len = length t in
+ find 0 (len -1)
+
+let create fn bs =
+ let ic = open_in fn in
+ let len = in_channel_length ic in
+ let buf = String.create bs in
+ let read_block size index =
+ let () = really_input ic buf 0 size in
+ let a = Weak.from buf 0 size in
+ let weak = Weak.digest a in
+ let strong = Strong.substring buf 0 size in
+ {weak;strong;index}
+ in
+ let rec read_blocks acc todo i =
+ if todo >= bs then
+ let block = read_block bs i in
+ read_blocks (block :: acc) (todo - bs) (i+1)
+ else
+ let block = read_block todo i in
+ List.rev (block :: acc)
+ in
+ let blocks_l = read_blocks [] len 0 in
+ let blocks = Array.of_list blocks_l in
+ let () = close_in ic in
+ Array.sort compare_weak blocks;
+ {len;bs;blocks;}
+
+let output_signature oc t =
+ Io.write_int oc t.len;
+ Io.write_int oc t.bs;
+ Io.write_int oc (length t);
+ let i = ref 0 in
+ let one block =
+ Io.write_int oc block.weak;
+ Io.write_string oc block.strong;
+ assert (block.index = !i);
+ (* i is skipped: in order because sorted *)
+ incr i
+ in
+ Array.sort compare_index t.blocks;
+ Array.iter one t.blocks;
+ Array.sort compare_weak t.blocks
+
+let input_signature ic =
+ let len = Io.read_int ic in
+ let bs = Io.read_int ic in
+ let nblocks = Io.read_int ic in
+ let rec loop acc index =
+ if index = nblocks then List.rev acc
+ else
+ let weak = Io.read_int ic in
+ let strong = Io.read_string ic in
+ let b = {weak;strong;index} in
+ loop (b :: acc) (index + 1) in
+
+ let blocks_l = loop [] 0 in
+ let blocks = Array.of_list blocks_l in
+ let r = {len;bs;blocks} in
+ Array.sort compare_weak r.blocks;
+ r
+
+
+let to_file t fn =
+ let oc = open_out fn in
+ let () = output_signature oc t in
+ close_out oc
+
+let from_file fn =
+ let ic = open_in fn in
+ let s = input_signature ic in
+ close_in ic;
+ s
+
+
+let equals t1 t2 =
+ let so_far =
+ t1.len = t2.len &&
+ t1.bs = t2.bs in
+ let size1 = Array.length t1.blocks in
+ let size2 = Array.length t2.blocks in
+ let rec loop i acc =
+ if i = size1 then acc
+ else
+ begin
+ let bs1 = t1.blocks.(i)
+ and bs2 = t2.blocks.(i) in
+ let acc' = acc && bs1 = bs2
+ and i' = i+1 in
+ loop i' acc'
+ end
+ in
+ let r = so_far && size1 = size2 && loop 0 true in
+ r
+
+
Oops, something went wrong.

0 comments on commit 3b4e63f

Please sign in to comment.