Skip to content

Commit

Permalink
switches to Fowler-Noll-Vo hash algorithm for hashing names
Browse files Browse the repository at this point in the history
The previous algorithm had a very high collision rate, especially for
small strings. The new one is much better: it has been tested on large
dictionaries (of English words and passwords) and is guaranteed not
to collide on small strings.

Warning: changing the hash function breaks the knowledge base
format, so run `bap --cache-clean` after the update.
  • Loading branch information
ivg committed Sep 16, 2020
1 parent 6c50124 commit 0b6f62a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
24 changes: 15 additions & 9 deletions lib/knowledge/bap_knowledge.ml
Expand Up @@ -170,7 +170,7 @@ type slot_status =
(* A fully qualified name: a [package] plus a [name] within it.
   NOTE: this is a diff rendering without +/- markers, so both the old
   and the new [@@deriving] lines appear below. *)
type fullname = {
package : string;
name : string;
(* deriving list before the change *)
} [@@deriving bin_io, compare, sexp]
(* deriving list after the change: [equal] is added, which provides the
   [equal_fullname] function that [Name.intern] uses to compare names *)
} [@@deriving bin_io, equal, compare, sexp]


module Name : sig
Expand Down Expand Up @@ -218,22 +218,26 @@ end = struct

(* Table keyed by [Int63] values; [intern] stores each name here under
   its hash and [fullname] looks names up by that key. *)
let registry = Hashtbl.create (module Int63)

(* NOTE: this is a diff rendering without +/- markers; the old and the
   new definitions of [hash_name] are interleaved below.

   Old definition (header): took the rendered name as a string [str]. *)
let hash_name str =
(* using FNV-1a algorithm *)
(* New definition (header): a function from a [fullname] record to an
   [Int63] hash, with the constants computed once, outside the closure. *)
let hash_name =
let open Int63 in
(* Old body: fold over the characters of [str], starting from 5381 and
   computing [h * 33 + c] (written as [(h lsl 5) + h + c]) per step. *)
String.fold str ~init:(of_int 5381) ~f:(fun h c ->
(h lsl 5) + h + of_int (Char.to_int c))
(* New body. [init] and [m] are the standard 64-bit FNV offset basis and
   FNV prime.
   NOTE(review): all arithmetic here is [Int63], so the result is
   presumably a 63-bit variant rather than reference 64-bit FNV-1a --
   confirm if interoperability with other FNV implementations matters. *)
let init = of_int64_exn 0xCBF29CE484222325L in
let m = of_int64_exn 0x100000001B3L in
(* One FNV-1a step per character: xor the character code into the
   state, then multiply by [m]. The local [init] parameter shadows the
   offset basis so the fold can be seeded with an arbitrary state. *)
let hash init = String.fold ~init ~f:(fun h c ->
(h lxor of_int (Char.to_int c)) * m) in
(* The hash of [package] seeds the hash of [name], which is the same as
   hashing the characters of [package] followed by those of [name].
   NOTE(review): no separator is mixed in between the two fields, so
   two names whose package/name split differs but whose concatenation
   is equal (e.g. "ab"/"c" and "a"/"bc") get the same hash. *)
fun {package; name} ->
hash (hash init package) name

(* [intern name] returns the identifier of [name], which is its
   [hash_name] value, and records [name] in [registry] under that
   identifier the first time it is seen. If a different name is already
   registered under the same identifier, it fails via [invalid_argf].

   NOTE: this is a diff rendering without +/- markers; old and new
   lines are interleaved below. *)
let intern name =
(* old: render the name to a string and hash the string *)
let str = full name in
let id = hash_name str in
(* new: hash the [fullname] record directly *)
let id = hash_name name in
match Hashtbl.find registry id with
(* first occurrence of this hash: register the name *)
| None -> Hashtbl.add_exn registry id name; id
(* old: the registered name shadows the argument and is compared with it
   through the rendered strings *)
| Some name ->
if full name = str
(* new: the registered name is bound to [name'] and compared with the
   derived [equal_fullname] *)
| Some name' ->
if equal_fullname name name'
then id
(* same hash but a different name: report both names to the user *)
else invalid_argf "Names %S and %S have the same hash value, \
Change one of them."
(* old argument list of the error message *)
(full name) str ()
(* new argument list of the error message *)
(full name) (full name') ()

(* [fullname id] looks up the name stored in [registry] under [id];
   being [find_exn], it raises when [id] is not present. *)
let fullname = Hashtbl.find_exn registry
include Int63
Expand Down Expand Up @@ -796,6 +800,8 @@ module Persistent = struct
let key = of_string pk key
and data = of_string pd data in
Map.add_exn xs ~key ~data))

(* Persistence for names, built with [of_binable] from the [Name] module. *)
let name = of_binable (module Name)
end


Expand Down
4 changes: 3 additions & 1 deletion lib/knowledge/bap_knowledge.mli
Expand Up @@ -1278,6 +1278,9 @@ module Knowledge : sig
(** string is a persistent data type. *)
val string : string persistent

(** names are persistent. *)
val name : name persistent

(** [list t] derives persistence for a list. *)
val list : 'a persistent -> 'a list persistent

Expand Down Expand Up @@ -1351,7 +1354,6 @@ module Knowledge : sig
*)
val create : ?package:string -> string -> t


(** [read ?package input] reads a full name from input.
This function will parse the [input] and return a
Expand Down

0 comments on commit 0b6f62a

Please sign in to comment.