Skip to content

Commit

Permalink
optimizes reconstructor, symtab, and brancher performance (#855)
Browse files Browse the repository at this point in the history
* rectifies reconstructor, symtab and brancher

This PR improves the reconstructor, symtab and brancher with respect to
performance, without adding new behaviour or breaking existing behaviour.

Rewrote the reconstructor and reduced the number of iterations over the nodes/edges of the cfg.

The `add_symbol` function was implemented somewhat inefficiently:
every addition of a symbol filtered the whole table,
although there is enough information to avoid most of these calls.

Just a small fix that checks whether an instruction has any jumps at all
before the subsequent call to `fold_consts`, which can be expensive for some
instructions.

* updated after review

* refactored
  • Loading branch information
gitoleg authored and ivg committed Aug 14, 2018
1 parent e73753b commit 06b516e
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 53 deletions.
10 changes: 9 additions & 1 deletion lib/bap_disasm/bap_disasm_brancher.ml
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,17 @@ let kind_of_branches t f =
| `Fall,`Fall -> `Fall
| _ -> `Cond

(** [has_jumps bil] is [true] when the finder below reaches a [jmp]
    statement while visiting [bil], and [false] otherwise.

    The finder object is built once; the partial application of
    [Bil.exists] yields the predicate itself. *)
let has_jumps =
  let jmp_finder = object
    inherit [unit] Stmt.finder
    (* Stop at the first jump that is entered. *)
    method! enter_jmp _ ret = ret.return (Some ())
  end in
  Bil.exists jmp_finder

let rec dests_of_bil bil : dests =
Bil.fold_consts bil |> List.concat_map ~f:dests_of_stmt
if has_jumps bil then
Bil.fold_consts bil |> List.concat_map ~f:dests_of_stmt
else []
and dests_of_stmt = function
| Bil.Jmp (Bil.Int addr) -> [Some addr,`Jump]
| Bil.Jmp (_) -> [None, `Jump]
Expand Down
76 changes: 32 additions & 44 deletions lib/bap_disasm/bap_disasm_reconstructor.ml
Original file line number Diff line number Diff line change
Expand Up @@ -19,51 +19,39 @@ type reconstructor = t
(** [create f] wraps the function [f] in the [Reconstructor] constructor. *)
let create f = Reconstructor f
(** [run r] unwraps the reconstructor [r] and returns the wrapped function. *)
let run (Reconstructor f) = f

(** [find_calls name roots cfg] collects the addresses that are treated as
    function starts and returns them in a fresh address table, each mapped
    to [name addr].

    An address is recorded when it is
    - one of [roots];
    - the address of a block of [cfg] that has no incoming edges;
    - the destination of a non-[`Fall] edge leaving a block whose
      terminator is a call. *)
let find_calls name roots cfg =
  let starts = Addr.Table.create () in
  let mark addr = Hashtbl.set starts ~key:addr ~data:(name addr) in
  List.iter roots ~f:mark;
  Seq.iter (Cfg.nodes cfg) ~f:(fun blk ->
      (* A block nobody jumps to starts a function of its own. *)
      if Seq.is_empty (Cfg.Node.inputs blk cfg) then mark (Block.addr blk);
      if Insn.(is call) (Block.terminator blk) then
        Cfg.Node.outputs blk cfg
        |> Seq.iter ~f:(fun edge ->
            (* The fall-through edge of a call is the return site,
               not a callee. *)
            if Cfg.Edge.label edge <> `Fall then
              mark (Block.addr (Cfg.Edge.dst edge))));
  starts
(** [entries_of_block cfg roots entries blk] extends the block set [entries]
    with the function entries that [blk] contributes:
    - [blk] itself, when its address belongs to [roots];
    - the destination of every non-[`Fall] edge leaving [blk], when the
      terminator of [blk] is a call. *)
let entries_of_block cfg roots entries blk =
  let with_root =
    if Set.mem roots (Block.addr blk) then Set.add entries blk else entries in
  if not (Insn.(is call) (Block.terminator blk)) then with_root
  else
    Cfg.Node.outputs blk cfg
    |> Seq.fold ~init:with_root ~f:(fun acc edge ->
        (* Skip the fall-through edge: it leads to the return site. *)
        match Cfg.Edge.label edge with
        | `Fall -> acc
        | _ -> Set.add acc (Cfg.Edge.dst edge))

(** [reconstruct name roots cfg] builds a symbol table from [cfg].

    Function starts are computed by [find_calls]; for each start that has a
    block in [cfg], the edges found by a depth-first search from that block
    (over a copy of [cfg] without the edges that lead into function starts)
    become the function's graph, which is added to the table under the
    start's name. *)
let reconstruct name roots cfg =
(* From here on [roots] is the address -> name table of function starts. *)
let roots = find_calls name roots cfg in
(* A graph holding every node of [cfg] and no edges. *)
let init =
Cfg.nodes cfg |> Seq.fold ~init:Cfg.empty ~f:(fun cfg n ->
Cfg.Node.insert n cfg) in
(* [init] plus every edge of [cfg] whose destination is not a function start. *)
let filtered =
Cfg.edges cfg |> Seq.fold ~init ~f:(fun cfg e ->
if Hashtbl.mem roots (Block.addr (Cfg.Edge.dst e)) then cfg
else Cfg.Edge.insert e cfg) in
(* Linear scan over all nodes of [cfg] for the block that starts at [addr]. *)
let find_block addr =
Cfg.nodes cfg |> Seq.find ~f:(fun blk ->
Addr.equal addr (Block.addr blk)) in
Hashtbl.fold roots ~init:Symtab.empty
~f:(fun ~key:entry ~data:name syms ->
match find_block entry with
(* No block at this address: nothing is added for it. *)
| None -> syms
| Some entry ->
let cfg : cfg =
(* [with_return] lets [start_tree] abort the search with the graph
   accumulated so far. *)
with_return (fun {return} ->
Graphlib.depth_first_search (module Cfg)
filtered ~start:entry ~init:Cfg.empty
~enter_edge:(fun _ -> Cfg.Edge.insert)
(* A tree rooted at [entry] contributes its root node; a tree rooted at
   any other node ends the search. NOTE(review): presumably this is what
   keeps blocks unreachable from [entry] out of the result -- confirm
   against the Graphlib documentation. *)
~start_tree:(fun n t ->
if Block.equal n entry
then Cfg.Node.insert n t
else return t)) in
Symtab.add_symbol syms (name,entry,cfg))
(** [collect_entries cfg roots] is the set of blocks of [cfg] that start a
    function, obtained by folding [entries_of_block] over every node with
    [roots] turned into an address set. *)
let collect_entries cfg roots =
  let root_addrs = Addr.Set.of_list roots in
  Cfg.nodes cfg
  |> Seq.fold ~init:Block.Set.empty ~f:(entries_of_block cfg root_addrs)

(** [reconstruct name roots prog] builds a symbol table with one symbol per
    entry block returned by [collect_entries prog roots].

    The graph of a function is grown from its entry block by following the
    outgoing edges in [prog]; an edge whose destination is itself an entry
    block is left out and not followed. Each symbol is named by applying
    [name] to the address of its entry block. *)
let reconstruct name roots prog =
  let entries = collect_entries prog roots in
  (* [grow fng node] adds [node] and everything reachable from it, short of
     other entries, to the function graph [fng]. *)
  let rec grow fng node =
    let fng = Cfg.Node.insert node fng in
    Cfg.Node.outputs node prog
    |> Seq.fold ~init:fng ~f:(fun fng edge ->
        let dst = Cfg.Edge.dst edge in
        if Set.mem entries dst then fng
        else
          (* Membership is tested before the edge goes in, so that a
             destination seen for the first time is still explored. *)
          let seen = Cfg.Node.mem dst fng in
          let fng = Cfg.Edge.insert edge fng in
          if seen then fng else grow fng dst) in
  Set.fold entries ~init:Symtab.empty ~f:(fun tab entry ->
      let symbol = name (Block.addr entry) in
      let body = grow Cfg.empty entry in
      Symtab.add_symbol tab (symbol,entry,body))

let of_blocks syms =
let reconstruct (cfg : cfg) =
Expand Down
22 changes: 14 additions & 8 deletions lib/bap_disasm/bap_disasm_symtab.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ type cfg = Cfg.t [@@deriving compare]

(** A function symbol: a triple of a [string], a [block] and a [cfg].
    The functions in this file destructure it as [(name, entry, cfg)]. *)
type fn = string * block * cfg [@@deriving compare]

(** [sexp_of_fn fn] renders a function as a two-element s-expression list:
    its name and the address of its entry block. The graph component is
    deliberately ignored, hence the [_cfg] binding. *)
let sexp_of_fn (name,block,_cfg) =
  Sexp.List [sexp_of_string name; sexp_of_addr (Block.addr block)]

module Fn = Opaque.Make(struct
Expand All @@ -38,7 +38,7 @@ let compare t1 t2 =

(** Alias of the table type [t], with derived [compare] and [sexp_of]. *)
type symtab = t [@@deriving compare, sexp_of]

(** [span fn] is a memory map that binds the memory of every block in the
    graph of [fn] to [fn] itself. Only the graph component is inspected;
    the name and the entry are bound with a leading underscore because they
    are unused here. *)
let span ((_name,_entry,cfg) as fn) =
  Cfg.nodes cfg
  |> Seq.fold ~init:Memmap.empty ~f:(fun map blk ->
      Memmap.add map (Block.memory blk) fn)

Expand All @@ -52,19 +52,25 @@ let merge m1 m2 =
Memmap.to_sequence m2 |> Seq.fold ~init:m1 ~f:(fun m1 (mem,x) ->
Memmap.add m1 mem x)

(** [filter_mem mem name entry] keeps in [mem] only the functions whose name
    differs from [name] and whose entry block differs from [entry]. *)
let filter_mem mem name entry =
  Memmap.filter mem ~f:(fun (n,e,_) ->
      not (String.(name = n) || Block.(entry = e)))

(** [remove t fn] removes from [t] the bindings for the name of [fn] and for
    the address of its entry block, and drops from the memory map every
    function that shares that name or that entry block.

    The memory map is filtered only when the table can actually contain
    something to drop, i.e. when either the entry address or the name is
    already bound; otherwise [t] is returned unchanged. Checking the
    address alone would skip the filtering when a function is re-added
    under an existing name at a different entry, leaving the regions of the
    old function in the memory map. *)
let remove t (name,entry,_) : t =
  let addr = Block.addr entry in
  if Map.mem t.addrs addr || Map.mem t.names name then
    {
      names = Map.remove t.names name;
      addrs = Map.remove t.addrs addr;
      memory = filter_mem t.memory name entry;
    }
  else t

(** [add_symbol t (name, entry, cfg)] returns [t] extended with the given
    function. The table is first passed through [remove] for the same
    function; then the function is bound to the address of [entry] and to
    [name], and the memory of every block of [cfg] is merged into the
    memory map. *)
let add_symbol t (name,entry,cfg) : t =
  let data = name,entry,cfg in
  let t = remove t data in
  {
    addrs = Map.add t.addrs ~key:(Block.addr entry) ~data;
    names = Map.add t.names ~key:name ~data;
    memory = merge t.memory (span data);
  }

Expand Down

0 comments on commit 06b516e

Please sign in to comment.