Skip to content

Commit

Permalink
support of llvm 4.0 (#700)
Browse files Browse the repository at this point in the history
* first steps towards using llvm 4.0

* minor

* minor

* minor fix for 3.4

* just stylistic changes

* renaming

* hide all bap-llvm stuff in oasis

* added a llvm version parameter,

also rm old (and odd) file x86_llvm_config

* minor

* fix oasis and internal modules

* minor

* mv relocatable to separate plugin

* minor

* llvm oasis stuff reworked again

* minor

* edited dockerfile

* fix version choosing

* minor

* minor
  • Loading branch information
gitoleg authored and ivg committed Aug 25, 2017
1 parent 817eac8 commit b7f4cf0
Show file tree
Hide file tree
Showing 22 changed files with 529 additions and 274 deletions.
3 changes: 2 additions & 1 deletion docker/Dockerfile
Expand Up @@ -25,6 +25,7 @@ WORKDIR /home/bap
RUN opam init --auto-setup --comp=4.02.3 --yes
RUN opam repo add bap git://github.com/BinaryAnalysisPlatform/opam-repository#testing
RUN opam update
RUN opam depext --install bap --yes
RUN opam depext --install conf-llvm.3.8 --yes
RUN OPAMJOBS=1 opam depext --install bap --yes
RUN sudo pip install bap
ENTRYPOINT ["opam", "config", "exec", "--"]
18 changes: 11 additions & 7 deletions lib/bap_llvm/bap_llvm.ml
@@ -1,10 +1,14 @@
open Core_kernel.Std

(* [strip_version ver] shortens a five-character version string to its
   first three characters (e.g. "3.8.1" becomes "3.8"); a string of any
   other length is returned unchanged. *)
let strip_version ver =
  match String.length ver with
  | 5 -> String.sub ver 0 3
  | _ -> ver


module Std = struct
module Llvm_disasm = Bap_llvm_disasm
module Legacy_loader = Bap_llvm_loader
module Ogre_loader = Bap_llvm_ogre_loader
module Scheme = Bap_llvm_ogre_types.Scheme
module Coff_scheme = Bap_llvm_coff_scheme
module Elf_scheme = Bap_llvm_elf_scheme
module Macho_scheme = Bap_llvm_macho_scheme
type x86_syntax = [`att | `intel] [@@deriving sexp]

let llvm_version = strip_version Bap_llvm_config.version
let init_disassembler = Bap_llvm_disasm.init
let init_loader = Bap_llvm_ogre_loader.init
end
9 changes: 9 additions & 0 deletions lib/bap_llvm/bap_llvm.mli
@@ -0,0 +1,9 @@
open Core_kernel.Std

(** Public interface of the bap-llvm library. *)
module Std : sig
(** Syntax selector accepted by {!init_disassembler}: either [`att] or
    [`intel]. *)
type x86_syntax = [`att | `intel] [@@deriving sexp]

(** LLVM version string. The implementation derives it from
    [Bap_llvm_config.version], reducing a five-character value such as
    ["3.8.1"] to its first three characters. *)
val llvm_version : string

(** Initializes the LLVM disassembler backend (bound to
    [Bap_llvm_disasm.init] in the implementation). Returns an error
    instead of raising on failure.
    NOTE(review): [x86_syntax] presumably selects the assembly flavour used
    for x86 targets - confirm against [Bap_llvm_disasm]. *)
val init_disassembler : ?x86_syntax:x86_syntax -> unit -> unit Or_error.t

(** Initializes the LLVM loader backend (bound to
    [Bap_llvm_ogre_loader.init] in the implementation).
    NOTE(review): [base] looks like an optional base address - confirm
    against [Bap_llvm_ogre_loader]. *)
val init_loader : ?base:int64 -> unit -> unit
end
10 changes: 5 additions & 5 deletions lib/bap_llvm/llvm_binary.hpp
Expand Up @@ -19,8 +19,9 @@
#include <llvm/Config/llvm-config.h>

#include "llvm_binary_34.hpp"
#include "llvm_binary_38.hpp"
#include "llvm_binary_38_40.hpp"
#include "llvm_error_or.hpp"
#include "llvm_primitives.hpp"

extern "C" {
void bap_notify_error(const char*);
Expand Down Expand Up @@ -77,11 +78,10 @@ segment make_segment(const coff_section &s, uint64_t image_base) {

template<typename T>
error_or<segments> read(const ELFObjectFile<T>& obj) {
auto begin = elf_header_begin(obj.getELFFile());
auto end = elf_header_end(obj.getELFFile());
auto hdrs = prim::elf_program_headers(*obj.getELFFile());
segments s;
auto it = begin;
for (int pos = 0; it != end; ++it, ++pos) {
auto it = hdrs.begin();
for (int pos = 0; it != hdrs.end(); ++it, ++pos) {
if (it -> p_type == ELF::PT_LOAD) {
std::ostringstream oss;
oss << std::setfill('0') << std::setw(2) << pos;
Expand Down
File renamed without changes.
File renamed without changes.
@@ -1,7 +1,8 @@
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0

#ifndef LLVM_BINARY_38_HPP
#define LLVM_BINARY_38_HPP
#ifndef LLVM_BINARY_38_40_HPP
#define LLVM_BINARY_38_40_HPP

#include <memory>
#include <numeric>
Expand All @@ -12,6 +13,10 @@
#include <sstream>
#include <tuple>

#if LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
#include <llvm/Support/Error.h>
#endif

#include <llvm/Object/ELFObjectFile.h>
#include <llvm/Object/COFF.h>
#include <llvm/Object/MachO.h>
Expand All @@ -21,6 +26,7 @@
#include <llvm/ADT/iterator_range.h>
#include <llvm/Object/SymbolSize.h>

#include "llvm_primitives.hpp"
#include "llvm_error_or.hpp"

using std::error_code;
Expand All @@ -39,22 +45,6 @@ error_or<uint64_t> getImageBase(const COFFObjectFile &obj) {

} // namespace

namespace seg {
using namespace llvm;
using namespace llvm::object;

template <typename ELFT>
const typename ELFFile<ELFT>::Elf_Phdr* elf_header_begin(const ELFFile<ELFT> *elf) {
return elf->program_header_begin();
}

template <typename ELFT>
const typename ELFFile<ELFT>::Elf_Phdr* elf_header_end(const ELFFile<ELFT> *elf) {
return elf->program_header_end();
}

} //namespace seg

namespace sym {
using namespace llvm;
using namespace llvm::object;
Expand All @@ -69,21 +59,35 @@ error_or<T> of_llvm_error_or(const llvm::ErrorOr<T> &e) {
return success(e.get());
}

#if LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
// Adapts an llvm::Expected<T> to the project's error_or<T>.
// On success the contained value is wrapped with success(); otherwise the
// pending llvm::Error is taken out of `e` and rendered to a string for
// failure(). The argument is a non-const reference because takeError()
// modifies the Expected object.
template <typename T>
error_or<T> of_llvm_error_or(llvm::Expected<T> &e) {
    if (e)
        return success(e.get());
    return failure(llvm::toString(e.takeError()));
}
#endif

error_or<std::string> get_name(const SymbolRef &sym) {
auto e = of_llvm_error_or(sym.getName());
auto er_name = sym.getName();
auto e = of_llvm_error_or(er_name);
return map_value<std::string>(e, [](const StringRef &x){return x.str();});
}

error_or<uint64_t> get_addr(const SymbolRef &sym, const ObjectFile &) {
return of_llvm_error_or(sym.getAddress());
auto er_addr = sym.getAddress();
return of_llvm_error_or(er_addr);
}

error_or<uint64_t> get_addr(const SymbolRef &sym, const COFFObjectFile &obj) {
return of_llvm_error_or(sym.getAddress());
auto er_addr = sym.getAddress();
return of_llvm_error_or(er_addr);
}

error_or<kind_type> get_kind(const SymbolRef &sym) {
return success(sym.getType());
auto er_type = sym.getType();
return success(er_type);
}

error_or<symbol_sizes> getSymbolSizes(const ObjectFile &obj) {
Expand All @@ -98,7 +102,7 @@ error_or<symbol_sizes> getSymbolSizes(const ELFObjectFile<ELFT> &obj) {
for (auto sym : obj.symbols())
syms.push_back({sym, sym.getSize()});

auto sections = obj.getELFFile()->sections();
auto sections = prim::elf_sections(*obj.getELFFile());
bool is_dyn = std::any_of(sections.begin(), sections.end(),
[](const sec_hdr &hdr) { return (hdr.sh_type == ELF::SHT_DYNSYM); });

Expand Down Expand Up @@ -219,13 +223,21 @@ error_or<object::Binary> get_binary(const char* data, std::size_t size) {
StringRef data_ref(data, size);
MemoryBufferRef buf(data_ref, "binary");
auto binary = createBinary(buf);
#if LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
if (!binary)
return failure(toString(binary.takeError()));
#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
if (error_code ec = binary.getError())
return failure(ec.message());
#else
#error LLVM version is not supported
#endif
error_or<object::Binary> v(binary->release());
return v;
}

} //namespace img

#endif //LLVM_BINARY_38_HPP
#endif // LLVM=3.8
#endif //LLVM_BINARY_38_40_HPP

#endif // LLVM = 3.8 | LLVM = 4.0
3 changes: 2 additions & 1 deletion lib/bap_llvm/llvm_coff_loader.hpp
Expand Up @@ -254,7 +254,8 @@ void exported_symbols(const coff_obj &obj, ogre_doc &s) {
exported_symbols(obj, v, s);
}

#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0

error_or<uint64_t> symbol_relative_address(const coff_obj &obj, const SymbolRef &sym) {
auto base = obj.getImageBase();
Expand Down
46 changes: 33 additions & 13 deletions lib/bap_llvm/llvm_disasm.cpp
Expand Up @@ -9,6 +9,7 @@
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Target/TargetInstrInfo.h>
#include <llvm-c/Target.h>

#include <cstring>
#include <cstdint>
Expand All @@ -19,7 +20,8 @@
#include "disasm.hpp"
#include "llvm_disasm.h"

#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
#include <llvm/ADT/ArrayRef.h>
#include <llvm/ADT/Triple.h>
#include <llvm/ADT/Twine.h>
Expand All @@ -32,7 +34,8 @@
#endif

//template <typename T>
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
template <typename T>
using smart_ptr = std::unique_ptr<T>;
template <class T>
Expand Down Expand Up @@ -74,7 +77,8 @@ bool ends_with(const std::string& str, const std::string &suffix) {
//! disassembler. This will allow us to handle all the checks
//! identically on both versions.

#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
class MemoryObject {
memory mem;
public:
Expand Down Expand Up @@ -173,7 +177,8 @@ class llvm_disassembler : public disassembler_interface {
llvm::MCInst mcinst;
insn current;
std::vector<int> prefixes;
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
shared_ptr<MemoryObject> mem;
#else
shared_ptr<const llvm::MemoryObject> mem;
Expand Down Expand Up @@ -260,7 +265,8 @@ class llvm_disassembler : public disassembler_interface {
return {nullptr, {bap_disasm_unsupported_target} };
}

#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
smart_ptr<llvm::MCSymbolizer>
symbolizer(target->createMCSymbolizer(
triple,
Expand All @@ -284,7 +290,8 @@ class llvm_disassembler : public disassembler_interface {
return {nullptr, {bap_disasm_unsupported_target} };
}

#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
shared_ptr<llvm::MCInstPrinter>
printer (target->createMCInstPrinter
(t, asm_info->getAssemblerDialect(), *asm_info, *ins_info, *reg_info));
Expand All @@ -303,7 +310,8 @@ class llvm_disassembler : public disassembler_interface {
/* Make the default for immediates to be in hex */
printer->setPrintImmHex(true);

#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
shared_ptr<llvm::MCDisassembler>
dis(target->createMCDisassembler(*sub_info, *ctx));
#else
Expand All @@ -317,7 +325,8 @@ class llvm_disassembler : public disassembler_interface {
return {nullptr, {bap_disasm_unsupported_target} };
}

#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
dis->setSymbolizer(move(symbolizer));
#else
dis->setSymbolizer(symbolizer);
Expand Down Expand Up @@ -362,7 +371,8 @@ class llvm_disassembler : public disassembler_interface {
current.code);
}

#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
llvm::ArrayRef<uint8_t> view(uint64_t pc) {
return mem->view(pc);
}
Expand Down Expand Up @@ -437,7 +447,8 @@ class llvm_disassembler : public disassembler_interface {
if (current.code != 0) {
std::string data;
llvm::raw_string_ostream stream(data);
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0
printer->printInst(&mcinst, stream, "", *sub_info);
#else
printer->printInst(&mcinst, stream, "");
Expand Down Expand Up @@ -479,6 +490,15 @@ class llvm_disassembler : public disassembler_interface {


private:

const char *get_name(const char *x) const { return x; }
const char *get_name(const llvm::StringRef &x) const { return x.data(); }

template <typename Table>
const char* get_name(const Table &tab, int code) const {
return get_name(tab.getName(code));
}

insn valid_insn(location loc) const {
insn ins;

Expand All @@ -495,7 +515,7 @@ class llvm_disassembler : public disassembler_interface {
}

ins.code = mcinst.getOpcode();
ins.name = ins_info->getName(ins.code) - ins_tab.data;
ins.name = get_name(*ins_info, ins.code) - ins_tab.data;
ins.loc = loc;
return ins;
}
Expand Down Expand Up @@ -563,10 +583,10 @@ class llvm_disassembler : public disassembler_interface {
// we can't just take address of the lowest opcode insn, and
// subtract it from the address of the highest one.
assert(n > 0);
const char *p = tab->getName(0);
const char *p = get_name(*tab, 0);
const char *q = p;
for (int i = 0; i < n; i++) {
const char *r = tab->getName(i);
const char *r = get_name(*tab, i);
if (r < p)
p = r;
if (r > q)
Expand Down
22 changes: 12 additions & 10 deletions lib/bap_llvm/llvm_elf_loader.hpp
Expand Up @@ -204,28 +204,29 @@ void symbol_entry(const ELFObjectFile<T> &obj, const SymbolRef &sym, ogre_doc &s
}
}

#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_MINOR == 0

template <typename T>
uint64_t base_address(const ELFObjectFile<T> &obj) {
auto elf = obj.getELFFile();
return base_address(elf->program_header_begin(), elf->program_header_end());
auto hdrs = prim::elf_program_headers(*obj.getELFFile());
return base_address(hdrs.begin(), hdrs.end());
}

template <typename T>
void program_headers(const ELFObjectFile<T> &obj, ogre_doc &s) {
auto elf = obj.getELFFile();
program_headers(elf->program_header_begin(), elf->program_header_end(), s);
auto hdrs = prim::elf_program_headers(*obj.getELFFile());
program_headers(hdrs.begin(), hdrs.end(), s);
}

template <typename T>
void section_headers(const ELFObjectFile<T> &obj, ogre_doc &s) {
auto elf = obj.getELFFile();
auto base = base_address(obj);
for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
auto name = elf->getSectionName(it);
if (!name) s.fail(name.getError().message());
section_header(*it, name.get().str(), base, s);
for (auto sec : prim::elf_sections(*elf)) {
auto name = prim::elf_section_name(*elf, &sec);
if (name)
section_header(sec, *name, base, s);
}
}

Expand All @@ -240,7 +241,8 @@ void symbol_entries(const ELFObjectFile<T> &obj, ogre_doc &s) {
typedef typename ELFFile<T>::Elf_Shdr sec_hdr;
auto elf = obj.getELFFile();
symbol_entries(obj, obj.symbol_begin(), obj.symbol_end(), s);
bool is_dyn = std::any_of(elf->section_begin(), elf->section_end(),
auto secs = prim::elf_sections(*elf);
bool is_dyn = std::any_of(secs.begin(), secs.end(),
[](const sec_hdr &hdr) { return (hdr.sh_type == ELF::SHT_DYNSYM); });
if (is_dyn) // preventing from llvm 3.8 fail in case of .dynsym absence
symbol_entries(obj, obj.dynamic_symbol_begin(), obj.dynamic_symbol_end(), s);
Expand Down

0 comments on commit b7f4cf0

Please sign in to comment.