Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
gaasedelen committed Sep 2, 2020
2 parents 710b13f + 1bad74f commit 91e427e
Show file tree
Hide file tree
Showing 57 changed files with 211 additions and 78 deletions.
6 changes: 3 additions & 3 deletions coverage/README.md
Expand Up @@ -16,19 +16,19 @@ Example usage:

## Intel Pin

Using a [custom pintool](coverage/pin) contributed by [Agustin Gianni](https://twitter.com/agustingianni), the Intel Pin DBI can also be used to collect coverage data.
Using a [custom pintool](pin/README.md) contributed by [Agustin Gianni](https://twitter.com/agustingianni), the Intel Pin DBI can also be used to collect coverage data.

Example usage:

```
pin.exe -t CodeCoverage64.dll -- boombox.exe
```

For convenience, binaries for the Windows pintool can be found on the [releases](https://github.com/gaasedelen/lighthouse/releases) page. macOS and Linux users need to compile the pintool themselves following the [instructions](coverage/pin#compilation) included with the pintool for their respective platforms.
For convenience, binaries for the Windows pintool can be found on the [releases](https://github.com/gaasedelen/lighthouse/releases) page. macOS and Linux users need to compile the pintool themselves following the [instructions](pin/README.md#compilation) included with the pintool for their respective platforms.

## Frida (Experimental)

Lighthouse offers limited support for Frida based code coverage via a custom [instrumentation script](coverage/frida) contributed by [yrp](https://twitter.com/yrp604).
Lighthouse offers limited support for Frida based code coverage via a custom [instrumentation script](frida/README.md) contributed by [yrp](https://twitter.com/yrp604).

Example usage:

Expand Down
44 changes: 28 additions & 16 deletions coverage/pin/CodeCoverage.cpp
Expand Up @@ -46,12 +46,9 @@ static std::string base_name(const std::string& path)
}

// Per thread data structure. This is mainly done to avoid locking.
// - Per-thread map of executed basic blocks, and their size.
struct ThreadData {
// Unique list of hit basic blocks.
pintool::unordered_set<ADDRINT> m_block_hit;

// Map basic a block address to its size.
pintool::unordered_map<ADDRINT, uint16_t> m_block_size;
pintool::unordered_map<ADDRINT, uint16_t> m_blocks;
};

class ToolContext {
Expand Down Expand Up @@ -166,24 +163,37 @@ static VOID PIN_FAST_ANALYSIS_CALL OnBasicBlockHit(THREADID tid, ADDRINT addr, U
{
auto& context = *reinterpret_cast<ToolContext*>(v);
ThreadData* data = context.GetThreadLocalData(tid);
data->m_block_hit.insert(addr);
data->m_block_size[addr] = size;
data->m_blocks[addr] = size;
PIN_RemoveInstrumentationInRange(addr, addr);
}

// Trace hit event handler.
static VOID OnTrace(TRACE trace, VOID* v)
{
auto& context = *reinterpret_cast<ToolContext*>(v);
BBL bbl = TRACE_BblHead(trace);
ADDRINT addr = BBL_Address(bbl);

// Check if the address is inside a white-listed image.
if (!context.m_tracing_enabled || !context.m_images->isInterestingAddress(addr))
if (!context.m_tracing_enabled || !context.m_images->isInterestingAddress(TRACE_Address(trace)))
return;

// For each basic block in the trace.
for (; BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
addr = BBL_Address(bbl);
auto tid = PIN_ThreadId();
ThreadData* data = context.GetThreadLocalData(tid);

// This trace is getting JIT'd, which implies the head must get executed.
auto bbl = TRACE_BblHead(trace);
auto addr = BBL_Address(bbl);
data->m_blocks[addr] = (uint16_t)BBL_Size(bbl);

// For each basic block in the trace...
for (bbl = BBL_Next(bbl); BBL_Valid(bbl); bbl = BBL_Next(bbl))
{

// Ignore blocks that have already been marked as executed in the past...
ADDRINT addr = BBL_Address(bbl);
if (data->m_blocks.find(addr) != data->m_blocks.end())
continue;

// Instrument blocks that have not yet been executed (at least... by this thread).
BBL_InsertCall(bbl, IPOINT_ANYWHERE, (AFUNPTR)OnBasicBlockHit,
IARG_FAST_ANALYSIS_CALL,
IARG_THREAD_ID,
Expand All @@ -192,6 +202,7 @@ static VOID OnTrace(TRACE trace, VOID* v)
IARG_PTR, v,
IARG_END);
}

}

// Program finish event handler.
Expand Down Expand Up @@ -219,7 +230,7 @@ static VOID OnFini(INT32 code, VOID* v)
// Count the global number of basic blocks.
size_t number_of_bbs = 0;
for (const auto& data : context.m_terminated_threads) {
number_of_bbs += data->m_block_hit.size();
number_of_bbs += data->m_blocks.size();
}

context.m_trace->write_string("BB Table: %u bbs\n", number_of_bbs);
Expand All @@ -233,7 +244,8 @@ static VOID OnFini(INT32 code, VOID* v)
drcov_bb tmp;

for (const auto& data : context.m_terminated_threads) {
for (const auto& address : data->m_block_hit) {
for (const auto& block : data->m_blocks) {
auto address = block.first;
auto it = std::find_if(context.m_loaded_images.begin(), context.m_loaded_images.end(), [&address](const LoadedImage& image) {
return address >= image.low_ && address < image.high_;
});
Expand All @@ -243,7 +255,7 @@ static VOID OnFini(INT32 code, VOID* v)

tmp.id = (uint16_t)std::distance(context.m_loaded_images.begin(), it);
tmp.start = (uint32_t)(address - it->low_);
tmp.size = data->m_block_size[address];
tmp.size = data->m_blocks[address];

context.m_trace->write_binary(&tmp, sizeof(tmp));
}
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
88 changes: 77 additions & 11 deletions plugin/lighthouse/director.py → plugins/lighthouse/director.py
Expand Up @@ -67,6 +67,7 @@ def __init__(self, metadata, palette):
# the coverage file parser
self.reader = CoverageReader()
self._target_whitelist = []
self.suppressed_errors = set()

# the name of the active coverage
self.coverage_name = NEW_COMPOSITION
Expand Down Expand Up @@ -380,6 +381,9 @@ def load_coverage_batch(self, filepaths, batch_name, progress_callback=logger.de
errors = collections.defaultdict(list)
aggregate_addresses = set()

# unsuppress NO_COVERAGE_ERROR (CoverageMissingError) per-load, instead of per-session
self.suppressed_errors.discard(CoverageMissingError)

start = time.time()
#----------------------------------------------------------------------

Expand Down Expand Up @@ -439,6 +443,9 @@ def load_coverage_files(self, filepaths, progress_callback=logger.debug):
errors = collections.defaultdict(list)
all_coverage = []

# unsuppress NO_COVERAGE_ERROR (CoverageMissingError) per-load, instead of per-session
self.suppressed_errors.discard(CoverageMissingError)

start = time.time()
#----------------------------------------------------------------------

Expand Down Expand Up @@ -541,16 +548,39 @@ def _extract_coverage_data(self, coverage_file):
if not module_name and coverage_file.modules:

#
# if the user closes the dialog without selecting a name, there's
# nothing we can do for them ...
# earlier in this load, the user opted to ignore future attempts
# to alias or select coverage data. this is useful when trying to
# load a batch of coverage files, where some coverage files
# contain data, but none relevant to this database.
#

if CoverageMissingError in self.suppressed_errors:
return []

#
# show the module selection dialog to the user, and wait for them
# to select something, or close the dialog
#

dialog = ModuleSelector(database_target, coverage_file.modules, coverage_file.filepath)
if not dialog.exec_():
return [] # no coverage data extracted ...
result = dialog.exec_()

# check if the user opted to ignore future warnings for missing coverage
if dialog.ignore_missing:
self.suppressed_errors.add(CoverageMissingError)

#
# if the user closed the dialog without selecting a name, there's
# nothing we can do for them. return an empty set of coverage data
#

if not result:
return []

# the user selected a module name! use that to extract coverage
module_name = dialog.selected_name

# the user opted to save the selected name as an 'alias'
if dialog.remember_alias:
self._target_whitelist.append(module_name)

Expand Down Expand Up @@ -713,37 +743,73 @@ def _find_fuzzy_name(self, coverage_file, target_name):
"""
target_name = target_name.lower()

#
# 0. Pre-process module names, strip filepath if present
#

clean_module_names = {}
for module_name_raw in coverage_file.modules:

# trim 'path' from a 'module name' entry... if present (uncommon)
module_name = os.path.basename(module_name_raw)

#
# if this triggers, it's probably because the coverage file is
# using full filepaths for 'module names', and that there was
# two unique filepaths with the same module name, eg:
#
# - C:\foo.dll
# - C:\bar\foo.dll
#
# this should be super rare, but we'll just revert to using the
# full / unprocessed paths and bail...
#

if module_name in clean_module_names:
clean_module_names = {name: name for name in coverage_file.modules}
break

clean_module_names[module_name] = module_name_raw

#
# 1. exact, case-insensitive filename matching
#

for module_name in coverage_file.modules:
for module_name in clean_module_names:
if target_name == module_name.lower():
return clean_module_names[module_name]

#
# 2. exact, case-insensitive filename matching
#

for module_name in clean_module_names:
if target_name == module_name.lower():
return module_name
return clean_module_names[module_name]

#
# 2. cleave the extension from the target module name (the source)
# and try again to see if matches anything in the coverage file
#

target_name, extension = os.path.splitext(target_name)
for module_name in coverage_file.modules:
for module_name in clean_module_names:
if target_name == module_name.lower():
return module_name
return clean_module_names[module_name]

# too risky to do fuzzy matching on short names...
if len(target_name) < 6:
return None

#
# 3. try to match *{target_name}*{extension} in module_name, assuming
# target_name is more than 6 characters and there is no othe ambiguity
# target_name is more than 6 characters and there is no other ambiguity
#

possible_names = []
for module_name in coverage_file.modules:
for module_name in clean_module_names:
if target_name in module_name.lower() and extension in module_name.lower():
possible_names.append(module_name)
possible_names.append(clean_module_names[module_name])

# there were no matches on the wildcarding, so we're done
if not possible_names:
Expand Down
Expand Up @@ -112,7 +112,7 @@ def __init__(self, coverage):
# UI Warnings
#------------------------------------------------------------------------------

def warn_errors(errors):
def warn_errors(errors, ignore=[]):
"""
Warn the user of any encountered errors with a messagebox.
"""
Expand All @@ -131,6 +131,10 @@ def warn_errors(errors):
for error in error_list:
lmsg(" - %s" % error.filepath)

# suppress popups for certain errors, if the user has specified such
if error_type in ignore:
continue

#
# popup a more verbose error messagebox for the user to read regarding
# this class of error they encountered
Expand Down
File renamed without changes.
Expand Up @@ -26,7 +26,7 @@ class LighthouseCore(object):
# Plugin Metadata
#--------------------------------------------------------------------------

PLUGIN_VERSION = "0.9.0"
PLUGIN_VERSION = "0.9.1"
AUTHORS = "Markus Gaasedelen"
DATE = "2020"

Expand Down Expand Up @@ -329,7 +329,7 @@ def interactive_load_batch(self, dctx=None):
self.open_coverage_overview(lctx.dctx)

# finally, emit any notable issues that occurred during load
warn_errors(errors)
warn_errors(errors, lctx.director.suppressed_errors)

def interactive_load_file(self, dctx=None):
"""
Expand Down Expand Up @@ -399,7 +399,7 @@ def interactive_load_file(self, dctx=None):
self.open_coverage_overview(lctx.dctx)

# finally, emit any notable issues that occurred during load
warn_errors(errors)
warn_errors(errors, lctx.director.suppressed_errors)

def check_for_update(self):
"""
Expand Down
File renamed without changes.
Expand Up @@ -959,6 +959,14 @@ def _compute_complexity(self):

# update the map of confirmed (walked) edges
confirmed_edges[current_src] = self.edges.pop(current_src)

#
# retain only the 'confirmed' edges. this may differ from the
# original edge map because we are only keeping edges that can be
# walked from the function entry. (eg, no ida exception handlers)
#

self.edges = confirmed_edges

# compute the final cyclomatic complexity for the function
num_edges = sum(len(x) for x in itervalues(confirmed_edges))
Expand Down
File renamed without changes.
File renamed without changes.

0 comments on commit 91e427e

Please sign in to comment.