Permalink
Cannot retrieve contributors at this time
''' | |
========================================================================= | |
Code coverage analysis tool: | |
This program generates log files in Calltree Profile Format. | |
Usage: | |
coverage_to_callgraph.py <log_file_path> <thread_id> [options] | |
... where: | |
<log_file_path> - Path to the log file, that has been generated by | |
Coverager.dll (PIN toolkit instrumentation module). | |
Walid options are: | |
--modules <module_name> - Collect information only for the specified modules. | |
--skip-symbols - Don't use PDB loading and parsing for executable modules. | |
Specify "*" as thread ID value to process logs from all available threads. | |
Example: | |
coverage_parse.py Coverager.log * --modules "ieframe,iexplore" | |
Developed by: | |
Oleksiuk Dmitry, eSage Lab | |
mailto:dmitry@esagelab.com | |
http://www.esagelab.com/ | |
========================================================================= | |
''' | |
import sys, os, time, re | |
ver = sys.version[:3] | |
# load python specified version of symlib module | |
if ver == "2.5": | |
from symlib25 import * | |
elif ver == "2.6": | |
from symlib import * | |
else: | |
print "[!] Only Python 2.5 and 2.6 are supported by symlib module" | |
# if end | |
APP_NAME = ''' | |
Code Coverage Analysis Tool for PIN | |
by Oleksiuk Dmitry, eSage Lab (dmitry@esagelab.com) | |
''' | |
m_logfile = None | |
m_routines_list = {} | |
m_modules_list = {} | |
m_modules_to_process = [] | |
m_skip_symbols = False | |
m_call_tree = {} | |
def log_write(text): | |
global m_logfile | |
if m_logfile: | |
m_logfile.write(text + "\r\n") | |
else: | |
print text | |
# def end | |
def read_modules_list(file_name): | |
global m_modules_list | |
m_modules_list['?'] = { 'path': '?', 'processed_items': 0, \ | |
'symbols_loaded': False, 'alias': 1, 'alias_accessed': False } | |
# open input file | |
f = open(file_name) | |
content = f.readline() | |
# read file contents line by line | |
while content != "": | |
content = content.replace("\n", "") | |
entry = content.split(":") | |
if len(entry) > 3: | |
entry[2] = entry[2] + ":" + entry[3] | |
if content[:1] != "#" and len(entry) >= 3: | |
alias = len(m_modules_list) + 1 | |
module_name = os.path.basename(entry[2]).lower() | |
m_modules_list[module_name] = { 'path': entry[2], 'processed_items': 0, \ | |
'symbols_loaded': False, 'alias': alias, 'alias_accessed': False } | |
# if end | |
# read the next line | |
content = f.readline() | |
# while end | |
f.close() | |
# def end | |
def parse_symbol(string): | |
global m_modules_list, m_modules_to_process, m_skip_symbols | |
# parse 'name+offset' string | |
info = string.split("+") | |
if len(info) >= 2: | |
info[1] = int(info[1], 16) | |
module_path = info[0].lower() | |
if m_modules_list.has_key(module_path): | |
m_modules_list[module_path]['processed_items'] += 1 | |
# if end | |
skip_module = False | |
if len(m_modules_to_process) > 0: | |
skip_module = True | |
for module_flt in m_modules_to_process: | |
if module_path.find(module_flt) >= 0: | |
# don't skip this module | |
skip_module = False | |
# if end | |
# for end | |
# if end | |
if skip_module: | |
return False | |
if m_skip_symbols: | |
return string | |
if m_modules_list.has_key(module_path): | |
module_path = m_modules_list[module_path]['path'] | |
# if end | |
# lookup debug symbol for address | |
symbol = bestbyaddr(module_path, info[1]) | |
if symbol != None: | |
addr_s = "%s!%s" % (info[0], symbol[0]) | |
if symbol[1] > 0: | |
addr_s += "+0x%x" % symbol[1] | |
return addr_s | |
# if end | |
elif string[0] == "?" and len(m_modules_to_process) > 0: | |
if "?" not in m_modules_to_process: | |
return False | |
# if end | |
return string | |
# def end | |
def load_symbols(module_name): | |
global m_routines_list, m_modules_list, m_skip_symbols | |
if m_skip_symbols: | |
return | |
module_name = module_name.lower() | |
if not m_modules_list.has_key(module_name): | |
# unknown module | |
return | |
if m_modules_list[module_name]['symbols_loaded']: | |
# symbols allready loaded for this module | |
return | |
# update names for all available routines from this module | |
for rtn_addr in m_routines_list: | |
if m_routines_list[rtn_addr]['module'].lower() == module_name: | |
rtn_name = parse_symbol(m_routines_list[rtn_addr]['name']) | |
if rtn_name != False: | |
m_routines_list[rtn_addr]['name'] = rtn_name | |
# if end | |
# for end | |
m_modules_list[module_name]['symbols_loaded'] = True | |
# def end | |
def read_routines_list(file_name): | |
global m_routines_list | |
# open input file | |
f = open(file_name) | |
content = f.readline() | |
print "[+] Parsing routines list, please wait...\n" | |
info_list = [] | |
i = 0 | |
# read file contents line by line | |
while content != "": | |
sys.stdout.write(["-", "\\", "|", "/"][i]) | |
sys.stdout.write("\r") | |
i = (i + 1) & 3 | |
content = content.replace("\n", "") | |
entry = content.split(":") | |
if content[:1] != "#" and len(entry) >= 3: | |
rtn_addr = int(entry[0], 16) # routinr virtual address | |
rtn_name = entry[1] | |
rtn_calls = int(entry[2]) | |
rtn_alias = len(m_routines_list) + 1 | |
rtn_module = "?" | |
name = entry[1].split("+") | |
if len(name) == 2: | |
rtn_module = name[0] | |
m_routines_list[rtn_addr] = { 'name': rtn_name, \ | |
'module': rtn_module, 'calls': rtn_calls, \ | |
'alias': rtn_alias, 'alias_accessed': False } | |
# if end | |
# read the next line | |
content = f.readline() | |
# while end | |
f.close() | |
# def end | |
def get_rtn_info(rtn): | |
global m_routines_list | |
# get function name | |
fn_name = m_routines_list[rtn]['name'] | |
fn_alias = m_routines_list[rtn]['alias'] | |
if m_routines_list[rtn]['alias_accessed'] == False: | |
# return full function name and alias | |
m_routines_list[rtn]['alias_accessed'] = True | |
else: | |
# function name has been allready logged, return only alias | |
fn_name = "" | |
return (fn_alias, fn_name) | |
# def end | |
def get_rtn_module_info(rtn): | |
global m_modules_list, m_routines_list | |
# get function module information | |
ob_name = m_routines_list[rtn]['module'] | |
mod_name = ob_name.lower() | |
ob_alias = m_modules_list[mod_name]['alias'] | |
if m_modules_list[mod_name]['alias_accessed'] == False: | |
# return full module name and alias | |
m_modules_list[mod_name]['alias_accessed'] = True | |
else: | |
# module name has been allready logged, return only alias | |
ob_name = "" | |
return (ob_alias, ob_name) | |
# def end | |
def read_calls_list(file_name): | |
global m_call_tree | |
# open input file | |
f = open(file_name) | |
content = f.readline() | |
# read file contents line by line | |
while content != "": | |
content = content.replace("\n", "") | |
entry = content.split(":") | |
if content[:1] != "#" and len(entry) >= 2: | |
rtn_src = int(entry[0], 16) | |
rtn_dst = int(entry[1], 16) | |
if rtn_src != 0: | |
if not m_call_tree.has_key(rtn_src): | |
m_call_tree[rtn_src] = {} | |
# if end | |
if not m_call_tree[rtn_src].has_key(rtn_dst): | |
m_call_tree[rtn_src][rtn_dst] = 0 | |
# if end | |
m_call_tree[rtn_src][rtn_dst] += 1 | |
# if end | |
# if end | |
# read the next line | |
content = f.readline() | |
# while end | |
f.close() | |
# def end | |
if __name__ == "__main__": | |
print APP_NAME | |
if len(sys.argv) < 3: | |
print "USAGE: coverage_to_callgraph.py <LogFilePath> <thread_id> [options]" | |
sys.exit() | |
# if end | |
try: | |
import psyco | |
psyco.full() | |
except: | |
print "[!] Psyco is not available" | |
thread_id = sys.argv[2] | |
if not thread_id.isdigit() and thread_id != "*": | |
print "[!] Error: invalid thread id specified" | |
sys.exit(-1) | |
# if end | |
logfile = "Callgrind.out" | |
if thread_id.isdigit(): | |
logfile += "." + thread_id | |
fname = sys.argv[1] | |
fname_blocks = fname + ".blocks" | |
fname_routines = fname + ".routines" | |
fname_modules = fname + ".modules" | |
# parse command line arguments | |
if len(sys.argv) > 3: | |
for i in range(3, len(sys.argv)): | |
if sys.argv[i] == "--modules" and i < len(sys.argv) - 1: | |
# filter by module name is specified | |
modlist = sys.argv[i + 1].split(",") | |
for mod in modlist: | |
mod = mod.lstrip() | |
m_modules_to_process.append(mod.lower()) | |
print "Filtering by module name \"%s\"" % (mod) | |
# for end | |
elif sys.argv[i] == "--skip-symbols": | |
m_skip_symbols = True | |
# if end | |
# for end | |
# if end | |
if not os.path.isfile(fname): | |
print "[!] Error while opening input file" | |
sys.exit(-1) | |
# if end | |
if not os.path.isfile(fname_modules): | |
print "[!] Error while opening modules log" | |
sys.exit(-1) | |
# if end | |
input_files = [] | |
if thread_id.isdigit(): | |
# | |
# process single input file | |
# | |
fname_calls = fname + "." + thread_id | |
if not os.path.isfile(fname_calls): | |
print "[!] Error while opening calls log" | |
sys.exit(-1) | |
# if end | |
input_files.append(fname_calls) | |
else: | |
# | |
# use call tree log files for all threads | |
# | |
files = os.listdir("./") | |
for f in files: | |
if re.search(fname + ".\\d+", f): | |
input_files.append(f) | |
# for end | |
# if end | |
print "[+] Input file(s): %s" % (", ".join(input_files)) | |
if logfile: | |
# create output file | |
m_logfile = open(logfile, "wb+") | |
print "[+] Output file: %s" % (logfile) | |
# if end | |
exec_time = time.time() | |
# read target application modules list | |
read_modules_list(fname_modules) | |
print "[+] %d modules readed" % (len(m_modules_list)) | |
# read target application routines list | |
read_routines_list(fname_routines) | |
print "[+] %d routines readed" % (len(m_routines_list)) | |
############ | |
print "[+] Parsing call tree, please wait...\n" | |
# parse all available input files | |
for input_file in input_files: | |
read_calls_list(input_file) | |
log_write("#") | |
log_write("# Generated by Code Coverage Analysis Tool for PIN") | |
log_write("#\r\n") | |
# write call tree information into the callgrind file | |
log_write("events: Ir\r\n") | |
# enumerate available functions | |
for rtn in m_call_tree: | |
if not m_routines_list.has_key(rtn): | |
continue | |
# load debug symbols for module, that contains this function | |
load_symbols(m_routines_list[rtn]['module']) | |
log_write("ob=(%d) %s" % get_rtn_module_info(rtn)) | |
log_write("fn=(%d) %s" % get_rtn_info(rtn)) | |
log_write("0 1") | |
# enumerate calls from current function to the others | |
for rtn_dst in m_call_tree[rtn]: | |
load_symbols(m_routines_list[rtn_dst]['module']) | |
log_write("cob=(%d) %s" % get_rtn_module_info(rtn_dst)) | |
log_write("cfn=(%d) %s" % get_rtn_info(rtn_dst)) | |
log_write("calls=%d 0" % (m_call_tree[rtn][rtn_dst])) | |
log_write("0 1") | |
# for end | |
log_write("\r\n") | |
# for end | |
############ | |
exec_time = int(time.time() - exec_time) | |
print "\n[+] DONE (%d mins., %d secs.)\n" % (exec_time / 60, exec_time % 60) | |
# if end | |
# | |
# EoF | |
# |