# BinDatabase

Generate the database of functions from the existing, already-analyzed Ghidra project.

In [5]:
from utils.db import *
from utils.key import *
from utils.launcher import *
from utils.match import *

In [6]:
# Boot the headless launcher; its log output is written to ./launch.log
launcher = HeadlessLoggingPyhidraLauncher(
    log_path='./launch.log',
    verbose=True,
)
launcher.start()

INFO  Using log config file: jar:file:/home/dingisoul/dev/FirmFlaw/ghidra_11.0_PUBLIC/Ghidra/Framework/Generic/lib/Generic.jar!/generic.log4j.xml (LoggingInitialization)  
INFO  Using log file: ./launch.log (LoggingInitialization)  
INFO  Loading user preferences: /home/dingisoul/.ghidra/.ghidra_11.0_PUBLIC/preferences (Preferences)  
INFO  Searching for classes... (ClassSearcher)  
INFO  Class search complete (1213 ms) (ClassSearcher)  
INFO  Initializing SSL Context (SSLContextInitializer)  
INFO  Initializing Random Number Generator... (SecureRandomFactory)  
INFO  Random Number Generator initialization complete: NativePRNGNonBlocking (SecureRandomFactory)  
INFO  Trust manager disabled, cacerts have not been set (ApplicationTrustManagerFactory)  


In [7]:
# Reuse the project created or opened in chapter 1
# Necessary imports for ghidra project 
from ghidra.base.project import GhidraProject
from java.io import IOException
from pathlib import Path 

# Directory that holds the Ghidra project, and the project's name.
project_location = Path('./ghidra_project')
project_location.mkdir(parents=True, exist_ok=True)
project_name = "binmatch_project"

# Try to open the project first; an IOException from openProject is taken
# to mean it does not exist yet, in which case it is created instead.
try:
    project = GhidraProject.openProject(project_location, project_name, True)
except IOException:
    project = GhidraProject.createProject(project_location, project_name, False)
    print(f'Created project: {project.project.name}')
else:
    print(f'Opened project: {project.project.name}')

INFO  Opening project: /home/dingisoul/dev/FirmFlaw/ghidra_project/binmatch_project (DefaultProject)  
Opened project: binmatch_project


In [11]:
# Generate the function database: one row per accepted function of every
# program stored in the root folder of the Ghidra project.
from ghidra.program.flatapi import FlatProgramAPI
# from tqdm.notebook import tqdm
import os
import sqlite3  # imported explicitly instead of relying on `from utils.db import *`
from functools import reduce
from operator import mul

DATABASE = './step2_postSig/arm/bin_func.db'

# Always rebuild from scratch so rows from a previous run never linger.
if os.path.exists(DATABASE):
    print(f'remove {DATABASE}')
    os.remove(DATABASE)

# sqlite3.connect() does not create missing parent directories.
db_dir = os.path.dirname(DATABASE)
if db_dir:
    os.makedirs(db_dir, exist_ok=True)

conn = sqlite3.connect(DATABASE)
try:
    cursor = conn.cursor()
    # Create the table and the index on the hash column.
    sql_create_table(cursor, FUNC_KEYS, FUNC_TABLE_NAME)
    sql_create_index(cursor, FUNC_TABLE_NAME, ['hash'], 'index_hash')
    conn.commit()

    for num, file_ in enumerate(project.getRootFolder().getFiles()):
        name_ = file_.getName()
        program = project.openProgram('/', name_, True)
        try:
            rows_ = []
            for func_ in program.getListing().getFunctions(True):
                # Only functions accepted by filter_func are stored.
                if not filter_func(func_):
                    continue
                inst_ = get_inst_key(func_)
                graph = get_struct_graph_key(func_)
                # hash = product of all graph-key components * inst_[0].
                # inst_[0] is expected to be numAddresses -- TODO confirm
                # against get_inst_key.
                # NOTE(review): a single zero component makes the whole hash
                # 0, and a product that does not fit in a signed 64-bit
                # integer cannot be stored by sqlite3 -- confirm both are
                # acceptable for the matcher.
                hash_ = reduce(mul, graph, 1) * inst_[0]
                # Row layout: name, program, hash, instruction key fields,
                # graph key fields.  No duplicate check is done because
                # every binary is different.
                rows_.append(
                    (func_.getName(), program.getName(), hash_) + inst_ + graph
                )
            # Insert and commit once per program.
            sql_insert(cursor, FUNC_KEYS.keys(), rows_, FUNC_TABLE_NAME)
            conn.commit()
            print(f"{program.getName()} insert {len(rows_)} functions at {num}")
        finally:
            # Always release the program, even when extraction or the insert
            # fails, so opened programs do not pile up in memory.
            project.close(program)
finally:
    # Close the connection on failure as well as on success.
    conn.close()

# Reached only on success: after a failure the project stays open so the
# cell can be re-run without reopening it.
project.close()

remove ./step2_postSig/arm/bin_func.db
CREATE TABLE IF NOT EXISTS func_table (id INTEGER PRIMARY KEY,name TEXT,program TEXT,hash INTEGER,numAddresses INTEGER,mnemonics TEXT,block_num INTEGER,edge_num INTEGER,call_num INTEGER,jump_num INTEGER);
CREATE INDEX IF NOT EXISTS index_hash on func_table(hash);
0.hex.bin.fs109116fs.noheader insert 257 functions at 0
0.hex.bin.fs13848fs.noheader insert 16 functions at 1
0.hex.bin.fs162965fs.noheader insert 249 functions at 2
0.hex.bin.fs162965fs.noheader insert 0 functions at 3
0.hex.bin.fs177418fs.noheader insert 285 functions at 4
0.hex.bin.fs177418fs.noheader insert 0 functions at 5
0.hex.bin.fs177418fs.noheader insert 0 functions at 6
0.hex.bin.fs379885fs.noheader insert 154 functions at 7
0.hex.bin.fs445996fs.noheader insert 575 functions at 8
0.hex.bin.fs445996fs.noheader insert 0 functions at 9
0.hex.bin.fs510304fs.noheader insert 320 functions at 10
0.srec.bin.fs8384512fs.noheader insert 0 functions at 11
0.srec.bin.fs8384512fs.noheader i

In [13]:
# Print the name of every file stored in the project's root folder.
root_files = project.getRootFolder().getFiles()
for entry in root_files:
    print(entry.getName())

ghidra.base.project.GhidraProject@76a6f045

# END

In [8]:
# Gather the name of each file in the project's root folder.
files_ = [entry.getName() for entry in project.getRootFolder().getFiles()]

In [10]:
# Number of root-folder file names collected in files_.
len(files_)

603