# Learn Pyhidra

Pyhidra is a Python library that provides direct access to the Ghidra API within a native CPython interpreter using [jpype](https://jpype.readthedocs.io/en/latest). As well, Pyhidra contains some conveniences for setting up analysis on a given sample and running a Ghidra script locally. It also contains a Ghidra plugin to allow the use of CPython from the Ghidra user interface.

In this notebook, we will learn how to use the basic functions provided by Pyhidra.
Furthermore, we will explore the functions related to binary analysis.

* **Task 1:** Write a launcher subclass that implements a logging service
* **Task 2:** Create or open a project with a given path and name
* **Task 3:** Import a program with a given path and name and get the flat API
    * flat_api is an interface that exposes many useful functions without the caller having to determine which class provides them
* **Task 4:** Set the correct base address of the binary file
* **Task 5:** Create a function based on the vector table
* **Task 6:** Create a data item as a pointer that refers to a function
* **Task 7:** Save the program and close the project

In [None]:
# remember to set the GHIDRA_INSTALL_DIR first
! pip install pyhidra > /dev/null
# import launcher
from pyhidra.launcher import PyhidraLauncher, GHIDRA_INSTALL_DIR

In [None]:
# Task 1: write a launcher subclass that implements a logging service
# Inherit from PyhidraLauncher so that the Ghidra log can be saved to a file
class HeadlessLoggingPyhidraLauncher(PyhidraLauncher):
    """
    Headless pyhidra launcher with a configurable Ghidra log file.

    Slightly modified from pyhidra's launcher so that the Ghidra log path
    can be set.

    Args:
        verbose: Forwarded to ``PyhidraLauncher.__init__``; its negation is
            passed to pyhidra's ``_silence_java_output`` during launch.
        log_path: Optional path of the Ghidra application log file. When it
            is falsy, no log file is configured on the application config.
    """

    def __init__(self, verbose=False, log_path=None):
        super().__init__(verbose)
        self.log_path = log_path

    def _launch(self):
        """Initialize the headless Ghidra application, optionally with a log file."""
        # Java-side imports are done here, inside the launch hook, as in the
        # original implementation.
        from pyhidra.launcher import _silence_java_output
        from ghidra.framework import Application, HeadlessGhidraApplicationConfiguration
        from java.io import File

        quiet = not self.verbose
        with _silence_java_output(quiet, quiet):
            app_config = HeadlessGhidraApplicationConfiguration()
            if self.log_path:
                app_config.setApplicationLogFile(File(self.log_path))
            Application.initializeApplication(self._layout, app_config)

In [None]:
# Start the launcher; the Ghidra log is written to ./launch.log
launcher = HeadlessLoggingPyhidraLauncher(
    log_path='./launch.log',
    verbose=True,
)
launcher.start()

In [None]:
# Task 2: Create or open a project with a given path and name
# Imports needed to work with a Ghidra project
from ghidra.base.project import GhidraProject
from java.io import IOException
from pathlib import Path

# Project name and the directory that holds the project
project_name = "test_project"
project_location = Path('./ghidra_project')
project_location.mkdir(parents=True, exist_ok=True)

# Try to open the project first; if opening raises IOException,
# create the project instead.
try:
    project = GhidraProject.openProject(project_location, project_name, True)
except IOException:
    project = GhidraProject.createProject(project_location, project_name, False)
    print(f'Created project: {project.project.name}')
else:
    print(f'Opened project: {project.project.name}')

In [None]:
# Task 3: import a program with a given path and name and get the flat API
# The firmware image is looked up relative to the current working directory.
program_name = "STM32L1xxSmartGlass-V1.21-181016.bin"
program_path = Path(f"./{program_name}")

# helper funcs 
def _get_language(id: str) -> "Language":
    """Return the Ghidra language registered under the language ID *id*.

    Args:
        id: Language ID string, e.g. ``"ARM:LE:32:Cortex"``.

    Returns:
        The language object returned by Ghidra's default language service.

    Raises:
        ValueError: If Ghidra raises ``LanguageNotFoundException`` for *id*.
    """
    from ghidra.program.model.lang import LanguageID, LanguageNotFoundException
    from ghidra.program.util import DefaultLanguageService

    try:
        lang_service: "LanguageService" = DefaultLanguageService.getLanguageService()
        language_id = LanguageID(id)
        return lang_service.getLanguage(language_id)
    except LanguageNotFoundException:
        # Swallow the Java exception here; the ValueError is raised below,
        # outside the handler, so it is not chained to the Java exception.
        pass
    raise ValueError("Invalid Language ID: "+id)
    
def _get_compiler_spec(lang: "Language", id: str = None) -> "CompilerSpec":
    """Return a compiler spec of *lang*.

    Args:
        lang: Ghidra language object to query.
        id: Compiler spec ID string. When ``None``, the language's default
            compiler spec is returned.

    Returns:
        The default compiler spec, or the one registered under *id*.

    Raises:
        ValueError: If Ghidra raises ``CompilerSpecNotFoundException`` for *id*.
    """
    if id is None:
        return lang.getDefaultCompilerSpec()

    from ghidra.program.model.lang import CompilerSpecID, CompilerSpecNotFoundException

    try:
        spec_id = CompilerSpecID(id)
        return lang.getCompilerSpecByID(spec_id)
    except CompilerSpecNotFoundException:
        # Swallow the Java exception here; the ValueError is raised below,
        # outside the handler, so it is not chained to the Java exception.
        pass
    language_name = lang.getLanguageID().toString()
    raise ValueError(f"Invalid CompilerSpecID: {id} for Language: {language_name}")
    
# Import the program using the ARM Cortex (little-endian, 32-bit) language
# and that language's default compiler spec.
lang = _get_language("ARM:LE:32:Cortex")
compiler_spec = _get_compiler_spec(lang)
program = project.importProgram(program_path, lang, compiler_spec)

# Wrap the imported program in the flat API
from ghidra.program.flatapi import FlatProgramAPI
flat_api = FlatProgramAPI(program)

In [None]:
# Task 4: set the correct base address of the binary file
image_base = 0x8003000
old_base = program.getImageBase()
print(f'Original Image Base: {old_base}')
# setImageBase(Address base, boolean commit)
new_base = old_base.getNewAddress(image_base)
program.setImageBase(new_base, True)
# Check the result: read the 32-bit word stored at the new base address
first_word = flat_api.getInt(flat_api.toAddr(image_base))
print(hex(first_word))

In [None]:
# Task 5: create a function based on the vector table
# The freshly imported raw program has no functions yet
list_ = program.getListing()
funcs = list_.getFunctions(False)
print(f'has func :{funcs.hasNext()}')
# The second 32-bit word of the vector table (offset 4) holds the reset vector.
reset_vector = flat_api.getInt(flat_api.toAddr(image_base + 4))
# Clear bit 0 (the Thumb bit) with a mask instead of subtracting 1, so the
# address stays correct even if the bit is not set. getInt returns a signed
# Java int; the 32-bit mask also converts it to an unsigned value.
entry_point = flat_api.toAddr(reset_vector & 0xFFFFFFFE)
print(f'Entry Point :{entry_point}')
# Disassemble first so the function is created on decoded instructions.
flat_api.disassemble(entry_point)
flat_api.createFunction(entry_point, 'Reset_Handler')

In [None]:
# Task 6: create a data item as a pointer that refers to a function
# TODO: a dword plus a reference is not the same as a real pointer data type.
# For what **may** be the correct operation, see ArmThumbFunctionTableScript.java
# in the Script Manager.
from ghidra.program.model.symbol import RefType
reset_vector_addr = flat_api.toAddr(image_base + 4)
reset_vector_data = flat_api.createDWord(reset_vector_addr)
flat_api.createLabel(reset_vector_addr, "RESET", False)
flat_api.createMemoryReference(
    reset_vector_data,
    entry_point,
    RefType.UNCONDITIONAL_CALL,
)

In [None]:
# Example: create all labels and functions based on the vector table
# SourceType is used when renaming thunk functions
from ghidra.program.model.symbol import SourceType

# Names of the first vector table slots, indexed by slot number.
# Slot 0 (the initial stack pointer) is never visited by the loop below.
handler_name = ['MasterStackPointer', 'Reset_Handler', 'NMI_Handler', 'HardFault_Handler',
                'MemManage_Handler', 'BusFault_Handler', 'UsageFault_Handler',
                'Reserved1', 'Reserved2', 'Reserved3', 'Reserved4',
                'SVC_Handler', 'Reserved5', 'Reserved6', 'PendSV_Handler', 'SysTick_Handler']
# Offset of the last byte of the program relative to its first byte.
# NOTE(review): the checks below assume the program starts at image_base.
program_len = int(program.getMaxAddress().subtract(program.getMinAddress()))
i = 0
while True:
    i += 1
    # Stop before reading a 32-bit word that would extend past the program.
    if 4 * i + 3 > program_len:
        print(f'Add {i} handlers')
        break
    addr_ = flat_api.toAddr(image_base + 4 * i)
    # getInt returns a signed Java int; normalise it to an unsigned 32-bit
    # value so comparisons against addresses behave as intended.
    raw_value = flat_api.getInt(addr_) & 0xFFFFFFFF
    if raw_value in (0, 0xFFFFFFFF):
        # Empty slot (all zeros or all ones): mark it as data and move on.
        flat_api.createDWord(addr_)
        continue
    # Clear bit 0 (the Thumb bit) to obtain the handler's address.
    handler_address = raw_value & 0xFFFFFFFE
    if not (handler_address > image_base and (handler_address - image_base) < program_len):
        print(f'Add {i} handlers')
        # not a correct handler
        break
    if i >= len(handler_name):
        # Slots after the named ones are numbered IRQ0, IRQ1, ...
        name_ = 'IRQ' + str(i - len(handler_name)) + '_Handler'
    else:
        name_ = handler_name[i]
    # Create the data item, its label and the reference to the handler.
    # partition() keeps the whole name when it contains no underscore
    # (slicing with find() would drop the last character in that case).
    label_ = name_.partition('_')[0]
    data_ = flat_api.createDWord(addr_)
    flat_api.createLabel(addr_, label_, True)
    handler_addr_ = flat_api.toAddr(handler_address)
    flat_api.createMemoryReference(data_, handler_addr_, RefType.UNCONDITIONAL_CALL)
    # Create the function
    flat_api.disassemble(handler_addr_)
    newfunc = flat_api.createFunction(handler_addr_, name_)
    if newfunc is None:
        # createFunction returns None when no function was created, e.g.
        # when one already exists at this address; reuse that one if present.
        newfunc = flat_api.getFunctionAt(handler_addr_)
    # Rename thunk functions
    if newfunc is not None and str(newfunc.getName())[:6] == 'thunk_':
        newfunc.setName(name_, SourceType.USER_DEFINED)
   

In [None]:
# Run auto-analysis on the program to handle the remaining functions
flat_api.analyzeAll(program)
# TODO: analyze the remaining addresses which cannot be handled by analyzeAll()
# The commented-out sketch below lists the address ranges that are still undefined.
# from ghidra.program.model.address import *
# set_ = AddressSet(program.getMinAddress(), program.getMaxAddress())
# undefinedset_ = program.getListing().getUndefinedRanges(set_, False, None)
# print(f'Undefined Nums: {undefinedset_.getNumAddressRanges()}')
# for range_ in undefinedset_:
#     print(f' Min:{range_.getMinAddress()} Max:{range_.getMaxAddress()}')

In [None]:
# Task 7: Save the program and close the project
try:
    # Store the program in the project root folder under its own name
    # (the last argument allows overwriting), then save it.
    project.saveAs(program, "/", program.getName(), True)
    project.save(program)
finally:
    # Always close the project, even if saving failed, so it is not left open.
    project.close()