In [1]:
import angr
import sys
import struct
import networkx as nx
import monkeyhex
from angr.code_location import ExternalCodeLocation
from angr.knowledge_plugins.key_definitions.atoms import Register, MemoryLocation


In [2]:
import os
import matplotlib.pyplot as plt
from networkx.drawing.nx_pydot import write_dot

In [4]:
binary_path = '/home/kai/project/Hardfuzz/example/consule/sketch_nov5a.ino.bin'
base_addr = 0x00080000

with open(binary_path, 'rb') as f:
    # Read the first 8 bytes (Initial Stack Pointer and Reset Handler)
    vector_table = f.read(8)
    if len(vector_table) < 8:
        print("Error: Binary file is too short to contain a valid vector table.")
        sys.exit(1)
    # Unpack the data (little-endian format)
    initial_sp, reset_handler = struct.unpack('<II', vector_table)
    entry_point = reset_handler
    print(f"Initial Stack Pointer: 0x{initial_sp:08X}")
    print(f"Entry Point (Reset Handler) address: 0x{entry_point:08X}")
p = angr.Project(
    binary_path,
    main_opts={
        'backend': 'blob',
        'arch': 'armel',  # 'armel' for little-endian ARM
        'base_addr': base_addr,
        'entry_point': entry_point,
    },
    auto_load_libs=True
)
cfg = p.analyses.CFGEmulated(
    normalize=True,
    context_sensitivity_level=3,  # Increase context sensitivity if needed
    # starts=[entry_point],
    keep_state=True,
    enable_function_hints=True
)

definitions_not_in_cfg = set()
def_use_chains_not_in_cfg = set()
external_defs_not_in_cfg = set()
def_use_chain = set()



Initial Stack Pointer: 0x20088000
Entry Point (Reset Handler) address: 0x00083099




In [3]:
binary2_path = '/home/kai/Arduino/program1_json/build/arduino.sam.arduino_due_x_dbg/program1_json.ino.elf'
proj = angr.Project(
    binary2_path,
 
    auto_load_libs=False
)
cfg2 = proj.analyses.CFGEmulated(    
    normalize=True,
    context_sensitivity_level=3,  # Increase context sensitivity if needed
    # starts=[entry_point],
    keep_state=True,
    enable_function_hints=True
    )


ERROR    | 2025-03-11 08:27:19,895 | cle.backends.elf.elf | Error parsing symbol 0x07d168
Traceback (most recent call last):
  File "/home/kai/.virtualenvs/angr/lib/python3.12/site-packages/elftools/construct/core.py", line 351, in _parse
    return self.packer.unpack(_read_stream(stream, self.length))[0]
                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/kai/.virtualenvs/angr/lib/python3.12/site-packages/elftools/construct/core.py", line 293, in _read_stream
    raise FieldError("expected %d, found %d" % (length, len(data)))
elftools.construct.core.FieldError: expected 4, found 0

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/kai/.virtualenvs/angr/lib/python3.12/site-packages/elftools/common/utils.py", line 43, in struct_parse
    return struct.parse_stream(stream)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/kai/.virtualenvs/angr/lib/python3.12/site-packages/elftools/construct/c

ERROR    | 2025-03-11 08:27:19,909 | cle.backends.elf.elf | Error parsing symbol 0x000876
Traceback (most recent call last):
  File "/home/kai/.virtualenvs/angr/lib/python3.12/site-packages/elftools/construct/core.py", line 351, in _parse
    return self.packer.unpack(_read_stream(stream, self.length))[0]
                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/kai/.virtualenvs/angr/lib/python3.12/site-packages/elftools/construct/core.py", line 293, in _read_stream
    raise FieldError("expected %d, found %d" % (length, len(data)))
elftools.construct.core.FieldError: expected 4, found 0

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/kai/.virtualenvs/angr/lib/python3.12/site-packages/elftools/common/utils.py", line 43, in struct_parse
    return struct.parse_stream(stream)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/kai/.virtualenvs/angr/lib/python3.12/site-packages/elftools/construct/c

In [4]:
count = 0
for func_addr, func in cfg2.kb.functions.items():
    print(f"\nFunction at 0x{func_addr:x}:")
    # Each function contains a list of basic blocks
    # print the number of basic blocks in the function
    for block in func.blocks:
        print(f"  Basic block at 0x{block.addr:x}, size: {block.size} bytes")
        count += 1    
        # Optionally, you can print out the instructions in the block using capstone
        # for ins in block.capstone.insns:
print(count)
        #     print(f"    {ins.mnemonic} {ins.op_str}")


Function at 0x0:

Function at 0x1:

Function at 0x80000:
  Basic block at 0x80000, size: 96 bytes

Function at 0x80119:
  Basic block at 0x80127, size: 12 bytes
  Basic block at 0x80133, size: 2 bytes
  Basic block at 0x80135, size: 26 bytes
  Basic block at 0x80119, size: 14 bytes

Function at 0x80149:
  Basic block at 0x80149, size: 6 bytes

Function at 0x8014f:
  Basic block at 0x8014f, size: 6 bytes

Function at 0x80155:
  Basic block at 0x80155, size: 8 bytes

Function at 0x8015d:
  Basic block at 0x80167, size: 8 bytes
  Basic block at 0x8016f, size: 8 bytes
  Basic block at 0x80177, size: 8 bytes
  Basic block at 0x8017f, size: 2 bytes
  Basic block at 0x8015f, size: 8 bytes
  Basic block at 0x8015d, size: 2 bytes

Function at 0x80181:
  Basic block at 0x80181, size: 8 bytes
  Basic block at 0x8019d, size: 4 bytes
  Basic block at 0x80189, size: 20 bytes

Function at 0x801a1:
  Basic block at 0x801a1, size: 14 bytes
  Basic block at 0x801c3, size: 10 bytes
  Basic block at 0x80

In [30]:
cfg2.graph

<networkx.classes.digraph.DiGraph at 0x71a19ee63f20>

In [5]:
entry_point = proj.loader.main_object.entry
base_addr = proj.loader.main_object.mapped_base
print(f"Entry point: 0x{entry_point:x}")
# print(f"Base address: 0x{base_addr:x}")
# print(f"Binary size: {proj.loader.main_object.max_addr - proj.loader.main_object.min_addr} bytes")
# distance = dict(nx.shortest_path_length(cfg2.graph, source=entry_point))
# print(distance)
source_node = next((node for node in cfg2.graph.nodes() if node.addr == entry_point), None)
if source_node is None:
    print("Error: Entry point not found in the CFG.")
    # sys.exit(1)
distance = dict(nx.shortest_path_length(cfg2.graph, source=source_node))

Entry point: 0x83045


In [6]:
# print(list(cfg2.graph.nodes()))
print(distance)

{<CFGENode Reset_Handler 0x83045[12]>: 0, <CFGENode Reset_Handler+0x10 0x83055[4]>: 1, <CFGENode Reset_Handler+0xc 0x83051[4]>: 1, <CFGENode Reset_Handler+0x24 0x83069[6]>: 2, <CFGENode Reset_Handler+0x14 0x83059[8]>: 2, <CFGENode Reset_Handler+0x32 0x83077[24]>: 3, <CFGENode Reset_Handler+0x2a 0x8306f[8]>: 3, <CFGENode Reset_Handler+0x1c 0x83061[8]>: 3, <CFGENode Reset_Handler+0x52 0x83097[4]>: 4, <CFGENode Reset_Handler+0x4a 0x8308f[8]>: 4}


In [7]:
definitions_not_in_cfg = set()
def_use_chains_not_in_cfg = set()
external_defs_not_in_cfg = set()
all_def_use_chains = set()

critical_registers = ['pc', 'sp', 'lr', 'cpsr']
critical_memory_ranges = [
    (0x400E0800, 0x400E0FFF),  # System Control registers
    # Add other critical ranges as per the datasheet
]

def is_hardware_address(addr):
    # Peripheral memory space for SAM3X8E
    hardware_ranges = [
        (0x40000000, 0x5FFFFFFF),
    ]
    for start, end in hardware_ranges:
        if addr >= start and addr <= end:
            return True
    return False

def is_critical_memory(addr):
    for start, end in critical_memory_ranges:
        if addr >= start and addr <= end:
            return True
    return False

In [8]:
def_use_block_chain = set()

In [9]:
block_chain_count = {}
for function_addr, function in cfg2.kb.functions.items():
    try:
        print(f"\nAnalyzing function {function.name} at 0x{function_addr:x}")

        # Run ReachingDefinitions analysis on the function
        rd_analysis = proj.analyses.ReachingDefinitions(
            subject=function,
            func_addr=function_addr,
            track_tmps=True,
            observe_all=True  # Observe all definitions and uses
        )

        # Iterate over all definitions
        #get all the definations
        for _def in rd_analysis.all_definitions:
            def_ins_addr = _def.codeloc.ins_addr
            uses = rd_analysis.all_uses.get_uses(_def)
            #get uses

            # Check if the definition is external
            if isinstance(_def.codeloc, ExternalCodeLocation):
                atom = _def.atom
                # Create a hashable identifier for the atom
                if isinstance(atom, MemoryLocation):
                    atom_id = ('mem', atom.addr)
                elif isinstance(atom, Register):
                    atom_id = ('reg', atom.reg_offset)
                else:
                    atom_id = ('other', str(atom))
                # Use the instruction addresses of uses
                uses_ins_addrs = tuple(hex(use.ins_addr) for use in uses)
                external_defs_not_in_cfg.add((atom_id, uses_ins_addrs))
                continue  # Continue processing if needed

     
            def_node = cfg2.model.get_any_node(def_ins_addr, anyaddr=True)
       
            if def_node is None:
                # Record the definition not in CFG
                definitions_not_in_cfg.add((hex(def_ins_addr), 'Def instruction not in CFG'))

            def_distance = distance.get(def_node, 'Unknown')
            def_block = proj.factory.block(def_node.addr)
           
            if uses:
                for use in uses:
                    use_ins_addr = use.ins_addr
                    # if def_ins_addr != use_ins_addr:
                    all_def_use_chains.add((hex(def_ins_addr), hex(use_ins_addr)))
                    use_node = cfg2.get_any_node(use_ins_addr, anyaddr=True)
                   
                    if use_node is None:
                        # Record the def-use chain not in CFG
                        def_use_chains_not_in_cfg.add((hex(def_ins_addr), hex(use_ins_addr), 'Use instruction not in CFG'))
                        
                        continue  # Continue to next use
                    
                    use_block = proj.factory.block(use_node.addr)

                    if def_node != use_node:
                        # if not nx.has_path(cfg.graph, def_node, use_node):
                        if not nx.has_path(cfg2.graph, def_node, use_node):
                            def_use_chains_not_in_cfg.add((hex(def_ins_addr), hex(use_ins_addr), 'No path from def to use in CFG'))
                            continue  # Continue to next use
                    def_use_block_chain.add((
                        f"Definition: 0x{def_block.addr:x}, size: {def_block.size}",
                        f"Use: 0x{use_block.addr:x}, size: {use_block.size}"
                    ))
                    block_chain_count[def_block.addr] = block_chain_count.get(def_block.addr, 0) + 1
                    block_chain_count[use_block.addr] = block_chain_count.get(use_block.addr, 0) + 1
            else:

                pass

    except Exception as e:
        print(f"Error analyzing function {function.name} (0x{function_addr:x}): {e}")


Analyzing function __stack at 0x0

Analyzing function sub_1 at 0x1

Analyzing function exception_table at 0x80000

Analyzing function frame_dummy at 0x80119

Analyzing function _ZN11ArduinoJson8V721PB226detail16DefaultAllocator8allocateEj at 0x80149

Analyzing function _ZN11ArduinoJson8V721PB226detail16DefaultAllocator10deallocateEPv at 0x8014f

Analyzing function _ZN11ArduinoJson8V721PB226detail16DefaultAllocator10reallocateEPvj at 0x80155

Analyzing function _Z7TaskLEDPv at 0x8015d

Analyzing function _ZNK11ArduinoJson8V721PB226detail14MemoryPoolListINS1_15ResourceManager8SlotDataEE7getSlotEt.isra.41 at 0x80181

Analyzing function _ZN11ArduinoJson8V721PB226detail10StringNode6resizeEPS2_jPNS0_9AllocatorE at 0x801a1

Analyzing function sub_801bd at 0x801bd

Analyzing function _ZN11ArduinoJson8V721PB226detail10StringNode7destroyEPS2_PNS0_9AllocatorE at 0x801d1

Analyzing function _ZN11ArduinoJson8V721PB226detail18CollectionIterator4nextEPKNS1_15ResourceManagerE at 0x801e1

Analyzing fu




Analyzing function _ZN11ArduinoJson8V721PB226detail13TextFormatterINS1_18StaticStringWriterEE12writeIntegerIxEENS1_9enable_ifIXsrNS1_9is_signedIT_EE5valueEvE4typeES8_ at 0x808ed

Analyzing function sub_80913 at 0x80913

Analyzing function _ZN11ArduinoJson8V721PB226detail16JsonDeserializerINS1_13BoundedReaderIPhvEEE7currentEv at 0x80915

Analyzing function _ZN11ArduinoJson8V721PB226detail16JsonDeserializerINS1_13BoundedReaderIPhvEEE21skipSpacesAndCommentsEv at 0x80939

Analyzing function _ZN11ArduinoJson8V721PB226detail16JsonDeserializerINS1_13BoundedReaderIPhvEEE11skipKeywordEPKc at 0x8096f

Analyzing function _ZN11ArduinoJson8V721PB226detail16JsonDeserializerINS1_13BoundedReaderIPhvEEE17parseNumericValueERNS1_11VariantDataE at 0x8099b

Analyzing function _ZN11ArduinoJson8V721PB226detail13TextFormatterINS1_18StaticStringWriterEE10writeFloatEda at 0x80ac1

Analyzing function sub_80ae3 at 0x80ae3

Analyzing function sub_80afd at 0x80afd

Analyzing function sub_80b2d at 0x80b2d

Analyzin

ERROR    | 2025-03-11 08:36:57,697 | angr.analyses.reaching_definitions.engine_vex | Unable to translate bytecode
ERROR    | 2025-03-11 08:36:57,697 | angr.analyses.reaching_definitions.engine_vex | Unable to translate bytecode



Analyzing function sub_8693b at 0x8693b

Analyzing function _ZZN11ArduinoJson8V721PB226detail11FloatTraitsIfLj4EE25negativeBinaryPowersOfTenEvE7factors at 0x869b0

Analyzing function _ZZN11ArduinoJson8V721PB226detail11FloatTraitsIfLj4EE25positiveBinaryPowersOfTenEvE7factors at 0x869c8

Analyzing function _ZZN11ArduinoJson8V721PB226detail11FloatTraitsIdLj8EE25negativeBinaryPowersOfTenEvE7factors at 0x869e0

Analyzing function sub_869e8 at 0x869e8

Analyzing function _ZZN11ArduinoJson8V721PB226detail11FloatTraitsIdLj8EE25positiveBinaryPowersOfTenEvE7factors at 0x86a28

Analyzing function sub_86a78 at 0x86a78

Analyzing function sub_86a88 at 0x86a88

Analyzing function sub_86a8e at 0x86a8e

Analyzing function sub_86a9b at 0x86a9b

Analyzing function sub_86aa2 at 0x86aa2

Analyzing function sub_86aa7 at 0x86aa7

Analyzing function sub_86aac at 0x86aac

Analyzing function sub_86ab2 at 0x86ab2

Analyzing function sub_86abb at 0x86abb

Analyzing function sub_86ac3 at 0x86ac3

Analyzing funct

ERROR    | 2025-03-11 08:36:57,767 | angr.analyses.reaching_definitions.engine_vex | Unable to translate bytecode
ERROR    | 2025-03-11 08:36:57,768 | angr.analyses.reaching_definitions.engine_vex | Unable to translate bytecode



Analyzing function sub_2007012c at 0x2007012c

Analyzing function sub_200701c4 at 0x200701c4

Analyzing function sub_20070494 at 0x20070494

Analyzing function sub_20070495 at 0x20070495

Analyzing function _impure_ptr at 0x200704a0

Analyzing function __malloc_av_ at 0x200704a4

Analyzing function sub_200704ac at 0x200704ac

Analyzing function sub_200704b4 at 0x200704b4

Analyzing function sub_200704bc at 0x200704bc

Analyzing function sub_2007065c at 0x2007065c

Analyzing function sub_20070664 at 0x20070664

Analyzing function sub_20070694 at 0x20070694

Analyzing function sub_2007069c at 0x2007069c

Analyzing function sub_2007077c at 0x2007077c

Analyzing function __malloc_trim_threshold at 0x200708ac

Analyzing function __malloc_sbrk_base at 0x200708b0

Analyzing function sub_20182828 at 0x20182828

Analyzing function sub_2021d9d0 at 0x2021d9d0

Analyzing function sys_380 at 0x203002f8
Error analyzing function sys_380 (0x203002f8): 'NoneType' object has no attribute 'addr'

Analyz

In [12]:

block_count = {}
external_not_in_cfg = set()
def_not_in_cfg = set()
def__not_in_cfg = set()
all_chains = set()

# This holds all successfully matched (def => use) blocks, keyed by the definition’s block address.
def_use_dict = {}

for function_addr, function in cfg2.kb.functions.items():
    try:
        print(f"\nAnalyzing function {function.name} at 0x{function_addr:x}")

        # Run ReachingDefinitions analysis on the function
        rd_analysis = proj.analyses.ReachingDefinitions(
            subject=function,
            func_addr=function_addr,
            track_tmps=True,
            observe_all=True  # Observe all definitions and uses
        )

        # Iterate over all definitions
        for _def in rd_analysis.all_definitions:
            def_ins_addr = _def.codeloc.ins_addr
            uses = rd_analysis.all_uses.get_uses(_def)

            # 1) Skip external definitions
            if isinstance(_def.codeloc, ExternalCodeLocation):
                atom = _def.atom
                if isinstance(atom, MemoryLocation):
                    atom_id = ('mem', atom.addr)
                elif isinstance(atom, Register):
                    atom_id = ('reg', atom.reg_offset)
                else:
                    atom_id = ('other', str(atom))
                uses_ins_addrs = tuple(hex(use.ins_addr) for use in uses)
                external_not_in_cfg.add((atom_id, uses_ins_addrs))
                continue

            # 2) Find the CFG node & block for the definition
            def_node = cfg2.model.get_any_node(def_ins_addr, anyaddr=True)
            if def_node is None:
                def_not_in_cfg.add((hex(def_ins_addr), 'Def instruction not in CFG'))
                # If we can’t map the definition to a CFG node, skip the uses
                continue

            def_block = proj.factory.block(def_node.addr)

            # Prepare a new entry in def_use_dict for this definition
            # Key by the definition block address. 
            # We’ll store a dictionary with all relevant data for uses.
            def_addr_str = f"DefBlock=0x{def_block.addr:x}, size={def_block.size}"
            if def_block.addr not in def_use_dict:
                def_use_dict[def_block.addr] = {
                    "def_info": def_addr_str,
                    "uses_in_cfg": [],      # uses that pass the path check
                    "uses_no_path": [],     # uses that are in CFG but no path
                    "uses_not_in_cfg": [],  # use instructions not in the CFG
                }

            # 3) Iterate over each use
            for use in uses:
                use_ins_addr = use.ins_addr
                all_chains.add((hex(def_ins_addr), hex(use_ins_addr)))

                use_node = cfg2.get_any_node(use_ins_addr, anyaddr=True)
                if use_node is None:
                    # This use is not in the CFG at all
                    def__not_in_cfg.add((
                        hex(def_ins_addr), 
                        hex(use_ins_addr), 
                        'Use instruction not in CFG'
                    ))
                    # Store in "uses_not_in_cfg"
                    def_use_dict[def_block.addr]["uses_not_in_cfg"].append(hex(use_ins_addr))
                    continue

                use_block = proj.factory.block(use_node.addr)

                # Check for a valid path from definition to use in the CFG
                if def_node != use_node:
                    if not nx.has_path(cfg2.graph, def_node, use_node):
                        def__not_in_cfg.add((
                            hex(def_ins_addr), 
                            hex(use_ins_addr), 
                            'No path from def to use in CFG'
                        ))
                        def_use_dict[def_block.addr]["uses_no_path"].append(
                            f"UseBlock=0x{use_block.addr:x}, size={use_block.size}"
                        )
                        continue

                # If we get here, the use is in the CFG **and** a path exists
                def_use_dict[def_block.addr]["uses_in_cfg"].append(
                    f"UseBlock=0x{use_block.addr:x}, size={use_block.size}"
                )
                
                # Update block_chain_count for both the definition & use block
                block_count[def_block.addr] = block_count.get(def_block.addr, 0) + 1
                block_count[use_block.addr] = block_count.get(use_block.addr, 0) + 1

    except Exception as e:
        print(f"Error analyzing function {function.name} (0x{function_addr:x}): {e}")




Analyzing function __stack at 0x0

Analyzing function sub_1 at 0x1

Analyzing function exception_table at 0x80000

Analyzing function frame_dummy at 0x80119

Analyzing function _ZN11ArduinoJson8V721PB226detail16DefaultAllocator8allocateEj at 0x80149

Analyzing function _ZN11ArduinoJson8V721PB226detail16DefaultAllocator10deallocateEPv at 0x8014f

Analyzing function _ZN11ArduinoJson8V721PB226detail16DefaultAllocator10reallocateEPvj at 0x80155

Analyzing function _Z7TaskLEDPv at 0x8015d

Analyzing function _ZNK11ArduinoJson8V721PB226detail14MemoryPoolListINS1_15ResourceManager8SlotDataEE7getSlotEt.isra.41 at 0x80181

Analyzing function _ZN11ArduinoJson8V721PB226detail10StringNode6resizeEPS2_jPNS0_9AllocatorE at 0x801a1

Analyzing function sub_801bd at 0x801bd

Analyzing function _ZN11ArduinoJson8V721PB226detail10StringNode7destroyEPS2_PNS0_9AllocatorE at 0x801d1

Analyzing function _ZN11ArduinoJson8V721PB226detail18CollectionIterator4nextEPKNS1_15ResourceManagerE at 0x801e1

Analyzing fu




Analyzing function _ZN11ArduinoJson8V721PB226detail13TextFormatterINS1_18StaticStringWriterEE12writeIntegerIxEENS1_9enable_ifIXsrNS1_9is_signedIT_EE5valueEvE4typeES8_ at 0x808ed

Analyzing function sub_80913 at 0x80913

Analyzing function _ZN11ArduinoJson8V721PB226detail16JsonDeserializerINS1_13BoundedReaderIPhvEEE7currentEv at 0x80915

Analyzing function _ZN11ArduinoJson8V721PB226detail16JsonDeserializerINS1_13BoundedReaderIPhvEEE21skipSpacesAndCommentsEv at 0x80939

Analyzing function _ZN11ArduinoJson8V721PB226detail16JsonDeserializerINS1_13BoundedReaderIPhvEEE11skipKeywordEPKc at 0x8096f

Analyzing function _ZN11ArduinoJson8V721PB226detail16JsonDeserializerINS1_13BoundedReaderIPhvEEE17parseNumericValueERNS1_11VariantDataE at 0x8099b

Analyzing function _ZN11ArduinoJson8V721PB226detail13TextFormatterINS1_18StaticStringWriterEE10writeFloatEda at 0x80ac1

Analyzing function sub_80ae3 at 0x80ae3

Analyzing function sub_80afd at 0x80afd

Analyzing function sub_80b2d at 0x80b2d

Analyzin

ERROR    | 2025-03-11 09:33:34,111 | angr.analyses.reaching_definitions.engine_vex | Unable to translate bytecode
ERROR    | 2025-03-11 09:33:34,112 | angr.analyses.reaching_definitions.engine_vex | Unable to translate bytecode



Analyzing function sub_8693b at 0x8693b

Analyzing function _ZZN11ArduinoJson8V721PB226detail11FloatTraitsIfLj4EE25negativeBinaryPowersOfTenEvE7factors at 0x869b0

Analyzing function _ZZN11ArduinoJson8V721PB226detail11FloatTraitsIfLj4EE25positiveBinaryPowersOfTenEvE7factors at 0x869c8

Analyzing function _ZZN11ArduinoJson8V721PB226detail11FloatTraitsIdLj8EE25negativeBinaryPowersOfTenEvE7factors at 0x869e0

Analyzing function sub_869e8 at 0x869e8

Analyzing function _ZZN11ArduinoJson8V721PB226detail11FloatTraitsIdLj8EE25positiveBinaryPowersOfTenEvE7factors at 0x86a28

Analyzing function sub_86a78 at 0x86a78

Analyzing function sub_86a88 at 0x86a88

Analyzing function sub_86a8e at 0x86a8e

Analyzing function sub_86a9b at 0x86a9b

Analyzing function sub_86aa2 at 0x86aa2

Analyzing function sub_86aa7 at 0x86aa7

Analyzing function sub_86aac at 0x86aac

Analyzing function sub_86ab2 at 0x86ab2

Analyzing function sub_86abb at 0x86abb

Analyzing function sub_86ac3 at 0x86ac3

Analyzing funct

ERROR    | 2025-03-11 09:33:34,185 | angr.analyses.reaching_definitions.engine_vex | Unable to translate bytecode
ERROR    | 2025-03-11 09:33:34,186 | angr.analyses.reaching_definitions.engine_vex | Unable to translate bytecode



Analyzing function sub_2007012c at 0x2007012c

Analyzing function sub_200701c4 at 0x200701c4

Analyzing function sub_20070494 at 0x20070494

Analyzing function sub_20070495 at 0x20070495

Analyzing function _impure_ptr at 0x200704a0

Analyzing function __malloc_av_ at 0x200704a4

Analyzing function sub_200704ac at 0x200704ac

Analyzing function sub_200704b4 at 0x200704b4

Analyzing function sub_200704bc at 0x200704bc

Analyzing function sub_2007065c at 0x2007065c

Analyzing function sub_20070664 at 0x20070664

Analyzing function sub_20070694 at 0x20070694

Analyzing function sub_2007069c at 0x2007069c

Analyzing function sub_2007077c at 0x2007077c

Analyzing function __malloc_trim_threshold at 0x200708ac

Analyzing function __malloc_sbrk_base at 0x200708b0

Analyzing function sub_20182828 at 0x20182828

Analyzing function sub_2021d9d0 at 0x2021d9d0

Analyzing function sys_380 at 0x203002f8

Analyzing function sub_616b2f65 at 0x616b2f65

Analyzing function sub_6172676f at 0x6172676f

A

In [13]:
print("\n=== Def-Use Summary ===")
for def_block_addr, info in def_use_dict.items():
    print(info["def_info"])
    print("  Uses in CFG (with path):")
    for u in info["uses_in_cfg"]:
        print(f"    - {u}")
    print("  Uses in CFG (no path):")
    for u in info["uses_no_path"]:
        print(f"    - {u}")
    print("  Uses not in CFG:")
    for u in info["uses_not_in_cfg"]:
        print(f"    - {u}")
    print()



=== Def-Use Summary ===
DefBlock=0x80000, size=96
  Uses in CFG (with path):
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, size=96
    - UseBlock=0x80000, si

In [17]:
import re

# Suppose you already have def_use_dict in this structure:
# def_use_dict[def_block_addr] = {
#     "def_info": "DefBlock=0x..., size=...", 
#     "uses_in_cfg": [...],
#     "uses_no_path": [...],
#     "uses_not_in_cfg": [...]
# }

final_dict = {}

for def_block_addr, info in def_use_dict.items():
    # 'def_block_addr' is an integer (the block’s address).
    # We'll store it in hex form as the dictionary key.
    def_key = f"0x{def_block_addr:x}"

    # We only want the addresses from the "uses_in_cfg" (or from all sets if you prefer).
    uses_list = info["uses_in_cfg"]

    # Build a set to avoid duplicates.
    use_addr_set = set()

    # Each entry in uses_in_cfg might look like: "UseBlock=0x1234, size=16"
    # We'll parse out the actual hex address (0x1234).
    for use_entry in uses_list:
        # A simple regex or split can do the job. 
        # For example, let's capture the "0x..." part after "UseBlock=".
        # e.g. "UseBlock=0x1234, size=16"
        match = re.search(r'UseBlock=(0x[0-9a-fA-F]+)', use_entry)
        if match:
            raw_hex = match.group(1)  # e.g. "0x1234"
            use_addr_set.add(raw_hex)

    # Convert that set to a sorted list (optional)
    use_addr_list = sorted(use_addr_set)

    # Store it in the final dictionary
    final_dict[def_key] = use_addr_list

# Now 'final_dict' is something like:
# {
#   '0x1000': ['0x1234', '0x1280'],
#   '0x1100': ['0x2345'],
#   ...
# }

# Write it to "block.txt" in a simple format. You could use JSON or just str(...).
with open("block.txt", "w") as f:
    # For a nice human-readable format, you might do:
    for definition_addr, use_addrs in final_dict.items():
        
        for ua in use_addrs:
            f.write(f"Definition: {definition_addr}\n")
            f.write("Use:")
            f.write(f" {ua}\n")
        f.write("\n")
    
    # Or, if you just want a dictionary dump, do:
    # import json
    # json.dump(final_dict, f, indent=2)


In [11]:
block_addrs = []
sorted_blocks = sorted(block_chain_count.items(), key=lambda x: x[1], reverse=True)
for block_addr, count in sorted_blocks:
    # print(f"Block at 0x{block_addr:x} is part of {count} def-use chains")
    block_addrs.append(hex(block_addr)) 
    # new_block_list = block_addr:x
# print(sorted_blocks)
# print(block_addrs)
print(len(block_addrs))
# print(len(def_use_block_chain))
print(len(def_use_block_chain))
print(def_use_block_chain)


1445
4122
{('Definition: 0x8693b, size: 12', 'Use: 0x8692d, size: 14'), ('Definition: 0x802fd, size: 6', 'Use: 0x80303, size: 12'), ('Definition: 0x80da1, size: 30', 'Use: 0x80ed9, size: 6'), ('Definition: 0x81553, size: 18', 'Use: 0x81553, size: 18'), ('Definition: 0x8114b, size: 32', 'Use: 0x8114b, size: 32'), ('Definition: 0x8667d, size: 12', 'Use: 0x86711, size: 10'), ('Definition: 0x80c6d, size: 8', 'Use: 0x80c6d, size: 8'), ('Definition: 0x82265, size: 14', 'Use: 0x82273, size: 12'), ('Definition: 0x81343, size: 6', 'Use: 0x81361, size: 4'), ('Definition: 0x82273, size: 12', 'Use: 0x8227f, size: 32'), ('Definition: 0x8085b, size: 14', 'Use: 0x8085b, size: 14'), ('Definition: 0x85931, size: 8', 'Use: 0x8597f, size: 4'), ('Definition: 0x83731, size: 30', 'Use: 0x83731, size: 30'), ('Definition: 0x80dbf, size: 8', 'Use: 0x80ee9, size: 4'), ('Definition: 0x8629f, size: 6', 'Use: 0x862b9, size: 8'), ('Definition: 0x85941, size: 8', 'Use: 0x85941, size: 8'), ('Definition: 0x83055, size

In [46]:
with open("blocks.txt", "w") as f:
    for addr in block_addrs:
        f.write(f"{addr}\n") 

In [13]:
# print(all_def_use_chains)

In [11]:
# print(f"\nDefinitions not in CFG: {definitions_not_in_cfg}")

In [23]:
with open('new_all.txt', 'w') as f:
        f.write("Def-use chains  valid control flow path:\n\n")
        for def_addr, use_addr in all_def_use_chains:
            # def_addr_str = f"0x{def_addr:x}"
            # use_addr_str = f"0x{use_addr:x}"
            def_addr_str = def_addr
            use_addr_str = use_addr
            # print(def_addr_str, use_addr_str)
            f.write(f"Definition: {def_addr_str}\n")
            f.write(f"Use: {use_addr_str}\n\n")
        f.write(f"Total def-use chains in CFG: {len(all_def_use_chains)}\n")
print("\nDef-use chains not included in the CFG or without control flow path:")
for def_ins_addr, use_ins_addr, reason in def_use_chains_not_in_cfg:
    if def_ins_addr is not None:
        def_ins_addr_str = def_ins_addr
    else:
        def_ins_addr_str = "Unknown"
    if use_ins_addr is not None:
        use_ins_addr_str = use_ins_addr
    else:
        use_ins_addr_str = "Unknown"

    print(f"Definition at instruction address: {def_ins_addr_str}")
    print(f"  Use at instruction address: {use_ins_addr_str} - Reason: {reason}")
    # print(f"  Distance: {defdiss}")

print(f"Total number of def-use chains not in CFG: {len(def_use_chains_not_in_cfg)}")

print("\nExternal definitions involved in def-use chains not included in CFG:")
for atom_id, uses_ins_addrs in external_defs_not_in_cfg:
    atom_type, atom_value = atom_id
    if atom_type == 'mem':
        print(f"External MemoryLocation at address: {hex(atom_value)}")
    elif atom_type == 'reg':
        reg_name = proj.arch.register_names.get(atom_value, f"Unknown({atom_value})")
        print(f"External Register: {reg_name}")
    else:
        print(f"External Atom: {atom_value}")

    # Print uses if any
    if uses_ins_addrs:
        for use_ins_addr in uses_ins_addrs:
            if use_ins_addr is not None:
                use_ins_addr_str = use_ins_addr
            else:
                use_ins_addr_str = "Unknown"
            print(f"  Used at instruction address: {use_ins_addr_str}")
    else:
        print("  No uses recorded.")

print(f"Total number of external definitions not in CFG: {len(external_defs_not_in_cfg)}")


# with open('def_use.txt', 'w') as f:
#     for def_ins_addr, use_ins_addr, reason in def_use_chains_not_in_cfg:
#         f.write(f"Definition: 0x{def_ins_addr:x}")
#         f.write(f"Use: 0x{use_ins_addr:x}\n")
# with open('external.txt', 'w') as f:
#     for atom_id, uses_ins_addrs in external_defs_not_in_cfg:
#         f.write(f"External: {atom_id}")
#         f.write("Uses: " + ", ".join(f"0x{addr:x}" for addr in uses_ins_addrs) + "\n\n")


with open('new_def_use1.txt', 'w') as f:
    for def_ins_addr, use_ins_addr, reason in def_use_chains_not_in_cfg:
        def_ins_addr_str = def_ins_addr
        use_ins_addr_str = use_ins_addr
        f.write(f"Definition: {def_ins_addr_str}\n")
        f.write(f"Use: {use_ins_addr_str}\n")
        # f.write(f"Distance: {distances}\n")
        # f.write(f"Path: {path}\n")
        # f.write(f"Reason: {reason}\n")
        # f.write("\n")  # Add a newline between entries

with open('new_external1.txt', 'w') as f:
    for atom_id, uses_ins_addrs in external_defs_not_in_cfg:
        atom_type, atom_value = atom_id
        if atom_type == 'mem':
            atom_str = f"0x{atom_value:x}"
        elif atom_type == 'reg':
            reg_name = proj.arch.register_names.get(atom_value, f"Unknown({atom_value})")
            atom_str = f"{reg_name}"
        else:
            atom_str = f"{atom_value}"

        f.write(f"{atom_str}\n")
        f.write("Uses:")
        for use_ins_addr in uses_ins_addrs:
            if use_ins_addr is not None:
                use_ins_addr_str = use_ins_addr
            else:
                use_ins_addr_str = "Unknown"
            f.write(f"  {use_ins_addr_str}\n")
        # f.write("\n")  # Add a newline between entries



Def-use chains not included in the CFG or without control flow path:
Definition at instruction address: 0x20300430
  Use at instruction address: 0x20300434 - Reason: Use instruction not in CFG
Definition at instruction address: 0x2030041c
  Use at instruction address: 0x2030041c - Reason: Use instruction not in CFG
Definition at instruction address: 0x203003dc
  Use at instruction address: 0x203003e0 - Reason: Use instruction not in CFG
Definition at instruction address: 0x203003b0
  Use at instruction address: 0x203003b4 - Reason: Use instruction not in CFG
Definition at instruction address: 0x2030042c
  Use at instruction address: 0x20300430 - Reason: Use instruction not in CFG
Definition at instruction address: 0x203003a8
  Use at instruction address: 0x203003ac - Reason: Use instruction not in CFG
Definition at instruction address: 0x203003d8
  Use at instruction address: 0x203003dc - Reason: Use instruction not in CFG
Definition at instruction address: 0x20300388
  Use at instruc

## last test

In [8]:
cfg_nodes = list(cfg.graph.nodes())

In [9]:
print("CFG Nodes: ", len(cfg_nodes))

CFG Nodes:  148388


In [None]:
# cfg.kb.functions = {}


In [4]:
print("Entry point:", hex(p.loader.main_object.entry))
print("CFG nodes:", hex(p.entry), len(cfg.graph.nodes()))
entry_node = cfg.get_any_node(p.entry)
for n in cfg.graph.nodes():
    if n.addr == p.loader.main_object.entry:
        entry_node = n
        break

if entry_node is None:
    print("Warning: Entry node not found in CFG. Distances computation may fail.")
    distances = {}
else:
    # Compute shortest paths from the entry node
    distances = dict(nx.shortest_path_length(cfg.graph, source=entry_node))

Entry point: 0x401060
CFG nodes: 0x401060 23


In [None]:
p.analyses.CFG()
print(cfg.graph.nodes())

ERROR    | 2025-02-14 14:11:40,054 | angr.analyses.propagator.engine_vex.SimEnginePropagatorVEX | Unsupported statement type CAS.
ERROR    | 2025-02-14 14:11:40,063 | angr.analyses.propagator.engine_vex.SimEnginePropagatorVEX | Unsupported statement type CAS.
ERROR    | 2025-02-14 14:11:40,068 | angr.analyses.propagator.engine_vex.SimEnginePropagatorVEX | Unsupported statement type CAS.
ERROR    | 2025-02-14 14:11:40,078 | angr.analyses.propagator.engine_vex.SimEnginePropagatorVEX | Unsupported statement type CAS.
ERROR    | 2025-02-14 14:11:40,095 | angr.analyses.propagator.engine_vex.SimEnginePropagatorVEX | Unsupported statement type CAS.
ERROR    | 2025-02-14 14:11:40,101 | angr.analyses.propagator.engine_vex.SimEnginePropagatorVEX | Unsupported statement type CAS.
ERROR    | 2025-02-14 14:11:40,108 | angr.analyses.propagator.engine_vex.SimEnginePropagatorVEX | Unsupported statement type CAS.
ERROR    | 2025-02-14 14:11:40,152 | angr.analyses.propagator.engine_vex.SimEnginePropagat

In [7]:
print(distances)

NameError: name 'distances' is not defined

In [6]:
for function_addr, function in cfg.kb.functions.items():
    try:
        print(f"\nAnalyzing function {function.name} at 0x{function_addr:x}")

        # Run ReachingDefinitions analysis on the function
        rd_analysis = p.analyses.ReachingDefinitions(
            subject=function,
            func_addr=function_addr,
            track_tmps=True,
            observe_all=True  # Observe all definitions and uses
        )

        for _def in rd_analysis.all_definitions:
            def_ins_addr = _def.codeloc.ins_addr
            uses = rd_analysis.all_uses.get_uses(_def)
            # def_use_chain.add((def_ins_addr, uses))
            #get uses

            # Check if the definition is external
            if isinstance(_def.codeloc, ExternalCodeLocation):
                atom = _def.atom
                # Create a hashable identifier for the atom
                if isinstance(atom, MemoryLocation):
                    atom_id = ('mem', atom.addr)
                elif isinstance(atom, Register):
                    reg_name = p.arch.register_names.get(atom.reg_offset, str(atom))
                    atom_id = ('reg', reg_name)
                else:
                    atom_id = ('other', str(atom))
                # Use the instruction addresses of uses
                uses_ins_addrs = tuple(use.ins_addr for use in uses)
                external_defs_not_in_cfg.add((atom_id, uses_ins_addrs))
                continue  # Continue processing if needed

            # Get the CFG node containing the definition instruction address
            def_node = cfg.get_any_node(def_ins_addr, anyaddr=True)

            if def_node is None:
                # Record the definition not in CFG
                definitions_not_in_cfg.add((def_ins_addr, 'Def instruction not in CFG'))
                continue  # Skip further processing for this definition

            def_distance = distances.get(def_node, 'Unknown')

    
            # Handle uses
            if uses:
                for use in uses:
                    use_ins_addr = use.ins_addr
                    def_use_chain.add((def_ins_addr, use_ins_addr))
                    use_node = cfg.get_any_node(use_ins_addr, anyaddr=True)
                    print(use_node)
        
                    if use_node is None:
                        # Record the def-use chain not in CFG
                        def_use_chains_not_in_cfg.add((def_ins_addr, use_ins_addr, 'Use instruction not in CFG'))
                        
                        continue  # Continue to next use

                    if def_node != use_node:
                        # if not nx.has_path(cfg.graph, def_node, use_node):
                        if not nx.has_path(cfg.graph, def_node, use_node):
                            # Record the def-use chain with no path in CFG
                            def_use_chains_not_in_cfg.add((def_ins_addr, use_ins_addr, 'No path from def to use in CFG'))

                            continue  # Continue to next use
            else:
                # No uses recorded
                pass

    except Exception as e:
        print(f"Error analyzing function {function.name} (0x{function_addr:x}): {e}")


Analyzing function sub_1 at 0x1

Analyzing function printf at 0x401050
<CFGENode 0x401050[10]>

Analyzing function _start at 0x401060
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37]>
<CFGENode _start 0x401060[37

In [7]:
with open('test.txt', 'w') as f:
        f.write("Def-use chains  valid control flow path:\n\n")
        for def_addr, use_addr in def_use_chain:
            f.write(f"Definition at 0x{def_addr:x}\n")
            f.write(f"  Use at 0x{use_addr:x}\n\n")
        f.write(f"Total def-use chains in CFG: {len(def_use_chain)}\n")


In [None]:

print("\nDef-use chains not included in the CFG or without control flow path:")
for def_ins_addr, use_ins_addr, reason in def_use_chains_not_in_cfg:
    if def_ins_addr is not None:
        def_ins_addr_str = f"0x{def_ins_addr:x}"
    else:
        def_ins_addr_str = "Unknown"
    if use_ins_addr is not None:
        use_ins_addr_str = f"0x{use_ins_addr:x}"
    else:
        use_ins_addr_str = "Unknown"

    print(f"Definition at instruction address: {def_ins_addr_str}")
    print(f"  Use at instruction address: {use_ins_addr_str} - Reason: {reason}")
    # print(f"  Distance: {defdiss}")

print(f"Total number of def-use chains not in CFG: {len(def_use_chains_not_in_cfg)}")

print("\nExternal definitions involved in def-use chains not included in CFG:")
for atom_id, uses_ins_addrs in external_defs_not_in_cfg:
    atom_type, atom_value = atom_id
    if atom_type == 'mem':
        print(f"External MemoryLocation at address: {hex(atom_value)}")
    elif atom_type == 'reg':
        reg_name = p.arch.register_names.get(atom_value, f"Unknown({atom_value})")
        print(f"External Register: {reg_name}")
    else:
        print(f"External Atom: {atom_value}")

    # Print uses if any
    if uses_ins_addrs:
        for use_ins_addr in uses_ins_addrs:
            if use_ins_addr is not None:
                use_ins_addr_str = f"0x{use_ins_addr:x}"
            else:
                use_ins_addr_str = "Unknown"
            print(f"  Used at instruction address: {use_ins_addr_str}")
    else:
        print("  No uses recorded.")

print(f"Total number of external definitions not in CFG: {len(external_defs_not_in_cfg)}")

In [None]:
with open('def.txt', 'w') as f:
    for def_ins_addr, use_ins_addr, reason in def_use_chains_not_in_cfg:
        def_ins_addr_str = f"0x{def_ins_addr:x}"
        use_ins_addr_str = f"0x{use_ins_addr:x}"
        f.write(f"Definition: {def_ins_addr_str}\n")
        f.write(f"Use: {use_ins_addr_str}\n")


with open('ext.txt', 'w') as f:
    for atom_id, uses_ins_addrs in external_defs_not_in_cfg:
        atom_type, atom_value = atom_id
        if atom_type == 'mem':
            atom_str = f"0x{atom_value:x}"
        elif atom_type == 'reg':
            reg_name = p.arch.register_names.get(atom_value, f"Unknown({atom_value})")
            atom_str = f"{reg_name}"
        else:
            atom_str = f"{atom_value}"

        f.write(f"{atom_str}\n")
        f.write("Uses:")
        for use_ins_addr in uses_ins_addrs:
            if use_ins_addr is not None:
                use_ins_addr_str = f"0x{use_ins_addr:x}"
            else:
                use_ins_addr_str = "Unknown"
            f.write(f"  {use_ins_addr_str}\n")

## above is for testing the accuracy of angr

In [None]:
import angr

binary_path = '/home/kai/project/angr_tetxt'

# Create the project using the ELF loader (no need for manual vector table parsing)
p = angr.Project(binary_path, auto_load_libs=True)

print("Entry point:", hex(p.loader.main_object.entry))


In [None]:
cfg = p.analyses.CFGEmulated(
    normalize=True,
    context_sensitivity_level=3,  # Increase context sensitivity if needed
    # starts=[entry_point],
    keep_state=True,
    enable_function_hints=True
)

In [None]:
print("CFG generated", len(cfg.graph.nodes()), len(cfg.graph.edges()))

In [None]:
entry_node = cfg.get_any_node(p.entry)
print("Entry node:", entry_node)

In [None]:
cfg.get_all_nodes(p.entry)

In [None]:
cfg.graph.node()

In [None]:
entry_func = cfg.kb.functions[p.entry]
entry_func.blocks

In [None]:
entry_func.addr

In [None]:
cfg.get_successors_and_jumpkind(entry_node)

In [None]:
import re

def read_def_use_file(filename):
    """Reads a def-use file and returns a list of (def, use) tuples as integers."""
    pairs = []
    current_def = None
    # Regular expression to capture hexadecimal addresses.
    hex_re = re.compile(r'0x[0-9A-Fa-f]+')
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if line.startswith("Definition"):
                m = hex_re.search(line)
                if m:
                    current_def = int(m.group(), 16)
            elif line.startswith("Use") and current_def is not None:
                m = hex_re.search(line)
                if m:
                    use_addr = int(m.group(), 16)
                    pairs.append((current_def, use_addr))
                    current_def = None  # reset for the next pair
    return pairs

# Read def-use pairs from the file.
def_use_pairs = read_def_use_file('def_use1.txt')


entry_point = p.loader.main_object.entry
print("Entry point: ", hex(entry_point))

# Build the CFG using CFGEmulated.
# cfg_emulated = p.analyses.CFGEmulated(
#     starts=[entry_point],
#     keep_state=True,
#     context_sensitivity_level=3,
#     normalize=True
# )

print("CFGEmulated analysis completed with {} nodes.".format(len(cfg.graph.nodes())))

# Iterate over the CFGEmulated nodes and compute each node's address range.
for node in cfg.graph.nodes():
    block_start = node.addr
    # Try to get the block size. If node.size is None, fall back to computing it.
    try:
        block_size = node.size if node.size is not None else p.factory.block(node.addr).size
    except Exception as e:
        print(f"Could not compute size for block at {hex(node.addr)}: {e}")
        continue
    block_end = block_start + block_size

    # For each def-use pair, check if the definition or use falls within the block.
    for (def_addr, use_addr) in def_use_pairs:
        if block_start <= def_addr < block_end:
            print("Definition {:#x} is in basic block range {:#x} - {:#x}".format(def_addr, block_start, block_end))
        if block_start <= use_addr < block_end:
            print("Use {:#x} is in basic block range {:#x} - {:#x}".format(use_addr, block_start, block_end))


In [None]:

# Define critical registers and memory ranges
critical_registers = ['pc', 'sp', 'lr', 'cpsr']
critical_memory_ranges = [
    (0x400E0800, 0x400E0FFF),  # System Control registers
    # Add other critical ranges as per the datasheet
]

def is_hardware_address(addr):
    # Peripheral memory space for SAM3X8E
    hardware_ranges = [
        (0x40000000, 0x5FFFFFFF),
    ]
    for start, end in hardware_ranges:
        if addr >= start and addr <= end:
            return True
    return False

def is_critical_memory(addr):
    for start, end in critical_memory_ranges:
        if addr >= start and addr <= end:
            return True
    return False

In [None]:
definitions_not_in_cfg = set()
def_use_chains_not_in_cfg = set()
external_defs_not_in_cfg = set()

In [None]:
# print(cfg.graph.nodes())

In [None]:
entry_node = None
for n in cfg.graph.nodes():
    if n.addr == entry_point:
        entry_node = n
        break

if entry_node is None:
    print("Warning: Entry node not found in CFG. Distances computation may fail.")
    distances = {}
else:
    # Compute shortest paths from the entry node
    distances = dict(nx.shortest_path_length(cfg.graph, source=entry_node))



In [None]:
print(dict(distances))

In [None]:
for function_addr, function in cfg.kb.functions.items():
    print(f"Function at 0x{function_addr:08X}")
    print(f"  Name: {function.name}")

In [None]:
dot_file_path = "cfg.dot"
write_dot(cfg.graph, dot_file_path)
print(f"CFG graph saved to {dot_file_path}")

# Convert DOT file to PNG (requires Graphviz)
png_file_path = "cfg.png"
os.system(f"dot -Tpng {dot_file_path} -o {png_file_path}")
print(f"CFG graph saved as image: {png_file_path}")



In [None]:

for function_addr, function in cfg.kb.functions.items():
    try:
        print(f"\nAnalyzing function {function.name} at 0x{function_addr:x}")

        # Run ReachingDefinitions analysis on the function
        rd_analysis = p.analyses.ReachingDefinitions(
            subject=function,
            func_addr=function_addr,
            track_tmps=True,
            observe_all=True  # Observe all definitions and uses
        )

        # Iterate over all definitions
        #get all the definations
        for _def in rd_analysis.all_definitions:
            def_ins_addr = _def.codeloc.insn_addr
            uses = rd_analysis.all_uses.get_uses(_def)
            #get uses

            # Check if the definition is external
            if isinstance(_def.codeloc, ExternalCodeLocation):
                atom = _def.atom
                # Create a hashable identifier for the atom
                if isinstance(atom, MemoryLocation):
                    atom_id = ('mem', atom.addr)
                elif isinstance(atom, Register):
                    atom_id = ('reg', atom.reg_offset)
                else:
                    atom_id = ('other', str(atom))
                # Use the instruction addresses of uses
                uses_ins_addrs = tuple(use.ins_addr for use in uses)
                external_defs_not_in_cfg.add((atom_id, uses_ins_addrs))
                continue  # Continue processing if needed

            # Get the CFG node containing the definition instruction address
            def_node = cfg.model.get_any_node(def_ins_addr, anyaddr=True)
            # print("@@@@@@@@@@@@@@@@@@@@@@new def")
            # print(def_node)
            # def_node = get_block_containing_insn(cfg, def_ins_addr)
            # print(def_node)
            #I think this part can be skipped. 
            if def_node is None:
                # Record the definition not in CFG
                definitions_not_in_cfg.add((def_ins_addr, 'Def instruction not in CFG'))
                continue  # Skip further processing for this definition
            print(def_node)
            
            matching_node = None
            for n in cfg.graph.nodes():
                same_addr = (n.addr == def_node.addr)
                same_func = (n.function_address == def_node.function_address)
                # callstack_key may be None if not available, so handle gracefully:
                def_callstack = getattr(def_node, 'callstack_key', None)
                n_callstack = getattr(n, 'callstack_key', None)
                same_callstack = (def_callstack == n_callstack)

                if same_addr and same_func and same_callstack:
                    matched_node = n
                    break

            # If we found a matched_node in the graph, attempt to get its distance
            if matched_node is not None and matched_node in distances:
                def_distance = distances[matched_node]
            else:
                # either not found or not reachable from entry
                def_distance = 'Unknown'
            
            
            # def_distance = distances.get(def_node, 'Unknown')
            # print(def_distance)
            #calculate the path length(def layers)
            # entry_node = cfg.model.get_any_node(entry_point, anyaddr=True)
            # if entry_node and nx.has_path(cfg.graph, entry_node, def_node):
            #     path_length = nx.shortest_path_length(cfg.graph, entry_node, def_node)
            # else:
            #     path_length = -1
            ##########################
            # Handle uses
            if uses:
                for use in uses:
                    use_ins_addr = use.ins_addr
                    use_node = cfg.get_any_node(use_ins_addr, anyaddr=True)
                    # print(use_node)
                    # use_node = get_block_containing_insn(cfg, use_ins_addr)
                    # print("**********")
                    # print(use_node1)
                    # print("----------------")

                    # print(use_node)
                    # print(use_node)
                    #this part can also be skipped  
                    # if def_node == use_node:
                    # #     # They are in the same basic block, skip
                    #     continue
                    if use_node is None:
                        # Record the def-use chain not in CFG
                        def_use_chains_not_in_cfg.add((def_ins_addr, use_ins_addr, def_distance, 'Use instruction not in CFG'))
                        
                        continue  # Continue to next use

                    # # Check if there is a path from def_node to use_node
                    # if def_ins_addr == use_ins_addr:
                    #     continue 
                    # if def_node == use_node:
                    #     # They are in the same basic block, skip
                    #     continue
                    # if def_node != use_node and def_ins_addr != use_ins_addr:
                    matched_use_node = None
                    for n in cfg.graph.nodes():
                        same_addr = (n.addr == use_node.addr)
                        same_func = (n.function_address == use_node.function_address)
                        use_callstack = getattr(use_node, 'callstack_key', None)
                        n_callstack = getattr(n, 'callstack_key', None)
                        same_callstack = (use_callstack == n_callstack)
                        if same_addr and same_func and same_callstack:
                            matched_use_node = n
                            break

                    if def_node != use_node and matched_node is not None:
                        # if not nx.has_path(cfg.graph, def_node, use_node):
                        if matched_node is not None and matched_use_node is not None:
                            if not nx.has_path(cfg.graph, matched_node, matched_use_node):
                                def_use_chains_not_in_cfg.add((def_ins_addr, use_ins_addr, f"Distance: {def_distance} - No path from def to use in CFG"))
                        else:
                            def_use_chains_not_in_cfg.add((def_ins_addr, use_ins_addr, f"Distance: {def_distance} - Could not match use_node in CFG"))
                        
                        
                        # if not nx.has_path(cfg.graph, def_node, use_node):
                            # Record the def-use chain with no path in CFG
                            def_use_chains_not_in_cfg.add((def_ins_addr, use_ins_addr, def_distance, 'No path from def to use in CFG'))
                            # print(def_use_chains_not_in_cfg)
                            continue  # Continue to next use
            else:
                # No uses recorded
                pass

    except Exception as e:
        print(f"Error analyzing function {function.name} (0x{function_addr:x}): {e}")

In [None]:
print(def_distance)