In [1]:
import angr
import sys
import struct
import networkx as nx
import monkeyhex
from angr.code_location import ExternalCodeLocation
from angr.knowledge_plugins.key_definitions.atoms import Register, MemoryLocation


In [2]:
import os
import matplotlib.pyplot as plt
from networkx.drawing.nx_pydot import write_dot

In [3]:
binary_path = '/home/kai/project/experimentdata/FREERTOS.bin'

# Arduino Due base address for Flash memory
base_addr = 0x00080000

with open(binary_path, 'rb') as f:
    # Read the first 8 bytes (Initial Stack Pointer and Reset Handler)
    vector_table = f.read(8)
    if len(vector_table) < 8:
        print("Error: Binary file is too short to contain a valid vector table.")
        sys.exit(1)
    # Unpack the data (little-endian format)
    initial_sp, reset_handler = struct.unpack('<II', vector_table)
    entry_point = reset_handler
    print(f"Initial Stack Pointer: 0x{initial_sp:08X}")
    print(f"Entry Point (Reset Handler) address: 0x{entry_point:08X}")

p = angr.Project(
    binary_path,
    main_opts={
        'backend': 'blob',
        'arch': 'armel',  # 'armel' for little-endian ARM
        'base_addr': base_addr,
        'entry_point': entry_point,
    },
    auto_load_libs=True
)
#using emulated model
cfg = p.analyses.CFGEmulated(
    normalize=True,
    context_sensitivity_level=3,  # Increase context sensitivity if needed
    # starts=[entry_point],
    keep_state=True,
    enable_function_hints=True
)



Initial Stack Pointer: 0x20075680
Entry Point (Reset Handler) address: 0x000806D9




In [4]:

# Define critical registers and memory ranges
critical_registers = ['pc', 'sp', 'lr', 'cpsr']
critical_memory_ranges = [
    (0x400E0800, 0x400E0FFF),  # System Control registers
    # Add other critical ranges as per the datasheet
]

def is_hardware_address(addr):
    # Peripheral memory space for SAM3X8E
    hardware_ranges = [
        (0x40000000, 0x5FFFFFFF),
    ]
    for start, end in hardware_ranges:
        if addr >= start and addr <= end:
            return True
    return False

def is_critical_memory(addr):
    for start, end in critical_memory_ranges:
        if addr >= start and addr <= end:
            return True
    return False

In [5]:
definitions_not_in_cfg = set()
def_use_chains_not_in_cfg = set()
external_defs_not_in_cfg = set()

In [6]:
entry_node = None
for n in cfg.graph.nodes():
    if n.addr == entry_point:
        entry_node = n
        break

if entry_node is None:
    print("Warning: Entry node not found in CFG. Distances computation may fail.")
    distances = {}
else:
    # Compute shortest paths from the entry node
    distances = dict(nx.shortest_path_length(cfg.graph, source=entry_node))



In [7]:
print(dict(distances))

{<CFGENode 0x806d9[10]>: 0, <CFGENode 0x80705[8]>: 1, <CFGENode 0x806e3[8]>: 1, <CFGENode 0x80723[24]>: 2, <CFGENode 0x8070d[14]>: 2, <CFGENode 0x806eb[14]>: 2, <CFGENode 0x80743[4]>: 3, <CFGENode 0x8073b[8]>: 3, <CFGENode 0x8071b[8]>: 3, <CFGENode 0x806f9[12]>: 3}


In [9]:
for function_addr, function in cfg.kb.functions.items():
    print(f"Function at 0x{function_addr:08X}")
    print(f"  Name: {function.name}")

Function at 0x00080000
  Name: sub_80000
Function at 0x000801C5
  Name: sub_801c5
Function at 0x00080201
  Name: sub_80201
Function at 0x000806D9
  Name: _start
Function at 0x00080889
  Name: sub_80889
Function at 0x000808C1
  Name: sub_808c1
Function at 0x000808C7
  Name: sub_808c7
Function at 0x000808D1
  Name: sub_808d1
Function at 0x000808D5
  Name: sub_808d5
Function at 0x00081B59
  Name: sub_81b59
Function at 0x00081B7D
  Name: sub_81b7d
Function at 0x00081BC3
  Name: sub_81bc3
Function at 0x00081BD1
  Name: sub_81bd1
Function at 0x00081CBD
  Name: sub_81cbd
Function at 0x00081DB5
  Name: sub_81db5
Function at 0x00081DEF
  Name: sub_81def
Function at 0x00081E35
  Name: sub_81e35
Function at 0x0008202B
  Name: sub_8202b
Function at 0x00082039
  Name: sub_82039
Function at 0x00082517
  Name: sub_82517
Function at 0x00082533
  Name: sub_82533
Function at 0x000826A3
  Name: sub_826a3
Function at 0x000826BD
  Name: sub_826bd
Function at 0x0008274B
  Name: sub_8274b
Function at 0x00082

In [12]:
dot_file_path = "cfg.dot"
write_dot(cfg.graph, dot_file_path)
print(f"CFG graph saved to {dot_file_path}")

# Convert DOT file to PNG (requires Graphviz)
png_file_path = "cfg.png"
os.system(f"dot -Tpng {dot_file_path} -o {png_file_path}")
print(f"CFG graph saved as image: {png_file_path}")



CFG graph saved to cfg.dot
CFG graph saved as image: cfg.png


sh: 1: dot: not found


In [None]:

for function_addr, function in cfg.kb.functions.items():
    try:
        print(f"\nAnalyzing function {function.name} at 0x{function_addr:x}")

        # Run ReachingDefinitions analysis on the function
        rd_analysis = p.analyses.ReachingDefinitions(
            subject=function,
            func_addr=function_addr,
            track_tmps=True,
            observe_all=True  # Observe all definitions and uses
        )

        # Iterate over all definitions
        #get all the definations
        for _def in rd_analysis.all_definitions:
            def_ins_addr = _def.codeloc.ins_addr
            uses = rd_analysis.all_uses.get_uses(_def)
            #get uses

            # Check if the definition is external
            if isinstance(_def.codeloc, ExternalCodeLocation):
                atom = _def.atom
                # Create a hashable identifier for the atom
                if isinstance(atom, MemoryLocation):
                    atom_id = ('mem', atom.addr)
                elif isinstance(atom, Register):
                    atom_id = ('reg', atom.reg_offset)
                else:
                    atom_id = ('other', str(atom))
                # Use the instruction addresses of uses
                uses_ins_addrs = tuple(use.ins_addr for use in uses)
                external_defs_not_in_cfg.add((atom_id, uses_ins_addrs))
                continue  # Continue processing if needed

            # Get the CFG node containing the definition instruction address
            def_node = cfg.model.get_any_node(def_ins_addr, anyaddr=True)
            # print("@@@@@@@@@@@@@@@@@@@@@@new def")
            # print(def_node)
            # def_node = get_block_containing_insn(cfg, def_ins_addr)
            # print(def_node)
            #I think this part can be skipped. 
            if def_node is None:
                # Record the definition not in CFG
                definitions_not_in_cfg.add((def_ins_addr, 'Def instruction not in CFG'))
                continue  # Skip further processing for this definition
            print(def_node)
            
            matching_node = None
            for n in cfg.graph.nodes():
                same_addr = (n.addr == def_node.addr)
                same_func = (n.function_address == def_node.function_address)
                # callstack_key may be None if not available, so handle gracefully:
                def_callstack = getattr(def_node, 'callstack_key', None)
                n_callstack = getattr(n, 'callstack_key', None)
                same_callstack = (def_callstack == n_callstack)

                if same_addr and same_func and same_callstack:
                    matched_node = n
                    break

            # If we found a matched_node in the graph, attempt to get its distance
            if matched_node is not None and matched_node in distances:
                def_distance = distances[matched_node]
            else:
                # either not found or not reachable from entry
                def_distance = 'Unknown'
            
            
            # def_distance = distances.get(def_node, 'Unknown')
            # print(def_distance)
            #calculate the path length(def layers)
            # entry_node = cfg.model.get_any_node(entry_point, anyaddr=True)
            # if entry_node and nx.has_path(cfg.graph, entry_node, def_node):
            #     path_length = nx.shortest_path_length(cfg.graph, entry_node, def_node)
            # else:
            #     path_length = -1
            ##########################
            # Handle uses
            if uses:
                for use in uses:
                    use_ins_addr = use.ins_addr
                    use_node = cfg.get_any_node(use_ins_addr, anyaddr=True)
                    # print(use_node)
                    # use_node = get_block_containing_insn(cfg, use_ins_addr)
                    # print("**********")
                    # print(use_node1)
                    # print("----------------")

                    # print(use_node)
                    # print(use_node)
                    #this part can also be skipped  
                    # if def_node == use_node:
                    # #     # They are in the same basic block, skip
                    #     continue
                    if use_node is None:
                        # Record the def-use chain not in CFG
                        def_use_chains_not_in_cfg.add((def_ins_addr, use_ins_addr, def_distance, 'Use instruction not in CFG'))
                        
                        continue  # Continue to next use

                    # # Check if there is a path from def_node to use_node
                    # if def_ins_addr == use_ins_addr:
                    #     continue 
                    # if def_node == use_node:
                    #     # They are in the same basic block, skip
                    #     continue
                    # if def_node != use_node and def_ins_addr != use_ins_addr:
                    matched_use_node = None
                    for n in cfg.graph.nodes():
                        same_addr = (n.addr == use_node.addr)
                        same_func = (n.function_address == use_node.function_address)
                        use_callstack = getattr(use_node, 'callstack_key', None)
                        n_callstack = getattr(n, 'callstack_key', None)
                        same_callstack = (use_callstack == n_callstack)
                        if same_addr and same_func and same_callstack:
                            matched_use_node = n
                            break

                    if def_node != use_node and matched_node is not None:
                        # if not nx.has_path(cfg.graph, def_node, use_node):
                        if matched_node is not None and matched_use_node is not None:
                            if not nx.has_path(cfg.graph, matched_node, matched_use_node):
                                def_use_chains_not_in_cfg.add((def_ins_addr, use_ins_addr, f"Distance: {def_distance} - No path from def to use in CFG"))
                        else:
                            def_use_chains_not_in_cfg.add((def_ins_addr, use_ins_addr, f"Distance: {def_distance} - Could not match use_node in CFG"))
                        
                        
                        # if not nx.has_path(cfg.graph, def_node, use_node):
                            # Record the def-use chain with no path in CFG
                            def_use_chains_not_in_cfg.add((def_ins_addr, use_ins_addr, def_distance, 'No path from def to use in CFG'))
                            # print(def_use_chains_not_in_cfg)
                            continue  # Continue to next use
            else:
                # No uses recorded
                pass

    except Exception as e:
        print(f"Error analyzing function {function.name} (0x{function_addr:x}): {e}")


Analyzing function sub_80000 at 0x80000
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>
<CFGENode 0x80000[4]>

Analyzing function sub_801c5 at 0x801c5
<CFGENode 0x801c5[14]>
<CFGENode 0x801e7[6]>
<CFGENode 0x801d3[6]>
<CFGENode 0x801c5[14]>
<CFGENode 0x801c5[14]>
<CFGENode 0x801e7[6]>
<CFGENode 0x801d9[8]>
<CFGENode 0x801e7[6]>
<CFGENode 0x801e7[6]>
<CFGENode 0x801c5[14]>
<CFGENode 0x801c5[14]>
<CFGENode 0x801c5[14]>
<CFGENode 0x801e1[6]>
<CFGENode 0x801e7[6]>
<CFGENode 0x801d3[6]>
<CFGENode 0x801c5[14]>
<CFGENode 0x801c5[14]>
<CFGENode 0x801f3[4]>
<CFGENode 0x801d9[8]>
<CFGENode 0x801e1[6]>
<CFGENode 0x801e7[6]>
<CFGENode 0x801c5[14]>
<CFGENode 0x801e7[6]>
<CFGENode 0x801e1[6]>
<CFGENode 0x801d3[6]>
<CFGENode 0x801e1[6]>
<CFGENode 0x801d9[8]>
<CFGENo



<CFGENode 0x81b95[2]>
<CFGENode 0x81bc3[10]>
<CFGENode 0x81b87[8]>
<CFGENode 0x81b7d[10]>
<CFGENode 0x81b59[16]>
<CFGENode 0x81b8f[4]>
<CFGENode 0x81b59[16]>
<CFGENode 0x81b97[44]>
<CFGENode 0x81b93[2]>
<CFGENode 0x81b71[4]>
<CFGENode 0x81b69[8]>
<CFGENode 0x81b75[8]>
<CFGENode 0x81b7d[10]>
<CFGENode 0x81b87[8]>
<CFGENode 0x81b59[16]>
<CFGENode 0x81b7d[10]>
<CFGENode 0x81b97[44]>
<CFGENode 0x81b97[44]>
<CFGENode 0x81b95[2]>
<CFGENode 0x81b7d[10]>
<CFGENode 0x81b97[44]>
<CFGENode 0x81b95[2]>
<CFGENode 0x81b59[16]>
<CFGENode 0x81b97[44]>
<CFGENode 0x81b97[44]>
<CFGENode 0x81b97[44]>
<CFGENode 0x81b7d[10]>
<CFGENode 0x81b97[44]>
<CFGENode 0x81b69[8]>
<CFGENode 0x81b7d[10]>
<CFGENode 0x81b7d[10]>
<CFGENode 0x81b97[44]>
<CFGENode 0x81bc3[10]>
<CFGENode 0x81b97[44]>
<CFGENode 0x81bc3[10]>
<CFGENode 0x81b8f[4]>
<CFGENode 0x81b93[2]>
<CFGENode 0x81b71[4]>
<CFGENode 0x81b59[16]>
<CFGENode 0x81b87[8]>
<CFGENode 0x81b75[8]>
<CFGENode 0x81b93[2]>
<CFGENode 0x81b97[44]>
<CFGENode 0x81b97[44]>
<CFGE



<CFGENode 0x81cad[16]>
<CFGENode 0x81ca1[12]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81c49[12]>
<CFGENode 0x81c2d[24]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81c59[14]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81c59[14]>
<CFGENode 0x81c7f[20]>
<CFGENode 0x81bdd[4]>
<CFGENode 0x81c55[4]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81c2d[24]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81c75[4]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81c59[14]>
<CFGENode 0x81c49[12]>
<CFGENode 0x81c7f[20]>
<CFGENode 0x81bdd[4]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81bdd[4]>
<CFGENode 0x81c49[12]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81c2d[24]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81bd1[12]>
<CFGENode 0x81c2d[24]>
<CFGENode 0x81bd1[12]>
<CFGENode 0x81c2d[24]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81be1[72]>
<CFGENode 0x81c45[4]>
<CFGENode 0x81bd1[12]>
<CFGENode 0x81c29[4]>
<CFGENode 0x81c59[14]>
<CFGENode 0x81bd1[12]>
<CFGENode 0x81c75[4]>
<CFGENode 0x81c7f[2

Exception ignored in: <function WeakValueDictionary.__init__.<locals>.remove at 0x7968a4266700>
Traceback (most recent call last):
  File "/usr/lib/python3.12/weakref.py", line 105, in remove
    def remove(wr, selfref=ref(self), _atomic_removal=_remove_dead_weakref):

KeyboardInterrupt: 


<CFGENode 0x83273[4]>
<CFGENode 0x834b9[10]>
<CFGENode 0x8330d[10]>
<CFGENode 0x83329[16]>
<CFGENode 0x83455[8]>
<CFGENode 0x8328b[6]>
<CFGENode 0x83493[28]>
<CFGENode 0x83385[20]>
<CFGENode 0x832c7[8]>
<CFGENode 0x832d3[8]>
<CFGENode 0x83405[4]>
<CFGENode 0x83477[28]>
<CFGENode 0x83303[10]>
<CFGENode 0x8337d[6]>
<CFGENode 0x83233[18]>
<CFGENode 0x83411[16]>
<CFGENode 0x83261[18]>
<CFGENode 0x83261[18]>
<CFGENode 0x832b5[10]>
<CFGENode 0x83233[18]>
<CFGENode 0x83405[4]>
<CFGENode 0x83261[18]>
<CFGENode 0x83347[4]>
<CFGENode 0x83317[4]>
<CFGENode 0x83477[28]>
<CFGENode 0x834eb[10]>
<CFGENode 0x83277[18]>
<CFGENode 0x83261[18]>
<CFGENode 0x83399[4]>
<CFGENode 0x83463[8]>
<CFGENode 0x834b9[10]>
<CFGENode 0x83233[18]>
<CFGENode 0x833e5[20]>
<CFGENode 0x8337d[6]>
<CFGENode 0x834c3[16]>
<CFGENode 0x833a1[16]>
<CFGENode 0x83273[4]>
<CFGENode 0x8342b[42]>
<CFGENode 0x8329f[10]>
<CFGENode 0x8330d[10]>
<CFGENode 0x83379[4]>
<CFGENode 0x83411[16]>
<CFGENode 0x8331b[8]>
<CFGENode 0x8329f[10]>
<CFG

In [10]:
print(def_distance)

Unknown
