# UsageFinder

UsageFinder is a tool for exploring **how functions are used** in a target program. This is useful for developing a general understanding of the target application as well as for exploring misuses of functions (e.g. violations of API expectations, etc.). In this tool, we refer to the set of functions (identified by the user) as the "API" we're exploring.

This notebook contains the entire implementation of the tool. If you're just interested in using the tool:

1. Select "Kernel -> Restart & Run All" from the menu (or press the "fast forward" double-triangle button) to run the whole notebook
2. Scroll down or <a href="#start_here">CLICK HERE</a> for the "UsageFinder Users: START HERE" heading. Interact with the UI widgets starting in that section.

# Implementation - skip this section

MATE-developers (and curious folks) only.

### Imports

In [None]:
from collections import defaultdict
import html
import pprint
import re

In [None]:
# Suppress "Building paths with unbounded maximum length" warnings in output
import logging
logging.getLogger().setLevel(logging.CRITICAL)

In [None]:
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy.dialects.postgresql import array_agg, ARRAY, array
from sqlalchemy.sql.elements import Grouping
from sqlalchemy.sql.expression import case, func, literal, null, true
from sqlalchemy.types import String

In [None]:
# needed to render HTML inside of Jupyter
from IPython.display import clear_output, Markdown, Image, HTML # NB shadows MATE HTML NewType that wraps str

import ipywidgets as widgets

In [None]:
import pygraphviz as pgv

In [None]:
from mate_query import cfl
from mate_common.models.builds import BuildState

### Utility Functions

In [None]:
def nid(uuid):
    """Look up the CPG node whose UUID equals ``str(uuid)``.

    The lookup ends in ``.one()``, so exactly one matching node is expected.
    """
    node_query = session.query(cpg.Node)
    return node_query.filter_by(uuid=str(uuid)).one()

In [None]:
def getfn(name):
    """Look up the CPG function whose demangled name is ``name``.

    The lookup ends in ``.one()``, so exactly one matching function is
    expected.
    """
    function_query = session.query(cpg.Function)
    return function_query.filter_by(demangled_name=name).one()

In [None]:
def show_llvm(fn):
    """Print the instructions of ``fn`` block by block, interleaved with source.

    Every block starts with a ``### <block> ###`` header.  Whenever an
    instruction's recorded source line differs from the last one printed, the
    source text is emitted before the instruction; instructions without
    location/source attributes are listed without a source line.
    """
    previous_line = 0
    for block in fn.blocks:
        print(f"### {block} ###")
        for instruction in block.instructions:
            attrs = instruction.attributes
            try:
                line = attrs['location']['line']
                if line != previous_line:
                    # Record the line before reading the source text, so a
                    # missing 'source_code' entry still advances the marker.
                    previous_line = line
                    print(f"\n{attrs['source_code']} /* line {previous_line} */")
            except KeyError:
                pass

            print(f"  // {instruction}: {attrs['pretty_string']}")
            print(f"  //      Successors: {instruction.successors}")

### callsite analysis


In [None]:
def check_constant_ref_to_global_string(this_constant):
    """Describe the global string that ``this_constant`` refers to.

    Searches for a GlobalVariable that has a VALUE_DEFINITION_TO_USE edge to
    ``this_constant`` and a GLOBAL_TO_INITIALIZER edge to a ConstantString,
    then decodes that string's hex-encoded ``string_value`` attribute.

    Returns:
        ``"string '<text>'"``; bytes outside ASCII are shown as ``\\xNN``
        escapes instead of aborting the lookup.

    Raises:
        Whatever ``Query.one()`` raises when there is not exactly one match
        (``argdata`` relies on ``NoResultFound`` for the "no match" case).
    """
    # The notebook environment shadows the bare name ``bytes`` with a module,
    # and ``__builtins__`` is a CPython detail that is a plain dict outside
    # ``__main__`` -- the ``builtins`` module is the dependable way in.
    import builtins

    vdtu_edge = aliased(cpg.Edge)  # VALUE_DEFINITION_TO_USE
    gti_edge = aliased(cpg.Edge)  # GLOBAL_TO_INITIALIZER

    s = aliased(cpg.ConstantString)

    str_in_hex = (
        session.query(cpg.GlobalVariable)
        .join(vdtu_edge,
              (vdtu_edge.source == cpg.GlobalVariable.uuid)
              & (vdtu_edge.target == this_constant.uuid)
              & (vdtu_edge.kind == EdgeKind.VALUE_DEFINITION_TO_USE)
             )
        .join(gti_edge,
             (gti_edge.source == cpg.GlobalVariable.uuid)
              & (gti_edge.kind == EdgeKind.GLOBAL_TO_INITIALIZER)
             )
        .join(s, gti_edge.target_node)
        .with_entities(s.attributes['string_value'])
        .one()
    )[0]  # unwrap the single-column result row

    text = builtins.bytes.fromhex(str_in_hex).decode('ASCII', errors='backslashreplace')
    return f"string '{text}'"

def argdata(arg):
    """Return a short human-readable description of a call argument node.

    Checks are tried in order and the first that applies wins:

    1. ``constant_int_value`` attribute  -> ``"int <value>"``
    2. ``string_value`` attribute (hex)  -> ``"string '<text>'"``
    3. constant referring to a global ConstantString (database lookup)
    4. ``is_null_value`` -> ``"NULL"``, ``is_nan`` -> ``"NaN"``,
       ``might_be_null`` -> ``"might be NULL"``

    Returns an empty string when nothing is known about the argument.
    """
    # The notebook config auto-imports a module named 'bytes' that shadows the
    # built-in type, and ``__builtins__`` is only a module inside ``__main__``
    # (a CPython detail), so reach the real type through ``builtins``.
    import builtins

    try:
        return f"int {arg.attributes['constant_int_value']}"
    except KeyError:
        pass

    try:
        str_in_hex = arg.attributes['string_value']
    except KeyError:
        pass
    else:
        # backslashreplace keeps non-ASCII string constants from aborting the
        # whole table; they are shown as \xNN escapes instead.
        text = builtins.bytes.fromhex(str_in_hex).decode('ASCII', errors='backslashreplace')
        return f"string '{text}'"

    # is it a Constant pointing to a global ConstantString?
    try:
        return check_constant_ref_to_global_string(arg)
    except (NoResultFound, AttributeError):
        pass

    # Not every node kind defines these flags, hence the AttributeError guard.
    for flag_name, description in (
        ("is_null_value", "NULL"),
        ("is_nan", "NaN"),
        ("might_be_null", "might be NULL"),
    ):
        try:
            if getattr(arg, flag_name):
                return description
        except AttributeError:
            pass

    return ''  # swap in f"? {arg}" to debug unrecognised argument nodes


def cs_row(fn, cs):
    """Build one table row describing callsite ``cs`` of function ``fn``.

    The row is ``[cs, "<file>:<line>", <calling function's demangled name>,
    <source line>]`` followed by one ``argdata`` description per declared
    argument of ``fn``.
    """
    # Callsites expose their operands as argument0 .. argument9.
    max_supported_args = 10

    location = cs.attributes['location']
    row = [
        cs,
        f"{location['file']}:{location['line']}",
        cs.parent_block.parent_function.demangled_name,
        cs.attributes['source_code'],
    ]

    # Only read the argumentN attributes that are actually reported, rather
    # than evaluating all ten and discarding the surplus.
    arg_count = min(len(fn.arguments), max_supported_args)
    row.extend(
        argdata(getattr(cs, f"argument{index}")) for index in range(arg_count)
    )

    return row

def showme_callsite_table(fn):
    """Display a table with one row per callsite of ``fn``.

    Columns are the callsite node, its source location, the calling function
    and the source line, followed by one ``argN`` column per declared argument
    of ``fn``.
    """
    rows = [cs_row(fn, callsite) for callsite in fn.callsites]
    fixed_columns = ["CS node", "source", "in function", "src line"]
    argument_columns = [f"arg{n}" for n in range(len(fn.arguments))]
    display(rows, columns=(fixed_columns + argument_columns))

### Explore related functions

In [None]:
def function_and_contexts_that_call_multiple_api_functions(api_fns):
    """Find callers that have more than one call-graph edge into the API.

    ``api_fns`` is the collection of functions making up the "API".  CALLGRAPH
    edges targeting any of them are grouped by (caller, caller_context) and
    only groups with a count above one are kept.

    Returns a list of ``(caller uuid, caller_context)`` rows.
    """
    CG = aliased(cpg.Edge)
    Caller = aliased(cpg.Function)
    Callee = aliased(cpg.Function)

    api_uuids = [f.uuid for f in api_fns]

    return (
        session.query(CG)
        .filter(
            CG.kind == EdgeKind.CALLGRAPH,
            CG.target.in_(api_uuids)
        )
        .join(Caller, Caller.uuid == CG.source)
        .join(Callee, Callee.uuid == CG.target)
        .group_by(Caller, CG.attributes['caller_context'])
        .having(func.count(Callee.uuid) > 1)
        .with_entities(Caller.uuid, CG.attributes['caller_context'])
        .all()
    )

In [None]:
def scoped_api_callsite_uuids(containing_fn_uuid, containing_context_txt, api_fns):
    """Get all of the API callsites in a given function and caller_context.

    Args:
        containing_fn_uuid: UUID of the function whose body is searched.
        containing_context_txt: caller context, compared against the text of
            each edge's ``caller_context`` attribute.
        api_fns: functions that make up the API of interest.

    Returns:
        List of distinct UUIDs of callsites inside the function that have a
        CALL_TO_FUNCTION edge to one of ``api_fns`` in that context.
    """
    F = aliased(cpg.Function)
    B = aliased(cpg.Block)
    CS = aliased(cpg.CallSite)
    CF = aliased(cpg.Edge)

    api_uuids = [f.uuid for f in api_fns]

    rows = (
        session.query(F)
        .filter_by(uuid=containing_fn_uuid)
        .join(B, F.blocks)
        .join(CS, B.instructions)
        # Note: we're using CALL_TO_FUNCTION edges explicitly
        # here so that we can filter on the caller's context.
        # This only matters for dynamic callsites that have
        # different sets of potential callees in different contexts,
        # which happens fairly rarely...
        .join(
            CF,
            (CF.kind == EdgeKind.CALL_TO_FUNCTION)
            & (CF.source == CS.uuid)
            & (CF.target.in_(api_uuids))
            & (CF.attributes['caller_context'].astext == containing_context_txt)
        )
        .with_entities(CS.uuid)
        .distinct()
        .all()
    )

    # Each result row holds a single column: the callsite UUID.
    return [row[0] for row in rows]

In [None]:
# Build an intra-procedural CFG query looking for paths between callsites.
# Note: ideally we'd stop once we find the first callsite so that the sequence
# A --> B --> C only returns (A,B) (B,C) but until the PathBuilder API is updated
# we are forced to also return (A,C)...

def callsites_related_by_intra_cpg(callsite_uuids):
    """Find ordered pairs of the given callsites connected by a CFG path.

    Paths start and stop at nodes in ``callsite_uuids`` and follow only
    INSTRUCTION_TO_SUCCESSOR_INSTRUCTION edges.

    Returns distinct rows of ``(source uuid, source pretty_string,
    target uuid, target pretty_string)``.
    """
    callsite_paths = (
        db.PathBuilder(db.Path)
        .starting_at(lambda Node: Node.uuid.in_(callsite_uuids))
        .stopping_at(lambda Node: Node.uuid.in_(callsite_uuids))
        .continuing_while(
            lambda Config, Edge: (
                (Edge.kind == EdgeKind.INSTRUCTION_TO_SUCCESSOR_INSTRUCTION)
                & (Config.c.edge.is_(None) | Edge.source.notin_(callsite_uuids))
            )
        )
        # keep_edge=True so that we can at least filter out (A,A) 0-length paths...
        .build(cpg, keep_start=True, keep_edge=True, keep_trace=False)
    )

    PathStart = aliased(cpg.Instruction)
    PathEnd = aliased(cpg.Instruction)

    # Rows without an edge are the 0-length paths; drop them.
    nonempty_paths = session.query(callsite_paths).filter(
        callsite_paths.edge.isnot(None)
    )
    with_endpoints = nonempty_paths.join(
        PathStart, PathStart.uuid == callsite_paths.source
    ).join(PathEnd, PathEnd.uuid == callsite_paths.target)

    return (
        with_endpoints
        .with_entities(
            PathStart.uuid,
            PathStart.attributes['pretty_string'],
            PathEnd.uuid,
            PathEnd.attributes['pretty_string'],
        )
        .distinct()
        .all()
    )

In [None]:
def callsite_access_paths(calling_ctx, callsite_uuids):
    """Build a recursive CTE of "access paths" rooted at the given callsites.

    The CTE has four columns:

    * ``callsite``   - UUID of the callsite the path belongs to
    * ``accesspath`` - textual path: ``->argument<N>`` or ``->return``,
      extended with ``->deref`` by the recursive step
    * ``value``      - UUID of the node the path denotes
    * ``context``    - ``calling_ctx`` on base rows, NULL on derived rows

    The CTE is returned unevaluated; callers select from or join against it.
    """
    Argument = aliased(cpg.Node)

    # Base rows: one per argument operand of each callsite, plus (via the
    # union below) one "->return" row per callsite.
    base_step = (
        session.query(cpg.CallSite)
        .filter(cpg.CallSite.uuid.in_(callsite_uuids))
        .join(
            cpg.Edge,
            (cpg.Edge.target == cpg.CallSite.uuid)
            & (cpg.Edge.kind == EdgeKind.VALUE_DEFINITION_TO_USE)
            & (cpg.Edge.attributes['is_argument_operand'].as_boolean() == True)
        )
        .join(Argument, cpg.Edge.source == Argument.uuid)
        .with_entities(
            cpg.CallSite.uuid.label("callsite"),
            func.concat(
                "->argument",
                cpg.Edge.attributes['operand_number'].as_string()
            ).label("accesspath"),
            Argument.uuid.label("value"),
            literal(calling_ctx).label("context"),
        )
        .union(
            # The "->return" row uses the callsite itself as its value.
            session.query(cpg.CallSite)
            .filter(cpg.CallSite.uuid.in_(callsite_uuids))
            .with_entities(
                cpg.CallSite.uuid.label("callsite"),
                literal("->return").label("accesspath"),
                cpg.CallSite.uuid.label("value"),
                literal(calling_ctx).label("context"),
            )
        )
        .cte("access_paths", recursive=True)
    )

    # Recursive rows: follow POINTS_TO edges out of a row's value when the
    # edge's 'context' attribute text equals that row's context.
    # NOTE(review): derived rows carry a NULL context, which presumably stops
    # them from matching the context comparison again (i.e. a single level
    # of "->deref") -- confirm this is the intent.
    recursive_step = (
        session.query(cpg.Edge)
        .filter(cpg.Edge.kind == EdgeKind.POINTS_TO)
        .join(
            base_step,
            (base_step.c.value == cpg.Edge.source)
            & (cpg.Edge.attributes['context'].astext == base_step.c.context)
        )
        .with_entities(
            base_step.c.callsite.label("callsite"),
            func.concat(base_step.c.accesspath, "->deref").label("accesspath"),
            cpg.Edge.target.label("value"),
            null().cast(String).label("context"),
        )
    )

    AccessPaths = base_step.union(recursive_step)
    return AccessPaths

### Explore callsite relations

In [None]:
def callsites_related_by_value(calling_ctx, callsite_uuids):
    """Find pairs of distinct callsites whose access paths denote the same value.

    The access-path CTE from ``callsite_access_paths`` is joined with itself
    on equal ``value`` and different ``callsite``.

    Returns rows of ``(callsite1 uuid, callsite1 pretty_string, accesspath1,
    callsite2 uuid, callsite2 pretty_string, accesspath2, value uuid,
    value pretty_string)``.
    """
    AccessPaths = callsite_access_paths(calling_ctx, callsite_uuids)
    AccessPaths2 = aliased(AccessPaths)
    
    CallSite1 = aliased(cpg.CallSite)
    CallSite2 = aliased(cpg.CallSite)

    Value = aliased(cpg.Node)

    return (
        session.query(AccessPaths)
        # Self-join: same value reached from two different callsites.
        .join(
            AccessPaths2,
            (AccessPaths.c.value == AccessPaths2.c.value)
            & (AccessPaths.c.callsite != AccessPaths2.c.callsite)
        )
        # NOTE(review): this column list looks superseded by the second
        # with_entities() call below -- confirm whether it can be dropped.
        .with_entities(
            AccessPaths.c.callsite,
            AccessPaths.c.accesspath,
            AccessPaths2.c.callsite,
            AccessPaths2.c.accesspath,
            AccessPaths.c.value,
        )
        # Resolve the UUIDs to nodes so pretty_string can be reported.
        .join(CallSite1, CallSite1.uuid == AccessPaths.c.callsite)
        .join(CallSite2, CallSite2.uuid == AccessPaths2.c.callsite)
        .join(Value, Value.uuid == AccessPaths.c.value)
        .with_entities(
            CallSite1.uuid,
            CallSite1.attributes['pretty_string'],
            AccessPaths.c.accesspath,
            CallSite2.uuid,
            CallSite2.attributes['pretty_string'],
            AccessPaths2.c.accesspath,
            Value.uuid,
            Value.attributes['pretty_string'],
        )
        .all()
    )

In [None]:
def callsites_related_by_dataflow(calling_ctx, callsite_uuids):
    """Find pairs of distinct callsites whose access-path values share a dataflow source.

    A reversed path query is seeded with every access-path value of the given
    callsites.  Two callsites are related when paths seeded from each of them
    end at the same ``source`` node.

    Returns distinct rows of ``(callsite1 uuid, callsite1 pretty_string,
    accesspath1, callsite2 uuid, callsite2 pretty_string, accesspath2,
    dataflow source uuid, dataflow source pretty_string)``.
    """
    AccessPaths = callsite_access_paths(calling_ctx, callsite_uuids)
    AccessPaths2 = aliased(AccessPaths)

    # NOTE(review): these two aliases are re-created further down before
    # their first use.
    CallSite1 = aliased(cpg.CallSite)
    CallSite2 = aliased(cpg.CallSite)

    # Seed rows for the path query: "info" carries the originating callsite,
    # "uuid" the node to start from, and "stack" is [context, "$"] when the
    # access path has a context and ["$"] otherwise.
    InitialConfiguration = (
        session.query(AccessPaths)    
        .with_entities(
            AccessPaths.c.callsite.label("info"),
            AccessPaths.c.value.label("uuid"),
            case(
                [
                    (
                        AccessPaths.c.context.isnot(None),
                        array([AccessPaths.c.context, "$"]),
                    ),
                ],
                else_=array(["$"]),
            ).cast(ARRAY(String)).label("stack"),
        )
        .cte()
    )   

    
    # NOTE: we're restricting to Intra-procedural dataflow
    # Query = what function(s) contain the callsites of interest?
    B = aliased(cpg.Block)
    F = aliased(cpg.Function)
    function_uuids = (
        session.query(cpg.CallSite)
        .filter(cpg.CallSite.uuid.in_(callsite_uuids))
        .join(B, cpg.CallSite.parent_block)
        .join(F, B.parent_function)
        .with_entities(F.uuid)
        .distinct()    
    )
    
    
    # Edges the traversal may follow:
    #  * VALUE_DEFINITION_TO_USE edges whose target is neither one of the
    #    callsites nor an ALLOCA node,
    #  * LOAD_MEMORY edges whose context is calling_ctx,
    #  * STORE_MEMORY edges whose context is calling_ctx and whose source
    #    instruction lives in one of the functions containing the callsites.
    argument_dataflow = (
        db.PathBuilder(db.Path)
        .reverse()
        .initial_configuration(InitialConfiguration)
        .continuing_while(
            lambda Config, Edge: (
                (
                    (Edge.kind == EdgeKind.VALUE_DEFINITION_TO_USE)
                    & (
                        Edge.target.notin_(callsite_uuids)
                      & ~Edge.target_node.has(cpg.Node.kind == NodeKind.ALLOCA) # avoid alloca alignment constants
                    )
                )
                | (
                    (Edge.kind == EdgeKind.LOAD_MEMORY)
                  & (Edge.attributes['context'].astext == calling_ctx)
                )
                | (
                    (Edge.kind == EdgeKind.STORE_MEMORY)
                  & (Edge.attributes['context'].astext == calling_ctx)
                  & (
                      Edge.source_node.has(
                          cpg.Instruction.parent_block.has(
                              cpg.Block.parent_function.has(
                                  cpg.Function.uuid.in_(function_uuids)
                              )
                          )
                      )
                  )
                )
            )
        )
        .build(cpg, keep_start=True)
    )

    # Second alias of the path result, for the self-join below.
    argument_dataflow2 = aliased(argument_dataflow)

    CallSite1 = aliased(cpg.CallSite)
    CallSite2 = aliased(cpg.CallSite)
    DataflowSource = aliased(cpg.Node)

    return (
        session.query(argument_dataflow)
        # Pair up paths that reach the same source from different callsites.
        .join(
            argument_dataflow2,
            (argument_dataflow.source == argument_dataflow2.source)
            & (argument_dataflow.info != argument_dataflow2.info)
        )
        # NOTE(review): this column list looks superseded by the second
        # with_entities() call below -- confirm whether it can be dropped.
        .with_entities(
            argument_dataflow.info,
            argument_dataflow.target,
            argument_dataflow2.info,
            argument_dataflow2.target,
            argument_dataflow.source
        )
        .join(CallSite1, CallSite1.uuid == argument_dataflow.info)
        .join(CallSite2, CallSite2.uuid == argument_dataflow2.info)
        .join(DataflowSource, DataflowSource.uuid == argument_dataflow.source)
        # Map each path's target back to the access path that seeded it.
        .join(
            AccessPaths,
            (AccessPaths.c.callsite == CallSite1.uuid)
            & (AccessPaths.c.value == argument_dataflow.target)
        )
        .join(
            AccessPaths2,
            (AccessPaths2.c.callsite == CallSite2.uuid)
            & (AccessPaths2.c.value == argument_dataflow2.target)
        )
        .with_entities(
            CallSite1.uuid,
            CallSite1.attributes['pretty_string'],
            AccessPaths.c.accesspath,
            CallSite2.uuid,
            CallSite2.attributes['pretty_string'],
            AccessPaths2.c.accesspath,
            DataflowSource.uuid,
            DataflowSource.attributes['pretty_string'],
        )
        .distinct()
        .all()
    )

### 2D visualization of relation between API calls

Build a table of API calls that can happen in sequence, `f() -> g()`

Where there exists some CFG path from a call to function `f()` (the row) to function `g()` (the column), show the relation in the cell.

In [None]:
def table_label(uuid):
    """Return a short display label for the node with the given UUID.

    Nodes with callees are labelled ``"<first callee name>(...) #<uuid>"``.
    Any other node falls back to its ``source_code`` attribute and, failing
    that, to its ``pretty_string`` attribute.
    """
    node = nid(uuid)
    try:
        return f"{node.callees[0].name}(...) #{uuid}"
    except (AttributeError, IndexError):
        # AttributeError: the node has no callees attribute at all.
        # IndexError: it has one but the list of callees is empty.
        pass

    try:
        return node.attributes['source_code']
    except KeyError:
        return node.attributes['pretty_string']

In [None]:
def callsite_html_tooltip(callsite_node):
    """Return tooltip text for ``callsite_node``, safe for an HTML attribute.

    The text is the node's string form, a newline, then its pretty-printed
    attributes.  The whole text is HTML-escaped (quotes included), so neither
    the node's string form nor the attribute values (notably the source line)
    can terminate the surrounding ``title="..."`` attribute.
    """
    raw_text = f"{callsite_node}\n{pprint.pformat(callsite_node.attributes)}"
    return html.escape(raw_text)

# note: callsites are sorted and deduplicated
# render_callsite_relation is a function that operates on (a, b)
def showme_2d_api_table_html(callsite_uuids, render_callsite_relation):
    """Display a square HTML table relating every callsite to every other.

    Args:
        callsite_uuids: callsite UUIDs; they are deduplicated and sorted, and
            the same ordering is used for rows and columns.
        render_callsite_relation: function of ``(row_uuid, column_uuid)``
            returning the HTML for one ``<td>`` cell.
    """
    ordered_uuids = sorted(set(callsite_uuids))

    # Look each callsite up once; its tooltip and label are shared by the row
    # header and the column header.
    tooltips = {u: callsite_html_tooltip(nid(u)) for u in ordered_uuids}
    # Labels may fall back to raw source text, so escape them for HTML.
    labels = {u: html.escape(table_label(u)) for u in ordered_uuids}

    column_style = (
        "transform: rotate(180deg); writing-mode: vertical-rl; "
        "white-space: nowrap; overflow: visible; text-align: left;"
    )

    header_cells = ''.join(
        '<td title="' + tooltips[u] + '" style="' + column_style + '">' + labels[u] + '</td>'
        for u in ordered_uuids
    )

    html_rows = [
        '<th title="' + tooltips[a] + '">' + labels[a] + '</th>\n'
        + '\n'.join(render_callsite_relation(a, b) for b in ordered_uuids)
        for a in ordered_uuids
    ]
    body_rows = ''.join('<tr>' + row + '</tr>' for row in html_rows)

    html_table = f"""
    <table>
    <thead>
        <tr>
            <td></td>
            {header_cells}
        </tr>
    </thead>
    <tbody>
        {body_rows}
    </tbody>
    </table>
    """

    display(HTML(html_table))

In [None]:
def showme_table_html(containing_fn_uuid, calling_ctx, api_fns, relation_skipper=None):
    """Display the callsite-by-callsite relation table for one function/context.

    Each cell (row ``s``, column ``t``) shows the strongest relation found:
    ``v`` (shared value), ``d`` (shared dataflow) or ``c`` (CFG path only).
    Cells are coloured when a CFG path from ``s`` to ``t`` exists.  The cell
    tooltip lists all three flags as ``c``/``v``/``d`` or ``.``.

    Args:
        containing_fn_uuid: UUID of the function whose API callsites are shown.
        calling_ctx: caller context the callsites are restricted to.
        api_fns: functions making up the API.
        relation_skipper: optional predicate ``(cs1, ap1, cs2, ap2)``; value
            and dataflow relations for which it returns true are ignored.
    """
    # The set of callsites is the same for all three relation queries, so
    # fetch it once.
    api_callsites = scoped_api_callsite_uuids(containing_fn_uuid, calling_ctx, api_fns)

    exists_callsite_path = {
        (s, t) for (s, _, t, _) in callsites_related_by_intra_cpg(api_callsites)
    }
    # Only callsites taking part in at least one CFG relation are displayed.
    displayed_callsites = {cs for pair in exists_callsite_path for cs in pair}

    def related_pairs(rows):
        """Collect the (cs1, cs2) pairs of relation rows the skipper keeps."""
        return {
            (cs1, cs2)
            for (cs1, _, ap1, cs2, _, ap2, _, _) in rows
            if not (relation_skipper and relation_skipper(cs1, ap1, cs2, ap2))
        }

    exists_values_relation = related_pairs(
        callsites_related_by_value(calling_ctx, api_callsites)
    )
    exists_dataflow_relation = related_pairs(
        callsites_related_by_dataflow(calling_ctx, api_callsites)
    )

    def render_exists_html(s, t):
        pair = (s, t)
        has_path = pair in exists_callsite_path
        has_value = pair in exists_values_relation
        has_dataflow = pair in exists_dataflow_relation
        tooltip = f"{'c' if has_path else '.'}{'v' if has_value else '.'}{'d' if has_dataflow else '.'}"

        if s == t:
            symbol, background = '-', None
        elif has_value:
            symbol, background = 'v', ('#43e087' if has_path else None)
        elif has_dataflow:
            symbol, background = 'd', ('#89b2cc' if has_path else None)
        elif has_path:
            symbol, background = 'c', 'gray'
        else:
            return '<td title="' + tooltip + '"></td>'

        style = 'text-align: center;'
        if background is not None:
            style = 'background-color: ' + background + '; ' + style
        return '<td title="' + tooltip + '" style="' + style + '">' + symbol + '</td>'

    showme_2d_api_table_html(displayed_callsites, render_exists_html)


In [None]:
arg_expr_re = re.compile("->argument([0-9]+)")

def arg_tooltip(cs, ap):
    """Return tooltip text for access path ``ap`` of callsite ``cs``.

    When ``ap`` contains ``->argument<N>``, the node feeding operand ``N`` of
    the callsite is looked up and described, together with whatever
    ``argdata`` knows about it.  Other access paths (e.g. ``->return``) yield
    an empty string.  The text is HTML-escaped because it is placed inside a
    ``title="..."`` attribute.
    """
    match = arg_expr_re.search(ap)
    if match is None:
        return ""

    operand_number = int(match.group(1))

    Arg = aliased(cpg.Node)
    arg_node = (
        session.query(Arg)
        .join(
            cpg.Edge,
            (cpg.Edge.source == Arg.uuid)
            & (cpg.Edge.target == cs.uuid)
            & (cpg.Edge.kind == EdgeKind.VALUE_DEFINITION_TO_USE)
            & (cpg.Edge.attributes['is_argument_operand'].as_boolean() == True)
            & (cpg.Edge.attributes['operand_number'].as_integer() == operand_number)
        )
        .one()
    )

    # Escape the node's string form as well, consistent with the argdata text.
    tooltip = html.escape(f"via Arg={arg_node}")
    argdata_txt = argdata(arg_node)
    if argdata_txt:
        tooltip += "\nN.B. " + html.escape(argdata_txt)

    return tooltip

def showme_2d_zoom_table_html(cs1_node, cs1_aps, cs2_node, cs2_aps, render_callsite_relation):
    """Display an HTML table relating the access paths of two callsites.

    Args:
        cs1_node: callsite whose access paths form the rows.
        cs1_aps: access paths of ``cs1_node`` (deduplicated and sorted here).
        cs2_node: callsite whose access paths form the columns.
        cs2_aps: access paths of ``cs2_node`` (deduplicated and sorted here).
        render_callsite_relation: function of ``(row_ap, column_ap)``
            returning the HTML for one ``<td>`` cell.
    """
    row_aps = sorted(set(cs1_aps))
    column_aps = sorted(set(cs2_aps))

    # Demangled names and node string forms are escaped before being embedded
    # in the markup.
    row_callees = '/'.join(f.demangled_name for f in cs1_node.callees)
    column_callees = '/'.join(f.demangled_name for f in cs2_node.callees)
    label = html.escape(f"{row_callees} \\ {column_callees}")
    corner_title = html.escape(f"Rows: {cs1_node}\nColumns: {cs2_node}")

    column_style = (
        "transform: rotate(180deg); writing-mode: vertical-rl; "
        "white-space: nowrap; overflow: visible; text-align: left;"
    )

    html_rows = [
        '<th title="' + arg_tooltip(cs1_node, ap1) + '">' + ap1 + '</th>\n'
        + '\n'.join(render_callsite_relation(ap1, ap2) for ap2 in column_aps)
        for ap1 in row_aps
    ]

    header_cells = ''.join(
        '<td title="' + arg_tooltip(cs2_node, ap2) + '" style="' + column_style + '">' + ap2 + '</td>'
        for ap2 in column_aps
    )
    body_rows = ''.join(
        '<tr style="border: 1px solid black;">' + row + '</tr>' for row in html_rows
    )

    html_table = f"""
    <table>
    <thead>
        <tr>
            <td title="{corner_title}" style="vertical-align:bottom;text-align:right;">{label}</td>
            {header_cells}
        </tr>
    </thead>
    <tbody>
        {body_rows}
    </tbody>
    </table>
    """

    display(HTML(html_table))

def showme_cspair_table_html(containing_fn_uuid, calling_ctx, zoom_cs1, zoom_cs2):
    """Display the access-path relation table for one pair of callsites.

    Rows are the access paths of ``zoom_cs1`` and columns those of
    ``zoom_cs2``.  A cell is filled when the two access paths share a value
    or a dataflow source, and lists the shared item(s).
    """
    row_node = nid(zoom_cs1)
    column_node = nid(zoom_cs2)

    # (row access path, column access path) -> (value uuid, value text)
    related_values = {}
    for (cs1, _, ap1, _, _, ap2, value_uuid, value_text) in callsites_related_by_value(calling_ctx, [zoom_cs1, zoom_cs2]):
        if zoom_cs1 == cs1:
            related_values[(ap1, ap2)] = (value_uuid, value_text)

    # (row access path, column access path) -> dataflow source text
    related_dataflows = {}
    for (cs1, _, ap1, _, _, ap2, _, dataflow_text) in callsites_related_by_dataflow(calling_ctx, [zoom_cs1, zoom_cs2]):
        if zoom_cs1 == cs1:
            related_dataflows[(ap1, ap2)] = dataflow_text

    row_paths = set()
    column_paths = set()
    all_paths = session.query(callsite_access_paths(calling_ctx, [zoom_cs1, zoom_cs2])).all()
    for (cs, ap, _, _) in all_paths:
        owner = row_paths if cs == zoom_cs1 else column_paths
        owner.add(ap)

    def render_zoom_pair_html(s, t):
        pair = (s, t)
        shares_value = pair in related_values
        shares_dataflow = pair in related_dataflows

        tooltip = ('v' if shares_value else '.') + ('d' if shares_dataflow else '.')
        if shares_value:
            value_text = related_values[pair][1]
            tooltip += "\n\nShared value: '" + html.escape(value_text) + "'"
        if shares_dataflow:
            dataflow_text = related_dataflows[pair]
            tooltip += "\n\nShared dataflow: '" + html.escape(dataflow_text) + "'"

        # combine identical value and dataflow details
        if shares_value and shares_dataflow and value_text == dataflow_text:
            relation_details = ["Value & Dataflow: " + html.escape(value_text)]
        else:
            relation_details = []
            if shares_value:
                relation_details.append("Value: " + html.escape(value_text))
            if shares_dataflow:
                relation_details.append("Dataflow: " + html.escape(dataflow_text))

        if shares_value:
            cell_style = 'style="border: 1px solid black; text-align: left; background-color: #43e087;"'
            fallback = 'v'
        elif shares_dataflow:
            cell_style = 'style="border: 1px solid black; text-align: left; background-color: #89b2cc;"'
            fallback = 'd'
        else:
            cell_style = 'style="border: 1px solid black; text-align: center;"'
            fallback = None

        if fallback is None:
            cell_content = ''
        elif relation_details:
            cell_content = '<br /><br />'.join(relation_details)
        else:
            cell_content = fallback

        return '<td title="' + tooltip + '" ' + cell_style + '>' + cell_content + '</td>'

    showme_2d_zoom_table_html(row_node, row_paths, column_node, column_paths, render_zoom_pair_html)

### conditional edges between callsites

In [None]:
def cdg_related_to_callsites(callsite_uuids):
    """Return UUIDs of control-dependence nodes relevant to the given callsites.

    A reversed path query that stops at the callsites follows
    control-dependence edges; the endpoints of every edge kept by that query
    are collected, and the input callsites themselves are left out of the
    returned set.
    """
    control_dependence_kinds = [
        EdgeKind.TERMINATOR_INSTRUCTION_TO_CONTROL_DEPENDENT_INSTRUCTION,
        EdgeKind.FUNCTION_ENTRY_TO_CONTROL_DEPENDENT_INSTRUCTION,
    ]

    cdg_paths = (
        db.PathBuilder(db.Path)
        .stopping_at(lambda Node: Node.uuid.in_(callsite_uuids))
        .continuing_while(lambda _, Edge: Edge.kind.in_(control_dependence_kinds))
        .reverse()
        .build(cpg, keep_start=False, keep_edge=True)
    )

    edge_endpoints = (
        session.query(cdg_paths)
        .join(cpg.Edge, cpg.Edge.uuid == cdg_paths.edge)
        .with_entities(cpg.Edge.source, cpg.Edge.target)
        .all()
    )

    related_uuids = set()
    for (edge_source, edge_target) in edge_endpoints:
        related_uuids.update((edge_source, edge_target))

    # omit the input callsite nodes
    return related_uuids.difference(callsite_uuids)

In [None]:
def cfg_between_avoiding(start_uuid, end_uuid, avoid_uuids):
    """Return UUIDs of nodes lying on CFG paths from start to end.

    A node qualifies when it is reachable forward from ``start_uuid`` and
    backward from ``end_uuid`` along INSTRUCTION_TO_SUCCESSOR_INSTRUCTION
    edges, without the traversal stepping onto the start, the end or any of
    ``avoid_uuids``.  The start and end nodes are not part of the result.
    """
    # asking to "reach end_uuid avoiding end_uuid" doesn't make sense
    assert end_uuid not in avoid_uuids

    barrier_uuids = [start_uuid, end_uuid] + list(avoid_uuids)

    forward_from_start = (
        db.PathBuilder(db.Path)
        .starting_at(lambda Node: Node.uuid == start_uuid)
        .continuing_while(
            lambda _, Edge: (
                (Edge.kind == EdgeKind.INSTRUCTION_TO_SUCCESSOR_INSTRUCTION)
                & (Edge.target.notin_(barrier_uuids))
            )
        )
        .build(cpg, keep_start=False)
    )

    backward_from_end = (
        db.PathBuilder(db.Path)
        .stopping_at(lambda Node: Node.uuid == end_uuid)
        .continuing_while(
            lambda _, Edge: (
                (Edge.kind == EdgeKind.INSTRUCTION_TO_SUCCESSOR_INSTRUCTION)
                & (Edge.source.notin_(barrier_uuids))
            )
        )
        .reverse()
        .build(cpg, keep_start=False)
    )

    # A node is "between" when both traversals arrive at it.
    meeting_points = (
        session.query(forward_from_start)
        .join(backward_from_end, forward_from_start.target == backward_from_end.source)
        .with_entities(forward_from_start.target)
        # omit input nodes
        .filter(forward_from_start.target.notin_([start_uuid, end_uuid]))
        .distinct()
        .all()
    )
    return [row[0] for row in meeting_points]

In [None]:
def callsite_pair_conditionals_avoiding(callsite1_uuid, callsite2_uuid, avoid_uuids):
    """Return UUIDs of controlling nodes relevant to the CFG path from cs1 to cs2.

    The result is the set of control-dependence nodes found for either
    callsite that also lie between the two callsites in the CFG, i.e. are
    reachable forward from cs1 and backward from cs2 through paths that
    don't pass through ``avoid_uuids``.
    """
    nodes_between = set(
        cfg_between_avoiding(callsite1_uuid, callsite2_uuid, avoid_uuids)
    )
    controlling_nodes = cdg_related_to_callsites([callsite1_uuid, callsite2_uuid])
    return controlling_nodes.intersection(nodes_between)

### graph of relations

Collapse callsites to the same function into a single node, providing a summary view.

In [None]:
def cfg_can_reach_avoiding(start_uuid, end_uuid, avoid_uuids):
    """Tell whether ``end_uuid`` is CFG-reachable from ``start_uuid``.

    The traversal follows INSTRUCTION_TO_SUCCESSOR_INSTRUCTION edges and may
    not step onto the start node again or onto any of ``avoid_uuids``.

    Returns ``True`` when at least one such path exists, ``False`` otherwise.
    """
    assert(end_uuid not in avoid_uuids) # "reach end_uuid avoiding end_uuid" doesn't make sense!

    # The end node must stay steppable, so it is deliberately not blocked.
    blocked_uuids = [start_uuid] + list(avoid_uuids)

    start_forward_cfg = (
        db.PathBuilder(db.Path)
        .starting_at(lambda Node: Node.uuid == start_uuid)
        .continuing_while(
            lambda _, Edge: (
                (Edge.kind == EdgeKind.INSTRUCTION_TO_SUCCESSOR_INSTRUCTION)
                & (Edge.target.notin_(blocked_uuids))
            )
        )
        .stopping_at(lambda Node: Node.uuid == end_uuid)
        .build(cpg, keep_start=False)
    )

    return session.query(start_forward_cfg).first() is not None

In [None]:
def rets_reachable_from(instruction_uuids):
    """Return the Ret instructions CFG-reachable from the given instructions.

    A forward traversal over INSTRUCTION_TO_SUCCESSOR_INSTRUCTION edges is
    started at ``instruction_uuids``.  Only Ret instructions that the
    traversal arrives at, and that belong to a function containing one of the
    starting instructions, are returned.

    Returns a list of distinct result rows, each holding one ``cpg.Ret``.
    """
    F = aliased(cpg.Function)
    B = aliased(cpg.Block)

    # Functions that contain the starting instructions.
    starting_fns = (
        session.query(cpg.Instruction)
        .filter(cpg.Instruction.uuid.in_(instruction_uuids))
        .join(B, cpg.Instruction.parent_block)
        .join(F, B.parent_function)
        .with_entities(F.uuid)
        .distinct()
    )

    start_forward_cfg = (
        db.PathBuilder(db.Path)
        .starting_at(lambda Node: Node.uuid.in_(instruction_uuids))
        .continuing_while(
            lambda _, Edge: Edge.kind == EdgeKind.INSTRUCTION_TO_SUCCESSOR_INSTRUCTION
        )
        .build(cpg, keep_start=False)
    )

    return (
        session.query(start_forward_cfg)
        # Tie each Ret to the end of a traversed path.  Without this join the
        # path query does not constrain the result at all, and every Ret of
        # the starting functions would be returned.
        .join(cpg.Ret, cpg.Ret.uuid == start_forward_cfg.target)
        .join(B, cpg.Ret.parent_block)
        .join(F, B.parent_function)
        .filter(F.uuid.in_(starting_fns))
        .with_entities(cpg.Ret)
        .distinct()
        .all()
    )

In [None]:
# labels for relevant control-dependent edges:
# - conditionals between s and t, 
# - through paths that don't pass through avoid_uuids,
# - that determine whether t executes or not
def conditional_edge_labels(s, t, avoid_uuids):
    """Return label lines for the conditionals relevant to the edge s -> t.

    The relevant nodes are those returned by
    ``callsite_pair_conditionals_avoiding``.  Each one becomes a
    ``"<line>: <label>"`` entry; entries are sorted by line number and
    deduplicated.  ``["(none)"]`` is returned when there are none.
    """
    labelled = []
    for node_uuid in callsite_pair_conditionals_avoiding(s, t, avoid_uuids):
        line_no = nid(node_uuid).location['line']
        labelled.append((line_no, f"{line_no}: {table_label(node_uuid)}"))

    if not labelled:
        return ["(none)"]

    # dict.fromkeys deduplicates while preserving the sorted order
    return list(dict.fromkeys(text for (_, text) in sorted(labelled)))

In [None]:
def showme_graphviz(containing_fn_uuid, calling_ctx, api_fns, relation_skipper, show_entry, show_returns):
    """Render three Graphviz views of API usage in one function/context.

    The views are displayed in this order:

    1. a collapsed callsite graph (one node per callee name) with gray edges
       for CFG reachability, green edges for value relations and blue edges
       for dataflow relations;
    2. an undirected dataflow graph between the collapsed callsites;
    3. a per-callsite graph whose edges carry the labels produced by
       ``conditional_edge_labels``, optionally with entry and return nodes.

    Each graph is laid out with ``dot``, written to a fixed-name PNG file
    (``usage-finder-graphviz-*.png``) and then displayed inline.

    Args:
        containing_fn_uuid: UUID of the function whose API callsites are shown.
        calling_ctx: calling context the callsites are scoped to.
        api_fns: the functions that make up the API.
        relation_skipper: optional callable ``(cs1, ap1, cs2, ap2) -> bool``;
            value/dataflow relations for which it returns true are dropped.
            A falsy value disables filtering.
        show_entry: when true, add a node for each function entry point.
        show_returns: when true, add a node for each return instruction.
    """
    def cs2fn(callsite_uuid):
        return nid(callsite_uuid).callees[0].name

    def cs_label(callsite_uuid):
        return f"{nid(callsite_uuid).location['line']}: {cs2fn(callsite_uuid)}(...) #{callsite_uuid}"

    def edge_label(lbl_lines):
        # Source text is escaped so that embedded newlines do not break the
        # label; Graphviz's "\l" then ends each line left-justified. The
        # backslash is doubled because "\l" is not a valid Python escape.
        return ''.join(f"{txt.encode('unicode_escape').decode('utf-8')}\\l"
                       for txt in lbl_lines)

    source_callsites = set()
    target_callsites = set()
    exists_callsite_path = set()

    for (s, _, t, _) in callsites_related_by_intra_cpg(scoped_api_callsite_uuids(containing_fn_uuid, calling_ctx, api_fns)):
        source_callsites.add(s)
        target_callsites.add(t)
        exists_callsite_path.add((s, t))

    exists_values_relation = set()
    for (cs1, _, ap1, cs2, _, ap2, _, _) in callsites_related_by_value(calling_ctx, scoped_api_callsite_uuids(containing_fn_uuid, calling_ctx, api_fns)):
        if relation_skipper and relation_skipper(cs1, ap1, cs2, ap2):
            continue
        exists_values_relation.add((cs1, cs2))

    exists_dataflow_relation = set()
    for (cs1, _, ap1, cs2, _, ap2, _, _) in callsites_related_by_dataflow(calling_ctx, scoped_api_callsite_uuids(containing_fn_uuid, calling_ctx, api_fns)):
        if relation_skipper and relation_skipper(cs1, ap1, cs2, ap2):
            continue
        exists_dataflow_relation.add((cs1, cs2))

    collapsedCallsiteG = pgv.AGraph(directed=True, strict=False)
    dataflowG = pgv.AGraph(directed=False, strict=False)
    callsiteCondsG = pgv.AGraph(directed=True, strict=False)

    all_callsites = source_callsites.union(target_callsites)

    # collapse callsites to the same function into a single node for that function
    for fname in {cs2fn(cs) for cs in all_callsites}:
        collapsedCallsiteG.add_node(fname)
        dataflowG.add_node(fname)

    # do NOT collapse the callsiteCondsG nodes: create a node for each callsite
    for cs in all_callsites:
        callsiteCondsG.add_node(cs_label(cs), style="filled", fillcolor="#94d0f2")

    rendered_value_relation = {(cs2fn(s), cs2fn(t)) for (s, t) in exists_values_relation}
    rendered_dataflow_relation = {(cs2fn(s), cs2fn(t)) for (s, t) in exists_dataflow_relation}

    # NOTE(review): a pair is drawn only when a <= b, which avoids drawing an
    # undirected edge twice but presumably relies on the relation containing
    # both orientations of every pair -- confirm.
    for (a, b) in rendered_dataflow_relation:
        if a <= b:
            dataflowG.add_edge(a, b, color="#477694")

    for (a, b) in {(cs2fn(s), cs2fn(t)) for (s, t) in exists_callsite_path}:
        collapsedCallsiteG.add_edge(a, b, color="gray")

        if (a, b) in rendered_value_relation:
            collapsedCallsiteG.add_edge(a, b, color="#23ba65")

        if (a, b) in rendered_dataflow_relation:
            collapsedCallsiteG.add_edge(a, b, color="#477694")

    display(Markdown("## Collapsed Graph Representation"))
    display(Markdown('This is a summary of the "All API Features" table. All callsites for the same function have been collapsed into a single node.'))

    #collapsedCallsiteG.graph_attr["epsilon"] = "0.001"
    collapsedCallsiteG.graph_attr["splines"] = "ortho"
    #print(collapsedCallsiteG.string()) # print dot file to standard output, for debugging
    collapsedCallsiteG.layout("dot")    # dot, neato, etc
    collapsedCallsiteG.draw("usage-finder-graphviz-collapsed.png")  # write to file

    display(Image("usage-finder-graphviz-collapsed.png"))

    display(Markdown("## Dataflow Representation"))
    display(Markdown('This shows the dataflow relation between (collapsed) callsites to functions (e.g. related arguments).'))

    dataflowG.layout("dot")
    dataflowG.draw("usage-finder-graphviz-dataflow.png")

    display(Image("usage-finder-graphviz-dataflow.png"))

    # graph with edges labeled with source lines (if available), sorted by line number
    ordered_targets = sorted(target_callsites)
    for s in sorted(source_callsites):
        for t in ordered_targets:
            if s == t:
                continue # no self edges

            # only show edges if there is a CFG path between s and t not through any other callsite
            others = all_callsites.difference([t])
            if cfg_can_reach_avoiding(s, t, others):
                lbl_lines = conditional_edge_labels(s, t, others)
                callsiteCondsG.add_edge(cs_label(s), cs_label(t),
                                        label=edge_label(lbl_lines))

    # add node and edges for entry point of function
    if show_entry:
        entry_instrs = {nid(cs).parent_block.parent_function.entry_block.instructions[0] for cs in all_callsites}
        for entry_instr in entry_instrs:
            entry_node_txt = f"Entry: {entry_instr.parent_block.parent_function.demangled_name}()"
            callsiteCondsG.add_node(entry_node_txt, style="filled", fillcolor="#feffd6")

            for api_cs in all_callsites:
                others = all_callsites.difference([api_cs])
                if cfg_can_reach_avoiding(entry_instr.uuid, api_cs, others):
                    lbl_lines = conditional_edge_labels(entry_instr.uuid, api_cs, others)
                    callsiteCondsG.add_edge(entry_node_txt, cs_label(api_cs),
                                            color="#b3b572",
                                            label=edge_label(lbl_lines))

    # add return nodes and edges
    if show_returns:
        for ret in rets_reachable_from(all_callsites):
            ret_node_txt = f"{ret.attributes['location']['line']}: Return #{ret.uuid}"
            callsiteCondsG.add_node(ret_node_txt, style="filled", fillcolor="#d5a6f7")

            for api_cs in all_callsites:
                if cfg_can_reach_avoiding(api_cs, ret.uuid, all_callsites):
                    # NOTE: no label, conditional labels only apply to execution of API functions
                    callsiteCondsG.add_edge(cs_label(api_cs), ret_node_txt, color="#9a74b5")

    display(Markdown("## Conditional Representation"))
    display(Markdown('This shows the relevant conditionals between callsites.'))

    callsiteCondsG.layout("dot")
    callsiteCondsG.draw("usage-finder-graphviz-conds.png")

    display(Image("usage-finder-graphviz-conds.png"))


<a id="start_here"></a>

```
██╗   ██╗███████╗ █████╗  ██████╗ ███████╗███████╗██╗███╗   ██╗██████╗ ███████╗██████╗ 
██║   ██║██╔════╝██╔══██╗██╔════╝ ██╔════╝██╔════╝██║████╗  ██║██╔══██╗██╔════╝██╔══██╗
██║   ██║███████╗███████║██║  ███╗█████╗  █████╗  ██║██╔██╗ ██║██║  ██║█████╗  ██████╔╝
██║   ██║╚════██║██╔══██║██║   ██║██╔══╝  ██╔══╝  ██║██║╚██╗██║██║  ██║██╔══╝  ██╔══██╗
╚██████╔╝███████║██║  ██║╚██████╔╝███████╗██║     ██║██║ ╚████║██████╔╝███████╗██║  ██║
 ╚═════╝ ╚══════╝╚═╝  ╚═╝ ╚═════╝ ╚══════╝╚═╝     ╚═╝╚═╝  ╚═══╝╚═════╝ ╚══════╝╚═╝  ╚═╝
```




# UsageFinder Users: START HERE

Notion of an API = collection of functions of interest.

Approach:

- Select a CPG
- Identify internal and/or external functions of interest (the "API"). Shift-click to make multiple selections.
- Identify API features = function arguments or returns to omit from some analysis (optional; don't select any to analyze all features)
- Select functions to explore from a list of functions/contexts that each make more than one API call.
- Visualize API usage in those functions. Optionally: include or exclude Entry points and/or Return points of each function. For each, UsageFinder generates
  1. a table showing the relationship between all callsites to API functions: is `g()` control flow reachable from `f()`? are those specific calls related by use of the same value? is there dataflow between those specific calls?
  2. a graph visualization summarizing the table, by collapsing all callsites to a single target function `f()` into a single node in the graph
  3. Optional: produce a new table but limited only to the API features identified by the user (**if any**). E.g. won't show dataflow relation between `f()` and `g()` if the dataflow is via some argument the user has marked as "not interesting"
  4. a graph visualization of the dataflow relation between (collapsed) callsites to functions (e.g. via related arguments).
  5. graph showing relevant conditionals between API calls (only conditionals related to whether the target function is/isn't executed).
  6. table listing relevant conditionals between API calls
- Select individual pairs of API callsites for detailed analysis. For each: show a table with how arguments and return values (and their dereferences, if relevant) are related by value and/or by dataflow.

In [None]:
# Database session used by the queries in this notebook.
session = db.new_session()
# Currently loaded CPG; assigned by setup_selected_cpg() once a build is chosen.
cpg = None

# RadioButtons widget listing the builds; created by refresh_available_cpg().
build_picker = None

def refresh_available_cpg(arg):
    """Button callback: rebuild and display the radio list of built builds.

    Replaces the module-level ``build_picker`` with a new widget holding one
    option per build whose state is ``BuildState.Built``.

    Args:
        arg: the value passed by the button's click handler (unused).
    """
    global build_picker

    clear_output()
    display(btn_refresh_available_cpg)

    built = session.query(db.Build).filter(db.Build.state == BuildState.Built).all()
    choices = []
    for b in built:
        binary = b.bitcode_artifact.attributes['binary_filename']
        analysis = b.options['pointer_analysis']
        sensitivity = b.options['context_sensitivity']
        choices.append((f"{binary} ({analysis} {sensitivity}): {b.uuid}", b.uuid))
    choices.sort()

    build_picker = widgets.RadioButtons(
        options=choices,
        layout={'width': 'max-content'},
        description='Builds:',
        disabled=False
    )
    display(build_picker)
    

# Button that (re)loads the list of available builds; shown when the cell runs.
btn_refresh_available_cpg = widgets.Button(description = 'Refresh list of CPGs', 
                                           layout=widgets.Layout(width='auto'),
                                           button_style='primary')   
btn_refresh_available_cpg.on_click(refresh_available_cpg)
display(btn_refresh_available_cpg)

In [None]:
# SelectMultiple widgets for declared-only (external) and defined (internal)
# functions; both are created by setup_selected_cpg().
external_fns_picker = None
internal_fns_picker = None

def get_selected_api():
    """Return the selected API functions: internal selections first, then external.

    Each selected UUID is looked up as a ``cpg.Function`` in the current session.
    """
    selected = []
    for picker in (internal_fns_picker, external_fns_picker):
        for fn_uuid in picker.value:
            selected.append(session.query(cpg.Function).get(fn_uuid))
    return selected

def setup_selected_cpg(arg):
    """Button callback: load the selected build's CPG and show function pickers.

    Assigns the module-level ``cpg`` and replaces ``internal_fns_picker``
    (functions with ``is_declaration=False``) and ``external_fns_picker``
    (functions with ``is_declaration=True``) with freshly built widgets.

    Args:
        arg: the value passed by the button's click handler (unused).
    """
    global cpg, external_fns_picker, internal_fns_picker

    # build_picker stays None until the build list has been refreshed.
    if build_picker is None or build_picker.value is None:
        clear_output()
        display(btn_setup_selected_cpg)
        print("No build selected: click 'Refresh list of CPGs' and pick a build first.")
        return

    cpg = session.graph_from_build(session.query(db.Build).get(build_picker.value))

    clear_output()
    display(btn_setup_selected_cpg)
    print("Loading...")
    print(f"Loaded CPG with {session.query(cpg.Node).count()} nodes.")

    def make_picker(description, is_declaration, label_for):
        # One selectable entry per matching function, ordered by str() of the entry.
        functions = session.query(cpg.Function).filter_by(is_declaration=is_declaration).all()
        return widgets.SelectMultiple(
            options=sorted([(label_for(f), f.uuid) for f in functions], key=str),
            value=[],
            rows=20,
            description=description,
            disabled=False,
            layout=widgets.Layout(width='80%')
        )

    def internal_label(f):
        return f"{f.demangled_name}: {len(f.callsites)} callsites. {f.llvm_type.attributes['pretty_string']}"

    def external_label(f):
        return f"{f.demangled_name}: {len(f.callsites)} callsites. {'(input related)' if len(f.input_signatures) > 0 else ''} {f.llvm_type.attributes['pretty_string']}"

    internal_fns_picker = make_picker('Internal Fns:', False, internal_label)
    display(internal_fns_picker)

    external_fns_picker = make_picker('External Fns:', True, external_label)
    display(external_fns_picker)
    

# Button that loads the build chosen in build_picker and lists its functions.
btn_setup_selected_cpg = widgets.Button(description = 'Show Functions for selected CPG',
                                        layout=widgets.Layout(width='auto'),
                                        button_style='primary')
btn_setup_selected_cpg.on_click(setup_selected_cpg)
display(btn_setup_selected_cpg)

# ignore certain arguments/returns

In [None]:
# Empty placeholder so that `.value` can be read before the user populates the
# picker; replaced by show_features_to_ignore_picker().
features_to_ignore_picker = widgets.SelectMultiple()

def show_function_feature(f, arg):
    """Return the display text for one argument of function ``f``.

    The text is ``"<function>: arg<N>=<name>"``; when reading ``arg.name``
    raises ``KeyError`` the argument object itself is formatted instead.
    """
    try:
        shown = arg.name
    except KeyError:
        shown = arg
    return f"{f.demangled_name}: arg{arg.attributes['argument_number']}={shown}"

def function_to_features(f):
    """List the selectable features of ``f`` as ``(label, (name, path))`` pairs.

    The first entry is the return value (path ``'->return'``); it is followed
    by one entry per argument (path ``'->argument<N>'``).
    """
    fn_name = f.demangled_name
    features = [(f"{fn_name}: return", (fn_name, '->return'))]
    for arg in f.arguments:
        arg_path = f"->argument{arg.attributes['argument_number']}"
        features.append((show_function_feature(f, arg), (f.demangled_name, arg_path)))
    return features


def show_features_to_ignore_picker(arg):
    """Button callback: list every feature of the selected API for ignoring.

    Replaces the module-level ``features_to_ignore_picker`` with a widget
    holding the return/argument features of each selected API function.

    Args:
        arg: the value passed by the button's click handler (unused).
    """
    global features_to_ignore_picker

    available = []
    for api_fn in get_selected_api():
        available.extend(function_to_features(api_fn))
    available.sort(key=str)

    features_to_ignore_picker = widgets.SelectMultiple(
        options=available,
        value=[],
        rows=20,
        description='Ignore these:',
        disabled=False,
        layout=widgets.Layout(width='80%')
    )

    clear_output()
    display(btn_show_features_to_ignore_picker)
    api_names = ', '.join([f.demangled_name for f in get_selected_api()])
    print(f"Selected API: {api_names}")
    display(features_to_ignore_picker)

# Button that lists the features (returns/arguments) of the selected API.
btn_show_features_to_ignore_picker = widgets.Button(description = 'Show Available API Features',
                                                    layout=widgets.Layout(width='auto'),
                                                    button_style='primary')
btn_show_features_to_ignore_picker.on_click(show_features_to_ignore_picker)

display(btn_show_features_to_ignore_picker)

## select containing functions and contexts that call this API


In [None]:
# Empty placeholder so that `.value` can be read before the user populates the
# picker; replaced by show_fn_context_picker().
fn_context_picker = widgets.SelectMultiple()

def show_fn_context_picker(arg):
    """Button callback: list the functions/contexts that use the selected API.

    Replaces the module-level ``fn_context_picker`` with a widget holding one
    ``(containing function UUID, calling context)`` entry per result of
    ``function_and_contexts_that_call_multiple_api_functions``.

    Args:
        arg: the value passed by the button's click handler (unused).
    """
    global fn_context_picker

    locations = []
    for (containing_fn_uuid, calling_ctx) in function_and_contexts_that_call_multiple_api_functions(get_selected_api()):
        label = f"{nid(containing_fn_uuid).demangled_name}: context={calling_ctx}"
        locations.append((label, (containing_fn_uuid, calling_ctx)))
    locations.sort(key=str)

    fn_context_picker = widgets.SelectMultiple(
        options=locations,
        value=[],
        rows=20,
        description='Locations',
        disabled=False,
        layout=widgets.Layout(width='80%')
    )

    clear_output()
    display(btn_show_fn_context_picker)
    api_names = ', '.join([f.demangled_name for f in get_selected_api()])
    print(f"Selected API: {api_names}")
    display(fn_context_picker)

# Button that lists the functions/contexts calling the selected API.
btn_show_fn_context_picker = widgets.Button(description = 'Show places where API is used',
                                            layout=widgets.Layout(width='auto'),
                                            button_style='primary')
btn_show_fn_context_picker.on_click(show_fn_context_picker)

display(btn_show_fn_context_picker)

In [None]:
# Possible improvements to be made
# - support intrinsics (e.g. currently memset only shows 'return')
# - support varargs (e.g. snprintf only shows 'return')
# - better matching: right now we're just string prefix matching the access path text
# - make lookups/checks more efficient


def skip_this_relation(cs1, ap1, cs2, ap2):
    """Tell whether a relation goes through a feature the user chose to ignore.

    Args:
        cs1: UUID of the first callsite.
        ap1: access-path text of the first callsite's side of the relation.
        cs2: UUID of the second callsite.
        ap2: access-path text of the second callsite's side of the relation.

    Returns:
        True when either access path starts with an ignored feature of one of
        that callsite's callees, False otherwise.
    """
    from collections import defaultdict

    ignored = defaultdict(set)
    for (fname, arg_txt) in features_to_ignore_picker.value:
        ignored[fname].add(arg_txt)

    # Nothing is ignored: answer without looking the callsites up.
    if not ignored:
        return False

    def touches_ignored_feature(callsite_uuid, access_path):
        for callee in nid(callsite_uuid).callees:
            # .get() does not insert missing keys into the defaultdict
            for feature in ignored.get(callee.demangled_name, ()):
                if not access_path.startswith(feature):
                    continue
                # A digit right after the prefix means a different argument
                # number, e.g. '->argument1' must not match '->argument10'.
                if access_path[len(feature):len(feature) + 1].isdigit():
                    continue
                return True
        return False

    return touches_ignored_feature(cs1, ap1) or touches_ignored_feature(cs2, ap2)


def show_graphs(arg):
    """Button callback: render tables and graphs for each selected location.

    For every ``(function, context)`` chosen in ``fn_context_picker`` this
    shows the unfiltered feature table, the filtered table when any features
    are marked to be ignored, and the Graphviz views.

    Args:
        arg: the value passed by the button's click handler (unused).
    """
    clear_output()
    api_names = ', '.join([f.demangled_name for f in get_selected_api()])
    print(f"Selected API: {api_names}")
    for control in (chk_show_entry, chk_show_returns, btn_show_graphs):
        display(control)

    position = 0
    for (containing_fn_uuid, calling_ctx) in fn_context_picker.value:
        position += 1
        heading = f"# [{position}/{len(fn_context_picker.value)}] Function: {nid(containing_fn_uuid).name}\n\nContext: `{calling_ctx}`"
        display(Markdown(heading))
        display(Markdown("## All API Features"))
        # no skipping function here: every relation is shown
        showme_table_html(containing_fn_uuid, calling_ctx, get_selected_api())

        # the filtered table is only shown when something is actually filtered
        if not features_to_ignore_picker.value:
            print("No API features selected to 'ignore'")
        else:
            display(Markdown("## Filtering Identified API Features"))
            print(f"Ignoring relations via these API features: {features_to_ignore_picker.value}")
            showme_table_html(containing_fn_uuid, calling_ctx, get_selected_api(), skip_this_relation)

        showme_graphviz(
            containing_fn_uuid,
            calling_ctx,
            get_selected_api(),
            skip_this_relation,
            chk_show_entry.value,
            chk_show_returns.value,
        )
    print("Done.")

# Checkbox read by show_graphs() to decide whether entry nodes are drawn.
chk_show_entry = widgets.Checkbox(
    value=True,
    description='Include Entry node',
    disabled=False,
    indent=False
)
# Checkbox read by show_graphs() to decide whether return nodes are drawn.
chk_show_returns = widgets.Checkbox(
    value=True,
    description='Include Return nodes',
    disabled=False,
    indent=False
)
# Button that renders the tables and graphs for the selected locations.
btn_show_graphs = widgets.Button(description = 'Visualize API Usage',
                                 layout=widgets.Layout(width='auto'),
                                 button_style='primary')
btn_show_graphs.on_click(show_graphs)
display(chk_show_entry)
display(chk_show_returns)
display(btn_show_graphs)

# show related arguments for specific callsite pairs

In [None]:
# Empty placeholder so that `.value` can be read before the user populates the
# picker; replaced by setup_zoom_cs_pair().
zoom_cs_picker = widgets.SelectMultiple()

def setup_zoom_cs_pair(arg):
    """Button callback: list the callsite pairs available for detailed analysis.

    Replaces the module-level ``zoom_cs_picker`` with a widget holding one
    entry per pair of distinct API callsites reported by
    ``callsites_related_by_intra_cpg`` for each selected function/context.

    Args:
        arg: the value passed by the button's click handler (unused).
    """
    global zoom_cs_picker

    print("collecting callsite pairs...")

    def callee_names(callsite_uuid):
        return '/'.join([f.demangled_name for f in nid(callsite_uuid).callees])

    pair_options = []
    for (containing_fn_uuid, calling_ctx) in fn_context_picker.value:
        api_callsites = scoped_api_callsite_uuids(containing_fn_uuid, calling_ctx, get_selected_api())
        for (s, _, t, _) in callsites_related_by_intra_cpg(api_callsites):
            if s == t:
                continue
            label = (f"{nid(containing_fn_uuid).demangled_name}: "
                     f"{callee_names(s)} #{s} -> {callee_names(t)} #{t} "
                     f"(context = {calling_ctx})")
            pair_options.append((label, (containing_fn_uuid, calling_ctx, s, t)))
    pair_options.sort(key=str)

    zoom_cs_picker = widgets.SelectMultiple(
        options=pair_options,
        value=[],
        rows=20,
        description='Callsite Pairs:',
        disabled=False,
        layout=widgets.Layout(width='80%')
    )

    clear_output()
    display(btn_setup_zoom_cs_pair)
    display(zoom_cs_picker)
    

# Button that (re)builds the list of callsite pairs for detailed analysis.
btn_setup_zoom_cs_pair = widgets.Button(description = 'Refresh list of callsites available for detailed analysis',
                                        layout=widgets.Layout(width='auto'),
                                        button_style='primary')
btn_setup_zoom_cs_pair.on_click(setup_zoom_cs_pair)
display(btn_setup_zoom_cs_pair)

In [None]:
def show_zoom_graphs(arg):
    """Button callback: show the detailed relation table for each selected pair.

    For every callsite pair chosen in ``zoom_cs_picker`` this prints a heading,
    an overview of both callsites (location, source text when available, LLVM
    text) and the table from ``showme_cspair_table_html``.

    Args:
        arg: the value passed by the button's click handler (unused).
    """
    def callee_calls(node):
        return '/'.join([f.demangled_name + '(...)' for f in node.callees])

    def describe(node):
        # location, then source text (if recorded), then the LLVM text
        text = f"  {node.attributes['location']['file']}:{node.attributes['location']['line']}"
        try:
            text += f" {node.attributes['source_code']}"
        except KeyError:
            text += " (source code not found)"
        return text + f"\n  LLVM: {node.attributes['pretty_string']}\n"

    clear_output()
    display(btn_show_zoom_graphs)

    position = 0
    for (containing_fn_uuid, calling_ctx, cs1, cs2) in zoom_cs_picker.value:
        position += 1
        display(Markdown(f"# [{position}/{len(zoom_cs_picker.value)}] Function: {nid(containing_fn_uuid).name}\n\nContext: `{calling_ctx}`"))
        row_node = nid(cs1)
        col_node = nid(cs2)

        display(Markdown(f"### {callee_calls(row_node)} #{row_node.uuid} -> {callee_calls(col_node)} #{col_node.uuid}"))

        overview_parts = ["```\n", "Rows:\n"]
        overview_parts.append(describe(row_node))
        overview_parts.append("Columns:\n")
        overview_parts.append(describe(col_node))
        overview_parts.append("```")
        display(Markdown(''.join(overview_parts)))

        showme_cspair_table_html(containing_fn_uuid, calling_ctx, cs1, cs2)

    
# Button that renders the detailed tables for the selected callsite pairs.
btn_show_zoom_graphs = widgets.Button(description = 'Visualize callsite-pair relationships',
                                 layout=widgets.Layout(width='auto'),
                                 button_style='primary')
btn_show_zoom_graphs.on_click(show_zoom_graphs)
display(btn_show_zoom_graphs)

To return to the start of UsageFinder, <a href="#start_here">CLICK HERE</a>.