# Analysis of instructions used by app

In [68]:
from pathlib import Path

import pandas as pd
import plotly.express as px
import re
import subprocess as sp

In [69]:
path_to_objdump = "objdump"
objdump_args = ["-d", "--no-show-raw-insn", "--no-addresses"]
instruction_regex = r"^\s+([a-z]\S+)(\s+\S+)*$"
instruction_column = "Instruction"

In [70]:
def validate_objdump(path: str) -> None:
    completed_process = sp.run([path, "-v"], capture_output=False)
    completed_process.check_returncode()

validate_objdump(path_to_objdump)

GNU objdump (GNU Binutils for Ubuntu) 2.38
Copyright (C) 2022 Free Software Foundation, Inc.
This program is free software; you may redistribute it under the terms of
the GNU General Public License version 3 or (at your option) any later version.
This program has absolutely no warranty.


In [71]:
def get_elf_instructions(path_to_elf: Path, name: str|None=None) -> pd.DataFrame:
    instructions_count = dict()
    completed_process = sp.run([path_to_objdump, *objdump_args, path_to_elf], capture_output=True)
    completed_process.check_returncode()
    for line in completed_process.stdout.decode("utf-8").splitlines():
        m = re.match(instruction_regex, line)
        if m:
            instruction = m.group(1)
            if instruction not in instructions_count:
                instructions_count[instruction] = 1
            else:
                instructions_count[instruction] += 1
    column = name if name else str(path_to_elf)
    return pd.DataFrame.from_dict(data=instructions_count, columns=[column], orient='index').sort_values(by=[column], ascending=False)
    

## Statistics of *grep* and *sed*

In [72]:
grep_statistics = get_elf_instructions(Path("/usr/bin/grep"), name="grep")
sed_statistics = get_elf_instructions(Path("/usr/bin/sed"), name="sed")
sort_statistics = get_elf_instructions(Path("/usr/bin/sort"), name="sort")
diff_statistics = get_elf_instructions(Path("/usr/bin/diff"), name="diff")

dfs = [grep_statistics, sed_statistics, sort_statistics, diff_statistics]
nan_value = 0
summary = pd.concat(dfs, join='outer', axis=1).fillna(nan_value)
summary.loc[:,'sort']

mov       4599.0
call       948.0
test       562.0
je         609.0
cmp        738.0
           ...  
divsd        1.0
btcq         1.0
setge        0.0
setle        0.0
cmovge       0.0
Name: sort, Length: 156, dtype: float64

### Visualization with plotly

In [73]:
fig = px.histogram(summary, x=summary.index, y=summary.columns, barmode='group', width=3000)
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="left",
    x=0
))

In [74]:
fig = px.histogram(summary, x=summary.index, y=summary.columns, barmode='group', histnorm='percent', width=3000)
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="left",
    x=0
))