### Import necessary modules

In [1]:
from analysis_tool import *

### Ask about any function

In [2]:
# help() prints the signature and docstring of any function from analysis_tool.
help(where_instruction)

Help on function where_instruction in module analysis_tool:

where_instruction(instruction: str, name: str) -> pandas.core.frame.DataFrame
    Leaves only those rows in which the instruction occurs a non-zero number of times.
    :param instruction: Instruction.
    :param name: Name of the dataframe or its beginning.
    :return: Dataframe with selected rows.



In [3]:
# Show the parameters of total_histogram together with their default values.
help(total_histogram)

Help on function total_histogram in module analysis_tool:

total_histogram(names: list[str] | None = None, percent: bool = True, ascending: bool = False, width: int = 2000)
    Builds a histogram of the total instruction usage in dataframes with the names given.
    :param names: None or list of dataframe names (or their beginnings).
    If None, all dataframes in the scope will be used. Default: None.
    :param percent: If True, the histogram will be built by percentage, not by absolute values. Default: True.
    :param ascending: If True, the histogram columns will be sorted in ascending order, otherwise - in descending order.
    Default: False.
    :param width: Width of the histogram. Default: 2000.



### Get data from archives (convenient for artifacts of GitHub Actions workflows)

In [4]:
# Load the collected data from the archives located in "../14.03".
# NOTE(review): the second argument presumably names the directory that receives
# the extracted dataframes - confirm with help(initialize_with_archives).
initialize_with_archives("../14.03", "../14.03_dfs")

### Data analysis

#### How can I look at the first few rows of the dataframe?

In [5]:
# Show the first 7 rows of the dataframe whose name starts with "m" (here: manjaro).
# Like all other functions, head() accepts just the beginning of a dataframe name.
head("m", 7)

Unnamed: 0,filename,file,of,endbr64,sub,mov,test,je,call,add,...,pfmul,pf2id,cvtpi2ps,pswapd,pfadd,pfsub,kandnd,mulw,aad,into
0,/usr/bin/fsck.minix,1,3,11,114,1770,268,296,646,146,...,0,0,0,0,0,0,0,0,0,0
1,/usr/bin/usermod,1,3,70,186,4998,856,842,1684,276,...,0,0,0,0,0,0,0,0,0,0
2,/usr/bin/xtables-legacy-multi,1,3,30,101,3973,498,504,1014,229,...,0,0,0,0,0,0,0,0,0,0
3,/usr/bin/zstd,1,3,48,220,7964,864,604,2277,370,...,0,0,0,0,0,0,0,0,0,0
4,/usr/bin/yat2m,1,3,6,55,939,249,234,309,84,...,0,0,0,0,0,0,0,0,0,0
5,/usr/bin/unexpand,1,3,8,34,980,147,163,211,75,...,0,0,0,0,0,0,0,0,0,0
6,/usr/bin/db_printlog,1,3,97,11,877,148,47,187,13,...,0,0,0,0,0,0,0,0,0,0


#### How many executable files are there in each system? (i.e. count number of rows in each dataframe)

In [6]:
# dfs_list() yields dataframe names (the printed output shows them as plain names),
# and df_len() is given each name to obtain the number of rows.
for name in dfs_list():
    row_count = df_len(name)
    print(f"{name}: {row_count} rows")

manjaro: 3557 rows
ubuntu: 3052 rows
opensuse: 2404 rows


#### How many instructions of each category are there in files in Ubuntu?

In [7]:
# "u" is a name prefix that selects the "ubuntu" dataframe.
divide_into_categories("u")

Unnamed: 0,filename,Other,Central Processing Unit (CPU) instructions set,Single Instruction Multiple Data (SIMD) instructions set,Floating-Point Unit (FPU) instructions set,Transactional Synchronization Extensions (TSX) instructions set,Memory Protection Extensions (MPX) instructions set,Advanced Encryption Standard (AES) instructions set,Virtual Machine Extensions (VMX) instructions set
0,/usr/sbin/rmt-tar,468,8130,0,0,0,0,0,0
1,/usr/bin/mawk,1451,23323,0,0,0,0,0,0
2,/usr/bin/x86_64-linux-gnu-cpp-12,8265,150733,12,0,0,0,0,0
3,/usr/bin/scp,1211,19858,0,0,0,0,0,0
4,/usr/bin/mkfifo,462,4278,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
3047,/DataCollection/venv/lib/python3.10/site-packa...,5,334,0,0,0,0,0,0
3048,/DataCollection/venv/lib/python3.10/site-packa...,2142,4670,0,1890,0,0,0,0
3049,/DataCollection/venv/lib/python3.10/site-packa...,355,14900,28,0,0,0,0,0
3050,/DataCollection/venv/lib/python3.10/site-packa...,181,18212,0,0,0,0,0,0


#### How many instructions of each group are there in files in OpenSUSE?

In [8]:
# The full dataframe name can be passed as well as a prefix of it.
divide_into_groups("opensuse")

Unnamed: 0,filename,Other,Binary Arithmetic Instructions,System Instructions,Bit and Byte Instructions,Control Transfer Instructions,Data Transfer Instructions,Miscellaneous Instructions,Logical Instructions,Shift and Rotate Instructions,...,Primitives of Functions,Fused Arithmetic Instructions,Gather Instructions,Scatter Instructions,Compress Instructions,Pack Instructions,State Management Instructions,FPU and SIMD State Management Instructions,Decimal Arithmetic Instructions,Expand Instructions
0,/usr/bin/python3.10,11,9,10,3,25,8,14,4,3,...,0,0,0,0,0,0,0,0,0,0
1,/DataCollection/venv/lib/python3.10/site-packa...,718,1630,3346,497,2665,902,1103,311,272,...,0,0,0,0,0,0,0,0,0,0
2,/DataCollection/venv/lib/python3.10/site-packa...,2145,8924,35169,6202,25512,3081,6102,5206,647,...,0,0,0,0,0,0,0,0,0,0
3,/usr/lib64/python3.10/lib-dynload/cmath.cpytho...,903,156,656,73,824,268,179,53,15,...,0,0,0,0,0,0,0,0,0,0
4,/DataCollection/venv/lib/python3.10/site-packa...,140,957,5018,845,3410,443,558,952,27,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2399,/DataCollection/venv/lib/python3.10/site-packa...,181,2916,3924,603,4799,4123,511,986,191,...,0,0,0,0,0,0,0,0,0,0
2400,/DataCollection/venv/lib/python3.10/site-packa...,183,2914,3937,610,4820,4132,515,991,190,...,0,0,0,0,0,0,0,0,0,0
2401,/DataCollection/venv/lib/python3.10/site-packa...,11620,3601,5875,0,476,0,35,502,24,...,0,0,0,0,0,0,0,0,0,0
2402,/DataCollection/venv/lib/python3.10/site-packa...,11640,3614,5881,0,478,0,35,502,24,...,0,0,0,0,0,0,0,0,0,0


#### Where are AES instructions used in Manjaro?

In [10]:
# The question is about Manjaro, so select its dataframe with the "m" name prefix
# ("u" would select Ubuntu instead).
where_category(category="Advanced Encryption Standard (AES) instructions set", name="m")

#### Where are SIMD instructions used in Ubuntu?

In [None]:
# "u" is a name prefix that selects the "ubuntu" dataframe.
where_category(category="Single Instruction Multiple Data (SIMD) instructions set", name="u")

#### Where is the AAD instruction used in OpenSUSE?

In [11]:
# Keep only the rows (files) of the "opensuse" dataframe, selected by the "o" name
# prefix, in which the aad instruction occurs a non-zero number of times.
where_instruction(instruction="aad", name="o")

Unnamed: 0,filename,file,of,endbr64,sub,mov,test,je,call,add,...,kxorb,korb,vfnmsub132pd,vmovntdqa,vfnmsub132ps,vmovntps,vmovntpd,vcvtpd2dqy,cvtpd2dq,movntpd
167,/usr/lib/python3.10/site-packages/pip/_vendor/...,1,1,0,261,3341,657,1112,1590,595,...,0,0,0,0,0,0,0,0,0,0
2398,/DataCollection/venv/lib/python3.10/site-packa...,1,1,0,261,3341,657,1112,1590,595,...,0,0,0,0,0,0,0,0,0,0


### Build a histogram of the total instruction category usage

In [None]:
# NOTE(review): no dataframe names are passed; presumably, as with total_histogram,
# this means all loaded dataframes are used - confirm with help(total_categories_histogram).
total_categories_histogram(width=1000)

### Build a histogram of the total instruction group usage

In [13]:
# NOTE(review): no dataframe names are passed; presumably, as with total_histogram,
# this means all loaded dataframes are used - confirm with help(total_groups_histogram).
total_groups_histogram(width=2000)