# Demonstration of some features of the analysis tool

### Import necessary modules

In [1]:
from analysis_tool import *

In [2]:
import plotly.express as px
from plotly.offline import init_notebook_mode
# connected=True makes the notebook load plotly.js from an online CDN rather than
# from the local plotly package, so figures need an internet connection to render.
init_notebook_mode(connected=True)

### Ask about any function

In [3]:
help(where_instruction)

Help on function where_instruction in module analysis_tool:

where_instruction(instruction: str, name: str) -> pandas.core.frame.DataFrame
    !
    Leaves only those rows in which the instruction occurs a non-zero number of times.
        @param instruction: Instruction.
        @param name: Name of the dataframe or its beginning.
        @return Dataframe with selected rows.



In [4]:
help(total_histogram)

Help on function total_histogram in module analysis_tool:

total_histogram(names: list[str] | None = None, percent: bool = True, ascending: bool = False, width: int = 2000) -> None
    !
    Builds a histogram of the total instruction usage in dataframes with the names given.
        @param names: None or list of dataframe names (or their beginnings).
        If None, all dataframes in the scope will be used. Default: None.
        @param percent: If True, the histogram will be built by percentage, not by absolute values. Default: True.
        @param ascending: If True, the histogram columns will be sorted in ascending order,
        otherwise - in descending order. Default: False.
        @param width: Width of the histogram. Default: 2000.



#### Or take a look at the documentation: https://danila-pechenev.github.io/InstructionAnalysisFramework/namespaceanalysis__tool.html

### Get data from archives (convenient for artifacts of GitHub Actions workflows)

In [5]:
# First argument: path to the folder with archives.
# NOTE(review): the second argument looks like the folder where the extracted
# dataframes are kept — confirm against the analysis_tool documentation.
initialize_with_archives("../30.03", "../30.03_dfs")

### Data analysis

#### How can I look at the first few rows of the dataframe?

In [6]:
# Show the first 7 rows of the dataframe whose name starts with "m" ("manjaro" here).
# Every other function also accepts the beginning of a name instead of the full name.
head("m", 7)

Unnamed: 0,filename,file,of,endbr64,sub,mov,test,je,call,add,...,hsubpd,vcvtqq2ps,vcvtusi2ss,vpmovdb,vpexpandd,kshiftlq,pmaxsb,pminsb,vpmacsdd,vcmpgtps
0,/usr/bin/python3.10,1,3,6,3,8,3,6,4,3,...,0,0,0,0,0,0,0,0,0,0
1,/usr/lib/python3.10/lib-dynload/_heapq.cpython...,1,3,16,29,1002,97,131,173,50,...,0,0,0,0,0,0,0,0,0,0
2,/DataCollection/venv/lib/python3.10/site-packa...,1,4,0,371,3011,423,546,614,464,...,0,0,0,0,0,0,0,0,0,0
3,/DataCollection/venv/lib/python3.10/site-packa...,1,4,0,748,32480,5651,7048,7002,1164,...,0,0,0,0,0,0,0,0,0,0
4,/usr/lib/python3.10/lib-dynload/cmath.cpython-...,1,3,29,61,542,81,171,391,80,...,0,0,0,0,0,0,0,0,0,0
5,/usr/lib/libuuid.so.1.3.0,1,3,25,66,1142,65,58,156,380,...,0,0,0,0,0,0,0,0,0,0
6,/DataCollection/venv/lib/python3.10/site-packa...,1,4,0,3670,109394,16710,22408,22752,3886,...,0,0,0,0,0,0,0,0,0,0


#### How many executable files are there in each system? (i.e., count the number of rows in each dataframe)

In [7]:
# dfs_list() yields the names of the dataframes in the scope (strings, not DataFrame
# objects), so `df` here is a name that df_len() resolves to a row count.
for df in dfs_list():
    print(f"{df}: {df_len(df)} rows")

manjaro: 3538 rows
ubuntu: 3037 rows
opensuse: 2381 rows


#### How many instructions of each category are there in files in Ubuntu?

In [8]:
divide_into_categories("u")

Unnamed: 0,filename,Other,Central Processing Unit (CPU) instructions set,Floating-Point Unit (FPU) instructions set,Transactional Synchronization Extensions (TSX) instructions set,Single Instruction Multiple Data (SIMD) instructions set,Advanced Encryption Standard (AES) instructions set,Memory Protection Extensions (MPX) instructions set,Virtual Machine Extensions (VMX) instructions set
0,/usr/bin/dash,1022,18903,0,0,0,0,0,0
1,/usr/lib/x86_64-linux-gnu/libc.so.6,26807,334738,392,46,9427,0,0,0
2,/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2,3285,34982,14,0,172,0,0,0
3,/usr/bin/python3.10,30135,659875,14,0,0,0,0,0
4,/DataCollection/venv/lib/python3.10/site-packa...,718,10820,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
3032,/usr/sbin/readprofile,214,1198,0,0,0,0,0,0
3033,/usr/sbin/e2image,520,3460,0,0,0,0,0,0
3034,/usr/sbin/chroot,397,4022,0,0,0,0,0,0
3035,/usr/sbin/aa-status,340,4013,0,0,0,0,0,0


#### How many instructions of each group are there in files in OpenSUSE?

In [9]:
divide_into_groups("opensuse")

Unnamed: 0,filename,Other,Binary Arithmetic Instructions,System Instructions,Bit and Byte Instructions,Control Transfer Instructions,Data Transfer Instructions,Miscellaneous Instructions,Logical Instructions,Shift and Rotate Instructions,...,Primitives of Functions,Fused Arithmetic Instructions,Gather Instructions,Scatter Instructions,Compress Instructions,Pack Instructions,State Management Instructions,FPU and SIMD State Management Instructions,Decimal Arithmetic Instructions,Expand Instructions
0,/usr/bin/python3.10,11,9,10,3,25,8,14,4,3,...,0,0,0,0,0,0,0,0,0,0
1,/usr/lib64/python3.10/lib-dynload/_heapq.cpyth...,15,144,381,67,345,115,118,42,22,...,0,0,0,0,0,0,0,0,0,0
2,/DataCollection/venv/lib/python3.10/site-packa...,718,1630,3346,497,2665,902,1103,311,272,...,0,0,0,0,0,0,0,0,0,0
3,/DataCollection/venv/lib/python3.10/site-packa...,2145,8924,35169,6202,25512,3081,6102,5206,647,...,0,0,0,0,0,0,0,0,0,0
4,/DataCollection/venv/lib/python3.10/site-packa...,140,957,5018,845,3410,443,558,952,27,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2376,/usr/lib64/spa-0.2/support/libspa-journal.so,63,209,397,67,354,142,224,56,15,...,0,0,0,0,0,0,0,0,0,0
2377,/usr/lib64/spa-0.2/vulkan/libspa-vulkan.so,536,2200,6116,1141,3535,1061,1694,599,74,...,0,0,0,0,0,0,0,0,0,0
2378,/usr/lib64/spa-0.2/audiotestsrc/libspa-audiote...,422,1806,4426,822,2606,528,1151,314,19,...,0,0,0,0,0,0,0,0,0,0
2379,/usr/lib64/spa-0.2/test/libspa-test.so,335,1911,4897,929,2864,607,1322,386,18,...,0,0,0,0,0,0,0,0,0,0


#### Where are AES instructions used in Manjaro?

In [10]:
where_category(category="Advanced Encryption Standard (AES) instructions set", name="manjaro")

Unnamed: 0,filename,Other,Central Processing Unit (CPU) instructions set,Floating-Point Unit (FPU) instructions set,Single Instruction Multiple Data (SIMD) instructions set,Advanced Encryption Standard (AES) instructions set,Memory Protection Extensions (MPX) instructions set,Transactional Synchronization Extensions (TSX) instructions set,Virtual Machine Extensions (VMX) instructions set
0,/usr/lib/libcrypto.so.3,87286,537117,2159,14838,502,3,6,4
1,/usr/lib/libgnutls.so.30.34.3,12792,274696,41,2158,102,1,0,0
2,/usr/lib/libgcrypt.so.20.4.1,31600,169938,54,30076,2106,1,0,0
3,/usr/lib/libfreeblpriv3.so,8575,110817,0,3605,406,0,0,0
4,/usr/bin/chromedriver,206805,2385241,218,5982,102,1,0,0
5,/usr/lib/chromium/chromium,3662207,44275874,474,60058,951,1,0,0


#### Where are SIMD instructions used in Ubuntu?

In [11]:
where_category(category="Single Instruction Multiple Data (SIMD) instructions set", name="u")

Unnamed: 0,filename,Other,Central Processing Unit (CPU) instructions set,Floating-Point Unit (FPU) instructions set,Transactional Synchronization Extensions (TSX) instructions set,Single Instruction Multiple Data (SIMD) instructions set,Advanced Encryption Standard (AES) instructions set,Memory Protection Extensions (MPX) instructions set,Virtual Machine Extensions (VMX) instructions set
0,/usr/lib/x86_64-linux-gnu/libc.so.6,26807,334738,392,46,9427,0,0,0
1,/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2,3285,34982,14,0,172,0,0,0
2,/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.30,18482,256073,179,0,2,0,0,0
3,/usr/lib/x86_64-linux-gnu/libcrypto.so.3,48279,605410,2159,6,14838,502,3,4
4,/DataCollection/venv/lib/python3.10/site-packa...,20598,400861,13951,0,47533,0,0,0
...,...,...,...,...,...,...,...,...,...
107,/usr/lib/snapd/snap-preseed,27364,901059,0,0,3546,0,0,0
108,/usr/bin/x86_64-linux-gnu-dwp,18416,292878,43,0,2,0,0,0
109,/usr/bin/gdb,77566,1398671,171,0,2,0,0,0
110,/usr/lib32/libm.so.6,1981,158256,35347,0,8,0,0,0


#### Where is the AAD instruction used in OpenSUSE?

In [12]:
where_instruction(instruction="aad", name="op")

Unnamed: 0,filename,file,of,endbr64,sub,mov,test,je,call,add,...,vinsertf32x8,movntq,pswapd,pfadd,pfsub,blendps,vcmpnleps,cmpxchg16b,vcvtsi2ssq,vcvtss2si
0,/usr/lib/python3.10/site-packages/pip/_vendor/...,1,1,0,261,3341,657,1112,1590,595,...,0,0,0,0,0,0,0,0,0,0


#### How to find the top 10 most popular instructions in Manjaro?

In [13]:
top_popular("m")

Unnamed: 0,filename,mov,lea,call,je,cmp,test,jmp,pop,add,push
0,/usr/bin/python3.10,8,5,4,6,1,3,4,2,3,3
1,/usr/lib/python3.10/lib-dynload/_heapq.cpython...,1002,215,173,131,103,97,154,54,50,54
2,/DataCollection/venv/lib/python3.10/site-packa...,3011,591,614,546,360,423,768,470,464,370
3,/DataCollection/venv/lib/python3.10/site-packa...,32480,3260,7002,7048,3289,5651,6523,1366,1164,1379
4,/usr/lib/python3.10/lib-dynload/cmath.cpython-...,542,125,391,171,26,81,334,70,80,40
...,...,...,...,...,...,...,...,...,...,...,...
3533,/usr/bin/db5.3/db_upgrade,174,30,60,20,7,22,24,12,8,13
3534,/usr/bin/db5.3/db_load,1062,193,280,180,87,214,171,49,31,35
3535,/usr/bin/db5.3/db_recover,170,39,58,22,11,16,25,12,14,13
3536,/usr/bin/db5.3/db_dump,373,51,102,39,9,50,52,19,8,20


##### How to add a dataframe to the scope? 

In [14]:
add_df("top10manjaro", top_popular("m"))

##### How to get a dataframe from the scope? 

In [15]:
# "top10m" is only the beginning of the name; it resolves to "top10manjaro" added above.
get_df("top10m")

Unnamed: 0,filename,mov,lea,call,je,cmp,test,jmp,pop,add,push
0,/usr/bin/python3.10,8,5,4,6,1,3,4,2,3,3
1,/usr/lib/python3.10/lib-dynload/_heapq.cpython...,1002,215,173,131,103,97,154,54,50,54
2,/DataCollection/venv/lib/python3.10/site-packa...,3011,591,614,546,360,423,768,470,464,370
3,/DataCollection/venv/lib/python3.10/site-packa...,32480,3260,7002,7048,3289,5651,6523,1366,1164,1379
4,/usr/lib/python3.10/lib-dynload/cmath.cpython-...,542,125,391,171,26,81,334,70,80,40
...,...,...,...,...,...,...,...,...,...,...,...
3533,/usr/bin/db5.3/db_upgrade,174,30,60,20,7,22,24,12,8,13
3534,/usr/bin/db5.3/db_load,1062,193,280,180,87,214,171,49,31,35
3535,/usr/bin/db5.3/db_recover,170,39,58,22,11,16,25,12,14,13
3536,/usr/bin/db5.3/db_dump,373,51,102,39,9,50,52,19,8,20


In [16]:
list(get_df("top10m").columns)

['filename',
 'mov',
 'lea',
 'call',
 'je',
 'cmp',
 'test',
 'jmp',
 'pop',
 'add',
 'push']

#### How to find the top 10 rarest instructions in Ubuntu?

In [17]:
top_rare("u")

Unnamed: 0,filename,str,insw,bndstx,cmpps,lfs,wbinvd,popfw,xrelease,retw,pushw
0,/usr/bin/dash,0,0,0,0,0,0,0,0,0,0
1,/usr/lib/x86_64-linux-gnu/libc.so.6,0,0,0,0,0,0,0,0,0,0
2,/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2,0,0,0,0,0,0,0,0,0,0
3,/usr/bin/python3.10,0,0,0,0,0,0,0,0,0,0
4,/DataCollection/venv/lib/python3.10/site-packa...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
3032,/usr/sbin/readprofile,0,0,0,0,0,0,0,0,0,0
3033,/usr/sbin/e2image,0,0,0,0,0,0,0,0,0,0
3034,/usr/sbin/chroot,0,0,0,0,0,0,0,0,0,0
3035,/usr/sbin/aa-status,0,0,0,0,0,0,0,0,0,0


In [18]:
# Note: despite the "top10" prefix shared with "top10manjaro", this dataframe holds
# the rarest instructions (top_rare), not the most popular ones.
add_df("top10ubuntu", top_rare("u"))

In [19]:
list(get_df("top10u").columns)

['filename',
 'str',
 'insw',
 'bndstx',
 'cmpps',
 'lfs',
 'wbinvd',
 'popfw',
 'xrelease',
 'retw',
 'pushw']

#### How to find out information about an instruction?

In [20]:
what_is_instruction("lfs")  # Opens a new tab

<IPython.core.display.Javascript object>

#### How to remove a dataframe from the scope?

In [21]:
# The name may be abbreviated here as well: this removes "top10manjaro".
remove_df("top10m")

##### How to get a list of dataframes in the scope?

In [22]:
dfs_list()

['manjaro', 'ubuntu', 'opensuse', 'top10ubuntu']

In [23]:
remove_df("top10u")

In [24]:
dfs_list()

['manjaro', 'ubuntu', 'opensuse']

#### How to figure out the total instruction usage in OpenSUSE?

In [25]:
# The totals are printed and also returned as a dict, so the notebook shows them twice
# (stream output plus cell result); end the line with `;` to hide the returned dict.
total_instruction_usage("o")

file: 2381
of: 12037
endbr64: 41019
sub: 3181069
mov: 64718914
test: 7440659
je: 8896509
call: 13189239
add: 6203498
ret: 1889595
push: 6372166
jmp: 7706812
nopl: 3085708
xchg: 488089
xor: 5306432
pop: 6872703
and: 2235217
lea: 13447327
hlt: 1132
nopw: 1519024
cmp: 7400172
shr: 1190929
sar: 524097
cmpb: 1096500
jne: 5359475
cmpq: 529079
movb: 886928
or: 1212870
testb: 352173
jg: 282946
js: 410357
jb: 510509
shl: 1488748
jle: 499597
jge: 97031
addq: 126193
subq: 108713
cltq: 105203
nop: 717932
jl: 99524
data16: 477222
movslq: 948097
mulsd: 92721
addsd: 74867
movl: 1459762
cmpl: 506833
movzbl: 2049385
movsd: 418022
movabs: 754382
pxor: 541598
movapd: 121352
ja: 632440
cvtsi2sd: 64189
jns: 100376
imul: 554856
movsbq: 27488
jbe: 453335
jae: 483811
cmovne: 226453
movzwl: 824203
subl: 182033
movq: 2381133
punpcklqdq: 248921
movaps: 1515884
movups: 1612121
movsbl: 70805
movw: 100500
mul: 67869
ucomisd: 47624
setnp: 14693
cmove: 229477
jp: 39669
comisd: 43332
cvttsd2si: 34539
subsd: 41983
xorp

{'file': 2381,
 'of': 12037,
 'endbr64': 41019,
 'sub': 3181069,
 'mov': 64718914,
 'test': 7440659,
 'je': 8896509,
 'call': 13189239,
 'add': 6203498,
 'ret': 1889595,
 'push': 6372166,
 'jmp': 7706812,
 'nopl': 3085708,
 'xchg': 488089,
 'xor': 5306432,
 'pop': 6872703,
 'and': 2235217,
 'lea': 13447327,
 'hlt': 1132,
 'nopw': 1519024,
 'cmp': 7400172,
 'shr': 1190929,
 'sar': 524097,
 'cmpb': 1096500,
 'jne': 5359475,
 'cmpq': 529079,
 'movb': 886928,
 'or': 1212870,
 'testb': 352173,
 'jg': 282946,
 'js': 410357,
 'jb': 510509,
 'shl': 1488748,
 'jle': 499597,
 'jge': 97031,
 'addq': 126193,
 'subq': 108713,
 'cltq': 105203,
 'nop': 717932,
 'jl': 99524,
 'data16': 477222,
 'movslq': 948097,
 'mulsd': 92721,
 'addsd': 74867,
 'movl': 1459762,
 'cmpl': 506833,
 'movzbl': 2049385,
 'movsd': 418022,
 'movabs': 754382,
 'pxor': 541598,
 'movapd': 121352,
 'ja': 632440,
 'cvtsi2sd': 64189,
 'jns': 100376,
 'imul': 554856,
 'movsbq': 27488,
 'jbe': 453335,
 'jae': 483811,
 'cmovne': 226

#### How to sort columns in the table by sum?

In [26]:
# For Manjaro
sort_columns_by_sum("m")

Unnamed: 0,filename,mov,lea,call,je,cmp,test,jmp,pop,add,...,vpmovw2m,kandd,vpmovd2m,vsqrtpd,fclex,vcvtsi2ssq,rdtscp,pfadd,pfsub,mulw
0,/usr/bin/python3.10,8,5,4,6,1,3,4,2,3,...,0,0,0,0,0,0,0,0,0,0
1,/usr/lib/python3.10/lib-dynload/_heapq.cpython...,1002,215,173,131,103,97,154,54,50,...,0,0,0,0,0,0,0,0,0,0
2,/DataCollection/venv/lib/python3.10/site-packa...,3011,591,614,546,360,423,768,470,464,...,0,0,0,0,0,0,0,0,0,0
3,/DataCollection/venv/lib/python3.10/site-packa...,32480,3260,7002,7048,3289,5651,6523,1366,1164,...,0,0,0,0,0,0,0,0,0,0
4,/usr/lib/python3.10/lib-dynload/cmath.cpython-...,542,125,391,171,26,81,334,70,80,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3533,/usr/bin/db5.3/db_upgrade,174,30,60,20,7,22,24,12,8,...,0,0,0,0,0,0,0,0,0,0
3534,/usr/bin/db5.3/db_load,1062,193,280,180,87,214,171,49,31,...,0,0,0,0,0,0,0,0,0,0
3535,/usr/bin/db5.3/db_recover,170,39,58,22,11,16,25,12,14,...,0,0,0,0,0,0,0,0,0,0
3536,/usr/bin/db5.3/db_dump,373,51,102,39,9,50,52,19,8,...,0,0,0,0,0,0,0,0,0,0


### Build a histogram of the total instruction usage

In [27]:
# No names are passed, so all dataframes in the scope are used; the width is set to
# 20000 (ten times the default of 2000).
total_histogram(width=20000)

### Build a histogram of the total instruction category usage

In [28]:
total_categories_histogram(width=1000)

### Build a histogram of the total instruction group usage

In [29]:
total_groups_histogram(width=2000)