# *Notebook Name*

## Libraries and Configurations

Import configuration files

In [28]:
from configparser import ConfigParser

config = ConfigParser()
config.read("../config.ini")

['../config.ini']

Import **data libraries**

In [29]:
import pandas as pd

Import **other libraries**

In [30]:
from rich.progress import Progress
from rich import traceback

import os

traceback.install()

<bound method InteractiveShell.excepthook of <ipykernel.zmqshell.ZMQInteractiveShell object at 0x7e8360a2aa50>>

Custom helper scripts

In [31]:
%cd ..
from scripts import plotHelper, encodingHelper
%cd data_exploration_cleaning

/home/bacci/COMPACT/notebooks
/home/bacci/COMPACT/notebooks/data_exploration_cleaning


## Import Data

Importing every `CSV` file in the `binary` folder, merge them and create the labelled dataframe.

In [47]:
# Base directory containing the folders
base_dir = config["DEFAULT"]["binary_path"]

# Initialize an empty dictionary to store DataFrames
dataframes = {}

# Traverse the directory structure
for root, dirs, files in os.walk(base_dir):
    for file in files:
        if file.endswith(".csv"):
            # Construct the full file path
            file_path = os.path.join(root, file)

            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path, dtype=str)

            # Store the DataFrame in the dictionary with a unique key (e.g., file name)
            dataframes[file] = df

In [48]:
bin_df = pd.concat(dataframes.values(), ignore_index=True)

In [49]:
bin_df = bin_df.astype(str)

In [50]:
bin_df.dtypes

mac                   object
e_id_ssid             object
len_ssid              object
ssid                  object
e_id_sup_rates        object
len_sup_rates         object
supported_rates       object
e_id_ext_sup_rates    object
len_ext_sup_rates     object
ext_sup_rates         object
e_id_dsss             object
len_dsss              object
dsss_parameter        object
e_id_ht_cap           object
len_ht_cap            object
ht_cap                object
e_id_ext_cap          object
len_ext_cap           object
ext_cap               object
e_id_vht_cap          object
len_vht_cap           object
vht_cap               object
e_id_vst              object
len_vst               object
vst                   object
e_id_ext_tags         object
len_ext_tags          object
ext_tags              object
frame_check_seq       object
label                 object
dtype: object

Export to CSV in `interim` folder

In [51]:
bin_df.to_csv("../../data/interim/binary_df_raw.csv", index=False)

Set every column's type to string in order to avoid conversions and misrepresentation of the binary data.

In [52]:
bin_df = bin_df.astype(str)

Drop all the Element ID columns, since by definition they are always the same.

In [53]:
# Columns to drop
drop_starts_with = ["e_id_"]
bin_df = bin_df.drop(
    columns=[col for col in bin_df.columns if col.startswith(tuple(drop_starts_with))]
)

In [54]:
bin_df = bin_df.replace("nan", "0")

Zero padding columns to the maximum length

In [55]:
def zero_pad_column(df, exclude=[]):
    max_lengths = df.drop(columns=exclude).applymap(lambda x: len(str(x))).max()
    for col in df.columns:
        if col not in exclude:
            max_length = max_lengths[col]
            df[col] = df[col].fillna("").astype(str).str.ljust(max_length, "0")
    return df

In [56]:
df = zero_pad_column(bin_df, ["mac", "label"])

  max_lengths = df.drop(columns=exclude).applymap(lambda x: len(str(x))).max()


In [57]:
df

Unnamed: 0,mac,len_ssid,ssid,len_sup_rates,supported_rates,len_ext_sup_rates,ext_sup_rates,len_dsss,dsss_parameter,len_ht_cap,...,len_ext_cap,ext_cap,len_vht_cap,vht_cap,len_vst,vst,len_ext_tags,ext_tags,frame_check_seq,label
0,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000001,00011010,...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00101110110101000110100111001111,iPhone7_F
1,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000001,00011010,...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,11010111100011001000011011110001,iPhone7_F
2,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000001,00011010,...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,11010111100011001000011011110001,iPhone7_F
3,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000010,00011010,...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,11000100001011000110000000100001,iPhone7_F
4,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000010,00011010,...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,11000100001011000110000000100001,iPhone7_F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76699,da:a1:19:45:40:f0,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,10110101111111110110100110111000,XiaomiRedmi4_B
76700,da:a1:19:45:40:f0,00000110,0011000100110001001100010011011101110011011110...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,11111010001100111100010011110000,XiaomiRedmi4_B
76701,da:a1:19:45:40:f0,00001001,0010000101101111011100000011000001110011011100...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,11101110101101011101100100100001,XiaomiRedmi4_B
76702,da:a1:19:45:40:f0,00001000,0101011001101111011001000110000101100110011011...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,01011111010111110111011001010011,XiaomiRedmi4_B


In [58]:
len(df.iloc[76703][2])

  len(df.iloc[76703][2])


144

In [59]:
df = df.drop(columns=["frame_check_seq"])

In [60]:
df

Unnamed: 0,mac,len_ssid,ssid,len_sup_rates,supported_rates,len_ext_sup_rates,ext_sup_rates,len_dsss,dsss_parameter,len_ht_cap,ht_cap,len_ext_cap,ext_cap,len_vht_cap,vht_cap,len_vst,vst,len_ext_tags,ext_tags,label
0,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
1,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
2,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
3,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000010,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
4,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000010,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76699,da:a1:19:45:40:f0,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B
76700,da:a1:19:45:40:f0,00000110,0011000100110001001100010011011101110011011110...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B
76701,da:a1:19:45:40:f0,00001001,0010000101101111011100000011000001110011011100...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B
76702,da:a1:19:45:40:f0,00001000,0101011001101111011001000110000101100110011011...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B


In [61]:
df.iloc[76703][5]

  df.iloc[76703][5]


'00001000'