# *Notebook Name*

## Libraries and Configurations

Import configuration files

In [1]:
from configparser import ConfigParser

config = ConfigParser()
config.read("../config.ini")

['../config.ini']

Import **data libraries**

In [2]:
import pandas as pd

Import **other libraries**

In [3]:
from rich.progress import Progress
from rich import traceback

import os

traceback.install()

<bound method InteractiveShell.excepthook of <ipykernel.zmqshell.ZMQInteractiveShell object at 0x7647d45a6890>>

Custom helper scripts

In [4]:
%cd ..
from scripts import plotHelper, encodingHelper
%cd data_exploration_cleaning

/home/bacci/COMPACT/notebooks
/home/bacci/COMPACT/notebooks/data_exploration_cleaning


## Import Data

Importing every `CSV` file in the `binary` folder, merge them and create the labelled dataframe.

In [5]:
# Base directory containing the folders
base_dir = config["DEFAULT"]["binary_path"]

# Initialize an empty dictionary to store DataFrames
dataframes = {}

# Traverse the directory structure
for root, dirs, files in os.walk(base_dir):
    for file in files:
        if file.endswith(".csv"):
            # Construct the full file path
            file_path = os.path.join(root, file)

            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path)

            # Store the DataFrame in the dictionary with a unique key (e.g., file name)
            dataframes[file] = df

In [6]:
bin_df = pd.concat(dataframes.values(), ignore_index=True)

Export to CSV in `interim` folder

In [7]:
bin_df.to_csv("../../data/interim/binary_df_raw.csv", index=False)

Set every column's type to string in order to avoid conversions and misrepresentation of the binary data.

In [8]:
bin_df = bin_df.astype(str)

In [13]:
# Columns to drop
drop_starts_with = ["e_id_"]
bin_df = bin_df.drop(
    columns=[col for col in bin_df.columns if col.startswith(tuple(drop_starts_with))]
)

In [14]:
bin_df = bin_df.replace("nan", "0")

Zero padding columns to the maximum length

In [15]:
def zero_pad_column(df, exclude=[]):
    max_lengths = df.drop(columns=exclude).applymap(lambda x: len(str(x))).max()
    for col in df.columns:
        if col not in exclude:
            max_length = max_lengths[col]
            df[col] = df[col].fillna("").astype(str).str.ljust(max_length, "0")
    return df

In [16]:
df = zero_pad_column(bin_df, ["label"])

  max_lengths = df.drop(columns=exclude).applymap(lambda x: len(str(x))).max()


In [17]:
df

Unnamed: 0,mac,len_ssid,ssid,len_sup_rates,supported_rates,len_ext_sup_rates,ext_sup_rates,len_dsss,dsss_parameter,len_ht_cap,...,len_ext_cap,ext_cap,len_vht_cap,vht_cap,len_vst,vst,len_ext_tags,ext_tags,frame_check_seq,label
0,1a:e6:5a:fe:34:4c,00000,0000000000000000000000000000000000000000000000...,1000,0000001000000100000010110001011000000000000000...,1000.0,0000110000010010000110000010010000110000010010...,1.0,1.0000,11010.0,...,000000,1111111.0,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,0000000000,0000000000000000000000000000000000000000000000...,0000000,0000000000000000000000000000000000000000000000...,00101110110101000110100111001111,iPhone7_F
1,1a:e6:5a:fe:34:4c,00000,0000000000000000000000000000000000000000000000...,1000,0000001000000100000010110001011000000000000000...,1000.0,0000110000010010000110000010010000110000010010...,1.0,1.0000,11010.0,...,000000,1111111.0,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,0000000000,0000000000000000000000000000000000000000000000...,0000000,0000000000000000000000000000000000000000000000...,11010111100011001000011011110001,iPhone7_F
2,1a:e6:5a:fe:34:4c,00000,0000000000000000000000000000000000000000000000...,1000,0000001000000100000010110001011000000000000000...,1000.0,0000110000010010000110000010010000110000010010...,1.0,1.0000,11010.0,...,000000,1111111.0,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,0000000000,0000000000000000000000000000000000000000000000...,0000000,0000000000000000000000000000000000000000000000...,11010111100011001000011011110001,iPhone7_F
3,1a:e6:5a:fe:34:4c,00000,0000000000000000000000000000000000000000000000...,1000,0000001000000100000010110001011000000000000000...,1000.0,0000110000010010000110000010010000110000010010...,1.0,10.000,11010.0,...,000000,1111111.0,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,0000000000,0000000000000000000000000000000000000000000000...,0000000,0000000000000000000000000000000000000000000000...,11000100001011000110000000100001,iPhone7_F
4,1a:e6:5a:fe:34:4c,00000,0000000000000000000000000000000000000000000000...,1000,0000001000000100000010110001011000000000000000...,1000.0,0000110000010010000110000010010000110000010010...,1.0,10.000,11010.0,...,000000,1111111.0,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,0000000000,0000000000000000000000000000000000000000000000...,0000000,0000000000000000000000000000000000000000000000...,11000100001011000110000000100001,iPhone7_F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76699,da:a1:19:45:40:f0,00000,0000000000000000000000000000000000000000000000...,1000,0000001000000100000010110001011000000000000000...,1000.0,0000110000010010000110000010010000110000010010...,1.0,1101.0,11010.0,...,000000,1111111.0,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,111.000000,0000000001010000111100100000100000000000011000...,0000000,0000000000000000000000000000000000000000000000...,10110101111111110110100110111000,XiaomiRedmi4_B
76700,da:a1:19:45:40:f0,11000,0011000100110001001100010011011101110011011110...,1000,0000001000000100000010110001011000000000000000...,1000.0,0000110000010010000110000010010000110000010010...,1.0,1101.0,11010.0,...,000000,1111111.0,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,111.000000,0000000001010000111100100000100000000000011000...,0000000,0000000000000000000000000000000000000000000000...,11111010001100111100010011110000,XiaomiRedmi4_B
76701,da:a1:19:45:40:f0,10010,0010000101101111011100000011000001110011011100...,1000,0000001000000100000010110001011000000000000000...,1000.0,0000110000010010000110000010010000110000010010...,1.0,1101.0,11010.0,...,000000,1111111.0,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,111.000000,0000000001010000111100100000100000000000011000...,0000000,0000000000000000000000000000000000000000000000...,11101110101101011101100100100001,XiaomiRedmi4_B
76702,da:a1:19:45:40:f0,10000,0101011001101111011001000110000101100110011011...,1000,0000001000000100000010110001011000000000000000...,1000.0,0000110000010010000110000010010000110000010010...,1.0,1101.0,11010.0,...,000000,1111111.0,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,111.000000,0000000001010000111100100000100000000000011000...,0000000,0000000000000000000000000000000000000000000000...,01011111010111110111011001010011,XiaomiRedmi4_B


In [25]:
len(df.iloc[76703][2])

  len(df.iloc[76703][2])


144