# *Notebook Name*

## Libraries and Configurations

Import configuration files

In [1]:
from configparser import ConfigParser

config = ConfigParser()
config.read("../config.ini")

['../config.ini']

Import **data libraries**

In [2]:
import pandas as pd

Import **other libraries**

In [3]:
from rich.progress import Progress
from rich import traceback

import os

traceback.install()

<bound method InteractiveShell.excepthook of <ipykernel.zmqshell.ZMQInteractiveShell object at 0x7399b023e8d0>>

Custom helper scripts

In [4]:
%cd ..
from scripts import plotHelper, encodingHelper
%cd data_exploration_cleaning

/home/bacci/COMPACT/notebooks
/home/bacci/COMPACT/notebooks/data_exploration_cleaning


## Import Data

Importing every `CSV` file in the `binary` folder, merge them and create the labelled dataframe.

In [5]:
# Base directory containing the folders
base_dir = config["DEFAULT"]["binary_path"]

# Initialize an empty dictionary to store DataFrames
dataframes = {}

# Traverse the directory structure
for root, dirs, files in os.walk(base_dir):
    for file in files:
        if file.endswith(".csv"):
            # Construct the full file path
            file_path = os.path.join(root, file)

            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path, dtype=str)

            # Store the DataFrame in the dictionary with a unique key (e.g., file name)
            dataframes[file] = df

In [6]:
bin_df = pd.concat(dataframes.values(), ignore_index=True)

In [7]:
bin_df = bin_df.astype(str)

In [8]:
bin_df.dtypes

mac                   object
e_id_ssid             object
len_ssid              object
ssid                  object
e_id_sup_rates        object
len_sup_rates         object
supported_rates       object
e_id_ext_sup_rates    object
len_ext_sup_rates     object
ext_sup_rates         object
e_id_dsss             object
len_dsss              object
dsss_parameter        object
e_id_ht_cap           object
len_ht_cap            object
ht_cap                object
e_id_ext_cap          object
len_ext_cap           object
ext_cap               object
e_id_vht_cap          object
len_vht_cap           object
vht_cap               object
e_id_vst              object
len_vst               object
vst                   object
e_id_ext_tags         object
len_ext_tags          object
ext_tags              object
frame_check_seq       object
label                 object
dtype: object

Export to CSV in `interim` folder

In [9]:
bin_df.to_csv("../../data/interim/binary_df_raw.csv", index=False)

Set every column's type to string in order to avoid conversions and misrepresentation of the binary data.

In [10]:
bin_df = bin_df.astype(str)

Drop all the Element ID columns, since by definition they are always the same.

In [11]:
# Columns to drop
drop_starts_with = ["e_id_"]
bin_df = bin_df.drop(
    columns=[col for col in bin_df.columns if col.startswith(tuple(drop_starts_with))]
)

In [12]:
bin_df = bin_df.replace("nan", "0")

Zero padding columns to the maximum length

In [13]:
def zero_pad_column(df, exclude=[]):
    max_lengths = df.drop(columns=exclude).applymap(lambda x: len(str(x))).max()
    for col in df.columns:
        if col not in exclude:
            max_length = max_lengths[col]
            df[col] = df[col].fillna("").astype(str).str.ljust(max_length, "0")
    return df

In [14]:
df = zero_pad_column(bin_df, ["mac", "label"])

  max_lengths = df.drop(columns=exclude).applymap(lambda x: len(str(x))).max()


In [15]:
df

Unnamed: 0,mac,len_ssid,ssid,len_sup_rates,supported_rates,len_ext_sup_rates,ext_sup_rates,len_dsss,dsss_parameter,len_ht_cap,...,len_ext_cap,ext_cap,len_vht_cap,vht_cap,len_vst,vst,len_ext_tags,ext_tags,frame_check_seq,label
0,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000001,00011010,...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00101110110101000110100111001111,iPhone7_F
1,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000001,00011010,...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,11010111100011001000011011110001,iPhone7_F
2,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000001,00011010,...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,11010111100011001000011011110001,iPhone7_F
3,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000010,00011010,...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,11000100001011000110000000100001,iPhone7_F
4,1a:e6:5a:fe:34:4c,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00000010,00011010,...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,11000100001011000110000000100001,iPhone7_F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76699,da:a1:19:45:40:f0,00000000,0000000000000000000000000000000000000000000000...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,10110101111111110110100110111000,XiaomiRedmi4_B
76700,da:a1:19:45:40:f0,00000110,0011000100110001001100010011011101110011011110...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,11111010001100111100010011110000,XiaomiRedmi4_B
76701,da:a1:19:45:40:f0,00001001,0010000101101111011100000011000001110011011100...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,11101110101101011101100100100001,XiaomiRedmi4_B
76702,da:a1:19:45:40:f0,00001000,0101011001101111011001000110000101100110011011...,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00001101,00011010,...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,01011111010111110111011001010011,XiaomiRedmi4_B


In [16]:
len(df.iloc[76703][2])

  len(df.iloc[76703][2])


144

In [17]:
df = df.drop(columns=["frame_check_seq", "len_dsss", "ssid"])

In [18]:
df

Unnamed: 0,mac,len_ssid,len_sup_rates,supported_rates,len_ext_sup_rates,ext_sup_rates,dsss_parameter,len_ht_cap,ht_cap,len_ext_cap,ext_cap,len_vht_cap,vht_cap,len_vst,vst,len_ext_tags,ext_tags,label
0,1a:e6:5a:fe:34:4c,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
1,1a:e6:5a:fe:34:4c,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
2,1a:e6:5a:fe:34:4c,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
3,1a:e6:5a:fe:34:4c,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000010,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
4,1a:e6:5a:fe:34:4c,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000010,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76699,da:a1:19:45:40:f0,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B
76700,da:a1:19:45:40:f0,00000110,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B
76701,da:a1:19:45:40:f0,00001001,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B
76702,da:a1:19:45:40:f0,00001000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B


In [19]:
df.iloc[76703][5]

  df.iloc[76703][5]


'0000110000010010000110000010010000110000010010000110000001101100'

In [20]:
df.columns

Index(['mac', 'len_ssid', 'len_sup_rates', 'supported_rates',
       'len_ext_sup_rates', 'ext_sup_rates', 'dsss_parameter', 'len_ht_cap',
       'ht_cap', 'len_ext_cap', 'ext_cap', 'len_vht_cap', 'vht_cap', 'len_vst',
       'vst', 'len_ext_tags', 'ext_tags', 'label'],
      dtype='object')

In [21]:
max_lengths = df.apply(lambda x: x.astype(str).str.len().max())

In [22]:
max_lengths

mac                    17
len_ssid                8
len_sup_rates           8
supported_rates        64
len_ext_sup_rates       8
ext_sup_rates          64
dsss_parameter          8
len_ht_cap              8
ht_cap                208
len_ext_cap             8
ext_cap                 8
len_vht_cap            88
vht_cap                96
len_vst                 8
vst                  1336
len_ext_tags            8
ext_tags              240
label                  19
dtype: int64

In [23]:
def zero_pad_columns(df):
    max_lengths = df.drop(columns=["label"]).apply(
        lambda x: x.astype(str).str.len().max()
    )
    for col in df.columns:
        if col != "label":
            max_length = max_lengths[col]
            df[col] = df[col].astype(str).str.ljust(max_length, "0")
    return df


df = zero_pad_columns(df)

In [24]:
df

Unnamed: 0,mac,len_ssid,len_sup_rates,supported_rates,len_ext_sup_rates,ext_sup_rates,dsss_parameter,len_ht_cap,ht_cap,len_ext_cap,ext_cap,len_vht_cap,vht_cap,len_vst,vst,len_ext_tags,ext_tags,label
0,1a:e6:5a:fe:34:4c,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
1,1a:e6:5a:fe:34:4c,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
2,1a:e6:5a:fe:34:4c,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
3,1a:e6:5a:fe:34:4c,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000010,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
4,1a:e6:5a:fe:34:4c,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000010,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76699,da:a1:19:45:40:f0,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B
76700,da:a1:19:45:40:f0,00000110,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B
76701,da:a1:19:45:40:f0,00001001,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B
76702,da:a1:19:45:40:f0,00001000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001101,00011010,0110111000000001000000111111111100000000000000...,00000000,01111111,0000000000000000000010100000001000000001000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000011000...,00000000,0000000000000000000000000000000000000000000000...,XiaomiRedmi4_B


Check that every column is effectively padded to the same length

In [25]:
std_lengths = df.apply(lambda x: x.astype(str).str.len().std())
std_lengths

mac                  0.000000
len_ssid             0.000000
len_sup_rates        0.000000
supported_rates      0.000000
len_ext_sup_rates    0.000000
ext_sup_rates        0.000000
dsss_parameter       0.000000
len_ht_cap           0.000000
ht_cap               0.000000
len_ext_cap          0.000000
ext_cap              0.000000
len_vht_cap          0.000000
vht_cap              0.000000
len_vst              0.000000
vst                  0.000000
len_ext_tags         0.000000
ext_tags             0.000000
label                2.402216
dtype: float64

## Burst View

In [26]:
df_grouped = df.groupby("mac").agg(lambda x: x.mode()[0]).reset_index()

In [27]:
df_grouped

Unnamed: 0,mac,len_ssid,len_sup_rates,supported_rates,len_ext_sup_rates,ext_sup_rates,dsss_parameter,len_ht_cap,ht_cap,len_ext_cap,ext_cap,len_vht_cap,vht_cap,len_vst,vst,len_ext_tags,ext_tags,label
0,00:0f:00:6a:68:8b,00001010,00001000,1000001010000100100010111001011000010010001001...,00000100,0000110000011000001100000110000000000000000000...,00000000,00011010,0110111000000001000000101111111100000000000000...,00000000,01111111,0000000100000000000000000000000000000000000000...,0000000000000000000000000000000000000000000000...,10100111,0000000001010000111100100000010000010000010010...,00000000,0000000000000000000000000000000000000000000000...,SamsungJ6_K
1,00:be:3b:a2:a8:56,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001100,00011010,0010000100000001000110111111111111111111000000...,00000000,01111111,0000000000001000000011111000010000000001010000...,0000000000000000000000000000000000000000000000...,00001001,0000000010010000010011000000010000001000101111...,00000000,0000000000000000000000000000000000000000000000...,HuaweiP20_G
2,02:00:00:00:00:00,00000000,00001000,0000001000000100000010110001011000001100000100...,00000100,0011000001001000011000000110110000000000000000...,00001001,00011010,0010110100000001000101111111111100000000000000...,00000000,01111111,0000000100000000000010000000000000000000000000...,0000000000000000000000000000000000000000000000...,00000111,0000000001010000111100100000100000000000000100...,00000000,0000000000000000000000000000000000000000000000...,SamsungM31_A
3,02:00:00:00:3e:b2,00000000,00000100,1000001010000100100010111001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001011,00011010,0010110101000000000110111111111100000000000000...,00000000,01111111,0000000000000000000010000000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00011100,0010001100000001000010000000100000011000000000...,iPhone11_C
4,02:00:3a:5e:a1:f4,00000000,00000100,1000001010000100100010111001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001010,00011010,0010110101000000000110111111111100000000000000...,00000000,01111111,0000000000000000000010000000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00011100,0010001100000001000010000000100000011000000000...,iPhone11_B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4790,fe:f9:ac:47:0d:b7,00000000,00000100,1000001010000100100010111001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00001011,00011010,0010110101000000000110111111111100000000000000...,00000000,01111111,0000000000000000000010000000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00011100,0010001100000001000010000000100000011000000000...,iPhone12_W
4791,fe:f9:fc:fb:83:9e,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,0010000101000000000101111111111100000000000000...,00000000,01111111,0000010000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00001001,0000000000010111111100100000101000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone6_N
4792,fe:fc:07:34:10:69,00000000,00000100,1000001010000100100010111001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,0010110101000000000110111111111100000000000000...,00000000,01111111,0000000000000000000010000000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00011100,0010001100000001000010000000100000011000000000...,iPhone11_C
4793,fe:fc:aa:d1:89:d1,00000000,00000100,1000001010000100100010111001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000010,00011010,0010110101000000000110111111111100000000000000...,00000000,01111111,0000000000000000000010000000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00011100,0010001100000001000010000000100000011000000000...,iPhone12_W


## Generate couples df

In [28]:
df = df.head(4795)

In [29]:
import pandas as pd
from itertools import combinations

# Assuming df is your original DataFrame with an 'index' column and a 'label' column
# Replace these values with your actual DataFrame and column names
# df = your_original_dataframe
# index_column_name = 'index'
# label_column_name = 'label'

# Create all possible combinations of indexes (pairs)
index_pairs = list(combinations(df.index, 2))

# Create a list to store the results
results = []

# Iterate over the index pairs
for pair in index_pairs:
    index1, index2 = pair
    label1 = df.loc[index1, "label"]
    label2 = df.loc[index2, "label"]

    # Determine if the labels are equal or not and assign the corresponding value
    if label1 == label2:
        results.append((index1, index2, 1))  # Add +1 if labels are equal
    else:
        results.append((index1, index2, -1))  # Add -1 if labels are not equal

# Create a DataFrame from the results
result_df = pd.DataFrame(results, columns=["Item 1", "Item 2", "Equality"])

In [30]:
result_df

Unnamed: 0,Item 1,Item 2,Equality
0,0,1,1
1,0,2,1
2,0,3,1
3,0,4,1
4,0,5,1
...,...,...,...
11493610,4791,4793,1
11493611,4791,4794,1
11493612,4792,4793,1
11493613,4792,4794,1


## Generate Bitmask Filters

In [31]:
df_no_mac = df.drop(columns=["mac", "label"])

In [32]:
df_no_mac

Unnamed: 0,len_ssid,len_sup_rates,supported_rates,len_ext_sup_rates,ext_sup_rates,dsss_parameter,len_ht_cap,ht_cap,len_ext_cap,ext_cap,len_vht_cap,vht_cap,len_vst,vst,len_ext_tags,ext_tags,label
0,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
1,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
2,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
3,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000010,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
4,00000000,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000010,00011010,0010110101000000000101111111111100000000000000...,00000000,01111111,0000000000000000000010001000010000000000000000...,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,00000000,0000000000000000000000000000000000000000000000...,iPhone7_F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4790,00001100,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,1110111100000001000100111111111111111111000000...,00001100,01111111,0000010000000000000010101000001000000000010000...,1001001011110001100100000011001111111010111111...,00001010,0000000001010000111100100000100000000000001010...,00000011,0000001000000000000101110000000000000000000000...,OppoFindX3Neo_A
4791,00001011,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,1110111100000001000100111111111111111111000000...,00001100,01111111,0000010000000000000010101000001000000000010000...,1001001011110001100100000011001111111010111111...,00001010,0000000001010000111100100000100000000000001010...,00000011,0000001000000000000101110000000000000000000000...,OppoFindX3Neo_A
4792,00010010,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,1110111100000001000100111111111111111111000000...,00001100,01111111,0000010000000000000010101000001000000000010000...,1001001011110001100100000011001111111010111111...,00001010,0000000001010000111100100000100000000000001010...,00000011,0000001000000000000101110000000000000000000000...,OppoFindX3Neo_A
4793,00001001,00000100,0000001000000100000010110001011000000000000000...,00001000,0000110000010010000110000010010000110000010010...,00000001,00011010,1110111100000001000100111111111111111111000000...,00001100,01111111,0000010000000000000010101000001000000000010000...,1001001011110001100100000011001111111010111111...,00001010,0000000001010000111100100000100000000000001010...,00000011,0000001000000000000101100000000000000000000000...,OppoFindX3Neo_A
