# Encoding Information Elements HEX fields

## Libraries and Configurations

Import configuration files

In [155]:
from configparser import ConfigParser

config = ConfigParser()
config.read("../config.ini")

['../config.ini']

Import **data libraries**

In [156]:
import pandas as pd

Import **ML libraries**

In [157]:
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestRegressor

Import **visualization libraries**

In [158]:
import seaborn as sns

sns.set(style="whitegrid")

import matplotlib.pyplot as plt

Import **neural network libraries**

In [159]:
from keras.utils import plot_model

from IPython.display import Image

Deep Learning Model

In [160]:
from tensorflow.keras.layers import Input, Dense, Reshape, Concatenate, Embedding

Import **other libraries**

In [161]:
from rich.progress import Progress
from rich import traceback

traceback.install()

<bound method InteractiveShell.excepthook of <ipykernel.zmqshell.ZMQInteractiveShell object at 0x10dccd710>>

Custom helper scripts

In [162]:
%cd ..
from scripts import plotHelper, encodingHelper
%cd data_exploration_cleaning

/Users/bacci/Library/CloudStorage/SynologyDrive-giovanni/Research 🌱/Repositories/COMPACT/notebooks
/Users/bacci/Library/CloudStorage/SynologyDrive-giovanni/Research 🌱/Repositories/COMPACT/notebooks/data_exploration_cleaning


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


## Import Data

In [163]:
# Combined dataframe
combined_df_csv = config["DEFAULT"]["interim_path"] + "combined_df_raw_balanced.csv"

In [164]:
cols_to_keep = [
    "HT Capabilities",
    "Extended Capabilities",
    "Vendor Specific Tags",
    "Supported Rates",
    "Extended Supported Rates",
    "VHT Capabilities",
    "HE Capabilities",
    "Label",
]

In [165]:
df = pd.read_csv(combined_df_csv, usecols=cols_to_keep)

In [166]:
df

Unnamed: 0,HT Capabilities,Extended Capabilities,Vendor Specific Tags,Supported Rates,Extended Supported Rates,VHT Capabilities,HE Capabilities,Label
0,2d001bff00000000000000000000000000000000000000...,80000000040000020,,82848b96,0c1218243048606c,,23010808180080203002000d009f08000000fdfffdff39...,iPhone12Pro_C
1,2d001bff00000000000000000000000000000000000000...,80000000040000020,,82848b96,0c1218243048606c,,23010808180080203002000d009f08000000fdfffdff39...,iPhone12Pro_C
2,2d001bff00000000000000000000000000000000000000...,80000000040000020,,82848b96,0c1218243048606c,,23010808180080203002000d009f08000000fdfffdff39...,iPhone12Pro_C
3,2d001bff00000000000000000000000000000000000000...,80000000040000020,,82848b96,0c1218243048606c,,23010808180080203002000d009f08000000fdfffdff39...,iPhone12Pro_C
4,2d001bff00000000000000000000000000000000000000...,80000000040000020,,82848b96,0c1218243048606c,,23010808180080203002000d009f08000000fdfffdff39...,iPhone12Pro_C
...,...,...,...,...,...,...,...,...
36041,2d0113ff00000000000000000000000000000000000000...,04000a02004000408001,0050f208002400,02040b16,0c1218243048606c,92719033feff8601feff8601,02000f,XiaomiRedmiNote7_S
36042,2d0113ff00000000000000000000000000000000000000...,04000a02004000408001,0050f208002400,02040b16,0c1218243048606c,92719033feff8601feff8601,020010,XiaomiRedmiNote7_S
36043,2d0113ff00000000000000000000000000000000000000...,04000a02004000408001,0050f208002400,02040b16,0c1218243048606c,92719033feff8601feff8601,020025,XiaomiRedmiNote7_S
36044,2d0113ff00000000000000000000000000000000000000...,04000a02004000408001,0050f208002400,02040b16,0c1218243048606c,92719033feff8601feff8601,020010,XiaomiRedmiNote7_S


Check dataframe info

In [167]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36046 entries, 0 to 36045
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   HT Capabilities           34544 non-null  object
 1   Extended Capabilities     30565 non-null  object
 2   Vendor Specific Tags      24841 non-null  object
 3   Supported Rates           36046 non-null  object
 4   Extended Supported Rates  36045 non-null  object
 5   VHT Capabilities          5685 non-null   object
 6   HE Capabilities           10740 non-null  object
 7   Label                     36046 non-null  object
dtypes: object(8)
memory usage: 2.2+ MB


Dropping columns which have too many null values:
- `HE Capabilities` -> 70.2% of entries has null value
- `VHT Capabilities` -> 84.2% of entires has null value

In [168]:
df.drop(columns=["VHT Capabilities", "HE Capabilities"], inplace=True)

Dropping columns with too few categories (see `data_visualization_statistics.ipynb`):
- `Supported Rates` -> 78.3% of entries has the same value
- `Extended Supported Rates` -> 92.3% of entries has the same value

In [169]:
df.drop(columns=["Supported Rates", "Extended Supported Rates"], inplace=True)

In [170]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36046 entries, 0 to 36045
Data columns (total 4 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   HT Capabilities        34544 non-null  object
 1   Extended Capabilities  30565 non-null  object
 2   Vendor Specific Tags   24841 non-null  object
 3   Label                  36046 non-null  object
dtypes: object(4)
memory usage: 1.1+ MB


## Data Processing

There are some missing values in the dataframe, filling them with zeros

In [171]:
df.fillna(0, inplace=True)

In [172]:
df

Unnamed: 0,HT Capabilities,Extended Capabilities,Vendor Specific Tags,Label
0,2d001bff00000000000000000000000000000000000000...,80000000040000020,0,iPhone12Pro_C
1,2d001bff00000000000000000000000000000000000000...,80000000040000020,0,iPhone12Pro_C
2,2d001bff00000000000000000000000000000000000000...,80000000040000020,0,iPhone12Pro_C
3,2d001bff00000000000000000000000000000000000000...,80000000040000020,0,iPhone12Pro_C
4,2d001bff00000000000000000000000000000000000000...,80000000040000020,0,iPhone12Pro_C
...,...,...,...,...
36041,2d0113ff00000000000000000000000000000000000000...,04000a02004000408001,0050f208002400,XiaomiRedmiNote7_S
36042,2d0113ff00000000000000000000000000000000000000...,04000a02004000408001,0050f208002400,XiaomiRedmiNote7_S
36043,2d0113ff00000000000000000000000000000000000000...,04000a02004000408001,0050f208002400,XiaomiRedmiNote7_S
36044,2d0113ff00000000000000000000000000000000000000...,04000a02004000408001,0050f208002400,XiaomiRedmiNote7_S


### Use dummies