In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
from moe import *
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import LearningRateMonitor
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from pycaret.classification import *

In [2]:
# Read the train and test splits from parquet files (paths are relative to the
# notebook's working directory). The train file is read first, then the test file.
df_train, df_test = map(
    pd.read_parquet,
    (
        'CIC_IoMT_2024_WiFi_MQTT_train.parquet',
        'CIC_IoMT_2024_WiFi_MQTT_test.parquet',
    ),
)

In [3]:
def map_label(label):
    """Collapse a raw attack label into one of six coarse traffic categories.

    Matching is by case-sensitive substring, and the first matching rule wins:

    * contains 'Benign'       -> 'benign'
    * contains 'ARP_Spoofing' -> 'spoofing'
    * contains 'Recon'        -> 'recon'
    * contains 'MQTT'         -> 'MQTT'  (checked before DoS/DDoS on purpose,
      so MQTT-based flood labels stay in the MQTT category)
    * contains 'DDoS'         -> 'DDoS'
    * contains 'DoS'          -> 'DoS'

    The function is idempotent: a value that already is one of the six output
    categories is returned unchanged. Without this, re-running the notebook
    cell that overwrites the ``label`` column in place would turn 'benign',
    'spoofing' and 'recon' into missing values, because none of them contains
    its own (case-sensitive) source substring.

    Parameters
    ----------
    label : str
        Raw or already-mapped label.

    Returns
    -------
    str or None
        The coarse category, or ``None`` when no rule matches.
    """
    # Ordered (substring, category) rules. 'DDoS' must precede 'DoS' because
    # every string containing 'DDoS' also contains 'DoS'.
    rules = (
        ('Benign', 'benign'),
        ('ARP_Spoofing', 'spoofing'),
        ('Recon', 'recon'),
        ('MQTT', 'MQTT'),
        ('DDoS', 'DDoS'),
        ('DoS', 'DoS'),
    )

    # Pass already-mapped categories straight through so that applying the
    # mapping twice is harmless. A tuple (not a set) keeps this an equality
    # check, so unhashable inputs are not rejected here.
    if label in ('benign', 'spoofing', 'recon', 'MQTT', 'DoS', 'DDoS'):
        return label

    for needle, category in rules:
        if needle in label:
            return category

    # No rule matched: signal "unknown" explicitly instead of falling off the end.
    return None
    
# Replace the raw labels of both splits with their coarse categories.
# Both mapped Series are computed (train first, then test) before either
# 'label' column is overwritten.
df_train['label'], df_test['label'] = (
    split['label'].map(map_label) for split in (df_train, df_test)
)

In [4]:
# Keep a 50% subsample of the training frame, stratified on 'label' so the
# class proportions are preserved; the other half is discarded into `_`.
# random_state pins the shuffle so that Restart & Run All reproduces the same
# subsample (1234 matches the session_id passed to pycaret's setup()).
fraction_train_data, _ = train_test_split(
    df_train,
    train_size=0.5,
    shuffle=True,
    stratify=df_train['label'],
    random_state=1234,
)

In [5]:
# Initialise the pycaret classification experiment on the subsampled training
# frame, with the separate test frame passed explicitly as test_data.
# Class-imbalance correction and GPU use are both switched off.
clf_setup = setup(
    data=fraction_train_data,
    test_data=df_test,
    target='label',
    index=False,
    session_id=1234,
    fix_imbalance=False,
    use_gpu=False,
)

Unnamed: 0,Description,Value
0,Session id,1234
1,Target,label
2,Target type,Multiclass
3,Target mapping,"DDoS: 0, DoS: 1, MQTT: 2, benign: 3, recon: 4, spoofing: 5"
4,Original data shape,"(5194597, 46)"
5,Transformed data shape,"(5194597, 46)"
6,Transformed train set shape,"(3580415, 46)"
7,Transformed test set shape,"(1614182, 46)"
8,Numeric features,45
9,Preprocess,True


In [6]:
# Fetch the transformed feature frames for both splits from the pycaret experiment.
df_train_preprocessed = get_config('X_train_transformed')
df_test_preprocessed = get_config('X_test_transformed')

# Re-attach the transformed target to each frame as a trailing 'label' column.
df_train_preprocessed['label'] = get_config('y_train_transformed')
df_test_preprocessed['label'] = get_config('y_test_transformed')

# Persist both preprocessed splits as CSV, without writing the index.
df_train_preprocessed.to_csv('preprocessed_train_dataset.csv', index=False)
df_test_preprocessed.to_csv('preprocessed_test_dataset.csv', index=False)

In [8]:
# Feature matrices: every column except the target, cast to float32.
X_train = df_train_preprocessed.drop(columns=['label']).astype(np.float32)
X_test = df_test_preprocessed.drop(columns=['label']).astype(np.float32)

# Build the label -> integer-code mapping from the TRAINING labels only and
# reuse it for the test labels. Encoding each split independently with
# .astype('category') would assign codes by the sorted unique values present
# in that split, so a class missing from one split would shift the codes and
# make train/test targets disagree. With a shared dtype the codes always line
# up; a test label never seen in training gets code -1.
label_dtype = df_train_preprocessed['label'].astype('category').dtype

# Codes are stored as float32, as in the original cell.
y_train = np.asarray(df_train_preprocessed['label'].astype(label_dtype).cat.codes, np.float32)
y_test = np.asarray(df_test_preprocessed['label'].astype(label_dtype).cat.codes, np.float32)

In [9]:
# Compute one weight per class found in y_train using sklearn's 'balanced'
# heuristic, then save the weight vector to disk as a .npy file.
observed_classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=observed_classes,
    y=y_train,
)
np.save('class_weights.npy', class_weights)