In [1]:
import os
import sys
import warnings

# Environment setup has to run before the heavier imports below: the extra
# path entry is presumably where the project-level `models` module lives, and
# the warning filter / TF log level only silence output if set beforehand.
sys.path.append('../../../../')
warnings.filterwarnings("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Kept ahead of the explicit imports so that explicitly imported names win
# over anything the star import happens to export.
from models import *

import numpy as np
import pandas as pd
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger, CSVLogger
from sklearn.model_selection import train_test_split

In [2]:
# Parquet export of the HOIC capture, read from the `no_preprocess` data folder.
data_path = '../../../../data/CIC_2018/no_preprocess/df_equal_DDOS attack-HOIC.parquet'
df = pd.read_parquet(path=data_path)

# Preview the first five rows.
df.head(5)

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,80,6,1519202005,37953,5,3,135,127,135,0,...,32,0.0,0.0,0,0,0.0,0.0,0,0,Benign
1,500,17,1519201986,117573474,3,0,1500,0,500,500,...,8,0.0,0.0,0,0,58800000.0,23800000.0,75600000,42000000,Benign
2,500,17,1519201986,117573474,3,0,1500,0,500,500,...,8,0.0,0.0,0,0,58800000.0,23800000.0,75600000,42000000,Benign
3,500,17,1519201991,99743998,5,0,2500,0,500,500,...,8,4000290.0,0.0,4000290,4000290,31900000.0,37900000.0,75600000,7200397,Benign
4,500,17,1519201991,99743999,5,0,2500,0,500,500,...,8,4000286.0,0.0,4000286,4000286,31900000.0,37900000.0,75600000,7200399,Benign


In [3]:
# Number of rows whose label is missing.
df['Label'].isnull().sum()

0

In [4]:
def count_infinite(frame):
    """Return how many cells of ``frame`` hold +inf or -inf."""
    return frame.isin([np.inf, -np.inf]).sum().sum()


# Count before cleaning so the effect of the step is visible in the output.
infinite_values = count_infinite(df)
print(f"Number of infinite values in the dataframe: {infinite_values}")

# Turn +/-inf into NaN, then drop every row containing any NaN. `df` is
# rebound to the cleaned copy rather than mutated with `inplace=True`.
df = df.replace([np.inf, -np.inf], np.nan).dropna()

# Count again after cleaning.
infinite_values = count_infinite(df)
print(f"Number of infinite values in the dataframe: {infinite_values}")


Number of infinite values in the dataframe: 0
Number of infinite values in the dataframe: 0


In [5]:
# Class distribution of the raw string labels, most frequent first.
df['Label'].value_counts(sort=True, ascending=False)

Label
DDOS attack-HOIC        686012
Benign                  360833
DDOS attack-LOIC-UDP      1730
Name: count, dtype: int64

In [6]:
# Rows whose label is the literal string 'Label' (presumably repeated CSV
# header lines -- TODO confirm) are removed by index.
header_rows = df.index[df['Label'] == 'Label']
df = df.drop(index=header_rows)

In [7]:
# Drop any row that still contains a missing value.
df = df.dropna(axis=0, how='any')

In [8]:
# Binary target: 1 = HOIC attack, 0 = everything else (benign traffic and the
# LOIC-UDP attack rows).
# NOTE(review): the saved `value_counts()` outputs further down show a class 2
# (1384 / 346 rows), which this mapping cannot produce -- they appear to come
# from an earlier run that mapped LOIC-UDP to 2. Re-run the notebook to
# refresh them.
LABEL_TO_TARGET = {'Benign': 0, 'DDOS attack-HOIC': 1, 'DDOS attack-LOIC-UDP': 0}
SPLIT_SEED = 42  # fixed so the train/test split is the same on every run

X = df.drop('Label', axis=1)
y = df['Label'].map(LABEL_TO_TARGET)

# `Series.map` turns labels missing from the dict into NaN; fail loudly
# instead of letting NaN targets flow into the split.
unmapped_labels = df.loc[y.isna(), 'Label'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(f"Labels without a target mapping: {list(unmapped_labels)}")
y = y.astype('int64')

# Stratified 80/20 split keeps the class ratio identical in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=SPLIT_SEED,
)

In [9]:
# Re-attach the target column to each feature split (features first, label last).
df_train, df_test = (
    pd.concat([features, target], axis='columns')
    for features, target in ((X_train, y_train), (X_test, y_test))
)

In [10]:
# Total number of missing cells in the training frame.
df_train.isnull().sum(axis='index').sum()

0

In [11]:
# Total number of missing cells in the test frame.
df_test.isnull().sum(axis='index').sum()

0

In [12]:
# Class distribution of the encoded target in the training split.
df_train['Label'].value_counts(sort=True, ascending=False)

Label
1    548810
0    288666
2      1384
Name: count, dtype: int64

In [13]:
# Print the column/dtype summary of the training split, then of the test split.
for split_frame in (df_train, df_test):
    split_frame.info()

<class 'pandas.core.frame.DataFrame'>
Index: 838860 entries, 1033010 to 451450
Data columns (total 80 columns):
 #   Column             Non-Null Count   Dtype   
---  ------             --------------   -----   
 0   Dst Port           838860 non-null  int32   
 1   Protocol           838860 non-null  int8    
 2   Timestamp          838860 non-null  int32   
 3   Flow Duration      838860 non-null  int32   
 4   Tot Fwd Pkts       838860 non-null  int32   
 5   Tot Bwd Pkts       838860 non-null  int8    
 6   TotLen Fwd Pkts    838860 non-null  int32   
 7   TotLen Bwd Pkts    838860 non-null  int16   
 8   Fwd Pkt Len Max    838860 non-null  int16   
 9   Fwd Pkt Len Min    838860 non-null  int16   
 10  Fwd Pkt Len Mean   838860 non-null  float16 
 11  Fwd Pkt Len Std    838860 non-null  float16 
 12  Bwd Pkt Len Max    838860 non-null  int16   
 13  Bwd Pkt Len Min    838860 non-null  int16   
 14  Bwd Pkt Len Mean   838860 non-null  float16 
 15  Bwd Pkt Len Std    838860 non-nul

In [14]:
# Class distribution of the encoded target in the test split.
df_test['Label'].value_counts(sort=True, ascending=False)

Label
1    137202
0     72167
2       346
Name: count, dtype: int64

In [15]:
# Hand both splits to the project's datamodule.
# NOTE(review): `binarize_on_label=1` presumably makes label 1 the positive
# class and folds every other label into the negative class -- confirm
# against the ExpertPretrainDataModule implementation.
datamodule = ExpertPretrainDataModule(
    df_train,
    df_test,
    binarize_on_label=1,
)

In [16]:
# Input width is every column of the training frame except the target, so the
# model stays in sync with the data instead of relying on a hard-coded 79.
N_FEATURES = len(df_train.columns.drop('Label'))
HIDDEN_SIZES = [256, 128]  # widths of the two hidden layers

expert_model = ExpertModel(N_FEATURES, HIDDEN_SIZES)
print(expert_model)

ExpertModel(
  (model): Sequential(
    (0): BatchNorm1d(79, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Linear(in_features=79, out_features=256, bias=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=128, out_features=1, bias=True)
  )
)


In [None]:
# Both loggers write below the same experiment directory, each under its own
# `name` sub-folder.
LOG_DIR = "../../../../logs/expert/no_preprocess_no_autoencoder_hoic"

logger = TensorBoardLogger(LOG_DIR, name="expert_tensorboard")
csv_logger = CSVLogger(LOG_DIR, name="expert_csv")

# Record the learning rate once per epoch.
lr_monitor = LearningRateMonitor(logging_interval='epoch')

# NOTE(review): `pl` is never imported explicitly in this notebook; it
# presumably reaches this cell through `from models import *`.
trainer = pl.Trainer(
    max_epochs=2,
    logger=[logger, csv_logger],
    callbacks=[lr_monitor],
    accelerator='gpu',    # requires a GPU to be available
    precision='64-true',  # run the whole model in float64
)

trainer.fit(expert_model, datamodule=datamodule)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
2024-10-04 13:44:20.008919: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-04 13:44:20.021073: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-04 13:44:20.024772: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]