In [1]:
import warnings
warnings.filterwarnings("ignore")
import os
# Set before any TensorFlow-backed import so its C++ logging is quieted.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import joblib
import numpy as np
import pandas as pd
import torch  # used directly in later cells; presumably only in scope via `from moe import *` before
from sklearn.utils.class_weight import compute_class_weight

# Star import kept at its original position relative to the imports below so
# that name-shadowing order is unchanged.
from moe import *
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import LearningRateMonitor
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Load the treated train and test splits (train first, then test) from the
# artifacts directory.
data_train, data_test = (
    pd.read_parquet(f'artifacts/{split}_dataset_treated.parquet')
    for split in ('train', 'test')
)

In [4]:
# Report the in-memory footprint of both frames in MiB (bytes / 1024**2),
# one line per frame.
print(
    f'Memory usage data_train: {data_train.memory_usage().sum() / 1024 ** 2:.2f} MB',
    f'Memory usage data_test: {data_test.memory_usage().sum() / 1024 ** 2:.2f} MB',
    sep='\n',
)

Memory usage data_train: 1351.85 MB
Memory usage data_test: 585.22 MB


In [None]:
def get_class_weights(y):
    """Return 'balanced' per-class weights for the label vector ``y``.

    Thin wrapper around scikit-learn's ``compute_class_weight`` with the
    ``'balanced'`` heuristic, which weights each class by
    ``n_samples / (n_classes * count(class))`` so rarer classes weigh more.

    Parameters
    ----------
    y : array-like
        Class label of every sample.

    Returns
    -------
    numpy.ndarray
        One weight per distinct label, in the order of ``np.unique(y)``.
    """
    # compute_class_weight is imported in the notebook's import cell rather
    # than inside the function, so all dependencies are declared in one place.
    return compute_class_weight('balanced', classes=np.unique(y), y=y)

In [3]:
# Mapping from class name to integer label id (see the printed output).
# joblib is imported in the notebook's import cell.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code, so only load artifacts that come from a trusted source.
class_dict = joblib.load('artifacts/clean_data/encoding_dict.joblib')
print(class_dict)

{'DDoS': 0, 'DoS': 1, 'MQTT': 2, 'benign': 3, 'recon': 4, 'spoofing': 5}


In [4]:
# Dropout rate passed to both the GateModel and the MoE built later on.
dropout_rate: float = 0.2

In [5]:
# Seed the Python, NumPy and PyTorch RNGs before the first stochastic step
# (model weight initialisation) so that Restart & Run All gives repeatable
# training runs; workers=True also seeds DataLoader worker processes.
pl.seed_everything(42, workers=True)

# NOTE(review): X_train and class_weights are not defined in any cell shown in
# this notebook (get_class_weights is defined but never called); they must be
# created before this cell for a fresh-kernel run to work.
# The literal 6 presumably is the number of classes in class_dict, and the
# list the hidden-layer sizes; confirm against moe.GateModel's signature.
gate = GateModel(
    X_train.shape[1],
    6,
    [64, 128, 64],
    dropout_rate,
    class_weights=class_weights,
)

In [6]:
# Experiment tracking for the gate run: TensorBoard and CSV logs under
# ./logs, plus learning-rate logging once per epoch.
logger = TensorBoardLogger(save_dir="logs", name="gate_tensorboard")
csv_logger = pl.loggers.CSVLogger(save_dir="logs", name="gate_csv")
lr_monitor = LearningRateMonitor(logging_interval='epoch')


def _cuda_loader(features, labels, shuffle=False):
    """Build a DataLoader (batch size 4096) over CUDA tensors.

    ``features`` must expose ``.values`` (e.g. a pandas DataFrame); ``labels``
    is handed to ``torch.tensor`` as-is. Both tensors are created directly on
    the 'cuda' device.
    """
    dataset = TensorDataset(
        torch.tensor(features.values, device='cuda'),
        torch.tensor(labels, device='cuda'),
    )
    return DataLoader(dataset, batch_size=4096, shuffle=shuffle)


# NOTE(review): the test split (X_test / y_test) is used as the validation
# set, so the logged val_* metrics are test-set metrics; confirm this is
# intended.
train_loader = _cuda_loader(X_train, y_train, shuffle=True)
val_loader = _cuda_loader(X_test, y_test)

# Train the gate on its own for 10 epochs on the GPU.
trainer = pl.Trainer(
    accelerator='gpu',
    max_epochs=10,
    logger=[logger, csv_logger],
    callbacks=[lr_monitor],
)

trainer.fit(gate, train_dataloaders=train_loader, val_dataloaders=val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
2024-09-20 10:35:14.470339: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-20 10:35:14.481500: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-20 10:35:14.484787: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1

Epoch 9: 100%|██████████| 255/255 [00:13<00:00, 18.65it/s, v_num=2, train_loss=1.230, val_total_loss=1.280, val_f2=0.683]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 255/255 [00:13<00:00, 18.65it/s, v_num=2, train_loss=1.230, val_total_loss=1.280, val_f2=0.683]


In [7]:
# Mixture-of-experts model built around the gate fitted above: one expert per
# distinct label in y_train, with the same hidden-layer sizes, dropout rate
# and class weights that were passed to the gate.
model = MoE(
    gate_model=gate,
    num_expert_models=np.unique(y_train).size,
    expert_hidden_units=[64, 128, 64],
    dropout_rate=dropout_rate,
    class_weights=class_weights,
)

In [8]:
# Fresh loggers for the MoE run (these rebind the names used for the gate
# run): TensorBoard and CSV output under ./logs, learning rate logged once
# per epoch.
logger = TensorBoardLogger(save_dir="logs", name="MoE_tensorboard")
csv_logger = pl.loggers.CSVLogger(save_dir="logs", name="MoE_csv")
lr_monitor = LearningRateMonitor(logging_interval='epoch')

In [9]:
# Trainer for the full MoE run: 50 epochs on the GPU, reporting to both
# loggers and the learning-rate monitor created in the previous cell.
trainer = pl.Trainer(
    accelerator='gpu',
    callbacks=[lr_monitor],
    logger=[logger, csv_logger],
    max_epochs=50,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [10]:
# Rebuild the loaders for the MoE run: tensors are created directly on the
# GPU, batches hold 4096 rows, and only the training loader is shuffled.
# NOTE(review): X_test / y_test serve as the validation set here.
train_loader, val_loader = (
    DataLoader(
        TensorDataset(
            torch.tensor(features.values, device='cuda'),
            torch.tensor(labels, device='cuda'),
        ),
        batch_size=4096,
        shuffle=is_train,
    )
    for features, labels, is_train in (
        (X_train, y_train, True),
        (X_test, y_test, False),
    )
)

In [11]:
# Train the MoE; the summary printed below lists the gate among the
# trainable modules alongside the experts.
trainer.fit(
    model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | gate      | GateModel        | 18.7 K | train
1 | experts   | ModuleList       | 110 K  | train
2 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
129 K     Trainable params
0         Non-trainable params
129 K     Total params
0.517     Total estimated model params size (MB)
108       Modules in train mode
0         Modules in eval mode


Epoch 49: 100%|██████████| 255/255 [00:15<00:00, 16.50it/s, v_num=2, train_loss=0.803, val_total_loss=0.844, val_f2=0.754, train_f2=0.822]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 255/255 [00:16<00:00, 15.68it/s, v_num=2, train_loss=0.803, val_total_loss=0.844, val_f2=0.754, train_f2=0.822]
