In [1]:
import pandas as pd
import numpy as np
import gymnasium as gym
import swifter
from load_data import LoadDataset

  from .autonotebook import tqdm as notebook_tqdm


In [20]:
import torch

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score

In [3]:
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.policies import *
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.env_checker import check_env

In [4]:
selected_features = ['destination_port',
            'init_win_bytes_backward',
            'init_win_bytes_forward',
            'bwd_packets_s',
            'fwd_iat_min',
            'min_seg_size_forward',
            'flow_iat_min',
            'flow_duration',
            'total_length_of_fwd_packets',
            'total_backward_packets',
            'bwd_iat_min',
            'bwd_packet_length_std',
            'fwd_iat_total',
            'fwd_packet_length_mean',
            'fwd_packet_length_max',
            'flow_iat_std',
            'fwd_packets_s',
            'down_up_ratio',
            'total_fwd_packets',
            'bwd_packet_length_min',
            'flow_bytes_s',
            'bwd_header_length',
            'packet_length_mean',
            'total_length_of_bwd_packets',
            "label"]

In [5]:
df_train = LoadDataset("./oversampling/NRAS_trainset.csv",  columns = selected_features, modify_datatype = True)
df_test = LoadDataset("./raw/raw_testset.csv", columns = selected_features, modify_datatype = True)

In [10]:
df_train['label'].unique()

array([0, 1], dtype=int8)

In [11]:
df_train.loc[2, selected_features].to_numpy()

array([5.0618e+04, 3.6000e+02, 9.7400e+02, 1.5625e+04, 0.0000e+00,
       3.2000e+01, 6.4000e+01, 6.4000e+01, 0.0000e+00, 1.0000e+00,
       0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
       0.0000e+00, 1.5625e+04, 1.0000e+00, 1.0000e+00, 0.0000e+00,
       0.0000e+00, 3.2000e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00],
      dtype=float32)

In [8]:
df_train['label'] = df_train['label'].replace([2,3,4,5,6,7,8,9], 1)
df_test['label'] = df_test['label'].replace([2,3,4,5,6,7,8,9], 1)

In [21]:
X_train = df_train.drop('label', axis = 1)
y_train = df_train['label']

X_test = df_test.drop('label', axis = 1)
y_test = df_test['label']

In [22]:
scaler = MinMaxScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [23]:
train_data = pd.DataFrame(data = X_train, columns = selected_features[:-1])
test_data = pd.DataFrame(data = X_test, columns = selected_features[:-1])

train_data['label'] = y_train
test_data['label'] = y_test

In [35]:
del X_train, X_test, y_train, y_test, df_train, df_test
import gc
gc.collect()

2648

In [48]:
class IDS(gym.Env):
    def __init__(self, data):
        super().__init__()
        self.data = data

        self.action_space = gym.spaces.Discrete(2)
        self.observation_space = gym.spaces.Box(low = -20, high = 20, shape = (data.shape[1] - 1,), dtype = np.float64)
        
        self.rewards = {(0, 0): 0, (0, 1): -1, (1, 1): 1, (1, 0): -1} # {(true_label, action) : reward}
        
        self.current_step = 0
        self.max_steps = 1000

        self.steps_beyond_done = None

    def reset(self, seed = None):
        self.steps_beyond_done = None
        self.current_step = 0
        
        self.index = np.random.randint(0, self.data.shape[0])
        self.obs = self.data.iloc[self.index]
        self.label = int(self.obs.pop("label"))

        return self.obs.values, {}

    def step(self, action):
        info = {}
        self.current_step += 1

        reward = self.rewards[(self.label, action)]
        
        terminated = True if (self.current_step >= self.max_steps) else False
        truncated = False
        if not terminated:
            self.index += 1
            if self.index >= self.data.shape[0]:
                self.index = 0

            self.obs = self.data.iloc[self.index]
            self.label = int(self.obs.pop("label"))

        elif self.steps_beyond_done is None:
            self.steps_beyond_done = 0
        else:
            self.steps_beyond_done += 1
            reward = 0

        return self.obs.values, reward, terminated, truncated, info

In [49]:
check_env(IDS(train_data))

In [50]:
class MyCallback(BaseCallback):
    def __init__(self, train, val, eval_freq):
        super().__init__()
        
        self.train_data = train
        self.val_data = val
        self.eval_freq = eval_freq

    def _on_step(self) -> bool:
        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            super()._on_step()

            predicted = self.val_data.drop(columns = ["label"]).swifter.apply(
                lambda x: self.model.predict(x, deterministic=True)[0], axis=1
            )
            accuracy = accuracy_score(self.val_data["label"], predicted)
            f1 = f1_score(self.val_data["label"], predicted)
            
            print(f"[VAL SCORE] Timesteps: {self.num_timesteps} \n\t --> val_acc: {accuracy:.4f} -- f1-score: {f1:.4f}")
            print("-" * 60)

        return True
    
eval_callback = MyCallback(train_data, test_data, eval_freq = 20000 // 15)

In [51]:
envs = DummyVecEnv([lambda: IDS(train_data)] * 15)

In [52]:
policy_kwargs = dict(activation_fn = torch.nn.ReLU, net_arch = [128, 64, 32])

In [53]:
model = PPO("MlpPolicy", envs, 
            policy_kwargs = policy_kwargs,
            learning_rate = lambda progress: progress * 0.002,
            batch_size = 32, n_epochs = 50,
            clip_range = 0.2, verbose = 2,
            device = 'cuda')

Using cuda device


In [54]:
model.learn(total_timesteps = 6000000, callback = eval_callback)

[VAL SCORE] Timesteps: 19995 
	 --> val_acc: 0.1689 -- f1-score: 0.2889
------------------------------------------------------------
------------------------------
| time/              |       |
|    fps             | 1356  |
|    iterations      | 1     |
|    time_elapsed    | 22    |
|    total_timesteps | 30720 |
------------------------------
[VAL SCORE] Timesteps: 39990 
	 --> val_acc: 0.1691 -- f1-score: 0.2893
------------------------------------------------------------
[VAL SCORE] Timesteps: 59985 
	 --> val_acc: 0.1691 -- f1-score: 0.2893
------------------------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 101         |
|    iterations           | 2           |
|    time_elapsed         | 604         |
|    total_timesteps      | 61440       |
| train/                  |             |
|    approx_kl            | 0.020416223 |
|    clip_fraction        | 0.512       |
|    c

<stable_baselines3.ppo.ppo.PPO at 0x26d6c70f8b0>

In [55]:
model.save("./checkpoint/PPO_drl.zip")