### Exploration

Rub setup.ipynb first when using on jupyterlab

In [1]:
# CHANGE FOR TRAINING DIFFERENT MODELS
TYPE = 'DQN'
NAME = 'dueling'
DATASET = 'cicddos2019-top'
AMOUNT = 4

In [2]:
import os
import gym
import time
import json
import random
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from datetime import datetime

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.deepq.policies import FeedForwardPolicy, register_policy
from stable_baselines.common.vec_env.dummy_vec_env import DummyVecEnv

from stable_baselines import deepq
from stable_baselines import A2C
from stable_baselines import bench
from stable_baselines import logger
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

from tensorflow.python.client import device_lib
device_lib.list_local_devices()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."


[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 8859676435590725852,
 name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 6941747268461902661
 physical_device_desc: "device: XLA_CPU device",
 name: "/device:XLA_GPU:0"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 12949064853774944845
 physical_device_desc: "device: XLA_GPU device"]

In [3]:
# [dueling, double, per, pn]
parameters_presets = {
    'vanilla': [False, False, False, False],
    'dueling': [True, False, False, False],
    'double': [False, True, False, False],
    'dd': [True, True, False, False],
    'pn_dd': [True, True, False, True],
    'per_dd': [True, True, True, False],
    'pn_per_dd': [True, True, True, True],
}
# [separate datasets?, train set dir, test set dir or empty, dummies list(only for sets with split train and test), topfeat?]
dataset_presets = {
    'nslkdd': [
        True, 
        "/project/datasets/clean-ids-collection/nsl-kdd/clean/KDDTrain.feather", 
        "/project/datasets/clean-ids-collection/nsl-kdd/clean/KDDTest.feather",
        [
            'protocol_type', 
            'service',
            'flag'
        ],
        'class',
        'normal',
        False
              ],
    'unswnb15': [
        True, 
        "/project/datasets/clean-ids-collection/unsw-nb15/clean/designated-train-test-sets/UNSW_NB15_training-set.feather", 
        "/project/datasets/clean-ids-collection/unsw-nb15/clean/designated-train-test-sets/UNSW_NB15_testing-set.feather",
        [
            'proto', 
            'service',
            'state'
        ],
        'attack_cat',
        'normal',
        False
              ],
    'cicddos2019': [
        False, 
        "/project/datasets/clean-ids-collection/cic-ddos2019/clean/cicddos2019.feather", 
        None,
        [],
        'Label',
        'Benign',
        False
              ],
    'cicddos2019-top': [
        False, 
        "/project/datasets/clean-ids-collection/cic-ddos2019/clean/cicddos2019.feather", 
        None,
        [],
        'Label',
        'Benign',
        True
              ],
    'cicdos2017': [
        False, 
        "/project/datasets/clean-ids-collection/cic-dos2017/clean/cicdos2017.feather", 
        None,
        [],
        'Label',
        'Benign',
        False
              ],
    'cicids2017': [
        False, 
        "/project/datasets/clean-ids-collection/cic-ids2017/clean/cicids2017.feather", 
        None,
        [],
        'Label',
        'Benign',
        False
              ],
}
parameters = parameters_presets[NAME]
data_parameters = dataset_presets[DATASET]

In [4]:
def mal_ben(df, label, normal):
    print("Preprocessing: make labels binary")
    print()
    df[label] = df[label].astype('object')
    atk_idx = df.loc[df[label] != normal].index
    df.loc[atk_idx, label] = 1.0
    df.loc[df.index.difference(atk_idx), label] = 0.0
    df[label] = df[label].astype(dtype=np.float32)

def preproc(df_train, df_test, columns):
    print("Preprocessing: One hot encoding + abundant features")
    print()
    df_train = pd.get_dummies(df_train,columns=columns)
    df_test = pd.get_dummies(df_test,columns=columns)
    extra_removables = df_test.columns ^ df_train.columns
    for to_remove in extra_removables:
            try:
                df_test.drop(to_remove, inplace=True, axis=1)
            except:
                print(f"{to_remove} already not part of test set, skipping")
            try:
                df_train.drop(to_remove, inplace=True, axis=1)
            except:
                print(f"{to_remove} already not part of train set, skipping")
    return df_train, df_test

def split_df(df):
    print("Preprocessing: split into X and Y sets")
    print()
    col = df.columns[-1]
    cols = df.columns[:-1]
    return df[cols], df[col]

def top_feat(df, dataset):
    print("Preprocessing: convert to top features")
    print()
    features = {
        'cicdos2017-top': ["Init Bwd Win Bytes",  "Idle Min", "ACK Flag Count", "Fwd Packet Length Min", "Fwd PSH Flags"],
        'cicids2017-top': ["Avg Packet Size", "Packet Length Variance", "Fwd Packet Length Mean", "Init Fwd Win Bytes", "Bwd IAT Total"],
        'cicddos2019-top': ["URG Flag Count", "Down/Up Ratio", "Bwd Packet Length Min", "ACK Flag Count", "Fwd Packets Length Total"],
        'nslkdd-top': ["dst_host_serror_rate", "service_private", "count","dst_host_count", "service_domain_u", "flag_REJ", "dst_host_diff_srv_rate"],
        'unswnb15-top': ["sttl", "dttl", "ct_state_ttl", "service", "dload", "rate", "dmean", "dbytes", "dur", "is_sm_ips_ports", "dloss"],
    }
    important_features = features[dataset] + ["Label"] # Adding class for custom environment logic
    important_features = list(set(important_features))
    removable_features = df.columns ^ important_features
    
    return df.drop(labels=removable_features, axis='columns')

def nslkdd_split_df(train_df, test_df):
    train_col = train_df.columns[-1]
    print(train_col)
    train_cols = train_df.columns[:-1]
    print(train_cols)
    test_col = test_df.columns[-1]
    print(test_col)
    test_cols = test_df.columns[:-1]
    print(test_cols)

    return train_df[train_cols], train_df[train_col], test_df[test_cols], test_df[test_col]

def balancing_dataset_sampler_df(df, train_frac=0.2, val_frac=0.1, test_frac=0.7):
    col = df.columns[-1]
    print(col)
    cols = df.columns[:-1]
    print(cols)
    vc = df[col].value_counts()
    print(vc)
    n = vc.iloc[-1]
    print(n)
    m = vc.iloc[0]
    print(m)
    print(int(m-n))
    initial_cut = df.loc[df[col] == vc.index[0]].sample(n=int(m-n), replace=False)
    print(initial_cut.index)
    df = df.drop(index=initial_cut.index)
    vc = df[col].value_counts()
    print(vc)
    print(int(n*train_frac))
    train_df = df.groupby(col).apply(lambda x: x.sample(n=int(n*train_frac), replace=False))
    train_df.reset_index(level=0, inplace=True, drop=True)
    df = df.drop(index=train_df.index)

    validation_df = df.groupby(col).apply(lambda x: x.sample(n=int(n*val_frac), replace=False))
    validation_df.reset_index(level=0, inplace=True, drop=True)
    df = df.drop(index=validation_df.index)

    test_df = df.groupby(col).apply(lambda x: x.sample(n=int(n*test_frac), replace=False))
    test_df.reset_index(level=0, inplace=True, drop=True)
    df = df.drop(index=test_df.index)

    return train_df[cols], train_df[col], validation_df[cols], validation_df[col], test_df[cols], test_df[col]


def ids_eval(model, x_test, y_test):
    print("Evaluation: starting validation of the model")
    print()
    TP, FP, TN, FN = 0,0,0,0
    env = IdsEnv(images_per_episode=1, dataset=(x_test, y_test), random=False)
    obs, done = env.reset(), False
    try:
        while True:
            obs, done = env.reset(), False
            while not done:
                obs, rew, done, info = env.step(model.predict(obs)[0])
                label = info['label']
                if label == 0 and rew > 0:
                    TP += 1
                if label == 0 and rew == 0:
                    FP += 1
                if label == 1 and rew > 0:
                    TN += 1
                if label == 1 and rew == 0:
                    FN += 1

    except StopIteration:
        accuracy = (float(TP + TN) / (TP + FP + FN + TN)) 
        precision = (float(TP) / (TP + FP))
        recall = (float(TP) / (TP + FN)) # = TPR = Sensitivity
        try:
            FPR = (float(FP) / (TN + FP)) # 1 - specificity
        except:
            FPR = 0.0
        f1_score = 2 * (precision * recall) / (precision + recall)
        print()
        print('Evaluation: validation done...')
        print('Accuracy: {0}%'.format(accuracy * 100))
        print('Precision: {0}%'.format(precision * 100))
        print('Recall/TPR/Sensitivity: {0}%'.format(recall * 100))
        print('FPR: {0}%'.format(FPR * 100))
        print('F1 score: {0}'.format(f1_score))
    return [accuracy, precision, recall, FPR, f1_score]


In [5]:
if data_parameters[0]:
    df_train = pd.read_feather(data_parameters[1])
    df_test = pd.read_feather(data_parameters[2])
    df_train, df_test = preproc(df_train, df_test, data_parameters[3])
    LENGTH = len(df_train.columns) - 1
else:
    df = pd.read_feather(data_parameters[1])
    LENGTH = len(df.columns) - 1

In [6]:
if data_parameters[0]:
    df_train.info()
    df_train.describe()
else:
    df.info()
    df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 138007 entries, 0 to 138006
Data columns (total 67 columns):
 #   Column                    Non-Null Count   Dtype   
---  ------                    --------------   -----   
 0   Flow Duration             138007 non-null  int32   
 1   Total Fwd Packets         138007 non-null  int16   
 2   Total Backward Packets    138007 non-null  int16   
 3   Fwd Packets Length Total  138007 non-null  float32 
 4   Bwd Packets Length Total  138007 non-null  float32 
 5   Fwd Packet Length Max     138007 non-null  float32 
 6   Fwd Packet Length Min     138007 non-null  float32 
 7   Fwd Packet Length Mean    138007 non-null  float32 
 8   Fwd Packet Length Std     138007 non-null  float32 
 9   Bwd Packet Length Max     138007 non-null  float32 
 10  Bwd Packet Length Min     138007 non-null  float32 
 11  Bwd Packet Length Mean    138007 non-null  float32 
 12  Bwd Packet Length Std     138007 non-null  float32 
 13  Flow Bytes/s              138

In [7]:
if data_parameters[6]:
    if data_parameters[0]:
        df_train = top_feat(df_train, DATASET)
        df_test = top_feat(df_test, DATASET)
    else:
        df = top_feat(df, DATASET)

Preprocessing: convert to top features



Preprocessing done by Laurens D'Hooge

## Creating the model
- Build an agent to classify network flow automatically
- Feed a packet that gets classified
- Want the classification to be equal to the label

In [8]:
if data_parameters[0]:
    mal_ben(df_train, data_parameters[4], data_parameters[5])
    mal_ben(df_test, data_parameters[4], data_parameters[5])
else: 
    mal_ben(df, data_parameters[4], data_parameters[5])

Preprocessing: make labels binary



In [9]:
if data_parameters[0]:
    LENGTH = len(df_train.columns) - 1
else:
    LENGTH = len(df.columns) - 1

In [10]:
if data_parameters[0]:
    x_train, y_train, x_test, y_test  = nslkdd_split_df(df_train, df_test)
else:
    x_train, y_train, x_val, y_val, x_test, y_test = balancing_dataset_sampler_df(df, train_frac=0.8, val_frac=0.0, test_frac=0.2)

Label
Index(['Fwd Packets Length Total', 'Bwd Packet Length Min', 'ACK Flag Count',
       'URG Flag Count', 'Down/Up Ratio'],
      dtype='object')
0.0    95092
1.0    42915
Name: Label, dtype: int64
42915
95092
52177
Int64Index([113332, 131988, 118772, 121012,  79720,  93331, 110990,  56478,
             29298,  82362,
            ...
             60272,  75449, 104232, 111552, 127563,  31014, 112030,  65670,
            125591, 111488],
           dtype='int64', length=52177)
0.0    42915
1.0    42915
Name: Label, dtype: int64
34332


In [11]:
y_train = y_train.to_frame()
y_test = y_test.to_frame()

# custom keys -> replace by index

x_train = x_train.set_index([pd.Index(range (0, len(x_train)))])
y_train = y_train.set_index([pd.Index(range (0, len(y_train)))])
x_test = x_test.set_index([pd.Index(range (0, len(x_test)))])
y_test = y_test.set_index([pd.Index(range (0, len(y_test)))])


### Custom environment

In [12]:
class IdsEnv(gym.Env):
    def __init__(self, images_per_episode=1, dataset=(x_train, y_train), random=True):
        # Actions we can take, classify as malicious or non-malicious (later also the correct attack)
        # change to 19 if detectiong all different attacks
        self.action_space = gym.spaces.Discrete(2)
         # All the features we have, len(important_features) - 1 features and 1 label. Label should not be included
        self.observation_space = gym.spaces.Box(low=float('-inf'), high=float('inf'), shape=(LENGTH,))
        self.images_per_episode = images_per_episode
        self.step_count = 0

        self.x, self.y = dataset
        self.random = random
        self.dataset_idx = 0
    
    def step(self, action):
        done = False
        reward = int(action == self.expected_action)
        current_label = self.expected_action
        obs = self._next_obs()

        self.step_count += 1
        if self.step_count >= self.images_per_episode:
            done = True

        return obs, reward, done, {'label': current_label}

    def _next_obs(self):
        if self.random:
            next_obs_idx = random.randint(0, len(self.x) - 1)
            self.expected_action = int(self.y.iloc[next_obs_idx,:])
            obs = self.x.iloc[next_obs_idx,:]

        else:
            obs = self.x.iloc[self.dataset_idx]
            self.expected_action = int(self.y.iloc[self.dataset_idx])

            self.dataset_idx += 1
            if self.dataset_idx >= len(self.x):
                raise StopIteration()
        return obs
    
    def reset(self):
        self.step_count = 0

        obs = self._next_obs()
        return obs

Train classifier using dqn

In [13]:
class CustomPolicy(FeedForwardPolicy):
    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch,
                 reuse=False, obs_phs=None, dueling=True, **_kwargs):
        super(CustomPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse,
                                        feature_extraction="mlp", obs_phs=obs_phs, dueling=dueling,
                                        layer_norm=True, **_kwargs)
        
register_policy("CustomPolicy", CustomPolicy)

In [14]:
def ids(params, tp):
    
    env = IdsEnv(images_per_episode=1)
    if tp == 'DQN':
        model = deepq.DQN(
            CustomPolicy, 
            env, 
            policy_kwargs=dict(dueling=params[0], layers=[128,128]),
            double_q=params[1],
            verbose=1, 
            learning_rate=0.00025,
            buffer_size=1000000,
            exploration_fraction=0.1,
            exploration_final_eps=0.1,
            train_freq=4,
            learning_starts=5000,
            target_network_update_freq=10000,
            gamma=1.0,
            param_noise=params[3],
            prioritized_replay=params[2],
            prioritized_replay_alpha=0.6,
            batch_size=32,
        )
    if tp == 'A2C':
        model = A2C(
            MlpPolicy, 
            env, 
            verbose=1,
        )
    model.learn(
        total_timesteps=int(1.0e5),
        log_interval=int(1.0e4),
    )

    env.close()
    
    return model

times = []
models = []

for i in range(AMOUNT):
    start_time = time.time()
    models.append(ids(parameters, TYPE))
    print()
    duration = time.time() - start_time
    print(f"{TYPE} {i + 1} Training Time:", duration)
    times.append(duration)







--------------------------------------
| % time spent exploring  | 10       |
| episodes                | 10000    |
| mean 100 episode reward | 0.8      |
| steps                   | 9999     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 9        |
| episodes                | 20000    |
| mean 100 episode reward | 0.9      |
| steps                   | 19999    |
--------------------------------------
--------------------------------------
| % time spent exploring  | 9        |
| episodes                | 30000    |
| mean 100 episode reward | 0.8      |
| steps                   | 29999    |
--------------------------------------
--------------------------------------
| % time spent exploring  | 9        |
| episodes                | 40000    |
| mean 100 episode reward | 0.8      |
| steps                   | 39999    |
--------------------------------------
--------------------------------------
| % time spent exploring 

In [15]:
# 0 is benign (positive), 1 is malicious (negative) 
def ids_eval(model):
    TP, FP, TN, FN = 0,0,0,0
    env = IdsEnv(images_per_episode=1, dataset=(x_test, y_test), random=False)
    obs, done = env.reset(), False
    try:
        while True:
            obs, done = env.reset(), False
            while not done:
                obs, rew, done, info = env.step(model.predict(obs)[0])
                label = info['label']
                if label == 0 and rew > 0:
                    TP += 1
                if label == 0 and rew == 0:
                    FP += 1
                if label == 1 and rew > 0:
                    TN += 1
                if label == 1 and rew == 0:
                    FN += 1

    except StopIteration:
        accuracy = (float(TP + TN) / (TP + FP + FN + TN)) 
        precision = (float(TP) / (TP + FP))
        try:
            recall = (float(TP) / (TP + FN)) # = TPR = Sensitivity
        except:
            recall = 0
        try:
            FPR = (float(FP) / (TN + FP)) # 1 - specificity
        except:
            FPR = 0
        try:
            f1_score = 2 * (precision * recall) / (precision + recall)
        except:
            f1_score = 0
        print()
        print('validation done...')
        print('Accuracy: {0}%'.format(accuracy * 100))
        print('Precision: {0}%'.format(precision * 100))
        print('Recall/TPR/Sensitivity: {0}%'.format(recall * 100))
        print('FPR: {0}%'.format(FPR * 100))
        print('F1 score: {0}'.format(f1_score))
    return [accuracy, precision, recall, FPR, f1_score]

results = []
for i, m in enumerate(models):
    results.append(ids_eval(m))

total_results = []
total_results = [0] * len(results[0])
accuracies = [result[0] for result in results]

for j in range(AMOUNT):
    for i in range(len(results[0])):
        total_results[i] =  total_results[i] + results[j][i]
for i in range(len(results[0])):
        total_results[i] =  total_results[i] / AMOUNT
        
print()    
print('Total validation done...')
print('Accuracy: {0}%'.format(total_results[0] * 100))
print('Precision: {0}%'.format(total_results[1] * 100))
print('Recall/TPR/Sensitivity: {0}%'.format(total_results[2] * 100))
print('FPR: {0}%'.format(total_results[3] * 100))
print('F1 score: {0}'.format(total_results[4]))


path = f'/project/normalization-runs/models'

if not os.path.exists(path):
    print(f"{path} doesn't exist, creating it + subfolders")
    os.makedirs(path)
    os.makedirs(f'{path}/{DATASET}')
    os.makedirs(f'{path}/{DATASET}/DQN')
    os.makedirs(f'{path}/{DATASET}/A2C')
    
if not os.path.exists(f'{path}/{DATASET}'):
    print(f"{path}/{DATASET} doesn't exist, creating it + subfolders")
    os.makedirs(f'{path}/{DATASET}')
    os.makedirs(f'{path}/{DATASET}/DQN')
    os.makedirs(f'{path}/{DATASET}/A2C')

for i in range(AMOUNT):
    print()
    print(f'Saving model {i + 1} to {path}/{DATASET}/{TYPE}/{NAME}_{TYPE}_{i + 1}.pkl')
    models[i].save(f'{path}/{DATASET}/{TYPE}/{NAME}_{TYPE}_{i + 1}.pkl')








validation done...
Accuracy: 97.89093451409929%
Precision: 96.17804707527384%
Recall/TPR/Sensitivity: 99.58976833976834%
FPR: 3.695358269490762%
F1 score: 0.9785417901600474

[0.9789093451409928, 0.9617804707527383, 0.9958976833976834, 0.036953582694907616, 0.9785417901600474]

Total validation done...
Accuracy: 97.89093451409929%
Precision: 96.17804707527384%
Recall/TPR/Sensitivity: 99.58976833976834%
FPR: 3.695358269490762%
F1 score: 0.9785417901600474
/project/to-run-for-results/nsl-kdd/128/models/cicddos2019-top doesn't exist, creating it + subfolders

Saving model 1
