### Exploration

In [None]:
# Fetch OpenAI baselines from source and install it in editable mode.
!git clone https://github.com/openai/baselines.git
%cd baselines
!pip install -e .
!pip install stable-baselines
# Pin both the CPU and the GPU TensorFlow builds to 1.14.0.
!pip install --upgrade tensorflow==1.14.0
!pip install --upgrade tensorflow-gpu==1.14.0
# NOTE(review): stable-baselines, gym and pyarrow are installed unpinned — consider
# pinning them so a fresh environment reproduces the same results.
!pip install gym
!pip install pyarrow
!pip uninstall --yes tensorboard-plugin-wit
# Leave the cloned repository directory again.
%cd ..

In [None]:
import os
import gym
import time
import json
import random
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from datetime import datetime

from stable_baselines.deepq.policies import MlpPolicy
from stable_baselines.common.vec_env.dummy_vec_env import DummyVecEnv

from stable_baselines import deepq
from stable_baselines import bench
from stable_baselines import logger
import tensorflow as tf

from baselines.common.tf_util import make_session
from tensorflow.python.client import device_lib
# List the compute devices TensorFlow can see; as the last expression of the cell
# the result is displayed (presumably used to confirm a GPU is available).
device_lib.list_local_devices()

In [None]:
# Load the cleaned CIC-DDoS2019 dataset from a Feather file.
# NOTE(review): the absolute path is machine-specific — consider a configurable data directory.
cicddos2019 = pd.read_feather("/project/datasets/clean-ids-collection/cic-ddos2019/clean/cicddos2019.feather")
# Concatenating the single-frame list produces a new frame with a fresh 0..n-1
# index (ignore_index=True); `data` is the frame every later cell works on.
data = pd.concat([cicddos2019], ignore_index=True)
# Number of columns (features plus label), followed by a preview of the first rows.
print(len(data.columns))
data.head(5)


In [None]:
# Print column dtypes / non-null counts, then display summary statistics
# (describe() is the last expression of the cell, so its result is rendered).
data.info()
data.describe()

Preprocessing done by Laurens D'Hooge

## Creating the model
- Build an agent that classifies network flows automatically
- Feed the agent one flow (packet record) at a time and let it classify that flow
- The predicted class should be equal to the flow's label

In [None]:
def balancing_dataset_sampler_df(df, train_frac=0.2, val_frac=0.1, test_frac=0.7,
                                 random_state=None, verbose=False):
    """Split ``df`` into class-balanced, mutually disjoint train/val/test sets.

    The last column of ``df`` is treated as the class label, every other
    column as a feature.  With ``n`` the size of the smallest (non-empty)
    class, each split receives ``int(n * frac)`` rows of *every* class, drawn
    without replacement from the rows not yet used by an earlier split.

    Sampling ``k`` rows per class directly is statistically equivalent to
    first undersampling the majority class down to ``n`` rows, so no separate
    undersampling pass is performed.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data with a unique index; the label is the last column.
    train_frac, val_frac, test_frac : float
        Fractions of the minority-class size to put in each split.
    random_state : int, numpy.random.RandomState or None
        Seed (or generator) for reproducible sampling.  ``None`` keeps the
        previous behaviour of using NumPy's global random state.
    verbose : bool
        When true, print the class distribution and the per-class split sizes.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_val, y_val, x_test, y_test)`` where the ``x_*``
        are DataFrames of features and the ``y_*`` are label Series.  Rows keep
        their original index labels.

    Raises
    ------
    ValueError
        If a fraction is negative or the splits together need more rows per
        class than the smallest class provides.
    """
    col = df.columns[-1]
    cols = df.columns[:-1]

    # Ignore categories that are declared but never occur: they would force n to 0.
    vc = df[col].value_counts()
    vc = vc[vc > 0]
    n = int(vc.min()) if len(vc) else 0

    fractions = (train_frac, val_frac, test_frac)
    counts = [int(n * frac) for frac in fractions]
    if min(counts) < 0 or sum(counts) > n:
        raise ValueError(
            "Cannot draw {0} rows per class from a minority class of {1} rows; "
            "fractions must be non-negative and sum to at most 1.".format(counts, n)
        )

    if verbose:
        print(vc)
        print("rows per class (train, val, test):", counts)

    # One shared generator so consecutive draws differ but the whole split is
    # reproducible for a given seed.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    def _take_per_class(pool, k):
        """Return ``k`` randomly chosen rows of every class present in ``pool``."""
        if k <= 0 or pool.empty:
            return pool.iloc[0:0]
        # Iterating the groups (instead of groupby.apply) keeps the label column
        # and the original index on every pandas version.
        picks = [
            group.sample(n=k, replace=False, random_state=rng)
            for _, group in pool.groupby(col, observed=True)
        ]
        return pd.concat(picks)

    remaining = df
    splits = []
    for k in counts:
        part = _take_per_class(remaining, k)
        # Remove the drawn rows so later splits cannot reuse them.
        remaining = remaining.drop(index=part.index)
        splits.append(part)
    train_df, validation_df, test_df = splits

    return train_df[cols], train_df[col], validation_df[cols], validation_df[col], test_df[cols], test_df[col]


In [None]:
# Drop the timestamp column when it is present; errors='ignore' skips only the
# "column not found" case instead of silencing every possible exception.
data.drop('Timestamp', axis=1, inplace=True, errors='ignore')
print(data['Label'].value_counts())

# Binary-encode the label: 0.0 for "Benign", 1.0 for every attack class.
# The numeric branch makes the cell safe to re-run: once the column is already
# encoded, comparing it to "Benign" would otherwise mark every row as an attack.
if pd.api.types.is_numeric_dtype(data['Label']):
    is_attack = data['Label'] != 0
else:
    is_attack = data['Label'].astype('object') != "Benign"
# Index labels of all attack rows.
atk_idx = data.index[is_attack]
data['Label'] = is_attack.astype(np.float32)

In [None]:
# Balanced split: 90% of the minority-class size per class for training, 10% for
# testing. val_frac=0.0 asks for 0 validation rows per class, so x_val / y_val are empty.
x_train, y_train, x_val, y_val, x_test, y_test  = balancing_dataset_sampler_df(data, train_frac=0.9, val_frac=0.0, test_frac=0.1)

In [None]:
# The sampler returns the labels as Series; the environment indexes them as
# single-column frames. The isinstance guard keeps the cell re-runnable
# (a DataFrame has no to_frame()).
if isinstance(y_train, pd.Series):
    y_train = y_train.to_frame()
if isinstance(y_test, pd.Series):
    y_test = y_test.to_frame()

# Replace the original dataset row labels by a clean 0..n-1 index so features
# and labels can be addressed by position.
x_train = x_train.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
x_test = x_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)


### Custom environment

In [None]:
class IdsEnv(gym.Env):
    """Gym environment that turns flow classification into an RL task.

    Every observation is one row of the feature frame; the action is the class
    the agent predicts for that row.  The reward is 1 for a correct
    prediction and 0 otherwise, and an episode ends after
    ``images_per_episode`` classified rows.

    Parameters
    ----------
    images_per_episode : int
        Number of rows to classify before an episode is done.
    dataset : tuple
        ``(features, labels)`` with positional alignment.  Note that the
        default is bound to the ``x_train`` / ``y_train`` objects that exist
        when this class is defined.
    random : bool
        ``True`` draws rows uniformly at random (training).  ``False`` walks
        the dataset once in order (evaluation): every row is served exactly
        once and ``StopIteration`` is raised by ``reset()`` when none is left.
    """

    def __init__(self, images_per_episode=1, dataset=(x_train, y_train), random=True):
        self.x, self.y = dataset
        self.random = random
        self.dataset_idx = 0
        self.images_per_episode = images_per_episode
        self.step_count = 0
        # Most recent observation; handed back as the terminal observation.
        self._last_obs = None

        # Actions we can take: classify as benign (0) or malicious (1).
        # Change to 19 when detecting all the different attack classes.
        self.action_space = gym.spaces.Discrete(2)
        # One unbounded value per feature column; the label is not part of the
        # observation.  The width is taken from the data instead of being hard-coded.
        self.observation_space = gym.spaces.Box(
            low=float('-inf'),
            high=float('inf'),
            shape=(self.x.shape[1],),
            dtype=np.float32,
        )

    def step(self, action):
        """Score ``action`` against the current row and advance the episode.

        Returns ``(obs, reward, done, info)`` where ``info['label']`` is the
        true label of the row that was just classified.
        """
        reward = int(action == self.expected_action)
        current_label = self.expected_action

        self.step_count += 1
        done = self.step_count >= self.images_per_episode
        if not done and self._exhausted():
            # Sequential data ran out in the middle of an episode: finish the
            # episode so this reward is still reported; the following reset()
            # raises StopIteration.
            done = True

        # Only consume a new row while the episode continues.  Fetching one
        # here on the final step would throw it away (reset() fetches again),
        # which in sequential mode silently skipped every other sample.
        obs = self._last_obs if done else self._next_obs()

        return obs, reward, done, {'label': current_label}

    def _exhausted(self):
        """True when sequential mode has already served every row."""
        return (not self.random) and self.dataset_idx >= len(self.x)

    def _label_at(self, idx):
        """Return the integer label at position ``idx`` (labels as frame or Series)."""
        label = self.y.iloc[idx]
        if hasattr(label, 'iloc'):
            # A row of a single-column frame: unwrap the scalar explicitly
            # instead of calling int() on a Series.
            label = label.iloc[0]
        return int(label)

    def _next_obs(self):
        """Select the next row, remember its label and return its features."""
        if self.random:
            idx = random.randint(0, len(self.x) - 1)
        else:
            # Check before reading so the last row is served as well.
            if self._exhausted():
                raise StopIteration()
            idx = self.dataset_idx
            self.dataset_idx += 1

        self.expected_action = self._label_at(idx)
        # Plain float32 array, matching the dtype declared in observation_space.
        obs = np.asarray(self.x.iloc[idx], dtype=np.float32)
        self._last_obs = obs
        return obs

    def reset(self):
        """Start a new episode and return its first observation."""
        self.step_count = 0
        return self._next_obs()

Train the classifier using DQN (with double Q-learning enabled)

In [None]:
def ids_ddqn(total_timesteps=int(1.0e6)):
    """Train one Double-DQN classifier on the training environment.

    Parameters
    ----------
    total_timesteps : int
        Number of environment steps to train for.  Defaults to one million,
        the value that was previously hard-coded; pass a smaller number for a
        quick smoke run.

    Returns
    -------
    The trained stable-baselines ``DQN`` model.
    """
    # Default IdsEnv: random sampling from the training split, one sample per episode.
    env = IdsEnv(images_per_episode=1)
    env = bench.Monitor(env, logger.get_dir())

    try:
        model = deepq.DQN(
            MlpPolicy,
            env,
            policy_kwargs=dict(dueling=False),
            double_q=True,
            verbose=1,
            learning_rate=0.00025,
            buffer_size=1000000,
            exploration_fraction=0.1,
            exploration_final_eps=0.01,
            train_freq=4,
            learning_starts=50000,
            target_network_update_freq=10000,
            gamma=0.99,
            prioritized_replay=False,
            # NOTE(review): presumably without effect while prioritized_replay is False.
            prioritized_replay_alpha=0.6,
            batch_size=32,
        )
        model.learn(
            total_timesteps=total_timesteps,
            log_interval=10000,
        )
    finally:
        # Release the monitor/environment even when training is interrupted.
        env.close()

    return model

# Train four independent agents and report the wall-clock time of each run.
ddqn_models = []
for run_no in range(1, 5):
    start_time = time.time()
    ddqn_models.append(ids_ddqn())
    print()
    print("DQN {0} Training Time:".format(run_no), time.time() - start_time)

# Keep the individual names that the evaluation cell refers to.
ddqn_model_1, ddqn_model_2, ddqn_model_3, ddqn_model_4 = ddqn_models


In [None]:
# Metric convention: label 0 (benign) is the positive class,
# label 1 (malicious) is the negative class.
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 for a zero denominator."""
    return float(numerator) / denominator if denominator else 0.0


def ids_eval(model, dataset=None):
    """Evaluate ``model`` on a dataset walked once in order.

    Parameters
    ----------
    model
        Object with ``predict(obs)`` whose first return element is the action.
    dataset : tuple or None
        ``(features, labels)`` to evaluate on; ``None`` uses the module-level
        ``(x_test, y_test)``.

    Returns
    -------
    list
        ``[accuracy, precision, recall, FPR, f1_score]`` as fractions, with
        benign (label 0) counted as the positive class.  A metric whose
        denominator is zero is reported as 0.0.
    """
    if dataset is None:
        dataset = (x_test, y_test)

    TP, FP, TN, FN = 0, 0, 0, 0
    env = IdsEnv(images_per_episode=1, dataset=dataset, random=False)
    try:
        # The sequential environment signals the end of the data by raising
        # StopIteration.  No reset() before the loop: an extra one would
        # discard the first sample.
        while True:
            obs, done = env.reset(), False
            while not done:
                obs, rew, done, info = env.step(model.predict(obs)[0])
                label = info['label']
                correct = rew > 0
                if label == 0:
                    # Actual positive (benign): a miss means it was predicted
                    # negative, i.e. a false negative.
                    if correct:
                        TP += 1
                    else:
                        FN += 1
                elif label == 1:
                    # Actual negative (malicious): a miss means it was
                    # predicted positive, i.e. a false positive.
                    if correct:
                        TN += 1
                    else:
                        FP += 1
    except StopIteration:
        pass

    accuracy = _safe_ratio(TP + TN, TP + FP + FN + TN)
    precision = _safe_ratio(TP, TP + FP)
    recall = _safe_ratio(TP, TP + FN)  # = TPR = Sensitivity
    FPR = _safe_ratio(FP, TN + FP)  # 1 - specificity
    f1_score = _safe_ratio(2 * (precision * recall), precision + recall)
    print()
    print('validation done...')
    print('Accuracy: {0}%'.format(accuracy * 100))
    print('Precision: {0}%'.format(precision * 100))
    print('Recall/TPR/Sensitivity: {0}%'.format(recall * 100))
    print('FPR: {0}%'.format(FPR * 100))
    print('F1 score: {0}'.format(f1_score))
    return [accuracy, precision, recall, FPR, f1_score]

models = [ddqn_model_1, ddqn_model_2, ddqn_model_3, ddqn_model_4]

# Evaluate every trained model; each result is
# [accuracy, precision, recall, FPR, f1_score].
all_results = [ids_eval(model) for model in models]
results_1, results_2, results_3, results_4 = all_results
accuracies = [result[0] for result in all_results]

# Average each metric over the runs.
total_results = [sum(metric) / len(metric) for metric in zip(*all_results)]

print()
print('Total validation done...')
print('Accuracy: {0}%'.format(total_results[0] * 100))
print('Precision: {0}%'.format(total_results[1] * 100))
print('Recall/TPR/Sensitivity: {0}%'.format(total_results[2] * 100))
print('FPR: {0}%'.format(total_results[3] * 100))
print('F1 score: {0}'.format(total_results[4]))

# Index of the most accurate model (the first one wins on ties).
# NOTE(review): the model is selected on the same data the metrics are reported on.
highest = max(range(len(accuracies)), key=accuracies.__getitem__)

print()
# Both values go inside format(); the accuracy is a fraction, so scale it to percent.
print("Saving model {0} with an accuracy of {1}%".format(highest + 1, accuracies[highest] * 100))
models[highest].save('double_dqn_cicddos2019_allfeat.pkl')

