# 1. Building Custom Environment

This section involves using OpenAI Gym to develop our Deep Reinforcement Learning (DRL) environment. The environment that we define will have the following characteristics:

Actions:

0 (do not alert)
1 (alert)

Rewards:

+1 if agent correctly alerts to an attack
0 if agent does not raise an alert when it is not needed
-1 if agent does not raise an alert when there is an attack
-1 if agent raises an alert when it is not needed

Episode Termination Condition:

i. An episode reaches >= 500 steps
ii. An attack is issued and no alert is made

In [50]:
import gym 
import numpy as np
import pandas as pd
import tensorflow
from stable_baselines.common.env_checker import check_env

In [54]:
class DRL_IDS_Env(gym.Env):
    '''
    Gym environment for an intrusion-detection agent.

    Each observation is one row of ``test_data`` with its ``'label'`` entry
    removed. The agent answers with 0 (do not alert) or 1 (alert) and is
    rewarded according to ``self.rewards``. An episode ends when
    ``max_steps`` steps have been taken or when an attack row (label 1) is
    answered with "do not alert".
    '''

    def __init__(self, test_data, max_steps=500): # test data created in last module
        '''
        constructor

        test_data: pandas DataFrame holding the feature columns plus a
                   'label' column (0 = benign, 1 = attack).
        max_steps: maximum number of steps per episode (default 500).
        '''
        super().__init__()
        self.test_data = test_data

        # set limit for episode length
        self.max_steps = max_steps
        # counter for steps taken after the episode has already finished;
        # None means the current episode has not finished yet
        self.extra_steps = None

        # defining the reward function as discussed above
        # [(true_label, action) : reward]
        self.rewards = {(0, 1): -1, # (benign, alert) : -1
                        (1, 0): -1, # (attack, no alert) : -1
                        (1, 1): 1, # (attack, alert) : 1
                        (0, 0): 0} # (benign, no alert) : 0

        # defining action/observation space
        self.action_space = gym.spaces.Discrete(2)  # either 0 (do not alert) or 1 (alert)
        # one real-valued entry per feature column (every column except 'label');
        # the [0, 1] bounds presume the features were scaled upstream -- TODO confirm
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(test_data.shape[1] - 1,), dtype=np.float64)

    def _load_observation(self):
        '''
        pull row self.i from the dataset and split it into features / label
        '''
        self.obs = self.test_data.iloc[self.i] # pulling that row, or 'observation' from our dataset
        # pop removes 'label' from the row, so self.obs keeps the features only
        self.label = int(self.obs.pop('label'))

    def step(self, action):
        '''
        agent taking a single step
        this method is called after an agent has chosen an action

        action: 0 (do not alert) or 1 (alert)
        returns: (observation, reward, finished, ep_info)
        raises: ValueError if action is not part of the action space
        '''

        # check if action exists in action space
        # (contains() returns a bool, it does not raise on its own)
        if not self.action_space.contains(action):
            raise ValueError('Invalid action {!r}: expected 0 (do not alert) or 1 (alert)'.format(action))
        action = int(action) # numpy integers -> plain int for the reward lookup

        # determine if the episode is finished
        ep_info = {}
        finished = False
        self.current_step += 1
        if self.current_step >= self.max_steps:
            ep_info['end_cause'] = 'max_step_limit_reached'
            finished = True # we do not want to exceed our max step limit

        if self.label == 1 and action == 0: # this implies there was an attack that we did not alert
            ep_info['end_cause'] = 'attack_unalerted'
            finished = True

        # calculate reward based on the label of the observation and action taken by agent
        reward = self.rewards[(self.label, action)] # maps back to our self.rewards dictionary

        if not finished:
            # hop to next row in dataset, wrapping back to the first row
            # once we run past the end
            self.i += 1
            if self.i >= self.test_data.shape[0]:
                self.i = 0
            self._load_observation()
        elif self.extra_steps is None:
            # first finishing step of this episode
            self.extra_steps = 0
        else:
            # step() was called again on an episode that had already finished:
            # warn once, and never hand out a reward for such steps
            if self.extra_steps == 0:
                gym.logger.warn('Episode max_step length exceeded. You are entering uncharted territory and should reset the episode.')
            self.extra_steps += 1
            reward = 0

        return self.obs.values, reward, finished, ep_info

    def reset(self):
        '''
        start a new episode at a random row and return its observation
        '''
        # clear the per-episode counters (must be set on self, not a local)
        self.extra_steps = None
        self.current_step = 0

        self.i = np.random.randint(0, self.test_data.shape[0]) # pick a random starting location from the 0th row to the nth row

        print('reset at state number: ', self.i)

        # load the row and record the true label of self.obs
        self._load_observation()

        return self.obs.values

Now we will create an instance of DRL_IDS_Env (and validate it using stable_baselines)
Note: stable_baselines (https://stable-baselines.readthedocs.io/en/master/) is a set of improved implementations of Reinforcement Learning (RL) algorithms based on OpenAI Baselines.

In [59]:
# Load the preprocessed training split, wrap it in our custom environment,
# and let stable_baselines' check_env validate the environment.
train_data = pd.read_csv("processed_data/train.csv")
env = DRL_IDS_Env(test_data=train_data)
check_env(env, warn=True)

reset at state number:  728989
reset at state number:  1790620
