In [1]:
!pip install gymnasium stable-baselines3 numpy tensorflow torch torchvision torchaudio

Collecting torchvision
  Downloading torchvision-0.18.0-cp312-cp312-win_amd64.whl.metadata (6.6 kB)
Collecting torchaudio
  Downloading torchaudio-2.3.0-cp312-cp312-win_amd64.whl.metadata (6.4 kB)
Downloading torchvision-0.18.0-cp312-cp312-win_amd64.whl (1.2 MB)
   ---------------------------------------- 0.0/1.2 MB ? eta -:--:--
   -- ------------------------------------- 0.1/1.2 MB 3.4 MB/s eta 0:00:01
   ------------------ --------------------- 0.5/1.2 MB 6.7 MB/s eta 0:00:01
   -------------------------------- ------- 0.9/1.2 MB 9.9 MB/s eta 0:00:01
   ---------------------------------------- 1.2/1.2 MB 9.3 MB/s eta 0:00:00
Downloading torchaudio-2.3.0-cp312-cp312-win_amd64.whl (2.4 MB)
   ---------------------------------------- 0.0/2.4 MB ? eta -:--:--
   -------- ------------------------------- 0.5/2.4 MB 15.9 MB/s eta 0:00:01
   ----------------- ---------------------- 1.0/2.4 MB 16.1 MB/s eta 0:00:01
   --------------------------- ------------ 1.6/2.4 MB 17.1 MB/s eta 0:00:01


In [2]:
# Importing necessary libraries
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import torch
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback, BaseCallback
from stable_baselines3.common.vec_env import DummyVecEnv
import os
import time

In [3]:
# Check if GPU is available and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [4]:
# Creating a custom Gymnasium environment for Network Intrusion Detection
class NetworkIntrusionEnv(gym.Env):
    """Custom Environment that follows gymnasium interface for IDS"""
    metadata = {'render_modes': ['console']}

    def __init__(self):
        super(NetworkIntrusionEnv, self).__init__()
        # Defining action space (0: benign, 1: malicious)
        self.action_space = spaces.Discrete(2)
        # Defining observation space as an example with 5 features
        self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(5,), dtype=np.float32)
        
        # Generating an initial state
        self.state = np.random.rand(5)
        self.steps_beyond_done = None

    def reset(self, seed=None, options=None):
        # Adding support for the seed parameter
        if seed is not None:
            np.random.seed(seed)
        self.state = np.random.rand(5)
        self.steps_beyond_done = None
        return np.array(self.state, dtype=np.float32), {}

    def step(self, action):
        # Implement your step logic here
        self.state = np.random.rand(5)
        reward = self.is_malicious(self.state)
        terminated = False
        truncated = False
        info = {}
        return np.array(self.state, dtype=np.float32), reward, terminated, truncated, info

    def render(self, mode='console'):
        if mode == 'console':
            print(f'Current state: {self.state}')

    def is_malicious(self, state):
        # Defining malicious condition, example combining several features
        # Logic based on abnormal values typically seen in attacks
        # Example: High packet rate and low error rate might indicate flooding attack
        high_packet_rate = state[1] > 0.7
        low_error_rate = state[3] < 0.05
        large_payload = state[4] > 0.8
        return int(high_packet_rate and low_error_rate or large_payload)

In [5]:
# Custom callback to print training progress
class ProgressCallback(BaseCallback):
    def __init__(self, total_timesteps, verbose=0):
        super(ProgressCallback, self).__init__(verbose)
        self.total_timesteps = total_timesteps
        self.start_time = None
        self.epoch_rewards = []

    def _on_training_start(self) -> None:
        self.start_time = time.time()
        self.epoch_rewards = []
        print("Training started.")

    def _on_step(self) -> bool:
        if self.n_calls % self.model.n_steps == 0:
            elapsed_time = time.time() - self.start_time
            remaining_time = (self.total_timesteps - self.num_timesteps) / self.num_timesteps * elapsed_time
            mean_reward = np.mean(self.locals['rewards'])
            self.epoch_rewards.append(mean_reward)
            epoch = len(self.epoch_rewards)
            print(f"Epoch: {epoch}, Step: {self.num_timesteps}, Elapsed Time: {elapsed_time:.2f}s, Remaining Time: {remaining_time:.2f}s, Mean Reward: {mean_reward:.2f}")
        return True

    def _on_training_end(self) -> None:
        total_time = time.time() - self.start_time
        print(f"Training completed in {total_time:.2f} seconds")

In [6]:
# Initializing the environment and checking it
env = NetworkIntrusionEnv()
check_env(env)

In [7]:
# Wrapping the environment to make it compatible with stable-baselines3
vec_env = DummyVecEnv([lambda: env])

In [8]:
# Hyperparameter tuning (We would extend this with a grid search or similar approach)
hyperparams = {
    'n_steps': 2048,
    'batch_size': 64,
    'n_epochs': 10,
    'learning_rate': 0.0003
}

In [9]:
# Setting up PPO agent with the appropriate device
model = PPO("MlpPolicy", vec_env, verbose=1, tensorboard_log="./ids_tensorboard/", device=device, **hyperparams)

Using cpu device


In [10]:
# Setting up evaluation callback and custom progress callback
eval_callback = EvalCallback(vec_env, best_model_save_path='./ids_rl_model/', log_path='./ids_logs/', eval_freq=500, deterministic=True, render=False)
progress_callback = ProgressCallback(total_timesteps=10000)

In [11]:
# Training the agent
model.learn(total_timesteps=10000, callback=eval_callback)

Logging to ./ids_tensorboard/PPO_1




KeyboardInterrupt: 

In [15]:
# Evaluating the trained agent
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward} +/- {std_reward}')

SyntaxError: incomplete input (3645560766.py, line 3)

In [16]:
# Optional Model Saving
model.save("IDS_Network_PPO_Model")

NameError: name 'model' is not defined