In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install -q kaggle

In [3]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"ajeevanreddy","key":"f96c72b6ecc2548efe34438739e65dcf"}'}

In [4]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [5]:
!kaggle datasets download -d solarmainframe/ids-intrusion-csv -p /content/drive/MyDrive/RL

Dataset URL: https://www.kaggle.com/datasets/solarmainframe/ids-intrusion-csv
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading ids-intrusion-csv.zip to /content/drive/MyDrive/RL
 99% 1.58G/1.60G [00:10<00:00, 191MB/s]
100% 1.60G/1.60G [00:10<00:00, 170MB/s]


In [6]:
!unzip /content/drive/MyDrive/RL/ids-intrusion-csv.zip -d /content/drive/MyDrive/RL

Archive:  /content/drive/MyDrive/RL/ids-intrusion-csv.zip
  inflating: /content/drive/MyDrive/RL/02-14-2018.csv  
  inflating: /content/drive/MyDrive/RL/02-15-2018.csv  
  inflating: /content/drive/MyDrive/RL/02-16-2018.csv  
  inflating: /content/drive/MyDrive/RL/02-20-2018.csv  
  inflating: /content/drive/MyDrive/RL/02-21-2018.csv  
  inflating: /content/drive/MyDrive/RL/02-22-2018.csv  
  inflating: /content/drive/MyDrive/RL/02-23-2018.csv  
  inflating: /content/drive/MyDrive/RL/02-28-2018.csv  
  inflating: /content/drive/MyDrive/RL/03-01-2018.csv  
  inflating: /content/drive/MyDrive/RL/03-02-2018.csv  


In [2]:
# STEP 1: Import Libraries
import os
import glob
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from collections import deque
import random
from tqdm import tqdm

In [4]:
# STEP 2: Load Dataset
data_path = "/content/drive/MyDrive/RL/"
all_files = glob.glob(os.path.join(data_path, "*.csv"))

print("Total files found:", len(all_files))

# Just read the first file for inspection
sample_file = all_files[0]
df = pd.read_csv(sample_file)

print("File:", sample_file)
print("Shape:", df.shape)
print("Columns:", df.columns[:20])  # preview

Total files found: 10
File: /content/drive/MyDrive/RL/02-14-2018.csv
Shape: (1048575, 80)
Columns: Index(['Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts',
       'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max',
       'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std',
       'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean',
       'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean',
       'Flow IAT Std'],
      dtype='object')


In [5]:
data_path = "/content/drive/MyDrive/RL/"
all_files = glob.glob(os.path.join(data_path, "*.csv"))

print("Total files found:", len(all_files))

for i, file in enumerate(all_files[:5]):  # limit to 5 files for preview
    print(f"\nReading file {i+1}: {file}")
    df = pd.read_csv(file, nrows=5)  # only read first 5 rows
    print(df.head())

Total files found: 10

Reading file 1: /content/drive/MyDrive/RL/02-14-2018.csv
   Dst Port  Protocol            Timestamp  Flow Duration  Tot Fwd Pkts  \
0         0         0  14/02/2018 08:31:01      112641719             3   
1         0         0  14/02/2018 08:33:50      112641466             3   
2         0         0  14/02/2018 08:36:39      112638623             3   
3        22         6  14/02/2018 08:40:13        6453966            15   
4        22         6  14/02/2018 08:40:23        8804066            14   

   Tot Bwd Pkts  TotLen Fwd Pkts  TotLen Bwd Pkts  Fwd Pkt Len Max  \
0             0                0                0                0   
1             0                0                0                0   
2             0                0                0                0   
3            10             1239             2273              744   
4            11             1143             2209              744   

   Fwd Pkt Len Min  ...  Fwd Seg Size Min  Activ

In [7]:
# STEP 3 (Fixed): Preprocessing

# Initialize an empty list to store dataframes
dataframes = []

# Read and concatenate all CSV files
for file in all_files:
    try:
        # Read each CSV file into a dataframe
        df = pd.read_csv(file)
        dataframes.append(df)
    except Exception as e:
        print(f"Error reading file {file}: {e}")

# Concatenate all dataframes into a single dataframe
data = pd.concat(dataframes, ignore_index=True)

# Drop categorical / non-numeric cols not useful for RL agent
drop_cols = ['Dst IP', 'Timestamp']  # you can add 'Src IP', etc if present
data = data.drop(columns=drop_cols, errors='ignore')

# Convert 'Dst Port' to numeric, coercing errors to NaN
data['Dst Port'] = pd.to_numeric(data['Dst Port'], errors='coerce')

# Fill NaN values in 'Dst Port' with 0 (or another appropriate value)
data['Dst Port'] = data['Dst Port'].fillna(0)

# Separate features and labels
y = data['Label']
X = data.drop(columns=['Label'], errors='ignore')

# Identify and drop remaining non-numeric columns
non_numeric_cols = X.select_dtypes(exclude=np.number).columns
if len(non_numeric_cols) > 0:
    print(f"Dropping non-numeric columns: {list(non_numeric_cols)}")
    X = X.drop(columns=non_numeric_cols)

# Encode labels (Benign = 0, Attacks = 1, or multiclass)
le = LabelEncoder()
y = le.fit_transform(y)

print("Classes:", le.classes_)  # see all attack types

# Normalize numeric features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Train shape:", X_train.shape, "Test shape:", X_test.shape)

  df = pd.read_csv(file)
  df = pd.read_csv(file)
  df = pd.read_csv(file)


Dropping non-numeric columns: ['Protocol', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Avg', 'B

In [8]:
# STEP 4 (Fixed): RL Environment

class IDSEnv:
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.n_samples = len(y)
        self.current_index = 0
        self.action_space = len(np.unique(y))  # adapt to multiclass
        self.state_space = X.shape[1]

    def reset(self):
        self.current_index = 0
        return self.X[self.current_index]

    def step(self, action):
        true_label = self.y[self.current_index]
        reward = 1 if action == true_label else -1
        self.current_index += 1
        done = self.current_index >= self.n_samples
        next_state = (
            self.X[self.current_index % self.n_samples] if not done else np.zeros_like(self.X[0])
        )
        return next_state, reward, done, {}

In [9]:
# STEP 5: Advanced DQN Variants

# Dueling DQN
class DuelingDQN(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(DuelingDQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 256)
        self.fc2 = nn.Linear(256, 128)

        # Value and Advantage streams
        self.value_stream = nn.Linear(128, 1)
        self.advantage_stream = nn.Linear(128, action_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        value = self.value_stream(x)
        advantage = self.advantage_stream(x)
        q_vals = value + (advantage - advantage.mean(dim=1, keepdim=True))
        return q_vals

In [10]:
# STEP 6: Agent with Double DQN

class DQNAgent:
    def __init__(self, state_dim, action_dim, lr=1e-4, gamma=0.99, batch_size=64, buffer_size=50000):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma
        self.batch_size = batch_size

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Main & Target Networks
        self.policy_net = DuelingDQN(state_dim, action_dim).to(self.device)
        self.target_net = DuelingDQN(state_dim, action_dim).to(self.device)
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)
        self.memory = deque(maxlen=buffer_size)

        self.update_target()

    def update_target(self):
        self.target_net.load_state_dict(self.policy_net.state_dict())

    def act(self, state, epsilon=0.1):
        if np.random.rand() < epsilon:
            return np.random.randint(self.action_dim)
        state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        q_vals = self.policy_net(state)
        return torch.argmax(q_vals, dim=1).item()

    def remember(self, s, a, r, s_next, done):
        self.memory.append((s, a, r, s_next, done))

    def replay(self):
        if len(self.memory) < self.batch_size:
            return
        minibatch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        states = torch.FloatTensor(states).to(self.device)
        actions = torch.LongTensor(actions).to(self.device)
        rewards = torch.FloatTensor(rewards).to(self.device)
        next_states = torch.FloatTensor(next_states).to(self.device)
        dones = torch.FloatTensor(dones).to(self.device)

        # Q(s, a)
        q_vals = self.policy_net(states).gather(1, actions.unsqueeze(1)).squeeze()

        # Double DQN target
        next_actions = torch.argmax(self.policy_net(next_states), dim=1)
        next_q_vals = self.target_net(next_states).gather(1, next_actions.unsqueeze(1)).squeeze()
        target = rewards + (1 - dones) * self.gamma * next_q_vals

        loss = F.mse_loss(q_vals, target.detach())
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

In [13]:
# STEP 7: Compact Training Loop

env = IDSEnv(X_train, y_train)
agent = DQNAgent(env.state_space, env.action_space)

episodes = 5   # keep small for testing; increase later if needed
steps_per_episode = 2000  # limit steps per episode (instead of all samples)
epsilon = 1.0
epsilon_min = 0.05
epsilon_decay = 0.995

for ep in range(episodes):
    state = env.reset()
    total_reward = 0

    for t in range(steps_per_episode):
        action = agent.act(state, epsilon)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)

        # Replay less frequently for speed
        if t % 10 == 0:
            agent.replay()

        state = next_state
        total_reward += reward
        if done:
            break

    agent.update_target()
    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    print(f"Episode {ep+1}/{episodes}, Reward: {total_reward}, Epsilon: {epsilon:.3f}")

Episode 1/5, Reward: -1742, Epsilon: 0.995
Episode 2/5, Reward: -1748, Epsilon: 0.990
Episode 3/5, Reward: -1742, Epsilon: 0.985
Episode 4/5, Reward: -1690, Epsilon: 0.980
Episode 5/5, Reward: -1706, Epsilon: 0.975


In [16]:
# STEP 8 (Optimized Testing)

def fast_test(agent, X_test, y_test, batch_size=1024):
    device = agent.device
    correct, total = 0, 0

    # Convert test data to tensor
    X_test_tensor = torch.FloatTensor(X_test).to(device)

    # Predict in batches
    agent.policy_net.eval()
    with torch.no_grad():
        for i in range(0, len(X_test), batch_size):
            batch_X = X_test_tensor[i:i+batch_size]
            outputs = agent.policy_net(batch_X)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()

            batch_y = y_test[i:i+batch_size]
            correct += (preds == batch_y).sum()
            total += len(batch_y)

    return correct / total

accuracy = fast_test(agent, X_test, y_test)
print("Fast Test Accuracy:", accuracy)

Fast Test Accuracy: 0.8306970890478996


In [17]:
# STEP 5: Enhanced Dueling DQN with Dropout + BatchNorm

class EnhancedDQN(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(EnhancedDQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 128)
        self.dropout = nn.Dropout(0.3)

        # Dueling streams
        self.value_stream = nn.Linear(128, 1)
        self.advantage_stream = nn.Linear(128, action_dim)

    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        value = self.value_stream(x)
        advantage = self.advantage_stream(x)
        q_vals = value + (advantage - advantage.mean(dim=1, keepdim=True))
        return q_vals

In [18]:
# STEP 6: Upgrade Agent to use EnhancedDQN

class DQNAgent:
    def __init__(self, state_dim, action_dim, lr=1e-4, gamma=0.99, batch_size=128, buffer_size=100000):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma
        self.batch_size = batch_size

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.policy_net = EnhancedDQN(state_dim, action_dim).to(self.device)
        self.target_net = EnhancedDQN(state_dim, action_dim).to(self.device)
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)
        self.memory = deque(maxlen=buffer_size)

        self.update_target()

    def update_target(self):
        self.target_net.load_state_dict(self.policy_net.state_dict())

    def act(self, state, epsilon=0.1):
        if np.random.rand() < epsilon:
            return np.random.randint(self.action_dim)
        state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        q_vals = self.policy_net(state)
        return torch.argmax(q_vals, dim=1).item()

    def remember(self, s, a, r, s_next, done):
        self.memory.append((s, a, r, s_next, done))

    def replay(self):
        if len(self.memory) < self.batch_size:
            return
        minibatch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        states = torch.FloatTensor(states).to(self.device)
        actions = torch.LongTensor(actions).to(self.device)
        rewards = torch.FloatTensor(rewards).to(self.device)
        next_states = torch.FloatTensor(next_states).to(self.device)
        dones = torch.FloatTensor(dones).to(self.device)

        # Q(s, a)
        q_vals = self.policy_net(states).gather(1, actions.unsqueeze(1)).squeeze()

        # Double DQN
        next_actions = torch.argmax(self.policy_net(next_states), dim=1)
        next_q_vals = self.target_net(next_states).gather(1, next_actions.unsqueeze(1)).squeeze()
        target = rewards + (1 - dones) * self.gamma * next_q_vals

        loss = F.mse_loss(q_vals, target.detach())
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

In [20]:
# STEP 8: Enhanced Evaluation
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, roc_auc_score

def evaluate(agent, X_test, y_test, batch_size=1024):
    device = agent.device
    preds_all = []
    true_all = []

    X_test_tensor = torch.FloatTensor(X_test).to(device)
    agent.policy_net.eval()

    with torch.no_grad():
        for i in range(0, len(X_test), batch_size):
            batch_X = X_test_tensor[i:i+batch_size]
            outputs = agent.policy_net(batch_X)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            preds_all.extend(preds)
            true_all.extend(y_test[i:i+batch_size])

    # Convert to arrays
    preds_all = np.array(preds_all)
    true_all = np.array(true_all)

    acc = accuracy_score(true_all, preds_all)
    prec = precision_score(true_all, preds_all, average='weighted', zero_division=0)
    rec = recall_score(true_all, preds_all, average='weighted')
    f1 = f1_score(true_all, preds_all, average='weighted')

    print("\n Classification Report:\n", classification_report(true_all, preds_all, target_names=le.classes_))
    print("Confusion Matrix:\n", confusion_matrix(true_all, preds_all))
    print(f" Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}")

    return acc, prec, rec, f1

# Run evaluation
acc, prec, rec, f1 = evaluate(agent, X_test, y_test)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



 Classification Report:
                           precision    recall  f1-score   support

                  Benign       0.83      1.00      0.91   2696942
                     Bot       0.00      0.00      0.00     57238
        Brute Force -Web       0.00      0.00      0.00       122
        Brute Force -XSS       0.00      0.00      0.00        46
        DDOS attack-HOIC       0.00      0.00      0.00    137203
    DDOS attack-LOIC-UDP       0.00      0.00      0.00       346
  DDoS attacks-LOIC-HTTP       0.00      0.00      0.00    115238
   DoS attacks-GoldenEye       0.00      0.00      0.00      8302
        DoS attacks-Hulk       0.00      0.00      0.00     92382
DoS attacks-SlowHTTPTest       0.00      0.00      0.00     27978
   DoS attacks-Slowloris       0.00      0.00      0.00      2198
          FTP-BruteForce       0.00      0.00      0.00     38672
           Infilteration       0.00      0.00      0.00     32387
                   Label       0.00      0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [21]:
# Install required packages
!pip install -q imbalanced-learn --quiet


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/240.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━[0m [32m163.8/240.0 kB[0m [31m4.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [22]:
# Imports (add to top of file if not present)

from imblearn.over_sampling import RandomOverSampler
from sklearn.utils.class_weight import compute_class_weight
import torch.utils.data as data_utils
from torch.utils.data import TensorDataset, DataLoader
import os

# 1) Optional: Binary mapping (if you prefer Binary detection)

# Uncomment if you want to convert to binary (Benign vs Attack)
# y_train_bin = np.where(y_train == le.transform(['Benign'])[0], 0, 1)
# y_test_bin  = np.where(y_test  == le.transform(['Benign'])[0], 0, 1)
# Then set env.action_space = 2 and proceed. For now we keep multiclass.

# 2) Resample training set with RandomOverSampler (balances minority classes)

ros = RandomOverSampler(random_state=42)
X_train_res, y_train_res = ros.fit_resample(X_train, y_train)
print("After oversampling, counts:", np.bincount(y_train_res))

# replace training data used for supervised pretraining
X_pre = X_train_res
y_pre = y_train_res

# 3) Compute class weights (for loss)

classes = np.unique(y_pre)
class_weights = compute_class_weight(class_weight="balanced", classes=classes, y=y_pre)
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
print("Class weights:", dict(zip(le.classes_, class_weights)))

# 4) Focal Loss (optional but helpful on imbalanced multiclass)

class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0, weight=None, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.weight = weight
        self.reduction = reduction
        self.ce = nn.CrossEntropyLoss(weight=weight, reduction='none')

    def forward(self, input, target):
        logp = -self.ce(input, target)
        p = torch.exp(logp)
        loss = -((1 - p) ** self.gamma) * logp
        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        return loss

After oversampling, counts: [10787766 10787766 10787766 10787766 10787766 10787766 10787766 10787766
 10787766 10787766 10787766 10787766 10787766 10787766 10787766 10787766]
Class weights: {'Benign': np.float64(1.0), 'Bot': np.float64(1.0), 'Brute Force -Web': np.float64(1.0), 'Brute Force -XSS': np.float64(1.0), 'DDOS attack-HOIC': np.float64(1.0), 'DDOS attack-LOIC-UDP': np.float64(1.0), 'DDoS attacks-LOIC-HTTP': np.float64(1.0), 'DoS attacks-GoldenEye': np.float64(1.0), 'DoS attacks-Hulk': np.float64(1.0), 'DoS attacks-SlowHTTPTest': np.float64(1.0), 'DoS attacks-Slowloris': np.float64(1.0), 'FTP-BruteForce': np.float64(1.0), 'Infilteration': np.float64(1.0), 'Label': np.float64(1.0), 'SQL Injection': np.float64(1.0), 'SSH-Bruteforce': np.float64(1.0)}


In [None]:
# Oversample (balance classes)

ros = RandomOverSampler(random_state=42)
X_train_res, y_train_res = ros.fit_resample(X_train, y_train)
print("Resampled counts:", np.bincount(y_train_res))

# Class weights
classes = np.unique(y_train_res)
class_weights = compute_class_weight("balanced", classes=classes, y=y_train_res)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)

# Focal Loss
class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0, weight=None):
        super().__init__()
        self.ce = nn.CrossEntropyLoss(weight=weight, reduction='none')
        self.gamma = gamma
    def forward(self, input, target):
        logp = -self.ce(input, target)
        p = torch.exp(logp)
        return (-(1 - p) ** self.gamma * logp).mean()

# Supervised Pretraining
def supervised_pretrain(model, X, y, epochs=3, bs=2048, lr=1e-4):
    model.to(device).train()
    ds = TensorDataset(torch.FloatTensor(X), torch.LongTensor(y))
    loader = DataLoader(ds, batch_size=bs, shuffle=True)
    opt = optim.Adam(model.parameters(), lr=lr)
    crit = FocalLoss(weight=class_weights_tensor)
    for ep in range(epochs):
        loss_sum = 0
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            out = model(xb)
            loss = crit(out, yb)
            loss.backward(); opt.step()
            loss_sum += loss.item() * len(xb)
        print(f"Pretrain {ep+1}/{epochs} - Loss: {loss_sum/len(ds):.4f}")

supervised_pretrain(agent.policy_net, X_train_res, y_train_res, epochs=3, bs=4096)

agent.target_net.load_state_dict(agent.policy_net.state_dict())

# Simple Balanced Replay
class BalancedReplay:
    def __init__(self, maxlen=50000): self.mem = defaultdict(lambda: deque(maxlen=maxlen))
    def add(self, sample, label): self.mem[int(label)].append(sample)
    def sample(self, bs):
        classes = list(self.mem.keys()); per_cls = max(1, bs//len(classes))
        batch = []
        for c in classes:
            items = list(self.mem[c])
            if items: batch.extend(random.choices(items, k=per_cls))
        return random.sample(batch, min(len(batch), bs))

agent.replay_memory = BalancedReplay()

Resampled counts: [10787766 10787766 10787766 10787766 10787766 10787766 10787766 10787766
 10787766 10787766 10787766 10787766 10787766 10787766 10787766 10787766]


In [None]:
# Training Loop
episodes, epsilon, eps_min, eps_decay = 5, 1.0, 0.1, 0.9
for ep in range(episodes):
    s, done, tot_r = env.reset(), False, 0
    while not done:
        a = agent.act(s, epsilon)
        s2, r, done, _ = env.step(a)
        label = y_train[env.current_index-1]
        agent.replay_memory.add((s,a,r,s2,done), label)
        if len(agent.replay_memory.mem)>0:
            mb = agent.replay_memory.sample(agent.batch_size)
            if mb: agent.replay_balanced()
        s, tot_r = s2, tot_r+r
    agent.update_target()
    epsilon = max(eps_min, epsilon*eps_decay)
    print(f"Episode {ep+1}/{episodes} | Reward={tot_r} | Eps={epsilon:.2f}")

# Evaluation
def evaluate(agent, X, y, bs=4096):
    agent.policy_net.eval(); preds=[]
    with torch.no_grad():
        for i in range(0, len(X), bs):
            xb = torch.FloatTensor(X[i:i+bs]).to(device)
            preds.extend(torch.argmax(agent.policy_net(xb),1).cpu().numpy())
    print(classification_report(y, preds, target_names=le.classes_, zero_division=0))
    print("CM:", confusion_matrix(y, preds).shape)
    return np.array(preds)

preds = evaluate(agent, X_test, y_test)

Fresh Start

In [2]:
# Step 1: Imports
import os
import glob
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Step 2: Load Dataset
path = "/content/drive/MyDrive/RL/"
files = glob.glob(os.path.join(path, "*.csv"))

df_list = []
for f in files:
    try:
        df_list.append(pd.read_csv(f))
    except:
        print("Error reading:", f)

df = pd.concat(df_list, ignore_index=True)
print("Dataset shape:", df.shape)
print("Columns:", df.columns[:10])

  df_list.append(pd.read_csv(f))


In [None]:
# Step 3: Preprocessing
df = df.dropna(axis=1, how="all").dropna()

# Encode labels (Attack / Normal)
if "Label" in df.columns:
    df['Label'] = df['Label'].str.strip()
    encoder = LabelEncoder()
    df['Label'] = encoder.fit_transform(df['Label'])
else:
    raise ValueError("Label column not found in dataset!")

y = df['Label']
X = df.drop(columns=['Label'])

# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)