# Импорт библиотек

In [18]:
import pandas as pd
import numpy as np
import random
import gym
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import trange
from gym import spaces
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from collections import deque

In [19]:
import kagglehub

# Чтение датасетов

In [20]:
# Download latest version
path = kagglehub.dataset_download("chethuhn/network-intrusion-dataset")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/network-intrusion-dataset


In [21]:
webattacks = pd.read_csv("/kaggle/input/network-intrusion-dataset/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv")
ddos = pd.read_csv("/kaggle/input/network-intrusion-dataset/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv")
normal = pd.read_csv("/kaggle/input/network-intrusion-dataset/Monday-WorkingHours.pcap_ISCX.csv")

In [22]:
webattacks.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,389,113095465,48,24,9668,10012,403,0,201.416667,203.548293,...,32,203985.5,575837.3,1629110,379,13800000.0,4277541.0,16500000,6737603,BENIGN
1,389,113473706,68,40,11364,12718,403,0,167.117647,171.919413,...,32,178326.875,503426.9,1424245,325,13800000.0,4229413.0,16500000,6945512,BENIGN
2,0,119945515,150,0,0,0,0,0,0.0,0.0,...,0,6909777.333,11700000.0,20400000,6,24400000.0,24300000.0,60100000,5702188,BENIGN
3,443,60261928,9,7,2330,4221,1093,0,258.888889,409.702161,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
4,53,269,2,2,102,322,51,51,51.0,0.0,...,32,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN


In [23]:
ddos.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,54865,3,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
1,55054,109,1,1,6,6,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
2,55055,52,1,1,6,6,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
3,46236,34,1,1,6,6,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
4,54863,3,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN


In [24]:
normal.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,49188,4,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
1,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
2,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
3,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
4,49486,3,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN


# Предобработка

## Объединение датасетов с последующей подготовкой

In [25]:
webattacks["attack_type"] = "WebAttack"
ddos["attack_type"] = "DDoS"
normal["attack_type"] = "Normal"

df = pd.concat([webattacks, ddos, normal], ignore_index=True)
df.columns = df.columns.str.strip()

df["is_attack"] = df["Label"].apply(lambda x: 0 if x == "BENIGN" else 1)

df = df.replace([np.inf, -np.inf], np.nan).dropna()

## Отбор признаков

In [26]:
features = [col for col in df.columns if col not in ["Label", "attack_type", "is_attack"]]
X = df[features]
y = df["is_attack"]

## Масштабирование

In [27]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

## Радзеление на выборки

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

print("Форма обучающей выборки:", X_train.shape)
print("Форма тестовой выборки:", X_test.shape)
print("Баланс классов (обучение):", np.bincount(y_train))

Форма обучающей выборки: (740338, 78)
Форма тестовой выборки: (185085, 78)
Баланс классов (обучение): [636174 104164]


# Среда RL

In [29]:
class TrafficEnv(gym.Env):
    def __init__(self, X, y):
        super(TrafficEnv, self).__init__()
        self.X = X
        self.y = y
        self.current_index = 0
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(X.shape[1],), dtype=np.float32)
        self.action_space = spaces.Discrete(2)

    def reset(self):
        self.current_index = 0
        return self.X[self.current_index]

    def step(self, action):
        true_label = self.y[self.current_index]

        # Награда: +1 за правильное действие, -1 за неправильное
        if action == true_label:
            reward = 1
        else:
            reward = -1

        self.current_index += 1
        done = self.current_index >= len(self.X)

        if not done:
            next_state = self.X[self.current_index]
        else:
            next_state = np.zeros_like(self.X[0])  # Пустое состояние в конце

        return next_state, reward, done, {}

    def render(self, mode="human"):
        pass

# Реализация DQN агента

In [30]:
class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim)
        )

    def forward(self, x):
        return self.model(x)

# Параметры
state_dim = X_train.shape[1]
action_dim = 2
gamma = 0.99
epsilon = 1.0
epsilon_min = 0.01
epsilon_decay = 0.995
lr = 0.001
batch_size = 64
replay_memory = deque(maxlen=10000)

# Инициализация
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
policy_net = DQN(state_dim, action_dim).to(device)
optimizer = optim.Adam(policy_net.parameters(), lr=lr)
loss_fn = nn.MSELoss()

# Функция действия (ε-greedy)
def select_action(state, epsilon):
    if np.random.rand() < epsilon:
        return random.randint(0, action_dim - 1)
    state = torch.FloatTensor(state).unsqueeze(0).to(device)
    with torch.no_grad():
        q_values = policy_net(state)
    return torch.argmax(q_values).item()

# Обучение агента в среде

In [31]:
env = TrafficEnv(X_train, y_train.values)
num_episodes = 25
epsilon = 1.0

max_steps = 100

for episode in trange(num_episodes, desc="Обучение агента"):
    state = env.reset()
    total_reward = 0
    done = False
    step_count = 0

    while not done and step_count < max_steps:
        action = select_action(state, epsilon)
        next_state, reward, done, _ = env.step(action)
        total_reward += reward
        step_count += 1

        replay_memory.append((state, action, reward, next_state, done))
        state = next_state

        if len(replay_memory) >= batch_size:
            batch = random.sample(replay_memory, batch_size)
            states, actions, rewards, next_states, dones = zip(*batch)

            states = np.array(states)
            next_states = np.array(next_states)
            actions = np.array(actions)
            rewards = np.array(rewards)
            dones = np.array(dones)

            states = torch.FloatTensor(states).to(device)
            actions = torch.LongTensor(actions).unsqueeze(1).to(device)
            rewards = torch.FloatTensor(rewards).unsqueeze(1).to(device)
            next_states = torch.FloatTensor(next_states).to(device)
            dones = torch.BoolTensor(dones).unsqueeze(1).to(device)

            q_values = policy_net(states).gather(1, actions)
            next_q_values = policy_net(next_states).max(1)[0].unsqueeze(1)
            expected_q = rewards + gamma * next_q_values * (~dones)

            loss = loss_fn(q_values, expected_q.detach())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    print(f"Эпизод {episode+1}: Общая награда = {total_reward}, шагов: {step_count}, ε = {epsilon:.3f}")


Обучение агента:   4%|▍         | 1/25 [00:00<00:06,  3.45it/s]

Эпизод 1: Общая награда = 6, шагов: 100, ε = 0.995


Обучение агента:   8%|▊         | 2/25 [00:01<00:23,  1.02s/it]

Эпизод 2: Общая награда = -18, шагов: 100, ε = 0.990


Обучение агента:  12%|█▏        | 3/25 [00:03<00:24,  1.11s/it]

Эпизод 3: Общая награда = 6, шагов: 100, ε = 0.985


Обучение агента:  16%|█▌        | 4/25 [00:03<00:19,  1.06it/s]

Эпизод 4: Общая награда = 4, шагов: 100, ε = 0.980


Обучение агента:  20%|██        | 5/25 [00:04<00:14,  1.37it/s]

Эпизод 5: Общая награда = 14, шагов: 100, ε = 0.975


Обучение агента:  24%|██▍       | 6/25 [00:04<00:11,  1.63it/s]

Эпизод 6: Общая награда = -2, шагов: 100, ε = 0.970


Обучение агента:  28%|██▊       | 7/25 [00:04<00:10,  1.79it/s]

Эпизод 7: Общая награда = 18, шагов: 100, ε = 0.966


Обучение агента:  32%|███▏      | 8/25 [00:05<00:08,  2.10it/s]

Эпизод 8: Общая награда = 4, шагов: 100, ε = 0.961


Обучение агента:  36%|███▌      | 9/25 [00:05<00:06,  2.54it/s]

Эпизод 9: Общая награда = -4, шагов: 100, ε = 0.956


Обучение агента:  40%|████      | 10/25 [00:05<00:05,  2.90it/s]

Эпизод 10: Общая награда = 4, шагов: 100, ε = 0.951


Обучение агента:  44%|████▍     | 11/25 [00:05<00:04,  3.30it/s]

Эпизод 11: Общая награда = 4, шагов: 100, ε = 0.946


Обучение агента:  48%|████▊     | 12/25 [00:06<00:03,  3.62it/s]

Эпизод 12: Общая награда = 10, шагов: 100, ε = 0.942


Обучение агента:  52%|█████▏    | 13/25 [00:06<00:03,  3.89it/s]

Эпизод 13: Общая награда = 16, шагов: 100, ε = 0.937


Обучение агента:  56%|█████▌    | 14/25 [00:06<00:02,  4.09it/s]

Эпизод 14: Общая награда = 18, шагов: 100, ε = 0.932


Обучение агента:  60%|██████    | 15/25 [00:06<00:02,  3.67it/s]

Эпизод 15: Общая награда = 16, шагов: 100, ε = 0.928


Обучение агента:  64%|██████▍   | 16/25 [00:07<00:02,  3.52it/s]

Эпизод 16: Общая награда = 12, шагов: 100, ε = 0.923


Обучение агента:  68%|██████▊   | 17/25 [00:07<00:02,  3.46it/s]

Эпизод 17: Общая награда = 0, шагов: 100, ε = 0.918


Обучение агента:  72%|███████▏  | 18/25 [00:07<00:02,  3.32it/s]

Эпизод 18: Общая награда = 8, шагов: 100, ε = 0.914


Обучение агента:  76%|███████▌  | 19/25 [00:08<00:01,  3.38it/s]

Эпизод 19: Общая награда = 18, шагов: 100, ε = 0.909


Обучение агента:  80%|████████  | 20/25 [00:08<00:01,  3.41it/s]

Эпизод 20: Общая награда = -2, шагов: 100, ε = 0.905


Обучение агента:  84%|████████▍ | 21/25 [00:08<00:01,  3.29it/s]

Эпизод 21: Общая награда = 2, шагов: 100, ε = 0.900


Обучение агента:  88%|████████▊ | 22/25 [00:09<00:00,  3.22it/s]

Эпизод 22: Общая награда = 16, шагов: 100, ε = 0.896


Обучение агента:  92%|█████████▏| 23/25 [00:09<00:00,  3.15it/s]

Эпизод 23: Общая награда = -2, шагов: 100, ε = 0.891


Обучение агента:  96%|█████████▌| 24/25 [00:09<00:00,  3.37it/s]

Эпизод 24: Общая награда = 0, шагов: 100, ε = 0.887


Обучение агента: 100%|██████████| 25/25 [00:09<00:00,  2.55it/s]

Эпизод 25: Общая награда = -6, шагов: 100, ε = 0.882





# Обучение агента на тестовой выборке с последующей оценкой

In [32]:
# Переводим X_test в numpy
X_test_np = np.array(X_test)
y_test_np = np.array(y_test)

# Получим предсказания от агента
predictions = []

with torch.no_grad():
    for state in X_test_np:
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
        q_values = policy_net(state_tensor)
        action = torch.argmax(q_values).item()
        predictions.append(action)

# Метрики
print("Classification Report:")
print(classification_report(y_test_np, predictions, target_names=["Benign", "Attack"]))

print("Confusion Matrix:")
print(confusion_matrix(y_test_np, predictions))

Classification Report:
              precision    recall  f1-score   support

      Benign       0.97      0.98      0.98    159044
      Attack       0.89      0.84      0.86     26041

    accuracy                           0.96    185085
   macro avg       0.93      0.91      0.92    185085
weighted avg       0.96      0.96      0.96    185085

Confusion Matrix:
[[156370   2674]
 [  4205  21836]]
