In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import umap
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from collections import deque
import random
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the dataset
df = pd.read_csv('./data/NF-UNSW-NB15.csv')  # Replace with your actual dataset path

# Feature selection
features = ['IN_BYTES', 'OUT_BYTES', 'IN_PKTS', 'OUT_PKTS', 'TCP_FLAGS', 'FLOW_DURATION_MILLISECONDS']
target = 'Label'

# Drop incomplete rows, then rebuild a contiguous 0..n-1 index. Without the
# reset, dropped rows leave gaps in the index, so label-based lookups such as
# y[i] with a random position i can raise KeyError or no longer line up with
# row i of the NumPy arrays derived from X.
df = df.dropna().reset_index(drop=True)

# Prepare feature and target variables
X = df[features]
y = df[target]

# Standardize every selected feature (StandardScaler defaults: zero mean, unit variance).
# Note: the scaler is fitted on all rows, i.e. before the train/test split below.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

print("Data preprocessing complete.")

Data preprocessing complete.


In [3]:
# Project the standardized features onto two UMAP components
umap_model = umap.UMAP(n_components=2, random_state=42)
X_umap = umap_model.fit_transform(X_scaled)

# Scatter plot of the 2-D embedding, coloured by the target label
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(X_umap[:, 0], X_umap[:, 1], c=y, cmap='viridis', alpha=0.5)
fig.colorbar(scatter, ax=ax, label='Label')
ax.set_title('UMAP Visualization of Network Traffic Data')
ax.set_xlabel('UMAP Component 1')
ax.set_ylabel('UMAP Component 2')
plt.show()

print("UMAP dimensionality reduction complete.")

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


In [None]:
# Hyperparameters for the DQN agent
STATE_SIZE = 2  # Length of one state vector: a row of the 2-component UMAP embedding
ACTION_SIZE = 2  # Number of discrete actions (Benign or Malicious)
REPLAY_MEMORY_SIZE = 2000  # Maximum number of experiences held by the replay buffer
BATCH_SIZE = 32  # Number of experiences drawn from the replay buffer per update
GAMMA = 0.99  # Discount applied to the target network's best next-state Q-value
LEARNING_RATE = 0.001  # Learning rate passed to the Adam optimizer
TARGET_UPDATE_FREQ = 10  # Target network is synced when the episode index is a multiple of this

# Create Q-network model
def build_model():
    """Build and compile the Q-network.

    The network maps a state vector of length STATE_SIZE to ACTION_SIZE
    Q-values through two 64-unit ReLU hidden layers and a linear output layer.
    It is compiled with Adam (learning rate LEARNING_RATE) and MSE loss.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    # Declare the input with an explicit Input layer; passing ``input_dim`` to
    # the first Dense layer is the legacy form and triggers a deprecation
    # warning on current Keras releases.
    model = Sequential([
        tf.keras.Input(shape=(STATE_SIZE,)),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(ACTION_SIZE, activation='linear'),
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE), loss='mse')
    return model

# Experience Replay
class ReplayBuffer:
    """Bounded store of experiences backed by a ``deque``.

    Once ``size`` entries are held, appending a new experience discards the
    oldest one (``deque(maxlen=...)`` behaviour).
    """

    def __init__(self, size):
        # maxlen makes the deque evict from the left when it is full.
        self.buffer = deque(maxlen=size)

    def add(self, experience):
        """Append one experience to the right end of the buffer."""
        self.buffer.append(experience)

    def sample(self, batch_size):
        """Return ``batch_size`` stored experiences drawn without replacement."""
        drawn = random.sample(self.buffer, batch_size)
        return drawn

    def size(self):
        """Return how many experiences are currently stored."""
        stored = self.buffer
        return len(stored)

# Build the online and target Q-networks, copy the online weights into the
# target so both start identical, and create an empty replay buffer.
q_network, target_network = build_model(), build_model()
target_network.set_weights(q_network.get_weights())
replay_buffer = ReplayBuffer(size=REPLAY_MEMORY_SIZE)

print("DQN model architecture defined.")

In [None]:
def train_dqn(episodes=1000, steps_per_episode=None):
    """Train the global ``q_network`` with epsilon-greedy DQN on the UMAP embedding.

    Each step presents one row of ``X_umap`` as the state. The agent picks an
    action, receives +1 if the action equals that row's label in ``y`` and -1
    otherwise, and the next state is another uniformly sampled row. Experiences
    go into ``replay_buffer`` and the network is updated from a sampled
    mini-batch.

    Args:
        episodes: Number of training episodes.
        steps_per_episode: Steps taken in each episode. ``None`` (default)
            means one step per row of ``X_umap``. Pass a smaller value to keep
            a run tractable on large datasets.

    Side effects:
        Updates ``q_network`` and ``target_network`` weights, appends to
        ``replay_buffer``, and decays the module-level ``epsilon`` towards
        ``epsilon_min`` by a factor of ``epsilon_decay`` after every episode.

    Note:
        The reward assumes the values in ``y`` are encoded as the action
        indices 0..ACTION_SIZE-1 -- confirm against the dataset.
    """
    # Decay the module-level exploration rate in place. Without this, epsilon
    # stays at its initial value and the greedy branch is never taken.
    global epsilon

    # Positional label access: y may be a pandas Series whose index is not
    # 0..n-1, in which case y[i] would be a label lookup rather than "row i".
    labels = np.asarray(y)
    n_samples = len(X_umap)
    if steps_per_episode is None:
        steps_per_episode = n_samples

    for e in range(episodes):
        # Initialize state from a random row and remember which row it is
        idx = np.random.randint(n_samples)
        state = np.reshape(X_umap[idx], [1, STATE_SIZE])
        total_reward = 0

        for _step in range(steps_per_episode):
            # Select action (epsilon-greedy)
            if np.random.rand() < epsilon:
                action = np.random.randint(ACTION_SIZE)
            else:
                q_values = q_network.predict_on_batch(state)
                action = int(np.argmax(q_values[0]))

            # Reward is judged against the label of the row that produced the
            # current state, not against an unrelated random row.
            reward = 1 if action == labels[idx] else -1
            total_reward += reward

            # Observe the next state: another uniformly sampled row
            next_idx = np.random.randint(n_samples)
            next_state = np.reshape(X_umap[next_idx], [1, STATE_SIZE])

            # Store experience in replay buffer
            replay_buffer.add((state, action, reward, next_state))

            # One batched update per step instead of three Keras calls per sample
            if replay_buffer.size() > BATCH_SIZE:
                batch = replay_buffer.sample(BATCH_SIZE)
                states, actions, rewards, next_states = zip(*batch)
                states = np.vstack(states)
                next_states = np.vstack(next_states)
                actions = np.asarray(actions, dtype=np.intp)
                rewards = np.asarray(rewards, dtype=np.float32)

                next_q = np.asarray(target_network.predict_on_batch(next_states))
                # np.array copies, so the in-place edit below is always allowed.
                targets = np.array(q_network.predict_on_batch(states))
                # Only the Q-value of the action actually taken is moved
                # towards the bootstrapped target; the others keep their
                # current prediction and so contribute zero loss.
                targets[np.arange(len(batch)), actions] = rewards + GAMMA * next_q.max(axis=1)
                q_network.train_on_batch(states, targets)

            # Update state
            state = next_state
            idx = next_idx

        # Sync the target network once per qualifying episode (previously this
        # ran on every step of such episodes).
        if e % TARGET_UPDATE_FREQ == 0:
            target_network.set_weights(q_network.get_weights())

        # Shift gradually from exploration to exploitation
        epsilon = max(epsilon_min, epsilon * epsilon_decay)

        print(f"Episode: {e}/{episodes}, Total Reward: {total_reward}")

# Seed the RNGs before training so a re-run reproduces the same exploration
# choices (np.random) and replay-buffer draws (random). TensorFlow's global
# seed is set as well for any TF-side randomness.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Exploration settings. train_dqn reads `epsilon` as a module-level global;
# `epsilon_decay` and `epsilon_min` describe the intended multiplicative decay
# and its floor, and only take effect where the training loop applies them.
epsilon = 1.0
epsilon_decay = 0.995
epsilon_min = 0.01

train_dqn()

print("Training complete.")


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Predict using the trained model
def evaluate_model():
    """Score the greedy policy of ``q_network`` against the labels in ``y``.

    Every row of ``X_umap`` is treated as a state, the action with the highest
    Q-value is taken as the predicted class, and accuracy, precision, recall
    and F1 are printed.

    Note:
        The metrics are computed over all of ``X_umap``, which is the same
        data the training loop samples from, so they are not held-out scores.
    """
    # One batched forward pass instead of one predict() call per row
    states = np.reshape(X_umap, [-1, STATE_SIZE])
    q_values = q_network.predict(states, batch_size=4096, verbose=0)

    # Greedy action per row = predicted class
    predictions = np.argmax(q_values, axis=1)

    # Evaluate the performance
    accuracy = accuracy_score(y, predictions)
    precision = precision_score(y, predictions)
    recall = recall_score(y, predictions)
    f1 = f1_score(y, predictions)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")

evaluate_model()
