In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder

# Step 1: Load and Preprocess Data
csv_path = "../data/e3bd3035f88e55fa_MOHANAD_A4706/data/NF-UQ-NIDS.csv"

# Columns kept from the raw file: flow features plus the "Label" column.
selected_features = ['L4_SRC_PORT','L4_DST_PORT','PROTOCOL','L7_PROTO',
    "IN_BYTES", "OUT_BYTES", "IN_PKTS", "OUT_PKTS", 
    "TCP_FLAGS", "FLOW_DURATION_MILLISECONDS", "Label"
]

# Parse only the columns that are actually used instead of materialising the
# whole CSV and discarding most of it afterwards.
data = pd.read_csv(csv_path, usecols=selected_features)

# `usecols` keeps the file's column order, so reindex to the order listed in
# `selected_features` before dropping rows that contain missing values.
data = data[selected_features].dropna()

def normalize_minus_one_to_one(df, columns):
    """Rescale the given columns of a frame into the range [-1, 1].

    A new ``MinMaxScaler(feature_range=(-1, 1))`` is fitted on ``df[columns]``;
    the input frame itself is left untouched.

    Args:
        df: Source DataFrame.
        columns: Column labels to rescale.

    Returns:
        A ``(scaled_df, fitted_scaler)`` tuple, where ``scaled_df`` is a copy
        of ``df`` with ``columns`` replaced by their scaled values.
    """
    fitted_scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled_values = fitted_scaler.fit_transform(df[columns])

    # Write the scaled values into a copy so the caller's frame is not mutated.
    result = df.copy()
    result[columns] = scaled_values
    return result, fitted_scaler

# Every column except "Label" is rescaled; "Label" is carried through unchanged.
features_to_normalize = list(filter(lambda name: name != "Label", data.columns))
data, scaler = normalize_minus_one_to_one(data, features_to_normalize)

# Persist the normalised table (without the index) so later cells can reload it.
preprocessed_filename = "../data/pre_processed/preprocessed_nf_uq_nids_haaa.csv"
data.to_csv(preprocessed_filename, index=False)

FileNotFoundError: [Errno 2] No such file or directory: '../data/e3bd3035f88e55fa_MOHANAD_A4706/data/NF-UQ-NIDS.csv'

In [5]:
# Reload the table written by the preprocessing step and integer-encode "Label".
data = pd.read_csv(preprocessed_filename)
label_encoder = LabelEncoder()
data['Label'] = label_encoder.fit_transform(data['Label'])

# Split into a target vector and a feature matrix (all columns except "Label").
y = data['Label'].values
X = data.drop(columns='Label').values

attack_label = 1  # Assuming 1 represents "attack"

# Keep only rows whose encoded label equals `attack_label`, then rescale them.
# NOTE(review): this re-fits the shared `scaler` object on the attack subset,
# replacing whatever it was fitted on before -- confirm that is intended
# before using `scaler.inverse_transform` on generated samples.
X_attack = scaler.fit_transform(X[y == attack_label])

In [6]:
import tensorflow as tf
from tensorflow.keras import layers

def build_generator(input_dim, output_dim):
    """Build the generator: a fully connected network from noise to samples.

    The input shape is declared with an explicit ``Input`` layer rather than
    the ``input_dim=`` keyword on the first ``Dense`` layer, which recent
    Keras versions warn about.

    Args:
        input_dim: Width of the noise vector fed to the network.
        output_dim: Number of values produced per generated sample.

    Returns:
        A ``tf.keras.Sequential`` with hidden ReLU layers of 128, 256 and 512
        units and a ``tanh`` output layer of ``output_dim`` units, so every
        output value lies in [-1, 1].
    """
    return tf.keras.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(128, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(512, activation='relu'),
        layers.Dense(output_dim, activation='tanh'),
    ])

def build_critic(input_dim):
    """Build the critic: a fully connected network scoring one sample at a time.

    The input shape is declared with an explicit ``Input`` layer rather than
    the ``input_dim=`` keyword on the first ``Dense`` layer, which recent
    Keras versions warn about.

    Args:
        input_dim: Number of features per sample.

    Returns:
        A ``tf.keras.Sequential`` with hidden ReLU layers of 512 and 256 units
        and a single linear output unit (no activation), i.e. an unbounded
        score per sample.
    """
    return tf.keras.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(1),
    ])

# Noise-vector width for the generator; the sample width comes from the
# number of columns in the attack feature matrix.
input_dim = 100
output_dim = X_attack.shape[-1]

generator = build_generator(input_dim=input_dim, output_dim=output_dim)
critic = build_critic(input_dim=output_dim)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
class WGAN(tf.keras.Model):
    """Wasserstein GAN with gradient penalty (WGAN-GP) training wrapper.

    Holds a generator and a critic and implements one optimisation step in
    ``train_step``: several critic updates (critic loss plus a weighted
    gradient penalty on real/fake interpolates) followed by one generator
    update.

    Args:
        generator: Model mapping a noise batch of width ``latent_dim`` to
            generated samples.
        critic: Model mapping a batch of samples to one score per row.
        gp_weight: Multiplier applied to the gradient-penalty term.
        latent_dim: Width of the noise vector. When omitted it is read from
            ``generator.input_shape[-1]``, so the generator must already be
            built in that case.
        critic_steps: Number of critic updates per generator update.

    Raises:
        ValueError: If ``critic_steps`` is smaller than 1.
    """

    def __init__(self, generator, critic, gp_weight=10.0, latent_dim=None,
                 critic_steps=5):
        super().__init__()
        if critic_steps < 1:
            raise ValueError("critic_steps must be at least 1")
        if latent_dim is None:
            # Derive the noise width from the model itself instead of relying
            # on a notebook-level global.
            latent_dim = generator.input_shape[-1]
        self.generator = generator
        self.critic = critic
        self.gp_weight = gp_weight
        self.latent_dim = latent_dim
        self.critic_steps = critic_steps

    def compile(self, g_optimizer, c_optimizer, g_loss_fn, c_loss_fn):
        """Attach the optimizers and loss callables used by ``train_step``.

        Args:
            g_optimizer: Optimizer applied to the generator's variables.
            c_optimizer: Optimizer applied to the critic's variables.
            g_loss_fn: Callable ``(fake_output) -> scalar`` generator loss.
            c_loss_fn: Callable ``(real_output, fake_output) -> scalar``
                critic loss (without the gradient penalty).
        """
        super().compile()
        self.g_optimizer = g_optimizer
        self.c_optimizer = c_optimizer
        self.g_loss_fn = g_loss_fn
        self.c_loss_fn = c_loss_fn

    def gradient_penalty(self, batch_size, real_data, fake_data):
        """Return the gradient penalty on random real/fake interpolates.

        Each interpolate is ``real + alpha * (fake - real)`` with one
        ``alpha`` per row drawn uniformly from [0, 1), so it lies on the
        segment between the paired real and fake samples. The penalty is the
        batch mean of ``(||grad||_2 - 1) ** 2``, where ``grad`` is the
        gradient of the critic output with respect to the interpolate.

        Args:
            batch_size: Number of rows in the batch.
            real_data: Batch of real samples, shape ``(batch_size, features)``.
            fake_data: Batch of generated samples, same shape as ``real_data``.

        Returns:
            Scalar tensor holding the (unweighted) penalty.
        """
        fake_data = tf.convert_to_tensor(fake_data)
        real_data = tf.cast(real_data, fake_data.dtype)

        # Uniform, not normal: a normal draw can fall outside [0, 1] and put
        # the interpolate off the real-fake segment.
        alpha = tf.random.uniform([batch_size, 1], 0.0, 1.0, dtype=fake_data.dtype)
        interpolated = real_data + alpha * (fake_data - real_data)

        with tf.GradientTape() as gp_tape:
            gp_tape.watch(interpolated)
            pred = self.critic(interpolated, training=True)

        grads = gp_tape.gradient(pred, [interpolated])[0]
        # The small constant keeps the derivative of sqrt finite when a
        # gradient row is exactly zero.
        norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1]) + 1e-12)
        return tf.reduce_mean((norm - 1.0) ** 2)

    def train_step(self, real_data):
        """Run ``critic_steps`` critic updates followed by one generator update.

        Args:
            real_data: Batch of real samples, shape ``(batch, features)``. A
                tuple is also accepted, in which case its first element is
                used as the batch.

        Returns:
            Dict with ``"g_loss"`` (generator loss) and ``"c_loss"`` (critic
            loss including the weighted penalty, from the last critic update).
        """
        if isinstance(real_data, tuple):
            real_data = real_data[0]
        # Match the generator's dtype so real and generated batches can be
        # mixed without implicit conversions (NumPy input is often float64).
        real_data = tf.cast(real_data, self.generator.compute_dtype)
        batch_size = tf.shape(real_data)[0]

        for _ in range(self.critic_steps):
            noise = tf.random.normal([batch_size, self.latent_dim])
            with tf.GradientTape() as c_tape:
                fake_data = self.generator(noise, training=True)
                real_output = self.critic(real_data, training=True)
                fake_output = self.critic(fake_data, training=True)
                c_loss = self.c_loss_fn(real_output, fake_output)
                # Computed inside the tape so the penalty contributes to the
                # critic's gradients.
                gp = self.gradient_penalty(batch_size, real_data, fake_data)
                c_loss += gp * self.gp_weight

            c_grads = c_tape.gradient(c_loss, self.critic.trainable_variables)
            self.c_optimizer.apply_gradients(zip(c_grads, self.critic.trainable_variables))

        noise = tf.random.normal([batch_size, self.latent_dim])
        with tf.GradientTape() as g_tape:
            fake_data = self.generator(noise, training=True)
            fake_output = self.critic(fake_data, training=True)
            g_loss = self.g_loss_fn(fake_output)

        g_grads = g_tape.gradient(g_loss, self.generator.trainable_variables)
        self.g_optimizer.apply_gradients(zip(g_grads, self.generator.trainable_variables))

        return {"g_loss": g_loss, "c_loss": c_loss}

In [11]:
import matplotlib.pyplot as plt
from tqdm import tqdm

# Two independent Adam optimizers with identical hyper-parameters: one for the
# generator and one for the critic.
generator_optimizer, critic_optimizer = (
    tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.5, beta_2=0.9)
    for _ in range(2)
)

def generator_loss(fake_output):
    """Return the negated mean of the critic scores on generated samples.

    Args:
        fake_output: Critic scores for a batch of generated samples.

    Returns:
        Scalar tensor equal to ``-mean(fake_output)``.
    """
    mean_fake_score = tf.reduce_mean(fake_output)
    return tf.negative(mean_fake_score)

def critic_loss(real_output, fake_output):
    """Return mean critic score on fake samples minus mean score on real ones.

    Args:
        real_output: Critic scores for a batch of real samples.
        fake_output: Critic scores for a batch of generated samples.

    Returns:
        Scalar tensor equal to ``mean(fake_output) - mean(real_output)``.
    """
    mean_fake_score = tf.reduce_mean(fake_output)
    mean_real_score = tf.reduce_mean(real_output)
    return mean_fake_score - mean_real_score

wgan = WGAN(generator, critic)
wgan.compile(
    g_optimizer=generator_optimizer,
    c_optimizer=critic_optimizer,
    g_loss_fn=generator_loss,
    c_loss_fn=critic_loss
)

# Training Loop
epochs = 100
batch_size = 64
plot_every = 10  # number of epochs between progress reports / loss plots
g_losses = []
c_losses = []

for epoch in tqdm(range(epochs), desc="Training Progress"):
    # Walk over the attack samples in consecutive mini-batches; the final
    # batch may hold fewer than `batch_size` rows.
    for i in range(0, len(X_attack), batch_size):
        real_data = X_attack[i:i+batch_size]
        losses = wgan.train_step(real_data)
        # Store plain Python floats so the history does not keep tensors
        # alive and can be plotted directly.
        g_losses.append(float(losses["g_loss"]))
        c_losses.append(float(losses["c_loss"]))

    # Report after the first epoch, every `plot_every` epochs after that, and
    # after the final epoch. (The previous `epoch % 100` check with
    # `epochs = 100` only ever fired at epoch 0.)
    if epoch % plot_every == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch + 1}/{epochs} completed")
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(g_losses, label='Generator Loss')
        ax.plot(c_losses, label='Critic Loss')
        ax.set_xlabel('Training Steps')
        ax.set_ylabel('Loss')
        ax.legend()
        plt.show()
        plt.close(fig)  # release the figure so repeated reports do not pile up

Training Progress:   0%|          | 0/100 [00:36<?, ?it/s]


KeyboardInterrupt: 