In [1]:
import tensorflow as tf
from tensorflow.keras import layers
import pandas as pd

2024-03-13 15:05:00.107477: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-13 15:05:00.107519: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-13 15:05:00.108411: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-13 15:05:00.113582: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Define the training parameters
EPOCHS = 25
noise_dim = 5
num_examples_to_generate = 16
batch_size = 8
sample_size = 88343

In [3]:
# Define the Generator model
def make_generator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(noise_dim,)))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(128, activation='linear'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(15, activation='relu'))
    return model

In [4]:
# Define the Discriminator model
def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(15,)))
    model.add(layers.Dense(32, activation='relu'))
#     model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    return model

In [5]:
# Define the loss functions
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)

In [6]:
def discriminator_loss(real_output, fake_output):
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

In [7]:
def generator_loss(fake_output):
    return cross_entropy(tf.ones_like(fake_output), fake_output)

In [8]:
# Create the models
generator = make_generator_model()
discriminator = make_discriminator_model()

# Define the optimizers
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

In [9]:
def sample():
    noise = tf.random.normal([sample_size, noise_dim])

    aug = generator(noise)
    aug_df = pd.DataFrame(aug, columns=df.columns)
    aug_df = aug_df.astype(int)
    
    return aug_df

In [10]:
file = 'real_benign.csv'

df = pd.read_csv(file)
shape = df.shape
print(f"Shape = {shape}")

df = df.drop(['Label_code', 'tot_fw_pk', 'tot_bw_pk', 'fw_pkt_l_max', 'fw_pkt_l_min', 'fw_pkt_l_avg', 'bw_pkt_l_max', 'bw_pkt_l_min', 'bw_pkt_l_mean', 'fw_pkt_l_std', 'bw_pkt_l_std', 'pkt_size_avg', 'tot_l_fw_pkt', 'bw_fl_pkt_s', 'bw_pkt_s'], axis=1)
df.info()

# Convert the DataFrame to a NumPy array without the header
data_array = df.iloc[1:].values

# Convert the NumPy array to a TensorFlow dataset
dataset = tf.data.Dataset.from_tensor_slices(data_array)
dataset = dataset.batch(batch_size)
dataset

Shape = (88343, 30)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88343 entries, 0 to 88342
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   fl_dur       88343 non-null  int64
 1   fw_fl_byt_s  88343 non-null  int64
 2   bw_fl_byt_s  88343 non-null  int64
 3   fw_fl_pkt_s  88343 non-null  int64
 4   fw_iat_tot   88343 non-null  int64
 5   fw_iat_avg   88343 non-null  int64
 6   fw_iat_std   88343 non-null  int64
 7   fw_iat_max   88343 non-null  int64
 8   fw_iat_min   88343 non-null  int64
 9   bw_iat_tot   88343 non-null  int64
 10  bw_iat_avg   88343 non-null  int64
 11  bw_iat_std   88343 non-null  int64
 12  bw_iat_max   88343 non-null  int64
 13  bw_iat_min   88343 non-null  int64
 14  fw_pkt_s     88343 non-null  int64
dtypes: int64(15)
memory usage: 10.1 MB


<_BatchDataset element_spec=TensorSpec(shape=(None, 15), dtype=tf.int64, name=None)>

In [11]:
# Define the training step for the discriminator
@tf.function
# Define the training step for the discriminator
@tf.function
def train_step_discriminator(data):
    noise = tf.random.normal([num_examples_to_generate, noise_dim])
    with tf.GradientTape() as disc_tape: 
        generated_data = generator(noise, training=True)

        real_output = discriminator(data, training=True)
        fake_output = discriminator(generated_data, training=True)

        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
    return disc_loss

In [12]:
# Define the training step for the generator
@tf.function
def train_step_generator():
    noise = tf.random.normal([num_examples_to_generate, noise_dim])
    with tf.GradientTape() as gen_tape:
        generated_data = generator(noise, training=True)
        fake_output = discriminator(generated_data, training=True)
        gen_loss = generator_loss(fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    return gen_loss

In [13]:
# Training loop
def train_gans(epochs):
    for epoch in range(epochs):
        for data in dataset:
            disc_loss = train_step_discriminator(data)
            gen_loss = train_step_generator()

        # Generate and save data for visualization
        if epoch % 100 == 0:
            pass

        print(f"Epoch {epoch+1}, Discriminator Loss: {disc_loss}, Generator Loss: {gen_loss}")


In [14]:
train_gans(EPOCHS)

Epoch 1, Discriminator Loss: 0.9796890616416931, Generator Loss: 2.7222073078155518
Epoch 2, Discriminator Loss: 1.3485281467437744, Generator Loss: 0.9874298572540283
Epoch 3, Discriminator Loss: 1.5085039138793945, Generator Loss: 0.7644875049591064
Epoch 4, Discriminator Loss: 1.2974334955215454, Generator Loss: 0.9768776893615723
Epoch 5, Discriminator Loss: 1.546891689300537, Generator Loss: 0.4930856227874756
Epoch 6, Discriminator Loss: 1.541694164276123, Generator Loss: 0.5145958662033081
Epoch 7, Discriminator Loss: 1.5413161516189575, Generator Loss: 0.6513816118240356
Epoch 8, Discriminator Loss: 1.3474361896514893, Generator Loss: 0.8516837358474731
Epoch 9, Discriminator Loss: 1.079552173614502, Generator Loss: 1.0724341869354248
Epoch 10, Discriminator Loss: 0.9976547956466675, Generator Loss: 1.0563812255859375
Epoch 11, Discriminator Loss: 1.4522366523742676, Generator Loss: 0.6623651385307312
Epoch 12, Discriminator Loss: 1.1809978485107422, Generator Loss: 0.955427289

In [15]:
df

Unnamed: 0,fl_dur,fw_fl_byt_s,bw_fl_byt_s,fw_fl_pkt_s,fw_iat_tot,fw_iat_avg,fw_iat_std,fw_iat_max,fw_iat_min,bw_iat_tot,bw_iat_avg,bw_iat_std,bw_iat_max,bw_iat_min,fw_pkt_s
0,3,88,50,1,3,1,2,3,0,0,0,0,0,0,1
1,0,800,900,12,0,0,0,0,0,0,0,0,0,0,13
2,0,1043,1173,16,0,0,0,0,0,0,0,0,0,0,16
3,0,1333,1499,21,0,0,0,0,0,0,0,0,0,0,21
4,0,2664,2998,42,0,0,0,0,0,0,0,0,0,0,42
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88338,2,105,118,2,2,1,1,2,0,2,1,1,2,0,2
88339,0,1200,1349,19,0,0,0,0,0,0,0,0,0,0,19
88340,1,267,300,4,1,0,0,1,0,1,0,0,1,0,4
88341,0,686,771,11,0,0,0,0,0,0,0,0,0,0,11


In [16]:
aug_df = sample()
aug_df

Unnamed: 0,fl_dur,fw_fl_byt_s,bw_fl_byt_s,fw_fl_pkt_s,fw_iat_tot,fw_iat_avg,fw_iat_std,fw_iat_max,fw_iat_min,bw_iat_tot,bw_iat_avg,bw_iat_std,bw_iat_max,bw_iat_min,fw_pkt_s
0,0,836,935,13,0,0,0,0,0,0,0,0,0,0,10
1,0,930,1040,14,0,0,0,0,0,0,0,0,0,0,12
2,0,796,890,12,0,0,0,0,0,0,0,0,0,0,10
3,0,1031,1153,16,0,0,0,0,0,0,0,0,0,0,13
4,0,837,936,13,0,0,0,0,0,0,0,0,0,0,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88338,0,1000,1118,15,0,0,0,0,0,0,0,0,0,0,13
88339,1,259,290,3,1,0,0,0,0,0,0,0,0,0,3
88340,1,254,285,3,1,0,0,0,0,0,0,0,0,0,3
88341,0,571,639,9,0,0,0,0,0,0,0,0,0,0,7


In [17]:
# filtered_test = test[(test['pkt_size_avg'] == 34) & (test['tot_l_fw_pkt'] == 192) & (test['fw_pkt_l_max'] == 64) & (test['fw_pkt_l_min'] == 64) & (test['fw_pkt_l_avg'] == 64) & (test['fw_pkt_l_std'] == 0) & (test['bw_pkt_l_max'] == 72) & (test['bw_pkt_l_min'] == 72) & (test['bw_pkt_l_mean'] == 72) & (test['bw_pkt_l_std'] == 0)]
# filtered_test.shape[0]

In [18]:
# aug_df.loc[:, ['fw_pkt_l_max', 'fw_pkt_l_min', 'fw_pkt_l_avg']] = 64
# aug_df.loc[:, ['bw_pkt_l_max', 'bw_pkt_l_min', 'bw_pkt_l_mean']] = 72
# aug_df.loc[:, ['fw_pkt_l_std', 'bw_pkt_l_std']] = 0
# aug_df.loc[:, ['pkt_size_avg']] = 34
# aug_df.loc[:, ['tot_l_fw_pkt']] = 192

# aug_df['Label_code'] = 1

# aug_df

In [19]:
aug_df.insert(1, 'tot_fw_pk', 3)
aug_df.insert(2, 'tot_bw_pk', 3)
aug_df.insert(3, 'tot_l_fw_pkt', 192)
aug_df.insert(4, 'fw_pkt_l_max', 64)
aug_df.insert(5, 'fw_pkt_l_min', 64)
aug_df.insert(6, 'fw_pkt_l_avg', 64)
aug_df.insert(7, 'fw_pkt_l_std', 0)
aug_df.insert(8, 'bw_pkt_l_max', 72)
aug_df.insert(9, 'bw_pkt_l_min', 72)
aug_df.insert(10, 'bw_pkt_l_mean', 72)
aug_df.insert(11, 'bw_pkt_l_std', 0)
aug_df.insert(15, 'bw_fl_pkt_s', 0)
aug_df['bw_fl_pkt_s'] = aug_df['fw_fl_pkt_s']
aug_df.insert(27, 'bw_pkt_s', 0)
aug_df['bw_pkt_s'] = aug_df['fw_pkt_s']
aug_df.insert(28, 'pkt_size_avg', 34)
aug_df['Label_code'] = 1

aug_df

Unnamed: 0,fl_dur,tot_fw_pk,tot_bw_pk,tot_l_fw_pkt,fw_pkt_l_max,fw_pkt_l_min,fw_pkt_l_avg,fw_pkt_l_std,bw_pkt_l_max,bw_pkt_l_min,...,fw_iat_min,bw_iat_tot,bw_iat_avg,bw_iat_std,bw_iat_max,bw_iat_min,fw_pkt_s,bw_pkt_s,pkt_size_avg,Label_code
0,0,3,3,192,64,64,64,0,72,72,...,0,0,0,0,0,0,10,10,34,1
1,0,3,3,192,64,64,64,0,72,72,...,0,0,0,0,0,0,12,12,34,1
2,0,3,3,192,64,64,64,0,72,72,...,0,0,0,0,0,0,10,10,34,1
3,0,3,3,192,64,64,64,0,72,72,...,0,0,0,0,0,0,13,13,34,1
4,0,3,3,192,64,64,64,0,72,72,...,0,0,0,0,0,0,10,10,34,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88338,0,3,3,192,64,64,64,0,72,72,...,0,0,0,0,0,0,13,13,34,1
88339,1,3,3,192,64,64,64,0,72,72,...,0,0,0,0,0,0,3,3,34,1
88340,1,3,3,192,64,64,64,0,72,72,...,0,0,0,0,0,0,3,3,34,1
88341,0,3,3,192,64,64,64,0,72,72,...,0,0,0,0,0,0,7,7,34,1


In [20]:
aug_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88343 entries, 0 to 88342
Data columns (total 30 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   fl_dur         88343 non-null  int64
 1   tot_fw_pk      88343 non-null  int64
 2   tot_bw_pk      88343 non-null  int64
 3   tot_l_fw_pkt   88343 non-null  int64
 4   fw_pkt_l_max   88343 non-null  int64
 5   fw_pkt_l_min   88343 non-null  int64
 6   fw_pkt_l_avg   88343 non-null  int64
 7   fw_pkt_l_std   88343 non-null  int64
 8   bw_pkt_l_max   88343 non-null  int64
 9   bw_pkt_l_min   88343 non-null  int64
 10  bw_pkt_l_mean  88343 non-null  int64
 11  bw_pkt_l_std   88343 non-null  int64
 12  fw_fl_byt_s    88343 non-null  int64
 13  bw_fl_byt_s    88343 non-null  int64
 14  fw_fl_pkt_s    88343 non-null  int64
 15  bw_fl_pkt_s    88343 non-null  int64
 16  fw_iat_tot     88343 non-null  int64
 17  fw_iat_avg     88343 non-null  int64
 18  fw_iat_std     88343 non-null  int64
 19  fw_i

In [21]:
aug_df.to_csv('aug_benign.csv', index=False)