In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split



# Load dataset
df = pd.read_csv("Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv")

# Strip whitespace from column names
df.columns = df.columns.str.strip()

# Drop columns with all NaNs or unnamed indices
df = df.dropna(axis=1, how='all')
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

# Replace inf/-inf with NaN, then fill NaNs with 0
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.fillna(0, inplace=True)

# Encode labels: 1 for attack, 0 for normal
df['Label'] = df['Label'].apply(lambda x: 0 if 'BENIGN' in x else 1)

# Drop non-numeric/categorical columns if any
non_numerics = df.select_dtypes(include=['object']).columns
df = df.drop(non_numerics.difference(['Label']), axis=1)

# Separate features and labels
X = df.drop('Label', axis=1).values
y = df['Label'].values

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

print("Training samples:", X_train.shape[0])
print("Feature dimension:", X_train.shape[1])


Training samples: 180596
Feature dimension: 78


In [5]:
import numpy as np

# Extract attack samples (label = 1)
X_attack = X_train[y_train == 1]

print(f"Attack sample count: {X_attack.shape[0]}")
print(f"Feature dimension: {X_attack.shape[1]}")


Attack sample count: 102283
Feature dimension: 78


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd

# Config
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
latent_dim = 64
feature_dim = 78
batch_size = 128
lr = 0.0001
epochs = 100

# ===== Generator =====
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.ReLU(True),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(True),
            nn.Linear(256, feature_dim),
        )

    def forward(self, z):
        return self.model(z)

# ===== Discriminator =====
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

# ===== Load and prepare attack data =====
X_attack_tensor = torch.tensor(X_attack, dtype=torch.float32)
train_loader = DataLoader(TensorDataset(X_attack_tensor), batch_size=batch_size, shuffle=True)

# ===== Initialize models and training utils =====
generator = Generator().to(device)
discriminator = Discriminator().to(device)
criterion = nn.BCELoss()
optimizer_G = optim.Adam(generator.parameters(), lr=lr)
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr)

# ===== Training Loop =====
for epoch in range(epochs):
    for real_batch, in train_loader:
        real_batch = real_batch.to(device)
        batch_size_curr = real_batch.size(0)

        # Real and fake label smoothing
        real_labels = torch.ones(batch_size_curr, 1).uniform_(0.9, 1.0).to(device)
        fake_labels = torch.zeros(batch_size_curr, 1).uniform_(0.0, 0.1).to(device)

        # === Train Discriminator ===
        z = torch.randn(batch_size_curr, latent_dim).to(device)
        fake_data = generator(z)

        d_real = discriminator(real_batch)
        d_fake = discriminator(fake_data.detach())

        loss_real = criterion(d_real, real_labels)
        loss_fake = criterion(d_fake, fake_labels)
        d_loss = loss_real + loss_fake

        optimizer_D.zero_grad()
        d_loss.backward()
        nn.utils.clip_grad_norm_(discriminator.parameters(), 1.0)
        optimizer_D.step()

        # === Train Generator ===
        z = torch.randn(batch_size_curr, latent_dim).to(device)
        fake_data = generator(z)
        g_loss = criterion(discriminator(fake_data), real_labels)

        optimizer_G.zero_grad()
        g_loss.backward()
        nn.utils.clip_grad_norm_(generator.parameters(), 1.0)
        optimizer_G.step()

    print(f"Epoch [{epoch+1}/{epochs}] | D Loss: {d_loss.item():.4f} | G Loss: {g_loss.item():.4f}")

# ===== Save the trained generator =====
torch.save(generator.state_dict(), "mobilegan_generator_stable.pth")

# ===== Function to generate synthetic samples =====
def generate_synthetic_samples(generator, num_samples=1000):
    generator.eval()
    with torch.no_grad():
        z = torch.randn(num_samples, latent_dim).to(device)
        synthetic_data = generator(z)
    return synthetic_data.cpu().numpy()

# ===== Generate and Save Synthetic Attack Data =====
synthetic_attacks = generate_synthetic_samples(generator, num_samples=1000)
df_synthetic = pd.DataFrame(synthetic_attacks)
df_synthetic.to_csv("synthetic_attacks.csv", index=False)
print("✅ Synthetic attack data saved as 'synthetic_attacks.csv'")


Epoch [1/100] | D Loss: 1.0291 | G Loss: 0.9713
Epoch [2/100] | D Loss: 1.3406 | G Loss: 0.7994
Epoch [3/100] | D Loss: 1.1148 | G Loss: 0.9344
Epoch [4/100] | D Loss: 1.0447 | G Loss: 0.8280
Epoch [5/100] | D Loss: 1.1147 | G Loss: 1.1052
Epoch [6/100] | D Loss: 1.1418 | G Loss: 1.0274
Epoch [7/100] | D Loss: 0.9891 | G Loss: 0.9570
Epoch [8/100] | D Loss: 1.0354 | G Loss: 1.1460
Epoch [9/100] | D Loss: 1.1307 | G Loss: 1.2718
Epoch [10/100] | D Loss: 1.1224 | G Loss: 1.2261
Epoch [11/100] | D Loss: 1.0881 | G Loss: 1.9774
Epoch [12/100] | D Loss: 1.1058 | G Loss: 1.1423
Epoch [13/100] | D Loss: 1.0545 | G Loss: 1.4940
Epoch [14/100] | D Loss: 1.0349 | G Loss: 1.6236
Epoch [15/100] | D Loss: 1.1301 | G Loss: 1.2354
Epoch [16/100] | D Loss: 0.9420 | G Loss: 1.9396
Epoch [17/100] | D Loss: 1.1518 | G Loss: 1.4888
Epoch [18/100] | D Loss: 0.9964 | G Loss: 1.8854
Epoch [19/100] | D Loss: 0.9982 | G Loss: 1.0739
Epoch [20/100] | D Loss: 0.8477 | G Loss: 2.5523
Epoch [21/100] | D Loss: 0.77