In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_kddcup99
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


In [None]:
# Fetch the dataset
data = fetch_kddcup99(percent10=True)
X = pd.DataFrame(data.data)

# Encode categorical features
for col in X.select_dtypes(include=[object]).columns:
    X[col] = LabelEncoder().fit_transform(X[col])

# Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Convert to float32 tensor
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)


In [None]:
class Generator(nn.Module):
    def __init__(self, noise_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(noise_dim, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim),
            nn.Tanh()
        )

    def forward(self, z):
        return self.model(z)

class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

latent_dim = 100
batch_size = 256
epochs = 50
lr = 0.0002

# Models
G = Generator(noise_dim=latent_dim, output_dim=X_tensor.shape[1]).to(device)
D = Discriminator(input_dim=X_tensor.shape[1]).to(device)

# Optimizers
optimizer_G = optim.Adam(G.parameters(), lr=lr)
optimizer_D = optim.Adam(D.parameters(), lr=lr)

# Loss
adversarial_loss = nn.BCELoss()

# DataLoader
dataloader = DataLoader(TensorDataset(X_tensor), batch_size=batch_size, shuffle=True)


In [None]:
for epoch in range(epochs):
    for real_samples, in dataloader:
        real_samples = real_samples.to(device)
        batch_size = real_samples.size(0)

        # Ground truth
        valid = torch.ones(batch_size, 1, device=device)
        fake = torch.zeros(batch_size, 1, device=device)

        # === Train Discriminator ===
        optimizer_D.zero_grad()

        z = torch.randn(batch_size, latent_dim, device=device)
        fake_samples = G(z).detach()

        real_loss = adversarial_loss(D(real_samples), valid)
        fake_loss = adversarial_loss(D(fake_samples), fake)
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        # === Train Generator ===
        optimizer_G.zero_grad()

        z = torch.randn(batch_size, latent_dim, device=device)
        generated_samples = G(z)
        g_loss = adversarial_loss(D(generated_samples), valid)

        g_loss.backward()
        optimizer_G.step()

    print(f"[Epoch {epoch+1}/{epochs}] D_loss: {d_loss.item():.4f}, G_loss: {g_loss.item():.4f}")


[Epoch 1/50] D_loss: 0.6598, G_loss: 0.7455
[Epoch 2/50] D_loss: 0.6002, G_loss: 0.7995
[Epoch 3/50] D_loss: 0.6013, G_loss: 0.8564
[Epoch 4/50] D_loss: 0.6166, G_loss: 0.8576
[Epoch 5/50] D_loss: 0.5889, G_loss: 0.8731
[Epoch 6/50] D_loss: 0.5764, G_loss: 0.8624
[Epoch 7/50] D_loss: 0.5787, G_loss: 0.9261
[Epoch 8/50] D_loss: 0.5913, G_loss: 0.8910
[Epoch 9/50] D_loss: 0.5687, G_loss: 0.8505
[Epoch 10/50] D_loss: 0.5736, G_loss: 0.9007
[Epoch 11/50] D_loss: 0.5629, G_loss: 0.9157
[Epoch 12/50] D_loss: 0.5830, G_loss: 0.8701
[Epoch 13/50] D_loss: 0.5739, G_loss: 0.9080
[Epoch 14/50] D_loss: 0.5428, G_loss: 0.9275
[Epoch 15/50] D_loss: 0.5582, G_loss: 0.9099
[Epoch 16/50] D_loss: 0.5585, G_loss: 0.9307
[Epoch 17/50] D_loss: 0.6008, G_loss: 0.9146
[Epoch 18/50] D_loss: 0.5549, G_loss: 0.9064
[Epoch 19/50] D_loss: 0.5590, G_loss: 0.9197
[Epoch 20/50] D_loss: 0.5403, G_loss: 0.8923
[Epoch 21/50] D_loss: 0.5773, G_loss: 0.9447
[Epoch 22/50] D_loss: 0.5691, G_loss: 0.9232
[Epoch 23/50] D_los

In [None]:
import plotly.graph_objs as go
import plotly.io as pio

# Replace these lists with your actual logged losses
epochs = list(range(1, 51))
d_loss = [
    0.6302, 0.6150, 0.6121, 0.5947, 0.3011, 0.6017, 0.5855, 0.5885, 0.5615, 0.5594,
    0.5944, 0.5732, 0.5663, 0.5454, 0.5876, 0.5880, 0.5413, 0.5618, 0.5716, 0.5660,
    0.5735, 0.5656, 0.5630, 0.5659, 0.5604, 0.5590, 0.5544, 0.5597, 0.5679, 0.5756,
    0.5891, 0.5531, 0.5463, 0.5685, 0.5441, 0.5539, 0.5415, 0.5724, 0.5691, 0.5759,
    0.5379, 0.5854, 0.5466, 0.5311, 0.5435, 0.5428, 0.5525, 0.5790, 0.5729, 0.5455
]
g_loss = [
    0.7346, 0.7678, 0.8131, 0.8424, 1.8990, 0.8664, 0.8688, 0.9104, 0.9712, 0.9126,
    0.9388, 0.9524, 0.9087, 0.9429, 0.9310, 0.9318, 0.9538, 0.9663, 0.8969, 0.8859,
    0.9063, 0.9883, 0.9224, 0.8841, 0.8674, 0.9476, 0.9328, 0.9231, 0.9200, 0.9382,
    0.9555, 0.9192, 0.8721, 0.8887, 0.9492, 0.9288, 0.9202, 0.8772, 0.9346, 0.9900,
    0.9750, 0.9428, 0.9441, 0.9274, 0.9990, 0.9459, 0.9603, 0.9569, 0.9301, 0.9294
]

# Create traces
trace_d = go.Scatter(x=epochs, y=d_loss, mode='lines+markers', name='Discriminator Loss', line=dict(color='red'))
trace_g = go.Scatter(x=epochs, y=g_loss, mode='lines+markers', name='Generator Loss', line=dict(color='green'))

# Layout
layout = go.Layout(
    title='GAN Training Losses Over Epochs',
    xaxis=dict(title='Epoch'),
    yaxis=dict(title='Loss'),
    template='plotly_white'
)

# Figure
fig = go.Figure(data=[trace_d, trace_g], layout=layout)

# Show plot
pio.show(fig)


In [None]:
data = fetch_kddcup99(percent10=True)
X = data.data
y = data.target


In [None]:
from sklearn.preprocessing import LabelEncoder

# Convert target labels to numeric
le = LabelEncoder()
y_real = le.fit_transform(y)  # This will convert b'normal.' → 0, b'smurf.' → 1, etc.


In [None]:
# Use real labels for both real and fake sets (for testing GAN fidelity)
y_train_fake = y_real[:len(synthetic_data)]


In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Convert labels to numeric
le = LabelEncoder()
y_real = le.fit_transform(y)  # 'y' is from original KDD dataset

# Generate synthetic data
G.eval()
num_samples = len(X_tensor)
z = torch.randn(num_samples, latent_dim).to(device)
synthetic_data = G(z).detach().cpu().numpy()

# Train on fake data
X_train_fake = synthetic_data
y_train_fake = y_real[:len(X_train_fake)]  # use actual labels

# Test on real data
X_real = X_tensor.numpy()
X_test_real, _, y_test_real, _ = train_test_split(X_real, y_real, test_size=0.1, random_state=42)

# Logistic Regression
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_fake, y_train_fake)
y_pred = clf.predict(X_test_real)

# Evaluate
accuracy = accuracy_score(y_test_real, y_pred)
print(f"Accuracy (Train on GAN-generated data, Test on real data): {accuracy:.4f}")


Accuracy (Train on GAN-generated data, Test on real data): 0.5683


LogisticRegression


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Fetch and decode data
data = fetch_kddcup99(percent10=True)
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Decode byte strings
X = X.applymap(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)
y = y.apply(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)

# Encode categorical features
for col in X.select_dtypes(include=['object']).columns:
    X[col] = LabelEncoder().fit_transform(X[col])

# Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Binarize target: normal = 0, attack = 1
y_binary = y.apply(lambda val: 0 if val == 'normal.' else 1)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_binary, test_size=0.2, random_state=42, stratify=y_binary)



DataFrame.applymap has been deprecated. Use DataFrame.map instead.



In [None]:
# Train Logistic Regression
clf = LogisticRegression(max_iter=1000, solver='lbfgs')
clf.fit(X_train, y_train)


In [None]:
# Predictions
y_pred = clf.predict(X_test)

# Evaluation Metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=["Normal", "Attack"]))


Accuracy: 0.9927432822225596
Precision: 0.9970919941334142
Recall: 0.9938625565539578
F1 Score: 0.9954746561811653

Confusion Matrix:
 [[19226   230]
 [  487 78862]]

Classification Report:
               precision    recall  f1-score   support

      Normal       0.98      0.99      0.98     19456
      Attack       1.00      0.99      1.00     79349

    accuracy                           0.99     98805
   macro avg       0.99      0.99      0.99     98805
weighted avg       0.99      0.99      0.99     98805



In [None]:
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_score, recall_score, f1_score, accuracy_score
import plotly.graph_objects as go

# Predictions and probabilities
y_pred = clf.predict(X_test)
y_proba = clf.predict_proba(X_test)[:, 1]

# Metrics
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# -----------------------------
# 📊 Confusion Matrix - LIGHT MODE
# -----------------------------
cm_labels = ["Normal", "Attack"]
fig_cm = go.Figure(data=go.Heatmap(
    z=cm,
    x=cm_labels,
    y=cm_labels,
    colorscale='Blues',
    text=cm,
    texttemplate="%{text}"
))
fig_cm.update_layout(
    title="Confusion Matrix",
    xaxis_title="Predicted",
    yaxis_title="Actual",
    template='plotly_white',
    font=dict(family="Arial", color="black"),
    paper_bgcolor='white',
    plot_bgcolor='white',
    width=600,
    height=500,
)
fig_cm.show()

# -----------------------------
# 📈 ROC Curve - LIGHT MODE
# -----------------------------
fpr, tpr, _ = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)

fig_roc = go.Figure()
fig_roc.add_trace(go.Scatter(
    x=fpr, y=tpr,
    mode='lines',
    name=f'AUC = {roc_auc:.2f}',
    line=dict(color='darkcyan', width=3)
))
fig_roc.add_trace(go.Scatter(
    x=[0, 1], y=[0, 1],
    mode='lines',
    name='Random Guess',
    line=dict(dash='dash', color='gray')
))
fig_roc.update_layout(
    title='ROC Curve',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    showlegend=True,
    template='plotly_white',
    font=dict(family="Arial", color="black"),
    paper_bgcolor='white',
    plot_bgcolor='white',
    width=600,
    height=500,
)
fig_roc.show()

# -----------------------------
# 📊 Bar Chart: Evaluation Metrics - LIGHT MODE
# -----------------------------
metrics = {
    'Accuracy': acc,
    'Precision': prec,
    'Recall': rec,
    'F1 Score': f1
}

colors = ['cornflowerblue', 'seagreen', 'goldenrod', 'indianred']

fig_bar = go.Figure()

for i, (metric, value) in enumerate(metrics.items()):
    fig_bar.add_trace(go.Bar(
        x=[metric],
        y=[value],
        name=metric,
        marker_color=colors[i],
        text=[f'{value:.2f}'],
        textposition='outside'
    ))

fig_bar.update_layout(
    title='Evaluation Metrics (Logistic Regression)',
    xaxis_title='Metric',
    yaxis_title='Score',
    yaxis=dict(range=[0, 1.1]),
    showlegend=False,
    template='plotly_white',
    font=dict(family="Arial", color="black"),
    paper_bgcolor='white',
    plot_bgcolor='white',
    width=600,
    height=500,
)
fig_bar.show()
