In [1]:
import pandas as pd

In [2]:
import pandas as pd

# Load the benign dataset
df_benign = pd.read_csv('benign_ml.csv')
print(f"Shape of benign_ml.csv: {df_benign.shape}")

# Load the attack dataset
df_attack = pd.read_csv('attack_ml.csv')
print(f"Shape of attack_ml.csv: {df_attack.shape}")

# df_attack.replace([np.inf, -np.inf], 0, inplace=True)
df_attack.fillna(0, inplace=True)
# 🔽 Downsample benign to match attack count
df_benign_sampled = df_benign.sample(
    n=len(df_attack),
    random_state=42
)
# Merge the two dataframes by concatenating them vertically
df_merged = pd.concat([df_benign_sampled, df_attack], ignore_index=True)

print("\nFirst 5 rows of the merged DataFrame:")
display(df_merged.head())

print(f"\nShape of the merged DataFrame: {df_merged.shape}")

Shape of benign_ml.csv: (600000, 43)
Shape of attack_ml.csv: (145209, 43)

First 5 rows of the merged DataFrame:


Unnamed: 0,Flow Duration,Flow Bytes/s,Flow Packets/s,Down/Up Ratio,Total Fwd Packets,Total Backward Packets,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,...,Label,Label1,Label2,Packet Rate Intensity,Byte Efficiency Ratio,Directional Asymmetry Score,Flag Aggression Index,Burstiness Score,Packet Size Variance Ratio,Flow Stability Index
0,82.0,0.0,24390.2439,0.0,2.0,0.0,24390.2439,0.0,0.0,0.0,...,Benign,0,Benign,293.85836,0.0,0.666667,-1.0,0.0,0.0,0.0
1,179.0,0.0,11173.18436,0.0,2.0,0.0,11173.18436,0.0,0.0,0.0,...,Benign,0,Benign,62.073246,0.0,0.666667,-1.0,0.0,0.0,0.0
2,87987.0,1227.454056,22.730631,1.0,1.0,1.0,11.365315,11.365315,38.0,70.0,...,Benign,0,Benign,0.000258,51.72446,0.0,0.0,0.0,0.371984,0.0
3,2770.0,76895.30686,722.021661,1.0,1.0,1.0,361.01083,361.01083,49.0,164.0,...,Benign,0,Benign,0.260564,106.352702,0.0,0.0,0.0,0.751645,0.0
4,24832.0,5637.886598,80.541237,1.0,1.0,1.0,40.270619,40.270619,42.0,98.0,...,Benign,0,Benign,0.003243,69.141539,0.0,0.0,0.0,0.524296,0.0



Shape of the merged DataFrame: (290418, 43)


In [3]:
FEATURE_COLS = [
    # original flow features
    "Flow Duration","Flow Bytes/s","Flow Packets/s","Down/Up Ratio",
    "Total Fwd Packets","Total Backward Packets","Fwd Packets/s","Bwd Packets/s",
    "Min Packet Length","Max Packet Length","Packet Length Mean","Packet Length Std",
    "Fwd Packet Length Mean","Fwd Packet Length Std",
    "Bwd Packet Length Mean","Bwd Packet Length Std",
    "Flow IAT Mean","Flow IAT Std","Flow IAT Max",
    "Fwd IAT Mean","Fwd IAT Std","Bwd IAT Mean","Bwd IAT Std",
    "SYN Flag Count","ACK Flag Count","RST Flag Count","PSH Flag Count",
    "Fwd Header Length","Bwd Header Length",
    "Avg Fwd Segment Size","Avg Bwd Segment Size",
    "Active Mean","Idle Mean",

    # engineered features
    "Packet Rate Intensity",
    "Byte Efficiency Ratio",
    "Directional Asymmetry Score",
    "Flag Aggression Index",
    "Burstiness Score",
    "Packet Size Variance Ratio",
    "Flow Stability Index"
]


In [4]:
from sklearn.preprocessing import StandardScaler

X = df_merged[FEATURE_COLS]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [5]:
import joblib
joblib.dump(scaler, "scaler.pkl")


['scaler.pkl']

In [6]:
X_benign = X_scaled[df_merged["Label1"] == 0]

In [7]:
from sklearn.ensemble import IsolationForest

iso = IsolationForest(
    n_estimators=300,
    contamination=0.20,
    max_samples=0.8,
    random_state=42,
    n_jobs=-1
)


iso.fit(X_benign)


0,1,2
,n_estimators,300
,max_samples,0.8
,contamination,0.2
,max_features,1.0
,bootstrap,False
,n_jobs,-1
,random_state,42
,verbose,0
,warm_start,False


In [8]:
joblib.dump(iso, "isolation_forest.pkl")


['isolation_forest.pkl']

In [9]:
y_true = df_merged["Label1"]  # 0 benign, 1 attack
y_pred_if = iso.predict(X_scaled)

# convert IF output
y_pred_if = (y_pred_if == -1).astype(int)


In [10]:
from sklearn.metrics import classification_report

print(classification_report(y_true, y_pred_if))


              precision    recall  f1-score   support

           0       0.83      0.80      0.81    145209
           1       0.81      0.84      0.82    145209

    accuracy                           0.82    290418
   macro avg       0.82      0.82      0.82    290418
weighted avg       0.82      0.82      0.82    290418



In [11]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping

  if not hasattr(np, "object"):


In [12]:
print(tf.__version__)

2.20.0


In [13]:
input_dim = X_benign.shape[1]

input_layer = Input(shape=(input_dim,))

# Encoder
x = Dense(128, activation="relu")(input_layer)
x = Dense(64, activation="relu")(x)
bottleneck = Dense(32, activation="relu")(x)

# Decoder
x = Dense(64, activation="relu")(bottleneck)
x = Dense(128, activation="relu")(x)
output_layer = Dense(input_dim, activation="linear")(x)

autoencoder = Model(inputs=input_layer, outputs=output_layer)


In [14]:
autoencoder.compile(
    optimizer="adam",
    loss="mse"
)

In [15]:
early_stop = EarlyStopping(
    monitor="loss",
    patience=3,
    restore_best_weights=True
)
history = autoencoder.fit(
    X_benign,
    X_benign,
    epochs=30,
    batch_size=1024,
    shuffle=True,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/30
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - loss: 0.3019
Epoch 2/30
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 23ms/step - loss: 0.0353
Epoch 3/30
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 25ms/step - loss: 0.0207
Epoch 4/30
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - loss: 0.0152
Epoch 5/30
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - loss: 0.0117
Epoch 6/30
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step - loss: 0.0106
Epoch 7/30
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - loss: 0.0093
Epoch 8/30
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - loss: 0.0111
Epoch 9/30
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step - loss: 0.0087
Epoch 10/30
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms

In [None]:

# 📉 Plot Training Loss
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.title('Autoencoder Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss (MSE)')
plt.legend()
plt.grid(True)
plt.show()


In [17]:
autoencoder.save("autoencoder.keras")

In [18]:
X_test_recon = autoencoder.predict(X_scaled)

reconstruction_error = np.mean(
    np.square(X_scaled - X_test_recon),
    axis=1
)

[1m9076/9076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 3ms/step


In [19]:
benign_recon = autoencoder.predict(X_benign)
benign_error = np.mean(
    np.square(X_benign - benign_recon),
    axis=1
)
threshold = np.percentile(benign_error, 98)  # 98% benign confidence

[1m4538/4538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step


In [20]:
y_pred_ae = (reconstruction_error > threshold).astype(int)

In [21]:
from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred_ae))

              precision    recall  f1-score   support

           0       0.61      0.98      0.75    145209
           1       0.95      0.37      0.54    145209

    accuracy                           0.68    290418
   macro avg       0.78      0.68      0.64    290418
weighted avg       0.78      0.68      0.64    290418



In [22]:
final_alert = np.logical_and(y_pred_if == 1, y_pred_ae == 1).astype(int)

In [23]:
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_true, final_alert))
print(confusion_matrix(y_true, final_alert))


              precision    recall  f1-score   support

           0       0.61      0.98      0.75    145209
           1       0.95      0.37      0.54    145209

    accuracy                           0.68    290418
   macro avg       0.78      0.68      0.64    290418
weighted avg       0.78      0.68      0.64    290418

[[142304   2905]
 [ 90892  54317]]


In [24]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

from xgboost import XGBClassifier

In [25]:
y = df_merged["Label2"]

In [26]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)

print("Class mapping:")
for i, cls in enumerate(le.classes_):
    print(i, "→", cls)


Class mapping:
0 → Benign
1 → Botnet/Malware
2 → Brute Force
3 → Denial of Service (DoS/DDoS)
4 → Reconnaissance
5 → Web Application Attacks


In [27]:
X_train, X_test1, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    stratify=y_encoded,
    random_state=42
)
test_index = X_test1.index

In [28]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test1)

In [29]:
xgb = XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    objective="multi:softprob",
    eval_metric="mlogloss",
    num_class=len(le.classes_),
    random_state=42,
    n_jobs=-1
)
# ⚖️ Compute Class Weights
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import pandas as pd

classes = np.unique(y_encoded)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_encoded)
class_weights = dict(zip(classes, weights))

print("Train Label Value Counts:")
print(pd.Series(y_train).value_counts())

sample_weights = [class_weights[y] for y in y_train]

xgb.fit(X_train, y_train, sample_weight=sample_weights)

0,1,2
,objective,'multi:softprob'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.8
,device,
,early_stopping_rounds,
,enable_categorical,False


In [30]:
joblib.dump(xgb, "xgboost.pkl")

['xgboost.pkl']

In [None]:
# 🧠 SHAP Explainability
import shap
import matplotlib.pyplot as plt

# Use a small sample for SHAP to save time
X_test_sample = pd.DataFrame(X_test, columns=FEATURE_COLS).sample(n=1000, random_state=42)

explainer = shap.TreeExplainer(xgb)
shap_values = explainer.shap_values(X_test_sample)

plt.figure()
shap.summary_plot(shap_values, X_test_sample, class_names=le.classes_)


In [31]:
y_pred_xgb = xgb.predict(X_test)
print(classification_report(
    y_test,
    y_pred_xgb,
    target_names=le.classes_
))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_xgb))

# 🔥 Confusion Matrix Heatmap
import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 8))
sns.heatmap(confusion_matrix(y_test, y_pred_xgb), annot=True, fmt='d', cmap='Blues', xticklabels=le.classes_, yticklabels=le.classes_)
plt.title('XGBoost Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()


                              precision    recall  f1-score   support

                      Benign       1.00      1.00      1.00     29042
              Botnet/Malware       0.99      1.00      0.99      7105
                 Brute Force       1.00      1.00      1.00      4416
Denial of Service (DoS/DDoS)       0.99      1.00      0.99     16718
              Reconnaissance       1.00      0.99      1.00       212
     Web Application Attacks       0.99      0.88      0.94       591

                    accuracy                           0.99     58084
                   macro avg       1.00      0.98      0.99     58084
                weighted avg       0.99      0.99      0.99     58084

Confusion Matrix:
[[28916    53     1    72     0     0]
 [   15  7087     0     3     0     0]
 [    0     0  4408     6     0     2]
 [   70     1     8 16639     0     0]
 [    0     0     0     1   210     1]
 [   46     9     2    11     0   523]]


In [32]:
def predict_attack(flow_features):
    flow_scaled = scaler.transform(flow_features)
    pred_class = xgb.predict(flow_scaled)
    return le.inverse_transform(pred_class)

# Example
# predict_attack(new_flow_df)


In [33]:
# # Convert predictions to risk contribution
# # if_score = y_pred_if.astype(float)     # 0 or 1
# if_score = (iso.decision_function(X_test) * -1)
# if_score = (if_score - if_score.min()) / (if_score.max() - if_score.min())

# # ae_score = y_pred_ae.astype(float)     # 0 or 1
# ae_score = reconstruction_error
# ae_score = (ae_score - ae_score.min()) / (ae_score.max() - ae_score.min())

# # Rule-based score (XGBoost)
# # rule_score = np.where(y_pred_xgb == "Normal", 0.0, 1.0)
# xgb_score = xgb.predict_proba(X_test)[:, 1]


In [34]:
# # Step 2: Align predictions
# if_score = pd.Series(y_pred_if, index=X_scaled.index)

# ae_score = (
#     pd.Series(y_pred_ae, index=X_scaled.index)
#       .reindex(test_index, fill_value=0)
# )

# xgb_pred = (
#     pd.Series(y_pred_xgb, index=X_test.index)
#       .reindex(test_index, fill_value="Normal")
# )


In [35]:
# ✅ Step 2: Create prediction Series with CORRECT indexes

# Isolation Forest predictions
if_score = pd.Series(
    y_pred_if,
    index=X.index,
    name="if_score"
)

# Autoencoder predictions
ae_score = pd.Series(
    y_pred_ae,
    index=X.index,
    name="ae_score"
)

# XGBoost predictions
xgb_pred = pd.Series(
    y_pred_xgb,
    index=X_test1.index,
    name="xgb_pred"
)


In [36]:
# ✅ Step 3: Align all predictions to same rows
common_index = (
    if_score.index
    .intersection(ae_score.index)
    .intersection(xgb_pred.index)
)

if_score = if_score.loc[common_index]
ae_score = ae_score.loc[common_index]
xgb_pred = xgb_pred.loc[common_index]

In [37]:
rule_score = np.where(xgb_pred == "Normal", 0.0, 1.0)

In [38]:
final_risk_score = (
    0.4 * if_score +
    0.3 * ae_score +
    0.2 * rule_score
)

In [39]:
final_decision = np.where(
    final_risk_score > 0.7, "BLOCK",
    np.where(final_risk_score >= 0.5, "ALERT", "ALLOW")
)

In [40]:
final_risk_score = pd.Series(final_risk_score, index=test_index)
final_decision   = pd.Series(final_decision, index=test_index)

In [41]:
# BLOCK or ALERT = Attack
y_pred_final = np.where(final_decision == "ALLOW", 0, 1)

In [42]:
y_true_aligned = y_true.loc[common_index]

In [43]:
from sklearn.metrics import classification_report, confusion_matrix
print("📊 FINAL HYBRID MODEL PERFORMANCE\n")
print(classification_report(y_true_aligned, y_pred_final, target_names=["Normal", "Attack"]))
print("Confusion Matrix:")
print(confusion_matrix(y_true_aligned, y_pred_final))

📊 FINAL HYBRID MODEL PERFORMANCE

              precision    recall  f1-score   support

      Normal       0.83      0.80      0.81     29042
      Attack       0.81      0.84      0.82     29042

    accuracy                           0.82     58084
   macro avg       0.82      0.82      0.82     58084
weighted avg       0.82      0.82      0.82     58084

Confusion Matrix:
[[23179  5863]
 [ 4710 24332]]


In [44]:
results_df = pd.DataFrame({
    "IF_Anomaly": if_score,
    "AE_Anomaly": ae_score,
    "XGB_Prediction": xgb_pred,
    "Final_Risk_Score": final_risk_score.round(4),
    "Final_Action": final_decision
})
results_df.head(30)

Unnamed: 0,IF_Anomaly,AE_Anomaly,XGB_Prediction,Final_Risk_Score,Final_Action
5,0,0,0,0.2,ALERT
14,0,0,0,0.2,ALERT
15,1,0,0,0.6,ALLOW
16,0,0,0,0.2,ALERT
18,0,0,0,0.2,ALERT
26,0,0,0,0.2,ALERT
28,1,0,0,0.6,BLOCK
29,0,0,0,0.2,ALERT
36,0,0,0,0.2,ALLOW
37,1,0,0,0.6,ALLOW
