In [7]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import (
    confusion_matrix, classification_report, roc_auc_score, RocCurveDisplay, accuracy_score, precision_recall_fscore_support, ConfusionMatrixDisplay
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Input and output folders, given relative to the working directory.
DATA_DIR, OUTPUT_DIR = "../dataset", "../results"

# Full paths of the two CSV files handed to load_data().
TRAIN_CSV, TEST_CSV = (
    os.path.join(DATA_DIR, csv_name)
    for csv_name in ("UNSW_NB15_training-set.csv", "UNSW_NB15_testing-set.csv")
)

In [3]:
#load data
def load_data(train_csv, test_csv):
    """Read the train and test CSVs and normalise their column names.

    Each header is stripped of surrounding whitespace and lower-cased so
    later cells can refer to columns by one canonical spelling.

    Parameters
    ----------
    train_csv, test_csv : path or file-like
        Passed straight to ``pd.read_csv``.

    Returns
    -------
    (train, test) : tuple of pandas.DataFrame

    Notes
    -----
    Downstream code expects a ``label`` column; its presence is not
    verified here.
    """
    def _read_normalised(source):
        frame = pd.read_csv(source, low_memory=False)
        frame.columns = frame.columns.str.strip().str.lower()
        return frame

    return _read_normalised(train_csv), _read_normalised(test_csv)

# Read both splits once; the preprocessing cell works from train_df / test_df.
train_df, test_df = load_data(TRAIN_CSV, TEST_CSV)

In [4]:
#clean data
def clean(df, drop_attack_cat=True):
    """Convert a raw frame into a standardised feature matrix and its labels.

    Processing steps, in order:

    1. Drop identifier / timestamp columns when present.
    2. Pull out ``label`` as the integer target.
    3. Optionally drop ``attack_cat``.
    4. Replace every object-dtype column with integer codes from
       ``pd.factorize(..., sort=True)``.
    5. Treat +/-inf as missing and discard every row that still has a
       missing value -- from the features *and* from the target, so the two
       always describe the same rows.
    6. Standardise the remaining features with a freshly fitted
       ``StandardScaler``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``label`` column. The input frame is not modified.
    drop_attack_cat : bool, default True
        Remove the ``attack_cat`` column if it exists.

    Returns
    -------
    X : numpy.ndarray
        Scaled feature matrix, one row per retained sample.
    y : pandas.Series
        Integer labels for exactly the rows kept in ``X``.

    Notes
    -----
    NOTE(review): the categorical codes and the scaling statistics are
    derived from ``df`` alone. Calling this function separately on two
    frames therefore gives encodings and scales that are not guaranteed to
    be comparable between them.
    """
    df = df.copy()
    drop_candidates = [c for c in ["id", "label.1", "stime", "ltime", "timestamp", "time"] if c in df.columns]
    if drop_candidates:
        df = df.drop(columns=drop_candidates)
    y = df["label"].astype(int)
    df = df.drop(columns=["label"])
    if drop_attack_cat and "attack_cat" in df.columns:
        df = df.drop(columns=["attack_cat"])
    cat_cols = df.select_dtypes(include=["object"]).columns
    for c in cat_cols:
        df[c] = pd.factorize(df[c], sort=True)[0]

    # Build one positional row mask and apply it to features and target alike.
    # Dropping rows from the features only would leave X and y with different
    # lengths whenever the data contains NaN or +/-inf.
    df = df.replace([np.inf, -np.inf], np.nan)
    keep = df.notna().all(axis=1).to_numpy()
    df = df[keep]
    y = y[keep]

    X = StandardScaler().fit_transform(df.values)
    return X, y

In [5]:
def eval_cls(model_name, y_true, y_hat, proba=None):
    """Collect headline binary-classification metrics into one flat dict.

    Parameters
    ----------
    model_name : str
        Stored under the ``"model"`` key of the result.
    y_true : array-like
        Ground-truth labels.
    y_hat : array-like
        Predicted labels.
    proba : array-like or None, default None
        Scores for the positive class. When given, ROC AUC is attempted.

    Returns
    -------
    dict
        Keys ``model``, ``accuracy``, ``precision_att``, ``recall_att``,
        ``f1_att`` and ``roc_auc``. Precision / recall / F1 are computed with
        ``average='binary'``, i.e. for the positive class only. ``roc_auc``
        is ``None`` when ``proba`` is ``None`` or the AUC computation fails.

    Warns
    -----
    RuntimeWarning
        If ``roc_auc_score`` raises; the remaining metrics are still returned.
    """
    acc = accuracy_score(y_true, y_hat)
    p, r, f1, _ = precision_recall_fscore_support(y_true, y_hat, average='binary', zero_division=0)
    auc = None
    if proba is not None:
        try:
            auc = roc_auc_score(y_true, proba)
        except Exception as exc:
            # Still best-effort (the other metrics remain useful), but report
            # the reason instead of silently returning roc_auc=None.
            warnings.warn(
                f"{model_name}: ROC AUC could not be computed ({exc}); reporting None.",
                RuntimeWarning,
                stacklevel=2,
            )
    return {
        "model": model_name,
        "accuracy": acc,
        "precision_att": p,
        "recall_att": r,
        "f1_att": f1,
        "roc_auc": auc
    }

def show_report(name, y_true, y_hat):
    """Print a titled confusion matrix and per-class report for one model.

    Parameters
    ----------
    name : str
        Model name shown in the banner line.
    y_true : array-like
        Ground-truth labels.
    y_hat : array-like
        Predicted labels.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    from sklearn.metrics import classification_report, confusion_matrix

    banner = f"\n=== {name} ==="
    print(banner)

    matrix = confusion_matrix(y_true, y_hat)
    print(matrix)

    # Four decimals in the per-class precision / recall / F1 table.
    report_text = classification_report(y_true, y_hat, digits=4)
    print(report_text)

In [6]:
# Preprocess each split.
# NOTE(review): clean() factorizes categorical columns and fits a StandardScaler
# on whichever frame it is given, so these two calls encode and scale train and
# test independently. Confirm that is intended before trusting the test scores.
X_train, y_train = clean(train_df)
X_test,  y_test  = clean(test_df)

# Fail early, with a readable message, if preprocessing produced mismatched shapes.
assert len(X_train) == len(y_train), "train features and labels differ in length"
assert len(X_test) == len(y_test), "test features and labels differ in length"
assert X_train.shape[1] == X_test.shape[1], "train and test have different feature counts"

# Fixed seed so the fitted model is reproducible across runs.
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)
y_hat_gb = gb.predict(X_test)
try:
    # Column 1 holds the probability of the positive class (label 1).
    proba_gb = gb.predict_proba(X_test)[:,1]
except Exception as exc:
    # Keep going without probabilities (ROC AUC and the ROC plot are then
    # skipped), but make the failure visible instead of swallowing it.
    warnings.warn(
        f"GradientBoosting: predict_proba failed ({exc}); continuing without probabilities.",
        RuntimeWarning,
    )
    proba_gb = None

show_report("GradientBoosting", y_test, y_hat_gb)
res_gb = eval_cls("GradBoost", y_test, y_hat_gb, proba_gb)


=== GradientBoosting ===
[[23476 13524]
 [ 6308 39024]]
              precision    recall  f1-score   support

           0     0.7882    0.6345    0.7030     37000
           1     0.7426    0.8608    0.7974     45332

    accuracy                         0.7591     82332
   macro avg     0.7654    0.7477    0.7502     82332
weighted avg     0.7631    0.7591    0.7550     82332



In [8]:
# Persist the evaluation artefacts for this model: confusion matrix (CSV + PNG),
# text classification report, and the ROC curve when probabilities exist.
MODEL_NAME = "Gradient Boosting"

# Every write below targets OUTPUT_DIR; create it so a fresh checkout does not
# fail on the first to_csv / savefig call.
os.makedirs(OUTPUT_DIR, exist_ok=True)

cm = confusion_matrix(y_test, y_hat_gb)
pd.DataFrame(cm, index=["Actual_0","Actual_1"], columns=["Pred_0","Pred_1"])\
  .to_csv(os.path.join(OUTPUT_DIR, f"{MODEL_NAME}_cm.csv"), index=True)

fig, ax = plt.subplots()
ConfusionMatrixDisplay(cm).plot(ax=ax, colorbar=False)
ax.set_title(f"{MODEL_NAME} Confusion Matrix")
fig.tight_layout()
fig.savefig(os.path.join(OUTPUT_DIR, f"{MODEL_NAME}_cm.png"), dpi=160)
plt.close(fig)  # the figure is only needed on disk; release it

rep = classification_report(y_test, y_hat_gb, digits=4)
with open(os.path.join(OUTPUT_DIR, f"{MODEL_NAME}_report.txt"), "w", encoding="utf-8") as f:
    f.write(rep)

# proba_gb is always bound by the training cell (to an array or to None), so a
# plain None check is enough. The former `except NameError: pass` wrapper could
# only hide typos inside this block.
if proba_gb is not None:
    fig, ax = plt.subplots()
    RocCurveDisplay.from_predictions(y_test, proba_gb, ax=ax)
    ax.set_title(f"{MODEL_NAME} ROC")
    fig.tight_layout()
    fig.savefig(os.path.join(OUTPUT_DIR, f"{MODEL_NAME}_roc.png"), dpi=160)
    plt.close(fig)