In [None]:
from sklearn.datasets import fetch_kddcup99
import pandas as pd
import numpy as np

In [None]:
# Load the 10% subset of KDD Cup 99 (percent10=True) and keep the dataset's own
# column names so later cells can refer to features by name instead of position.
data = fetch_kddcup99(percent10=True)
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Binary target: 0 for rows labelled b'normal.', 1 for every other label.
# The comparison uses a bytes literal, so it only matches if the loader returns
# the targets as bytes.
y_binary = np.where(y == b'normal.', 0, 1)

# Fail loudly if the label comparison matched nothing (or everything); without
# this check a label-format mismatch would silently produce a one-class target.
n_anomalies = int(y_binary.sum())
assert 0 < n_anomalies < len(y_binary), (
    "expected both normal and non-normal rows; check the b'normal.' label format"
)

In [None]:
from sklearn.preprocessing import LabelEncoder

# X was built from a single array; when that array is object-dtype (which is how
# scikit-learn's KDD loader returns it -- confirm with X.dtypes) every column is
# `object`, numeric ones included. Label-encoding all of them would replace the
# real numeric values with rank codes, so numeric columns are restored first and
# only columns that cannot be parsed as numbers are label-encoded.
for col in X.select_dtypes(include=[object]).columns:
    try:
        # Numeric feature stored as Python objects: recover its numeric dtype.
        X[col] = pd.to_numeric(X[col])
    except (ValueError, TypeError):
        # Not parseable as numbers, so treat it as categorical and map each
        # distinct value to an integer code.
        X[col] = LabelEncoder().fit_transform(X[col])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation. Stratifying on the binary target
# keeps the class proportions the same in both partitions, and the fixed seed
# makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binary,
    stratify=y_binary,
    random_state=42,
    test_size=0.3,
)

In [None]:
from xgboost import XGBClassifier

# `use_label_encoder` is no longer accepted by this XGBoost version: passing it
# has no effect and only emits a "Parameters: { "use_label_encoder" } are not
# used." warning, so it is omitted. Log-loss is the evaluation metric for the
# binary objective.
model = XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



In [None]:
# Score the held-out rows two ways:
#   y_prob -- second column of predict_proba, i.e. the score for class label 1
#   y_pred -- the hard 0/1 class decision
y_prob = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import plotly.figure_factory as ff

# Rows are the true class, columns the predicted class. `labels=[0, 1]` pins the
# row/column order so it always lines up with the display names below.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
labels = ["Normal", "Anomaly"]

# One annotation per cell showing the raw count.
cm_text = [[str(count) for count in row] for row in cm]

# Descriptive names are used instead of x / y / z: a notebook-level `y` here
# would overwrite the target Series `y` created when the data was loaded.
fig = ff.create_annotated_heatmap(
    cm, x=labels, y=labels, annotation_text=cm_text, colorscale='Blues'
)
fig.update_layout(
    title="Confusion Matrix",
    xaxis_title="Predicted",
    yaxis_title="Actual",
    # Heatmaps draw the first row at the bottom by default; reverse the axis so
    # the "Normal" row is on top, matching the usual confusion-matrix layout.
    yaxis_autorange='reversed',
)
fig.show()

In [None]:
import plotly.graph_objects as go

# Per-class metrics as a nested dict; the per-class entries are keyed by the
# stringified labels ('0' and '1').
report = classification_report(y_test, y_pred, output_dict=True)

classes = ["0 (Normal)", "1 (Anomaly)"]
metrics = ['precision', 'recall', 'f1-score']

# One bar trace per metric, each holding that metric's value for both classes.
# The list is named `metric_traces` rather than `data` so it does not overwrite
# the dataset object `data` that the loading cell created.
metric_traces = [
    go.Bar(
        name=metric.capitalize(),
        x=classes,
        y=[report['0'][metric], report['1'][metric]],
    )
    for metric in metrics
]

fig = go.Figure(data=metric_traces)
fig.update_layout(
    barmode='group',
    title="Classification Report Metrics",
    xaxis_title="Class",
    yaxis_title="Score",
    # Scores live in [0, 1]; the small headroom keeps full-height bars visible.
    yaxis=dict(range=[0, 1.05])
)
fig.show()

In [None]:
import plotly.express as px

feature_importance = model.feature_importances_

# Label every importance with the matching column of X so the labels cannot
# drift from the columns the model was trained on. Columns that already carry a
# string name are shown as-is; positional (non-string) columns keep the generic
# "Feature <i>" label.
feature_names = [
    col if isinstance(col, str) else f'Feature {col}'
    for col in X.columns
]

# Number of highest-ranked features to display.
top_n = 15

df_feat = pd.DataFrame({
    'Feature': feature_names,
    'Importance': feature_importance
}).sort_values(by='Importance', ascending=False).head(top_n)

fig = px.bar(
    df_feat,
    x='Importance',
    y='Feature',
    orientation='h',
    title=f"Top {top_n} Feature Importances (XGBoost)",
    color='Importance',
    color_continuous_scale='Blues'
)
# Horizontal bars are drawn bottom-up; reverse the axis so the most important
# feature appears at the top.
fig.update_layout(yaxis=dict(autorange='reversed'))
fig.show()

In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score
import plotly.graph_objects as go
# Precision/recall pairs across all score thresholds (the thresholds themselves
# are discarded), plus the average-precision summary of the same curve.
precision, recall, _ = precision_recall_curve(y_test, y_prob)
avg_precision = average_precision_score(y_test, y_prob)

# Single line trace: recall on the x-axis, precision on the y-axis, with the
# average precision reported in the legend entry.
pr_trace = go.Scatter(
    x=recall,
    y=precision,
    mode='lines',
    name=f'Precision-Recall curve (AP = {avg_precision:.2f})',
    line=dict(color='green'),
)

fig = go.Figure(data=[pr_trace])
fig.update_layout(
    title="Precision-Recall Curve",
    xaxis_title="Recall",
    yaxis_title="Precision",
    showlegend=True,
)

fig.show()