In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, classification_report
)
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import matplotlib.pyplot as plt


In [None]:
# =====================
# 1. Load dataset
# =====================
# Location of the training data, relative to the notebook's working directory.
file_path = "team_11_ton-iot-engineered_train.txt"

# NOTE(review): read with pandas' default CSV settings even though the file has
# a .txt extension -- confirm the file really is comma-delimited with a header.
df = pd.read_csv(filepath_or_buffer=file_path)

# Quick sanity check: report (rows, columns), then preview the first rows.
print("Dataset shape:", df.shape)
df.head()


In [None]:
# =====================
# 2. Prepare features and labels
# =====================
# Target vector (0=normal, 1=attack); every remaining column is used as a feature.
y = df["label"]
X = df.drop("label", axis="columns")

# Hold out 20% of the rows for testing. The split is stratified on the target
# so both partitions keep the same class proportions, and the seed is fixed so
# the partition is reproducible across runs.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print("Training set:", X_train.shape, "Test set:", X_test.shape)


In [None]:
# =====================
# 3. Train Models
# =====================

# --- Logistic Regression ---
# class_weight="balanced" reweights classes inversely to their frequency.
log_reg_params = {"max_iter": 500, "class_weight": "balanced"}
log_reg = LogisticRegression(**log_reg_params)
log_reg.fit(X_train, y_train)

# --- Random Forest ---
rf_params = {"n_estimators": 200, "random_state": 42, "class_weight": "balanced"}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train, y_train)

# --- XGBoost ---
# XGBoost takes an explicit weight for the positive class instead of a
# class_weight option: use the negative-to-positive ratio of the training labels.
n_negative = int((y_train == 0).sum())
n_positive = int((y_train == 1).sum())
xgb_params = {
    "n_estimators": 300,
    "learning_rate": 0.1,
    "max_depth": 6,
    "scale_pos_weight": n_negative / n_positive,
    "eval_metric": "logloss",
    "random_state": 42,
}
xgb = XGBClassifier(**xgb_params)

# Left as the cell's final expression so the fitted estimator is displayed.
xgb.fit(X_train, y_train)


In [None]:
# =====================
# 4. Evaluation
# =====================
models = {"Logistic Regression": log_reg, "Random Forest": rf, "XGBoost": xgb}

# Metrics computed from hard class predictions, paired with their print labels.
# They are evaluated one at a time, immediately before being printed.
label_metrics = (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1-score :", f1_score),
)

for name, model in models.items():
    # Hard predictions feed the label-based metrics; the positive-class
    # probability (column 1 of predict_proba) feeds ROC-AUC.
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]

    print("\n📌", name)
    for caption, metric_fn in label_metrics:
        print(caption, metric_fn(y_test, y_pred))
    print("ROC-AUC  :", roc_auc_score(y_test, y_prob))
    print("\nClassification Report:\n", classification_report(y_test, y_pred))


In [None]:
# =====================
# 5. Feature Importance (XGBoost)
# =====================
# Per-feature importance scores exposed by the fitted XGBoost model.
xgb_importance = xgb.feature_importances_

# Use explicit figure/axes objects rather than the implicit pyplot state, so
# every call below unambiguously targets this figure.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(range(len(xgb_importance)), xgb_importance)

# One tick per feature column, labelled with the column name. Bars are drawn in
# the column order of X, which is the order the model was trained on.
ax.set_xticks(range(len(X.columns)))
ax.set_xticklabels(X.columns, rotation=90)

# Axis labels so the figure can be read on its own.
ax.set_xlabel("Feature")
ax.set_ylabel("Importance")
ax.set_title("XGBoost Feature Importance")

# Rotated tick labels can run past the figure edge; let matplotlib make room.
fig.tight_layout()
plt.show()
