In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import warnings
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
from benchmark import PerformanceMonitor
from xgboost import XGBClassifier
from data_loader import load_and_standardize_data

  import pynvml  # type: ignore[import]


In [2]:
# Konfiguration
FILE_PATH = r"..\data\processed\PhiUSIIL_Phishing_URL_Dataset.csv"
# Hier ist es Komma-getrennt und heißt 'label'
X, y = load_and_standardize_data(FILE_PATH, target_col_name="label", delimiter=",")

--- [Loader] Starte Laden von: PhiUSIIL_Phishing_URL_Dataset.csv ---
--- [Loader] Fertig. Features: 50, Samples: 235795 ---


In [3]:
# Split (Identisch zum anderen Notebook!)
print("Führe Split durch...")
url_train_x, url_temp_x, url_train_y, url_temp_y = train_test_split(X, y, test_size=0.3, random_state=42)
url_val_x, url_test_x, url_val_y, url_test_y = train_test_split(url_temp_x, url_temp_y, test_size=0.5, random_state=42)

Führe Split durch...


In [4]:
# --- 3. SPLITTING (Der wichtigste Teil für Fairness) ---
# random_state=42 sorgt dafür, dass beide Datensätze "gleich zufällig" gemischt werden
print("Führe Train/Test Split durch (Random Seed 42)...")

url_train_x, url_temp_x, url_train_y, url_temp_y = train_test_split(
    X, y, test_size=0.3, random_state=42
)
url_val_x, url_test_x, url_val_y, url_test_y = train_test_split(
    url_temp_x, url_temp_y, test_size=0.5, random_state=42
)

Führe Train/Test Split durch (Random Seed 42)...


In [5]:
# --- 4. FORMATIERUNG ---
# Sicherstellen, dass alles float32 ist (für GPU Speed)
X_train = url_train_x.astype('float32')
y_train = url_train_y.astype('float32')
X_val = url_val_x.astype('float32')
y_val = url_val_y.astype('float32')
# Testdaten (falls für Inferenz gebraucht)
X_test = url_test_x.astype('float32')
y_test = url_test_y.astype('float32')

print("Datenvorbereitung abgeschlossen. Startklar für Benchmark.")

Datenvorbereitung abgeschlossen. Startklar für Benchmark.


In [7]:
# 1. Monitor initialisieren
monitor = PerformanceMonitor("XGBoost PhiUSIIL")

# --- TRAINING ---
print("Starte Training...")
monitor.start_measurement()

# --- VORBEREITUNG ---
# Daten vorher umwandeln, um Kopieren während des Trainings zu vermeiden
X_train = url_train_x.astype('float32')
y_train = url_train_y.astype('float32')
X_val = url_val_x.astype('float32')
y_val = url_val_y.astype('float32')


bst = XGBClassifier(
    n_estimators=5000,
    max_depth=6,
    learning_rate=0.01,
    objective='binary:logistic',
    early_stopping_rounds=50,
    tree_method="hist",  # Effizientester Algorithmus für GPU
    device="cuda"        # Aktiviert die GPU
)

# Training durchführen
# Wir nutzen .astype(float), um Warnungen bei booleschen/object Spalten zu vermeiden
bst.fit(
    url_train_x.astype(float), 
    url_train_y.astype(float),
    eval_set=[(url_val_x.astype(float), url_val_y.astype(float))], 
    verbose=False
)

monitor.end_measurement(task_name="Training")


Starte Training...
--- Ergebnisse XGBoost PhiUSIIL (Training) ---
Zeit: 6.608s | GPU-Last: 50.7%
VRAM (System): 1379.82 MB | VRAM (Torch): 0.0 MB


{'model': 'XGBoost PhiUSIIL',
 'task': 'Training',
 'time_sec': 6.608,
 'ram_mb': 1084.25,
 'vram_mb': 1379.82,
 'torch_vram_mb': 0.0,
 'cpu_percent': 167.6,
 'gpu_util_percent': 50.7}

In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# --- INFERENZ ---
print("Starte Inferenz (gesamtes Testset)...")
monitor.start_measurement()

X_test_ready = url_test_x.astype(float)
y_scores = bst.predict_proba(X_test_ready)[:, 1]
y_pred = bst.predict(X_test_ready)

# --- METRIKEN BERECHNEN ---
y_true = url_test_y.astype(float).values

acc = accuracy_score(y_true, y_pred)
prec = precision_score(y_true, y_pred, zero_division=0)
rec = recall_score(y_true, y_pred, zero_division=0)
f1 = f1_score(y_true, y_pred, zero_division=0)
auc = roc_auc_score(y_true, y_scores)

tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0

# KORREKTUR: Dictionary umbenannt, damit 'metrics' Modul nicht überschrieben wird
metrics_dict = {
    "accuracy": round(acc, 4),
    "precision": round(prec, 4),
    "recall": round(rec, 4),
    "f1_score": round(f1, 4),
    "auc": round(auc, 4),
    "fpr": round(fpr, 4)
}

monitor.end_measurement(task_name="Inferenz", extra_metrics=metrics_dict)

Starte Inferenz (gesamtes Testset)...


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


  return func(**kwargs)


--- Ergebnisse XGBoost PhiUSIIL (Inferenz) ---
Zeit: 0.3013s | GPU-Last: 10.7%
VRAM (System): 1303.95 MB | VRAM (Torch): 0.0 MB


{'model': 'XGBoost PhiUSIIL',
 'task': 'Inferenz',
 'time_sec': 0.3013,
 'ram_mb': 1096.17,
 'vram_mb': 1303.95,
 'torch_vram_mb': 0.0,
 'cpu_percent': 1068.3,
 'gpu_util_percent': 10.7,
 'accuracy': 1.0,
 'precision': 1.0,
 'recall': 1.0,
 'f1_score': 1.0,
 'auc': 1.0,
 'fpr': np.float64(0.0)}