In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, precision_recall_curve, average_precision_score
import json
import os

from models.model_xgboost import build_xgboost_model
from models.model_lightgbm import build_lightgbm_model
from models.model_random_forest import build_random_forest_model
from models.model_logreg import build_logreg_model

DATA_PATH = "datasets/summit_dbe_processed.parquet"
RESULTS_DIR = "results"

os.makedirs(RESULTS_DIR, exist_ok=True)

In [2]:
#Load cleaned dataset

df = pd.read_parquet(DATA_PATH)

print("Shape:", df.shape)
print(df["is_failure"].value_counts())

Shape: (50259, 138)
is_failure
0.0    49964
1.0      295
Name: count, dtype: int64


In [3]:
#Split X and y

y = df["is_failure"]
X = df.drop(columns=["is_failure"])

print("X shape:", X.shape)
print("y shape:", y.shape)

X shape: (50259, 137)
y shape: (50259,)


In [4]:
#Train/test split with stratify

X_train, X_test, y_train, y_test = train_test_split(
X,
y,
test_size=0.2,
random_state=42,
stratify=y,
)

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)
print("Failure rate train:", y_train.mean())
print("Failure rate test:", y_test.mean())

Train shape: (40207, 137)
Test shape: (10052, 137)
Failure rate train: 0.005869624692217773
Failure rate test: 0.005869478710704337


We keep the same rare failure ratio in train and test so the model sees realistic data and the test set contains some DBE events.

In [5]:
#Compute scale_pos_weight

neg = (y_train == 0).sum()
pos = (y_train == 1).sum()
scale_pos_weight = neg / pos

print("negatives:", neg)
print("positives:", pos)
print("scale_pos_weight:", scale_pos_weight)

negatives: 39971
positives: 236
scale_pos_weight: 169.36864406779662


The failure class is rare, so we upweight positive samples inside XGBoost instead of oversampling.

In [6]:
def build_model(model_name, scale_pos_weight):
    if model_name == "xgboost":
        return build_xgboost_model(scale_pos_weight)
    elif model_name == "lightgbm":
        return build_lightgbm_model(scale_pos_weight)
    elif model_name == "random_forest":
        return build_random_forest_model()
    elif model_name == "logreg":
        return build_logreg_model()
    else:
        raise ValueError(f"Unknown model name: {model_name}")

models_to_run = ["xgboost", "lightgbm", "random_forest", "logreg"]

In [8]:
for model_name in models_to_run:
    print("\n" + "=" * 60)
    print(f"Training model: {model_name}")
    print("=" * 60)

    # -----------------------------------------
    # BUILD THE MODEL FOR THIS ITERATION
    # -----------------------------------------
    model = build_model(model_name, scale_pos_weight)
    print("Model:", model)

    # -----------------------------------------
    # TRAIN
    # -----------------------------------------
    model.fit(X_train, y_train)
    print("Training done.")

    # -----------------------------------------
    # EVALUATE AT DEFAULT THRESHOLD (0.5)
    # -----------------------------------------
    y_prob = model.predict_proba(X_test)[:, 1]
    y_pred = model.predict(X_test)

    report = classification_report(y_test, y_pred, output_dict=True, digits=3)
    roc_auc = roc_auc_score(y_test, y_prob)
    precision_curve, recall_curve, _ = precision_recall_curve(y_test, y_prob)
    avg_precision = average_precision_score(y_test, y_prob)

    print("Classification report (threshold=0.5):")
    print(classification_report(y_test, y_pred, digits=3))
    print("ROC-AUC:", roc_auc)
    print("Average precision:", avg_precision)

    # -----------------------------------------
    # THRESHOLD SWEEP
    # -----------------------------------------
    thresholds = [0.5, 0.2, 0.1, 0.05, 0.02, 0.01]
    threshold_results = []

    for t in thresholds:
        preds = (y_prob > t).astype(int)
        r = classification_report(y_test, preds, output_dict=True, digits=3)

        if "1" in r:
            rec_1 = r["1"]["recall"]
            prec_1 = r["1"]["precision"]
        else:
            rec_1 = 0.0
            prec_1 = 0.0

        threshold_results.append({
            "threshold": t,
            "recall_failure": rec_1,
            "precision_failure": prec_1,
        })

        print("\nThreshold:", t)
        print(classification_report(y_test, preds, digits=3))

    # -----------------------------------------
    # SAVE METRICS
    # -----------------------------------------
    metrics = {
        "model": model_name,
        "roc_auc": roc_auc,
        "average_precision": avg_precision,
        "report_threshold_0_5": report,
        "threshold_results": threshold_results,
    }

    metrics_path = os.path.join(RESULTS_DIR, f"metrics_{model_name}.json")
    with open(metrics_path, "w") as f:
        json.dump(metrics, f, indent=2)

    print("Saved metrics to:", metrics_path)

    # -----------------------------------------
    # SAVE FEATURE IMPORTANCE (TREE MODELS ONLY)
    # -----------------------------------------
    if hasattr(model, "feature_importances_"):
        importances = model.feature_importances_
        fi_df = pd.DataFrame({
            "feature": X.columns,
            "importance": importances,
        }).sort_values("importance", ascending=False)

        fi_path = os.path.join(RESULTS_DIR, f"feature_importance_{model_name}.csv")
        fi_df.to_csv(fi_path, index=False)
        print("Saved feature importance to:", fi_path)
    else:
        print("Model has no feature_importances_ attribute.")



Training model: xgboost
Model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=0.8, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              feature_weights=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=6,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=200,
              n_jobs=-1, num_parallel_tree=None, ...)
Training done.
Classification report (threshold=0.5):
              precision    recall  f1-score   support

         0.0      0.997     0.998     0.997      9993
         1.0      0.547     0.492     0.518        

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
