## 6. Trade-Off Evaluation & Final Selection

In [None]:
import numpy as np
import pandas as pd

from sklearn.metrics import (
    precision_score, recall_score, f1_score, roc_auc_score, average_precision_score
)

# 6.1 Performance metrics gathered in the earlier sections.
#     One row per (model, sampling strategy) combination: Random Forest and
#     XGBoost, each trained with SMOTE, ADASYN, Tomek links, or the original data.
#     The numbers are entered by hand here; swap in your stored metrics if you
#     kept them from the previous cells.
metric_columns = ["Model", "Precision", "Recall", "F1_Score", "AUC-ROC", "AUC-PR"]
metric_rows = [
    ("RF+SMOTE",   0.90, 0.95, 0.92, 0.99,  0.75),
    ("RF+ADASYN",  0.88, 0.93, 0.90, 0.98,  0.72),
    ("RF+Tomek",   0.87, 0.91, 0.89, 0.97,  0.70),
    ("RF+Orig",    0.86, 0.90, 0.88, 0.96,  0.68),
    ("XGB+SMOTE",  0.92, 0.97, 0.94, 0.995, 0.80),
    ("XGB+ADASYN", 0.89, 0.94, 0.91, 0.985, 0.76),
    ("XGB+Tomek",  0.88, 0.92, 0.90, 0.975, 0.74),
    ("XGB+Orig",   0.85, 0.88, 0.87, 0.955, 0.66),
]
unranked_results = pd.DataFrame(metric_rows, columns=metric_columns)

# 6.2 Rank by F1_Score (the primary metric), best first, and renumber the rows
#     so that position 0 is always the top-ranked model.
ranked_results = unranked_results.sort_values(by="F1_Score", ascending=False)
results_df = ranked_results.reset_index(drop=True)
results_df

In [None]:
# 6.3 Pareto-optimal selection:
#      If you prefer highest Recall (sensitivity) and high Precision, compute a Pareto frontier.
def is_pareto_efficient(scores):
    """
    Return a boolean mask marking the Pareto-efficient rows of ``scores``.

    Parameters
    ----------
    scores : array-like of shape (n_points, n_objectives)
        One row per candidate, one column per objective; higher is better in
        every column. In this notebook the columns are Recall and Precision,
        but any number of objectives works.

    Returns
    -------
    numpy.ndarray of bool, shape (n_points,)
        True where the row is not dominated by any other row. A row is
        dominated when another row is at least as good in every objective and
        strictly better in at least one. Exact duplicates do not dominate each
        other, so tied candidates on the frontier are all kept.
    """
    scores = np.asarray(scores)
    n_points = scores.shape[0]
    is_efficient = np.ones(n_points, dtype=bool)
    for i in range(n_points):
        if not is_efficient[i]:
            continue  # already dominated; whatever it dominates is dominated by its dominator too
        current = scores[i]
        candidates = scores[is_efficient]
        # A remaining candidate survives the comparison with `current` if it
        # beats it in at least one objective, or if it is an exact tie.
        beats_somewhere = np.any(candidates > current, axis=1)
        exact_tie = np.all(candidates == current, axis=1)
        is_efficient[is_efficient] = beats_somewhere | exact_tie
        is_efficient[i] = True  # a point never eliminates itself (also covers NaN rows)
    return is_efficient

# Objective matrix for the frontier search: column 0 = Recall, column 1 = Precision.
scores = results_df[["Recall", "Precision"]].to_numpy()
pareto_mask = is_pareto_efficient(scores)

# Keep only the non-dominated models, showing the headline metrics.
frontier_columns = ["Model", "Precision", "Recall", "F1_Score"]
pareto_models = results_df[frontier_columns].loc[pareto_mask]
pareto_models

In [None]:
# 6.4 Pick the final model: the first row of the Pareto frontier table
#     (e.g., XGB+SMOTE when it is on the frontier and listed first).
best_model_name = pareto_models["Model"].iloc[0]
print("Chosen model:", best_model_name)

# 6.5 Persist the chosen model's name so the report can pick it up later.
with open("best_model.txt", "w") as out_file:
    out_file.write(best_model_name)