# 04 — Model Değerlendirme (Evaluation)

Bu notebook test seti üzerinde:
- ROC-AUC, F1 ve PR-AUC hesaplar (Brier Score bu notebook'ta hesaplanmaz)
- ROC ve Precision-Recall eğrilerini çizer
- Konfüzyon matrisini görselleştirir
- Maliyet matrisine göre eşik optimizasyonu yapar

In [None]:
import sys

sys.path.insert(0, "..")

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import (
    roc_curve,
    auc,
    precision_recall_curve,
    ConfusionMatrixDisplay,
    classification_report,
)

# Notebook-wide plotting defaults: wide, short figures with the top and right
# axes spines switched off.
plt.rcParams.update(
    {
        "figure.figsize": (10, 4),
        "axes.spines.top": False,
        "axes.spines.right": False,
    }
)

## 1. Model & Veri Yükleme

In [None]:
from src.config import Paths
from src.io import load_latest_model, read_input_dataset
from src.preprocess import preprocess_basic
from src.split import split_dataset

# Resolve the project paths and load the latest saved model through the
# project helper.
paths = Paths()
model = load_latest_model(paths)
print(f"Model yüklendi: {type(model).__name__}")

# Read the raw dataset (only the first returned item is used) and run the
# basic preprocessing, mapping both string and integer target labels to 0/1.
df = preprocess_basic(
    read_input_dataset(paths.raw_data)[0],
    target_col="is_canceled",
    label_map={"no": 0, "yes": 1, 0: 0, 1: 1},
)

# Split the data and keep only the test partition for evaluation.
# NOTE(review): presumably split_dataset reproduces the training-time split
# deterministically — confirm, otherwise test rows may overlap training data.
split = split_dataset(df, target_col="is_canceled")
X_test = split.X_test
y_test = split.y_test
print(f"Test seti: {X_test.shape}")

## 2. Olasılık Tahminleri

In [None]:
# Column 1 of predict_proba is used as P(cancel); hard labels come from the
# default 0.5 cut-off.
y_prob = model.predict_proba(X_test)[:, 1]
y_pred = (y_prob >= 0.5).astype(int)

print(
    f"Tahmin dağılımı: min={y_prob.min():.4f}, max={y_prob.max():.4f}, mean={y_prob.mean():.4f}"
)

# Two side-by-side histograms of the predicted probabilities.
fig, (ax_by_class, ax_overall) = plt.subplots(1, 2, figsize=(12, 4))
shared_hist_kwargs = {"bins": 40, "edgecolor": "none"}

# Left panel: one semi-transparent histogram per true class.
class_styles = {0: ("#3b82f6", "İptal Yok"), 1: ("#ef4444", "İptal")}
for cls, (color, lbl) in class_styles.items():
    ax_by_class.hist(
        y_prob[y_test == cls],
        alpha=0.7,
        color=color,
        label=lbl,
        **shared_hist_kwargs,
    )
ax_by_class.set(title="Tahmin Olasılığı Dağılımı (Sınıfa Göre)", xlabel="P(iptal)")
ax_by_class.legend()

# Right panel: distribution of all predicted probabilities together.
ax_overall.hist(y_prob, color="#6366f1", **shared_hist_kwargs)
ax_overall.set(title="Genel Olasılık Dağılımı", xlabel="P(iptal)")

plt.tight_layout()
plt.show()

## 3. ROC Eğrisi

In [None]:
# ROC points across all score thresholds (the thresholds themselves are
# discarded) and the area under the resulting curve.
fpr, tpr, _ = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots()
# Model curve, then the chance diagonal for reference, then a light fill
# under the model curve.
ax.plot(fpr, tpr, color="#3b82f6", lw=2, label=f"ROC AUC = {roc_auc:.4f}")
ax.plot([0, 1], [0, 1], "k--", lw=1, label="Rastgele")
ax.fill_between(fpr, tpr, alpha=0.05, color="#3b82f6")
ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Eğrisi",
)
ax.legend()
plt.tight_layout()
plt.show()

print(f"ROC AUC: {roc_auc:.4f}")

## 4. Precision-Recall Eğrisi

In [None]:
# Precision/recall pairs across all score thresholds. The threshold array is
# not used in this cell, so it is discarded instead of being bound to a name.
precision, recall, _ = precision_recall_curve(y_test, y_prob)

# Area under the PR curve via the trapezoidal rule.
# NOTE(review): scikit-learn documents that linearly interpolating the PR
# curve can be too optimistic; `average_precision_score` is the
# non-interpolated alternative — confirm which one the official evaluation
# reports before comparing numbers.
pr_auc = auc(recall, precision)

# Mean of the test labels; with 0/1 labels this is the positive rate, drawn
# below as the reference precision level.
baseline_pr = y_test.mean()

fig, ax = plt.subplots()
ax.plot(recall, precision, color="#10b981", lw=2, label=f"PR AUC = {pr_auc:.4f}")
ax.axhline(
    baseline_pr,
    color="grey",
    linestyle="--",
    lw=1,
    label=f"Baseline = {baseline_pr:.3f}",
)
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Precision-Recall Eğrisi")
ax.legend()
plt.tight_layout()
plt.show()

# Print the metric as text too, matching the ROC cell, so the value is
# available without reading it off the plot legend.
print(f"PR AUC: {pr_auc:.4f}")

## 5. Konfüzyon Matrisi

In [None]:
# Display names for the negative (0) and positive (1) class, shared by the
# plot and the text report.
class_names = ["İptal Yok", "İptal"]

# Confusion matrix for the hard predictions made at the 0.5 cut-off.
fig, ax = plt.subplots(figsize=(5, 4))
ConfusionMatrixDisplay.from_predictions(
    y_test, y_pred, display_labels=class_names, cmap="Blues", ax=ax
)
ax.set_title("Konfüzyon Matrisi (threshold=0.5)")
plt.tight_layout()
plt.show()

# Per-class precision / recall / F1 as text.
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)

## 6. Eşik Optimizasyonu (Maliyet Matrisi)

In [None]:
from src.evaluate import sweep_thresholds_for_profit
from src.cost_matrix import load_cost_matrix

# Load the cost matrix and sweep decision thresholds for profit.
# NOTE(review): the sweep runs on the test labels, so the reported optimum is
# tuned on the same data it is reported for — confirm that this is intended
# rather than tuning on a validation split.
cost_matrix = load_cost_matrix()
result = sweep_thresholds_for_profit(y_test.values, y_prob, cost_matrix)

print(f"Optimal Threshold: {result.best_threshold:.3f}")
print(f"Max Profit: {result.best_profit:,.0f}")

# Only plot when the sweep returned per-threshold rows.
if result.rows:
    sweep_df = pd.DataFrame(result.rows)
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: profit as a function of the threshold, with the best
    # threshold marked by a vertical dashed line.
    axes[0].plot(sweep_df["threshold"], sweep_df["profit"], color="#f59e0b", lw=2)
    axes[0].axvline(
        result.best_threshold,
        color="#ef4444",
        linestyle="--",
        lw=1,
        label=f"Optimal: {result.best_threshold:.3f}",
    )
    axes[0].set_title("Eşiğe Göre Kâr")
    axes[0].set_xlabel("Threshold")
    axes[0].set_ylabel("Kâr")
    axes[0].legend()

    # Right panel: classification metrics per threshold. Only columns that
    # are actually present in the sweep output are drawn, so the legend never
    # lists a metric that has no data behind it.
    metric_styles = [
        ("f1", "#3b82f6", 2, "F1"),
        ("precision", "#10b981", 1.5, "Precision"),
        ("recall", "#f59e0b", 1.5, "Recall"),
    ]
    available_metrics = [
        style for style in metric_styles if style[0] in sweep_df.columns
    ]
    for column, color, width, label in available_metrics:
        axes[1].plot(
            sweep_df["threshold"],
            sweep_df[column],
            color=color,
            lw=width,
            label=label,
        )

    if available_metrics:
        axes[1].set_title("Threshold vs F1/P/R")
        axes[1].set_xlabel("Threshold")
        axes[1].legend()
    else:
        # Nothing to draw: hide the panel instead of showing an empty frame.
        axes[1].set_visible(False)

    plt.tight_layout()
    plt.show()

## Sonuç

| Metrik | Değer |
|--------|-------|
| ROC AUC | _(3. bölümdeki hücreyi çalıştır)_ |
| PR AUC | _(4. bölümdeki hücreyi çalıştır)_ |
| Optimal Threshold | _(6. bölümdeki hücreyi çalıştır)_ |
| Max Profit | _(6. bölümdeki hücreyi çalıştır)_ |

Resmi değerlendirme için: `python main.py evaluate`

Bir sonraki notebook: `05_calibration.ipynb`