In [None]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" style before any figure is drawn.
sns.set(style="whitegrid")

# Both tuning artifacts are read from the same directory (relative path).
ARTIFACTS_DIR = Path("artifacts")
cv_path = ARTIFACTS_DIR.joinpath("gbr_cv_results.csv")
meta_path = ARTIFACTS_DIR.joinpath("metrics_and_params.json")

# cv_df: cross-validation results table (CSV).
# meta:  metrics / parameters summary (JSON), decoded as UTF-8.
cv_df = pd.read_csv(cv_path)
meta = json.loads(meta_path.read_text(encoding="utf-8"))

# Preview the first rows of the CV table.
cv_df.head()


In [None]:
# Make sure a positive "mean_val_mae" column exists. When it is missing it is
# derived by flipping the sign of "mean_test_score" (per the original note the
# search was scored with neg_mean_absolute_error, so stored scores are -MAE).
has_val_mae = "mean_val_mae" in cv_df.columns
has_test_score = "mean_test_score" in cv_df.columns

if not has_val_mae and not has_test_score:
    # Neither column is available: there is nothing to plot against.
    raise ValueError("mean_val_mae veya mean_test_score bulunamadı, RandomizedSearchCV çıktısını kontrol et.")
if not has_val_mae:
    cv_df["mean_val_mae"] = -cv_df["mean_test_score"]

# Show the MAE next to every hyperparameter column (those prefixed "param_").
param_columns = [name for name in cv_df.columns if name.startswith("param_")]
cv_df[["mean_val_mae", *param_columns]].head()


In [None]:
# Column names used for the axes and the color encoding.
n_est_col = "param_regressor__n_estimators"
depth_col = "param_regressor__max_depth"

# One point per row of cv_df: n_estimators on x, validation MAE on y,
# colored by max_depth.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=cv_df, x=n_est_col, y="mean_val_mae", hue=depth_col, palette="viridis", ax=ax)

ax.set_title("Hyperparameter tuning: n_estimators vs Validation MAE\n(renk: max_depth)")
ax.set_xlabel("n_estimators")
ax.set_ylabel("Validation MAE (CV ortalaması)")
# Rebuild the legend so it is titled "max_depth" instead of the raw column name.
ax.legend(title="max_depth")

fig.tight_layout()
plt.show()


In [None]:
# Distribution of validation MAE for each max_depth value found in cv_df.
fig, ax = plt.subplots(figsize=(7, 5))
sns.boxplot(data=cv_df, x="param_regressor__max_depth", y="mean_val_mae", ax=ax)

ax.set_title("Hyperparameter tuning: max_depth vs Validation MAE")
ax.set_xlabel("max_depth")
ax.set_ylabel("Validation MAE (CV ortalaması)")

fig.tight_layout()
plt.show()


In [None]:
# Mean validation MAE per n_estimators value, averaged over all other
# hyperparameter settings that share that n_estimators.
grouped = cv_df.groupby("param_regressor__n_estimators", as_index=False)["mean_val_mae"].mean()

grouped


In [None]:
# Look up the tuned model's test MAE in the metadata loaded from
# metrics_and_params.json; any missing level leaves tuned_test_mae as None.
tuned = meta.get("tuned", {})
tuned_test = tuned.get("test_metrics", {})
tuned_test_mae = tuned_test.get("mae")

# Averaged validation MAE as a function of n_estimators.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(grouped["param_regressor__n_estimators"], grouped["mean_val_mae"], marker="o")
ax.set_xlabel("n_estimators")
ax.set_ylabel("Validation MAE (CV ortalaması)")
ax.set_title("Validation MAE vs n_estimators (CV sonucu)")

# Draw the reference line (and its legend) only when the metric is available.
if tuned_test_mae is not None:
    ax.axhline(tuned_test_mae, color="red", linestyle="--", label=f"Tuned test MAE = {tuned_test_mae:,.0f}")
    ax.legend()

fig.tight_layout()
plt.show()


In [None]:
# Test MAE of the baseline model, read from the loaded metadata.
# Any missing level in the JSON leaves the value as None.
baseline = meta.get("baseline", {})
baseline_test = baseline.get("test_metrics", {})
baseline_test_mae = baseline_test.get("mae", None)

# Re-derive the tuned model's test MAE here instead of relying on a variable
# left behind by an earlier cell, so this cell only needs `meta` and `grouped`
# and does not fail with NameError when the previous plot cell is skipped.
tuned_test_mae = meta.get("tuned", {}).get("test_metrics", {}).get("mae", None)

# Averaged validation MAE vs n_estimators, with horizontal reference lines for
# whichever test MAE values are available.
plt.figure(figsize=(8, 5))
plt.plot(grouped["param_regressor__n_estimators"], grouped["mean_val_mae"], marker="o", label="Validation MAE (CV)")
if baseline_test_mae is not None:
    plt.axhline(baseline_test_mae, color="orange", linestyle="--", label=f"Baseline test MAE = {baseline_test_mae:,.0f}")
if tuned_test_mae is not None:
    plt.axhline(tuned_test_mae, color="red", linestyle="--", label=f"Tuned test MAE = {tuned_test_mae:,.0f}")

plt.xlabel("n_estimators")
plt.ylabel("MAE")
plt.title("Validation MAE vs n_estimators + Test MAE çizgileri")
plt.legend()
plt.tight_layout()
plt.show()
