# Model Evaluation - Temel Kavramlar

Bu notebook, makine öğrenmesi modellerini değerlendirmek için kullanılan temel metrikleri ve teknikleri içerir.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                             f1_score, confusion_matrix, classification_report,
                             roc_auc_score, roc_curve, precision_recall_curve,
                             mean_absolute_error, mean_squared_error, r2_score)
from sklearn.datasets import make_classification, make_regression

# Global plot styling for every figure in this notebook: apply the
# 'seaborn-v0_8' matplotlib style sheet first, then set seaborn's default
# color palette to "husl".
# NOTE(review): the 'seaborn-v0_8' style name presumably requires
# matplotlib >= 3.6 — confirm against the environment's pinned version.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline


## 1. Classification Metrikleri

Sınıflandırma problemleri için temel metrikler.


In [None]:
# Build a synthetic binary-classification dataset (1000 samples, 10 features).
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42,
)

# Hold out 20% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the classifier, then collect hard predictions and the predicted
# probability of the positive class (column 1 of predict_proba).
model = LogisticRegression(random_state=42, max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)[:, 1]

# Core metrics, all computed from the hard predictions on the test set.
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Emit the summary as one print call; each argument lands on its own line.
print(
    "=" * 50,
    "CLASSIFICATION METRICS",
    "=" * 50,
    f"Accuracy:  {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall:    {recall:.4f}",
    f"F1-Score:  {f1:.4f}",
    sep="\n",
)


## 2. Confusion Matrix

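Confusion matrix, modelin doğru ve yanlış tahminlerini gerçek ve tahmin edilen sınıflara göre ayrı ayrı göstererek model performansını detaylı biçimde özetler.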


In [None]:
# Confusion matrix of test-set predictions. The labels are pinned to [0, 1]
# so the result is always 2x2: this keeps the four-way unpacking of
# cm.ravel() below valid and keeps the rows/columns aligned with the
# hard-coded tick labels.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Visualize the matrix as an annotated heatmap
# (rows = actual class, columns = predicted class).
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Sınıf 0', 'Sınıf 1'],
            yticklabels=['Sınıf 0', 'Sınıf 1'],
            ax=ax)
ax.set_title('Confusion Matrix')
ax.set_ylabel('Gerçek Değerler')
ax.set_xlabel('Tahmin Edilen Değerler')
plt.show()

# Unpack the individual cells; for a binary 2x2 matrix ravel() yields
# TN, FP, FN, TP in that order.
TN, FP, FN, TP = cm.ravel()
# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print(f"\nTrue Negative (TN):  {TN}")
print(f"False Positive (FP):  {FP}")
print(f"False Negative (FN):  {FN}")
print(f"True Positive (TP):   {TP}")


## 3. ROC Curve ve AUC

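ROC curve, farklı karar eşiklerinde True Positive Rate ile False Positive Rate arasındaki dengeyi görselleştirmek için kullanılır; AUC ise bu eğrinin altında kalan alandır.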


In [None]:
# ROC analysis based on the predicted positive-class probabilities.
roc_auc = roc_auc_score(y_test, y_pred_proba)
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)

# Draw the ROC curve together with the diagonal of a random classifier.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.4f})', linewidth=2)
ax.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

print(f"AUC Score: {roc_auc:.4f}")


## 4. Regression Metrikleri

Regresyon problemleri için temel metrikler.


In [None]:
# Synthetic regression dataset (1000 samples, 10 features, Gaussian noise),
# split 80/20 into train and test sets.
X_reg, y_reg = make_regression(n_samples=1000, n_features=10, noise=20, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

# Fit an ordinary least-squares model and predict on the held-out set.
reg_model = LinearRegression()
reg_model.fit(X_train_reg, y_train_reg)
y_pred_reg = reg_model.predict(X_test_reg)

# Regression metrics on the test set.
mae = mean_absolute_error(y_test_reg, y_pred_reg)
mse = mean_squared_error(y_test_reg, y_pred_reg)
rmse = np.sqrt(mse)  # RMSE is the square root of MSE, in the target's units
r2 = r2_score(y_test_reg, y_pred_reg)

print("=" * 50)
print("REGRESSION METRICS")
print("=" * 50)
print(f"MAE:  {mae:.4f}")
print(f"MSE:  {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R²:   {r2:.4f}")


## 5. Cross-Validation

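K-fold cross-validation, veriyi k parçaya bölüp her parçayı sırayla test kümesi olarak kullanır; böylece model performansı tek bir train/test ayrımına kıyasla daha güvenilir biçimde değerlendirilir.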


In [None]:
# Plain K-Fold cross-validation: 5 shuffled folds over the full dataset,
# scored by accuracy. `model` is the LogisticRegression estimator from the
# classification cell.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(model, X, y, cv=kfold, scoring='accuracy')

print("=" * 50)
print("CROSS-VALIDATION RESULTS")
print("=" * 50)
print(f"CV Scores: {cv_scores}")
# The spread is reported as two standard deviations of the fold scores.
print(f"Mean CV Score: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

# Stratified K-Fold (for classification): same setup, but with the
# stratified splitter.
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores_stratified = cross_val_score(model, X, y, cv=skfold, scoring='accuracy')
# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print(f"\nStratified CV Mean: {cv_scores_stratified.mean():.4f} (+/- {cv_scores_stratified.std() * 2:.4f})")


## 6. Classification Report

Detaylı sınıflandırma raporu.


In [None]:
# Per-class precision / recall / F1 / support table for the test-set predictions.
report = classification_report(
    y_test,
    y_pred,
    target_names=['Sınıf 0', 'Sınıf 1'],
)

# One print call; each argument is emitted on its own line.
print("=" * 50, "CLASSIFICATION REPORT", "=" * 50, report, sep="\n")
