# Hyperparameter Tuning - Proje: Kapsamlı Model Optimizasyonu

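Bu proje; Logistic Regression, Random Forest ve Gradient Boosting için hyperparameter tuning yapar, modelleri cross-validation ve test ROC-AUC skorlarıyla karşılaştırır ve en iyi modeli seçer.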


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.datasets import make_classification
from scipy.stats import randint

# Global plot styling: a seaborn-like matplotlib style sheet plus seaborn's "husl" palette.
# NOTE(review): the 'seaborn-v0_8' style name presumably requires matplotlib >= 3.6 —
# confirm against the environment this notebook is run in.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
# Render figures inline in the notebook output.
%matplotlib inline


## 1. Veri Hazırlama


In [None]:
# Synthetic binary-classification dataset: 2000 samples, 20 features
# (15 informative + 5 redundant), generated with a fixed seed.
X, y = make_classification(
    n_samples=2000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)

# Hold out 20% of the samples as a test set; the split is stratified on y
# and seeded so it is identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print(f"Eğitim seti boyutu: {X_train.shape}")
print(f"Test seti boyutu: {X_test.shape}")


## 2. Farklı Algoritmalar için Hyperparameter Tuning


In [None]:
# 1. Logistic Regression — exhaustive grid search.
# The grid has 5 values of C x 2 penalties x 1 solver = 10 candidates; each one is
# evaluated with 5-fold cross-validation and scored by ROC-AUC.
lr_param_grid = dict(
    C=[0.01, 0.1, 1, 10, 100],
    penalty=['l1', 'l2'],
    solver=['liblinear'],
)

# fit() returns the fitted search object, so construction and fitting are chained.
lr_grid = GridSearchCV(
    estimator=LogisticRegression(max_iter=1000, random_state=42),
    param_grid=lr_param_grid,
    scoring='roc_auc',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# 2. Random Forest — randomized search.
# Each hyperparameter is drawn from a discrete uniform distribution
# (scipy's randint(low, high) excludes the upper bound).
rf_param_dist = dict(
    n_estimators=randint(50, 300),
    max_depth=randint(5, 20),
    min_samples_split=randint(2, 20),
)

# 50 sampled configurations, each scored by ROC-AUC under 5-fold cross-validation.
# Both the forest and the sampler are seeded for repeatable results.
rf_random = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=rf_param_dist,
    n_iter=50,
    scoring='roc_auc',
    cv=5,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)

# 3. Gradient Boosting — randomized search.
# n_estimators and max_depth are drawn from discrete uniform distributions;
# learning_rate is sampled from the explicit list of candidate values.
gb_param_dist = dict(
    n_estimators=randint(50, 200),
    learning_rate=[0.01, 0.1, 0.2, 0.3],
    max_depth=randint(3, 10),
)

# 50 sampled configurations, each scored by ROC-AUC under 5-fold cross-validation.
# Both the booster and the sampler are seeded for repeatable results.
gb_random = RandomizedSearchCV(
    estimator=GradientBoostingClassifier(random_state=42),
    param_distributions=gb_param_dist,
    n_iter=50,
    scoring='roc_auc',
    cv=5,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)

print("Tüm modeller optimize edildi!")


## 3. Sonuçları Karşılaştırma


In [None]:
# Fitted search objects keyed by the display name used in the comparison table.
tuned_searches = {
    'Logistic Regression': lr_grid,
    'Random Forest': rf_random,
    'Gradient Boosting': gb_random,
}

# For every tuned model record the winning hyperparameters, the mean cross-validated
# ROC-AUC of that configuration, and accuracy / ROC-AUC on the held-out test set.
# predict / predict_proba on a search object delegate to its refitted best estimator.
results = {
    model_name: {
        'Best Params': search.best_params_,
        'CV Score': search.best_score_,
        'Test Accuracy': accuracy_score(y_test, search.predict(X_test)),
        'Test ROC-AUC': roc_auc_score(y_test, search.predict_proba(X_test)[:, 1]),
    }
    for model_name, search in tuned_searches.items()
}

metric_cols = ['CV Score', 'Test Accuracy', 'Test ROC-AUC']

# Mixing the 'Best Params' dicts with floats makes every column object-dtype after
# the transpose. On object columns .round() is a no-op and .idxmax() raises a
# TypeError, so the metric columns are cast to float explicitly.
results_df = pd.DataFrame(results).T.astype({col: float for col in metric_cols})

print("Model Karşılaştırması:")
print(results_df[metric_cols].round(4))

# Best model.
# NOTE(review): the winner is picked by test ROC-AUC, so the reported test score of
# the winner is optimistically biased; consider selecting on 'CV Score' and using
# the test set only for the final report.
best_model = results_df['Test ROC-AUC'].idxmax()
print(f"\n{'='*50}")
print(f"EN İYİ MODEL: {best_model}")
print(f"{'='*50}")
print(f"CV Score: {results_df.loc[best_model, 'CV Score']:.4f}")
print(f"Test ROC-AUC: {results_df.loc[best_model, 'Test ROC-AUC']:.4f}")
print(f"Best Parameters: {results_df.loc[best_model, 'Best Params']}")
