In [13]:
#Kullanılan kütüphaneler:

import warnings
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_score, cross_validate
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from xgboost import XGBClassifier


# Reading the file from Google Drive needs an authorization step; approving it once is enough.
from google.colab import drive
drive.mount('/content/drive')


# The HIGGS dataset holds 11 million rows, which is heavy on memory.
# It is reduced to a 100k-row random sample so the analysis runs faster;
# that sample size is still sufficient for the analysis.

file_path = '/content/drive/MyDrive/HIGGS.csv'
feature_names = [f'feature_{i}' for i in range(1, 29)]
df = pd.read_csv(file_path, header=None)
# The file has no header row: name the first column 'label' and the rest feature_1..feature_28.
df.columns = ['label', *feature_names]
df_sample = df.sample(n=100000, random_state=42)
print("Veri Boyutu:", df_sample.shape)

# df_sample.head() was tried here to show a few rows but took very long,
# so only the shape is printed.



Mounted at /content/drive
Veri Boyutu: (100000, 29)


In [14]:
# Outlier handling (IQR method):
#
# Outliers can mislead the models during training, so they are adjusted here.
def remove_outliers_iqr(df, feature_cols, whisker=1.5):
    """Cap outliers in the given columns at their IQR fences.

    Despite the name, no rows are removed: for every column in
    ``feature_cols`` the values below ``Q1 - whisker * IQR`` or above
    ``Q3 + whisker * IQR`` are replaced by the respective fence value.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. It is not modified; the capping is done on a copy.
    feature_cols : iterable of column labels
        Columns to cap. Columns not listed are returned unchanged.
    whisker : float, default 1.5
        Multiplier applied to the IQR to obtain the lower/upper fences.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns as ``df``.
    """
    # Work on a copy so that the caller's frame is left untouched and
    # re-running the calling cell always starts from the same input.
    capped = df.copy()
    for col in feature_cols:
        q1 = capped[col].quantile(0.25)
        q3 = capped[col].quantile(0.75)
        iqr = q3 - q1
        lower_fence = q1 - whisker * iqr
        upper_fence = q3 + whisker * iqr

        # Pin out-of-range values to the nearest fence.
        capped[col] = capped[col].clip(lower_fence, upper_fence)
    return capped

# Every column except 'label' is treated as a feature.
feature_cols = [col for col in df_sample.columns if col != 'label']

# Adjust the outliers in all feature columns.
df_sample = remove_outliers_iqr(df_sample, feature_cols)

# Outlying rows are not deleted in this step; their values are only pinned to the limit values.
# This prevents a large loss of data.
# Although the content of the outliers is distorted, improving model stability was prioritised.


In [15]:
# Min-Max normalization:
scaler = MinMaxScaler()
df_sample[feature_cols] = scaler.fit_transform(df_sample[feature_cols])

# Models can give different results on differently scaled features, so normalization is required.
# The features are scaled to the [0, 1] range.
# NOTE(review): the scaler is fit on every sampled row, i.e. before the
# cross-validation splits made in the later cells — confirm this is acceptable
# or fit the scaler inside each training fold instead.

# Save the preprocessed sample.
df_sample.to_csv("HIGGS_preprocessed.csv", index=False)



In [16]:
# Feature selection: keep the 15 features with the strongest relation to the label.
# The ANOVA F-score is used because it is a fast, filter-based criterion.

# Split the frame into the feature matrix and the label vector.
y = df_sample["label"]
X = df_sample.drop(columns=["label"])

# Score all features and keep the best 15.
selector = SelectKBest(score_func=f_classif, k=15)
X_seçilen = selector.fit_transform(X, y)

# Names of the columns that survived the selection.
seçilen_özellikler = X.columns[selector.get_support()]
print("Seçilen Özellikler: ")
print(seçilen_özellikler)

# Assemble label + selected features into one frame and save it.
# The label index is reset so it lines up with the freshly built feature frame.
selected_frame = pd.DataFrame(X_seçilen, columns=seçilen_özellikler)
label_column = y.reset_index(drop=True)
df_seçilen = pd.concat([label_column, selected_frame], axis=1)
df_seçilen.to_csv("HIGGS_secilen_ozellikler.csv", index=False)

Seçilen Özellikler: 
Index(['feature_1', 'feature_4', 'feature_6', 'feature_10', 'feature_13',
       'feature_14', 'feature_16', 'feature_17', 'feature_18', 'feature_21',
       'feature_22', 'feature_23', 'feature_26', 'feature_27', 'feature_28'],
      dtype='object')


In [17]:
# Section 3A. Nested cross-validation over feature-selection variants.
# Every model is evaluated with both ANOVA and mutual information as the
# selection criterion, and with k = 10, 15 and 20 selected features.

warnings.filterwarnings("ignore")  # silence the warnings emitted during the MutualInfo runs

# Outer folds provide the held-out test metrics; inner folds provide a
# validation accuracy computed on the outer-training part only.
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Feature-selection variants
k_list = [10, 15, 20]
score_funcs = {
    "ANOVA": f_classif,
    "MutualInfo": mutual_info_classif
}

# Models. random_state is fixed on the stochastic estimators so that a rerun
# of this cell reproduces the same table.
models = {
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "MLP": MLPClassifier(hidden_layer_sizes=(100,), activation='relu', max_iter=300,
                         early_stopping=True, random_state=42),
    "LinearSVC": LinearSVC(C=1.0, max_iter=1000, random_state=42),
    # NOTE(review): tree_method='gpu_hist' presumably needs a GPU runtime; if the
    # fit raises, the fold is reported below and recorded as NaN.
    "XGBoost": XGBClassifier(tree_method='gpu_hist', use_label_encoder=False,
                             eval_metric='logloss', random_state=42)
}

METRIC_COLUMNS = ["Accuracy", "Precision", "Recall", "F1 Score", "ROC-AUC"]
# A failed fold is stored as NaN so it is skipped by the averages instead of
# dragging them towards zero.
FAILED_FOLD = (np.nan,) * len(METRIC_COLUMNS)


def _evaluate_outer_fold(estimator, X_fit, y_fit, X_eval, y_eval):
    """Fit ``estimator`` on the training split and score it on the evaluation split.

    Returns a tuple ``(accuracy, precision, recall, f1, roc_auc)``. ROC-AUC is
    computed from ``predict_proba`` when the estimator has it, otherwise from
    ``decision_function``; it is NaN when neither is available.
    """
    estimator.fit(X_fit, y_fit)
    predictions = estimator.predict(X_eval)

    if hasattr(estimator, "predict_proba"):
        roc = roc_auc_score(y_eval, estimator.predict_proba(X_eval)[:, 1])
    elif hasattr(estimator, "decision_function"):
        roc = roc_auc_score(y_eval, estimator.decision_function(X_eval))
    else:
        roc = np.nan

    return (
        accuracy_score(y_eval, predictions),
        precision_score(y_eval, predictions, zero_division=0),
        recall_score(y_eval, predictions, zero_division=0),
        f1_score(y_eval, predictions, zero_division=0),
        roc,
    )


# Result rows, one per (model, selector, k)
all_results = []

# The outer splits are deterministic (fixed random_state), so the selector
# fitted for a given (selector, k, fold) is the same for every model. It is
# fitted once and reused instead of being recomputed per model.
fitted_selectors = {}

for model_name, model in models.items():
    print(f"\n Model: {model_name}")

    for sel_name, sel_func in score_funcs.items():
        for k in k_list:
            inner_fold_scores = []
            outer_metrics = []

            for fold, (train_idx, test_idx) in enumerate(outer_cv.split(X, y)):
                X_train_full, X_test_full = X.iloc[train_idx], X.iloc[test_idx]
                y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

                # Feature selection, fitted on the outer-training part only
                cache_key = (sel_name, k, fold)
                if cache_key not in fitted_selectors:
                    fitted_selectors[cache_key] = SelectKBest(score_func=sel_func, k=k).fit(X_train_full, y_train)
                selector = fitted_selectors[cache_key]
                X_train = selector.transform(X_train_full)
                X_test = selector.transform(X_test_full)

                # Inner-CV accuracy on the outer-training part
                try:
                    val_score = cross_val_score(model, X_train, y_train, cv=inner_cv, scoring='accuracy', n_jobs=-1).mean()
                except Exception as e:
                    print(f"Hata oluştu ({model_name}-{sel_name}-k{k}) inner CV sırasında: {e}")
                    val_score = np.nan
                inner_fold_scores.append(val_score)

                # Metrics on the outer test part
                try:
                    outer_metrics.append(_evaluate_outer_fold(model, X_train, y_train, X_test, y_test))
                except Exception as e:
                    print(f"Hata oluştu ({model_name}-{sel_name}-k{k}) outer test sırasında: {e}")
                    outer_metrics.append(FAILED_FOLD)

            # Average over the outer folds; NaN (failed) folds are skipped by mean()
            mean_outer = pd.DataFrame(outer_metrics, columns=METRIC_COLUMNS).mean()

            all_results.append({
                "Model": model_name,
                "Selector": sel_name,
                "k": k,
                "Mean Accuracy": mean_outer["Accuracy"],
                "Mean Precision": mean_outer["Precision"],
                "Mean Recall": mean_outer["Recall"],
                "Mean F1 Score": mean_outer["F1 Score"],
                "Mean ROC-AUC": mean_outer["ROC-AUC"],
                "Mean Inner CV Accuracy": pd.Series(inner_fold_scores, dtype=float).mean()
            })

# Tabulate the results
results_df = pd.DataFrame(all_results)
print("\n Ortalama Performans Özeti:")
display(results_df.sort_values("Mean Accuracy", ascending=False).round(4))




 Model: KNN

 Model: MLP

 Model: LinearSVC

 Model: XGBoost

 Ortalama Performans Özeti:


Unnamed: 0,Model,Selector,k,Mean Accuracy,Mean Precision,Mean Recall,Mean F1 Score,Mean ROC-AUC,Mean Inner CV Accuracy
23,XGBoost,MutualInfo,20,0.7235,0.7347,0.7458,0.7402,0.8003,0.7173
20,XGBoost,ANOVA,20,0.717,0.7305,0.7355,0.733,0.7926,0.7122
22,XGBoost,MutualInfo,15,0.7169,0.7283,0.7401,0.7342,0.7952,0.7137
19,XGBoost,ANOVA,15,0.7154,0.7296,0.7325,0.731,0.7901,0.7105
11,MLP,MutualInfo,20,0.7124,0.7232,0.7381,0.7304,0.7845,0.7063
8,MLP,ANOVA,20,0.7113,0.7203,0.7413,0.7305,0.7817,0.704
10,MLP,MutualInfo,15,0.71,0.7184,0.742,0.7298,0.7833,0.7034
7,MLP,ANOVA,15,0.709,0.7217,0.7312,0.7262,0.7808,0.7043
18,XGBoost,ANOVA,10,0.7057,0.7222,0.7194,0.7208,0.778,0.7024
21,XGBoost,MutualInfo,10,0.7048,0.7175,0.7275,0.7224,0.7804,0.7027


In [8]:
# Collect the per-fold ROC curves that the summary cell above did not keep,
# so that a mean ROC curve can be drawn for every (model, selector, k).
roc_plot_data = []

# Common FPR grid on which every fold's TPR curve is interpolated before averaging.
mean_fpr = np.linspace(0, 1, 100)

for model_name, model in models.items():
    print(f"\n Model: {model_name}")

    for sel_name, sel_func in score_funcs.items():
        for k in k_list:
            inner_fold_scores = []
            outer_metrics = []

            tpr_list = []   # interpolated TPR curve of every outer fold
            auc_list = []   # ROC-AUC of every outer fold

            for fold, (train_idx, test_idx) in enumerate(outer_cv.split(X, y)):
                X_train_full, X_test_full = X.iloc[train_idx], X.iloc[test_idx]
                y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

                selector = SelectKBest(score_func=sel_func, k=k)
                X_train = selector.fit_transform(X_train_full, y_train)
                X_test = selector.transform(X_test_full)

                try:
                    val_score = cross_val_score(model, X_train, y_train, cv=inner_cv, scoring='accuracy', n_jobs=-1).mean()
                except Exception as e:
                    print(f"Hata oluştu ({model_name}-{sel_name}-k{k}) inner CV sırasında: {e}")
                    val_score = np.nan  # failed folds are skipped by the average
                inner_fold_scores.append(val_score)

                try:
                    model.fit(X_train, y_train)
                    y_pred = model.predict(X_test)

                    if hasattr(model, "predict_proba"):
                        scores = model.predict_proba(X_test)[:, 1]
                    elif hasattr(model, "decision_function"):
                        scores = model.decision_function(X_test)
                    else:
                        scores = None

                    # Reset per fold so a value from an earlier fold is never reused.
                    # The name avoids `auc`, which would shadow sklearn.metrics.auc
                    # in the notebook namespace.
                    fold_auc = np.nan
                    if scores is not None:
                        fpr, tpr, _ = roc_curve(y_test, scores)
                        fold_auc = roc_auc_score(y_test, scores)

                        interp_tpr = np.interp(mean_fpr, fpr, tpr)
                        interp_tpr[0] = 0.0  # anchor the curve at (0, 0)
                        tpr_list.append(interp_tpr)
                        auc_list.append(fold_auc)

                    acc = accuracy_score(y_test, y_pred)
                    prec = precision_score(y_test, y_pred, zero_division=0)
                    rec = recall_score(y_test, y_pred, zero_division=0)
                    f1 = f1_score(y_test, y_pred, zero_division=0)

                    outer_metrics.append((acc, prec, rec, f1, fold_auc))

                except Exception as e:
                    print(f"Hata oluştu ({model_name}-{sel_name}-k{k}) outer test sırasında: {e}")
                    outer_metrics.append((np.nan, np.nan, np.nan, np.nan, np.nan))

            # NaN (failed) folds are skipped by mean()
            mean_outer = pd.DataFrame(outer_metrics, columns=["Accuracy", "Precision", "Recall", "F1 Score", "ROC-AUC"]).mean()

            # NOTE(review): this appends to the same all_results list that the
            # summary cell already filled, so each combination ends up listed
            # twice — confirm whether these rows are still wanted.
            all_results.append({
                "Model": model_name,
                "Selector": sel_name,
                "k": k,
                "Mean Accuracy": mean_outer["Accuracy"],
                "Mean Precision": mean_outer["Precision"],
                "Mean Recall": mean_outer["Recall"],
                "Mean F1 Score": mean_outer["F1 Score"],
                "Mean ROC-AUC": mean_outer["ROC-AUC"],
                "Mean Inner CV Accuracy": pd.Series(inner_fold_scores, dtype=float).mean()
            })

            # Average the fold curves and keep them for plotting
            if tpr_list and auc_list:
                mean_tpr = np.mean(tpr_list, axis=0)
                mean_auc = np.mean(auc_list)

                roc_plot_data.append({
                    "Model": model_name,
                    "Selector": sel_name,
                    "k": k,
                    "FPR": mean_fpr,
                    "TPR": mean_tpr,
                    "ROC-AUC": mean_auc
                })


def plot_roc_per_model(roc_data):
    """Draw one ROC figure per (model, selector) pair with one curve per k.

    ``roc_data`` is a list of dicts with the keys "Model", "Selector", "k",
    "FPR", "TPR" and "ROC-AUC". Models are handled in sorted order and the
    selectors in the order ANOVA, MutualInfo; pairs without any entry are
    skipped. Each figure is shown immediately; nothing is returned.
    """
    selector_names = ("ANOVA", "MutualInfo")
    model_names = sorted({item["Model"] for item in roc_data})

    for model_name in model_names:
        for selector_name in selector_names:
            curves = [
                item for item in roc_data
                if item["Model"] == model_name and item["Selector"] == selector_name
            ]
            if len(curves) == 0:
                continue

            plt.figure(figsize=(8, 6))
            colours = sns.color_palette("Set1", len(curves))

            # One line per k value, each with its own palette colour
            for colour, curve in zip(colours, curves):
                plt.plot(
                    curve["FPR"],
                    curve["TPR"],
                    label=f'k={curve["k"]} (AUC={curve["ROC-AUC"]:.3f})',
                    color=colour,
                )

            # Diagonal reference line of a random classifier
            plt.plot([0, 1], [0, 1], 'k--', label='Random Guess')
            plt.title(f'{model_name} - {selector_name} Feature Selection', fontsize=13)
            plt.xlabel("False Positive Rate")
            plt.ylabel("True Positive Rate")
            plt.legend(loc="lower right")
            plt.grid(True)
            plt.tight_layout()
            plt.show()

# Draw the mean ROC curves collected in roc_plot_data. The plotting helper
# defined in this cell is plot_roc_per_model; no plot_ova_roc_all exists.
plot_roc_per_model(roc_plot_data)


NameError: name 'models' is not defined

In [19]:
# 3B. Determining the best model and its hyperparameters by trying different
# hyperparameter combinations (nested cross-validation).

# Show every column when a frame is displayed:
pd.set_option("display.max_columns", None)
warnings.filterwarnings("ignore")

outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

fold_metrics = []
all_combination_scores = []

# Scorers evaluated in the inner CV. precision/recall/f1 are used with their
# binary defaults here, so the matching binary 'roc_auc' scorer is used: it
# accepts decision_function as well as predict_proba, whereas 'roc_auc_ovr'
# requires predict_proba and therefore yields NaN for LinearSVC.
scoring_metrics = {
    'accuracy': 'accuracy',
    'precision': 'precision',
    'recall': 'recall',
    'f1': 'f1',
    'roc_auc': 'roc_auc'
}


def _record_candidate(label, estimator, params, X_fit, y_fit, fold_label, model_scores):
    """Run the inner CV for one candidate and record its mean scores.

    Appends ``(mean accuracy, estimator, params)`` to ``model_scores`` (used to
    pick the fold's winner) and a detailed row to ``all_combination_scores``.
    Returns the dict of mean inner-CV scores keyed like ``scoring_metrics``.
    """
    cv_out = cross_validate(estimator, X_fit, y_fit, cv=inner_cv, scoring=scoring_metrics, n_jobs=-1)
    means = {m: np.mean(cv_out[f'test_{m}']) for m in scoring_metrics}
    model_scores.append((means['accuracy'], estimator, params))
    all_combination_scores.append({'Outer Fold': fold_label, 'Model': label, 'Params': params, **means})
    return means


for fold, (train_idx, test_idx) in enumerate(outer_cv.split(X, y)):
    print(f"\n--- Outer Fold {fold + 1} ---")
    fold_label = f'Fold {fold + 1}'
    X_train_full, X_test_full = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Feature selection is fitted on the outer-training part only
    selector = SelectKBest(score_func=f_classif, k=20)
    X_train = selector.fit_transform(X_train_full, y_train)
    X_test = selector.transform(X_test_full)

    model_scores = []

    # KNN
    for n in range(3, 12):
        _record_candidate('KNN', KNeighborsClassifier(n_neighbors=n), {'n_neighbors': n},
                          X_train, y_train, fold_label, model_scores)

    # MLP (random_state fixed so the comparison is reproducible)
    for hls, act in product([(50,), (100,)], ['relu', 'tanh']):
        mlp = MLPClassifier(hidden_layer_sizes=hls, activation=act, max_iter=300, random_state=42)
        _record_candidate('MLP', mlp, {'hidden_layer_sizes': hls, 'activation': act},
                          X_train, y_train, fold_label, model_scores)

    # LinearSVC
    for c in [0.1, 1, 10]:
        means = _record_candidate('LinearSVC', LinearSVC(C=c, max_iter=1000), {'C': c},
                                  X_train, y_train, fold_label, model_scores)
        print(f"LinearSVC (C={c}) | Accuracy: {means['accuracy']:.4f}")

    # XGBoost (best effort: the candidate is skipped when it cannot be run)
    try:
        xgb = XGBClassifier(tree_method='gpu_hist', use_label_encoder=False, eval_metric='logloss')
        _record_candidate('XGBClassifier', xgb, {'default': True},
                          X_train, y_train, fold_label, model_scores)
    except Exception as e:
        print("!!!XGBoost GPU'da çalıştırılamadı:", e)

    # Pick the candidate with the best inner-CV accuracy. Candidates whose
    # inner CV failed have a NaN accuracy and are ignored when possible,
    # because NaN does not compare reliably inside max().
    ranked = [entry for entry in model_scores if not np.isnan(entry[0])]
    best_score, best_model, best_params = max(ranked or model_scores, key=lambda entry: entry[0])
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)

    if hasattr(best_model, "predict_proba"):
        y_probs = best_model.predict_proba(X_test)[:, 1]
        roc = roc_auc_score(y_test, y_probs)
    elif hasattr(best_model, "decision_function"):
        decision_scores = best_model.decision_function(X_test)
        roc = roc_auc_score(y_test, decision_scores)
    else:
        roc = np.nan  # keeps the ROC-AUC column numeric

    fold_metrics.append({
        'Outer Fold': fold_label,
        'Model': type(best_model).__name__,
        'Best Params': best_params,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1 Score': f1,
        'ROC-AUC': roc
    })

# 1) Results on the outer-fold test sets
results_df = pd.DataFrame(fold_metrics)
print("\n Outer Fold Performans Özeti:")
display(results_df.round(4))
print(f"\n Ortalama Accuracy: {results_df['Accuracy'].mean():.4f}")




--- Outer Fold 1 ---
LinearSVC (C=0.1) | Accuracy: 0.6429
LinearSVC (C=1) | Accuracy: 0.6430
LinearSVC (C=10) | Accuracy: 0.6431

--- Outer Fold 2 ---
LinearSVC (C=0.1) | Accuracy: 0.6406
LinearSVC (C=1) | Accuracy: 0.6411
LinearSVC (C=10) | Accuracy: 0.6412

--- Outer Fold 3 ---
LinearSVC (C=0.1) | Accuracy: 0.6412
LinearSVC (C=1) | Accuracy: 0.6415
LinearSVC (C=10) | Accuracy: 0.6416

--- Outer Fold 4 ---
LinearSVC (C=0.1) | Accuracy: 0.6392
LinearSVC (C=1) | Accuracy: 0.6397
LinearSVC (C=10) | Accuracy: 0.6397

--- Outer Fold 5 ---
LinearSVC (C=0.1) | Accuracy: 0.6397
LinearSVC (C=1) | Accuracy: 0.6401
LinearSVC (C=10) | Accuracy: 0.6402

🔍 Outer Fold Performans Özeti:


Unnamed: 0,Outer Fold,Model,Best Params,Accuracy,Precision,Recall,F1 Score,ROC-AUC
0,Fold 1,XGBClassifier,{'default': True},0.7229,0.736322,0.740365,0.738338,0.800361
1,Fold 2,XGBClassifier,{'default': True},0.71215,0.725964,0.730707,0.728328,0.788274
2,Fold 3,XGBClassifier,{'default': True},0.7134,0.726352,0.733643,0.729979,0.787355
3,Fold 4,XGBClassifier,{'default': True},0.71485,0.729263,0.73168,0.730469,0.792645
4,Fold 5,MLPClassifier,"{'hidden_layer_sizes': (50,), 'activation': 'r...",0.71595,0.711061,0.778451,0.743232,0.791818



 Ortalama Accuracy: 0.7158

📊 Tüm Model ve Parametre Kombinasyonlarının Inner CV Sonuçları:


KeyError: 'Accuracy'

In [22]:
# 2) Detailed table of the inner-CV results
all_scores_df = pd.DataFrame(all_combination_scores)
print("\n Tüm Model ve Parametre Kombinasyonlarının Inner CV Sonuçları:")
display(all_scores_df.sort_values(['Outer Fold', 'Model', 'accuracy'], ascending=[True, True, False]).round(4))

# Write both tables to CSV so they can be downloaded.
results_df.to_csv('results_df.csv', index=False)
all_scores_df.to_csv('all_scores_df.csv', index=False)

from google.colab import files
files.download('results_df.csv')
files.download('all_scores_df.csv')


 Tüm Model ve Parametre Kombinasyonlarının Inner CV Sonuçları:


Unnamed: 0,Outer Fold,Model,Params,accuracy,precision,recall,f1
8,Fold 1,KNN,{'n_neighbors': 11},0.6460,0.6481,0.7212,0.6827
6,Fold 1,KNN,{'n_neighbors': 9},0.6426,0.6466,0.7128,0.6781
7,Fold 1,KNN,{'n_neighbors': 10},0.6405,0.6673,0.6368,0.6517
4,Fold 1,KNN,{'n_neighbors': 7},0.6388,0.6450,0.7030,0.6727
5,Fold 1,KNN,{'n_neighbors': 8},0.6359,0.6697,0.6129,0.6400
...,...,...,...,...,...,...,...
77,Fold 5,MLP,"{'hidden_layer_sizes': (50,), 'activation': 'r...",0.7107,0.7275,0.7233,0.7253
80,Fold 5,MLP,"{'hidden_layer_sizes': (100,), 'activation': '...",0.7084,0.7216,0.7298,0.7254
79,Fold 5,MLP,"{'hidden_layer_sizes': (100,), 'activation': '...",0.7081,0.7117,0.7542,0.7316
78,Fold 5,MLP,"{'hidden_layer_sizes': (50,), 'activation': 't...",0.7078,0.7103,0.7546,0.7317


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [7]:
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import roc_curve, auc


def plot_average_ova_by_params(X, y, all_scores_df, outer_splits=5):
    """Plot fold-averaged one-vs-all ROC curves for every recorded parameter set.

    For each model name in ``all_scores_df['Model']`` one figure is drawn; it
    contains one curve per distinct entry of ``all_scores_df['Params']``. Each
    curve is the mean, over ``outer_splits`` stratified folds, of the
    class-averaged ROC curve of a ``OneVsRestClassifier`` wrapping the model.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix (indexed positionally with ``.iloc``).
    y : array-like
        Class labels; binary and multi-class targets are both handled.
    all_scores_df : pandas.DataFrame
        Needs the columns 'Model' and 'Params' (dict of constructor keyword
        arguments). Recognised model names are 'KNN', 'MLP', 'LinearSVC' and
        'XGBClassifier'; rows with any other name are skipped.
    outer_splits : int, default 5
        Number of stratified folds.

    Errors inside a fold are printed and that fold is left out of the average.
    """
    classes = np.unique(y)
    y_bin = label_binarize(y, classes=classes)
    # label_binarize returns a single column for a binary target
    n_classes = y_bin.shape[1]
    cv = StratifiedKFold(n_splits=outer_splits, shuffle=True, random_state=42)
    mean_fpr = np.linspace(0, 1, 100)

    for model_name in all_scores_df['Model'].unique():
        model_rows = all_scores_df[all_scores_df['Model'] == model_name]
        # 'Params' holds dicts, which are unhashable, so duplicates are
        # detected on their repr instead of with drop_duplicates().
        model_subset = model_rows[~model_rows['Params'].map(repr).duplicated()]
        plt.figure(figsize=(10, 6))
        colors = sns.color_palette("husl", len(model_subset))

        for idx, (_, row) in enumerate(model_subset.iterrows()):
            params = row['Params']
            tpr_list = []
            auc_list = []

            # Build the estimator for this parameter set
            if model_name == 'KNN':
                model = KNeighborsClassifier(**params)
            elif model_name == 'MLP':
                model = MLPClassifier(**params, max_iter=300)
            elif model_name == 'LinearSVC':
                model = LinearSVC(**params, max_iter=1000)
            elif model_name == 'XGBClassifier':
                # The recorded params are only a {'default': True} marker, so they are not passed on
                model = XGBClassifier(tree_method='gpu_hist', use_label_encoder=False, eval_metric='logloss')
            else:
                continue

            for train_idx, test_idx in cv.split(X, y):
                X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
                y_train, y_test = y_bin[train_idx], y_bin[test_idx]

                try:
                    ova = OneVsRestClassifier(model)
                    # A binary target is passed as a 1-D vector
                    ova.fit(X_train, y_train.ravel() if n_classes == 1 else y_train)
                    y_score = (
                        ova.predict_proba(X_test) if hasattr(ova, 'predict_proba')
                        else ova.decision_function(X_test)
                    )

                    # Bring the scores to shape (n_samples, n_classes). In the
                    # binary case decision_function is 1-D and predict_proba
                    # has two columns, of which only the positive-class column
                    # matches the single indicator column of y_test.
                    y_score = np.asarray(y_score)
                    if y_score.ndim == 1:
                        y_score = y_score.reshape(-1, 1)
                    elif n_classes == 1 and y_score.shape[1] == 2:
                        y_score = y_score[:, [1]]

                    fold_tpr = np.zeros_like(mean_fpr)
                    fold_auc = []

                    for i in range(n_classes):
                        fpr, tpr, _ = roc_curve(y_test[:, i], y_score[:, i])
                        interp_tpr = np.interp(mean_fpr, fpr, tpr)
                        interp_tpr[0] = 0.0  # anchor the curve at (0, 0)
                        fold_tpr += interp_tpr
                        fold_auc.append(auc(fpr, tpr))

                    tpr_list.append(fold_tpr / n_classes)
                    auc_list.append(np.mean(fold_auc))
                except Exception as e:
                    print(f"{model_name} - {params} fold hatası: {e}")

            if tpr_list:
                mean_tpr = np.mean(tpr_list, axis=0)
                mean_auc = np.mean(auc_list)
                mean_tpr[-1] = 1.0  # anchor the curve at (1, 1)
                plt.plot(mean_fpr, mean_tpr, color=colors[idx],
                         label=f"{params} | AUC={mean_auc:.3f}")

        plt.plot([0, 1], [0, 1], 'k--', lw=1, label='Random')
        # The fold count in the title follows the outer_splits argument
        plt.title(f"{model_name} – Ortalama ROC Eğrileri ({outer_splits}-Fold OvA)", fontsize=12)
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.legend(loc="lower right", fontsize=9)
        plt.tight_layout()
        plt.show()


plot_average_ova_by_params(X, y, all_scores_df)


NameError: name 'X' is not defined