In [None]:
!pip install -q pandas
!pip install -q librosa
!pip install -q plotly
!pip install -q matplotlib
!pip install -q mutagen
!pip install -q pillow
!pip install -q resampy

# 📚 Kütüphaneleri Yükleyelim (Import Libs)

In [1]:
# Data manipulation and processing
import pandas as pd
import numpy as np
import random

from PIL import Image

import os
import time
import librosa
import librosa.display

# import mutagen
# import mutagen.wave

from tqdm import tqdm, trange
from tqdm.auto import tqdm

import IPython.display as ipd
import IPython.display

# Visualization libraries
import plotly.graph_objects as go
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# To suppress warnings
import warnings 
warnings.filterwarnings("ignore")

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

# Data Split and Cross-Validation for Image Data
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold  

# Evaluation Metrics for Image Classification
from sklearn.metrics import (
    classification_report, confusion_matrix, 
    precision_score, recall_score, accuracy_score, 
    roc_curve, auc, f1_score, log_loss  
)

# Label Binarization for Multi-Class Image Classification
from sklearn.preprocessing import label_binarize  # Used for multi-class classification

# Class Weights Calculation for Imbalanced Image Datasets
from sklearn.utils.class_weight import compute_class_weight  # Used for handling class imbalance in images

# Model Selection Metrics for Image Classification
from sklearn.metrics import roc_auc_score, matthews_corrcoef  # Added ROC AUC score and Matthews correlation coefficient

# Additional Image Classification Metrics
from sklearn.metrics import average_precision_score, precision_recall_curve  # Added precision-recall curve and average precision score

# 📖 Veriyi anlamaya çalışalım (Understanding the data)

In [None]:
# Load the UrbanSound8K metadata CSV and preview the first rows.
df = pd.read_csv("/kaggle/input/urbansound8k/UrbanSound8K.csv")
df.head()

In [None]:
# Column dtypes and non-null counts of the metadata table.
df.info()

In [None]:
# Summary statistics of the numeric metadata columns.
df.describe()

In [None]:
# Build the full path of every clip from its fold number and file name.
# Plain vectorised string concatenation replaces the slower row-wise `apply`.
df["file_path"] = (
    "/kaggle/input/urbansound8k/fold"
    + df["fold"].astype(str)
    + "/"
    + df["slice_file_name"]
)

# Clip duration, derived from the start/end columns.
df["time"] = df["end"] - df["start"]

# Drop the columns that are no longer needed (`columns=` already implies the axis).
df = df.drop(columns=["start", "end", "fold", "slice_file_name", "fsID", "classID"])

df.head()

In [None]:
df['salience'].value_counts() # salience: indicates how prominent the sound is

In [None]:
# Number of clips per class label.
df['class'].value_counts()

In [None]:
# Class frequencies, shared by both panels below.
count = df['class'].value_counts()

fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 6), facecolor='white')
pie_ax, bar_ax = axs

palette = sns.color_palette("Paired")
sns.set_palette(palette)

# Left panel: share of each class as a pie chart.
pie_ax.pie(count, labels=count.index, autopct='%1.1f%%', startangle=140)
pie_ax.set_title('Distribution of categories')

# Right panel: absolute counts, with the value written above each bar.
sns.barplot(x=count.index, y=count.values, ax=bar_ax, palette='Paired')
bar_ax.set_title('Count of each Category')
bar_ax.tick_params(axis='x', rotation=45)
for i, val in enumerate(count.values):
    bar_ax.text(i, val, str(val), ha='center', va='bottom')

plt.tight_layout()
plt.show()

In [None]:
# Histogram of the "time" column (clip duration) in 30 bins.
plt.figure(figsize=(10, 6))
duration_axes = plt.gca()
duration_axes.hist(df['time'], bins=30, color='skyblue', edgecolor='black')
duration_axes.set_title("Distribution of Audio Slice Durations")
duration_axes.set_xlabel("Duration (seconds)")
duration_axes.set_ylabel("Frequency")
plt.show()

In [None]:
def show_spectrogram(audio_path:str, figsize:tuple=(20,20), cmap='magma',y_axis:str='log'):
    """
    Plot the dB-scaled STFT spectrogram of an audio file.

    :param audio_path: Path of the audio file (str)
    :param figsize: Size of the figure (tuple)
    :param cmap: Colormap used for the image (str)
    :param y_axis: Y-axis type passed to specshow ('log', 'linear', 'mel')
    :return: None. If loading fails, the exception is printed and nothing is drawn.
    """
    # Load the file at the sampling rate librosa reports for it (sr=None).
    try:
        signal, sample_rate = librosa.load(audio_path, sr=None)
    except Exception as load_error:
        print(load_error)
        return

    # Magnitude of the Short-Time Fourier Transform, converted to dB relative to its peak.
    magnitude = np.abs(librosa.stft(signal))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)

    # Draw the spectrogram with a dB colour bar.
    plt.figure(figsize=figsize)
    librosa.display.specshow(
        spectrogram_db,
        sr=sample_rate,
        y_axis=y_axis,
        x_axis='time',
        cmap=cmap,
    )
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.show()

In [None]:
# Show the spectrogram of a sample clip, then embed an audio player for the same file.
show_spectrogram('/kaggle/input/urbansound8k/fold4/102102-3-0-0.wav',
                figsize=(10,5), cmap="plasma")

IPython.display.Audio('/kaggle/input/urbansound8k/fold4/102102-3-0-0.wav')

In [None]:
# Show the spectrogram of a second sample clip, then embed an audio player for it.
show_spectrogram(audio_path='../input/urbansound8k/fold5/100263-2-0-117.wav',
                 figsize=(10,5),cmap='magma')

IPython.display.Audio('../input/urbansound8k/fold5/100263-2-0-117.wav')

In [None]:
# Show the spectrogram of a third sample clip (default colormap), then embed an audio player for it.
show_spectrogram(audio_path='../input/urbansound8k/fold5/100032-3-0-0.wav',
                 figsize=(10,5))

IPython.display.Audio('../input/urbansound8k/fold5/100032-3-0-0.wav')

In [None]:
# Pick one random clip per class.
random_samples = df.groupby('class').sample(1)
audio_samples = random_samples['file_path'].tolist()
labels = random_samples['class'].tolist()

# Visualize the waveforms on a 5x2 grid, filling it column by column.
fig, axs = plt.subplots(5, 2, figsize=(15,15))
index = 0

for col, row in ((c, r) for c in range(2) for r in range(5)):
    panel = axs[row][col]
    audio_file, sample_rate = librosa.load(audio_samples[index])
    # Use waveshow instead of waveplot
    librosa.display.waveshow(audio_file, sr=sample_rate, ax=panel)
    panel.set_title('{}'.format(labels[index]))
    index += 1

fig.tight_layout()
plt.show()

In [None]:
# Pick 15 random clips.
random_samples = df.sample(15)

# Create a 3x5 grid of axes, one per clip.
fig, axes = plt.subplots(3, 5, figsize=(20, 12))

for i, (_, sample) in enumerate(random_samples.iterrows()):

    file_path = sample["file_path"]
    label = sample["class"]

    # The grid position gets its own names so the DataFrame row is never
    # overwritten by an integer index.
    grid_row, grid_col = divmod(i, 5)

    # Load the audio file and compute its dB-scaled STFT magnitude.
    y, sr = librosa.load(file_path, sr=None)
    D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

    # Draw the spectrogram in its grid cell.
    librosa.display.specshow(D, sr=sr, y_axis="log", x_axis="time", cmap="magma", ax=axes[grid_row, grid_col])
    axes[grid_row, grid_col].set_title(label)

plt.tight_layout()
plt.show()

# 🎶 Veri seti oluşturalım (Creating dataset)

In [None]:
def extract_features(file_name:str):
    """
    Load an audio file and summarise it with averaged spectral descriptors.

    :param file_name: Path of the audio file (str)
    :return: Tuple ``(mfcc_mean, chroma_mean, spectral_contrast_mean, zcr_mean,
             spectral_centroid_mean)``. The first three are means taken along the
             second axis of each feature matrix; the last two are scalar means.
             Returns ``None`` (after printing the error) if anything raises.
    """

    def _row_means(feature_matrix):
        # Mean along the matrix's second axis: one value per feature row.
        return np.mean(feature_matrix.T, axis=0)

    try:
        signal, rate = librosa.load(file_name, sr = None)

        # Clips shorter than 100 ms are only reported; extraction still proceeds.
        if len(signal) < rate * 0.1:
            print(f"Çok kısa ses dosyası: {file_name}, süre: {len(signal)/rate:.3f}s")

        # MFCC (Mel-Frequency Cepstral Coefficients)
        mfcc_mean = _row_means(
            librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=13, n_fft=1024)
        )

        # Chroma features
        chroma_mean = _row_means(librosa.feature.chroma_stft(y=signal, sr=rate))

        # Spectral contrast
        spectral_contrast_mean = _row_means(
            librosa.feature.spectral_contrast(y=signal, sr=rate)
        )

        # Zero-crossing rate, averaged to a single number
        zcr_mean = np.mean(librosa.feature.zero_crossing_rate(signal))

        # Spectral centroid, averaged to a single number
        spectral_centroid_mean = np.mean(
            librosa.feature.spectral_centroid(y=signal, sr=rate)
        )

        return (
            mfcc_mean,
            chroma_mean,
            spectral_contrast_mean,
            zcr_mean,
            spectral_centroid_mean,
        )

    except Exception as e:
        print(f"Hata oluştu: {e}")
        return None

In [None]:
def process_file(row):
    """
    Turn one metadata row into a flat feature record.

    :param row: Mapping-like row providing ``file_path`` and ``class``.
    :return: List of the MFCC, chroma and spectral-contrast values followed by
             the zero-crossing rate, the spectral centroid and the class label;
             ``None`` if feature extraction returned ``None`` or any error occurred.
    """
    # Bound before the try block so the error message below can always refer to
    # it, even when reading `file_path` from the row is what failed.
    file_name = None
    try:
        file_name = row['file_path']
        features = extract_features(file_name)

        if features is None:
            return None

        mfcc, chroma, spectral_contrast, zcr, spectral_centroid = features
        return [*mfcc, *chroma, *spectral_contrast, zcr, spectral_centroid, row["class"]]
    except Exception as e:
        print(f"⚠️ İşleme hatası: {file_name} - {e}")
        return None

In [None]:
from tqdm.auto import tqdm
from joblib import Parallel, delayed

def extract_features_parallel(df, n_processes=None):
    """
    Extract audio features for every row of ``df`` using a joblib worker pool.

    :param df: DataFrame whose rows are passed one by one to ``process_file``.
    :param n_processes: Number of joblib workers. Defaults to the CPU count
                        minus one, but never fewer than one.
    :return: DataFrame with 13 MFCC, 12 Chroma and 7 SpectralContrast columns
             plus ``ZeroCrossingRate``, ``SpectralCentroid`` and ``class``.
             Rows for which ``process_file`` returned ``None`` are left out.
    """
    if n_processes is None:
        # os.cpu_count() may return None, and "count - 1" is 0 on a single-core
        # machine; joblib rejects n_jobs=0, so always keep at least one worker.
        n_processes = max(1, (os.cpu_count() or 1) - 1)

    print(f"{n_processes} işlemci kullanılıyor")

    # 'loky' is the process-based backend; 'threading' or 'multiprocessing'
    # could be used here instead.
    with Parallel(n_jobs=n_processes, backend='loky') as parallel:
        results = parallel(
            delayed(process_file)(row)
            for _, row in tqdm(df.iterrows(), total=len(df), desc="Özellikler Çıkarılıyor")
        )

    # Filter out the rows whose extraction failed (None results).
    results = [r for r in results if r is not None]

    columns = [f'MFCC_{i+1}' for i in range(13)] + \
              [f'Chroma_{i+1}' for i in range(12)] + \
              [f'SpectralContrast_{i+1}' for i in range(7)] + \
              ['ZeroCrossingRate', 'SpectralCentroid', 'class']

    return pd.DataFrame(results, columns=columns)

In [None]:
# Usage: extract the features of every clip in parallel and preview the result.
ext_df = extract_features_parallel(df)
print(ext_df.head())

# Save to CSV
ext_df.to_csv('extracted_audio_features.csv', index=False)

### parallel işleme olmadan

In [None]:
# Sequential variant: extract the features clip by clip.
# Note that this rebinds `ext_df`.
extracted = []
for index_num, row in tqdm(df.iterrows(), total=len(df)):
    file_name = row['file_path']
    features = extract_features(file_name)

    if features is None:
        print(f"⚠️ Skipping file due to extraction error: {file_name}")
        continue

    mfcc, chroma, spectral_contrast, zcr, spectral_centroid = features
    extracted.append([*mfcc, *chroma, *spectral_contrast, zcr, spectral_centroid, row["class"]])

# Column names: 13 MFCCs, 12 chroma bins, 7 contrast bands, two scalars and the label.
columns = (
    [f'MFCC_{i}' for i in range(1, 14)]
    + [f'Chroma_{i}' for i in range(1, 13)]
    + [f'SpectralContrast_{i}' for i in range(1, 8)]
    + ['ZeroCrossingRate', 'SpectralCentroid', 'class']
)

ext_df = pd.DataFrame(extracted, columns=columns)

print(ext_df.head())

### veriyi ayıralım 

In [None]:
# Display the extracted feature table.
ext_df

In [None]:
# Target labels and feature matrix (every column except the label).
y = ext_df['class']
X = ext_df.drop(columns='class')

# Map the class names to integer codes.
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)

In [None]:
# Sanity check: the feature matrix and the encoded labels should have the same number of rows.
print("X shape:", X.shape)
print("y_encoded shape:", len(y_encoded))

In [None]:
# First split: hold out 29.99% of the rows, stratified by class.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y_encoded,
    test_size=0.2999,
    random_state=42,
    stratify=y_encoded,
)
# Second split: 33.3% of the held-out rows become the validation set,
# the remainder is the test set.
X_test, X_valid, y_test, y_valid = train_test_split(
    X_temp,
    y_temp,
    test_size=0.333,
    random_state=42,
    stratify=y_temp,
)

# Share of the full data set that ended up in each split.
total_rows = X.shape[0]
train_percentage = (X_train.shape[0] / total_rows) * 100
test_percentage = (X_test.shape[0] / total_rows) * 100
valid_percentage = (X_valid.shape[0] / total_rows) * 100

print("Training set shapes:", X_train.shape, f"({train_percentage:.2f}%)")
print("Testing set shapes:", X_test.shape, f"({test_percentage:.2f}%)")
print("Validation set shapes:", X_valid.shape, f"({valid_percentage:.2f}%)")

# Modelleme (Modelling)

In [None]:
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import HistGradientBoostingClassifier, AdaBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_curve, auc, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import optuna

## LogisticRegression

In [None]:
# Baseline logistic regression, evaluated on the validation and test splits.
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X_train, y_train)

# Predictions and accuracies for both hold-out splits.
y_valid_pred = log_reg.predict(X_valid)
y_test_pred = log_reg.predict(X_test)
valid_accuracy = accuracy_score(y_valid, y_valid_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

print("\nValidation Accuracy:", valid_accuracy)
print("\nClassification Report on Validation Set:")
print(classification_report(y_valid, y_valid_pred))

print("\nTest Accuracy:", test_accuracy)
print("\nClassification Report on Test Set:")
print(classification_report(y_test, y_test_pred))

In [None]:
def objective(trial):
    """
    Optuna objective for LogisticRegression.

    Samples ``C`` (log scale) and ``solver``; ``max_iter`` is sampled only for
    'lbfgs' and 'saga' and fixed at 1000 for 'liblinear'. The model is fitted
    on the training split.

    :param trial: Optuna trial used to sample the hyper-parameters.
    :return: Accuracy on the validation split.
    """
    # Regularization strength and solver
    reg_strength = trial.suggest_float('C', 1e-4, 1e2, log=True)
    chosen_solver = trial.suggest_categorical('solver', ['lbfgs', 'liblinear', 'saga'])

    # Iteration budget: tuned for every solver except liblinear
    if chosen_solver == 'liblinear':
        iteration_cap = 1000
    else:
        iteration_cap = trial.suggest_int('max_iter', 100, 1000)

    candidate = LogisticRegression(
        C=reg_strength,
        solver=chosen_solver,
        max_iter=iteration_cap,
        random_state=42,
    )
    candidate.fit(X_train, y_train)

    # Score the candidate on the validation split
    return accuracy_score(y_valid, candidate.predict(X_valid))

# Run a 50-trial Optuna study that maximizes validation accuracy.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

best_params = study.best_params
print("\nBest parameters:", best_params)
print("Best validation accuracy:", study.best_value)

# Refit with the winning parameters; max_iter falls back to 1000 when the
# best trial did not sample it.
final_model = LogisticRegression(
    C=best_params['C'],
    solver=best_params['solver'],
    max_iter=best_params['max_iter'] if 'max_iter' in best_params else 1000,
    random_state=42,
)
final_model.fit(X_train, y_train)

# Validation metrics
y_valid_pred = final_model.predict(X_valid)
valid_accuracy = accuracy_score(y_valid, y_valid_pred)
print("\nFinal Validation Accuracy:", valid_accuracy)
print("\nClassification Report on Validation Set:")
print(classification_report(y_valid, y_valid_pred))

# Test metrics
y_test_pred = final_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
print("\nFinal Test Accuracy:", test_accuracy)
print("\nClassification Report on Test Set:")
print(classification_report(y_test, y_test_pred))

# Optional: plots of the optimization history and parameter importances
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_param_importances(study).show()

## XGBClassifier

In [None]:
# Baseline XGBoost with default hyper-parameters.
# The labels were already integer-encoded with LabelEncoder, so XGBoost's
# deprecated built-in label encoder is switched off, matching the other
# XGBClassifier calls in this notebook.
xgb_model = XGBClassifier(random_state=42, use_label_encoder=False)
xgb_model.fit(X_train, y_train)
xgb_predictions = xgb_model.predict(X_valid)

xgb_accuracy = accuracy_score(y_valid, xgb_predictions)
print("XGBoost Accuracy:", xgb_accuracy)

# Compute confusion matrix
cm = confusion_matrix(y_valid, xgb_predictions)

# Plot confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)
plt.show()

# Print classification report
report = classification_report(y_valid, xgb_predictions)
print("Classification Report:")
print(report)

In [None]:
def objective(trial):
    """
    Optuna objective for XGBClassifier.

    Samples tree depth, learning rate, number of estimators, minimum child
    weight, row/column subsampling and gamma, then fits on the training split.

    :param trial: Optuna trial used to sample the hyper-parameters.
    :return: Accuracy on the validation split.
    """
    # Sampled in a fixed order, then handed to the classifier as keywords.
    depth = trial.suggest_int('max_depth', 3, 10)
    eta = trial.suggest_float('learning_rate', 0.01, 0.3)
    rounds = trial.suggest_int('n_estimators', 100, 1000)
    child_weight = trial.suggest_int('min_child_weight', 1, 10)
    row_fraction = trial.suggest_float('subsample', 0.5, 1.0)
    column_fraction = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    split_penalty = trial.suggest_float('gamma', 0, 5)

    candidate = XGBClassifier(
        max_depth=depth,
        learning_rate=eta,
        n_estimators=rounds,
        min_child_weight=child_weight,
        subsample=row_fraction,
        colsample_bytree=column_fraction,
        gamma=split_penalty,
        random_state=42,
        use_label_encoder=False,
    )
    candidate.fit(X_train, y_train)

    return accuracy_score(y_valid, candidate.predict(X_valid))

# Tune XGBoost for 50 trials, maximizing validation accuracy.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # You can adjust n_trials

best_params = study.best_params
print("Best parameters:", best_params)
print("Best accuracy:", study.best_value)

# Refit on the training split with the winning hyper-parameters.
best_model = XGBClassifier(**best_params, use_label_encoder=False, random_state=42)
best_model.fit(X_train, y_train)

# Validation predictions and accuracy of the tuned model.
xgb_predictions = best_model.predict(X_valid)
xgb_accuracy = accuracy_score(y_valid, xgb_predictions)
print("\nXGBoost Accuracy with Optimized Parameters:", xgb_accuracy)

# Confusion matrix on the validation split.
cm = confusion_matrix(y_valid, xgb_predictions)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix - Optimized XGBoost")
plt.show()

report = classification_report(y_valid, xgb_predictions)
print("\nClassification Report:")
print(report)

# Importance of each feature, listed by column position.
feature_importance = best_model.feature_importances_
for i, importance in enumerate(feature_importance):
    print(f"Feature {i}: {importance}")

## CatBoostClassifier

In [None]:
def objective(trial):
    """
    Optuna objective for CatBoostClassifier.

    Samples depth, learning rate, iteration count, L2 leaf regularization,
    bagging temperature, random strength and border count, then fits on the
    training split with CatBoost's own output silenced.

    :param trial: Optuna trial used to sample the hyper-parameters.
    :return: Accuracy on the validation split.
    """
    # Sampled in a fixed order, then handed to the classifier as keywords.
    tree_depth = trial.suggest_int('depth', 4, 10)
    eta = trial.suggest_float('learning_rate', 0.01, 0.3)
    rounds = trial.suggest_int('iterations', 100, 1000)
    leaf_reg = trial.suggest_float('l2_leaf_reg', 1, 10)
    bag_temperature = trial.suggest_float('bagging_temperature', 0, 1)
    strength = trial.suggest_float('random_strength', 0, 10)
    borders = trial.suggest_int('border_count', 32, 255)

    candidate = CatBoostClassifier(
        depth=tree_depth,
        learning_rate=eta,
        iterations=rounds,
        l2_leaf_reg=leaf_reg,
        bagging_temperature=bag_temperature,
        random_strength=strength,
        border_count=borders,
        random_seed=42,
        verbose=0,  # Suppress CatBoost output
    )
    candidate.fit(X_train, y_train)

    return accuracy_score(y_valid, candidate.predict(X_valid))

# Create and run the optimization study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # You can adjust n_trials

print("Best parameters:", study.best_params)
print("Best accuracy:", study.best_value)

# Train the best model
best_params = study.best_params
best_model = CatBoostClassifier(**best_params, random_seed=42, verbose=0)
best_model.fit(X_train, y_train)

# Make predictions
catboost_predictions = best_model.predict(X_valid)

# Calculate accuracy
catboost_accuracy = accuracy_score(y_valid, catboost_predictions)
print("\nCatBoost Accuracy with Optimized Parameters:", catboost_accuracy)

# Create and display confusion matrix.
# It must be built from the CatBoost predictions; the XGBoost predictions left
# over from the previous section would plot the wrong model under this title.
cm = confusion_matrix(y_valid, catboost_predictions)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix - Optimized CatBoost")
plt.show()

# Print classification report
report = classification_report(y_valid, catboost_predictions)
print("\nClassification Report:")
print(report)

# Feature importance, listed by column position
feature_importance = best_model.feature_importances_
for i, importance in enumerate(feature_importance):
    print(f"Feature {i}: {importance}")

## DecisionTreeClassifier

In [None]:
def objective(trial):
    """
    Optuna objective for DecisionTreeClassifier.

    :param trial: Optuna trial used to sample the hyper-parameters.
    :return: Accuracy on the validation split after fitting on the training split.
    """
    # Define hyperparameter search space for Decision Tree
    params = {
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        # 'auto' is not offered: recent scikit-learn releases reject it, and for
        # classifiers it was only an alias of 'sqrt'.
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
        'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
        'min_weight_fraction_leaf': trial.suggest_float('min_weight_fraction_leaf', 0.0, 0.5),
        'max_leaf_nodes': trial.suggest_int('max_leaf_nodes', 10, 1000),
        'random_state': 42
    }

    model = DecisionTreeClassifier(**params)
    model.fit(X_train, y_train)

    predictions = model.predict(X_valid)
    accuracy = accuracy_score(y_valid, predictions)

    return accuracy

# Tune the decision tree for 50 trials, maximizing validation accuracy.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # You can adjust n_trials

best_params = study.best_params
print("Best parameters:", best_params)
print("Best accuracy:", study.best_value)

# Refit on the training split with the winning hyper-parameters.
best_model = DecisionTreeClassifier(**best_params, random_state=42)
best_model.fit(X_train, y_train)

# Validation predictions and accuracy of the tuned model.
dt_predictions = best_model.predict(X_valid)
dt_accuracy = accuracy_score(y_valid, dt_predictions)
print("\nDecision Tree Accuracy with Optimized Parameters:", dt_accuracy)

# Confusion matrix on the validation split.
cm = confusion_matrix(y_valid, dt_predictions)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix - Optimized Decision Tree")
plt.show()

report = classification_report(y_valid, dt_predictions)
print("\nClassification Report:")
print(report)

# Importance of each feature, listed by column position.
feature_importance = best_model.feature_importances_
for i, importance in enumerate(feature_importance):
    print(f"Feature {i}: {importance}")

## RandomForestClassifier

In [None]:
def objective(trial):
    """
    Optuna objective for RandomForestClassifier.

    :param trial: Optuna trial used to sample the hyper-parameters.
    :return: Accuracy on the validation split after fitting on the training split.
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        # 'auto' is not offered: recent scikit-learn releases reject it, and for
        # classifiers it was only an alias of 'sqrt'.
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
        'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
        'random_state': 42
    }

    model = RandomForestClassifier(**params)
    model.fit(X_train, y_train)

    predictions = model.predict(X_valid)
    accuracy = accuracy_score(y_valid, predictions)

    return accuracy

# Tune the random forest for 50 trials, maximizing validation accuracy.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # You can adjust n_trials

best_params = study.best_params
print("Best parameters:", best_params)
print("Best accuracy:", study.best_value)

# Refit on the training split with the winning hyper-parameters.
best_model = RandomForestClassifier(**best_params, random_state=42)
best_model.fit(X_train, y_train)

# Validation predictions and accuracy of the tuned model.
rf_predictions = best_model.predict(X_valid)
rf_accuracy = accuracy_score(y_valid, rf_predictions)
print("\nRandom Forest Accuracy with Optimized Parameters:", rf_accuracy)

# Confusion matrix on the validation split.
cm = confusion_matrix(y_valid, rf_predictions)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix - Optimized Random Forest")
plt.show()

report = classification_report(y_valid, rf_predictions)
print("\nClassification Report:")
print(report)

# Importance of each feature, listed by column position.
feature_importance = best_model.feature_importances_
for i, importance in enumerate(feature_importance):
    print(f"Feature {i}: {importance}")


## Extra Trees

In [None]:
def objective(trial):
    """
    Optuna objective for ExtraTreesClassifier.

    :param trial: Optuna trial used to sample the hyper-parameters.
    :return: Accuracy on the validation split after fitting on the training split.
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        # 'auto' is not offered: recent scikit-learn releases reject it, and for
        # classifiers it was only an alias of 'sqrt'.
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
        'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
        'random_state': 42
    }

    model = ExtraTreesClassifier(**params)
    model.fit(X_train, y_train)
    predictions = model.predict(X_valid)
    accuracy = accuracy_score(y_valid, predictions)
    return accuracy

# Tune Extra Trees for 50 trials, maximizing validation accuracy.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

print("Best parameters:", study.best_params)
print("Best accuracy:", study.best_value)

# Refit on the training split with the winning hyper-parameters.
best_params = study.best_params
best_model = ExtraTreesClassifier(**best_params, random_state=42)
best_model.fit(X_train, y_train)

et_predictions = best_model.predict(X_valid)


valid_accuracy = accuracy_score(y_valid, et_predictions)
print("\nValidation Accuracy:", valid_accuracy)
print("\nClassification Report on Validation Set:")
# The report must use this model's predictions, not the `y_valid_pred` left
# over from an earlier section.
print(classification_report(y_valid, et_predictions))

y_test_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
print("\nTest Accuracy:", test_accuracy)
print("\nClassification Report on Test Set:")
print(classification_report(y_test, y_test_pred))

et_accuracy = accuracy_score(y_valid, et_predictions)
print("\nExtra Trees Accuracy with Optimized Parameters:", et_accuracy)

# Confusion matrix on the validation split.
cm = confusion_matrix(y_valid, et_predictions)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix - Optimized Extra Trees")
plt.show()

# Print the classification report
report = classification_report(y_valid, et_predictions)
print("\nClassification Report:")
print(report)


In [None]:
def evaluate_model(y_valid, y_valid_pred, y_test, y_test_pred, model, X_valid):
    """
    Print validation/test metrics, plot the validation confusion matrix and
    report ROC information for a fitted classifier.

    :param y_valid: True labels of the validation split.
    :param y_valid_pred: Predicted labels for the validation split.
    :param y_test: True labels of the test split.
    :param y_test_pred: Predicted labels for the test split.
    :param model: Fitted estimator. ``predict_proba`` is used when available,
                  otherwise ``decision_function``; with neither, the ROC part
                  is skipped.
    :param X_valid: Validation features used to score the ROC part.
    :return: None. With two classes a ROC curve is plotted; otherwise the
             one-vs-rest ROC AUC score is printed. Errors raised in the ROC
             part are printed rather than propagated.
    """
    print("\nValidation Accuracy:", accuracy_score(y_valid, y_valid_pred))
    print("\nClassification Report on Validation Set:")
    print(classification_report(y_valid, y_valid_pred))

    print("\nTest Accuracy:", accuracy_score(y_test, y_test_pred))
    print("\nClassification Report on Test Set:")
    print(classification_report(y_test, y_test_pred))

    # Confusion matrix of the validation predictions
    cm = confusion_matrix(y_valid, y_valid_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap=plt.cm.Blues)
    plt.title("Confusion Matrix")
    plt.show()

    # ROC curve / ROC AUC
    try:
        if hasattr(model, "predict_proba"):
            y_valid_proba = model.predict_proba(X_valid)
        elif hasattr(model, "decision_function"):
            y_valid_proba = model.decision_function(X_valid)
        else:
            print("⚠️ Model does not support probability predictions, skipping ROC Curve.")
            return

        # Binary classification (two classes)
        if len(np.unique(y_valid)) == 2:
            # A 2-D score matrix has one column per class, so column 1 is taken;
            # a 1-D score vector (as a binary decision_function returns) is
            # already the score to rank by.
            if np.ndim(y_valid_proba) == 2:
                positive_scores = y_valid_proba[:, 1]
            else:
                positive_scores = y_valid_proba
            fpr, tpr, _ = roc_curve(y_valid, positive_scores)
            roc_auc = auc(fpr, tpr)
        # Multi-class (one-vs-rest strategy): only the score is reported
        else:
            roc_auc = roc_auc_score(y_valid, y_valid_proba, multi_class="ovr")
            print("\nMulti-Class ROC AUC Score:", roc_auc)
            return

        plt.figure()
        plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic (ROC) Curve')
        plt.legend(loc="lower right")
        plt.show()

    except Exception as e:
        print(f"❌ Error while generating ROC Curve: {e}")


## HistGradientBoostingClassifier

In [None]:
def objective(trial):
    """
    Optuna objective for HistGradientBoostingClassifier.

    :param trial: Optuna trial used to sample the hyper-parameters.
    :return: Accuracy on the validation split after fitting on the training split.
    """
    params = {
        # Log-scale sampling through suggest_float(log=True), the non-deprecated
        # form already used for the logistic-regression study.
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.5, log=True),
        'max_iter': trial.suggest_int('max_iter', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10)
    }

    model = HistGradientBoostingClassifier(**params)
    model.fit(X_train, y_train)
    predictions = model.predict(X_valid)
    accuracy = accuracy_score(y_valid, predictions)

    return accuracy

# Tune for 20 trials, refit on the training split with the winning
# hyper-parameters, then report validation/test metrics.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

best_params = study.best_params
best_model = HistGradientBoostingClassifier(**best_params)
best_model.fit(X_train, y_train)

y_valid_pred = best_model.predict(X_valid)
y_test_pred = best_model.predict(X_test)

evaluate_model(
    y_valid=y_valid,
    y_valid_pred=y_valid_pred,
    y_test=y_test,
    y_test_pred=y_test_pred,
    model=best_model,
    X_valid=X_valid,
)

## LGBMClassifier

In [None]:
def objective(trial):
    """
    Optuna objective for LGBMClassifier.

    :param trial: Optuna trial used to sample the hyper-parameters.
    :return: Accuracy on the validation split after fitting on the training split.
    """
    params = {
        # Log-scale sampling through suggest_float(log=True), the non-deprecated
        # form already used for the logistic-regression study.
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.5, log=True),
        'max_iter': trial.suggest_int('max_iter', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10)
    }

    model = LGBMClassifier(**params)
    model.fit(X_train, y_train)
    predictions = model.predict(X_valid)
    accuracy = accuracy_score(y_valid, predictions)

    return accuracy

# Tune for 20 trials, refit on the training split with the winning
# hyper-parameters, then report validation/test metrics.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

best_params = study.best_params
best_model = LGBMClassifier(**best_params)
best_model.fit(X_train, y_train)

y_valid_pred = best_model.predict(X_valid)
y_test_pred = best_model.predict(X_test)

evaluate_model(
    y_valid=y_valid,
    y_valid_pred=y_valid_pred,
    y_test=y_test,
    y_test_pred=y_test_pred,
    model=best_model,
    X_valid=X_valid,
)

In [None]:
def objective(trial):
    """
    Optuna objective for AdaBoostClassifier.

    :param trial: Optuna trial used to sample the hyper-parameters.
    :return: Accuracy on the validation split after fitting on the training split.
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        # Log-scale sampling through suggest_float(log=True), the non-deprecated
        # form already used for the logistic-regression study.
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0, log=True)
    }

    model = AdaBoostClassifier(**params)
    model.fit(X_train, y_train)
    predictions = model.predict(X_valid)
    accuracy = accuracy_score(y_valid, predictions)

    return accuracy

# Tune for 20 trials, refit on the training split with the winning
# hyper-parameters, then report validation/test metrics.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

best_params = study.best_params
best_model = AdaBoostClassifier(**best_params)
best_model.fit(X_train, y_train)

y_valid_pred = best_model.predict(X_valid)
y_test_pred = best_model.predict(X_test)

evaluate_model(
    y_valid=y_valid,
    y_valid_pred=y_valid_pred,
    y_test=y_test,
    y_test_pred=y_test_pred,
    model=best_model,
    X_valid=X_valid,
)