In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import scipy.stats as stats  # bazı dağılımlar için
import lightgbm as lgb


In [6]:
df = pd.read_csv("cleaned_data.csv") 

In [7]:
param_dist = {
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'n_estimators': [100, 200, 300, 400],
    'max_depth': [3, 5, 7, 10, 15],
    'num_leaves': [20, 31, 50, 70, 100],
    'min_child_samples': [10, 20, 30],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}


In [8]:
X = df.drop(columns=['class'])  # class hedef değişken
y = df['class']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [9]:
model = LGBMClassifier(class_weight='balanced', random_state=42)

random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=50,              # kaç kombinasyon denenecek (arttırılabilir)
    cv=3,
    scoring='accuracy',
    random_state=42,
    n_jobs=-1,
    verbose=1
)

random_search.fit(X_train, y_train)
best_model = random_search.best_estimator_


Fitting 3 folds for each of 50 candidates, totalling 150 fits
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001910 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001877 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001714 seconds.
You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001523 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1718
[LightGBM] [Info] Total Bins 1715
[LightGBM] [Info] Total Bins 1718
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Number of data points in the train set: 6904, number of used features: 11
[LightGBM] [Info] Number of data points in the train set: 6904

In [10]:
y_pred = best_model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"\nRandom Search Test Accuracy: {acc:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nBest Params:", random_search.best_params_)



Random Search Test Accuracy: 0.7540

Classification Report:
              precision    recall  f1-score   support

           1       0.91      0.81      0.85       614
           2       0.74      0.69      0.72       656
           3       0.66      0.64      0.65       658
           4       0.74      0.88      0.80       661

    accuracy                           0.75      2589
   macro avg       0.76      0.75      0.75      2589
weighted avg       0.76      0.75      0.75      2589


Confusion Matrix:
[[497  77  33   7]
 [ 28 452 118  58]
 [ 22  71 420 145]
 [  2   8  68 583]]

Best Params: {'subsample': 1.0, 'num_leaves': 31, 'n_estimators': 400, 'min_child_samples': 30, 'max_depth': 15, 'learning_rate': 0.05, 'colsample_bytree': 0.8}
