In [106]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import lightgbm as lgb


In [107]:
df = pd.read_csv("cleaned_data.csv") 

In [108]:
X = df.drop(columns=['class'])  # class hedef değişken
y = df['class']

In [109]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [110]:
model = lgb.LGBMClassifier(random_state=42, class_weight='balanced')
model.fit(X_train, y_train)



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000747 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1733
[LightGBM] [Info] Number of data points in the train set: 10356, number of used features: 11
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294




In [111]:
y_pred = model.predict(X_test)  # scale ettiysen X_test_scaled kullan
acc = accuracy_score(y_test, y_pred)

print(f"LightGBM Model Accuracy: {acc:.5f}")


LightGBM Model Accuracy: 0.75628


LightGBM Model Accuracy: 0.75628

In [112]:
df['predicted_class'] = model.predict(X)

In [113]:
print(df[['predicted_class']].head(10))


   predicted_class
0                4
1                4
2                2
3                3
4                3
5                3
6                1
7                3
8                2
9                3


In [114]:
df['actual_class'] = y
df[['actual_class', 'predicted_class']].head(10)


Unnamed: 0,actual_class,predicted_class
0,2,4
1,4,4
2,2,2
3,3,3
4,3,3
5,3,3
6,1,1
7,3,3
8,2,2
9,3,3


In [115]:
equal_ratio_count = ((df['actual_class'] / df['predicted_class']) == 1).sum()
equal_ratio_count

np.int64(11401)

In [116]:
df.shape

(12945, 14)

In [117]:
11401/12945

0.8807261490923136

In [118]:
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)  # %75 civarında verir


0.7562765546543067

# grid search uygulayalım

In [119]:
from sklearn.model_selection import GridSearchCV

params = {
    'learning_rate': [0.05, 0.1, 0.2],
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'num_leaves': [31, 50, 70]
}

grid = GridSearchCV(
    estimator=lgb.LGBMClassifier(class_weight='balanced', random_state=42),
    param_grid=params,
    cv=3,
    scoring='accuracy',
    n_jobs=-1
)

grid.fit(X_train, y_train)

best_model = grid.best_estimator_




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001375 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001382 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1718
[LightGBM] [Info] Total Bins 1721
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001379 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1715
[LightGBM] [Info] Number of data points in the train set: 6904, number of used features: 11
[LightGBM] [Info] Number of data points in the train set: 6904, number of used features: 11
[LightGBM] [Info] Number of data points in the train set: 6904, number of used features: 11
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Auto-choos

In [120]:
best_model



In [123]:
model = LGBMClassifier(
    class_weight='balanced',
    max_depth=10,
    n_estimators=200,
    num_leaves=70,
    random_state=42
)

# Modeli eğit
model.fit(X_train, y_train)

# Test verisi üzerinde tahmin yap
y_pred = model.predict(X_test)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000432 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1733
[LightGBM] [Info] Number of data points in the train set: 10356, number of used features: 11
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294


In [124]:
# Performans değerlendirme
acc = accuracy_score(y_test, y_pred)
print(f"\nLightGBM Best Params Accuracy: {acc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


LightGBM Best Params Accuracy: 0.7470

Classification Report:
              precision    recall  f1-score   support

           1       0.90      0.81      0.85       614
           2       0.73      0.69      0.71       656
           3       0.64      0.63      0.63       658
           4       0.74      0.87      0.80       661

    accuracy                           0.75      2589
   macro avg       0.75      0.75      0.75      2589
weighted avg       0.75      0.75      0.75      2589


Confusion Matrix:
[[495  78  32   9]
 [ 28 451 123  54]
 [ 21  84 413 140]
 [  3   6  77 575]]


In [125]:
print("GridSearch CV score (eğitim içi):", grid.best_score_)
print("Test Accuracy (görmediği veri):", accuracy_score(y_test, y_pred))


GridSearch CV score (eğitim içi): 0.7432406334492082
Test Accuracy (görmediği veri): 0.7470065662417922
