In [1]:
# Kurulu değilse bunları yükleyin:
# pip install lazypredict scikit-learn pandas
import sqlite3
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from lazypredict.Supervised import LazyClassifier


In [2]:
conn = sqlite3.connect("papcorns.sqlite")  # dosya adını kendi yoluna göre değiştir

# SQL ile veri çek
query = """
WITH user_spending AS (
    SELECT 
        user_id,
        SUM(amount_usd) AS total_spent,
        CASE WHEN SUM(amount_usd) > 0 THEN 1 ELSE 0 END AS has_spent
    FROM user_events
    GROUP BY user_id
),
latest_event_date AS (
    SELECT MAX(created_at) AS max_date FROM user_events
)
SELECT 
    u.attribution_source,
    u.country,
    julianday((SELECT max_date FROM latest_event_date)) - julianday(u.created_at) AS account_age_days,
    COALESCE(us.total_spent, 0) AS total_spent,
    COALESCE(us.has_spent, 0) AS has_spent
FROM users u
LEFT JOIN user_spending us ON u.id = us.user_id;
"""
df = pd.read_sql_query(query, conn)

In [3]:
df

Unnamed: 0,attribution_source,country,account_age_days,total_spent,has_spent
0,instagram,US,431.00,0.00,0
1,instagram,NL,273.00,35.96,1
2,tiktok,TR,270.00,19.96,1
3,tiktok,TR,318.00,0.00,0
4,organic,NL,465.00,0.00,0
...,...,...,...,...,...
997,instagram,TR,161.00,0.00,0
998,organic,NL,200.00,26.97,1
999,organic,NL,149.00,0.00,0
1000,instagram,US,146.00,9.99,1


In [4]:


# Kategorik verileri sayısal hale getirelim
label_encoders = {}
for col in ['attribution_source', 'country']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # gerekirse sonra inverse çevirmek için

# Özellikler (X) ve hedef (y)
X = df.drop(columns='has_spent')
y = df['has_spent']


In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [6]:
# LazyClassifier nesnesi oluştur
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)

# Eğitim ve test verileriyle modelleri çalıştır
models, predictions = clf.fit(X_train, X_test, y_train, y_test)


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 385, number of negative: 416
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000718 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 272
[LightGBM] [Info] Number of data points in the train set: 801, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.480649 -> initscore=-0.077442
[LightGBM] [Info] Start training from score -0.077442


In [7]:
# Modellerin başarı oranlarını listele
print(models)


                               Accuracy  Balanced Accuracy  ROC AUC  F1 Score  \
Model                                                                           
AdaBoostClassifier                 1.00               1.00     1.00      1.00   
BaggingClassifier                  1.00               1.00     1.00      1.00   
XGBClassifier                      1.00               1.00     1.00      1.00   
SVC                                1.00               1.00     1.00      1.00   
SGDClassifier                      1.00               1.00     1.00      1.00   
RandomForestClassifier             1.00               1.00     1.00      1.00   
QuadraticDiscriminantAnalysis      1.00               1.00     1.00      1.00   
Perceptron                         1.00               1.00     1.00      1.00   
PassiveAggressiveClassifier        1.00               1.00     1.00      1.00   
LinearSVC                          1.00               1.00     1.00      1.00   
ExtraTreesClassifier        