# KÜTÜPHANELERİN YÜKLENMESİ 

In [574]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from category_encoders.target_encoder import TargetEncoder
from sklearn.linear_model import ElasticNet
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import warnings

In [575]:
warnings.filterwarnings("ignore")

# VERİYİ YÜKLEME VE GEREKSİZ SUTÜNU ÇIKARMA

In [576]:
df = pd.read_excel("temizVeri_Bursa.xlsx")
df = df.drop(columns=["İl"])
df = df.dropna()
df = df[df["Fiyat"] < 35_000_000]

In [577]:
y = np.log1p(df["Fiyat"])
X = df.drop(columns=["Fiyat"])    # Fiyat değişkeninin loglanması

# ENCODING İŞLEMİ ( TARGET ENC. )
* Kategorik değişkenleri modellere uyumlu hale getirmek için   

In [578]:
cat_cols = ["Isıtma Tipi", "İlçe", "Mahalle", "Site İçinde mi?"]
num_cols = [c for c in X.columns if c not in cat_cols]

preprocess = ColumnTransformer([
    ("cat", TargetEncoder(), cat_cols),
    ("num", StandardScaler(), num_cols)
], remainder="passthrough")

# MODELLER

In [579]:
rf_model = RandomForestRegressor(
    n_estimators=200,
    max_depth=10,
    random_state=42,
    n_jobs=-1
)

In [580]:
cat_model = CatBoostRegressor(
    iterations=1500,
    depth=6,
    learning_rate=0.05,
    l2_leaf_reg=3,
    loss_function="RMSE",
    random_seed=42,
    verbose=0
)

In [581]:
xgb_model = XGBRegressor(
    objective="reg:squarederror",
    random_state=42,
    n_estimators=1000,
    learning_rate=0.03,
    subsample=0.8,
    colsample_bytree=0.8,
    max_depth=6,
    reg_lambda=1.0,
    reg_alpha=0.0,
    verbosity=0
)

In [582]:
lgbm_model = LGBMRegressor(
    n_estimators=1000,
    learning_rate=0.03,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_lambda=1.0,
    reg_alpha=0.0,
    random_state=42
)

In [583]:
elasticnet_model = ElasticNet(
    alpha=1.0,          # Ceza katsayısı (regül. şiddeti) — LGBM'deki lambda gibi
    l1_ratio=0.5,       # L1 ve L2 arasındaki oran — 1.0: Lasso, 0.0: Ridge
    max_iter=10000,     # Maksimum iterasyon sayısı
    tol=1e-4,           # Duruş kriteri toleransı
    selection="cyclic", # Özellik seçimi yöntemi
    random_state=42
)

# PIPELINE

In [584]:
models = {
    "RandomForest": rf_model,
    "CatBoost":     cat_model,
    "XGBoost":      xgb_model,
    "LightGBM":     lgbm_model,
    "ElasticNet":   elasticnet_model
}

pipelines = {
    name: Pipeline([("prep", preprocess), ("model", model)])
    for name, model in models.items()
}

# EĞİTİM / TEST VERİSİ AYIRMASI

In [585]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# DEĞERLENDİRME VE SONUÇLAR

In [586]:
results = []

for name, pipe in pipelines.items():
    pipe.fit(X_train, y_train)
    y_pred_log = pipe.predict(X_test)
    y_pred = np.expm1(y_pred_log)
    y_true = np.expm1(y_test)

    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    results.append([name, rmse, mae, r2])

results_df = pd.DataFrame(results, columns=["Model", "RMSE", "MAE", "R2"])
print("\nModel Performansları")
print("-------------------------------------------------------------")
print(results_df.to_string(index=False))
print("-------------------------------------------------------------")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000179 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 807
[LightGBM] [Info] Number of data points in the train set: 4951, number of used features: 11
[LightGBM] [Info] Start training from score 15.066236

Model Performansları
-------------------------------------------------------------
       Model         RMSE          MAE        R2
RandomForest 2.271019e+06 1.103704e+06  0.623693
    CatBoost 2.193628e+06 1.040914e+06  0.648903
     XGBoost 2.165644e+06 1.025246e+06  0.657804
    LightGBM 2.180148e+06 1.032308e+06  0.653205
  ElasticNet 3.794315e+06 2.037110e+06 -0.050432
-------------------------------------------------------------


# MODEL SONUÇLARI BU ŞEKİLDE ANCAK BİZİM İÇİN HENÜZ YETERLİ DEĞİL
* BU YÜZDEN **FEATURES ENGINEERING** UYGULAYARAK VERİYİ DAHA ANLAMLI HALE GETİRMEYE ÇALIŞACAĞIZ VE BİR MODEL ÜZERİNDEN İLERLEYECEĞİZ
* BEN BURDA EN YÜKSEK DEĞER VEREN  ***XGBOOST*** MODELİYLE DEVAM EDECEĞİM