In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import VotingRegressor
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from catboost import CatBoostRegressor

# Veri yolları
train_path = "C:/Users/ASUS/Desktop/Regression of Used Car Prices/train.csv"
test_path = "C:/Users/ASUS/Desktop/Regression of Used Car Prices/test.csv"

# Verileri yükleyin
train_data = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)

# Eksik verileri doldurma
train_data['clean_title'].fillna('Unknown', inplace=True)
test_data['clean_title'].fillna('Unknown', inplace=True)

# Kategorik verileri one-hot encoding ile sayısal değerlere dönüştürme
categorical_columns = ['brand', 'model', 'fuel_type', 'engine', 'transmission', 'ext_col', 'int_col', 'accident', 'clean_title']
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ],
    remainder='passthrough'
)

# LightGBM modeli
model_lgb = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', lgb.LGBMRegressor(
        subsample = 0.9, num_leaves= 31, n_estimators= 750, 
        min_child_samples= 30, max_depth= 10, learning_rate=0.01,
        colsample_bytree= 0.8
    ))
])

# CatBoost modeli
model_catboost = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', CatBoostRegressor(
        learning_rate= 0.01, l2_leaf_reg= 3, iterations= 500, depth= 12, border_count= 32
    ))
])

# VotingRegressor ile iki modeli birleştirme
voting_model = VotingRegressor([('lgb', model_lgb), ('catboost', model_catboost)])

# Bağımlı ve bağımsız değişkenler
X = train_data.drop(columns=['price', 'id'])
y = train_data['price']

# Veriyi eğitim ve test olarak ayırma
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Modeli eğitme
voting_model.fit(X_train, y_train)

# Test seti üzerindeki tahminleri yap
y_pred = voting_model.predict(X_test)

# Performans değerlendirmesi (RMSE)
rmse = mean_squared_error(y_test, y_pred, squared=False)
print(f"Test RMSE (VotingRegressor): {rmse}")

# Batch'ler Halinde Test Setinde Tahminler
batch_size = 1000
all_predictions = pd.DataFrame()

for i in range(0, len(test_data), batch_size):
    batch = test_data[i:i+batch_size]
    batch_predictions = voting_model.predict(batch.drop(columns=['id']))
    
    batch_output = pd.DataFrame({'id': batch['id'], 'price': batch_predictions})
    all_predictions = pd.concat([all_predictions, batch_output])

# Sonuçları CSV Dosyasına Kaydetme
all_predictions.to_csv('predictions_voting.csv', index=False)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.102762 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4392
[LightGBM] [Info] Number of data points in the train set: 150826, number of used features: 2053
[LightGBM] [Info] Start training from score 43890.785316
0:	learn: 79745.5545057	total: 242ms	remaining: 2m
1:	learn: 79651.5979623	total: 452ms	remaining: 1m 52s
2:	learn: 79557.6536973	total: 666ms	remaining: 1m 50s
3:	learn: 79466.4011327	total: 864ms	remaining: 1m 47s
4:	learn: 79376.5395787	total: 1.08s	remaining: 1m 46s
5:	learn: 79285.3879406	total: 1.25s	remaining: 1m 43s
6:	learn: 79196.2387357	total: 1.5s	remaining: 1m 45s
7:	learn: 79110.1479581	total: 1.79s	remaining: 1m 49s
8:	learn: 79020.4932313	total: 2s	remaining: 1m 49s
9:	learn: 78941.9873069	total: 2.21s	remaining: 1m 48s
10:	learn: 78859.8647105	total: 2.43s	remaini

89:	learn: 75066.1166635	total: 18.4s	remaining: 1m 23s
90:	learn: 75040.3275958	total: 18.6s	remaining: 1m 23s
91:	learn: 75013.6714531	total: 18.8s	remaining: 1m 23s
92:	learn: 74986.2075855	total: 19s	remaining: 1m 23s
93:	learn: 74961.8045997	total: 19.2s	remaining: 1m 22s
94:	learn: 74937.0333961	total: 19.4s	remaining: 1m 22s
95:	learn: 74911.0188640	total: 19.6s	remaining: 1m 22s
96:	learn: 74888.6655060	total: 19.8s	remaining: 1m 22s
97:	learn: 74865.1411352	total: 20s	remaining: 1m 22s
98:	learn: 74840.7281658	total: 20.2s	remaining: 1m 21s
99:	learn: 74815.5452616	total: 20.4s	remaining: 1m 21s
100:	learn: 74792.4383288	total: 20.6s	remaining: 1m 21s
101:	learn: 74769.7313514	total: 20.9s	remaining: 1m 21s
102:	learn: 74745.6582841	total: 21.1s	remaining: 1m 21s
103:	learn: 74720.0583786	total: 21.3s	remaining: 1m 21s
104:	learn: 74695.9640869	total: 21.5s	remaining: 1m 21s
105:	learn: 74674.7308812	total: 21.8s	remaining: 1m 20s
106:	learn: 74653.4329829	total: 22s	remaining

236:	learn: 72787.6818559	total: 48.4s	remaining: 53.7s
237:	learn: 72775.9618580	total: 48.6s	remaining: 53.5s
238:	learn: 72766.3127317	total: 48.8s	remaining: 53.3s
239:	learn: 72755.6921941	total: 49s	remaining: 53.1s
240:	learn: 72745.5439773	total: 49.2s	remaining: 52.8s
241:	learn: 72736.3332306	total: 49.4s	remaining: 52.7s
242:	learn: 72728.2102337	total: 49.7s	remaining: 52.5s
243:	learn: 72718.2576849	total: 50s	remaining: 52.5s
244:	learn: 72707.2676746	total: 50.3s	remaining: 52.3s
245:	learn: 72700.3849508	total: 50.6s	remaining: 52.2s
246:	learn: 72688.6873833	total: 50.8s	remaining: 52.1s
247:	learn: 72678.9305233	total: 51.1s	remaining: 52s
248:	learn: 72670.7080625	total: 51.4s	remaining: 51.8s
249:	learn: 72661.7387224	total: 51.8s	remaining: 51.8s
250:	learn: 72650.1505900	total: 52.1s	remaining: 51.7s
251:	learn: 72642.3633075	total: 52.4s	remaining: 51.6s
252:	learn: 72632.1865450	total: 52.8s	remaining: 51.5s
253:	learn: 72623.3728053	total: 53.1s	remaining: 51.4

384:	learn: 71725.5095478	total: 1m 22s	remaining: 24.6s
385:	learn: 71719.6056061	total: 1m 22s	remaining: 24.3s
386:	learn: 71709.5659731	total: 1m 22s	remaining: 24.1s
387:	learn: 71707.4001805	total: 1m 22s	remaining: 23.9s
388:	learn: 71699.2848698	total: 1m 22s	remaining: 23.7s
389:	learn: 71692.4084479	total: 1m 23s	remaining: 23.4s
390:	learn: 71690.9041217	total: 1m 23s	remaining: 23.2s
391:	learn: 71684.9768458	total: 1m 23s	remaining: 23s
392:	learn: 71683.4836430	total: 1m 23s	remaining: 22.8s
393:	learn: 71677.0106170	total: 1m 23s	remaining: 22.6s
394:	learn: 71667.4373192	total: 1m 24s	remaining: 22.4s
395:	learn: 71659.3857025	total: 1m 24s	remaining: 22.2s
396:	learn: 71652.9939694	total: 1m 24s	remaining: 21.9s
397:	learn: 71651.4323100	total: 1m 24s	remaining: 21.7s
398:	learn: 71648.6011197	total: 1m 24s	remaining: 21.5s
399:	learn: 71642.3179623	total: 1m 25s	remaining: 21.3s
400:	learn: 71635.0573377	total: 1m 25s	remaining: 21.1s
401:	learn: 71628.0721956	total: 



Test RMSE (VotingRegressor): 68438.03726809588
