In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import VotingRegressor
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from catboost import CatBoostRegressor
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.base import BaseEstimator, RegressorMixin


# Veri yolları
train_path = "C:/Users/ASUS/Desktop/Regression of Used Car Prices/train.csv"
test_path = "C:/Users/ASUS/Desktop/Regression of Used Car Prices/test.csv"

# Verileri yükleyin
train_data = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)

# Eksik verileri doldurma
train_data['clean_title'].fillna('Unknown', inplace=True)
test_data['clean_title'].fillna('Unknown', inplace=True)

# Kategorik verileri one-hot encoding ile sayısal değerlere dönüştürme
categorical_columns = ['brand', 'model', 'fuel_type', 'engine', 'transmission', 'ext_col', 'int_col', 'accident', 'clean_title']
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ],
    remainder='passthrough'
)

# LightGBM modeli
model_lgb = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', lgb.LGBMRegressor(
        subsample=0.9, num_leaves=31, n_estimators=750, 
        min_child_samples=30, max_depth=10, learning_rate=0.01,
        colsample_bytree=0.8
    ))
])

# CatBoost modeli
model_catboost = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', CatBoostRegressor(
        learning_rate=0.01, l2_leaf_reg=3, iterations=500, depth=12, border_count=32
    ))
])

# Derin Öğrenme (ANN) Modeli
def build_ann_model(input_dim):
    model = Sequential()
    model.add(Dense(128, input_dim=input_dim, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Eğitim verisi için preprocessing
X = train_data.drop(columns=['price', 'id'])
y = train_data['price']
X = preprocessor.fit_transform(X)  # Önce OneHotEncoding yapıyoruz

# Veriyi eğitim ve test olarak ayırma
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Derin Öğrenme Modelini oluşturma ve eğitme
ann_model = build_ann_model(X_train.shape[1])
ann_model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)


class ANNWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, model):
        self.model = model

    def fit(self, X, y=None):
        self.model.fit(X, y, epochs=20, batch_size=32, verbose=1)
        return self

    def predict(self, X):
        return self.model.predict(X).flatten()

# Derin öğrenme modelini bir wrapper ile kullanma
ann_wrapper = ANNWrapper(ann_model)

# VotingRegressor ile üç modeli birleştirme
voting_model = VotingRegressor([
    ('lgb', model_lgb),
    ('catboost', model_catboost),
    ('ann', ann_wrapper)
])

# Modeli eğitme
voting_model.fit(X_train, y_train)

# Test seti üzerindeki tahminleri yap
y_pred = voting_model.predict(X_test)

# Performans değerlendirmesi (RMSE)
rmse = mean_squared_error(y_test, y_pred, squared=False)
print(f"Test RMSE (VotingRegressor): {rmse}")

# Batch'ler Halinde Test Setinde Tahminler
batch_size = 1000
all_predictions = pd.DataFrame()

for i in range(0, len(test_data), batch_size):
    batch = test_data[i:i+batch_size]
    batch_preprocessed = preprocessor.transform(batch.drop(columns=['id']))
    batch_predictions = voting_model.predict(batch_preprocessed)
    
    batch_output = pd.DataFrame({'id': batch['id'], 'price': batch_predictions})
    all_predictions = pd.concat([all_predictions, batch_output])

# Sonuçları CSV Dosyasına Kaydetme
all_predictions.to_csv('predictions_voting_ann.csv', index=False)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 5801356288.0000
Epoch 2/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 6039350784.0000
Epoch 3/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 5696570880.0000
Epoch 4/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 6162098688.0000
Epoch 5/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 6055246848.0000
Epoch 6/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 5206101504.0000
Epoch 7/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 5285877248.0000
Epoch 8/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 6249755136.0000
Epoch 9/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 68886502

ValueError: Specifying the columns using strings is only supported for dataframes.