In [3]:
# Gerekli kütüphaneleri yükleyelim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
train_path = "C:/Users/ASUS/Desktop/Regression of Used Car Prices/train.csv"

test_path = "C:/Users/ASUS/Desktop/Regression of Used Car Prices/test.csv"

train_data = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)

In [4]:
# Eksik verileri doldurma
train_data['clean_title'].fillna('Unknown', inplace=True)
test_data['clean_title'].fillna('Unknown', inplace=True)

# Kategorik verileri one-hot encoding ile sayısal değerlere dönüştürme
categorical_columns = ['brand', 'model', 'fuel_type', 'engine', 'transmission', 'ext_col', 'int_col', 'accident', 'clean_title']
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
encoded_train_array = encoder.fit_transform(train_data[categorical_columns])
encoded_test_array = encoder.transform(test_data[categorical_columns])

# Encoder tarafından oluşturulan yeni sütun adlarını alalım
encoded_columns = encoder.get_feature_names_out(categorical_columns)

# Numpy array'lerini DataFrame'e çevirirken sütun adlarını ekleyelim
encoded_train_data = pd.DataFrame(encoded_train_array, columns=encoded_columns, index=train_data.index)
encoded_test_data = pd.DataFrame(encoded_test_array, columns=encoded_columns, index=test_data.index)

# Orijinal veriyle birleştirme
train_data = train_data.join(encoded_train_data).drop(columns=categorical_columns)
test_data = test_data.join(encoded_test_data).drop(columns=categorical_columns)

# Bağımlı ve bağımsız değişkenler
X = train_data.drop(columns=['price', 'id'])
y = train_data['price']

# Veri setini eğitim ve doğrulama setlerine ayırma
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

# Yapay sinir ağı modelini tanımlama
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))  # Regresyon için linear aktivasyon

# Modeli derleme
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae'])

# Modeli eğitme
model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    epochs=50,  # Eğitim süresini burada ayarlayın
    batch_size=32,
    verbose=1  # Eğitim süreci hakkında bilgi vermek için
)

# Tahminleri yapma
y_pred = model.predict(X_valid)

# Performans değerlendirmesi (RMSE)
rmse = np.sqrt(mean_squared_error(y_valid, y_pred))
print(f"Validation RMSE: {rmse}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step - loss: 6475069440.0000 - mae: 25115.6797 - val_loss: 4927894016.0000 - val_mae: 21706.3496
Epoch 2/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - loss: 6026162176.0000 - mae: 22666.6582 - val_loss: 4954506752.0000 - val_mae: 22841.3340
Epoch 3/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - loss: 5399386112.0000 - mae: 22229.0566 - val_loss: 4963034624.0000 - val_mae: 21310.5156
Epoch 4/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - loss: 5803087872.0000 - mae: 22319.5801 - val_loss: 5030561280.0000 - val_mae: 27674.1758
Epoch 5/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - loss: 5948643840.0000 - mae: 22718.3164 - val_loss: 4919285760.0000 - val_mae: 22613.2754
Epoch 6/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - loss

[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - loss: 5680045568.0000 - mae: 21493.4121 - val_loss: 4761262080.0000 - val_mae: 20239.6289
Epoch 47/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - loss: 5707609600.0000 - mae: 21416.0527 - val_loss: 4764128768.0000 - val_mae: 23405.1445
Epoch 48/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - loss: 5487254016.0000 - mae: 21405.8418 - val_loss: 4893637632.0000 - val_mae: 25597.4609
Epoch 49/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - loss: 5735612928.0000 - mae: 21594.4297 - val_loss: 4763022848.0000 - val_mae: 24311.6582
Epoch 50/50
[1m4714/4714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - loss: 5143731200.0000 - mae: 21001.6914 - val_loss: 4766175744.0000 - val_mae: 20490.3926
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 915us/step
Validation RMSE: 69037.

In [7]:
# 6. Batch'ler Halinde Test Setinde Tahminler
batch_size = 1000
all_predictions = pd.DataFrame()

for i in range(0, len(test_data), batch_size):
    batch = test_data[i:i+batch_size]
    batch_predictions = model.predict(batch.drop(columns=['id']))
    
    # Eğer batch_predictions 2 boyutluysa düzleştirin
    if batch_predictions.ndim > 1:
        batch_predictions = batch_predictions.flatten()
    
    batch_output = pd.DataFrame({'id': batch['id'].values, 'price': batch_predictions})
    all_predictions = pd.concat([all_predictions, batch_output], ignore_index=True)

# 7. Sonuçları CSV Dosyasına Kaydetme
all_predictions.to_csv('predictions.csv', index=False)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━