In [115]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import re

# 1. Veri Setini Okuma
 # Veri setinin yolu
data = pd.read_csv("used_cars.csv")

# 2. Sütunları Düzenleme
data_cleaned = data.rename(columns={
    'brand': 'Marka',
    'model': 'Model',
    'fuel_type': 'Benzin Türü',
    'transmission': 'Vites',
    'milage': 'Mil',
    'engine': 'Motor',
    'accident': 'Kaza Sayısı',
    'price': 'Fiyat'
})

# 3. Sayısal Dönüştürme ve Temizlik
data_cleaned['Mil'] = data_cleaned['Mil'].str.replace(',', '').str.replace(' mi.', '').astype(float)
data_cleaned['Fiyat'] = data_cleaned['Fiyat'].str.replace('$', '').str.replace(',', '').astype(float)

# 4. Motor Bilgisini Sayısal Forma Dönüştürme
def extract_engine_capacity(engine):
    match = re.search(r"(\d+(\.\d+)?)L", str(engine))
    if match:
        return float(match.group(1))
    return np.nan

data_cleaned['Motor Hacmi'] = data_cleaned['Motor'].apply(extract_engine_capacity)
data_cleaned['Motor Hacmi'] = data_cleaned['Motor Hacmi'].fillna(data_cleaned['Motor Hacmi'].mean())
data_cleaned = data_cleaned.drop(['Motor'], axis=1)

# 5. Kaza Sayısını Dönüştürme
data_cleaned['Kaza Sayısı'] = data_cleaned['Kaza Sayısı'].apply(
    lambda x: 1 if 'accident' in str(x).lower() else 0
)

# 6. Eksik Değerleri Temizleme
data_cleaned = data_cleaned.dropna(subset=['Fiyat'])

# 7. Sayısal Olmayan Diğer Verileri Dönüştürme
categorical_columns = ['Marka', 'Model', 'Benzin Türü', 'Vites', 'ext_col', 'int_col', 'clean_title']
data_encoded = pd.get_dummies(data_cleaned, columns=categorical_columns, drop_first=True)

# 8. Giriş (X) ve Çıkış (y) Değişkenlerini Ayırma
X = data_encoded.drop(['Fiyat'], axis=1)
y = data_encoded['Fiyat']

# 9. Tüm Değişkenlerin Sayısal Olduğundan Emin Olma
print(X.dtypes)

# 10. Veriyi Eğitim ve Test Olarak Ayırma
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 11. Modeli Eğitme
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 12. Modelin Test Edilmesi
y_pred = model.predict(X_test)

# Performans Metrikleri
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Model Performansı:")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared: {r2}")

# 13. Örnek Bir Tahmin
example_data = X_test.iloc[0:1]
predicted_price = model.predict(example_data)
print("\nÖrnek Tahmin:")
print(f"Gerçek Fiyat: {y_test.iloc[0]}")
print(f"Tahmin Edilen Fiyat: {predicted_price[0]}")

model_year                 int64
Mil                      float64
Kaza Sayısı                int64
Motor Hacmi              float64
Marka_Alfa                  bool
                          ...   
int_col_Whisper Beige       bool
int_col_White               bool
int_col_White / Brown       bool
int_col_Yellow              bool
int_col_–                   bool
Length: 2497, dtype: object
Model Performansı:
Mean Squared Error (MSE): 17983137369.722763
R-squared: 0.12018029496701765

Örnek Tahmin:
Gerçek Fiyat: 28000.0
Tahmin Edilen Fiyat: 34440.1


In [116]:
print(X.columns)

Index(['model_year', 'Mil', 'Kaza Sayısı', 'Motor Hacmi', 'Marka_Alfa',
       'Marka_Aston', 'Marka_Audi', 'Marka_BMW', 'Marka_Bentley',
       'Marka_Bugatti',
       ...
       'int_col_Titan Black / Quarzit', 'int_col_Tupelo',
       'int_col_Very Light Cashmere', 'int_col_WHITE', 'int_col_Walnut',
       'int_col_Whisper Beige', 'int_col_White', 'int_col_White / Brown',
       'int_col_Yellow', 'int_col_–'],
      dtype='object', length=2497)


In [117]:
data_cleaned

Unnamed: 0,Marka,Model,model_year,Mil,Benzin Türü,Vites,ext_col,int_col,Kaza Sayısı,clean_title,Fiyat,Motor Hacmi
0,Ford,Utility Police Interceptor Base,2013,51000.0,E85 Flex Fuel,6-Speed A/T,Black,Black,1,Yes,10300.0,3.700000
1,Hyundai,Palisade SEL,2021,34742.0,Gasoline,8-Speed Automatic,Moonlight Cloud,Gray,1,Yes,38005.0,3.800000
2,Lexus,RX 350 RX 350,2022,22372.0,Gasoline,Automatic,Blue,Black,0,,54598.0,3.709045
3,INFINITI,Q50 Hybrid Sport,2015,88900.0,Hybrid,7-Speed A/T,Black,Black,0,Yes,15500.0,3.500000
4,Audi,Q3 45 S line Premium Plus,2021,9835.0,Gasoline,8-Speed Automatic,Glacier White Metallic,Black,0,,34999.0,2.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
4004,Bentley,Continental GT Speed,2023,714.0,Gasoline,8-Speed Automatic with Auto-Shift,C / C,Hotspur,0,Yes,349950.0,6.000000
4005,Audi,S4 3.0T Premium Plus,2022,10900.0,Gasoline,Transmission w/Dual Shift Mode,Black,Black,0,Yes,53900.0,3.000000
4006,Porsche,Taycan,2022,2116.0,,Automatic,Black,Black,0,,90998.0,3.709045
4007,Ford,F-150 Raptor,2020,33000.0,Gasoline,A/T,Blue,Black,0,Yes,62999.0,3.500000


In [121]:
#import joblib

# Modeli kaydetme
#joblib.dump(model, 'arac_fiyat_modeli.pkl')



In [None]:
from flask import Flask, request, jsonify
import pandas as pd
import pickle

app = Flask(__name__)

# Model ve özellikleri yükleme
with open('arac_fiyat_modeli.pkl', 'rb') as file:
    model_data = pickle.load(file)

model = model_data['model']  # Model
model_features = model_data['feature_names']  # Beklenen sütunlar

@app.route('/predict', methods=["POST"])
def predict():
    try:
        # JSON verisini al
        if not request.is_json:
            return jsonify({'error': "Request must be JSON and Content-Type should be 'application/json'"}), 415

        user_data = request.get_json()

        # Kullanıcı verilerini modelin beklediği formata uygun hale getir
        input_data = {col: 0 for col in model_features}
        input_data.update(user_data)

        input_df = pd.DataFrame([input_data]).reindex(columns=model_features, fill_value=0)

        prediction = model.predict(input_df)[0]

        return jsonify({'predicted_price': prediction})

    except Exception as e:
        return jsonify({'error': str(e)}), 400
if __name__ == '__main__':
    app.run()  # Varsayılan olarak localhost:5000 üzerinde çalışır


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [19/Dec/2024 20:09:21] "GET / HTTP/1.1" 404 -
127.0.0.1 - - [19/Dec/2024 20:09:28] "POST /%0A HTTP/1.1" 404 -
127.0.0.1 - - [19/Dec/2024 20:09:29] "POST /%0A HTTP/1.1" 404 -
127.0.0.1 - - [19/Dec/2024 20:09:30] "POST /%0A HTTP/1.1" 404 -
127.0.0.1 - - [19/Dec/2024 20:09:37] "POST /pr%0A HTTP/1.1" 404 -
127.0.0.1 - - [19/Dec/2024 20:09:47] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [19/Dec/2024 20:09:49] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [19/Dec/2024 20:09:49] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [19/Dec/2024 20:09:50] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [19/Dec/2024 20:09:50] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [19/Dec/2024 20:09:51] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [19/Dec/2024 20:37:09] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [19/Dec/2024 20:37:10] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [19/Dec/2024 20:37:19] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - -