In [54]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

In [55]:
def load_data(filepath):
    """Read the CSV at ``filepath`` (a path or file-like object) into a DataFrame.

    The argument is forwarded to ``pd.read_csv`` unchanged and the parsed
    frame is returned as-is; no cleaning or validation happens here.
    """
    return pd.read_csv(filepath)

In [56]:
def preprocess_data(df):
    """Select model inputs, split them into train/test sets and build a preprocessor.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the numeric columns 'Size (m²)', 'Building Age' and
        'Room_Numeric', the categorical columns 'District' and
        'Neighborhood', and the target column 'Price (TL)'.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, preprocessor)`` where
        ``preprocessor`` is a ``ColumnTransformer`` that has been constructed
        but not fitted.
    """
    num_cols = ['Size (m²)', 'Building Age', 'Room_Numeric']
    cat_cols = ['District', 'Neighborhood']

    features = df[num_cols + cat_cols]
    target = df['Price (TL)']

    # 20% of the rows are held out; the fixed seed keeps the split reproducible.
    split = train_test_split(features, target, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split

    scaling_step = ('num', StandardScaler(), num_cols)
    # handle_unknown='ignore' keeps transform from raising on categories that
    # were not present when the encoder was fitted.
    encoding_step = ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols)
    preprocessor = ColumnTransformer(transformers=[scaling_step, encoding_step])

    return X_train, X_test, y_train, y_test, preprocessor

In [57]:
def train_best_model(X_train, y_train, preprocessor):
    """Fit a preprocessing + gradient-boosting pipeline through ``GridSearchCV``.

    Parameters
    ----------
    X_train, y_train
        Training features and target passed straight to ``GridSearchCV.fit``.
    preprocessor
        Transformer used as the first pipeline step.

    Returns
    -------
    The ``best_estimator_`` of the fitted search.

    Side effects
    ------------
    Prints the cross-validated RMSE of the best parameter combination.
    """
    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('model', GradientBoostingRegressor(random_state=42)),
    ])

    # The grid holds a single candidate, so the search only provides a
    # cross-validated score for this configuration. The 'model__' prefix
    # addresses parameters of the pipeline step named 'model'.
    search_space = {
        'model__n_estimators': [200],
        'model__learning_rate': [0.1],
        'model__max_depth': [7],
    }

    searcher = GridSearchCV(
        pipeline,
        search_space,
        cv=5,
        scoring='neg_mean_squared_error',
    )
    searcher.fit(X_train, y_train)

    # The scorer reports negated MSE, so flip the sign before taking the root.
    cv_rmse = np.sqrt(-searcher.best_score_)
    print(f"En iyi RMSE: {cv_rmse:.2f}")

    return searcher.best_estimator_

In [58]:
def evaluate_model(model, X_test, y_test):
    """Score ``model`` on held-out data, print the metrics and return them.

    Parameters
    ----------
    model
        Any object exposing ``predict(X_test)``.
    X_test, y_test
        Held-out features and true target values.

    Returns
    -------
    dict
        ``{'RMSE': ..., 'MAE': ..., 'R2': ...}`` computed on the test data.
    """
    predictions = model.predict(X_test)

    metrics = {
        # RMSE is the square root of the mean squared error.
        'RMSE': np.sqrt(mean_squared_error(y_test, predictions)),
        'MAE': mean_absolute_error(y_test, predictions),
        'R2': r2_score(y_test, predictions),
    }

    print("\nTest seti değerlendirmesi:")
    print(f"RMSE: {metrics['RMSE']:.2f}")
    print(f"MAE: {metrics['MAE']:.2f}")
    print(f"R2 Score: {metrics['R2']:.4f}")

    return metrics

In [59]:
def save_model(model, filename='emlak_fiyat_model.pkl'):
    """Persist ``model`` to disk with ``joblib.dump`` and report where it went.

    Parameters
    ----------
    model
        Object to serialize (in this notebook, the fitted pipeline).
    filename
        Destination path; defaults to 'emlak_fiyat_model.pkl' in the current
        working directory.

    Returns
    -------
    The ``filename`` that was passed in, unchanged.
    """
    joblib.dump(model, filename)
    print(f"\nModel kaydedildi: {filename}")
    return filename

In [60]:
def predict_price(model, input_data):
    """Return the model's prediction for one listing.

    Parameters
    ----------
    model
        Any object exposing ``predict(DataFrame)``.
    input_data : dict
        Mapping of feature name to value for a single listing.

    Returns
    -------
    The first element of ``model.predict`` for the one-row frame.
    """
    # Wrapping the dict in a list yields a one-row DataFrame whose columns
    # are the dict keys.
    single_row = pd.DataFrame([input_data])
    return model.predict(single_row)[0]

In [61]:
def _prompt_choice(prompt, options):
    """Ask for a 1-based menu number until it maps to an entry of ``options``.

    Input that is not an integer, or that falls outside ``1..len(options)``,
    is rejected with a message and the prompt is repeated. This keeps ``0``
    and negative numbers from silently selecting entries from the end of the
    list through Python's negative indexing.

    Raises
    ------
    ValueError
        If ``options`` is empty, since no input could ever be valid.
    """
    if not options:
        raise ValueError("Seçim listesi boş.")

    while True:
        try:
            position = int(input(prompt))
        except ValueError:
            position = 0  # not a number: handled as out of range below
        if 1 <= position <= len(options):
            return options[position - 1]
        print(f"Geçersiz seçim. Lütfen 1 ile {len(options)} arasında bir sayı girin.")


def _prompt_number(prompt, cast):
    """Ask for a value until ``cast`` (``int`` or ``float``) can parse it."""
    while True:
        try:
            return cast(input(prompt))
        except ValueError:
            print("Geçersiz değer. Lütfen sayısal bir değer girin.")


def create_prediction_app(model, data_df):
    """Run an interactive console session that predicts the price of one listing.

    The user picks a district and then one of that district's neighbourhoods
    from numbered menus built from ``data_df``, enters size, room count and
    building age, and the predicted price is printed.

    Parameters
    ----------
    model
        Any object exposing ``predict(DataFrame)``; it receives a one-row
        frame with the columns 'Size (m²)', 'Building Age', 'Room_Numeric',
        'District' and 'Neighborhood'.
    data_df : pandas.DataFrame
        Source of the menu entries; must contain 'District' and
        'Neighborhood' columns.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``data_df`` offers no district, or the chosen district has no
        neighbourhood, to choose from.
    """
    print("\n===== EMLAK FİYAT TAHMİN UYGULAMASI =====")

    district_list = data_df['District'].unique().tolist()

    print("\nMevcut ilçeler:")
    for i, district in enumerate(district_list, 1):
        print(f"{i}. {district}")

    selected_district = _prompt_choice("\nİlçe numarasını seçin: ", district_list)

    # Only offer neighbourhoods that occur together with the chosen district.
    in_district = data_df['District'] == selected_district
    neighborhoods = data_df.loc[in_district, 'Neighborhood'].unique().tolist()

    print(f"\n'{selected_district}' ilçesindeki mahalleler:")
    for i, neighborhood in enumerate(neighborhoods, 1):
        print(f"{i}. {neighborhood}")

    selected_neighborhood = _prompt_choice("\nMahalle numarasını seçin: ", neighborhoods)

    size = _prompt_number("\nEmlak büyüklüğü (m²): ", float)
    room_count = _prompt_number("Oda sayısı (örn: 3+1 için 4 girin): ", int)
    building_age = _prompt_number("Bina yaşı: ", float)

    input_data = {
        'Size (m²)': size,
        'Building Age': building_age,
        'Room_Numeric': room_count,
        'District': selected_district,
        'Neighborhood': selected_neighborhood
    }

    # A list holding one dict becomes a one-row frame keyed by feature name.
    input_df = pd.DataFrame([input_data])
    predicted_price = model.predict(input_df)[0]

    print(f"\nTahmini Fiyat: {predicted_price:,.2f} TL")
    return None

In [64]:
def main(filepath='emlak-veri-egitime-hazir.csv'):
    """Run the whole training workflow for the listings CSV at ``filepath``.

    Steps, in order: load the CSV, split it and build the preprocessor, fit
    the model, print test-set metrics, and save the fitted model to the
    default file name used by ``save_model``.

    Returns
    -------
    tuple
        ``(best_model, df)``: the fitted estimator and the loaded DataFrame.
    """
    listings = load_data(filepath)

    train_X, test_X, train_y, test_y, transformer = preprocess_data(listings)

    fitted = train_best_model(train_X, train_y, transformer)

    # The metrics are printed by evaluate_model; its return value is not used.
    evaluate_model(fitted, test_X, test_y)
    save_model(fitted)

    return fitted, listings


# Entry point: run the training workflow, then start the interactive console
# prompt with the fitted model and the loaded data. `model` and `df` are bound
# at module level, so they remain available to later cells.
if __name__ == "__main__":
    model, df = main()

    create_prediction_app(model, df)

En iyi RMSE: 6814.53

Test seti değerlendirmesi:
RMSE: 7044.29
MAE: 4796.70
R2 Score: 0.6276

Model kaydedildi: emlak_fiyat_model.pkl

===== EMLAK FİYAT TAHMİN UYGULAMASI =====

Mevcut ilçeler:
1. Altındağ
2. Çankaya
3. Etimesgut
4. Polatlı
5. Gölbaşı
6. Yenimahalle
7. Keçiören
8. Mamak
9. Sincan
10. Çubuk
11. Pursaklar
12. Nallıhan
13. Akyurt
14. Şereflikoçhisar
15. Kahramankazan
16. Kızılcahamam
17. Elmadağ

'Altındağ' ilçesindeki mahalleler:
1. Aydınlıkevler Mah.
2. Örnek Mah.
3. Battalgazi Mah.
4. Doğantepe Mah.
5. Güneşevler Mah.
6. Hacettepe Mah.
7. Karapürçek Mah.
8. Zübeyde Hanım Mah.
9. Başpınar Mah.
10. Gültepe Mah.
11. Feridun Çelik Mah.
12. Baraj Mah.
13. Yıldıztepe Mah.
14. Beşikkaya Mah.
15. Karacaören Mah.
16. Merkez
17. Ulubey
18. Yeşilöz
19. Hasköy

Tahmini Fiyat: 22,076.06 TL
