In [None]:
# KÃ¼tÃ¼phaneler
import pandas as pd
import os
import glob
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

In [None]:
# Veri setini yÃ¼kleyin
df = pd.read_csv("kc_house_data.csv")

# Veri setinin boyutunu kontrol edin
print(f"Veri seti boyutu: {df.shape}")

# Eksik deÄŸerleri kontrol edin
print(f"Eksik deÄŸerler: {df.isnull().sum().sum()}")

In [12]:
# ----------------------------
# Veri Ã–n Ä°ÅŸleme
# ----------------------------

# Date sÃ¼tununu datetime formatÄ±na dÃ¶nÃ¼ÅŸtÃ¼rme
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day

# Ev yaÅŸÄ± hesaplama
df['house_age'] = 2023 - df['yr_built']

# One-Hot Encoding (OHE) - zipcode iÃ§in
# df = pd.get_dummies(df, columns=['zipcode'], drop_first=True)

In [None]:
# ----------------------------
# Veri GÃ¶rselleÅŸtirme
# ----------------------------

# Ev FiyatlarÄ± DaÄŸÄ±lÄ±mÄ±
plt.figure(figsize=(12, 8))
sns.histplot(df['price'], kde=True)
plt.title('Ev FiyatlarÄ± DaÄŸÄ±lÄ±mÄ±')
plt.show()

# Korelasyon analizi (OHE'den sonra yapÄ±lmalÄ±)
plt.figure(figsize=(15, 10))
numeric_df = df.select_dtypes(include=[np.number])
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('DeÄŸiÅŸkenler ArasÄ± Korelasyon')
plt.show()

# Fiyat ve diÄŸer Ã¶zellikler arasÄ±ndaki iliÅŸki 
plt.figure(figsize=(20, 15))
plt.subplot(2, 3, 1)
sns.scatterplot(x='sqft_living', y='price', data=df)
plt.title('YaÅŸam AlanÄ± - Fiyat')

plt.subplot(2, 3, 2)
sns.scatterplot(x='grade', y='price', data=df)
plt.title('Kalite Derecesi - Fiyat')

# ----------------------------
# EÄŸitim ve test verisine ayÄ±rma
# ----------------------------
from sklearn.model_selection import train_test_split
X = df.drop(['price', 'id', 'date'], axis=1)
y = df['price']
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Model eÄŸitimi
# ...

# Model deÄŸerlendirmesi
# ...

In [27]:
# Regresyon iÃ§in Gerekli KÃ¼tÃ¼phaneler
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,Ridge,Lasso,ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

# Ã–zellik Ã–lÃ§eklendirme
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(x_train)
X_test_scaled = scaler.transform(x_test)

# Modellerin TanÄ±mlanmasÄ±
models = {
    'DoÄŸrusal Regresyon': LinearRegression(),
    'Ridge Regresyon': Ridge(alpha=1.0),
    'Lasso Regresyon': Lasso(alpha=0.1, max_iter=10000),
    'ElasticNet': ElasticNet(alpha=0.1, max_iter=10000),
    'SVR': SVR(),
    'KNN Regressor': KNeighborsRegressor(n_neighbors=5),
    'Karar AÄŸacÄ±': DecisionTreeRegressor(max_depth=10, random_state=42),
    'Rastgele Orman': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradyan ArttÄ±rma': GradientBoostingRegressor(n_estimators=100, random_state=42)
}

# Hangi modellerin Ã¶lÃ§eklendirme gerektirdiÄŸini belirle
scaled_models = {
    'DoÄŸrusal Regresyon',
    'Ridge Regresyon',
    'Lasso Regresyon',
    'ElasticNet',
    'SVR',
    'KNN Regressor'
}

# Model performans karÅŸÄ±laÅŸtÄ±rma iÃ§in sonuÃ§lar
results = {}

# Model EÄŸitimi ve DeÄŸerlendirme
for name, model in models.items():
    print(f"{name} eÄŸitiliyor...")

    if name in scaled_models:
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)

    # Performans hesaplama
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    results[name] = {
        'MAE': mae,
        'MSE': mse,
        'RMSE': rmse,
        'R2': r2
    }

    print(f"{name} - MAE: {mae:.2f}, MSE: {mse:.2f}, RMSE: {rmse:.2f}, R2: {r2:.4f}")

# En iyi modeli R2 skoruna gÃ¶re seÃ§
best_model_name = max(results, key=lambda x: results[x]['R2'])
print(f"\nðŸš€ En iyi model: {best_model_name} (R2: {results[best_model_name]['R2']:.4f})")

DoÄŸrusal Regresyon eÄŸitiliyor...
DoÄŸrusal Regresyon - MAE: 126929.17, MSE: 44951491944.93, RMSE: 212017.67, R2: 0.7027
Ridge Regresyon eÄŸitiliyor...
Ridge Regresyon - MAE: 126927.37, MSE: 44951804818.32, RMSE: 212018.41, R2: 0.7027
Lasso Regresyon eÄŸitiliyor...


  model = cd_fast.enet_coordinate_descent(


Lasso Regresyon - MAE: 126929.18, MSE: 44951497729.15, RMSE: 212017.68, R2: 0.7027
ElasticNet eÄŸitiliyor...
ElasticNet - MAE: 125746.22, MSE: 45308527069.61, RMSE: 212858.00, R2: 0.7003
SVR eÄŸitiliyor...
SVR - MAE: 229344.17, MSE: 160777128599.22, RMSE: 400970.23, R2: -0.0635
KNN Regressor eÄŸitiliyor...


[WinError 2] Sistem belirtilen dosyayÄ± bulamÄ±yor
  File "C:\Users\hp\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Users\hp\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hp\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\hp\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


KNN Regressor - MAE: 100048.45, MSE: 35181209948.32, RMSE: 187566.55, R2: 0.7673
Karar AÄŸacÄ± eÄŸitiliyor...
Karar AÄŸacÄ± - MAE: 98999.79, MSE: 41460791328.85, RMSE: 203619.23, R2: 0.7257
Rastgele Orman eÄŸitiliyor...



KeyboardInterrupt

