In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data = pd.read_csv("emlak_verileri2.csv", sep=';')

In [3]:
data.columns

Index(['Metrekare', 'Oda', 'Kat', 'Konum', 'Fiyat'], dtype='object')

In [4]:
data.head()

Unnamed: 0,Metrekare,Oda,Kat,Konum,Fiyat
0,160.0,3 + 1,2.0,"Torbalı, Alpkent Mahallesi",4200000.0
1,155.0,3 + 1,2.0,"Torbalı, Torbalı Mahallesi",2984000.0
2,105.0,2 + 1,5.0,"Torbalı, Tepeköy Mahallesi",2349000.0
3,90.0,2 + 1,1.0,"Buca, Göksu Mahallesi",2100000.0
4,135.0,3 + 1,6.0,"Bornova, Erzene Mahallesi",4450000.0


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1364 entries, 0 to 1363
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Metrekare  1332 non-null   float64
 1   Oda        1332 non-null   object 
 2   Kat        1332 non-null   float64
 3   Konum      1332 non-null   object 
 4   Fiyat      1332 non-null   float64
dtypes: float64(3), object(2)
memory usage: 53.4+ KB


In [6]:
def oda_donusumu(odabilgi):
    if isinstance(odabilgi, str):
        # String değerse dönüşümü gerçekleştir
        oda_sayisi = int(odabilgi.split('+')[0].strip())
        return oda_sayisi
    else:
        # Zaten sayısal bir değerse dokunma
        return odabilgi

# 'Oda' sütununu dönüştürme
data['Oda'] = data['Oda'].apply(oda_donusumu)

In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1364 entries, 0 to 1363
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Metrekare  1332 non-null   float64
 1   Oda        1332 non-null   float64
 2   Kat        1332 non-null   float64
 3   Konum      1332 non-null   object 
 4   Fiyat      1332 non-null   float64
dtypes: float64(4), object(1)
memory usage: 53.4+ KB


In [8]:
def konum_duzenle(konum):
    if isinstance(konum, str):
        # String değerse dönüşümü gerçekleştir
        return konum.split(',')[0].strip()
    else:
        # Zaten sayısal bir değerse dokunma
        return konum

# 'Konum' sütununu düzenleme
data['Konum'] = data['Konum'].apply(konum_duzenle)

In [9]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1364 entries, 0 to 1363
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Metrekare  1332 non-null   float64
 1   Oda        1332 non-null   float64
 2   Kat        1332 non-null   float64
 3   Konum      1332 non-null   object 
 4   Fiyat      1332 non-null   float64
dtypes: float64(4), object(1)
memory usage: 53.4+ KB


In [10]:
data.head()

Unnamed: 0,Metrekare,Oda,Kat,Konum,Fiyat
0,160.0,3.0,2.0,Torbalı,4200000.0
1,155.0,3.0,2.0,Torbalı,2984000.0
2,105.0,2.0,5.0,Torbalı,2349000.0
3,90.0,2.0,1.0,Buca,2100000.0
4,135.0,3.0,6.0,Bornova,4450000.0


In [11]:
data = pd.get_dummies(data, columns=['Konum'], prefix='Konum', drop_first=True)

data = data.fillna(0)

data = data.astype(int)

In [12]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1364 entries, 0 to 1363
Data columns (total 29 columns):
 #   Column             Non-Null Count  Dtype
---  ------             --------------  -----
 0   Metrekare          1364 non-null   int32
 1   Oda                1364 non-null   int32
 2   Kat                1364 non-null   int32
 3   Fiyat              1364 non-null   int32
 4   Konum_Balçova      1364 non-null   int32
 5   Konum_Bayraklı     1364 non-null   int32
 6   Konum_Bergama      1364 non-null   int32
 7   Konum_Bornova      1364 non-null   int32
 8   Konum_Buca         1364 non-null   int32
 9   Konum_Dikili       1364 non-null   int32
 10  Konum_Foça         1364 non-null   int32
 11  Konum_Gaziemir     1364 non-null   int32
 12  Konum_Güzelbahçe   1364 non-null   int32
 13  Konum_Karabağlar   1364 non-null   int32
 14  Konum_Karaburun    1364 non-null   int32
 15  Konum_Karşıyaka    1364 non-null   int32
 16  Konum_Kemalpaşa    1364 non-null   int32
 17  Konum_Konak   

In [13]:
data.to_csv('IslenmisVeri.csv', index=False)

In [14]:
# Linear Regresyon Algoritmasi
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
import joblib

veri_seti = pd.read_csv('IslenmisVeri.csv')

print(veri_seti.info())

X = veri_seti.drop('Fiyat', axis=1)
y = veri_seti['Fiyat']

imputer_X = SimpleImputer(strategy='mean')
X_imputed = imputer_X.fit_transform(X)

imputer_y = SimpleImputer(strategy='mean')
y_imputed = imputer_y.fit_transform(y.values.reshape(-1, 1)).flatten()

X_train, X_test, y_train, y_test = train_test_split(X_imputed, y_imputed, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_train_pred = model.predict(X_train)

y_test_pred = model.predict(X_test)

r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

print(f'Training R-squared: {r2_train:.4f}')
print(f'Test R-squared: {r2_test:.4f}')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1364 entries, 0 to 1363
Data columns (total 29 columns):
 #   Column             Non-Null Count  Dtype
---  ------             --------------  -----
 0   Metrekare          1364 non-null   int64
 1   Oda                1364 non-null   int64
 2   Kat                1364 non-null   int64
 3   Fiyat              1364 non-null   int64
 4   Konum_Balçova      1364 non-null   int64
 5   Konum_Bayraklı     1364 non-null   int64
 6   Konum_Bergama      1364 non-null   int64
 7   Konum_Bornova      1364 non-null   int64
 8   Konum_Buca         1364 non-null   int64
 9   Konum_Dikili       1364 non-null   int64
 10  Konum_Foça         1364 non-null   int64
 11  Konum_Gaziemir     1364 non-null   int64
 12  Konum_Güzelbahçe   1364 non-null   int64
 13  Konum_Karabağlar   1364 non-null   int64
 14  Konum_Karaburun    1364 non-null   int64
 15  Konum_Karşıyaka    1364 non-null   int64
 16  Konum_Kemalpaşa    1364 non-null   int64
 17  Konum_Konak   

In [15]:
#Decision Tree Model
from sklearn.tree import DecisionTreeRegressor

veri_seti = pd.read_csv('IslenmisVeri.csv')

# Veriyi inceleyin
print(veri_seti.info())

# Bağımsız değişkenleri (X) ve bağımlı değişkeni (y) belirleyin
X = veri_seti.drop('Fiyat', axis=1)
y = veri_seti['Fiyat']

# Eksik değerleri ortalama ile doldurun (X için)
imputer_X = SimpleImputer(strategy='mean')
X_imputed = imputer_X.fit_transform(X)

# Eksik değerleri ortalama ile doldurun (y için)
imputer_y = SimpleImputer(strategy='mean')
y_imputed = imputer_y.fit_transform(y.values.reshape(-1, 1)).flatten()

# Verileri eğitim ve test setlerine bölin
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y_imputed, test_size=0.2, random_state=42)

# Decision Tree modelini oluşturun ve eğitin
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Eğitim seti üzerinde tahmin yapın
y_train_pred = model.predict(X_train)

# Test seti üzerinde tahmin yapın
y_test_pred = model.predict(X_test)

# R-squared değerini hesaplayın
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

# Sonuçları ekrana yazdırın
print(f'Training R-squared: {r2_train:.4f}')
print(f'Test R-squared: {r2_test:.4f}')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1364 entries, 0 to 1363
Data columns (total 29 columns):
 #   Column             Non-Null Count  Dtype
---  ------             --------------  -----
 0   Metrekare          1364 non-null   int64
 1   Oda                1364 non-null   int64
 2   Kat                1364 non-null   int64
 3   Fiyat              1364 non-null   int64
 4   Konum_Balçova      1364 non-null   int64
 5   Konum_Bayraklı     1364 non-null   int64
 6   Konum_Bergama      1364 non-null   int64
 7   Konum_Bornova      1364 non-null   int64
 8   Konum_Buca         1364 non-null   int64
 9   Konum_Dikili       1364 non-null   int64
 10  Konum_Foça         1364 non-null   int64
 11  Konum_Gaziemir     1364 non-null   int64
 12  Konum_Güzelbahçe   1364 non-null   int64
 13  Konum_Karabağlar   1364 non-null   int64
 14  Konum_Karaburun    1364 non-null   int64
 15  Konum_Karşıyaka    1364 non-null   int64
 16  Konum_Kemalpaşa    1364 non-null   int64
 17  Konum_Konak   

In [16]:
# Random Forest Regressor

from sklearn.ensemble import RandomForestRegressor

model_random_forest = RandomForestRegressor(random_state=42)
model_random_forest.fit(X_train, y_train)

y_train_pred_rf = model_random_forest.predict(X_train)

y_test_pred_rf = model_random_forest.predict(X_test)

r2_train_rf = r2_score(y_train, y_train_pred_rf)
r2_test_rf = r2_score(y_test, y_test_pred_rf)

print(f'Random Forest - Training R-squared: {r2_train_rf:.4f}')
print(f'Random Forest - Test R-squared: {r2_test_rf:.4f}')

Random Forest - Training R-squared: 0.8642
Random Forest - Test R-squared: 0.4438


In [17]:
# Support Vector Machine Regressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model_svm = SVR()
model_svm.fit(X_train_scaled, y_train)

y_train_pred_svm = model_svm.predict(X_train_scaled)

y_test_pred_svm = model_svm.predict(X_test_scaled)

r2_train_svm = r2_score(y_train, y_train_pred_svm)
r2_test_svm = r2_score(y_test, y_test_pred_svm)

print(f'SVM - Training R-squared: {r2_train_svm:.4f}')
print(f'SVM - Test R-squared: {r2_test_svm:.4f}')

SVM - Training R-squared: -0.0596
SVM - Test R-squared: -0.0547


In [18]:
# K-Nearest Neighbors Regressor
from sklearn.neighbors import KNeighborsRegressor

model_knn = KNeighborsRegressor()
model_knn.fit(X_train_scaled, y_train)

y_train_pred_knn = model_knn.predict(X_train_scaled)

y_test_pred_knn = model_knn.predict(X_test_scaled)

r2_train_knn = r2_score(y_train, y_train_pred_knn)
r2_test_knn = r2_score(y_test, y_test_pred_knn)

print(f'KNN - Training R-squared: {r2_train_knn:.4f}')
print(f'KNN - Test R-squared: {r2_test_knn:.4f}')

KNN - Training R-squared: 0.6216
KNN - Test R-squared: 0.3211


In [19]:
# Gradient Boosting Regressor. Tahmin yaparken model olarak bunu kullaniyoruz.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score
import joblib

data = pd.read_csv('IslenmisVeri.csv')

X = data.drop('Fiyat', axis=1)
y = data['Fiyat']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model_gradient_boosting = GradientBoostingRegressor(random_state=42)
model_gradient_boosting.fit(X_train, y_train)

y_train_pred_gb = model_gradient_boosting.predict(X_train)

y_test_pred_gb = model_gradient_boosting.predict(X_test)

r2_train_gb = r2_score(y_train, y_train_pred_gb)
r2_test_gb = r2_score(y_test, y_test_pred_gb)

joblib.dump(model_gradient_boosting, 'gradient_boosting_model.pkl')

loaded_model_gb = joblib.load('gradient_boosting_model.pkl')

y_test_pred_loaded_gb = loaded_model_gb.predict(X_test)

r2_test_loaded_gb = r2_score(y_test, y_test_pred_loaded_gb)

print(f'Gradient Boosting - Training R-squared: {r2_train_gb:.4f}')
print(f'Gradient Boosting - Test R-squared: {r2_test_gb:.4f}')

print(f'Gradient Boosting (Loaded) - Test R-squared: {r2_test_loaded_gb:.4f}')

Gradient Boosting - Training R-squared: 0.6634
Gradient Boosting - Test R-squared: 0.5176
Gradient Boosting (Loaded) - Test R-squared: 0.5176


In [20]:
# Linear Support Vector Regression 
from sklearn.svm import LinearSVR
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model_linear_svr = LinearSVR(random_state=42)
model_linear_svr.fit(X_train_scaled, y_train)

y_train_pred_linear_svr = model_linear_svr.predict(X_train_scaled)

y_test_pred_linear_svr = model_linear_svr.predict(X_test_scaled)

r2_train_linear_svr = r2_score(y_train, y_train_pred_linear_svr)
r2_test_linear_svr = r2_score(y_test, y_test_pred_linear_svr)

print(f'Linear SVR - Training R-squared: {r2_train_linear_svr:.4f}')
print(f'Linear SVR - Test R-squared: {r2_test_linear_svr:.4f}')

Linear SVR - Training R-squared: -2.4463
Linear SVR - Test R-squared: -2.3891


