In [14]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression 
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import OneHotEncoder

from sklearn import metrics
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error


In [15]:
df = pd.read_csv("./utils/final_apartment.csv")
df.columns

Index(['Unnamed: 0', 'id', 'Price', 'Type of property', 'Subtype of property',
       'Locality', 'Surroundings type', 'Energy class', 'Heating type',
       'Province', 'Zip', 'Living area', 'Number of rooms', 'Bathrooms',
       'Construction year', 'Number of facades', 'Swimming pool', 'Furnished',
       'Open fire', 'Terrace', 'Terrace surface', 'Garden', 'Garden surface',
       'Primary energy consumption', 'Kitchen values', 'Building Cond. values',
       'Parking', 'Urban_value', 'Normal_apt', 'Big_apt'],
      dtype='object')

In [22]:
province = pd.get_dummies(df[['Province']])

y = df['Price']
X = pd.concat([df[[
       'Living area', 
       'Number of rooms', 
       'Bathrooms', 
       'Construction year',
       # 'Number of facades', 
       # 'Swimming pool', 
       'Furnished', 
       'Open fire',
       # 'Terrace', 'Terrace surface', 'Garden', 'Garden surface',
       'Primary energy consumption', 'Kitchen values', 'Building Cond. values',
       'Parking', 'Urban_value', 'Normal_apt', 'Big_apt']],province],axis=1)

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) 

In [24]:
lr = LinearRegression()
reg = lr.fit(X_train, y_train)
y_predicted = lr.predict(X_test)

rmse = mean_squared_error(y_predicted,y_test)
print("rmse: ",round(rmse)," €")

rmae = mean_absolute_error(y_predicted,y_test)
print("rmae: ",round(rmae)," €")

r2 = r2_score(y_predicted, y_test)
print("r2: ",round(r2,4))

score = lr.score(X_test, y_test)
print("score: ", round(score,4))

rmse:  48393743942  €
rmae:  131099  €
r2:  0.0576
score:  0.4935


In [13]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import metrics
from sklearn.metrics import mean_squared_error, r2_score
import xgboost


models = []
models.append(('Linear Regression', LinearRegression()))
models.append(("XGBoostRegressor", xgboost.XGBRegressor()))
models.append(('Logistic Regression', LogisticRegression()))
models.append(('Naive Bayes', GaussianNB()))
models.append(('Decision Tree (CART)',DecisionTreeClassifier())) 
models.append(('K-NN', KNeighborsClassifier()))
models.append(('SVM', SVC()))
# models.append(('Gradient Boosting Classifier', GradientBoostingClassifier()))
models.append(('AdaBoostClassifier', AdaBoostClassifier()))
models.append(('BaggingClassifier', BaggingClassifier()))
models.append(('RandomForestClassifier', RandomForestClassifier()))
models.append(('MLPClassifier', MLPClassifier()))

for name, model in models:
    model = model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{name} ----> R2 Score: {r2_score(y_pred,y_test)}")
    print(f"{name} ----> Abs Error: {mean_absolute_error(y_pred,y_test)}")
    print(30*"_")

Linear Regression ----> R2 Score: 0.05825849050354048
Linear Regression ----> Abs Error: 131265.9543674274
______________________________
XGBoostRegressor ----> R2 Score: 0.6922057930574032
XGBoostRegressor ----> Abs Error: 89921.31795202005
______________________________
Logistic Regression ----> R2 Score: 0.0
Logistic Regression ----> Abs Error: 178330.72868525898
______________________________
Naive Bayes ----> R2 Score: -29.437765960238302
Naive Bayes ----> Abs Error: 219327.57729083666
______________________________
Decision Tree (CART) ----> R2 Score: 0.5088179005390182
Decision Tree (CART) ----> Abs Error: 121175.30677290837
______________________________
K-NN ----> R2 Score: -0.2087343430601285
K-NN ----> Abs Error: 169207.81593625498
______________________________
SVM ----> R2 Score: -81.86496484985621
SVM ----> Abs Error: 192824.85219123506
______________________________
AdaBoostClassifier ----> R2 Score: -2.0283857483458845
AdaBoostClassifier ----> Abs Error: 163332.99243027

