In [1]:
import pandas as pd

from sklearn.model_selection import cross_validate
from sklearn.model_selection import ShuffleSplit

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import LinearSVR
from sklearn.svm import NuSVR
from sklearn.svm import SVR
from xgboost import XGBRegressor

In [2]:
# Load the dataset; every column except 'monthly_rent' is used as a feature.
data = pd.read_csv("cleaned_data.csv")

# Seed shared by every stochastic estimator below so that Restart & Run All
# reproduces the same scores. Same value as the seed given to ShuffleSplit.
RANDOM_STATE = 168

# Split data into the feature matrix (NumPy array) and the target (Series).
x = data.drop(columns=['monthly_rent']).values
y = data['monthly_rent']

# Candidate regressors to benchmark. Hyperparameters are the library defaults;
# only the seed is pinned. LinearRegression has no random component and
# therefore takes no random_state.
estimator_list = [
    LinearRegression(),
    DecisionTreeRegressor(random_state=RANDOM_STATE),
    GradientBoostingRegressor(random_state=RANDOM_STATE),
    AdaBoostRegressor(random_state=RANDOM_STATE),
    RandomForestRegressor(random_state=RANDOM_STATE),
    XGBRegressor(random_state=RANDOM_STATE)
]

In [3]:
# Every estimator is evaluated on the same 6 seeded random train/test splits.
# NOTE(review): train_size + test_size = 0.9, so 10% of the rows are left out
# of each split -- confirm this is intentional.
cv_split = ShuffleSplit(n_splits=6, train_size=0.7, test_size=0.2, random_state=168)

# The "Accuracy" columns hold R^2 (coefficient of determination), which is the
# default score of these regressors, not classification accuracy. The column
# names are kept unchanged so existing references to them keep working.
# 'Consumed Time' is the mean fit time per split, in seconds.
df_columns = ['Name', 'Parameters', 'Train Accuracy Mean', 'Test Accuracy Mean', 'Test Accuracy Std', 'Consumed Time']

# Collect one record per estimator and build the frame once afterwards:
# assigning cell by cell with .loc into an empty frame is slow and leaves the
# numeric columns with object dtype.
records = []
for row_index, estimator in enumerate(estimator_list):
    estimator_name = estimator.__class__.__name__
    # scoring='r2' makes the metric explicit; it matches the regressors' default score.
    cv_results = cross_validate(
        estimator, x, y, cv=cv_split, scoring='r2', return_train_score=True
    )
    records.append({
        'Name': estimator_name,
        'Parameters': str(estimator.get_params()),
        'Train Accuracy Mean': cv_results['train_score'].mean(),
        'Test Accuracy Mean': cv_results['test_score'].mean(),
        'Test Accuracy Std': cv_results['test_score'].std(),
        'Consumed Time': cv_results['fit_time'].mean(),
    })
    # Progress log: position in estimator_list, model name, per-split test scores.
    print(row_index, estimator_name)
    print(cv_results['test_score'])

# Rank the models from best to worst mean test score.
df = pd.DataFrame(records, columns=df_columns)
df = df.sort_values(by='Test Accuracy Mean', ascending=False)
df

0 LinearRegression
[0.45152949 0.45413545 0.46495467 0.44756284 0.45507845 0.45250761]
1 DecisionTreeRegressor
[0.05446323 0.06533436 0.07751641 0.04305873 0.0797045  0.07530964]
2 GradientBoostingRegressor
[0.51643969 0.51605782 0.52698699 0.51454021 0.51993311 0.52043357]
3 AdaBoostRegressor
[0.40264341 0.42490341 0.42992161 0.39797224 0.42563536 0.42645981]
4 RandomForestRegressor
[0.46740568 0.46966009 0.48396499 0.4647815  0.47904971 0.47480648]
5 XGBRegressor
[0.51164719 0.51801495 0.52748892 0.50863039 0.52167531 0.51538483]


Unnamed: 0,Name,Parameters,Train Accuracy Mean,Test Accuracy Mean,Test Accuracy Std,Consumed Time
2,GradientBoostingRegressor,"{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': ...",0.528182,0.519065,0.004119,3.809669
5,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",0.641318,0.51714,0.006246,0.625877
4,RandomForestRegressor,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",0.905865,0.473278,0.006704,14.163622
0,LinearRegression,"{'copy_X': True, 'fit_intercept': True, 'n_job...",0.452711,0.454295,0.00533,0.021144
3,AdaBoostRegressor,"{'base_estimator': None, 'learning_rate': 1.0,...",0.418998,0.417923,0.012626,1.486045
1,DecisionTreeRegressor,"{'ccp_alpha': 0.0, 'criterion': 'mse', 'max_de...",0.968835,0.065898,0.013335,0.230248
