In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, Lasso
from sklearn.neighbors import KNeighborsRegressor
import xgboost as xgb



from sklearn.metrics import mean_squared_error,r2_score, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split


import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, so library deprecation and
# data-conversion warnings are hidden too — consider narrowing it.
warnings.simplefilter('ignore')

# Let pandas render up to 500 columns / 500 rows before truncating output.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [2]:
# Load the dataset from a CSV file resolved relative to the notebook's
# working directory.
ds = pd.read_csv("data_set.csv")
# Show the column index so the feature and target names used below can be checked.
print(ds.columns)

Index(['housing_median_age', 'total_rooms', 'total_bedrooms', 'population',
       'households', 'median_income', 'median_house_value',
       'ocean_proximity_INLAND', 'ocean_proximity_ISLAND',
       'ocean_proximity_NEAR BAY', 'ocean_proximity_NEAR OCEAN'],
      dtype='object')


In [3]:
from sklearn.model_selection import train_test_split

# Fixed seed for the shuffle performed by train_test_split. Without it the
# train/test partition — and therefore every metric computed later — changes
# on each run of the notebook.
RANDOM_STATE = 42

# Predictor columns: every column of `ds` except the target.
X_columns = ['housing_median_age', 'total_rooms', 'total_bedrooms', 'population',
       'households', 'median_income',
       'ocean_proximity_INLAND', 'ocean_proximity_ISLAND',
       'ocean_proximity_NEAR BAY', 'ocean_proximity_NEAR OCEAN']

# Target column, kept as a one-element list so that `y` remains a
# single-column DataFrame exactly as before.
y_column = ['median_house_value']

X = ds[X_columns]
y = ds[y_column]

# Hold out 30% of the rows for testing; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)


In [4]:
def model_metrics(ml_model, X_train, y_train, X_test, y_test):
    """Collect train/test quality metrics for an already-fitted model.

    Parameters
    ----------
    ml_model
        Fitted estimator exposing ``score(X, y)`` and ``predict(X)``.
    X_train, y_train
        Features and targets the model was trained on.
    X_test, y_test
        Held-out features and targets.

    Returns
    -------
    list
        Nine items, in this order: model name (the text of ``str(ml_model)``
        before the first ``"("``), train score, test score, train RMSE,
        test RMSE, train MAE, test MAE, train MAPE, test MAPE.
        The scores are whatever ``ml_model.score`` returns; MAPE is reported
        as returned by ``mean_absolute_percentage_error``.
    """
    # Predict once per split and reuse the result for all three error
    # metrics, instead of re-running inference for every metric.
    train_pred = ml_model.predict(X_train)
    test_pred = ml_model.predict(X_test)

    return [
        str(ml_model).split("(")[0],
        ml_model.score(X_train, y_train),
        ml_model.score(X_test, y_test),
        # RMSE is the square root of the mean squared error.
        np.sqrt(mean_squared_error(y_train, train_pred)),
        np.sqrt(mean_squared_error(y_test, test_pred)),
        mean_absolute_error(y_train, train_pred),
        mean_absolute_error(y_test, test_pred),
        mean_absolute_percentage_error(y_train, train_pred),
        mean_absolute_percentage_error(y_test, test_pred),
    ]
    

In [5]:
# Seed for the estimators configured with one below, so repeated runs of
# this cell on the same split give the same fitted models and metrics.
RANDOM_STATE = 42

lm = LinearRegression()
ridge = Ridge()
lasso = Lasso()
knn = KNeighborsRegressor()
rf = RandomForestRegressor(random_state=RANDOM_STATE)
xgbt = xgb.XGBRegressor(random_state=RANDOM_STATE)
dtree = DecisionTreeRegressor(random_state=RANDOM_STATE)

# Order here only affects the order of the progress lines; the final table
# is sorted by Test_MAPE.
algo = [xgbt, lm, ridge, lasso, knn, rf, dtree]

# One row of metrics per fitted model. Kept separate from `result` so that
# name always refers to the final DataFrame.
metric_rows = []

for estimator in algo:
    # process_time() reports CPU time (user + system) of this process, not
    # wall-clock time. The timed span covers fitting AND metric evaluation.
    start = time.process_time()
    ml_model = estimator.fit(X_train, y_train)
    metric_rows.append(model_metrics(ml_model, X_train, y_train, X_test, y_test))
    print(str(estimator).split("(")[0], "✓    ", "{}".format(round(time.process_time() - start, 3)), "sec")

# Column order must match the list returned by model_metrics.
result = (
    pd.DataFrame(
        metric_rows,
        columns=["Algorithm", "Train_Score", "Test_Score", "Train_RMSE",
                 "Test_RMSE", "Train_MAE", "Test_MAE", "Train_MAPE", "Test_MAPE"],
    )
    .sort_values("Test_MAPE")   # ascending: lowest test MAPE first
    .set_index("Algorithm")
)
result

XGBRegressor ✓     1.953 sec
LinearRegression ✓     0.344 sec
Ridge ✓     0.234 sec
Lasso ✓     0.156 sec
KNeighborsRegressor ✓     2.016 sec
RandomForestRegressor ✓     13.703 sec
DecisionTreeRegressor ✓     0.203 sec


Unnamed: 0_level_0,Train_Score,Test_Score,Train_RMSE,Test_RMSE,Train_MAE,Test_MAE,Train_MAPE,Test_MAPE
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
RandomForestRegressor,0.988051,0.917208,14210.4224,37347.398906,7076.882082,18609.563432,0.040666,0.109974
DecisionTreeRegressor,0.999928,0.849218,1104.266371,50401.348057,18.628649,20364.75737,5.6e-05,0.119083
XGBRegressor,0.92337,0.875431,35986.170553,45811.191906,23532.074742,28932.61556,0.132658,0.165209
LinearRegression,0.660718,0.660215,75721.252546,75660.544281,59180.996051,59179.786914,0.288939,0.289228
Lasso,0.660718,0.660215,75721.252713,75660.509515,59181.311906,59180.047668,0.288941,0.28923
Ridge,0.660718,0.660216,75721.255408,75660.43322,59182.509379,59181.071826,0.288952,0.28924
KNeighborsRegressor,0.784502,0.643563,60347.445467,77492.256782,37264.410602,49297.570476,0.243185,0.317876
