In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, Lasso
from sklearn.neighbors import KNeighborsRegressor
import xgboost as xgb



from sklearn.metrics import mean_squared_error,r2_score, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split


import warnings
warnings.simplefilter('ignore')

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [2]:
ds = pd.read_csv("data_set.csv")
print(ds.columns)

Index(['housing_median_age', 'total_rooms', 'total_bedrooms', 'population',
       'households', 'median_income', 'median_house_value',
       'ocean_proximity_INLAND', 'ocean_proximity_ISLAND',
       'ocean_proximity_NEAR BAY', 'ocean_proximity_NEAR OCEAN'],
      dtype='object')


In [3]:
from sklearn.model_selection import train_test_split

X_columns = ['housing_median_age', 'total_rooms', 'total_bedrooms', 'population',
       'households', 'median_income',
       'ocean_proximity_INLAND', 'ocean_proximity_ISLAND',
       'ocean_proximity_NEAR BAY', 'ocean_proximity_NEAR OCEAN']

y_column = ['median_house_value']

X = ds[X_columns]
y = ds[y_column]


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)


In [4]:
def model_metrics(ml_model, X_train, y_train, X_test, y_test):
    return ([str(ml_model).split("(")[0], 
                       ml_model.score(X_train, y_train), 
                       ml_model.score(X_test, y_test),
                       np.sqrt(mean_squared_error(y_train, ml_model.predict(X_train))),
                       np.sqrt(mean_squared_error(y_test, ml_model.predict(X_test))),
                       mean_absolute_error(y_train, ml_model.predict(X_train)),
                       mean_absolute_error(y_test, ml_model.predict(X_test)),
                       mean_absolute_percentage_error(y_train, ml_model.predict(X_train)),
                       mean_absolute_percentage_error(y_test, ml_model.predict(X_test)),
                      ])
    

In [5]:
lm = LinearRegression()
ridge = Ridge(random_state = 42)
lasso = Lasso(random_state = 42)
knn = KNeighborsRegressor()
rf = RandomForestRegressor(random_state = 42)
xgbt = xgb.XGBRegressor(random_state = 42)
dtree = DecisionTreeRegressor(random_state = 42)



algo = [xgbt,lm,ridge,lasso,knn,rf,dtree]

result = []

for i in algo:
        start = time.process_time()
        ml_model = i.fit(X_train,y_train)
        result.append(model_metrics(ml_model, X_train, y_train, X_test, y_test))
        print(str(i).split("(")[0],"✓    ", "{}".format(round(time.process_time()-start,3)),"sec")

result = pd.DataFrame(result, columns = ["Algorithm", "Train_Score", "Test_Score", "Train_RMSE",
                                         "Test_RMSE", "Train_MAE", "Test_MAE", "Train_MAPE", "Test_MAPE"]).sort_values("Test_MAPE").set_index("Algorithm")
result

XGBRegressor ✓     2.109 sec
LinearRegression ✓     0.281 sec
Ridge ✓     0.312 sec
Lasso ✓     0.281 sec
KNeighborsRegressor ✓     1.703 sec
RandomForestRegressor ✓     12.859 sec
DecisionTreeRegressor ✓     0.141 sec


Unnamed: 0_level_0,Train_Score,Test_Score,Train_RMSE,Test_RMSE,Train_MAE,Test_MAE,Train_MAPE,Test_MAPE
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
RandomForestRegressor,0.988019,0.915255,14185.228438,38056.465542,7039.242138,18825.336567,0.04076,0.109716
DecisionTreeRegressor,0.999926,0.852927,1116.221908,50134.795269,22.11149,20451.344717,6.1e-05,0.117333
XGBRegressor,0.92209,0.878878,36173.703145,45497.075831,23673.779068,28784.050885,0.134187,0.163466
LinearRegression,0.657835,0.666833,75807.896683,75457.641085,59305.209103,59021.182236,0.288483,0.290341
Lasso,0.657835,0.666833,75807.89685,75457.689096,59305.507385,59021.491096,0.288486,0.290343
Ridge,0.657835,0.66683,75807.899488,75458.028853,59306.528902,59023.029519,0.288495,0.290355
KNeighborsRegressor,0.780639,0.647832,60698.335245,77579.512592,37350.527721,50063.772052,0.242898,0.324793
