In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor as rfr,\
     AdaBoostRegressor as ada, GradientBoostingRegressor as gbr
from sklearn.linear_model import LinearRegression as lr,\
     BayesianRidge as bay, Ridge as rr, Lasso as l,\
     LassoLars as ll, ElasticNet as en,\
     ARDRegression as ard, RidgeCV as rcv
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor as dtr
from sklearn.neighbors import KNeighborsRegressor as knn
from sklearn.preprocessing import StandardScaler


In [2]:
def get_error(model, Xtest, ytest):
    y_pred = model.predict(Xtest)
    return np.sqrt(mean_squared_error(ytest, y_pred)),model.__class__.__name__

In [3]:
X = np.load('data/X_tips.npy')
y = np.load('data/y_tips.npy')
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [4]:
regressors = [lr(), bay(), rr(alpha=.5, random_state=0),
              l(alpha=0.1, random_state=0), ll(), knn(),
              ard(), rfr(random_state=0, n_estimators=100),
              SVR(gamma='scale', kernel='rbf'), 
              rcv(fit_intercept=False), en(random_state=0),
              dtr(random_state=0), ada(random_state=0),
              gbr(random_state=0)]

In [5]:
print ('unscaled:')
for reg in regressors:
    reg.fit(X_train, y_train)
    rmse, name = get_error(reg, X_test, y_test)
    name = reg.__class__.__name__
    print (name + '(rmse):',rmse)

unscaled:
LinearRegression(rmse): 0.9474705746817211
BayesianRidge(rmse): 0.9245217159811321
Ridge(rmse): 0.9471900902779102
Lasso(rmse): 0.9158574785712037
LassoLars(rmse): 1.333812899498391
KNeighborsRegressor(rmse): 1.086204460049883
ARDRegression(rmse): 0.9264801346401472
RandomForestRegressor(rmse): 0.8850975551298138
SVR(rmse): 0.9437629655386353
RidgeCV(rmse): 0.9426372075891951
ElasticNet(rmse): 0.9307377813721576
DecisionTreeRegressor(rmse): 1.2994272932036561
AdaBoostRegressor(rmse): 0.932681302158466
GradientBoostingRegressor(rmse): 0.9112440690311495


In [7]:
print ('scaled:')
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.fit_transform(X_test)
for reg in regressors:
    reg.fit(X_train_std, y_train)
    rmse, name = get_error(reg, X_test_std, y_test)
    name = reg.__class__.__name__
    print (name + '(rmse):',rmse)

scaled:
LinearRegression(rmse): 0.900775117788149
BayesianRidge(rmse): 0.9096823844243739
Ridge(rmse): 0.9010890080377258
Lasso(rmse): 0.8785977911833893
LassoLars(rmse): 1.333812899498391
KNeighborsRegressor(rmse): 0.9613578099280607
ARDRegression(rmse): 0.8745960871429688
RandomForestRegressor(rmse): 0.893772251516372
SVR(rmse): 0.9749204385201589
RidgeCV(rmse): 3.1960055364135647
ElasticNet(rmse): 1.1310151423347359
DecisionTreeRegressor(rmse): 1.1835900827021861
AdaBoostRegressor(rmse): 0.986987944835978
GradientBoostingRegressor(rmse): 0.8908489427010696
