In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor as rfr,\
     AdaBoostRegressor as ada, GradientBoostingRegressor as gbr
from sklearn.linear_model import LinearRegression as lr,\
     BayesianRidge as bay, Ridge as rr, Lasso as l,\
     LassoLars as ll, ElasticNet as en,\
     ARDRegression as ard, RidgeCV as rcv
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor as dtr
from sklearn.neighbors import KNeighborsRegressor as knn
from sklearn.preprocessing import StandardScaler

In [2]:
def get_error(model, Xtest, ytest):
    y_pred = model.predict(Xtest)
    return np.sqrt(mean_squared_error(ytest, y_pred)),model.__class__.__name__

In [3]:
X = np.load('data/X_white.npy')
y = np.load('data/y_white.npy')
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [4]:
regressors = [lr(), bay(), rr(alpha=.5, random_state=0),
              l(alpha=0.1, random_state=0), ll(), knn(),
              ard(), rfr(random_state=0, n_estimators=10),
              SVR(gamma='scale', kernel='rbf'), 
              rcv(fit_intercept=False), en(random_state=0),
              dtr(random_state=0), ada(random_state=0),
              gbr(random_state=0)]

In [5]:
print ('unscaled:')
for reg in regressors:
    reg.fit(X_train, y_train)
    rmse, name = get_error(reg, X_test, y_test)
    name = reg.__class__.__name__
    print (name + '(rmse):',rmse)

unscaled:
LinearRegression(rmse): 0.7955515218420902
BayesianRidge(rmse): 0.7960671860879414
Ridge(rmse): 0.7959918979256687
Lasso(rmse): 0.8337402482180647
LassoLars(rmse): 0.9176196407505608
KNeighborsRegressor(rmse): 0.863481325796916
ARDRegression(rmse): 0.7917856544290739
RandomForestRegressor(rmse): 0.6966098665124181
SVR(rmse): 0.8482640826919192
RidgeCV(rmse): 0.7963153609474138
ElasticNet(rmse): 0.9010059098933324
DecisionTreeRegressor(rmse): 0.8670851945996296
AdaBoostRegressor(rmse): 0.7853090591509908
GradientBoostingRegressor(rmse): 0.7341518238895893


In [6]:
print ('scaled:')
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.fit_transform(X_test)
for reg in regressors:
    reg.fit(X_train_std, y_train)
    rmse, name = get_error(reg, X_test_std, y_test)
    name = reg.__class__.__name__
    print (name + '(rmse):',rmse)

scaled:
LinearRegression(rmse): 0.795387506827426
BayesianRidge(rmse): 0.7937076704188429
Ridge(rmse): 0.7952785241294685
Lasso(rmse): 0.8222698302453786
LassoLars(rmse): 0.9176196407505608
KNeighborsRegressor(rmse): 0.7592477480092666
ARDRegression(rmse): 0.7945809310253796
RandomForestRegressor(rmse): 0.7119905984245143
SVR(rmse): 0.7338441268723941
RidgeCV(rmse): 5.877903185343038
ElasticNet(rmse): 0.9176196407505608
DecisionTreeRegressor(rmse): 0.9249379460707797
AdaBoostRegressor(rmse): 0.7910614790494706
GradientBoostingRegressor(rmse): 0.743277636279255
