In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor as rfr,\
     AdaBoostRegressor as ada, GradientBoostingRegressor as gbr
from sklearn.linear_model import LinearRegression as lr,\
     BayesianRidge as bay, Ridge as rr, Lasso as l,\
     LassoLars as ll, ElasticNet as en,\
     ARDRegression as ard, RidgeCV as rcv
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor as dtr
from sklearn.neighbors import KNeighborsRegressor as knn
from sklearn.preprocessing import StandardScaler

In [2]:
def get_error(model, Xtest, ytest):
    y_pred = model.predict(Xtest)
    return np.sqrt(mean_squared_error(ytest, y_pred)),model.__class__.__name__

In [3]:
X = np.load('data/X_red.npy')
y = np.load('data/y_red.npy')
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [4]:
regressors = [lr(), bay(), rr(alpha=.5, random_state=0),
              l(alpha=0.1, random_state=0), ll(), knn(),
              ard(), rfr(random_state=0, n_estimators=10),
              SVR(gamma='scale', kernel='rbf'), 
              rcv(fit_intercept=False), en(random_state=0),
              dtr(random_state=0), ada(random_state=0),
              gbr(random_state=0)]

In [5]:
print ('unscaled:')
for reg in regressors:
    reg.fit(X_train, y_train)
    rmse, name = get_error(reg, X_test, y_test)
    name = reg.__class__.__name__
    print (name + '(rmse):',rmse)

unscaled:
LinearRegression(rmse): 0.6325128671300334
BayesianRidge(rmse): 0.6328209929688434
Ridge(rmse): 0.6326849898919567
Lasso(rmse): 0.6685741263275996
LassoLars(rmse): 0.7821789633105511
KNeighborsRegressor(rmse): 0.7280109889280517
ARDRegression(rmse): 0.6400756305490781
RandomForestRegressor(rmse): 0.626079068488957
SVR(rmse): 0.7137359637679885
RidgeCV(rmse): 0.6324998546518101
ElasticNet(rmse): 0.7711443835889616
DecisionTreeRegressor(rmse): 0.7632168761236874
AdaBoostRegressor(rmse): 0.6435629587127757
GradientBoostingRegressor(rmse): 0.6296130984511631


In [6]:
print ('scaled:')
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.fit_transform(X_test)
for reg in regressors:
    reg.fit(X_train_std, y_train)
    rmse, name = get_error(reg, X_test_std, y_test)
    name = reg.__class__.__name__
    print (name + '(rmse):',rmse)

scaled:
LinearRegression(rmse): 0.6336685221332286
BayesianRidge(rmse): 0.633541175414439
Ridge(rmse): 0.6336648958986739
Lasso(rmse): 0.655937227882979
LassoLars(rmse): 0.7821789633105511
KNeighborsRegressor(rmse): 0.6897100840208152
ARDRegression(rmse): 0.6351848608016393
RandomForestRegressor(rmse): 0.6420475060305117
SVR(rmse): 0.6183295652297023
RidgeCV(rmse): 5.653065707475102
ElasticNet(rmse): 0.7821789633105511
DecisionTreeRegressor(rmse): 0.8631338250816034
AdaBoostRegressor(rmse): 0.6467903814015876
GradientBoostingRegressor(rmse): 0.6533241241243033
