In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor as rfr,\
     AdaBoostRegressor as ada, GradientBoostingRegressor as gbr
from sklearn.linear_model import LinearRegression as lr,\
     BayesianRidge as bay, Ridge as rr, Lasso as l,\
     LassoLars as ll, ElasticNet as en,\
     ARDRegression as ard, RidgeCV as rcv
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor as dtr
from sklearn.neighbors import KNeighborsRegressor as knn
from sklearn.preprocessing import StandardScaler

In [3]:
def get_error(model, Xtest, ytest):
    y_pred = model.predict(Xtest)
    return np.sqrt(mean_squared_error(ytest, y_pred)),model.__class__.__name__

In [4]:
X = np.load('data/X_boston.npy')
y = np.load('data/y_boston.npy')
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [5]:
regressors = [lr(), bay(), rr(alpha=.5, random_state=0),
              l(alpha=0.1, random_state=0), ll(), knn(),
              ard(), rfr(random_state=0, n_estimators=100),
              SVR(gamma='scale', kernel='rbf'), 
              rcv(fit_intercept=False), en(random_state=0),
              dtr(random_state=0), ada(random_state=0),
              gbr(random_state=0)]

In [6]:
print ('unscaled:')
for reg in regressors:
    reg.fit(X_train, y_train)
    rmse, name = get_error(reg, X_test, y_test)
    name = reg.__class__.__name__
    print (name + '(rmse):',rmse)

unscaled:
LinearRegression(rmse): 4.2367105743872555
BayesianRidge(rmse): 4.317931672041759
Ridge(rmse): 4.243658717030716
Lasso(rmse): 4.300740333025024
LassoLars(rmse): 8.754893348840868
KNeighborsRegressor(rmse): 5.9934937623789
ARDRegression(rmse): 4.28415048500807
RandomForestRegressor(rmse): 3.37169151536684
SVR(rmse): 7.279521589512571
RidgeCV(rmse): 4.392246393675596
ElasticNet(rmse): 4.888448467452128
DecisionTreeRegressor(rmse): 4.346328232622458
AdaBoostRegressor(rmse): 3.652816906059683
GradientBoostingRegressor(rmse): 3.1941117128039194


In [7]:
print ('scaled:')
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.fit_transform(X_test)
for reg in regressors:
    reg.fit(X_train_std, y_train)
    rmse, name = get_error(reg, X_test_std, y_test)
    name = reg.__class__.__name__
    print (name + '(rmse):',rmse)

scaled:
LinearRegression(rmse): 4.398269524691269
BayesianRidge(rmse): 4.41954372797153
Ridge(rmse): 4.400075160458177
Lasso(rmse): 4.489952156682321
LassoLars(rmse): 8.754893348840868
KNeighborsRegressor(rmse): 4.757936288305807
ARDRegression(rmse): 4.383622227159547
RandomForestRegressor(rmse): 4.053037237125816
SVR(rmse): 5.083294658978756
RidgeCV(rmse): 22.347576364113266
ElasticNet(rmse): 5.277752330669968
DecisionTreeRegressor(rmse): 5.2796587719252726
AdaBoostRegressor(rmse): 4.100148076529094
GradientBoostingRegressor(rmse): 3.7490071027496015
