In [1]:
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge,Lasso, ElasticNet, SGDRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [2]:
def get_scores(model, Xtest, ytest):
    y_pred = model.predict(Xtest)
    return np.sqrt(mean_squared_error(ytest, y_pred)),model.__class__.__name__

In [4]:
X = np.load('data/X_boston.npy')
y = np.load('data/y_boston.npy')
print ('feature shape', X.shape)
print ('target shape', y.shape)

feature shape (506, 13)
target shape (506,)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
rfr_model = RandomForestRegressor(random_state=0,n_estimators=100)
rfr_model.fit(X_train,y_train)
rmse,rfr_model_name = get_scores(rfr_model,X_test,y_test)
print("RMSE:",rmse, '(' + rfr_model_name + ')')

RMSE: 4.091149842219918 (RandomForestRegressor)


In [10]:
lr_model = LinearRegression().fit(X_train, y_train)
rmse, lr_model_name = get_scores(lr, X_test, y_test)
print("RMSE:",rmse, '(' + lr_model_name + ')')

RMSE: 5.4573111595640755 (LinearRegression)


In [11]:
ridge_model = Ridge(random_state=0).fit(X_train, y_train)
rmse, ridge_model_name = get_scores(ridge_model, X_test, y_test)
print("RMSE:",rmse, '(' + ridge_model_name + ')')

RMSE: 5.523126267867209 (Ridge)


In [13]:
lasso_model = Lasso(random_state=0).fit(X_train, y_train)
rmse, lasso_model_name = get_scores(lasso_model, X_test, y_test)
print("RMSE:",rmse, '(' + lasso_model_name + ')')

RMSE: 6.052422661571132 (Lasso)


In [16]:
eN_model = ElasticNet(random_state=0).fit(X_train, y_train)
rmse, eN_model_name = get_scores(eN_model, X_test, y_test)
print("RMSE:",rmse, '(' + eN_model_name + ')')

RMSE: 5.993473468736907 (ElasticNet)


In [17]:
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.fit_transform(X_test)
sgdr_std_model = SGDRegressor(random_state=0,max_iter=1000, tol=0.001)
sgdr_std_model.fit(X_train_std, y_train)
rmse, sgdr_model_name = get_scores(sgdr_std_model, X_test_std, y_test)
print (rmse, '(' + sgdr_name + ' - scaled)')

5.614093877555352 (SGDRegressor - scaled)
