In [14]:
from sklearn import datasets

# Load the Boston housing dataset bundled with scikit-learn and keep the
# feature matrix and the regression target under separate names.
# NOTE(review): load_boston is deprecated in recent scikit-learn releases
# (removed in 1.2) — confirm the pinned version before re-running.
boston = datasets.load_boston()
X_boston = boston.data
Y_boston = boston.target

# Cell output: shape of the target vector (same array as boston.target).
Y_boston.shape

(506,)

In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the partition identical between runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    X_boston,
    Y_boston,
    train_size=0.80,
    test_size=0.20,
    random_state=123,
)

# Report the shape of each of the four resulting arrays.
print(
    'Train/Test Sets Sizes : ',
    *(part.shape for part in (xtrain, xtest, ytrain, ytest)),
)

Train/Test Sets Sizes :  (404, 13) (102, 13) (404,) (102,)


In [17]:
# Three baseline regressors, all with default hyperparameters, to compare
# against the bagging ensemble trained later in the notebook.
lr = LinearRegression()
# Seed the tree so its fitted structure (and therefore its R^2 score) is
# reproducible across Restart & Run All; 123 matches the seed used for the
# train/test split.
dt = DecisionTreeRegressor(random_state=123)
knn = KNeighborsRegressor()

In [18]:
# Fit every baseline regressor on the training partition, in the same
# order as before (linear regression, decision tree, k-nearest neighbours).
for base_model in (lr, dt, knn):
    base_model.fit(xtrain, ytrain)

# fit() returns the estimator itself, so displaying knn reproduces the
# cell output of the last fit call.
knn

KNeighborsRegressor()

In [19]:
# Test-set predictions for each baseline:
#   y_pred1 -> linear regression, y_pred2 -> decision tree, y_pred3 -> KNN.
y_pred1, y_pred2, y_pred3 = (
    fitted.predict(xtest) for fitted in (lr, dt, knn)
)

In [20]:
# Print the test-set R^2 of each baseline; r2_score expects the true
# targets first and the predictions second.
for label, predictions in (
    ("R^2 score for LR", y_pred1),
    ("R^2 score for DT", y_pred2),
    ("R^2 score for KNN", y_pred3),
):
    print(label, r2_score(ytest, predictions))

R^2 score for LR 0.6592466510354094
R^2 score for DT 0.45338259547007664
R^2 score for KNN 0.5475962186976784


In [21]:
from sklearn.ensemble import BaggingRegressor

# Bagging ensemble with default settings; random_state=1 fixes the
# resampling so the fit is repeatable. fit() returns the estimator, so the
# construction and training can be chained.
bag_regressor = BaggingRegressor(random_state=1).fit(xtrain, ytrain)

# Display the fitted estimator as the cell output.
bag_regressor

BaggingRegressor(random_state=1)

In [22]:
# Test-set predictions of the bagging ensemble (not used by the score
# report below, which calls score() directly).
Y_preds = bag_regressor.predict(xtest)

# Compare R^2 on the training and test partitions to gauge overfitting.
for template, features, targets in (
    ('Training Coefficient of R^2 : %.3f', xtrain, ytrain),
    ('Test Coefficient of R^2 : %.3f', xtest, ytest),
):
    print(template % bag_regressor.score(features, targets))

Training Coefficient of R^2 : 0.980
Test Coefficient of R^2 : 0.818


In [23]:
%%time

n_samples = boston.data.shape[0]
n_features = boston.data.shape[1]

params = {'base_estimator': [None, LinearRegression(), KNeighborsRegressor()],
          'n_estimators': [20,50,100],
          'max_samples': [0.5,1.0],
          'max_features': [0.5,1.0],
          'bootstrap': [True, False],
          'bootstrap_features': [True, False]}

bagging_regressor_grid = GridSearchCV(BaggingRegressor(random_state=1, n_jobs=-1), param_grid =params, cv=3, n_jobs=-1, verbose=1)
bagging_regressor_grid.fit(xtrain, ytrain)

print('Train R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(xtrain, ytrain))
print('Test R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(xtest, ytest))
print('Best R^2 Score Through Grid Search : %.3f'%bagging_regressor_grid.best_score_)
print('Best Parameters : ',bagging_regressor_grid.best_params_)

Fitting 3 folds for each of 144 candidates, totalling 432 fits
Train R^2 Score : 0.983
Test R^2 Score : 0.805
Best R^2 Score Through Grid Search : 0.871
Best Parameters :  {'base_estimator': None, 'bootstrap': True, 'bootstrap_features': False, 'max_features': 1.0, 'max_samples': 1.0, 'n_estimators': 50}
Wall time: 10.7 s
