In [29]:
from sklearn import datasets
import warnings
warnings.filterwarnings('ignore')

In [30]:
boston=datasets.load_boston()
x_boston, y_boston= boston.data, boston.target
print('dataset features names :'+ str(boston.feature_names))
print('dataset features size:'+ str(boston.data.shape))
print('dataset target size :'+str(boston.target.shape))

dataset features names :['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']
dataset features size:(506, 13)
dataset target size :(506,)


In [31]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

In [32]:
from sklearn.model_selection import train_test_split

In [33]:
x_train, x_test, y_train, y_test=train_test_split(x_boston, y_boston, test_size=0.2, random_state=123)
print('train/test sizes : ', x_train.shape, x_test.shape, y_train.shape, y_test.shape)

train/test sizes :  (404, 13) (102, 13) (404,) (102,)


In [35]:
lr=LinearRegression()
dt=DecisionTreeRegressor()
knn=KNeighborsRegressor()

In [36]:
lr.fit(x_train, y_train)
dt.fit(x_train, y_train)
knn.fit(x_train, y_train)

KNeighborsRegressor()

In [37]:
y_predlr=lr.predict(x_test)
y_preddt=dt.predict(x_test)
y_predknn=knn.predict(x_test)

In [38]:
print('r2 score for lr :', r2_score(y_test, y_predlr))
print('r2 score for dt :', r2_score(y_test, y_preddt))
print('r2 score for knn :', r2_score(y_test, y_predknn))

r2 score for lr : 0.6592466510354097
r2 score for dt : 0.44706671648178853
r2 score for knn : 0.5475962186976784


# Bagging with default hyperparameters


In [39]:
from sklearn.ensemble import BaggingRegressor

In [40]:
bag_regressor=BaggingRegressor(random_state=1)
bag_regressor.fit(x_train, y_train)

BaggingRegressor(random_state=1)

In [47]:
y_pred=bag_regressor.predict(x_test)

print('Training coefficient of R^2 :%.3f'%bag_regressor.score(x_train, y_train))
print('Test coefficient of R^2 :%.3f'%bag_regressor.score(x_test, y_test))

Training coefficient of R^2 :0.980
Test coefficient of R^2 :0.818


In [48]:
from sklearn.model_selection import GridSearchCV

In [52]:
params={'base_estimator':[lr,dt,knn],
       'n_estimators':[20,50,100,200],
       'max_samples':[0.5,0.8,1.0],
       'max_features':[0.5,1.0],
       'bootstrap':[True,False],
       'bootstrap_features':[False,True]}
bagging_regressor_grid=GridSearchCV(BaggingRegressor(random_state=1, n_jobs=-1), param_grid=params, cv=5, n_jobs=-1, verbose=1)

In [53]:
bagging_regressor_grid.fit(x_train, y_train)

Fitting 5 folds for each of 288 candidates, totalling 1440 fits


GridSearchCV(cv=5, estimator=BaggingRegressor(n_jobs=-1, random_state=1),
             n_jobs=-1,
             param_grid={'base_estimator': [LinearRegression(),
                                            DecisionTreeRegressor(),
                                            KNeighborsRegressor()],
                         'bootstrap': [True, False],
                         'bootstrap_features': [False, True],
                         'max_features': [0.5, 1.0],
                         'max_samples': [0.5, 0.8, 1.0],
                         'n_estimators': [20, 50, 100, 200]},
             verbose=1)

In [55]:
print('Train R^2 score : %.3f'%bagging_regressor_grid.best_estimator_.score(x_train,y_train))
print('Test R^2 score : %.3f'%bagging_regressor_grid.best_estimator_.score(x_test,y_test))
print('best R^2 score through grid search : %.3f'%bagging_regressor_grid.best_score_)
print('best parametes through grid search :' ,bagging_regressor_grid.best_params_)

Train R^2 score : 0.983
Test R^2 score : 0.793
best R^2 score through grid search : 0.863
best parametes through grid search : {'base_estimator': DecisionTreeRegressor(), 'bootstrap': True, 'bootstrap_features': False, 'max_features': 1.0, 'max_samples': 1.0, 'n_estimators': 100}
