In [4]:
from sklearn import datasets

boston = datasets.load_boston()
X_boston, Y_boston = boston.data , boston.target

print('Dataset features names : '+ str(boston.feature_names))
print('Dataset features size : '+ str(boston.data.shape))
print('Dataset target names : '+ str(boston.target.shape))


Dataset features names : ['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']
Dataset features size : (506, 13)
Dataset target names : (506,)


In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_boston, Y_boston, train_size=0.80, test_size=0.20, random_state=123,)
print('Train / Test Sets Sizes :', X_train.shape, X_test.shape, y_train.shape, y_train.shape)

Train / Test Sets Sizes : (404, 13) (102, 13) (404,) (404,)


In [16]:
lr = LinearRegression()
dt = DecisionTreeRegressor()
knn = KNeighborsRegressor()

In [17]:
lr.fit(X_train, y_train)
dt.fit(X_train, y_train)
knn.fit(X_train, y_train)

KNeighborsRegressor()

In [18]:
y_pred1 = lr.predict(X_test)
y_pred2 = dt.predict(X_test)
y_pred3 = knn.predict(X_test)

In [19]:
print('R^2 score of LR', r2_score(y_test, y_pred1))
print('R^2 score of DT', r2_score(y_test, y_pred2))
print('R^2 score of KNN', r2_score(y_test, y_pred3))

R^2 score of LR 0.6592466510354097
R^2 score of DT 0.349840099035436
R^2 score of KNN 0.5475962186976784


In [20]:
from sklearn.ensemble import BaggingRegressor

bag_regressor = BaggingRegressor(random_state=1)
bag_regressor.fit(X_train, y_train)

BaggingRegressor(random_state=1)

In [22]:
y_preds = bag_regressor.predict(X_test)

print('Training coficient of R^2 : %.3f' %bag_regressor.score(X_train, y_train))
print('Testing coficient of R^2 : %.3f' %bag_regressor.score(X_test, y_test))

Training coficient of R^2 : 0.980
Testing coficient of R^2 : 0.818


In [24]:
%%time
n_samples = boston.data.shape[0]
n_features = boston.data.shape[1]

params = {
    'base_estimator':[None, LinearRegression(), KNeighborsRegressor()],
    'max_samples' : [0.5, 1.0], 
    'max_features' : [0.5, 1.0],
    'bootstrap' : [True, False],
    'bootstrap_features' : [True, False]
}

bagging_regressor_grid = GridSearchCV(BaggingRegressor(random_state=1, n_jobs=-1), param_grid=params, cv=3, n_jobs=-1, verbose=1)
bagging_regressor_grid.fit(X_train, y_train)

Fitting 3 folds for each of 48 candidates, totalling 144 fits
Wall time: 9.38 s


GridSearchCV(cv=3, estimator=BaggingRegressor(n_jobs=-1, random_state=1),
             n_jobs=-1,
             param_grid={'base_estimator': [None, LinearRegression(),
                                            KNeighborsRegressor()],
                         'bootstrap': [True, False],
                         'bootstrap_features': [True, False],
                         'max_features': [0.5, 1.0],
                         'max_samples': [0.5, 1.0]},
             verbose=1)

In [26]:
print('Train R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(X_train, y_train))
print('Test R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(X_test, y_test))
print('Train R^2 Score through GridSearch : %.3f'%bagging_regressor_grid.best_score_)
print('Best Parameter : ', bagging_regressor_grid.best_params_)

Train R^2 Score : 0.980
Test R^2 Score : 0.818
Train R^2 Score through GridSearch : 0.853
Best Parameter :  {'base_estimator': None, 'bootstrap': True, 'bootstrap_features': False, 'max_features': 1.0, 'max_samples': 1.0}
