In [6]:
from sklearn import datasets
# Load the diabetes dataset and keep the feature matrix and target vector
# under their own names for the cells that follow.
diabetes = datasets.load_diabetes()
X_diabetes = diabetes.data
Y_diabetes = diabetes.target

# One summary line per item: feature names, then the shapes of both arrays.
for caption, detail in (
    ("datasets features names: ", diabetes.feature_names),
    ("datasets features size: ", diabetes.data.shape),
    ("datasets target size: ", diabetes.target.shape),
):
    print(caption + str(detail))

datasets features names: ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
datasets features size: (442, 10)
datasets target size: (442,)


In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

In [10]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    X_diabetes,
    Y_diabetes,
    train_size=0.80,
    test_size=0.20,
    random_state=123,
)

# Show the shape of every piece of the split on a single line.
split_shapes = [part.shape for part in (x_train, x_test, y_train, y_test)]
print("train/test sets size:", *split_shapes)

train/test sets size: (353, 10) (89, 10) (353,) (89,)


In [15]:
# Baseline single-model regressors to compare against the bagging ensembles.
lr = LinearRegression()
# Seed the tree: without a fixed random_state the fitted tree (and the R^2
# reported for it) can differ between runs, which breaks Restart & Run All
# reproducibility. The seed matches the one used for BaggingRegressor.
dt = DecisionTreeRegressor(random_state=1)
knn = KNeighborsRegressor()

# Fit every baseline on the same training split.
lr.fit(x_train, y_train)
dt.fit(x_train, y_train)
knn.fit(x_train, y_train)

In [17]:
# Predict the held-out targets with each baseline, in LR / DT / KNN order.
y_pred1, y_pred2, y_pred3 = [model.predict(x_test) for model in (lr, dt, knn)]

# Report the test-set R^2 of each baseline next to its label.
for label, predictions in (
    ("R^2 score for LR", y_pred1),
    ("R^2 score for DT", y_pred2),
    ("R^2 score for KNN", y_pred3),
):
    print(label, r2_score(y_test, predictions))
     

R^2 score for LR 0.5675895725793205
R^2 score for DT 0.13297827296156817
R^2 score for KNN 0.438839665879189


In [20]:
from sklearn.ensemble import BaggingRegressor
# Bagging regressor with its default settings; with no base model supplied,
# the default base model is a decision tree. Only the seed is fixed.
bag_regressor = BaggingRegressor(random_state=1)
bag_regressor.fit(x_train, y_train)

In [23]:
# Test-set predictions of the default bagging ensemble.
Y_preds = bag_regressor.predict(x_test)

# R^2 on the training split and on the held-out split, to three decimals.
for template, features, targets in (
    ('Training Coefficient of R^2 : %.3f', x_train, y_train),
    ('Test Coefficient of R^2 : %.3f', x_test, y_test),
):
    print(template % bag_regressor.score(features, targets))
     

Training Coefficient of R^2 : 0.897
Test Coefficient of R^2 : 0.499


In [28]:
%%time

n_samples = diabetes.data.shape[0]
n_features = diabetes.data.shape[1]
params = {'base_estimator': [None, LinearRegression(), KNeighborsRegressor()],
          'n_estimators': [20,50,100],
          'max_samples': [0.5,1.0],
          'max_features': [0.5,1.0],
          'bootstrap': [True, False],
          'bootstrap_features': [True, False]}

bagging_regressor_grid = GridSearchCV(BaggingRegressor(random_state=1, n_jobs=-1), param_grid =params, cv=3, n_jobs=-1, verbose=1)
bagging_regressor_grid.fit(x_train, y_train)

print('Train R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(x_train, y_train))
print('Test R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(x_test, y_test))
print('Best R^2 Score Through Grid Search : %.3f'%bagging_regressor_grid.best_score_)
print('Best Parameters : ',bagging_regressor_grid.best_params_)

Fitting 3 folds for each of 144 candidates, totalling 432 fits
Train R^2 Score : 0.487
Test R^2 Score : 0.533
Best R^2 Score Through Grid Search : 0.446
Best Parameters :  {'base_estimator': LinearRegression(), 'bootstrap': False, 'bootstrap_features': True, 'max_features': 1.0, 'max_samples': 0.5, 'n_estimators': 100}
CPU times: total: 250 ms
Wall time: 15.7 s


