In [35]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import r2_score

In [36]:
# Fetch the diabetes regression dataset exposed by sklearn.datasets.
diabetes = load_diabetes()

# Combine the feature matrix and the response vector into one DataFrame,
# with the response stored in a trailing 'target' column.
df = pd.DataFrame(
    data=diabetes.data,
    columns=diabetes.feature_names,
).assign(target=diabetes.target)

# Preview the first rows of the assembled frame.
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [37]:
# Split the frame positionally: the first ten columns are the predictors,
# the final column ('target') is the response.
X = df.iloc[:, :10]
y = df.iloc[:, -1]

# Reserve 20% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 1. Bagging

## Bagging Using KNN

In [38]:
# Bagging ensemble whose base learner is a k-nearest-neighbours regressor.
knn_bag = BaggingRegressor(
    estimator=KNeighborsRegressor(),
    n_estimators=500,   # number of base regressors in the ensemble
    max_samples=0.5,    # float => fraction of the training rows drawn per estimator
    bootstrap=True,     # rows are drawn with replacement
    n_jobs=-1,          # run on all available processors
    random_state=42,    # fixed seed so the resampling is repeatable
)

In [39]:
# Train the KNN ensemble (fit returns the estimator itself), then predict
# on the held-out split.
y_pred = knn_bag.fit(X_train, y_train).predict(X_test)

# Report the coefficient of determination on the test set.
print("R2-Score of KNN Bagging: ", r2_score(y_true=y_test, y_pred=y_pred))

R2-Score of KNN Bagging:  0.43936847446220983


## Bagging Using SVM

In [40]:
# Bagging ensemble of support-vector regressors.
# NOTE: despite the `svc_` prefix in the variable name, the base estimator
# is SVR (regression), not SVC.
svc_bag = BaggingRegressor(
    estimator=SVR(),
    n_estimators=500,   # number of base regressors in the ensemble
    max_samples=0.5,    # float => fraction of the training rows drawn per estimator
    bootstrap=True,     # rows are drawn with replacement
    n_jobs=-1,          # run on all available processors
    random_state=42,    # fixed seed so the resampling is repeatable
)

In [41]:
# Fit the bagged SVR ensemble on the training split.
svc_bag.fit(X_train, y_train)

# Predict on the held-out split.
y_pred = svc_bag.predict(X_test)

# The base estimator is SVR (a regressor), so the label names SVR rather
# than SVC (the classifier).
print("R2-Score of SVR Bagging: ", r2_score(y_test, y_pred))

R2-Score of SVC Bagging:  0.10974284975348292


## Bagging Using Decision Trees

In [42]:
# Bagging ensemble whose base learner is an unconstrained decision-tree regressor.
dt_bag = BaggingRegressor(
    estimator=DecisionTreeRegressor(),
    n_estimators=500,   # number of trees in the ensemble
    max_samples=0.5,    # float => fraction of the training rows drawn per tree
    bootstrap=True,     # rows are drawn with replacement
    n_jobs=-1,          # run on all available processors
    random_state=42,    # fixed seed so the resampling is repeatable
)

In [43]:
# Train the decision-tree ensemble (fit returns the estimator itself), then
# predict on the held-out split.
y_pred = dt_bag.fit(X_train, y_train).predict(X_test)

# Report the coefficient of determination on the test set.
print("R2-Score of Decision Tree Bagging: ", r2_score(y_true=y_test, y_pred=y_pred))

R2-Score of Decision Tree Bagging:  0.45922693713209484


# 2. Pasting

In [44]:
# Pasting: same ensemble machinery as bagging, but rows are sampled
# WITHOUT replacement (bootstrap=False).
bag = BaggingRegressor(
    estimator=DecisionTreeRegressor(),
    n_estimators=500,   # number of trees in the ensemble
    max_samples=0.25,   # float => fraction of the training rows drawn per tree
    bootstrap=False,    # For Pasting bootstrap=False
    n_jobs=-1,          # run on all available processors
    random_state=42,    # fixed seed so the resampling is repeatable
)

In [45]:
# Train the pasting ensemble (fit returns the estimator itself), then
# predict on the held-out split and report the test-set R2.
y_pred = bag.fit(X_train, y_train).predict(X_test)
print("Pasting Regressor", r2_score(y_true=y_test, y_pred=y_pred))

Pasting Regressor 0.47333345336417054


# 3. Random Subspaces

In [46]:
# Random Subspaces: every tree is given the full set of training rows
# (max_samples=1.0, no row bootstrap) and only the feature set is resampled.
bag = BaggingRegressor(
    estimator=DecisionTreeRegressor(),
    n_estimators=500,           # number of trees in the ensemble
    max_samples=1.0,            # draw as many rows as the training set has
    bootstrap=False,            # rows drawn without replacement
    max_features=0.5,           # float => fraction of the features drawn per tree
    bootstrap_features=True,    # features are drawn with replacement
    n_jobs=-1,                  # run on all available processors
    random_state=42,            # fixed seed so the resampling is repeatable
)

In [47]:
# Fit the random-subspaces ensemble on the training split and predict on
# the held-out split.
bag.fit(X_train, y_train)
y_pred = bag.predict(X_test)

# Report the test-set R2 (label spelling fixed: "Regressor").
print("Random Subspaces Regressor", r2_score(y_test, y_pred))

Random Subspaces ragressor 0.4447262431798349


# 4. Random Patches

In [48]:
# Random Patches: both the rows and the features are resampled for each tree.
bag = BaggingRegressor(
    estimator=DecisionTreeRegressor(),
    n_estimators=500,           # number of trees in the ensemble
    max_samples=0.25,           # float => fraction of the training rows drawn per tree
    bootstrap=True,             # rows are drawn with replacement
    max_features=0.5,           # float => fraction of the features drawn per tree
    bootstrap_features=True,    # features are drawn with replacement
    n_jobs=-1,                  # run on all available processors
    random_state=42,            # fixed seed so the resampling is repeatable
)

In [49]:
# Train the random-patches ensemble (fit returns the estimator itself), then
# predict on the held-out split and report the test-set R2.
y_pred = bag.fit(X_train, y_train).predict(X_test)
print("Random Patches Regressor", r2_score(y_true=y_test, y_pred=y_pred))

Random Patches Regressor 0.42545614847274493


# Out-of-Bag (OOB) Score

In [50]:
# Bagged decision trees with out-of-bag evaluation switched on, so a
# generalisation estimate is available after fitting without a separate
# validation split.
bag = BaggingRegressor(
    estimator=DecisionTreeRegressor(),
    n_estimators=500,   # number of trees in the ensemble
    max_samples=0.25,   # float => fraction of the training rows drawn per tree
    bootstrap=True,     # rows are drawn with replacement
    oob_score=True,     # request the out-of-bag score during fit
    n_jobs=-1,          # run on all available processors
    random_state=42,    # fixed seed so the resampling is repeatable
)

In [51]:
# Fit the ensemble (constructed with oob_score=True) on the training split.
bag.fit(X_train,y_train)

In [52]:
# Out-of-bag estimate of the score, populated by fit because the ensemble
# was built with oob_score=True.
bag.oob_score_

0.4696425443799265

In [53]:
# Predict on the held-out split so the test-set score can be compared with
# the out-of-bag estimate.
y_pred = bag.predict(X_test)

# r2_score is the coefficient of determination for a regression task, not a
# classification accuracy, so the label says R2.
print("R2-Score", r2_score(y_test, y_pred))

Accuracy Score 0.4815735196194312


## Applying GridSearchCV

In [54]:
from sklearn.model_selection import GridSearchCV

In [55]:
# Hyper-parameter grid for BaggingRegressor. Each key is a constructor
# argument and each list holds the candidate values to try.
parameters = {
    'n_estimators': [50, 100, 150],
    'max_samples': [0.1, 1.0],
    'bootstrap': [True, False],
    'max_features': [0.1, 1.0],
    # Pinned to a single value, so it adds no extra combinations to the search.
    'n_jobs': [-1],
}

In [56]:
# Exhaustive search over `parameters` with 5-fold cross-validation.
# The default base estimator of BaggingRegressor is DecisionTreeRegressor().
# random_state is fixed (as for every other ensemble in this notebook) so the
# resampling inside each candidate, and therefore the selected parameters,
# are reproducible across runs.
search = GridSearchCV(BaggingRegressor(random_state=42), parameters, cv=5)

In [57]:
# Run the grid search on the training split only; the test split stays unseen.
search.fit(X_train,y_train)

In [58]:
# Mean cross-validated score of the best parameter combination (the
# estimator's default scorer is used, since no `scoring` was passed).
search.best_score_

0.4283175559949532

In [59]:
# Parameter combination from `parameters` that achieved the best score.
search.best_params_

{'bootstrap': True,
 'max_features': 1.0,
 'max_samples': 0.1,
 'n_estimators': 100,
 'n_jobs': -1}