In [48]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier,BaggingRegressor
from sklearn.datasets import make_classification,load_breast_cancer

In [3]:
# Synthetic classification data: 10,000 rows, 10 features, 3 of them informative.
# random_state is pinned so a fresh kernel regenerates exactly the same
# dataset; without it every run produces different data and different scores.
X,y = make_classification(n_samples=10000,n_features=10,n_informative=3,random_state=42)

In [4]:
# Sanity check: feature matrix dimensions as (n_samples, n_features).
X.shape

(10000, 10)

In [5]:
# Sanity check: one label per row of X.
y.shape

(10000,)

In [11]:
# Hold out 20% of the rows for evaluation; the seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Baseline: a single seeded decision tree, scored on the held-out split.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
ypred = dt.predict(X_test)

print('Accuracy = ', accuracy_score(y_true=y_test, y_pred=ypred))

Accuracy =  0.868


# Bagging Classification

In [14]:
# Bagging: 500 decision trees, each fitted on a bootstrap sample
# (bootstrap=True) sized at half of the training rows.
bgcls = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=True,
    random_state=42,
)

In [15]:
# Train the bagging ensemble on the training split.
bgcls.fit(X=X_train, y=y_train)

In [16]:
# Predicted class labels for the held-out rows.
y_pred = bgcls.predict(X=X_test)

In [17]:
# Report held-out accuracy of the bagging ensemble.
print('Accuracy of Bagging_Classifier = ', accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy of Bagging_Classifier =  0.91


In [24]:
# Row indices drawn for the first base estimator. With max_samples=0.5 on the
# 8000-row training split (80% of 10000) this is expected to be 4000 entries.
bgcls.estimators_samples_[0].shape

(4000,)

In [25]:
# Feature indices used by the first base estimator. bgcls does not set
# max_features, and the recorded output shows all 10 columns in use.
bgcls.estimators_features_[0].shape

(10,)

# Pasting 

In [36]:
# Pasting: same ensemble as bagging, but bootstrap=False so each tree's
# half-size subset of rows is drawn without replacement.
bgpcls = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=False,
    random_state=42,
)

In [37]:
# Train the pasting ensemble on the training split.
bgpcls.fit(X=X_train, y=y_train)

In [38]:
# Score the pasting ensemble on the held-out rows.
# Note: this rebinds y_pred, replacing the bagging predictions from earlier.
y_pred = bgpcls.predict(X=X_test)
print('Accuracy of Bagging_Classifier = ', accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy of Bagging_Classifier =  0.908


# Random Subspaces 

In [42]:
# Random subspaces: every tree sees all training rows (max_samples=1.0,
# bootstrap=False) but only a sampled half of the features, with the
# feature draw done with replacement (bootstrap_features=True).
bgfcls = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    # row sampling: disabled
    max_samples=1.0,
    bootstrap=False,
    # feature sampling: enabled
    max_features=0.5,
    bootstrap_features=True,
    random_state=42,
)

In [43]:
# Train the random-subspaces ensemble on the training split.
bgfcls.fit(X=X_train, y=y_train)

In [44]:
# Score the random-subspaces ensemble on the held-out rows.
# Note: this rebinds y_pred, replacing the predictions of the previous model.
y_pred = bgfcls.predict(X=X_test)
print('Accuracy of Bagging_Classifier = ', accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy of Bagging_Classifier =  0.8965


In [46]:
# Feature indices drawn for the first base estimator. With max_features=0.5
# on 10 features this is expected to be 5 entries.
bgfcls.estimators_features_[0].shape

(5,)

In [47]:
# Row indices used by the first base estimator. With max_samples=1.0 this is
# expected to cover the full 8000-row training split.
bgfcls.estimators_samples_[0].shape

(8000,)

# Bagging Regressor

In [55]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV

In [56]:
# Load scikit-learn's bundled breast-cancer dataset. The returned object's
# .feature_names, .data and .target attributes are used in the cells below.
data = load_breast_cancer()

In [57]:
# Dataset overview: column names, then feature-matrix and target shapes,
# one item per line.
print(data.feature_names, data.data.shape, data.target.shape, sep='\n')

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
(569, 30)
(569,)


In [58]:
# Rebind X and y to the breast-cancer features and target. From here on they
# no longer refer to the synthetic make_classification data used above.
# NOTE(review): data.target appears to be a class label, yet the cells below
# fit regressors and report R2 on it — confirm this is intentional.
X, y = data.data, data.target

In [61]:
# 80/20 split of the breast-cancer data with a fixed seed, then a quick check
# of the four resulting array shapes.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.80,
    test_size=0.20,
    random_state=123,
)
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(455, 30) (114, 30) (455,) (114,)


### Without Bagging Regressor

In [62]:
# Three stand-alone baseline regressors to compare against the bagged model.
lr = LinearRegression()
kl = KNeighborsRegressor()
# Tree construction permutes features at each split, so an unseeded tree can
# give a different R2 on every run; pin the seed for reproducible output.
dtr = DecisionTreeRegressor(random_state=42)

In [63]:
# Fit each baseline regressor on the same training split.
lr.fit(X=X_train, y=y_train)
kl.fit(X=X_train, y=y_train)
dtr.fit(X=X_train, y=y_train)

In [64]:
# Held-out predictions, in the order: linear regression, k-NN, decision tree.
y_pred1, y_pred2, y_pred3 = (
    fitted.predict(X_test) for fitted in (lr, kl, dtr)
)

In [65]:
# Held-out R2 for each baseline regressor.
print("R2 for lr = ", r2_score(y_true=y_test, y_pred=y_pred1))
print("R2 for kl = ", r2_score(y_true=y_test, y_pred=y_pred2))
print("R2 for dtr = ", r2_score(y_true=y_test, y_pred=y_pred3))

R2 for lr =  0.7622146624106364
R2 for kl =  0.8491680588038757
R2 for dtr =  0.885733377881724


# Applying Bagging Regressor

In [70]:
# BaggingRegressor with default settings, except that random_state is pinned:
# the ensemble draws random bootstrap samples, so without a seed both R2
# values below change from run to run.
bgr = BaggingRegressor(random_state=42)
bgr.fit(X_train,y_train)
Ypred = bgr.predict(X_test)
# Compare train and test R2 to see how far the ensemble overfits.
print('R2 for bgr on train data = ',bgr.score(X_train,y_train))
print('R2 for bgr on test data = ',bgr.score(X_test,y_test))

R2 for bgr on train data =  0.9717990692694177
R2 for bgr on test data =  0.9066822586034079
