In [1]:
from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
X,y=make_classification(n_samples=10000,n_features=10,n_informative=3)

In [3]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [4]:
dt=DecisionTreeClassifier(random_state=42)
dt.fit(X_train,y_train)
y_pred=dt.predict(X_test)
print("Decision Tree accuracy",accuracy_score(y_test,y_pred))    

Decision Tree accuracy 0.88


# Bagging

In [5]:
bag=BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                     n_estimators=500,
                     max_samples=0.5,
                     bootstrap=True,
                     random_state=42
                     )

In [6]:
bag.fit(X_train,y_train)



In [7]:
ypred_1=bag.predict(X_test)
print(accuracy_score(y_test,ypred_1))

0.9175


In [None]:
# Types of Bagging
# pasting
# random subspaces
# random patches

# Pasting

In [8]:
bag=BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                     n_estimators=500,
                     max_samples=0.25,
                     bootstrap=False,
                     random_state=42,
                      verbose=1,
                      n_jobs=-1
                     )

In [9]:
bag.fit(X_train,y_train)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:   18.3s remaining:   18.3s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:   19.8s finished


In [10]:
ypred_2=bag.predict(X_test)
print(accuracy_score(y_test,ypred_2))

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


0.9195


[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    4.1s remaining:    4.1s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    4.2s finished


# Random subspaces

In [11]:
bag=BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                     n_estimators=500,
                     max_samples=1.0,
                     bootstrap=False,
                    max_features=0.5,
                    bootstrap_features=True,
                     random_state=42
                     )

In [12]:
bag.fit(X_train,y_train)



In [13]:
ypred_3=bag.predict(X_test)
print(accuracy_score(y_test,ypred_3))

0.902


# Random patches

In [14]:
bag=BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                     n_estimators=500,
                     max_samples=0.25,
                     bootstrap=True,
                    max_features=0.5,
                    bootstrap_features=True,
                     random_state=42
                     )

In [15]:
bag.fit(X_train,y_train)



In [16]:
ypred_4=bag.predict(X_test)
print(accuracy_score(y_test,ypred_4))

0.9075


# OOB Score (out of bag)

In [17]:
bag=BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                     n_estimators=500,
                     max_samples=0.25,
                     bootstrap=True,
                      oob_score=True,
                      random_state=42)

In [18]:
bag.fit(X_train,y_train)



In [19]:
bag.oob_score_

0.91525

In [20]:
ypred_5=bag.predict(X_test)
print(accuracy_score(y_test,ypred_5))    

0.916


# Applying GridSearchCV

In [21]:
from sklearn.model_selection import GridSearchCV

In [22]:
parameters={
    'n_estimators': [50,100,500], 
    'max_samples': [0.1,0.4,0.7,1.0],
    'bootstrap' : [True,False],
    'max_features' : [0.1,0.4,0.7,1.0]
    }

In [23]:
search=GridSearchCV(BaggingClassifier(), parameters, cv=5)

In [None]:
search.fit(X_train,y_train)

In [None]:
search.best_score_

In [None]:
search.best_params_