In [1]:
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [2]:
x,y = make_classification(n_samples = 10000, n_features=10,n_informative=3)

In [3]:
x_train,x_test,y_train, y_test = train_test_split(x,y,test_size = 0.2,random_state = 10)

In [4]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(x_train,y_train)
y_pred = dt.predict(x_test)

print("Decision tree accuracy : ",accuracy_score(y_test,y_pred))

Decision tree accuracy :  0.9505


## Bagging using Decision Tree

In [5]:
bag = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=True,
    random_state=42)

In [6]:
bag.fit(x_train,y_train)



In [7]:
y_pred = bag.predict(x_test)

In [8]:
accuracy_score(y_test,y_pred)

0.972

## Bagging Using SVC

In [9]:
bag1 = BaggingClassifier(estimator=SVC(),
                        n_estimators=500,
                        max_samples=0.25,
                        bootstrap=True,
                        random_state=42)

In [10]:
bag1.fit(x_train,y_train)

In [11]:
y_pred = bag1.predict(x_test)
print("Accuracy of bagging using SVC is :",accuracy_score(y_test,y_pred))

Accuracy of bagging using SVC is : 0.959


## Pasting

In [12]:
bag = BaggingClassifier(estimator= DecisionTreeClassifier(),
                        n_estimators=500,
                        max_samples=0.25,
                        bootstrap=False,
                        verbose=1,
                        n_jobs=-1,
                        random_state=42
                       )

In [13]:
bag.fit(x_train,y_train)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    4.4s remaining:    4.4s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    4.5s finished


In [14]:
y_pred = bag.predict(x_test)
print(accuracy_score(y_test,y_pred))

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.2s remaining:    0.2s


0.9705


[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.2s finished


## Random Subspaces

In [15]:
bag3 = BaggingClassifier(estimator=DecisionTreeClassifier(),
                        n_estimators=500,
                        max_samples=1.0,
                        max_features=0.5,
                        bootstrap=False,
                        random_state=42,bootstrap_features=True)

In [16]:
bag3.fit(x_train,y_train)

In [17]:
y_pred = bag3.predict(x_test)
print(accuracy_score(y_test,y_pred))

0.9655


## Random Patches

In [18]:
bag = BaggingClassifier(estimator=DecisionTreeClassifier(),
                       n_estimators=500,
                       max_samples=0.25,
                       max_features=0.5,
                       bootstrap=True,
                       bootstrap_features=True,
                       random_state=42)

In [19]:
bag.fit(x_train,y_train)

In [20]:
y_pred = bag.predict(x_test)
print(accuracy_score(y_test, y_pred))

0.965


## OOG Score 

In [21]:
bag = BaggingClassifier(estimator=DecisionTreeClassifier(),
                       n_estimators=500,
                       max_samples=0.25,
                       max_features=0.5,
                       bootstrap=True,
                       bootstrap_features=True,
                       random_state=42,
                       oob_score=True)

In [22]:
bag.fit(x_train,y_train)

In [23]:
y_pred = bag.predict(x_test)
print(accuracy_score(y_test,y_pred))

0.965


## Applying Grid Search CV

In [24]:
from sklearn.model_selection import GridSearchCV

In [25]:
parameters = {
    'n_estimators':[50,100,500],
    'max_samples':[0.1,0.4,0.7,1.0],
    'bootstrap':[True,False],
    'max_features':[0.1,0.4,0.7,1.0]
}

In [26]:
search = GridSearchCV(BaggingClassifier(),parameters,cv = 5)

In [27]:
search.fit(x_train,y_train)

In [28]:
y_pred = search.predict(x_test)
print(accuracy_score(y_test,y_pred))

0.974
