# Bagging Classification

In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Synthetic classification data: 10,000 rows, 10 features, 3 of them informative.
# random_state pins the generated data so the accuracies in this notebook are
# reproducible after Restart Kernel -> Run All, like the seeded split and models.
X, y = make_classification(
    n_samples=10000,
    n_features=10,
    n_informative=3,
    random_state=42,
)

In [19]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Decision Tree Classifier

In [20]:
# Baseline: a single seeded decision tree fit on the training split and
# scored on the held-out test split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)

print('Decision Tree Accuracy: ', accuracy_score(y_test, y_pred))

Decison Tree Accuracy:  0.8405


## Bagging

In [36]:
# Bagging: 100 decision trees, each fit on a bootstrap sample sized at 25% of
# the training rows (bootstrap=True -> rows drawn with replacement).
bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=100,
    random_state=42,
    bootstrap=True,
    max_samples=0.25,
)

In [37]:
# Train the bagged trees on the training split.
bag.fit(X=X_train, y=y_train)

In [38]:
# Ensemble predictions for the held-out rows.
y_pred = bag.predict(X=X_test)

In [39]:
# Test accuracy of the bagged trees (compare with the single-tree baseline).
accuracy_score(y_true=y_test, y_pred=y_pred)

0.876

In [40]:
# Number of training-row indices drawn for the first estimator:
# max_samples=0.25 of the 8,000 training rows -> 2,000 indices.
bag.estimators_samples_[0].shape

(2000,)

### Bagging using SVC

In [26]:
# Same bagging setup with a support-vector classifier as the base model:
# 500 SVCs, each fit on a bootstrap sample of 25% of the training rows.
bag = BaggingClassifier(
    SVC(),
    n_estimators=500,
    random_state=42,
    bootstrap=True,
    max_samples=0.25,
)

In [27]:
# Train the bagged SVCs on the training split.
bag.fit(X=X_train, y=y_train)

In [28]:
# Predictions of the SVC ensemble for the held-out rows.
y_pred = bag.predict(X=X_test)

In [29]:
# Test accuracy of the bagged SVCs.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8655

## Pasting

In [30]:
# Pasting: same row subsampling as bagging (25% of the training rows per
# tree), but bootstrap=False, so rows are drawn without replacement.
bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=False,
    n_jobs=-1,   # run fit/predict in parallel workers
    verbose=1,   # print the [Parallel(...)] progress lines
    random_state=42,
)

In [31]:
# Train the pasting ensemble on the training split.
bag.fit(X=X_train, y=y_train)

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   2 out of  12 | elapsed:    3.5s remaining:   17.8s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    3.6s finished


In [32]:
# Predictions of the pasting ensemble for the held-out rows.
y_pred = bag.predict(X=X_test)

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   2 out of  12 | elapsed:    0.0s remaining:    0.4s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    0.2s finished


In [33]:
# Test accuracy of the pasting ensemble.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.874

## Random Subspaces

In [41]:
# Random subspaces: every tree is given all training rows (max_samples=1.0,
# bootstrap=False) but only a random draw of half the feature columns.
# NOTE(review): bootstrap_features=True draws columns with replacement, so a
# tree can receive the same column more than once -- confirm this is intended.
bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    # row sampling: disabled
    max_samples=1.0,
    bootstrap=False,
    # feature sampling: 50% of the columns per tree
    max_features=0.5,
    bootstrap_features=True,
    n_jobs=-1,
    verbose=1,
    random_state=42,
)

In [42]:
# Train the random-subspaces ensemble on the training split.
bag.fit(X=X_train, y=y_train)

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   2 out of  12 | elapsed:   22.7s remaining:  1.9min
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:   24.7s finished


In [43]:
# Predictions of the random-subspaces ensemble for the held-out rows.
y_pred = bag.predict(X=X_test)

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   2 out of  12 | elapsed:    0.2s remaining:    1.4s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.0s finished


In [44]:
# Test accuracy of the random-subspaces ensemble.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.868

In [46]:
# Training-row indices used by each estimator. With max_samples=1.0 and
# bootstrap=False, the output below shows every estimator using rows 0..7999.
# NOTE(review): this displays one array per estimator (500 here); consider
# slicing (e.g. [:3]) to keep the rendered output short.
bag.estimators_samples_

[array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 7997, 7998, 7999]),
 array([   0,    1,    2, ..., 

In [48]:
# Feature (column) indices given to each estimator: 5 of the 10 columns
# (max_features=0.5). Because bootstrap_features=True, an index can repeat
# within one estimator, as in the first array of the output below.
bag.estimators_features_

[array([9, 2, 9, 7, 7]),
 array([7, 3, 7, 3, 9]),
 array([6, 0, 7, 7, 9]),
 array([0, 5, 7, 4, 1]),
 array([6, 9, 6, 3, 3]),
 array([5, 2, 1, 2, 7]),
 array([3, 2, 1, 5, 2]),
 array([6, 2, 8, 2, 5]),
 array([8, 4, 4, 3, 2]),
 array([8, 1, 8, 9, 0]),
 array([3, 2, 9, 8, 4]),
 array([4, 7, 8, 6, 7]),
 array([3, 9, 4, 8, 6]),
 array([0, 9, 6, 7, 4]),
 array([0, 8, 2, 0, 2]),
 array([8, 4, 6, 6, 8]),
 array([7, 7, 2, 0, 3]),
 array([8, 4, 2, 8, 1]),
 array([7, 4, 2, 8, 6]),
 array([8, 1, 5, 0, 2]),
 array([5, 8, 8, 3, 0]),
 array([4, 6, 2, 7, 7]),
 array([1, 5, 2, 7, 1]),
 array([8, 8, 6, 9, 9]),
 array([1, 4, 2, 3, 7]),
 array([4, 1, 3, 1, 4]),
 array([8, 8, 0, 0, 8]),
 array([4, 2, 3, 2, 8]),
 array([2, 7, 8, 8, 6]),
 array([8, 5, 2, 7, 4]),
 array([3, 0, 6, 0, 6]),
 array([7, 6, 8, 0, 4]),
 array([3, 9, 7, 5, 8]),
 array([4, 5, 0, 5, 4]),
 array([4, 8, 7, 2, 0]),
 array([9, 2, 1, 0, 0]),
 array([8, 4, 4, 1, 5]),
 array([9, 7, 9, 3, 8]),
 array([8, 8, 9, 6, 3]),
 array([8, 9, 5, 3, 5]),


## Random Patches

In [53]:
# Random patches: sample both rows and columns for every tree --
# a bootstrap sample of 50% of the training rows and 50% of the features.
bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    # row sampling
    max_samples=0.5,
    bootstrap=True,
    # feature sampling
    max_features=0.5,
    bootstrap_features=True,
    n_jobs=-1,
    verbose=1,
    random_state=42,
)

In [54]:
# Train the random-patches ensemble on the training split.
bag.fit(X=X_train, y=y_train)

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   2 out of  12 | elapsed:    7.1s remaining:   36.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    8.2s finished


In [55]:
# Predictions of the random-patches ensemble for the held-out rows.
y_pred = bag.predict(X=X_test)

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   2 out of  12 | elapsed:    0.1s remaining:    0.7s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    0.4s finished


In [56]:
# Test accuracy of the random-patches ensemble.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8745

In [57]:
# Training-row indices drawn for each estimator. With bootstrap=True an index
# can occur more than once within a sample (e.g. 7155 in the sixth array of
# the output below).
# NOTE(review): this displays one array per estimator (500 here); consider
# slicing (e.g. [:3]) to keep the rendered output short.
bag.estimators_samples_

[array([5011, 4466,  632, ..., 4138,  870, 3686]),
 array([3429, 4330,  515, ..., 7024, 7759,   51]),
 array([2298, 1192, 7356, ..., 1359, 2475, 1397]),
 array([5331,  959, 2422, ..., 6817,  832, 4035]),
 array([4104, 7646, 6243, ..., 7038, 3275, 6833]),
 array([2777, 7155, 1485, ..., 3946, 2564, 7155]),
 array([ 227,  895, 5937, ..., 4988,  401, 2494]),
 array([3161, 6576, 1236, ..., 3456, 3641, 4089]),
 array([3980, 7795, 4662, ..., 5546, 2150, 3376]),
 array([5575, 3894, 7994, ..., 2889,  736, 4201]),
 array([3371, 6086,  726, ..., 5830, 6815, 1218]),
 array([7374, 5553,  204, ..., 3690, 6251, 7185]),
 array([ 350, 6177, 1573, ..., 5865, 6871, 3481]),
 array([4864, 3781, 6313, ..., 6605, 1123, 4100]),
 array([4731, 7693, 3820, ..., 2474, 1326, 1708]),
 array([2769, 4054,  275, ..., 3978, 5458, 3679]),
 array([1703, 1354, 1173, ...,  403, 5003, 6694]),
 array([6708, 6406, 4267, ..., 2592, 3867, 7559]),
 array([3352, 1055, 6838, ..., 2338, 3688, 2911]),
 array([1047, 1158, 7386, ..., 

In [59]:
# Feature (column) indices given to each estimator: 5 of the 10 columns
# (max_features=0.5), drawn with replacement (bootstrap_features=True), so
# repeated indices within one array are expected.
bag.estimators_features_

[array([9, 2, 9, 7, 7]),
 array([7, 3, 7, 3, 9]),
 array([6, 0, 7, 7, 9]),
 array([0, 5, 7, 4, 1]),
 array([6, 9, 6, 3, 3]),
 array([5, 2, 1, 2, 7]),
 array([3, 2, 1, 5, 2]),
 array([6, 2, 8, 2, 5]),
 array([8, 4, 4, 3, 2]),
 array([8, 1, 8, 9, 0]),
 array([3, 2, 9, 8, 4]),
 array([4, 7, 8, 6, 7]),
 array([3, 9, 4, 8, 6]),
 array([0, 9, 6, 7, 4]),
 array([0, 8, 2, 0, 2]),
 array([8, 4, 6, 6, 8]),
 array([7, 7, 2, 0, 3]),
 array([8, 4, 2, 8, 1]),
 array([7, 4, 2, 8, 6]),
 array([8, 1, 5, 0, 2]),
 array([5, 8, 8, 3, 0]),
 array([4, 6, 2, 7, 7]),
 array([1, 5, 2, 7, 1]),
 array([8, 8, 6, 9, 9]),
 array([1, 4, 2, 3, 7]),
 array([4, 1, 3, 1, 4]),
 array([8, 8, 0, 0, 8]),
 array([4, 2, 3, 2, 8]),
 array([2, 7, 8, 8, 6]),
 array([8, 5, 2, 7, 4]),
 array([3, 0, 6, 0, 6]),
 array([7, 6, 8, 0, 4]),
 array([3, 9, 7, 5, 8]),
 array([4, 5, 0, 5, 4]),
 array([4, 8, 7, 2, 0]),
 array([9, 2, 1, 0, 0]),
 array([8, 4, 4, 1, 5]),
 array([9, 7, 9, 3, 8]),
 array([8, 8, 9, 6, 3]),
 array([8, 9, 5, 3, 5]),


### OOB Score

In [61]:
# Bagging with out-of-bag scoring: same configuration as the first bagging
# model (100 trees, 25% bootstrap samples) plus oob_score=True, which makes
# the fitted ensemble expose `oob_score_`.
bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=100,
    random_state=42,
    bootstrap=True,
    max_samples=0.25,
    oob_score=True,
)

In [62]:
# Train the ensemble; the out-of-bag score is computed as part of fitting.
bag.fit(X=X_train, y=y_train)

In [64]:
# Predictions for the held-out rows, to compare against the OOB estimate.
y_pred = bag.predict(X=X_test)

In [65]:
# Out-of-bag score of the fitted ensemble (available because it was built
# with oob_score=True); compare it with the held-out test accuracy computed
# in the next cell.
bag.oob_score_

0.89025

In [66]:
# Held-out test accuracy, for comparison with the out-of-bag score above.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.876