In [1]:
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC 
from sklearn.model_selection import train_test_split

In [2]:
# Generate random classification data
X, Y = make_classification(n_samples=10000, n_features=10, n_informative=3,random_state=42)
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

In [3]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train,Y_train)
Y_pred = dt.predict(X_test)

print("Decision Tree Accurcay", accuracy_score(Y_test,Y_pred))

Decision Tree Accurcay 0.9265


## Bagging

In [4]:
bag = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=True, # sample with replacement
    random_state=42
)

In [5]:
bag.fit(X_train,Y_train)



In [6]:
bag.__dict__

{'estimator': None,
 'n_estimators': 500,
 'estimator_params': (),
 'base_estimator': DecisionTreeClassifier(),
 'max_samples': 0.5,
 'max_features': 1.0,
 'bootstrap': True,
 'bootstrap_features': False,
 'oob_score': False,
 'warm_start': False,
 'n_jobs': None,
 'random_state': 42,
 'verbose': 0,
 'n_features_in_': 10,
 '_n_samples': 8000,
 'classes_': array([0, 1]),
 'n_classes_': 2,
 'estimator_': DecisionTreeClassifier(),
 '_max_samples': 4000,
 '_max_features': 10,
 'estimators_': [DecisionTreeClassifier(random_state=1952926171),
  DecisionTreeClassifier(random_state=1761383086),
  DecisionTreeClassifier(random_state=1449071958),
  DecisionTreeClassifier(random_state=1910541088),
  DecisionTreeClassifier(random_state=1341730541),
  DecisionTreeClassifier(random_state=1286572245),
  DecisionTreeClassifier(random_state=1005142668),
  DecisionTreeClassifier(random_state=502852014),
  DecisionTreeClassifier(random_state=186414760),
  DecisionTreeClassifier(random_state=1956263048),


In [7]:
Y_pred = bag.predict(X_test)
accuracy_score(Y_test,Y_pred)

0.95

In [8]:
bag.estimators_samples_[0].shape

(4000,)

In [9]:
bag.estimators_samples_

[array([2523, 3113, 7114, ..., 5668, 7066, 3677]),
 array([4782,  663, 7155, ..., 3122, 2141, 5379]),
 array([5462, 6574, 4896, ..., 6686, 4871, 6424]),
 array([2848, 2629, 1591, ...,  595, 4333, 1671]),
 array([3821, 6494, 1606, ..., 1645, 3358,  388]),
 array([2261, 7922, 3649, ..., 7532, 6138, 1483]),
 array([ 652, 1676, 2291, ..., 2618,  113, 7628]),
 array([2478, 4107, 1958, ..., 5175, 7270, 3055]),
 array([5800, 3548, 6540, ..., 5313, 2336, 6608]),
 array([5256, 7181, 3409, ..., 3860, 3417, 5180]),
 array([2675, 2834, 3817, ...,  270, 2233,  103]),
 array([3236, 7607, 4600, ..., 5180, 5002, 3192]),
 array([4563, 4137, 6298, ..., 5296, 5676,  860]),
 array([2816, 5343, 5817, ..., 5489, 4283,  456]),
 array([2448, 2733, 5480, ..., 7046, 5681, 1372]),
 array([4248, 3828, 4630, ...,  767, 3976, 5795]),
 array([4815, 1867,  503, ..., 3767, 7526, 2513]),
 array([1688, 4132, 5218, ..., 5248, 3524, 6444]),
 array([4167, 2900, 3602, ..., 2322, 2670, 6725]),
 array([2920, 7645, 5452, ..., 

In [10]:
bag.estimators_features_[0].shape[0] # row sampling only, since cloumn were fixed.

10

## Bagging using SVM

In [11]:
bag = BaggingClassifier(
    base_estimator=SVC(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=True, # sample with replacement
    random_state=42
)

In [12]:
bag.fit(X_train,Y_train)




In [13]:
Y_pred = bag.predict(X_test)
print("Bagging using SVM",accuracy_score(Y_test,Y_pred))

Bagging using SVM 0.919


## Pasting

In [14]:
bag = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=False, # Sampling without replacement
    random_state=42,
    verbose=1, # during training it will show the result
    n_jobs=-1 # uses all cores of CPU
)

In [15]:
bag.fit(X_train,Y_train)
Y_pred = bag.predict(X_test)
print("Pasting Calssifier",accuracy_score(Y_test,Y_pred)) # concurent worker indicatw CPU

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:   11.4s remaining:   11.4s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:   11.7s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Pasting Calssifier 0.9525


[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.2s remaining:    0.2s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.3s finished


## Random Subspaces

In [16]:
bag = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=1.0,
    bootstrap=False,
    max_features=0.5, # Column Sampling
    bootstrap_features=True,
    random_state=42
)

In [17]:
bag.fit(X_train,Y_train)
Y_pred = bag.predict(X_test)
print('Random Subspaces Classifier', accuracy_score(Y_test,Y_pred))



Random Subspaces Classifier 0.9415


In [18]:
bag.estimators_samples_[0].shape

(8000,)

In [19]:
bag.estimators_features_[0].shape

(5,)

## Random Patches

In [20]:
bag = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=True, # roe sampling
    max_features=0.5, # Column Sampling
    bootstrap_features=True,
    random_state=42
)

In [21]:
bag.fit(X_train,Y_train)
Y_pred = bag.predict(X_test)
print("Random Patches Classifier", accuracy_score(Y_test,Y_pred))



Random Patches Classifier 0.9385


## OOB Score

In [22]:
bag = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=True, # roe sampling
    max_features=0.5, # Column Sampling
    bootstrap_features=True,
    oob_score=True, 
    random_state=42
)

In [23]:
bag.fit(X_train,Y_train)
Y_pred = bag.predict(X_test)
print("Random Patches Classifier", accuracy_score(Y_test,Y_pred))



Random Patches Classifier 0.9385


In [24]:
bag.oob_score

True