In [1]:
import numpy as np 
import pandas as pd 
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier 
from sklearn.svm import SVC

In [2]:
X,y = make_classification(n_samples=10000,n_features=10,n_informative=3)

In [3]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [4]:
dt = DecisionTreeClassifier()

In [5]:
dt.fit(X_train,y_train)

In [6]:
y_pred = dt.predict(X_test)

In [7]:
accuracy_score(y_test,y_pred)

0.884

# Bagging 

In [8]:
bag = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    random_state=42
)

In [9]:
bag.fit(X_train,y_train)



In [10]:
y_pred1 = bag.predict(X_test)

In [11]:
accuracy_score(y_test,y_pred1)

0.9035

In [12]:
bag.estimators_samples_[0].shape

(2000,)

In [13]:
bag.estimators_features_[0].shape

(10,)

# Bagging using SVM

In [30]:
bag1 = BaggingClassifier(
    base_estimator=SVC(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    random_state=42,
    n_jobs=-1
)

In [31]:
bag1.fit(X_train,y_train)



In [32]:
y_pred2=bag1.predict(X_test)

In [34]:
accuracy_score(y_test,y_pred2)

0.8895

# Pasting

In [35]:
bag2 = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=False,
    random_state=42,
    verbose = 1,
    n_jobs = -1
)

In [36]:
bag2.fit(X_train,y_train)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:   24.0s remaining:   24.0s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:   24.1s finished


In [37]:
y_pred3 = bag2.predict(X_test)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.3s remaining:    0.3s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.5s finished


In [38]:
accuracy_score(y_test,y_pred3)

0.9045

# Random Subspaces

In [39]:
# Not Row Sampling only Column Sampling
bag3 = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=1.0,
    bootstrap=False,
    random_state=42,
    max_features=5,
    bootstrap_features=True,
    verbose = 1,
    n_jobs = -1
)

In [40]:
bag3.fit(X_train,y_train)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.3min remaining:  1.3min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  1.3min finished


In [41]:
y_pred4 = bag3.predict(X_test)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.9s remaining:    0.9s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    1.5s finished


In [42]:
accuracy_score(y_test,y_pred4)

0.8985

In [43]:
bag3.estimators_samples_[0].shape

(8000,)

In [44]:
bag3.estimators_features_[0].shape

(5,)