## Importing all the files

In [30]:
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import warnings

## We will generate the toy dataset using make_classification

In [31]:
X,y = make_classification(n_samples=20000, n_features=10,n_informative=3)
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

## fitting the decision tree
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train,y_train)
y_pred = dt.predict(X_test)
print("Decision Tree accuracy",accuracy_score(y_test,y_pred)*100)

Decision Tree accuracy 92.925


## Now we should learn Bagging how to do this?

In [32]:
bag = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=0.5,
    bootstrap=True,
    random_state=42,
    verbose=True
)

In [33]:
warnings.filterwarnings('ignore')
bag.fit(X_train,y_train)
warnings.filterwarnings('ignore')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   20.2s finished


In [34]:
warnings.filterwarnings('ignore')
y_pred = bag.predict(X_test)
z = accuracy_score(y_test,y_pred)
warnings.filterwarnings('ignore')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


In [35]:
z*100

95.625

In [36]:
bag.estimators_samples_[0].shape

(8000,)

In [37]:
bag.estimators_features_[0].shape

(10,)

## Pasting

##### this is done usually without replacement means bootstrap = false;

In [39]:
warnings.filterwarnings('ignore')
bag = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=False,
    random_state=42,
    verbose = True,
    n_jobs=-1
)
bag.fit(X_train,y_train)
y_pred = bag.predict(X_test)
print("Pasting classifier",accuracy_score(y_test,y_pred)*100)
warnings.filterwarnings('ignore')

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   2 out of  12 | elapsed:    6.5s remaining:   32.8s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    6.8s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   2 out of  12 | elapsed:    0.0s remaining:    0.6s


Pasting classifier 95.625


[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    0.3s finished
