## Basic Decision Tree

In [1]:
## Importing the required libraries
from sklearn.datasets import make_classification

In [2]:
# Synthetic binary classification data: 10,000 rows, 10 features, 3 of them informative.
# The generator is seeded so the dataset is identical on every Restart & Run All;
# without a seed each run draws different data and the scores below cannot be reproduced.
# NOTE(review): the outputs recorded in this notebook came from an unseeded draw,
# so re-running will produce slightly different numbers.
X,y = make_classification(n_samples=10000, n_features=10, n_informative=3, random_state=42)

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Baseline: a single decision tree with default hyperparameters, scored on the
# held-out split. `fit` returns the estimator itself, so it can be chained.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred = clf.predict(X_test)

print('Decision tree accuracy', accuracy_score(y_test, y_pred))

Decision tree accuracy 0.9163333333333333


## Bagging (Row sampling with Replacement)

In [5]:
from sklearn.ensemble import BaggingClassifier

# Bagging: 500 decision trees, each trained on a bootstrap sample (rows drawn
# with replacement) whose size is 50% of the training set.
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the first positional slot works on both.
Bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=True,
    random_state=42)

In [6]:
# Train the bagging ensemble on the training split; the fitted estimator's repr is the cell output.
Bag.fit(X_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.5,
                  n_estimators=500, random_state=42)

In [7]:
# Predict labels for the held-out test rows with the fitted ensemble.
y_pred = Bag.predict(X_test)

In [8]:
# Fraction of held-out labels the bagging ensemble predicted correctly.
accuracy_score(y_test,y_pred)

0.939

In [9]:
# Shape of the row-index array drawn for the first base estimator
# (one entry per sampled training row).
Bag.estimators_samples_[0].shape

(3500,)

In [10]:
# Shape of the feature-index array used by the first base estimator.
Bag.estimators_features_[0].shape

(10,)

## Bagging using SVM

In [11]:
from sklearn.svm import SVC

# Bagging with a support-vector classifier as the base learner: 500 SVCs, each
# fitted on a bootstrap sample whose size is 25% of the training set.
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the first positional slot works on both.
Bag = BaggingClassifier(
    SVC(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    random_state=42)

In [12]:
# Fit the SVM ensemble and predict the held-out rows in one chained call
# (`fit` returns the estimator), then report test accuracy.
y_pred = Bag.fit(X_train, y_train).predict(X_test)
print(
    'Bagging using SVM',
    accuracy_score(y_test, y_pred),
)

Bagging using SVM 0.8913333333333333


## Pasting (Row sampling without replacement)

In [13]:
# Pasting: like bagging, but bootstrap=False draws each tree's rows WITHOUT
# replacement (25% of the training set per tree).
# n_jobs=-1 trains on all available cores; verbose=1 emits joblib progress logs.
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the first positional slot works on both.
Bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=False,
    random_state=42,
    verbose=1,
    n_jobs=-1)

In [14]:
# Fit the pasting ensemble and predict the held-out rows in one chained call
# (`fit` returns the estimator), then report test accuracy.
y_pred = Bag.fit(X_train, y_train).predict(X_test)
print(
    'Pasting Classifier',
    accuracy_score(y_test, y_pred),
)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:   31.2s remaining:   31.2s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:   31.7s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.5s remaining:    0.5s


Pasting Classifier 0.937


[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.8s finished


In [15]:
# Shape of the row-index array drawn for the first base estimator
# (one entry per sampled training row).
Bag.estimators_samples_[0].shape

(1750,)

In [16]:
# Shape of the feature-index array used by the first base estimator.
Bag.estimators_features_[0].shape

(10,)

## Random Subspaces With Column Sampling (With or Without Replacement)

In [17]:
# Random subspaces: every tree sees all training rows (max_samples=1.0,
# bootstrap=False) but only a random draw of half the feature columns.
# bootstrap_features=True draws those columns WITH replacement, so a tree may
# receive the same column more than once; set it to False to sample without
# replacement.
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the first positional slot works on both.
Bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=1.0,
    bootstrap=False,
    max_features=0.5,
    bootstrap_features=True,
    random_state=42
)

In [18]:
# Fit the random-subspaces ensemble and predict the held-out rows in one chained
# call (`fit` returns the estimator), then report test accuracy.
y_pred = Bag.fit(X_train, y_train).predict(X_test)
print(
    'Random Subspaces Classifier',
    accuracy_score(y_test, y_pred),
)

Random Subspaces Classifier 0.9256666666666666


In [19]:
# Shape of the row-index array for the first base estimator
# (one entry per training row it was given).
Bag.estimators_samples_[0].shape

(7000,)

In [20]:
# Shape of the feature-index array drawn for the first base estimator
# (one entry per sampled column).
Bag.estimators_features_[0].shape

(5,)

## Random Patches (Row Sampling and Column Sampling)

In [21]:
# Random patches: each tree is trained on a random subset of BOTH rows and
# columns. Here the row sample is 25% of the training set and the column sample
# is 50% of the features, both drawn with replacement.
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the first positional slot works on both.
Bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    max_features=0.5,
    bootstrap_features=True,
    random_state=42
)

In [22]:
# Fit the random-patches ensemble and predict the held-out rows in one chained
# call (`fit` returns the estimator), then report test accuracy.
y_pred = Bag.fit(X_train, y_train).predict(X_test)
print(
    "Random Patches classifier",
    accuracy_score(y_test, y_pred),
)

Random Patches classifier 0.9106666666666666


## OOB Score

In [23]:
# Bagging with out-of-bag scoring: with bootstrap=True each tree leaves some
# training rows unsampled, and oob_score=True uses those rows to estimate
# generalisation accuracy without a separate validation set.
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the first positional slot works on both.
Bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    oob_score=True,
    random_state=42
)

In [24]:
# Train the ensemble on the training split; the fitted estimator's repr is the cell output.
Bag.fit(X_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.25,
                  n_estimators=500, oob_score=True, random_state=42)

In [25]:
# Out-of-bag score of the fitted ensemble, computed on the training data
# (available because the ensemble was built with oob_score=True).
Bag.oob_score_

0.9374285714285714

In [26]:
# Score the already-fitted ensemble on the held-out test split.
y_pred = Bag.predict(X_test)
print(
    "Accuracy",
    accuracy_score(y_test, y_pred),
)

Accuracy 0.929
