In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

In [3]:
# Synthetic classification dataset: 10,000 samples with 10 features,
# of which 3 are informative.
# random_state is pinned so the generated data is identical on every
# Restart & Run All; without it each run draws a different dataset and
# the accuracies reported below cannot be reproduced.
x, y = make_classification(
    n_samples=10000,
    n_features=10,
    n_informative=3,
    random_state=42,
)

In [4]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the
# split stable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Baseline: one seeded decision tree fitted on the training split and
# scored on the held-out test split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X=x_train, y=y_train)
y_pred = dt.predict(X=x_test)

print(
    "Decision Tree accuracy = ",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

Decision Tree accuracy =  0.927


# Bagging

In [11]:
# Bagging: 500 decision trees, each trained on a bootstrap sample
# (bootstrap=True, i.e. drawn with replacement) sized at 0.25 of the
# training set.
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    random_state=42,
)

In [12]:
# Train the bagged ensemble on the training split.
bag.fit(X=x_train, y=y_train)

In [13]:
# Predict labels for the held-out test split.
y_pred = bag.predict(X=x_test)

In [14]:
# Report test-set accuracy of the bagged trees.
print(
    "bagging accuracy = ",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

bagging accuracy =  0.941


# Bagging using SVM (row sampling only)

In [15]:
# Same bagging setup as for the trees, but with a default SVC as the
# base estimator: 500 models, bootstrap row samples of size 0.25.
bag = BaggingClassifier(
    estimator=SVC(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    random_state=42,
)

In [16]:
# Fit the SVC ensemble, predict on the test split and report accuracy.
bag.fit(X=x_train, y=y_train)
y_pred = bag.predict(X=x_test)
print(
    "SVM bagging accuracy = ",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

SVM bagging accuracy =  0.9475


# Pasting (without replacement)

In [18]:
# Pasting: row subsets of size 0.25 are drawn WITHOUT replacement
# (bootstrap=False). verbose=True prints joblib progress and n_jobs=-1
# runs the estimators in parallel.
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=False,
    verbose=True,
    n_jobs=-1,
    random_state=42,
)

In [19]:
# Fit the pasting ensemble, predict on the test split and report accuracy.
bag.fit(X=x_train, y=y_train)
y_pred = bag.predict(X=x_test)
print(
    "pasting bagging accuracy = ",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   2 out of  12 | elapsed:    4.3s remaining:   22.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    4.5s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   2 out of  12 | elapsed:    0.0s remaining:    0.1s


pasting bagging accuracy =  0.944


[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    0.0s finished


# Random Subspaces (column sampling only)

In [23]:
# Random subspaces: every estimator sees all rows (max_samples=1.0,
# bootstrap=False) but only a 0.5 fraction of the features, drawn
# without replacement (bootstrap_features=False).
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=1.0,
    bootstrap=False,
    max_features=0.5,
    bootstrap_features=False,
    random_state=42,
    n_jobs=-1,
)

In [24]:
# Fit the random-subspaces ensemble, predict on the test split and
# report accuracy.
bag.fit(x_train, y_train)
y_pred = bag.predict(x_test)
# The label previously read "pasting bagging accuracy" (copy-pasted from
# the pasting cell); it now names the method this cell evaluates.
print("random subspaces accuracy = ", accuracy_score(y_test, y_pred))

pasting bagging accuracy =  0.9415


# Random Patches (both row and column sampling)

In [25]:
# Random patches: sample both rows (0.25, with replacement) and
# features (0.5, with replacement) for every estimator.
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    max_features=0.5,
    bootstrap_features=True,
    random_state=42,
    n_jobs=-1,
)

In [26]:
# Fit the random-patches ensemble, predict on the test split and
# report accuracy.
bag.fit(x_train, y_train)
y_pred = bag.predict(x_test)
# The label previously read "pasting bagging accuracy" (copy-pasted from
# the pasting cell); it now names the method this cell evaluates.
print("random patches accuracy = ", accuracy_score(y_test, y_pred))

pasting bagging accuracy =  0.944


# OOB score (out-of-bag samples)

In [27]:
# Bagging with oob_score=True so that, after fitting, the ensemble
# exposes an out-of-bag score via `oob_score_`.
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    oob_score=True,
    random_state=42,
)

In [29]:
# Fit the bagging ensemble, report test-set accuracy, then display the
# out-of-bag score for comparison.
bag.fit(x_train, y_train)
y_pred = bag.predict(x_test)
# This ensemble uses bootstrap=True, i.e. bagging; the label previously
# read "pasting bagging accuracy" (copy-pasted from the pasting cell).
print("bagging accuracy = ", accuracy_score(y_test, y_pred))
# Last expression of the cell: shown as the cell's output.
bag.oob_score_

pasting bagging accuracy =  0.941


0.940875

# Applying GridSearchCV

In [30]:
from sklearn.model_selection import GridSearchCV as GSCV

In [34]:
# Search space for the bagging ensemble.
# 2 * 3 * 4 * 2 * 4 * 2 * 1 = 384 parameter combinations, each of which
# is fitted once per cross-validation fold, so this search is expensive.
parameters = {
    # Base learner wrapped by the ensemble.
    "estimator": [DecisionTreeClassifier(), SVC()],
    # Ensemble size.
    "n_estimators": [50, 100, 500],
    # Row sampling: fraction of samples and with/without replacement.
    "max_samples": [0.1, 0.4, 0.7, 1.0],
    "bootstrap": [True, False],
    # Column sampling: fraction of features and with/without replacement.
    "max_features": [0.1, 0.4, 0.7, 1.0],
    "bootstrap_features": [True, False],
    # Single value: not tuned, only forces parallel fitting of the ensemble.
    "n_jobs": [-1],
}

In [35]:
# 5-fold grid search over `parameters`.
# The BaggingClassifier is seeded so that candidate scores and the
# selected best parameters are reproducible between runs; every other
# ensemble in this notebook uses random_state=42, and an unseeded
# estimator here would make the search results vary run to run.
search = GSCV(
    BaggingClassifier(random_state=42),
    parameters,
    cv=5,
)

In [36]:
# Run the grid search on the training split (long-running).
search.fit(X=x_train, y=y_train)

KeyboardInterrupt: 