In [1]:
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [2]:
x,y = make_classification(n_samples=10000,n_features=10,n_informative=3)

In [3]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

Decision Tree

In [4]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(x_train,y_train)
y_pred = dt.predict(x_test)
print("DecisionTreeClassifier accuracy", accuracy_score(y_test,y_pred))

DecisionTreeClassifier accuracy 0.907


In [6]:
bag = BaggingClassifier(
  estimator=DecisionTreeClassifier(),
  n_estimators=500,
  max_samples=0.25,
  bootstrap=True,
  random_state=42
)

In [7]:
bag.fit(x_train,y_train)

In [8]:
y_pred = bag.predict(x_test)

In [10]:
accuracy_score(y_test,y_pred)

0.9325

Bagging using SVM

In [12]:
bag = BaggingClassifier(
  estimator=SVC(),
  n_estimators=500,
  max_samples=0.25,
  bootstrap=True,
  random_state=42
)

In [13]:
bag.fit(x_train,y_train)
y_pred = bag.predict(x_test)
accuracy_score(y_test,y_pred)

0.9195

Pasting

In [15]:
bag = BaggingClassifier(
  estimator=DecisionTreeClassifier(),
  n_estimators=500,
  max_samples=0.25,
  bootstrap=False,
  random_state=42,
  n_jobs=-1
)

In [16]:
bag.fit(x_train,y_train)
y_pred = bag.predict(x_test)
accuracy_score(y_test,y_pred)

0.933

Random Subspaces

In [27]:
bag = BaggingClassifier(
  estimator=DecisionTreeClassifier(),
  n_estimators=500,
  max_samples=1.0,
  bootstrap=False,
  random_state=42,
  max_features=5,
  bootstrap_features=True,
  n_jobs=-1
)

In [28]:
bag.fit(x_train,y_train)
y_pred = bag.predict(x_test)
accuracy_score(y_test,y_pred)

0.929

In [29]:
bag.estimators_samples_[0].shape

(8000,)

In [30]:
bag.estimators_features_[0].shape

(5,)

Random Patches

In [31]:
bag = BaggingClassifier(
  estimator=DecisionTreeClassifier(),
  n_estimators=500,
  max_samples=0.25,
  bootstrap=False,
  random_state=42,
  max_features=5,
  bootstrap_features=True,
  n_jobs=-1
)

In [32]:
bag.fit(x_train,y_train)
y_pred = bag.predict(x_test)
accuracy_score(y_test,y_pred)

0.924

OOB SCORE


In [33]:
bag = BaggingClassifier(
  estimator=DecisionTreeClassifier(),
  n_estimators=500,
  max_samples=0.25,
  bootstrap=True,
  random_state=42,
  max_features=5,
  n_jobs=-1,
  oob_score=True
)

In [34]:
bag.fit(x_train,y_train)

In [35]:
bag.oob_score_


0.928875

In [36]:
y_pred = bag.predict(x_test)
print(accuracy_score(y_test,y_pred))

0.9265


Applying GridSearchCV

In [37]:
from sklearn.model_selection import GridSearchCV

In [None]:
parameters = {
  'n_estimators': [50,100,200,300,400,500],
  'max_samples': [0.1,0.4,0.7,1.0],
  'bootstrap': [True,False],
  'max_features': [0.1,0.4,0.7,1.0]
}

search = GridSearchCV(BaggingClassifier(),parameters,cv=5,n_jobs=-1)

In [39]:
search.fit(x_train,y_train)

KeyboardInterrupt: 