In [2]:
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [3]:
# Synthetic classification data: 10,000 rows, 10 features, 3 of them informative.
# random_state pins the generated dataset; without it every kernel restart
# produces different data and the accuracies below cannot be reproduced.
X, y = make_classification(
    n_samples=10000,
    n_features=10,
    n_informative=3,
    random_state=42,
)

In [4]:
# Hold out 20% of the rows as a test set; the seed fixes which rows are held out.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [5]:
# Baseline: a single, fully grown decision tree to compare the ensembles against.
# The tree randomly permutes features at each split, so it is seeded to keep
# the baseline accuracy stable from run to run.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

# Score the baseline on the held-out test set.
y_pred = dt.predict(X_test)
accuracy_score(y_test, y_pred)

0.899

### Bagging

In [6]:
# Bagging: 500 decision trees, each trained on a sample drawn WITH replacement
# (bootstrap=True) whose size is 25% of the training set.
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    random_state=42,
)

# fit() returns the fitted ensemble, so training and prediction can be chained.
y_pred = bag.fit(X_train, y_train).predict(X_test)
accuracy_score(y_test, y_pred)

0.936

### Bagging Using SVM

In [7]:
# Same bagging configuration as the tree ensemble, but every one of the 500
# members is a default-configured SVC instead of a decision tree.
bag = BaggingClassifier(
    estimator=SVC(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    random_state=42,
)

# Train the ensemble, then score its predictions on the held-out test set.
y_pred = bag.fit(X_train, y_train).predict(X_test)
accuracy_score(y_test, y_pred)

0.93

### Pasting

In [8]:
# Pasting: identical to bagging except bootstrap=False, i.e. each tree's 25%
# sample of the training rows is drawn WITHOUT replacement.
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=False,
    random_state=42,
    n_jobs=-1,   # spread fitting/prediction over all available cores
    verbose=1,   # emit joblib progress messages
)

# Train the ensemble, then score its predictions on the held-out test set.
y_pred = bag.fit(X_train, y_train).predict(X_test)
accuracy_score(y_test, y_pred)

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    5.8s remaining:   17.5s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    6.0s finished
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.1s finished


0.937

### Random Subspaces

In [9]:
# Random Subspaces: every tree sees all training rows, but only a random
# selection of the features.
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    # Row sampling is effectively switched off: 100% of rows, no replacement.
    max_samples=1.0,
    bootstrap=False,
    # Feature sampling: half the feature count, drawn with replacement.
    max_features=0.5,
    bootstrap_features=True,
    random_state=42,
)

# Train the ensemble, then score its predictions on the held-out test set.
y_pred = bag.fit(X_train, y_train).predict(X_test)
accuracy_score(y_test, y_pred)

0.933

### Random Patches

In [10]:
# Random Patches: both the rows and the features are subsampled for every tree.
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    # Row sampling: 25% of the training set, drawn with replacement.
    max_samples=0.25,
    bootstrap=True,
    # Feature sampling: half the feature count, drawn with replacement.
    max_features=0.5,
    bootstrap_features=True,
    random_state=42,
)

# Train the ensemble, then score its predictions on the held-out test set.
y_pred = bag.fit(X_train, y_train).predict(X_test)
accuracy_score(y_test, y_pred)

0.931

### Applying GridSearchCV

In [11]:
from sklearn.model_selection import GridSearchCV

In [12]:
# Hyper-parameter grid for BaggingClassifier:
# 3 x 4 x 2 x 4 = 96 combinations, each evaluated with 5-fold CV (480 fits).
parameter = {
    'n_estimators': [50, 100, 500],
    'max_samples': [0.1, 0.4, 0.7, 1.0],
    'bootstrap': [True, False],
    'max_features': [0.1, 0.4, 0.7, 1.0],
}

search = GridSearchCV(
    # Seed the ensemble so every candidate is scored on the same random draws
    # and best_params_ is reproducible.
    BaggingClassifier(random_state=42),
    parameter,
    cv=5,
    # Run the candidate fits in parallel on all cores; executed serially, the
    # 480 fits take long enough that the search is impractical to finish.
    n_jobs=-1,
)
search.fit(X_train, y_train)
search.best_params_

KeyboardInterrupt: 