In [None]:
# Standard library
import warnings

# Third-party
import numpy as np
from IPython.display import HTML
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# GridSearchCV moved from sklearn.grid_search to sklearn.model_selection
# (the old module was deprecated in scikit-learn 0.18 and removed in 0.20).
# Prefer the current location and fall back to the legacy module so the
# notebook runs on both old and new installations.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:
    from sklearn.grid_search import GridSearchCV

# Project-local helpers
import ch9util
import dautil as dl

# Silence FutureWarning noise so the notebook output stays readable.
warnings.filterwarnings(action='ignore', category=FutureWarning)

In [None]:
# Obtain the train/test partitions from the chapter helper; the four return
# values are unpacked as features and labels for each partition.
X_train, X_test, y_train, y_test = ch9util.rain_split()

# Shallow, regularised decision tree used as the bagged base learner.
base_tree = DecisionTreeClassifier(max_depth=4, min_samples_leaf=3)

# Fixed random_state keeps the bagging ensemble reproducible across runs.
# NOTE(review): recent scikit-learn releases renamed `base_estimator` to
# `estimator` -- confirm against the pinned scikit-learn version.
clf = BaggingClassifier(base_estimator=base_tree, random_state=43)

In [None]:
# Hyper-parameter grid: 2 x 2 x 2 = 8 candidate configurations.
# The `base_estimator__` prefix routes `criterion` to the wrapped tree.
params = {
    'n_estimators': [320, 640],
    'bootstrap_features': [True, False],
    'base_estimator__criterion': ['gini', 'entropy'],
}

# 5-fold cross-validated search over the grid; n_jobs=-1 requests all
# available CPU cores.
gscv = GridSearchCV(clf, params, cv=5, n_jobs=-1)

In [None]:
# Run the grid search on the training partition, then predict the held-out
# test partition with the fitted search object (fit() returns the object
# itself, so the calls can be chained).
preds = gscv.fit(X_train, y_train).predict(X_test)

# Persist the predictions under the 'bagging' key via the chapter helper.
ch9util.npy_save('bagging', preds)

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Notebook context labelled 'bagging'; it is passed to the widget below and
# to the Subplotter in the plotting cell.
context = dl.nb.Context('bagging')
# Left as the last expression so the notebook displays the resulting widget.
# NOTE(review): presumably an interactive matplotlib rc-settings widget --
# confirm against dautil.
dl.nb.RcWidget(context)

In [None]:
# Subplotter built with (2, 2, context) -- presumably a 2x2 grid of axes.
sp = dl.plotting.Subplotter(2, 2, context)

# First axes: report for the test-set predictions and the best parameters
# found by the grid search; the return value is combined with sp.exit()
# for the HTML output at the end of the cell.
html = ch9util.report_rain(preds, y_test, gscv.best_params_, sp.ax)

# Powers of two swept in the validation plots.
ntrees = 2 ** np.arange(4, 11)      # 16, 32, ..., 1024
nsamples = 2 ** np.arange(4, 14)    # 16, 32, ..., 8192
# NOTE(review): integer max_samples values larger than the number of
# training rows may be rejected by BaggingClassifier -- confirm that the
# training set (and each CV fold) has at least 8192 samples.

# One validation plot per swept parameter, each on the next free axes.
for param_name, param_range in (('n_estimators', ntrees),
                                ('max_samples', nsamples)):
    ch9util.plot_validation(sp.next_ax(), gscv.best_estimator_,
                            X_train, y_train, param_name, param_range)

# Final axes: learning curve of the best estimator on the training data.
ch9util.plot_learn_curve(sp.next_ax(), gscv.best_estimator_,
                         X_train, y_train)

# Last expression: display the report followed by whatever sp.exit() returns.
HTML(html + sp.exit())