In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import KFold, cross_val_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.multioutput import MultiOutputRegressor

from datasets import get_datasets
from multioutput_ensemble import MultiOutputBaggingRegressor

from tqdm import tqdm_notebook

In [None]:
# Shared 3-fold splitter with a seeded shuffle, used by every rmse() call.
CV = KFold(n_splits=3, shuffle=True, random_state=1234)


def rmse(model, X, y):
    """Return the cross-validated root-mean-squared error of ``model``.

    ``cross_val_score`` is run with the module-level ``CV`` splitter and the
    ``neg_mean_squared_error`` scorer. The per-fold scores are averaged, the
    sign is flipped back to a positive MSE, and its square root is returned
    (i.e. the root of the mean fold MSE, not the mean of per-fold RMSEs).
    """
    neg_mse_per_fold = cross_val_score(
        model,
        X,
        y,
        cv=CV,
        scoring="neg_mean_squared_error",
        n_jobs=3,
    )
    mean_mse = -np.mean(neg_mse_per_fold)
    return np.sqrt(mean_mse)

In [None]:
# Load the benchmark datasets through the imported get_datasets() helper.
# The benchmark loop iterates datasets.items() and unpacks each value as an
# (X, y) pair, so this is used as a mapping of dataset name -> (X, y).
datasets = get_datasets()

In [None]:
# Display the dataset names that the benchmark will iterate over.
datasets.keys()

In [None]:
tmp = pd.DataFrame(columns=names, index=depth_vals)

In [None]:
scores = {}

depth_vals = np.arange(1, 21)

names = ['Single_Tree', 'Multi_Tree', 
         'Single_Bagging_10', 'Single_Bagging_100', 'Single_Bagging_1000',
         'Multi_Bagging_10', 'Multi_Bagging_100', 'Multi_Bagging_1000']

for dataset_name, (X, y) in tqdm_notebook(datasets.items(), total=len(datasets)):
    cur_scores = pd.DataFrame(columns=names, index=depth_vals)
    
    
    
    for depth in depth_vals:
        models = [MultiOutputRegressor(DecisionTreeRegressor(max_depth=depth)),
                  DecisionTreeRegressor(max_depth=depth),
                  MultiOutputRegressor(BaggingRegressor(DecisionTreeRegressor(max_depth=depth), n_estimators=10)),
                  MultiOutputRegressor(BaggingRegressor(DecisionTreeRegressor(max_depth=depth), n_estimators=100)),
                  MultiOutputRegressor(BaggingRegressor(DecisionTreeRegressor(max_depth=depth), n_estimators=1000)),
                  MultiOutputBaggingRegressor(DecisionTreeRegressor(max_depth=depth), n_estimators=10),
                  MultiOutputBaggingRegressor(DecisionTreeRegressor(max_depth=depth), n_estimators=100),
                  MultiOutputBaggingRegressor(DecisionTreeRegressor(max_depth=depth), n_estimators=1000)]
        
        depth_scores = []
        for model_name, model in zip(names, models):
            cur_score = rmse(model, X, y)
            depth_scores.append(cur_score)
            print('{} depth = {} rmse = {}'.format(model_name, depth, cur_score))
        cur_scores.loc[depth] = depth_scores
        
    scores[dataset_name] = cur_scores