In this example, the stacker was handled with cross-validation instead of a train-test-split.
The whole training set was used in both of the two stacking phases.

In [12]:
from __future__ import division

%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_california_housing

cali_housing = fetch_california_housing()

X = cali_housing.data
y = cali_housing.target

bins = np.arange(6)
 

from sklearn.model_selection import train_test_split

binned_y = np.digitize(y, bins)

from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor, GradientBoostingRegressor
 
from sklearn.model_selection import GridSearchCV

X_train_prin, X_test_prin, y_train_prin, y_test_prin = train_test_split(X, y,test_size=0.2,stratify=binned_y,random_state=7)

binned_y_train_prin = np.digitize(y_train_prin, bins)

In [44]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits = 3, random_state = 7)
skf.split(X_train_prin, binned_y_train_prin)

'\nfor train_index, test_index in skf.split(X_train_prin, binned_y_train_prin):\n    print("TRAIN:", train_index, "TEST:", test_index)\n    print (pd.Series(binned_y_train_prin[test_index]).value_counts())\n    \n'

In [45]:
from sklearn.ensemble import BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor

from sklearn.model_selection import RandomizedSearchCV

param_dist = {
 'max_samples': [0.5,1.0],
 'max_features' : [0.5,1.0],
 'oob_score' : [True, False],
 'base_estimator__n_neighbors': [3,5],
 'n_estimators': [100]
 }

single_estimator = KNeighborsRegressor()
ensemble_estimator = BaggingRegressor(base_estimator = single_estimator)

pre_gs_inst_bag = RandomizedSearchCV(ensemble_estimator,
                                     param_distributions = param_dist,
                                     cv = skf,
                                     n_iter = 5,
                                     n_jobs=-1,
                                    random_state=7)

pre_gs_inst_bag.fit(X_train_prin, y_train_prin)



RandomizedSearchCV(cv=StratifiedKFold(n_splits=3, random_state=7, shuffle=False),
          error_score='raise',
          estimator=BaggingRegressor(base_estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=5, p=2,
          weights='uniform'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=10, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False),
          fit_params=None, iid=True, n_iter=5, n_jobs=-1,
          param_distributions={'max_features': [0.5, 1.0], 'oob_score': [True, False], 'base_estimator__n_neighbors': [3, 5], 'max_samples': [0.5, 1.0], 'n_estimators': [100]},
          pre_dispatch='2*n_jobs', random_state=7, refit=True,
          return_train_score=True, scoring=None, verbose=0)

In [47]:
pre_gs_inst_bag.best_params_

{'base_estimator__n_neighbors': 5,
 'max_features': 0.5,
 'max_samples': 1.0,
 'n_estimators': 100,
 'oob_score': True}

In [48]:
rs_bag = BaggingRegressor(**{'max_features': 0.5,
 'max_samples': 1.0,
 'n_estimators': 3000,
 'oob_score': True, 
 'base_estimator': KNeighborsRegressor(n_neighbors=5)})

In [49]:
from sklearn.model_selection import cross_val_predict

bag_predicted = cross_val_predict(rs_bag, X_train_prin, y_train_prin, cv=skf, n_jobs=-1)



In [51]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV

param_dist = {'max_features' : ['log2',0.4,0.5,0.6,1.0],
 'max_depth' : [2,3, 4, 5,6, 7, 10],
 'min_samples_leaf' : [1,2, 3, 4, 5, 10],
 'n_estimators': [50, 100],
 'learning_rate' : [0.01,0.05,0.1,0.25,0.275,0.3,0.325],
 'loss' : ['ls','huber']
 }
pre_gs_inst_gb = RandomizedSearchCV(GradientBoostingRegressor(warm_start=True),
                                   param_distributions = param_dist,
                                   cv=skf, 
                                   n_iter = 30, 
                                   n_jobs=-1,random_state=7)
pre_gs_inst_gb.fit(X_train_prin, y_train_prin)



RandomizedSearchCV(cv=StratifiedKFold(n_splits=3, random_state=7, shuffle=False),
          error_score='raise',
          estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=100, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=True),
          fit_params=None, iid=True, n_iter=30, n_jobs=-1,
          param_distributions={'max_features': ['log2', 0.4, 0.5, 0.6, 1.0], 'n_estimators': [50, 100], 'learning_rate': [0.01, 0.05, 0.1, 0.25, 0.275, 0.3, 0.325], 'max_depth': [2, 3, 4, 5, 6, 7, 10], 'min_samples_leaf': [1, 2, 3, 4, 5, 10], 'loss': ['ls', 'huber']},
          pre_dispatch='2*n_jobs', random_state=7, refit=True,
          return_trai

In [52]:
pre_gs_inst_gb.best_estimator_

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.25, loss='huber', max_depth=6,
             max_features=1.0, max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=10, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_estimators=100,
             presort='auto', random_state=None, subsample=1.0, verbose=0,
             warm_start=True)

In [53]:
gbt_inst = GradientBoostingRegressor(**{'learning_rate': 0.25,
 'loss': 'huber',
 'max_depth': 6,
 'max_features': 1.0,
 'min_samples_leaf': 10,
 'n_estimators': 3000,
 'warm_start': True})

In [54]:
gbt_predicted = cross_val_predict(gbt_inst, X_train_prin, y_train_prin, cv=skf, n_jobs=-1)



In [66]:
preds_df = pd.DataFrame(X_train_prin.copy(),columns = cali_housing .feature_names )#pd.DataFrame(columns = ['bag', 'gbt'])

preds_df['bag'] = bag_predicted
preds_df['gbt'] = gbt_predicted


preds_df[['bag','gbt']].corr()

Unnamed: 0,bag,gbt
bag,1.0,0.878562
gbt,0.878562,1.0


In [67]:
preds_df.shape

(16512, 10)

In [68]:
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import RandomizedSearchCV

param_dist = {'max_features' : ['sqrt','log2',1.0],
 'min_samples_leaf' : [1, 2, 3, 7, 11],
 'n_estimators': [50, 100],
 'oob_score': [True, False]}

pre_gs_inst_etr = RandomizedSearchCV(ExtraTreesRegressor(warm_start=True,bootstrap=True),
                                 param_distributions = param_dist,
                                 cv=skf,
                                 n_iter = 15,
                                 random_state = 7)

pre_gs_inst_etr.fit(preds_df.values, y_train_prin)



RandomizedSearchCV(cv=StratifiedKFold(n_splits=3, random_state=7, shuffle=False),
          error_score='raise',
          estimator=ExtraTreesRegressor(bootstrap=True, criterion='mse', max_depth=None,
          max_features='auto', max_leaf_nodes=None,
          min_impurity_decrease=0.0, min_impurity_split=None,
          min_samples_leaf=1, min_samples_split=2,
          min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
          oob_score=False, random_state=None, verbose=0, warm_start=True),
          fit_params=None, iid=True, n_iter=15, n_jobs=1,
          param_distributions={'max_features': ['sqrt', 'log2', 1.0], 'oob_score': [True, False], 'min_samples_leaf': [1, 2, 3, 7, 11], 'n_estimators': [50, 100]},
          pre_dispatch='2*n_jobs', random_state=7, refit=True,
          return_train_score=True, scoring=None, verbose=0)

In [69]:
pre_gs_inst_etr.best_params_

{'max_features': 1.0,
 'min_samples_leaf': 11,
 'n_estimators': 100,
 'oob_score': False}

In [70]:
final_etr = ExtraTreesRegressor(**{'max_features': 1.0,
 'min_samples_leaf': 11,
 'n_estimators': 2000,
 'oob_score': False})
final_etr.fit(preds_df.values, y_train_prin)

ExtraTreesRegressor(bootstrap=False, criterion='mse', max_depth=None,
          max_features=1.0, max_leaf_nodes=None, min_impurity_decrease=0.0,
          min_impurity_split=None, min_samples_leaf=11,
          min_samples_split=2, min_weight_fraction_leaf=0.0,
          n_estimators=2000, n_jobs=1, oob_score=False, random_state=None,
          verbose=0, warm_start=False)

In [78]:
rs_bag.fit(X_train_prin, y_train_prin)

BaggingRegressor(base_estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=5, p=2,
          weights='uniform'),
         bootstrap=True, bootstrap_features=False, max_features=0.5,
         max_samples=1.0, n_estimators=3000, n_jobs=1, oob_score=True,
         random_state=None, verbose=0, warm_start=False)

In [79]:
gbt_inst.fit(X_train_prin, y_train_prin)

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.25, loss='huber', max_depth=6,
             max_features=1.0, max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=10, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_estimators=3000,
             presort='auto', random_state=None, subsample=1.0, verbose=0,
             warm_start=True)

In [80]:
def handle_X_set(X_set):
    X_copy = X_set.copy()
    
    y_pred_bag = rs_bag.predict(X_copy)
    y_pred_gbt = gbt_inst.predict(X_copy)
    preds_df = pd.DataFrame(X_copy, columns = cali_housing .feature_names)

    preds_df['bag'] = y_pred_bag
    preds_df['gbt'] = y_pred_gbt
 
    return preds_df.values

def predict_from_X_set(X_set):
    return final_etr.predict(handle_X_set(X_set)) 

y_pred = predict_from_X_set(X_test_prin)

In [81]:
def mase(y_test, y_pred):
    y_avg = y_test.mean()
    denominator = np.abs(y_test - y_avg).mean()
    numerator = y_test - y_pred
    
    return np.abs(numerator/denominator).mean()

In [89]:
# https://www.otexts.org/fpp/2/5 : contains SMAPE (attributed to Armstrong) and MASE (Hyndman and Koehler)
from sklearn.metrics import r2_score, mean_absolute_error

print ("R-squared",r2_score(y_test_prin, y_pred))
print ("MAE   :  ",mean_absolute_error(y_test_prin, y_pred))
print ("MAPE  :  ",(np.abs(y_test_prin- y_pred)/y_test_prin).mean())
print ("SMAPE :  ",(np.abs(y_test_prin- y_pred)/((y_test_prin + y_pred)/2)).mean())
print ("MASE  :  ",mase(y_test_prin, y_pred)) 

R-squared 0.845963221997
MAE   :   0.296563386223
MAPE  :   0.165997578898
SMAPE :   0.15271275169
MASE  :   0.325969762163
