In [3]:
# GERAL
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston

In [36]:
# MODELAGEM
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

# MODELO
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor
from sklearn.ensemble import StackingRegressor

# METRICS
from sklearn.metrics import r2_score


In [14]:
# Load the Boston housing bunch and assemble one DataFrame: every feature
# column gets an 'X_' prefix so it can be selected later with filter(like='X_'),
# and the regression target is appended as the 'target' column.
# NOTE(review): load_boston is deprecated/removed in recent scikit-learn
# releases -- confirm the pinned version before re-running this cell.
load_df = load_boston()
boston_features = pd.DataFrame(load_df.data, columns=load_df.feature_names).add_prefix('X_')
boston_target = pd.DataFrame(load_df.target, columns=['target'])
dataset = pd.concat([boston_features, boston_target], axis=1)

In [9]:
# First-level (base) learners of the stack, paired with the names
# StackingRegressor uses to identify them.
base_names = ('ridge', 'lasso', 'knr')
base_models = (
    RidgeCV(),
    LassoCV(random_state=42),
    KNeighborsRegressor(n_neighbors=20, metric='euclidean'),
)
estimators = list(zip(base_names, base_models))
               

In [17]:
# Meta-learner: a small, heavily regularised gradient-boosting model that is
# trained on the predictions of the base learners.
meta_learner_params = {
    'n_estimators': 25,
    'subsample': 0.5,
    'min_samples_leaf': 25,
    'max_features': 1,
    'random_state': 42,
}
final_estimator = GradientBoostingRegressor(**meta_learner_params)

# Two-level stack: base learners from `estimators`, combined by `final_estimator`.
reg = StackingRegressor(estimators=estimators, final_estimator=final_estimator)

In [18]:
# Hold out a test set (split size left at the library default, seeded for
# reproducibility) and fit the stacked regressor on the training part.
feature_frame = dataset.filter(like='X_')
target_series = dataset['target']
X_train, X_test, y_train, y_test = train_test_split(
    feature_frame,
    target_series,
    random_state=42,
)
reg.fit(X_train, y_train)

StackingRegressor(cv=None,
                  estimators=[('ridge',
                               RidgeCV(alphas=array([ 0.1,  1. , 10. ]),
                                       cv=None, fit_intercept=True,
                                       gcv_mode=None, normalize=False,
                                       scoring=None, store_cv_values=False)),
                              ('lasso',
                               LassoCV(alphas=None, copy_X=True, cv=None,
                                       eps=0.001, fit_intercept=True,
                                       max_iter=1000, n_alphas=100, n_jobs=None,
                                       normalize=False, positive=False,
                                       precompute='auto', random_s...
                                                            max_depth=3,
                                                            max_features=1,
                                                            max_leaf_nodes=None,
      

In [19]:
# Predict the target for every held-out test row with the fitted stack.
y_pred = reg.predict(X_test)

In [20]:
# Echo the test-set predictions so they are shown as the cell output.
y_pred

array([25.9271049 , 38.48913878, 12.09805488, 23.52389065, 16.80033124,
       21.74847934, 18.77919867, 12.69894965, 19.71166145, 20.82316827,
       21.50116069, 19.90266201, 11.91753838, 21.65500634, 20.68319729,
       24.18015222, 19.98908563, 11.91753838, 38.28659275, 15.09246894,
       23.64656434, 26.36304436, 16.56965826, 24.38340216, 15.87388021,
       15.89349739, 21.7217799 , 12.09805488, 20.70844197, 20.1153618 ,
       21.85605626, 24.33120737, 18.92198572, 19.41624447, 13.83862812,
       19.45354956, 31.62135651, 20.64062896, 21.77967552, 23.71409706,
       16.93271406, 30.80367213, 38.28659275, 18.36400415, 24.09599178,
       15.09246894, 16.75017476, 23.63222957, 16.91705419, 26.14470439,
       21.8059593 , 38.28659275, 16.0630013 , 24.27853108, 38.48913878,
       21.85605626, 16.40229589, 34.23716166, 24.59975953, 16.93271406,
       22.70147359, 30.28378495, 31.01643783, 17.82536289, 20.61574799,
       16.51158938, 16.40229589, 24.09599178, 30.28378495, 11.91

In [21]:
# R2 of the stacked regressor on the held-out test set.
# (r2_score is imported together with the other scikit-learn names in the
# import cell at the top of the notebook, not in the middle of the analysis.)
stack_r2 = r2_score(y_test, y_pred)
print('R2 score: {:.2f}'.format(stack_r2))


R2 score: 0.75


In [23]:
# Per-base-estimator predictions for the first five test rows, i.e. the
# features that are fed to the final estimator.
reg.transform(X_test.head(5))

array([[28.78639573, 28.433658  , 24.31      ],
       [35.96334615, 32.58788457, 26.73      ],
       [14.97187156, 14.05965525, 14.89      ],
       [25.19918851, 25.54000515, 26.245     ],
       [18.93352632, 19.26508705, 16.88      ]])

In [24]:
# Final stacked predictions for the same first five test rows.
reg.predict(X_test.head(5))

array([25.9271049 , 38.48913878, 12.09805488, 23.52389065, 16.80033124])

In [27]:
# Second layer: two small tree ensembles sharing the same constraints,
# blended by a RidgeCV.
shallow_tree_params = dict(n_estimators=10, max_features=1, max_leaf_nodes=5, random_state=42)
final_layer_rfr = RandomForestRegressor(**shallow_tree_params)
final_layer_gbr = GradientBoostingRegressor(**shallow_tree_params)

second_layer_members = [('rf', final_layer_rfr), ('gbrt', final_layer_gbr)]
final_layer = StackingRegressor(estimators=second_layer_members,
                                final_estimator=RidgeCV())

# First layer: the same three base learners as the single-layer stack; their
# predictions feed the second-layer stack defined above.
first_layer_members = [
    ('ridge', RidgeCV()),
    ('lasso', LassoCV(random_state=42)),
    ('knr', KNeighborsRegressor(n_neighbors=20, metric='euclidean')),
]
multi_layer_regressor = StackingRegressor(estimators=first_layer_members,
                                          final_estimator=final_layer)

multi_layer_regressor.fit(X_train, y_train)
multi_layer_r2 = multi_layer_regressor.score(X_test, y_test)
print('R2 score: {:.2f}'.format(multi_layer_r2))

R2 score: 0.78


In [35]:
# Baseline: fit every individual learner directly on the training data and
# report its test-set R2, for comparison with the stacked models.
standalone_models = (
    final_layer_rfr,
    final_layer_gbr,
    RidgeCV(),
    LassoCV(random_state=42),
    KNeighborsRegressor(n_neighbors=20, metric='euclidean'),
)
for model in standalone_models:
    model.fit(X_train, y_train)
    standalone_r2 = model.score(X_test, y_test)
    print(type(model))
    print('R2 score: {:.2f}'.format(standalone_r2))

<class 'sklearn.ensemble._forest.RandomForestRegressor'>
R2 score: 0.52
<class 'sklearn.ensemble._gb.GradientBoostingRegressor'>
R2 score: 0.41
<class 'sklearn.linear_model._ridge.RidgeCV'>
R2 score: 0.68
<class 'sklearn.linear_model._coordinate_descent.LassoCV'>
R2 score: 0.66
<class 'sklearn.neighbors._regression.KNeighborsRegressor'>
R2 score: 0.46


In [38]:
# Comparação com Regressor Tunado
base_model = GradientBoostingRegressor(random_state=42)

parameters={'n_estimators':[50,100,500], 
            'learning_rate': [0.1,0.05,0.02],
            'max_depth':[4,5,9], 
            'min_samples_leaf':[3,10,20], 
            'max_features':[1.0, 0.5] } 

regressor = GridSearchCV(base_model, parameters,cv=5, verbose=3,return_train_score=True)
regressor.fit(X_train, y_train)

Fitting 5 folds for each of 162 candidates, totalling 810 fits
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=50 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=50, score=(train=0.982, test=0.896), total=   0.0s
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=50 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=50, score=(train=0.981, test=0.786), total=   0.0s
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=50 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=50, score=(train=0.982, test=0.914), total=   0.0s
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=50 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=50, score=(train=0.979, test=0.897), total=   0.0s
[CV] lear

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=50, score=(train=0.985, test=0.784), total=   0.0s
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=100, score=(train=0.992, test=0.898), total=   0.1s
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=100, score=(train=0.993, test=0.782), total=   0.1s
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=100, score=(train=0.993, test=0.919), total=   0.1s
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_sampl

[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=500, score=(train=0.995, test=0.896), total=   0.4s
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=500, score=(train=0.996, test=0.798), total=   0.4s
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=500, score=(train=0.995, test=0.892), total=   0.4s
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=500, score=(train=0.994, test=0.903), total=   0.4s
[CV] learning_rate=0.1, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=4, max_features=1.0, 

[CV]  learning_rate=0.1, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=100, score=(train=0.958, test=0.825), total=   0.0s
[CV] learning_rate=0.1, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=100, score=(train=0.952, test=0.899), total=   0.0s
[CV] learning_rate=0.1, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=100, score=(train=0.950, test=0.890), total=   0.0s
[CV] learning_rate=0.1, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=100, score=(train=0.966, test=0.738), total=   0.0s
[CV] learning_rate=0.1, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=4, max_features=0.5, 

[CV]  learning_rate=0.1, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=500, score=(train=1.000, test=0.769), total=   0.4s
[CV] learning_rate=0.1, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=500, score=(train=1.000, test=0.899), total=   0.4s
[CV] learning_rate=0.1, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=500, score=(train=1.000, test=0.895), total=   0.4s
[CV] learning_rate=0.1, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=500, score=(train=1.000, test=0.751), total=   0.4s
[CV] learning_rate=0.1, max_depth=5, max_features=1.0, min_samples_leaf=20, n_estimators=50 
[CV]  learning_rate=0.1, max_depth=5, max_features=1.0, m

[CV]  learning_rate=0.1, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=100, score=(train=0.983, test=0.901), total=   0.1s
[CV] learning_rate=0.1, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=100, score=(train=0.984, test=0.828), total=   0.1s
[CV] learning_rate=0.1, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=100, score=(train=0.986, test=0.911), total=   0.1s
[CV] learning_rate=0.1, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=100, score=(train=0.981, test=0.903), total=   0.1s
[CV] learning_rate=0.1, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=5, max_features=0.5, 

[CV]  learning_rate=0.1, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=500, score=(train=1.000, test=0.889), total=   0.7s
[CV] learning_rate=0.1, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=500, score=(train=1.000, test=0.772), total=   0.7s
[CV] learning_rate=0.1, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=500, score=(train=1.000, test=0.890), total=   0.6s
[CV] learning_rate=0.1, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=500, score=(train=1.000, test=0.888), total=   0.6s
[CV] learning_rate=0.1, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=9, max_features=1.0, min_samp

[CV]  learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=50, score=(train=0.999, test=0.697), total=   0.0s
[CV] learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=1.000, test=0.870), total=   0.1s
[CV] learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=1.000, test=0.814), total=   0.1s
[CV] learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=1.000, test=0.899), total=   0.1s
[CV] learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.1, max_depth=9, max_features=0.5, min_sampl

[CV]  learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.998, test=0.893), total=   0.3s
[CV] learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.999, test=0.837), total=   0.3s
[CV] learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.998, test=0.901), total=   0.3s
[CV] learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.998, test=0.920), total=   0.3s
[CV] learning_rate=0.1, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.1, max_depth=9, max_features=0.5, 

[CV]  learning_rate=0.05, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=100, score=(train=0.928, test=0.884), total=   0.1s
[CV] learning_rate=0.05, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=100, score=(train=0.929, test=0.799), total=   0.1s
[CV] learning_rate=0.05, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=100, score=(train=0.922, test=0.887), total=   0.1s
[CV] learning_rate=0.05, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=100, score=(train=0.926, test=0.879), total=   0.1s
[CV] learning_rate=0.05, max_depth=4, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=4, max_featu

[CV]  learning_rate=0.05, max_depth=4, max_features=0.5, min_samples_leaf=10, n_estimators=500, score=(train=0.994, test=0.909), total=   0.2s
[CV] learning_rate=0.05, max_depth=4, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=4, max_features=0.5, min_samples_leaf=10, n_estimators=500, score=(train=0.995, test=0.816), total=   0.2s
[CV] learning_rate=0.05, max_depth=4, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=4, max_features=0.5, min_samples_leaf=10, n_estimators=500, score=(train=0.995, test=0.910), total=   0.2s
[CV] learning_rate=0.05, max_depth=4, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=4, max_features=0.5, min_samples_leaf=10, n_estimators=500, score=(train=0.993, test=0.905), total=   0.2s
[CV] learning_rate=0.05, max_depth=4, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=4, max_featu

[CV]  learning_rate=0.05, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=50, score=(train=0.949, test=0.716), total=   0.0s
[CV] learning_rate=0.05, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=100, score=(train=0.967, test=0.881), total=   0.1s
[CV] learning_rate=0.05, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=100, score=(train=0.969, test=0.783), total=   0.1s
[CV] learning_rate=0.05, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=100, score=(train=0.966, test=0.897), total=   0.1s
[CV] learning_rate=0.05, max_depth=5, max_features=1.0, min_samples_leaf=10, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=5, max_featur

[CV]  learning_rate=0.05, max_depth=5, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=0.992, test=0.759), total=   0.1s
[CV] learning_rate=0.05, max_depth=5, max_features=0.5, min_samples_leaf=3, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=5, max_features=0.5, min_samples_leaf=3, n_estimators=500, score=(train=1.000, test=0.886), total=   0.3s
[CV] learning_rate=0.05, max_depth=5, max_features=0.5, min_samples_leaf=3, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=5, max_features=0.5, min_samples_leaf=3, n_estimators=500, score=(train=1.000, test=0.819), total=   0.3s
[CV] learning_rate=0.05, max_depth=5, max_features=0.5, min_samples_leaf=3, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=5, max_features=0.5, min_samples_leaf=3, n_estimators=500, score=(train=1.000, test=0.907), total=   0.3s
[CV] learning_rate=0.05, max_depth=5, max_features=0.5, min_samples_leaf=3, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=5, max_features=0.5,

[CV]  learning_rate=0.05, max_depth=5, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.991, test=0.753), total=   0.2s
[CV] learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=50 
[CV]  learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=50, score=(train=0.988, test=0.871), total=   0.1s
[CV] learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=50 
[CV]  learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=50, score=(train=0.989, test=0.772), total=   0.1s
[CV] learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=50 
[CV]  learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=50, score=(train=0.988, test=0.872), total=   0.1s
[CV] learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=50 
[CV]  learning_rate=0.05, max_depth=9, max_features=1.0, min_s

[CV]  learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=20, n_estimators=50, score=(train=0.913, test=0.689), total=   0.0s
[CV] learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=20, n_estimators=100, score=(train=0.941, test=0.882), total=   0.1s
[CV] learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=20, n_estimators=100, score=(train=0.945, test=0.797), total=   0.1s
[CV] learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=20, n_estimators=100, score=(train=0.934, test=0.882), total=   0.1s
[CV] learning_rate=0.05, max_depth=9, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.05, max_depth=9, max_featur

[CV]  learning_rate=0.05, max_depth=9, max_features=0.5, min_samples_leaf=10, n_estimators=100, score=(train=0.984, test=0.742), total=   0.1s
[CV] learning_rate=0.05, max_depth=9, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=9, max_features=0.5, min_samples_leaf=10, n_estimators=500, score=(train=0.999, test=0.911), total=   0.3s
[CV] learning_rate=0.05, max_depth=9, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=9, max_features=0.5, min_samples_leaf=10, n_estimators=500, score=(train=0.999, test=0.848), total=   0.3s
[CV] learning_rate=0.05, max_depth=9, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=9, max_features=0.5, min_samples_leaf=10, n_estimators=500, score=(train=0.999, test=0.916), total=   0.3s
[CV] learning_rate=0.05, max_depth=9, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.05, max_depth=9, max_featu

[CV]  learning_rate=0.02, max_depth=4, max_features=1.0, min_samples_leaf=3, n_estimators=500, score=(train=0.993, test=0.782), total=   0.4s
[CV] learning_rate=0.02, max_depth=4, max_features=1.0, min_samples_leaf=10, n_estimators=50 
[CV]  learning_rate=0.02, max_depth=4, max_features=1.0, min_samples_leaf=10, n_estimators=50, score=(train=0.758, test=0.666), total=   0.0s
[CV] learning_rate=0.02, max_depth=4, max_features=1.0, min_samples_leaf=10, n_estimators=50 
[CV]  learning_rate=0.02, max_depth=4, max_features=1.0, min_samples_leaf=10, n_estimators=50, score=(train=0.765, test=0.707), total=   0.0s
[CV] learning_rate=0.02, max_depth=4, max_features=1.0, min_samples_leaf=10, n_estimators=50 
[CV]  learning_rate=0.02, max_depth=4, max_features=1.0, min_samples_leaf=10, n_estimators=50, score=(train=0.751, test=0.720), total=   0.0s
[CV] learning_rate=0.02, max_depth=4, max_features=1.0, min_samples_leaf=10, n_estimators=50 
[CV]  learning_rate=0.02, max_depth=4, max_features=1.0,

[CV]  learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=0.928, test=0.843), total=   0.0s
[CV] learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=0.929, test=0.827), total=   0.1s
[CV] learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=0.925, test=0.850), total=   0.0s
[CV] learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=0.925, test=0.852), total=   0.1s
[CV] learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=4, max_features=0.5,

[CV]  learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.954, test=0.894), total=   0.2s
[CV] learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.958, test=0.824), total=   0.2s
[CV] learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.952, test=0.903), total=   0.2s
[CV] learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.952, test=0.893), total=   0.2s
[CV] learning_rate=0.02, max_depth=4, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=4, max_featu

[CV]  learning_rate=0.02, max_depth=5, max_features=1.0, min_samples_leaf=20, n_estimators=50, score=(train=0.753, test=0.605), total=   0.0s
[CV] learning_rate=0.02, max_depth=5, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=5, max_features=1.0, min_samples_leaf=20, n_estimators=100, score=(train=0.870, test=0.823), total=   0.1s
[CV] learning_rate=0.02, max_depth=5, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=5, max_features=1.0, min_samples_leaf=20, n_estimators=100, score=(train=0.869, test=0.783), total=   0.1s
[CV] learning_rate=0.02, max_depth=5, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=5, max_features=1.0, min_samples_leaf=20, n_estimators=100, score=(train=0.861, test=0.836), total=   0.1s
[CV] learning_rate=0.02, max_depth=5, max_features=1.0, min_samples_leaf=20, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=5, max_featur

[CV]  learning_rate=0.02, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=100, score=(train=0.925, test=0.704), total=   0.1s
[CV] learning_rate=0.02, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=500, score=(train=0.984, test=0.904), total=   0.3s
[CV] learning_rate=0.02, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=500, score=(train=0.985, test=0.832), total=   0.3s
[CV] learning_rate=0.02, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=500, score=(train=0.985, test=0.913), total=   0.3s
[CV] learning_rate=0.02, max_depth=5, max_features=0.5, min_samples_leaf=10, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=5, max_featu

[CV]  learning_rate=0.02, max_depth=9, max_features=1.0, min_samples_leaf=3, n_estimators=500, score=(train=1.000, test=0.729), total=   0.7s
[CV] learning_rate=0.02, max_depth=9, max_features=1.0, min_samples_leaf=10, n_estimators=50 
[CV]  learning_rate=0.02, max_depth=9, max_features=1.0, min_samples_leaf=10, n_estimators=50, score=(train=0.773, test=0.669), total=   0.1s
[CV] learning_rate=0.02, max_depth=9, max_features=1.0, min_samples_leaf=10, n_estimators=50 
[CV]  learning_rate=0.02, max_depth=9, max_features=1.0, min_samples_leaf=10, n_estimators=50, score=(train=0.780, test=0.701), total=   0.0s
[CV] learning_rate=0.02, max_depth=9, max_features=1.0, min_samples_leaf=10, n_estimators=50 
[CV]  learning_rate=0.02, max_depth=9, max_features=1.0, min_samples_leaf=10, n_estimators=50, score=(train=0.771, test=0.725), total=   0.0s
[CV] learning_rate=0.02, max_depth=9, max_features=1.0, min_samples_leaf=10, n_estimators=50 
[CV]  learning_rate=0.02, max_depth=9, max_features=1.0,

[CV]  learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=0.971, test=0.852), total=   0.1s
[CV] learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=0.972, test=0.825), total=   0.1s
[CV] learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=0.970, test=0.868), total=   0.1s
[CV] learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100, score=(train=0.969, test=0.862), total=   0.1s
[CV] learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=3, n_estimators=100 
[CV]  learning_rate=0.02, max_depth=9, max_features=0.5,

[CV]  learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.969, test=0.894), total=   0.3s
[CV] learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.971, test=0.834), total=   0.3s
[CV] learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.967, test=0.909), total=   0.3s
[CV] learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500, score=(train=0.965, test=0.899), total=   0.3s
[CV] learning_rate=0.02, max_depth=9, max_features=0.5, min_samples_leaf=20, n_estimators=500 
[CV]  learning_rate=0.02, max_depth=9, max_featu

[Parallel(n_jobs=1)]: Done 810 out of 810 | elapsed:  2.2min finished


GridSearchCV(cv=5, error_score=nan,
             estimator=GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0,
                                                 criterion='friedman_mse',
                                                 init=None, learning_rate=0.1,
                                                 loss='ls', max_depth=3,
                                                 max_features=None,
                                                 max_leaf_nodes=None,
                                                 min_impurity_decrease=0.0,
                                                 min_impurity_split=None,
                                                 min_samples_leaf=1,
                                                 min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0,
                                                 n_estimators=100,
                                                 n_iter_n...
                            

In [60]:
# Test-set R2 of the best model found by the grid search.
tuned_r2 = regressor.best_estimator_.score(X_test, y_test)
print('R2 score: {:.2f}'.format(tuned_r2))

R2 score: 0.85


Portanto, um Gradient Boosting tunado performou melhor do que o stacking de modelos default.

In [59]:
# Tabulate the cross-validation results and show the three best-ranked
# candidates, transposed so that each candidate is one column.
# (sorted(regressor.cv_results_.keys()) lists the available result fields.)
results_cv = pd.DataFrame(regressor.cv_results_)
results_cv.sort_values('rank_test_score').iloc[:3].transpose()

Unnamed: 0,104,137,119
mean_fit_time,0.335303,0.283626,0.243748
std_fit_time,0.003116,0.010509,0.002721
mean_score_time,0.002992,0.002202,0.002194
std_score_time,0.0,0.000395,0.000399
param_learning_rate,0.05,0.02,0.02
param_max_depth,9,5,4
param_max_features,0.5,0.5,0.5
param_min_samples_leaf,10,3,3
param_n_estimators,500,500,500
params,"{'learning_rate': 0.05, 'max_depth': 9, 'max_f...","{'learning_rate': 0.02, 'max_depth': 5, 'max_f...","{'learning_rate': 0.02, 'max_depth': 4, 'max_f..."


GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.05, loss='ls', max_depth=9,
                          max_features=0.5, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=10, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=500,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=42, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)