# Ensemble
= 여러 개의 분류기를 생성하고, 그 예측을 결합함으로써 보다 정확한 예측을 도출하는 기법

- 앙상블에는 크게 두가지의 종류가 존재한다

> **평균 방법**
> - 여러개의 추정값을 독립적으로 구한뒤 평균을 취하는 방법이다.  
> - 결합 추정값은 분산이 줄어들기 때문에 단일 추정값보다 좋은 성능을 보인다.


> **부스팅 방법**  
> - 순차적으로 모델을 생성  
> - 결합된 모델의 편향을 감소 시키기 위해 노력
> - 부스팅 방법의 목표는 여러개의 약한 모델들을 결합하여 하나의 강력한 앙상블 모델을 구축하는 것

In [51]:
from sklearn.datasets import load_iris, load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate

In [9]:
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [7]:
from sklearn.ensemble import BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

## Bagging
- **원래 훈련 데이터셋의 일부를 사용하여 여러 모델을 훈련시킨다.**
- 각각의 결과를 결합하여 최종 결과를 생성한다.
- 분산을 줄이고 과대적합을 막을 수 있다.
- 강력하며, 복잡한 모델에서도 잘 동작한다.

In [41]:
import warnings
warnings.filterwarnings(action='ignore')
iris = load_iris()
boston = load_boston()

### Classification

#### iris KNN

In [43]:
iris_knn = make_pipeline(StandardScaler(), 
                          KNeighborsClassifier()
                          )

iris_knn_bagging = BaggingClassifier(base_estimator = iris_model, n_estimators = 10, 
                                max_features = 0.5, max_samples = 0.5)

# 기본의iris_knn에서 0.5정도만 사용

In [45]:
cross_val = cross_validate(estimator = iris_knn,
                          X = iris.data, y = iris.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.0019933223724365235 (+/- 2.7746842433883964e-06)
average score time : 0.004004192352294922 (+/- 0.0011065113818782804)
average test score : 0.96 (+/- 0.024944382578492935)


#### iris KNN Bagging

In [30]:
cross_val = cross_validate(estimator = iris_knn_bagging,
                          X = iris.data, y = iris.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.021535062789916994 (+/- 0.004797821733570343)
average score time : 0.007786417007446289 (+/- 0.00040252209768852805)
average test score : 0.9533333333333334 (+/- 0.03399346342395189)


#### iris SVC

In [31]:
iris_svc = make_pipeline(StandardScaler(),
                        SVC()
                        )

iris_svc_bagging = BaggingClassifier(base_estimator = iris_svc, n_estimators = 10,
                                    max_features = 0.5, max_samples = 0.5)

In [33]:
cross_val = cross_validate(estimator = iris_svc,
                          X = iris.data, y = iris.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.0021936893463134766 (+/- 0.0007455882741681918)
average score time : 0.0009976863861083985 (+/- 0.000631052762736764)
average test score : 0.9666666666666666 (+/- 0.02108185106778919)


#### iris SVC Bagging

In [34]:
cross_val = cross_validate(estimator = iris_svc_bagging,
                          X = iris.data, y = iris.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.02992706298828125 (+/- 0.006490435433792393)
average score time : 0.004191255569458008 (+/- 0.0003982642200416217)
average test score : 0.96 (+/- 0.024944382578492935)


#### iris DecisionTree

In [35]:
iris_tree = make_pipeline(StandardScaler(),
                         DecisionTreeClassifier()
                         )

iris_tree_bagging = BaggingClassifier(base_estimator = iris_tree, n_estimators = 10,
                                     max_features = 0.5, max_samples = 0.5)

In [65]:
corss_val = cross_validate(estimator = iris_tree,
                          X = iris.data, y = iris.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.02412114143371582 (+/- 0.007954490027704174)
average score time : 0.009182357788085937 (+/- 0.0003956886163107236)
average test score : 0.44556487915030996 (+/- 0.13415137085068926)


#### iris DecisionTree Bagging

In [68]:
corss_val = cross_validate(estimator = iris_tree_bagging,
                          X = iris.data, y = iris.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.02412114143371582 (+/- 0.007954490027704174)
average score time : 0.009182357788085937 (+/- 0.0003956886163107236)
average test score : 0.44556487915030996 (+/- 0.13415137085068926)


### Regression

#### boston knn

In [48]:
boston_knn = make_pipeline(StandardScaler(),
                          KNeighborsRegressor()
                          )

boston_knn_bagging = BaggingRegressor(base_estimator = boston_knn, n_estimators = 10,
                                      max_features = 0.5, max_samples = 0.5)

In [49]:
cross_val = cross_validate(estimator = boston_knn,
                          X = boston.data, y = boston.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.0027962207794189455 (+/- 0.0007418829424542464)
average score time : 0.003586435317993164 (+/- 0.0004916793146278901)
average test score : 0.47357748833823543 (+/- 0.13243123464477455)


#### boston knn bagging

In [50]:
cross_val = cross_validate(estimator = boston_knn_bagging,
                          X = boston.data, y = boston.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.022930669784545898 (+/- 0.0070117737310611845)
average score time : 0.008983230590820313 (+/- 2.503581267199729e-05)
average test score : 0.4989683535998036 (+/- 0.1084425246780574)


#### boston SVR

In [52]:
boston_svr = make_pipeline(StandardScaler(),
                          SVR()
                          )

boston_svr_bagging = BaggingRegressor(base_estimator = boston_knn, n_estimators = 10,
                                      max_features = 0.5, max_samples = 0.5)

In [53]:
cross_val = cross_validate(estimator = boston_svr,
                          X = boston.data, y = boston.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.013968229293823242 (+/- 0.002751560513061811)
average score time : 0.005778646469116211 (+/- 0.0011632634527032667)
average test score : 0.17631266230186618 (+/- 0.5224914915128981)


#### boston SVR Bagging

In [54]:
cross_val = cross_validate(estimator = boston_svr_bagging,
                          X = boston.data, y = boston.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.023935794830322266 (+/- 0.008052494211595275)
average score time : 0.009773540496826171 (+/- 0.001162875697232742)
average test score : 0.38587448537050645 (+/- 0.12392405727203822)


#### boston DecisionTree 

In [56]:
boston_tree = make_pipeline(StandardScaler(),
                          DecisionTreeRegressor()
                          )

boston_tree_bagging = BaggingRegressor(base_estimator = boston_knn, n_estimators = 10,
                                      max_features = 0.5, max_samples = 0.5)

In [57]:
cross_val = cross_validate(estimator = boston_tree,
                          X = boston.data, y = boston.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.005784845352172852 (+/- 0.0007482912194524381)
average score time : 0.001196432113647461 (+/- 0.00039957472675885484)
average test score : 0.21915663487761144 (+/- 0.6225445810950914)


#### boston DecisionTree Bagging

In [58]:
cross_val = cross_validate(estimator = boston_tree_bagging,
                          X = boston.data, y = boston.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.02412114143371582 (+/- 0.007954490027704174)
average score time : 0.009182357788085937 (+/- 0.0003956886163107236)
average test score : 0.44556487915030996 (+/- 0.13415137085068926)


## Forests of randomized trees
= 훈련 과정에서 구성한 다수의 결정 트리로부터 분류 또는 평균 회귀를 출력함으로써 동작한다.

- sklearn 모듈에는 무작위 결정 트리를 기반으로하는 두개의 평균화 알고리즘이 존재한다.
> - RandomForest  
> - Extra-Trees

In [69]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor

## RandomForest

### Classification

In [70]:
randomforest_c = make_pipeline(StandardScaler(),
                              RandomForestClassifier()
                              )

In [73]:
cross_val = cross_validate(estimator = randomforest_c,
                          X = iris.data, y = iris.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

# 성능이 향상

average fit time : 0.13385615348815919 (+/- 0.020059034488315777)
average score time : 0.010163593292236327 (+/- 0.00040684257710672746)
average test score : 0.96 (+/- 0.024944382578492935)


###  Regression

In [75]:
randomforest_r = make_pipeline(StandardScaler(),
                              RandomForestRegressor()
                              )

In [77]:
cross_val = cross_validate(estimator = randomforest_r,
                          X = boston.data, y = boston.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

# 성능이 향상

average fit time : 0.28603382110595704 (+/- 0.02261478515490323)
average score time : 0.010172796249389649 (+/- 0.0011639402947409513)
average test score : 0.623148569100421 (+/- 0.23222488470273897)


## Extra-Trees

### Classification

In [78]:
extra_c = make_pipeline(StandardScaler(),
                              ExtraTreesClassifier()
                              )

In [83]:
cross_val = cross_validate(estimator = extra_c,
                          X = iris.data, y = iris.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.09295186996459961 (+/- 0.00693884215397248)
average score time : 0.0107696533203125 (+/- 0.00039830351965931246)
average test score : 0.9533333333333334 (+/- 0.03399346342395189)


### Regression

In [89]:
extra_r = make_pipeline(StandardScaler(),
                              ExtraTreesRegressor()
                              )

In [106]:
cross_val = cross_validate(estimator = extra_r,
                          X = boston.data, y = boston.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.1883028507232666 (+/- 0.008917615360325438)
average score time : 0.009766912460327149 (+/- 0.0003955036578596711)
average test score : 0.6354965106421536 (+/- 0.2593049813175649)


## AdaBoost
= 일련의 약한 모델들을 학습하고, 수정된 버전의 데이터를 반복 학습한다(가중치 적용)

- 가중치 투표(합)을 통해 각 모델의 예측 값을 결합
- 첫 단계에서는 원본 데이터를 학습하고 연속적인 반복마다 개별 샘플에 대한 가중치가 수정되고 다시 모델이 학습된다.
- 잘못 예측된 샘플을 가중치가 증가, 올바르게 예측된 샘플은 가중치가 감소한다.
- 각각의 약한 모델들은 예측하기 어려운 샘플에 집중하게 된다.

In [92]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import AdaBoostRegressor

### Classification

In [93]:
ada_c = make_pipeline(StandardScaler(),
                     AdaBoostClassifier()
                     )

In [94]:
cross_val = cross_validate(estimator = ada_c,
                          X = iris.data, y = iris.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.07201619148254394 (+/- 0.01305048609306411)
average score time : 0.008774471282958985 (+/- 0.0014650649177584778)
average test score : 0.9466666666666667 (+/- 0.03399346342395189)


### Regression

In [95]:
ada_r = make_pipeline(StandardScaler(),
                     AdaBoostRegressor()
                     )

In [114]:
cross_val = cross_validate(estimator = ada_r,
                          X = boston.data, y = boston.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.09035835266113282 (+/- 0.013714208168123142)
average score time : 0.004786348342895508 (+/- 0.0003985654047703985)
average test score : 0.5934762459746185 (+/- 0.20548122707660724)


## GradientBoost
= 임의의 차별화 가능한 손실함수로 일반화한 부스팅 알고리즘

- 웹 검색, 분류 및 회귀 등 다양한 분야에서 모두 사용이 가능하다.

In [98]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor

### Classification

In [99]:
gb_c = make_pipeline(StandardScaler(),
                     GradientBoostingClassifier()
                     )

In [100]:
cross_val = cross_validate(estimator = gb_c,
                          X = iris.data, y = iris.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.1966726303100586 (+/- 0.020000318288203093)
average score time : 0.0009993553161621095 (+/- 1.910921565009487e-06)
average test score : 0.9666666666666668 (+/- 0.02108185106778919)


### Regression

In [103]:
gb_r = make_pipeline(StandardScaler(),
                     GradientBoostingRegressor()
                     )

In [104]:
cross_val = cross_validate(estimator = gb_r,
                          X = boston.data, y = boston.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

# test score가 향상

average fit time : 0.11708593368530273 (+/- 0.0068690894225995605)
average score time : 0.00079803466796875 (+/- 0.0003990175334262532)
average test score : 0.6802070477119095 (+/- 0.1497104387996135)


## Voting
= 서로 다른 모델들의 결과를 투표를 통해 결합하는 방식이다.
- 두가지의 방식으로 투표 가능하다.
> - 다수의 분류기가 예측한 결과값을 최종 결과로 선정(hard voting)
> - 모든 분류기가 예측한 레이블 값의 결정 확률 평균을 구한 뒤 가장 확률이 높은 레이블 값을 최종 결과로 선정(soft voting)

In [132]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor

### Classification

#### hard voting

In [116]:
model1 = SVC()
model2 = GaussianNB()
model3 = RandomForestClassifier()
hard_voting = VotingClassifier(estimators = [("SVC", model1), ("GaussianNB", model2), ("RandomForest", model3)],
                              voting = "hard")

In [127]:
for i in (model1, model2, model3, hard_voting):
    model_name = str(type(i)).split(".")[-1][:-2]
    scores = cross_val_score( i, iris.data, iris.target, cv = 5)
    print("[%s] : Accuracy = %0.2f" %(model_name, scores.mean()))
    
# 다수의 분류기가 0.97로 예측하여 해당 결과를 최종 결과로 선정

[SVC] : Accuracy = 0.97
[GaussianNB] : Accuracy = 0.95
[RandomForestClassifier] : Accuracy = 0.97
[VotingClassifier] : Accuracy = 0.97


#### soft voting

In [128]:
model1 = SVC(probability = True)
model2 = GaussianNB()
model3 = RandomForestClassifier()
soft_voting = VotingClassifier(estimators = [("SVC", model1), ("GaussianNB", model2), ("RandomForest", model3)],
                              voting = "soft",
                              weights = [2, 1, 2])

In [130]:
for i in (model1, model2, model3, hard_voting):
    model_name = str(type(i)).split(".")[-1][:-2]
    scores = cross_val_score( i, iris.data, iris.target, cv = 5)
    print("[%s] : Accuracy = %0.2f" %(model_name, scores.mean()))
    
# 다수의 분류기가 예측한 결과를 평균내어 해당 결과를 최종 결과로 선정  

[SVC] : Accuracy = 0.97
[GaussianNB] : Accuracy = 0.95
[RandomForestClassifier] : Accuracy = 0.97
[VotingClassifier] : Accuracy = 0.96


### Regression

In [136]:
model1 = LinearRegression()
model2 = GradientBoostingRegressor()
model3 = RandomForestRegressor()
voting = VotingRegressor(estimators = [("Linear", model1), ("GB", model2), ("RF", model3)],
                             weights = [1, 1, 1])

In [137]:
for i in (model1, model2, model3, voting):
    model_name = str(type(i)).split(".")[-1][:-2]
    scores = cross_val_score( i, boston.data, boston.target, cv = 5)
    print("[%s] : R2 = %0.2f" %(model_name, scores.mean()))

[LinearRegression] : R2 = 0.35
[GradientBoostingRegressor] : R2 = 0.67
[RandomForestRegressor] : R2 = 0.62
[VotingRegressor] : R2 = 0.66


## Stacked Generalization
= 각 모델의 예측값을 최종 모델의 입력으로 사용한다

- 모델의 편향을 줄이는데 효과적이다.

In [141]:
from sklearn.linear_model import Ridge, Lasso, LogisticRegression
from sklearn.svm import SVR, SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import StackingRegressor, StackingClassifier

### Classification

In [144]:
estimators = [("logistic", LogisticRegression(max_iter = 10000)),
              ("svc", SVC()),
              ("GNB", GaussianNB())]

clf = StackingClassifier(estimators = estimators,
                        final_estimator = RandomForestClassifier())

In [145]:
cross_val = cross_validate(estimator = clf,
                          X = iris.data, y = iris.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.33370089530944824 (+/- 0.036935397695164775)
average score time : 0.011574554443359374 (+/- 0.0007919144996185815)
average test score : 0.9666666666666666 (+/- 0.02108185106778919)


### Regression

In [139]:
estimators = [("ridge", Ridge()),
             ("lasso", Lasso()),
             ("svr", SVR())]

reg = make_pipeline(StandardScaler(),
                   StackingRegressor(estimators = estimators,
                                    final_estimator = GradientBoostingRegressor()),
                   )

In [140]:
cross_val = cross_validate(estimator = reg,
                          X = boston.data, y = boston.target,
                          cv = 5)

print("average fit time : {} (+/- {})".format(cross_val["fit_time"].mean(), cross_val["fit_time"].std()))
print("average score time : {} (+/- {})".format(cross_val["score_time"].mean(), cross_val["score_time"].std()))
print("average test score : {} (+/- {})".format(cross_val["test_score"].mean(), cross_val["test_score"].std()))

average fit time : 0.19128780364990233 (+/- 0.01856398236198369)
average score time : 0.009375762939453126 (+/- 0.0017377884978557812)
average test score : 0.3262265367383581 (+/- 0.33062351487314573)
