In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

# Get data from https://www.openml.org/d/554
# as_frame=False pins the return type to NumPy arrays, so this cell behaves the
# same regardless of the installed scikit-learn's default for `as_frame`.
data = fetch_openml('mnist_784', version=1, as_frame=False)

# Build the frame from the pixel matrix alone so the feature columns keep their
# numeric dtype. Stacking pixels and labels into one array (np.c_) forces a
# single common dtype; the labels are non-numeric (OpenML serves them as
# strings), so every pixel value would be upcast to a Python object.
dfData = pd.DataFrame(data["data"], columns=data["feature_names"])

# Labels go in as their own column, appended last, so the column layout is
# still feature_names + ["target"].
dfData["target"] = data["target"]

In [4]:
# Single stratified 80/20 shuffle split; the fixed random_state keeps the
# train/test partition identical from run to run.
stratSplit = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)

# Select the feature columns and the label column once, then slice both with
# the same positional indices.
feature_frame = dfData[data["feature_names"]]
label_series = dfData["target"]

# n_splits=1, so the split generator yields exactly one (train, test) pair.
train_index, test_index = next(stratSplit.split(feature_frame, label_series))

X_train, X_test = feature_frame.iloc[train_index], feature_frame.iloc[test_index]
y_train, y_test = label_series.iloc[train_index], label_series.iloc[test_index]

In [5]:
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier

# First-layer learners: their predictions become the input features of the
# final estimator.
# NOTE(review): none of these (nor the final RandomForestClassifier) is given a
# random_state, so the fitted model and its score can differ between runs.
base_learners = [
    ('dt_depth8', DecisionTreeClassifier(max_depth=8)),
    ('rbf_svc', SVC()),
    ('log_reg', LogisticRegression()),
]

stacked_clf = StackingClassifier(
    estimators=base_learners,
    final_estimator=RandomForestClassifier(),
    # split for holdout set
    cv=5,
    # Which function to call when doing the prediction: 'auto' uses
    # 'predict_proba', 'decision_function', 'predict' in that order
    # (resolved for each estimator separately).
    stack_method='auto',
    n_jobs=2,
    # Whether the final estimator should use the raw training inputs in
    # addition to the estimators layer prediction output.
    passthrough=False,
    verbose=0,
)

# Last expression of the cell, so the fitted estimator's repr is displayed.
stacked_clf.fit(X_train, y_train)

StackingClassifier(cv=5,
                   estimators=[('dt_depth8',
                                DecisionTreeClassifier(ccp_alpha=0.0,
                                                       class_weight=None,
                                                       criterion='gini',
                                                       max_depth=8,
                                                       max_features=None,
                                                       max_leaf_nodes=None,
                                                       min_impurity_decrease=0.0,
                                                       min_impurity_split=None,
                                                       min_samples_leaf=1,
                                                       min_samples_split=2,
                                                       min_weight_fraction_leaf=0.0,
                                                       presort='deprecated',
                    

In [6]:
# Inspect the fitted stack, one attribute per output line:
#   named_estimators_ - the fitted first-layer estimators, keyed by name
#   final_estimator_  - the fitted final estimator
#   stack_method_     - prediction method used by each estimator
print(
    stacked_clf.named_estimators_,
    stacked_clf.final_estimator_,
    stacked_clf.stack_method_,
    sep="\n",
)

{'dt_depth8': DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=8, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best'), 'rbf_svc': SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'log_reg': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,

In [7]:
# Evaluate the single-layer stack on the held-out test split.
stacked_clf.score(X=X_test, y=y_test)

0.9800714285714286

In [10]:
from sklearn.ensemble import GradientBoostingClassifier
# Two-layer stack: the final estimator of the outer StackingClassifier is
# itself a StackingClassifier.
# NOTE(review): no estimator here is given a random_state, so the fitted model
# and its score can differ between runs.

# Inner stack (used as the outer stack's final estimator): two shallow
# tree ensembles combined by a random forest.
second_layer_learners = [
    ('rand_forest', RandomForestClassifier(max_depth=3)),
    ('grad_boost', GradientBoostingClassifier(max_depth=3)),
]
final_layers_clf = StackingClassifier(
    estimators=second_layer_learners,
    final_estimator=RandomForestClassifier(),
    n_jobs=2,
)

# Outer stack: these learners see the raw training features; `cv` is left at
# its default.
first_layer_learners = [
    ('dt_depth8', DecisionTreeClassifier(max_depth=8)),
    ('rbf_svc', SVC()),
    ('log_reg', LogisticRegression()),
]
stack_base_clf = StackingClassifier(
    estimators=first_layer_learners,
    final_estimator=final_layers_clf,
    n_jobs=2,
)

# Last expression of the cell, so the fitted estimator's repr is displayed.
stack_base_clf.fit(X_train, y_train)

StackingClassifier(cv=None,
                   estimators=[('dt_depth8',
                                DecisionTreeClassifier(ccp_alpha=0.0,
                                                       class_weight=None,
                                                       criterion='gini',
                                                       max_depth=8,
                                                       max_features=None,
                                                       max_leaf_nodes=None,
                                                       min_impurity_decrease=0.0,
                                                       min_impurity_split=None,
                                                       min_samples_leaf=1,
                                                       min_samples_split=2,
                                                       min_weight_fraction_leaf=0.0,
                                                       presort='deprecated',
                 

In [11]:
# Evaluate the two-layer stack on the held-out test split.
stack_base_clf.score(X=X_test, y=y_test)

0.9795