In [None]:
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import f1_score

In [None]:
# Load the scikit-learn digits dataset: X holds the features, y the labels.
digits = load_digits()
X, y = digits.data, digits.target

In [None]:
# 10-fold cross-validation splitter reused by the experiments in this notebook.
# With shuffle=True and no seed, every kf.split(X) call would draw a new
# partition, so the models would be scored on different folds and the numbers
# would change on each run. A fixed integer random_state makes every call
# yield the same folds.
kf = KFold(n_splits=10, shuffle=True, random_state=0)

In [None]:
# Out-of-fold predictions for a single LinearSVC: each sample is predicted by
# a model fitted on the folds that do not contain it.
hy = np.empty_like(y)
for fit_idx, eval_idx in kf.split(X):
    svc = LinearSVC()
    svc.fit(X[fit_idx], y[fit_idx])
    hy[eval_idx] = svc.predict(X[eval_idx])

In [None]:
# Macro-averaged F1 of the out-of-fold predictions currently stored in hy.
lsvc_perf = f1_score(y_true=y, y_pred=hy, average="macro")

# Bagging

In [None]:
# Ensemble of LinearSVC models evaluated with the folds in kf. Each member is
# fitted on a random half of the training fold (drawn without replacement by
# train_test_split); the prediction is the class whose decision score,
# averaged over all members, is highest.
N_MODELS = 10  # ensemble members fitted per fold
hy = np.empty_like(y)
for tr, vs in kf.split(X):
    models = []
    for _ in range(N_MODELS):
        # Only the first half is used for fitting; the other half is discarded.
        subset = train_test_split(tr, test_size=0.5, shuffle=True)[0]
        models.append(LinearSVC().fit(X[subset], y[subset]))
    scores = np.mean([m.decision_function(X[vs]) for m in models], axis=0)
    # argmax yields a column index; translate it to the actual label through
    # classes_ so the result is correct even when labels are not 0..K-1.
    hy[vs] = models[0].classes_[scores.argmax(axis=1)]

In [None]:
# Show the stored LinearSVC score next to the macro-F1 of the predictions
# currently held in hy.
bagging_perf = f1_score(y, hy, average='macro')
(lsvc_perf, bagging_perf)

# Video
* [Video Bagging](https://youtu.be/fi8QVE4pxWw)

# Stacked Generalization

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
# Out-of-fold predictions for a Gaussian naive Bayes classifier, using the
# folds produced by kf.
hy = np.empty_like(y)
for fit_idx, eval_idx in kf.split(X):
    nb = GaussianNB()
    nb.fit(X[fit_idx], y[fit_idx])
    hy[eval_idx] = nb.predict(X[eval_idx])

In [None]:
# Macro-averaged F1 of the predictions currently stored in hy.
f1_score(y_true=y, y_pred=hy, average="macro")

In [None]:
def train(base_cl, X, y, n_splits=10, random_state=1):
    """Compute out-of-fold class scores to use as stacking meta-features.

    Each row of ``X`` is scored by a model that was fitted on the folds that
    do not contain that row.

    Parameters
    ----------
    base_cl : callable
        Zero-argument factory (for example an estimator class) returning an
        unfitted model that offers ``fit`` and either ``predict_proba`` or
        ``decision_function``.
    X : ndarray
        Samples, one per row.
    y : ndarray
        Labels aligned with the rows of ``X``.
    n_splits : int, default=10
        Number of internal cross-validation folds.
    random_state : int, default=1
        Seed used to shuffle the internal folds.

    Returns
    -------
    ndarray
        Array with ``X.shape[0]`` rows and one column per distinct label in
        ``y``, holding ``predict_proba`` output when the model provides it
        and ``decision_function`` output otherwise.

    Raises
    ------
    ValueError
        If the scores of a fold do not have one column per distinct label,
        e.g. a 1-D ``decision_function`` result or a training fold that is
        missing a class.
    """
    n_classes = np.unique(y).shape[0]
    df = np.empty((X.shape[0], n_classes))
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    for tr, vs in kf.split(X):
        m = base_cl().fit(X[tr], y[tr])
        # Pick the scoring method up front instead of catching AttributeError,
        # which would also hide AttributeErrors raised inside predict_proba.
        if hasattr(m, "predict_proba"):
            scores = m.predict_proba(X[vs])
        else:
            scores = m.decision_function(X[vs])
        scores = np.asarray(scores)
        expected_shape = (len(vs), n_classes)
        if scores.shape != expected_shape:
            # Fail loudly rather than letting numpy broadcast a wrong shape
            # into the output (or raise an opaque broadcasting error).
            raise ValueError(
                "expected fold scores of shape %r but got %r"
                % (expected_shape, scores.shape)
            )
        df[vs] = scores
    return df

In [None]:
# Stacked generalization evaluated with the folds in kf. For every fold:
#   1. fit GaussianNB and LinearSVC on the training part and keep their own
#      predictions (hy_nb, hy_svc) as baselines;
#   2. build second-level training features with train(), i.e. out-of-fold
#      scores of both base models on the training part;
#   3. fit a RandomForestClassifier on those features and predict the held-out
#      part from the base models' scores on it.
hy_svc = np.empty_like(y)
hy_nb = np.empty_like(y)
# Fresh array for the stacked predictions, so the cell does not depend on an
# hy left behind by an earlier cell.
hy = np.empty_like(y)
for tr, vs in kf.split(X):
    m1 = GaussianNB().fit(X[tr], y[tr])
    m2 = LinearSVC().fit(X[tr], y[tr])
    hy_svc[vs] = m2.predict(X[vs])
    hy_nb[vs] = m1.predict(X[vs])
    # Column order (naive Bayes first, then LinearSVC) must match between the
    # training and the held-out meta-features.
    meta_train = np.concatenate((train(GaussianNB, X[tr], y[tr]), train(LinearSVC, X[tr], y[tr])), axis=1)
    st = RandomForestClassifier().fit(meta_train, y[tr])
    meta_test = np.concatenate((m1.predict_proba(X[vs]), m2.decision_function(X[vs])), axis=1)
    hy[vs] = st.predict(meta_test)

In [None]:
# Macro-F1 for hy_nb, hy_svc and hy, in that order.
tuple(f1_score(y, pred, average='macro') for pred in (hy_nb, hy_svc, hy))

# Video
* [Video Stacked Generalization](https://youtu.be/RSJf49O-IaI)