In [1]:
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [14]:
def stacking(models, meta_alg, data_train, targets_train, data_test, targets_test=None, random_state=None, test_size=None, cv=5):
    if test_size is None:
        meta_mtrx = np.empty((data_train.shape[0], len(models)))
        for n, model in enumerate(models):
            meta_mtrx[:, n] = cross_val_predict(model, data_train, targets_train, cv, method='predict')
            model.fit(x_train, y_train)
    
        meta_model = meta_alg.fit(meta_mtrx, targets_train)

        meta_mtrx_test = np.empty((test.shape[0], len(models))) 
        for n, model in enumerate(models): 
            meta_mtrx_test[:, n] = model.predict(data_test)

        meta_predict = meta_alg.predict(meta_mtrx_test)
        if targets_test is not None: 
            print(f'Stacking AUC: {roc_auc_score(test_true, meta_predict)}')
        
        return meta_predict
    
    elif test_size > 0 and test_size < 1:
        train, test, train_true, test_true = train_test_split(data_train, 
                                                      targets_train,
                                                      test_size=test_size)
        x_train, valid, y_train, valid_true = train_test_split(train, 
                                                      train_true, 
                                                      test_size=test_size)
       
        meta_mtrx = np.empty((valid.shape[0], len(models)))
        for n, model in enumerate(models):
            model.fit(x_train, y_train)
            meta_mtrx[:, n] = model.predict(valid)
            
        meta_model = meta_alg.fit(meta_mtrx, valid_true)
        
        meta_mtrx_test = np.empty((test.shape[0], len(models))) 
        for n, model in enumerate(models):
            meta_mtrx_test[:, n] = model.predict(test)
        
        meta_predict = meta_alg.predict(meta_mtrx_test)
        
        if targets_test is not None:
            print(f'Stacking AUC: {roc_auc_score(test_true, meta_predict)}')
        
        return meta_predict
    
    else:
        raise ValueError("test_size must be between 0 and 1")

In [10]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier


titanic = pd.read_csv('titanic.csv')
targets = titanic.Survived
data = titanic.drop(columns='Survived')

x_train, x_test, y_train, y_test = train_test_split(data, 
                                                    targets,
                                                    train_size=0.8,
                                                    random_state=17)

knn = KNeighborsClassifier(n_neighbors=3)
lr = LogisticRegression(random_state=17)
svc = SVC(random_state=17)

meta = XGBClassifier(n_estimators=40)

In [15]:
models = [knn, lr, svc]
stacking(models, meta, x_train, y_train, x_test, y_test, test_size=3, cv=5)

ValueError: test_size must be between 0 and 1

In [17]:
stacking(models, meta, x_train, y_train, x_test, y_test, test_size=0.3, cv=5)

Stacking AUC: 0.7094178239676262


array([0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0])

In [16]:
stacking(models, meta, x_train, y_train, x_test, targets_test=None, test_size=0.3, cv=5)

array([0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0])