In [1]:
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from itertools import combinations
from vecstack import stacking

## Stacking

* Ensemble model that combines predictions from multiple models to create a better prediction
* Very popular on Kaggle and other online competitions

In [3]:
data = pd.read_csv('german_credit.csv')

In [4]:
X = data.loc[:, data.columns != 'Creditability']
y = data["Creditability"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [5]:
sc = StandardScaler()
sc.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [6]:
# Apply the scaler to the X training data
X_train_std = sc.transform(X_train)

# Apply the SAME scaler to the X test data
X_test_std = sc.transform(X_test)

In [15]:
models = [
    GradientBoostingClassifier(random_state=0,
                           n_estimators=100, max_depth=3),
        
    RandomForestClassifier(random_state=0, n_jobs=-1, 
                           n_estimators=100, max_depth=3),
        
    MLPClassifier(hidden_layer_sizes=(22, 22, 22),max_iter=1000)
]

In [20]:
S_train, S_test = stacking(models,                   
                           X_train_std, y_train, X_test_std,   
                           regression=False, 
     
                           mode='oof_pred_bag', 
       
                           needs_proba=False,
         
                           save_dir=None, 
            
                           metric=accuracy_score, 
    
                           n_folds=4, 
                 
                           stratified=True,
            
                           shuffle=True,  
            
                           random_state=0,    
         
                           verbose=2)

task:         [classification]
n_classes:    [2]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [3]

model  0:     [GradientBoostingClassifier]
    fold  0:  [0.77272727]
    fold  1:  [0.73714286]
    fold  2:  [0.73714286]
    fold  3:  [0.75862069]
    ----
    MEAN:     [0.75140842] + [0.01511227]
    FULL:     [0.75142857]

model  1:     [RandomForestClassifier]
    fold  0:  [0.74431818]
    fold  1:  [0.71428571]
    fold  2:  [0.71428571]
    fold  3:  [0.70689655]
    ----
    MEAN:     [0.71994654] + [0.01439070]
    FULL:     [0.72000000]

model  2:     [MLPClassifier]
    fold  0:  [0.75000000]
    fold  1:  [0.72571429]
    fold  2:  [0.69714286]
    fold  3:  [0.68965517]
    ----
    MEAN:     [0.71562808] + [0.02397632]
    FULL:     [0.71571429]



In [21]:
model = MLPClassifier(hidden_layer_sizes=(22, 22, 22),max_iter=1000)
    
model = model.fit(S_train, y_train)
y_pred = model.predict(S_test)

In [22]:
print('Final prediction score: [%.8f]' % accuracy_score(y_test, y_pred))

Final prediction score: [0.75333333]
