In [1]:
import pandas as pd

from stack_ensembler import stack_ensemble

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

In [2]:
# generate toy dataset
x_train, y_train = make_classification(n_samples=1000, n_features=5, n_informative=2, 
                                       n_redundant=3, random_state=42)

# create train-test split
x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.25)

# print dataset
pd.DataFrame(x_train).head()

Unnamed: 0,0,1,2,3,4
0,-2.305015,1.157283,-1.963526,-1.665606,0.6352
1,-1.143589,0.341192,-1.119419,-0.819139,0.424018
2,-1.164148,-0.262691,-1.519878,-0.814962,0.716721
3,-0.617729,-0.173771,-0.827924,-0.431376,0.396379
4,2.803911,-0.702825,2.828028,2.004265,-1.102124


In [3]:
# use logistic regression for baseline
model = LogisticRegression()
preds = model.fit(x_train, y_train).predict(x_test)

# print results
print(classification_report(preds, y_test))
print(accuracy_score(preds, y_test))

             precision    recall  f1-score   support

          0       0.80      0.86      0.83       113
          1       0.88      0.82      0.85       137

avg / total       0.84      0.84      0.84       250

0.84


In [4]:
# define first-level models
models = [ 
    RandomForestClassifier(), 
    KNeighborsClassifier(),
    MLPClassifier()
]

# get data with meta-features
x_train_stack, x_test_stack = stack_ensemble(models, pd.DataFrame(x_train), y_train, pd.DataFrame(x_test))

# print new dataframe
x_train_stack.head()

Getting predictions from RandomForestClassifier..
Getting predictions from KNeighborsClassifier..
Getting predictions from MLPClassifier..




Unnamed: 0,0,1,2,3,4,KNeighborsClassifier,RandomForestClassifier,MLPClassifier
0,-2.305015,1.157283,-1.963526,-1.665606,0.6352,0,0,0
1,-1.143589,0.341192,-1.119419,-0.819139,0.424018,0,0,0
2,-1.164148,-0.262691,-1.519878,-0.814962,0.716721,1,1,0
3,-0.617729,-0.173771,-0.827924,-0.431376,0.396379,0,0,0
4,2.803911,-0.702825,2.828028,2.004265,-1.102124,1,1,1


In [5]:
# use logistic regression on stack ensembled data
preds = model.fit(x_train_stack, y_train).predict(x_test_stack)

# print results
print(classification_report(preds, y_test))
print(accuracy_score(preds, y_test))

             precision    recall  f1-score   support

          0       0.90      0.91      0.90       120
          1       0.91      0.91      0.91       130

avg / total       0.91      0.91      0.91       250

0.908
