In [1]:
from stack_ensembler import StackEnsemble

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
# generate toy dataset
x_train, y_train = make_classification(n_samples=1000, n_features=5, n_informative=2, 
                                       n_redundant=3, random_state=42)

# create train-test split
x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.25)

# print dataset
pd.DataFrame(x_train).head()

Unnamed: 0,0,1,2,3,4
0,0.910363,-1.555301,0.090764,0.691861,0.262369
1,2.750141,-1.860651,2.043508,2.002125,-0.5336
2,0.53969,-1.201483,-0.120428,0.418815,0.286138
3,0.940785,-0.445974,0.817847,0.678995,-0.271577
4,0.860166,-1.018003,0.367286,0.63972,0.036883


In [3]:
# define first-level models
models = [ 
    RandomForestClassifier(), 
    KNeighborsClassifier(),
    MLPClassifier()
]

# get data with meta-features
x_train_stack, x_test_stack = StackEnsemble(models, x_train, y_train, x_test, proba=True, orig_data=True)

# print new dataframe
pd.DataFrame(x_train_stack).head()

Getting predictions from RandomForestClassifier..
Getting predictions from KNeighborsClassifier..
Getting predictions from MLPClassifier..




Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.910363,-1.555301,0.090764,0.691861,0.262369,0.0,1.0,0.0,1.0,0.192428,0.807572
1,2.750141,-1.860651,2.043508,2.002125,-0.5336,0.0,1.0,0.0,1.0,0.008147,0.991853
2,0.53969,-1.201483,-0.120428,0.418815,0.286138,0.4,0.6,0.0,1.0,0.400527,0.599473
3,0.940785,-0.445974,0.817847,0.678995,-0.271577,0.8,0.2,0.8,0.2,0.334899,0.665101
4,0.860166,-1.018003,0.367286,0.63972,0.036883,0.0,1.0,0.0,1.0,0.318575,0.681425


In [4]:
# use logistic regression on original data
model = LogisticRegression()
preds = model.fit(x_train, y_train).predict(x_test)

# print results
print(classification_report(preds, y_test))
print(accuracy_score(preds, y_test))

             precision    recall  f1-score   support

          0       0.87      0.85      0.86       127
          1       0.85      0.87      0.86       123

avg / total       0.86      0.86      0.86       250

0.86


In [5]:
# use logistic regression on data with meta-features
preds = model.fit(x_train_stack, y_train).predict(x_test_stack)

# print results
print(classification_report(preds, y_test))
print(accuracy_score(preds, y_test))

             precision    recall  f1-score   support

          0       0.94      0.91      0.93       128
          1       0.91      0.94      0.93       122

avg / total       0.93      0.93      0.93       250

0.928
