In [1]:
from stack_ensembler import StackEnsemble

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
# generate toy dataset
x_train, y_train = make_classification(n_samples=1000, n_features=5, n_informative=2, 
                                       n_redundant=3, random_state=42)

# create train-test split
x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.25)

# print dataset
pd.DataFrame(x_train).head()

Unnamed: 0,0,1,2,3,4
0,0.225824,1.195602,1.008493,0.122619,-0.673961
1,-1.829013,1.348138,-1.290046,-1.334966,0.303148
2,-0.659491,-0.177663,-0.878999,-0.460783,0.419505
3,-1.468723,1.087383,-1.032926,-1.072146,0.241185
4,0.410086,0.865449,1.017293,0.26313,-0.613682


In [3]:
# define first-level models
models = [ 
    RandomForestClassifier(), 
    KNeighborsClassifier(),
    MLPClassifier()
]

# get data with meta-features
x_train_stack, x_test_stack = StackEnsemble(models, x_train, y_train, x_test, prob=True, orig_data=True)

# print new dataframe
pd.DataFrame(x_train_stack).head()

Getting predictions from RandomForestClassifier..
Getting predictions from KNeighborsClassifier..
Getting predictions from MLPClassifier..




Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.225824,1.195602,1.008493,0.122619,-0.673961,0.0,1.0,0.0,1.0,0.005501,0.994499
1,-1.829013,1.348138,-1.290046,-1.334966,0.303148,1.0,0.0,0.8,0.2,0.984064,0.015936
2,-0.659491,-0.177663,-0.878999,-0.460783,0.419505,0.4,0.6,0.6,0.4,0.827411,0.172589
3,-1.468723,1.087383,-1.032926,-1.072146,0.241185,1.0,0.0,1.0,0.0,0.957629,0.042371
4,0.410086,0.865449,1.017293,0.26313,-0.613682,0.0,1.0,0.0,1.0,0.010351,0.989649


In [4]:
# use logistic regression on original data
model = LogisticRegression()
preds = model.fit(x_train, y_train).predict(x_test)

# print results
print(classification_report(preds, y_test))
print(accuracy_score(preds, y_test))

             precision    recall  f1-score   support

          0       0.86      0.84      0.85       126
          1       0.84      0.86      0.85       124

avg / total       0.85      0.85      0.85       250

0.852


In [5]:
# use logistic regression on data with meta-features
preds = model.fit(x_train_stack, y_train).predict(x_test_stack)

# print results
print(classification_report(preds, y_test))
print(accuracy_score(preds, y_test))

             precision    recall  f1-score   support

          0       0.93      0.90      0.91       127
          1       0.90      0.93      0.91       123

avg / total       0.91      0.91      0.91       250

0.912
