In [1]:
from stack_ensembler import StackEnsemble

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
# generate toy dataset
x_train, y_train = make_classification(n_samples=1000, n_features=5, n_informative=2, 
                                       n_redundant=3, random_state=42)

# create train-test split
x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.25)

# print dataset
pd.DataFrame(x_train).head()

Unnamed: 0,0,1,2,3,4
0,0.012068,-0.527895,-0.315075,0.024891,0.240546
1,-0.051266,-0.934581,-0.642412,-0.007287,0.462916
2,0.374295,1.167638,1.16401,0.228461,-0.736643
3,0.405975,1.189238,1.214381,0.250191,-0.762901
4,0.057043,0.766853,0.544567,0.016569,-0.387479


In [3]:
# define first-level models
models = [ 
    RandomForestClassifier(), 
    KNeighborsClassifier(),
    MLPClassifier()
]

# get data with meta-features
x_train_stack, x_test_stack = StackEnsemble(models, x_train, y_train, x_test, prob=True, orig_data=True)

# print new dataframe
pd.DataFrame(x_train_stack).head()

Getting predictions from RandomForestClassifier..
Getting predictions from KNeighborsClassifier..
Getting predictions from MLPClassifier..




Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.012068,-0.527895,-0.315075,0.024891,0.240546,0.0,1.0,0.2,0.8,0.537114,0.462886
1,-0.051266,-0.934581,-0.642412,-0.007287,0.462916,0.8,0.2,0.6,0.4,0.685577,0.314423
2,0.374295,1.167638,1.16401,0.228461,-0.736643,0.0,1.0,0.0,1.0,0.004568,0.995432
3,0.405975,1.189238,1.214381,0.250191,-0.762901,0.0,1.0,0.0,1.0,0.004114,0.995886
4,0.057043,0.766853,0.544567,0.016569,-0.387479,0.0,1.0,0.0,1.0,0.025173,0.974827


In [4]:
# use logistic regression on original data
model = LogisticRegression()
preds = model.fit(x_train, y_train).predict(x_test)

# print results
print(classification_report(preds, y_test))
print(accuracy_score(preds, y_test))

             precision    recall  f1-score   support

          0       0.85      0.89      0.87       118
          1       0.90      0.86      0.88       132

avg / total       0.87      0.87      0.87       250

0.872


In [5]:
# use logistic regression on data with meta-features
preds = model.fit(x_train_stack, y_train).predict(x_test_stack)

# print results
print(classification_report(preds, y_test))
print(accuracy_score(preds, y_test))

             precision    recall  f1-score   support

          0       0.94      0.91      0.92       127
          1       0.91      0.93      0.92       123

avg / total       0.92      0.92      0.92       250

0.924
