In [1]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from eipy.ei import EnsembleIntegration
import eipy.utils as ut
from eipy.additional_ensembles import MeanAggregation, CES
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn import datasets
pd.set_option('display.max_columns', None)

In [2]:
# If data is multi-class, run a check on the allowable base and meta models.

base_predictors = {
                    'ADAB': AdaBoostClassifier(),
                    'XGB': XGBClassifier(),
                    'DT': DecisionTreeClassifier(),
                    'RF': RandomForestClassifier(),
                    'GB': GradientBoostingClassifier(),
                    'KNN': KNeighborsClassifier(),
                    'LR': LogisticRegression(multi_class="auto", solver="lbfgs"),
                    'NB': GaussianNB(),
                    'MLP': MLPClassifier(),
                    'SVM': SVC(probability=True)
}

In [3]:

"""
For filtering base predictors by whether or not they rely on heursitics for multiclass extension

natively_multi_class_predictors = ["XGBClassifier",
"BernoulliNB",
"DecisionTreeClassifier",
"ExtraTreeClassifier",
"GaussianNB",
"KNeighborsClassifier",
"LabelPropagation",
"LabelSpreading",
"LinearDiscriminantAnalysis",
"LinearSVC", #(setting multi_class=”crammer_singer”)
"LogisticRegression", #(setting multi_class=”multinomial”)
"LogisticRegressionCV", #(setting multi_class=”multinomial”)
"MLPClassifier",
"NearestCentroid",
"QuadraticDiscriminantAnalysis",
"RadiusNeighborsClassifier",
"RandomForestClassifier",
"RidgeClassifier",
"RidgeClassifierCV"]

base_predictors = {k : v for k,v in base_predictors.items() if str(v).split("(")[0] in natively_multi_class_predictors}
"""

'\nFor filtering base predictors by whether or not they rely on heursitics for multiclass extension\n\nnatively_multi_class_predictors = ["XGBClassifier",\n"BernoulliNB",\n"DecisionTreeClassifier",\n"ExtraTreeClassifier",\n"GaussianNB",\n"KNeighborsClassifier",\n"LabelPropagation",\n"LabelSpreading",\n"LinearDiscriminantAnalysis",\n"LinearSVC", #(setting multi_class=”crammer_singer”)\n"LogisticRegression", #(setting multi_class=”multinomial”)\n"LogisticRegressionCV", #(setting multi_class=”multinomial”)\n"MLPClassifier",\n"NearestCentroid",\n"QuadraticDiscriminantAnalysis",\n"RadiusNeighborsClassifier",\n"RandomForestClassifier",\n"RidgeClassifier",\n"RidgeClassifierCV"]\n\nbase_predictors = {k : v for k,v in base_predictors.items() if str(v).split("(")[0] in natively_multi_class_predictors}\n'

In [4]:
"""https://dev.pages.lis-lab.fr/scikit-multimodallearn/tutorial/auto_examples/combo/plot_combo_3_views_3_classes.html#
multi modal multi-class toy data generation"""

def generate_data(n_samples, lim):
    """Generate random data in a rectangle"""
    lim = np.array(lim)
    n_features = lim.shape[0]
    data = np.random.random((n_samples, n_features))
    data = (lim[:, 1]-lim[:, 0]) * data + lim[:, 0]
    return data
seed = 12
np.random.seed(seed)

n_samples = 300

modality_0 = np.concatenate((generate_data(n_samples, [[0., 1.], [0., 1.]]),
                         generate_data(n_samples, [[1., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 2.], [0., 1.]])))

modality_1 = np.concatenate((generate_data(n_samples, [[1., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 1.], [0., 1.]])))

modality_2 = np.concatenate((generate_data(n_samples, [[0., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 1.], [0., 1.]]),
                         generate_data(n_samples, [[1., 2.], [0., 1.]])))

X = np.concatenate([modality_0,modality_1,modality_2], axis=1)

y = np.zeros(3*n_samples, dtype=np.int64)
y[n_samples:2*n_samples] = 1
y[2*n_samples:] = 2



In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)

In [6]:
X_0_train, X_0_test = X_train[:, 0:2], X_test[:, 0:2]
X_1_train, X_1_test = X_train[:, 2:4], X_test[:, 2:4]
X_2_train, X_2_test = X_train[:, 4:], X_test[:, 4:]

In [7]:
data_train = {
                "Modality_0": X_0_train,
                "Modality_1": X_1_train,
                "Modality_2": X_2_train
                }

data_test = {
                "Modality_0": X_0_test,
                "Modality_1": X_1_test,
                "Modality_2": X_2_test
                }

In [8]:
EI = EnsembleIntegration(
                        base_predictors=base_predictors,
                        k_outer=5,
                        k_inner=5,
                        n_samples=1,
                        sampling_strategy=None,
                        n_jobs=-1,
                        random_state=42,
                        project_name="toy",
                        model_building=True,
                        )


In [9]:
EI.train_base(X=data_train, y=y_train)

Training base predictors on None...
        
... for ensemble performance analysis...


Generating meta training data: |          |  0%

Generating meta training data: |██████████|100%
Generating meta test data: |██████████|100%



... for final ensemble...


Generating meta training data: |██████████|100%
Training final base predictors: |██████████|100%






Generating meta training data: |██████████|100%
Generating meta test data: |██████████|100%



... for final ensemble...


Generating meta training data: |██████████|100%
Training final base predictors: |██████████|100%






Generating meta training data: |██████████|100%
Generating meta test data: |██████████|100%



... for final ensemble...


Generating meta training data: |██████████|100%
Training final base predictors: |██████████|100%






In [10]:
EI.base_summary

{'metrics': modality       Modality_0                                                    \
 base predictor       ADAB        DT        GB       KNN        LR       MLP   
 precision        0.525585  0.525310  0.554165  0.518227  0.565432  0.561388   
 recall           0.663835  0.525739  0.602771  0.565332  0.592944  0.629683   
 f1               0.533093  0.525519  0.566932  0.534460  0.575164  0.568177   
 
 modality                                               Modality_1            \
 base predictor        NB        RF       SVM       XGB       ADAB        DT   
 precision       0.560510  0.534797  0.544832  0.543818   0.441781  0.557991   
 recall          0.622572  0.572382  0.622702  0.563717   0.666667  0.552616   
 f1              0.569864  0.548096  0.556542  0.552061   0.531397  0.555141   
 
 modality                                                                    \
 base predictor        GB       KNN        LR       MLP        NB        RF   
 precision       0.555493  

In [11]:
EI.meta_training_data[0]

modality,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,labels
base predictor,ADAB,ADAB,ADAB,XGB,XGB,XGB,DT,DT,DT,RF,RF,RF,GB,GB,GB,KNN,KNN,KNN,LR,LR,LR,NB,NB,NB,MLP,MLP,MLP,SVM,SVM,SVM,ADAB,ADAB,ADAB,XGB,XGB,XGB,DT,DT,DT,RF,RF,RF,GB,GB,GB,KNN,KNN,KNN,LR,LR,LR,NB,NB,NB,MLP,MLP,MLP,SVM,SVM,SVM,ADAB,ADAB,ADAB,XGB,XGB,XGB,DT,DT,DT,RF,RF,RF,GB,GB,GB,KNN,KNN,KNN,LR,LR,LR,NB,NB,NB,MLP,MLP,MLP,SVM,SVM,SVM,Unnamed: 91_level_1
sample,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Unnamed: 91_level_2
class,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,Unnamed: 91_level_3
0,5.030195e-01,2.222401e-16,0.496980,0.484714,0.000684,0.514602,1.0,0.0,0.0,0.43,0.00,0.57,0.409006,0.004771,0.586223,0.6,0.0,0.4,0.721130,0.031602,0.247268,0.805465,0.000146,0.194389,0.717909,0.006161,0.275930,0.695547,0.005991,0.298462,5.035328e-01,0.496467,2.223108e-16,0.213391,0.785733,0.000876,0.0,1.0,0.0,0.65,0.34,0.01,0.650757,0.342503,0.006740,1.0,0.0,0.0,0.797229,0.192462,0.010309,0.675195,0.324791,0.000014,0.700685,0.295151,0.004164,0.685856,0.305368,0.008776,0.495806,5.041936e-01,2.224167e-16,0.004174,0.995806,0.000020,0.0,1.0,0.0,0.01,0.99,0.00,0.104903,0.891624,0.003473,0.0,1.0,0.0,0.383272,0.499935,0.116793,0.327217,0.653934,0.018849,0.247105,0.712563,0.040333,0.236559,0.754778,0.008663,0
1,2.223461e-16,5.037648e-01,0.496235,0.000010,0.996938,0.003051,0.0,1.0,0.0,0.00,0.94,0.06,0.009681,0.758107,0.232212,0.0,1.0,0.0,0.137229,0.396308,0.466464,0.035621,0.526522,0.437858,0.098913,0.490241,0.410846,0.019456,0.594349,0.386195,2.222698e-16,0.496755,5.032448e-01,0.000985,0.076523,0.922492,0.0,0.0,1.0,0.00,0.25,0.75,0.004877,0.258560,0.736563,0.0,0.4,0.6,0.018039,0.208020,0.773941,0.000114,0.259888,0.739997,0.002379,0.302475,0.695146,0.004094,0.312267,0.683639,0.496877,2.222535e-16,5.031229e-01,0.024270,0.000238,0.975492,0.0,0.0,1.0,0.06,0.01,0.93,0.163425,0.002798,0.833777,0.4,0.0,0.6,0.299646,0.032553,0.667801,0.253231,0.000652,0.746117,0.331831,0.007943,0.660226,0.298987,0.002287,0.698725,2
2,5.030195e-01,2.222401e-16,0.496980,0.188556,0.001075,0.810369,1.0,0.0,0.0,0.43,0.07,0.50,0.081958,0.004527,0.913515,0.4,0.2,0.4,0.205876,0.245101,0.549022,0.152868,0.171413,0.675719,0.178818,0.280640,0.540543,0.195511,0.253426,0.551063,2.222698e-16,0.496755,5.032448e-01,0.000459,0.034713,0.964828,0.0,0.0,1.0,0.00,0.23,0.77,0.014593,0.238293,0.747114,0.0,0.2,0.8,0.213275,0.459031,0.327694,0.131340,0.453101,0.415558,0.177695,0.433174,0.389131,0.069253,0.442537,0.488210,0.496877,2.222535e-16,5.031229e-01,0.570341,0.006727,0.422932,0.0,0.0,1.0,0.48,0.06,0.46,0.562777,0.018351,0.418872,0.4,0.4,0.2,0.449444,0.278344,0.272212,0.480379,0.247577,0.272044,0.418021,0.306827,0.275152,0.486638,0.172321,0.341042,2
3,2.223461e-16,5.037648e-01,0.496235,0.000255,0.986222,0.013523,0.0,1.0,0.0,0.00,0.98,0.02,0.005364,0.861940,0.132696,0.0,1.0,0.0,0.030080,0.648603,0.321317,0.000228,0.762409,0.237363,0.019703,0.646094,0.334204,0.003419,0.720182,0.276399,2.222698e-16,0.496755,5.032448e-01,0.000486,0.462935,0.536579,0.0,0.0,1.0,0.00,0.17,0.83,0.003715,0.269705,0.726580,0.0,0.0,1.0,0.174059,0.468855,0.357086,0.091953,0.450819,0.457228,0.134536,0.430699,0.434765,0.026101,0.369548,0.604351,0.495806,5.041936e-01,2.224167e-16,0.831189,0.166820,0.001991,1.0,0.0,0.0,0.69,0.30,0.01,0.427963,0.559257,0.012780,0.4,0.6,0.0,0.310019,0.632428,0.057553,0.234461,0.764028,0.001511,0.253061,0.732241,0.014697,0.214669,0.782990,0.002340,1
4,5.030195e-01,2.222401e-16,0.496980,0.973815,0.001691,0.024494,1.0,0.0,0.0,0.89,0.00,0.11,0.785019,0.008433,0.206548,0.6,0.0,0.4,0.743277,0.020614,0.236109,0.777549,0.000046,0.222405,0.745957,0.004681,0.249361,0.683677,0.004210,0.312113,5.035328e-01,0.496467,2.223108e-16,0.996593,0.003182,0.000225,1.0,0.0,0.0,0.89,0.11,0.00,0.877430,0.116987,0.005582,0.8,0.2,0.0,0.459854,0.412466,0.127680,0.592550,0.372636,0.034814,0.582689,0.340399,0.076912,0.693241,0.299957,0.006803,0.495806,5.041936e-01,2.224167e-16,0.159148,0.840254,0.000598,0.0,1.0,0.0,0.32,0.68,0.00,0.311234,0.677631,0.011134,0.4,0.6,0.0,0.256564,0.714123,0.029313,0.230640,0.769225,0.000135,0.297724,0.694510,0.007766,0.279328,0.717675,0.002997,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,2.223057e-16,5.034980e-01,0.496502,0.000772,0.029771,0.969457,0.0,0.0,1.0,0.07,0.36,0.57,0.005927,0.712334,0.281738,0.4,0.2,0.4,0.242135,0.294048,0.463817,0.174188,0.262971,0.562841,0.252650,0.305457,0.441893,0.202332,0.278453,0.519214,5.035328e-01,0.496467,2.223478e-16,0.610171,0.388793,0.001035,1.0,0.0,0.0,0.74,0.26,0.00,0.787149,0.207030,0.005820,0.6,0.4,0.0,0.315730,0.489924,0.194346,0.364929,0.502705,0.132366,0.433263,0.399316,0.167421,0.525109,0.425192,0.049699,0.496316,5.036842e-01,2.223579e-16,0.019945,0.979872,0.000182,0.0,1.0,0.0,0.06,0.94,0.00,0.183483,0.806244,0.010272,0.4,0.6,0.0,0.250595,0.723974,0.025431,0.234221,0.765676,0.000103,0.294559,0.700795,0.004645,0.320506,0.676903,0.002591,1
572,2.223057e-16,5.034980e-01,0.496502,0.000891,0.451275,0.547833,0.0,0.0,1.0,0.02,0.33,0.65,0.010632,0.836332,0.153036,0.0,0.6,0.4,0.029223,0.667539,0.303238,0.000059,0.799386,0.200555,0.012060,0.684635,0.303305,0.002751,0.727412,0.269837,2.222806e-16,0.496630,5.033698e-01,0.000916,0.857726,0.141358,0.0,1.0,0.0,0.00,0.86,0.14,0.005442,0.619433,0.375126,0.0,0.8,0.2,0.084044,0.395449,0.520507,0.006682,0.290306,0.703012,0.022571,0.373064,0.604365,0.005587,0.318645,0.675768,0.496316,5.036842e-01,2.223579e-16,0.836526,0.158907,0.004567,1.0,0.0,0.0,0.66,0.34,0.00,0.505827,0.482817,0.011356,0.4,0.6,0.0,0.361604,0.562634,0.075761,0.250399,0.745785,0.003816,0.318300,0.654553,0.027147,0.296688,0.700398,0.002914,1
573,2.223057e-16,5.034980e-01,0.496502,0.000244,0.930683,0.069073,0.0,1.0,0.0,0.00,0.94,0.06,0.007694,0.788019,0.204287,0.0,1.0,0.0,0.034983,0.590646,0.374371,0.000131,0.745742,0.254128,0.013664,0.623985,0.362351,0.002738,0.713757,0.283506,2.222806e-16,0.496630,5.033698e-01,0.004135,0.081954,0.913910,0.0,0.0,1.0,0.00,0.32,0.68,0.007987,0.320414,0.671600,0.0,0.4,0.6,0.043379,0.317613,0.639007,0.000882,0.250848,0.748271,0.006066,0.324946,0.668988,0.004722,0.310308,0.684970,0.496316,5.036842e-01,2.223579e-16,0.037843,0.962050,0.000107,0.0,1.0,0.0,0.12,0.88,0.00,0.136444,0.855010,0.008545,0.2,0.8,0.0,0.259011,0.716193,0.024797,0.265322,0.734567,0.000111,0.336251,0.659889,0.003860,0.304264,0.692617,0.003119,1
574,2.223057e-16,5.034980e-01,0.496502,0.005836,0.573228,0.420936,0.0,1.0,0.0,0.00,0.51,0.49,0.013155,0.831022,0.155824,0.0,0.6,0.4,0.134426,0.329729,0.535845,0.028662,0.428924,0.542414,0.076956,0.452999,0.470045,0.018633,0.485104,0.496263,5.035328e-01,0.496467,2.223478e-16,0.001448,0.996829,0.001723,0.0,1.0,0.0,0.43,0.52,0.05,0.699411,0.293505,0.007083,0.2,0.6,0.2,0.275364,0.460226,0.264410,0.280768,0.497197,0.222035,0.315457,0.422209,0.262334,0.219272,0.523001,0.257727,0.496437,2.223071e-16,5.035634e-01,0.055441,0.000996,0.943562,0.0,0.0,1.0,0.22,0.00,0.78,0.210404,0.005283,0.784313,0.4,0.0,0.6,0.435198,0.166057,0.398745,0.406644,0.071456,0.521900,0.366918,0.098371,0.534711,0.391784,0.021362,0.586854,2


In [12]:
EI.train_meta(meta_predictors=base_predictors)

Analyzing ensembles: |██████████|100%
Training final meta models: |██████████|100%


<eipy.ei.EnsembleIntegration at 0x7fc8a831bd30>

In [13]:
EI.meta_summary["metrics"]

Unnamed: 0,ADAB,XGB,DT,RF,GB,KNN,LR,NB,MLP,SVM
precision,0.1125,0.379134,0.1125,0.222994,0.1125,0.355449,0.251544,0.222222,0.29848,0.333333
recall,0.333333,0.379443,0.333333,0.331134,0.333333,0.349724,0.373494,0.334986,0.299372,0.333334
f1,0.168224,0.379208,0.168224,0.263965,0.168224,0.325664,0.280447,0.2642,0.269546,0.326469


In [14]:
preferred_meta_model = EI.meta_summary["metrics"].loc["precision"].idxmax()
y_pred = EI.predict(X_dict=data_test, meta_model_key=preferred_meta_model)
y_pred = [np.argmax(np.array(y)) for y in y_pred]
y_pred

[2,
 0,
 1,
 0,
 2,
 0,
 2,
 0,
 2,
 0,
 0,
 2,
 0,
 2,
 2,
 2,
 2,
 2,
 0,
 1,
 1,
 0,
 1,
 2,
 2,
 0,
 1,
 2,
 2,
 1,
 2,
 0,
 0,
 1,
 0,
 2,
 0,
 2,
 2,
 1,
 1,
 2,
 2,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 2,
 0,
 2,
 2,
 2,
 1,
 0,
 1,
 2,
 1,
 2,
 2,
 2,
 2,
 2,
 0,
 1,
 0,
 2,
 0,
 2,
 0,
 1,
 2,
 2,
 2,
 0,
 0,
 2,
 1,
 2,
 2,
 1,
 2,
 2,
 2,
 1,
 0,
 2,
 0,
 0,
 2,
 2,
 0,
 0,
 1,
 2,
 0,
 1,
 2,
 2,
 2,
 2,
 0,
 2,
 0,
 0,
 2,
 2,
 1,
 2,
 0,
 2,
 1,
 0,
 0,
 0,
 2,
 1,
 2,
 1,
 2,
 0,
 1,
 2,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 2,
 0,
 2,
 0,
 2,
 1,
 0,
 1,
 2,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 0,
 2,
 2,
 1,
 2,
 2,
 0,
 2,
 2,
 2,
 2,
 0,
 1,
 1,
 0,
 2,
 2,
 1,
 1,
 0,
 0,
 2,
 0,
 2,
 0,
 2,
 2,
 2,
 2,
 2,
 2]

In [15]:
y_test

array([1, 2, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 0, 2, 0, 0, 2, 1, 0, 2, 2,
       0, 1, 2, 0, 1, 1, 1, 2, 0, 0, 0, 2, 1, 0, 1, 0, 2, 0, 1, 1, 2, 0,
       2, 0, 2, 1, 0, 2, 0, 1, 0, 2, 0, 1, 1, 1, 2, 0, 1, 2, 1, 1, 0, 2,
       0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 0, 0, 1, 2, 0, 1, 2, 2, 1, 2,
       2, 1, 2, 1, 0, 1, 1, 2, 1, 2, 2, 0, 0, 0, 0, 0, 2, 0, 1, 1, 0, 2,
       2, 1, 2, 2, 2, 2, 2, 0, 2, 1, 1, 1, 0, 2, 0, 2, 0, 0, 2, 0, 2, 1,
       0, 0, 0, 1, 1, 2, 2, 0, 1, 2, 0, 2, 2, 0, 1, 0, 2, 1, 2, 2, 2, 1,
       1, 1, 2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 1, 0, 0, 2, 1, 1, 1, 2, 2,
       1, 0, 2, 1])

In [16]:
accuracy = sum([1*(y==y_hat)+0*(y!=y_hat) for y,y_hat in list(zip(y_test, y_pred))])/len(y_test)
accuracy

0.3388888888888889

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

X = np.concatenate([modality_0,modality_1,modality_2], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3, stratify=y)

model = LogisticRegression(multi_class='auto', solver='lbfgs', max_iter=1000)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
accuracy

0.9222222222222223

In [18]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97        60
           1       0.89      0.92      0.90        60
           2       0.94      0.85      0.89        60

    accuracy                           0.92       180
   macro avg       0.92      0.92      0.92       180
weighted avg       0.92      0.92      0.92       180



In [19]:
iris = datasets.load_iris()
X = iris.data
y = iris.target

Modality_a = X[:, 0:2]
Modality_b = X[:, 2:4]

X_a_train, X_a_test, y_train, y_test = train_test_split(Modality_a, y, test_size=0.2, random_state=3, stratify=y)
X_b_train, X_b_test, _,_ = train_test_split(Modality_b, y, test_size=0.2, random_state=3, stratify=y)

In [20]:
iris_data_train = {
                "Modality_a": X_a_train,
                "Modality_b": X_b_train
                }

iris_data_test = {
                "Modality_a": X_a_test,
                "Modality_b": X_b_test
                }

In [21]:
base_predictors = {
                    'ADAB': AdaBoostClassifier(),
                    'XGB': XGBClassifier(),
                    'DT': DecisionTreeClassifier(),
                    'RF': RandomForestClassifier(),
                    'GB': GradientBoostingClassifier(),
                    'KNN': KNeighborsClassifier(),
                    'LR': LogisticRegression(),
                    'NB': GaussianNB(),
                    'MLP': MLPClassifier(),
                    'SVM': SVC(probability=True)
}

In [22]:
EI_iris = EnsembleIntegration(
                        base_predictors=base_predictors,
                        k_outer=5,
                        k_inner=5,
                        n_samples=1,
                        sampling_strategy=None,
                        n_jobs=-1,
                        random_state=0,
                        project_name="iris",
                        model_building=True,
                        )


In [23]:
for name, modality in iris_data_train.items():
    EI_iris.train_base(modality, y_train, modality_name=name)

Training base predictors on Modality_a...
        
... for ensemble performance analysis...


Generating meta training data: |██████████|100%
Generating meta test data: |██████████|100%



... for final ensemble...


Generating meta training data: |██████████|100%
Training final base predictors: |██████████|100%




Training base predictors on Modality_b...
        
... for ensemble performance analysis...


Generating meta training data: |██████████|100%
Generating meta test data: |██████████|100%



... for final ensemble...


Generating meta training data: |██████████|100%
Training final base predictors: |██████████|100%






In [24]:
EI_iris.meta_training_data

[modality          Modality_a                                              \
 base predictor          ADAB                               XGB             
 sample                     0                                 0             
 class                      0         1             2         0         1   
 0               4.700114e-01  0.529988  3.032480e-07  0.062144  0.930889   
 1               3.618822e-07  0.498631  5.013683e-01  0.000280  0.996896   
 2               3.618822e-07  0.498631  5.013683e-01  0.001002  0.029516   
 3               3.618822e-07  0.498631  5.013683e-01  0.002396  0.023360   
 4               4.700114e-01  0.529988  3.032480e-07  0.995907  0.002983   
 ..                       ...       ...           ...       ...       ...   
 91              2.142084e-12  0.000414  9.995858e-01  0.000806  0.126304   
 92              1.406017e-05  0.524571  4.754154e-01  0.004578  0.968059   
 93              9.872006e-01  0.012759  4.027025e-05  0.997407  0.001570   

In [25]:
EI_iris.train_meta(meta_predictors=base_predictors)

Analyzing ensembles: |██████████|100%
Training final meta models: |██████████|100%


<eipy.ei.EnsembleIntegration at 0x7fc898822d90>

In [26]:
EI_iris.meta_summary["metrics"]

Unnamed: 0,ADAB,XGB,DT,RF,GB,KNN,LR,NB,MLP,SVM
precision,0.111111,0.333333,0.111111,0.333333,0.111111,0.333333,0.111111,0.222222,0.111111,0.222222
recall,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333
f1,0.166667,0.325758,0.166667,0.309524,0.166667,0.325758,0.166667,0.240196,0.166667,0.264069


In [27]:
preferred_meta_model = EI_iris.meta_summary["metrics"].loc["precision"].idxmax()
y_pred_iris = EI_iris.predict(X_dict=iris_data_test, meta_model_key=preferred_meta_model)
y_pred_iris = [np.argmax(np.array(y)) for y in y_pred_iris]
y_pred_iris

[0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 2,
 1,
 2,
 1,
 2,
 0,
 1,
 2,
 2,
 0,
 0]

In [28]:
accuracy = sum([1*(y==y_hat)+0*(y!=y_hat) for y,y_hat in list(zip(y_test, y_pred_iris))])/len(y_test)
accuracy

0.2

In [29]:
xgb_model = XGBClassifier()
xgb_model.fit(X_train,y_train)
y_pred = xgb_model.predict(X_test)
accuracy=accuracy_score(y_test,y_pred)
accuracy

XGBoostError: [19:33:13] /workspace/src/data/data.cc:501: Check failed: this->labels.Size() % this->num_row_ == 0 (120 vs. 0) : Incorrect size for labels.
Stack trace:
  [bt] (0) /home/opc/.venv/lib64/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x3581ea) [0x7fc7c6e991ea]
  [bt] (1) /home/opc/.venv/lib64/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x389457) [0x7fc7c6eca457]
  [bt] (2) /home/opc/.venv/lib64/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x38a4b1) [0x7fc7c6ecb4b1]
  [bt] (3) /home/opc/.venv/lib64/python3.9/site-packages/xgboost/lib/libxgboost.so(XGDMatrixSetInfoFromInterface+0xb0) [0x7fc7c6c9f210]
  [bt] (4) /lib64/libffi.so.6(ffi_call_unix64+0x4c) [0x7fc8b46ba17e]
  [bt] (5) /lib64/libffi.so.6(ffi_call+0x36f) [0x7fc8b46b9b2f]
  [bt] (6) /usr/lib64/python3.9/lib-dynload/_ctypes.cpython-39-x86_64-linux-gnu.so(+0x1375d) [0x7fc8b48d075d]
  [bt] (7) /usr/lib64/python3.9/lib-dynload/_ctypes.cpython-39-x86_64-linux-gnu.so(+0x13db5) [0x7fc8b48d0db5]
  [bt] (8) /lib64/libpython3.9.so.1.0(_PyObject_MakeTpCall+0x2ab) [0x7fc8b6a0364b]

