In [1]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from eipy.ei import EnsembleIntegration
import eipy.utils as ut
from eipy.additional_ensembles import MeanAggregation, CES
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn import datasets
pd.set_option('display.max_columns', None)

In [2]:
# If data is multi-class, run a check on the allowable base and meta models.

base_predictors = {
                    'ADAB': AdaBoostClassifier(),
                    'XGB': XGBClassifier(),
                    'DT': DecisionTreeClassifier(),
                    'RF': RandomForestClassifier(),
                    'GB': GradientBoostingClassifier(),
                    'KNN': KNeighborsClassifier(),
                    'LR': LogisticRegression(multi_class="auto", solver="lbfgs"),
                    'NB': GaussianNB(),
                    'MLP': MLPClassifier(),
                    'SVM': SVC(probability=True)
}

In [3]:

"""
For filtering base predictors by whether or not they rely on heursitics for multiclass extension

natively_multi_class_predictors = ["XGBClassifier",
"BernoulliNB",
"DecisionTreeClassifier",
"ExtraTreeClassifier",
"GaussianNB",
"KNeighborsClassifier",
"LabelPropagation",
"LabelSpreading",
"LinearDiscriminantAnalysis",
"LinearSVC", #(setting multi_class=”crammer_singer”)
"LogisticRegression", #(setting multi_class=”multinomial”)
"LogisticRegressionCV", #(setting multi_class=”multinomial”)
"MLPClassifier",
"NearestCentroid",
"QuadraticDiscriminantAnalysis",
"RadiusNeighborsClassifier",
"RandomForestClassifier",
"RidgeClassifier",
"RidgeClassifierCV"]

base_predictors = {k : v for k,v in base_predictors.items() if str(v).split("(")[0] in natively_multi_class_predictors}
"""

'\nFor filtering base predictors by whether or not they rely on heursitics for multiclass extension\n\nnatively_multi_class_predictors = ["XGBClassifier",\n"BernoulliNB",\n"DecisionTreeClassifier",\n"ExtraTreeClassifier",\n"GaussianNB",\n"KNeighborsClassifier",\n"LabelPropagation",\n"LabelSpreading",\n"LinearDiscriminantAnalysis",\n"LinearSVC", #(setting multi_class=”crammer_singer”)\n"LogisticRegression", #(setting multi_class=”multinomial”)\n"LogisticRegressionCV", #(setting multi_class=”multinomial”)\n"MLPClassifier",\n"NearestCentroid",\n"QuadraticDiscriminantAnalysis",\n"RadiusNeighborsClassifier",\n"RandomForestClassifier",\n"RidgeClassifier",\n"RidgeClassifierCV"]\n\nbase_predictors = {k : v for k,v in base_predictors.items() if str(v).split("(")[0] in natively_multi_class_predictors}\n'

In [4]:
"""https://dev.pages.lis-lab.fr/scikit-multimodallearn/tutorial/auto_examples/combo/plot_combo_3_views_3_classes.html#
multi modal multi-class toy data generation"""

def generate_data(n_samples, lim):
    """Generate random data in a rectangle"""
    lim = np.array(lim)
    n_features = lim.shape[0]
    data = np.random.random((n_samples, n_features))
    data = (lim[:, 1]-lim[:, 0]) * data + lim[:, 0]
    return data
seed = 12
np.random.seed(seed)

n_samples = 300

modality_0 = np.concatenate((generate_data(n_samples, [[0., 1.], [0., 1.]]),
                         generate_data(n_samples, [[1., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 2.], [0., 1.]])))

modality_1 = np.concatenate((generate_data(n_samples, [[1., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 1.], [0., 1.]])))

modality_2 = np.concatenate((generate_data(n_samples, [[0., 2.], [0., 1.]]),
                         generate_data(n_samples, [[0., 1.], [0., 1.]]),
                         generate_data(n_samples, [[1., 2.], [0., 1.]])))


y = np.zeros(3*n_samples, dtype=np.int64)
y[n_samples:2*n_samples] = 1
y[2*n_samples:] = 2



In [5]:
X_0_train, X_0_test, y_train, y_test = train_test_split(modality_0, y, test_size=0.2, random_state=3, stratify=y)
X_1_train, X_1_test, _,_ = train_test_split(modality_1, y, test_size=0.2, random_state=3, stratify=y)
X_2_train, X_2_test, _,_ = train_test_split(modality_2, y, test_size=0.2, random_state=3, stratify=y)

In [6]:
data_train = {
                "Modality_0": X_0_train,
                "Modality_1": X_1_train,
                "Modality_2": X_2_train
                }

data_test = {
                "Modality_0": X_0_test,
                "Modality_1": X_1_test,
                "Modality_2": X_2_test
                }

In [7]:
EI = EnsembleIntegration(
                        base_predictors=base_predictors,
                        k_outer=5,
                        k_inner=5,
                        n_samples=1,
                        sampling_strategy=None,
                        sampling_aggregation=None,
                        n_jobs=-1,
                        random_state=42,
                        project_name="toy",
                        model_building=True,
                        )

In [8]:
EI.fit_base(X=data_train, y=y_train)

Training base predictors on Modality_0...
        
... for ensemble performance analysis...


Generating meta training data: |██████████|100%
Generating meta test data: |██████████|100%



... for final ensemble...


Generating meta training data: |██████████|100%
Training final base predictors: |██████████|100%




Training base predictors on Modality_1...
        
... for ensemble performance analysis...


Generating meta training data: |██████████|100%
Generating meta test data: |██████████|100%



... for final ensemble...


Generating meta training data: |██████████|100%
Training final base predictors: |██████████|100%




Training base predictors on Modality_2...
        
... for ensemble performance analysis...


Generating meta training data: |██████████|100%
Generating meta test data: |██████████|100%



... for final ensemble...


Generating meta training data: |██████████|100%
Training final base predictors: |██████████|100%






In [9]:
EI.base_summary

{'metrics': modality       Modality_0                                                    \
 base predictor       ADAB        DT        GB       KNN        LR       MLP   
 precision        0.611520  0.533296  0.537513  0.530066  0.559550  0.554041   
 recall           0.666667  0.529167  0.597222  0.579167  0.584722  0.629167   
 f1               0.535912  0.531102  0.553553  0.545643  0.569060  0.562239   
 
 modality                                               Modality_1            \
 base predictor        NB        RF       SVM       XGB       ADAB        DT   
 precision       0.544751  0.523247  0.527670  0.525531   0.443835  0.548634   
 recall          0.608333  0.562500  0.615278  0.540278   0.665278  0.550000   
 f1              0.559038  0.537778  0.545892  0.532115   0.532352  0.549286   
 
 modality                                                                    \
 base predictor        GB       KNN        LR       MLP        NB        RF   
 precision       0.573138  

In [10]:
EI.meta_training_data[0]

modality,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,labels
base predictor,ADAB,ADAB,ADAB,XGB,XGB,XGB,DT,DT,DT,RF,RF,RF,GB,GB,GB,KNN,KNN,KNN,LR,LR,LR,NB,NB,NB,MLP,MLP,MLP,SVM,SVM,SVM,ADAB,ADAB,ADAB,XGB,XGB,XGB,DT,DT,DT,RF,RF,RF,GB,GB,GB,KNN,KNN,KNN,LR,LR,LR,NB,NB,NB,MLP,MLP,MLP,SVM,SVM,SVM,ADAB,ADAB,ADAB,XGB,XGB,XGB,DT,DT,DT,RF,RF,RF,GB,GB,GB,KNN,KNN,KNN,LR,LR,LR,NB,NB,NB,MLP,MLP,MLP,SVM,SVM,SVM,Unnamed: 91_level_1
sample,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Unnamed: 91_level_2
class,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,Unnamed: 91_level_3
0,5.033375e-01,2.222770e-16,0.496663,0.990476,0.000190,0.009334,1.0,0.0,0.0,0.91,0.00,0.09,0.873335,0.006062,0.120604,1.0,0.0,0.0,0.741568,0.022422,0.236010,0.787548,0.000082,0.212370,0.734722,0.004587,0.260691,0.694402,0.002815,0.302783,5.038713e-01,0.496129,2.223630e-16,0.461601,0.537134,0.001266,0.0,1.0,0.0,0.51,0.49,0.00,0.446751,0.542124,0.011125,0.8,0.2,0.0,0.812660,0.179730,0.007610,0.705237,0.294760,3.893088e-06,0.757580,0.238677,0.003743,0.704742,0.286515,0.008744,0.496191,2.223908e-16,5.038085e-01,0.034906,0.000094,0.965000,0.0,0.0,1.0,0.19,0.00,0.81,0.190731,0.005388,0.803881,0.4,0.0,0.6,0.402319,0.113573,0.484108,0.293706,0.014156,0.692139,0.344770,0.058037,0.597193,0.235021,0.003676,0.761303,0
1,5.033375e-01,2.222770e-16,0.496663,0.082868,0.000607,0.916525,0.0,0.0,1.0,0.54,0.00,0.46,0.305751,0.005220,0.689029,0.4,0.0,0.6,0.688637,0.025537,0.285826,0.743124,0.000103,0.256774,0.661490,0.010317,0.328193,0.685259,0.006523,0.308217,5.038713e-01,0.496129,2.223630e-16,0.120333,0.877135,0.002532,0.0,1.0,0.0,0.35,0.65,0.00,0.205477,0.780552,0.013971,0.6,0.4,0.0,0.643465,0.320709,0.035826,0.728743,0.270921,3.361942e-04,0.651904,0.333255,0.014840,0.636935,0.355380,0.007685,0.496820,5.031799e-01,2.222557e-16,0.130016,0.868783,0.001201,0.0,1.0,0.0,0.27,0.73,0.00,0.616100,0.379187,0.004714,0.2,0.8,0.0,0.381799,0.527239,0.090961,0.271800,0.725416,0.002784,0.356892,0.614702,0.028406,0.314243,0.683171,0.002586,0
2,2.223397e-16,5.036501e-01,0.496350,0.008628,0.492612,0.498759,0.0,0.0,1.0,0.04,0.63,0.33,0.012749,0.491289,0.495962,0.2,0.4,0.4,0.260780,0.271721,0.467498,0.197136,0.270495,0.532369,0.315407,0.241999,0.442594,0.252198,0.188472,0.559330,2.222529e-16,0.496881,5.031186e-01,0.000192,0.005501,0.994307,0.0,0.0,1.0,0.00,0.04,0.96,0.004441,0.080650,0.914909,0.0,0.0,1.0,0.016252,0.182152,0.801596,0.000027,0.219851,7.801217e-01,0.002285,0.205562,0.792153,0.005052,0.318134,0.676815,0.496820,5.031799e-01,2.222557e-16,0.678152,0.319825,0.002023,0.0,1.0,0.0,0.56,0.44,0.00,0.427438,0.565553,0.007009,0.4,0.6,0.0,0.303977,0.650942,0.045081,0.253442,0.746303,0.000256,0.341900,0.651430,0.006671,0.335281,0.662332,0.002388,1
3,2.223397e-16,5.036501e-01,0.496350,0.000314,0.914500,0.085186,0.0,1.0,0.0,0.01,0.74,0.25,0.007984,0.813579,0.178437,0.4,0.4,0.2,0.217677,0.251799,0.530523,0.158370,0.199676,0.641954,0.171173,0.328954,0.499873,0.145179,0.396572,0.458250,2.222529e-16,0.496881,5.031186e-01,0.000097,0.001815,0.998088,0.0,0.0,1.0,0.00,0.01,0.99,0.003233,0.107689,0.889078,0.0,0.2,0.8,0.017392,0.188231,0.794377,0.000033,0.219310,7.806570e-01,0.002537,0.215564,0.781898,0.004159,0.319830,0.676011,0.496191,2.223908e-16,5.038085e-01,0.006253,0.000095,0.993652,0.0,0.0,1.0,0.06,0.00,0.94,0.076960,0.002392,0.920648,0.2,0.0,0.8,0.328162,0.063236,0.608602,0.230185,0.003556,0.766259,0.322704,0.029860,0.647435,0.191561,0.002338,0.806101,2
4,5.033375e-01,2.222770e-16,0.496663,0.205049,0.002299,0.792652,1.0,0.0,0.0,0.52,0.01,0.47,0.495245,0.010051,0.494704,0.4,0.0,0.6,0.389640,0.149040,0.461319,0.522100,0.040081,0.437819,0.513466,0.096828,0.389706,0.514961,0.019730,0.465309,5.038713e-01,0.496129,2.223630e-16,0.729893,0.269725,0.000383,1.0,0.0,0.0,0.89,0.11,0.00,0.656200,0.337153,0.006647,0.8,0.2,0.0,0.689093,0.285340,0.025567,0.737586,0.262294,1.199098e-04,0.676987,0.312717,0.010295,0.632066,0.361283,0.006652,0.496191,2.223908e-16,5.038085e-01,0.559073,0.002290,0.438637,1.0,0.0,0.0,0.60,0.00,0.40,0.256467,0.006856,0.736677,0.2,0.0,0.8,0.334040,0.064163,0.601797,0.238629,0.002247,0.759124,0.334455,0.027040,0.638505,0.235444,0.001804,0.762752,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,5.034012e-01,2.222858e-16,0.496599,0.730658,0.000891,0.268450,1.0,0.0,0.0,0.85,0.00,0.15,0.326098,0.010482,0.663420,0.8,0.0,0.2,0.813545,0.010634,0.175820,0.763151,0.000017,0.236831,0.737883,0.003810,0.258307,0.674201,0.010199,0.315600,5.038713e-01,0.496129,2.223630e-16,0.771925,0.227552,0.000523,1.0,0.0,0.0,0.71,0.29,0.00,0.832982,0.158148,0.008870,0.6,0.4,0.0,0.583089,0.367700,0.049211,0.751509,0.247555,9.359377e-04,0.629503,0.345457,0.025041,0.759947,0.236493,0.003560,0.496942,5.030579e-01,2.222392e-16,0.588948,0.409782,0.001270,1.0,0.0,0.0,0.54,0.46,0.00,0.523892,0.470108,0.005999,0.6,0.4,0.0,0.410048,0.507348,0.082605,0.319283,0.676623,0.004094,0.366665,0.608913,0.024422,0.286633,0.711375,0.001992,0
572,5.034012e-01,2.222858e-16,0.496599,0.741070,0.003562,0.255368,0.0,0.0,1.0,0.73,0.00,0.27,0.758719,0.006605,0.234676,0.8,0.0,0.2,0.491131,0.072969,0.435900,0.655889,0.004847,0.339265,0.592587,0.046026,0.361386,0.672380,0.005945,0.321674,5.038713e-01,0.496129,2.223630e-16,0.709526,0.287658,0.002815,1.0,0.0,0.0,0.69,0.31,0.00,0.758846,0.231045,0.010109,0.8,0.2,0.0,0.744517,0.242041,0.013442,0.733706,0.266279,1.542590e-05,0.670398,0.321818,0.007784,0.710672,0.284397,0.004932,0.496942,5.030579e-01,2.222392e-16,0.105804,0.893397,0.000799,1.0,0.0,0.0,0.44,0.56,0.00,0.243999,0.750289,0.005711,0.4,0.6,0.0,0.267849,0.711016,0.021135,0.278899,0.721085,0.000017,0.341974,0.653626,0.004401,0.324069,0.671813,0.004119,0
573,2.223192e-16,5.035152e-01,0.496485,0.001255,0.920323,0.078422,0.0,1.0,0.0,0.00,0.74,0.26,0.003857,0.919627,0.076515,0.0,0.8,0.2,0.025359,0.779247,0.195393,0.000097,0.819810,0.180093,0.009429,0.766037,0.224534,0.005261,0.701820,0.292919,2.222572e-16,0.496849,5.031511e-01,0.000191,0.012208,0.987600,0.0,0.0,1.0,0.00,0.10,0.90,0.003511,0.452824,0.543666,0.0,0.2,0.8,0.025723,0.207135,0.767142,0.000044,0.205749,7.942068e-01,0.002974,0.234306,0.762720,0.001675,0.305228,0.693098,0.496066,2.223978e-16,5.039340e-01,0.023767,0.001044,0.975189,0.0,0.0,1.0,0.25,0.00,0.75,0.119909,0.010660,0.869431,0.2,0.0,0.8,0.270401,0.039299,0.690299,0.222018,0.000400,0.777582,0.285722,0.017523,0.696755,0.276874,0.003653,0.719473,2
574,5.034012e-01,2.222858e-16,0.496599,0.988050,0.000777,0.011173,1.0,0.0,0.0,0.87,0.00,0.13,0.924327,0.001435,0.074238,0.8,0.0,0.2,0.761829,0.009976,0.228195,0.653560,0.000010,0.346429,0.676873,0.005897,0.317230,0.672044,0.010727,0.317228,5.038713e-01,0.496129,2.223630e-16,0.599464,0.391220,0.009316,1.0,0.0,0.0,0.44,0.56,0.00,0.798176,0.192122,0.009702,0.4,0.6,0.0,0.822668,0.171797,0.005535,0.525422,0.474577,4.761566e-07,0.672010,0.325162,0.002828,0.457654,0.519184,0.023162,0.496942,5.030579e-01,2.222392e-16,0.097810,0.902037,0.000153,0.0,1.0,0.0,0.19,0.81,0.00,0.220555,0.774808,0.004636,0.2,0.8,0.0,0.428953,0.470013,0.101034,0.356938,0.632894,0.010168,0.380057,0.586231,0.033712,0.292738,0.702961,0.004301,0


In [11]:
"labels" in list(EI.meta_training_data[0].columns.get_level_values(level=0))

True

In [12]:
ut.predictive_multiclass_data(EI.meta_training_data)
EI.meta_training_data[0]

modality,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_0,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_1,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,Modality_2,labels
base predictor,ADAB,DT,GB,KNN,LR,MLP,NB,RF,SVM,XGB,ADAB,DT,GB,KNN,LR,MLP,NB,RF,SVM,XGB,ADAB,DT,GB,KNN,LR,MLP,NB,RF,SVM,XGB,Unnamed: 31_level_1
sample,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Unnamed: 31_level_2
0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,2,2,2,2,2,2,2,2,2,2,0
1,0,2,2,2,0,0,0,0,0,2,0,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,1,1,0
2,1,2,2,1,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1
3,1,1,1,0,2,2,2,1,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
4,0,0,0,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,2,0,2,2,2,2,2,0,2,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0
572,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1,1,0
573,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
574,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0


In [13]:
df = EI.meta_training_data[0]
model_cols = df.columns[:-1]

accuracy_dict = {}
for model in model_cols:
    correct_predictions = (df[model] == df["labels"]).sum()
    total_predictions = len(df)
    accuracy = correct_predictions / total_predictions
    accuracy_dict[model] = accuracy

accuracy_dict

{('Modality_0', 'ADAB', 0): 0.6631944444444444,
 ('Modality_0', 'DT', 0): 0.5190972222222222,
 ('Modality_0', 'GB', 0): 0.5746527777777778,
 ('Modality_0', 'KNN', 0): 0.5763888888888888,
 ('Modality_0', 'LR', 0): 0.5798611111111112,
 ('Modality_0', 'MLP', 0): 0.6145833333333334,
 ('Modality_0', 'NB', 0): 0.6059027777777778,
 ('Modality_0', 'RF', 0): 0.5850694444444444,
 ('Modality_0', 'SVM', 0): 0.6163194444444444,
 ('Modality_0', 'XGB', 0): 0.5347222222222222,
 ('Modality_1', 'ADAB', 0): 0.6666666666666666,
 ('Modality_1', 'DT', 0): 0.5815972222222222,
 ('Modality_1', 'GB', 0): 0.6006944444444444,
 ('Modality_1', 'KNN', 0): 0.5868055555555556,
 ('Modality_1', 'LR', 0): 0.5711805555555556,
 ('Modality_1', 'MLP', 0): 0.6163194444444444,
 ('Modality_1', 'NB', 0): 0.6041666666666666,
 ('Modality_1', 'RF', 0): 0.5833333333333334,
 ('Modality_1', 'SVM', 0): 0.6232638888888888,
 ('Modality_1', 'XGB', 0): 0.5572916666666666,
 ('Modality_2', 'ADAB', 0): 0.6684027777777778,
 ('Modality_2', 'DT'

In [14]:
EI.fit_meta(meta_predictors=base_predictors)

Analyzing ensembles: |          |  0%

Analyzing ensembles: |██████████|100%
Training final meta models: |██████████|100%


<eipy.ei.EnsembleIntegration at 0x7f02ec0c5640>

In [15]:
EI.meta_predictions

Unnamed: 0,ADAB,XGB,DT,RF,GB,KNN,LR,NB,MLP,SVM,labels
0,"[2.460906117643051e-06, 3.3159024045645574e-08...","[0.0006074096, 0.0018225338, 0.9975701]","[0.0, 0.0, 1.0]","[0.0, 0.05, 0.95]","[6.42828451411504e-07, 6.42828451411504e-07, 0...","[0.0, 0.8, 0.2]","[4.691470459764825e-05, 0.12661409660141879, 0...","[0.0, 8.03187518354799e-57, 1.0]","[5.7251503825122835e-05, 0.35460401666758673, ...","[0.0036033589406824595, 0.028779289216808664, ...",2
1,"[1.815167722893416e-05, 1.6360453444818718e-09...","[0.0019455126, 0.0010757219, 0.99697876]","[0.0, 0.0, 1.0]","[0.01, 0.0, 0.99]","[6.42828451411504e-07, 6.42828451411504e-07, 0...","[0.0, 0.2, 0.8]","[0.0075951906612649524, 0.0002104560301191247,...","[0.0, 7.036055772977667e-110, 1.0]","[0.006447763578538449, 3.204839741207864e-07, ...","[0.016705176901856304, 0.000610423649050003, 0...",2
2,"[0.9999993824788106, 3.8554166600920653e-10, 6...","[0.99828064, 0.00082038547, 0.0008990469]","[1.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.9999987143430977, 6.42828451411504e-07, 6.4...","[1.0, 0.0, 0.0]","[0.9986778213295513, 0.00014571350196340092, 0...","[1.0, 1.3507614681851935e-113, 0.0]","[0.9996176887533161, 2.7422688889596234e-08, 0...","[0.9941357485934554, 0.0004782279350967335, 0....",0
3,"[1.5398452760075055e-05, 1.4709960095830085e-0...","[0.0025355262, 0.0013113541, 0.99615306]","[0.0, 0.0, 1.0]","[0.18, 0.0, 0.82]","[6.42828451411504e-07, 6.42828451411504e-07, 0...","[0.0, 0.6, 0.4]","[0.005517038521205276, 0.0026989846600619545, ...","[0.0, 4.713636813249683e-99, 1.0]","[0.0007476765786744228, 0.0005507859585771903,...","[0.04100041064911489, 0.0026026029568668417, 0...",2
4,"[0.9999993824788106, 3.8554166600920653e-10, 6...","[0.998898, 0.0004618725, 0.0006401259]","[1.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.9999987143430977, 6.42828451411504e-07, 6.4...","[1.0, 0.0, 0.0]","[0.9993525375531593, 7.209852719193506e-06, 0....","[1.0, 4.954100637401223e-116, 0.0]","[0.9999999232608111, 9.969338684427184e-11, 7....","[0.9984679057927461, 0.0001512566232518741, 0....",0
...,...,...,...,...,...,...,...,...,...,...,...
715,"[2.8989365873360156e-06, 1.7970745029638833e-0...","[0.0013767994, 0.0008409676, 0.9977823]","[0.0, 0.0, 1.0]","[0.0, 0.0, 1.0]","[6.42828451411504e-07, 6.42828451411504e-07, 0...","[0.0, 0.0, 1.0]","[0.0013414917906367661, 4.424671823678826e-05,...","[0.0, 7.612106365737302e-104, 1.0]","[0.0007542149238046141, 1.609872146994478e-08,...","[4.19226892543794e-06, 2.0952284041446033e-06,...",2
716,"[2.2864616605780478e-06, 1.620640516278768e-09...","[0.0007623872, 0.00078788225, 0.9984497]","[0.0, 0.0, 1.0]","[0.0, 0.0, 1.0]","[6.42828451411504e-07, 6.42828451411504e-07, 0...","[0.0, 0.0, 1.0]","[0.00045224658670595563, 4.0675894278186774e-0...","[0.0, 5.176371076298323e-100, 1.0]","[0.0017199734579424742, 5.203187748216963e-08,...","[0.002247455315579561, 0.00039247269167400626,...",2
717,"[2.2864616605780478e-06, 1.620640516278768e-09...","[0.0010580685, 0.0010934509, 0.9978485]","[0.0, 0.0, 1.0]","[0.0, 0.0, 1.0]","[6.42828451411504e-07, 6.42828451411504e-07, 0...","[0.0, 0.4, 0.6]","[0.0012489269800181754, 0.0016403658236976847,...","[0.0, 1.8441954013805752e-91, 1.0]","[0.0012299241661890127, 5.790588841318818e-05,...","[0.006369716676155774, 0.0006548325657352242, ...",2
718,"[0.12540607846427176, 0.8745939215356358, 9.24...","[0.0017701975, 0.9975007, 0.00072910707]","[0.0, 1.0, 0.0]","[0.0, 1.0, 0.0]","[6.42828451411504e-07, 0.9999987143430977, 6.4...","[0.0, 1.0, 0.0]","[0.03171027691239693, 0.968240916645783, 4.880...","[3.567950622344031e-08, 0.9999999643204951, 0.0]","[0.007298861937401153, 0.9913371305092522, 0.0...","[0.008789272617844778, 0.9878856340761722, 0.0...",1


In [16]:
argmax = lambda x: np.argmax(x)
cols_to_transform = [col for col in EI.meta_predictions.columns if col != 'labels']
for column in cols_to_transform:
    EI.meta_predictions[column] = EI.meta_predictions[column].apply(argmax)
EI.meta_predictions

Unnamed: 0,ADAB,XGB,DT,RF,GB,KNN,LR,NB,MLP,SVM,labels
0,2,2,2,2,2,1,2,2,2,2,2
1,2,2,2,2,2,2,2,2,2,2,2
2,0,0,0,0,0,0,0,0,0,0,0
3,2,2,2,2,2,1,2,2,2,2,2
4,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
715,2,2,2,2,2,2,2,2,2,2,2
716,2,2,2,2,2,2,2,2,2,2,2
717,2,2,2,2,2,2,2,2,2,2,2
718,1,1,1,1,1,1,1,1,1,1,1


In [17]:
EI.meta_summary["metrics"]

Unnamed: 0,ADAB,XGB,DT,RF,GB,KNN,LR,NB,MLP,SVM
precision,0.994502,0.997245,0.998617,0.998617,1.0,0.962963,0.986667,0.995839,0.987847,0.993141
recall,0.994444,0.997222,0.998611,0.998611,1.0,0.958333,0.986111,0.995833,0.9875,0.993056
f1,0.994435,0.997222,0.998611,0.998611,1.0,0.958874,0.986177,0.995833,0.987527,0.993061


In [18]:
df = EI.meta_predictions
model_columns = df.columns[:-1]

accuracy_dict = {}

for model in model_columns:
    correct_predictions = (df[model] == df["labels"]).sum()
    total_predictions = len(df)
    accuracy = correct_predictions / total_predictions
    accuracy_dict[model] = accuracy

accuracy_dict

{'ADAB': 0.9944444444444445,
 'XGB': 0.9972222222222222,
 'DT': 0.9986111111111111,
 'RF': 0.9986111111111111,
 'GB': 1.0,
 'KNN': 0.9583333333333334,
 'LR': 0.9861111111111112,
 'NB': 0.9958333333333333,
 'MLP': 0.9875,
 'SVM': 0.9930555555555556}

In [19]:
preferred_meta_model = max(accuracy_dict, key=lambda key: accuracy_dict[key])
y_pred = EI.predict(X_dict=data_test, meta_model_key="GB")
y_pred = [np.argmax(np.array(y)) for y in y_pred]

In [20]:
accuracy = sum([1*(y==y_hat)+0*(y!=y_hat) for y,y_hat in list(zip(y_test, y_pred))])/len(y_test)
accuracy

0.9944444444444445

In [21]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

X = np.concatenate([modality_0,modality_1,modality_2], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3, stratify=y)

model = LogisticRegression(multi_class='auto', solver='lbfgs', max_iter=1000)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
accuracy

0.9222222222222223

In [22]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97        60
           1       0.89      0.92      0.90        60
           2       0.94      0.85      0.89        60

    accuracy                           0.92       180
   macro avg       0.92      0.92      0.92       180
weighted avg       0.92      0.92      0.92       180



In [23]:
iris = datasets.load_iris()
X = iris.data
y = iris.target

Modality_a = X[:, 0:2]
Modality_b = X[:, 2:4]

X_a_train, X_a_test, y_train, y_test = train_test_split(Modality_a, y, test_size=0.2, random_state=3, stratify=y)
X_b_train, X_b_test, _,_ = train_test_split(Modality_b, y, test_size=0.2, random_state=3, stratify=y)

In [24]:
iris_data_train = {
                "Modality_a": X_a_train,
                "Modality_b": X_b_train
                }

iris_data_test = {
                "Modality_a": X_a_test,
                "Modality_b": X_b_test
                }

In [25]:
base_predictors = {
                    'ADAB': AdaBoostClassifier(),
                    'XGB': XGBClassifier(),
                    'DT': DecisionTreeClassifier(),
                    'RF': RandomForestClassifier(),
                    'GB': GradientBoostingClassifier(),
                    'KNN': KNeighborsClassifier(),
                    'LR': LogisticRegression(),
                    'NB': GaussianNB(),
                    'MLP': MLPClassifier(),
                    'SVM': SVC(probability=True)
}

In [26]:
EI_iris = EnsembleIntegration(
                        base_predictors=base_predictors,
                        k_outer=5,
                        k_inner=5,
                        n_samples=1,
                        sampling_strategy=None,
                        n_jobs=-1,
                        random_state=0,
                        project_name="iris",
                        model_building=True,
                        )


In [27]:
for name, modality in iris_data_train.items():
    EI_iris.train_base(modality, y_train, modality_name=name)

AttributeError: 'EnsembleIntegration' object has no attribute 'train_base'

In [None]:
EI_iris.meta_training_data

In [None]:
EI_iris.train_meta(meta_predictors=base_predictors)

In [None]:
EI_iris.meta_summary["metrics"]

In [None]:
preferred_meta_model = EI_iris.meta_summary["metrics"].loc["precision"].idxmax()
y_pred_iris = EI_iris.predict(X_dict=iris_data_test, meta_model_key=preferred_meta_model)
y_pred_iris = [np.argmax(np.array(y)) for y in y_pred_iris]
y_pred_iris

In [None]:
accuracy = sum([1*(y==y_hat)+0*(y!=y_hat) for y,y_hat in list(zip(y_test, y_pred_iris))])/len(y_test)
accuracy

In [None]:
xgb_model = XGBClassifier()
xgb_model.fit(X_train,y_train)
y_pred = xgb_model.predict(X_test)
accuracy=accuracy_score(y_test,y_pred)
accuracy

In [None]:
import os
csvs = "./data/binary_data"
modalities = {}
for file_name in os.listdir(csvs):
    if not file_name == ".DS_Store":
        if not file_name.startswith("labels"):
            file_path = os.path.join(csvs, file_name)
            modality = os.path.splitext(file_name)[0]

            data = pd.read_csv(file_path)
            modalities[modality] = data

y = pd.read_csv('./data/binary_data/labels.csv', header=None).to_numpy()

In [None]:
for k in modalities.keys():
    modalities[k].drop(columns="Unnamed: 0", axis=1, inplace=True)

In [None]:
for k,v in modalities.items():
    print(k, v.shape)

In [None]:
bin_base_predictors = {
                    'ADAB': AdaBoostClassifier(),
                    'XGB': XGBClassifier(),
                    'DT': DecisionTreeClassifier(),
                    'RF': RandomForestClassifier(),
                    'GB': GradientBoostingClassifier(),
                    'KNN': KNeighborsClassifier(),
                    'LR': LogisticRegression(),
                    'NB': GaussianNB(),
                    'MLP': MLPClassifier(),
                    'SVM': SVC(probability=True)
}

In [None]:
EI_bin = EnsembleIntegration(
                        base_predictors=bin_base_predictors,
                        k_outer=5,
                        k_inner=5,
                        n_samples=1,
                        sampling_strategy=None,
                        n_jobs=-1,
                        random_state=42,
                        project_name="toy",
                        model_building=True,
                        )

In [None]:
EI_bin.fit_base(X=data_bin_train,y=y_bin_train)