In [1]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import RobustScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier, XGBRFClassifier
import sys
from ei import EnsembleIntegration, MeanAggregation, MedianAggregation
from interpretation import *
from utils import *
from sklearn.linear_model import Perceptron
from sklearn.datasets import make_classification

# Synthetic binary classification data: 1000 samples, 20 features, no redundant
# features, one cluster per class, no label noise, fixed seed for repeatability.
# weights=[0.7] skews the class proportions (see make_classification docs).
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_redundant=0,
    n_clusters_per_class=1,
    weights=[0.7],
    flip_y=0,
    random_state=1,
)

# Split the 20 columns into four consecutive 5-column "views" that serve as
# the separate modalities further down.
view_column_slices = (slice(None, 5), slice(5, 10), slice(10, 15), slice(15, None))
X_view_0, X_view_1, X_view_2, X_view_3 = (X[:, cols] for cols in view_column_slices)

# Base predictors handed to EnsembleIntegration / EI_interpreter, keyed by the
# short name that shows up in the "base predictor" column of the result tables.
# Every entry is a fresh, unfitted estimator with library defaults unless noted.
base_predictors = {
    'AdaBoost': AdaBoostClassifier(),
    "XGB": XGBClassifier(use_label_encoder=False),
    # "DT": DecisionTreeClassifier(),
    "Perceptron": Perceptron(),
    "RF": RandomForestClassifier(),
    'GradientBoosting': GradientBoostingClassifier(),
    'KNN': KNeighborsClassifier(),
    'LR': LogisticRegression(),
    'NB': GaussianNB(),
    'MLP': MLPClassifier(),
    # max_iter has to be an integer: scikit-learn's parameter validation rejects
    # the float 1e4 at fit time, so the same cap is written as 10_000.
    "SVM": SVC(kernel='linear', probability=True, max_iter=10_000),
}
# Meta-models (ensemble methods) evaluated by the interpreter, keyed by short
# name. Every entry is a fresh, unfitted estimator with library defaults unless
# noted. The commented-out entries at the bottom are alternatives that are
# currently disabled.
meta_models = {
    'AdaBoost': AdaBoostClassifier(),
    "XGB": XGBClassifier(use_label_encoder=False),
    "DT": DecisionTreeClassifier(),
    "RF": RandomForestClassifier(),
    'GradientBoosting': GradientBoostingClassifier(),
    'KNN': KNeighborsClassifier(),
    'LR': LogisticRegression(),
    'NB': GaussianNB(),
    'MLP': MLPClassifier(),
    # max_iter has to be an integer: scikit-learn's parameter validation rejects
    # the float 1e4 at fit time, so the same cap is written as 10_000.
    "SVM": SVC(kernel='linear', probability=True, max_iter=10_000),
    # "meanSK": MeanAggregation_sk(),
    # "CES_sk": CES_sk()
    # "Voting": VotingClassifier()
    # "Mean": MeanAggregation(),
    # "Median": MedianAggregation()
}

# Settings for the Ensemble Integration run, collected in one place so they are
# easy to scan and tweak.
# NOTE(review): k_outer / k_inner are presumably the outer / inner
# cross-validation fold counts — confirm against the ei module.
ei_settings = dict(
    base_predictors=base_predictors,
    k_outer=5,
    k_inner=5,
    n_samples=5,
    sampling_strategy="undersampling",
    sampling_aggregation="mean",
    n_jobs=-1,  # set as -1 to use all available CPUs
    random_state=42,
    project_name="demo",
    # calibration=True,
    verbose=0,
)
EI = EnsembleIntegration(**ei_settings)

# Persist the configured object (the cell output reports "Saved to EI.demo"),
# then rebind EI to whatever load() returns for that file.
EI.save()
EI = EnsembleIntegration().load("EI.demo")

# Modality name -> feature matrix for each of the four views.
modalities = dict(
    view_0=X_view_0,
    view_1=X_view_1,
    view_2=X_view_2,
    view_3=X_view_3,
)

# Explicit base-predictor training is left commented out in this notebook:
# for name, modality in modalities.items():
#     EI.train_base(modality, y, base_predictors, modality=name)
#
# EI.save() # save EI as EI.demo




  from .autonotebook import tqdm as notebook_tqdm



Saved to EI.demo



In [2]:
from sklearn.metrics import average_precision_score

# Build the interpreter around the loaded EI object, passing the same predictor
# and meta-model dictionaries, the per-view data and the labels. Scoring uses
# f_minority_score (brought in by one of the star imports).
EI_int = EI_interpreter(
    EI_object=EI,
    base_predictors=base_predictors,
    meta_models=meta_models,
    modalities=modalities,
    y=y,
    metric=f_minority_score,
)

# Run the rank-product-score computation; per the log below this trains the
# base predictors on each view first.
EI_int.rank_product_score()


 Working on view_0 data... 

Training base predictors on outer training sets...

 Working on view_1 data... 

Training base predictors on outer training sets...

 Working on view_2 data... 

Training base predictors on outer training sets...

 Working on view_3 data... 

Training base predictors on outer training sets...
     local_model_PI base predictor modality ensemble_method    LMR  \
0          0.012509       AdaBoost   view_0      S.AdaBoost  0.425   
1          0.012509       AdaBoost   view_0      S.AdaBoost  0.425   
2          0.012509       AdaBoost   view_0      S.AdaBoost  0.425   
3          0.012509       AdaBoost   view_0      S.AdaBoost  0.425   
4          0.012509       AdaBoost   view_0      S.AdaBoost  0.425   
..              ...            ...      ...             ...    ...   
995        0.000000            SVM   view_3      S.AdaBoost  0.875   
996        0.000000            SVM   view_3      S.AdaBoost  0.875   
997        0.000000            SVM   view_3   

In [3]:
# Feature ranking for the 'S.XGB' ensemble. The displayed table has the columns
# modality, feature, RPS, feature rank and ensemble method, and its rows appear
# ordered by ascending RPS (rank 1 = smallest RPS).
EI_int.ensemble_feature_ranking['S.XGB']

Unnamed: 0,modality,feature,RPS,feature rank,ensemble method
11,view_2,view_2_1,0.099,1.0,S.XGB
1,view_0,view_0_1,0.1755,2.0,S.XGB
9,view_1,view_1_4,0.2208,3.0,S.XGB
19,view_3,view_3_4,0.2514,4.0,S.XGB
4,view_0,view_0_4,0.2659,5.0,S.XGB
16,view_3,view_3_1,0.2743,6.0,S.XGB
15,view_3,view_3_0,0.277,7.0,S.XGB
6,view_1,view_1_1,0.2784,8.0,S.XGB
2,view_0,view_0_2,0.2878,9.0,S.XGB
3,view_0,view_0_3,0.3097,10.0,S.XGB


In [4]:
# Show the whole mapping of ensemble-method name -> feature-ranking DataFrame.
# NOTE(review): this dumps every table in full; displaying the keys or a
# .head() per method would keep the output readable.
EI_int.ensemble_feature_ranking

{'S.AdaBoost':    modality   feature     RPS  feature rank ensemble method
 11   view_2  view_2_1  0.0830           1.0      S.AdaBoost
 9    view_1  view_1_4  0.2132           2.0      S.AdaBoost
 1    view_0  view_0_1  0.2224           3.0      S.AdaBoost
 6    view_1  view_1_1  0.2543           4.0      S.AdaBoost
 12   view_2  view_2_2  0.2594           5.0      S.AdaBoost
 16   view_3  view_3_1  0.2714           6.0      S.AdaBoost
 10   view_2  view_2_0  0.2760           7.0      S.AdaBoost
 19   view_3  view_3_4  0.2774           8.0      S.AdaBoost
 15   view_3  view_3_0  0.3042           9.0      S.AdaBoost
 13   view_2  view_2_3  0.3120          10.0      S.AdaBoost
 14   view_2  view_2_4  0.3146          11.0      S.AdaBoost
 7    view_1  view_1_2  0.3183          12.0      S.AdaBoost
 4    view_0  view_0_4  0.3277          13.0      S.AdaBoost
 18   view_3  view_3_3  0.3596          14.0      S.AdaBoost
 2    view_0  view_0_2  0.3606          15.0      S.AdaBoost
 17   view

In [9]:
# Local feature ranks (LFRs) restricted to rows whose base predictor is the
# Perceptron.
# NOTE(review): this cell's execution count is out of order relative to its
# neighbours; re-check the output after a Restart & Run All.
is_perceptron = EI_int.LFRs['base predictor'] == 'Perceptron'
EI_int.LFRs.loc[is_perceptron]

Unnamed: 0,local_feat_PI,local_feat_name,base predictor,modality,LFR,sample
0,0.0,view_0_0,Perceptron,view_0,0.6,0
1,0.0,view_0_1,Perceptron,view_0,0.6,0
2,0.0,view_0_2,Perceptron,view_0,0.6,0
3,0.0,view_0_3,Perceptron,view_0,0.6,0
4,0.0,view_0_4,Perceptron,view_0,0.6,0
...,...,...,...,...,...,...
0,0.0,view_3_0,Perceptron,view_3,0.6,4
1,0.0,view_3_1,Perceptron,view_3,0.6,4
2,0.0,view_3_2,Perceptron,view_3,0.6,4
3,0.0,view_3_3,Perceptron,view_3,0.6,4


In [5]:
# Feature ranking for the 'Mean' ensemble.
# NOTE(review): meta_models has no active "Mean" entry in the setup cell (it is
# commented out there) — confirm this key is still produced on a fresh run.
EI_int.ensemble_feature_ranking['Mean']

Unnamed: 0,modality,feature,RPS,feature rank,ensemble method
11,view_2,view_2_1,0.1025,1.0,Mean
9,view_1,view_1_4,0.1886,2.0,Mean
1,view_0,view_0_1,0.2091,3.0,Mean
6,view_1,view_1_1,0.2378,4.0,Mean
19,view_3,view_3_4,0.27265,5.0,Mean
15,view_3,view_3_0,0.2747,6.0,Mean
16,view_3,view_3_1,0.28495,7.0,Mean
4,view_0,view_0_4,0.2911,8.0,Mean
7,view_1,view_1_2,0.2993,9.0,Mean
2,view_0,view_0_2,0.30545,10.0,Mean


In [4]:
# Feature ranking for the 'S.meanSK' ensemble.
# NOTE(review): meta_models has no active "meanSK" entry in the setup cell (it
# is commented out there) — this lookup may raise KeyError on a fresh run; confirm.
EI_int.ensemble_feature_ranking['S.meanSK']

Unnamed: 0,modality,feature,RPS,feature rank,ensemble method
11,view_2,view_2_1,0.0345,1.0,S.meanSK
10,view_2,view_2_0,0.1124,2.0,S.meanSK
14,view_2,view_2_4,0.1153,3.0,S.meanSK
12,view_2,view_2_2,0.1239,4.0,S.meanSK
13,view_2,view_2_3,0.1314,5.0,S.meanSK
9,view_1,view_1_4,0.2454,6.0,S.meanSK
16,view_3,view_3_1,0.2775,7.0,S.meanSK
19,view_3,view_3_4,0.2777,8.0,S.meanSK
15,view_3,view_3_0,0.2964,9.0,S.meanSK
1,view_0,view_0_1,0.3014,10.0,S.meanSK


In [3]:
# Feature ranking for the 'CES' ensemble.
# NOTE(review): the setup cell defines no meta-model named "CES"; presumably
# the interpreter adds it itself — verify the key exists on a fresh run.
EI_int.ensemble_feature_ranking['CES']

Unnamed: 0,modality,feature,RPS,feature rank,ensemble method
11,view_2,view_2_1,0.1,1.0,CES
1,view_0,view_0_1,0.205,2.0,CES
9,view_1,view_1_4,0.2116,3.0,CES
16,view_3,view_3_1,0.2532,4.0,CES
15,view_3,view_3_0,0.25345,5.0,CES
19,view_3,view_3_4,0.2564,6.0,CES
2,view_0,view_0_2,0.25905,7.0,CES
6,view_1,view_1_1,0.27515,8.0,CES
4,view_0,view_0_4,0.276,9.0,CES
12,view_2,view_2_2,0.3221,10.0,CES


In [4]:
# Feature ranking for the 'S.CES_sk' ensemble.
# NOTE(review): meta_models has no active "CES_sk" entry in the setup cell (it
# is commented out there) — this lookup may raise KeyError on a fresh run; confirm.
EI_int.ensemble_feature_ranking['S.CES_sk']

Unnamed: 0,modality,feature,RPS,feature rank,ensemble method
11,view_2,view_2_1,0.08325,1.0,S.CES_sk
9,view_1,view_1_4,0.21025,2.0,S.CES_sk
1,view_0,view_0_1,0.2327,3.0,S.CES_sk
6,view_1,view_1_1,0.25985,4.0,S.CES_sk
12,view_2,view_2_2,0.2711,5.0,S.CES_sk
19,view_3,view_3_4,0.2783,6.0,S.CES_sk
16,view_3,view_3_1,0.28235,7.0,S.CES_sk
14,view_2,view_2_4,0.29445,8.0,S.CES_sk
15,view_3,view_3_0,0.2982,9.0,S.CES_sk
13,view_2,view_2_3,0.2987,10.0,S.CES_sk


In [5]:
# Local model ranks (LMRs): one row per base predictor / modality / ensemble
# method, with columns local_model_PI and LMR.
# NOTE(review): the saved output lists a 'DT' base predictor and a 'CES'
# ensemble, neither of which is active in the setup cell's dictionaries — the
# output looks stale; re-run from a fresh kernel.
EI_int.LMRs

Unnamed: 0,local_model_PI,base predictor,modality,ensemble_method,LMR
0,0.002491,AdaBoost,view_0,S.AdaBoost,0.4000
1,0.001478,DT,view_0,S.AdaBoost,0.7250
2,0.007497,GradientBoosting,view_0,S.AdaBoost,0.2000
3,0.002099,KNN,view_0,S.AdaBoost,0.4750
4,0.002538,LR,view_0,S.AdaBoost,0.3750
...,...,...,...,...,...
35,1.000000,MLP,view_3,CES,0.3625
36,1.000000,NB,view_3,CES,0.3625
37,0.000000,RF,view_3,CES,0.7500
38,7.000000,SVM,view_3,CES,0.0250


In [6]:
# LMR and LFR tables merged for the 'S.XGB' ensemble. In the displayed rows
# the LMR_LFR_product column equals LMR * LFR (e.g. 0.625 * 0.8 = 0.500).
EI_int.merged_lmr_lfr['S.XGB']

Unnamed: 0,local_model_PI,base predictor,modality,ensemble_method,LMR,local_feat_PI,local_feat_name,LFR,sample,LMR_LFR_product
0,-3.330669e-17,AdaBoost,view_0,S.XGB,0.625,0.035168,view_0_0,1.0,0,0.625
1,-3.330669e-17,AdaBoost,view_0,S.XGB,0.625,0.036390,view_0_1,0.8,0,0.500
2,-3.330669e-17,AdaBoost,view_0,S.XGB,0.625,0.062275,view_0_2,0.4,0,0.250
3,-3.330669e-17,AdaBoost,view_0,S.XGB,0.625,0.065453,view_0_3,0.2,0,0.125
4,-3.330669e-17,AdaBoost,view_0,S.XGB,0.625,0.049280,view_0_4,0.6,0,0.375
...,...,...,...,...,...,...,...,...,...,...
995,-1.110223e-17,SVM,view_3,S.XGB,0.325,-0.025192,view_3_0,1.0,4,0.325
996,-1.110223e-17,SVM,view_3,S.XGB,0.325,-0.001903,view_3_1,0.4,4,0.130
997,-1.110223e-17,SVM,view_3,S.XGB,0.325,-0.005680,view_3_2,0.6,4,0.195
998,-1.110223e-17,SVM,view_3,S.XGB,0.325,-0.006238,view_3_3,0.8,4,0.260
