In [4]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import RobustScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier, XGBRFClassifier
import sys
from ei import EnsembleIntegration, MeanAggregation, MedianAggregation
from interpretation import *
from utils import *
from sklearn.linear_model import Perceptron
from sklearn.datasets import make_classification

# Synthetic binary-classification problem: 1000 samples, 20 features, no
# redundant features, one cluster per class, no label noise (flip_y=0).
# weights=[0.7] skews the class balance; random_state pins the draw.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_redundant=0,
    n_clusters_per_class=1,
    weights=[0.7],
    flip_y=0,
    random_state=1,
)

# Cut the 20 columns into four consecutive 5-column blocks; each block is
# treated as a separate "view" (modality) further down.
X_view_0, X_view_1, X_view_2, X_view_3 = (
    X[:, start:stop]
    for start, stop in ((0, 5), (5, 10), (10, 15), (15, None))
)

# Base classifiers, keyed by the short label that shows up in the
# "base predictor" column of the result tables. Insertion order is kept
# exactly as before in case downstream code depends on it.
base_predictors = {
    "AdaBoost": AdaBoostClassifier(),
    # NOTE(review): `use_label_encoder` is deprecated in recent xgboost
    # releases; confirm it is still needed for the pinned version.
    "XGB": XGBClassifier(use_label_encoder=False),
    # "DT": DecisionTreeClassifier(),
    "Perceptron": Perceptron(),
    "RF": RandomForestClassifier(),
    "GradientBoosting": GradientBoostingClassifier(),
    "KNN": KNeighborsClassifier(),
    "LR": LogisticRegression(),
    "NB": GaussianNB(),
    "MLP": MLPClassifier(),
    # max_iter is documented as an int; the float literal 1e4 is rejected by
    # scikit-learn releases that validate estimator parameters, so pass the
    # same limit as an integer.
    "SVM": SVC(kernel="linear", probability=True, max_iter=10000),
}
# Candidate meta-level (stacking) classifiers, keyed by short label.
# Insertion order is kept exactly as before in case downstream code depends
# on it.
meta_models = {
    "AdaBoost": AdaBoostClassifier(),
    # NOTE(review): `use_label_encoder` is deprecated in recent xgboost
    # releases; confirm it is still needed for the pinned version.
    "XGB": XGBClassifier(use_label_encoder=False),
    "DT": DecisionTreeClassifier(),
    "RF": RandomForestClassifier(),
    "GradientBoosting": GradientBoostingClassifier(),
    "KNN": KNeighborsClassifier(),
    "LR": LogisticRegression(),
    "NB": GaussianNB(),
    "MLP": MLPClassifier(),
    # max_iter is documented as an int; the float literal 1e4 is rejected by
    # scikit-learn releases that validate estimator parameters, so pass the
    # same limit as an integer.
    "SVM": SVC(kernel="linear", probability=True, max_iter=10000),
    # "Voting": VotingClassifier()
    # "Mean": MeanAggregation(),
    # "Median": MedianAggregation()
}

# Configure the ensemble-integration object for the "demo" project.
# NOTE(review): k_outer / k_inner are presumably the outer / inner
# cross-validation fold counts, and n_samples the number of resampling
# rounds for the undersampling strategy; confirm against the `ei` module.
EI = EnsembleIntegration(
    base_predictors=base_predictors,
    k_outer=5,
    k_inner=5,
    n_samples=5,
    sampling_strategy="undersampling",
    sampling_aggregation="mean",
    n_jobs=-1,  # -1 => use all available CPUs
    random_state=42,
    project_name="demo",
    # calibration=True
    verbose=0,
)

# Round-trip through disk: save() reports "Saved to EI.demo" in the cell
# output, and that same file is then loaded back into EI.
EI.save()
EI = EnsembleIntegration().load("EI.demo")

# Modality name -> feature matrix, listed in view order.
modalities = dict(
    view_0=X_view_0,
    view_1=X_view_1,
    view_2=X_view_2,
    view_3=X_view_3,
)

# for name, modality in modalities.items():
#     EI.train_base(modality, y, base_predictors, modality=name)

# EI.save() # save EI as EI.demo





Saved to EI.demo



In [5]:
from sklearn.metrics import average_precision_score

# Build the interpreter around the EI object, the predictor/meta-model
# dictionaries, the per-view data and the labels.
# NOTE(review): EI_interpreter and f_minority_score are not imported by name;
# they presumably come from the star imports of `interpretation` / `utils`.
EI_int = EI_interpreter(
    EI_object=EI,
    base_predictors=base_predictors,
    meta_models=meta_models,
    modalities=modalities,
    y=y,
    metric=f_minority_score,
)

# Per the captured output, this call works through each view in turn
# ("Working on view_N data...") and trains the base predictors on the outer
# training sets before the ranking tables are shown.
EI_int.rank_product_score()


 Working on view_0 data... 

Training base predictors on outer training sets...

 Working on view_1 data... 

Training base predictors on outer training sets...

 Working on view_2 data... 

Training base predictors on outer training sets...

 Working on view_3 data... 

Training base predictors on outer training sets...
     local_model_PI base predictor modality ensemble_method    LMR  \
0          0.017931       AdaBoost   view_0      S.AdaBoost  0.175   
1          0.017931       AdaBoost   view_0      S.AdaBoost  0.175   
2          0.017931       AdaBoost   view_0      S.AdaBoost  0.175   
3          0.017931       AdaBoost   view_0      S.AdaBoost  0.175   
4          0.017931       AdaBoost   view_0      S.AdaBoost  0.175   
..              ...            ...      ...             ...    ...   
995        0.006139            SVM   view_3      S.AdaBoost  0.550   
996        0.006139            SVM   view_3      S.AdaBoost  0.550   
997        0.006139            SVM   view_3   

In [6]:
# Feature ranking for the "S.XGB" ensemble. In the rendered table the rows
# are ordered by ascending RPS, with "feature rank" counting up from 1.
EI_int.ensemble_feature_ranking["S.XGB"]

Unnamed: 0,modality,feature,RPS,feature rank,ensemble method
11,view_2,view_2_1,0.0855,1.0,S.XGB
1,view_0,view_0_1,0.1788,2.0,S.XGB
9,view_1,view_1_4,0.2116,3.0,S.XGB
6,view_1,view_1_1,0.2668,4.0,S.XGB
4,view_0,view_0_4,0.2735,5.0,S.XGB
12,view_2,view_2_2,0.2747,6.0,S.XGB
2,view_0,view_0_2,0.2899,7.0,S.XGB
10,view_2,view_2_0,0.295,8.0,S.XGB
19,view_3,view_3_4,0.3059,9.0,S.XGB
15,view_3,view_3_0,0.3082,10.0,S.XGB


In [9]:
# Local feature ranks restricted to the Perceptron base predictor.
# NOTE(review): every local_feat_PI shown for Perceptron is 0.0 in the
# rendered output; worth confirming that is expected.
EI_int.LFRs.loc[EI_int.LFRs["base predictor"].eq("Perceptron")]

Unnamed: 0,local_feat_PI,local_feat_name,base predictor,modality,LFR,sample
0,0.0,view_0_0,Perceptron,view_0,0.6,0
1,0.0,view_0_1,Perceptron,view_0,0.6,0
2,0.0,view_0_2,Perceptron,view_0,0.6,0
3,0.0,view_0_3,Perceptron,view_0,0.6,0
4,0.0,view_0_4,Perceptron,view_0,0.6,0
...,...,...,...,...,...,...
0,0.0,view_3_0,Perceptron,view_3,0.6,4
1,0.0,view_3_1,Perceptron,view_3,0.6,4
2,0.0,view_3_2,Perceptron,view_3,0.6,4
3,0.0,view_3_3,Perceptron,view_3,0.6,4


In [5]:
# Show the local model rank table (columns in the rendered output:
# local_model_PI, base predictor, modality, ensemble_method, LMR).
# NOTE(review): the rendered rows include a "DT" base predictor and a "CES"
# ensemble method, neither of which is defined in the current
# base_predictors / meta_models dictionaries, and the execution counter is out
# of order. The output looks stale; re-run from a fresh kernel.
EI_int.LMRs

Unnamed: 0,local_model_PI,base predictor,modality,ensemble_method,LMR
0,0.002491,AdaBoost,view_0,S.AdaBoost,0.4000
1,0.001478,DT,view_0,S.AdaBoost,0.7250
2,0.007497,GradientBoosting,view_0,S.AdaBoost,0.2000
3,0.002099,KNN,view_0,S.AdaBoost,0.4750
4,0.002538,LR,view_0,S.AdaBoost,0.3750
...,...,...,...,...,...
35,1.000000,MLP,view_3,CES,0.3625
36,1.000000,NB,view_3,CES,0.3625
37,0.000000,RF,view_3,CES,0.7500
38,7.000000,SVM,view_3,CES,0.0250


In [6]:
# Joined model-rank / feature-rank table for the "S.XGB" ensemble. In the
# rows rendered below, LMR_LFR_product equals LMR multiplied by LFR.
EI_int.merged_lmr_lfr["S.XGB"]

Unnamed: 0,local_model_PI,base predictor,modality,ensemble_method,LMR,local_feat_PI,local_feat_name,LFR,sample,LMR_LFR_product
0,-3.330669e-17,AdaBoost,view_0,S.XGB,0.625,0.035168,view_0_0,1.0,0,0.625
1,-3.330669e-17,AdaBoost,view_0,S.XGB,0.625,0.036390,view_0_1,0.8,0,0.500
2,-3.330669e-17,AdaBoost,view_0,S.XGB,0.625,0.062275,view_0_2,0.4,0,0.250
3,-3.330669e-17,AdaBoost,view_0,S.XGB,0.625,0.065453,view_0_3,0.2,0,0.125
4,-3.330669e-17,AdaBoost,view_0,S.XGB,0.625,0.049280,view_0_4,0.6,0,0.375
...,...,...,...,...,...,...,...,...,...,...
995,-1.110223e-17,SVM,view_3,S.XGB,0.325,-0.025192,view_3_0,1.0,4,0.325
996,-1.110223e-17,SVM,view_3,S.XGB,0.325,-0.001903,view_3_1,0.4,4,0.130
997,-1.110223e-17,SVM,view_3,S.XGB,0.325,-0.005680,view_3_2,0.6,4,0.195
998,-1.110223e-17,SVM,view_3,S.XGB,0.325,-0.006238,view_3_3,0.8,4,0.260
