In [1]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import RobustScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier, XGBRFClassifier
import sys
from ei import EnsembleIntegration, MeanAggregation, MedianAggregation
from interpretation import *

from sklearn.datasets import make_classification

# Synthetic binary classification data: 1000 samples x 20 features, no
# redundant features, one cluster per class, no random label flipping.
# weights=[0.7] skews the class proportions (first class ~70%).
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_redundant=0,
    n_clusters_per_class=1,
    weights=[0.7],
    flip_y=0,
    random_state=1,
)

# Carve the 20 feature columns into four consecutive 5-column views
# (columns 0-4, 5-9, 10-14 and 15-19).
X_view_0, X_view_1, X_view_2, X_view_3 = (
    X[:, first:first + 5] for first in range(0, 20, 5)
)

# Base classifiers, keyed by the short label used to refer to each one.
# Every estimator uses its library defaults unless an argument is shown.
base_predictors = {
    'AdaBoost': AdaBoostClassifier(),
    "XGB": XGBClassifier(use_label_encoder=False),
    "DT": DecisionTreeClassifier(),
    "RF": RandomForestClassifier(),
    'GradientBoosting': GradientBoostingClassifier(),
    'KNN': KNeighborsClassifier(),
    'LR': LogisticRegression(),
    'NB': GaussianNB(),
    'MLP': MLPClassifier(),
    # max_iter is documented as an int; recent scikit-learn releases validate
    # estimator parameters at fit time and reject the float 1e4, so pass the
    # equivalent integer instead.
    "SVM": SVC(kernel='linear', probability=True, max_iter=10000),
}
# Meta-model candidates, keyed by short label. These are handed to
# EI_interpreter via its meta_models argument later in the notebook.
meta_models = {
    'AdaBoost': AdaBoostClassifier(),
    "XGB": XGBClassifier(use_label_encoder=False),
    "DT": DecisionTreeClassifier(),
    "RF": RandomForestClassifier(),
    'GradientBoosting': GradientBoostingClassifier(),
    'KNN': KNeighborsClassifier(),
    'LR': LogisticRegression(),
    'NB': GaussianNB(),
    'MLP': MLPClassifier(),
    # max_iter is documented as an int; recent scikit-learn releases validate
    # estimator parameters at fit time and reject the float 1e4, so pass the
    # equivalent integer instead.
    "SVM": SVC(kernel='linear', probability=True, max_iter=10000),
    # "Mean": MeanAggregation(),
    # "Median": MedianAggregation()
}

# Build the EnsembleIntegration object that is handed to EI_interpreter below.
# NOTE(review): k_outer / k_inner look like outer and inner cross-validation
# fold counts -- confirm against the ei package.
EI = EnsembleIntegration(
    base_predictors=base_predictors,
    k_outer=5,
    k_inner=5,
    n_samples=1,
    sampling_strategy="undersampling",
    sampling_aggregation="mean",
    n_jobs=-1,  # set as -1 to use all available CPUs
    random_state=42,
    project_name="demo",
    calibration=True,
)

# Map each modality label to the feature matrix of the corresponding view.
modalities = dict(
    view_0=X_view_0,
    view_1=X_view_1,
    view_2=X_view_2,
    view_3=X_view_3,
)

# for name, modality in modalities.items():
#     EI.train_base(modality, y, base_predictors, modality=name)

# EI.save() # save EI as EI.demo




In [2]:
from sklearn.metrics import average_precision_score

# Build the interpreter from the EI object, both model dictionaries, the
# per-view feature matrices and the labels.
# NOTE(review): auprc_sklearn is not defined in this notebook; presumably it
# is provided by the wildcard import from `interpretation` -- confirm.
# average_precision_score imported above is not referenced in this cell.
EI_int = EI_interpreter(
    EI_object=EI,
    base_predictors=base_predictors,
    meta_models=meta_models,
    modalities=modalities,
    y=y,
    metric=auprc_sklearn,
)

# Print the predictor dictionary held by the interpreter, then the one held
# by the EI object, in that order.
for _owner in (EI_int, EI):
    print(_owner.base_predictors)

# Kept as the last expression so any return value is displayed by the cell.
EI_int.rank_product_score()

{'AdaBoost': AdaBoostClassifier(), 'XGB': XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, gamma=None,
              gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, n_estimators=100, n_jobs=None,
              num_parallel_tree=None, predictor=None, random_state=None,
              reg_alpha=None, reg_lambda=None, ...), 'DT': DecisionTreeClassifier(), 'RF': RandomForestClassifier(), 'GradientBoosting': GradientBoostingClassifier(), 'KNN': KNeighborsClassifier(), 'LR': LogisticRegression(), 'NB': GaussianNB(), 'MLP': MLPClassifier(),

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.6s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    5.8s
[Parallel(n_jobs=-1)]: Done  41 out of  50 | elapsed:    6.6s remaining:    1.4s
[Parallel(n_jobs=-1)]: Done  47 out of  50 | elapsed:    8.7s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    8.8s finished



 Working on view_1 data... 


view_1 modality: training base predictors on outer training sets...


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done  41 out of  50 | elapsed:    3.0s remaining:    0.7s
[Parallel(n_jobs=-1)]: Done  47 out of  50 | elapsed:    3.9s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    4.0s finished


In [None]:
# Look up the feature ranking stored under the 'S.AdaBoost' key and display
# it as the cell's result.
s_adaboost_ranking = EI_int.ensemble_feature_ranking['S.AdaBoost']
s_adaboost_ranking

Unnamed: 0,modality,feature,RPS,feature rank,ensemble method
11,view_2,view_2_1,0.08175,1.0,S.AdaBoost
9,view_1,view_1_4,0.2025,2.0,S.AdaBoost
12,view_2,view_2_2,0.22275,3.0,S.AdaBoost
19,view_3,view_3_4,0.23825,4.0,S.AdaBoost
13,view_2,view_2_3,0.24825,5.0,S.AdaBoost
1,view_0,view_0_1,0.25875,6.0,S.AdaBoost
2,view_0,view_0_2,0.2925,7.0,S.AdaBoost
17,view_3,view_3_2,0.31175,8.0,S.AdaBoost
6,view_1,view_1_1,0.315,9.0,S.AdaBoost
10,view_2,view_2_0,0.31525,10.0,S.AdaBoost
