In [4]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import RobustScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier, XGBRFClassifier
import sys
from ei import EnsembleIntegration, MeanAggregation, MedianAggregation
from interpretation import *

from sklearn.datasets import make_classification

# Synthetic binary-classification data: 1000 samples, 20 features, no
# redundant features, no label noise (flip_y=0), fixed seed for repeatability.
# weights=[0.7] asks for roughly 70% of the samples in the first class
# (per scikit-learn's `weights` semantics).
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_redundant=0,
    n_clusters_per_class=1,
    weights=[0.7],
    flip_y=0,
    random_state=1,
)

# Carve the 20 feature columns into four consecutive 5-column "views":
# columns 0-4, 5-9, 10-14 and 15-19.
X_view_0, X_view_1, X_view_2, X_view_3 = (
    X[:, first_col:first_col + 5] for first_col in range(0, 20, 5)
)

def _build_classifiers():
    """Return a fresh ``name -> unfitted estimator`` mapping of the ten shared learners.

    The same set of learners is used both as base predictors and as stacking
    meta-models. Building a new dictionary on every call keeps the two
    collections from sharing estimator instances.
    """
    return {
        'AdaBoost': AdaBoostClassifier(),
        # NOTE(review): `use_label_encoder` is reported as deprecated by recent
        # xgboost releases -- confirm against the pinned xgboost version.
        "XGB": XGBClassifier(use_label_encoder=False),
        # "DT" is a single boosted tree (n_estimators=1), not sklearn's
        # DecisionTreeClassifier.
        "DT": XGBClassifier(use_label_encoder=False, n_estimators=1),
        "RF": XGBRFClassifier(n_estimators=100, booster='gbtree'),
        'GradientBoosting': GradientBoostingClassifier(),
        'KNN': KNeighborsClassifier(),
        'LR': LogisticRegression(),
        'NB': GaussianNB(),
        'MLP': MLPClassifier(),
        # max_iter must be an integer: scikit-learn's parameter validation
        # rejects the float literal 1e7 when the estimator is fitted.
        "SVM": SVC(kernel='linear', probability=True, max_iter=int(1e7)),
    }


# Learners trained on each individual modality.
base_predictors = _build_classifiers()

# Ensemble (meta) models: the same learners used as stackers, plus the two
# aggregation-based ensembles imported from `ei`.
meta_models = dict(
    _build_classifiers(),
    Mean=MeanAggregation(),
    Median=MedianAggregation(),
)

# Map each modality name to its feature matrix; these names are what the
# downstream code uses to refer to each view.
modalities = dict(
    view_0=X_view_0,
    view_1=X_view_1,
    view_2=X_view_2,
    view_3=X_view_3,
)

# Configure the ensemble-integration object.
# NOTE(review): k_outer / k_inner presumably set the outer / inner
# cross-validation fold counts, and n_samples the number of resampling
# rounds -- confirm against the `ei` package documentation.
EI = EnsembleIntegration(
    base_predictors=base_predictors,
    k_outer=5,
    k_inner=5,
    n_samples=1,
    sampling_strategy="undersampling",
    sampling_aggregation="mean",
    n_jobs=-1,  # set as -1 to use all available CPUs
    random_state=42,
    project_name="demo",
)

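# Optional manual workflow, left disabled in this demo: train the base
# predictors on each modality, then persist the EI object.
# for name, modality in modalities.items():
#     EI.train_base(modality, y, base_predictors, modality=name)

# EI.save()  # save EI as EI.demo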




In [5]:
from sklearn.metrics import average_precision_score

# Build the interpreter from the EI object, both model collections, the
# per-modality feature matrices and the labels.
# NOTE(review): `EI_interpreter` and `auprc_sklearn` are not imported by name
# in this notebook; presumably they come from `from interpretation import *`.
EI_int = EI_interpreter(
    EI_object=EI,
    base_predictors=base_predictors,
    meta_models=meta_models,
    modalities=modalities,
    y=y,
    metric=auprc_sklearn,
)

# Compute the rank product score (RPS) feature rankings. The captured log
# shows this call training base predictors, so expect it to be long-running.
EI_int.rank_product_score()


 Working on view_0 data... 


Training base predictors on outer training sets...


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    2.0s


KeyboardInterrupt: 

In [None]:
# Display the feature ranking stored under the 'S.AdaBoost' ensemble method
# (columns in the captured output: modality, feature, RPS, feature rank).
EI_int.ensemble_feature_ranking["S.AdaBoost"]

Unnamed: 0,modality,feature,RPS,feature rank,ensemble method
0,view_0,view_0_0,0.473,1.0,S.AdaBoost
18,view_3,view_3_3,0.41925,2.0,S.AdaBoost
5,view_1,view_1_0,0.4085,3.0,S.AdaBoost
14,view_2,view_2_4,0.38075,4.0,S.AdaBoost
8,view_1,view_1_3,0.344,5.0,S.AdaBoost
16,view_3,view_3_1,0.33325,6.0,S.AdaBoost
15,view_3,view_3_0,0.3225,7.0,S.AdaBoost
13,view_2,view_2_3,0.31875,8.0,S.AdaBoost
3,view_0,view_0_3,0.31175,9.5,S.AdaBoost
7,view_1,view_1_2,0.31175,9.5,S.AdaBoost
