In [1]:
# # for colab compatibility
# !pip install shap
# !pip install ppscore
# !pip install xgboost
# !pip install lightgbm

In [2]:
# %run data_load_wrapper.ipynb

In [51]:
from tqdm.notebook import trange
import sklearn.datasets as data
import pandas as pd
from sklearn.inspection import permutation_importance
from sklearn import *
from sklearn.feature_selection import *
from sklearn.datasets import make_classification
from sklearn.utils import Bunch
import matplotlib as plt
import seaborn as sns
from boruta import BorutaPy
import shap
import numpy as np
import ppscore as pps
from datetime import datetime
import lightgbm as lgbm
import xgboost

In [72]:
from heapq import nlargest

def sklearn_to_df(dataset, with_target=True):
    df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    y = dataset.target
    if with_target == True:
        df = df.join(pd.DataFrame(y, columns=['target']), on=df.index)
        return df 
    else:
        return df, y

def get_feat_imp_data_based(imp_type, x_train, y_train):
    imp_type_dict = {
        'mutual_info' : 'mutual_info_classif(x_train, y_train)',
        'f_test' : 'f_classif(x_train, y_train)[0]',       
        'pps' : "pps.predictors(x_train.join(y_train), 'target', sorted=False, output='df').model_score",
        'pearson' : "abs(x_train.join(y_train).corr(method='pearson').target).drop('target', axis=0)",
        'spearman': "abs(x_train.join(y_train).corr(method='spearman').target).drop('target', axis=0)",
        'kendall' : "abs(x_train.join(y_train).corr(method='kendall').target).drop('target', axis=0)",
    
    }
    timings = %timeit -o -n1 -r10 eval(imp_type_dict.get(imp_type))
    timings = np.array(timings.timings)
    return eval(imp_type_dict.get(imp_type)), timings

def get_feat_imp_model_based(imp_type, m, x_train, y_train):
    imp_type_dict = {
        'gini' : "m.feature_importances_",
        'pimp' : f"""permutation_importance(m, x_train, y_train, n_repeats=5, 
        random_state=0).importances_mean""",
        'shap' : "abs(np.array(shap.TreeExplainer(m).shap_values(x_train))).sum(axis=0)"
    }
    timings = %timeit -o -n1 -r10 eval(imp_type_dict.get(imp_type))
    timings = np.array(timings.timings)
    return eval(imp_type_dict.get(imp_type)), timings

def get_syn_stats_data_based(imp_types, datasets, dataset_states, 
                             informative_pct):
    results = []
    datasets_str = list(datasets.keys())
    for j in trange(len(datasets_str), desc=f'Dataset progress'):
        for i in trange(len(dataset_states), desc=f'Processing {datasets_str[j]}'):
            for k in trange(len(imp_types)):
                imp_type = imp_types[k] 
                dataset_state = dataset_states[i]
                dataset_str = datasets_str[j]
                dataset = datasets.get(datasets_str[j])
                df = sklearn_to_df(dataset)
                x_train, x_test, y_train, y_test = model_selection.train_test_split(df,
                                                                   dataset.target, 
                                                                   test_size=0.3, 
                                                                   random_state=dataset_state,
                                                                   stratify=df.target
                                                                  )
                y_train = x_train.target
                x_train = x_train.drop('target', axis=1)
                feat_size = len(x_train.columns)
                sel_feat_size = int(feat_size * informative_pct)
                fi, timings = get_feat_imp_data_based(imp_type, x_train, y_train)
                fi_order = nlargest(feat_size, range(len(fi)), key=lambda idx: fi[idx])
                fi_selected = fi_order[:sel_feat_size]
                result = [dataset_str, imp_type, 0, dataset_state, fi_order, fi_selected, timings]
#                 print(result)
                results.append(result)
#                 print(results)
        columns = ['dataset', 'imp_type', 'random_state', 'data_split', 
                       'fi_order', 'fi_selected', 'timings']
        res = pd.DataFrame(results, columns=columns)
    return res
        
def get_syn_stats_model_based(imp_types, datasets, 
                              dataset_states, informative_pct):
    results = []
    datasets_str = list(datasets.keys())
    for j in trange(len(datasets_str), desc=f'Dataset progress'):
        for i in trange(len(dataset_states), desc=f'Processing {datasets_str[j]}'):
            for k in trange(len(imp_types)):
                imp_type = imp_types[k] 
                dataset_state = dataset_states[i]
                dataset_str = datasets_str[j]
                dataset = datasets.get(datasets_str[j])
                df = sklearn_to_df(dataset, with_target=True)
                x_train, x_test, y_train, y_test = model_selection.train_test_split(df,
                                                                   dataset.target, 
                                                                   test_size=0.3, 
                                                                   random_state=dataset_state,
                                                                   stratify=df.target
                                                                  )
                y_train = x_train.target
                x_train = x_train.drop('target', axis=1)
                feat_size = len(x_train.columns)
                sel_feat_size = int(feat_size * informative_pct)
                m = ensemble.RandomForestClassifier(random_state=0, n_jobs=-1)
                m.fit(x_train, y_train)
                fi, timings = get_feat_imp_model_based(imp_type, m, x_train, y_train)
                fi_order = nlargest(feat_size, range(len(fi)), key=lambda idx: fi[idx])
                fi_selected = fi_order[:sel_feat_size]
                result = [dataset_str, imp_type, 0, dataset_state, fi_order, fi_selected, timings]
#                 print(result)
                results.append(result)
#                 print(results)
        columns = ['dataset', 'imp_type', 'random_state', 'data_split', 
                       'fi_order', 'fi_selected', 'timings']
        res = pd.DataFrame(results, columns=columns)
    return res


from wrapt_timeout_decorator import *

@timeout(10)
def get_shap_fi(imp_type, dataset_state, dataset_str, dataset, informative_pct):
    
    result = []
    df = sklearn_to_df(dataset, with_target=True)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(df,
                                                       dataset.target, 
                                                       test_size=0.3, 
                                                       random_state=dataset_state,
                                                       stratify=df.target
                                                      )
    y_train = x_train.target
    x_train = x_train.drop('target', axis=1)
    feat_size = len(x_train.columns)
    sel_feat_size = int(feat_size * informative_pct)
    m = ensemble.RandomForestClassifier(random_state=0, n_jobs=-1)
    m.fit(x_train, y_train)
    fi, timings = get_feat_imp_model_based(imp_type, m, x_train, y_train)
    fi = fi.mean(axis=0)
#     print(fi.shape)
    #                 imp_type, model, x_train, y_train, random_state
    fi_order = nlargest(feat_size, range(len(fi)), key=lambda idx: fi[idx])
    fi_selected = fi_order[:sel_feat_size]
    result = [dataset_str, imp_type, 0, dataset_state, fi_order, fi_selected, timings]
    #                 print(result)
#     result.append(result)
    return result

def get_syn_stats_shap_based(imp_types, datasets, 
                              dataset_states, informative_pct):
    results = []
    datasets_str = list(datasets.keys())
    for j in trange(len(datasets_str), desc=f'Dataset progress'):
        try:
            for i in trange(len(dataset_states), desc=f'Processing {datasets_str[j]}'):
                for k in trange(len(imp_types)):
                    imp_type = imp_types[k] 
                    dataset_state = dataset_states[i]
                    dataset_str = datasets_str[j]
                    dataset = datasets.get(datasets_str[j])
                    result = get_shap_fi(imp_type, dataset_state, dataset_str, dataset, informative_pct)
                    results.append(result)
        except TimeoutError as e:
            print(f'skipping the execution of {dataset_str} because of {e}')
            
    columns = ['dataset', 'imp_type', 'random_state', 'data_split', 
                       'fi_order', 'fi_selected', 'timings']
    result_df = pd.DataFrame(results, columns=columns)
    return result_df

def coerce_synthethic_datasets(samples, features, percent_informative, random_state):
    data, target = make_classification(n_samples=samples, n_features=features, 
                                       n_informative=int(percent_informative*features),
                                       n_redundant=0, shuffle=False,
                                       random_state=random_state
                                      )
    feature_names = [f'x_{i}' for i in range(features)]
    # data, target, feature_names
    data_bunch = Bunch(
        data=data,
        target=target,
        feature_names=feature_names,
        informative_features=feature_names[:int(percent_informative*features)]
                        )
    return data_bunch

In [70]:
imp_types_data = [
             'mutual_info',
             'f_test', 
             'pearson', 
             'spearman', 
             'kendall', 
             'pps'
            ]
imp_types_model = [
             'gini',
             'pimp', 
            ]

n_jobs = 3

random_states = [i for i in range(1)]
dataset_states = [i for i in range(10)]

features = [10, 100]
samples = [100, 1000, 10000]
# features = [100]
# samples = [10000]
# features = [10]
# samples = [100]
synthethic_states = [i for i in range(1)]
percent_informative = 0.2
tuples = []
for i in features:
    for j in samples:
        if i != j:
            for k in synthethic_states:            
                tuples.append((i,j,k))
datasets = {}

for feat, samples, random_seed in tuples:
    datasets[f'classif_features_{feat}_samples_{samples}_seed_{random_seed}'] = coerce_synthethic_datasets(samples, feat, 0.2, random_seed)

In [60]:
res_data = get_syn_stats_data_based(imp_types_data, datasets, dataset_states, 0.2)

Dataset progress:   0%|          | 0/5 [00:00<?, ?it/s]

Processing classif_features_10_samples_100_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

23.4 ms ± 3.22 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.66 ms ± 168 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.82 ms ± 317 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.56 ms ± 189 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
15.8 ms ± 934 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
89.9 ms ± 3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

17.2 ms ± 2.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.28 ms ± 108 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.62 ms ± 424 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.08 ms ± 234 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.8 ms ± 363 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
92.1 ms ± 3.61 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

18.4 ms ± 2.06 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.42 ms ± 86.6 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.63 ms ± 318 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.04 ms ± 188 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.6 ms ± 371 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
89.4 ms ± 5.17 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

17.7 ms ± 2.48 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.53 ms ± 122 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.59 ms ± 115 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.1 ms ± 162 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
15.4 ms ± 2.08 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
99 ms ± 13.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

20.1 ms ± 2.28 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.68 ms ± 89.9 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.69 ms ± 274 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.11 ms ± 292 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.8 ms ± 380 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
92.7 ms ± 3.85 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

18.4 ms ± 3.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.69 ms ± 161 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.83 ms ± 588 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.05 ms ± 230 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
15.1 ms ± 340 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
93.4 ms ± 5.46 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

16.8 ms ± 2.56 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.3 ms ± 127 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.53 ms ± 304 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.03 ms ± 178 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.6 ms ± 327 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
90.3 ms ± 4.42 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

17.4 ms ± 2.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.42 ms ± 148 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.33 ms ± 88 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.26 ms ± 196 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.9 ms ± 370 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
98 ms ± 5.96 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

17.5 ms ± 2.36 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.54 ms ± 150 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.47 ms ± 145 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.05 ms ± 186 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.6 ms ± 318 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
93.6 ms ± 5.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

17.3 ms ± 2.44 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.57 ms ± 117 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.67 ms ± 578 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.08 ms ± 176 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.7 ms ± 402 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
90.4 ms ± 4.72 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_10_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

31.4 ms ± 2.94 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.58 ms ± 97.7 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.66 ms ± 163 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.58 ms ± 162 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
19.7 ms ± 342 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
112 ms ± 4.41 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

30.4 ms ± 2.95 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.51 ms ± 310 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.6 ms ± 136 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.52 ms ± 237 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
19.6 ms ± 379 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
113 ms ± 795 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

31.2 ms ± 3.39 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.56 ms ± 135 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.62 ms ± 114 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.43 ms ± 149 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20.1 ms ± 825 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
113 ms ± 2.55 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

30.7 ms ± 2.92 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.5 ms ± 123 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.64 ms ± 263 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.68 ms ± 240 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
19.8 ms ± 452 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
114 ms ± 5.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

30.9 ms ± 2.69 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.49 ms ± 175 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.69 ms ± 178 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.73 ms ± 181 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20.1 ms ± 623 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
121 ms ± 8.99 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

34.9 ms ± 1.32 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.51 ms ± 109 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.8 ms ± 464 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.62 ms ± 184 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
21 ms ± 1.89 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
128 ms ± 17.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

32.2 ms ± 4.43 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.67 ms ± 469 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.66 ms ± 220 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.71 ms ± 165 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
21.1 ms ± 968 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
129 ms ± 10.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

30.9 ms ± 2.53 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.56 ms ± 186 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.69 ms ± 144 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.62 ms ± 160 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20 ms ± 638 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
115 ms ± 4.01 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

34.3 ms ± 4.18 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.85 ms ± 105 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.15 ms ± 274 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.32 ms ± 114 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20.8 ms ± 1.12 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
123 ms ± 6.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

35.1 ms ± 3.68 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.69 ms ± 197 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.82 ms ± 197 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.72 ms ± 247 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20.1 ms ± 414 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
122 ms ± 10.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_10_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

235 ms ± 3.24 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.32 ms ± 127 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.34 ms ± 292 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
9.85 ms ± 277 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
76.5 ms ± 1.69 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
450 ms ± 4.67 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

238 ms ± 4.85 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.35 ms ± 148 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.51 ms ± 214 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10 ms ± 398 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
77.1 ms ± 1.86 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
459 ms ± 9.97 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

237 ms ± 2.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.38 ms ± 146 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.51 ms ± 255 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.3 ms ± 254 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
80.2 ms ± 2.38 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
442 ms ± 10.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

236 ms ± 778 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.23 ms ± 121 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.37 ms ± 145 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
9.82 ms ± 239 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
77.3 ms ± 2.02 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
463 ms ± 4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

237 ms ± 4.83 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.3 ms ± 121 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.26 ms ± 239 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
9.6 ms ± 176 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
77.6 ms ± 1.66 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
426 ms ± 2.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

234 ms ± 1.49 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.21 ms ± 108 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.27 ms ± 218 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
9.58 ms ± 138 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
79.9 ms ± 1.81 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
421 ms ± 2.84 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

237 ms ± 1.29 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.28 ms ± 225 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.34 ms ± 177 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
9.67 ms ± 105 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
79.5 ms ± 2.95 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
454 ms ± 4.21 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

237 ms ± 2.92 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.42 ms ± 241 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.64 ms ± 307 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.2 ms ± 249 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
79.8 ms ± 2.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
456 ms ± 4.42 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

245 ms ± 5.05 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.42 ms ± 426 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.4 ms ± 202 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
9.83 ms ± 179 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
78.2 ms ± 1.71 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
434 ms ± 11.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

234 ms ± 1.39 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.27 ms ± 116 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.35 ms ± 196 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
9.6 ms ± 128 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
78 ms ± 1.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
457 ms ± 1.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_100_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

280 ms ± 8.81 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.23 ms ± 106 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.1 ms ± 144 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
35.3 ms ± 598 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.67 s ± 16.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.1 s ± 3.91 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

276 ms ± 5.25 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.2 ms ± 73 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
13.7 ms ± 182 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
35 ms ± 494 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.66 s ± 7.04 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.11 s ± 3.05 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

299 ms ± 13 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.43 ms ± 201 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.1 ms ± 106 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
36.3 ms ± 300 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.77 s ± 19.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.19 s ± 50.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

293 ms ± 8.17 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.28 ms ± 111 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.1 ms ± 218 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
35.6 ms ± 530 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.77 s ± 49.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.11 s ± 16 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

311 ms ± 16.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.31 ms ± 221 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14 ms ± 162 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
35.8 ms ± 362 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.7 s ± 28.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.13 s ± 14.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

285 ms ± 11.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.36 ms ± 132 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14 ms ± 336 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
36.3 ms ± 2.33 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.76 s ± 30.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.16 s ± 33.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

294 ms ± 11.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.37 ms ± 131 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.3 ms ± 315 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
36.6 ms ± 659 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.74 s ± 29.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.24 s ± 32.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

313 ms ± 7.75 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.55 ms ± 224 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.5 ms ± 686 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
36.9 ms ± 2.06 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.7 s ± 21.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.15 s ± 32.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

289 ms ± 10.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.49 ms ± 233 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.2 ms ± 251 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
36.2 ms ± 692 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.68 s ± 5.51 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.15 s ± 20.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

280 ms ± 9.65 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.27 ms ± 153 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
13.7 ms ± 145 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
36.1 ms ± 1.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.69 s ± 22.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.27 s ± 78.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_100_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

2.39 s ± 32.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
8 ms ± 396 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
123 ms ± 667 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
349 ms ± 8.88 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.55 s ± 117 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.84 s ± 131 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

2.43 s ± 72 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
8.13 ms ± 558 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
124 ms ± 992 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
344 ms ± 10.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.65 s ± 311 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.66 s ± 85.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

2.38 s ± 20.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
8.23 ms ± 708 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
124 ms ± 519 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
341 ms ± 9.34 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.3 s ± 62.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.65 s ± 64.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

2.39 s ± 14.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
8.19 ms ± 763 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
123 ms ± 365 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
327 ms ± 4.62 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.52 s ± 156 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.74 s ± 102 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

2.4 s ± 20.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
9.91 ms ± 1.39 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
125 ms ± 215 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
315 ms ± 10.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.6 s ± 151 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.8 s ± 100 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

2.44 s ± 26 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
8.89 ms ± 615 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
126 ms ± 556 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
326 ms ± 14.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.86 s ± 313 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.67 s ± 85.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

2.37 s ± 12.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
8.31 ms ± 636 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
124 ms ± 297 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
291 ms ± 5.01 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.33 s ± 230 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.54 s ± 28.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

2.3 s ± 5.52 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
8.14 ms ± 566 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
124 ms ± 360 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
337 ms ± 6.63 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.34 s ± 126 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.81 s ± 39.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

2.4 s ± 4.04 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
8.23 ms ± 507 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
125 ms ± 739 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
322 ms ± 4.37 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.59 s ± 83.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.75 s ± 144 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

2.39 s ± 23.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
8.94 ms ± 528 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
122 ms ± 752 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
316 ms ± 6.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.51 s ± 102 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.69 s ± 72 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


In [63]:
res_model = get_syn_stats_model_based(imp_types_model, datasets, dataset_states, 0.2)

Dataset progress:   0%|          | 0/5 [00:00<?, ?it/s]

Processing classif_features_10_samples_100_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

17.4 ms ± 4.69 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.21 s ± 30.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.7 ms ± 1.07 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.28 s ± 131 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.1 ms ± 1.32 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.19 s ± 23.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.1 ms ± 1.23 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.18 s ± 15 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.1 ms ± 1.28 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.19 s ± 12.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.7 ms ± 1.23 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.19 s ± 26.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14 ms ± 1.31 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.17 s ± 13.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.4 ms ± 1.73 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.18 s ± 9.27 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14 ms ± 1.82 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.18 s ± 18.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.9 ms ± 1.79 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.26 s ± 117 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_10_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

16.3 ms ± 4.68 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.4 s ± 45.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.7 ms ± 1.54 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.43 s ± 56 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.4 ms ± 1.05 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.37 s ± 31.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.1 ms ± 631 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.38 s ± 39.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.3 ms ± 1.28 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.55 s ± 127 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.6 ms ± 854 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.39 s ± 62.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.8 ms ± 1.39 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Exception ignored in: <function tqdm.__del__ at 0x7f0250414940>
Traceback (most recent call last):
  File "/home/rob/.local/lib/python3.8/site-packages/tqdm/std.py", line 1134, in __del__
    self.close()
  File "/home/rob/.local/lib/python3.8/site-packages/tqdm/notebook.py", line 280, in close
    self.disp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


1.35 s ± 18.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.2 ms ± 540 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.36 s ± 13.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.1 ms ± 619 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.36 s ± 14.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.5 ms ± 2.11 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.35 s ± 10.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_10_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

16.7 ms ± 815 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.1 s ± 34.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.3 ms ± 1.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.1 s ± 16.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17 ms ± 1.04 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.11 s ± 34.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.9 ms ± 2.23 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.12 s ± 27.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.3 ms ± 2.41 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.09 s ± 28.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.2 ms ± 1.31 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.12 s ± 31.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.1 ms ± 968 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.1 s ± 24 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.5 ms ± 1.76 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.15 s ± 135 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.3 ms ± 383 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.13 s ± 57 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.8 ms ± 1.08 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.1 s ± 27.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_100_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

16 ms ± 988 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
15.4 s ± 1.12 s per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.2 ms ± 3.26 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
13.7 s ± 169 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.5 ms ± 700 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.6 s ± 711 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.4 ms ± 1.52 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
15.6 s ± 509 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.4 ms ± 1.37 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
15 s ± 224 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.4 ms ± 2.37 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
15 s ± 349 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.8 ms ± 1.47 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.9 s ± 194 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.1 ms ± 1.93 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
15 s ± 554 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.5 ms ± 691 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
13.8 s ± 631 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.6 ms ± 1.43 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
13.4 s ± 94.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_100_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

16.8 ms ± 1.46 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
28.4 s ± 3.99 s per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

18 ms ± 2.77 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
28.1 s ± 2.39 s per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.9 ms ± 1.31 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
25.1 s ± 113 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17 ms ± 1.41 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
25.3 s ± 486 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.2 ms ± 806 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
25.1 s ± 95.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.3 ms ± 1.17 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
25.2 s ± 136 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.5 ms ± 1.36 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
25.1 s ± 145 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17 ms ± 1.67 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
26 s ± 405 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.8 ms ± 1.18 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
26 s ± 108 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.1 ms ± 1.99 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
26.4 s ± 1.57 s per loop (mean ± std. dev. of 10 runs, 1 loop each)


In [73]:
# shap is slow, need timeout sig to stop process
res_shap = get_syn_stats_shap_based(['shap'], datasets, dataset_states, 0.2)

Dataset progress:   0%|          | 0/5 [00:00<?, ?it/s]

Processing classif_features_10_samples_100_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

13.8 ms ± 4.53 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

10.8 ms ± 1.43 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

10.8 ms ± 1.39 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

10.8 ms ± 1.39 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

11.6 ms ± 1.94 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

10.3 ms ± 1.12 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

8.99 ms ± 887 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

11 ms ± 1.95 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

11.3 ms ± 1.82 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

12.5 ms ± 1.98 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_10_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

446 ms ± 8.42 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

449 ms ± 3.87 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

400 ms ± 2.02 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

417 ms ± 2.44 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

392 ms ± 2.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

438 ms ± 13.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

434 ms ± 2.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

382 ms ± 2.22 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

440 ms ± 2.93 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/1 [00:00<?, ?it/s]

453 ms ± 6.18 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_10_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

skipping the execution of classif_features_10_samples_10000_seed_0 because of Function get_shap_fi timed out after 10.0 seconds


Processing classif_features_100_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

skipping the execution of classif_features_100_samples_1000_seed_0 because of Function get_shap_fi timed out after 10.0 seconds


Processing classif_features_100_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

skipping the execution of classif_features_100_samples_10000_seed_0 because of Function get_shap_fi timed out after 10.0 seconds


In [74]:
frames = [res_data, res_model, res_shap]

In [75]:
final_df = pd.concat(frames);

In [76]:
final_df

Unnamed: 0,dataset,imp_type,random_state,data_split,fi_order,fi_selected,timings
0,classif_features_10_samples_100_seed_0,mutual_info,0,0,"[1, 5, 9, 7, 0, 2, 3, 4, 6, 8]","[1, 5]","[0.030104098999800044, 0.02757260100042913, 0...."
1,classif_features_10_samples_100_seed_0,f_test,0,0,"[1, 2, 7, 4, 3, 6, 5, 0, 9, 8]","[1, 2]","[0.0019199010002921568, 0.001570456999615999, ..."
2,classif_features_10_samples_100_seed_0,pearson,0,0,"[1, 2, 7, 4, 3, 6, 5, 0, 9, 8]","[1, 2]","[0.002370285999859334, 0.001950051000676467, 0..."
3,classif_features_10_samples_100_seed_0,spearman,0,0,"[1, 4, 2, 3, 7, 5, 6, 9, 0, 8]","[1, 4]","[0.0026753900001494912, 0.002591398000731715, ..."
4,classif_features_10_samples_100_seed_0,kendall,0,0,"[1, 4, 2, 3, 7, 5, 6, 9, 0, 8]","[1, 4]","[0.01813015900006576, 0.016762551998908748, 0...."
...,...,...,...,...,...,...,...
15,classif_features_10_samples_1000_seed_0,shap,0,5,"[1, 0, 8, 6, 3, 7, 9, 2, 4, 5]","[1, 0]","[0.4789532600007078, 0.4347276909975335, 0.433..."
16,classif_features_10_samples_1000_seed_0,shap,0,6,"[1, 0, 6, 5, 7, 4, 9, 2, 8, 3]","[1, 0]","[0.43566127400117693, 0.43838538199997856, 0.4..."
17,classif_features_10_samples_1000_seed_0,shap,0,7,"[1, 0, 6, 8, 3, 9, 5, 4, 2, 7]","[1, 0]","[0.3879009540032712, 0.3809254849984427, 0.380..."
18,classif_features_10_samples_1000_seed_0,shap,0,8,"[1, 0, 3, 5, 8, 9, 7, 2, 6, 4]","[1, 0]","[0.44764364100046805, 0.43974558300033095, 0.4..."


In [77]:
# export results to local dir as csv file
final_df.to_csv(f'classification_synthethic_data_{datetime.today().strftime("%Y-%b-%d-%H:%M:%S")}.csv',
             index=False)

In [62]:
## plots
# import seaborn as sns
# plot = sns.lineplot(x='iteration', y='mcc_test', hue='imp_type',
#             data=df_res.query('''model == "XGBClassifier"''')
#             )

# plot = sns.lineplot(x='iteration', y='mcc_test', hue='imp_type',
#             data=df_res.query('''model == "DecisionTreeClassifier"''')
#             )

# plot = sns.lineplot(x='iteration', y='mcc_test', hue='imp_type',
#             data=df_res.query('''model == "RandomForestClassifier"''')
#             )

In [250]:
# export results to local dir as csv file
# df_res.to_csv(f'classification_synthethic_data_{datetime.today().strftime("%Y-%b-%d-%H:%M:%S")}.csv',
#              index=False)