In [1]:
# # for colab compatibility
# !pip install shap
# !pip install ppscore
# !pip install xgboost
# !pip install lightgbm

In [2]:
# %run data_load_wrapper.ipynb

In [3]:
from tqdm.notebook import trange
import sklearn.datasets as data
import pandas as pd
from sklearn.inspection import permutation_importance
from sklearn import *
from sklearn.feature_selection import *
from sklearn.datasets import make_regression
from sklearn.utils import Bunch
import matplotlib as plt
import seaborn as sns
import shap
import numpy as np
import ppscore as pps
from datetime import datetime
import xgboost

In [16]:
from heapq import nlargest

def sklearn_to_df(dataset, with_target=True):
    df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    y = dataset.target
    if with_target == True:
        df = df.join(pd.DataFrame(y, columns=['target']), on=df.index)
        return df 
    else:
        return df, y

def get_feat_imp_data_based(imp_type, x_train, y_train):
    imp_type_dict = {
        'mutual_info' : 'mutual_info_regression(x_train, y_train)',
        'f_test' : 'f_regression(x_train, y_train)[0]',       
        'pps' : "pps.predictors(x_train.join(y_train), 'target', sorted=False, output='df').model_score",
        'pearson' : "abs(x_train.join(y_train).corr(method='pearson').target).drop('target', axis=0)",
        'spearman': "abs(x_train.join(y_train).corr(method='spearman').target).drop('target', axis=0)",
        'kendall' : "abs(x_train.join(y_train).corr(method='kendall').target).drop('target', axis=0)",
    
    }
    timings = %timeit -o -n1 -r10 eval(imp_type_dict.get(imp_type))
    timings = np.array(timings.timings)
    return eval(imp_type_dict.get(imp_type)), timings

def get_feat_imp_model_based(imp_type, m, x_train, y_train):
    imp_type_dict = {
        'gini' : "m.feature_importances_",
        'pimp' : f"""permutation_importance(m, x_train, y_train, n_repeats=5, 
        random_state=0).importances_mean""",
        'shap' : "abs(np.array(shap.TreeExplainer(m).shap_values(x_train))).sum(axis=0)"
    }
    timings = %timeit -o -n1 -r10 eval(imp_type_dict.get(imp_type))
    timings = np.array(timings.timings)
    return eval(imp_type_dict.get(imp_type)), timings

def get_syn_stats_data_based(imp_types, datasets, dataset_states, 
                             informative_pct):
    results = []
    datasets_str = list(datasets.keys())
    for j in trange(len(datasets_str), desc=f'Dataset progress'):
        for i in trange(len(dataset_states), desc=f'Processing {datasets_str[j]}'):
            for k in trange(len(imp_types)):
                imp_type = imp_types[k] 
                dataset_state = dataset_states[i]
                dataset_str = datasets_str[j]
                dataset = datasets.get(datasets_str[j])
                df = sklearn_to_df(dataset)
                x_train, x_test, y_train, y_test = model_selection.train_test_split(df,
                                                                   dataset.target, 
                                                                   test_size=0.3, 
                                                                   random_state=dataset_state
                                                                  )
                y_train = x_train.target
                x_train = x_train.drop('target', axis=1)
                feat_size = len(x_train.columns)
                sel_feat_size = int(feat_size * informative_pct)
                fi, timings = get_feat_imp_data_based(imp_type, x_train, y_train)
                fi_order = nlargest(feat_size, range(len(fi)), key=lambda idx: fi[idx])
                fi_selected = fi_order[:sel_feat_size]
                result = [dataset_str, imp_type, 0, dataset_state, fi_order, fi_selected, timings]
#                 print(result)
                results.append(result)
#                 print(results)
        columns = ['dataset', 'imp_type', 'random_state', 'data_split', 
                       'fi_order', 'fi_selected', 'timings']
        res = pd.DataFrame(results, columns=columns)
    return res
        
def get_syn_stats_model_based(imp_types, datasets, 
                              dataset_states, informative_pct):
    results = []
    datasets_str = list(datasets.keys())
    for j in trange(len(datasets_str), desc=f'Dataset progress'):
        for i in trange(len(dataset_states), desc=f'Processing {datasets_str[j]}'):
            for k in trange(len(imp_types)):
                imp_type = imp_types[k] 
                dataset_state = dataset_states[i]
                dataset_str = datasets_str[j]
                dataset = datasets.get(datasets_str[j])
                df = sklearn_to_df(dataset, with_target=True)
                x_train, x_test, y_train, y_test = model_selection.train_test_split(df,
                                                                   dataset.target, 
                                                                   test_size=0.3, 
                                                                   random_state=dataset_state
                                                                  )
                y_train = x_train.target
                x_train = x_train.drop('target', axis=1)
                feat_size = len(x_train.columns)
                sel_feat_size = int(feat_size * informative_pct)
                m = ensemble.RandomForestRegressor(random_state=0, n_jobs=-1)
                m.fit(x_train, y_train)
                fi, timings = get_feat_imp_model_based(imp_type, m, x_train, y_train)
                fi_order = nlargest(feat_size, range(len(fi)), key=lambda idx: fi[idx])
                fi_selected = fi_order[:sel_feat_size]
                result = [dataset_str, imp_type, 0, dataset_state, fi_order, fi_selected, timings]
#                 print(result)
                results.append(result)
#                 print(results)
        columns = ['dataset', 'imp_type', 'random_state', 'data_split', 
                       'fi_order', 'fi_selected', 'timings']
        res = pd.DataFrame(results, columns=columns)
    return res


from wrapt_timeout_decorator import *

@timeout(60)
def get_shap_fi(imp_type, dataset_state, dataset_str, dataset, informative_pct):
    
    result = []
    df = sklearn_to_df(dataset, with_target=True)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(df,
                                                       dataset.target, 
                                                       test_size=0.3, 
                                                       random_state=dataset_state
                                                      )
    y_train = x_train.target
    x_train = x_train.drop('target', axis=1)
    feat_size = len(x_train.columns)
    sel_feat_size = int(feat_size * informative_pct)
    m = ensemble.RandomForestRegressor(random_state=0, n_jobs=-1)
    m.fit(x_train, y_train)
    fi, timings = get_feat_imp_model_based(imp_type, m, x_train, y_train)
    print(fi.shape)
#     fi = fi.mean(axis=0)
#     print(fi.shape)
    #                 imp_type, model, x_train, y_train, random_state
    fi_order = nlargest(feat_size, range(len(fi)), key=lambda idx: fi[idx])
    fi_selected = fi_order[:sel_feat_size]
    result = [dataset_str, imp_type, 0, dataset_state, fi_order, fi_selected, timings]
    #                 print(result)
#     result.append(result)
    return result

def get_syn_stats_shap_based(imp_types, datasets, 
                              dataset_states, informative_pct):
    results = []
    datasets_str = list(datasets.keys())
    for j in trange(len(datasets_str), desc=f'Dataset progress'):
        try:
            for i in trange(len(dataset_states), desc=f'Processing {datasets_str[j]}'):
                for k in trange(len(imp_types)):
                    imp_type = imp_types[k] 
                    dataset_state = dataset_states[i]
                    dataset_str = datasets_str[j]
                    dataset = datasets.get(datasets_str[j])
                    result = get_shap_fi(imp_type, dataset_state, dataset_str, dataset, informative_pct)
                    results.append(result)
        except TimeoutError as e:
            print(f'skipping the execution of {dataset_str} because of {e}')
            
    columns = ['dataset', 'imp_type', 'random_state', 'data_split', 
                       'fi_order', 'fi_selected', 'timings']
    result_df = pd.DataFrame(results, columns=columns)
    return result_df

def coerce_synthethic_datasets(samples, features, percent_informative, random_state):
    data, target = make_regression(n_samples=samples, n_features=features, 
                                       n_informative=int(percent_informative*features),
                                       shuffle=False,
                                       random_state=random_state
                                      )
    feature_names = [f'x_{i}' for i in range(features)]
    # data, target, feature_names
    data_bunch = Bunch(
        data=data,
        target=target,
        feature_names=feature_names,
        informative_features=feature_names[:int(percent_informative*features)]
                        )
    return data_bunch

In [5]:
imp_types_data = [
             'mutual_info',
             'f_test', 
             'pearson', 
             'spearman', 
             'kendall', 
             'pps'
            ]
imp_types_model = [
             'gini',
             'pimp', 
            ]

n_jobs = 3

random_states = [i for i in range(1)]
dataset_states = [i for i in range(10)]

features = [10, 100]
samples = [100, 1000, 10000]
# features = [100]
# samples = [10000]
# features = [10]
# samples = [100]
synthethic_states = [i for i in range(1)]
percent_informative = 0.2
tuples = []
for i in features:
    for j in samples:
        if i != j:
            for k in synthethic_states:            
                tuples.append((i,j,k))
datasets = {}

for feat, samples, random_seed in tuples:
    datasets[f'regr_features_{feat}_samples_{samples}_seed_{random_seed}'] = coerce_synthethic_datasets(samples, feat, 0.2, random_seed)

In [6]:
res_data = get_syn_stats_data_based(imp_types_data, datasets, dataset_states, 0.2)

Dataset progress:   0%|          | 0/5 [00:00<?, ?it/s]

Processing classif_features_10_samples_100_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

12.4 ms ± 1.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.55 ms ± 145 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.06 ms ± 180 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.28 ms ± 430 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
15.1 ms ± 405 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
87 ms ± 5.12 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

12.9 ms ± 1.13 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.57 ms ± 163 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.68 ms ± 227 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.01 ms ± 201 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.8 ms ± 313 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
84.9 ms ± 2.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

12.5 ms ± 1.55 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.58 ms ± 98.2 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.61 ms ± 187 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.92 ms ± 153 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.8 ms ± 348 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
84.6 ms ± 3.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

12.4 ms ± 1.42 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.51 ms ± 125 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.69 ms ± 326 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2 ms ± 239 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.9 ms ± 351 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
83.3 ms ± 3.25 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

12.6 ms ± 1.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.53 ms ± 118 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.56 ms ± 206 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.91 ms ± 151 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.7 ms ± 319 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
83.1 ms ± 2.94 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

12.2 ms ± 1.49 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.52 ms ± 138 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.64 ms ± 177 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.92 ms ± 165 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
15.3 ms ± 2.12 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
84.4 ms ± 3.48 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

12.3 ms ± 1.41 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.6 ms ± 202 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.52 ms ± 135 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.96 ms ± 167 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
15.8 ms ± 2.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
84.3 ms ± 2.64 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

12.4 ms ± 1.59 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.97 ms ± 493 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.55 ms ± 126 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.92 ms ± 149 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.8 ms ± 360 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
86.8 ms ± 3.96 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

12.4 ms ± 1.14 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.11 ms ± 562 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.51 ms ± 154 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.87 ms ± 116 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.9 ms ± 365 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
84.4 ms ± 2.43 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

12.2 ms ± 1.25 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.53 ms ± 154 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.54 ms ± 160 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.97 ms ± 193 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.7 ms ± 351 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
84.3 ms ± 2.99 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_10_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

31.2 ms ± 1.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.68 ms ± 635 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.65 ms ± 215 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.56 ms ± 151 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20.1 ms ± 289 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
109 ms ± 2.07 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

32.1 ms ± 2.15 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.48 ms ± 134 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.72 ms ± 123 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.62 ms ± 146 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20.3 ms ± 354 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
110 ms ± 4.18 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

35.3 ms ± 773 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.72 ms ± 63.7 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.02 ms ± 172 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.07 ms ± 104 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
21.1 ms ± 2.21 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
110 ms ± 2.72 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

31.6 ms ± 1.55 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.51 ms ± 107 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.84 ms ± 167 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.62 ms ± 204 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20.6 ms ± 1.69 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
109 ms ± 3.18 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

32.2 ms ± 1.96 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.48 ms ± 123 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.74 ms ± 147 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.65 ms ± 132 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20 ms ± 421 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
108 ms ± 2.84 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

32 ms ± 2.77 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.62 ms ± 534 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.81 ms ± 208 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.58 ms ± 93.5 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20.2 ms ± 360 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
108 ms ± 1.87 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

31.6 ms ± 1.56 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.49 ms ± 110 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.77 ms ± 194 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.57 ms ± 122 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20.2 ms ± 323 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
109 ms ± 2.39 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

32 ms ± 2.01 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.02 ms ± 369 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.74 ms ± 121 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.58 ms ± 145 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
19.9 ms ± 368 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
108 ms ± 2.41 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

31.4 ms ± 1.97 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.71 ms ± 754 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.84 ms ± 177 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.75 ms ± 267 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20.6 ms ± 446 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
112 ms ± 3.71 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

31.5 ms ± 1.51 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.56 ms ± 110 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.71 ms ± 218 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.6 ms ± 155 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
20.2 ms ± 338 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
109 ms ± 2.17 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_10_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

319 ms ± 1.23 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.66 ms ± 184 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.69 ms ± 160 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.2 ms ± 171 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
83.2 ms ± 2.91 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
396 ms ± 1.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

324 ms ± 981 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.65 ms ± 102 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.71 ms ± 140 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.3 ms ± 314 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
82 ms ± 1.62 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
411 ms ± 2.05 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

322 ms ± 1.17 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.41 ms ± 1.28 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.71 ms ± 296 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.3 ms ± 268 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
82.1 ms ± 1.52 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
399 ms ± 2.06 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

318 ms ± 747 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.84 ms ± 511 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.78 ms ± 294 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.1 ms ± 200 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
80.9 ms ± 1.54 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
414 ms ± 1.88 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

318 ms ± 1.53 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.71 ms ± 324 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.55 ms ± 248 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.7 ms ± 690 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
82.1 ms ± 1.79 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
422 ms ± 2.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

321 ms ± 912 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.72 ms ± 97.6 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.54 ms ± 256 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.5 ms ± 117 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
81.9 ms ± 1.29 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
432 ms ± 3.83 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

322 ms ± 1.44 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.95 ms ± 900 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.58 ms ± 239 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.1 ms ± 268 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
82.3 ms ± 1.88 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
437 ms ± 1.65 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

322 ms ± 2.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
The slowest run took 6.70 times longer than the fastest. This could mean that an intermediate result is being cached.
4.09 ms ± 2.86 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.34 ms ± 755 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.4 ms ± 460 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
81.5 ms ± 1.13 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
424 ms ± 3.86 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

316 ms ± 2.49 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.21 ms ± 811 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.98 ms ± 562 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.2 ms ± 138 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
82.2 ms ± 2.32 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
412 ms ± 2.37 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

315 ms ± 1.61 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.94 ms ± 528 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.48 ms ± 227 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.2 ms ± 84.9 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
82.1 ms ± 1.56 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
419 ms ± 3.65 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_100_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

299 ms ± 4.89 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.05 ms ± 256 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.1 ms ± 146 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
35.9 ms ± 517 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.67 s ± 5.68 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.07 s ± 7.83 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

297 ms ± 3.35 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.05 ms ± 148 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.1 ms ± 121 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
37.1 ms ± 1.95 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.67 s ± 8.49 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.17 s ± 69.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

303 ms ± 7.39 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.26 ms ± 724 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.1 ms ± 103 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
36.4 ms ± 825 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.68 s ± 21.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.15 s ± 66.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

352 ms ± 18.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
5.28 ms ± 1.21 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
18.1 ms ± 6.13 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
38.8 ms ± 1.44 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.79 s ± 90.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.08 s ± 19.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

296 ms ± 13.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2 ms ± 264 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.2 ms ± 184 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
36.5 ms ± 444 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.72 s ± 42.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.16 s ± 52.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

318 ms ± 19.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.46 ms ± 1.53 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.8 ms ± 286 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
38.7 ms ± 1.27 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.74 s ± 40 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.13 s ± 55.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

314 ms ± 15.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
6.29 ms ± 1.85 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.5 ms ± 116 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
38 ms ± 1.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.74 s ± 63.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.11 s ± 34 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

292 ms ± 4.98 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
The slowest run took 4.39 times longer than the fastest. This could mean that an intermediate result is being cached.
3.86 ms ± 1.95 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.3 ms ± 427 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
36.3 ms ± 549 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.7 s ± 15.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.09 s ± 19.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

303 ms ± 5.41 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.3 ms ± 917 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.1 ms ± 135 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
37.3 ms ± 2.03 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.69 s ± 22.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.11 s ± 36.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

298 ms ± 5.02 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
The slowest run took 4.63 times longer than the fastest. This could mean that an intermediate result is being cached.
3.29 ms ± 2.48 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
14.4 ms ± 485 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
36.7 ms ± 424 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.75 s ± 101 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.1 s ± 38.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_100_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

3.21 s ± 20.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
5.37 ms ± 2.27 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
124 ms ± 947 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
317 ms ± 7.32 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.27 s ± 12.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.23 s ± 7.07 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

3.11 s ± 6.12 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.29 ms ± 170 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
124 ms ± 955 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
299 ms ± 6.35 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.26 s ± 13.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.21 s ± 40.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

3.25 s ± 32.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.15 ms ± 1.27 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
126 ms ± 981 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
294 ms ± 2.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.25 s ± 10.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.32 s ± 5.41 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

3.2 s ± 4.89 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.75 ms ± 2.24 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
125 ms ± 871 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
299 ms ± 2.49 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.27 s ± 17.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.26 s ± 6.31 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

3.12 s ± 4.23 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.8 ms ± 1.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
125 ms ± 1.23 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
296 ms ± 1.89 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.29 s ± 14.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.38 s ± 98.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

3.24 s ± 53.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.13 ms ± 80.1 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
125 ms ± 662 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
292 ms ± 1.33 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.41 s ± 164 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.37 s ± 156 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

3.18 s ± 20.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.21 ms ± 152 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
124 ms ± 962 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
299 ms ± 4.41 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.27 s ± 75 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.17 s ± 6.78 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

3.16 s ± 6.29 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.7 ms ± 2.63 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
126 ms ± 727 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
300 ms ± 4.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.24 s ± 46.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.25 s ± 3.74 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

3.18 s ± 4.59 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.05 ms ± 1.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
125 ms ± 749 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
293 ms ± 4.09 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.25 s ± 4.85 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.18 s ± 2.88 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/6 [00:00<?, ?it/s]

3.15 s ± 6.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
3.49 ms ± 526 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
125 ms ± 699 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
298 ms ± 4.17 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
7.23 s ± 37.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
4.14 s ± 7.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


In [12]:
res_model = get_syn_stats_model_based(imp_types_model, datasets, dataset_states, 0.2)

Dataset progress:   0%|          | 0/5 [00:00<?, ?it/s]

Processing classif_features_10_samples_100_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

15.3 ms ± 2.74 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.01 s ± 20.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.1 ms ± 839 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.04 s ± 44.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.9 ms ± 1.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.04 s ± 51.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.4 ms ± 1.87 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.08 s ± 92.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.3 ms ± 907 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.07 s ± 64.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.4 ms ± 1.14 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.04 s ± 45.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

20.5 ms ± 4.15 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.11 s ± 63.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

14.4 ms ± 777 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.09 s ± 84.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.4 ms ± 1.99 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.03 s ± 25.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

18.1 ms ± 4.75 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.1 s ± 145 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_10_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

16.6 ms ± 886 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.15 s ± 105 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

18.3 ms ± 1.09 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.13 s ± 93 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.4 ms ± 735 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.14 s ± 91.4 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.5 ms ± 2.26 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.35 s ± 263 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.6 ms ± 542 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.08 s ± 41.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.6 ms ± 1.18 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.09 s ± 88.6 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

18.9 ms ± 2.37 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.28 s ± 218 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17 ms ± 2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.21 s ± 95.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.7 ms ± 1.38 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.18 s ± 101 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

22.7 ms ± 7.63 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
1.12 s ± 78.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_10_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

17.4 ms ± 1.75 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.51 s ± 195 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

18.2 ms ± 1.57 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.7 s ± 212 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.9 ms ± 321 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.67 s ± 300 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

20.7 ms ± 3.83 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.8 s ± 355 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17 ms ± 1.07 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.88 s ± 551 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

22.6 ms ± 4.44 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.85 s ± 358 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

19.3 ms ± 1.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.8 s ± 259 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.8 ms ± 310 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.35 s ± 213 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.9 ms ± 745 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.66 s ± 310 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.5 ms ± 1.66 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
2.48 s ± 357 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_100_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

17.8 ms ± 2.13 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
11.8 s ± 378 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.5 ms ± 1.96 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
10.9 s ± 669 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

15.9 ms ± 1.06 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
11.5 s ± 867 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

16.2 ms ± 859 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
12.5 s ± 842 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

19.3 ms ± 3.22 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
12.1 s ± 925 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17 ms ± 1.53 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
11.7 s ± 492 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.6 ms ± 789 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
13.1 s ± 1.45 s per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.8 ms ± 1.53 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
12 s ± 274 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.7 ms ± 610 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
13.2 s ± 837 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

18.5 ms ± 1.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
12.8 s ± 937 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


Processing classif_features_100_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

16.8 ms ± 1.57 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
31.9 s ± 960 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

19.3 ms ± 1.79 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
31.3 s ± 721 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

18.4 ms ± 2.19 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
34.9 s ± 3.72 s per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

18.2 ms ± 375 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
31.7 s ± 223 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

19.1 ms ± 2.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
33.3 s ± 2.23 s per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

19.3 ms ± 2.04 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
32 s ± 2.57 s per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

18.8 ms ± 1.66 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
30.7 s ± 151 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

18.8 ms ± 1.07 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
31.1 s ± 907 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.1 ms ± 633 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
30.4 s ± 847 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


  0%|          | 0/2 [00:00<?, ?it/s]

17.9 ms ± 699 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
32 s ± 1.41 s per loop (mean ± std. dev. of 10 runs, 1 loop each)


In [17]:
# shap is slow, need timeout sig to stop process
res_shap = get_syn_stats_shap_based(['shap'], datasets, dataset_states, 0.2)

Dataset progress:   0%|          | 0/5 [00:00<?, ?it/s]

Processing classif_features_10_samples_100_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

47.2 ms ± 1.58 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

41.9 ms ± 3.15 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

39.7 ms ± 1.58 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

39.5 ms ± 1.93 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

40.9 ms ± 1.17 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

38.3 ms ± 672 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

40.6 ms ± 3.13 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

40.1 ms ± 1.39 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

40.8 ms ± 708 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

38.7 ms ± 1.08 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


Processing classif_features_10_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

3.99 s ± 161 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

3.82 s ± 149 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

3.83 s ± 79.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

3.86 s ± 208 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

4.06 s ± 21.5 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

3.65 s ± 11.1 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

3.92 s ± 224 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

3.96 s ± 43.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

3.66 s ± 11.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


  0%|          | 0/1 [00:00<?, ?it/s]

3.65 s ± 9.69 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
(10,)


Processing classif_features_10_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

skipping the execution of classif_features_10_samples_10000_seed_0 because of Function get_shap_fi timed out after 60.0 seconds


Processing classif_features_100_samples_1000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

skipping the execution of classif_features_100_samples_1000_seed_0 because of Function get_shap_fi timed out after 60.0 seconds


Processing classif_features_100_samples_10000_seed_0:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

skipping the execution of classif_features_100_samples_10000_seed_0 because of Function get_shap_fi timed out after 60.0 seconds


In [18]:
frames = [res_data, res_model, res_shap]

In [19]:
final_df = pd.concat(frames);

In [21]:
final_df;

In [22]:
# export results to local dir as csv file
final_df.to_csv(f'regression_synthethic_data_{datetime.today().strftime("%Y-%b-%d-%H:%M:%S")}.csv',
             index=False)

In [23]:
final_df

Unnamed: 0,dataset,imp_type,random_state,data_split,fi_order,fi_selected,timings
0,classif_features_10_samples_100_seed_0,mutual_info,0,0,"[0, 1, 6, 3, 8, 7, 4, 2, 5, 9]","[0, 1]","[0.013693295000848593, 0.01221649599756347, 0...."
1,classif_features_10_samples_100_seed_0,f_test,0,0,"[0, 1, 6, 9, 8, 4, 2, 3, 5, 7]","[0, 1]","[0.00182100299934973, 0.0017979320000449661, 0..."
2,classif_features_10_samples_100_seed_0,pearson,0,0,"[0, 1, 6, 9, 8, 4, 2, 3, 5, 7]","[0, 1]","[0.002302085998962866, 0.0017508220007584896, ..."
3,classif_features_10_samples_100_seed_0,spearman,0,0,"[0, 1, 6, 8, 4, 9, 2, 3, 7, 5]","[0, 1]","[0.0024746769995545037, 0.0033698229999572504,..."
4,classif_features_10_samples_100_seed_0,kendall,0,0,"[0, 1, 6, 8, 4, 9, 2, 3, 7, 5]","[0, 1]","[0.015187065000645816, 0.01486127299722284, 0...."
...,...,...,...,...,...,...,...
15,classif_features_10_samples_1000_seed_0,shap,0,5,"[1, 0, 6, 3, 9, 8, 5, 2, 7, 4]","[1, 0]","[3.669437716001994, 3.6497950710036093, 3.6544..."
16,classif_features_10_samples_1000_seed_0,shap,0,6,"[1, 0, 5, 2, 7, 8, 3, 6, 9, 4]","[1, 0]","[3.6540138150012353, 3.6743375939986436, 3.647..."
17,classif_features_10_samples_1000_seed_0,shap,0,7,"[1, 0, 5, 7, 3, 6, 8, 9, 2, 4]","[1, 0]","[4.042630871001165, 3.899649742001202, 3.97792..."
18,classif_features_10_samples_1000_seed_0,shap,0,8,"[1, 0, 5, 7, 9, 4, 3, 8, 6, 2]","[1, 0]","[3.685066628997447, 3.6729397809976945, 3.6540..."


In [None]:
## plots
# import seaborn as sns
# plot = sns.lineplot(x='iteration', y='mcc_test', hue='imp_type',
#             data=df_res.query('''model == "XGBClassifier"''')
#             )

# plot = sns.lineplot(x='iteration', y='mcc_test', hue='imp_type',
#             data=df_res.query('''model == "DecisionTreeClassifier"''')
#             )

# plot = sns.lineplot(x='iteration', y='mcc_test', hue='imp_type',
#             data=df_res.query('''model == "RandomForestClassifier"''')
#             )

In [None]:
# export results to local dir as csv file
# df_res.to_csv(f'classification_synthethic_data_{datetime.today().strftime("%Y-%b-%d-%H:%M:%S")}.csv',
#              index=False)