In [2]:
%load_ext autoreload
%autoreload 2
%load_ext autotime
%config InlineBackend.figure_format = 'retina'

import itertools
import os
from collections import defaultdict
from pathlib import Path
from sklearn.model_selection import KFold
from toolz import valmap
from typing import Tuple

for folder in itertools.chain([Path.cwd()], Path.cwd().parents):
    if (folder / 'Pipfile').exists():
        os.chdir(folder)
        break

from functional import try_except
from utils import Timer

from IPython.core.display import clear_output
from cache import save_data, load_data
from evaluation_functions import  cross_validate_model_sets, \
    compute_classification_metrics_from_result
from notebooks.heart_transplant.dependencies.heart_transplant_functions import  get_rolling_cv
from notebooks.heart_transplant.dependencies.heart_transplant_pipelines import   get_xgboost_pipeline
from matplotlib import pyplot as plt
from notebooks.heart_transplant.dependencies.heart_transplant_data import get_rolling_cv_inputs_cached

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 28 ms (started: 2021-03-25 11:28:53 +00:00)


In [13]:
_, y, X, dataset_raw, sampling_sets = get_rolling_cv_inputs_cached()

[Memory]997.9s, 16.6min : Loading get_rolling_cv_inputs...
time: 1.67 s (started: 2021-03-25 11:44:20 +00:00)


In [14]:
RESULTS_IDENTIFIER = 'heart_transplant_step_wise_feature_selection_results_optimized'
FEATURES_IDENTIFIER = 'heart_transplant_step_wise_feature_selection_used_features_optimized'

all_potential_features = list(X.columns)

time: 3.19 ms (started: 2021-03-25 11:44:24 +00:00)


In [15]:
try:
    final_results = load_data(RESULTS_IDENTIFIER)
except FileNotFoundError:
    final_results = defaultdict(partial(defaultdict, partial(defaultdict)))

try:
    final_used_features = load_data(FEATURES_IDENTIFIER)
except FileNotFoundError:
    final_used_features = defaultdict(partial(defaultdict))

time: 3.91 ms (started: 2021-03-25 11:44:26 +00:00)


In [16]:
def roc_for_feature(result_for_feature):
    return pipe(
       result_for_feature,
       partial(compute_classification_metrics_from_result,
            y,
            target_variable='y_scores',
            threshold=0.5
       ),
       partial(map, lambda i2: i2['roc_auc']),
       list,
       np.mean
    )

def get_max_roc_auc_from_results(results) -> Tuple[str, float]:
    return pipe(
        results,
        partial(valmap, roc_for_feature),
        dict.items,
        partial(max, key=lambda i: i[1])
    )

time: 1.08 ms (started: 2021-03-25 11:44:28 +00:00)


In [None]:
for method, pipeline in [('xgboost', partial(get_xgboost_pipeline, n_jobs=10))]:
    print(f'- {method}')
    resumed_iteration = len(final_used_features[method])
    if resumed_iteration > 0:
        used_features = final_used_features[method][resumed_iteration - 1]
        print(f'Resuming iteration {resumed_iteration}; Selected features so far: {used_features}' )
    else:
        used_features = []

    potential_features = set(all_potential_features)-set(used_features)

    for iteration in range(resumed_iteration, len(all_potential_features)):
        print('iter', iteration, 'available', len(potential_features))
        timer = Timer()
        current_metrics_per_feature =  {}
        for feature in potential_features.copy():

            if f'log_{feature}' in X.columns:
                feature = f'log_{feature}'

            print(f'{feature}')
            try:
                X_selected = X[list(set(used_features) | { feature })]

                result = cross_validate_model_sets(
                    # Optimized hyperparameters
                    classifier=pipeline(X=X_selected, y=y).set_params(**{
                        'classifier__colsample_bytree': 0.5080817028881132,
                        'classifier__gamma': 3.092864665001854,
                        'classifier__learning_rate': 0.08951636759116892,
                        'classifier__max_depth': 2,
                        'classifier__min_child_weight': 4.0,
                        'classifier__n_estimators': 140,
                        'classifier__subsample': 0.7437975255362691
                    }),
                    X=X_selected,
                    y=y,
                    sets=sampl,
                    return_model=False,
                    parallel=False,
                )

                print('ROC AUC', roc_for_feature(result))
                print('Selected features', ','.join(X_selected.columns))
                print()
                final_results[method][iteration][feature] = result

            except Exception as e:
                print(f'Skipping {feature}')
                print('Exception', e)

        selected_feature, selected_roc_auc = get_max_roc_auc_from_results(final_results[method][iteration])

        potential_features.remove(selected_feature)
        used_features.append(selected_feature)
        final_used_features[method][iteration] = used_features.copy()

        save_data(RESULTS_IDENTIFIER, final_results)
        save_data(FEATURES_IDENTIFIER, final_used_features)
        print()
        print('---------------------')
        print(f'Selected feature: {selected_feature}')
        print('ROC AUC', selected_roc_auc)
        print(f'All features:', used_features)
        print(timer)
        print('---------------------')
        print()

- xgboost
iter 0 available 114
log_tbili_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.511974987067541
Selected features log_tbili_don

log_age
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5478566675385739
Selected features log_age

impl_defibril
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5055078422995338
Selected features impl_defibril

cancer_site_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.501870804205964
Selected features cancer_site_don

prior_card_surg_trr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5303955823860673
Selected features prior_card_surg_trr

coronary_angio
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5258407817458473
Selected features coronary_angio

tah ever
.
.
.
.
.
.
.
.
.
.
.
.
.


  _warn_prf(average, modifier, msg_start, len(result))


ROC AUC 0.5
Selected features tah ever

tattoos
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5122905194732666
Selected features tattoos

biopsy_dgn
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.4997917656954215
Selected features biopsy_dgn

log_wgt_kg_don_calc
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5119688127306788
Selected features log_wgt_kg_don_calc

gender_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5169182516504102
Selected features gender_don

cmv_igm
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.4956670066676585
Selected features cmv_igm

other_inf_don
.
.
.
.
.
.
.
.
.
.
.
.
.


  _warn_prf(average, modifier, msg_start, len(result))


ROC AUC 0.4975202434382761
Selected features other_inf_don

retransplant
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5146777190240324
Selected features retransplant

hbv_sur_antigen_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.49982136416411177
Selected features hbv_sur_antigen_don

diab
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.48871784129357543
Selected features diab

hist_cancer_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5035142592188396
Selected features hist_cancer_don

log_age_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5532694365949629
Selected features log_age_don

prev_tx_any
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5147549569706397
Selected features prev_tx_any

hist_cig_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.506346425488689
Selected features hist_cig_don

lv_eject_meth
.
.
.
.
.
.
.
.
.
.
.
.
.


  _warn_prf(average, modifier, msg_start, len(result))


ROC AUC 0.4989796919619423
Selected features lv_eject_meth

dial_prior_tx
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5296986710465021
Selected features dial_prior_tx

hemo_co_tcr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5088299369533676
Selected features hemo_co_tcr

vessels_50sten
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5001292097478396
Selected features vessels_50sten

hbv_core_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5018218332978187
Selected features hbv_core_don

hep_c_anti_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.501020172173484
Selected features hep_c_anti_don

log_cmassratio
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5217489756530779
Selected features log_cmassratio

log_bun_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.4966971878891623
Selected features log_bun_don

life_sup_trr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5214646065783312
Selected features life_sup_trr

ecd_donor
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5099262953336916
Selected features ecd_donor

cmv_igg
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.51205614311

  _warn_prf(average, modifier, msg_start, len(result))


ROC AUC 0.4999220242086296
Selected features vasodil_don

hemo_pa_mn_tcr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5156450977665993
Selected features hemo_pa_mn_tcr

hiv_serostatus
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.49992302853434717
Selected features hiv_serostatus

congenital
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5208856705052487
Selected features congenital

log_tbili
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5681093223982384
Selected features log_tbili

contin_oth_drug_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5167191000276333
Selected features contin_oth_drug_don

hist_oth_drug_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5194927036644292
Selected features hist_oth_drug_don

sexmatch
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5150277418272763
Selected features sexmatch

vent_support_after_list
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5332008126529324
Selected features vent_support_after_list

ecmo_trr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.517399723650776
Selected features ecmo_trr

log_creat_trr
.
.
.
.
.
.
.
.
.


  _warn_prf(average, modifier, msg_start, len(result))


ROC AUC 0.4949912558477046
Selected features blood_inf_don

hist_alcohol_old_don
.
.
.
.
.
.
.
.
.
.
.
.
.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


ROC AUC 0.502029468714083
Selected features hist_alcohol_old_don

anyecmo
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5190984171463754
Selected features anyecmo

donor insulin
.
.
.
.
.
.
.
.
.
.
.
.
.


  _warn_prf(average, modifier, msg_start, len(result))


ROC AUC 0.49885553853230363
Selected features donor insulin

thoracic_dgn
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5545395308692647
Selected features thoracic_dgn

cereb_vasc
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5022608095807832
Selected features cereb_vasc

extracranial_cancer_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.49972467364686374
Selected features extracranial_cancer_don

cmv_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5092320429218002
Selected features cmv_don

alcohol_heavy_don
.
.
.
.
.
.
.
.
.
.
.
.
.


  return all_features / all_features.sum()


ROC AUC 0.4982557861257153
Selected features alcohol_heavy_don

log_most_rcnt_creat
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5536029127864163
Selected features log_most_rcnt_creat

death_mech_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5300806553682477
Selected features death_mech_don

ethcat_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.4938664032047868
Selected features ethcat_don

multiorg
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5032807520429519
Selected features multiorg

abo_mat
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5060790510819724
Selected features abo_mat

urine_inf_don
.
.
.
.
.
.
.
.
.
.
.
.
.


  _warn_prf(average, modifier, msg_start, len(result))


ROC AUC 0.4972776251011579
Selected features urine_inf_don

gender
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5050977371275662
Selected features gender

hematocrit_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5012659132790989
Selected features hematocrit_don

tot_serum_album
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5279947064062179
Selected features tot_serum_album

education
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5128444761058304
Selected features education

protein_urine
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.4960137678985999
Selected features protein_urine

skin_cancer_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.49976986331104034
Selected features skin_cancer_don

hemo_pcw_tcr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.521435101505234
Selected features hemo_pcw_tcr

height ratio
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.510995907950463
Selected features height ratio

pulm_inf_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.49553366889455686
Selected features pulm_inf_don

func_stat_tcr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.54050085

  return all_features / all_features.sum()


.


  return all_features / all_features.sum()


ROC AUC 0.5077989748430127
Selected features ebv_igg_cad_don

pvr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5163391930697571
Selected features pvr

ebv_serostatus
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.4972316397522444
Selected features ebv_serostatus

log_newpra
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5223526055721963
Selected features log_newpra

transfusions
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5511288980981959
Selected features transfusions

ethcat
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5069795246489714
Selected features ethcat

hbv_sur_antigen
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.49930789030981093
Selected features hbv_sur_antigen

clin_infect_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.4981807675807023
Selected features clin_infect_don

cod_cad_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5225967105724586
Selected features cod_cad_don

log_wgt_kg_calc
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5205661670458382
Selected features log_wgt_kg_calc

anyvent
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5332008126529324
Selected

  _warn_prf(average, modifier, msg_start, len(result))


ROC AUC 0.4987851734201251
Selected features diabetes_don

iabp_tcr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5012732360976664
Selected features iabp_tcr

weight ratio
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5014268070049809
Selected features weight ratio

hist_cocaine_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5052060609178461
Selected features hist_cocaine_don

diag
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5575720371266619
Selected features diag

inotropes_tcr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.4950722831302635
Selected features inotropes_tcr

hist_iv_drug_old_don
.
.
.
.
.
.
.
.
.
.
.
.
.


  _warn_prf(average, modifier, msg_start, len(result))


ROC AUC 0.5000785211109615
Selected features hist_iv_drug_old_don

log_sgot_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5011746810560649
Selected features log_sgot_don

bmi_calc
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5101334919236663
Selected features bmi_calc

hbv_core
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5010501346527113
Selected features hbv_core

cig_use
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.49581230992792985
Selected features cig_use

ventilator_tcr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5166081693270512
Selected features ventilator_tcr

cmv_status
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5146326381591609
Selected features cmv_status

intracranial_cancer_don
.
.
.
.
.
.
.
.
.
.
.
.
.


  _warn_prf(average, modifier, msg_start, len(result))


ROC AUC 0.4999121426754253
Selected features intracranial_cancer_don

log_distance
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.50628649368982
Selected features log_distance

dial_after_list
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5256913779252333
Selected features dial_after_list

abo_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5125582802156832
Selected features abo_don

prior_card_surg_tcr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5479302968910701
Selected features prior_card_surg_tcr



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



---------------------
Selected feature: log_creat_trr
ROC AUC 0.5767768706158688
All features: ['log_creat_trr']
Time elapsed: 0:08:46
---------------------

iter 1 available 113
log_tbili_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5780831140107239
Selected features log_tbili_don,log_creat_trr

log_age
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5939631623735339
Selected features log_creat_trr,log_age

impl_defibril
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5784136125103819
Selected features impl_defibril,log_creat_trr

cancer_site_don
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5788782206243238
Selected features cancer_site_don,log_creat_trr

prior_card_surg_trr
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5822754020912033
Selected features log_creat_trr,prior_card_surg_trr

coronary_angio
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5843698246283865
Selected features log_creat_trr,coronary_angio

tah ever
.
.
.
.
.
.
.
.
.
.
.
.
.
ROC AUC 0.5775479920770713
Selected features log_creat_trr,tah ever

tattoos
.
.
.
.
.
.
.
.
.


In [None]:
rocs = [ try_except(lambda: get_max_roc_auc_from_results(iteration), {Exception: lambda: None}) for _, iteration in final_results['xgboost'].items()]


In [None]:
rocs

In [None]:
final_used_features