In [31]:
import pandas as pd
import os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import json
from IPython.display import display, Markdown

from utilities import data, roc, threshold
from utilities.info import *

In [32]:
# Both directories hang off TEAMS_DIR, which is not defined in this notebook
# (presumably it arrives through `from utilities.info import *` — confirm).
FILE_DIR, RESULTS_DIR = (
    f"{TEAMS_DIR}/files",
    f"{TEAMS_DIR}/temp-results",
)
# Echo the resolved paths as the cell output.
(FILE_DIR, RESULTS_DIR)

('C:/Users/shaur/OneDrive - Radboudumc/Documents - Master - Shaurya Gaur/General/Malignancy-Estimation Results/files',
 'C:/Users/shaur/OneDrive - Radboudumc/Documents - Master - Shaurya Gaur/General/Malignancy-Estimation Results/temp-results')

In [33]:
# Bootstrap count forwarded as `num_bootstraps=` to every roc/threshold call in this notebook.
# NOTE(review): 10 looks like a quick-run setting, and no random seed is set in the
# visible cells — confirm both before reporting confidence intervals.
NUM_BOOTSTRAPS = 10
# Displayed for reference only; not defined in this notebook
# (presumably provided by `from utilities.info import *` — confirm).
THRESHOLD_POLICIES

(('Sensitivity', 0.9), ('Specificity', 0.9))

# DLCST

In [34]:
# Display name -> prediction column for each model evaluated on DLCST.
DLCST_MODELCOLS = dict(
    [
        ("Venkadesh", "Ensemble_Kiran_cal"),
        ("de Haas", "thijmen_mean_cal"),
        ("PanCan2b", "PanCan2b"),
        ("Sybil year 1", "sybil_year1"),
    ]
)

# Load the DLCST prediction table and report how many rows it has.
dlcst_preds = pd.read_csv(
    f"{FILE_DIR}/dlcst_allmodels_cal.csv",
    header=0,
)
print(len(dlcst_preds))

# Judging by the cell output, this adds thresholded columns (e.g. 'Age > 61')
# and returns the column groups split into 'cat' and 'num'.
# DLCST_DEMOCOLS is not defined in this notebook (presumably star-imported — confirm).
dlcst_preds, dlcst_democols = data.bin_numerical_columns(
    dlcst_preds,
    DLCST_DEMOCOLS,
)
dlcst_democols

599


{'cat': {'demo': ['Age > 61', 'Sex'],
  'other': ['Emphysema', 'FamilyHistoryLungCa', 'NoduleCountPerScan > 1']},
 'num': {'demo': ['Age'], 'other': ['NoduleCountPerScan']}}

In [35]:
# Subgroup AUROC comparison for every DLCST model over the categorical columns.
# The CSV file name is keyed by the number of rows in `dlcst_preds`.
roc_dlcst = roc.all_results_subgroups_models(
    dlcst_preds,
    dlcst_democols['cat'],
    models=DLCST_MODELCOLS,
    csvpath=f"{RESULTS_DIR}/auroc-dlcst-{len(dlcst_preds)}.csv",
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)
roc_dlcst

Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_ben,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,attribute,category
Venkadesh,0.944806,0.069231,True,0.924025,0.891251,0.969003,False,0.92728,0.858171,0.953068,...,133,27.378965,18.902439,28,407,72.621035,6.436782,Ensemble_Kiran_cal,Age > 61,demo
de Haas,0.702695,0.381685,True,0.911195,0.875237,0.964839,False,0.929601,0.8603,0.957062,...,133,27.378965,18.902439,28,407,72.621035,6.436782,thijmen_mean_cal,Age > 61,demo
PanCan2b,0.395398,0.849869,True,0.846746,0.797585,0.915105,False,0.896056,0.87011,0.933062,...,133,27.378965,18.902439,28,407,72.621035,6.436782,PanCan2b,Age > 61,demo
Sybil year 1,0.837577,0.204994,True,0.854983,0.78427,0.930278,False,0.86733,0.80753,0.923974,...,133,27.378965,18.902439,28,407,72.621035,6.436782,sybil_year1,Age > 61,demo
Venkadesh,0.35117,0.932323,1.0,0.907228,0.822917,0.951451,2.0,0.949818,0.922619,0.968446,...,291,53.923205,9.907121,27,249,46.076795,9.782609,Ensemble_Kiran_cal,Sex,demo
de Haas,0.290078,1.05795,1.0,0.9035,0.823115,0.938939,2.0,0.95192,0.922784,0.96939,...,291,53.923205,9.907121,27,249,46.076795,9.782609,thijmen_mean_cal,Sex,demo
PanCan2b,0.466191,-0.728691,1.0,0.899082,0.870748,0.948949,2.0,0.856798,0.79176,0.90008,...,291,53.923205,9.907121,27,249,46.076795,9.782609,PanCan2b,Sex,demo
Sybil year 1,0.719163,0.359578,1.0,0.852935,0.77639,0.910599,2.0,0.874556,0.811989,0.919715,...,291,53.923205,9.907121,27,249,46.076795,9.782609,sybil_year1,Sex,demo
Venkadesh,0.126349,1.528659,1.0,0.90457,0.866992,0.954101,0.0,0.970446,0.95568,0.990225,...,361,67.612688,10.864198,15,179,32.387312,7.731959,Ensemble_Kiran_cal,Emphysema,other
de Haas,0.31432,1.006199,1.0,0.909866,0.869919,0.960734,0.0,0.957152,0.937853,0.976087,...,361,67.612688,10.864198,15,179,32.387312,7.731959,thijmen_mean_cal,Emphysema,other


In [36]:
# Per-model thresholds for each policy (plus the Brock column, see output);
# the second return value is discarded.
dlcst_policies, _ = threshold.get_threshold_policies(
    dlcst_preds,
    models=DLCST_MODELCOLS,
    policies=THRESHOLD_POLICIES,
    brock=True,
)
dlcst_policies

Unnamed: 0,Sensitivity=0.9,Specificity=0.9,Brock
Venkadesh,0.134,0.204,0.06
de Haas,0.099,0.193,0.06
PanCan2b,0.02,0.093,0.06
Sybil year 1,0.001,0.018,0.06


In [37]:
# Thresholded performance per subgroup and model on DLCST, using the
# thresholds held in `dlcst_policies`.
dlcst_thresholds = threshold.all_results_subgroups_models(
    dlcst_preds,
    dlcst_democols['cat'],
    policies=dlcst_policies,
    models=DLCST_MODELCOLS,
    csvpath=f'{RESULTS_DIR}/threshold-perfs-dlcst-{len(dlcst_preds)}.csv',
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)

dlcst_thresholds

Unnamed: 0,num,mal,ben,tp,fp,tn,fn,tpr,fpr,fnr,...,for_hi,acc_hi,j_hi,f1_hi,mcc_hi,threshold_hi,iter_hi,col,attribute,category
0,599.0,59.0,540.0,54.0,76.0,464.0,5.0,0.915254,0.140741,0.084746,...,0.016188,0.880342,0.819791,0.638543,0.622307,0.001,8.775,Ensemble_Kiran_cal,Age > 61,demo
1,599.0,59.0,540.0,54.0,119.0,421.0,5.0,0.915254,0.220370,0.084746,...,0.018338,0.803172,0.730458,0.499119,0.482318,0.001,8.775,thijmen_mean_cal,Age > 61,demo
2,599.0,59.0,540.0,54.0,160.0,380.0,5.0,0.915254,0.296296,0.084746,...,0.012370,0.745993,0.679609,0.452020,0.440981,0.001,8.775,PanCan2b,Age > 61,demo
3,599.0,59.0,540.0,57.0,453.0,87.0,2.0,0.966102,0.838889,0.033898,...,0.029474,0.262980,0.165148,0.227917,0.137459,0.001,8.775,sybil_year1,Age > 61,demo
4,599.0,59.0,540.0,46.0,54.0,486.0,13.0,0.779661,0.100000,0.220339,...,0.035520,0.904090,0.760838,0.635423,0.591833,0.018,8.775,Ensemble_Kiran_cal,Age > 61,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31,266.0,24.0,242.0,13.0,27.0,215.0,11.0,0.541667,0.111570,0.458333,...,0.070763,0.878853,0.556351,0.484465,0.421422,0.018,8.775,sybil_year1,NoduleCountPerScan > 1,other
32,266.0,24.0,242.0,23.0,81.0,161.0,1.0,0.958333,0.334711,0.041667,...,0.017471,0.711748,0.651971,0.415283,0.403576,0.060,8.775,Ensemble_Kiran_cal,NoduleCountPerScan > 1,other
33,266.0,24.0,242.0,23.0,79.0,163.0,1.0,0.958333,0.326446,0.041667,...,0.018023,0.720489,0.657039,0.418493,0.412942,0.060,8.775,thijmen_mean_cal,NoduleCountPerScan > 1,other
34,266.0,24.0,242.0,16.0,38.0,204.0,8.0,0.666667,0.157025,0.333333,...,0.058168,0.852914,0.620728,0.503419,0.448946,0.060,8.775,PanCan2b,NoduleCountPerScan > 1,other


# NLST

In [42]:
# Load the NLST prediction table (presumably one row per nodule, given the
# variable name — confirm) that every NLST section below starts from.
nlst_preds_nodule = pd.read_csv(f"{FILE_DIR}/nlst_allmodels_demos.csv")

# Column groupings for NLST. The `with` block closes the file on exit,
# so no explicit close() call is needed.
with open(f'{FILE_DIR}/nlst_democols.json') as json_data:
    nlst_democols_original = json.load(json_data)

# Remove the 'lungcanc' group from the categorical columns. pop() returns the
# removed list (cancer type / stage columns, see output), which is displayed.
# Raises KeyError if 'lungcanc' is absent from the JSON.
nlst_democols_original['cat'].pop('lungcanc')

['Small_cell_carcinoma',
 'Squamous_cell_carcinoma',
 'Adenocarcinoma',
 'Bronchiolo-alveolar_carcinoma',
 'Large_cell_carcinoma',
 'Adenosquamous_carcinoma',
 'Carcinoid_tumor',
 'Unclassified_carcinoma',
 'LC_stage']

## NLST Scans (all)

In [43]:
# Scan-level NLST table with Sybil included and without the `tijmen` restriction.
(
    nlst_allscans,
    nlst_allscans_democols,
    nlst_allscans_models,
) = data.prep_nlst_preds(
    nlst_preds_nodule,
    nlst_democols_original,
    scanlevel=True,
    tijmen=False,
    sybil=True,
    bin_num=True,
)
print(len(nlst_allscans), " Scans")

# Show the model-name -> column mapping, then the column groups.
display(nlst_allscans_models)
nlst_allscans_democols

5911  Scans


{'Venkadesh': 'DL_cal',
 'de Haas Local': 'Thijmen_local_cal',
 'de Haas Global (hidden nodule)': 'Thijmen_global_hidden_cal',
 'de Haas Global (shown nodule)': 'Thijmen_global_show_cal',
 'Sybil year 1': 'sybil_year1',
 'PanCan2b': 'PanCan2b'}

{'num': {'demo': ['weight', 'height', 'BMI', 'Age'],
  'smoke': ['pkyr', 'smokeage', 'smokeday', 'smokeyr'],
  'other': ['Diameter_mm', 'NoduleCounts']},
 'cat': {'demo': ['Age > 61',
   'Gender',
   'HighSchoolPlus',
   'Married',
   'NonHispanicWhite',
   'Overweight',
   'Unfinished_ed',
   'WhiteOrBlack',
   'educat',
   'ethnic',
   'height > 68',
   'marital',
   'race',
   'weight > 180'],
  'smoke': ['cigar',
   'cigsmok',
   'pipe',
   'pkyr > 55',
   'smokeage > 16',
   'smokeday > 25',
   'smokelive',
   'smokework',
   'smokeyr > 40'],
  'work': ['wrkasbe',
   'wrkbaki',
   'wrkbutc',
   'wrkchem',
   'wrkcoal',
   'wrkcott',
   'wrkfarm',
   'wrkfire',
   'wrkflou',
   'wrkfoun',
   'wrkhard',
   'wrkpain',
   'wrksand',
   'wrkweld'],
  'disease': ['diagadas',
   'diagasbe',
   'diagbron',
   'diagchas',
   'diagchro',
   'diagcopd',
   'diagdiab',
   'diagemph',
   'diagfibr',
   'diaghear',
   'diaghype',
   'diagpneu',
   'diagsarc',
   'diagsili',
   'diagstro',
   'd

In [None]:
# Subgroup AUROC comparison for all NLST scans.
# NOTE(review): plot=True here while the DLCST cell passes plot=False — confirm intended.
roc_nlst_allscans = roc.all_results_subgroups_models(
    nlst_allscans,
    nlst_allscans_democols['cat'],
    models=nlst_allscans_models,
    csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_allscans)}.csv",
    plot=True,
    num_bootstraps=NUM_BOOTSTRAPS,
)
roc_nlst_allscans

In [None]:
# Per-model thresholds for each policy on all NLST scans; second return value discarded.
nlst_allscans_policies, _ = threshold.get_threshold_policies(
    nlst_allscans,
    models=nlst_allscans_models,
    policies=THRESHOLD_POLICIES,
    brock=True,
)
nlst_allscans_policies

Unnamed: 0,Sensitivity=0.9,Specificity=0.9,Brock
Venkadesh,0.049,0.222,0.06
de Haas Local,0.045,0.226,0.06
de Haas Global (hidden nodule),0.066,0.265,0.06
de Haas Global (shown nodule),0.073,0.312,0.06
Sybil year 1,0.003,0.058,0.06
PanCan2b,0.015,0.165,0.06


In [None]:
# Thresholded performance per subgroup and model on all NLST scans.
threshold_allscans = threshold.all_results_subgroups_models(
    nlst_allscans,
    nlst_allscans_democols['cat'],
    policies=nlst_allscans_policies,
    models=nlst_allscans_models,
    csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_allscans)}.csv',
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)

threshold_allscans

## NLST Scans (tijmen)

In [None]:
# Scan-level NLST table with tijmen=True (the output shows fewer scans and an
# extra 'de Haas Combined' model compared with the tijmen=False cell).
(
    nlst_somescans,
    nlst_somescans_democols,
    nlst_somescans_models,
) = data.prep_nlst_preds(
    nlst_preds_nodule,
    nlst_democols_original,
    scanlevel=True,
    tijmen=True,
    sybil=True,
    bin_num=True,
)
print(len(nlst_somescans), " Scans")

# Show the model-name -> column mapping, then the column groups.
display(nlst_somescans_models)
nlst_somescans_democols

1172  Scans


{'Venkadesh': 'DL_cal',
 'de Haas Combined': 'Thijmen_mean_cal',
 'de Haas Local': 'Thijmen_local_cal',
 'de Haas Global (hidden nodule)': 'Thijmen_global_hidden_cal',
 'de Haas Global (shown nodule)': 'Thijmen_global_show_cal',
 'Sybil year 1': 'sybil_year1',
 'PanCan2b': 'PanCan2b'}

{'num': {'demo': ['weight', 'height', 'BMI', 'Age'],
  'smoke': ['pkyr', 'smokeage', 'smokeday', 'smokeyr'],
  'other': ['Diameter_mm', 'NoduleCounts']},
 'cat': {'demo': ['Age > 61',
   'Gender',
   'HighSchoolPlus',
   'Married',
   'NonHispanicWhite',
   'Overweight',
   'Unfinished_ed',
   'WhiteOrBlack',
   'educat',
   'ethnic',
   'height > 68',
   'marital',
   'race',
   'weight > 180'],
  'smoke': ['cigar',
   'cigsmok',
   'pipe',
   'pkyr > 55',
   'smokeage > 16',
   'smokeday > 25',
   'smokelive',
   'smokework',
   'smokeyr > 40'],
  'work': ['wrkasbe',
   'wrkbaki',
   'wrkbutc',
   'wrkchem',
   'wrkcoal',
   'wrkcott',
   'wrkfarm',
   'wrkfire',
   'wrkflou',
   'wrkfoun',
   'wrkhard',
   'wrkpain',
   'wrksand',
   'wrkweld'],
  'disease': ['diagadas',
   'diagasbe',
   'diagbron',
   'diagchas',
   'diagchro',
   'diagcopd',
   'diagdiab',
   'diagemph',
   'diagfibr',
   'diaghear',
   'diaghype',
   'diagpneu',
   'diagsarc',
   'diagsili',
   'diagstro',
   'd

In [None]:
# Subgroup AUROC comparison for the tijmen=True scan subset.
roc_nlst_somescans = roc.all_results_subgroups_models(
    nlst_somescans,
    nlst_somescans_democols['cat'],
    models=nlst_somescans_models,
    csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_somescans)}.csv",
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)
roc_nlst_somescans

KeyboardInterrupt: 

In [None]:
# Per-model thresholds for each policy on the tijmen=True scan subset;
# second return value discarded.
nlst_somescans_policies, _ = threshold.get_threshold_policies(
    nlst_somescans,
    models=nlst_somescans_models,
    policies=THRESHOLD_POLICIES,
    brock=True,
)
nlst_somescans_policies

In [None]:
# Thresholded performance per subgroup and model on the tijmen=True scan subset.
threshold_somescans = threshold.all_results_subgroups_models(
    nlst_somescans,
    nlst_somescans_democols['cat'],
    policies=nlst_somescans_policies,
    models=nlst_somescans_models,
    csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_somescans)}.csv',
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)

threshold_somescans

## NLST Nodules (all)

In [48]:
# Nodule-level NLST table (scanlevel=False) without Sybil and without the
# `tijmen` restriction.
(
    nlst_allnodules,
    nlst_allnodules_democols,
    nlst_allnodules_models,
) = data.prep_nlst_preds(
    nlst_preds_nodule,
    nlst_democols_original,
    scanlevel=False,
    tijmen=False,
    sybil=False,
    bin_num=True,
)
print(len(nlst_allnodules), " nodules")

# Show the model-name -> column mapping, then the column groups.
display(nlst_allnodules_models)
nlst_allnodules_democols

16077  nodules


{'Venkadesh': 'DL_cal',
 'de Haas Local': 'Thijmen_local_cal',
 'de Haas Global (hidden nodule)': 'Thijmen_global_hidden_cal',
 'de Haas Global (shown nodule)': 'Thijmen_global_show_cal',
 'PanCan2b': 'PanCan2b'}

{'num': {'demo': ['weight', 'height', 'BMI', 'Age'],
  'smoke': ['pkyr', 'smokeage', 'smokeday', 'smokeyr'],
  'nodule': ['CoordX', 'CoordY', 'CoordZ'],
  'other': ['Diameter_mm', 'NoduleCounts']},
 'cat': {'demo': ['Age > 61',
   'Gender',
   'HighSchoolPlus',
   'Married',
   'NonHispanicWhite',
   'Overweight',
   'Unfinished_ed',
   'WhiteOrBlack',
   'educat',
   'ethnic',
   'height > 68',
   'marital',
   'race',
   'weight > 180'],
  'smoke': ['cigar',
   'cigsmok',
   'pipe',
   'pkyr > 55',
   'smokeage > 16',
   'smokeday > 25',
   'smokelive',
   'smokework',
   'smokeyr > 40'],
  'work': ['wrkasbe',
   'wrkbaki',
   'wrkbutc',
   'wrkchem',
   'wrkcoal',
   'wrkcott',
   'wrkfarm',
   'wrkfire',
   'wrkflou',
   'wrkfoun',
   'wrkhard',
   'wrkpain',
   'wrksand',
   'wrkweld'],
  'disease': ['diagadas',
   'diagasbe',
   'diagbron',
   'diagchas',
   'diagchro',
   'diagcopd',
   'diagdiab',
   'diagemph',
   'diagfibr',
   'diaghear',
   'diaghype',
   'diagpneu',
   'di

In [None]:
# Subgroup AUROC comparison for all NLST nodules.
# NOTE(review): plot=True here while the DLCST cell passes plot=False — confirm intended.
roc_nlst_allnodules = roc.all_results_subgroups_models(
    nlst_allnodules,
    nlst_allnodules_democols['cat'],
    models=nlst_allnodules_models,
    csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_allnodules)}.csv",
    plot=True,
    num_bootstraps=NUM_BOOTSTRAPS,
)
roc_nlst_allnodules

In [None]:
# Per-model thresholds for each policy on all NLST nodules; second return value discarded.
nlst_allnodules_policies, _ = threshold.get_threshold_policies(
    nlst_allnodules,
    models=nlst_allnodules_models,
    policies=THRESHOLD_POLICIES,
    brock=True,
)
nlst_allnodules_policies

In [None]:
# Thresholded performance per subgroup and model on all NLST nodules.
threshold_allnodules = threshold.all_results_subgroups_models(
    nlst_allnodules,
    nlst_allnodules_democols['cat'],
    policies=nlst_allnodules_policies,
    models=nlst_allnodules_models,
    csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_allnodules)}.csv',
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)

threshold_allnodules

## NLST Nodules (tijmen)

In [49]:
# Nodule-level NLST table (scanlevel=False) with tijmen=True and without Sybil.
(
    nlst_somenodules,
    nlst_somenodules_democols,
    nlst_somenodules_models,
) = data.prep_nlst_preds(
    nlst_preds_nodule,
    nlst_democols_original,
    scanlevel=False,
    tijmen=True,
    sybil=False,
    bin_num=True,
)
print(len(nlst_somenodules), " nodules")

# Show the model-name -> column mapping, then the column groups.
display(nlst_somenodules_models)
nlst_somenodules_democols

3240  nodules


{'Venkadesh': 'DL_cal',
 'de Haas Combined': 'Thijmen_mean_cal',
 'de Haas Local': 'Thijmen_local_cal',
 'de Haas Global (hidden nodule)': 'Thijmen_global_hidden_cal',
 'de Haas Global (shown nodule)': 'Thijmen_global_show_cal',
 'PanCan2b': 'PanCan2b'}

{'num': {'demo': ['weight', 'height', 'BMI', 'Age'],
  'smoke': ['pkyr', 'smokeage', 'smokeday', 'smokeyr'],
  'nodule': ['CoordX', 'CoordY', 'CoordZ'],
  'other': ['Diameter_mm', 'NoduleCounts']},
 'cat': {'demo': ['Age > 61',
   'Gender',
   'HighSchoolPlus',
   'Married',
   'NonHispanicWhite',
   'Overweight',
   'Unfinished_ed',
   'WhiteOrBlack',
   'educat',
   'ethnic',
   'height > 68',
   'marital',
   'race',
   'weight > 180'],
  'smoke': ['cigar',
   'cigsmok',
   'pipe',
   'pkyr > 55',
   'smokeage > 16',
   'smokeday > 25',
   'smokelive',
   'smokework',
   'smokeyr > 40'],
  'work': ['wrkasbe',
   'wrkbaki',
   'wrkbutc',
   'wrkchem',
   'wrkcoal',
   'wrkcott',
   'wrkfarm',
   'wrkfire',
   'wrkflou',
   'wrkfoun',
   'wrkhard',
   'wrkpain',
   'wrksand',
   'wrkweld'],
  'disease': ['diagadas',
   'diagasbe',
   'diagbron',
   'diagchas',
   'diagchro',
   'diagcopd',
   'diagdiab',
   'diagemph',
   'diagfibr',
   'diaghear',
   'diaghype',
   'diagpneu',
   'di

In [None]:
# Subgroup AUROC comparison for the tijmen=True nodule subset.
# NOTE(review): plot=True here while the DLCST cell passes plot=False — confirm intended.
roc_nlst_somenodules = roc.all_results_subgroups_models(
    nlst_somenodules,
    nlst_somenodules_democols['cat'],
    models=nlst_somenodules_models,
    csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_somenodules)}.csv",
    plot=True,
    num_bootstraps=NUM_BOOTSTRAPS,
)
roc_nlst_somenodules

In [50]:
# Per-model thresholds for each policy on the tijmen=True nodule subset;
# second return value discarded.
nlst_somenodules_policies, _ = threshold.get_threshold_policies(
    nlst_somenodules,
    models=nlst_somenodules_models,
    policies=THRESHOLD_POLICIES,
    brock=True,
)
nlst_somenodules_policies

Unnamed: 0,Sensitivity=0.9,Specificity=0.9,Brock
Venkadesh,0.037,0.169,0.06
de Haas Combined,0.073,0.206,0.06
de Haas Local,0.059,0.169,0.06
de Haas Global (hidden nodule),0.067,0.217,0.06
de Haas Global (shown nodule),0.057,0.241,0.06
PanCan2b,0.015,0.122,0.06


In [51]:
# Thresholded performance per subgroup and model on the tijmen=True nodule subset.
threshold_somenodules = threshold.all_results_subgroups_models(
    nlst_somenodules,
    nlst_somenodules_democols['cat'],
    policies=nlst_somenodules_policies,
    models=nlst_somenodules_models,
    csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_somenodules)}.csv',
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)

threshold_somenodules

KeyboardInterrupt: 

### Demographics, isolating other columns

In [None]:
# NOTE(review): this cell is fully commented out, so `nlst_preds`, `nlst_democols`
# and `MODELS` are not assigned by it on a fresh kernel; the output displayed
# under this cell is left over from an earlier run. Re-enable it or delete the
# cell together with its output — TODO confirm which.
# nlst_preds, nlst_democols, MODELS = data.prep_nlst_preds(nlst_preds_nodule, nlst_democols_original, scanlevel=True, tijmen=False, sybil=True)
# print(len(nlst_preds))
# nlst_democols

5911


{'num': {'demo': ['BMI', 'Age', 'height', 'weight'],
  'smoke': ['smokeage', 'smokeday', 'smokeyr', 'pkyr'],
  'other': ['NoduleCounts', 'Diameter_mm', 'SliceCount']},
 'cat': {'demo': ['Age > 61',
   'Gender',
   'HighSchoolPlus',
   'Married',
   'NonHispanicWhite',
   'Overweight',
   'Unfinished_ed',
   'WhiteOrBlack',
   'educat',
   'ethnic',
   'height > 68',
   'marital',
   'race',
   'weight > 180'],
  'smoke': ['cigar',
   'cigsmok',
   'pipe',
   'pkyr > 55',
   'smokeage > 16',
   'smokeday > 25',
   'smokelive',
   'smokework',
   'smokeyr > 40'],
  'work': ['wrkbaki',
   'wrkfoun',
   'wrkchem',
   'wrkasbe',
   'wrkfire',
   'wrksand',
   'wrkfarm',
   'wrkcoal',
   'wrkpain',
   'wrkweld',
   'wrkflou',
   'wrkbutc',
   'wrkhard',
   'wrkcott'],
  'disease': ['diagasbe',
   'diagchas',
   'diagpneu',
   'diagstro',
   'diagemph',
   'diagbron',
   'diagsili',
   'diagsarc',
   'diaghear',
   'diagdiab',
   'diagadas',
   'diagcopd',
   'diagfibr',
   'diagtube',
   'di

In [None]:
# NOTE(review): disabled cell with stale output. It reads `nlst_preds`,
# `nlst_democols` and `MODELS`, which (as far as this notebook shows) are only
# assigned in another commented-out cell — restore that cell first if re-enabling.
# gender_df = roc.save_results_isolate_confounders(nlst_preds, 'Gender', nlst_democols['cat'], MODELS, csvpath=f'{RESULTS_DIR}/auroc-gender-by-factors-nlst-{len(nlst_preds)}.csv', num_bootstraps=NUM_BOOTSTRAPS)
# gender_df

Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,filter_by,filter_val,category
Venkadesh,0.884213,-0.145631,1.0,0.905956,0.878714,0.931407,2.0,0.902027,0.872541,0.934186,...,56.496520,7.871321,108,1017,43.503480,9.600000,DL_cal,Age > 61,0.0,demo
de Haas Local,0.510123,0.658647,1.0,0.875166,0.842110,0.907032,2.0,0.894254,0.860443,0.924237,...,56.496520,7.871321,108,1017,43.503480,9.600000,Thijmen_local_cal,Age > 61,0.0,demo
de Haas Global (hidden nodule),0.014978,-2.432904,1.0,0.848179,0.815810,0.878309,2.0,0.764180,0.724127,0.803389,...,56.496520,7.871321,108,1017,43.503480,9.600000,Thijmen_global_hidden_cal,Age > 61,0.0,demo
de Haas Global (shown nodule),0.398098,-0.845023,1.0,0.860126,0.827791,0.892714,2.0,0.832872,0.795958,0.867581,...,56.496520,7.871321,108,1017,43.503480,9.600000,Thijmen_global_show_cal,Age > 61,0.0,demo
Sybil year 1,0.000169,3.761524,1.0,0.786788,0.744338,0.829957,2.0,0.905246,0.874272,0.935412,...,56.496520,7.871321,108,1017,43.503480,9.600000,sybil_year1,Age > 61,0.0,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
de Haas Local,0.418341,0.809302,1.0,0.872777,0.822095,0.919544,2.0,0.906687,0.856513,0.948209,...,60.321101,7.731305,43,476,39.678899,8.285164,Thijmen_local_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (hidden nodule),0.594455,-0.532392,1.0,0.818488,0.775956,0.857272,2.0,0.790763,0.731651,0.843911,...,60.321101,7.731305,43,476,39.678899,8.285164,Thijmen_global_hidden_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (shown nodule),0.349768,0.935040,1.0,0.855157,0.815148,0.896599,2.0,0.896166,0.837969,0.942044,...,60.321101,7.731305,43,476,39.678899,8.285164,Thijmen_global_show_cal,ManufacturersModelName,Volume Zoom,scanner
Sybil year 1,0.057292,1.901075,1.0,0.850117,0.798410,0.897959,2.0,0.928019,0.882871,0.966378,...,60.321101,7.731305,43,476,39.678899,8.285164,sybil_year1,ManufacturersModelName,Volume Zoom,scanner


In [None]:
# NOTE(review): disabled cell with stale output. It reads `nlst_preds`,
# `nlst_democols` and `MODELS`, which (as far as this notebook shows) are only
# assigned in another commented-out cell — restore that cell first if re-enabling.
# NOTE(review): the stale output contains repeated `se = np.sqrt(` warning lines
# and rows where a group has as few as 2 malignant cases; presumably very small
# subgroups need a minimum-size guard — TODO confirm.
# race_df = roc.save_results_isolate_confounders(nlst_preds, 'WhiteOrBlack', nlst_democols['cat'], MODELS, csvpath=f'{RESULTS_DIR}/auroc-race-by-factors-nlst-{len(nlst_preds)}.csv', num_bootstraps=NUM_BOOTSTRAPS)
# race_df

  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(


Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,filter_by,filter_val,category
Venkadesh,0.744035,-0.326515,1.0,0.903781,0.882382,0.924980,2.0,0.882700,0.788183,0.957637,...,93.50348,8.395368,12,70,3.170920,14.634146,DL_cal,Age > 61,0.0,demo
de Haas Local,0.623007,0.491593,1.0,0.879247,0.853497,0.903029,2.0,0.908341,0.821429,0.972973,...,93.50348,8.395368,12,70,3.170920,14.634146,Thijmen_local_cal,Age > 61,0.0,demo
de Haas Global (hidden nodule),0.606608,-0.514922,1.0,0.813340,0.786636,0.838516,2.0,0.772134,0.622619,0.894977,...,93.50348,8.395368,12,70,3.170920,14.634146,Thijmen_global_hidden_cal,Age > 61,0.0,demo
de Haas Global (shown nodule),0.774396,-0.286630,1.0,0.844453,0.818944,0.870025,2.0,0.823049,0.665672,0.952381,...,93.50348,8.395368,12,70,3.170920,14.634146,Thijmen_global_show_cal,Age > 61,0.0,demo
Sybil year 1,0.840850,0.200807,1.0,0.847350,0.818378,0.875023,2.0,0.861231,0.765685,0.940878,...,93.50348,8.395368,12,70,3.170920,14.634146,sybil_year1,Age > 61,0.0,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
de Haas Local,0.818468,0.229515,1.0,0.885638,0.853645,0.920035,2.0,0.917280,0.795455,1.000000,...,94.41896,8.178138,2,43,3.440367,4.444444,Thijmen_local_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (hidden nodule),0.783390,-0.274903,1.0,0.807492,0.772214,0.841398,2.0,0.751463,0.590909,0.896341,...,94.41896,8.178138,2,43,3.440367,4.444444,Thijmen_global_hidden_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (shown nodule),0.002966,2.971232,1.0,0.869623,0.836893,0.901470,2.0,0.995000,1.000000,1.000000,...,94.41896,8.178138,2,43,3.440367,4.444444,Thijmen_global_show_cal,ManufacturersModelName,Volume Zoom,scanner
Sybil year 1,0.823027,-0.223653,1.0,0.885600,0.851022,0.918815,2.0,0.846289,0.651163,1.000000,...,94.41896,8.178138,2,43,3.440367,4.444444,sybil_year1,ManufacturersModelName,Volume Zoom,scanner


In [None]:
# NOTE(review): disabled cell with stale output. It reads `nlst_preds`,
# `nlst_democols` and `MODELS`, which (as far as this notebook shows) are only
# assigned in another commented-out cell — restore that cell first if re-enabling.
# overweight_df = roc.save_results_isolate_confounders(nlst_preds, 'Overweight', nlst_democols['cat'], MODELS, csvpath=f'{RESULTS_DIR}/auroc-overweight-by-factors-nlst-{len(nlst_preds)}.csv', num_bootstraps=NUM_BOOTSTRAPS)
# overweight_df

Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,filter_by,filter_val,category
Venkadesh,0.307227,-1.021057,True,0.913301,0.890445,0.937428,False,0.8833,0.843533,0.920617,...,68.561485,8.178229,78,735,31.438515,9.594096,DL_cal,Age > 61,0.0,demo
de Haas Local,0.537052,-0.617276,True,0.889163,0.857686,0.918939,False,0.869912,0.82981,0.907515,...,68.561485,8.178229,78,735,31.438515,9.594096,Thijmen_local_cal,Age > 61,0.0,demo
de Haas Global (hidden nodule),0.424049,0.799417,True,0.800702,0.7677,0.832463,False,0.829194,0.792832,0.865418,...,68.561485,8.178229,78,735,31.438515,9.594096,Thijmen_global_hidden_cal,Age > 61,0.0,demo
de Haas Global (shown nodule),0.147365,-1.448902,True,0.860783,0.832018,0.888554,False,0.810073,0.760422,0.856329,...,68.561485,8.178229,78,735,31.438515,9.594096,Thijmen_global_show_cal,Age > 61,0.0,demo
Sybil year 1,0.5894,-0.539706,True,0.852504,0.816772,0.886023,False,0.834032,0.786166,0.878464,...,68.561485,8.178229,78,735,31.438515,9.594096,sybil_year1,Age > 61,0.0,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
de Haas Local,0.688994,-0.400221,True,0.889166,0.84276,0.933164,False,0.871577,0.813243,0.921598,...,69.189602,6.740331,43,360,30.810398,10.669975,Thijmen_local_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (hidden nodule),0.221799,-1.221758,True,0.827652,0.78267,0.868899,False,0.76338,0.702393,0.821429,...,69.189602,6.740331,43,360,30.810398,10.669975,Thijmen_global_hidden_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (shown nodule),0.270828,-1.101159,True,0.888173,0.847957,0.925497,False,0.837073,0.772039,0.898329,...,69.189602,6.740331,43,360,30.810398,10.669975,Thijmen_global_show_cal,ManufacturersModelName,Volume Zoom,scanner
Sybil year 1,0.341261,-0.951676,True,0.899711,0.857764,0.937866,False,0.857508,0.792547,0.913938,...,69.189602,6.740331,43,360,30.810398,10.669975,sybil_year1,ManufacturersModelName,Volume Zoom,scanner
