In [None]:
import pandas as pd
import os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import json
from IPython.display import display, Markdown

import sys
sys.path.append('../')

from evalutils.roc import get_bootstrapped_roc_ci_curves
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

from utilities import data, roc, threshold

## directory where results are
EXPERIMENT_DIR = f"/data/bodyct/experiments/lung-malignancy-fairness-shaurya"
NLST_PREDS = f"{EXPERIMENT_DIR}/nlst"

TEAMS_DIR = "C:/Users/shaur/OneDrive - Radboudumc/Documents - Master - Shaurya Gaur/General/Malignancy-Estimation Results"
NLST_PREDS = f"{TEAMS_DIR}/nlst" ## Comment out if not using Teams backup (aka Chansey is up :)
RESULTS_DIR = f"{TEAMS_DIR}/fairness-analysis-results"

In [3]:
policies = (
    ("Sensitivity", 0.9),
    ("Sensitivity", 1.0),
    ("Specificity", 0.9),
    ("Specificity", 1.0),
    # ("Youden J", 1.0),  ## Max J statistic
)

# Run and save analysis

## DLCST

In [10]:
dlcst_preds = pd.read_csv(f"{TEAMS_DIR}/dlcst/calibrated_dlcst_thijmen_kiran_sybil_malignancy_estimation_results.csv", header=0)
dlcst_preds

Unnamed: 0,PatientID,StudyDate,SeriesInstanceUID,Age,Sex,FamilyHistoryLungCa,Emphysema,NoduleCountPerScan,sybil_year1,sybil_year2,sybil_year3,sybil_year4,sybil_year5,sybil_year6,PanCan2b,Ensemble_Kiran,thijmen_mean,label,Ensemble_Kiran_cal,thijmen_mean_cal
0,4,20050124,1.2.840.113704.1.111.4964.1106577805.10,55,2,0,0,9,0.021629,0.038573,0.071919,0.079270,0.095846,0.135681,0.053366,0.082652,0.166209,0,0.119066,0.097887
1,35,20051208,1.2.840.113704.1.111.5776.1134059140.11,56,1,1,1,2,0.001170,0.002554,0.007835,0.011039,0.018442,0.030460,0.009543,0.000408,0.003368,0,0.018347,0.006485
2,38,20060109,1.2.840.113704.1.111.2004.1136823831.14,62,2,0,1,4,0.001784,0.003870,0.007835,0.012797,0.019229,0.032957,0.006734,0.002702,0.065888,0,0.035409,0.048255
3,47,20051214,1.2.840.113704.1.111.8148.1134579622.14,57,1,0,1,1,0.003951,0.015674,0.025373,0.034010,0.040605,0.058852,0.007944,0.084158,0.423341,0,0.119856,0.226322
4,56,20051213,1.2.840.113704.1.111.2744.1134487263.11,64,1,0,1,3,0.000000,0.001574,0.003791,0.006847,0.010381,0.017287,0.000899,0.000013,0.005590,0,0.005651,0.008965
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
594,4057,20060314,1.2.840.113704.1.111.4796.1142355218.14,69,1,0,1,2,0.168810,0.273494,0.257961,0.294720,0.327062,0.383196,0.380198,0.893933,0.682322,0,0.439657,0.400826
595,4063,20060313,1.2.840.113704.1.111.5104.1142267340.10,55,1,0,1,4,0.001965,0.006793,0.013618,0.017289,0.021685,0.035951,0.000000,0.000099,0.103020,0,0.011303,0.067322
596,4079,20060328,1.2.840.113704.1.111.1308.1143556124.11,52,2,0,0,1,0.003951,0.011459,0.025373,0.034010,0.040101,0.058852,0.039054,0.070744,0.121373,0,0.112500,0.076349
597,4098,20060403,1.2.840.113704.1.111.5848.1144079789.11,54,1,0,1,3,0.117795,0.167839,0.189976,0.217799,0.229753,0.300137,0.134158,0.452513,0.347016,0,0.241085,0.185779


In [11]:
dlcst_democols = {
    'cat': {'demo': ['Sex'], 'other': ['FamilyHistoryLungCa', 'Emphysema']},
    'num': {'demo': ['Age'], 'other': ['NoduleCountPerScan']}
}

In [12]:
dlcst_preds, dlcst_democols = data.bin_numerical_columns(dlcst_preds, dlcst_democols)
dlcst_democols

{'cat': {'demo': ['Age > 61', 'Sex'],
  'other': ['Emphysema', 'FamilyHistoryLungCa', 'NoduleCountPerScan > 1']},
 'num': {'demo': ['Age'], 'other': ['NoduleCountPerScan']}}

In [13]:
DLCST_MODELCOLS = {
    "Venkadesh": "Ensemble_Kiran_cal",
    "de Haas": "thijmen_mean_cal",
    "PanCan2b": "PanCan2b",
    "Sybil year 1": "sybil_year1",
    # "Sybil year 2": "sybil_year2",
    # "Sybil year 3": "sybil_year3",
    # "Sybil year 4": "sybil_year4",
    # "Sybil year 5": "sybil_year5",
    # "Sybil year 6": "sybil_year6",
}

In [14]:
result_df = roc.all_results_subgroups_models(dlcst_preds, dlcst_democols['cat'], models=DLCST_MODELCOLS, csvpath=f"{RESULTS_DIR}/auroc-dlcst-{len(dlcst_preds)}.csv", num_bootstraps=1000)
result_df

Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_ben,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,attribute,category
Venkadesh,0.99944,-0.000702,True,0.922228,0.867057,0.967006,False,0.922195,0.852941,0.969561,...,133,27.378965,18.902439,28,407,72.621035,6.436782,Ensemble_Kiran_cal,Age > 61,demo
de Haas,0.78118,0.277781,True,0.911341,0.851345,0.956811,False,0.924917,0.859205,0.972071,...,133,27.378965,18.902439,28,407,72.621035,6.436782,thijmen_mean_cal,Age > 61,demo
PanCan2b,0.461394,0.736553,True,0.853788,0.785511,0.913178,False,0.89616,0.839143,0.94508,...,133,27.378965,18.902439,28,407,72.621035,6.436782,PanCan2b,Age > 61,demo
Sybil year 1,0.985486,0.018192,True,0.863703,0.78062,0.938329,False,0.864791,0.781237,0.934117,...,133,27.378965,18.902439,28,407,72.621035,6.436782,sybil_year1,Age > 61,demo
Venkadesh,0.449789,0.755767,1.0,0.908799,0.844736,0.963213,2.0,0.943923,0.914397,0.971301,...,291,53.923205,9.907121,27,249,46.076795,9.782609,Ensemble_Kiran_cal,Sex,demo
de Haas,0.373472,0.889989,1.0,0.903686,0.836199,0.958703,2.0,0.945371,0.912605,0.97561,...,291,53.923205,9.907121,27,249,46.076795,9.782609,thijmen_mean_cal,Sex,demo
PanCan2b,0.354384,-0.92612,1.0,0.911309,0.861594,0.952733,2.0,0.85881,0.787938,0.916154,...,291,53.923205,9.907121,27,249,46.076795,9.782609,PanCan2b,Sex,demo
Sybil year 1,0.88904,-0.13952,1.0,0.869307,0.781449,0.943515,2.0,0.860914,0.781352,0.929134,...,291,53.923205,9.907121,27,249,46.076795,9.782609,sybil_year1,Sex,demo
Venkadesh,0.171382,1.367777,1.0,0.907955,0.855,0.952525,0.0,0.967755,0.941667,0.989286,...,361,67.612688,10.864198,15,179,32.387312,7.731959,Ensemble_Kiran_cal,Emphysema,other
de Haas,0.440274,0.77173,1.0,0.912947,0.862238,0.959383,0.0,0.950535,0.911111,0.98018,...,361,67.612688,10.864198,15,179,32.387312,7.731959,thijmen_mean_cal,Emphysema,other


### Thresholds

In [15]:
dlcst_policies, _ = threshold.get_threshold_policies(dlcst_preds, models=DLCST_MODELCOLS, policies=policies, brock=True)
df = threshold.all_results_subgroups_models(dlcst_preds, dlcst_democols['cat'], policies=dlcst_policies, models=DLCST_MODELCOLS, csvpath=f'{RESULTS_DIR}/threshold-perfs-dlcst-{len(dlcst_preds)}.csv', plot=False, num_bootstraps=1000)
df

Unnamed: 0,num,mal,ben,tp,fp,tn,fn,tpr,fpr,fnr,...,for_hi,acc_hi,j_hi,f1_hi,mcc_hi,threshold_hi,iter_hi,col,attribute,category
0,599.0,59.0,540.0,54.0,76.0,464.0,5.0,0.915254,0.140741,0.084746,...,0.021102,0.893155,0.846743,0.652865,0.635712,0.001,974.025,Ensemble_Kiran_cal,Age > 61,demo
1,599.0,59.0,540.0,54.0,119.0,421.0,5.0,0.915254,0.220370,0.084746,...,0.022336,0.826377,0.768788,0.544819,0.529723,0.001,974.025,thijmen_mean_cal,Age > 61,demo
2,599.0,59.0,540.0,54.0,160.0,380.0,5.0,0.915254,0.296296,0.084746,...,0.025067,0.759641,0.693885,0.465985,0.451115,0.001,974.025,PanCan2b,Age > 61,demo
3,599.0,59.0,540.0,57.0,453.0,87.0,2.0,0.966102,0.838889,0.033898,...,0.056835,0.275459,0.176810,0.244491,0.146191,0.001,974.025,sybil_year1,Age > 61,demo
4,599.0,59.0,540.0,59.0,432.0,108.0,0.0,1.000000,0.800000,0.000000,...,0.000000,0.312229,0.233043,0.259802,0.179057,0.000,974.025,Ensemble_Kiran_cal,Age > 61,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55,266.0,24.0,242.0,1.0,0.0,242.0,23.0,0.041667,0.000000,0.958333,...,0.121733,0.943609,0.142857,0.250000,0.374572,0.402,974.025,sybil_year1,NoduleCountPerScan > 1,other
56,266.0,24.0,242.0,23.0,81.0,161.0,1.0,0.958333,0.334711,0.041667,...,0.019231,0.744361,0.708515,0.455299,0.445093,0.060,974.025,Ensemble_Kiran_cal,NoduleCountPerScan > 1,other
57,266.0,24.0,242.0,23.0,79.0,163.0,1.0,0.958333,0.326446,0.041667,...,0.018871,0.755639,0.719694,0.465764,0.452399,0.060,974.025,thijmen_mean_cal,NoduleCountPerScan > 1,other
58,266.0,24.0,242.0,16.0,38.0,204.0,8.0,0.666667,0.157025,0.333333,...,0.066038,0.872180,0.697125,0.541696,0.506834,0.060,974.025,PanCan2b,NoduleCountPerScan > 1,other


In [16]:
# threshold.plot_threshold_stats_subgroups(dlcst_preds, 'Sex', policies=dlcst_policies, models=DLCST_MODELCOLS, stats=df.query('attribute == "Sex"'), plot_metrics=['fpr', 'fnr'], diff=True)

## NLST

In [5]:
nlst_preds_nodule = pd.read_csv(f"{NLST_PREDS}/nlst_demov4_allmodels_cal.csv")

with open(f'{NLST_PREDS}/nlst_demo_v4_cols.json') as json_data:
    nlst_democols_original = json.load(json_data)
    json_data.close()

nlst_democols_original['cat'].pop('lungcanc')
nlst_democols_original

{'num': {'demo': ['BMI', 'Age', 'height', 'weight'],
  'smoke': ['smokeage', 'smokeday', 'smokeyr', 'pkyr'],
  'nodule': ['CoordX', 'CoordZ', 'CoordY', 'Mean_Entropy_Kiran'],
  'other': ['NoduleCounts', 'Diameter_mm', 'SliceCount']},
 'cat': {'demo': ['Overweight',
   'educat',
   'Gender',
   'Married',
   'HighSchoolPlus',
   'NonHispanicWhite',
   'Unfinished_ed',
   'WhiteOrBlack',
   'marital',
   'ethnic',
   'race'],
  'smoke': ['smokelive', 'cigar', 'cigsmok', 'smokework', 'pipe'],
  'work': ['wrkbaki',
   'wrkfoun',
   'wrkchem',
   'wrkasbe',
   'wrkfire',
   'wrksand',
   'wrkfarm',
   'wrkcoal',
   'wrkpain',
   'wrkweld',
   'wrkflou',
   'wrkbutc',
   'wrkhard',
   'wrkcott'],
  'disease': ['diagasbe',
   'diagchas',
   'diagpneu',
   'diagstro',
   'diagemph',
   'diagbron',
   'diagsili',
   'diagsarc',
   'diaghear',
   'diagdiab',
   'diagadas',
   'diagcopd',
   'diagfibr',
   'diagtube',
   'diaghype',
   'diagchro'],
  'canchist': ['canckidn',
   'cancphar',
   'ca

### Nodule-level

In [12]:
nlst_preds, nlst_democols, MODELS = data.prep_nlst_preds(nlst_preds_nodule, nlst_democols_original, scanlevel=False, tijmen=False, sybil=False)
print(len(nlst_preds), " nodules")

result_df = roc.all_results_subgroups_models(nlst_preds, nlst_democols['cat'], models=MODELS, csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_preds)}.csv", num_bootstraps=1000)
result_df

16077  nodules


  se = np.sqrt(


Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_ben,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,attribute,category
Venkadesh,0.196405,1.291863,True,0.902206,0.891837,0.913073,False,0.917049,0.904711,0.929655,...,8398,57.237047,8.737231,445,6430,42.762953,6.472727,DL_cal,Age > 61,demo
de Haas Local,0.489252,0.6915,True,0.892639,0.882533,0.903058,False,0.901076,0.887042,0.915745,...,8398,57.237047,8.737231,445,6430,42.762953,6.472727,Thijmen_local_cal,Age > 61,demo
de Haas Global (hidden nodule),0.308993,1.017337,True,0.856749,0.847445,0.86684,False,0.870567,0.857528,0.883171,...,8398,57.237047,8.737231,445,6430,42.762953,6.472727,Thijmen_global_hidden_cal,Age > 61,demo
de Haas Global (shown nodule),0.185338,-1.324496,True,0.895562,0.886433,0.905111,False,0.878587,0.865652,0.891992,...,8398,57.237047,8.737231,445,6430,42.762953,6.472727,Thijmen_global_show_cal,Age > 61,demo
PanCan2b,0.449563,0.756145,True,0.829947,0.816548,0.843712,False,0.840948,0.822406,0.858561,...,8398,57.237047,8.737231,445,6430,42.762953,6.472727,PanCan2b,Age > 61,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venkadesh,0.211865,1.248452,LightSpeed QX/i,0.896388,0.878454,0.915726,Volume Zoom,0.920412,0.899441,0.941253,...,2041,14.194190,10.560911,181,3036,20.009952,5.626360,DL_cal,ManufacturersModelName,scanner
de Haas Local,0.4529,0.750589,LightSpeed QX/i,0.876375,0.855224,0.89731,Volume Zoom,0.892326,0.866555,0.916704,...,2041,14.194190,10.560911,181,3036,20.009952,5.626360,Thijmen_local_cal,ManufacturersModelName,scanner
de Haas Global (hidden nodule),0.83896,0.203224,LightSpeed QX/i,0.862938,0.844004,0.882767,Volume Zoom,0.867532,0.849395,0.884886,...,2041,14.194190,10.560911,181,3036,20.009952,5.626360,Thijmen_global_hidden_cal,ManufacturersModelName,scanner
de Haas Global (shown nodule),0.776331,0.284104,LightSpeed QX/i,0.897734,0.880145,0.915242,Volume Zoom,0.903433,0.883657,0.92236,...,2041,14.194190,10.560911,181,3036,20.009952,5.626360,Thijmen_global_show_cal,ManufacturersModelName,scanner


In [13]:
nlst_policies, _ = threshold.get_threshold_policies(nlst_preds, models=MODELS, policies=policies, brock=True)
df = threshold.all_results_subgroups_models(nlst_preds, nlst_democols['cat'], policies=nlst_policies, models=MODELS, csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_preds)}.csv', plot=False, num_bootstraps=1000)
df

Unnamed: 0,num,mal,ben,tp,fp,tn,fn,tpr,fpr,fnr,...,for_hi,acc_hi,j_hi,f1_hi,mcc_hi,threshold_hi,iter_hi,col,attribute,category
0,16077.0,1249.0,14828.0,1127.0,3895.0,10933.0,122.0,0.902322,0.262679,0.097678,...,0.012881,0.757172,0.656727,0.374074,0.382447,0.015,974.025,DL_cal,Age > 61,demo
1,16077.0,1249.0,14828.0,1125.0,4211.0,10617.0,124.0,0.900721,0.283990,0.099279,...,0.013605,0.737513,0.634705,0.357069,0.364087,0.015,974.025,Thijmen_local_cal,Age > 61,demo
2,16077.0,1249.0,14828.0,1125.0,4766.0,10062.0,124.0,0.900721,0.321419,0.099279,...,0.014294,0.702619,0.596042,0.328722,0.334274,0.015,974.025,Thijmen_global_hidden_cal,Age > 61,demo
3,16077.0,1249.0,14828.0,1126.0,4262.0,10566.0,123.0,0.901521,0.287429,0.098479,...,0.013590,0.734030,0.631576,0.354284,0.360909,0.015,974.025,Thijmen_global_show_cal,Age > 61,demo
4,16077.0,1249.0,14828.0,1131.0,6401.0,8427.0,118.0,0.905524,0.431683,0.094476,...,0.016592,0.602724,0.491774,0.270082,0.266026,0.015,974.025,PanCan2b,Age > 61,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
570,3217.0,181.0,3036.0,159.0,602.0,2434.0,22.0,0.878453,0.198287,0.121547,...,0.013072,0.819716,0.731498,0.375391,0.402621,0.060,974.025,DL_cal,ManufacturersModelName,scanner
571,3217.0,181.0,3036.0,152.0,685.0,2351.0,29.0,0.839779,0.225626,0.160221,...,0.016843,0.790799,0.667569,0.332696,0.356564,0.060,974.025,Thijmen_local_cal,ManufacturersModelName,scanner
572,3217.0,181.0,3036.0,168.0,969.0,2067.0,13.0,0.928177,0.319170,0.071823,...,0.009859,0.709978,0.646072,0.285494,0.320311,0.060,974.025,Thijmen_global_hidden_cal,ManufacturersModelName,scanner
573,3217.0,181.0,3036.0,161.0,741.0,2295.0,20.0,0.889503,0.244071,0.110497,...,0.012664,0.778054,0.694118,0.332057,0.364047,0.060,974.025,Thijmen_global_show_cal,ManufacturersModelName,scanner


In [14]:
nlst_preds, nlst_democols, MODELS = data.prep_nlst_preds(nlst_preds_nodule, nlst_democols_original, scanlevel=False, tijmen=True, sybil=False)
print(len(nlst_preds), " nodules")

result_df = roc.all_results_subgroups_models(nlst_preds, nlst_democols['cat'], models=MODELS, csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_preds)}.csv", num_bootstraps=1000)
result_df

3240  nodules


Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_ben,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,attribute,category
Venkadesh,0.67436,0.420171,True,0.893538,0.870565,0.915381,False,0.904495,0.87796,0.929234,...,1755,60.061728,9.815005,90,1204,39.938272,6.955178,DL_cal,Age > 61,demo
de Haas Combined,0.475355,-0.713794,True,0.911809,0.892707,0.928925,False,0.893049,0.862531,0.920172,...,1755,60.061728,9.815005,90,1204,39.938272,6.955178,Thijmen_mean_cal,Age > 61,demo
de Haas Local,0.923058,-0.096583,True,0.901864,0.880172,0.921145,False,0.899335,0.866491,0.929144,...,1755,60.061728,9.815005,90,1204,39.938272,6.955178,Thijmen_local_cal,Age > 61,demo
de Haas Global (hidden nodule),0.214665,-1.240839,True,0.867805,0.848135,0.887411,False,0.829183,0.797217,0.862259,...,1755,60.061728,9.815005,90,1204,39.938272,6.955178,Thijmen_global_hidden_cal,Age > 61,demo
de Haas Global (shown nodule),0.109387,-1.600953,True,0.898928,0.878497,0.918454,False,0.852203,0.817184,0.887058,...,1755,60.061728,9.815005,90,1204,39.938272,6.955178,Thijmen_global_show_cal,Age > 61,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
de Haas Combined,0.524964,-0.635711,LightSpeed QX/i,0.91814,0.885962,0.945324,Volume Zoom,0.891982,0.844494,0.93464,...,437,15.308642,11.895161,41,599,19.753086,6.406250,Thijmen_mean_cal,ManufacturersModelName,scanner
de Haas Local,0.205106,-1.267137,LightSpeed QX/i,0.919946,0.887863,0.949107,Volume Zoom,0.864871,0.807395,0.920532,...,437,15.308642,11.895161,41,599,19.753086,6.406250,Thijmen_local_cal,ManufacturersModelName,scanner
de Haas Global (hidden nodule),0.697835,-0.388244,LightSpeed QX/i,0.887585,0.851441,0.919486,Volume Zoom,0.870055,0.830912,0.910134,...,437,15.308642,11.895161,41,599,19.753086,6.406250,Thijmen_global_hidden_cal,ManufacturersModelName,scanner
de Haas Global (shown nodule),0.569521,-0.568757,LightSpeed QX/i,0.922521,0.891095,0.949959,Volume Zoom,0.899759,0.857001,0.938525,...,437,15.308642,11.895161,41,599,19.753086,6.406250,Thijmen_global_show_cal,ManufacturersModelName,scanner


In [15]:
nlst_policies, _ = threshold.get_threshold_policies(nlst_preds, models=MODELS, policies=policies, brock=True)
df = threshold.all_results_subgroups_models(nlst_preds, nlst_democols['cat'], policies=nlst_policies, models=MODELS, csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_preds)}.csv', plot=False, num_bootstraps=1000)
df

Unnamed: 0,num,mal,ben,tp,fp,tn,fn,tpr,fpr,fnr,...,for_hi,acc_hi,j_hi,f1_hi,mcc_hi,threshold_hi,iter_hi,col,attribute,category
0,3240.0,281.0,2959.0,253.0,906.0,2053.0,28.0,0.900356,0.306185,0.099644,...,0.018572,0.727160,0.630275,0.380980,0.377121,0.015,974.025,DL_cal,Age > 61,demo
1,3240.0,281.0,2959.0,253.0,747.0,2212.0,28.0,0.900356,0.252450,0.099644,...,0.017614,0.775309,0.683895,0.427617,0.423504,0.015,974.025,Thijmen_mean_cal,Age > 61,demo
2,3240.0,281.0,2959.0,253.0,767.0,2192.0,28.0,0.900356,0.259209,0.099644,...,0.017506,0.769144,0.677772,0.422672,0.418432,0.015,974.025,Thijmen_local_cal,Age > 61,demo
3,3240.0,281.0,2959.0,253.0,969.0,1990.0,28.0,0.900356,0.327475,0.099644,...,0.018953,0.708341,0.609638,0.367239,0.360003,0.015,974.025,Thijmen_global_hidden_cal,Age > 61,demo
4,3240.0,281.0,2959.0,253.0,944.0,2015.0,28.0,0.900356,0.319027,0.099644,...,0.019038,0.715131,0.618880,0.371580,0.365825,0.015,974.025,Thijmen_global_show_cal,Age > 61,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565,640.0,41.0,599.0,37.0,162.0,437.0,4.0,0.902439,0.270451,0.097561,...,0.018655,0.773438,0.722241,0.383147,0.400591,0.060,974.025,Thijmen_mean_cal,ManufacturersModelName,scanner
566,640.0,41.0,599.0,33.0,142.0,457.0,8.0,0.804878,0.237062,0.195122,...,0.029821,0.795312,0.687850,0.387353,0.389639,0.060,974.025,Thijmen_local_cal,ManufacturersModelName,scanner
567,640.0,41.0,599.0,39.0,184.0,415.0,2.0,0.951220,0.307179,0.048780,...,0.012021,0.743750,0.712386,0.366788,0.390414,0.060,974.025,Thijmen_global_hidden_cal,ManufacturersModelName,scanner
568,640.0,41.0,599.0,36.0,138.0,461.0,5.0,0.878049,0.230384,0.121951,...,0.021413,0.806250,0.744124,0.414417,0.426047,0.060,974.025,Thijmen_global_show_cal,ManufacturersModelName,scanner


### Scan-level

In [23]:
nlst_preds, nlst_democols, MODELS = data.prep_nlst_preds(nlst_preds_nodule, nlst_democols_original, scanlevel=True, tijmen=False, sybil=True)
print(len(nlst_preds), " scans")
MODELS

5911  scans


{'Venkadesh': 'DL_cal',
 'de Haas Local': 'Thijmen_local_cal',
 'de Haas Global (hidden nodule)': 'Thijmen_global_hidden_cal',
 'de Haas Global (shown nodule)': 'Thijmen_global_show_cal',
 'Sybil year 1': 'sybil_year1',
 'PanCan2b': 'PanCan2b'}

In [16]:
nlst_preds, nlst_democols, MODELS = data.prep_nlst_preds(nlst_preds_nodule, nlst_democols_original, scanlevel=True, tijmen=False, sybil=True)
print(len(nlst_preds), " scans")

result_df = roc.all_results_subgroups_models(nlst_preds, nlst_democols['cat'], models=MODELS, csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_preds)}.csv", num_bootstraps=1000)
result_df

5911  scans


Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_ben,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,attribute,category
Venkadesh,0.141095,1.471727,True,0.877624,0.859819,0.89614,False,0.903877,0.884958,0.92415,...,2967,56.251057,10.766917,223,2363,43.748943,8.623357,DL_cal,Age > 61,demo
de Haas Local,0.337176,0.95976,True,0.86451,0.84684,0.884064,False,0.882726,0.858917,0.9059,...,2967,56.251057,10.766917,223,2363,43.748943,8.623357,Thijmen_local_cal,Age > 61,demo
de Haas Global (hidden nodule),0.304889,1.026006,True,0.787028,0.767503,0.807933,False,0.809859,0.785691,0.834875,...,2967,56.251057,10.766917,223,2363,43.748943,8.623357,Thijmen_global_hidden_cal,Age > 61,demo
de Haas Global (shown nodule),0.55879,-0.584639,True,0.856412,0.840466,0.874396,False,0.844493,0.819523,0.867883,...,2967,56.251057,10.766917,223,2363,43.748943,8.623357,Thijmen_global_show_cal,Age > 61,demo
Sybil year 1,0.677831,0.415425,True,0.838174,0.815663,0.860905,False,0.84675,0.818444,0.875368,...,2967,56.251057,10.766917,223,2363,43.748943,8.623357,sybil_year1,Age > 61,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
de Haas Local,0.986539,-0.016872,Volume Zoom,0.886477,0.848204,0.920858,Sensation 16,0.885963,0.845861,0.921286,...,1204,22.128235,7.951070,96,719,13.787853,11.779141,Thijmen_local_cal,ManufacturersModelName,scanner
de Haas Global (hidden nodule),0.856321,-0.18106,Volume Zoom,0.806838,0.772187,0.84004,Sensation 16,0.800175,0.763092,0.83586,...,1204,22.128235,7.951070,96,719,13.787853,11.779141,Thijmen_global_hidden_cal,ManufacturersModelName,scanner
de Haas Global (shown nodule),0.463772,-0.732649,Volume Zoom,0.872164,0.837737,0.904662,Sensation 16,0.848053,0.813767,0.880548,...,1204,22.128235,7.951070,96,719,13.787853,11.779141,Thijmen_global_show_cal,ManufacturersModelName,scanner
Sybil year 1,0.279203,-1.082112,Volume Zoom,0.8853,0.850243,0.915996,Sensation 16,0.850446,0.800394,0.893168,...,1204,22.128235,7.951070,96,719,13.787853,11.779141,sybil_year1,ManufacturersModelName,scanner


In [17]:
nlst_policies, _ = threshold.get_threshold_policies(nlst_preds, models=MODELS, policies=policies, brock=True)
df = threshold.all_results_subgroups_models(nlst_preds, nlst_democols['cat'], policies=nlst_policies, models=MODELS, csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_preds)}.csv', plot=False, num_bootstraps=1000)
df

Unnamed: 0,num,mal,ben,tp,fp,tn,fn,tpr,fpr,fnr,...,for_hi,acc_hi,j_hi,f1_hi,mcc_hi,threshold_hi,iter_hi,col,attribute,category
0,5911.0,581.0,5330.0,523.0,1807.0,3523.0,58.0,0.900172,0.339024,0.099828,...,0.020266,0.695652,0.587793,0.383230,0.363005,0.015,974.025,DL_cal,Age > 61,demo
1,5911.0,581.0,5330.0,525.0,2095.0,3235.0,56.0,0.903614,0.393058,0.096386,...,0.021746,0.647610,0.536399,0.349970,0.325298,0.015,974.025,Thijmen_local_cal,Age > 61,demo
2,5911.0,581.0,5330.0,523.0,2475.0,2855.0,58.0,0.900172,0.464353,0.099828,...,0.025079,0.583662,0.462006,0.311922,0.277593,0.015,974.025,Thijmen_global_hidden_cal,Age > 61,demo
3,5911.0,581.0,5330.0,523.0,2059.0,3271.0,58.0,0.900172,0.386304,0.099828,...,0.021736,0.653866,0.540466,0.351082,0.327047,0.015,974.025,Thijmen_global_show_cal,Age > 61,demo
4,5911.0,581.0,5330.0,527.0,2644.0,2686.0,54.0,0.907057,0.496060,0.092943,...,0.025223,0.555921,0.437009,0.299772,0.262829,0.015,974.025,sybil_year1,Age > 61,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
655,1308.0,104.0,1204.0,90.0,368.0,836.0,14.0,0.865385,0.305648,0.134615,...,0.025241,0.733180,0.632708,0.370389,0.365581,0.060,974.025,Thijmen_local_cal,ManufacturersModelName,scanner
656,1308.0,104.0,1204.0,96.0,572.0,632.0,8.0,0.923077,0.475083,0.076923,...,0.022015,0.586391,0.502424,0.288946,0.279952,0.060,974.025,Thijmen_global_hidden_cal,ManufacturersModelName,scanner
657,1308.0,104.0,1204.0,94.0,431.0,773.0,10.0,0.903846,0.357973,0.096154,...,0.021603,0.688857,0.605571,0.342776,0.342745,0.060,974.025,Thijmen_global_show_cal,ManufacturersModelName,scanner
658,1308.0,104.0,1204.0,65.0,87.0,1117.0,39.0,0.625000,0.072259,0.375000,...,0.044986,0.918960,0.640041,0.578156,0.542346,0.060,974.025,sybil_year1,ManufacturersModelName,scanner


In [18]:
nlst_preds, nlst_democols, MODELS = data.prep_nlst_preds(nlst_preds_nodule, nlst_democols_original, scanlevel=True, tijmen=True, sybil=True)
print(len(nlst_preds), " scans")

result_df = roc.all_results_subgroups_models(nlst_preds, nlst_democols['cat'], models=MODELS, csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_preds)}.csv", plot=False, num_bootstraps=1000)
result_df

1172  scans


Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_ben,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,attribute,category
Venkadesh,0.19677,1.290809,True,0.83319,0.781974,0.88481,False,0.886753,0.844595,0.928121,...,591,57.081911,11.659193,49,454,42.918089,9.741551,DL_cal,Age > 61,demo
de Haas Combined,0.732726,0.341502,True,0.861494,0.821164,0.900998,False,0.875573,0.824989,0.916705,...,591,57.081911,11.659193,49,454,42.918089,9.741551,Thijmen_mean_cal,Age > 61,demo
de Haas Local,0.450074,0.755292,True,0.852825,0.809189,0.898429,False,0.883765,0.828703,0.928771,...,591,57.081911,11.659193,49,454,42.918089,9.741551,Thijmen_local_cal,Age > 61,demo
de Haas Global (hidden nodule),0.911239,-0.111476,True,0.774846,0.730144,0.819992,False,0.769348,0.713066,0.822864,...,591,57.081911,11.659193,49,454,42.918089,9.741551,Thijmen_global_hidden_cal,Age > 61,demo
de Haas Global (shown nodule),0.741806,0.329463,True,0.814837,0.770395,0.856655,False,0.829922,0.770117,0.887274,...,591,57.081911,11.659193,49,454,42.918089,9.741551,Thijmen_global_show_cal,Age > 61,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
de Haas Local,0.49966,0.675025,Volume Zoom,0.828564,0.744955,0.901242,Sensation 16,0.873927,0.785915,0.94958,...,233,22.696246,12.406015,19,138,13.395904,12.101911,Thijmen_local_cal,ManufacturersModelName,scanner
de Haas Global (hidden nodule),0.760214,-0.305199,Volume Zoom,0.809611,0.74026,0.870136,Sensation 16,0.786525,0.700992,0.863918,...,233,22.696246,12.406015,19,138,13.395904,12.101911,Thijmen_global_hidden_cal,ManufacturersModelName,scanner
de Haas Global (shown nodule),0.211983,-1.248131,Volume Zoom,0.861805,0.800754,0.91558,Sensation 16,0.769327,0.661339,0.878621,...,233,22.696246,12.406015,19,138,13.395904,12.101911,Thijmen_global_show_cal,ManufacturersModelName,scanner
Sybil year 1,0.513256,-0.653776,Volume Zoom,0.87205,0.811639,0.926766,Sensation 16,0.826622,0.709764,0.92982,...,233,22.696246,12.406015,19,138,13.395904,12.101911,sybil_year1,ManufacturersModelName,scanner


In [19]:
nlst_policies, _ = threshold.get_threshold_policies(nlst_preds, models=MODELS, policies=policies, brock=True)
df = threshold.all_results_subgroups_models(nlst_preds, nlst_democols['cat'], policies=nlst_policies, models=MODELS, csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_preds)}.csv', plot=False, num_bootstraps=1000)
df

Unnamed: 0,num,mal,ben,tp,fp,tn,fn,tpr,fpr,fnr,...,for_hi,acc_hi,j_hi,f1_hi,mcc_hi,threshold_hi,iter_hi,col,attribute,category
0,1172.0,127.0,1045.0,115.0,485.0,560.0,12.0,0.905512,0.464115,0.094488,...,0.034175,0.603264,0.498443,0.358290,0.313953,0.014,974.025,DL_cal,Age > 61,demo
1,1172.0,127.0,1045.0,115.0,381.0,664.0,12.0,0.905512,0.364593,0.094488,...,0.028660,0.691126,0.595970,0.415184,0.382190,0.014,974.025,Thijmen_mean_cal,Age > 61,demo
2,1172.0,127.0,1045.0,115.0,405.0,640.0,12.0,0.905512,0.387560,0.094488,...,0.028491,0.670648,0.573269,0.399410,0.363792,0.014,974.025,Thijmen_local_cal,Age > 61,demo
3,1172.0,127.0,1045.0,115.0,506.0,539.0,12.0,0.905512,0.484211,0.094488,...,0.035029,0.585324,0.477641,0.346907,0.302407,0.014,974.025,Thijmen_global_hidden_cal,Age > 61,demo
4,1172.0,127.0,1045.0,115.0,503.0,542.0,12.0,0.905512,0.481340,0.094488,...,0.034298,0.590444,0.481592,0.349120,0.302700,0.014,974.025,Thijmen_global_show_cal,Age > 61,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
590,266.0,33.0,233.0,28.0,85.0,148.0,5.0,0.848485,0.364807,0.151515,...,0.063392,0.721805,0.617269,0.484889,0.423780,0.060,974.025,Thijmen_local_cal,ManufacturersModelName,scanner
591,266.0,33.0,233.0,31.0,106.0,127.0,2.0,0.939394,0.454936,0.060606,...,0.040000,0.654135,0.581856,0.452387,0.395933,0.060,974.025,Thijmen_global_hidden_cal,ManufacturersModelName,scanner
592,266.0,33.0,233.0,30.0,77.0,156.0,3.0,0.909091,0.330472,0.090909,...,0.042424,0.751880,0.682636,0.527798,0.480267,0.060,974.025,Thijmen_global_show_cal,ManufacturersModelName,scanner
593,266.0,33.0,233.0,18.0,21.0,212.0,15.0,0.545455,0.090129,0.454545,...,0.097345,0.906015,0.632903,0.634166,0.579414,0.060,974.025,sybil_year1,ManufacturersModelName,scanner


### Demographics, isolating other columns

In [7]:
nlst_preds, nlst_democols, MODELS = data.prep_nlst_preds(nlst_preds_nodule, nlst_democols_original, scanlevel=True, tijmen=False, sybil=True)
print(len(nlst_preds))
nlst_democols

5911


{'num': {'demo': ['BMI', 'Age', 'height', 'weight'],
  'smoke': ['smokeage', 'smokeday', 'smokeyr', 'pkyr'],
  'other': ['NoduleCounts', 'Diameter_mm', 'SliceCount']},
 'cat': {'demo': ['Age > 61',
   'Gender',
   'HighSchoolPlus',
   'Married',
   'NonHispanicWhite',
   'Overweight',
   'Unfinished_ed',
   'WhiteOrBlack',
   'educat',
   'ethnic',
   'height > 68',
   'marital',
   'race',
   'weight > 180'],
  'smoke': ['cigar',
   'cigsmok',
   'pipe',
   'pkyr > 55',
   'smokeage > 16',
   'smokeday > 25',
   'smokelive',
   'smokework',
   'smokeyr > 40'],
  'work': ['wrkbaki',
   'wrkfoun',
   'wrkchem',
   'wrkasbe',
   'wrkfire',
   'wrksand',
   'wrkfarm',
   'wrkcoal',
   'wrkpain',
   'wrkweld',
   'wrkflou',
   'wrkbutc',
   'wrkhard',
   'wrkcott'],
  'disease': ['diagasbe',
   'diagchas',
   'diagpneu',
   'diagstro',
   'diagemph',
   'diagbron',
   'diagsili',
   'diagsarc',
   'diaghear',
   'diagdiab',
   'diagadas',
   'diagcopd',
   'diagfibr',
   'diagtube',
   'di

In [8]:
gender_df = roc.save_results_isolate_confounders(nlst_preds, 'Gender', nlst_democols['cat'], MODELS, csvpath=f'{RESULTS_DIR}/auroc-gender-by-factors-nlst-{len(nlst_preds)}.csv', num_bootstraps=1000)
gender_df

Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,filter_by,filter_val,category
Venkadesh,0.884213,-0.145631,1.0,0.905956,0.878714,0.931407,2.0,0.902027,0.872541,0.934186,...,56.496520,7.871321,108,1017,43.503480,9.600000,DL_cal,Age > 61,0.0,demo
de Haas Local,0.510123,0.658647,1.0,0.875166,0.842110,0.907032,2.0,0.894254,0.860443,0.924237,...,56.496520,7.871321,108,1017,43.503480,9.600000,Thijmen_local_cal,Age > 61,0.0,demo
de Haas Global (hidden nodule),0.014978,-2.432904,1.0,0.848179,0.815810,0.878309,2.0,0.764180,0.724127,0.803389,...,56.496520,7.871321,108,1017,43.503480,9.600000,Thijmen_global_hidden_cal,Age > 61,0.0,demo
de Haas Global (shown nodule),0.398098,-0.845023,1.0,0.860126,0.827791,0.892714,2.0,0.832872,0.795958,0.867581,...,56.496520,7.871321,108,1017,43.503480,9.600000,Thijmen_global_show_cal,Age > 61,0.0,demo
Sybil year 1,0.000169,3.761524,1.0,0.786788,0.744338,0.829957,2.0,0.905246,0.874272,0.935412,...,56.496520,7.871321,108,1017,43.503480,9.600000,sybil_year1,Age > 61,0.0,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
de Haas Local,0.418341,0.809302,1.0,0.872777,0.822095,0.919544,2.0,0.906687,0.856513,0.948209,...,60.321101,7.731305,43,476,39.678899,8.285164,Thijmen_local_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (hidden nodule),0.594455,-0.532392,1.0,0.818488,0.775956,0.857272,2.0,0.790763,0.731651,0.843911,...,60.321101,7.731305,43,476,39.678899,8.285164,Thijmen_global_hidden_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (shown nodule),0.349768,0.935040,1.0,0.855157,0.815148,0.896599,2.0,0.896166,0.837969,0.942044,...,60.321101,7.731305,43,476,39.678899,8.285164,Thijmen_global_show_cal,ManufacturersModelName,Volume Zoom,scanner
Sybil year 1,0.057292,1.901075,1.0,0.850117,0.798410,0.897959,2.0,0.928019,0.882871,0.966378,...,60.321101,7.731305,43,476,39.678899,8.285164,sybil_year1,ManufacturersModelName,Volume Zoom,scanner


In [9]:
race_df = roc.save_results_isolate_confounders(nlst_preds, 'WhiteOrBlack', nlst_democols['cat'], MODELS, csvpath=f'{RESULTS_DIR}/auroc-race-by-factors-nlst-{len(nlst_preds)}.csv', num_bootstraps=1000)
race_df

  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(
  se = np.sqrt(


Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,filter_by,filter_val,category
Venkadesh,0.744035,-0.326515,1.0,0.903781,0.882382,0.924980,2.0,0.882700,0.788183,0.957637,...,93.50348,8.395368,12,70,3.170920,14.634146,DL_cal,Age > 61,0.0,demo
de Haas Local,0.623007,0.491593,1.0,0.879247,0.853497,0.903029,2.0,0.908341,0.821429,0.972973,...,93.50348,8.395368,12,70,3.170920,14.634146,Thijmen_local_cal,Age > 61,0.0,demo
de Haas Global (hidden nodule),0.606608,-0.514922,1.0,0.813340,0.786636,0.838516,2.0,0.772134,0.622619,0.894977,...,93.50348,8.395368,12,70,3.170920,14.634146,Thijmen_global_hidden_cal,Age > 61,0.0,demo
de Haas Global (shown nodule),0.774396,-0.286630,1.0,0.844453,0.818944,0.870025,2.0,0.823049,0.665672,0.952381,...,93.50348,8.395368,12,70,3.170920,14.634146,Thijmen_global_show_cal,Age > 61,0.0,demo
Sybil year 1,0.840850,0.200807,1.0,0.847350,0.818378,0.875023,2.0,0.861231,0.765685,0.940878,...,93.50348,8.395368,12,70,3.170920,14.634146,sybil_year1,Age > 61,0.0,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
de Haas Local,0.818468,0.229515,1.0,0.885638,0.853645,0.920035,2.0,0.917280,0.795455,1.000000,...,94.41896,8.178138,2,43,3.440367,4.444444,Thijmen_local_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (hidden nodule),0.783390,-0.274903,1.0,0.807492,0.772214,0.841398,2.0,0.751463,0.590909,0.896341,...,94.41896,8.178138,2,43,3.440367,4.444444,Thijmen_global_hidden_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (shown nodule),0.002966,2.971232,1.0,0.869623,0.836893,0.901470,2.0,0.995000,1.000000,1.000000,...,94.41896,8.178138,2,43,3.440367,4.444444,Thijmen_global_show_cal,ManufacturersModelName,Volume Zoom,scanner
Sybil year 1,0.823027,-0.223653,1.0,0.885600,0.851022,0.918815,2.0,0.846289,0.651163,1.000000,...,94.41896,8.178138,2,43,3.440367,4.444444,sybil_year1,ManufacturersModelName,Volume Zoom,scanner


In [10]:
overweight_df = roc.save_results_isolate_confounders(nlst_preds, 'Overweight', nlst_democols['cat'], MODELS, csvpath=f'{RESULTS_DIR}/auroc-overweight-by-factors-nlst-{len(nlst_preds)}.csv', num_bootstraps=1000)
overweight_df

Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,filter_by,filter_val,category
Venkadesh,0.307227,-1.021057,True,0.913301,0.890445,0.937428,False,0.8833,0.843533,0.920617,...,68.561485,8.178229,78,735,31.438515,9.594096,DL_cal,Age > 61,0.0,demo
de Haas Local,0.537052,-0.617276,True,0.889163,0.857686,0.918939,False,0.869912,0.82981,0.907515,...,68.561485,8.178229,78,735,31.438515,9.594096,Thijmen_local_cal,Age > 61,0.0,demo
de Haas Global (hidden nodule),0.424049,0.799417,True,0.800702,0.7677,0.832463,False,0.829194,0.792832,0.865418,...,68.561485,8.178229,78,735,31.438515,9.594096,Thijmen_global_hidden_cal,Age > 61,0.0,demo
de Haas Global (shown nodule),0.147365,-1.448902,True,0.860783,0.832018,0.888554,False,0.810073,0.760422,0.856329,...,68.561485,8.178229,78,735,31.438515,9.594096,Thijmen_global_show_cal,Age > 61,0.0,demo
Sybil year 1,0.5894,-0.539706,True,0.852504,0.816772,0.886023,False,0.834032,0.786166,0.878464,...,68.561485,8.178229,78,735,31.438515,9.594096,sybil_year1,Age > 61,0.0,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
de Haas Local,0.688994,-0.400221,True,0.889166,0.84276,0.933164,False,0.871577,0.813243,0.921598,...,69.189602,6.740331,43,360,30.810398,10.669975,Thijmen_local_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (hidden nodule),0.221799,-1.221758,True,0.827652,0.78267,0.868899,False,0.76338,0.702393,0.821429,...,69.189602,6.740331,43,360,30.810398,10.669975,Thijmen_global_hidden_cal,ManufacturersModelName,Volume Zoom,scanner
de Haas Global (shown nodule),0.270828,-1.101159,True,0.888173,0.847957,0.925497,False,0.837073,0.772039,0.898329,...,69.189602,6.740331,43,360,30.810398,10.669975,Thijmen_global_show_cal,ManufacturersModelName,Volume Zoom,scanner
Sybil year 1,0.341261,-0.951676,True,0.899711,0.857764,0.937866,False,0.857508,0.792547,0.913938,...,69.189602,6.740331,43,360,30.810398,10.669975,sybil_year1,ManufacturersModelName,Volume Zoom,scanner


# Load results DataFrames

### DLCST

No significant demographic biases.

In [5]:
dataset_name, dataset_len = 'dlcst', 599
filename = f"{RESULTS_DIR}/auroc-{dataset_name}-{dataset_len}.csv"
dlcst_result_df = pd.read_csv(filename, index_col=0)
dlcst_result_df[(dlcst_result_df['category'].isin(['demo']))].sort_values(by='p')[['attribute', 'p', 'AUC_diff', 'col',
    'Group_2', 'Group_2_mal', 'AUC_2', 'AUC-CI-lo_2', 'AUC-CI-hi_2', 
    'Group_1', 'Group_1_mal', 'AUC_1', 'AUC-CI-lo_1', 'AUC-CI-hi_1', 
    ]]

Unnamed: 0,attribute,p,AUC_diff,col,Group_2,Group_2_mal,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,Group_1,Group_1_mal,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1
PanCan2b,Sex,0.354384,-0.0525,PanCan2b,2.0,27,0.85881,0.787938,0.916154,1.0,32,0.911309,0.861594,0.952733
de Haas,Sex,0.373472,0.041685,thijmen_mean_cal,2.0,27,0.945371,0.912605,0.97561,1.0,32,0.903686,0.836199,0.958703
Venkadesh,Sex,0.449789,0.035124,Ensemble_Kiran_cal,2.0,27,0.943923,0.914397,0.971301,1.0,32,0.908799,0.844736,0.963213
PanCan2b,Age > 61,0.461394,0.042372,PanCan2b,False,28,0.89616,0.839143,0.94508,True,31,0.853788,0.785511,0.913178
de Haas,Age > 61,0.78118,0.013576,thijmen_mean_cal,False,28,0.924917,0.859205,0.972071,True,31,0.911341,0.851345,0.956811
Sybil year 1,Sex,0.88904,-0.008393,sybil_year1,2.0,27,0.860914,0.781352,0.929134,1.0,32,0.869307,0.781449,0.943515
Sybil year 1,Age > 61,0.985486,0.001089,sybil_year1,False,28,0.864791,0.781237,0.934117,True,31,0.863703,0.78062,0.938329
Venkadesh,Age > 61,0.99944,-3.4e-05,Ensemble_Kiran_cal,False,28,0.922195,0.852941,0.969561,True,31,0.922228,0.867057,0.967006


### Tijmen Combined Model

No demographic biases.

In [6]:
df = pd.read_csv(f"{RESULTS_DIR}/auroc-nlst-1172.csv", index_col=0)
df[
    # (df['col'] == 'Thijmen_mean_cal') & 
    # (df['p'] < 0.05) & 
    (df['category'].isin(['demo']))
].sort_values(by='p')

Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_ben,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,attribute,category
Venkadesh,0.000514,-3.473434,True,0.860591,0.829337,0.894240,False,0.408898,0.000000,1.000000,...,1022,97.696246,10.742358,4,23,2.303754,14.814815,DL_cal,HighSchoolPlus,demo
Venkadesh,0.008255,-2.641477,1.0,0.863658,0.828364,0.895173,2.0,0.611505,0.342342,0.870130,...,986,93.686007,10.200364,9,31,3.412969,22.500000,DL_cal,WhiteOrBlack,demo
Venkadesh,0.008255,-2.641477,1.0,0.863658,0.828364,0.895173,2.0,0.611505,0.342342,0.870130,...,986,93.686007,10.200364,9,31,3.412969,22.500000,DL_cal,race,demo
PanCan2b,0.009248,-2.602754,True,0.789326,0.740362,0.834248,False,0.654600,0.564677,0.741846,...,719,68.344710,10.237203,45,326,31.655290,12.129380,PanCan2b,Overweight,demo
de Haas Global (shown nodule),0.021562,-2.298002,True,0.824864,0.789345,0.860109,False,0.497058,0.200000,0.807692,...,1022,97.696246,10.742358,4,23,2.303754,14.814815,Thijmen_global_show_cal,HighSchoolPlus,demo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
de Haas Combined,0.918460,0.102373,True,0.867599,0.833302,0.901657,False,0.871888,0.818064,0.920382,...,719,68.344710,10.237203,45,326,31.655290,12.129380,Thijmen_mean_cal,Overweight,demo
de Haas Combined,0.919806,0.100678,True,0.868591,0.829183,0.905725,False,0.872645,0.823435,0.918907,...,512,49.658703,12.027491,57,533,50.341297,9.661017,Thijmen_mean_cal,height > 68,demo
PanCan2b,0.927187,-0.091385,True,0.750904,0.698236,0.799309,False,0.746107,0.652161,0.827392,...,699,66.979522,10.955414,41,346,33.020478,10.594315,PanCan2b,Married,demo
de Haas Global (hidden nodule),0.981727,-0.022904,3.0,0.752351,0.677128,0.823763,5.0,0.750808,0.676002,0.822234,...,271,26.194539,11.726384,31,213,20.819113,12.704918,Thijmen_global_hidden_cal,educat,demo


In [7]:
df = pd.read_csv(f"{RESULTS_DIR}/auroc-nlst-3240.csv", index_col=0)
df[
    # (df['col'] == 'Thijmen_mean_cal') & 
    (df['p'] < 0.05) & 
    (df['category'].isin(['demo']))
].sort_values(by='p')

Unnamed: 0,p,z,Group_1,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1,Group_2,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,...,Group_1_ben,Group_1_pct,Group_1_pct_mal,Group_2_mal,Group_2_ben,Group_2_pct,Group_2_pct_mal,col,attribute,category
PanCan2b,7.492166e-08,5.37895,2.0,0.816693,0.791771,0.839542,1.0,0.995,1.0,1.0,...,2926,98.765432,8.5625,3,24,0.833333,11.111111,PanCan2b,ethnic,demo
PanCan2b,0.004033907,2.875498,False,0.779191,0.745051,0.812496,True,0.865667,0.83861,0.89272,...,1583,54.166667,9.80057,109,1376,45.833333,7.340067,PanCan2b,weight > 180,demo
PanCan2b,0.00554691,-2.773428,1.0,0.85645,0.831581,0.878549,2.0,0.766858,0.717815,0.813474,...,1665,56.91358,9.707158,102,1294,43.08642,7.30659,PanCan2b,Gender,demo
PanCan2b,0.006220424,-2.73593,True,0.84703,0.821237,0.873609,False,0.758111,0.711834,0.803785,...,2031,68.179012,8.057945,103,928,31.820988,9.990301,PanCan2b,Overweight,demo
PanCan2b,0.04340656,-2.019777,True,0.850343,0.820787,0.874958,False,0.78916,0.75259,0.826555,...,1378,47.098765,9.698558,133,1581,52.901235,7.759627,PanCan2b,height > 68,demo


### Other models - scan level.

In [8]:
filename = f"{RESULTS_DIR}/auroc-nlst-5911.csv"
df = pd.read_csv(filename, index_col=0)
df.sort_values(by='p')[['attribute', 'p', 'AUC_diff', 'col',
    'Group_2', 'Group_2_mal', 'AUC_2', 'AUC-CI-lo_2', 'AUC-CI-hi_2', 
    'Group_1', 'Group_1_mal', 'AUC_1', 'AUC-CI-lo_1', 'AUC-CI-hi_1', 
    ]]

Unnamed: 0,attribute,p,AUC_diff,col,Group_2,Group_2_mal,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,Group_1,Group_1_mal,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1
PanCan2b,Spiculation,1.489906e-18,0.215556,PanCan2b,True,10,0.995000,1.000000,1.000000,False,571,0.779444,0.760436,0.798399
de Haas Global (hidden nodule),NoduleCounts > 1,2.580797e-15,-0.195385,Thijmen_global_hidden_cal,False,155,0.722546,0.684605,0.763516,True,426,0.917931,0.906465,0.930564
PanCan2b,diagsili,7.287603e-14,0.214395,PanCan2b,1.0,4,0.995000,1.000000,1.000000,0.0,574,0.780605,0.762026,0.798616
de Haas Global (shown nodule),NoduleCounts > 1,2.130454e-11,-0.155325,Thijmen_global_show_cal,False,155,0.781893,0.742913,0.817572,True,426,0.937217,0.927690,0.948053
Sybil year 1,Spiculation,1.177415e-10,0.155049,sybil_year1,True,10,0.995000,1.000000,1.000000,False,571,0.839951,0.822017,0.858643
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venkadesh,Unfinished_ed,9.913961e-01,-0.000214,DL_cal,True,158,0.889360,0.863247,0.915999,False,423,0.889574,0.872976,0.906317
Sybil year 1,cigsmok,9.975183e-01,-0.000063,sybil_year1,0.0,267,0.842417,0.817840,0.867299,1.0,314,0.842480,0.817564,0.867386
de Haas Global (hidden nodule),diagasbe,9.978240e-01,0.000257,Thijmen_global_hidden_cal,1.0,8,0.797872,0.668860,0.913793,0.0,572,0.797616,0.781199,0.813212
de Haas Global (hidden nodule),wrkfarm,9.980028e-01,-0.000101,Thijmen_global_hidden_cal,1.0,47,0.796130,0.737294,0.852533,0.0,533,0.796231,0.779747,0.812067


In [9]:
filename = f"{RESULTS_DIR}/auroc-nlst-5911.csv"
df = pd.read_csv(filename, index_col=0)
df[
    df['category'].isin(['demo']) &
    # df['attribute'].isin(['Gender'])
    # ~df['category'].isin(['nodule']) & 
    # (df['col'].isin(['sybil_year1', 'DL_cal', 'PanCan2b'])) & 
    (df['p'] < 0.05) 
    # (df['Group_2_mal'] >= 10)
].sort_values(by='p')[['attribute', 'category', 'p', 'AUC_diff',  'z', 'col',
                        'Group_2', 'Group_2_mal', 'AUC_2', 'AUC-CI-lo_2', 'AUC-CI-hi_2', 
                        'Group_1', 'Group_1_mal', 'AUC_1', 'AUC-CI-lo_1', 'AUC-CI-hi_1', 
]].head(100)

Unnamed: 0,attribute,category,p,AUC_diff,z,col,Group_2,Group_2_mal,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,Group_1,Group_1_mal,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1
Sybil year 1,Gender,demo,0.000363,0.07066,3.565965,sybil_year1,2.0,244,0.881035,0.856631,0.903728,1.0,337,0.810375,0.783843,0.836403
Sybil year 1,height > 68,demo,0.00049,-0.071072,-3.486097,sybil_year1,True,267,0.80135,0.771513,0.831321,False,314,0.872422,0.851696,0.893239
Sybil year 1,marital,demo,0.000933,0.073927,3.309839,sybil_year1,5.0,125,0.89934,0.871228,0.925916,2.0,384,0.825414,0.800982,0.849295
PanCan2b,Overweight,demo,0.001719,-0.073642,-3.134853,PanCan2b,False,209,0.733562,0.69871,0.767721,True,372,0.807204,0.78633,0.827265
Sybil year 1,ethnic,demo,0.005813,0.13908,2.758144,sybil_year1,1.0,4,0.980074,0.952128,1.0,2.0,574,0.840994,0.824248,0.858383
de Haas Global (shown nodule),weight > 180,demo,0.011857,0.04926,2.516383,Thijmen_global_show_cal,True,247,0.878144,0.858742,0.89633,False,334,0.828884,0.807656,0.849576
Sybil year 1,Married,demo,0.015488,0.049398,2.420768,sybil_year1,False,197,0.874812,0.847797,0.901127,True,384,0.825414,0.800982,0.849295
de Haas Global (shown nodule),Overweight,demo,0.0354,-0.044212,-2.103753,Thijmen_global_show_cal,False,209,0.823239,0.796172,0.848152,True,372,0.867451,0.851013,0.883384


In [10]:
sybil_worse_df = df[(df['p'] < 0.05) & (df['col'] == 'sybil_year1')]
gender_df = pd.read_csv(f'{RESULTS_DIR}/auroc-gender-by-factors-nlst-5911.csv')
gender_df[
    (gender_df['filter_by'].isin(list(sybil_worse_df['attribute']))) & 
    (gender_df['p'] > 0.05) & 
    (gender_df['col'] == 'sybil_year1')
].sort_values(by='filter_by', ascending=True)[['filter_by', 'filter_val', 'p', 'AUC_diff', 
                                                       'Group_2', 'Group_2_mal', 'Group_2_ben', 'AUC_2', 'AUC-CI-lo_2', 'AUC-CI-hi_2', 
                                                       'Group_1', 'Group_1_mal', 'Group_1_ben', 'AUC_1', 'AUC-CI-lo_1', 'AUC-CI-hi_1', 
                                                       ]]

Unnamed: 0,filter_by,filter_val,p,AUC_diff,Group_2,Group_2_mal,Group_2_ben,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,Group_1,Group_1_mal,Group_1_ben,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1
772,Diameter_mm > 7,FALSE,0.231114,0.076944,2,35,1218,0.763092,0.667341,0.852503,1,45,1626,0.686148,0.597905,0.762279
802,FamilyHistoryLungCa,TRUE,0.241906,0.04487,2,82,613,0.829339,0.771717,0.879081,1,98,734,0.784469,0.726391,0.837706
856,Manufacturer,GE MEDICAL SYSTEMS,0.057084,0.059522,2,100,1058,0.852995,0.810388,0.891314,1,166,1306,0.793473,0.75261,0.829729
868,Manufacturer,SIEMENS,0.061371,0.05607,2,99,864,0.904491,0.87422,0.932758,1,117,1287,0.848421,0.806158,0.887553
874,Manufacturer,TOSHIBA,0.414409,0.060932,2,17,90,0.901231,0.831387,0.95942,1,19,170,0.840299,0.766254,0.909542
28,Married,1,0.185575,0.035556,2,122,1183,0.848167,0.812046,0.883832,1,262,2407,0.812611,0.782628,0.843889
814,NoduleCounts > 1,TRUE,0.091372,0.036927,2,188,1016,0.885893,0.859449,0.911794,1,238,1415,0.848965,0.81601,0.878139
598,diaghype,1,0.086508,0.058865,2,83,679,0.849875,0.801426,0.895899,1,140,1091,0.79101,0.749856,0.83327
118,ethnic,1,0.537353,-0.09336,2,2,31,0.90164,0.78125,1.0,1,2,63,0.995,1.0,1.0
130,height > 68,0,0.176726,-0.0416,1,80,740,0.839781,0.791351,0.88421,2,234,2090,0.881381,0.858706,0.904958


In [11]:
# nlst_preds, nlst_democols, MODELS = data.prep_nlst_preds(nlst_preds_nodule, nlst_democols_original, scanlevel=False, tijmen=False, sybil=False)
# MODELS = {'Venkadesh': 'DL_cal', 'Sybil year 1': 'sybil_year1', 'PanCan2b': 'PanCan2b'}

# gender_threshold_df = pd.read_csv(f'{RESULTS_DIR}/threshold-gender-by-factors-nlst-5911.csv', index_col=0)
# policy_threshold_df = pd.read_csv(f'{TEAMS_DIR}/nlst/policy-thresholds-5911.csv', index_col=0)[['Sensitivity=0.9', 'Specificity=0.9', 'Brock']]
# display(policy_threshold_df)

# specific_stats_df = gender_threshold_df[(gender_threshold_df['filter_by'] == 'height > 68')]
# subgroup_dfs = specific_stats_df.groupby('filter_val')
# for val, sdf in subgroup_dfs:
#     display(sdf[['model', 'policy', 'group', 'fpr', 'fnr']])
#     threshold.plot_threshold_stats_subgroups(nlst_preds, 'Gender', policies=policy_threshold_df, models=MODELS, diff=False, stats=sdf)

In [12]:
overweight_df = pd.read_csv(f'{RESULTS_DIR}/auroc-overweight-by-factors-nlst-5911.csv')
overweight_df[
    (overweight_df['filter_by'].isin(sybil_worse_df['attribute'])) & 
    (overweight_df['p'] > 0.05) & 
    (overweight_df['col'] == 'sybil_year1')
].sort_values(by='filter_by', ascending=True)[['filter_by', 'filter_val', 'p', 'AUC_diff', 
                                                       'Group_2', 'Group_2_mal', 'AUC_2', 'AUC-CI-lo_2', 'AUC-CI-hi_2', 
                                                       'Group_1', 'Group_1_mal', 'AUC_1', 'AUC-CI-lo_1', 'AUC-CI-hi_1', 
                                                       ]]

Unnamed: 0,filter_by,filter_val,p,AUC_diff,Group_2,Group_2_mal,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,Group_1,Group_1_mal,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1
802,Diameter_mm > 7,True,0.300775,-0.02483,False,171,0.796282,0.755761,0.834759,True,330,0.821113,0.796164,0.846353
796,Diameter_mm > 7,False,0.97439,-0.002076,False,38,0.715235,0.625761,0.803798,True,42,0.717312,0.624411,0.803425
814,Emphysema,True,0.445369,-0.025284,False,103,0.782344,0.734232,0.830441,True,161,0.807628,0.770075,0.845546
808,Emphysema,False,0.104372,-0.045593,False,106,0.838078,0.793291,0.886147,True,211,0.883671,0.858535,0.907553
826,FamilyHistoryLungCa,True,0.152102,-0.062732,False,52,0.761795,0.679413,0.836569,True,128,0.824527,0.78136,0.863146
820,FamilyHistoryLungCa,False,0.097476,-0.040674,False,157,0.829941,0.797788,0.863684,True,244,0.870615,0.847042,0.893955
22,Gender,2.0,0.118565,-0.044387,False,117,0.853824,0.814829,0.889291,True,127,0.898211,0.867046,0.925601
880,Manufacturer,GE MEDICAL SYSTEMS,0.61128,-0.017298,False,81,0.809423,0.753761,0.860139,True,185,0.826721,0.79438,0.85765
886,Manufacturer,Philips,0.187668,-0.094462,False,22,0.727531,0.588547,0.841796,True,41,0.821992,0.745366,0.885056
898,Manufacturer,TOSHIBA,0.214339,-0.096197,True,17,0.810629,0.732078,0.887226,False,19,0.906826,0.83908,0.963765


In [13]:
kiran_worse_df = df[(df['p'] < 0.15) & (df['col'] == 'DL_cal')]
race_df = pd.read_csv(f'{RESULTS_DIR}/auroc-race-by-factors-nlst-5911.csv')
race_df[
    # (race_df['filter_by'].isin(kiran_worse_df['attribute'])) & 
    (race_df['p'] > 0.15) & 
    (race_df['col'] == 'DL_cal')
].sort_values(by='filter_by', ascending=True)[['filter_by', 'filter_val', 'p', 'AUC_diff', 'z', 
                                                       'Group_2', 'Group_2_mal', 'AUC_2', 'AUC-CI-lo_2', 'AUC-CI-hi_2', 
                                                       'Group_1', 'Group_1_mal', 'AUC_1', 'AUC-CI-lo_1', 'AUC-CI-hi_1', 
                                                       ]].head(40)

Unnamed: 0,filter_by,filter_val,p,AUC_diff,z,Group_2,Group_2_mal,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,Group_1,Group_1_mal,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1
0,Age > 61,0.0,0.744035,-0.021081,-0.326515,2.0,12,0.8827,0.788183,0.957637,1.0,203,0.903781,0.882382,0.92498
684,Diameter_mm > 7,False,0.787051,-0.034469,-0.270142,2.0,4,0.83715,0.729412,0.931548,1.0,74,0.871619,0.836448,0.905398
696,Emphysema,False,0.483234,-0.04394,-0.70111,2.0,15,0.851154,0.767368,0.924328,1.0,287,0.895093,0.876539,0.914229
714,FamilyHistoryLungCa,True,0.723587,-0.035457,-0.353669,2.0,6,0.848554,0.697368,0.956044,1.0,166,0.884011,0.85654,0.911851
12,Gender,1.0,0.242481,-0.085023,-1.168807,2.0,13,0.805161,0.657143,0.928571,1.0,312,0.890184,0.871542,0.908557
18,Gender,2.0,0.405513,-0.053875,-0.831816,2.0,15,0.837483,0.747994,0.917105,1.0,218,0.891358,0.868132,0.913405
618,GroundGlassOpacity,False,0.161171,-0.066897,-1.401145,2.0,28,0.819119,0.728544,0.897945,1.0,530,0.886016,0.872241,0.902113
30,HighSchoolPlus,1.0,0.291644,-0.053291,-1.054522,2.0,24,0.839423,0.76455,0.905983,1.0,526,0.892714,0.878919,0.906408
768,Manufacturer,SIEMENS,0.210883,-0.1041,-1.25114,2.0,10,0.810407,0.668182,0.936819,1.0,205,0.914507,0.895463,0.934923
762,Manufacturer,GE MEDICAL SYSTEMS,0.421985,-0.047656,-0.802982,2.0,18,0.824871,0.704327,0.92328,1.0,237,0.872527,0.849517,0.896134


### Other models - nodule level.

In [14]:
filename = f"{RESULTS_DIR}/auroc-nlst-16077.csv"
df = pd.read_csv(filename, index_col=0)
df[df['category'].isin(['demo'])].sort_values(by='p')[['attribute', 'p', 'AUC_diff', 
                                                       'Group_2', 'Group_2_mal', 'AUC_2', 'AUC-CI-lo_2', 'AUC-CI-hi_2', 
                                                       'Group_1', 'Group_1_mal', 'AUC_1', 'AUC-CI-lo_1', 'AUC-CI-hi_1', 
                                                       ]]

Unnamed: 0,attribute,p,AUC_diff,Group_2,Group_2_mal,AUC_2,AUC-CI-lo_2,AUC-CI-hi_2,Group_1,Group_1_mal,AUC_1,AUC-CI-lo_1,AUC-CI-hi_1
de Haas Global (shown nodule),weight > 180,0.005228,0.033699,True,502,0.907690,0.897163,0.917725,False,747,0.873991,0.863369,0.885606
PanCan2b,ethnic,0.018233,0.117807,1.0,9,0.952830,0.907500,0.987666,2.0,1230,0.835023,0.824028,0.845834
de Haas Local,ethnic,0.025166,0.080569,1.0,9,0.976576,0.949257,1.000000,2.0,1230,0.896008,0.888047,0.904983
de Haas Global (shown nodule),Married,0.026587,-0.028543,False,453,0.871053,0.855517,0.886020,True,796,0.899596,0.890233,0.908475
de Haas Global (shown nodule),Overweight,0.056542,-0.024613,False,445,0.873085,0.858416,0.886521,True,804,0.897697,0.888816,0.906684
...,...,...,...,...,...,...,...,...,...,...,...,...,...
de Haas Global (shown nodule),HighSchoolPlus,0.960286,-0.002205,False,24,0.887610,0.832258,0.939778,True,1225,0.889815,0.882098,0.897558
de Haas Global (hidden nodule),educat,0.963680,0.000857,5.0,304,0.860336,0.844059,0.875737,3.0,311,0.859479,0.842900,0.877167
de Haas Global (hidden nodule),Overweight,0.984777,0.000262,False,445,0.861721,0.849136,0.875799,True,804,0.861459,0.851698,0.871741
PanCan2b,height > 68,0.985568,0.000253,True,566,0.837605,0.822037,0.853064,False,683,0.837353,0.823928,0.852372
