# Evaluate models on Multi-institute MCL cohorts

In [17]:
import os
import pandas as pd
import numpy as np
import math
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
import scipy.stats
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
pd.set_option('display.max_colwidth', None)

### Mayo, CBM, Brock, radiomics risk
* using precomputed values
* unclear why a subset of the cohort has NaN risk for the CBM and radiomics models

In [18]:
# Load the prepared multi-institute MCL cohort table from CSV.
# NOTE(review): the path is absolute and machine-specific; the notebook will
# only run where this file exists at exactly this location.
cohort_path = "/home/local/VANDERBILT/litz/github/MASILab/DeepLungScreening/cohorts/multi_mcl/multi_mcl_prep_v1.csv"
cohort_df = pd.read_csv(cohort_path)
# Trailing expression: render the frame as the cell output.
cohort_df

Unnamed: 0,pid,id,filename,session,scan_date,cohort,with_image,with_marker,age,race,...,phist,fhist,smo_status,pkyr,cyfra,lung_cancer,mayo_risk,CBM_risk,radiomics_risk,brock_risk
0,376873159,,,,,2.0,False,True,64.0,1.0,...,0,0,0.0,70.0,0.268318,0,0.510189,0.366301,0.742949,0.278794
1,398957338,398957338time20110101,398957338time20110101,0.0,2011-01-01,2.0,True,True,66.0,1.0,...,0,0,1.0,50.0,1.185300,0,0.219309,0.093341,0.335967,0.084037
2,1759365513,1759365513time20100101,1759365513time20100101,0.0,2010-01-01,2.0,True,True,80.0,1.0,...,0,0,0.0,10.0,1.545812,1,0.733087,0.863324,0.795813,0.429047
3,2368954232,2368954232time20120101,2368954232time20120101,0.0,2012-01-01,2.0,True,True,52.0,1.0,...,0,0,1.0,50.0,3.212954,1,0.749581,0.966498,0.822853,0.588785
4,2962357115,2962357115time20110101,2962357115time20110101,0.0,2011-01-01,2.0,True,True,68.0,1.0,...,0,0,0.0,94.5,0.995075,0,0.070825,0.039126,0.299580,0.009568
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,20449484282,20449484282time20140218,20449484282time20140218,1.0,2014-02-18,3.0,True,True,77.0,,...,0,0,0.0,94.5,0.046962,1,0.954705,,,0.614620
1457,20719819907,,,,,3.0,False,True,76.0,,...,0,0,1.0,29.5,1.489888,0,0.503275,,,0.272698
1458,19182262002,19182262002time20140225,19182262002time20140225,0.0,2014-02-25,3.0,True,True,51.0,,...,0,0,1.0,30.0,0.083823,1,0.363386,,,0.163168
1459,19182262002,19182262002time20140509,19182262002time20140509,1.0,2014-05-09,3.0,True,True,51.0,,...,0,0,1.0,30.0,0.083823,1,0.363386,,,0.163168


### merge CSCNN results

In [19]:
# Attach the CSCNN predictions to the cohort table as a new 'cscnn_risk' column.
cs_path = "/home/local/VANDERBILT/litz/github/MASILab/DeepLungScreening/cohorts/multi_mcl/multi_mcl_latest_scan_v1.csv"
cscnn_path = "/home/local/VANDERBILT/litz/github/MASILab/DSB2017/models/config_multi_mcl_1115/preds.csv"
cscnn_pred = pd.read_csv(cscnn_path)
cs = pd.read_csv(cs_path, dtype={'pid':str})
# The predictions are attached by index alignment between the two freshly read
# frames. NOTE(review): this assumes preds.csv rows are in the same order as
# the latest-scan table -- TODO confirm.
cs['cscnn_risk'] = cscnn_pred['pred']
# Drop any 'cscnn_risk' column left over from a previous run of this cell;
# otherwise re-running would yield 'cscnn_risk_x'/'cscnn_risk_y' and break the
# later lookups of 'cscnn_risk'.
cohort_df = (
    cohort_df
    .drop(columns=['cscnn_risk'], errors='ignore')
    .merge(cs[['pid', 'cscnn_risk']], on='pid', how='left')
)


### merge tdvit results

In [20]:
# Attach the TDViT predictions to the cohort table as a new 'tdvit_risk' column.
twoscan_path = "/home/local/VANDERBILT/litz/github/MASILab/DeepLungScreening/cohorts/multi_mcl/multi_mcl_2scan.csv"
tdvit_path = "/home/local/VANDERBILT/litz/github/MASILab/time-distance-transformer/models/1109_multi_mcl_td/pred.csv"
twoscan = pd.read_csv(twoscan_path, dtype={'pid':str})
tdvit_pred = pd.read_csv(tdvit_path, dtype={'PID':str})

# Inner join: only two-scan rows whose pid has a prediction are kept.
twoscan = twoscan.merge(tdvit_pred[['PID', 'pred']], left_on='pid', right_on='PID')
twoscan = twoscan.rename(columns={'pred':'tdvit_risk'})
# Drop any 'tdvit_risk' column left over from a previous run of this cell so a
# re-run does not produce 'tdvit_risk_x'/'tdvit_risk_y' suffix columns.
cohort_df = (
    cohort_df
    .drop(columns=['tdvit_risk'], errors='ignore')
    .merge(twoscan[['pid', 'id', 'tdvit_risk']], on=['pid', 'id'], how='left')
)

In [21]:
# Keep only the rows that carry a risk value for every cross-sectional model.
nonnull_cohort = cohort_df.dropna(
    subset=['CBM_risk', 'mayo_risk', 'brock_risk', 'radiomics_risk', 'cscnn_risk']
)
print(nonnull_cohort['cohort'].value_counts())


1.0    402
3.0    159
2.0     96
4.0     81
Name: cohort, dtype: int64


### merge DLSTM

In [22]:
# Attach the DLSTM predictions to the cohort table as a new 'dlstm_risk' column.
twoscan_path = "/home/local/VANDERBILT/litz/github/MASILab/DeepLungScreening/cohorts/multi_mcl/multi_mcl_2scan.csv"
dlstm_path = "/home/local/VANDERBILT/litz/github/MASILab/RNNLung/compare/tumor_NLST/models/1130_multi_mcl_vumc/pred.csv"
twoscan = pd.read_csv(twoscan_path, dtype={'pid':str})
dlstm_pred = pd.read_csv(dlstm_path, dtype={'pid':str})

# Inner join: only two-scan rows whose pid has a prediction are kept.
twoscan = twoscan.merge(dlstm_pred[['pid', 'pred']], on='pid')
twoscan = twoscan.rename(columns={'pred':'dlstm_risk'})
# Drop any 'dlstm_risk' column left over from a previous run of this cell so a
# re-run does not produce 'dlstm_risk_x'/'dlstm_risk_y' suffix columns.
cohort_df = (
    cohort_df
    .drop(columns=['dlstm_risk'], errors='ignore')
    .merge(twoscan[['pid', 'id', 'dlstm_risk']], on=['pid', 'id'], how='left')
)


Unnamed: 0,pid,id,filename,session,scan_date,cohort,with_image,with_marker,age,race,...,pkyr,cyfra,lung_cancer,mayo_risk,CBM_risk,radiomics_risk,brock_risk,cscnn_risk,tdvit_risk,dlstm_risk
0,376873159,,,,,2.0,False,True,64.0,1.0,...,70.0,0.268318,0,0.510189,0.366301,0.742949,0.278794,,,
1,398957338,398957338time20110101,398957338time20110101,0.0,2011-01-01,2.0,True,True,66.0,1.0,...,50.0,1.185300,0,0.219309,0.093341,0.335967,0.084037,0.055502,,
2,1759365513,1759365513time20100101,1759365513time20100101,0.0,2010-01-01,2.0,True,True,80.0,1.0,...,10.0,1.545812,1,0.733087,0.863324,0.795813,0.429047,0.905775,,
3,2368954232,2368954232time20120101,2368954232time20120101,0.0,2012-01-01,2.0,True,True,52.0,1.0,...,50.0,3.212954,1,0.749581,0.966498,0.822853,0.588785,0.768423,,
4,2962357115,2962357115time20110101,2962357115time20110101,0.0,2011-01-01,2.0,True,True,68.0,1.0,...,94.5,0.995075,0,0.070825,0.039126,0.299580,0.009568,0.246700,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,20449484282,20449484282time20140218,20449484282time20140218,1.0,2014-02-18,3.0,True,True,77.0,,...,94.5,0.046962,1,0.954705,,,0.614620,0.281178,0.322683,0.065404
1457,20719819907,,,,,3.0,False,True,76.0,,...,29.5,1.489888,0,0.503275,,,0.272698,,,
1458,19182262002,19182262002time20140225,19182262002time20140225,0.0,2014-02-25,3.0,True,True,51.0,,...,30.0,0.083823,1,0.363386,,,0.163168,0.785776,,
1459,19182262002,19182262002time20140509,19182262002time20140509,1.0,2014-05-09,3.0,True,True,51.0,,...,30.0,0.083823,1,0.363386,,,0.163168,0.785776,0.764752,0.531484


### Eval cross sectional models

In [23]:
# Settings for the cross-sectional bootstrap evaluation.
# Seed value for the bootstrap resampling.
# NOTE(review): verify that the sampling code actually receives this seed.
random_seed=56
n_bootstrap = 1000 # number of bootstrap samples with replacement (the samples are same size as cohort)

# Cohort display names; the evaluation loop maps list position p to the
# numeric code p+1 in the 'cohort' column.
cohort_names = ['VUMC', 'UPMC', 'DECAMP', 'UCD']
# Model prefixes; compute_model_auc reads the column named '<model>_risk'.
model_names = ['CBM', 'mayo', 'brock', 'radiomics', 'cscnn']

def compute_model_auc(sample, model):
    """Return the ROC AUC of one model's risk scores on ``sample``.

    Args:
        sample: table with a 'lung_cancer' label column and a
            '<model>_risk' score column.
        model: model prefix used to build the score column name.

    Returns:
        The value of ``roc_auc_score(labels, scores)``.
    """
    risk_column = f"{model}_risk"
    return roc_auc_score(sample['lung_cancer'], sample[risk_column])

def compute_ci(data, confidence=0.95):
    a = 1.0*np.array(data)
    n = len(a)
    mu, se = np.mean(a), scipy.stats.sem(a)
    h = se*scipy.stats.t.ppf((1+confidence)/2.0, n-1)
    # print(mu)
    return mu, mu-h, mu+h

dfrows = []

# One seeded generator drives every resample, so the whole table is
# reproducible on Restart & Run All. Previously random_seed was never used.
bootstrap_rng = np.random.RandomState(random_seed)

for i, cohort_name in enumerate(cohort_names):
    # Cohort codes in the 'cohort' column are the 1-based positions in cohort_names.
    cohort = nonnull_cohort[nonnull_cohort['cohort']==i+1]
    for model in model_names:
        # Bootstrap: resample the cohort with replacement (same size) and
        # score the model on every resample.
        aucs = []
        # '_' instead of 'i' so the cohort index above is not shadowed.
        for _ in range(n_bootstrap):
            sample = cohort.sample(frac=1.0, replace=True, random_state=bootstrap_rng)
            aucs.append(compute_model_auc(sample, model))

        # Summarize the replicate AUCs via compute_ci.
        mean_auc, ci_low, ci_high = compute_ci(aucs)
        dfrows.append({'cohort': cohort_name, 'model': model, 'mean_AUC': mean_auc, 'ci_low':ci_low, 'ci_high':ci_high})

metrics = pd.DataFrame(dfrows)
metrics

Unnamed: 0,cohort,model,mean_AUC,ci_low,ci_high
0,VUMC,CBM,0.863089,0.861829,0.864348
1,VUMC,mayo,0.750801,0.748797,0.752804
2,VUMC,brock,0.746083,0.744102,0.748064
3,VUMC,radiomics,0.782391,0.78051,0.784271
4,VUMC,cscnn,0.664714,0.662677,0.66675
5,UPMC,CBM,0.942547,0.941262,0.943833
6,UPMC,mayo,0.858602,0.856177,0.861026
7,UPMC,brock,0.872186,0.87005,0.874322
8,UPMC,radiomics,0.874271,0.872075,0.876466
9,UPMC,cscnn,0.777231,0.774343,0.78012


### Eval Longitudinal models

In [24]:
# Keep only the rows that have a TDViT prediction.
nonnull_cohort = cohort_df.dropna(subset=['tdvit_risk'])
print(nonnull_cohort['cohort'].value_counts())

1.0    286
3.0     92
Name: cohort, dtype: int64


In [25]:
# Display the filtered table as the cell output for visual inspection.
nonnull_cohort

Unnamed: 0,pid,id,filename,session,scan_date,cohort,with_image,with_marker,age,race,...,pkyr,cyfra,lung_cancer,mayo_risk,CBM_risk,radiomics_risk,brock_risk,cscnn_risk,tdvit_risk,dlstm_risk
232,2150250843,2150250843time20120801,2150250843time20120801,0.0,2012-08-01,1.0,True,False,62.0,1.0,...,80.0,,0,0.200897,,,0.011388,0.148497,0.253520,0.327980
233,2150250843,2150250843time20170728,2150250843time20170728,1.0,2017-07-28,1.0,True,False,62.0,1.0,...,80.0,,0,0.200897,,,0.011388,0.148497,0.253520,0.327980
235,5828995798,5828995798time20130604,5828995798time20130604,0.0,2013-06-04,1.0,True,False,57.0,1.0,...,64.5,,0,0.094610,,,0.011377,0.365323,0.348641,0.197371
236,5828995798,5828995798time20161031,5828995798time20161031,1.0,2016-10-31,1.0,True,False,57.0,1.0,...,64.5,,0,0.094610,,,0.011377,0.365323,0.348641,0.197371
238,10820157066,10820157066time20131113,10820157066time20131113,1.0,2013-11-13,1.0,True,False,64.0,1.0,...,75.0,,0,0.064962,,,0.010984,0.215101,0.227476,0.228910
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,28656371809,28656371809time20160217,28656371809time20160217,2.0,2016-02-17,3.0,True,True,55.0,,...,51.0,0.260293,1,0.410718,,,0.108775,0.924885,0.729894,0.663913
1455,20449484282,20449484282time20130506,20449484282time20130506,0.0,2013-05-06,3.0,True,True,77.0,,...,94.5,0.046962,1,0.954705,,,0.614620,0.281178,0.322683,0.065404
1456,20449484282,20449484282time20140218,20449484282time20140218,1.0,2014-02-18,3.0,True,True,77.0,,...,94.5,0.046962,1,0.954705,,,0.614620,0.281178,0.322683,0.065404
1459,19182262002,19182262002time20140509,19182262002time20140509,1.0,2014-05-09,3.0,True,True,51.0,,...,30.0,0.083823,1,0.363386,,,0.163168,0.785776,0.764752,0.531484


In [26]:
random_seed=56
n_bootstrap = 1000 # number of bootstrap samples with replacement (the samples are same size as cohort)

# (cohort code, display name) pairs for the cohorts evaluated here.
cohort_names = [(1, 'VUMC'), (3, 'DECAMP')]
model_names = ['tdvit', 'dlstm']

dfrows = []

# One seeded generator drives every resample, so the whole table is
# reproducible on Restart & Run All. Previously random_seed was never used.
bootstrap_rng = np.random.RandomState(random_seed)

for cohort_code, cohort_name in cohort_names:
    cohort = nonnull_cohort[nonnull_cohort['cohort']==cohort_code]
    for model in model_names:
        # Bootstrap: resample the cohort with replacement (same size) and
        # score the model on every resample.
        aucs = []
        # '_' instead of 'i' so the loop counter cannot be confused with the cohort code.
        for _ in range(n_bootstrap):
            sample = cohort.sample(frac=1.0, replace=True, random_state=bootstrap_rng)
            aucs.append(compute_model_auc(sample, model))

        # Summarize the replicate AUCs via compute_ci.
        mean_auc, ci_low, ci_high = compute_ci(aucs)
        dfrows.append({'cohort': cohort_name, 'model': model, 'mean_AUC': mean_auc, 'ci_low':ci_low, 'ci_high':ci_high})

metrics = pd.DataFrame(dfrows)
metrics

Unnamed: 0,cohort,model,mean_AUC,ci_low,ci_high
0,VUMC,tdvit,0.752998,0.750931,0.755065
1,VUMC,dlstm,0.743395,0.741405,0.745384
2,DECAMP,tdvit,0.822402,0.819672,0.825132
3,DECAMP,dlstm,0.77755,0.77443,0.78067
