DeLong test for ROC curves  
  
E. Lavrova 30.08.2020  

References:  
* Elizabeth DeLong et al. “Comparing the Areas under Two or More Correlated Receiver Operating Characteristic Curves: A Nonparametric Approach.” Biometrics 1988.
* Foster Provost, Tom Fawcett. “Robust Classification for Imprecise Environments.” Machine Learning 2001.

In [1]:
import numpy as np
import math
import scipy.stats as st
from statsmodels.stats import multitest
import pandas as pd

In [2]:
def hs(x, y):
    output = 0
    if x>y:
        output = 1
    elif x==y:
        output = 0.5
    return output

In [3]:
def theta(X, Y):
    theta = 0
    len_X = len(X)
    len_Y = len(Y)
    for i in range (0, len_X):
        for j in range (0, len_Y):
            theta += hs(X[i], Y[j])
    theta /= (len_X*len_Y)
    return theta

In [4]:
def V_10(X, Y):
    V_10 = []
    len_X = len(X)
    len_Y = len(Y)
    for i in range (0, len_X):
        v = 0
        for j in range (0, len_Y):
            v += hs(X[i], Y[j])
        v /= len(Y)
        V_10.append(v)
    return V_10

In [5]:
def V_01(X, Y):
    V_01 = []
    len_X = len(X)
    len_Y = len(Y)
    for i in range (0, len_Y):
        v = 0
        for j in range (0, len_X):
            v += hs(X[i], Y[j])
        v /= len(X)
        V_01.append(v) 
    return V_01

In [6]:
def pDelong(scores_filename_A, scores_filename_B, labels_filename_A, labels_filename_B):
    
    with open(scores_filename_A, "r") as f:
        scores_list_A=[float(i[:-1]) for line in f for i in line.split('/n')]

    with open(scores_filename_B, "r") as f:
        scores_list_B=[float(i[:-1]) for line in f for i in line.split('/n')]

    with open(labels_filename_A, "r") as f:
        labels_list_A=[int(eval(i[:-1])) for line in f for i in line.split('/n')]

    with open(labels_filename_B, "r") as f:
        labels_list_B=[int(eval(i[:-1])) for line in f for i in line.split('/n')]

    X_A = np.array(scores_list_A)[np.where(np.array(labels_list_A) == 1)]
    Y_A = np.array(scores_list_A)[np.where(np.array(labels_list_A) == 0)]

    X_B = np.array(scores_list_B)[np.where(np.array(labels_list_B) == 1)]
    Y_B = np.array(scores_list_B)[np.where(np.array(labels_list_B) == 0)]

    theta_A = theta(X_A, Y_A)
    theta_B = theta(X_B, Y_B)

    V_10_A = V_10(X_A, Y_A)
    V_01_A = V_01(X_A, Y_A)
    V_10_B = V_10(X_B, Y_B)
    V_01_B = V_01(X_B, Y_B)

    S_10 = np.zeros((2, 2))
    S_01 = np.zeros((2, 2))

    for i in range (0, len(X_A)):
        S_10[0,0] += (V_10_A[i] - theta_A)*(V_10_A[i] - theta_A)
    for i in range (0, len(X_A)):
        S_10[0,1] += (V_10_A[i] - theta_A)*(V_10_B[i] - theta_B)
    for i in range (0, len(X_A)):
        S_10[1,0] += (V_10_B[i] - theta_B)*(V_10_A[i] - theta_A)
    for i in range (0, len(X_A)):
        S_10[1,1] += (V_10_B[i] - theta_B)*(V_10_B[i] - theta_B)

    S_10 /= (len(X_A)-1)

    for i in range (0, len(Y_A)):
        S_01[0,0] += (V_01_A[i] - theta_A)*(V_01_A[i] - theta_A)
    for i in range (0, len(Y_A)):
        S_01[0,1] += (V_01_A[i] - theta_A)*(V_01_B[i] - theta_B)
    for i in range (0, len(Y_A)):
        S_01[1,0] += (V_01_B[i] - theta_B)*(V_01_A[i] - theta_A)
    for i in range (0, len(Y_A)):
        S_01[1,1] += (V_01_B[i] - theta_B)*(V_01_B[i] - theta_B)

    S_01 /= (len(Y_A)-1)

    S = S_10/len(X_A) + S_01/len(Y_A)

    if S[0,0]+S[1,1]-S[0,1]-S[1,0] > 0:
        Z = (theta_A-theta_B)/math.sqrt(S[0,0]+S[1,1]-S[0,1]-S[1,0])
        p = st.norm.sf(abs(Z))*2
    else:
        if theta_A==theta_B:
            p = 1
        else:
            p = 0
    return p, theta_A, theta_B       


In [7]:
models = ['RFC', 'SVC', 'LRC']
roi_names = ['WM', 'NAWM', 'GM']
modality_names = ['T1w', 'PD', 'MT', 'R1', 'R2s', 'qMRI']

For different ML models

In [8]:
for roi in roi_names:
    for modality in modality_names:
        p_RF_SV, theta_RF, theta_SV = pDelong('ys/ys_score_RFC_'+modality+'_'+roi+'.txt', 
                                              'ys/ys_score_SVC_'+modality+'_'+roi+'.txt',
                                              'ys/ys_true_RFC_'+modality+'_'+roi+'.txt', 
                                              'ys/ys_true_SVC_'+modality+'_'+roi+'.txt')
        p_RF_LR, theta_RF, theta_LR = pDelong('ys/ys_score_RFC_'+modality+'_'+roi+'.txt', 
                                              'ys/ys_score_LRC_'+modality+'_'+roi+'.txt',
                                              'ys/ys_true_RFC_'+modality+'_'+roi+'.txt', 
                                              'ys/ys_true_LRC_'+modality+'_'+roi+'.txt')
        p_LR_SV, theta_LR, theta_SV = pDelong('ys/ys_score_LRC_'+modality+'_'+roi+'.txt', 
                                              'ys/ys_score_SVC_'+modality+'_'+roi+'.txt',
                                              'ys/ys_true_LRC_'+modality+'_'+roi+'.txt', 
                                              'ys/ys_true_SVC_'+modality+'_'+roi+'.txt')
        reject, p_corr, a_S_corr, a_B_corr = multitest.multipletests([p_RF_SV, p_RF_LR, p_LR_SV], 
                                                                     alpha=0.01, method='bonferroni')
        print (roi, modality, 'RF-SV, RF-LR, LR-SV', p_corr)

WM T1w RF-SV, RF-LR, LR-SV [0.00000000e+000 5.04934383e-036 6.94807589e-107]
WM PD RF-SV, RF-LR, LR-SV [2.63554211e-082 1.00982774e-188 1.99145348e-165]
WM MT RF-SV, RF-LR, LR-SV [1. 1. 1.]
WM R1 RF-SV, RF-LR, LR-SV [1.58688829e-73 1.58688829e-73 1.58688829e-73]
WM R2s RF-SV, RF-LR, LR-SV [0.00000000e+00 7.07705944e-30 0.00000000e+00]
WM qMRI RF-SV, RF-LR, LR-SV [2.63554211e-82 1.00000000e+00 2.63554211e-82]
NAWM T1w RF-SV, RF-LR, LR-SV [1.17212480e-144 1.58335246e-113 4.89609073e-077]
NAWM PD RF-SV, RF-LR, LR-SV [5.66039948e-174 3.07544493e-305 0.00000000e+000]
NAWM MT RF-SV, RF-LR, LR-SV [0.00000000e+000 8.63534666e-268 1.10178870e-075]
NAWM R1 RF-SV, RF-LR, LR-SV [1.91513286e-80 1.91513286e-80 1.00000000e+00]
NAWM R2s RF-SV, RF-LR, LR-SV [9.30964812e-026 5.60942168e-177 6.26194931e-015]
NAWM qMRI RF-SV, RF-LR, LR-SV [1.14138122e-086 1.07792089e-128 1.35658401e-186]
GM T1w RF-SV, RF-LR, LR-SV [5.52457569e-176 1.03450033e-023 1.53725140e-019]
GM PD RF-SV, RF-LR, LR-SV [3.05506638e-001

For different image types within fixed ROI

In [12]:
for roi in roi_names[1:]:
    p_vals = []
    modalities = []
    for modality_1 in modality_names:
        for modality_2 in modality_names:
            p, theta_1, theta_2 = pDelong('ys/ys_score_LRC_'+modality_1+'_'+roi+'.txt',
                                          'ys/ys_score_LRC_'+modality_2+'_'+roi+'.txt',
                                          'ys/ys_true_LRC_'+modality_1+'_'+roi+'.txt',
                                          'ys/ys_true_LRC_'+modality_2+'_'+roi+'.txt')
            modalities.append([modality_1, modality_2])
            p_vals.append(p)
    reject, p_corr, a_S_corr, a_B_corr = multitest.multipletests(p_vals, alpha=0.01, method='bonferroni')
    print (roi)
    display (pd.DataFrame(modalities, p_vals))

NAWM


Unnamed: 0,0,1
1.0,T1w,T1w
2.7957050000000004e-54,T1w,PD
3.039642e-40,T1w,MT
2.048702e-74,T1w,R1
1.16584e-09,T1w,R2s
4.241196e-07,T1w,qMRI
2.7957050000000004e-54,PD,T1w
1.0,PD,PD
8.309465999999999e-20,PD,MT
5.115071e-239,PD,R1


GM


Unnamed: 0,0,1
1.0,T1w,T1w
0.0,T1w,PD
6.465420000000001e-159,T1w,MT
1.619658e-233,T1w,R1
0.0,T1w,R2s
0.0,T1w,qMRI
0.0,PD,T1w
1.0,PD,PD
6.313164e-09,PD,MT
8.714956e-31,PD,R1


For comparison between the conventional model and permutation test results

In [19]:
p_vals = []
for roi in roi_names:
    for modality in modality_names:
        p, theta_1, theta_2 = pDelong('ys/ys_score_LRC_'+modality+'_'+roi+'.txt', 
                                      'ys/ys_score_LRC_'+modality+'_'+roi+'_rand.txt',
                                      'ys/ys_true_LRC_'+modality+'_'+roi+'.txt', 
                                      'ys/ys_true_LRC_'+modality+'_'+roi+'_rand.txt')
        p_vals.append(p)

reject, p_corr, a_S_corr, a_B_corr = multitest.multipletests(p_vals, alpha=0.01, method='bonferroni')
print (p_corr)

[0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 9.14337166e-244 5.15061057e-004
 1.75335388e-031 0.00000000e+000 0.00000000e+000 0.00000000e+000
 1.40315368e-044 0.00000000e+000 0.00000000e+000 1.55713221e-180
 2.75503106e-230 0.00000000e+000]
