In [2]:
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as st
from sklearn import metrics
import pandas as pd

In [3]:
class DelongTest():
    '''
    DeLong test for the difference between the AUCs of two models that were
    scored on the same samples.

    Creating an instance runs the test immediately and prints the z score, the
    two-tailed p value and a verdict. The same numbers are also kept on the
    instance so they can be used programmatically:

    z           : z score of (AUC of preds1 - AUC of preds2)
    p           : two-tailed p value of z under the standard normal
    auc1, auc2  : AUC of preds1 / preds2
    significant : True when p < threshold
    '''
    def __init__(self,preds1,preds2,label,threshold=0.05):
        '''
        preds1    : the output of model1, one score per sample
        preds2    : the output of model2, in the same sample order
        label     : the actual label; truthy values are treated as positive
        threshold : significance level the p value is compared against

        Raises ValueError when the three inputs differ in length or when
        either class has fewer than two samples.
        '''
        self._preds1=preds1
        self._preds2=preds2
        self._label=label
        self.threshold=threshold
        self._validate_inputs()
        self._show_result()

    def _validate_inputs(self)->None:
        '''
        Reject inputs the test cannot be computed on.
        '''
        n1, n2, n_label = len(self._preds1), len(self._preds2), len(self._label)
        # zip() would silently drop the trailing samples of the longer inputs.
        if not (n1 == n2 == n_label):
            raise ValueError(
                f"preds1, preds2 and label must have the same length, got {n1}, {n2} and {n_label}")
        n_pos = sum(1 for a in self._label if a)
        n_neg = n_label - n_pos
        # The covariance entries divide by (class size - 1).
        if n_pos < 2 or n_neg < 2:
            raise ValueError(
                f"need at least two positive and two negative samples, got {n_pos} and {n_neg}")

    def _auc(self,X, Y)->float:
        '''
        Mean of the kernel over every (x, y) pair, x from X and y from Y.
        '''
        return 1/(len(X)*len(Y)) * sum([self._kernel(x, y) for x in X for y in Y])

    def _kernel(self,X, Y)->float:
        '''
        Mann-Whitney statistic: 1 when Y < X, 0.5 on a tie, 0 otherwise.
        '''
        return .5 if Y==X else int(Y < X)

    def _structural_components(self,X, Y)->tuple:
        '''
        Return (V10, V01): for every x in X its mean kernel value against all
        of Y, and for every y in Y its mean kernel value against all of X.
        '''
        V10 = [1/len(Y) * sum([self._kernel(x, y) for y in Y]) for x in X]
        V01 = [1/len(X) * sum([self._kernel(x, y) for x in X]) for y in Y]
        return V10, V01

    def _get_S_entry(self,V_A, V_B, auc_A, auc_B)->float:
        '''
        Sample covariance (n-1 denominator) of two component lists around
        their respective AUCs.
        '''
        return 1/(len(V_A)-1) * sum([(a-auc_A)*(b-auc_B) for a,b in zip(V_A, V_B)])

    def _z_score(self,var_A, var_B, covar_AB, auc_A, auc_B):
        '''
        z score of the AUC difference; 1e-8 keeps the denominator non-zero.
        '''
        # Round-off can push the variance of the difference marginally below
        # zero, and a negative float raised to .5 is a complex number.
        variance = max(var_A + var_B - 2*covar_AB, 0.0)
        return (auc_A - auc_B)/(variance**(.5)+ 1e-8)

    def _group_preds_by_label(self,preds, actual)->tuple:
        '''
        Split preds into (scores with a truthy label, scores with a falsy label).
        '''
        X = [p for (p, a) in zip(preds, actual) if a]
        Y = [p for (p, a) in zip(preds, actual) if not a]
        return X, Y

    def _compute_z_p(self):
        '''
        Run the test and return (z, p); also stores auc1 and auc2.
        '''
        X_A, Y_A = self._group_preds_by_label(self._preds1, self._label)
        X_B, Y_B = self._group_preds_by_label(self._preds2, self._label)

        V_A10, V_A01 = self._structural_components(X_A, Y_A)
        V_B10, V_B01 = self._structural_components(X_B, Y_B)

        auc_A = self._auc(X_A, Y_A)
        auc_B = self._auc(X_B, Y_B)
        self.auc1, self.auc2 = auc_A, auc_B

        # Compute entries of covariance matrix S (covar_AB = covar_BA)
        var_A = (self._get_S_entry(V_A10, V_A10, auc_A, auc_A) * 1/len(V_A10)+ self._get_S_entry(V_A01, V_A01, auc_A, auc_A) * 1/len(V_A01))
        var_B = (self._get_S_entry(V_B10, V_B10, auc_B, auc_B) * 1/len(V_B10)+ self._get_S_entry(V_B01, V_B01, auc_B, auc_B) * 1/len(V_B01))
        covar_AB = (self._get_S_entry(V_A10, V_B10, auc_A, auc_B) * 1/len(V_A10)+ self._get_S_entry(V_A01, V_B01, auc_A, auc_B) * 1/len(V_A01))

        # Two tailed test
        z = self._z_score(var_A, var_B, covar_AB, auc_A, auc_B)
        p = st.norm.sf(abs(z))*2

        return z,p

    def _show_result(self):
        '''
        Compute the test, keep the results on the instance and print them.
        '''
        z,p=self._compute_z_p()
        self.z, self.p = z, p
        # bool() turns the numpy comparison result into a plain Python bool.
        self.significant = bool(p < self.threshold)
        print(f"z score = {z:.5f};\np value = {p:.5f};")
        if self.significant :print("There is a significant difference")
        else:        print("There is NO significant difference")


In [4]:
# Toy check: model A gives every sample the same score (0.5), while model B
# mostly scores the positive samples higher than the negative ones.
actual = np.array([0, 0, 0, 0, 1, 0, 1, 1, 1, 1])
preds_A = np.full(actual.shape, 0.5)
preds_B = np.array([0.2, 0.5, 0.1, 0.4, 0.9, 0.8, 0.7, 0.5, 0.9, 0.8])
DelongTest(preds1=preds_A, preds2=preds_B, label=actual)

z score = -3.35876;
p value = 0.00078;
There is a significant difference


<__main__.DelongTest at 0x17fd34a7d68>

## Our predicted data

In [5]:
# Read the prediction table, decoding the file as GBK, then print a summary
# of its columns and dtypes.
df = pd.read_csv("./predictions.csv", encoding="gbk")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96 entries, 0 to 95
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   ID             96 non-null     int64  
 1   Group          96 non-null     int64  
 2   检查时间           96 non-null     object 
 3   End            96 non-null     int64  
 4   combine_pre    96 non-null     float64
 5   radiomics_pre  96 non-null     float64
 6   clinical_pre   96 non-null     float64
dtypes: float64(3), int64(3), object(1)
memory usage: 5.4+ KB


In [6]:
# Preview the first five rows of the prediction table.
df.head(5)

Unnamed: 0,ID,Group,检查时间,End,combine_pre,radiomics_pre,clinical_pre
0,151,1,2012/1/17,0,0.02006,0.36242,0.02576
1,23,1,2013/5/31,0,0.56533,0.56366,0.90391
2,152,1,2013/7/5,0,0.78507,0.09873,0.93959
3,29,1,2013/10/24,1,0.89326,0.90853,0.81419
4,53,1,2013/11/26,0,0.18475,0.08831,0.28709


In [7]:
# Pick the inputs by column name instead of by position, so the cell states
# what each array is and keeps working if the CSV's column order changes.
X1 = df["combine_pre"].to_numpy()    # predictions from the `combine_pre` column
X2 = df["radiomics_pre"].to_numpy()  # predictions from the `radiomics_pre` column
X3 = df["clinical_pre"].to_numpy()   # predictions from the `clinical_pre` column
y = df["End"].to_numpy()             # `End` column, used as the label for DelongTest

In [8]:
# Display the label array (the `End` column) that is passed to DelongTest below.
y

array([0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [9]:
# Display the predictions taken from the `combine_pre` column.
X1

array([0.02006, 0.56533, 0.78507, 0.89326, 0.18475, 0.26267, 0.00711,
       0.88963, 0.02004, 0.5    , 0.90903, 0.02006, 0.02005, 0.38687,
       0.01859, 0.02006, 0.93368, 0.01068, 0.02003, 0.93418, 0.07652,
       0.04501, 0.64736, 0.9155 , 0.02005, 0.0583 , 0.04508, 0.2253 ,
       0.58377, 0.92795, 0.02006, 0.02003, 0.02005, 0.40678, 0.62657,
       0.01158, 0.03999, 0.17422, 0.02003, 0.8207 , 0.93519, 0.08522,
       0.01932, 0.03779, 0.02004, 0.02004, 0.80417, 0.02003, 0.06928,
       0.02003, 0.08664, 0.06612, 0.10816, 0.77946, 0.02671, 0.35094,
       0.01052, 0.01853, 0.02367, 0.02007, 0.9248 , 0.01434, 0.93517,
       0.07418, 0.01323, 0.19348, 0.93521, 0.02003, 0.0631 , 0.64404,
       0.00405, 0.02007, 0.01029, 0.02475, 0.01216, 0.0103 , 0.32335,
       0.14368, 0.30943, 0.05924, 0.09393, 0.05163, 0.02003, 0.00864,
       0.02008, 0.93518, 0.43701, 0.02003, 0.14949, 0.019  , 0.02002,
       0.0242 , 0.00472, 0.07084, 0.02006, 0.0715 ])

In [10]:
# Display the predictions taken from the `radiomics_pre` column.
X2

array([0.36242, 0.56366, 0.09873, 0.90853, 0.08831, 0.05771, 0.02249,
       0.68034, 0.02915, 0.86306, 0.88526, 0.0165 , 0.42651, 0.59631,
       0.01652, 0.04372, 0.91938, 0.61257, 0.03449, 0.90397, 0.42509,
       0.07525, 0.83874, 0.89176, 0.00649, 0.06251, 0.2853 , 0.17889,
       0.80948, 0.9151 , 0.01651, 0.06184, 0.01653, 0.51094, 0.81021,
       0.00365, 0.14122, 0.01515, 0.01649, 0.61438, 0.91933, 0.05154,
       0.11385, 0.15279, 0.04439, 0.00848, 0.90481, 0.04694, 0.09002,
       0.0165 , 0.15405, 0.0748 , 0.54815, 0.87318, 0.04179, 0.51197,
       0.00795, 0.00824, 0.04594, 0.02063, 0.91208, 0.00348, 0.91986,
       0.02895, 0.11767, 0.26842, 0.9194 , 0.34698, 0.19014, 0.7206 ,
       0.00276, 0.12501, 0.01288, 0.07841, 0.28278, 0.0165 , 0.91802,
       0.31487, 0.55986, 0.05289, 0.22938, 0.32969, 0.01651, 0.45771,
       0.0165 , 0.91916, 0.48789, 0.01651, 0.65814, 0.01688, 0.00259,
       0.457  , 0.02178, 0.03257, 0.07779, 0.13095])

In [11]:
# Display the predictions taken from the `clinical_pre` column.
X3

array([0.02576, 0.90391, 0.93959, 0.81419, 0.28709, 0.33502, 0.01782,
       0.90257, 0.03336, 0.16511, 0.89784, 0.02571, 0.00719, 0.79876,
       0.02488, 0.02575, 0.91444, 0.0107 , 0.01894, 0.92663, 0.03637,
       0.05994, 0.82365, 0.92508, 0.02574, 0.1005 , 0.02577, 0.24771,
       0.38325, 0.92666, 0.02305, 0.03669, 0.02575, 0.48447, 0.73888,
       0.02574, 0.0351 , 0.28999, 0.02576, 0.75926, 0.92661, 0.21088,
       0.00969, 0.03201, 0.02577, 0.01792, 0.52903, 0.01693, 0.07229,
       0.0257 , 0.012  , 0.11111, 0.04429, 0.81189, 0.02572, 0.5    ,
       0.02176, 0.02171, 0.06719, 0.02573, 0.92134, 0.02572, 0.92662,
       0.10083, 0.01733, 0.32762, 0.88877, 0.01761, 0.1973 , 0.52144,
       0.00862, 0.02573, 0.01712, 0.04605, 0.01028, 0.01724, 0.14482,
       0.09337, 0.46774, 0.09916, 0.04132, 0.08225, 0.02572, 0.01466,
       0.02301, 0.92666, 0.88778, 0.02482, 0.0708 , 0.01635, 0.02575,
       0.02144, 0.00907, 0.05011, 0.02578, 0.09093])

In [12]:
# Compare the `combine_pre` predictions (X1) with the `radiomics_pre` predictions (X2).
DelongTest(preds1=X1, preds2=X2, label=y)

z score = -1.14365;
p value = 0.25277;
There is NO significant difference


<__main__.DelongTest at 0x17fd34e4160>

In [13]:
# Compare the `combine_pre` predictions (X1) with the `clinical_pre` predictions (X3).
DelongTest(preds1=X1, preds2=X3, label=y)

z score = 2.45566;
p value = 0.01406;
There is a significant difference


<__main__.DelongTest at 0x17fd34e4278>

In [14]:
# Compare the `radiomics_pre` predictions (X2) with the `clinical_pre` predictions (X3).
DelongTest(preds1=X2, preds2=X3, label=y)

z score = 2.06336;
p value = 0.03908;
There is a significant difference


<__main__.DelongTest at 0x17fd34e44a8>