In [1]:
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as st
from sklearn import metrics
import pandas as pd

D:\Python3\lib\site-packages\numpy\.libs\libopenblas.IPBC74C7KURV7CB2PKT5Z5FNR3SIBV4J.gfortran-win_amd64.dll
D:\Python3\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
D:\Python3\lib\site-packages\numpy\.libs\libopenblas.TXA6YQSD3GCQQC22GEQ54J2UDCXDXHWN.gfortran-win_amd64.dll
  stacklevel=1)


In [2]:
class DelongTest():
    '''
    DeLong test comparing the AUCs of two models scored on the same samples.

    Constructing an instance runs the test immediately, prints the z score,
    the two-tailed p value and a verdict, and stores the statistics on the
    instance as ``z`` and ``p`` so they can be used programmatically.
    '''

    def __init__(self,preds1,preds2,label,threshold=0.05):
        '''
        preds1:the output of model1
        preds2:the output of model2
        label :the actual label (truthy = positive, falsy = negative)
        threshold:significance level the p value is compared against

        Raises ValueError when the three inputs differ in length or when
        either class has fewer than two samples.
        '''
        self._preds1=preds1
        self._preds2=preds2
        self._label=label
        self.threshold=threshold
        # Filled in by _show_result(); exposed so callers need not parse stdout.
        self.z=None
        self.p=None
        self._validate_inputs()
        self._show_result()

    def _validate_inputs(self)->None:
        '''
        Reject inputs the statistic is undefined for, with a clear message.
        '''
        n_samples = len(self._label)
        if len(self._preds1) != n_samples or len(self._preds2) != n_samples:
            # zip() would otherwise silently drop the unmatched tail.
            raise ValueError(
                "preds1, preds2 and label must have the same length, got "
                f"{len(self._preds1)}, {len(self._preds2)} and {n_samples}")
        n_pos = sum(1 for a in self._label if a)
        n_neg = n_samples - n_pos
        if n_pos < 2 or n_neg < 2:
            # _get_S_entry divides by (group size - 1).
            raise ValueError(
                "DeLong test needs at least 2 positive and 2 negative samples, "
                f"got {n_pos} positive and {n_neg} negative")

    def _auc(self,X, Y)->float:
        '''
        AUC as the mean kernel value over all (positive, negative) pairs.
        '''
        return 1/(len(X)*len(Y)) * sum(self._kernel(x, y) for x in X for y in Y)

    def _kernel(self,X, Y)->float:
        '''
        Mann-Whitney statistic: 1 if Y < X, 0.5 on a tie, 0 otherwise.
        '''
        return .5 if Y==X else int(Y < X)

    def _structural_components(self,X, Y)->tuple:
        '''
        Return (V10, V01): the per-positive and per-negative mean kernel values.
        '''
        V10 = [1/len(Y) * sum(self._kernel(x, y) for y in Y) for x in X]
        V01 = [1/len(X) * sum(self._kernel(x, y) for x in X) for y in Y]
        return V10, V01

    def _get_S_entry(self,V_A, V_B, auc_A, auc_B)->float:
        '''
        Sample covariance (n-1 denominator) of two structural-component lists.
        '''
        return 1/(len(V_A)-1) * sum((a-auc_A)*(b-auc_B) for a,b in zip(V_A, V_B))

    def _z_score(self,var_A, var_B, covar_AB, auc_A, auc_B):
        '''
        z statistic of the AUC difference.
        '''
        variance = var_A + var_B - 2*covar_AB
        # Rounding can leave the variance marginally below zero; a negative
        # float raised to .5 yields a complex number, so clamp it at 0.
        # The 1e-8 keeps the division defined when the variance is 0.
        return (auc_A - auc_B)/(max(variance, 0.0)**(.5)+ 1e-8)

    def _group_preds_by_label(self,preds, actual)->tuple:
        '''
        Split preds into (scores of positives, scores of negatives).
        '''
        X = [p for (p, a) in zip(preds, actual) if a]
        Y = [p for (p, a) in zip(preds, actual) if not a]
        return X, Y

    def _compute_z_p(self):
        '''
        Return (z, p) for the two-tailed test of AUC(preds1) - AUC(preds2).
        '''
        X_A, Y_A = self._group_preds_by_label(self._preds1, self._label)
        X_B, Y_B = self._group_preds_by_label(self._preds2, self._label)

        V_A10, V_A01 = self._structural_components(X_A, Y_A)
        V_B10, V_B01 = self._structural_components(X_B, Y_B)

        auc_A = self._auc(X_A, Y_A)
        auc_B = self._auc(X_B, Y_B)

        # Compute entries of covariance matrix S (covar_AB = covar_BA)
        var_A = (self._get_S_entry(V_A10, V_A10, auc_A, auc_A) * 1/len(V_A10)+ self._get_S_entry(V_A01, V_A01, auc_A, auc_A) * 1/len(V_A01))
        var_B = (self._get_S_entry(V_B10, V_B10, auc_B, auc_B) * 1/len(V_B10)+ self._get_S_entry(V_B01, V_B01, auc_B, auc_B) * 1/len(V_B01))
        covar_AB = (self._get_S_entry(V_A10, V_B10, auc_A, auc_B) * 1/len(V_A10)+ self._get_S_entry(V_A01, V_B01, auc_A, auc_B) * 1/len(V_A01))

        # Two tailed test
        z = self._z_score(var_A, var_B, covar_AB, auc_A, auc_B)
        p = st.norm.sf(abs(z))*2

        return z,p

    def _show_result(self):
        '''
        Run the test, store z and p on the instance, and print the verdict.
        '''
        z,p=self._compute_z_p()
        self.z,self.p=z,p
        print(f"z score = {z:.5f};\np value = {p:.5f};")
        if p < self.threshold :print("There is a significant difference")
        else:        print("There is NO significant difference")


In [3]:
# Worked example on toy data:
# model A (random, constant score) vs. "good" model B
preds_A = np.array([.5] * 10)
preds_B = np.array([.2, .5, .1, .4, .9, .8, .7, .5, .9, .8])
actual = np.array([0, 0, 0, 0, 1, 0, 1, 1, 1, 1])
DelongTest(preds1=preds_A, preds2=preds_B, label=actual)

z score = -3.35876;
p value = 0.00078;
There is a significant difference


<__main__.DelongTest at 0x14050bd9cc0>

## DeLong test on our predicted data (training set)

In [5]:
# Read the predictions stored in Delong-train.csv (GBK-encoded) and summarise its columns.
# NOTE(review): absolute local path — the notebook only runs on a machine with this file.
df = pd.read_csv(
    filepath_or_buffer='E:/Experiments/PeilunHan/LVNC/radiomics/run-new/run-1223/Delong-train.csv',
    encoding='gbk',
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76 entries, 0 to 75
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Group      76 non-null     int64  
 1   End        76 non-null     int64  
 2   ID         76 non-null     int64  
 3   检查时间       76 non-null     object 
 4   Clinical   76 non-null     float64
 5   Radiomics  76 non-null     float64
 6   RC         76 non-null     float64
 7   Combine    76 non-null     float64
dtypes: float64(4), int64(3), object(1)
memory usage: 4.9+ KB


In [6]:
# Preview the first rows of df.
df.head()

Unnamed: 0,Group,End,ID,检查时间,Clinical,Radiomics,RC,Combine
0,1,0,151,2012/1/17,0.02855,0.22703,0.0729,0.14401
1,1,0,23,2013/5/31,0.89371,0.11153,0.20118,0.21312
2,1,0,152,2013/7/5,0.93175,0.05854,0.13821,0.02796
3,1,1,29,2013/10/24,0.80107,0.11069,0.19213,0.03623
4,1,0,53,2013/11/26,0.28769,0.08967,0.07614,0.0282


In [8]:
# Select the model outputs and the label by column name instead of by
# position, so a reordered or extended CSV cannot silently feed the wrong
# column into the test. Names match the columns reported by df.info().
X1 = df['Clinical'].to_numpy()
X2 = df['Radiomics'].to_numpy()
X3 = df['RC'].to_numpy()
X4 = df['Combine'].to_numpy()
y = df['End'].to_numpy()  # used as the label in the DeLong tests

In [9]:
# Display the label vector that is passed to DelongTest.
y

array([0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int64)

In [10]:
# Display the scores taken from the 'Clinical' column.
X1

array([0.02855, 0.89371, 0.93175, 0.80107, 0.28769, 0.33403, 0.02002,
       0.89229, 0.03661, 0.16914, 0.88731, 0.0285 , 0.00836, 0.78545,
       0.02761, 0.02854, 0.90484, 0.01225, 0.02123, 0.91783, 0.03978,
       0.06145, 0.81069, 0.91616, 0.02853, 0.1051 , 0.02855, 0.24963,
       0.38034, 0.91785, 0.02565, 0.04011, 0.02853, 0.47747, 0.72547,
       0.02853, 0.03844, 0.29049, 0.02855, 0.74579, 0.9178 , 0.21388,
       0.01114, 0.03518, 0.02855, 0.02013, 0.5204 , 0.01906, 0.07663,
       0.02848, 0.01368, 0.11571, 0.04806, 0.79873, 0.02851, 0.5    ,
       0.02426, 0.02421, 0.07144, 0.02852, 0.91217, 0.02851, 0.91781,
       0.10543, 0.0195 , 0.32318, 0.8778 , 0.01979, 0.20065, 0.51308,
       0.00995, 0.02852, 0.01926, 0.0499 , 0.01179, 0.0194 ])

In [11]:
# Clinical vs. Combine scores from the training CSV (df)
DelongTest(preds1=X1, preds2=X4, label=y)

z score = -5.52600;
p value = 0.00000;
There is a significant difference


<__main__.DelongTest at 0x14050d18b70>

In [12]:
# Radiomics vs. Combine scores from the training CSV (df)
DelongTest(preds1=X2, preds2=X4, label=y)

z score = -5.05440;
p value = 0.00000;
There is a significant difference


<__main__.DelongTest at 0x14050d18c88>

In [13]:
# RC vs. Combine scores from the training CSV (df)
DelongTest(preds1=X3, preds2=X4, label=y)

z score = -0.73555;
p value = 0.46200;
There is NO significant difference


<__main__.DelongTest at 0x14050d18f28>

In [14]:
# Clinical vs. RC scores from the training CSV (df)
DelongTest(preds1=X1, preds2=X3, label=y)

z score = -5.89149;
p value = 0.00000;
There is a significant difference


<__main__.DelongTest at 0x14050d18f98>

In [15]:
# Radiomics vs. RC scores from the training CSV (df)
DelongTest(preds1=X2, preds2=X3, label=y)

z score = -4.92414;
p value = 0.00000;
There is a significant difference


<__main__.DelongTest at 0x14050d2f0b8>

In [16]:
# Clinical vs. Radiomics scores from the training CSV (df)
DelongTest(preds1=X1, preds2=X2, label=y)

z score = -0.90156;
p value = 0.36729;
There is NO significant difference


<__main__.DelongTest at 0x14050d2f160>

## DeLong test on the test set (same comparisons as for the training set above)

In [17]:
# Read the predictions stored in Delong-test.csv (GBK-encoded).
# NOTE(review): absolute local path — the notebook only runs on a machine with this file.
df1 = pd.read_csv('E:/Experiments/PeilunHan/LVNC/radiomics/run-new/run-1223/Delong-test.csv',encoding='gbk')
# Select the model outputs and the label by column name instead of by
# position, so a reordered or extended CSV cannot silently feed the wrong
# column into the test. Names match the header shown by df1.head().
df1_X1 = df1['Clinical'].to_numpy()
df1_X2 = df1['Radiomics'].to_numpy()
df1_X3 = df1['RC'].to_numpy()
df1_X4 = df1['Combine'].to_numpy()
df1_y = df1['End'].to_numpy()  # used as the label in the DeLong tests

In [20]:
# Preview the first rows of df1.
df1.head()

Unnamed: 0,Group,End,ID,检查时间,Clinical,Radiomics,RC,Combine
0,2,1,35,2020/1/15,0.14916,0.64519,0.42783,0.88422
1,2,1,127,2020/2/4,0.09794,0.44603,0.31803,0.65107
2,2,1,128,2020/3/19,0.46138,0.15629,0.13203,0.5569
3,2,0,115,2020/4/3,0.10375,0.18137,0.10123,0.15086
4,2,0,64,2020/4/7,0.04497,0.16375,0.08137,0.04683


In [18]:
# Display the scores taken from the 'Combine' column of df1.
df1_X4

array([0.88422, 0.65107, 0.5569 , 0.15086, 0.04683, 0.72275, 0.23438,
       0.60325, 0.01741, 0.04502, 0.40248, 0.02053, 0.0424 , 0.07397,
       0.35127, 0.41549, 0.51972, 0.02153, 0.25264, 0.30348])

In [21]:
# Clinical vs. Combine scores from the test CSV (df1)
DelongTest(preds1=df1_X1, preds2=df1_X4, label=df1_y)

z score = -2.14842;
p value = 0.03168;
There is a significant difference


<__main__.DelongTest at 0x14050d2f668>

In [22]:
# Radiomics vs. Combine scores from the test CSV (df1)
DelongTest(preds1=df1_X2, preds2=df1_X4, label=df1_y)

z score = -2.35804;
p value = 0.01837;
There is a significant difference


<__main__.DelongTest at 0x14050d2fe10>

In [23]:
# RC vs. Combine scores from the test CSV (df1)
DelongTest(preds1=df1_X3, preds2=df1_X4, label=df1_y)

z score = -1.99429;
p value = 0.04612;
There is a significant difference


<__main__.DelongTest at 0x14050d2f5f8>

In [24]:
# Clinical vs. RC scores from the test CSV (df1)
DelongTest(preds1=df1_X1, preds2=df1_X3, label=df1_y)

z score = -1.18322;
p value = 0.23672;
There is NO significant difference


<__main__.DelongTest at 0x14050d2fef0>

In [25]:
# Radiomics vs. RC scores from the test CSV (df1)
DelongTest(preds1=df1_X2, preds2=df1_X3, label=df1_y)

z score = -1.39462;
p value = 0.16313;
There is NO significant difference


<__main__.DelongTest at 0x14050d2fd30>

In [26]:
# Clinical vs. Radiomics scores from the test CSV (df1)
DelongTest(preds1=df1_X1, preds2=df1_X2, label=df1_y)

z score = 0.34759;
p value = 0.72815;
There is NO significant difference


<__main__.DelongTest at 0x14050d2f3c8>