In [None]:
# common
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
from datetime import datetime
from itertools import product
from collections import OrderedDict

In [None]:
import pandas as pd
import numpy as np
import scipy.stats

def compute_midrank(x):
    """Return the 1-based midranks of the values in ``x``.

    Tied values all receive the average of the ranks they jointly occupy,
    e.g. ``[0.1, 0.2, 0.2, 0.5]`` -> ``[1.0, 2.5, 2.5, 4.0]``.

    Args:
        x: One-dimensional array-like of comparable numeric values.

    Returns:
        ``np.ndarray`` of ``float64`` with the same length as ``x``; element
        ``k`` is the midrank of ``x[k]``.

    Raises:
        ValueError: If ``x`` contains NaN. NaN never compares equal to itself,
            so the tie-grouping loop below could not make progress on it.
    """
    x = np.asarray(x)
    if np.issubdtype(x.dtype, np.floating) and np.isnan(x).any():
        raise ValueError("compute_midrank does not support NaN values")

    J = np.argsort(x)
    Z = x[J]
    N = len(x)
    T = np.zeros(N, dtype=np.float64)
    i = 0
    while i < N:
        # Extend j to one past the end of the run of values tied with Z[i].
        j = i
        while j < N and Z[j] == Z[i]:
            j += 1
        # Zero-based average position of the tied run [i, j).
        T[i:j] = 0.5 * (i + j - 1)
        i = j
    # Scatter the ranks back to the original element order and make them 1-based.
    T2 = np.empty(N, dtype=np.float64)
    T2[J] = T + 1
    return T2


def fastDeLong(predictions_sorted_transposed, label_1_count):
    """Compute AUCs and their DeLong covariance from midranks.

    Presumably an implementation of the fast DeLong algorithm (midrank based);
    confirm against the reference the notebook was adapted from.

    Args:
        predictions_sorted_transposed: 2-D array with one row per classifier and
            one column per example. The first ``label_1_count`` columns are
            treated as the positive examples and the remaining columns as the
            negative examples.
        label_1_count: Number of positive examples (leading columns).

    Returns:
        Tuple ``(aucs, delongcov)``: ``aucs`` holds one AUC per classifier row
        and ``delongcov`` is the result of ``np.cov`` on the per-example
        structural components, scaled by the class sizes.
    """
    scores = predictions_sorted_transposed
    n_pos = label_1_count
    n_neg = scores.shape[1] - n_pos
    n_models = scores.shape[0]

    pos_scores = scores[:, :n_pos]
    neg_scores = scores[:, n_pos:]

    # Midranks within the positives, within the negatives, and over everything.
    rank_in_pos = np.empty([n_models, n_pos], dtype=np.float64)
    rank_in_neg = np.empty([n_models, n_neg], dtype=np.float64)
    rank_overall = np.empty([n_models, n_pos + n_neg], dtype=np.float64)
    for row, (pos_row, neg_row, all_row) in enumerate(zip(pos_scores, neg_scores, scores)):
        rank_in_pos[row, :] = compute_midrank(pos_row)
        rank_in_neg[row, :] = compute_midrank(neg_row)
        rank_overall[row, :] = compute_midrank(all_row)

    overall_pos = rank_overall[:, :n_pos]
    overall_neg = rank_overall[:, n_pos:]

    aucs = overall_pos.sum(axis=1) / n_pos / n_neg - float(n_pos + 1.0) / 2.0 / n_neg

    # Per-example components for positives (v01) and negatives (v10).
    v01 = (overall_pos - rank_in_pos) / n_neg
    v10 = 1.0 - (overall_neg - rank_in_neg) / n_pos

    delongcov = np.cov(v01) / n_pos + np.cov(v10) / n_neg
    return aucs, delongcov


def calc_pvalue(aucs, sigma):
    """Return log10 of the two-sided p-value for the difference of two AUCs.

    Args:
        aucs: Array of two AUC values.
        sigma: 2x2 covariance matrix of the two AUCs.

    Returns:
        Array of shape ``(1, 1)`` containing ``log10(p-value)``.
    """
    contrast = np.array([[1, -1]])
    # Variance of (auc_0 - auc_1) under the given covariance.
    diff_variance = np.dot(np.dot(contrast, sigma), contrast.T)
    z = np.abs(np.diff(aucs)) / np.sqrt(diff_variance)
    # log10(2 * sf(z)), computed in log space.
    log_tail = scipy.stats.norm.logsf(z, loc=0, scale=1)
    return np.log10(2) + log_tail / np.log(10)


def compute_ground_truth_statistics(ground_truth):
    """Return an ordering that puts positive examples first, and their count.

    Works for any array-like of 0/1 labels, including boolean and unsigned
    integer arrays (negating those either raises or wraps around, so the
    ordering is derived from a comparison instead of from ``-ground_truth``).

    Args:
        ground_truth: One-dimensional array-like containing only the values
            0 and 1, with both classes present.

    Returns:
        Tuple ``(order, label_1_count)``: ``order`` is an index array that
        places all label-1 examples before all label-0 examples (original
        relative order is kept within each class); ``label_1_count`` is the
        number of label-1 examples as a Python ``int``.

    Raises:
        AssertionError: If the labels are not exactly the set {0, 1}.
    """
    labels = np.asarray(ground_truth)
    assert np.array_equal(np.unique(labels), [0, 1]), \
        "ground_truth must contain both classes and only the values 0 and 1"
    # False (label == 1) sorts before True (label == 0); stable keeps input order per class.
    order = np.argsort(labels != 1, kind="stable")
    label_1_count = int(np.count_nonzero(labels == 1))
    return order, label_1_count


def delong_roc_variance(ground_truth, predictions):
    """Compute the AUC of a single classifier and its DeLong variance.

    Args:
        ground_truth: Array of 0/1 labels.
        predictions: Array of scores, one per example, aligned with
            ``ground_truth``.

    Returns:
        Tuple ``(auc, delongcov)`` as produced by ``fastDeLong`` for a single
        row of predictions.
    """
    positives_first, n_positive = compute_ground_truth_statistics(ground_truth)
    # Reorder so positives lead, and add a leading axis: one row = one classifier.
    single_row = predictions[np.newaxis, positives_first]
    auc_values, covariance = fastDeLong(single_row, n_positive)
    assert len(auc_values) == 1, "There is a bug in the code, please forward this to the developers"
    return auc_values[0], covariance


def delong_roc_test(ground_truth, predictions_one, predictions_two):
    """Compare the AUCs of two classifiers scored on the same examples.

    Args:
        ground_truth: Array of 0/1 labels.
        predictions_one: Scores of the first classifier, aligned with
            ``ground_truth``.
        predictions_two: Scores of the second classifier, aligned with
            ``ground_truth``.

    Returns:
        The value returned by ``calc_pvalue``: log10 of the two-sided p-value.
    """
    positives_first, n_positive = compute_ground_truth_statistics(ground_truth)
    stacked = np.vstack((predictions_one, predictions_two))
    # Columns are permuted so the positive examples come first.
    both_models = stacked[:, positives_first]
    auc_pair, covariance = fastDeLong(both_models, n_positive)
    return calc_pvalue(auc_pair, covariance)

In [None]:
# Load the per-model results table; the cells below read its 'combination',
# 'TEST_test_label' and 'TEST_proba' columns.
# FIXME(review): no workbook path is passed here. pandas' read_excel expects the
# file path/buffer as its first argument, so this call presumably fails as
# written — the path appears to have been removed; supply it before running.
reference_df = pd.read_excel()

In [None]:
# Test labels of the 'K12' combination, stored as a bracketed, comma-separated
# string: drop the surrounding brackets, split on commas, and convert to ints.
k12_label_text = reference_df.loc[reference_df['combination'] == 'K12', 'TEST_test_label'].values[0]
label_info = np.array([int(token.replace(' ', '')) for token in k12_label_text[1:-1].split(',')])

In [None]:
# Pairwise DeLong comparison of the reference model (combination 'K12') against
# the models stored in the rows that follow the first row of reference_df.
# One result row is collected per comparison and assembled into total_df.

N_COMPARE_MODELS = 4  # number of rows after row 0 that are compared with the reference

RESULT_COLUMNS = ['Reference_Model_Index', 'Compare_Model_Index', 'log10(p-value)', 'P-value']


def _parse_bracketed_list(text, cast):
    """Parse a string such as '[1, 0, 1]' into a NumPy array, applying ``cast`` per item."""
    tokens = (token.replace(' ', '') for token in text[1:-1].split(','))
    # Empty tokens (e.g. from '[]' or a trailing comma) are skipped instead of crashing the cast.
    return np.array([cast(token) for token in tokens if token])


main_model_df = reference_df.loc[0]  # retained from the original cell; not used below

# Reference model: labels and predicted probabilities of the 'K12' combination.
k12_rows = reference_df[reference_df['combination'] == 'K12']
main_index = k12_rows['combination'].values[0]
main_model_label = _parse_bracketed_list(k12_rows['TEST_test_label'].values[0], int)
main_model_proba = _parse_bracketed_list(k12_rows['TEST_proba'].values[0], float).astype('float64')

# Loop-invariant: the ground truth is always the reference model's label vector.
ground_truth = main_model_label.astype('float64')

comparison_records = []
# Slicing (rather than indexing row by row) tolerates tables with fewer rows than expected.
for _, compare_row in reference_df.iloc[1:1 + N_COMPARE_MODELS].iterrows():
    compare_model_label = _parse_bracketed_list(compare_row['TEST_test_label'], int)
    compare_model_proba = _parse_bracketed_list(compare_row['TEST_proba'], float).astype('float64')

    # The test is only run when both models were evaluated on identical labels.
    if not np.array_equal(compare_model_label, main_model_label):
        print('Skipping {}: test labels differ from {}'.format(compare_row['combination'], main_index))
        continue

    # delong_roc_test returns a (1, 1) array holding log10(p-value).
    log10_p_value = delong_roc_test(ground_truth, main_model_proba, compare_model_proba)[0][0]
    p_value = 10 ** log10_p_value

    comparison_records.append({
        'Reference_Model_Index': main_index,
        'Compare_Model_Index': compare_row['combination'],
        'log10(p-value)': log10_p_value,
        'P-value': p_value,
    })

# Built once from the collected records; rows get a unique 0..n-1 index
# (the previous concat-in-a-loop left every row with index 0).
total_df = pd.DataFrame(comparison_records, columns=RESULT_COLUMNS)
            
            