In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import scipy as scp
from scipy import stats
import pingouin as pg

In [5]:
def get_pvals_uncorrected(df_DMD, target='lge', paired=False):
    """
    Derive uncorrected per phase and AHA segment T-Test p-values between two groups in a pd.DataFrame

    For every AHA segment (1-16) and every phase (0-4) the rows with ``df_DMD[target] == 0`` are
    tested against the rows with ``df_DMD[target] == 1``; once on the RS column ('our_rs') and
    once on the CS column ('our_cs').

    Parameters
    ----------
    df_DMD : (pandas.DataFrame) -->either keyframe2keyframe or composed Strain results from calc_strain()
             Must provide the columns 'aha', 'phase', 'our_rs', 'our_cs' and the column named by `target`.
    target : (str) name of the binary (0/1) grouping column, e.g. lge+/lge- or dmd vs control
    paired : (bool) forwarded unchanged to pingouin.ttest

    Returns
    -------
    (pandas.DataFrame) with a shape of 16 x 10, rounded to 5 decimals.
    Row r holds AHA segment r + 1. Columns 0-4 are the RS tests for phases 0-4,
    columns 5-9 are the CS tests for phases 0-4.
    Cells for which one of the two groups has fewer than two samples contain the marker value 999.

    """
    n_phases = 5
    n_aha = 16
    strain_columns = ('our_rs', 'our_cs')  # column order of the result: all RS phases, then all CS phases
    pvalue_error = 999  # a value that is written when no p-value can be computed
    dec_p = 5

    # one (strain column, phase) pair per result column
    tests = [(strain_col, phase) for strain_col in strain_columns for phase in range(n_phases)]

    # start with the error marker everywhere, computable cells are overwritten below
    results_pvalues = np.full((n_aha, len(tests)), float(pvalue_error))

    # the group and phase masks do not depend on the segment, build them once
    is_neg = df_DMD[target] == 0
    is_pos = df_DMD[target] == 1
    phase_masks = [df_DMD['phase'] == phase for phase in range(n_phases)]

    for aha in range(1, n_aha + 1):  # 1-16
        in_segment = df_DMD['aha'] == aha
        for col_idx, (strain_col, phase) in enumerate(tests):
            in_cell = in_segment & phase_masks[phase]
            group_neg = df_DMD.loc[in_cell & is_neg, strain_col]
            group_pos = df_DMD.loc[in_cell & is_pos, strain_col]

            # a t-test needs at least two samples per group; empty groups are skipped as well
            if group_neg.size < 2 or group_pos.size < 2:
                continue

            res = pg.ttest(group_neg, group_pos, paired=paired)
            # .iloc: take the first row by position, independent of the index labels of the result
            results_pvalues[aha - 1, col_idx] = float(res['p-val'].iloc[0])

    # rounding
    return pd.DataFrame(results_pvalues).round(dec_p)

In [6]:
def get_pvals_corrected(df_pvals_uncorrected, alpha0):
    """
    Correct a table of p-values for multiple testing with the Holm-Bonferroni method

    Background (recommendation of the Heidelberg statisticians, translated):
    Holm-Bonferroni is preferable to the plain Bonferroni correction, because plain Bonferroni
    becomes significant too rarely. The procedure works as follows: sort all p-values by size,
    starting with the smallest. Compare the smallest one with the level alpha / m, where m is the
    number of p-values (the original explanation used m = 6 * 16). If it is below that level,
    compare the second smallest with alpha / (m - 1), then the third smallest with alpha / (m - 2)
    and so on, with slowly growing levels, until a p-value is no longer below its level.
    Stop there. All p-values that were below their level up to this point are significant,
    all others are not.

    Parameters
    ----------
    df_pvals_uncorrected : (pandas.DataFrame) uncorrected p-values, all cells are corrected together
    alpha0 : (float) Significant threshold, handed to pingouin.multicomp as `alpha`

    Returns
    -------
    msk_ss : first return value of pingouin.multicomp (the per-cell significance mask)
    pvals_corr : (pandas.DataFrame) second return value of pingouin.multicomp, the corrected p-values

    """
    raw_pvals = df_pvals_uncorrected.to_numpy()
    holm_result = pg.multicomp(pvals=raw_pvals, alpha=alpha0, method='holm')
    msk_ss = holm_result[0]
    pvals_corr = pd.DataFrame(holm_result[1])
    return msk_ss, pvals_corr

In [7]:
# Experiment folder that holds the exported strain table.
# NOTE(review): absolute, machine-specific path - this cell only runs where that mount exists.
path_to_our_dmd = '/mnt/sds/sd20i001/sven/code/dynamic-cmr-models/exp/p2p/dmd/jcmr/iso2_5_32_p2p/img05_flow01_msk1/ssimssim_grad_dice/2023-05-31_15_52_920901/'
df_DMD = pd.read_csv(path_to_our_dmd+'df_DMD_time_p2p.csv')
# Significance level handed to the Holm correction below.
alpha0=0.05
# Uncorrected p-values with the function defaults (target='lge', paired=False).
df_pvals_uncorrected = get_pvals_uncorrected(df_DMD)
# msk_ss: significance mask from pg.multicomp, df_pvals_corrected: the Holm-corrected p-values.
msk_ss, df_pvals_corrected = get_pvals_corrected(df_pvals_uncorrected, alpha0=alpha0)

In [8]:
# Display the rounded, uncorrected p-values (row = AHA segment - 1, column = test index 0-9).
df_pvals_uncorrected

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.09043,0.0881,0.37136,0.8679,0.16149,0.1482,0.159,0.10789,0.03865,0.28991
1,0.35214,0.07507,0.99108,0.10356,0.83411,0.70457,0.17371,0.46808,0.68823,1e-05
2,0.85221,0.20996,0.85045,0.65702,0.20336,0.60872,0.66466,0.82096,0.77626,0.52445
3,0.81142,0.36125,0.8933,0.69683,0.42442,0.0179,0.58231,0.06518,0.71034,0.81399
4,0.31767,0.03135,0.79194,0.08613,0.77359,0.01606,0.00205,1e-05,0.00564,4e-05
5,0.67243,0.84514,0.20063,0.77929,0.12775,0.9228,0.00066,0.00185,0.00053,0.01824
6,0.89608,0.04703,0.25226,0.14966,0.87023,0.01156,0.03432,0.00083,0.1146,0.00204
7,0.71382,0.16715,0.24481,0.68948,0.18329,0.55249,0.46502,0.70311,0.21341,0.00378
8,0.52088,0.13887,0.36649,0.03932,0.26087,0.02878,0.71183,0.15619,0.38659,0.00104
9,0.83329,0.96452,0.80814,0.04693,0.52001,0.03219,0.97293,0.02283,0.20436,0.84409


In [10]:
# Sanity check: the significance mask keeps the (segments, tests) layout of the p-value table.
msk_ss.shape

(16, 10)