In [1]:
## plot histogram of entropy (grouped by AD diagnosis) for each brain region and frequency.
import glob
import re
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
from scipy import stats
rng = np.random.default_rng()


output_dir = 'adni_out05_entropy_histogram/'

if not os.path.isdir(output_dir):
    os.mkdir(output_dir)

input_files = glob.glob('adni_out04_power_histogram_and_entropy/adni_out04_entropy_freq*.csv')
input_files.sort()

def cohend(d1, d2) -> float:

    # calculate the size of samples
    n1, n2 = len(d1), len(d2)

    # calculate the variance of the samples
    s1, s2 = np.var(d1, ddof=1), np.var(d2, ddof=1)

    # calculate the pooled standard deviation
    s = np.sqrt(((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2))

    # calculate the means of the samples
    u1, u2 = np.mean(d1), np.mean(d2)

    # return the effect size
    return (u1 - u2) / s


In [6]:
sns.set(rc = {'figure.figsize':(20,8)})
sns.set_style("whitegrid", {'axes.grid': False})
        
num_roi = 264

## as we selected the frist session as test set (subjects in current files), we use DX_bl as the diagnosis.
## but it seems DX_bl and DX are identical...

column_index = pd.MultiIndex.from_product([[i for i in range(num_roi)], ['p value', 'cohen''s d']])
row_index = np.arange(len(input_files))
result_table = pd.DataFrame(index = row_index, columns = column_index)
result_table.index.name = 'wavelet frequency'    

result_table_emci = result_table.copy()

for f in input_files:
    
    freq = re.search('(.*)_freq_(.*).csv', f).group(2)
    data = pd.read_csv(f, index_col = 0)
    
    for roi in range(num_roi):
        
        # plot histogram of entropy grouped by DX_bl (AD diagnosis at baseline).
        entropy = data[['DX_bl', str(roi)]]
        entropy = entropy[entropy['DX_bl'].isin(['CN', 'AD', 'EMCI'])]
        n_sample = entropy.groupby('DX_bl').count()[str(roi)].min()
        entropy = entropy.groupby("DX_bl").sample(n=n_sample, random_state=1)
        
        # plot histogram of selected roi and frequency.
        if roi % 50 == 0 and int(freq) % 3 == 0:
            ax = sns.histplot(entropy, x=str(roi), hue="DX_bl", element="step")
            sns.move_legend(ax, "upper left")

            figure_name = 'entropy_histogram_freq_' + freq + '_roi_' + str(roi) + '.png'
            plt.savefig(output_dir + figure_name)
            plt.clf()
    
        # t-test between AD and CN for each roi and frequency: 
        ad = entropy.query('DX_bl == "CN"')[str(roi)]
        cn = entropy.query('DX_bl == "AD"')[str(roi)]
        ttest = stats.ttest_ind(ad, cn, permutations=10000, random_state=rng)
        result_table.loc[int(freq), (roi, 'p value')] = ttest.pvalue
        result_table.loc[int(freq), (roi, 'cohen''s d')] = cohend(ad, cn)
        
        # t-test between EMCI and CN for each roi and frequency: 
        emci = entropy.query('DX_bl == "EMCI"')[str(roi)]
        ttest = stats.ttest_ind(emci, cn, permutations=10000, random_state=rng)
        result_table_emci.loc[int(freq), (roi, 'p value')] = ttest.pvalue
        result_table_emci.loc[int(freq), (roi, 'cohen''s d')] = cohend(emci, cn)

        # break

    ## line plot of mean values across subject of entropy for each frequency.
    ## x: roi index
    ## y: entropy
    
    ## selected groups to be displayed. add by xin May-04-2022.
    
    data_mean = data.groupby('DX_bl').mean().reset_index()
    data_mean = data_mean[data_mean['DX_bl'] != 'SMC']
    data_mean = pd.melt(data_mean, id_vars = ['DX_bl'], value_vars = [str(i) for i in range(264)])
    data_mean

    p = sns.lineplot(data=data_mean, x="variable", y="value", hue="DX_bl")
    p.set_xlabel("roi index", fontsize = 20)
    p.set_ylabel("mean entropy across subjects", fontsize = 20)

    # only show every 10 roi index.
    ax = plt.gca()
    temp = ax.xaxis.get_ticklabels()
    temp = list(set(temp) - set(temp[::10]))

    for label in temp:
        label.set_visible(False)

    figure_name = 'entropy_mean_freq_' + freq + '.png'
    plt.savefig(output_dir + figure_name)
    plt.clf()
    
    # break


<Figure size 1440x576 with 0 Axes>

In [217]:
pd.concat([data[['DX', 'subject_id']].groupby('DX').count(), data[['DX', 'AGE']].groupby('DX').mean()], axis = 1)

Unnamed: 0_level_0,subject_id,AGE
DX,Unnamed: 1_level_1,Unnamed: 2_level_1
AD,33,72.490909
CN,94,73.737234
EMCI,85,70.44
LMCI,59,71.001695
SMC,40,71.5325


In [223]:

def add_stars(report, start_col = 0):
    
    cols = slice(0,None,2)
    report2 = report.copy()
    
    report2.iloc[:, cols] = report2.iloc[:, cols].astype(float).round(3)
    # report.iloc[:,1:]=report.iloc[:,1:].mask(report.iloc[:,1:].le(0.05), report.astype(str).apply(lambda x : x.str[:5]).add('*'))

    report2.loc[:,:] = report2.loc[:,:].astype(str).apply(lambda x : x.str[:5]).apply(lambda x : x.str.ljust(5, fillchar='0'))

    report2[report.iloc[:, cols].le(0.05)] = report2[
        report.iloc[:, cols].le(0.05)].astype(str).apply(lambda x : x.str[:5]).add('*')

    report2[report.iloc[:, cols].le(0.01)] = report2[
        report.iloc[:, cols].le(0.01)].astype(str).apply(lambda x : x.str[:5]).add('**')

    # report2[report.iloc[:,skip_col:].le(0.001)] = report2[
    #     report.iloc[:,skip_col:].le(0.001)].astype(str).apply(lambda x : x.str[:5]).add('***')
    report2[report.iloc[:, cols].le(0.001)] = '<.001***'
    
    return report2
    
report = add_stars(result_table, start_col = 0)
report.transpose().to_csv(output_dir + 'ttest_entropy_ad.csv')

report_emci = add_stars(result_table_emci, start_col = 0)
report_emci.transpose().to_csv(output_dir + 'ttest_entropy_emci.csv')

In [224]:
report_emci.iloc[:,::2].transpose()

Unnamed: 0,wavelet frequency,0,1,2,3,4,5,6,7,8,9
0,p value,0.030*,0.564,0.113,0.011*,0.017*,0.072,0.021*,0.549,0.331,0.563
1,p value,0.895,0.934,0.216,0.630,0.954,0.514,0.007**,0.554,0.535,0.552
2,p value,0.130,0.338,0.012*,0.429,0.116,0.340,0.984,0.810,0.134,0.166
3,p value,0.178,0.099,0.672,0.849,0.063,0.806,0.405,0.028*,0.073,0.883
4,p value,0.292,0.115,0.757,0.573,0.212,0.418,0.576,0.797,0.045*,0.473
...,...,...,...,...,...,...,...,...,...,...,...
259,p value,0.338,0.017*,0.152,0.074,0.065,0.183,0.676,0.520,0.089,0.109
260,p value,0.386,0.298,0.006**,0.914,0.746,0.241,0.456,0.071,0.938,0.069
261,p value,0.138,0.129,0.030*,0.251,0.930,0.566,0.237,0.214,<.001***,0.958
262,p value,0.619,0.095,0.089,0.189,0.291,0.028*,0.938,0.045*,0.432,0.192


In [225]:
report.iloc[:,::2].transpose()

Unnamed: 0,wavelet frequency,0,1,2,3,4,5,6,7,8,9
0,p value,<.001***,0.029*,0.037*,0.001**,<.001***,<.001***,0.093,0.236,0.002**,0.087
1,p value,0.053,0.250,0.598,0.064,0.022*,0.007**,0.014*,0.042*,0.083,0.884
2,p value,0.026*,0.012*,<.001***,0.058,0.007**,0.048*,0.948,0.044*,0.018*,0.045*
3,p value,0.009**,0.221,0.036*,0.046*,0.036*,0.533,0.179,<.001***,0.066,0.783
4,p value,0.023*,0.003**,0.004**,0.008**,0.015*,0.002**,<.001***,0.030*,0.039*,0.417
...,...,...,...,...,...,...,...,...,...,...,...
259,p value,0.001**,0.006**,0.106,0.002**,<.001***,0.063,0.007**,0.036*,0.026*,0.006**
260,p value,<.001***,0.164,<.001***,0.113,0.480,0.012*,0.233,<.001***,0.069,0.002**
261,p value,0.047*,0.116,0.156,0.238,0.338,0.220,0.240,0.078,<.001***,0.883
262,p value,0.397,<.001***,0.382,0.121,0.027*,0.034*,0.028*,0.177,0.171,0.135
