In [92]:
## plot histogram of entropy (grouped by AD diagnosis) for each brain region and frequency.
import glob
import re
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
from scipy import stats
rng = np.random.default_rng()


output_dir = 'adni_out05_entropy_histogram/'

if not os.path.isdir(output_dir):
    os.mkdir(output_dir)

input_files = glob.glob('adni_out04_power_histogram_and_entropy/adni_out04_entropy_freq*.csv')
input_files.sort()

def cohend(d1, d2) -> float:

    # calculate the size of samples
    n1, n2 = len(d1), len(d2)

    # calculate the variance of the samples
    s1, s2 = np.var(d1, ddof=1), np.var(d2, ddof=1)

    # calculate the pooled standard deviation
    s = np.sqrt(((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2))

    # calculate the means of the samples
    u1, u2 = np.mean(d1), np.mean(d2)

    # return the effect size
    return (u1 - u2) / s


In [117]:
sns.set(rc = {'figure.figsize':(20,8)})
sns.set_style("whitegrid", {'axes.grid': False})
        
num_roi = 264

## as we selected the frist session as test set (subjects in current files), we use DX_bl as the diagnosis.
## but it seems DX_bl and DX are identical...

column_index = pd.MultiIndex.from_product([[i for i in range(num_roi)], ['p value', 'cohen''s d']])
row_index = np.arange(len(input_files))
result_table = pd.DataFrame(index = row_index, columns = column_index)
result_table.index.name = 'wavelet frequency'                

for f in input_files:
    
    freq = re.search('(.*)_freq_(.*).csv', f).group(2)
    data = pd.read_csv(f, index_col = 0)
    
    for roi in range(num_roi):
        
        # plot histogram of entropy grouped by DX_bl (AD diagnosis at baseline).
        entropy = data[['DX_bl', str(roi)]]
        entropy = entropy[entropy['DX_bl'].isin(['CN', 'AD'])]
        n_sample = entropy.groupby('DX_bl').count()[str(roi)].min()
        entropy = entropy.groupby("DX_bl").sample(n=n_sample, random_state=1)
        
        if roi % 50 == 0 and int(freq) % 3 == 0:
            ax = sns.histplot(entropy, x=str(roi), hue="DX_bl", element="step")
            sns.move_legend(ax, "upper left")

            figure_name = 'entropy_histogram_freq_' + freq + '_roi_' + str(roi) + '.png'
            plt.savefig(output_dir + figure_name)
            plt.clf()
            
    
        # t-test between AD and CN for each roi and frequency: 
        ad = entropy.query('DX_bl == "CN"')[str(roi)]
        cn = entropy.query('DX_bl == "AD"')[str(roi)]
        ttest = stats.ttest_ind(ad, cn, permutations=10000, random_state=rng)
        result_table.loc[int(freq), (roi, 'p value')] = ttest.pvalue
        result_table.loc[int(freq), (roi, 'cohen''s d')] = cohend(ad, cn)

        # break
        
            
    data_mean = data.groupby('DX_bl').mean().reset_index()
    data_mean = pd.melt(data_mean, id_vars = ['DX_bl'], value_vars = [str(i) for i in range(264)])
    data_mean

    p = sns.lineplot(data=data_mean, x="variable", y="value", hue="DX_bl")
    p.set_xlabel("roi index", fontsize = 20)
    p.set_ylabel("mean entropy across subjects", fontsize = 20)

    # only show every 10 roi index.
    ax = plt.gca()
    temp = ax.xaxis.get_ticklabels()
    temp = list(set(temp) - set(temp[::10]))

    for label in temp:
        label.set_visible(False)

    figure_name = 'entropy_mean_freq_' + freq + '.png'
    plt.savefig(output_dir + figure_name)
    plt.clf()
    
    # break


<Figure size 1440x576 with 0 Axes>

In [125]:
result_table.transpose()

Unnamed: 0,wavelet frequency,0,1,2,3,4,5,6,7,8,9
0,p value,0.0002,0.0281,0.0334,0.0012,0.0005,0.0002,0.0887,0.2387,0.0022,0.0843
0,cohens d,0.919148,0.551334,0.525793,0.80877,0.863343,0.996308,0.424486,0.297398,0.728325,0.428918
1,p value,0.0488,0.2459,0.6158,0.0628,0.0258,0.0072,0.014,0.0405,0.0832,0.8869
1,cohens d,0.486374,0.292908,0.12655,0.461526,0.562538,0.679143,0.615434,0.507702,0.435572,0.036843
2,p value,0.0249,0.0119,0.0007,0.0572,0.0058,0.0443,0.9436,0.041,0.0187,0.0447
...,...,...,...,...,...,...,...,...,...,...,...
261,cohens d,0.493932,0.39072,0.35431,0.295836,0.244362,0.309501,0.297536,0.442236,1.196435,0.039116
262,p value,0.4074,0.0008,0.3846,0.1274,0.0269,0.0318,0.026,0.1715,0.1772,0.1393
262,cohens d,0.208081,0.878249,0.217044,0.38454,0.542884,0.537248,0.553526,0.342789,0.338119,0.372913
263,p value,0.1025,0.0645,0.9994,0.1074,0.3845,0.7876,0.8965,0.0403,0.0876,0.1575


In [129]:

def add_stars(report, skip_col = 1):
    
    report2 = report.copy()
    report2.iloc[:, skip_col:] = report2.iloc[:, skip_col:].astype(float).round(3)
    # report.iloc[:,1:]=report.iloc[:,1:].mask(report.iloc[:,1:].le(0.05), report.astype(str).apply(lambda x : x.str[:5]).add('*'))

    report2[report2.iloc[:,skip_col:].le(2)] = report2[
        report2.iloc[:,skip_col:].le(2)].astype(str).apply(lambda x : x.str[:5]).apply(lambda x : x.str.ljust(5, fillchar='0'))

    report2[report.iloc[:,skip_col:].le(0.05)] = report2[
        report.iloc[:,skip_col:].le(0.05)].astype(str).apply(lambda x : x.str[:5]).add('*')

    report2[report.iloc[:,skip_col:].le(0.01)] = report2[
        report.iloc[:,skip_col:].le(0.01)].astype(str).apply(lambda x : x.str[:5]).add('**')

    # report2[report.iloc[:,skip_col:].le(0.001)] = report2[
    #     report.iloc[:,skip_col:].le(0.001)].astype(str).apply(lambda x : x.str[:5]).add('***')
    report2[report.iloc[:,skip_col:].le(0.001)] = '<.001***'
    
    return report2
    
report = add_stars(result_table, skip_col = 0)
report.transpose().to_csv(output_dir + 'ttest_entropy.csv')

In [130]:
report.transpose()

Unnamed: 0,wavelet frequency,0,1,2,3,4,5,6,7,8,9
0,p value,<.001***,0.028*,0.033*,0.001**,<.001***,<.001***,0.089,0.239,0.002**,0.084
0,cohens d,0.919,0.551,0.526,0.809,0.863,0.996,0.424,0.297,0.728,0.429
1,p value,0.049*,0.246,0.616,0.063,0.026*,0.007**,0.014*,0.040*,0.083,0.887
1,cohens d,0.486,0.293,0.127,0.462,0.563,0.679,0.615,0.508,0.436,0.037*
2,p value,0.025*,0.012*,<.001***,0.057,0.006**,0.044*,0.944,0.041*,0.019*,0.045*
...,...,...,...,...,...,...,...,...,...,...,...
261,cohens d,0.494,0.391,0.354,0.296,0.244,0.310,0.298,0.442,1.196,0.039*
262,p value,0.407,<.001***,0.385,0.127,0.027*,0.032*,0.026*,0.172,0.177,0.139
262,cohens d,0.208,0.878,0.217,0.385,0.543,0.537,0.554,0.343,0.338,0.373
263,p value,0.102,0.064,0.999,0.107,0.384,0.788,0.896,0.040*,0.088,0.158
