In [1]:
## plot histogram across time points, compute entropy, 
## and plot histogram of entropy across subjects.
## xin Apr 11 2022.

import glob
import pandas as pd
import numpy as np
import re
import os
import matplotlib.pyplot as plt

## hcp data:
# input_harmonics_wavelets = 'hcp_out02_harmonics_100'
# input_time_signal = '/home/xin/Downloads/FullData_Oct26/Scan1/'
# output_dir = 'hcp_out03_spectrum_power'

## adni data:
# Directory searched for the 'spectrum_subj*_roi_*.csv' input files.
input_dir = 'adni_out03_spectrum_power'
# Directory that receives the histogram figures and the entropy tables.
output_dir = 'adni_out04_power_histogram_and_entropy'
# Prefix of the entropy table file names. The leading '/' is required because
# the name is appended to output_dir by plain string concatenation.
outfile_name_header = '/adni_out04_entropy'

## subject info:
# Table with one row per subject; the first CSV column is used as the index.
input_subject_info = 'adni_out02_test_subject_info/subject_info_merge.csv'
subject_info = pd.read_csv(input_subject_info, index_col = 0)

# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(output_dir, exist_ok=True)

# Sorted so that files are processed in a deterministic order.
spectrum_files = sorted(glob.glob(input_dir + '/spectrum_subj*_roi_*.csv'))


In [2]:
# Display the subject metadata table that was read from subject_info_merge.csv.
subject_info

Unnamed: 0,subject_id,ses_id,AGE,PTGENDER,DX_bl,DX
0,002_S_0295,2012-05-10_15_42_37.0,84.8,Male,CN,CN
1,002_S_0413,2017-06-21_13_23_38.0,76.3,Female,CN,CN
2,002_S_0685,2011-07-08_07_04_27.0,89.6,Female,CN,CN
3,002_S_0729,2012-08-07_07_21_09.0,65.1,Female,LMCI,LMCI
4,002_S_1155,2017-04-24_13_21_32.0,57.8,Male,LMCI,LMCI
...,...,...,...,...,...,...
324,941_S_4100,2017-10-27_11_05_51.0,78.5,Female,CN,CN
325,941_S_4187,2017-06-05_14_05_55.0,62.0,Male,LMCI,LMCI
326,941_S_4292,2017-08-21_15_05_09.0,70.9,Male,CN,CN
327,941_S_4365,2017-08-28_14_06_46.0,80.3,Male,CN,CN


In [3]:
from math import log, e

def entropy2(labels, base=None):
    """Shannon entropy of the empirical distribution of `labels`.

    Parameters
    ----------
    labels : sequence
        Observed (discrete) values; each distinct value is one class.
    base : float, optional
        Logarithm base. Defaults to Euler's number, i.e. entropy in nats.

    Returns
    -------
    The entropy of the label frequencies. The integer 0 is returned when
    there are fewer than two observations or fewer than two distinct classes.
    """
    total = len(labels)

    # Zero or one observation carries no uncertainty.
    if total <= 1:
        return 0

    _, occurrences = np.unique(labels, return_counts=True)
    frequencies = occurrences / total

    # A single populated class also means zero entropy.
    if np.count_nonzero(frequencies) <= 1:
        return 0

    log_base = base if base is not None else e

    # Accumulate -sum(p * log(p)) one class at a time.
    accumulated = 0.
    for p in frequencies:
        accumulated = accumulated - p * log(p, log_base)

    return accumulated

# Column labels (collected across all files) that contained NaN and were skipped.
nan_col = []

# File names look like 'spectrum_subj<id>_roi_<id>.csv'. The pattern is compiled
# once and the '.' is escaped so that it only matches a literal dot.
SPECTRUM_FILE_PATTERN = re.compile(r'spectrum_subj(\d+)_roi_(\d+)\.csv$')
NUM_ROI_SLOTS = 264   # ROI slots reserved per subject in entropy_list
NUM_ENTROPY_BINS = 50 # number of bin edges used to discretize a column
NUM_HIST_BINS = 30    # number of bins in the saved histogram figures


def _parse_spectrum_filename(path):
    """Return the (subject id, roi id) strings embedded in a spectrum file name.

    Raises ValueError when the name does not follow the expected pattern.
    """
    match = SPECTRUM_FILE_PATTERN.search(path)
    if match is None:
        raise ValueError('unexpected spectrum file name: ' + path)
    return match.group(1), match.group(2)


if not spectrum_files:
    raise FileNotFoundError('no spectrum files found in ' + input_dir)

# Use the largest subject id over all files rather than relying on the sort
# order of the file list.
last_subjid = max(int(_parse_spectrum_filename(f)[0]) for f in spectrum_files)

# entropy_list[subject][roi] holds the list of per-column entropies of one file,
# or None when no file was processed for that (subject, roi) pair.
entropy_list = [[None] * NUM_ROI_SLOTS for _ in range(last_subjid+1)]

for file in spectrum_files:
    ## one file per (subject, roi); an entropy value is computed for every column.
    ## the values are inner product of harmonics and time course signal.
    ## NOTE(review): columns are treated as frequencies below (figure names use
    ## '_freq_'); rows are presumably time points -- confirm.

    print(file)
    subj_id, roi = _parse_spectrum_filename(file)
    subj_idx, roi_idx = int(subj_id), int(roi)

    # get AD diagnosis (row looked up by position, not by index label):
    diagnosis = subject_info.iloc[subj_idx]['DX']

    spectrum = pd.read_csv(file, header = None, sep = ',')
    # Kept for compatibility with the original cell; this is the column count of
    # the file and is not used further in this cell.
    num_roi = spectrum.shape[1]

    result = []
    for col in spectrum:
        signal = spectrum[col]

        if signal.isna().any():
            # Entropy is undefined for a column with missing values.
            print('nan column found for: ', col)
            nan_col.append(col)
            result.append(np.nan)
            continue

        # compute entropy of the column after discretizing it on an equally
        # spaced grid between its minimum and maximum. With np.digitize's
        # default right=False, values equal to the maximum fall past the last edge.
        bins = np.linspace(signal.min(), signal.max(), num = NUM_ENTROPY_BINS)
        discretized_signal = np.digitize(signal, bins)
        entropy = entropy2(discretized_signal)
        result.append(entropy)

        # histogram:
        ## plot figure on every n frequency, subject and brain region :
        if col % 5 == 0 and subj_idx % 20 == 0 and roi_idx % 50 == 0:
            # str.format accepts any diagnosis value (e.g. a missing DX read as
            # NaN), where string concatenation would raise TypeError.
            figure_name = file.replace('.csv', '_freq_{:03}{}.png'.format(col, diagnosis))
            figure_name = figure_name.replace(input_dir, output_dir)
            # Explicit figure that is closed after saving, so no figure stays
            # open or gets rendered as cell output.
            fig, ax = plt.subplots()
            ax.hist(signal, bins = NUM_HIST_BINS)
            fig.savefig(figure_name)
            plt.close(fig)

    entropy_list[subj_idx][roi_idx] = result

print('finished!')

adni_out03_spectrum_power/spectrum_subj000_roi_000.csv
adni_out03_spectrum_power/spectrum_subj000_roi_001.csv
adni_out03_spectrum_power/spectrum_subj000_roi_002.csv
adni_out03_spectrum_power/spectrum_subj000_roi_003.csv
adni_out03_spectrum_power/spectrum_subj000_roi_004.csv
adni_out03_spectrum_power/spectrum_subj000_roi_005.csv
adni_out03_spectrum_power/spectrum_subj000_roi_006.csv
adni_out03_spectrum_power/spectrum_subj000_roi_007.csv
adni_out03_spectrum_power/spectrum_subj000_roi_008.csv
adni_out03_spectrum_power/spectrum_subj000_roi_009.csv
adni_out03_spectrum_power/spectrum_subj000_roi_010.csv
adni_out03_spectrum_power/spectrum_subj000_roi_011.csv
adni_out03_spectrum_power/spectrum_subj000_roi_012.csv
adni_out03_spectrum_power/spectrum_subj000_roi_013.csv
adni_out03_spectrum_power/spectrum_subj000_roi_014.csv
adni_out03_spectrum_power/spectrum_subj000_roi_015.csv
adni_out03_spectrum_power/spectrum_subj000_roi_016.csv
adni_out03_spectrum_power/spectrum_subj000_roi_017.csv
adni_out03

<Figure size 432x288 with 0 Axes>

In [4]:
## save result as subject by roi .csv files.
## One CSV is written per frequency: the subject_info columns followed by one
## entropy column per ROI.

num_subject = len(entropy_list)
num_roi = len(entropy_list[0])
# Number of frequencies = length of the first (subject, roi) result that was
# actually filled in; slots that were never processed are still None.
num_freq = next(
    (len(values) for per_subject in entropy_list for values in per_subject
     if values is not None),
    0,
)

for i in range(num_freq):
    print('frequency: ', i)

    # Start from an all-NaN table so that unprocessed slots stay missing.
    res = [[np.nan] * num_roi for _ in range(num_subject)]
    for subj in range(num_subject):
        for roi in range(num_roi):
            values = entropy_list[subj][roi]
            # Compare against None explicitly: an entropy of 0 is falsy but is a
            # valid result and must be written out.
            if values is not None and i < len(values):
                res[subj][roi] = values[i]

    # Build the table once per frequency, after it has been filled, instead of
    # once per (subject, roi) inside the loops.
    df = pd.concat([subject_info, pd.DataFrame(data = res)], axis = 1)

    outfile_name = outfile_name_header + '_freq_{:02}'.format(i) + '.csv'
    df.to_csv(output_dir + outfile_name)
    

KeyboardInterrupt: 