In [8]:
## plot histogram across time points, compute entropy, 
## and plot histogram of entropy across subjects.
## xin Apr 11 2022.

import glob
import pandas as pd
import numpy as np
import re
import os
import matplotlib.pyplot as plt

## hcp data:
# input_harmonics_wavelets = 'hcp_out02_harmonics_100'
# input_time_signal = '/home/xin/Downloads/FullData_Oct26/Scan1/'
# output_dir = 'hcp_out03_spectrum_power'

## adni data:
# Directory holding the spectrum CSVs that are globbed below.
input_dir = 'adni_out03_spectrum_power'
# Directory created below for this notebook's outputs.
output_dir = 'adni_out04_power_histogram_and_entropy'
# File-name prefix for outputs; the leading '/' is part of the value because
# it is concatenated directly onto output_dir.
outfile_name_header = '/adni_out04_entropy'

# exist_ok=True makes re-running the cell safe and avoids the race between
# checking for the directory and creating it.
os.makedirs(output_dir, exist_ok=True)

# Sorted so the files are always processed in the same order.
spectrum_files = sorted(glob.glob(input_dir + '/spectrum_subj*_roi_*.csv'))


In [16]:
from math import log, e

def entropy2(labels, base=None):
    """Compute the Shannon entropy of a discrete label distribution.

    Parameters
    ----------
    labels : array-like
        Discrete labels (e.g. bin indices). Multi-dimensional input is
        treated as a flat collection of labels.
    base : float, optional
        Logarithm base. ``None`` (default) gives the natural logarithm,
        i.e. entropy in nats; ``2`` gives bits.

    Returns
    -------
    float
        The entropy ``-sum(p * log(p))`` over the observed label
        frequencies; ``0.0`` when there are fewer than two labels or only
        one distinct label.

    Raises
    ------
    ValueError
        If ``base`` is not positive.
    ZeroDivisionError
        If ``base`` is 1.
    """
    labels = np.asarray(labels)

    # Use the total element count, not len(): np.unique flattens its input,
    # so the denominator must count every element for the probabilities to
    # sum to one.
    n_labels = labels.size
    if n_labels <= 1:
        return 0.0

    _, counts = np.unique(labels, return_counts=True)
    if counts.size <= 1:
        return 0.0

    # Every count is >= 1, so every probability is strictly positive and
    # log(probs) is finite.
    probs = counts / n_labels
    ent = float(-np.sum(probs * np.log(probs)))

    if base is not None:
        # Dividing by math.log(base) keeps the error behaviour of
        # math.log(x, base) for an invalid base.
        ent = ent / log(base)

    return ent

# Minimum number of ROI slots allocated per subject.
# NOTE(review): 264 is carried over from the original hard-coded value;
# confirm it matches the parcellation used to produce the spectrum files.
NUM_ROI = 264

# Column labels that contained at least one NaN (one entry per occurrence).
nan_col = []

# Subject and ROI ids are read from names like spectrum_subj<id>_roi_<id>.csv.
spectrum_name_pattern = re.compile(r'spectrum_subj(\d+)_roi_(\d+)\.csv$')


def _parse_subject_and_roi(path):
    """Return (subject_id, roi_id) as ints parsed from a spectrum file name."""
    match = spectrum_name_pattern.search(os.path.basename(path))
    if match is None:
        raise ValueError('unexpected spectrum file name: ' + path)
    return int(match.group(1)), int(match.group(2))


if not spectrum_files:
    raise FileNotFoundError('no spectrum files found in ' + input_dir)

# Parse every file name once, up front, so the result table can be sized from
# the largest ids actually present instead of trusting the sort order.
file_ids = [(path,) + _parse_subject_and_roi(path) for path in spectrum_files]
last_subjid = max(subj for _, subj, _ in file_ids)
last_roi = max(roi for _, _, roi in file_ids)

# entropy_list[subject][roi] holds the list of per-column entropies, or None
# when no file exists for that (subject, roi) pair.
entropy_list = [[None] * max(NUM_ROI, last_roi + 1) for _ in range(last_subjid + 1)]

for file, subj_id, roi in file_ids:
    ## each file holds the spectrum of one (subject, brain region) pair;
    ## the values are inner product of harmonics and time course signal.
    ## every column is handled as one frequency (presumably rows are time
    ## points -- TODO confirm).

    print(file)

    spectrum = pd.read_csv(file, header = None, sep = ',')

    ## plot figures only for every 50th subject and every 20th brain region:
    plot_this_file = subj_id % 50 == 0 and roi % 20 == 0

    result = []
    for col in spectrum:
        signal = spectrum[col]

        if signal.isna().any():
            # Entropy is undefined for a column with missing values; keep a
            # NaN placeholder so positions still line up with column index.
            print('nan column found for: ', col)
            nan_col.append(col)
            result.append(np.nan)
            continue

        # compute entropy of the signal discretized onto 50 evenly spaced edges.
        # NOTE(review): np.digitize puts values equal to the column maximum
        # into an extra bin of their own (index 50); left unchanged so the
        # numbers match earlier runs -- confirm whether this is intended.
        bins = np.linspace(signal.min(), signal.max(), num = 50)
        discretized_signal = np.digitize(signal, bins)
        result.append(entropy2(discretized_signal))

        # histogram:
        ## plot figure on every 3rd frequency of the selected files:
        if plot_this_file and col % 3 == 0:
            base_name = os.path.splitext(os.path.basename(file))[0]
            figure_name = os.path.join(output_dir, base_name + '_freq_{:03}'.format(col) + '.png')

            fig, ax = plt.subplots()
            ax.hist(signal, bins = 30)
            ax.set(title = base_name + '_freq_{:03}'.format(col), ylabel = 'count')
            fig.savefig(figure_name)
            # Close explicitly so figures do not accumulate across iterations
            # and no empty figure is left behind for the notebook to render.
            plt.close(fig)

    entropy_list[subj_id][roi] = result

adni_out03_spectrum_power/spectrum_subj000_roi_000.csv
0
0
adni_out03_spectrum_power/spectrum_subj000_roi_003.csv
0
3
adni_out03_spectrum_power/spectrum_subj000_roi_004.csv
0
4
adni_out03_spectrum_power/spectrum_subj000_roi_005.csv
0
5
adni_out03_spectrum_power/spectrum_subj000_roi_007.csv
0
7
adni_out03_spectrum_power/spectrum_subj000_roi_008.csv
0
8
adni_out03_spectrum_power/spectrum_subj000_roi_009.csv
0
9
adni_out03_spectrum_power/spectrum_subj000_roi_010.csv
0
10
adni_out03_spectrum_power/spectrum_subj000_roi_011.csv
0
11
adni_out03_spectrum_power/spectrum_subj000_roi_012.csv
0
12
adni_out03_spectrum_power/spectrum_subj000_roi_013.csv
0
13
adni_out03_spectrum_power/spectrum_subj000_roi_014.csv
0
14
adni_out03_spectrum_power/spectrum_subj000_roi_015.csv
0
15
adni_out03_spectrum_power/spectrum_subj000_roi_016.csv
0
16
adni_out03_spectrum_power/spectrum_subj000_roi_018.csv
0
18
adni_out03_spectrum_power/spectrum_subj000_roi_019.csv
0
19
adni_out03_spectrum_power/spectrum_subj000_roi_

<Figure size 432x288 with 0 Axes>

In [50]:
## save result as one (subject by roi) .csv file per frequency.

num_subject = len(entropy_list)
num_roi = len(entropy_list[0]) if num_subject else 0

# Take the frequency count from the first (subject, roi) entry that was
# actually filled in; entry [0][0] may be None when that file is absent.
num_freq = next(
    (len(values) for per_subject in entropy_list for values in per_subject if values is not None),
    0,
)

for i in range(num_freq):
    # Start from a fresh NaN matrix for every frequency so that values from
    # the previous frequency can never leak into this file.
    res = np.full((num_subject, num_roi), np.nan)
    for subj in range(num_subject):
        for roi in range(num_roi):
            values = entropy_list[subj][roi]
            # Test for "missing" explicitly: an entropy of 0.0 is a valid
            # result and must be written, not skipped as falsy.
            if values is not None and i < len(values):
                res[subj, roi] = values[i]

    outfile_name = outfile_name_header + 'freq_{:02}'.format(i) + '.csv'
    np.savetxt(output_dir + outfile_name, res, delimiter = ',')
    