In [46]:
from math import log, e

def entropy2(labels, base=None):
    """Return the Shannon entropy of the empirical distribution of `labels`.

    Parameters
    ----------
    labels : sequence
        Discrete labels; each distinct value is treated as one class.
    base : float, optional
        Logarithm base. When ``None`` the natural logarithm (base ``e``)
        is used.

    Returns
    -------
    0 when there is at most one label or only a single class is present;
    otherwise ``-sum(p * log(p, base))`` over the class probabilities.
    """
    total = len(labels)
    if total <= 1:
        return 0

    # Relative frequency of every distinct label value.
    _, occurrences = np.unique(labels, return_counts=True)
    frequencies = occurrences / total

    # A single populated class carries no uncertainty.
    if np.count_nonzero(frequencies) <= 1:
        return 0

    log_base = base if base is not None else e

    # Accumulate -p * log(p) one class at a time.
    result = 0.
    for p in frequencies:
        result -= p * log(p, log_base)
    return result



In [51]:
## input: spectrum P (time points by frequency).
## output: entropy (calculated on the distribution across time points) for each frequency.

## plot histogram across time points, compute entropy,
## and plot histogram of entropy across subjects.
## xin Apr 11 2022.

import glob
import pandas as pd
import numpy as np
import re
import os
import matplotlib.pyplot as plt

## as we have more than 2 million files in total (subjects * task * roi), 
## split results of different tasks in separate folders.

# task_label = ['2back_body', '0back_face', '2back_tools', '0back_body', 
#               '0back_place', '2back_face', '0back_tools', '2back_place']

# Task names; each one is appended to the input/output directory prefixes
# used by the processing loop, so every task is read from and written to
# its own folder.
task_label = ['0back', '2back']
# task_label = ['2back']
# Prefix of the per-frequency entropy CSV; it is concatenated directly onto
# the output directory name, so the leading '/' acts as the path separator.
outfile_name_header = '/out04_entropy_'
    
# ROI number that marks the last file of a subject: when a file with this
# ROI is reached, the subject's row of entropies is considered complete.
num_roi = 268
# Number of columns (indices 0 .. num_freq - 1) processed in each spectrum file.
num_freq = 10

In [52]:

# spectrum_files = glob.glob(input_dir + '/spectrum_' + task_label[0] + '*_roi_*.csv')
# spectrum_files.sort()

# num_subj = len(spectrum_files)/num_roi
# num_subj


In [53]:

# Column indices in which a NaN was found (one entry per affected file),
# collected over all tasks so they can be inspected after the run.
# Defined here so the NaN branch below does not depend on a variable left
# over from some other cell.
nan_col = []

# Compiled once instead of twice per file. Group 2 is the subject id and
# group 3 the ROI number; the dot before "csv" is escaped so it only
# matches a literal '.'.
file_name_pattern = re.compile(r'(.*)spectrum_(.*)_roi_(.*)\.csv')

for task in task_label:

    print(task)
    input_dir = 'hcp_out03_spectrum_power_' + task
    output_dir = 'hcp_out04_power_histogram_and_entropy_' + task

    # Sorted order matters: a subject's row is closed when the file with
    # ROI number `num_roi` is reached, so that file must come last.
    spectrum_files = sorted(glob.glob(input_dir + '/spectrum_*_roi_*.csv'))

    # process a subset for testing:
    # spectrum_files = spectrum_files[: int(num_subj)//200*num_roi]

    os.makedirs(output_dir, exist_ok=True)

    # One accumulator per frequency so that every CSV is read only once
    # (instead of once per frequency):
    #   result_subj[col] -> list of finished rows (one row per subject)
    #   result_roi[col]  -> row currently being filled (one value per ROI)
    result_subj = [[] for _ in range(num_freq)]
    result_roi = [[] for _ in range(num_freq)]

    for file in spectrum_files:
        ## each file holds the spectrum of one subject / one ROI;
        ## its columns are indexed as frequencies below.
        name_match = file_name_pattern.search(file)
        subj_id = name_match.group(2)
        roi = name_match.group(3)
        subj_number = int(subj_id)
        roi_number = int(roi)

        spectrum = pd.read_csv(file, header = None, sep = ',')

        for col in range(num_freq):
            column = spectrum[col]

            if column.isna().any():
                print('nan column found for: ', col)
                nan_col.append(col)
                result_roi[col].append(np.nan)
            else:
                # compute entropy of the signal discretized on 50 evenly
                # spaced bin edges spanning [min, max] of the column:
                bins = np.linspace(column.min(), column.max(), num = 50)
                discretized_signal = np.digitize(column, bins)
                result_roi[col].append(entropy2(discretized_signal))

            # histogram:
            ## plot figure on every n frequency, subject and brain region :
            if col % 3 == 0 and subj_number % 200 == 0 and roi_number % 50 == 0:
                fig, ax = plt.subplots()
                # NaNs are dropped so a column flagged above cannot break
                # the histogram; NaN-free columns are plotted unchanged.
                ax.hist(column.dropna(), bins = 30)
                figure_name = file.replace('.csv', '_' + task + '_freq_{:03}'.format(col) + '.png')
                figure_name = figure_name.replace(input_dir, output_dir)
                fig.savefig(figure_name)
                # Close the figure so figures do not pile up in memory and
                # no empty figure is left behind as cell output.
                plt.close(fig)

        # Last ROI of this subject: close the row for every frequency.
        if roi_number == num_roi:
            for col in range(num_freq):
                result_subj[col].append(result_roi[col])
                result_roi[col] = []

    # One CSV per frequency (subjects by ROI). The file name is kept exactly
    # as before (including the double underscore before "freq") so existing
    # consumers of these files keep working.
    for col in range(num_freq):
        print(col)
        out_file_name = output_dir + outfile_name_header + '_freq_{:02}'.format(int(col)) + '.csv'
        np.savetxt(out_file_name, np.array(result_subj[col]), delimiter = ',')

print('finished!')

2back
0
1
2
3
4
5
6
7
8
9
finished!


<Figure size 432x288 with 0 Axes>

In [55]:
# Inspect the file list left in `spectrum_files` by the processing loop
# (the sorted paths of the last task that was processed).
spectrum_files

['hcp_out03_spectrum_power_2back/spectrum_1000_roi_001.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_002.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_003.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_004.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_005.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_006.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_007.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_008.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_009.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_010.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_011.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_012.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_013.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_014.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_015.csv',
 'hcp_out03_spectrum_power_2back/spectrum_1000_roi_016.csv',
 'hcp_out03_spectrum_pow