In [36]:
## For each subject, plot histograms of the BOLD signal across time points,
## compute the per-ROI entropy of the signal, and plot the histogram of
## entropy across subjects.
## xin May-9-2022.

import glob
import pandas as pd
import numpy as np
import re
import os
import matplotlib.pyplot as plt

## adni data:
## The CSV has no header row; its first column holds one ROI-signal file path
## per row.
# NOTE(review): absolute, machine-specific path -- consider moving it to a
# configurable data directory so the notebook runs on other machines.
time_signal_index = pd.read_csv('/home/xin/Downloads/Harmonics/adni_out01_time_signal_test_train_split/test_set_roi_signals.csv', header = None)
time_signal_files = time_signal_index.iloc[:, 0].tolist()

## Output location. The file-name header keeps its leading '/' because later
## code builds paths as `output_dir + outfile_name_header + ...`.
output_dir = 'adni_out04_bold_entropy'
outfile_name_header = '/adni_out04_bold_entropy'

## subject info:
## we use this to get DX (diagnosis) for BOLD signals.
input_subject_info = 'adni_out02_test_subject_info/subject_info_merge.csv'
subject_info = pd.read_csv(input_subject_info, index_col = 0)

# Create the output directory if needed; exist_ok makes re-running the cell
# safe and avoids the check-then-create race of isdir() + mkdir().
os.makedirs(output_dir, exist_ok = True)


In [37]:
# Display the subject metadata table; its DX column supplies the diagnosis
# label used further down.
subject_info

Unnamed: 0,subject_id,ses_id,AGE,PTGENDER,DX_bl,DX
0,002_S_0295,2012-05-10_15_42_37.0,84.8,Male,CN,CN
1,002_S_0413,2017-06-21_13_23_38.0,76.3,Female,CN,CN
2,002_S_0685,2011-07-08_07_04_27.0,89.6,Female,CN,CN
3,002_S_0729,2012-08-07_07_21_09.0,65.1,Female,LMCI,LMCI
4,002_S_1155,2017-04-24_13_21_32.0,57.8,Male,LMCI,LMCI
...,...,...,...,...,...,...
324,941_S_4100,2017-10-27_11_05_51.0,78.5,Female,CN,CN
325,941_S_4187,2017-06-05_14_05_55.0,62.0,Male,LMCI,LMCI
326,941_S_4292,2017-08-21_15_05_09.0,70.9,Male,CN,CN
327,941_S_4365,2017-08-28_14_06_46.0,80.3,Male,CN,CN


In [52]:
from math import log, e

def entropy2(labels, base=None):
    """Return the Shannon entropy of the empirical distribution of `labels`.

    Parameters
    ----------
    labels : sequence or array
        Discrete observations; each distinct value is treated as one class.
    base : float, optional
        Logarithm base. Defaults to e (result in nats) when None.

    Returns
    -------
    0 when there is at most one observation or only one distinct class,
    otherwise the entropy -sum(p * log(p)) over the class frequencies.
    """
    total = len(labels)
    if total <= 1:
        return 0

    # Relative frequency of each distinct label.
    _, counts = np.unique(labels, return_counts=True)
    probs = counts / total
    if np.count_nonzero(probs) <= 1:
        return 0

    log_base = e if base is None else base

    # Accumulate term by term (in label order) so the floating-point result
    # does not depend on a library summation strategy.
    terms = [p * log(p, log_base) for p in probs]
    ent = 0.
    for term in terms:
        ent -= term

    return ent

# Column labels that contained at least one NaN, appended in encounter order
# (a label appears once per file in which it had NaNs).
nan_col = []

num_subject = len(time_signal_files)
# entropy_list[i] will hold one entropy value per signal column of file i
# (np.nan for columns that contain NaNs).
entropy_list = [None] * num_subject

# Number of bin edges used to discretise each signal before computing entropy.
NUM_BIN_EDGES = 50
# Diagnostic histograms are written only for every Nth file and every Nth
# column so the output directory is not flooded with figures.
PLOT_EVERY_N_SUBJECTS = 20
PLOT_EVERY_N_ROIS = 50

for row_idx, file in enumerate(time_signal_files):
    ## each file is a tab-separated table with one integer-labelled column per
    ## brain region (ROI).
    # NOTE(review): rows are presumably time points of the BOLD signal -- confirm.

    print(file)
    time_signal = pd.read_csv(file, header = 0, sep = '\t')
    # Drop the extra 'Unnamed: 264' column that pandas creates for this input.
    time_signal = time_signal.drop(columns = ['Unnamed: 264'])
    # Cast the column labels to int so they sort numerically rather than
    # lexically.
    time_signal.columns = time_signal.columns.astype(int)
    time_signal = time_signal.sort_index(ascending=True, axis=1)

    # get AD diagnosis:
    # NOTE(review): this lookup is positional -- it assumes row i of
    # subject_info describes the i-th entry of time_signal_files.
    diagnosis = subject_info.iloc[row_idx,:]['DX']

    # Base name (without directory or extension) used to name figures for this
    # file inside output_dir.
    figure_stem = os.path.splitext(os.path.basename(file))[0]

    result = []
    for col in time_signal:
        signal = time_signal[col]

        if signal.isna().any():
            print('nan column found for: ', col)
            nan_col.append(col)
            result.append(np.nan)
            continue

        # compute entropy:
        # Discretise into equally spaced bins spanning [min, max]. With
        # np.digitize's default right=False the maximum value falls past the
        # last edge and therefore gets a bin index of its own.
        bins = np.linspace(signal.min(), signal.max(), num = NUM_BIN_EDGES)
        discretized_signal = np.digitize(signal, bins)
        entropy = entropy2(discretized_signal)
        result.append(entropy)

        # histogram:
        ## plot figure on every n-th subject and every n-th brain region.
        # The earlier condition referenced subj_id / roi / input_dir, none of
        # which are defined in this notebook, and built the figure name by
        # replacing '.csv' in a '.txt' path; the loop indices and an explicit
        # output path are used instead.
        if row_idx % PLOT_EVERY_N_SUBJECTS == 0 and col % PLOT_EVERY_N_ROIS == 0:
            plt.hist(signal, bins = 30)
            # str.format also copes with a missing (NaN) diagnosis, which
            # string concatenation would not.
            figure_name = '{}_roi_{:03}_{}.png'.format(figure_stem, col, diagnosis)
            plt.savefig(os.path.join(output_dir, figure_name))
            plt.clf()

    entropy_list[row_idx] = result

print('finished!')

/home/xin/Downloads/BrainImaging_UNC/out04_adni_roi_signals1/roi_signals_power264_sub-002_S_0295-ses-2012-05-10_15_42_37.0.txt
/home/xin/Downloads/BrainImaging_UNC/out04_adni_roi_signals2/roi_signals_power264_sub-002_S_0413-ses-2017-06-21_13_23_38.0.txt
/home/xin/Downloads/BrainImaging_UNC/out04_adni_roi_signals1/roi_signals_power264_sub-002_S_0685-ses-2011-07-08_07_04_27.0.txt
/home/xin/Downloads/BrainImaging_UNC/out04_adni_roi_signals1/roi_signals_power264_sub-002_S_0729-ses-2012-08-07_07_21_09.0.txt
/home/xin/Downloads/BrainImaging_UNC/out04_adni_roi_signals2/roi_signals_power264_sub-002_S_1155-ses-2017-04-24_13_21_32.0.txt
/home/xin/Downloads/BrainImaging_UNC/out04_adni_roi_signals2/roi_signals_power264_sub-002_S_1261-ses-2017-03-15_11_23_54.0.txt
/home/xin/Downloads/BrainImaging_UNC/out04_adni_roi_signals1/roi_signals_power264_sub-002_S_1268-ses-2012-03-22_08_12_04.0.txt
/home/xin/Downloads/BrainImaging_UNC/out04_adni_roi_signals2/roi_signals_power264_sub-002_S_1280-ses-2017-03-13

In [56]:
# Display the nested list of entropies: one inner list per input file, one
# value per signal column.
# NOTE(review): this prints the whole structure; a slice such as
# entropy_list[0][:5] would keep the notebook output short.
entropy_list

[[3.0364009766573283,
  3.5908353614144977,
  3.2680565182668397,
  3.3096454162582245,
  3.150809353982718,
  3.391483769497299,
  3.565326767340654,
  3.2116833275066705,
  3.341619938974708,
  3.5632211177585034,
  3.4409456674378016,
  3.280212790393687,
  3.3019290589435735,
  3.2660648917860136,
  2.915993451415652,
  3.1049654690011215,
  2.8925947505532332,
  3.6944548587553143,
  3.61092683394696,
  3.564223581387535,
  3.4548546289058795,
  2.710822019327148,
  3.2069156366914924,
  3.064647874193415,
  3.5490078110061445,
  3.051920397424511,
  3.44834136255298,
  3.393516079815797,
  3.193384476934651,
  3.3179642239733536,
  2.899370861037288,
  3.473193127374484,
  2.969851049561849,
  2.988064302063025,
  3.1851850379882007,
  3.3382561117778193,
  3.111243575347867,
  3.4365704658718013,
  3.1407980827707833,
  3.401613385686542,
  3.150732483391655,
  2.8518808643105573,
  3.590836178729954,
  3.2541349057776623,
  3.0889274835521623,
  3.6000016410970397,
  3.48416367

In [57]:
## Save the result as a single subject-by-ROI .csv file.
# The file name is built first; outfile_name_header already starts with '/',
# so plain concatenation with output_dir yields '<output_dir>/<header>.csv'.
outfile_name = outfile_name_header + '.csv'

# Put the subject metadata columns next to the entropy columns. pd.concat with
# axis=1 aligns the two frames on their row index.
# NOTE(review): this assumes subject_info's index matches the 0..n-1 order of
# entropy_list -- confirm.
df = pd.concat([subject_info, pd.DataFrame(data = entropy_list)], axis = 1)
df.to_csv(output_dir + outfile_name)
    