In [45]:
## compute entropy of spectrum signals as biomarkers
## udpates on Apr 11 2022:
## save spectrum as a table of time points by frequency for each subject.
## calculate spectrum only on test set.

import glob
import pandas as pd
import numpy as np
import re
import os

## read harmonics and time signals:

input_harmonics_wavelets = 'hcp_out02_harmonics'
input_time_signal = 'hcp_out01_task_time_course_scan2'
output_dir = 'hcp_out03_spectrum_power'

## Notes:
# got NaN values in wavelets (e.g for ROI 3 all values are nan), 
# but time signals does not have nans...
# need to check where those NaNs are derived...

if not os.path.isdir(output_dir):
    os.mkdir(output_dir)

harmonic_wavelets_files = glob.glob(input_harmonics_wavelets + '/CommonHarWavelets_Harmonics*.csv')
harmonic_wavelets_files.sort()

time_signal_files = glob.glob(input_time_signal + '/TimeSeries*.csv')
time_signal_files.sort()


In [46]:
time_signal_files

['hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1000.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1001.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1002.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1003.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1004.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1005.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1006.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1007.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1008.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1009.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_101.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1010.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1011.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0back_body_1012.csv',
 'hcp_out01_task_time_course_scan2/TimeSeries_0ba

In [47]:
# for each wavelets (corresponding to a brain region) calcluate inner product with roi signals for each subject:

for file_wavelets in harmonic_wavelets_files:
    print(file_wavelets)
    wavelets = pd.read_csv(file_wavelets, header = None)
    
    if wavelets.isna().any(axis = None):
        print('Nan values for wavelets:' + file_wavelets)
        continue
    
    # brain_region id is the last 3 digits with pattern: 001.
    brain_region = file_wavelets[-7:-4]
    
    for file_time_signal in time_signal_files:
        
        subject_id = re.search('(.*)TimeSeries_(.*).csv', file_time_signal).group(2)
        time_signal = pd.read_csv(file_time_signal, header = 0, sep = ',')
        time_signal.columns = time_signal.columns.astype(int)
        time_signal = time_signal.sort_index(ascending=True, axis=1)
        
        ## global signal regression:
        time_signal = time_signal.sub(time_signal.mean(axis = 1), axis = 0)
        
        # print(subject_id)
        # check nan values:
        if time_signal.isna().any(axis = None):
            print('Nan value for time_signal:' + subject_id)
            continue

        ## compute inner product for each time point:
        P = np.matmul(time_signal.values, wavelets.values)
        
        out_file_name = output_dir + '/spectrum_' + subject_id + '_roi_' + '{:03}'.format(int(brain_region)) + '.csv'
        np.savetxt(out_file_name, np.array(P), delimiter = ',')

        # break
                

hcp_out02_harmonics/CommonHarWavelets_Harmonics001.csv
Nan value for time_signal:0back_body_440
Nan value for time_signal:0back_face_440
Nan value for time_signal:0back_place_440
Nan value for time_signal:0back_tools_440
Nan value for time_signal:15_sec_rest_440
Nan value for time_signal:2back_body_440
Nan value for time_signal:2back_face_440
Nan value for time_signal:2back_place_440
Nan value for time_signal:2back_tools_440
hcp_out02_harmonics/CommonHarWavelets_Harmonics002.csv
Nan value for time_signal:0back_body_440
Nan value for time_signal:0back_face_440
Nan value for time_signal:0back_place_440
Nan value for time_signal:0back_tools_440
Nan value for time_signal:15_sec_rest_440
Nan value for time_signal:2back_body_440
Nan value for time_signal:2back_face_440
Nan value for time_signal:2back_place_440
Nan value for time_signal:2back_tools_440
hcp_out02_harmonics/CommonHarWavelets_Harmonics003.csv
Nan value for time_signal:0back_body_440
Nan value for time_signal:0back_face_440
Nan v

In [43]:
brain_region

'001'