In [1]:
# import
import os, sys, shutil, pickle
import numpy as np
import pandas as pd
import scipy as sp
import nibabel as nib
from tqdm import tqdm
from sklearn.linear_model import LinearRegression

sys.path.extend([r'/home/lindenmp/research_projects/snaplab_tools'])
from snaplab_tools.utils import threshold_consistency, normalize_x
from snaplab_tools.plotting.utils import get_my_colors, get_p_val_string

# import plotting libraries
import matplotlib.pyplot as plt

# Notebook-wide matplotlib defaults: 8 pt font for all figure text.
plt.rcParams.update({"font.size": 8})
# With fonttype "none" the SVG backend writes text as text elements instead of
# converting glyphs to paths (see the matplotlib rcParams documentation).
plt.rcParams["svg.fonttype"] = "none"
import seaborn as sns

## Load data

In [2]:
# Directory containing the collated pickles opened by the loading cells below.
in_dir = '/mnt/storage_ssd_raid/research_data/HCP_YA/collated_outputs'

# `atlas` is the key used to index each subject's entry; `n_parcels` sizes the
# parcel axis of every array allocated downstream.
atlas = 'Schaefer4007'
parcels_per_atlas = {
    'Schaefer4007': 400,
    'Schaefer2007': 200,
    'Schaefer1007': 100,
}
if atlas not in parcels_per_atlas:
    # Fail here with a clear message instead of leaving `n_parcels` undefined
    # and hitting a NameError several cells later.
    raise ValueError(
        'Unknown atlas {0!r}; expected one of {1}'.format(atlas, sorted(parcels_per_atlas))
    )
n_parcels = parcels_per_atlas[atlas]

### Rest fMRI

In [3]:
# Load data (deserialize) the rest pickle from `in_dir` into `data`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# open files that come from a trusted source.
with open(os.path.join(in_dir, 'hcpya_rsfmri_7T.pkl'), 'rb') as handle:
    data = pickle.load(handle)

# Show the top-level keys; the next cell takes them as the subject IDs.
data.keys()

dict_keys(['100610', '102311', '102816', '104416', '105923', '108323', '109123', '111312', '111514', '114823', '115017', '115825', '116726', '118225', '125525', '126426', '126931', '128935', '130114', '130518', '131217', '131722', '132118', '134627', '134829', '135124', '137128', '140117', '144226', '145834', '146129', '146432', '146735', '146937', '148133', '150423', '155938', '156334', '157336', '158035', '158136', '159239', '162935', '164131', '164636', '165436', '167036', '167440', '169040', '169343', '169444', '169747', '171633', '172130', '173334', '175237', '176542', '177140', '177645', '177746', '178142', '178243', '178647', '180533', '181232', '181636', '182436', '182739', '185442', '186949', '187345', '191033', '191336', '191841', '192439', '192641', '193845', '195041', '196144', '197348', '198653', '199655', '200210', '200311', '200614', '201515', '203418', '204521', '205220', '209228', '212419', '214019', '214524', '221319', '233326', '239136', '246133', '249947', '251833',

In [4]:
# Iterating a dict yields its keys, so this is the key list in dict order.
subject_ids = list(data)
# Subject count; sizes the subject axis of the arrays allocated below.
n_subs = len(subject_ids)
n_subs

184

In [5]:
# Exclusion mask, one flag per subject: starts all False and is set to True by
# the loading loops for any subject whose scan could not be processed.
subject_filter = np.zeros(n_subs, dtype=bool)

In [6]:
# Keys of the four rest runs, in the order they fill the scan axis of the
# array allocated in the next cell.
scans = ['rfMRIREST17TPA', 'rfMRIREST27TAP',
         'rfMRIREST37TPA', 'rfMRIREST47TAP']

In [7]:
# Number of rows (time points) stored per rest run. A run with a different
# number of rows cannot be assigned into `rsts` below, so it raises and its
# subject is flagged for exclusion.
n_trs_rest = 900
rsts = np.zeros((n_trs_rest, n_parcels, len(scans), n_subs))

# (subject id, scan) -> repr of the exception that triggered the exclusion, so
# the reason a subject was dropped can be inspected after the loop.
rest_failures = dict()

for s, scan in enumerate(scans):
    print(scan)
    for i in tqdm(np.arange(n_subs)):
        try:
            ts = data[subject_ids[i]][atlas][scan].copy()

            # Nuisance regressors: the mean across columns of `ts` (global
            # signal), its first difference padded with a leading 0, and the
            # squares of both.
            gs = np.mean(ts, axis=1).reshape(-1, 1)
            gs_diff = np.append(0, np.diff(gs, axis=0)).reshape(-1, 1)
            nuis = np.concatenate((gs, gs_diff), axis=1)
            nuis = np.concatenate((nuis, np.square(nuis)), axis=1)

            # Regress the nuisance terms out of every column and keep residuals.
            regr = LinearRegression()
            regr.fit(nuis, ts)
            y_pred = regr.predict(nuis)
            ts = ts - y_pred

            # z score each column over rows
            ts = sp.stats.zscore(ts, axis=0)

            # Reported only; the subject is not excluded on this basis.
            if np.any(np.isnan(ts)):
                print('Found NaNs!')
            if np.any(np.isinf(ts)):
                print('Found infs!')

            rsts[:, :, s, i] = ts
        except Exception as err:
            # `Exception` instead of a bare except: KeyboardInterrupt and
            # SystemExit propagate rather than being recorded as a bad subject.
            subject_filter[i] = True
            rest_failures[(subject_ids[i], scan)] = repr(err)
    # Running count of excluded subjects after this scan.
    print(np.sum(subject_filter))

print(np.sum(subject_filter))

rfMRIREST17TPA


100%|██████████| 184/184 [00:02<00:00, 68.84it/s]


3
rfMRIREST27TAP


100%|██████████| 184/184 [00:02<00:00, 89.29it/s]


5
rfMRIREST37TPA


100%|██████████| 184/184 [00:01<00:00, 107.25it/s]


10
rfMRIREST47TAP


100%|██████████| 184/184 [00:01<00:00, 106.13it/s]

11
11





### Movie fMRI

In [8]:
# Load data (deserialize) the movie pickle; this rebinds `data`, replacing the
# rest dict loaded earlier.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# open files that come from a trusted source.
# NOTE(review): the cells below index this dict with `subject_ids` taken from
# the rest pickle, so both files are assumed to share keys; confirm.
with open(os.path.join(in_dir, 'hcpya_tfmri_7T.pkl'), 'rb') as handle:
    data = pickle.load(handle)

# Show the top-level keys of the newly loaded dict.
data.keys()

dict_keys(['100610', '102311', '102816', '104416', '105923', '108323', '109123', '111312', '111514', '114823', '115017', '115825', '116726', '118225', '125525', '126426', '126931', '128935', '130114', '130518', '131217', '131722', '132118', '134627', '134829', '135124', '137128', '140117', '144226', '145834', '146129', '146432', '146735', '146937', '148133', '150423', '155938', '156334', '157336', '158035', '158136', '159239', '162935', '164131', '164636', '165436', '167036', '167440', '169040', '169343', '169444', '169747', '171633', '172130', '173334', '175237', '176542', '177140', '177645', '177746', '178142', '178243', '178647', '180533', '181232', '181636', '182436', '182739', '185442', '186949', '187345', '191033', '191336', '191841', '192439', '192641', '193845', '195041', '196144', '197348', '198653', '199655', '200210', '200311', '200614', '201515', '203418', '204521', '205220', '209228', '212419', '214019', '214524', '221319', '233326', '239136', '246133', '249947', '251833',

In [9]:
# Keys of the four movie runs; this rebinds `scans`, replacing the rest list.
scans = ['tfMRIMOVIE17TAP', 'tfMRIMOVIE27TPA',
         'tfMRIMOVIE37TPA', 'tfMRIMOVIE47TAP']
# scan key -> per-scan array, filled in by the next cell.
taskts_dict = {}

In [10]:
# Per-scan arrays are collected here and joined once after the loop, rather
# than growing `taskts` with one concatenate per scan from a dummy first row.
# The zero-length seed keeps the result well-defined even if `scans` is empty.
taskts_parts = [np.zeros((0, n_parcels, n_subs))]

# (subject id, scan) -> repr of the exception that triggered the exclusion.
movie_failures = dict()

for scan in scans:
    # Run length is taken from the first subject.
    # NOTE(review): this lookup sits outside the try, so it raises if the first
    # subject lacks this scan; confirm that subject is always complete.
    n_trs_task = data[subject_ids[0]][atlas][scan].shape[0]
    print(scan, n_trs_task)
    taskts_tmp = np.zeros((n_trs_task, n_parcels, n_subs))
    for i in tqdm(np.arange(n_subs)):
        try:
            ts = data[subject_ids[i]][atlas][scan].copy()

            # Nuisance regressors: the mean across columns of `ts` (global
            # signal), its first difference padded with a leading 0, and the
            # squares of both.
            gs = np.mean(ts, axis=1).reshape(-1, 1)
            gs_diff = np.append(0, np.diff(gs, axis=0)).reshape(-1, 1)
            nuis = np.concatenate((gs, gs_diff), axis=1)
            nuis = np.concatenate((nuis, np.square(nuis)), axis=1)

            # Regress the nuisance terms out of every column and keep residuals.
            regr = LinearRegression()
            regr.fit(nuis, ts)
            y_pred = regr.predict(nuis)
            ts = ts - y_pred

            # z score each column over rows
            ts = sp.stats.zscore(ts, axis=0)

            # Reported only; the subject is not excluded on this basis.
            if np.any(np.isnan(ts)):
                print('Found NaNs!')
            if np.any(np.isinf(ts)):
                print('Found infs!')

            taskts_tmp[:, :, i] = ts
        except Exception as err:
            # `Exception` instead of a bare except: KeyboardInterrupt and
            # SystemExit propagate rather than being recorded as a bad subject.
            subject_filter[i] = True
            movie_failures[(subject_ids[i], scan)] = repr(err)

    # `taskts_tmp` is freshly allocated for every scan, so it can be stored
    # directly without a defensive copy.
    taskts_parts.append(taskts_tmp)
    taskts_dict[scan] = taskts_tmp
    print(np.sum(subject_filter))

# All movie scans stacked along the first (row) axis.
taskts = np.concatenate(taskts_parts, axis=0)
print(np.sum(subject_filter))

tfMRIMOVIE17TAP 921


100%|██████████| 184/184 [00:02<00:00, 89.57it/s]


11
tfMRIMOVIE27TPA 918


100%|██████████| 184/184 [00:01<00:00, 96.28it/s] 


11
tfMRIMOVIE37TPA 915


100%|██████████| 184/184 [00:01<00:00, 96.37it/s] 


11
tfMRIMOVIE47TAP 901


100%|██████████| 184/184 [00:02<00:00, 89.91it/s] 


11
11


## Filter

In [11]:
# IDs of the subjects flagged in `subject_filter` by the loading loops above.
np.asarray(subject_ids)[subject_filter]

array(['111312', '126931', '181636', '473952', '536647', '552241',
       '585256', '745555', '951457', '973770', '995174'], dtype='<U6')

In [12]:
# Shape of the stacked movie array at this point, before the filtering cell
# below is run.
taskts.shape

(3655, 400, 184)

In [13]:
# Mask of the subjects to keep: those never flagged while loading.
keep = ~subject_filter

# Each array is filtered only while its last axis still spans every subject,
# so re-running this cell is a no-op instead of an IndexError from applying a
# full-length mask to an already-filtered array.
if rsts.shape[-1] == keep.size:
    rsts = rsts[:, :, :, keep]
for scan in taskts_dict.keys():
    if taskts_dict[scan].shape[-1] == keep.size:
        taskts_dict[scan] = taskts_dict[scan][:, :, keep]

try:
    # The filtered result used to be bound only to the misspelled name
    # `tasksts`, leaving `taskts` unfiltered. Filter `taskts` itself and keep
    # `tasksts` as an alias for anything that relied on the old name.
    if taskts.shape[-1] == keep.size:
        taskts = taskts[:, :, keep]
    tasksts = taskts
    print(rsts.shape, tasksts.shape)
except NameError:
    # `taskts` does not exist (movie cells were not run): report rest only.
    print(rsts.shape)

(900, 400, 4, 173) (3655, 400, 173)


In [14]:
# Print the shape of every per-scan movie array. Only a missing `taskts_dict`
# (movie cells not run) is tolerated; any other error now surfaces instead of
# being silently swallowed.
try:
    for scan, scan_ts in taskts_dict.items():
        print(scan_ts.shape)
except NameError:
    pass

(921, 400, 173)
(918, 400, 173)
(915, 400, 173)
(901, 400, 173)


## Save

In [15]:
# Output directory for the arrays and subject list written by the cells below.
outdir = '/home/lindenmp/research_projects/nct_xr/data/int_deflections'
# exist_ok=True creates the directory (and parents) if needed and is a no-op if
# it already exists, without the check-then-create race of a separate
# os.path.exists test.
os.makedirs(outdir, exist_ok=True)

In [16]:
# Write the rest array to a single .npy file in `outdir` ...
rest_name = 'HCP-YA-7T_{0}_rsts.npy'.format(atlas)
file_str = os.path.join(outdir, rest_name)
np.save(file_str, rsts)

# ... and each per-scan movie array to its own file, named after the scan key.
for scan, scan_ts in taskts_dict.items():
    task_name = 'HCP-YA-7T_{0}_taskts-{1}.npy'.format(atlas, scan)
    file_str = os.path.join(outdir, task_name)
    np.save(file_str, scan_ts)

In [17]:
# IDs of the subjects not flagged in `subject_filter`, written as plain text
# with one ID per line.
subject_ids_filt = np.asarray(subject_ids)[~subject_filter]
file_str = os.path.join(outdir, 'HCP-YA-7T_{0}_subjids.txt'.format(atlas))
np.savetxt(file_str, subject_ids_filt, fmt='%s')