Gather necessary pre-requisites:

In [3]:
# import everything you need
#from nipype import Node, Workflow
import nipype.pipeline.engine as pe # pipeline engine
import nipype.interfaces.utility as util  # utility
import nipype.algorithms.modelgen as model  # model generation
import nipype.algorithms.rapidart as ra  # artifact detection
import nipype.interfaces.fsl as fsl
#import nipype.interfaces.spm as spm
#import nibabel as nb
from nilearn.image import mean_img
from nilearn.plotting import plot_anat, view_img
import nibabel as nb
import numpy as np
import pandas as pd
# activate inline magics
%matplotlib inline 
import matplotlib.pyplot as plt
import matplotlib.image as mpl_img
import os, operator, re, json, random
from functools import reduce
from itertools import zip_longest

Write useful functions

In [4]:
# write update function for file list to data dictionary
def update_files():
    """Refresh every subject's file list in ``data`` from the source directories.

    Scans all directories listed in ``FED_dirs`` for NIfTI/JSON files and
    stores, for each subject ID in ``FEDs``, the sorted paths that contain
    this ID under ``data[sub]["files"]``.

    Uses the notebook-level globals ``FEDs``, ``FED_dirs`` and ``data``;
    ``data`` is modified in place and nothing is returned.
    """
    # the directory scan does not depend on the subject -> do it once
    # instead of once per subject
    files = sorted(os.path.join(subdir, content)
                   for subdir in FED_dirs
                   for content in os.listdir(subdir)
                   # escaped dot: demand a real ".nii"/".json" extension; the
                   # pattern is not anchored at the end, so ".nii.gz" still passes
                   if re.match(r'.*\.(nii|json)', content))
    for sub in FEDs:
        # plain substring test: the subject ID is a literal, not a regex
        data[sub]["files"] = [file for file in files if sub in file]

Build data structure that holds all relevant information

In [5]:
# Fixed locations used throughout the notebook: FSL data, the folder holding
# the spreadsheets, and the base directory of the raw fMRI data
fsldir = "/usr/share/fsl/5.0/data/"  # data directory in bash's $FSLDIR
scriptdatadir = "/home/martin/FED/"
credentials = scriptdatadir + "FED_Subject_Covariates.xlsx"
modelinfo = scriptdatadir + "FED_Day_2_modelparams.xls"

basedir = "/fMRI/"
# keep only the sub-directories of basedir (plain files are skipped)
FED_dirs = list(filter(os.path.isdir,
                       (os.path.join(basedir, entry)
                        for entry in os.listdir(basedir))))

In [6]:
# collect image (.nii) and sidecar (.json) files from all subject directories
# (dot escaped so that only a real ".nii"/".json" extension matches; the
#  pattern is not anchored at the end, so ".nii.gz" still passes)
files = sorted(os.path.join(subdir, content)
               for subdir in FED_dirs
               for content in os.listdir(subdir)
               if re.match(r'.*\.(nii|json)', content))

# create a dictionary storing all files/subject
data = {}
for fed in FED_dirs:
    # subject ID = name of the subject directory
    # (os.path instead of splitting on '/' by hand)
    ID = os.path.basename(os.path.normpath(fed))
    # partition for each subject: its file paths and a (still empty) parameter dict;
    # plain substring test because the ID is a literal, not a regex
    data[ID] = {"files": [file for file in files if ID in file],
                "parameters": {}}

# create shortcut to FEDs
FEDs = sorted(data)

Get relevant file parameters

In [7]:
# define exclusion set for FEDs that do not meet the requirements
#GRE_excluded = []
EPI_excluded = []
T1_excluded = []

# FED control: every subject needs exactly one fMRI and one T1 json file
for subject in FEDs:
    # collect relevant files/FED for parameter extraction
    #GREs = [file for file in data[subject]["files"]
           #if re.match(r'(.*(_e1|_e2(?!_ph)).*.json)', file)]
    EPIs = [file for file in data[subject]["files"]
            if re.match(r'(.*(FMRI).*\.json)', file)]
    T1s = [file for file in data[subject]["files"]
           if re.match(r'(.*(T1_MPRAGE).*\.json)', file)]

    # control number of files
    if len(EPIs) != 1:
        print(subject, "\n", f"Not exactly one FMRI sequence to read out ({len(EPIs)}).... investigate", "\n")
        EPI_excluded.append(subject)

    if len(T1s) != 1:
        # fixed: the original referenced the undefined name `T1` here
        print(subject, "\n", f"Not exactly one T1 sequence to read out ({len(T1s)}).... investigate", "\n")
        T1_excluded.append(subject)

    # There seem to be several omissions in the GRER_FIELD data -> investigate
#    elif len(GREs) < 2:
#        print(subject, "\n", f"Not enough GRE_FIELD sequences to read out ({len(GREs)}).... exclude/investigate", "\n")
#        GRE_excluded.append(subject)

    # if there is more than one pair of magnitude images (e1+e2),
    # take the first one as they are likely to be closer to the fMRI acquisition
#    elif len(GREs) > 2:
#        print(subject, "\n", "More than 2 GRE_FIELD sequences",
#              "("+str(len(GREs))+")", ".... select only first two for TE extraction", "\n")


# control presence of other relevant file content (presence of condition onset times, covariates of interest etc.)
# read relevant content
ID_cre = pd.read_excel(credentials, sheet_name="analysis", usecols=['Sub Num FED_XXX'])
ID_mod = pd.read_excel(modelinfo, sheet_name="analysis", usecols=['Sub Num FED_XXX'])
# bring content to lists of plain ints (empty cells are dropped)
ID_cre = ID_cre['Sub Num FED_XXX'].dropna().astype(int).tolist()
ID_mod = ID_mod['Sub Num FED_XXX'].dropna().astype(int).tolist()
# model values are not unique (onset timings -> multiple entries) -> de-duplicate;
# sort explicitly, because the iteration order of a set is not guaranteed
ID_mod = sorted(set(ID_mod))


def _fed_label(number):
    """Format a numeric subject ID as 'FEDxxx'; None (exhausted list) is shown as 'None'."""
    return "None" if number is None else f"FED{number:03d}"


# now for the control: walk the three ID sequences side by side and react to the
# first position at which they disagree
# NOTE(review): only the first misalignment is handled (the loop breaks afterwards)
for subject, cre, mod in zip_longest(FEDs, ID_cre, ID_mod):
    # zip_longest pads exhausted sequences with None; compare the numeric
    # subject ID (last three characters) instead of a single character string
    sub_num = None if subject is None else int(subject[-3:])
    if sub_num == cre == mod:
        continue
    print("subject(s) missing!")
    print("First line of list-alignment:  ", subject, _fed_label(cre), _fed_label(mod))
    print("Control relevant files!")
    # control, but assume that the smaller value(s) have to be eliminated,
    # because, apparently, there are no values for them in (at least) one data file
    present = [num for num in (sub_num, cre, mod) if num is not None]
    if len(present) == 3:
        largest = max(present)
        lagging = {num for num in present if num < largest}
    else:
        # at least one source is exhausted -> every remaining ID lacks data there
        lagging = set(present)
    FEDexcl = [f"FED{num:03d}" for num in sorted(lagging)]
    # exclude respective subjects' functionals and structurals
    EPI_excluded.extend(FEDexcl)
    T1_excluded.extend(FEDexcl)
    break

# exclude FEDs based on file criteria
print("\n\n", "The following subjects where excluded from further analysis due to false file numbers or missing data: ",\
      "\n", "EPI: ", sorted(EPI_excluded), "T1: ", sorted(T1_excluded))

# update data and FEDs based on prior exclusion; a subject is dropped as soon as
# it fails ONE criterion, so that `data` and `FEDs` always hold the same subjects
excluded_subjects = set(T1_excluded) | set(EPI_excluded)
for sub in excluded_subjects:
    # default avoids a KeyError for IDs that never had an image directory
    data.pop(sub, None)
FEDs = [sub for sub in FEDs if sub not in excluded_subjects]

subject(s) missing!
First line of list-alignment:   FED006 FED006 FED007
Control relevant files!


 The following subjects where excluded from further analysis due to false file numbers or missing data:  
 EPI:  ['FED006'] T1:  ['FED006']


In [8]:
# show which subjects are left after the file/ID checks
print("The new subject list afer initial data and image file checkup:\n",
      FEDs,
      "\n",
      sep="\n")

The new subject list afer initial data and image file checkup:

['FED007', 'FED008', 'FED009', 'FED010', 'FED011', 'FED012', 'FED013', 'FED014', 'FED015', 'FED016', 'FED017', 'FED018', 'FED019', 'FED020', 'FED021', 'FED022', 'FED023', 'FED024', 'FED025', 'FED026', 'FED027', 'FED028', 'FED029', 'FED030', 'FED031', 'FED032', 'FED033', 'FED034', 'FED035', 'FED036', 'FED037', 'FED038', 'FED039', 'FED040', 'FED041', 'FED042', 'FED043', 'FED044', 'FED045', 'FED046', 'FED047', 'FED048', 'FED049', 'FED050', 'FED051', 'FED052', 'FED053', 'FED054', 'FED055', 'FED056', 'FED057', 'FED058', 'FED059', 'FED060', 'FED061', 'FED062', 'FED063', 'FED064', 'FED065', 'FED066', 'FED067', 'FED068']




In [47]:
# Read json info data for all relevant parameters
# define info of interest
#GREspecs = ["EchoTime", "PhaseEncodingDirection"]
EPIspecs = ["EchoTime", "RepetitionTime", "EchoTrainLength",
            "PhaseEncodingSteps", "PhaseEncodingDirection",
            "DwellTime", "TotalReadoutTime", "EffectiveEchoSpacing", "PixelBandwidth"]
T1specs = ["EchoTime", "RepetitionTime",
           "PhaseEncodingSteps", "InversionTime", "PixelBandwidth"]
# create list to record missing parameters
missing_params = []
# create list to record multiple parameters
multiple_params = []


def _collect_scan_params(subject, json_files, specs, prefix, params, excluded):
    """Copy the parameters named in ``specs`` from json files into ``params``.

    Parameters
    ----------
    subject : str
        Subject ID, only used to label the entries in the report lists.
    json_files : list of str
        Paths of the json files to read.
    specs : list of str
        Parameter names to look up in each json file.
    prefix : str
        Scan label ("T1"/"EPI"); prepended to the key and used in report entries.
    params : dict
        Target dictionary; a parameter is stored under
        ``"<prefix>_<capital letters of the parameter name>"``.
    excluded : list
        ``subject`` is appended once for every parameter that a file lacks.

    Side effects: appends ``"<subject>_<prefix>-<param>"`` to the cell-level
    lists ``missing_params`` (parameter absent from a file) and
    ``multiple_params`` (value differs from the one already stored).
    """
    for file in json_files:
        with open(file) as json_file:
            info = json.load(json_file)
        for param in specs:
            # use capital letters of parameter name as indicator in key
            key = f"{prefix}_{''.join(char for char in param if char.isupper())}"
            if param not in info:
                # checked first, so a parameter missing from a later file is
                # recorded instead of raising a KeyError in the comparison below
                missing_params.append(f"{subject}_{prefix}-{param}")
                excluded.append(subject)
            elif key not in params:
                params[key] = info[param]
            elif info[param] != params[key]:
                # a differing second value means either multiple .json files
                # or conflicting entries of the same parameter
                multiple_params.append(f"{subject}_{prefix}-{param}")


# Now for the parameter extraction
for subject in FEDs:
    subject_files = data[subject]["files"]
    # collect relevant files/FED for parameter extraction
    #GREs=[file for file in data[subject]["files"]
           #if re.match(r'(.*(_e1|_e2(?!_ph)).*.json)', file)]
    T1s = [file for file in subject_files
           if re.match(r'(.*(T1_MPRAGE).*\.json)', file)]
    EPIs = [file for file in subject_files
            if re.match(r'(.*(FMRI).*\.json)', file)]

    # if there is more than one pair of magnitude images (e1+e2),
    # take the first one as they are likely to be closer to the fMRI acquisition
#    if len(GREs) > 2:
#        print(subject, "\n", "More than 2 GRE_FIELD sequences",
#              "("+str(len(GREs))+")", ".... selecting first two for TE extraction", "\n")
#        GREs = GREs[0:2]

    # T1 PARAMETERS (missing ones put the subject on T1_excluded)
    _collect_scan_params(subject, T1s, T1specs, "T1",
                         data[subject]["parameters"], T1_excluded)
    # EPI PARAMETERS (missing ones put the subject on EPI_excluded)
    _collect_scan_params(subject, EPIs, EPIspecs, "EPI",
                         data[subject]["parameters"], EPI_excluded)

# show missing parameters from json files:
print("The following parameters where not available from subjects' json files:",
      "\n",sorted(missing_params))
# show duplicate parameters from json files:
print("The following parameters where available multiple times from subjects' json files:",
      "\n",sorted(multiple_params))

# exclude FEDs based on file criteria
#print("\n\n", "The following subjects where excluded from further analysis due to false file numbers or missing parameters: ",\
#      "\n", "EPI: ", sorted(EPI_excluded), "\n\n",
#     len(EPI_excluded), "  subjects in total")

# update data and FEDs based on prior exclusion
#[data.pop(sub) for sub in FEDs if sub in T1_excluded and sub in EPI_excluded]
#FEDs=[sub for sub in FEDs if sub not in T1_excluded and sub not in EPI_excluded]

The following parameters where not available from subjects' json files: 
 ['FED063_EPI-DwellTime', 'FED064_EPI-DwellTime', 'FED065_EPI-DwellTime', 'FED066_EPI-DwellTime']
The following parameters where available multiple times from subjects' json files: 
 []


In [9]:
# Edit the read-in parameters into analysis format
# lookup table: field axes (i/j/k) -> voxel axes (x/y/z)
field_to_voxel_axis = {"i": "x", "j": "y", "k": "z"}

for sub in sorted(FEDs):
    # calculate necessary parameters that are not in header information
    # deltaTE for GRE_FIELD echo-based comparison
    #data[subject]["parameters"]["DeltaTE"]= reduce(operator.sub, data[subject]["parameters"]["GRE_ET"])*-1*1000

    # transfer phase encoding directions from field axes to voxel axes
    # control field axes values
    #print(sub)
    #print(data[sub]["parameters"]["EPI_PED"])
    epi_phasecodedir = data[sub]["parameters"].get("EPI_PED")
    if not epi_phasecodedir:
        # the key is absent when the json file did not specify the direction;
        # report it instead of aborting the whole loop with a KeyError
        print(f"{sub}: no EPI phase encoding direction available -> nothing to convert")
    elif epi_phasecodedir[0] in field_to_voxel_axis:
        # only the leading axis letter is translated, the rest (e.g. a
        # trailing "-") is kept; values already converted stay untouched
        voxel_axis = field_to_voxel_axis[epi_phasecodedir[0]]
        data[sub]["parameters"]["EPI_PED"] = f"{voxel_axis}{epi_phasecodedir[1:]}"

    # same for T1s
#    T1_phasecodedir = data[sub]["parameters"]["T1_PED"]
#    for char in T1_phasecodedir:
#        if char == "i":
#            data[sub]["parameters"]["T1_PED"] = f"x{T1_phasecodedir[1:]}"
#        elif char == "j":
#            data[sub]["parameters"]["T1_PED"] = f"y{T1_phasecodedir[1:]}"
#        elif char == "k":
#            data[sub]["parameters"]["T1_PED"] = f"z{T1_phasecodedir[1:]}"

Get/create relevant covariates

In [11]:
# create covariates (FSL -> "EVs")
# get sex, age, depression, time since clinical episode, severity of clinical episodes, number of clinical episodes, - data from list in .xlsx file(s)

# files are already defined ^^
# read relevant content
content_cre = pd.read_excel(credentials, sheet_name="analysis",
                            usecols=['Sub Num FED_XXX', 'Gender', 'Age', 'BDI 22 Score'])
content_mod = pd.read_excel(modelinfo, sheet_name="analysis",
                            usecols=['Sub Num FED_XXX', 'Condition', 'RT', 'COTcorrect'])

# sort panda dataframe according to file sequence in FEDs (account for excluded)
# define sort-by list: numeric part of the FED ID as plain int
# (the builtin int replaces np.int, which no longer exists in current NumPy,
#  and copes with leading zeros without stripping them by hand)
model_FED_ID = [int(i[-3:]) for i in FEDs]
# define a categorical variable to sort a column and corresponding lines after;
# IDs that are not in model_FED_ID (excluded subjects) become NaN
content_cre['model_FED_ID'] = pd.Categorical(content_cre['Sub Num FED_XXX'],
                                             categories=model_FED_ID, ordered=True)
content_mod['model_FED_ID'] = pd.Categorical(content_mod['Sub Num FED_XXX'],
                                             categories=model_FED_ID, ordered=True)
# sort dataframes by indicator variables (reassigned instead of inplace=True)
content_cre = content_cre.sort_values(['model_FED_ID'])
content_mod = content_mod.sort_values(['model_FED_ID', 'Condition', 'COTcorrect'])

# split experimental data by subject and create separate dataframes for each
for sub, datasub in zip(FEDs, model_FED_ID):
    # add the dataframe as additional item into subject's parameter dict
    data[sub]["parameters"]["covariates"] = content_cre[content_cre['model_FED_ID'] == datasub]
    data[sub]["parameters"]["modelparams"] = content_mod[content_mod['model_FED_ID'] == datasub]

In [13]:
# total number of onsets seems to vary +-2 -> control results
#print("The current dataframes from the xls/xlsx files for each subject:\n\n")
#for sub,datasub in zip(FEDs, model_FED_ID):
#    print(data[sub]["parameters"]["covariates"])
#    print(data[sub]["parameters"]["modelparams"])

The current dataframes from the xls/xlsx files for each subject:


   Sub Num FED_XXX  Gender  Age  BDI 22 Score model_FED_ID
1                7       0   21             0            7
     Sub Num FED_XXX  Condition      RT  COTcorrect model_FED_ID
20                 7          1  360.85    20273.02            7
22                 7          1  321.38    58437.96            7
7                  7          1  281.30   111040.41            7
18                 7          1  577.72   196397.07            7
19                 7          1  296.54   232598.84            7
..               ...        ...     ...         ...          ...
168                7         12  244.92   275134.06            7
170                7         12  258.64   277469.33            7
163                7         12  345.27   337418.97            7
164                7         12  366.70   349512.31            7
166                7         12  333.56   438719.50            7

[178 rows x 5 columns]
   Sub Num 

In [None]:
# control results
#print(model_FED_ID)