# fMRIPrep + tedana + XCPD

There are three key elements to running tedana + XCPD:

1.  XCPD is very strict about how it expects external (i.e., custom) confounds files to be organized and named.
    We need to copy the associated tedana derivatives (i.e., the mixing matrices) to a new folder and rename them,
    so that XCPD can find them.
2.  Tedana's ICA will probably look bad if dummy scans haven't been dropped from the BOLD file.
    See [this discussion](https://github.com/ME-ICA/tedana/discussions/899).
    Therefore, we must drop the dummy scans before running tedana, 
    then buffer the mixing matrix produced by tedana with dummy data to fill in those scans,
    and finally run XCPD with the same number of dummy scans flagged, 
    so those volumes will ultimately be ignored.
3.  Simply regressing out "bad" components flagged by tedana is a bad idea.
    This is known as "aggressive" denoising, and is not recommended.
    Instead, we must include the "good" components in the regression as well,
    but then reconstruct the noise signal using only the nuisance regressors 
    ("bad" components and any other regressors we want, like motion parameters)
    and their associated parameter estimates from the regression.
    This is known as "non-aggressive" denoising.
    For more information on this, 
    see [tedana's documentation](https://tedana.readthedocs.io/en/latest/denoising.html).
    XCPD will support this by flagging any confounds starting with `signal__`
    as signal regressors to use for a non-aggressive denoising strategy.
    This is implemented in [PennLINC/xcp_d#697](https://github.com/PennLINC/xcp_d/pull/697).

## Step 0: Run fMRIPrep 22.0.0+ with `--me-output-echos` flag

This has already been handled for this dataset. The data are available at https://gin.g-node.org/ME-ICA/ds003643-fmriprep-derivatives.

## Step 1: Remove dummy scans from fMRIPrep files

In [1]:
import os

import nibabel as nib
import numpy as np
import pandas as pd
from tedana.workflows import tedana_workflow


def _flag_dummyvols(confounds_file):
    """Identify the number of dummy volumes flagged in an fMRIPrep confounds file.

    Parameters
    ----------
    confounds_file : str or file-like
        Tab-delimited confounds table, read with :func:`pandas.read_table`.

    Returns
    -------
    n_dummy_vols : int
        One more than the index of the last row flagged by any
        ``non_steady_state_outlier*`` column.
        0 if the table has no such columns, or if none of them flags a row.

    Notes
    -----
    The count is derived from the *last* flagged row, so it assumes that the
    non-steady-state volumes form one contiguous run at the start of the scan.
    """
    confounds_df = pd.read_table(confounds_file)
    nss_cols = [c for c in confounds_df.columns if c.startswith("non_steady_state_outlier")]
    if not nss_cols:
        return 0

    # Row indices at which at least one non-steady-state column is non-zero.
    flagged_rows = np.flatnonzero(confounds_df[nss_cols].to_numpy().any(axis=1))
    if flagged_rows.size == 0:
        # The columns exist but flag nothing; indexing [-1] would raise IndexError.
        return 0

    return int(flagged_rows[-1]) + 1


def _remove_dummyvols(in_file, out_file, n_dummy_vols):
    """Drop the leading dummy volumes from an image and save the result.

    Parameters
    ----------
    in_file : str
        Path to the image to shorten.
    out_file : str
        Path the shortened image is written to.
    n_dummy_vols : int
        Number of volumes to drop from the start of the last axis.

    Returns
    -------
    str
        ``out_file`` when volumes were dropped.
        ``in_file`` (untouched, nothing written) when ``n_dummy_vols`` is 0.
    """
    # Nothing to drop: hand back the original file without writing a copy.
    if not n_dummy_vols:
        return in_file

    print(f"Dropping {n_dummy_vols} volumes from {os.path.basename(in_file)}")

    # Slice along the last axis only, then save the shortened image.
    shortened_img = nib.load(in_file).slicer[..., n_dummy_vols:]
    shortened_img.to_filename(out_file)
    return out_file


def drop_dummy_vols(bold_files, confounds_file, temp_dir="."):
    """Strip the dummy volumes from every file in a list.

    The number of dummy volumes is inferred once from the confounds file and
    applied to all of the files.

    Parameters
    ----------
    bold_files : list of str
        Paths to the files to shorten.
    confounds_file : str
        Confounds file from which the number of dummy volumes is inferred.
    temp_dir : str, optional
        Folder the shortened files are written to. Each shortened file keeps
        the basename of the file it was made from. Default is ".".

    Returns
    -------
    shortened_files : list of str
        Paths to the shortened files, in the same order as ``bold_files``.
        If no dummy volumes are flagged, these are the original paths.
    n_dummy_vols : int
        Number of dummy volumes inferred from ``confounds_file``.
    """
    n_dummy_vols = _flag_dummyvols(confounds_file)

    # No dummy volumes: nothing is written, so the original paths are returned.
    if not n_dummy_vols:
        return list(bold_files), n_dummy_vols

    shortened_files = [
        _remove_dummyvols(
            original,
            os.path.join(temp_dir, os.path.basename(original)),
            n_dummy_vols,
        )
        for original in bold_files
    ]
    return shortened_files, n_dummy_vols


In [3]:
# Dataset root and the derivative folders used throughout the notebook.
dset_dir = "/Users/taylor/Documents/datasets/ds003643/"
deriv_dir = os.path.join(dset_dir, "derivatives")
func_dir = os.path.join(deriv_dir, "fmriprep/sub-EN100/func")

# One echo time per echo, hard-coded for this dataset.
ECHO_TIMES = [12.8, 27.5, 43]
# There are 10 runs; keeping the run number in a variable makes it easy to loop over them.
run_number = 1
prefix = f"sub-EN100_task-lppEN_run-{run_number}"

# fMRIPrep files for this run. Echo indices in the filenames start at 1.
n_echoes = len(ECHO_TIMES)
bold_files = []
for echo_idx in range(1, n_echoes + 1):
    bold_name = f"{prefix}_echo-{echo_idx}_desc-preproc_bold.nii.gz"
    bold_files.append(os.path.join(func_dir, bold_name))

mask_file = os.path.join(func_dir, f"{prefix}_desc-brain_mask.nii.gz")
confounds_file = os.path.join(func_dir, f"{prefix}_desc-confounds_timeseries.tsv")

# tedana outputs go in a BIDS-like structure, with a separate folder for each run.
# The shortened (dummy-volume-free) files all share a single folder.
tedana_out_dir = os.path.join(deriv_dir, "tedana/sub-EN100/func", prefix)
tedana_temp_dir = os.path.join(deriv_dir, "reduced_files")
for folder in (tedana_out_dir, tedana_temp_dir):
    os.makedirs(folder, exist_ok=True)

# Drop the dummy volumes first, so that tedana only sees the shortened data.
shortened_files, n_dummy_vols = drop_dummy_vols(
    bold_files,
    confounds_file,
    temp_dir=tedana_temp_dir,
)

tedana_workflow(
    shortened_files,
    ECHO_TIMES,
    out_dir=tedana_out_dir,
    mask=mask_file,
    prefix=prefix,
    fittype="curvefit",
)

Dropping 4 volumes from sub-EN100_task-lppEN_run-1_echo-1_desc-preproc_bold.nii.gz
Dropping 4 volumes from sub-EN100_task-lppEN_run-1_echo-2_desc-preproc_bold.nii.gz
Dropping 4 volumes from sub-EN100_task-lppEN_run-1_echo-3_desc-preproc_bold.nii.gz


INFO     tedana:tedana_workflow:466 Using output directory: /Users/taylor/Documents/datasets/ds003643/derivatives/tedana/sub-EN100/func/sub-EN100_task-lppEN_run-1
INFO     tedana:tedana_workflow:479 Loading input data: ['/Users/taylor/Documents/datasets/ds003643/derivatives/reduced_files/sub-EN100_task-lppEN_run-1_echo-1_desc-preproc_bold.nii.gz', '/Users/taylor/Documents/datasets/ds003643/derivatives/reduced_files/sub-EN100_task-lppEN_run-1_echo-2_desc-preproc_bold.nii.gz', '/Users/taylor/Documents/datasets/ds003643/derivatives/reduced_files/sub-EN100_task-lppEN_run-1_echo-3_desc-preproc_bold.nii.gz']
INFO     tedana:tedana_workflow:561 Using user-defined mask
INFO     tedana:tedana_workflow:609 Computing T2* map
INFO     combine:make_optcom:242 Optimally combining data with voxel-wise T2* estimates
INFO     tedana:tedana_workflow:634 Writing optimally combined data set: /Users/taylor/Documents/datasets/ds003643/derivatives/tedana/sub-EN100/func/sub-EN100_task-lppEN_run-1/sub-EN100_ta

## Step 2: Label tedana components and fill in dummy volumes

In [6]:
# Read tedana's ICA mixing matrix and its table of component metrics.
mixing_matrix_file = os.path.join(tedana_out_dir, f"{prefix}_desc-ICA_mixing.tsv")
metrics_file = os.path.join(tedana_out_dir, f"{prefix}_desc-tedana_metrics.tsv")
mixing_matrix = pd.read_table(mixing_matrix_file)
metrics_df = pd.read_table(metrics_file)

# Every component that is not classified as "rejected" is marked as signal
# by prefixing its column name with "signal__".
is_accepted = metrics_df["classification"] != "rejected"
accepted_columns = metrics_df.loc[is_accepted, "Component"]
signal_names = {component: f"signal__{component}" for component in accepted_columns}
mixing_matrix = mixing_matrix.rename(columns=signal_names)

# Put the dropped dummy volumes back at the start of the matrix,
# using one copy of the first row per dummy volume as filler.
mixing_matrix_data = mixing_matrix.to_numpy()
first_row = mixing_matrix_data[0, :]
n_components = mixing_matrix.shape[1]
leading_rows = np.ones((n_dummy_vols, n_components)) * first_row
new_mixing_matrix_arr = np.vstack((leading_rows, mixing_matrix_data))
new_mixing_matrix = pd.DataFrame(new_mixing_matrix_arr, columns=mixing_matrix.columns)

# Save to the custom confounds folder, under the same filename as the
# fMRIPrep confounds file.
custom_confounds_folder = os.path.join(deriv_dir, "custom_confounds_for_xcpd")
os.makedirs(custom_confounds_folder, exist_ok=True)
confounds_name = os.path.basename(confounds_file)
custom_confounds_file = os.path.join(custom_confounds_folder, confounds_name)
new_mixing_matrix.to_csv(custom_confounds_file, sep="\t", index=False)

## Step 3: Run XCPD with tedana-derived custom confounds

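Note: The number of dummy scans XCPD removes (`--dummy-scans`) must match the number of volumes dropped before running tedana. With `--dummy-scans auto`, XCPD reads the non-steady-state volumes from the fMRIPrep confounds file, which is the same source used in Step 1.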
```bash
# Run the XCPD container (pennlinc/xcp_d:unstable) at the participant level for EN100.
# Bind mounts (-v):
#   - the dataset folder is mounted read-write at /bids-input
#   - a local xcp_d checkout is mounted over the xcp_d package in the container's site-packages
#   - a license file is mounted at /license.txt, which FS_LICENSE points to
# Inputs and outputs:
#   - /bids-input/derivatives/fmriprep is the input (fMRIPrep derivatives) folder
#   - /bids-input/derivatives is the output folder, with the working directory under it
#   - --custom_confounds points at the custom_confounds_for_xcpd folder written in Step 2
docker run --rm -u $(id -u) \
    -v /Users/taylor/Documents/datasets/ds003643:/bids-input:rw \
    -v /Users/taylor/Documents/tsalo/xcp_d/xcp_d:/usr/local/miniconda/lib/python3.8/site-packages/xcp_d \
    -v /Users/taylor/Documents/tsalo/xcp_d_testing/data/license.txt:/license.txt --env FS_LICENSE=/license.txt \
    pennlinc/xcp_d:unstable \
    /bids-input/derivatives/fmriprep \
    /bids-input/derivatives \
    participant \
    -w /bids-input/derivatives/work \
    --participant_label EN100 \
    --nuisance-regressors 27P \
    --custom_confounds /bids-input/derivatives/custom_confounds_for_xcpd \
    --dummy-scans auto \
    --bids-filter-file /bids-input/derivatives/code/filter_file.json \
    -vvv
```