From f4eaa3e1cd39610b729ede760fc45e154241f14e Mon Sep 17 00:00:00 2001 From: Circle Date: Mon, 31 Jul 2023 15:52:18 +1000 Subject: [PATCH 01/13] Add robustica method --- tedana/decomposition/ica.py | 50 ++++++++++++++++++++++++++++++- tedana/workflows/tedana.py | 60 +++++++++++++++++++++++++++++-------- 2 files changed, 96 insertions(+), 14 deletions(-) diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index ff69626a7..5a233b42e 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -4,15 +4,18 @@ import logging import warnings +import sys + import numpy as np from scipy import stats from sklearn.decomposition import FastICA +from robustica import RobustICA ####BTBTBT LGR = logging.getLogger("GENERAL") RepLGR = logging.getLogger("REPORT") -def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10): +def tedica(data, n_components, fixed_seed, ica_method="robustica", n_robust_runs=30, maxit=500, maxrestart=10): ####BTBTBTB """ Perform ICA on `data` and returns mixing matrix @@ -50,6 +53,51 @@ def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10): "decompose the dimensionally reduced dataset." ) + if ica_method=='robustica': + mmix, Iq = r_ica(data, n_components, n_robust_runs, maxit) + fixed_seed=-99999 + elif ica_method=='fastica': + mmix, fixed_seed=f_ica(data, n_components, fixed_seed, maxit=500, maxrestart=10) + Iq = 0 + else: + LGR.warning("The selected ICA method is invalid!") + sys.exit() + + + + + return mmix, fixed_seed + + +def r_ica(data, n_components, n_robust_runs, max_it): ####BTBTBTB: + + if n_robust_runs>100: + LGR.warning("The selected n_robust_runs is a very big number!") + + + RepLGR.info( + "RobustICA package was used for ICA decomposition \\citep{Anglada2022}." 
+ ) + rica0 = RobustICA(n_components=n_components, robust_runs=n_robust_runs, whiten='arbitrary-variance',max_iter= max_it, + robust_dimreduce=False, fun='logcosh') + S0, mmix = rica0.fit_transform(data) + + q0 = rica0.evaluate_clustering(rica0.S_all, rica0.clustering.labels_, rica0.signs_, rica0.orientation_) + + + Iq0 = np.array(np.mean(q0.iq)) + + + mmix = stats.zscore(mmix, axis=0) + + LGR.info( + "RobustICA with {0} robust runs was used \n" + "The mean index quality is {1}".format(n_robust_runs, Iq0) + ) + return mmix, Iq0 + + +def f_ica(data, n_components, fixed_seed, maxit, maxrestart): if fixed_seed == -1: fixed_seed = np.random.randint(low=1, high=1000) diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index 4d443033d..ee567dfef 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -150,6 +150,7 @@ def _get_parser(): "in which case the specificed number of components will be " "selected." ), + choices=["mdl", "kic", "aic"], default="aic", ) optional.add_argument( @@ -164,19 +165,46 @@ def _get_parser(): ), default="kundu", ) + optional.add_argument(#####BTBTBT + "--ica_method", + dest="ica_method", + help=( + "The applied ICA method. If set to fastica the FastICA " + "from sklearn library will be run once with the seed value. " + "robustica will run FastICA n_robust_runs times and and uses " + "clustering methods to overcome the randomness of the FastICA " + "algorithm. If set to robustica the seed value will be ignored." + "If set to fastica n_robust_runs will not be effective." + ), + choices=["robustica", "fastica"], + default="robustica", + ) optional.add_argument( "--seed", dest="fixed_seed", metavar="INT", type=int, - help=( + help=( ##BTBTBT "Value used for random initialization of ICA " - "algorithm. Set to an integer value for " - "reproducible ICA results. Set to -1 for " + "algorithm when ica_mthods is set to fastica. Set to an integer value for " + "reproducible ICA results with fastica. 
Set to -1 for " "varying results across ICA calls. " ), default=42, ) + optional.add_argument(#####BTBTBT + "--n_robust_runs", + dest="n_robust_runs", + type=int, + help=( + "The number of times robustica will run." + "This is only effective when ica_mthods is " + "set to robustica." + + ), + ##choices=range(2,100), + default=30, + ) optional.add_argument( "--maxit", dest="maxit", @@ -323,6 +351,8 @@ def tedana_workflow( fittype="loglin", combmode="t2s", tree="kundu", + ica_method="robustica", ########BTBTAdded + n_robust_runs=30, tedpca="aic", fixed_seed=42, maxit=500, @@ -385,9 +415,7 @@ def tedana_workflow( tedpca : {'mdl', 'aic', 'kic', 'kundu', 'kundu-stabilize', float, int}, optional Method with which to select components in TEDPCA. If a float is provided, then it is assumed to represent percentage of variance - explained (0-1) to retain from PCA. If an int is provided, it will output - a fixed number of components defined by the integer between 1 and the - number of time points. + explained (0-1) to retain from PCA. Default is 'aic'. fixed_seed : :obj:`int`, optional Value passed to ``mdp.numx_rand.seed()``. @@ -639,11 +667,12 @@ def tedana_workflow( # Perform ICA, calculate metrics, and apply decision tree # Restart when ICA fails to converge or too few BOLD components found keep_restarting = True + n_restarts = 0 seed = fixed_seed while keep_restarting: mmix, seed = decomposition.tedica( - dd, n_components, seed, maxit, maxrestart=(maxrestart - n_restarts) + dd, n_components, seed, ica_method, n_robust_runs, maxit, maxrestart=(maxrestart - n_restarts) ) seed += 1 n_restarts = seed - fixed_seed @@ -677,13 +706,17 @@ def tedana_workflow( ) ica_selector = selection.automatic_selection(comptable, n_echos, n_vols, tree=tree) n_likely_bold_comps = ica_selector.n_likely_bold_comps - if (n_restarts < maxrestart) and (n_likely_bold_comps == 0): - LGR.warning("No BOLD components found. 
Re-attempting ICA.") - elif n_likely_bold_comps == 0: - LGR.warning("No BOLD components found, but maximum number of restarts reached.") - keep_restarting = False - else: + + if ica_method=='robustica': #########BTBTBT keep_restarting = False + else: + if (n_restarts < maxrestart) and (n_likely_bold_comps == 0): + LGR.warning("No BOLD components found. Re-attempting ICA.") + elif n_likely_bold_comps == 0: + LGR.warning("No BOLD components found, but maximum number of restarts reached.") + keep_restarting = False + else: + keep_restarting = False # If we're going to restart, temporarily allow force overwrite if keep_restarting: @@ -893,3 +926,4 @@ def _main(argv=None): if __name__ == "__main__": _main() + From b0cac3a49ea6028c78c4d583a79c17ab100a6f5d Mon Sep 17 00:00:00 2001 From: Bahman Tahayori Date: Tue, 5 Dec 2023 12:32:53 +1100 Subject: [PATCH 02/13] Incorporation of major comments regarding robustica addition Manual modification of commit f2cdb4ed756996609a0467ac22279605a2346edb to remove unwanted file additions. 
--- tedana/config.py | 4 + tedana/decomposition/ica.py | 191 +++++-- tedana/resources/references.bib | 10 + tedana/tests/test_integrity_robustica.py | 667 +++++++++++++++++++++++ tedana/workflows/tedana.py | 167 +++--- 5 files changed, 938 insertions(+), 101 deletions(-) create mode 100644 tedana/config.py create mode 100644 tedana/tests/test_integrity_robustica.py diff --git a/tedana/config.py b/tedana/config.py new file mode 100644 index 000000000..9bca5b873 --- /dev/null +++ b/tedana/config.py @@ -0,0 +1,4 @@ +DEFAULT_ICA_METHOD = "robustica" +DEFAULT_N_ROBUST_RUNS = 30 +DEFAULT_N_MAX_ITER = 500 +DEFAULT_N_MAX_RESTART = 10 \ No newline at end of file diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index 5a233b42e..abb9d2c82 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -1,23 +1,34 @@ -""" -ICA and related signal decomposition methods for tedana -""" +"""ICA and related signal decomposition methods for tedana""" import logging import warnings -import sys - import numpy as np +from robustica import RobustICA from scipy import stats from sklearn.decomposition import FastICA -from robustica import RobustICA ####BTBTBT + +from tedana.config import ( + DEFAULT_ICA_METHOD, + DEFAULT_N_MAX_ITER, + DEFAULT_N_MAX_RESTART, + DEFAULT_N_ROBUST_RUNS, +) LGR = logging.getLogger("GENERAL") RepLGR = logging.getLogger("REPORT") -def tedica(data, n_components, fixed_seed, ica_method="robustica", n_robust_runs=30, maxit=500, maxrestart=10): ####BTBTBTB +def tedica( + data, + n_components, + fixed_seed, + ica_method=DEFAULT_ICA_METHOD, + n_robust_runs=DEFAULT_N_ROBUST_RUNS, + maxit=DEFAULT_N_MAX_ITER, + maxrestart=DEFAULT_N_MAX_RESTART, +): """ - Perform ICA on `data` and returns mixing matrix + Perform ICA on `data` with the user selected ica method and returns mixing matrix Parameters ---------- @@ -25,9 +36,13 @@ def tedica(data, n_components, fixed_seed, ica_method="robustica", n_robust_runs Dimensionally reduced optimally 
combined functional data, where `S` is samples and `T` is time n_components : :obj:`int` - Number of components retained from PCA decomposition + Number of components retained from PCA decomposition. fixed_seed : :obj:`int` - Seed for ensuring reproducibility of ICA results + Seed for ensuring reproducibility of ICA results. + ica_method : :obj: `str' + slected ICA method, can be fastica or robutica. + n_robust_runs : :obj: `int' + selected number of robust runs when robustica is used. Default is 30. maxit : :obj:`int`, optional Maximum number of iterations for ICA. Default is 500. maxrestart : :obj:`int`, optional @@ -43,9 +58,6 @@ def tedica(data, n_components, fixed_seed, ica_method="robustica", n_robust_runs fixed_seed : :obj:`int` Random seed from final decomposition. - Notes - ----- - Uses `sklearn` implementation of FastICA for decomposition """ warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd") RepLGR.info( @@ -53,51 +65,152 @@ def tedica(data, n_components, fixed_seed, ica_method="robustica", n_robust_runs "decompose the dimensionally reduced dataset." 
) - if ica_method=='robustica': - mmix, Iq = r_ica(data, n_components, n_robust_runs, maxit) - fixed_seed=-99999 - elif ica_method=='fastica': - mmix, fixed_seed=f_ica(data, n_components, fixed_seed, maxit=500, maxrestart=10) - Iq = 0 + ica_method = ica_method.lower() + + if ica_method == "robustica": + mmix, fixed_seed = r_ica( + data, + n_components=n_components, + fixed_seed=fixed_seed, + n_robust_runs=n_robust_runs, + max_it=maxit, + ) + elif ica_method == "fastica": + mmix, fixed_seed = f_ica( + data, + n_components=n_components, + fixed_seed=fixed_seed, + maxit=maxit, + maxrestart=maxrestart, + ) else: - LGR.warning("The selected ICA method is invalid!") - sys.exit() + raise ValueError("The selected ICA method is invalid!") + return mmix, fixed_seed +def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): + """ + Perform robustica on `data` by running FastICA multiple times (n_robust runes) + and returns mixing matrix - return mmix, fixed_seed + Parameters + ---------- + data : (S x T) :obj:`numpy.ndarray` + Dimensionally reduced optimally combined functional data, where `S` is + samples and `T` is time + n_components : :obj:`int` + Number of components retained from PCA decomposition. + fixed_seed : :obj:`int` + Seed for ensuring reproducibility of ICA results. + n_robust_runs : :obj: `int' + selected number of robust runs when robustica is used. Default is 30. + maxit : :obj:`int`, optional + Maximum number of iterations for ICA. Default is 500. + Returns + ------- + mmix : (T x C) :obj:`numpy.ndarray` + Z-scored mixing matrix for converting input data to component space, + where `C` is components and `T` is the same as in `data` + fixed_seed : :obj:`int` + Random seed from final decomposition. -def r_ica(data, n_components, n_robust_runs, max_it): ####BTBTBTB: + """ + if n_robust_runs > 200: + LGR.warning( + "The selected n_robust_runs is a very big number! The process will take a long time!" 
+ ) - if n_robust_runs>100: - LGR.warning("The selected n_robust_runs is a very big number!") + RepLGR.info("RobustICA package was used for ICA decomposition \\citep{Anglada2022}.") + if fixed_seed == -1: + fixed_seed = np.random.randint(low=1, high=1000) - RepLGR.info( - "RobustICA package was used for ICA decomposition \\citep{Anglada2022}." - ) - rica0 = RobustICA(n_components=n_components, robust_runs=n_robust_runs, whiten='arbitrary-variance',max_iter= max_it, - robust_dimreduce=False, fun='logcosh') - S0, mmix = rica0.fit_transform(data) + try: + rica = RobustICA( + n_components=n_components, + robust_runs=n_robust_runs, + whiten="arbitrary-variance", + max_iter=max_it, + random_state=fixed_seed, + robust_dimreduce=False, + fun="logcosh", + robust_method="DBSCAN", + ) - q0 = rica0.evaluate_clustering(rica0.S_all, rica0.clustering.labels_, rica0.signs_, rica0.orientation_) + S, mmix = rica.fit_transform(data) + q = rica.evaluate_clustering( + rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_ + ) - - Iq0 = np.array(np.mean(q0.iq)) - + except: + rica = RobustICA( + n_components=n_components, + robust_runs=n_robust_runs, + whiten="arbitrary-variance", + max_iter=max_it, + random_state=fixed_seed, + robust_dimreduce=False, + fun="logcosh", + robust_method="AgglomerativeClustering", + ) + + S, mmix = rica.fit_transform(data) + q = rica.evaluate_clustering( + rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_ + ) + iq = np.array(np.mean(q[q["cluster_id"] >= 0].iq)) # The cluster labeled -1 is noise + + if iq < 0.6: + LGR.warning( + "The resultant mean Index Quality is low. It is recommended to rerun the " + "process with a different seed." + ) + + mmix = mmix[:, q["cluster_id"] >= 0] mmix = stats.zscore(mmix, axis=0) LGR.info( - "RobustICA with {0} robust runs was used \n" - "The mean index quality is {1}".format(n_robust_runs, Iq0) + "RobustICA with {0} robust runs and seed {1} was used. 
" + "The mean Index Quality is {2}".format(n_robust_runs, fixed_seed, iq) ) - return mmix, Iq0 + return mmix, fixed_seed def f_ica(data, n_components, fixed_seed, maxit, maxrestart): + """ + Perform FastICA on `data` and returns mixing matrix + + Parameters + ---------- + data : (S x T) :obj:`numpy.ndarray` + Dimensionally reduced optimally combined functional data, where `S` is + samples and `T` is time + n_components : :obj:`int` + Number of components retained from PCA decomposition + fixed_seed : :obj:`int` + Seed for ensuring reproducibility of ICA results + maxit : :obj:`int`, optional + Maximum number of iterations for ICA. Default is 500. + maxrestart : :obj:`int`, optional + Maximum number of attempted decompositions to perform with different + random seeds. ICA will stop running if there is convergence prior to + reaching this limit. Default is 10. + + Returns + ------- + mmix : (T x C) :obj:`numpy.ndarray` + Z-scored mixing matrix for converting input data to component space, + where `C` is components and `T` is the same as in `data` + fixed_seed : :obj:`int` + Random seed from final decomposition. 
+ + Notes + ----- + Uses `sklearn` implementation of FastICA for decomposition + """ if fixed_seed == -1: fixed_seed = np.random.randint(low=1, high=1000) @@ -135,4 +248,4 @@ def f_ica(data, n_components, fixed_seed, maxit, maxrestart): mmix = ica.mixing_ mmix = stats.zscore(mmix, axis=0) - return mmix, fixed_seed + return mmix, fixed_seed \ No newline at end of file diff --git a/tedana/resources/references.bib b/tedana/resources/references.bib index 006f2f779..180e0a827 100644 --- a/tedana/resources/references.bib +++ b/tedana/resources/references.bib @@ -313,3 +313,13 @@ @Article{Hunter:2007 doi = {10.1109/MCSE.2007.55}, year = 2007 } + +@Article{Anglada:2022, + Author = {Anglada-Girotto Miquel and Miravet-Verde Samuel and Serrano Luis and Head Sarah}, + Title = {robustica: customizable robust independent component analysis}, + Journal = {BMC Bioinformatics}, + Volume = {23}, + Number = {519}, + doi = {10.1186/s12859-022-05043-9}, + year = 2022 +} \ No newline at end of file diff --git a/tedana/tests/test_integrity_robustica.py b/tedana/tests/test_integrity_robustica.py new file mode 100644 index 000000000..88646d51f --- /dev/null +++ b/tedana/tests/test_integrity_robustica.py @@ -0,0 +1,667 @@ +"""Integration tests for "real" data.""" + +import glob +import json +import logging +import os +import os.path as op +import re +import shutil +import subprocess +import tarfile +from datetime import datetime +from gzip import GzipFile +from io import BytesIO + +import pandas as pd +import pytest +import requests +from pkg_resources import resource_filename + +from tedana.io import InputHarvester +from tedana.workflows import t2smap as t2smap_cli +from tedana.workflows import tedana as tedana_cli +from tedana.workflows.ica_reclassify import ica_reclassify_workflow + +# Need to see if a no BOLD warning occurred +LOGGER = logging.getLogger(__name__) +# Added a testing logger to output whether or not testing data were downlaoded +TestLGR = logging.getLogger("TESTING") + + 
+def check_integration_outputs(fname, outpath, n_logs=1): + """ + Checks outputs of integration tests. + + Parameters + ---------- + fname : str + Path to file with expected outputs + outpath : str + Path to output directory generated from integration tests + """ + + # Gets filepaths generated by integration test + found_files = [ + os.path.relpath(f, outpath) + for f in glob.glob(os.path.join(outpath, "**"), recursive=True)[1:] + ] + + # Checks for log file + log_regex = "^tedana_[12][0-9]{3}-[0-9]{2}-[0-9]{2}T[0-9]{2}[0-9]{2}[0-9]{2}.tsv$" + logfiles = [out for out in found_files if re.match(log_regex, out)] + assert len(logfiles) == n_logs + + # Removes logfiles from list of existing files + for log in logfiles: + found_files.remove(log) + + # Compares remaining files with those expected + with open(fname, "r") as f: + expected_files = f.read().splitlines() + expected_files = [os.path.normpath(path) for path in expected_files] + + if sorted(found_files) != sorted(expected_files): + expected_not_found = sorted(list(set(expected_files) - set(found_files))) + found_not_expected = sorted(list(set(found_files) - set(expected_files))) + + msg = "" + if expected_not_found: + msg += "\nExpected but not found:\n\t" + msg += "\n\t".join(expected_not_found) + + if found_not_expected: + msg += "\nFound but not expected:\n\t" + msg += "\n\t".join(found_not_expected) + raise ValueError(msg) + + +def data_for_testing_info(test_dataset=str): + """ + Get the path and download link for each dataset used for testing. + + Also creates the base directories into which the data and output + directories are written + + Parameters + ---------- + test_dataset : str + References one of the datasets to download. It can be: + three-echo + three-echo-reclassify + four-echo + five-echo + + Returns + ------- + test_data_path : str + The path to the local directory where the data will be downloaded + osf_id : str + The ID for the OSF file. 
+ Data download link would be https://osf.io/osf_id/download + Metadata download link would be https://osf.io/osf_id/metadata/?format=datacite-json + """ + + tedana_path = os.path.dirname(tedana_cli.__file__) + base_data_path = os.path.abspath(os.path.join(tedana_path, "../../.testing_data_cache")) + os.makedirs(base_data_path, exist_ok=True) + os.makedirs(os.path.join(base_data_path, "outputs"), exist_ok=True) + if test_dataset == "three-echo": + test_data_path = os.path.join(base_data_path, "three-echo/TED.three-echo") + osf_id = "rqhfc" + os.makedirs(os.path.join(base_data_path, "three-echo"), exist_ok=True) + os.makedirs(os.path.join(base_data_path, "outputs/three-echo"), exist_ok=True) + elif test_dataset == "three-echo-reclassify": + test_data_path = os.path.join(base_data_path, "reclassify") + osf_id = "f6g45" + os.makedirs(os.path.join(base_data_path, "outputs/reclassify"), exist_ok=True) + elif test_dataset == "four-echo": + test_data_path = os.path.join(base_data_path, "four-echo/TED.four-echo") + osf_id = "gnj73" + os.makedirs(os.path.join(base_data_path, "four-echo"), exist_ok=True) + os.makedirs(os.path.join(base_data_path, "outputs/four-echo"), exist_ok=True) + elif test_dataset == "five-echo": + test_data_path = os.path.join(base_data_path, "five-echo/TED.five-echo") + osf_id = "9c42e" + os.makedirs(os.path.join(base_data_path, "five-echo"), exist_ok=True) + os.makedirs(os.path.join(base_data_path, "outputs/five-echo"), exist_ok=True) + else: + raise ValueError(f"{test_dataset} is not a valid dataset string for data_for_testing_info") + + return test_data_path, osf_id + + +def download_test_data(osf_id, test_data_path): + """ + If current data is not already available, downloads tar.gz data + stored at `https://osf.io/osf_id/download`. + + and unpacks into `out_path`. + + Parameters + ---------- + osf_id : str + The ID for the OSF file. 
+ out_path : str + Path to directory where OSF data should be extracted + """ + + try: + datainfo = requests.get(f"https://osf.io/{osf_id}/metadata/?format=datacite-json") + except Exception: + if len(os.listdir(test_data_path)) == 0: + raise ConnectionError( + f"Cannot access https://osf.io/{osf_id} and testing data " "are not yet downloaded" + ) + else: + TestLGR.warning( + f"Cannot access https://osf.io/{osf_id}. " + f"Using local copy of testing data in {test_data_path} " + "but cannot validate that local copy is up-to-date" + ) + return + datainfo.raise_for_status() + metadata = json.loads(datainfo.content) + # 'dates' is a list with all udpates to the file, the last item in the list + # is the most recent and the 'date' field in the list is the date of the last + # update. + osf_filedate = metadata["dates"][-1]["date"] + + # File the file with the most recent date for comparision with + # the lsst updated date for the osf file + if os.path.exists(test_data_path): + filelist = glob.glob(f"{test_data_path}/*") + most_recent_file = max(filelist, key=os.path.getctime) + if os.path.exists(most_recent_file): + local_filedate = os.path.getmtime(most_recent_file) + local_filedate_str = str(datetime.fromtimestamp(local_filedate).date()) + local_data_exists = True + else: + local_data_exists = False + else: + local_data_exists = False + if local_data_exists: + if local_filedate_str == osf_filedate: + TestLGR.info( + f"Downloaded and up-to-date data already in {test_data_path}. Not redownloading" + ) + return + else: + TestLGR.info( + f"Downloaded data in {test_data_path} was last modified on " + f"{local_filedate_str}. Data on https://osf.io/{osf_id} " + f" was last updated on {osf_filedate}. 
Deleting and redownloading" + ) + shutil.rmtree(test_data_path) + req = requests.get(f"https://osf.io/{osf_id}/download") + req.raise_for_status() + t = tarfile.open(fileobj=GzipFile(fileobj=BytesIO(req.content))) + os.makedirs(test_data_path, exist_ok=True) + t.extractall(test_data_path) + + +def reclassify_raw() -> str: + test_data_path, _ = data_for_testing_info("three-echo-reclassify") + return os.path.join(test_data_path, "TED.three-echo") + + +def reclassify_raw_registry() -> str: + return os.path.join(reclassify_raw(), "desc-tedana_registry.json") + + +def guarantee_reclassify_data() -> None: + """Ensures that the reclassify data exists at the expected path and return path.""" + + test_data_path, osf_id = data_for_testing_info("three-echo-reclassify") + + # Should now be checking and not downloading for each test so don't see if statement here + # if not os.path.exists(reclassify_raw_registry()): + download_test_data(osf_id, test_data_path) + # else: + # Path exists, be sure that everything in registry exists + ioh = InputHarvester(reclassify_raw_registry()) + all_present = True + for _, v in ioh.registry.items(): + if not isinstance(v, list): + if not os.path.exists(os.path.join(reclassify_raw(), v)): + all_present = False + break + if not all_present: + # Something was removed, need to re-download + shutil.rmtree(reclassify_raw()) + guarantee_reclassify_data() + return test_data_path + + +def test_integration_five_echo(skip_integration): + """Integration test of the full tedana workflow using five-echo test data.""" + + if skip_integration: + pytest.skip("Skipping five-echo integration test") + + test_data_path, osf_id = data_for_testing_info("five-echo") + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/five-echo")) + # out_dir_manual = f"{out_dir}-manual" + + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + # if os.path.exists(out_dir_manual): + # shutil.rmtree(out_dir_manual) + + # download data and run the test + 
download_test_data(osf_id, test_data_path) + prepend = f"{test_data_path}/p06.SBJ01_S09_Task11_e" + suffix = ".sm.nii.gz" + datalist = [prepend + str(i + 1) + suffix for i in range(5)] + echo_times = [15.4, 29.7, 44.0, 58.3, 72.6] + tedana_cli.tedana_workflow( + data=datalist, + tes=echo_times, + ica_method="robustica", + n_robust_runs=5, + out_dir=out_dir, + tedpca=0.95, + fittype="curvefit", + fixed_seed=49, + tedort=True, + verbose=True, + prefix="sub-01", + ) + + # Just a check on the component table pending a unit test of load_comptable + comptable = os.path.join(out_dir, "sub-01_desc-tedana_metrics.tsv") + df = pd.read_table(comptable) + assert isinstance(df, pd.DataFrame) + + +def test_integration_four_echo(skip_integration): + """Integration test of the full tedana workflow using four-echo test data.""" + + if skip_integration: + pytest.skip("Skipping four-echo integration test") + + test_data_path, osf_id = data_for_testing_info("four-echo") + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/four-echo")) + out_dir_manual = f"{out_dir}-manual" + + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + if os.path.exists(out_dir_manual): + shutil.rmtree(out_dir_manual) + + # download data and run the test + download_test_data(osf_id, test_data_path) + prepend = f"{test_data_path}/sub-PILOT_ses-01_task-localizerDetection_run-01_echo-" + suffix = "_space-sbref_desc-preproc_bold+orig.HEAD" + datalist = [prepend + str(i + 1) + suffix for i in range(4)] + tedana_cli.tedana_workflow( + data=datalist, + tes=[11.8, 28.04, 44.28, 60.52], + out_dir=out_dir, + ica_method="robustica", + n_robust_runs=6, + tedpca="aic", + gscontrol=["gsr", "mir"], + png_cmap="bone", + prefix="sub-01", + debug=True, + verbose=True, + ) + + +def test_integration_three_echo(skip_integration): + """Integration test of the full tedana workflow using three-echo test data.""" + + if skip_integration: + pytest.skip("Skipping three-echo integration test") + + test_data_path, 
osf_id = data_for_testing_info("three-echo") + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/three-echo")) + out_dir_manual = f"{out_dir}-rerun" + + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + if os.path.exists(out_dir_manual): + shutil.rmtree(out_dir_manual) + + # download data and run the test + download_test_data(osf_id, test_data_path) + + tedana_cli.tedana_workflow( + data=f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", + tes=[14.5, 38.5, 62.5], + ica_method="robustica", + n_robust_runs=30, + out_dir=out_dir, + low_mem=True, + tedpca="aic", + ) + + # Test re-running, but use the CLI + args = [ + "-d", + f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", + "-e", + "14.5", + "38.5", + "62.5", + "--out-dir", + out_dir_manual, + "--debug", + "--verbose", + "-f", + ] + tedana_cli._main(args) + + +def test_integration_reclassify_insufficient_args(skip_integration): + if skip_integration: + pytest.skip("Skipping reclassify insufficient args") + + guarantee_reclassify_data() + + args = [ + "ica_reclassify", + reclassify_raw_registry(), + ] + + result = subprocess.run(args, capture_output=True) + assert b"ValueError: Must manually accept or reject" in result.stderr + assert result.returncode != 0 + + +def test_integration_reclassify_quiet_csv(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify quiet csv") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/quiet")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + # Make some files that have components to manually accept and reject + to_accept = [i for i in range(3)] + to_reject = [i for i in range(7, 4)] + acc_df = pd.DataFrame(data=to_accept, columns=["Components"]) + rej_df = pd.DataFrame(data=to_reject, columns=["Components"]) + acc_csv_fname = os.path.join(reclassify_raw(), "accept.csv") + rej_csv_fname = os.path.join(reclassify_raw(), "reject.csv") + 
acc_df.to_csv(acc_csv_fname) + rej_df.to_csv(rej_csv_fname) + + args = [ + "ica_reclassify", + "--manacc", + acc_csv_fname, + "--manrej", + rej_csv_fname, + "--out-dir", + out_dir, + reclassify_raw_registry(), + ] + + results = subprocess.run(args, capture_output=True) + assert results.returncode == 0 + fn = resource_filename("tedana", "tests/data/reclassify_quiet_out.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_quiet_spaces(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify quiet space-delimited integers") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/quiet")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + args = [ + "ica_reclassify", + "--manacc", + "1", + "2", + "3", + "--manrej", + "4", + "5", + "6", + "--out-dir", + out_dir, + reclassify_raw_registry(), + ] + + results = subprocess.run(args, capture_output=True) + assert results.returncode == 0 + fn = resource_filename("tedana", "tests/data/reclassify_quiet_out.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_quiet_string(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify quiet string of integers") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/quiet")) + + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + args = [ + "ica_reclassify", + "--manacc", + "1,2,3", + "--manrej", + "4,5,6,", + "--out-dir", + out_dir, + reclassify_raw_registry(), + ] + + results = subprocess.run(args, capture_output=True) + assert results.returncode == 0 + fn = resource_filename("tedana", "tests/data/reclassify_quiet_out.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_debug(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify debug") + + test_data_path = guarantee_reclassify_data() + 
out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/debug")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + args = [ + "ica_reclassify", + "--manacc", + "1", + "2", + "3", + "--prefix", + "sub-testymctestface", + "--convention", + "orig", + "--tedort", + "--mir", + "--no-reports", + "--out-dir", + out_dir, + "--debug", + reclassify_raw_registry(), + ] + + results = subprocess.run(args, capture_output=True) + assert results.returncode == 0 + fn = resource_filename("tedana", "tests/data/reclassify_debug_out.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_both_rej_acc(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify both rejected and accepted") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/both_rej_acc")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + with pytest.raises( + ValueError, + match=r"The following components were both accepted and", + ): + ica_reclassify_workflow( + reclassify_raw_registry(), + accept=[1, 2, 3], + reject=[1, 2, 3], + out_dir=out_dir, + ) + + +def test_integration_reclassify_run_twice(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify both rejected and accepted") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/run_twice")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + ica_reclassify_workflow( + reclassify_raw_registry(), + accept=[1, 2, 3], + out_dir=out_dir, + no_reports=True, + ) + ica_reclassify_workflow( + reclassify_raw_registry(), + accept=[1, 2, 3], + out_dir=out_dir, + overwrite=True, + no_reports=True, + ) + fn = resource_filename("tedana", "tests/data/reclassify_run_twice.txt") + check_integration_outputs(fn, out_dir, n_logs=2) + + +def test_integration_reclassify_no_bold(skip_integration, caplog): + if skip_integration: + 
pytest.skip("Skip reclassify both rejected and accepted") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/no_bold")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + ioh = InputHarvester(reclassify_raw_registry()) + comptable = ioh.get_file_contents("ICA metrics tsv") + to_accept = [i for i in range(len(comptable))] + + ica_reclassify_workflow( + reclassify_raw_registry(), + reject=to_accept, + out_dir=out_dir, + no_reports=True, + ) + assert "No accepted components remaining after manual classification!" in caplog.text + + fn = resource_filename("tedana", "tests/data/reclassify_no_bold.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_accrej_files(skip_integration, caplog): + if skip_integration: + pytest.skip("Skip reclassify both rejected and accepted") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/no_bold")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + ioh = InputHarvester(reclassify_raw_registry()) + comptable = ioh.get_file_contents("ICA metrics tsv") + to_accept = [i for i in range(len(comptable))] + + ica_reclassify_workflow( + reclassify_raw_registry(), + reject=to_accept, + out_dir=out_dir, + no_reports=True, + ) + assert "No accepted components remaining after manual classification!" 
in caplog.text + + fn = resource_filename("tedana", "tests/data/reclassify_no_bold.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_index_failures(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify index failures") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/index_failures")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + with pytest.raises( + ValueError, + match=r"_parse_manual_list expected a list of integers, but the input is", + ): + ica_reclassify_workflow( + reclassify_raw_registry(), + accept=[1, 2.5, 3], + out_dir=out_dir, + no_reports=True, + ) + + with pytest.raises( + ValueError, + match=r"_parse_manual_list expected integers or a filename, but the input is", + ): + ica_reclassify_workflow( + reclassify_raw_registry(), + accept=[2.5], + out_dir=out_dir, + no_reports=True, + ) + + +def test_integration_t2smap(skip_integration): + """Integration test of the full t2smap workflow using five-echo test data.""" + if skip_integration: + pytest.skip("Skipping t2smap integration test") + test_data_path, osf_id = data_for_testing_info("five-echo") + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/t2smap_five-echo")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + # download data and run the test + download_test_data(osf_id, test_data_path) + prepend = f"{test_data_path}/p06.SBJ01_S09_Task11_e" + suffix = ".sm.nii.gz" + datalist = [prepend + str(i + 1) + suffix for i in range(5)] + echo_times = [15.4, 29.7, 44.0, 58.3, 72.6] + args = ( + ["-d"] + + datalist + + ["-e"] + + [str(te) for te in echo_times] + + ["--out-dir", out_dir, "--fittype", "curvefit"] + ) + t2smap_cli._main(args) + + # compare the generated output files + fname = resource_filename("tedana", "tests/data/nih_five_echo_outputs_t2smap.txt") + # Gets filepaths generated by integration test + found_files = [ + 
os.path.relpath(f, out_dir) + for f in glob.glob(os.path.join(out_dir, "**"), recursive=True)[1:] + ] + + # Compares remaining files with those expected + with open(fname, "r") as f: + expected_files = f.read().splitlines() + assert sorted(expected_files) == sorted(found_files) \ No newline at end of file diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index ee567dfef..6411ce44d 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -1,6 +1,4 @@ -""" -Run the "canonical" TE-Dependent ANAlysis workflow. -""" +"""Run the "canonical" TE-Dependent ANAlysis workflow.""" import argparse import datetime import json @@ -8,6 +6,7 @@ import os import os.path as op import shutil +import sys from glob import glob import numpy as np @@ -29,6 +28,12 @@ utils, ) from tedana.bibtex import get_description_references +from tedana.config import ( + DEFAULT_ICA_METHOD, + DEFAULT_N_MAX_ITER, + DEFAULT_N_MAX_RESTART, + DEFAULT_N_ROBUST_RUNS, +) from tedana.stats import computefeats2 from tedana.workflows.parser_utils import check_tedpca_value, is_valid_file @@ -37,8 +42,7 @@ def _get_parser(): - """ - Parses command line inputs for tedana + """Parse command line inputs for tedana. Returns ------- @@ -46,7 +50,7 @@ def _get_parser(): """ from tedana import __version__ - verstr = "tedana v{}".format(__version__) + verstr = f"tedana v{__version__}" parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) # Argument parser follow template provided by RalphyZ # https://stackoverflow.com/a/43456577 @@ -150,7 +154,6 @@ def _get_parser(): "in which case the specificed number of components will be " "selected." ), - choices=["mdl", "kic", "aic"], default="aic", ) optional.add_argument( @@ -165,45 +168,44 @@ def _get_parser(): ), default="kundu", ) - optional.add_argument(#####BTBTBT + optional.add_argument( "--ica_method", dest="ica_method", help=( "The applied ICA method. 
If set to fastica the FastICA " "from sklearn library will be run once with the seed value. " - "robustica will run FastICA n_robust_runs times and and uses " + "robustica will run FastICA n_robust_runs times and uses " "clustering methods to overcome the randomness of the FastICA " - "algorithm. If set to robustica the seed value will be ignored." - "If set to fastica n_robust_runs will not be effective." + "algorithm. When set to fastica n_robust_runs will not be effective." ), choices=["robustica", "fastica"], - default="robustica", + type=str.lower, + default=DEFAULT_ICA_METHOD, ) optional.add_argument( "--seed", dest="fixed_seed", metavar="INT", type=int, - help=( ##BTBTBT + help=( "Value used for random initialization of ICA " - "algorithm when ica_mthods is set to fastica. Set to an integer value for " - "reproducible ICA results with fastica. Set to -1 for " - "varying results across ICA calls. " + "algorithm. Set to an integer value for " + "reproducible ICA results (fastica/robustica). Set to -1 for " + "varying results across ICA (fastica/robustica) calls. " ), default=42, ) - optional.add_argument(#####BTBTBT + optional.add_argument( "--n_robust_runs", dest="n_robust_runs", type=int, help=( "The number of times robustica will run." - "This is only effective when ica_mthods is " + "This is only effective when ica_method is " "set to robustica." - ), - ##choices=range(2,100), - default=30, + choices=range(5, 500), + default=DEFAULT_N_ROBUST_RUNS, ) optional.add_argument( "--maxit", @@ -211,7 +213,7 @@ def _get_parser(): metavar="INT", type=int, help=("Maximum number of iterations for ICA."), - default=500, + default=DEFAULT_N_MAX_ITER, ) optional.add_argument( "--maxrestart", @@ -225,7 +227,7 @@ def _get_parser(): "convergence is achieved before maxrestart " "attempts, ICA will finish early." 
), - default=10, + default=DEFAULT_N_MAX_RESTART, ) optional.add_argument( "--tedort", @@ -351,12 +353,12 @@ def tedana_workflow( fittype="loglin", combmode="t2s", tree="kundu", - ica_method="robustica", ########BTBTAdded - n_robust_runs=30, + ica_method=DEFAULT_ICA_METHOD, + n_robust_runs=DEFAULT_N_ROBUST_RUNS, tedpca="aic", fixed_seed=42, - maxit=500, - maxrestart=10, + maxit=DEFAULT_N_MAX_ITER, + maxrestart=DEFAULT_N_MAX_RESTART, tedort=False, gscontrol=None, no_reports=False, @@ -368,9 +370,9 @@ def tedana_workflow( overwrite=False, t2smap=None, mixm=None, + tedana_command=None, ): - """ - Run the "canonical" TE-Dependent ANAlysis workflow. + """Run the "canonical" TE-Dependent ANAlysis workflow. Please remember to cite :footcite:t:`dupre2021te`. @@ -412,15 +414,27 @@ def tedana_workflow( accepts and rejects some distinct components compared to kundu. Testing to better understand the effects of the differences is ongoing. Default is 'kundu'. + ica_method : {'robustica', 'fastica'}, optional + The applied ICA method. If set to fastica the FastICA from sklearn + library will be run once with the seed value. 'robustica' will run + 'FastICA' n_robust_runs times and uses clustering methods to overcome + the randomness of the FastICA algorithm. When set to fastica n_robust_runs + will not be effective. + Default is 'robustica' + n_robust_runs : :obj:`int`, optional + The number of times robustica will run. This is only effective when 'ica_method' is + set to 'robustica'. tedpca : {'mdl', 'aic', 'kic', 'kundu', 'kundu-stabilize', float, int}, optional Method with which to select components in TEDPCA. If a float is provided, then it is assumed to represent percentage of variance - explained (0-1) to retain from PCA. + explained (0-1) to retain from PCA. If an int is provided, it will output + a fixed number of components defined by the integer between 1 and the + number of time points. Default is 'aic'. 
fixed_seed : :obj:`int`, optional Value passed to ``mdp.numx_rand.seed()``. - Set to a positive integer value for reproducible ICA results; - otherwise, set to -1 for varying results across calls. + Set to a positive integer value for reproducible ICA results (fastica/robustica); + otherwise, set to -1 for varying results across ICA (fastica/robustica) calls. maxit : :obj:`int`, optional Maximum number of iterations for ICA. Default is 500. maxrestart : :obj:`int`, optional @@ -457,6 +471,9 @@ def tedana_workflow( If True, suppresses logging/printing of messages. Default is False. overwrite : :obj:`bool`, optional If True, force overwriting of files. Default is False. + tedana_command : :obj:`str`, optional + If the command-line interface was used, this is the command that was + run. Default is None. Notes ----- @@ -473,10 +490,11 @@ def tedana_workflow( os.mkdir(out_dir) # boilerplate - basename = "report" + prefix = io._infer_prefix(prefix) + basename = f"{prefix}report" extension = "txt" repname = op.join(out_dir, (basename + "." + extension)) - bibtex_file = op.join(out_dir, "references.bib") + bibtex_file = op.join(out_dir, f"{prefix}references.bib") repex = op.join(out_dir, (basename + "*")) previousreps = glob(repex) previousreps.sort(reverse=True) @@ -492,7 +510,20 @@ def tedana_workflow( logname = op.join(out_dir, (basename + start_time + "." 
+ extension)) utils.setup_loggers(logname, repname, quiet=quiet, debug=debug) - LGR.info("Using output directory: {}".format(out_dir)) + # Save command into sh file, if the command-line interface was used + # TODO: use io_generator to save command + if tedana_command is not None: + command_file = open(os.path.join(out_dir, "tedana_call.sh"), "w") + command_file.write(tedana_command) + command_file.close() + else: + # Get variables passed to function if the tedana command is None + variables = ", ".join(f"{name}={value}" for name, value in locals().items()) + # From variables, remove everything after ", tedana_command" + variables = variables.split(", tedana_command")[0] + tedana_command = f"tedana_workflow({variables})" + + LGR.info(f"Using output directory: {out_dir}") # ensure tes are in appropriate format tes = [float(te) for te in tes] @@ -510,7 +541,7 @@ def tedana_workflow( if isinstance(data, str): data = [data] - LGR.info("Loading input data: {}".format([f for f in data])) + LGR.info(f"Loading input data: {[f for f in data]}") catd, ref_img = io.load_data(data, n_echos=n_echos) io_generator = io.OutputGenerator( @@ -527,8 +558,13 @@ def tedana_workflow( # TODO: turn this into an IOManager since this isn't really output io_generator.register_input(data) + # Save system info to json + info_dict = utils.get_system_info() + info_dict["Python"] = sys.version + info_dict["Command"] = tedana_command + n_samp, n_echos, n_vols = catd.shape - LGR.debug("Resulting data shape: {}".format(catd.shape)) + LGR.debug(f"Resulting data shape: {catd.shape}") # check if TR is 0 img_t_r = io_generator.reference_img.header.get_zooms()[-1] @@ -600,7 +636,7 @@ def tedana_workflow( getsum=True, threshold=1, ) - LGR.debug("Retaining {}/{} samples for denoising".format(mask_denoise.sum(), n_samp)) + LGR.debug(f"Retaining {mask_denoise.sum()}/{n_samp} samples for denoising") io_generator.save_file(masksum_denoise, "adaptive mask img") # Create an adaptive mask with at least 3 good 
echoes, for classification @@ -614,7 +650,7 @@ def tedana_workflow( "(restricted to voxels with good data in at least the first three echoes) was used for " "the component classification procedure." ) - LGR.debug("Retaining {}/{} samples for classification".format(mask_clf.sum(), n_samp)) + LGR.debug(f"Retaining {mask_clf.sum()}/{n_samp} samples for classification") if t2smap is None: LGR.info("Computing T2* map") @@ -625,7 +661,7 @@ def tedana_workflow( # set a hard cap for the T2* map # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile cap_t2s = stats.scoreatpercentile(t2s_full.flatten(), 99.5, interpolation_method="lower") - LGR.debug("Setting cap on T2* map at {:.5f}s".format(utils.millisec2sec(cap_t2s))) + LGR.debug(f"Setting cap on T2* map at {utils.millisec2sec(cap_t2s):.5f}s") t2s_full[t2s_full > cap_t2s * 10] = cap_t2s io_generator.save_file(utils.millisec2sec(t2s_full), "t2star img") io_generator.save_file(s0_full, "s0 img") @@ -642,23 +678,20 @@ def tedana_workflow( catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, io_generator) fout = io_generator.save_file(data_oc, "combined img") - LGR.info("Writing optimally combined data set: {}".format(fout)) + LGR.info(f"Writing optimally combined data set: {fout}") if mixm is None: # Identify and remove thermal noise from data dd, n_components = decomposition.tedpca( catd, data_oc, - combmode, mask_clf, masksum_clf, - t2s_full, io_generator, tes=tes, algorithm=tedpca, kdaw=10.0, rdaw=1.0, - verbose=verbose, low_mem=low_mem, ) if verbose: @@ -667,12 +700,17 @@ def tedana_workflow( # Perform ICA, calculate metrics, and apply decision tree # Restart when ICA fails to converge or too few BOLD components found keep_restarting = True - n_restarts = 0 seed = fixed_seed while keep_restarting: mmix, seed = decomposition.tedica( - dd, n_components, seed, ica_method, n_robust_runs, maxit, maxrestart=(maxrestart - n_restarts) + dd, + n_components, + seed, + ica_method, + n_robust_runs, + 
maxit, + maxrestart=(maxrestart - n_restarts), ) seed += 1 n_restarts = seed - fixed_seed @@ -706,17 +744,13 @@ def tedana_workflow( ) ica_selector = selection.automatic_selection(comptable, n_echos, n_vols, tree=tree) n_likely_bold_comps = ica_selector.n_likely_bold_comps - - if ica_method=='robustica': #########BTBTBT + if (n_restarts < maxrestart) and (n_likely_bold_comps == 0): + LGR.warning("No BOLD components found. Re-attempting ICA.") + elif n_likely_bold_comps == 0: + LGR.warning("No BOLD components found, but maximum number of restarts reached.") + keep_restarting = False + else: keep_restarting = False - else: - if (n_restarts < maxrestart) and (n_likely_bold_comps == 0): - LGR.warning("No BOLD components found. Re-attempting ICA.") - elif n_likely_bold_comps == 0: - LGR.warning("No BOLD components found, but maximum number of restarts reached.") - keep_restarting = False - else: - keep_restarting = False # If we're going to restart, temporarily allow force overwrite if keep_restarting: @@ -825,7 +859,6 @@ def tedana_workflow( mask=mask_denoise, comptable=comptable, mmix=mmix, - n_vols=n_vols, io_generator=io_generator, ) @@ -852,6 +885,16 @@ def tedana_workflow( "of non-BOLD noise from multi-echo fMRI data." 
), "CodeURL": "https://github.com/ME-ICA/tedana", + "Node": { + "Name": info_dict["Node"], + "System": info_dict["System"], + "Machine": info_dict["Machine"], + "Processor": info_dict["Processor"], + "Release": info_dict["Release"], + "Version": info_dict["Version"], + }, + "Python": info_dict["Python"], + "Command": info_dict["Command"], } ], } @@ -908,22 +951,22 @@ def tedana_workflow( ) LGR.info("Generating dynamic report") - reporting.generate_report(io_generator, tr=img_t_r) + reporting.generate_report(io_generator) LGR.info("Workflow completed") utils.teardown_loggers() def _main(argv=None): - """Tedana entry point""" + """Run the tedana workflow.""" + tedana_command = "tedana " + " ".join(sys.argv[1:]) options = _get_parser().parse_args(argv) kwargs = vars(options) n_threads = kwargs.pop("n_threads") n_threads = None if n_threads == -1 else n_threads with threadpool_limits(limits=n_threads, user_api=None): - tedana_workflow(**kwargs) + tedana_workflow(**kwargs, tedana_command=tedana_command) if __name__ == "__main__": - _main() - + _main() \ No newline at end of file From 55c2ae45fbc0fbb392e1de35d6127ccaddee9d4d Mon Sep 17 00:00:00 2001 From: Circle Date: Wed, 1 Nov 2023 13:18:04 +1100 Subject: [PATCH 03/13] Add robustica 0.1.3 to dependency list Cherry-pick of 41354cbfd127ac69cf1bdd69a556328fc6092457. --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 2e695b542..a9173c8cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "nibabel>=2.5.1", "nilearn>=0.7", "numpy>=1.16", + "robustica>=0.1.3", "pandas>=2.0", "scikit-learn>=0.21", "scipy>=1.2.0", From cd55a3f7d93d251b3eb2be045aa6555e6d5366ce Mon Sep 17 00:00:00 2001 From: Bahman Tahayori Date: Tue, 5 Dec 2023 12:44:58 +1100 Subject: [PATCH 04/13] Multiple fixes to RobustICA addition from code review From: BahmanTahayori/tedana_florey#2. Co-authored-by: Robert E. 
Smith --- tedana/decomposition/ica.py | 6 +++--- tedana/tests/test_integrity_robustica.py | 4 ++-- tedana/workflows/tedana.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index abb9d2c82..d4fb6c50a 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -91,7 +91,7 @@ def tedica( def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): """ - Perform robustica on `data` by running FastICA multiple times (n_robust runes) + Perform robustica on `data` by running FastICA multiple times (n_robust_runs) and returns mixing matrix Parameters @@ -165,8 +165,8 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): if iq < 0.6: LGR.warning( - "The resultant mean Index Quality is low. It is recommended to rerun the " - "process with a different seed." + "The resultant mean Index Quality is low ({0}). It is recommended to rerun the " + "process with a different seed.".format(iq) ) mmix = mmix[:, q["cluster_id"] >= 0] diff --git a/tedana/tests/test_integrity_robustica.py b/tedana/tests/test_integrity_robustica.py index 88646d51f..4bf9ed6b3 100644 --- a/tedana/tests/test_integrity_robustica.py +++ b/tedana/tests/test_integrity_robustica.py @@ -76,7 +76,7 @@ def check_integration_outputs(fname, outpath, n_logs=1): raise ValueError(msg) -def data_for_testing_info(test_dataset=str): +def data_for_testing_info(test_dataset: str): """ Get the path and download link for each dataset used for testing. 
@@ -209,7 +209,7 @@ def reclassify_raw_registry() -> str: return os.path.join(reclassify_raw(), "desc-tedana_registry.json") -def guarantee_reclassify_data() -> None: +def guarantee_reclassify_data() -> str: """Ensures that the reclassify data exists at the expected path and return path.""" test_data_path, osf_id = data_for_testing_info("three-echo-reclassify") diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index 6411ce44d..b092c8df3 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -176,7 +176,7 @@ def _get_parser(): "from sklearn library will be run once with the seed value. " "robustica will run FastICA n_robust_runs times and uses " "clustering methods to overcome the randomness of the FastICA " - "algorithm. When set to fastica n_robust_runs will not be effective." + "algorithm. When set to fastica, --n_robust_runs option will have no effect." ), choices=["robustica", "fastica"], type=str.lower, From 2d9b0078e1671d31ad70918b5e3cc4c8d47c8c74 Mon Sep 17 00:00:00 2001 From: Circle Date: Wed, 29 Nov 2023 17:07:24 +1100 Subject: [PATCH 05/13] Specify magic number fixed seed of 42 as a constant Cherry-pick of da1b128e2c0a4aad8acbd398e8d151f5c846c47b (with modification). 
--- tedana/config.py | 3 ++- tedana/decomposition/ica.py | 2 +- tedana/workflows/tedana.py | 10 ++++++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tedana/config.py b/tedana/config.py index 9bca5b873..e665795af 100644 --- a/tedana/config.py +++ b/tedana/config.py @@ -1,4 +1,5 @@ DEFAULT_ICA_METHOD = "robustica" DEFAULT_N_ROBUST_RUNS = 30 DEFAULT_N_MAX_ITER = 500 -DEFAULT_N_MAX_RESTART = 10 \ No newline at end of file +DEFAULT_N_MAX_RESTART = 10 +DEFAULT_SEED = 42 \ No newline at end of file diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index d4fb6c50a..ec20e8d92 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -248,4 +248,4 @@ def f_ica(data, n_components, fixed_seed, maxit, maxrestart): mmix = ica.mixing_ mmix = stats.zscore(mmix, axis=0) - return mmix, fixed_seed \ No newline at end of file + return mmix, fixed_seed diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index b092c8df3..49d8a108c 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -33,6 +33,7 @@ DEFAULT_N_MAX_ITER, DEFAULT_N_MAX_RESTART, DEFAULT_N_ROBUST_RUNS, + DEFAULT_SEED ) from tedana.stats import computefeats2 from tedana.workflows.parser_utils import check_tedpca_value, is_valid_file @@ -190,10 +191,11 @@ def _get_parser(): help=( "Value used for random initialization of ICA " "algorithm. Set to an integer value for " - "reproducible ICA results (fastica/robustica). Set to -1 for " - "varying results across ICA (fastica/robustica) calls. " + "reproducible ICA results. Set to -1 for " + "varying results across ICA calls. This" + "applies to both fastica and robustica methods." 
), - default=42, + default=DEFAULT_SEED, ) optional.add_argument( "--n_robust_runs", @@ -356,7 +358,7 @@ def tedana_workflow( ica_method=DEFAULT_ICA_METHOD, n_robust_runs=DEFAULT_N_ROBUST_RUNS, tedpca="aic", - fixed_seed=42, + fixed_seed=DEFAULT_SEED, maxit=DEFAULT_N_MAX_ITER, maxrestart=DEFAULT_N_MAX_RESTART, tedort=False, From fc5f9ea8c8470346313acd860de59ee1193dbcb1 Mon Sep 17 00:00:00 2001 From: tahayori Date: Tue, 5 Dec 2023 16:39:30 +1100 Subject: [PATCH 06/13] Updated --- tedana/decomposition/ica.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index f6fc03c9a..3074c38db 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -139,7 +139,7 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): robust_method="DBSCAN", ) - S, mmix = rica.fit_transform(data) + s, mmix = rica.fit_transform(data) q = rica.evaluate_clustering( rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_ ) @@ -156,7 +156,7 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): robust_method="AgglomerativeClustering", ) - S, mmix = rica.fit_transform(data) + s, mmix = rica.fit_transform(data) q = rica.evaluate_clustering( rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_ ) From 4fc30439ca28efe3fe501f45bf2b6b3d91aaef8c Mon Sep 17 00:00:00 2001 From: Bahman Tahayori Date: Wed, 6 Dec 2023 11:55:30 +1100 Subject: [PATCH 07/13] Robustica Updates --- tedana/config.py | 2 +- tedana/tests/test_integration.py | 136 +++++ tedana/tests/test_integrity_robustica.py | 667 ----------------------- tedana/workflows/tedana.py | 4 +- 4 files changed, 139 insertions(+), 670 deletions(-) delete mode 100644 tedana/tests/test_integrity_robustica.py diff --git a/tedana/config.py b/tedana/config.py index e665795af..58563de79 100644 --- a/tedana/config.py +++ b/tedana/config.py @@ -2,4 +2,4 @@ DEFAULT_N_ROBUST_RUNS = 30 DEFAULT_N_MAX_ITER = 500 
DEFAULT_N_MAX_RESTART = 10 -DEFAULT_SEED = 42 \ No newline at end of file +DEFAULT_SEED = 42 diff --git a/tedana/tests/test_integration.py b/tedana/tests/test_integration.py index 59695a939..645057788 100644 --- a/tedana/tests/test_integration.py +++ b/tedana/tests/test_integration.py @@ -258,6 +258,7 @@ def test_integration_five_echo(skip_integration): tedana_cli.tedana_workflow( data=datalist, tes=echo_times, + ica_method="fastica", out_dir=out_dir, tedpca=0.95, fittype="curvefit", @@ -277,6 +278,48 @@ def test_integration_five_echo(skip_integration): check_integration_outputs(fn, out_dir) +def test_integration_robustica_five_echo(skip_integration): + """Integration test of the full tedana workflow with robustica using five-echo test data.""" + + if skip_integration: + pytest.skip("Skipping five-echo integration test") + + test_data_path, osf_id = data_for_testing_info("five-echo") + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/five-echo")) + # out_dir_manual = f"{out_dir}-manual" + + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + # if os.path.exists(out_dir_manual): + # shutil.rmtree(out_dir_manual) + + # download data and run the test + download_test_data(osf_id, test_data_path) + prepend = f"{test_data_path}/p06.SBJ01_S09_Task11_e" + suffix = ".sm.nii.gz" + datalist = [prepend + str(i + 1) + suffix for i in range(5)] + echo_times = [15.4, 29.7, 44.0, 58.3, 72.6] + tedana_cli.tedana_workflow( + data=datalist, + tes=echo_times, + ica_method="robustica", + n_robust_runs=10, + out_dir=out_dir, + tedpca=0.95, + fittype="curvefit", + fixed_seed=49, + tedort=True, + verbose=True, + prefix="sub-01", + ) + + # Just a check on the component table pending a unit test of load_comptable + comptable = os.path.join(out_dir, "sub-01_desc-tedana_metrics.tsv") + df = pd.read_table(comptable) + assert isinstance(df, pd.DataFrame) + + def test_integration_four_echo(skip_integration): """Integration test of the full tedana workflow using 
four-echo test data.""" @@ -302,6 +345,7 @@ def test_integration_four_echo(skip_integration): data=datalist, mixm=op.join(op.dirname(datalist[0]), "desc-ICA_mixing_static.tsv"), tes=[11.8, 28.04, 44.28, 60.52], + ica_method="fastica", out_dir=out_dir, tedpca="kundu-stabilize", gscontrol=["gsr", "mir"], @@ -325,6 +369,49 @@ def test_integration_four_echo(skip_integration): check_integration_outputs(fn, out_dir) +def test_integration_robustica_four_echo(skip_integration): + """Integration test of the full tedana workflow with robustica using four-echo test data.""" + + if skip_integration: + pytest.skip("Skipping four-echo integration test") + + test_data_path, osf_id = data_for_testing_info("four-echo") + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/four-echo")) + out_dir_manual = f"{out_dir}-manual" + + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + if os.path.exists(out_dir_manual): + shutil.rmtree(out_dir_manual) + + # download data and run the test + download_test_data(osf_id, test_data_path) + prepend = f"{test_data_path}/sub-PILOT_ses-01_task-localizerDetection_run-01_echo-" + suffix = "_space-sbref_desc-preproc_bold+orig.HEAD" + datalist = [prepend + str(i + 1) + suffix for i in range(4)] + tedana_cli.tedana_workflow( + data=datalist, + mixm=op.join(op.dirname(datalist[0]), "desc-ICA_mixing_static.tsv"), + tes=[11.8, 28.04, 44.28, 60.52], + out_dir=out_dir, + tedpca="kundu-stabilize", + gscontrol=["gsr", "mir"], + png_cmap="bone", + prefix="sub-01", + debug=True, + verbose=True, + ) + + ica_reclassify_workflow( + op.join(out_dir, "sub-01_desc-tedana_registry.json"), + accept=[1, 2, 3], + reject=[4, 5, 6], + out_dir=out_dir_manual, + mir=True, + ) + + def test_integration_three_echo(skip_integration): """Integration test of the full tedana workflow using three-echo test data.""" @@ -346,6 +433,7 @@ def test_integration_three_echo(skip_integration): tedana_cli.tedana_workflow( 
data=f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", tes=[14.5, 38.5, 62.5], + ica_method="fastica", out_dir=out_dir, low_mem=True, tedpca="aic", @@ -374,6 +462,54 @@ def test_integration_three_echo(skip_integration): check_integration_outputs(fn, out_dir) +def test_integration_robustica_three_echo(skip_integration): + """Integration test of the full tedana workflow with robustica using three-echo test data.""" + + if skip_integration: + pytest.skip("Skipping three-echo integration test") + + test_data_path, osf_id = data_for_testing_info("three-echo") + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/three-echo")) + out_dir_manual = f"{out_dir}-rerun" + + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + if os.path.exists(out_dir_manual): + shutil.rmtree(out_dir_manual) + + # download data and run the test + download_test_data(osf_id, test_data_path) + tedana_cli.tedana_workflow( + data=f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", + tes=[14.5, 38.5, 62.5], + ica_method="robustica", + out_dir=out_dir, + low_mem=True, + tedpca="aic", + ) + + # Test re-running, but use the CLI + args = [ + "-d", + f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", + "-e", + "14.5", + "38.5", + "62.5", + "--ica_method", + "robustica", + "--out-dir", + out_dir_manual, + "--debug", + "--verbose", + "-f", + "--mix", + os.path.join(out_dir, "desc-ICA_mixing.tsv"), + ] + tedana_cli._main(args) + + def test_integration_reclassify_insufficient_args(skip_integration): if skip_integration: pytest.skip("Skipping reclassify insufficient args") diff --git a/tedana/tests/test_integrity_robustica.py b/tedana/tests/test_integrity_robustica.py deleted file mode 100644 index 4bf9ed6b3..000000000 --- a/tedana/tests/test_integrity_robustica.py +++ /dev/null @@ -1,667 +0,0 @@ -"""Integration tests for "real" data.""" - -import glob -import json -import logging -import os -import os.path as op -import re -import shutil -import subprocess -import tarfile -from 
datetime import datetime -from gzip import GzipFile -from io import BytesIO - -import pandas as pd -import pytest -import requests -from pkg_resources import resource_filename - -from tedana.io import InputHarvester -from tedana.workflows import t2smap as t2smap_cli -from tedana.workflows import tedana as tedana_cli -from tedana.workflows.ica_reclassify import ica_reclassify_workflow - -# Need to see if a no BOLD warning occurred -LOGGER = logging.getLogger(__name__) -# Added a testing logger to output whether or not testing data were downlaoded -TestLGR = logging.getLogger("TESTING") - - -def check_integration_outputs(fname, outpath, n_logs=1): - """ - Checks outputs of integration tests. - - Parameters - ---------- - fname : str - Path to file with expected outputs - outpath : str - Path to output directory generated from integration tests - """ - - # Gets filepaths generated by integration test - found_files = [ - os.path.relpath(f, outpath) - for f in glob.glob(os.path.join(outpath, "**"), recursive=True)[1:] - ] - - # Checks for log file - log_regex = "^tedana_[12][0-9]{3}-[0-9]{2}-[0-9]{2}T[0-9]{2}[0-9]{2}[0-9]{2}.tsv$" - logfiles = [out for out in found_files if re.match(log_regex, out)] - assert len(logfiles) == n_logs - - # Removes logfiles from list of existing files - for log in logfiles: - found_files.remove(log) - - # Compares remaining files with those expected - with open(fname, "r") as f: - expected_files = f.read().splitlines() - expected_files = [os.path.normpath(path) for path in expected_files] - - if sorted(found_files) != sorted(expected_files): - expected_not_found = sorted(list(set(expected_files) - set(found_files))) - found_not_expected = sorted(list(set(found_files) - set(expected_files))) - - msg = "" - if expected_not_found: - msg += "\nExpected but not found:\n\t" - msg += "\n\t".join(expected_not_found) - - if found_not_expected: - msg += "\nFound but not expected:\n\t" - msg += "\n\t".join(found_not_expected) - raise ValueError(msg) 
- - -def data_for_testing_info(test_dataset: str): - """ - Get the path and download link for each dataset used for testing. - - Also creates the base directories into which the data and output - directories are written - - Parameters - ---------- - test_dataset : str - References one of the datasets to download. It can be: - three-echo - three-echo-reclassify - four-echo - five-echo - - Returns - ------- - test_data_path : str - The path to the local directory where the data will be downloaded - osf_id : str - The ID for the OSF file. - Data download link would be https://osf.io/osf_id/download - Metadata download link would be https://osf.io/osf_id/metadata/?format=datacite-json - """ - - tedana_path = os.path.dirname(tedana_cli.__file__) - base_data_path = os.path.abspath(os.path.join(tedana_path, "../../.testing_data_cache")) - os.makedirs(base_data_path, exist_ok=True) - os.makedirs(os.path.join(base_data_path, "outputs"), exist_ok=True) - if test_dataset == "three-echo": - test_data_path = os.path.join(base_data_path, "three-echo/TED.three-echo") - osf_id = "rqhfc" - os.makedirs(os.path.join(base_data_path, "three-echo"), exist_ok=True) - os.makedirs(os.path.join(base_data_path, "outputs/three-echo"), exist_ok=True) - elif test_dataset == "three-echo-reclassify": - test_data_path = os.path.join(base_data_path, "reclassify") - osf_id = "f6g45" - os.makedirs(os.path.join(base_data_path, "outputs/reclassify"), exist_ok=True) - elif test_dataset == "four-echo": - test_data_path = os.path.join(base_data_path, "four-echo/TED.four-echo") - osf_id = "gnj73" - os.makedirs(os.path.join(base_data_path, "four-echo"), exist_ok=True) - os.makedirs(os.path.join(base_data_path, "outputs/four-echo"), exist_ok=True) - elif test_dataset == "five-echo": - test_data_path = os.path.join(base_data_path, "five-echo/TED.five-echo") - osf_id = "9c42e" - os.makedirs(os.path.join(base_data_path, "five-echo"), exist_ok=True) - os.makedirs(os.path.join(base_data_path, 
"outputs/five-echo"), exist_ok=True) - else: - raise ValueError(f"{test_dataset} is not a valid dataset string for data_for_testing_info") - - return test_data_path, osf_id - - -def download_test_data(osf_id, test_data_path): - """ - If current data is not already available, downloads tar.gz data - stored at `https://osf.io/osf_id/download`. - - and unpacks into `out_path`. - - Parameters - ---------- - osf_id : str - The ID for the OSF file. - out_path : str - Path to directory where OSF data should be extracted - """ - - try: - datainfo = requests.get(f"https://osf.io/{osf_id}/metadata/?format=datacite-json") - except Exception: - if len(os.listdir(test_data_path)) == 0: - raise ConnectionError( - f"Cannot access https://osf.io/{osf_id} and testing data " "are not yet downloaded" - ) - else: - TestLGR.warning( - f"Cannot access https://osf.io/{osf_id}. " - f"Using local copy of testing data in {test_data_path} " - "but cannot validate that local copy is up-to-date" - ) - return - datainfo.raise_for_status() - metadata = json.loads(datainfo.content) - # 'dates' is a list with all udpates to the file, the last item in the list - # is the most recent and the 'date' field in the list is the date of the last - # update. - osf_filedate = metadata["dates"][-1]["date"] - - # File the file with the most recent date for comparision with - # the lsst updated date for the osf file - if os.path.exists(test_data_path): - filelist = glob.glob(f"{test_data_path}/*") - most_recent_file = max(filelist, key=os.path.getctime) - if os.path.exists(most_recent_file): - local_filedate = os.path.getmtime(most_recent_file) - local_filedate_str = str(datetime.fromtimestamp(local_filedate).date()) - local_data_exists = True - else: - local_data_exists = False - else: - local_data_exists = False - if local_data_exists: - if local_filedate_str == osf_filedate: - TestLGR.info( - f"Downloaded and up-to-date data already in {test_data_path}. 
Not redownloading" - ) - return - else: - TestLGR.info( - f"Downloaded data in {test_data_path} was last modified on " - f"{local_filedate_str}. Data on https://osf.io/{osf_id} " - f" was last updated on {osf_filedate}. Deleting and redownloading" - ) - shutil.rmtree(test_data_path) - req = requests.get(f"https://osf.io/{osf_id}/download") - req.raise_for_status() - t = tarfile.open(fileobj=GzipFile(fileobj=BytesIO(req.content))) - os.makedirs(test_data_path, exist_ok=True) - t.extractall(test_data_path) - - -def reclassify_raw() -> str: - test_data_path, _ = data_for_testing_info("three-echo-reclassify") - return os.path.join(test_data_path, "TED.three-echo") - - -def reclassify_raw_registry() -> str: - return os.path.join(reclassify_raw(), "desc-tedana_registry.json") - - -def guarantee_reclassify_data() -> str: - """Ensures that the reclassify data exists at the expected path and return path.""" - - test_data_path, osf_id = data_for_testing_info("three-echo-reclassify") - - # Should now be checking and not downloading for each test so don't see if statement here - # if not os.path.exists(reclassify_raw_registry()): - download_test_data(osf_id, test_data_path) - # else: - # Path exists, be sure that everything in registry exists - ioh = InputHarvester(reclassify_raw_registry()) - all_present = True - for _, v in ioh.registry.items(): - if not isinstance(v, list): - if not os.path.exists(os.path.join(reclassify_raw(), v)): - all_present = False - break - if not all_present: - # Something was removed, need to re-download - shutil.rmtree(reclassify_raw()) - guarantee_reclassify_data() - return test_data_path - - -def test_integration_five_echo(skip_integration): - """Integration test of the full tedana workflow using five-echo test data.""" - - if skip_integration: - pytest.skip("Skipping five-echo integration test") - - test_data_path, osf_id = data_for_testing_info("five-echo") - out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/five-echo")) - 
# out_dir_manual = f"{out_dir}-manual" - - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - # if os.path.exists(out_dir_manual): - # shutil.rmtree(out_dir_manual) - - # download data and run the test - download_test_data(osf_id, test_data_path) - prepend = f"{test_data_path}/p06.SBJ01_S09_Task11_e" - suffix = ".sm.nii.gz" - datalist = [prepend + str(i + 1) + suffix for i in range(5)] - echo_times = [15.4, 29.7, 44.0, 58.3, 72.6] - tedana_cli.tedana_workflow( - data=datalist, - tes=echo_times, - ica_method="robustica", - n_robust_runs=5, - out_dir=out_dir, - tedpca=0.95, - fittype="curvefit", - fixed_seed=49, - tedort=True, - verbose=True, - prefix="sub-01", - ) - - # Just a check on the component table pending a unit test of load_comptable - comptable = os.path.join(out_dir, "sub-01_desc-tedana_metrics.tsv") - df = pd.read_table(comptable) - assert isinstance(df, pd.DataFrame) - - -def test_integration_four_echo(skip_integration): - """Integration test of the full tedana workflow using four-echo test data.""" - - if skip_integration: - pytest.skip("Skipping four-echo integration test") - - test_data_path, osf_id = data_for_testing_info("four-echo") - out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/four-echo")) - out_dir_manual = f"{out_dir}-manual" - - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - if os.path.exists(out_dir_manual): - shutil.rmtree(out_dir_manual) - - # download data and run the test - download_test_data(osf_id, test_data_path) - prepend = f"{test_data_path}/sub-PILOT_ses-01_task-localizerDetection_run-01_echo-" - suffix = "_space-sbref_desc-preproc_bold+orig.HEAD" - datalist = [prepend + str(i + 1) + suffix for i in range(4)] - tedana_cli.tedana_workflow( - data=datalist, - tes=[11.8, 28.04, 44.28, 60.52], - out_dir=out_dir, - ica_method="robustica", - n_robust_runs=6, - tedpca="aic", - gscontrol=["gsr", "mir"], - png_cmap="bone", - prefix="sub-01", - debug=True, - verbose=True, - ) - - -def 
test_integration_three_echo(skip_integration): - """Integration test of the full tedana workflow using three-echo test data.""" - - if skip_integration: - pytest.skip("Skipping three-echo integration test") - - test_data_path, osf_id = data_for_testing_info("three-echo") - out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/three-echo")) - out_dir_manual = f"{out_dir}-rerun" - - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - if os.path.exists(out_dir_manual): - shutil.rmtree(out_dir_manual) - - # download data and run the test - download_test_data(osf_id, test_data_path) - - tedana_cli.tedana_workflow( - data=f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", - tes=[14.5, 38.5, 62.5], - ica_method="robustica", - n_robust_runs=30, - out_dir=out_dir, - low_mem=True, - tedpca="aic", - ) - - # Test re-running, but use the CLI - args = [ - "-d", - f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", - "-e", - "14.5", - "38.5", - "62.5", - "--out-dir", - out_dir_manual, - "--debug", - "--verbose", - "-f", - ] - tedana_cli._main(args) - - -def test_integration_reclassify_insufficient_args(skip_integration): - if skip_integration: - pytest.skip("Skipping reclassify insufficient args") - - guarantee_reclassify_data() - - args = [ - "ica_reclassify", - reclassify_raw_registry(), - ] - - result = subprocess.run(args, capture_output=True) - assert b"ValueError: Must manually accept or reject" in result.stderr - assert result.returncode != 0 - - -def test_integration_reclassify_quiet_csv(skip_integration): - if skip_integration: - pytest.skip("Skip reclassify quiet csv") - - test_data_path = guarantee_reclassify_data() - out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/quiet")) - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - # Make some files that have components to manually accept and reject - to_accept = [i for i in range(3)] - to_reject = [i for i in range(7, 4)] - acc_df = pd.DataFrame(data=to_accept, 
columns=["Components"]) - rej_df = pd.DataFrame(data=to_reject, columns=["Components"]) - acc_csv_fname = os.path.join(reclassify_raw(), "accept.csv") - rej_csv_fname = os.path.join(reclassify_raw(), "reject.csv") - acc_df.to_csv(acc_csv_fname) - rej_df.to_csv(rej_csv_fname) - - args = [ - "ica_reclassify", - "--manacc", - acc_csv_fname, - "--manrej", - rej_csv_fname, - "--out-dir", - out_dir, - reclassify_raw_registry(), - ] - - results = subprocess.run(args, capture_output=True) - assert results.returncode == 0 - fn = resource_filename("tedana", "tests/data/reclassify_quiet_out.txt") - check_integration_outputs(fn, out_dir) - - -def test_integration_reclassify_quiet_spaces(skip_integration): - if skip_integration: - pytest.skip("Skip reclassify quiet space-delimited integers") - - test_data_path = guarantee_reclassify_data() - out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/quiet")) - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - args = [ - "ica_reclassify", - "--manacc", - "1", - "2", - "3", - "--manrej", - "4", - "5", - "6", - "--out-dir", - out_dir, - reclassify_raw_registry(), - ] - - results = subprocess.run(args, capture_output=True) - assert results.returncode == 0 - fn = resource_filename("tedana", "tests/data/reclassify_quiet_out.txt") - check_integration_outputs(fn, out_dir) - - -def test_integration_reclassify_quiet_string(skip_integration): - if skip_integration: - pytest.skip("Skip reclassify quiet string of integers") - - test_data_path = guarantee_reclassify_data() - out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/quiet")) - - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - args = [ - "ica_reclassify", - "--manacc", - "1,2,3", - "--manrej", - "4,5,6,", - "--out-dir", - out_dir, - reclassify_raw_registry(), - ] - - results = subprocess.run(args, capture_output=True) - assert results.returncode == 0 - fn = resource_filename("tedana", "tests/data/reclassify_quiet_out.txt") 
- check_integration_outputs(fn, out_dir) - - -def test_integration_reclassify_debug(skip_integration): - if skip_integration: - pytest.skip("Skip reclassify debug") - - test_data_path = guarantee_reclassify_data() - out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/debug")) - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - args = [ - "ica_reclassify", - "--manacc", - "1", - "2", - "3", - "--prefix", - "sub-testymctestface", - "--convention", - "orig", - "--tedort", - "--mir", - "--no-reports", - "--out-dir", - out_dir, - "--debug", - reclassify_raw_registry(), - ] - - results = subprocess.run(args, capture_output=True) - assert results.returncode == 0 - fn = resource_filename("tedana", "tests/data/reclassify_debug_out.txt") - check_integration_outputs(fn, out_dir) - - -def test_integration_reclassify_both_rej_acc(skip_integration): - if skip_integration: - pytest.skip("Skip reclassify both rejected and accepted") - - test_data_path = guarantee_reclassify_data() - out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/both_rej_acc")) - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - with pytest.raises( - ValueError, - match=r"The following components were both accepted and", - ): - ica_reclassify_workflow( - reclassify_raw_registry(), - accept=[1, 2, 3], - reject=[1, 2, 3], - out_dir=out_dir, - ) - - -def test_integration_reclassify_run_twice(skip_integration): - if skip_integration: - pytest.skip("Skip reclassify both rejected and accepted") - - test_data_path = guarantee_reclassify_data() - out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/run_twice")) - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - ica_reclassify_workflow( - reclassify_raw_registry(), - accept=[1, 2, 3], - out_dir=out_dir, - no_reports=True, - ) - ica_reclassify_workflow( - reclassify_raw_registry(), - accept=[1, 2, 3], - out_dir=out_dir, - overwrite=True, - no_reports=True, - ) - fn = 
resource_filename("tedana", "tests/data/reclassify_run_twice.txt") - check_integration_outputs(fn, out_dir, n_logs=2) - - -def test_integration_reclassify_no_bold(skip_integration, caplog): - if skip_integration: - pytest.skip("Skip reclassify both rejected and accepted") - - test_data_path = guarantee_reclassify_data() - out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/no_bold")) - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - ioh = InputHarvester(reclassify_raw_registry()) - comptable = ioh.get_file_contents("ICA metrics tsv") - to_accept = [i for i in range(len(comptable))] - - ica_reclassify_workflow( - reclassify_raw_registry(), - reject=to_accept, - out_dir=out_dir, - no_reports=True, - ) - assert "No accepted components remaining after manual classification!" in caplog.text - - fn = resource_filename("tedana", "tests/data/reclassify_no_bold.txt") - check_integration_outputs(fn, out_dir) - - -def test_integration_reclassify_accrej_files(skip_integration, caplog): - if skip_integration: - pytest.skip("Skip reclassify both rejected and accepted") - - test_data_path = guarantee_reclassify_data() - out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/no_bold")) - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - ioh = InputHarvester(reclassify_raw_registry()) - comptable = ioh.get_file_contents("ICA metrics tsv") - to_accept = [i for i in range(len(comptable))] - - ica_reclassify_workflow( - reclassify_raw_registry(), - reject=to_accept, - out_dir=out_dir, - no_reports=True, - ) - assert "No accepted components remaining after manual classification!" 
in caplog.text - - fn = resource_filename("tedana", "tests/data/reclassify_no_bold.txt") - check_integration_outputs(fn, out_dir) - - -def test_integration_reclassify_index_failures(skip_integration): - if skip_integration: - pytest.skip("Skip reclassify index failures") - - test_data_path = guarantee_reclassify_data() - out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/index_failures")) - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - with pytest.raises( - ValueError, - match=r"_parse_manual_list expected a list of integers, but the input is", - ): - ica_reclassify_workflow( - reclassify_raw_registry(), - accept=[1, 2.5, 3], - out_dir=out_dir, - no_reports=True, - ) - - with pytest.raises( - ValueError, - match=r"_parse_manual_list expected integers or a filename, but the input is", - ): - ica_reclassify_workflow( - reclassify_raw_registry(), - accept=[2.5], - out_dir=out_dir, - no_reports=True, - ) - - -def test_integration_t2smap(skip_integration): - """Integration test of the full t2smap workflow using five-echo test data.""" - if skip_integration: - pytest.skip("Skipping t2smap integration test") - test_data_path, osf_id = data_for_testing_info("five-echo") - out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/t2smap_five-echo")) - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - # download data and run the test - download_test_data(osf_id, test_data_path) - prepend = f"{test_data_path}/p06.SBJ01_S09_Task11_e" - suffix = ".sm.nii.gz" - datalist = [prepend + str(i + 1) + suffix for i in range(5)] - echo_times = [15.4, 29.7, 44.0, 58.3, 72.6] - args = ( - ["-d"] - + datalist - + ["-e"] - + [str(te) for te in echo_times] - + ["--out-dir", out_dir, "--fittype", "curvefit"] - ) - t2smap_cli._main(args) - - # compare the generated output files - fname = resource_filename("tedana", "tests/data/nih_five_echo_outputs_t2smap.txt") - # Gets filepaths generated by integration test - found_files = [ - 
os.path.relpath(f, out_dir) - for f in glob.glob(os.path.join(out_dir, "**"), recursive=True)[1:] - ] - - # Compares remaining files with those expected - with open(fname, "r") as f: - expected_files = f.read().splitlines() - assert sorted(expected_files) == sorted(found_files) \ No newline at end of file diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index cfc754d74..e738b5843 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -33,7 +33,7 @@ DEFAULT_N_MAX_ITER, DEFAULT_N_MAX_RESTART, DEFAULT_N_ROBUST_RUNS, - DEFAULT_SEED + DEFAULT_SEED, ) from tedana.stats import computefeats2 from tedana.workflows.parser_utils import check_tedpca_value, is_valid_file @@ -971,4 +971,4 @@ def _main(argv=None): if __name__ == "__main__": - _main() \ No newline at end of file + _main() From a20ff57f39caeff7f3aea4f94b56f1afbf31ab35 Mon Sep 17 00:00:00 2001 From: Bahman Tahayori Date: Wed, 20 Dec 2023 11:02:10 +1100 Subject: [PATCH 08/13] Incorporating the third round of Robert E. Smith's comments --- tedana/config.py | 1 + tedana/decomposition/ica.py | 37 ++++++++++++++++++++------------ tedana/tests/test_integration.py | 12 +++++------ 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/tedana/config.py b/tedana/config.py index 58563de79..54def041a 100644 --- a/tedana/config.py +++ b/tedana/config.py @@ -1,3 +1,4 @@ +"""Setting default values for ICA decomposition.""" DEFAULT_ICA_METHOD = "robustica" DEFAULT_N_ROBUST_RUNS = 30 DEFAULT_N_MAX_ITER = 500 diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index 3074c38db..b64bf95a4 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -27,8 +27,7 @@ def tedica( maxit=DEFAULT_N_MAX_ITER, maxrestart=DEFAULT_N_MAX_RESTART, ): - """ - Perform ICA on `data` with the user selected ica method and returns mixing matrix + """Perform ICA on `data` with the user selected ica method and returns mixing matrix. 
Parameters ---------- @@ -90,9 +89,7 @@ def tedica( def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): - """ - Perform robustica on `data` by running FastICA multiple times (n_robust_runs) - and returns mixing matrix + """Perform robustica on `data` and returns mixing matrix. Parameters ---------- @@ -115,7 +112,6 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): where `C` is components and `T` is the same as in `data` fixed_seed : :obj:`int` Random seed from final decomposition. - """ if n_robust_runs > 200: LGR.warning( @@ -161,27 +157,40 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_ ) - iq = np.array(np.mean(q[q["cluster_id"] >= 0].iq)) # The cluster labeled -1 is noise + iq = np.array( + np.mean(q[q["cluster_id"] >= 0].iq) + ) # Excluding outliers (cluster -1) from the index quality calculation if iq < 0.6: LGR.warning( - "The resultant mean Index Quality is low ({0}). It is recommended to rerun the " - "process with a different seed.".format(iq) + f"The resultant mean Index Quality is low ({iq}). It is recommended to rerun the " + "process with a different seed." ) - mmix = mmix[:, q["cluster_id"] >= 0] + mmix = mmix[ + :, q["cluster_id"] >= 0 + ] # Excluding outliers (cluster -1) when calculating the mixing matrix mmix = stats.zscore(mmix, axis=0) LGR.info( - "RobustICA with {0} robust runs and seed {1} was used. " - "The mean Index Quality is {2}".format(n_robust_runs, fixed_seed, iq) + f"RobustICA with {n_robust_runs} robust runs and seed {fixed_seed} was used. " + f"The mean Index Quality is {iq}." ) + + no_outliers = np.count_nonzero(rica.clustering.labels_ == -1) + if no_outliers: + LGR.info( + "The DBSCAN clustering algorithm detected outliers when clustering " + "components for different runs. 
These outliers are excluded when calculating " + "the index quality and the mixing matrix to maximise the robustness of the " + "decomposition." + ) + return mmix, fixed_seed def f_ica(data, n_components, fixed_seed, maxit, maxrestart): - """ - Perform FastICA on `data` and returns mixing matrix + """Perform FastICA on `data` and returns mixing matrix. Parameters ---------- diff --git a/tedana/tests/test_integration.py b/tedana/tests/test_integration.py index 645057788..570da50d1 100644 --- a/tedana/tests/test_integration.py +++ b/tedana/tests/test_integration.py @@ -285,15 +285,11 @@ def test_integration_robustica_five_echo(skip_integration): pytest.skip("Skipping five-echo integration test") test_data_path, osf_id = data_for_testing_info("five-echo") - out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/five-echo")) - # out_dir_manual = f"{out_dir}-manual" + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/five-echo_robustica")) if os.path.exists(out_dir): shutil.rmtree(out_dir) - # if os.path.exists(out_dir_manual): - # shutil.rmtree(out_dir_manual) - # download data and run the test download_test_data(osf_id, test_data_path) prepend = f"{test_data_path}/p06.SBJ01_S09_Task11_e" @@ -376,7 +372,7 @@ def test_integration_robustica_four_echo(skip_integration): pytest.skip("Skipping four-echo integration test") test_data_path, osf_id = data_for_testing_info("four-echo") - out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/four-echo")) + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/four-echo_robustica")) out_dir_manual = f"{out_dir}-manual" if os.path.exists(out_dir): @@ -394,6 +390,8 @@ def test_integration_robustica_four_echo(skip_integration): data=datalist, mixm=op.join(op.dirname(datalist[0]), "desc-ICA_mixing_static.tsv"), tes=[11.8, 28.04, 44.28, 60.52], + ica_method="robustica", + n_robust_runs=8, out_dir=out_dir, tedpca="kundu-stabilize", gscontrol=["gsr", "mir"], @@ -469,7 
+467,7 @@ def test_integration_robustica_three_echo(skip_integration): pytest.skip("Skipping three-echo integration test") test_data_path, osf_id = data_for_testing_info("three-echo") - out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/three-echo")) + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/three-echo_robustica")) out_dir_manual = f"{out_dir}-rerun" if os.path.exists(out_dir): From 78c8140aab163ef82fb3c6f4a47065bab4f3350d Mon Sep 17 00:00:00 2001 From: Bahman Date: Fri, 9 Feb 2024 16:10:14 +1100 Subject: [PATCH 09/13] Enhance the "ica_method" description suggested by D. Handwerker Co-authored-by: Dan Handwerker <7406227+handwerkerd@users.noreply.github.com> --- tedana/workflows/tedana.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index 3c1fdafdf..720148aed 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -174,11 +174,12 @@ def _get_parser(): "--ica_method", dest="ica_method", help=( - "The applied ICA method. If set to fastica the FastICA " - "from sklearn library will be run once with the seed value. " + "The applied ICA method. fastica runs FastICA from " + "sklearn once with the seed value. " "robustica will run FastICA n_robust_runs times and uses " "clustering methods to overcome the randomness of the FastICA " - "algorithm. When set to fastica, --n_robust_runs option will have no effect." + "algorithm. FastICA was the default in tedana version 23 and earlier. " + "robustica will be slower." ), choices=["robustica", "fastica"], type=str.lower, From ac85e6a9a37d0f180d4baea1a095b7f005daf3d1 Mon Sep 17 00:00:00 2001 From: Bahman Date: Fri, 9 Feb 2024 16:11:57 +1100 Subject: [PATCH 10/13] Enhancing the "n_robust_runs" description suggested by D. 
Handwerkerd Co-authored-by: Dan Handwerker <7406227+handwerkerd@users.noreply.github.com> --- tedana/workflows/tedana.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index 720148aed..193f3ce51 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -204,7 +204,7 @@ def _get_parser(): dest="n_robust_runs", type=int, help=( - "The number of times robustica will run." + "The number of times robustica will run. " "This is only effective when ica_method is " "set to robustica." ), From 979d026bb4cf14efc492cc8c78836ed4f165b155 Mon Sep 17 00:00:00 2001 From: Robert Smith Date: Sun, 11 Feb 2024 23:03:47 +1100 Subject: [PATCH 11/13] RobustICA: Restructure code loop over robust methods (#4) * RobustICA: Restructure code loop over robust methods * Addressing the issue with try/except --------- Co-authored-by: Bahman --- tedana/decomposition/ica.py | 62 +++++++++++++++---------------------- 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index 807da19d0..5743715f8 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -124,39 +124,27 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): if fixed_seed == -1: fixed_seed = np.random.randint(low=1, high=1000) - try: - rica = RobustICA( - n_components=n_components, - robust_runs=n_robust_runs, - whiten="arbitrary-variance", - max_iter=max_it, - random_state=fixed_seed, - robust_dimreduce=False, - fun="logcosh", - robust_method="DBSCAN", - ) - - s, mmix = rica.fit_transform(data) - q = rica.evaluate_clustering( - rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_ - ) - - except: - rica = RobustICA( - n_components=n_components, - robust_runs=n_robust_runs, - whiten="arbitrary-variance", - max_iter=max_it, - random_state=fixed_seed, - robust_dimreduce=False, - fun="logcosh", - 
robust_method="AgglomerativeClustering", - ) - - s, mmix = rica.fit_transform(data) - q = rica.evaluate_clustering( - rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_ - ) + for robust_method in ("DBSCAN", "AgglomerativeClustering"): + + try: + rica = RobustICA( + n_components=n_components, + robust_runs=n_robust_runs, + whiten="arbitrary-variance", + max_iter=max_it, + random_state=fixed_seed, + robust_dimreduce=False, + fun="logcosh", + robust_method=robust_method, + ) + + s, mmix = rica.fit_transform(data) + q = rica.evaluate_clustering( + rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_ + ) + + except Exception: + continue iq = np.array( np.mean(q[q["cluster_id"] >= 0].iq) @@ -181,10 +169,10 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): no_outliers = np.count_nonzero(rica.clustering.labels_ == -1) if no_outliers: LGR.info( - "The DBSCAN clustering algorithm detected outliers when clustering " - "components for different runs. These outliers are excluded when calculating " - "the index quality and the mixing matrix to maximise the robustness of the " - "decomposition." + f"The {robust_method} clustering algorithm detected outliers when clustering " + f"components for different runs. These outliers are excluded when calculating " + f"the index quality and the mixing matrix to maximise the robustness of the " + f"decomposition." ) return mmix, fixed_seed From cac38cd2f06b46e23bbef417d746e6d7b40bccd3 Mon Sep 17 00:00:00 2001 From: Bahman Tahayori Date: Thu, 29 Feb 2024 19:35:50 +1100 Subject: [PATCH 12/13] Applied suggested changes In this commit, some of the comments from Daniel Handwerker and Robert Smith were incorporated. 
--- tedana/config.py | 13 +++++ tedana/decomposition/ica.py | 11 +++- tedana/resources/references.bib | 4 +- tedana/tests/test_integration.py | 93 +++----------------------------- tedana/workflows/parser_utils.py | 27 ++++++++++ tedana/workflows/tedana.py | 9 ++-- 6 files changed, 64 insertions(+), 93 deletions(-) diff --git a/tedana/config.py b/tedana/config.py index 54def041a..11cf2011c 100644 --- a/tedana/config.py +++ b/tedana/config.py @@ -1,6 +1,19 @@ """Setting default values for ICA decomposition.""" + DEFAULT_ICA_METHOD = "robustica" DEFAULT_N_ROBUST_RUNS = 30 DEFAULT_N_MAX_ITER = 500 DEFAULT_N_MAX_RESTART = 10 DEFAULT_SEED = 42 + + +"""Setting extreme values for number of robust runs.""" + +MIN_N_ROBUST_RUNS = 5 +MAX_N_ROBUST_RUNS = 500 +WARN_N_ROBUST_RUNS = 200 + + +"""Setting the warning threshold for the index quality.""" + +WARN_IQ = 0.6 diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index 5743715f8..fc387f478 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -13,6 +13,8 @@ DEFAULT_N_MAX_ITER, DEFAULT_N_MAX_RESTART, DEFAULT_N_ROBUST_RUNS, + WARN_IQ, + WARN_N_ROBUST_RUNS, ) LGR = logging.getLogger("GENERAL") @@ -114,7 +116,7 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): fixed_seed : :obj:`int` Random seed from final decomposition. """ - if n_robust_runs > 200: + if n_robust_runs > WARN_N_ROBUST_RUNS: LGR.warning( "The selected n_robust_runs is a very big number! The process will take a long time!" ) @@ -146,11 +148,16 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): except Exception: continue + LGR.info( + f"The {robust_method} clustering algorithm was used clustering " + f"components across different runs" + ) + iq = np.array( np.mean(q[q["cluster_id"] >= 0].iq) ) # Excluding outliers (cluster -1) from the index quality calculation - if iq < 0.6: + if iq < WARN_IQ: LGR.warning( f"The resultant mean Index Quality is low ({iq}). 
It is recommended to rerun the " "process with a different seed." diff --git a/tedana/resources/references.bib b/tedana/resources/references.bib index 4516882d8..786165404 100644 --- a/tedana/resources/references.bib +++ b/tedana/resources/references.bib @@ -314,7 +314,7 @@ @misc{sochat2015ttoz year = 2015 } -@Article{Anglada:2022, +@Article{Anglada2022, Author = {Anglada-Girotto Miquel and Miravet-Verde Samuel and Serrano Luis and Head Sarah}, Title = {robustica: customizable robust independent component analysis}, Journal = {BMC Bioinformatics}, @@ -322,4 +322,4 @@ @Article{Anglada:2022 Number = {519}, doi = {10.1186/s12859-022-05043-9}, year = 2022 -} \ No newline at end of file +} diff --git a/tedana/tests/test_integration.py b/tedana/tests/test_integration.py index 8c1853fc8..183496525 100644 --- a/tedana/tests/test_integration.py +++ b/tedana/tests/test_integration.py @@ -259,7 +259,8 @@ def test_integration_five_echo(skip_integration): tedana_cli.tedana_workflow( data=datalist, tes=echo_times, - ica_method="fastica", + ica_method="robustica", + n_robust_runs=10, out_dir=out_dir, tedpca=0.95, fittype="curvefit", @@ -279,44 +280,6 @@ def test_integration_five_echo(skip_integration): check_integration_outputs(fn, out_dir) -def test_integration_robustica_five_echo(skip_integration): - """Integration test of the full tedana workflow with robustica using five-echo test data.""" - - if skip_integration: - pytest.skip("Skipping five-echo integration test") - - test_data_path, osf_id = data_for_testing_info("five-echo") - out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/five-echo_robustica")) - - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - # download data and run the test - download_test_data(osf_id, test_data_path) - prepend = f"{test_data_path}/p06.SBJ01_S09_Task11_e" - suffix = ".sm.nii.gz" - datalist = [prepend + str(i + 1) + suffix for i in range(5)] - echo_times = [15.4, 29.7, 44.0, 58.3, 72.6] - tedana_cli.tedana_workflow( - 
data=datalist, - tes=echo_times, - ica_method="robustica", - n_robust_runs=10, - out_dir=out_dir, - tedpca=0.95, - fittype="curvefit", - fixed_seed=49, - tedort=True, - verbose=True, - prefix="sub-01", - ) - - # Just a check on the component table pending a unit test of load_comptable - comptable = os.path.join(out_dir, "sub-01_desc-tedana_metrics.tsv") - df = pd.read_table(comptable) - assert isinstance(df, pd.DataFrame) - - def test_integration_four_echo(skip_integration): """Integration test of the full tedana workflow using four-echo test data.""" @@ -366,51 +329,6 @@ def test_integration_four_echo(skip_integration): check_integration_outputs(fn, out_dir) -def test_integration_robustica_four_echo(skip_integration): - """Integration test of the full tedana workflow with robustica using four-echo test data.""" - - if skip_integration: - pytest.skip("Skipping four-echo integration test") - - test_data_path, osf_id = data_for_testing_info("four-echo") - out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/four-echo_robustica")) - out_dir_manual = f"{out_dir}-manual" - - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - if os.path.exists(out_dir_manual): - shutil.rmtree(out_dir_manual) - - # download data and run the test - download_test_data(osf_id, test_data_path) - prepend = f"{test_data_path}/sub-PILOT_ses-01_task-localizerDetection_run-01_echo-" - suffix = "_space-sbref_desc-preproc_bold+orig.HEAD" - datalist = [prepend + str(i + 1) + suffix for i in range(4)] - tedana_cli.tedana_workflow( - data=datalist, - mixm=op.join(op.dirname(datalist[0]), "desc-ICA_mixing_static.tsv"), - tes=[11.8, 28.04, 44.28, 60.52], - ica_method="robustica", - n_robust_runs=8, - out_dir=out_dir, - tedpca="kundu-stabilize", - gscontrol=["gsr", "mir"], - png_cmap="bone", - prefix="sub-01", - debug=True, - verbose=True, - ) - - ica_reclassify_workflow( - op.join(out_dir, "sub-01_desc-tedana_registry.json"), - accept=[1, 2, 3], - reject=[4, 5, 6], - 
out_dir=out_dir_manual, - mir=True, - ) - - def test_integration_three_echo(skip_integration): """Integration test of the full tedana workflow using three-echo test data.""" @@ -432,7 +350,8 @@ def test_integration_three_echo(skip_integration): tedana_cli.tedana_workflow( data=f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", tes=[14.5, 38.5, 62.5], - ica_method="fastica", + ica_method="robustica", + n_robust_runs=5, out_dir=out_dir, low_mem=True, tedpca="aic", @@ -448,6 +367,10 @@ def test_integration_three_echo(skip_integration): "62.5", "--out-dir", out_dir_manual, + "--ica_method", + "robustica", + "--n_robust_runs", + "5", "--debug", "--verbose", "-f", diff --git a/tedana/workflows/parser_utils.py b/tedana/workflows/parser_utils.py index b0db74bdc..db0f13b37 100644 --- a/tedana/workflows/parser_utils.py +++ b/tedana/workflows/parser_utils.py @@ -3,6 +3,8 @@ import argparse import os.path as op +from tedana.config import MAX_N_ROBUST_RUNS, MIN_N_ROBUST_RUNS + def check_tedpca_value(string, is_parser=True): """ @@ -33,6 +35,31 @@ def check_tedpca_value(string, is_parser=True): return intarg +def check_n_robust_runs_value(string, is_parser=True): + """ + Check n_robust_runs argument. + + Check if argument is an int between MIN_N_ROBUST_RUNS and MAX_N_ROBUST_RUNS. + """ + error = argparse.ArgumentTypeError if is_parser else ValueError + try: + intarg = int(string) + except ValueError: + msg = ( + f"Argument to n_robust_runs must be an integer " + f"between {MIN_N_ROBUST_RUNS} and {MAX_N_ROBUST_RUNS}." + ) + raise error(msg) + + if not (MIN_N_ROBUST_RUNS <= intarg <= MAX_N_ROBUST_RUNS): + raise error( + f"n_robust_runs must be an integer between {MIN_N_ROBUST_RUNS} " + f"and {MAX_N_ROBUST_RUNS}." 
+ ) + else: + return intarg + + def is_valid_file(parser, arg): """Check if argument is existing file.""" if not op.isfile(arg) and arg is not None: diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index 193f3ce51..2333af52a 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -419,11 +419,12 @@ def tedana_workflow( Testing to better understand the effects of the differences is ongoing. Default is 'kundu'. ica_method : {'robustica', 'fastica'}, optional - The applied ICA method. If set to fastica the FastICA from sklearn - library will be run once with the seed value. 'robustica' will run + The applied ICA method. fastica runs FastICA from sklearn + once with the seed value. 'robustica' will run 'FastICA' n_robust_runs times and uses clustering methods to overcome - the randomness of the FastICA algorithm. When set to fastica n_robust_runs - will not be effective. + the randomness of the FastICA algorithm. + FastICA was the default in tedana version 23 and earlier. + robustica will be slower. Default is 'robustica' n_robust_runs : :obj:`int`, optional The number of times robustica will run. This is only effective when 'ica_method' is From a113423e282894466eccc086422f9e0662d6865c Mon Sep 17 00:00:00 2001 From: Bahman Tahayori Date: Mon, 4 Mar 2024 12:14:49 +1100 Subject: [PATCH 13/13] Incorporating more comments * Fixing the problem of argument parser for n_robust_runs. * Removing unnecessary tests from the test_integration. There are 3 tests for echo as before, but the ica_method is robustica for five and three echoes and fastica for the four echo test.
--- tedana/tests/test_integration.py | 50 +------------------------------- tedana/workflows/tedana.py | 9 ++++-- 2 files changed, 7 insertions(+), 52 deletions(-) diff --git a/tedana/tests/test_integration.py b/tedana/tests/test_integration.py index 025e527a3..8a7ad7843 100644 --- a/tedana/tests/test_integration.py +++ b/tedana/tests/test_integration.py @@ -258,7 +258,7 @@ def test_integration_five_echo(skip_integration): data=datalist, tes=echo_times, ica_method="robustica", - n_robust_runs=10, + n_robust_runs=6, out_dir=out_dir, tedpca=0.95, fittype="curvefit", @@ -383,54 +383,6 @@ def test_integration_three_echo(skip_integration): check_integration_outputs(fn, out_dir) -def test_integration_robustica_three_echo(skip_integration): - """Integration test of the full tedana workflow with robustica using three-echo test data.""" - - if skip_integration: - pytest.skip("Skipping three-echo integration test") - - test_data_path, osf_id = data_for_testing_info("three-echo") - out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/three-echo_robustica")) - out_dir_manual = f"{out_dir}-rerun" - - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - - if os.path.exists(out_dir_manual): - shutil.rmtree(out_dir_manual) - - # download data and run the test - download_test_data(osf_id, test_data_path) - tedana_cli.tedana_workflow( - data=f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", - tes=[14.5, 38.5, 62.5], - ica_method="robustica", - out_dir=out_dir, - low_mem=True, - tedpca="aic", - ) - - # Test re-running, but use the CLI - args = [ - "-d", - f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", - "-e", - "14.5", - "38.5", - "62.5", - "--ica_method", - "robustica", - "--out-dir", - out_dir_manual, - "--debug", - "--verbose", - "-f", - "--mix", - os.path.join(out_dir, "desc-ICA_mixing.tsv"), - ] - tedana_cli._main(args) - - def test_integration_reclassify_insufficient_args(skip_integration): if skip_integration: pytest.skip("Skipping reclassify 
insufficient args") diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index ff885b507..552b00d30 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -37,7 +37,11 @@ DEFAULT_SEED, ) from tedana.stats import computefeats2 -from tedana.workflows.parser_utils import check_tedpca_value, is_valid_file +from tedana.workflows.parser_utils import ( + check_n_robust_runs_value, + check_tedpca_value, + is_valid_file, +) LGR = logging.getLogger("GENERAL") RepLGR = logging.getLogger("REPORT") @@ -202,13 +206,12 @@ def _get_parser(): optional.add_argument( "--n_robust_runs", dest="n_robust_runs", - type=int, + type=check_n_robust_runs_value, help=( "The number of times robustica will run. " "This is only effective when ica_method is " "set to robustica." ), - choices=range(5, 500), default=DEFAULT_N_ROBUST_RUNS, ) optional.add_argument(