Adding robustica option to ICA decomposition to achieve consistent results #1013

Draft: wants to merge 20 commits into base branch main (changes shown from 19 of the 20 commits).

Commits:
f4eaa3e  Add robustica method (BahmanTahayori, Jul 31, 2023)
b0cac3a  Incorporation of major comments regarding robustica addition (BahmanTahayori, Dec 5, 2023)
55c2ae4  Add robustica 0.1.3 to dependency list (BahmanTahayori, Nov 1, 2023)
cd55a3f  Multiple fixes to RobustICA addition from code review (BahmanTahayori, Dec 5, 2023)
2d9b007  Specify magic number fixed seed of 42 as a constant (BahmanTahayori, Nov 29, 2023)
09e565e  Merge remote-tracking branch 'upstream/main' into add_robustica_rsclean (BahmanTahayori, Dec 5, 2023)
fc5f9ea  Updated (BahmanTahayori, Dec 5, 2023)
4fc3043  Robustica Updates (BahmanTahayori, Dec 6, 2023)
a20ff57  Incorporating the third round of Robert E. Smith's comments (BahmanTahayori, Dec 20, 2023)
cc5e05d  Merge pull request #3 from BahmanTahayori/add_robustica_rsclean (BahmanTahayori, Dec 20, 2023)
a449fec  Merge branch 'ME-ICA:main' into main (BahmanTahayori, Feb 9, 2024)
78c8140  Enhance the "ica_method" description suggested by D. Handwerker (BahmanTahayori, Feb 9, 2024)
ac85e6a  Enhancing the "n_robust_runs" description suggested by D. Handwerkerd (BahmanTahayori, Feb 9, 2024)
979d026  RobustICA: Restructure code loop over robust methods (#4) (Lestropie, Feb 11, 2024)
71d8d4a  merging recent changes (BahmanTahayori, Feb 21, 2024)
cac38cd  Applied suggested changes (BahmanTahayori, Feb 29, 2024)
5fcf148  Fixing the conflict (BahmanTahayori, Feb 29, 2024)
b7d08e9  Merge branch 'ME-ICA:main' into main (BahmanTahayori, Feb 29, 2024)
a113423  Incorporating more comments (BahmanTahayori, Mar 4, 2024)
b60e9a6  Merge remote-tracking branch 'upstream/main' (Lestropie, Apr 12, 2024)
pyproject.toml (1 addition, 0 deletions)

@@ -30,6 +30,7 @@ dependencies = [
"pandas>=2.0,<=2.2.1",
"pybtex",
"pybtex-apa-style",
"robustica>=0.1.3",
Member:

Adding this dependency makes a lot of sense, but I want to highlight to @tsalo & others that this will also install two additional dependencies: scikit-learn-extra & https://tqdm.github.io/.
We've had issues with adding modestly supported modules in the past (😱 duecredit 😱), so we'll want to keep an eye on all three of these, particularly in relation to #934.

Also, tqdm is a progress bar module. If we're going to require it, we might want to think about whether there are other parts of the code where it might be useful.

Member:

It looks like there's a problem with scikit-learn-extra in the Python 3.12 tests.

Collaborator:

I have an opinion here: I had a branch where I played with putting the progress bar in for curvefit (I think), because it is slow and can be confusing when it just sits there. I think it worked nicely, but I got distracted. Once this is pulled in, I'll look into doing that again. I don't think it makes sense for loglin, but we could add it anyway (being forward-looking, maybe it will be slow someday on some data?).

Member:

I like that idea. I've used tqdm before without issue (e.g., in NiMARE), so I'm all for adding progress bars. Not sure about applicability to loglin though. Don't we only loop over unique adaptive mask values there, rather than voxels?

Collaborator:

Good point; yeah, it makes no sense there. I hadn't even checked it. Curve fit can use it, and maybe figure creation? Honestly, once it punches through the ICA steps it's hustling, so it may not be needed elsewhere.

"scikit-learn>=0.21, <=1.4.1.post1",
"scipy>=1.2.0, <=1.12.0",
"threadpoolctl",
tedana/config.py (19 additions, 0 deletions)

@@ -0,0 +1,19 @@
"""Setting default values for ICA decomposition."""
Member:

I'd lean against making this a config file. Anyone who installs tedana using pip install tedana won't have easy access to editing or viewing this config file, and it would be overwritten every time they reinstall or pull a new version of the code from GitHub. It would be one thing if tedana looked for a config file in some other default location (definitely worth considering), but I think having it with the code will cause more problems than benefits.

I'd lean towards keeping our current method of setting defaults with input options, so it's clearer to the end user what the used options are.

Member:

I'm noticing that the config file variables get used in both the argument parser and the variable defaults for tedana_workflow. Given we've had a couple of bugs where those diverged, having them defined in one central place might be useful. I don't have a strong opinion on whether a config.py or global variables at the top of tedana.py make more sense.

Member:

If we're concerned about defaults not matching across functions, we can just make all parameters require the parameter name (e.g., func(*, param1, param2)) and drop defaults from our internal functions. The only places where there could be default mismatch at that point are the CLI argument parser and the main workflow function. I think that's more straightforward than adding in a config file or using global variables.

Reviewer:

Without necessarily pushing for one solution or the other code-structure-wise, I would say that if this kind of solution is used, perhaps defaults.py would be a better name than config.py?

Also, while removing defaults from function arguments would reduce the problem, it might not remove it completely. Sometimes, for a command-line option, you want to be able to state the default behaviour or value within the help string. While it's easy to cross-reference within the add_argument() call what's quoted in the help string vs. what's set as the default, having both read from the same symbol is IMO more robust.


DEFAULT_ICA_METHOD = "robustica"
DEFAULT_N_ROBUST_RUNS = 30
DEFAULT_N_MAX_ITER = 500
DEFAULT_N_MAX_RESTART = 10
DEFAULT_SEED = 42


"""Setting extreme values for number of robust runs."""

MIN_N_ROBUST_RUNS = 5
MAX_N_ROBUST_RUNS = 500
WARN_N_ROBUST_RUNS = 200


"""Setting the warning threshold for the index quality."""

WARN_IQ = 0.6
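For context, a sketch of how these constants would be consumed; the actual parser wiring lives in tedana's workflow code and is not part of this diff. Both the CLI parser and any function signature import the same symbol, and the help string interpolates it, which addresses the cross-referencing concern raised above:

    from argparse import ArgumentParser

    from tedana.config import DEFAULT_ICA_METHOD, DEFAULT_N_ROBUST_RUNS

    parser = ArgumentParser()
    parser.add_argument(
        "--ica_method",
        default=DEFAULT_ICA_METHOD,
        help=f"ICA method to use (default: {DEFAULT_ICA_METHOD}).",
    )
    parser.add_argument(
        "--n_robust_runs",
        type=int,
        default=DEFAULT_N_ROBUST_RUNS,
        help=f"Number of robust runs (default: {DEFAULT_N_ROBUST_RUNS}).",
    )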
tedana/decomposition/ica.py (175 additions, 7 deletions)

@@ -4,25 +4,47 @@
import warnings

import numpy as np
from robustica import RobustICA
from scipy import stats
from sklearn.decomposition import FastICA

from tedana.config import (
DEFAULT_ICA_METHOD,
DEFAULT_N_MAX_ITER,
DEFAULT_N_MAX_RESTART,
DEFAULT_N_ROBUST_RUNS,
WARN_IQ,
WARN_N_ROBUST_RUNS,
)

LGR = logging.getLogger("GENERAL")
RepLGR = logging.getLogger("REPORT")


def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10):
"""Perform ICA on ``data`` and return mixing matrix.
def tedica(
data,
n_components,
fixed_seed,
ica_method=DEFAULT_ICA_METHOD,
n_robust_runs=DEFAULT_N_ROBUST_RUNS,
maxit=DEFAULT_N_MAX_ITER,
maxrestart=DEFAULT_N_MAX_RESTART,
):
"""Perform ICA on `data` with the user selected ica method and returns mixing matrix.

Parameters
----------
data : (S x T) :obj:`numpy.ndarray`
Dimensionally reduced optimally combined functional data, where `S` is
samples and `T` is time
n_components : :obj:`int`
Number of components retained from PCA decomposition
Number of components retained from PCA decomposition.
fixed_seed : :obj:`int`
Seed for ensuring reproducibility of ICA results
Seed for ensuring reproducibility of ICA results.
ica_method : :obj:`str`
Selected ICA method; can be "fastica" or "robustica".
n_robust_runs : :obj:`int`
Selected number of robust runs when robustica is used. Default is 30.
maxit : :obj:`int`, optional
Maximum number of iterations for ICA. Default is 500.
maxrestart : :obj:`int`, optional
@@ -38,16 +38,162 @@ def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10):
fixed_seed : :obj:`int`
Random seed from final decomposition.

Notes
-----
Uses `sklearn` implementation of FastICA for decomposition
"""
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")
RepLGR.info(
"Independent component analysis was then used to "
"decompose the dimensionally reduced dataset."
)

ica_method = ica_method.lower()

if ica_method == "robustica":
mmix, fixed_seed = r_ica(
data,
n_components=n_components,
fixed_seed=fixed_seed,
n_robust_runs=n_robust_runs,
max_it=maxit,
)
elif ica_method == "fastica":
mmix, fixed_seed = f_ica(
data,
n_components=n_components,
fixed_seed=fixed_seed,
maxit=maxit,
maxrestart=maxrestart,
)
else:
raise ValueError(f"The selected ICA method ({ica_method}) is invalid! It must be 'fastica' or 'robustica'.")

return mmix, fixed_seed
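A minimal usage sketch of the new dispatch on synthetic data (array sizes and component count are arbitrary):

    import numpy as np

    from tedana.decomposition.ica import tedica

    data = np.random.rand(1000, 150)  # S x T dimensionally reduced data
    mmix, used_seed = tedica(
        data,
        n_components=20,
        fixed_seed=42,
        ica_method="robustica",
        n_robust_runs=30,
    )
    print(mmix.shape)  # (150, 20): T x C z-scored mixing matrix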


def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it):
"""Perform robustica on `data` and returns mixing matrix.

Parameters
----------
data : (S x T) :obj:`numpy.ndarray`
Dimensionally reduced optimally combined functional data, where `S` is
samples and `T` is time
n_components : :obj:`int`
Number of components retained from PCA decomposition.
fixed_seed : :obj:`int`
Seed for ensuring reproducibility of ICA results.
n_robust_runs : :obj:`int`
Selected number of robust runs when robustica is used. Default is 30.
max_it : :obj:`int`, optional
Maximum number of iterations for ICA. Default is 500.

Returns
-------
mmix : (T x C) :obj:`numpy.ndarray`
Z-scored mixing matrix for converting input data to component space,
where `C` is components and `T` is the same as in `data`
fixed_seed : :obj:`int`
Random seed from final decomposition.
"""
if n_robust_runs > WARN_N_ROBUST_RUNS:
LGR.warning(
"The selected n_robust_runs is a very big number! The process will take a long time!"
)

RepLGR.info("RobustICA package was used for ICA decomposition \\citep{Anglada2022}.")

if fixed_seed == -1:
fixed_seed = np.random.randint(low=1, high=1000)

for robust_method in ("DBSCAN", "AgglomerativeClustering"):

try:
rica = RobustICA(
n_components=n_components,
robust_runs=n_robust_runs,
whiten="arbitrary-variance",
max_iter=max_it,
random_state=fixed_seed,
robust_dimreduce=False,
fun="logcosh",
robust_method=robust_method,
)

s, mmix = rica.fit_transform(data)
q = rica.evaluate_clustering(
rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_
)

except Exception:
continue

LGR.info(
f"The {robust_method} clustering algorithm was used to cluster "
f"components across different runs"
)

break  # stop after the first clustering method that succeeds

iq = np.array(
np.mean(q[q["cluster_id"] >= 0].iq)
) # Excluding outliers (cluster -1) from the index quality calculation

if iq < WARN_IQ:
LGR.warning(
f"The resultant mean Index Quality is low ({iq}). It is recommended to rerun the "
"process with a different seed."
)

mmix = mmix[
:, q["cluster_id"] >= 0
] # Excluding outliers (cluster -1) when calculating the mixing matrix
mmix = stats.zscore(mmix, axis=0)

LGR.info(
f"RobustICA with {n_robust_runs} robust runs and seed {fixed_seed} was used. "
f"The mean Index Quality is {iq}."
)

no_outliers = np.count_nonzero(rica.clustering.labels_ == -1)
if no_outliers:
LGR.info(
f"The {robust_method} clustering algorithm detected outliers when clustering "
f"components for different runs. These outliers are excluded when calculating "
f"the index quality and the mixing matrix to maximise the robustness of the "
f"decomposition."
)

return mmix, fixed_seed
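For orientation, the robustica calls used above reduce to the following standalone sketch on toy data (the keyword arguments mirror the call in r_ica; the data and sizes are arbitrary):

    import numpy as np
    from robustica import RobustICA

    x = np.random.rand(500, 100)  # S x T toy matrix
    rica = RobustICA(
        n_components=10,
        robust_runs=30,
        whiten="arbitrary-variance",
        robust_method="DBSCAN",
    )
    s, mmix = rica.fit_transform(x)  # sources and mixing matrix
    q = rica.evaluate_clustering(
        rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_
    )
    iq = q[q["cluster_id"] >= 0].iq.mean()  # mean index quality, outliers excluded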


def f_ica(data, n_components, fixed_seed, maxit, maxrestart):
"""Perform FastICA on `data` and returns mixing matrix.

Parameters
----------
data : (S x T) :obj:`numpy.ndarray`
Dimensionally reduced optimally combined functional data, where `S` is
samples and `T` is time
n_components : :obj:`int`
Number of components retained from PCA decomposition
fixed_seed : :obj:`int`
Seed for ensuring reproducibility of ICA results
maxit : :obj:`int`, optional
Maximum number of iterations for ICA. Default is 500.
maxrestart : :obj:`int`, optional
Maximum number of attempted decompositions to perform with different
random seeds. ICA will stop running if there is convergence prior to
reaching this limit. Default is 10.

Returns
-------
mmix : (T x C) :obj:`numpy.ndarray`
Z-scored mixing matrix for converting input data to component space,
where `C` is components and `T` is the same as in `data`
fixed_seed : :obj:`int`
Random seed from final decomposition.

Notes
-----
Uses `sklearn` implementation of FastICA for decomposition
"""
if fixed_seed == -1:
fixed_seed = np.random.randint(low=1, high=1000)

tedana/resources/references.bib (11 additions, 0 deletions)

@@ -333,3 +333,14 @@ @article{tedana_decision_trees
year = {2024},
doi = {10.6084/m9.figshare.25251433.v1}
}

@Article{Anglada2022,
  Author = {Anglada-Girotto, Miquel and Miravet-Verde, Samuel and Serrano, Luis and Head, Sarah},
  Title = {robustica: customizable robust independent component analysis},
  Journal = {BMC Bioinformatics},
  Volume = {23},
  Number = {519},
  doi = {10.1186/s12859-022-05043-9},
  year = {2022}
}

tedana/tests/test_integration.py (9 additions, 0 deletions)

@@ -257,6 +257,8 @@ def test_integration_five_echo(skip_integration):
tedana_cli.tedana_workflow(
data=datalist,
tes=echo_times,
ica_method="robustica",
n_robust_runs=6,
out_dir=out_dir,
tedpca=0.95,
fittype="curvefit",
@@ -302,6 +304,7 @@ def test_integration_four_echo(skip_integration):
data=datalist,
mixm=op.join(op.dirname(datalist[0]), "desc-ICA_mixing_static.tsv"),
tes=[11.8, 28.04, 44.28, 60.52],
ica_method="fastica",
out_dir=out_dir,
tedpca="kundu-stabilize",
gscontrol=["gsr", "mir"],
@@ -346,6 +349,8 @@ def test_integration_three_echo(skip_integration):
tedana_cli.tedana_workflow(
data=f"{test_data_path}/three_echo_Cornell_zcat.nii.gz",
tes=[14.5, 38.5, 62.5],
ica_method="robustica",
n_robust_runs=5,
out_dir=out_dir,
low_mem=True,
tedpca="aic",
@@ -361,6 +366,10 @@
"62.5",
"--out-dir",
out_dir_manual,
"--ica_method",
"robustica",
"--n_robust_runs",
"5",
"--debug",
"--verbose",
"-f",
tedana/workflows/parser_utils.py (27 additions, 0 deletions)

@@ -3,6 +3,8 @@
import argparse
import os.path as op

from tedana.config import MAX_N_ROBUST_RUNS, MIN_N_ROBUST_RUNS


def check_tedpca_value(string, is_parser=True):
"""
@@ -33,6 +35,31 @@ def check_tedpca_value(string, is_parser=True):
return intarg


def check_n_robust_runs_value(string, is_parser=True):
"""
Check n_robust_runs argument.

Check if argument is an int between MIN_N_ROBUST_RUNS and MAX_N_ROBUST_RUNS.
"""
error = argparse.ArgumentTypeError if is_parser else ValueError
try:
intarg = int(string)
except ValueError:
msg = (
f"Argument to n_robust_runs must be an integer "
f"between {MIN_N_ROBUST_RUNS} and {MAX_N_ROBUST_RUNS}."
)
raise error(msg)

if not (MIN_N_ROBUST_RUNS <= intarg <= MAX_N_ROBUST_RUNS):
raise error(
f"n_robust_runs must be an integer between {MIN_N_ROBUST_RUNS} "
f"and {MAX_N_ROBUST_RUNS}."
)
return intarg


def is_valid_file(parser, arg):
"""Check if argument is existing file."""
if not op.isfile(arg) and arg is not None:
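A short sketch of how the validator behaves and how it would plug into the parser; the actual add_argument call lives in tedana's workflow parser, which this diff does not show:

    import argparse

    from tedana.config import DEFAULT_N_ROBUST_RUNS
    from tedana.workflows.parser_utils import check_n_robust_runs_value

    check_n_robust_runs_value("30")  # returns 30
    # check_n_robust_runs_value("2") raises ArgumentTypeError (below MIN_N_ROBUST_RUNS)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--n_robust_runs",
        type=check_n_robust_runs_value,
        default=DEFAULT_N_ROBUST_RUNS,
    )
    args = parser.parse_args(["--n_robust_runs", "25"])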