Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exp/3 pca seq #909

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
144 changes: 144 additions & 0 deletions dev_tools/pca_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@

import matplotlib

matplotlib.use("AGG")

import logging

LGR = logging.getLogger("GENERAL")
MPL_LGR = logging.getLogger("matplotlib")
MPL_LGR.setLevel(logging.WARNING)
RepLGR = logging.getLogger("REPORT")

import json
import os
import subprocess

import nibabel as nib
import numpy as np
import pandas as pd
from scipy import io as scipy_io
from tedana.workflows import tedana as tedana_cli

from mapca import MovingAveragePCA

# --- Configuration --------------------------------------------------------
# OpenNeuro dataset ID and the local working directory it is cloned into.
repoid = "ds000258"
wdir = "/home/portega/Downloads"

# Local clone location; create it if it does not exist yet.
repo = os.path.join(wdir, repoid)
if not os.path.isdir(repo):
    os.mkdir(repo)

print(">>> INSTALLING DATASET:")
# Clone the dataset with datalad. Use an argument list (no shell) so the
# URL is not re-parsed by /bin/sh; `cwd=wdir` replaces the former
# `cd {wdir} && ...` shell prefix.
subprocess.run(
    ["datalad", "install", f"git@github.com:OpenNeuroDatasets/{repoid}.git"],
    cwd=wdir,
)

# Names of the subjects that were processed (filled by the main loop).
subjects = []

# Results table: one row per subject with the number-of-components
# estimates from maPCA (Python) and GIFT (MATLAB) for each criterion
# (AIC, KIC, MDL). The GIFT_* columns are placeholders for now.
results_df = pd.DataFrame(
    columns=["Subject", "maPCA_AIC", "GIFT_AIC", "maPCA_KIC", "GIFT_KIC", "maPCA_MDL", "GIFT_MDL"]
)


# --- Per-subject processing ----------------------------------------------
# For each subject: fetch the functional data, run tedana to get the
# optimally combined time series, estimate the number of PCA components
# with maPCA, export inputs for the GIFT (MATLAB) comparison, and record
# the estimates.
for sbj in os.listdir(repo):
    sbj_dir = os.path.join(repo, sbj)

    # Only process subject directories (e.g. "sub-01"). Check the basename,
    # not the full path: a "sub-" substring anywhere in the parent path
    # would otherwise trigger a false match.
    if os.path.isdir(sbj_dir) and sbj.startswith("sub-"):
        echo_times = []
        func_files = []

        subjects.append(os.path.basename(sbj_dir))

        print(">>>> Downloading subject", sbj)
        subprocess.run(f"datalad get {sbj}/func", shell=True, cwd=repo)

        print(">>>> Searching for functional files and echo times")

        # Collect echo times from the JSON sidecars and the paths of the
        # multi-echo functional images.
        for func_file in os.listdir(os.path.join(repo, sbj, "func")):
            if func_file.endswith(".json"):
                with open(os.path.join(repo, sbj, "func", func_file)) as f:
                    data = json.load(f)
                echo_times.append(data["EchoTime"])
            elif func_file.endswith(".nii.gz"):
                func_files.append(os.path.join(repo, sbj, "func", func_file))

        # Echo times sorted ascending and converted from seconds to
        # milliseconds (the unit tedana expects).
        echo_times = np.array(sorted(echo_times)) * 1000

        # Sort filenames so they line up with the sorted echo times
        # (assumes BIDS echo-N naming sorts in echo order — TODO confirm).
        func_files = sorted(func_files)
        print(func_files)

        # Tedana output directory
        tedana_output_dir = os.path.join(sbj_dir, "tedana")

        print("Running tedana")
        tedana_cli.tedana_workflow(
            data=func_files,
            tes=echo_times,
            out_dir=tedana_output_dir,
            tedpca="mdl",
        )

        # tedana's optimally combined data and adaptive good-signal mask
        tedana_optcom = os.path.join(tedana_output_dir, "desc-optcom_bold.nii.gz")
        tedana_mask = os.path.join(tedana_output_dir, "desc-adaptiveGoodSignal_mask.nii.gz")

        tedana_optcom_img = nib.load(tedana_optcom)
        tedana_mask_img = nib.load(tedana_mask)

        # Export data and mask to .mat files so the GIFT (MATLAB)
        # implementation can read the same inputs.
        tedana_optcom_mat = os.path.join(sbj_dir, "optcom_bold.mat")
        tedana_mask_mat = os.path.join(sbj_dir, "mask.mat")
        print("Saving tedana optimally combined data and mask into mat files")
        scipy_io.savemat(tedana_optcom_mat, {"data": tedana_optcom_img.get_fdata()})
        scipy_io.savemat(tedana_mask_mat, {"mask": tedana_mask_img.get_fdata()})

        # Run maPCA on the optimally combined data.
        print("Running mapca")
        pca = MovingAveragePCA(normalize=True)
        _ = pca.fit_transform(tedana_optcom_img, tedana_mask_img)

        # Component-number estimates for each information criterion.
        aic = pca.aic_
        kic = pca.kic_
        mdl = pca.mdl_

        # Free disk space: drop the tedana output and the raw subject data.
        subprocess.run(f"rm -rf {tedana_output_dir}", shell=True, cwd=repo)
        subprocess.run(f"rm -rf {sbj}/anat", shell=True, cwd=repo)
        subprocess.run(f"rm -rf {sbj}/func", shell=True, cwd=repo)

        # TODO: run the GIFT MATLAB script here (subprocess.run) and fill
        # in the GIFT_* columns below instead of the 0 placeholders.
        print("Running GIFT version of maPCA")

        print("Appending AIC, KIC and MDL values to a pandas dataframe")
        # DataFrame.append was deprecated in pandas 1.4 and removed in
        # pandas 2.0; build a one-row frame and concatenate instead.
        new_row = pd.DataFrame(
            [
                {
                    "Subject": sbj,
                    "maPCA_AIC": aic,
                    "GIFT_AIC": 0,
                    "maPCA_KIC": kic,
                    "GIFT_KIC": 0,
                    "maPCA_MDL": mdl,
                    "GIFT_MDL": 0,
                }
            ]
        )
        results_df = pd.concat([results_df, new_row], ignore_index=True)

        print("Subject", sbj, "done")

# Persist the per-subject comparison table.
results_df.to_csv(os.path.join(wdir, "results.csv"), index=False)


98 changes: 69 additions & 29 deletions tedana/decomposition/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,10 @@
def low_mem_pca(data):
"""
Run Singular Value Decomposition (SVD) on input data.

Parameters
----------
data : (S [*E] x T) array_like
Optimally combined (S x T) or full multi-echo (S*E x T) data.

Returns
-------
u : (S [*E] x C) array_like
Expand Down Expand Up @@ -185,6 +183,7 @@ def tedpca(
:py:mod:`tedana.constants` : The module describing the filenames for
various naming conventions
"""
# TODO refactor this to make it nice
if algorithm == "kundu":
alg_str = (
"followed by the Kundu component selection decision tree \\citep{kundu2013integrated}"
Expand Down Expand Up @@ -268,20 +267,15 @@ def tedpca(
f"90% varexp: {varex_90_varexp}% | 95% varexp: {varex_95_varexp}%"
)

pca_optimization_curves = np.array([aic["value"], kic["value"], mdl["value"]])
pca_criteria_components = np.array(
[
n_aic,
n_kic,
n_mdl,
n_varex_90,
n_varex_95,
]
)

# Plot maPCA optimization curves
LGR.info("Plotting maPCA optimization curves")
plot_pca_results(pca_optimization_curves, pca_criteria_components, all_varex, io_generator)
# # Plot maPCA optimization curves once all relevant mehtods have been
# # computed
# LGR.info("Plotting maPCA optimization curves")
# plot_pca_results(
# pca_optimization_curves,
# pca_criteria_components,
# all_varex, # TODO find out what this is doing
# io_generator # TODO find out what this is doing
# )

# Save maPCA results into a dictionary
mapca_results = {
Expand All @@ -300,6 +294,7 @@ def tedpca(
"explained_variance_total": mdl_varexp,
"curve": mdl["value"],
},
# TODO add the new values
"varex_90": {
"n_components": n_varex_90,
"explained_variance_total": varex_90_varexp,
Expand Down Expand Up @@ -387,35 +382,80 @@ def tedpca(
io_generator.save_file(comp_maps, "z-scored PCA components img")

# Select components using decision tree
if algorithm == "kundu":
if isinstance(algorithm, float):
alg_str = "variance explained-based"
elif isinstance(algorithm, int):
alg_str = "a fixed number of components and no"
else:
alg_str = algorithm
LGR.info(
f"Selected {comptable.shape[0]} components with {round(100*varex_norm.sum(),2)}% "
f"normalized variance explained using {alg_str} dimensionality detection"
)
comptable["classification"] = "accepted"
comptable["rationale"] = ""

if algorithm in ["mdl", "aic", "kic"]:
# Continue the standard branch of the logic with Kundu
# TODO the logic here needs a big refactoring

comptable, metric_metadata = kundu_tedpca(
comptable,
n_echos,
kdaw,
rdaw,
stabilize=False,
)
elif algorithm == "kundu-stabilize":
n_kundu = np.sum(comptable["classification"] == "accepted")
values_kundu = np.array(comptable["variance explained"])

comptable, metric_metadata = kundu_tedpca(
comptable,
n_echos,
kdaw,
rdaw,
stabilize=True,
)
else:
if isinstance(algorithm, float):
alg_str = "variance explained-based"
elif isinstance(algorithm, int):
alg_str = "a fixed number of components and no"
n_kundu_st = np.sum(comptable["classification"] == "accepted")
values_kundu_st = np.array(comptable["variance explained"])

n_val_kundu = values_kundu_st.shape[0]
n_val = aic["value"].shape[0]
if n_val > n_val_kundu:
values_kundu = np.concatenate(
[values_kundu, np.array([np.nan] * (n_val - n_val_kundu))]
)
values_kundu_st = np.concatenate(
[values_kundu_st, np.array([np.nan] * (n_val - n_val_kundu))]
)
else:
alg_str = algorithm
LGR.info(
f"Selected {comptable.shape[0]} components with {round(100*varex_norm.sum(),2)}% "
f"normalized variance explained using {alg_str} dimensionality estimate"
values_kundu = values_kundu[:n_val]
values_kundu_st = values_kundu_st[:n_val]

pca_optimization_curves = np.concatenate(
[
aic["value"].reshape(1, -1),
kic["value"].reshape(1, -1),
mdl["value"].reshape(1, -1),
values_kundu.reshape(1, -1),
values_kundu_st.reshape(1, -1),
]
)
pca_criteria_components = np.array(
[
n_aic,
n_kic,
n_mdl,
n_kundu,
n_kundu_st,
n_varex_90,
n_varex_95,
]
)
comptable["classification"] = "accepted"
comptable["rationale"] = ""

# Plot PCA selection curves
LGR.info("Plotting PCA selection curves")
plot_pca_results(pca_optimization_curves, pca_criteria_components, all_varex, io_generator)

# Save decomposition files
comp_names = [
Expand Down
36 changes: 27 additions & 9 deletions tedana/reporting/static_figures.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,26 +343,26 @@ def pca_results(criteria, n_components, all_varex, io_generator):
)
# Vertical line depicting the optimal number of components for 90% variance explained
plt.vlines(
n_components[3],
n_components[5],
ymin=np.min(criteria),
ymax=np.max(criteria),
color="tab:red",
color="tab:brown",
linestyles="dashed",
label="90% varexp",
)
# Vertical line depicting the optimal number of components for 95% variance explained
plt.vlines(
n_components[4],
n_components[6],
ymin=np.min(criteria),
ymax=np.max(criteria),
color="tab:purple",
color="tab:pink",
linestyles="dashed",
label="95% varexp",
)

plt.legend()

#  Save the plot
# Save the plot
plot_name = "pca_criteria.png"
pca_criteria_name = os.path.join(io_generator.out_dir, "figures", plot_name)
plt.savefig(pca_criteria_name)
Expand Down Expand Up @@ -403,28 +403,46 @@ def pca_results(criteria, n_components, all_varex, io_generator):
linestyles="dashed",
label="MDL",
)
# Vertical line depicting the optimal number of components for 90% variance explained
# Vertical line depicting the optimal number of components for Kundu
plt.vlines(
n_components[3],
ymin=0,
ymax=1,
color="tab:red",
linestyles="dashed",
label="90% varexp",
label="Kundu",
)
# Vertical line depicting the optimal number of components for 95% variance explained
# Vertical line depicting the optimal number of components for Kundu stabilized
plt.vlines(
n_components[4],
ymin=0,
ymax=1,
color="tab:purple",
linestyles="dashed",
label="Kundu stabilized",
)
# Vertical line depicting the optimal number of components for 90% variance explained
plt.vlines(
n_components[5],
ymin=0,
ymax=1,
color="tab:brown",
linestyles="dashed",
label="90% varexp",
)
# Vertical line depicting the optimal number of components for 95% variance explained
plt.vlines(
n_components[6],
ymin=0,
ymax=1,
color="tab:pink",
linestyles="dashed",
label="95% varexp",
)

plt.legend()

#  Save the plot
# Save the plot
plot_name = "pca_variance_explained.png"
pca_variance_explained_name = os.path.join(io_generator.out_dir, "figures", plot_name)
plt.savefig(pca_variance_explained_name)
Expand Down
5 changes: 3 additions & 2 deletions tedana/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,8 @@ def test_integration_three_echo(skip_integration):
low_mem=True,
tedpca="aic",
)

# TODO push changes to my branch
# TODO script downloading data...
# Test re-running, but use the CLI
args = [
"-d",
Expand All @@ -353,7 +354,7 @@ def test_integration_three_echo(skip_integration):
"--mix",
os.path.join(out_dir, "desc-ICA_mixing.tsv"),
]
tedana_cli._main(args)
tedana_cli._main(args) # FIXME ficheros no coinciden

# compare the generated output files
fn = resource_filename("tedana", "tests/data/cornell_three_echo_outputs.txt")
Expand Down