In [34]:
import re
import time
from collections import OrderedDict
from itertools import accumulate, product
from pathlib import Path

import networkx as nx
import nibabel as nib
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib as mpl
from matplotlib import cm
from matplotlib import pyplot as plt
from matplotlib import ticker

# Render figures through the pgf backend so that text is typeset by LuaLaTeX
mpl.use("pgf")

# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names; fall back to the old name on older
# installations.
_whitegrid_style = "seaborn-v0_8-whitegrid"
if _whitegrid_style not in plt.style.available:
    _whitegrid_style = "seaborn-whitegrid"
plt.style.use(_whitegrid_style)

plt.rcParams["font.family"] = "sans-serif"
plt.rcParams["text.usetex"] = True
plt.rcParams["pgf.rcfonts"] = False
plt.rcParams["pgf.texsystem"] = "lualatex"

# LaTeX preamble used by the pgf backend. Every backslash is written as "\\"
# so that the literal contains no invalid Python escape sequences; the
# resulting text is the LaTeX source with single backslashes.
plt.rcParams["pgf.preamble"] = """
\\usepackage{fontspec}
\\usepackage[T1]{fontenc}
\\usepackage[utf8]{inputenc}
\\usepackage{unicode-math}

\\usepackage{graphicx}
\\usepackage[table,svgnames]{xcolor}

\\defaultfontfeatures{
    Extension = .otf,
}

\\setmainfont{HelveticaNeueLTStd}[
    UprightFont=*-Roman,
    ItalicFont=*-It,
    BoldFont=*-Md,
    BoldItalicFont=*-MdIt,
    FontFace={xl}{n}{*-UltLt},
    FontFace={xl}{it}{*-UltLtIt},
    FontFace={l}{n}{*-Lt},
    FontFace={l}{it}{*-LtIt},
    FontFace={mb}{n}{*-Md},
    FontFace={mb}{it}{*-MdIt},
    FontFace={k}{n}{*-Blk},
    FontFace={k}{it}{*-BlkIt},
    Scale=0.9,
]
\\setsansfont{HelveticaNeueLTStd}[
    UprightFont=*-Roman,
    ItalicFont=*-It,
    BoldFont=*-Md,
    BoldItalicFont=*-MdIt,
    FontFace={xl}{n}{*-UltLt},
    FontFace={xl}{it}{*-UltLtIt},
    FontFace={l}{n}{*-Lt},
    FontFace={l}{it}{*-LtIt},
    FontFace={mb}{n}{*-Md},
    FontFace={mb}{it}{*-MdIt},
    FontFace={k}{n}{*-Blk},
    FontFace={k}{it}{*-BlkIt},
    Scale=0.9,
]

\\setmonofont{EssentialPragmataPro}[
  Extension = .ttf,
  Scale = 0.95
]

\\setmathfont{latinmodern-math.otf}
\\setmathfont[
    range=\\mathup,
    Scale=0.9,
]{HelveticaNeueLTStd-Roman}

\\definecolor{leaGrey}{HTML}{A6A6A6}
\\colorlet{leaLightGrey}{leaGrey!15}

\\newcommand{\\soft}[1]{%
\\smash{\\ttfamily%
\\begingroup\\fboxsep=1pt%
\\colorbox{leaLightGrey}{#1}%
\\endgroup}}
"""

## Load time series data

In [3]:
# Preprocessing steps to show, one figure row per entry, in this order
steps = [
    "resample",
    "smooth",
    "ica_aroma",
    "temporal_filter",
    # "confound_regression",
]

# Regressor groups for which explained variance is computed
variable_groups = [
    "task",
    # "ica_aroma_signal",
    # "ica_aroma_noise",
    # "motion",
]

In [4]:
# Regular expressions, per regressor group, that a confound column name has to
# match in full to be assigned to that group.
variable_group_patterns = {
    "ica_aroma_signal": [r"aroma_signal_[0-9]+"],
    "ica_aroma_noise": [r"aroma_noise_[0-9]+"],
    "motion": [
        # r"framewise_displacement",
        # r"dvars",
        # r"std_dvars",
        # r"rmsd",
        # r"(trans|rot)_[xyz](_derivative1)?(_power2)?",
        r"(trans|rot)_[xyz]",
    ],
    "wm_csf": [
        r"(white_matter|csf)(_derivative1)?(_power2)?",
        r"csf_wm",
    ],
    "a_comp_cor": [r"a_comp_cor_0[0-4]"],
    "global_signal": [r"global_signal(_derivative1)?(_power2)?"],
}

In [5]:
# Index of the single voxel that is read from every image
voxel_coordinate = 61, 15, 47

# Spacing between consecutive samples on the time axis of the plots
# (presumably seconds, since the axis formatter treats the values as seconds)
repetition_time = 2.0

In [6]:
# Task design matrix
design_file = "data/sub-01_task-faces_run-01_feature-taskBased_desc-design_matrix.tsv"

# Confound table for each preprocessing step
confound_files = {
    "resample": "data/merge_with_header.tsv",
    "smooth": "data/merge_with_header.tsv",
    "ica_aroma": "data/merge_with_header_regfilt.tsv",
    "temporal_filter": "data/merge_with_header_regfilt_bptf_addmean.tsv",
    "confound_regression": "data/merge_with_header_regfilt_bptf_addmean_regfilt.tsv",
}

# Image for each preprocessing step
image_files = {
    "resample": "data/vol0000_xform-00000_merged_masked.nii.gz",
    "smooth": "data/vol0000_xform-00000_merged_masked_afni.nii.gz",
    "ica_aroma": "data/vol0000_xform-00000_merged_masked_afni_grandmeanscaled_regfilt.nii.gz",
    "temporal_filter": "data/vol0000_xform-00000_merged_masked_afni_grandmeanscaled_regfilt_bptf_addmean.nii.gz",
    "confound_regression": "data/vol0000_xform-00000_merged_masked_afni_grandmeanscaled_regfilt_bptf_addmean_regfilt.nii.gz",
}

# Steps whose image data is multiplied by the grand mean scaling factor on load
need_to_scale = {"resample", "smooth"}

In [7]:
# Reconstruct the grand mean scaling factor as the mean ratio between the
# global signal in the "resample" confounds table and the unscaled global
# signal

unscaled = pd.read_table("data/confounds_expansion_desc-motion_outliers.tsv")["global_signal"]
data_frame = pd.read_table(confound_files["resample"])
grand_mean_scaling_factor = data_frame["global_signal"].div(unscaled).mean()

In [8]:
# Tab-separated design matrix, used as the "task" regressor group
design = pd.read_csv(design_file, sep="\t")

In [9]:
# Read the data of the selected voxel from every image
# (presumably a time series, i.e. the images are assumed to be 4D)
image_data = {}
for step, image_file in image_files.items():
    image = nib.load(image_file)
    voxel_series = image.dataobj[voxel_coordinate].astype(float)
    if step in need_to_scale:
        # Apply the reconstructed grand mean scaling factor to these steps
        voxel_series = voxel_series * grand_mean_scaling_factor
    image_data[step] = voxel_series

In [10]:
# For every step, collect the regressors of each variable group:
# the task design plus the demeaned confound columns selected by pattern
regressor_data = {}
for step, confound_file in confound_files.items():
    groups = {"task": design}
    regressor_data[step] = groups

    data_frame = pd.read_table(confound_file)
    data_frame = data_frame - data_frame.mean()  # demean

    for variable_group, patterns in variable_group_patterns.items():
        # Columns are ordered by pattern first, then by position in the table
        columns = []
        for pattern in patterns:
            for column in data_frame.columns:
                if re.fullmatch(pattern, column) is not None:
                    columns.append(column)

        groups[variable_group] = data_frame[columns]

## Calculation

In [11]:
# n: number of variable groups, m: number of preprocessing steps
n, m = len(variable_groups), len(steps)

In [12]:
def predict(y, x):
    """Return the in-sample OLS prediction of ``y`` from ``x`` plus an intercept.

    ``y`` is the dependent variable and must expose ``size``. ``x`` holds the
    regressors and must provide ``fillna`` and ``values`` (for example a
    pandas DataFrame); missing entries are replaced by zero before fitting.
    """
    intercept = np.ones([y.size, 1])
    regressors = x.fillna(0).values
    model = sm.OLS(endog=y, exog=np.hstack([intercept, regressors]))
    return model.fit().predict()

In [13]:
# Demeaned voxel data for every step
y_dict = {step: image_data[step] - image_data[step].mean() for step in steps}

# For every step, fit one OLS model per subset of variable groups and keep its
# R-squared, keyed by the 0/1 selection tuple over `variable_groups`
rsquareds_dict = {}
for step in steps:
    y = y_dict[step]
    intercept = np.ones([y.size, 1])

    step_rsquareds = {}
    for is_selected in product((0, 1), repeat=len(variable_groups)):
        design_columns = [intercept]
        for variable_group, flag in zip(variable_groups, is_selected):
            if flag == 1:
                design_columns.append(
                    regressor_data[step][variable_group].fillna(0).values
                )

        fit = sm.OLS(endog=y, exog=np.hstack(design_columns)).fit()
        step_rsquareds[is_selected] = fit.rsquared

    rsquareds_dict[step] = step_rsquareds

In [30]:
# Labels

# Panel titles per preprocessing step. Each title starts with a bold panel
# enumerator written as "<letter>)"; the titles are LaTeX source and use the
# \soft macro defined in the pgf preamble.
step_labels = dict(
    resample="{\\fontseries{b}\\selectfont A)} \\soft{fMRIPrep} minimally preprocessed image",
    smooth="{\\fontseries{b}\\selectfont B)} + Smoothing",
    ica_aroma="{\\fontseries{b}\\selectfont C)} + ICA-AROMA denoising",
    temporal_filter="{\\fontseries{b}\\selectfont D)} + Temporal filter (high-pass only)",
    # Enumerator written as "E)" to match the other panels (was "E.")
    confound_regression="{\\fontseries{b}\\selectfont E)} Post confound regression (6 motion parameters)",
)

# Display names of the variable groups
variable_group_labels = dict(
    task="Task",
    ica_aroma_signal="ICA-AROMA\nSignal",
    ica_aroma_noise="ICA-AROMA\nNoise",
    motion="Motion",
    wm_csf="WM/CSF",
    global_signal="Global signal",
    a_comp_cor="aCompCor",
    auto_corr="Autocorrelation",
)

In [16]:
# Plotting helpers

# Formats a tick value given in seconds as minutes:seconds. The second
# argument is the tick position that Matplotlib passes to FuncFormatter
# callbacks; it is not used.
time_formatter = ticker.FuncFormatter(
    lambda seconds, pos: time.strftime("%M:%S", time.gmtime(seconds))
)

# Formats tick values as percentages
percent_formatter = ticker.PercentFormatter()

# Qualitative color map, indexed by the position of a variable group.
# pyplot.get_cmap is used instead of matplotlib.cm.get_cmap because the latter
# is deprecated and has been removed from recent Matplotlib releases.
cmap = plt.get_cmap("Set2")

In [39]:
# One figure row per preprocessing step: the left panel shows the voxel data
# together with the task model fit, the right panel shows the percentage of
# variance explained by each variable group on its own.
fig, axs = plt.subplots(
    m,
    2,
    figsize=(7.5, 8),
    gridspec_kw=dict(width_ratios=[0.9, 1]),
    squeeze=False,  # keep axs two-dimensional even if there is only one step
)
fig.tight_layout()

# Share the y-axis down the left column and the x-axis down the right column.
# Axes.sharey/sharex are used because joining the groupers returned by
# get_shared_y_axes()/get_shared_x_axes() is deprecated and no longer
# supported by recent Matplotlib releases.
for k in range(1, m):
    axs[k, 0].sharey(axs[0, 0])
    axs[k, 1].sharex(axs[0, 1])

for k, step in enumerate(steps):
    y = y_dict[step]
    rsquareds = rsquareds_dict[step]

    # Position of every sample on the time axis
    sample_times = np.arange(len(y)) * repetition_time

    left_panel = axs[k, 0]

    left_panel.set_title(step_labels[step], loc="left", pad=10)

    left_panel.grid(False)

    left_panel.set_yticks([])

    left_panel.xaxis.set_major_formatter(time_formatter)
    left_panel.set(xlabel="Time")

    # Model fit, plotted before the observed data
    for key in ["task"]:
        y_hat = predict(y, regressor_data[step][key])

        left_panel.plot(
            sample_times,
            y_hat,
            "o--",
            color=(
                *cmap(variable_groups.index(key))[:3],
                0.75,  # opacity
            ),
            linewidth=0.5,
            markersize=1,
        )

    # Observed (demeaned) voxel data
    left_panel.plot(
        sample_times,
        y,
        "o-",
        color="black",
        linewidth=0.5,
        markersize=1,
    )

    right_panel = axs[k, 1]

    right_panel.set_yticks([])
    right_panel.set_ylim(-1.25, 1)

    right_panel.set_xlim(0, 100)
    right_panel.xaxis.set_major_formatter(percent_formatter)
    right_panel.set(xlabel="Task variance explained [%]")

    for i in range(n):
        # R-squared of the model that contains only variable group i
        rsquared = rsquareds[tuple(1 if j == i else 0 for j in range(n))] * 100.0

        # Groups are stacked top to bottom in the order of `variable_groups`
        bar_position = n - i - 1.125

        right_panel.barh(
            bar_position, rsquared, 0.75, color=cmap(i),
        )

        # Value label to the right of the bar
        right_panel.text(
            rsquared + 5,
            bar_position,
            f"{rsquared:.0f}%",
            ha="left",
            va="center",
            color="black",
            bbox=dict(
                boxstyle="square,pad=-0.07",
                edgecolor="white",
                facecolor="white",
            ),
        )

plt.subplots_adjust(wspace=0.05, hspace=0.5, top=0.95, bottom=0.05, right=0.95)

plt.savefig("confs.pdf", backend="pgf")