In [None]:
import itertools
import os.path as op
import gc

import matplotlib
matplotlib.use("Agg")

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.gridspec import GridSpec
import pandas as pd
import ptitprince as pt
from matplotlib.ticker import FormatStrFormatter

In [None]:
# Absolute locations of the two sibling directories this notebook works with,
# resolved against the current working directory.
result_dir, figures_dir = (
    op.abspath(rel_path) for rel_path in ("../results", "../figures")
)
# Decoding outputs are looked up in the "decoding" folder under the results directory.
dec_data_dir = op.join(result_dir, "decoding")

In [None]:
# Category order used on the x-axis of the distribution plots.
# Labels follow "<model>_<dataset>_<method>"; the model varies slowest, then the
# method, and the dataset alternates fastest.
hue_order = [
    f"{model_lb}_{dset_lb}_{method_lb}"
    for model_lb, method_lb, dset_lb in itertools.product(
        ("term", "lda", "gclda"),
        ("Percentile", "KMeans", "KDE"),
        ("neurosynth", "neuroquery"),
    )
]

In [None]:
# Load the decoding correlations and their p-values for every
# (dataset, model, method) combination and every segmentation solution,
# reshaped to long format: one row per (feature, segment) value.
methods = ["Percentile", "KMeans", "KDE"]
dset_names = ["neurosynth", "neuroquery"]
models = ["term", "lda", "gclda"]

# NOTE(review): the names below are not used by this cell; they are kept defined
# in case another cell references them.
hight = 15
method_nm_lst, seg_sol_lst, corr_val_lst, pval_val_lst = [], [], [], []

# One long-format frame per segmentation solution, in loop order:
# data_df_lst[i] holds the rows for seg_sol == i + 3.
data_df_lst = []
for seg_sol in range(3, 33):
    temp_data_df_lst = []
    for dset_name, model, method in itertools.product(dset_names, models, methods):
        corr_dir = op.join(dec_data_dir, f"{dset_name}_{model}_corr_{method}")
        corr_file = op.join(corr_dir, f"corrs_{seg_sol:02d}.csv")
        pval_file = op.join(corr_dir, f"pvals-FDR_{seg_sol:02d}.csv")
        corr_df = pd.read_csv(corr_file, index_col="feature")
        pval_df = pd.read_csv(pval_file, index_col="feature")

        # The p-values are attached to the correlations purely by position after
        # melting, so both tables must list the same features and segments in the
        # same order; otherwise values would be silently paired with the wrong rows.
        if not (
            corr_df.index.equals(pval_df.index)
            and corr_df.columns.equals(pval_df.columns)
        ):
            raise ValueError(
                f"Row/column labels differ between {corr_file} and {pval_file}"
            )

        # Wide (feature x segment) -> long; the column labels become 'seg_id'.
        tmp_data_df = corr_df.melt().rename(columns={"variable": "seg_id", "value": "corr"})
        # Column labels are cast to int and shifted by one
        # (presumably zero-based labels -> 1-based segment ids; verify against the CSVs).
        tmp_data_df["seg_id"] = tmp_data_df["seg_id"].astype(int) + 1
        tmp_data_df["pval"] = pval_df.melt()["value"]

        # Scalars are broadcast to every row; 'method' ends up as the first column.
        tmp_data_df.insert(0, "seg_sol", seg_sol)
        tmp_data_df.insert(0, "method", f"{model}_{dset_name}_{method}")

        temp_data_df_lst.append(tmp_data_df)

    data_df_lst.append(pd.concat(temp_data_df_lst))

In [None]:
# For each segment of the selected segmentation solution(s), draw the distribution
# of correlation values per method as three stacked layers (half violin, strip,
# box) and save one PNG per segment, once for all rows ("uncorrected") and once
# for the rows with pval < 0.05 ("corrected").
sns.set_style("ticks")

cmap = "tab20"
ort = "v"
dy = "corr"
dx = "method"
file_lbs = ["uncorrected", "corrected"]

# The PNGs are written where the page-assembly cells read them from
# (figures_dir/Fig/decoding) instead of a path relative to the working directory.
# The directory must already exist.
panel_dir = op.join(figures_dir, "Fig", "decoding")

for seg_sol in range(5, 6):
    # data_df_lst is indexed from the first solution (3), hence the offset.
    data_df = data_df_lst[seg_sol - 3]
    for seg_id in range(1, seg_sol + 1):
        sub_data_uncorr_df = data_df.query(f"seg_sol == {seg_sol} & seg_id == {seg_id}")
        # Same rows as above, restricted to pval < 0.05.
        sub_data_corr_df = sub_data_uncorr_df.query("pval < 0.05")
        for file_lb, sub_data_df in zip(file_lbs, [sub_data_uncorr_df, sub_data_corr_df]):
            fig, ax = plt.subplots(1, 1)
            fig.set_size_inches(15, 2.5)

            # Arguments shared by the three layers drawn on the same axes.
            layer_kws = dict(
                x=dx,
                y=dy,
                data=sub_data_df,
                order=hue_order,
                palette=cmap,
                orient=ort,
                ax=ax,
            )

            pt.half_violinplot(
                bw=0.1,
                cut=0.0,
                width=1.3,
                dodge=False,
                inner=None,
                alpha=1,
                scale="area",
                offset=.17,
                saturation=1,
                **layer_kws,
            )
            sns.stripplot(
                edgecolor="white",
                dodge=False,
                size=2,
                jitter=1,
                zorder=0,
                **layer_kws,
            )
            sns.boxplot(
                width=0.2,
                zorder=10,
                dodge=True,
                showcaps=True,
                showfliers=False,
                boxprops={"zorder": 9},
                whiskerprops={"color": "black", "zorder": 10},
                capprops={"color": "black", "zorder": 10},
                medianprops={"color": "black", "zorder": 10},
                saturation=1,
                **layer_kws,
            )

            # Bare panel: no axis titles and no x tick labels; y ticks with two decimals.
            ax.set_ylabel("")
            ax.set_xlabel("")
            ax.yaxis.set_tick_params(labelsize=18)
            ax.set_xticklabels([])
            ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
            ax.grid(axis='y', which='major', color='gray', alpha=0.5)

            fig.tight_layout()
            fig.savefig(
                op.join(panel_dir, f"distributions-{file_lb}_{seg_sol:02d}-{seg_id:02d}.png"),
                bbox_inches="tight",
                dpi=1000,
            )
            # Close this exact figure. Calling plt.clf() after plt.close() would act
            # on whatever figure is current then (creating a new one if none is left).
            plt.close(fig)
            gc.collect()

In [None]:
# Stack the per-segment "corrected" distribution PNGs into pages of n_rows panels,
# save each page as a numbered EPS file, and print the LaTeX lines that include it.
# Pages whose EPS file already exists are skipped, but still advance the counters.
n_cols = 1
n_rows = 5
w = 7.5
h = 5

img_lbs = ["PCT", "KMeans", "KDE"]  # NOTE(review): not used in this cell
step = 0  # number of panels drawn so far; every n_rows-th panel gets the title
row = 0  # grid row of the next panel on the current page
fig_i = 66  # number used in the output file name of the current page
fig = None  # page currently being assembled, if any
for segment_size in range(32, 33):
    for segment_id in range(28, segment_size + 1):
        if row == 0:
            # First panel of a page: choose the page's file and emit its LaTeX.
            out_file = op.join(figures_dir, "Fig", "decoding", f"{fig_i:02d}_distributions-corrected.eps")
            # "\\" keeps the printed text identical while avoiding the invalid
            # "\i" escape sequence.
            print(f"\\includegraphics[scale=1]{{{fig_i:02d}_distributions-corrected.eps}}\n")
            print("\\includegraphics[scale=0.37]{legend.pdf}\n")

        if not op.exists(out_file):
            file = op.join(
                figures_dir,
                "Fig",
                "decoding",
                f"distributions-corrected_{segment_size:02d}-{segment_id:02d}.png",
            )

            add_title = step % n_rows == 0
            step += 1

            if row == 0:
                fig = plt.figure(figsize=(w, h))
                fig.subplots_adjust(
                    left=None, bottom=None, right=None, top=None, wspace=0.9, hspace=None
                )
                gs = GridSpec(n_rows, n_cols, figure=fig)

            img = mpimg.imread(file)
            ax = fig.add_subplot(gs[row, 0], aspect="equal")
            ax.imshow(img)

            ax.set_xticks([])
            ax.set_yticks([])
            if add_title:
                ax.set_ylabel(f"Segment\nSolution\n\n\n{segment_size:02d}-{segment_id:02d}", rotation=0, labelpad=25, fontsize=10)
                ax.set_title("Correlation Coefficient Distributions", fontsize=12)
            else:
                ax.set_ylabel(f"{segment_size:02d}-{segment_id:02d}", rotation=0, labelpad=25, fontsize=10)
            plt.setp(ax.spines.values(), color=None)

            if row == n_rows - 1:
                # Page is full: write it out and start a new one.
                row = 0
                fig_i += 1
                fig.tight_layout(pad=0.1, w_pad=0.6, h_pad=0.1)
                fig.savefig(
                    out_file,
                    bbox_inches="tight",
                    dpi=500
                )
                # Close this exact figure (plt.clf() after plt.close() would act on
                # a different or newly created figure).
                plt.close(fig)
                fig = None
                gc.collect()
            else:
                row += 1
        elif row == n_rows - 1:
            row = 0
            fig_i += 1
        else:
            row += 1

# A last page with fewer than n_rows panels has already been announced by the
# \includegraphics line above, so write it out too instead of leaving it unsaved
# and open.
if fig is not None:
    fig.tight_layout(pad=0.1, w_pad=0.6, h_pad=0.1)
    fig.savefig(
        out_file,
        bbox_inches="tight",
        dpi=500
    )
    plt.close(fig)
    fig = None
    gc.collect()

In [None]:
# Stack the per-segment "uncorrected" distribution PNGs into pages of n_rows panels,
# save each page as a numbered EPS file, and print the LaTeX lines that include it.
# Pages whose EPS file already exists are skipped, but still advance the counters.
n_cols = 1
n_rows = 5
w = 7.5
h = 5

img_lbs = ["PCT", "KMeans", "KDE"]  # NOTE(review): not used in this cell
step = 0  # number of panels drawn so far; every n_rows-th panel gets the title
row = 0  # grid row of the next panel on the current page
fig_i = 66  # number used in the output file name of the current page
fig = None  # page currently being assembled, if any
for segment_size in range(32, 33):
    for segment_id in range(28, segment_size + 1):
        if row == 0:
            # First panel of a page: choose the page's file and emit its LaTeX.
            out_file = op.join(figures_dir, "Fig", "decoding", f"{fig_i:02d}_distributions-uncorrected.eps")
            # "\\" keeps the printed text identical while avoiding the invalid
            # "\i" escape sequence.
            print(f"\\includegraphics[scale=1]{{{fig_i:02d}_distributions-uncorrected.eps}}\n")
            print("\\includegraphics[scale=0.37]{legend.pdf}\n")

        if not op.exists(out_file):
            file = op.join(
                figures_dir,
                "Fig",
                "decoding",
                f"distributions-uncorrected_{segment_size:02d}-{segment_id:02d}.png",
            )

            add_title = step % n_rows == 0
            step += 1

            if row == 0:
                fig = plt.figure(figsize=(w, h))
                fig.subplots_adjust(
                    left=None, bottom=None, right=None, top=None, wspace=0.9, hspace=None
                )
                gs = GridSpec(n_rows, n_cols, figure=fig)

            img = mpimg.imread(file)
            ax = fig.add_subplot(gs[row, 0], aspect="equal")
            ax.imshow(img)

            ax.set_xticks([])
            ax.set_yticks([])
            if add_title:
                ax.set_ylabel(f"Segment\nSolution\n\n\n{segment_size:02d}-{segment_id:02d}", rotation=0, labelpad=25, fontsize=10)
                ax.set_title("Correlation Coefficient Distributions", fontsize=12)
            else:
                ax.set_ylabel(f"{segment_size:02d}-{segment_id:02d}", rotation=0, labelpad=25, fontsize=10)
            plt.setp(ax.spines.values(), color=None)

            if row == n_rows - 1:
                # Page is full: write it out and start a new one.
                row = 0
                fig_i += 1
                fig.tight_layout(pad=0.1, w_pad=0.6, h_pad=0.1)
                fig.savefig(
                    out_file,
                    bbox_inches="tight",
                    dpi=500
                )
                # Close this exact figure (plt.clf() after plt.close() would act on
                # a different or newly created figure).
                plt.close(fig)
                fig = None
                gc.collect()
            else:
                row += 1
        elif row == n_rows - 1:
            row = 0
            fig_i += 1
        else:
            row += 1

# A last page with fewer than n_rows panels has already been announced by the
# \includegraphics line above, so write it out too instead of leaving it unsaved
# and open.
if fig is not None:
    fig.tight_layout(pad=0.1, w_pad=0.6, h_pad=0.1)
    fig.savefig(
        out_file,
        bbox_inches="tight",
        dpi=500
    )
    plt.close(fig)
    fig = None
    gc.collect()