In [None]:
import os.path as op
import gc

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
def plot_profile(data_df, metric, hue_order, cmap="tab20"):
    """Save one line plot of ``metric`` per segment solution, plus a shared legend.

    For each of 30 segment solutions (``segment_solution`` values 3 to 32)
    the matching rows of ``data_df`` are drawn with ``segment`` on the x-axis
    and one line per ``method``. The legend attached to every panel lists the
    mean and standard deviation of ``metric`` for each method, ordered by
    descending mean. After the loop a separate figure holding only the
    method-name legend is written.

    All figures are saved as EPS files under ``./Fig/performance``; the
    directory is not created here and must already exist.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Table with the columns ``segment_solution``, ``segment``, ``method``
        and the column named by ``metric``.
    metric : str
        Column to plot. ``"max_corr"``, ``"information_content"`` and
        ``"tfidf"`` get a dedicated y-axis label; other values are plotted
        without one.
    hue_order : list of str
        Method names in drawing order. Each name is split on ``"_"`` into
        exactly three parts (method, dataset, segmentation) to build the
        short labels of the shared legend.
    cmap : str, optional
        Palette passed to ``seaborn.lineplot``.

    Returns
    -------
    None
    """
    sns.set(style="whitegrid")

    n_segments = 30
    # Shared by every panel and by the stand-alone legend drawn after the loop.
    fontsize = 14
    out_dir = op.join("./Fig", "performance")

    for seg_sol in range(n_segments):
        fig, ax = plt.subplots(1, 1)
        # Widen the canvas as the number of segments on the x-axis grows.
        fig.set_size_inches(9 + seg_sol * 0.2, 4)

        # Loop index 0 corresponds to segment solution 3.
        test_df = data_df[data_df["segment_solution"] == seg_sol + 3]
        test_df = test_df.reset_index()
        # String labels make seaborn treat the x-axis as categorical.
        test_df["segment"] = test_df["segment"].astype(str)

        sns.lineplot(
            data=test_df,
            x="segment",
            y=metric,
            palette=cmap,
            hue="method",
            hue_order=hue_order,
            marker="o",
            ax=ax,
        )
        if seg_sol == 0:
            # Capture the method handles once; they feed the stand-alone
            # legend figure produced after the loop.
            handles, labels = ax.get_legend_handles_labels()
            # NOTE(review): these look like the same artist objects that the
            # get_legend_handles_labels() call further down returns, so this
            # width may be overwritten with 6 in this very iteration - confirm.
            for handle in handles:
                handle.set_linewidth(8)
            new_labels = []
            for label in labels:
                method, dset_nm, seg = label.split("_")
                method = method.upper()
                seg = "PCT" if seg == "Percentile" else seg
                dset_nm = "NS" if dset_nm == "neurosynth" else "NQ"
                new_labels.append(f"{dset_nm}-{method}-{seg}")

        # Drop seaborn's default legend; a statistics legend replaces it below.
        ax.get_legend().remove()

        text_lst = []
        mean_lst = []
        for approach in hue_order:
            values = test_df.loc[test_df["method"] == approach, metric]
            approach_mean = values.mean()
            text_lst.append(f"{approach_mean:.3f} \u00b1 {values.std():.3f}")
            mean_lst.append(approach_mean)

        ax_handles, _ = ax.get_legend_handles_labels()
        # Highest mean first.
        sort_idx = np.argsort(-np.array(mean_lst))
        for handle in ax_handles:
            handle.set_linewidth(6)

        # Raw string so the mathtext backslashes are not read as escapes.
        legend_title = r"$Mean \pm \sigma$"
        ax.legend(
            np.array(ax_handles)[sort_idx],
            np.array(text_lst)[sort_idx],
            loc="upper left",
            bbox_to_anchor=(1.04, 1),
            ncol=2,
            title=legend_title,
            fontsize=fontsize,
        )

        ax.set_xlabel('Segment ID', fontsize=fontsize)
        plt.xticks(fontsize=fontsize)
        if metric == "max_corr":
            ax.set_ylabel('Max Correlation Coefficient', fontsize=fontsize)
            plt.yticks(fontsize=fontsize)
            ax.set_yticks([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
            ax.set_yticklabels([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], fontsize=fontsize)
        elif metric == "information_content":
            ax.set_ylabel('Information Content', fontsize=fontsize)
            plt.yticks(fontsize=fontsize)
        elif metric == "tfidf":
            ax.set_ylabel('TFIDF', fontsize=fontsize)

        ax.set_title(f"Segment Solution {seg_sol + 3:02d}", fontsize=fontsize)
        fig.tight_layout()
        # File names use the 0-based loop index, not the segment-solution number.
        fig.savefig(op.join(out_dir, f"{metric}_profile_{seg_sol}.eps"), bbox_inches="tight")
        # Close explicitly so 30 figures do not accumulate in memory.
        plt.close(fig)
        gc.collect()

    # Stand-alone legend with the short method names captured in iteration 0.
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(4, 4)
    fig.legend(
        handles,
        new_labels,
        loc="center",
        ncol=9,
        fontsize=fontsize,
    )
    ax.axis('off')
    fig.tight_layout()
    fig.savefig(op.join(out_dir, f"{metric}_profile_legend.eps"), bbox_inches="tight")
    plt.close(fig)
    gc.collect()

In [None]:
def plot_mean_profile(data_df, metric, hue_order, cmap="tab20"):
    """Save a vertical profile of the mean ``metric`` across segment solutions.

    ``metric`` is drawn on the x-axis (moved to the top of the plot) and
    ``segment_solution`` on the y-axis, with one line per ``method``. Rows are
    plotted in the order they appear in ``data_df`` (``sort=False``) and are
    not aggregated (``estimator=None``). The figure is written to
    ``./Fig/performance/mean_<metric>_profile.eps``; the directory must
    already exist.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Table with the columns ``segment_solution``, ``method`` and the
        column named by ``metric``.
    metric : str
        Column to plot. ``"max_corr"``, ``"ic"``, ``"tfidf"`` and ``"snr"``
        get a dedicated x-axis label and font size; any other value is
        plotted with the default font size and no custom x-axis label.
    hue_order : list of str
        Method names in drawing order.
    cmap : str, optional
        Palette passed to ``seaborn.lineplot``.

    Returns
    -------
    None
    """
    sns.set(style="white")
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(3, 15)

    sns.lineplot(
        data=data_df,
        x=metric,
        y="segment_solution",
        palette=cmap,
        hue="method",
        hue_order=hue_order,
        sort=False,
        marker="o",
        ax=ax,
        estimator=None,
    )
    ax.get_legend().remove()
    ax.spines[['right', "bottom"]].set_visible(False)
    # The value axis sits on top of the tall, narrow panel.
    ax.xaxis.set_ticks_position('top')
    ax.xaxis.set_label_position('top')

    # Default so that an unlisted metric does not leave ``fontsize`` unbound
    # (mirrors the default used by plot_mean_sbars).
    fontsize = 12
    if metric == "max_corr":
        ax.set_xlabel('Mean Correlation Coefficient', fontsize=fontsize)
        ax.set_xticks([0.1, 0.2, 0.3, 0.4, 0.5])
        ax.set_xticklabels([0.1, 0.2, 0.3, 0.4, 0.5], fontsize=fontsize)
    elif metric == "ic":
        fontsize = 16
        ax.set_xlabel('Information Content', fontsize=fontsize, labelpad=10)
    elif metric == "tfidf":
        fontsize = 16
        ax.set_xlabel('Mean TFIDF', fontsize=fontsize, labelpad=10)
    elif metric == "snr":
        ax.set_xlabel('SNR', fontsize=fontsize, labelpad=10)

    plt.xticks(fontsize=fontsize)
    ax.set_ylabel('Segment Solution', fontsize=fontsize)
    plt.yticks(fontsize=fontsize)

    fig.savefig(op.join("./Fig", "performance", f"mean_{metric}_profile.eps"), bbox_inches="tight")
    plt.close(fig)
    gc.collect()

In [None]:
def plot_mean_sbars(sub_mean_data_df, metric, hue_order, cmap="tab20"):
    """Save a stacked horizontal bar chart of ``metric`` per segment solution.

    Each row of ``sub_mean_data_df`` becomes one horizontal bar and each
    column one stacked component. The y-axis is inverted so the first row is
    drawn at the top, and the value axis is moved to the top of the plot.
    The figure is written to ``./Fig/performance/mean_<metric>_barh.eps``;
    the directory must already exist.

    Parameters
    ----------
    sub_mean_data_df : pandas.DataFrame
        Wide table to plot (rows are bars, columns are stacked components).
    metric : str
        Used for the x-axis label and the output file name. ``"max_corr"``,
        ``"ic"``, ``"tfidf"`` and ``"snr"`` get a dedicated label; any other
        value keeps the default font size and no custom x-axis label.
    hue_order : list of str
        Only its length is used here: it sets how many colours are taken
        from the colormap.
    cmap : str, optional
        Name of the matplotlib colormap supplying the bar colours.

    Returns
    -------
    None
    """
    sns.set(style="white")

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(3, 15)

    # Honour the ``cmap`` argument instead of a hard-coded colormap name.
    palette = plt.get_cmap(cmap)
    n_colors = len(hue_order)
    if hasattr(palette, "colors"):
        # Listed colormaps (e.g. "tab20") expose their colours directly.
        bar_colors = palette.colors[:n_colors]
    else:
        # Colormaps without a colour list are sampled evenly instead.
        bar_colors = [palette(i / max(n_colors - 1, 1)) for i in range(n_colors)]

    sub_mean_data_df.plot.barh(
        rot=0,
        width=.81,
        stacked=True,
        color=bar_colors,
        ax=ax,
    )
    # First row of the table at the top of the chart.
    ax.invert_yaxis()
    ax.get_legend().remove()
    ax.spines[['right', "bottom"]].set_visible(False)
    ax.xaxis.set_ticks_position('top')
    ax.xaxis.set_label_position('top')

    # Default font size; some metrics override it below.
    fontsize = 12
    if metric == "max_corr":
        ax.set_xlabel('Mean Correlation Coefficient', fontsize=fontsize)
        ax.set_xticks([0.1, 0.2, 0.3, 0.4, 0.5])
        ax.set_xticklabels([0.1, 0.2, 0.3, 0.4, 0.5], fontsize=fontsize)
    elif metric == "ic":
        fontsize = 16
        ax.set_xlabel('Information Content', fontsize=fontsize, labelpad=10)
    elif metric == "tfidf":
        fontsize = 16
        ax.set_xlabel('Mean TFIDF', fontsize=fontsize, labelpad=10)
    elif metric == "snr":
        fontsize = 16
        ax.set_xlabel('Normalized SNR', fontsize=fontsize)

    plt.xticks(fontsize=fontsize)
    ax.set_ylabel('Segment Solution', fontsize=fontsize)
    plt.yticks(fontsize=fontsize)

    fig.savefig(op.join("./Fig", "performance", f"mean_{metric}_barh.eps"), bbox_inches="tight")
    plt.close(fig)
    gc.collect()

In [None]:
# Absolute input/output locations, resolved from the current working directory.
result_dir, figure_dir = op.abspath("../results"), op.abspath("./Fig")

# Method identifiers have the form "<method>_<dataset>_<segmentation>".
# The dataset varies fastest, then the segmentation, then the method, which
# gives 18 names in the order the plots use for colours and legends.
hue_order = [
    f"{method_nm}_{dset_nm}_{seg_nm}"
    for method_nm in ("term", "lda", "gclda")
    for seg_nm in ("Percentile", "KMeans", "KDE")
    for dset_nm in ("neurosynth", "neuroquery")
]

In [None]:
# Load the tab-separated performance table from the results directory.
data_df = pd.read_csv(
    op.join(result_dir, "performance", "performance.tsv"),
    sep="\t",
)
data_df

In [None]:
# Load the tab-separated table of averaged performance values and store the
# segment-solution column as strings.
mean_data_df = pd.read_csv(
    op.join(result_dir, "performance", "performance_average.tsv"),
    sep="\t",
).astype({"segment_solution": str})
mean_data_df

In [None]:
# Write the per-segment-solution profile figures for every metric column.
for metric in ("max_corr", "information_content", "tfidf"):
    plot_profile(data_df, metric, hue_order)

In [None]:
# Print LaTeX \includegraphics lines for the 30 information-content profile
# figures, followed by a legend include after every fifth figure.
# Backslashes are escaped explicitly: "\i" is not a valid escape sequence and
# triggers a warning on recent Python versions. The printed text is unchanged.
# NOTE(review): this references "legend.pdf", whereas plot_profile saves
# "<metric>_profile_legend.eps" - confirm the legend file name is intended.
for i in range(1, 31):
    print(f"\\includegraphics[scale=0.47]{{information_content_profile_{i-1}.eps}}\n")

    if i % 5 == 0:
        print("\\includegraphics[scale=0.37]{legend.pdf}\n")

In [None]:
# Write one mean-profile figure per averaged metric column.
for metric in ("max_corr", "ic", "tfidf", "snr"):
    plot_mean_profile(mean_data_df, metric, hue_order)

In [32]:
# For each averaged metric, build a wide table (one row per segment solution,
# one column per method) and draw it as a stacked horizontal bar chart.
for metric in ("max_corr", "ic", "tfidf", "snr"):
    sub_mean_data_df = mean_data_df.pivot_table(
        values=metric,
        index=mean_data_df["segment_solution"],
        columns="method",
    ).reindex(columns=hue_order)

    # The index labels are strings; cast to int so the sort is numeric rather
    # than lexicographic, then restore string labels.
    sub_mean_data_df.index = sub_mean_data_df.index.astype(int)
    sub_mean_data_df = sub_mean_data_df.sort_index()
    sub_mean_data_df.index = sub_mean_data_df.index.astype(str)

    plot_mean_sbars(sub_mean_data_df, metric, hue_order)