In [1]:
import itertools
import math
import os
import os.path as op
import warnings
from ast import literal_eval

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [5]:

def plot_radar(corrs, features, model, out_fig):
    """Draw a polar bar ("radar") chart of correlations and save it to disk.

    At most the first ten entries of ``corrs``/``features`` are drawn, in the
    order they are given (no sorting happens here). Bars are coloured with the
    "YlOrRd" colormap, scaled between the smallest and largest plotted value.

    Parameters
    ----------
    corrs : array-like of float
        Correlation values, one per feature.
    features : sequence of str
        Labels aligned with ``corrs``. For the "lda" and "gclda" models each
        label is split on "_", its first token is dropped and the remaining
        tokens are stacked one per line.
    model : str
        Model name; only "lda" and "gclda" change how labels are rendered.
    out_fig : str
        Destination path handed to ``Figure.savefig``.

    Returns
    -------
    None

    Notes
    -----
    * ``plt.rcParams["text.color"]`` is set globally as a side effect.
    * With empty ``corrs`` there is nothing to scale or draw: a
      ``UserWarning`` is emitted and no file is written (previously this
      surfaced as an opaque ``ValueError`` from ``corrs.max()``).
    """
    max_bars = 10
    angle_zero = 0
    fontsize = 36

    corrs = np.asarray(corrs, dtype=float)
    n_rows = min(len(corrs), max_bars)
    if n_rows == 0:
        warnings.warn(
            f"plot_radar: no correlations to plot; skipping {out_fig}",
            stacklevel=2,
        )
        return

    corrs = corrs[:n_rows]
    features = features[:n_rows]
    # Spread the bars evenly around the full circle.
    angles = [(angle_zero + (n / float(n_rows) * 2 * np.pi)) for n in range(n_rows)]
    if model in ("lda", "gclda"):
        features = ["\n".join(feature.split("_")[1:]) for feature in features]

    # Outer radius: largest correlation rounded up to the next tenth.
    roundup_corr = math.ceil(corrs.max() * 10) / 10

    # Define color scheme
    plt.rcParams["text.color"] = "#1f1f1f"
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
    # Matplotlib 3.7 and removed in 3.9.
    cmap = plt.get_cmap("YlOrRd")
    norm = plt.Normalize(vmin=corrs.min(), vmax=corrs.max())
    colors = cmap(norm(corrs))

    # Plot radar
    fig, ax = plt.subplots(figsize=(9, 9), subplot_kw={"projection": "polar"})
    ax.set_theta_offset(0)
    ax.set_ylim(-0.1, roundup_corr)

    ax.bar(angles, corrs, color=colors, alpha=0.9, width=0.52, zorder=10)
    # Dashed spokes from the centre to the outer radius, drawn above the bars.
    ax.vlines(angles, 0, roundup_corr, color="grey", ls=(0, (4, 4)), zorder=11)

    # Text properties such as zorder are only accepted by set_xticks together
    # with labels, so they are passed to set_xticklabels alone.
    ax.set_xticks(angles)
    ax.set_xticklabels(features, size=fontsize, zorder=13)

    ax.xaxis.grid(False)

    # NOTE(review): the step sits a hair above 0.1, presumably so that
    # np.arange's half-open stop never yields a tick beyond roundup_corr --
    # confirm before simplifying.
    step = 0.10000000000000009
    yticks = np.round(np.arange(0, roundup_corr + step, step), 1)
    # Built-in radial labels are suppressed; custom ones are drawn below.
    ax.set_yticklabels([])
    ax.set_yticks(yticks)

    ax.spines["start"].set_color("none")
    ax.spines["polar"].set_color("none")

    # Push the feature labels away from the plot area.
    for xtick in ax.xaxis.get_major_ticks():
        xtick.set_pad(80)

    # Hand-placed radial tick labels along the pi/2 direction, shifted
    # slightly inwards so they sit just inside their grid ring.
    sep = 0.06
    for ytick in yticks:
        ax.text(
            np.pi / 2,
            ytick - sep,
            f"{ytick}",
            ha="center",
            size=fontsize - 2,
            color="grey",
            zorder=12,
        )

    fig.savefig(out_fig, bbox_inches="tight")

In [23]:
from wordcloud import WordCloud

def plot_cloud(features_list, frequencies, corrs, model, fig=None, ax=None, out_fig=None):
    """Render a correlation-weighted word cloud onto a Matplotlib axes.

    Parameters
    ----------
    features_list : sequence
        For the "lda"/"gclda" models, one sequence of words per feature;
        for any other model, one word per feature.
    frequencies : sequence of str or None
        Only read for "lda"/"gclda": one string per feature that
        ``ast.literal_eval`` turns into per-word frequencies aligned with the
        corresponding entry of ``features_list``.
    corrs : sequence of float
        One correlation per feature. A word's weight is ``freq * corr`` for
        "lda"/"gclda" and ``corr`` otherwise.
    model : str
        Model name; selects how weights are built.
    fig, ax : matplotlib Figure / Axes, optional
        Target to draw on. Either one may be omitted: a missing ``fig`` is
        taken from ``ax.figure``, a missing ``ax`` from ``fig.gca()``, and a
        new 9x5 inch figure is created when both are missing.
    out_fig : str, optional
        If given, the figure is saved to this path.

    Notes
    -----
    * When a word occurs more than once, the first weight encountered wins.
    * With no words at all the axes is left blank (ticks and spines are still
      removed) instead of raising, because WordCloud refuses to lay out an
      empty vocabulary.
    """
    weights = {}
    if model in ("lda", "gclda"):
        for words, frequency_str, corr in zip(features_list, frequencies, corrs):
            for word, freq in zip(words, literal_eval(frequency_str)):
                # setdefault keeps the first weight seen for a repeated word.
                weights.setdefault(word, freq * corr)
    else:
        for word, corr in zip(features_list, corrs):
            weights.setdefault(word, corr)

    # Resolve the drawing target so that passing only one of fig/ax works.
    if ax is None:
        if fig is None:
            fig, ax = plt.subplots(figsize=(9, 5))
        else:
            ax = fig.gca()
    elif fig is None:
        fig = ax.figure

    if weights:
        wc = WordCloud(background_color="white", random_state=0, colormap="YlOrRd")
        wc.generate_from_frequencies(frequencies=weights)
        ax.imshow(wc)

    # Hide ticks and spines but keep the axis itself, so titles and axis
    # labels set by the caller remain visible (ax.axis("off") would drop them).
    ax.get_xaxis().set_ticks([])
    ax.get_yaxis().set_ticks([])
    for spine in ax.spines.values():
        spine.set_visible(False)

    if out_fig is not None:
        fig.savefig(out_fig, bbox_inches="tight")

In [32]:
# Build one word-cloud grid per segmentation solution: rows are the
# (model, dataset, method) combinations, columns are the segments.
methods = ["Percentile", "KMeans", "KDE"]
dset_names = ["neurosynth", "neuroquery"]
models = ["term", "lda", "gclda"]

# Short names used in the row labels and in the progress output.
label_dict = {
    "Percentile": "PCT",
    "KMeans": "KMeans",
    "KDE": "KDE",
    "neurosynth": "NS",
    "neuroquery": "NQ",
    "term": "Term",
    "lda": "LDA",
    "gclda": "GCLDA",
}

# One grid row per combination; deriving the row count from this list keeps
# the grid in step with the three lists above.
combinations = list(itertools.product(models, dset_names, methods))

# Create the output directory up front so a missing folder cannot make
# savefig fail after every panel has already been rendered.
out_dir = op.join("./Fig", "survey")
os.makedirs(out_dir, exist_ok=True)

n_segmentations = 30
for segmentation_i in range(n_segmentations):
    # Solutions start at 3 segments: 3, 4, ..., n_segmentations + 2.
    n_segments = segmentation_i + 3
    seg = f"{n_segments:02d}"

    cloud_fig, cloud_axes_tpl = plt.subplots(len(combinations), n_segments)
    cloud_fig.set_size_inches(1.6 * n_segments, 15)

    for row_i, (model, dset_name, method) in enumerate(combinations):
        corr_dir = op.join("../results/decoding", f"{model}_{dset_name}_corr_{method}")
        for segment_i in range(n_segments):
            segment_id = segment_i + 1
            seg_sol = f"{segment_id:02d}"
            cloud_ax = cloud_axes_tpl[row_i, segment_i]
            data_df = pd.read_csv(op.join(corr_dir, f"{seg}-{seg_sol}.tsv"), sep="\t", index_col=None)

            data_df = data_df.rename(columns={data_df.columns[0]: "index"})
            # Keep positive, significant, "Functional" features, strongest first.
            filtered_df = (
                data_df
                .query('corr > 0 & pval < 0.05 & classification == "Functional"')
                .sort_values(by=["corr"], ascending=False)
            )

            corrs = filtered_df["corr"].values
            features = filtered_df["feature"].values

            # Data for wordcloud: topic models carry several words per
            # feature (encoded after the first "_"-separated token) plus
            # their frequencies; the term model has one word per feature.
            if model == "lda" or model == "gclda":
                features_split = [feature.split("_")[1:] for feature in features]
                frequencies = filtered_df["frequencies"].values.tolist()
            else:
                features_split = features
                frequencies = None

            # Per-panel radar charts are not produced here; if needed,
            # plot_radar(corrs, features, model, <output path>) can be called
            # at this point.
            plot_cloud(features_split, frequencies, corrs, model, cloud_fig, cloud_ax)
            if row_i == 0:
                cloud_ax.set_title(f"Segment {seg_sol}", fontsize=8)
            if segment_i == 0:
                print(label_dict[model], label_dict[dset_name], label_dict[method])
                cloud_ax.set_ylabel(
                    f"{label_dict[model]}-{label_dict[dset_name]}\n{label_dict[method]}",
                    fontsize=6,
                )
    cloud_fn = op.join(out_dir, f"cloud_{seg}.png")
    cloud_fig.savefig(cloud_fn, dpi=1000, bbox_inches="tight")
    # Close this specific figure (not merely "the current one") so figures do
    # not accumulate across the segmentations.
    plt.close(cloud_fig)

Term NS PCT
Term NS KMeans
Term NS KDE
Term NQ PCT
Term NQ KMeans
Term NQ KDE
LDA NS PCT
LDA NS KMeans
LDA NS KDE
LDA NQ PCT
LDA NQ KMeans
LDA NQ KDE
GCLDA NS PCT
GCLDA NS KMeans
GCLDA NS KDE
GCLDA NQ PCT
GCLDA NQ KMeans
GCLDA NQ KDE
Term NS PCT
Term NS KMeans
Term NS KDE
Term NQ PCT
Term NQ KMeans
Term NQ KDE
LDA NS PCT
LDA NS KMeans
LDA NS KDE
LDA NQ PCT
LDA NQ KMeans
LDA NQ KDE
GCLDA NS PCT
GCLDA NS KMeans
GCLDA NS KDE
GCLDA NQ PCT
GCLDA NQ KMeans
GCLDA NQ KDE
Term NS PCT
Term NS KMeans
Term NS KDE
Term NQ PCT
Term NQ KMeans
Term NQ KDE
LDA NS PCT
LDA NS KMeans
LDA NS KDE
LDA NQ PCT
LDA NQ KMeans
LDA NQ KDE
GCLDA NS PCT
GCLDA NS KMeans
GCLDA NS KDE
GCLDA NQ PCT
GCLDA NQ KMeans
GCLDA NQ KDE
Term NS PCT
Term NS KMeans
Term NS KDE
Term NQ PCT
Term NQ KMeans
Term NQ KDE
LDA NS PCT
LDA NS KMeans
LDA NS KDE
LDA NQ PCT
LDA NQ KMeans
LDA NQ KDE
GCLDA NS PCT
GCLDA NS KMeans
GCLDA NS KDE
GCLDA NQ PCT
GCLDA NQ KMeans
GCLDA NQ KDE
Term NS PCT
Term NS KMeans
Term NS KDE
Term NQ PCT
Term NQ KMean