In [None]:
import math
import textwrap
import matplotlib
import warnings
import sys
import os
# Get the current working directory
current_dir = os.getcwd()
# Add the parent directory to sys.path
sys.path.insert(0, os.path.dirname(current_dir))
from SpaMV.utils import compute_gene_topic_correlations

warnings.filterwarnings("ignore")
import numpy as np
import scanpy as sc
import squidpy as sq
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.gridspec import GridSpec
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas 
from matplotlib.patches import Patch

# Global font settings. matplotlib.rc('font', **font) below is applied after
# plt.rc('font', size=8), so the 'font' size in effect is 7.
font = {'size': 7}
plt.rc('font', size=8)
plt.rc('axes', titlesize=10)
plt.rc('axes', labelsize=8)
plt.rc('xtick', labelsize=6)

matplotlib.rc('font', **font)
dataset = 'ccRCC_Y7_T'
# Load the RNA AnnData of the selected dataset; it supplies the tissue image and the
# spatial coordinates reused by the panels below.
data_rna = sc.read_h5ad('../Dataset/' + dataset + '/adata_RNA.h5ad')

# NOTE(review): cluster_size and dr_size are not referenced anywhere else in the visible notebook.
cluster_size = 20
dr_size = 50
# Figure size in inches (passed to plt.figure(figsize=...)).
width = 13
height = 10
# Panel rectangles in figure-fraction coordinates; each dict is passed to GridSpec.update().
# Vertical positions are written as 1 - (offset from the top edge) / height.
gt_scale = {'top':1 - 1/height, 'bottom':1 - 4/height, 'left':.05, 'right': .2}
clustering_scale = {'top':1 - 1/height, 'bottom':1 - 4/height, 'left': .22, 'right': .72}
score_scale = {'top':1 - 1/height, 'bottom':1 - 4/height, 'left': .72, 'right': .82}
annotation_scale = {'top': 1-5.2/height, 'bottom': 1-9.3/height, 'left':.05, 'right': .15}
dr_scale = {'top': 1-5/height, 'bottom': 1-9.5/height, 'left': .27, 'right':.7}
topic_colorbar_scale = {'top': 1-(7.75-.25)/height, 'bottom': 1-(7.75+.25)/height, 'left':.65, 'right': .658}
# feature_colorbar_scale = {'top': 1-(5+9.2/3-.2)/height, 'bottom': 1-(5+9.2/3+.2)/height, 'left':.65, 'right': .658}
correlation_scale = {'top': 1-5/height, 'bottom': 1-9/height, 'left': .78, 'right': .92}
# pathway_scale = {'top': 1-10/height, 'bottom': 1-13/height}

# Composite figure; every panel below is positioned on it through its own GridSpec.
fig = plt.figure(dpi=500, figsize=(width, height))
###############################################################################################################################
# Panel: H&E tissue image
spec_gt = GridSpec(1, 1, **gt_scale)
he = plt.subplot(spec_gt[0, 0])
sc.pl.spatial(data_rna, ax=he, title='H&E', frameon=False, show=False)
# Hide both axes so only the image is shown.
for he_axis in (he.xaxis, he.yaxis):
    he_axis.set_visible(False)

# Panel: spatial clustering maps (2 x 3 methods) plus one legend column on the right.
spec_clustering = GridSpec(2, 4, wspace=0.1)
spec_clustering.update(**clustering_scale)

# Grid cell (row, column) of each method; axes are created in this order.
method_positions = {
    'SpatialGlue': (0, 0),
    'CellCharter': (0, 1),
    'COSMOS': (0, 2),
    'SMOPCA': (1, 0),
    'MISO': (1, 1),
    'SpaMV': (1, 2),
}
f_dict = {name: plt.subplot(spec_clustering[row, col]) for name, (row, col) in method_positions.items()}
legend = plt.subplot(spec_clustering[:, 3])

data_methods = {}
for method in ['SpaMV', 'COSMOS', 'CellCharter', 'SpatialGlue', 'MISO', 'SMOPCA']:
    adata = sc.read_h5ad('../Results/' + dataset + '/' + method + '.h5ad')
    shared_obs = data_rna.obs_names.intersection(adata.obs_names)
    # Materialise the subset: assigning obsm/uns on an AnnData view would otherwise
    # trigger an implicit copy (and an ImplicitModificationWarning).
    adata = adata[shared_obs, :].copy()
    adata.obsm['spatial'] = data_rna[shared_obs].obsm['spatial']
    # uns is not per-observation, so read it from data_rna directly instead of
    # building a sliced view (which copies uns) on every iteration.
    adata.uns['spatial'] = data_rna.uns['spatial']
    data_methods[method] = adata
    sc.pl.spatial(adata, color=method, ax=f_dict[method], show=False)

# The shared legend is built from the SpaMV panel's handles only.
handles, labels = f_dict['SpaMV'].get_legend_handles_labels()
legend.legend(handles, labels, loc='center left', frameon=False, ncol=1)
legend.axis('off')
for f in f_dict.values():
    # get_legend() returns None when the panel has no legend; guard before removing.
    panel_legend = f.get_legend()
    if panel_legend is not None:
        panel_legend.remove()
    f.axes.get_xaxis().set_visible(False)
    f.axes.get_yaxis().set_visible(False)

###############################################################################################################################
# plot clustering evaluation results
# Panel: stacked Jaccard similarity per method, one bar segment per modality.
spec_score = GridSpec(2, 1, hspace=.8)
spec_score.update(**score_scale)
evaluation_spamv = pd.read_csv('../Results/' + dataset + '/Evaluation_SpaMV.csv')
evaluation_miso = pd.read_csv('../Results/' + dataset + '/Evaluation_MISO.csv')
evaluation_cosmos = pd.read_csv('../Results/' + dataset + '/Evaluation_COSMOS.csv')
evaluation_spatialglue = pd.read_csv('../Results/' + dataset + '/Evaluation_SpatialGlue.csv')
evaluation_cellcharter = pd.read_csv('../Results/' + dataset + '/Evaluation_CellCharter.csv')
evaluation_smopca = pd.read_csv('../Results/' + dataset + '/Evaluation_SMOPCA.csv', index_col=0)

unsupervised = plt.subplot(spec_score[0, 0])
# Combine all per-method evaluation tables into one frame.
melted_df = evaluation_spamv
for df in [evaluation_miso, evaluation_cosmos, evaluation_spatialglue, evaluation_cellcharter, evaluation_smopca]:
    melted_df = pd.merge(melted_df, df, how='outer')

melted_df = melted_df[melted_df['Dataset'] == dataset]
melted_df = melted_df.drop(['Dataset'], axis=1)

# Bar order on the x axis; the last entry is the SpaMV bar.
ms = ['MISO', 'CellCharter', 'COSMOS', 'SMOPCA', 'SpatialGlue', 'SpaMV']
# Legend label -> (metric column, SpaMV variant scored for that modality).
# The previous 'H3K27ac'/'H3K27me3' names belonged to a different dataset: those SpaMV
# variants are not among this notebook's methods, so the SpaMV segment was NaN.
# NOTE(review): 'jaccard 1' = Transcriptome / 'jaccard 2' = Metabolome follows the tick
# labels of the panel below - confirm against the evaluation CSVs.
modality_specs = {
    'Transcriptome': ('jaccard 1', 'SpaMV (Transcriptomics related)'),
    'Metabolome': ('jaccard 2', 'SpaMV (Metabolomics related)'),
}
values = {}
for modality, (metric, spamv_variant) in modality_specs.items():
    scored_methods = ms[:-1] + [spamv_variant]
    values[modality] = [melted_df.loc[melted_df['method'] == name, metric].mean() for name in scored_methods]

bar_width = 0.64
bottom = np.zeros(len(ms))

# Stack the modality segments on top of each other.
for boolean, weight_count in values.items():
    p = unsupervised.bar(ms, weight_count, bar_width, label=boolean, bottom=bottom)
    bottom += weight_count
# Add labels and title
unsupervised.set_ylabel('Jaccard Similarity')
unsupervised.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', borderaxespad=0, frameon=False)
unsupervised.axes.set_xticklabels(labels=ms, rotation=45, ha='right', rotation_mode='anchor')
unsupervised.axes.xaxis.set_tick_params(pad=0)
unsupervised.set_ylim([0, .6])

# Remove the top and right spines
unsupervised.spines['top'].set_visible(False)
unsupervised.spines['right'].set_visible(False)

# Remove the x-axis label
unsupervised.set_xlabel('')  # Set the x-axis label to an empty string


# Panel: Jaccard similarity of the SpaMV embedding variants, grouped by metric.
unsupervised_spamv = plt.subplot(spec_score[1, 0])
melted_df = evaluation_spamv
for df in [evaluation_miso, evaluation_cosmos, evaluation_spatialglue, evaluation_cellcharter, evaluation_smopca]:
    melted_df = pd.merge(melted_df, df, how='outer')

melted_df = melted_df[melted_df['Dataset'] == dataset]
melted_df = melted_df.drop(['Dataset'], axis=1)
methods = ['SpaMV', 'SpaMV (Shared)', 'SpaMV (Transcriptomics related)', 'SpaMV (Metabolomics related)']
# Mean and standard deviation of every metric column per method.
mean_scores = melted_df.groupby('method').mean().reset_index()
std_scores = melted_df.groupby('method').std().reset_index()

# Reshape to long format: one row per (method, Metric).
df_means_melted = mean_scores.melt(id_vars='method', var_name='Metric', value_name='Mean')
df_errors_melted = std_scores.melt(id_vars='method', var_name='Metric', value_name='Std')

# Attach the error column to the means.
df_combined = pd.merge(df_means_melted, df_errors_melted, on=['method', 'Metric'])

# NOTE(review): two x positions are hard-coded, so each method is expected to have
# exactly two Metric rows in df_combined - confirm against the evaluation CSVs.
x = np.arange(2)  # the label locations

cat_width = .6
width = cat_width / 4 * .8 # the width of the bars
# Legend text per SpaMV variant; any method not listed is labelled 'All'.
legend_labels = {
    'SpaMV (Transcriptomics related)': 'Shared + Transcriptome private',
    'SpaMV (Metabolomics related)': 'Shared + Metabolome private',
    'SpaMV (Shared)': 'Shared',
}
# Plot bars for each method
for i, method in enumerate(methods):
    offset = cat_width * i / 4 - (.65-width*4)/.8
    algo_data = df_combined[df_combined['method'] == method]
    label = legend_labels.get(method, 'All')
    unsupervised_spamv.bar(x + offset, algo_data['Mean'], yerr=algo_data['Std'], width=width, capsize=2, label=label)

# Customize the plot
unsupervised_spamv.set_ylabel('Jaccard Similarity')
unsupervised_spamv.set_xticks(x)
unsupervised_spamv.set_xticklabels(['Transcriptome', 'Metabolome'])
unsupervised_spamv.set_ylim([0, .4])
# Remove spines
unsupervised_spamv.spines['right'].set_visible(False)
unsupervised_spamv.spines['top'].set_visible(False)
# Single legend, placed outside the axes on the right.
unsupervised_spamv.legend(bbox_to_anchor=(1.02, 0.5), loc='center left', frameon=False)

###############################################################################################################################
# Panel: SpaMV clusters with their manual annotation
spec_annotation = GridSpec(1, 1, **annotation_scale)

annotated = plt.subplot(spec_annotation[0, 0])
spamv_annotated = sc.read_h5ad('../Results/' + dataset + '/SpaMV.h5ad')
# Reuse the RNA object's uns (it carries the 'spatial' entry used for plotting).
spamv_annotated.uns = data_rna.uns
# Fix the category order; the topic-map rows further down iterate over these categories.
annotation_order = ['Stromal', 'Malignant', 'Immune', 'Endothelial']
spamv_annotated.obs['SpaMV_annotation'] = pd.Categorical(
    spamv_annotated.obs['SpaMV_annotation'],
    categories=annotation_order,
    ordered=True,
)
sc.pl.spatial(spamv_annotated, color='SpaMV_annotation', ax=annotated, title='SpaMV\nAnnotation', frameon=False, show=False)

###############################################################################################################################
# Panel: topic (dimension reduction) maps - grid layout and topic table
spec_dr = GridSpec(4, 6, hspace=.3)
spec_dr.update(**dr_scale)
z = pd.read_csv('../Results/' + dataset + '/SpaMV_z.csv', index_col=0)
# Renumber topics 1..n within each family (Shared / Transcriptomics / everything else),
# keeping the column order: the trailing token of each name is replaced by the new index.
col_dict = {}
next_number = {'Shared': 1, 'Transcriptomics': 1, 'other': 1}
for topic in z.columns:
    if 'Shared' in topic:
        family = 'Shared'
    elif 'Transcriptomics' in topic:
        family = 'Transcriptomics'
    else:
        family = 'other'
    col_dict[topic] = topic.rsplit(' ', 1)[0] + ' ' + str(next_number[family])
    next_number[family] += 1
z = z.rename(columns=col_dict)
def softmax(row):
    """Return the softmax of ``row``: exp(row - max(row)) normalised to sum to 1.

    The maximum is subtracted before exponentiation so that large inputs do not overflow.
    """
    weights = np.exp(row - np.max(row))
    return weights / weights.sum()
# Turn each row of z into proportions that sum to 1.
z = z.apply(softmax, axis=1)
# Feature-by-topic loadings, [Transcriptomics, Metabolomics], with the renumbered topic names.
w = [
    pd.read_csv('../Results/' + dataset + '/SpaMV_w_' + modality + '.csv', index_col=0).rename(columns=col_dict)
    for modality in ('Transcriptomics', 'Metabolomics')
]
rna = sc.read_h5ad('../Results/' + dataset + '/adata_RNA_preprocessed.h5ad')
met = sc.read_h5ad('../Results/' + dataset + '/adata_MET_preprocessed.h5ad')
# Label the metabolomics loadings with the feature names of the preprocessed object.
w[1].index = met.var_names
# NOTE(review): values are assigned positionally - this assumes z's rows are in the same
# order as spamv_annotated.obs; verify.
spamv_annotated.obs[z.columns] = z.values
# Dominant topic of every observation.
spamv_annotated.obs['SpaMV_interpretable'] = z.idxmax(axis=1).values
# Counts of (annotation, dominant topic) pairs.
contingency_table = pd.crosstab(
    index=spamv_annotated.obs['SpaMV_annotation'],
    columns=spamv_annotated.obs['SpaMV_interpretable'],
)

# annotation -> list of topics whose dominant observations fall mostly in that annotation
matching = {}

# All transcriptomics-loading topics, followed by the last five metabolomics-loading topics.
topics_to_match = list(w[0].columns) + list(w[1].columns[-5:])
for column in topics_to_match:
    # Annotation with the highest count for this topic in the contingency table
    max_index = contingency_table[column].idxmax()
    matching.setdefault(max_index, []).append(column)
f_dict = {}
img_alpha = .5
# Character width the row label is padded to (symmetric spaces around the annotation name).
size = 23
# Border colour of the row label, by grid row; rows past the third use the last colour.
row_colors = ['#1F77B4', '#FF7F0E', '#279E68', '#d62728']

# One grid row per annotation category, one column per topic matched to it.
for i, cluster in enumerate(spamv_annotated.obs['SpaMV_annotation'].cat.categories):
    for j, topic in enumerate(matching[cluster]):
        # Panel title: shared topics keep their name, private topics get a two-line title.
        if 'Shared' in topic:
            label = topic
        else:
            name_parts = topic.split(maxsplit=1)
            if name_parts[0] == 'Transcriptomics':
                label = 'Transcriptome\n' + name_parts[1]
            else:
                label = 'Metabolome\n' + name_parts[1]

        topic_ax = plt.subplot(spec_dr[i, j])
        f_dict[topic] = topic_ax
        if j == 0:
            # First panel of a row: no colorbar, frame kept so the y label can name the row.
            sc.pl.spatial(spamv_annotated, color=topic, ax=topic_ax, alpha_img=img_alpha, frameon=True, show=False, title='', colorbar_loc=None, vmax='p99')
            topic_ax.set_title(label, fontsize=7)
            color = row_colors[min(i, len(row_colors) - 1)]
            side_padding = ' ' * round((size - len(cluster)) / 2)
            ylabel = side_padding + cluster + side_padding
            topic_ax.set_ylabel(ylabel, bbox=dict(facecolor='none', alpha=0.8, edgecolor=color), labelpad=10, loc='center')
            for side in ('top', 'right', 'left', 'bottom'):
                topic_ax.spines[side].set_visible(False)
            topic_ax.set_xlabel('')
        else:
            sc.pl.spatial(spamv_annotated, color=topic, ax=topic_ax, alpha_img=img_alpha, frameon=False, show=False, vmax='p99')
            topic_ax.set_title(label, fontsize=7)

            # Keep the scatter mappable for the shared colorbar and drop the per-panel colorbar.
            topic_cb = topic_ax.collections[0]
            topic_cb.colorbar.remove()

# Shared colorbar for the topic maps, drawn from the last scatter kept in topic_cb.
spec_topic_colorbar = GridSpec(1, 1)
spec_topic_colorbar.update(**topic_colorbar_scale)
topic_colorbar = plt.subplot(spec_topic_colorbar[0, 0])
tcb = plt.colorbar(topic_cb, cax=topic_colorbar)
# Place the ticks from the mappable's own colour limits. The previous version used the
# min/max of z[topic] for whichever topic the plotting loop ended on, which need not be
# the topic behind topic_cb and ignores the vmax='p99' clipping, so ticks could fall
# outside the colorbar.
vmin, vmax = topic_cb.get_clim()
# Small inset so the 'Low'/'High' labels sit just inside the bar ends.
inset = 0.02 * (vmax - vmin)
# Define tick positions including intermediate ticks
num_ticks = 4  # Total number of ticks
ticks = np.linspace(vmin + inset, vmax - inset, num=num_ticks)

# Set the ticks on the colorbar
tcb.outline.set_visible(False)
tcb.set_ticks(ticks)

# Create labels where only the first and last are labeled
labels = ['Low'] + [''] * (num_ticks - 2) + ['High']

# Set these labels on the colorbar
tcb.ax.set_yticklabels(labels)
tcb.ax.tick_params(size=0)
tcb.ax.set_title('Topic\nAbundance', fontsize=7, loc='left')  # Title above the bar, left-aligned
###############################################################################################################################
# plot correlation result
# Axes for the per-topic correlation bar chart, positioned by correlation_scale.
spec_correlation = GridSpec(1, 1)
spec_correlation.update(**correlation_scale)
dr_score = plt.subplot(spec_correlation[0, 0])
def plot_topic_correlation_ratio_multimodal(data, omics_names, z, ax, k_values=None):
    """
    Plot the log2 fold change of mean top-k correlations between two modalities for each topic.

    Parameters
    ----------
    data : list
        Two datasets, one per modality, in the same order as ``omics_names``. Each one is
        passed unchanged to ``compute_gene_topic_correlations``.
    omics_names : list
        List of strings containing the name of each modality.
    z : pandas.DataFrame
        Topic matrix with one column per topic. Columns whose name contains
        'Transcriptomics' / 'Metabolomics' have their first word replaced by
        'Transcriptome' / 'Metabolome' for display.
    ax : matplotlib.axes.Axes
        Axes the horizontal bar chart is drawn on.
    k_values : dict or None
        Dictionary mapping modality names to the number of top correlations averaged.
        If None, defaults to k=5 for 'Proteomics' and k=10 for every other modality.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that owns ``ax``.

    Raises
    ------
    ValueError
        If ``data`` and ``omics_names`` differ in length, or if there are not exactly
        two modalities.
    """
    # Validate inputs
    if len(data) != len(omics_names):
        raise ValueError("Number of datasets must match number of omics names")
    if len(data) != 2:
        raise ValueError("This function currently supports exactly 2 modalities")

    # Set default k values if not provided
    if k_values is None:
        k_values = {name: 5 if name in ['Proteomics'] else 10 for name in omics_names}

    # Display names for the private topics (first word swapped, remainder kept).
    display_names = {}
    for topic in z.columns:
        if 'Transcriptomics' in topic:
            display_names[topic] = 'Transcriptome ' + topic.split(maxsplit=1)[1]
        elif 'Metabolomics' in topic:
            display_names[topic] = 'Metabolome ' + topic.split(maxsplit=1)[1]
    z = z.rename(columns=display_names)

    # Compute feature-topic correlations for each modality
    modality_corrs = {}
    for modality_data, name in zip(data, omics_names):
        corr_df, _ = compute_gene_topic_correlations(modality_data, z)
        modality_corrs[name] = corr_df

    # Mean of the k largest correlations per topic and modality
    topic_means = {name: [] for name in omics_names}
    for topic in z.columns:
        for name in omics_names:
            top_k_corrs = np.sort(modality_corrs[name][topic].values)[-k_values[name]:]
            topic_means[name].append(np.mean(top_k_corrs))

    # log2(first modality / second modality); a zero mean or a negative ratio yields inf/nan.
    log2_fold_changes = np.log2(np.array(topic_means[omics_names[0]]) /
                               np.array(topic_means[omics_names[1]]))

    # Create DataFrame with results, sorted so bars run from lowest to highest
    result_df = pd.DataFrame({'Topic': z.columns, 'Log2 Fold Change': log2_fold_changes})
    result_df = result_df.sort_values('Log2 Fold Change', ascending=True)

    # Create horizontal bar plot
    positions = range(len(result_df))
    bars = ax.barh(positions, result_df['Log2 Fold Change'])

    # Color bars based on which modality has stronger correlation
    for bar, fold_change in zip(bars, result_df['Log2 Fold Change']):
        if fold_change > 0:
            bar.set_color('skyblue')  # First modality stronger
        else:
            bar.set_color('lightgreen')  # Second modality stronger

    # Draw on the axes that was passed in rather than on pyplot's "current" axes.
    ax.set_title(f'Log2 Fold Change of Top Correlations\n({omics_names[0]} vs {omics_names[1]})')

    def split_label(label, threshold=10):
        """Greedily wrap ``label`` at word boundaries once a line would exceed ``threshold``."""
        if len(label) <= threshold:
            return label

        current_line = ""
        broken_label = ""
        for word in label.split():
            # Only break when the current line already holds a word; otherwise an
            # over-long first word would produce a leading empty line.
            if current_line and len(current_line) + len(word) + 1 > threshold:
                broken_label += current_line.strip() + '\n'
                current_line = word + " "
            else:
                current_line += word + " "

        broken_label += current_line.strip()  # Add the last line
        return broken_label

    new_labels = [split_label(label, threshold=16) for label in result_df['Topic']]
    # Set topic names as y-axis labels
    ax.set_yticks(list(positions))
    ax.set_yticklabels(new_labels, fontsize=6)

    label1 = f'Stronger {omics_names[0]} correlation'
    label2 = f'Stronger {omics_names[1]} correlation'

    legend_elements = [Patch(facecolor='skyblue', label=label1), Patch(facecolor='lightgreen', label=label2)]
    ax.legend(handles=legend_elements, loc='lower right', bbox_to_anchor=(1, -.13), frameon=False)
    # Add grid for better readability
    ax.grid(axis='x', linestyle='--', alpha=0.5)
    figure = ax.figure
    figure.tight_layout()
    return figure

plot_topic_correlation_ratio_multimodal([rna, met], ['Transcriptomics', 'Metabolomics'], z, dr_score)

plt.tight_layout()
# Bold panel letters at fixed figure-fraction positions: (x, y, letter).
fs = 17
panel_letters = [
    (.04, .86, 'a'),
    (.2, .92, 'b'),
    (.67, .92, 'c'),
    (.67, .73, 'd'),
    (.04, .37, 'e'),
    (.23, .55, 'f'),
    (.7, .55, 'g'),
    (.04, 0, 'h'),
    (.54, 0, 'i'),
]
for letter_x, letter_y, letter in panel_letters:
    fig.text(letter_x, letter_y, letter, fontsize=fs, fontweight='bold')
plt.savefig('../Figures/visualisation_5.pdf')
plt.show()

In [None]:
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns
sc.set_figure_params(scanpy=True, fontsize=10)
dataset = 'ccRCC_Y7_T'
z = pd.read_csv('../Results/' + dataset + '/SpaMV_z.csv', index_col=0)
# String labels so the dominant-topic Series below aligns with obs by label.
z.index = z.index.astype(str)
w = [pd.read_csv('../Results/' + dataset + '/SpaMV_w_Transcriptomics.csv', index_col=0), pd.read_csv('../Results/' + dataset + '/SpaMV_w_Metabolomics.csv', index_col=0)]
w[1].index = w[1].index.astype(str)
# Renumber topics 1..n within each family (Shared / Transcriptomics / everything else).
col_dict = {}
si = 1
oi = 1
ti = 1
for topic in z.columns:
    if 'Shared' in topic:
        col_dict[topic] = topic.rsplit(' ', 1)[0] + ' ' + str(si)
        si += 1
    elif 'Transcriptomics' in topic:
        col_dict[topic] = topic.rsplit(' ', 1)[0] + ' ' + str(oi)
        oi += 1
    else:
        col_dict[topic] = topic.rsplit(' ', 1)[0] + ' ' + str(ti)
        ti += 1
z = z.rename(columns=col_dict)
w[0] = w[0].rename(columns=col_dict)
w[1] = w[1].rename(columns=col_dict)
data_rna_preprocessed = sc.read_h5ad('../Results/' + dataset + '/adata_RNA_preprocessed.h5ad')
sc.pp.scale(data_rna_preprocessed)
# Dominant topic per observation.
data_rna_preprocessed.obs['SpaMV'] = z.idxmax(1)
# Keep observations whose dominant topic has transcriptomics loadings. Copy explicitly:
# the obs column is rewritten below, which on a view would trigger an implicit copy.
data_rna_preprocessed = data_rna_preprocessed[data_rna_preprocessed.obs['SpaMV'].isin(w[0].columns)].copy()
# Two-line display names for the private topics; works for any topic number instead of
# a hard-coded list of topics 1-5.
data_rna_preprocessed.obs['SpaMV'] = data_rna_preprocessed.obs['SpaMV'].astype(str).str.replace('Transcriptomics private', 'Transcriptome\n private', regex=False)
sc.tl.rank_genes_groups(data_rna_preprocessed, 'SpaMV', method='wilcoxon', key_added='wilcoxon')
ax = sc.pl.rank_genes_groups_heatmap(data_rna_preprocessed, key="wilcoxon", groupby="SpaMV", show_gene_labels=False, cmap='coolwarm', vmin=-2, vmax=2, show=False)
ax['groupby_ax'].set_ylabel('')
plt.tight_layout()
plt.savefig('../Figures/visualisation_5_deg.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Differential-feature heatmap for the metabolomics modality.
# Relies on `dataset`, `z` and `w` defined in the previous cell.
data_met_preprocessed = sc.read_h5ad('../Results/' + dataset + '/adata_MET_preprocessed.h5ad')
sc.pp.scale(data_met_preprocessed)
# Dominant topic per observation.
data_met_preprocessed.obs['SpaMV'] = z.idxmax(1)
# Keep observations whose dominant topic has metabolomics loadings. Copy explicitly:
# the obs column is rewritten below, which on a view would trigger an implicit copy.
data_met_preprocessed = data_met_preprocessed[data_met_preprocessed.obs['SpaMV'].isin(w[1].columns)].copy()
# Two-line display names for the private topics; works for any topic number instead of
# a hard-coded list of topics 1-5.
data_met_preprocessed.obs['SpaMV'] = data_met_preprocessed.obs['SpaMV'].astype(str).str.replace('Metabolomics private', 'Metabolome\n private', regex=False)

sc.tl.rank_genes_groups(data_met_preprocessed, 'SpaMV', method='wilcoxon', key_added='wilcoxon')
ax = sc.pl.rank_genes_groups_heatmap(data_met_preprocessed, key="wilcoxon", groupby="SpaMV", show_gene_labels=False, cmap='coolwarm', vmin=-2, vmax=2, show=False)
ax['groupby_ax'].set_ylabel('')
plt.tight_layout()
plt.savefig('../Figures/visualisation_5_dem.pdf', bbox_inches='tight')
plt.show()

In [None]:
import sys
import os
# Get the current working directory
current_dir = os.getcwd()
# Add the parent directory to sys.path
sys.path.insert(0, os.path.dirname(os.path.dirname(current_dir)))
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
dataset = 'ccRCC_Y7_T'
plt.rcParams['font.size'] = 5
z = pd.read_csv('../Results/' + dataset + '/SpaMV_z.csv', index_col=0)
# Cast to str (as the other cells do) so that d[z.index] below selects observations by
# name; an integer index would be interpreted as positions.
z.index = z.index.astype(str)
w = pd.read_csv('../Results/' + dataset + '/SpaMV_w_Transcriptomics.csv', index_col=0)
# Renumber topics 1..n within each family (Shared / Transcriptomics / everything else).
col_dict = {}
si = 1
oi = 1
ti = 1
for topic in z.columns:
    if 'Shared' in topic:
        col_dict[topic] = topic.rsplit(' ', 1)[0] + ' ' + str(si)
        si += 1
    elif 'Transcriptomics' in topic:
        col_dict[topic] = topic.rsplit(' ', 1)[0] + ' ' + str(oi)
        oi += 1
    else:
        col_dict[topic] = topic.rsplit(' ', 1)[0] + ' ' + str(ti)
        ti += 1
z = z.rename(columns=col_dict)
w = w.rename(columns=col_dict)
# One grid row per topic: the topic map followed by its (columns - 1) highest-loading features.
rows = w.shape[1]
columns = 11
d = sc.read_h5ad('../Results/' + dataset + '/adata_RNA_preprocessed.h5ad')
# Reorder to z's rows and copy, so the positional obs assignment below is aligned and
# does not write into a view.
d = d[z.index].copy()
d.obs[z.columns] = z.values
sf = 2  # inches per grid cell
space = 0.2
fig = plt.figure(figsize=(columns * sf, rows * sf))
spec_dr = GridSpec(rows, columns, hspace=space + .2, wspace=space-.2)
spec_dr.update(left=0, right=.98, top=.98, bottom=0.02)
f_dict = {}
for i in range(rows):
    topic_name = w.columns[i]
    genes = w.nlargest(columns - 1, topic_name).index
    for j in range(columns):
        panel = plt.subplot(spec_dr[i, j])
        f_dict[topic_name + str(j)] = panel
        if j == 0:
            # First column: the topic itself.
            sc.pl.spatial(d, color=topic_name, ax=panel, show=False, frameon=True, legend_loc='none', vmax='p99')
            label = topic_name.replace('Transcriptomics', 'Transcriptome')
            if 'Transcriptome' in label:
                # Two-line title for private topics.
                label_parts = label.split(maxsplit=1)
                label = label_parts[0] + '\n' + label_parts[1]
            panel.set_title(label, fontsize=14, pad=5)
        else:
            # Remaining columns: the top-loading features, in descending loading order.
            sc.pl.spatial(d, color=genes[j - 1], ax=panel, show=False, frameon=True, legend_loc='none', vmax='p99', cmap='coolwarm')
            panel.set_title(genes[j - 1], fontsize=14, pad=5)
        panel.axes.get_xaxis().set_visible(False)
        panel.axes.get_yaxis().set_visible(False)

plt.savefig('../Figures/visualisation_5_11_ccRCC_Y7_T_RNA.pdf')
plt.show()

In [None]:
# Same grid as the RNA cell above, for the metabolomics loadings.
# Relies on `dataset` and the imports of the previous cell.
z = pd.read_csv('../Results/' + dataset + '/SpaMV_z.csv', index_col=0)
# Cast to str (as the other cells do) so that d[z.index] below selects observations by
# name; an integer index would be interpreted as positions.
z.index = z.index.astype(str)
w = pd.read_csv('../Results/' + dataset + '/SpaMV_w_Metabolomics.csv', index_col=0)
# Renumber topics 1..n within each family (Shared / Transcriptomics / everything else).
col_dict = {}
si = 1
oi = 1
ti = 1
for topic in z.columns:
    if 'Shared' in topic:
        col_dict[topic] = topic.rsplit(' ', 1)[0] + ' ' + str(si)
        si += 1
    elif 'Transcriptomics' in topic:
        col_dict[topic] = topic.rsplit(' ', 1)[0] + ' ' + str(oi)
        oi += 1
    else:
        col_dict[topic] = topic.rsplit(' ', 1)[0] + ' ' + str(ti)
        ti += 1
z = z.rename(columns=col_dict)
w = w.rename(columns=col_dict)
# One grid row per topic: the topic map followed by its (columns - 1) highest-loading features.
rows = w.shape[1]
columns = 11
d = sc.read_h5ad('../Results/' + dataset + '/adata_MET_preprocessed.h5ad')
# Label the loadings with the feature names of the preprocessed object.
w.index = d.var_names
# Reorder to z's rows and copy, so the positional obs assignment below is aligned and
# does not write into a view.
d = d[z.index].copy()
d.obs[z.columns] = z.values
sf = 2  # inches per grid cell
space = 0.2
fig = plt.figure(figsize=(columns * sf, rows * sf), dpi=200)
spec_dr = GridSpec(rows, columns, hspace=space + .2, wspace=space-.2)
spec_dr.update(left=0, right=.98, top=.98, bottom=0.02)
f_dict = {}
for i in range(rows):
    topic_name = w.columns[i]
    genes = w.nlargest(columns - 1, topic_name).index
    for j in range(columns):
        panel = plt.subplot(spec_dr[i, j])
        f_dict[topic_name + str(j)] = panel
        if j == 0:
            # First column: the topic itself.
            sc.pl.spatial(d, color=topic_name, ax=panel, show=False, frameon=True, legend_loc='none', vmax='p99', legend_fontsize=5)
            label = topic_name.replace('Metabolomics', 'Metabolome')
            if 'Metabolome' in label:
                # Two-line title for private topics.
                label_parts = label.split(maxsplit=1)
                label = label_parts[0] + '\n' + label_parts[1]
            panel.set_title(label, fontsize=14, pad=5)
        else:
            # Remaining columns: the top-loading features, in descending loading order.
            sc.pl.spatial(d, color=genes[j - 1], ax=panel, show=False, frameon=True, legend_loc='none', vmax='p99', cmap='coolwarm')
            # Title shows the first 8 characters of the feature name.
            panel.set_title('m/z=' + genes[j - 1][:8], fontsize=14, pad=5)
        panel.axes.get_xaxis().set_visible(False)
        panel.axes.get_yaxis().set_visible(False)

plt.savefig('../Figures/visualisation_5_11_ccRCC_Y7_T_MET.pdf')
plt.show()

In [None]:
import sys
import os
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
# Get the current working directory
current_dir = os.getcwd()
# Add the parent directory to sys.path
sys.path.insert(0, os.path.dirname(os.path.dirname(current_dir)))
from Methods.SpaMV_copy.utils import plot_top_positive_correlations_boxplot

dataset = 'ccRCC_Y7_T'
z = pd.read_csv('../Results/' + dataset + '/SpaMV_z.csv', index_col=0)
# Renumber topics 1..n within each family (Shared / Transcriptomics / everything else),
# keeping the column order.
col_dict = {}
next_number = {'Shared': 1, 'Transcriptomics': 1, 'other': 1}
for topic in z.columns:
    if 'Shared' in topic:
        family = 'Shared'
    elif 'Transcriptomics' in topic:
        family = 'Transcriptomics'
    else:
        family = 'other'
    col_dict[topic] = topic.rsplit(' ', 1)[0] + ' ' + str(next_number[family])
    next_number[family] += 1
z = z.rename(columns=col_dict)
z.index = z.index.astype(str)
# Display names: 'Transcriptomics' -> 'Transcriptome'; otherwise 'Metabolomics' -> 'Metabolome'.
z = z.rename(columns=lambda name: (
    name.replace('Transcriptomics', 'Transcriptome')
    if 'Transcriptomics' in name
    else name.replace('Metabolomics', 'Metabolome')
))
d = [sc.read_h5ad('../Results/' + dataset + '/adata_RNA_preprocessed.h5ad'), sc.read_h5ad('../Results/' + dataset + '/adata_MET_preprocessed.h5ad')]
omics_names = ['Transcriptomics', 'Metabolomics']
# One boxplot figure per modality, saved right after it is drawn.
for modality_data, omics_name in zip(d, omics_names):
    plot_top_positive_correlations_boxplot(modality_data, z, omics_name=omics_name, figsize=(6, 3))
    plt.savefig('../Figures/visualisation_5_11_ccRCC_Y7_T_' + omics_name + '_pcc.pdf')