# Imports

In [6]:
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.colors import TwoSlopeNorm
from matplotlib.pyplot import rc_context
import matplotlib.pyplot as plt
import matplotlib
import fastcluster
import seaborn as sns
import scanpy as sc
import pandas as pd
import numpy as np
import math
import glob
import h5py
import sys
import os

main_path = '/media/adalberto/Disk2/PhD_Workspace'
sys.path.append(main_path)
from models.clustering.logistic_regression_leiden_clusters import *
from models.evaluation.folds import load_existing_split
from models.clustering.correlations import *
from models.clustering.data_processing import *
from models.clustering.leiden_representations import include_tile_connections_frame
from data_manipulation.utils import store_data


# Methods

In [44]:
def create_histo_annotation_df(h5_hist_anno_path, additional_df, selected_anno=('acinar', 'lepidic', 'micropapillary', 'papillary', 'solid')):
    """Attach manual histological-subtype annotations to tile rows.

    Reads the 'combined_slides', 'combined_tiles' and 'combined_hist_subtype'
    datasets of the HDF5 file, decodes them as UTF-8, and inner-joins them
    with `additional_df` on ('slides', 'tiles').

    Args:
        h5_hist_anno_path: Path to the HDF5 file with the annotations.
        additional_df: DataFrame with at least 'slides' and 'tiles' columns.
            It is left untouched; the 'slides' cast to str happens on a copy.
        selected_anno: Subtype labels to keep in the result.

    Returns:
        The merged DataFrame (columns of `additional_df` plus 'histtype'),
        restricted to rows whose 'histtype' is in `selected_anno`.
    """
    def _decode(dataset):
        # Slice once so the dataset is read in a single call instead of one
        # HDF5 read per element.
        return [entry.decode("utf-8") for entry in dataset[:]]

    with h5py.File(h5_hist_anno_path, 'r') as content:
        histo_df = pd.DataFrame({
            # Slide id is the part before the first '_'; tile id drops the extension.
            'slides':   [name.split('_')[0] for name in _decode(content['combined_slides'])],
            'tiles':    [name.split('.')[0] for name in _decode(content['combined_tiles'])],
            'histtype': _decode(content['combined_hist_subtype']),
        })

    # Slide ids from the HDF5 file are strings, so the join key must be too.
    # Work on a copy so the caller's frame keeps its original dtype.
    tiles_df = additional_df.copy()
    tiles_df['slides'] = tiles_df['slides'].astype(str)

    histo_complete_df = tiles_df.merge(histo_df, how='inner', left_on=['slides', 'tiles'], right_on=['slides', 'tiles'])
    histo_complete_df = histo_complete_df.loc[histo_complete_df['histtype'].isin(list(selected_anno))]

    return histo_complete_df


def _cox_color_series(cox_df, cluster_col, p_th, name, name_masked):
    """Map the Cox coefficients of one survival run to per-cluster colors.

    Returns a (colors, colors_masked) pair of Series indexed by the run's own
    cluster labels (as str), ordered by `cluster_col`.
    """
    cox_df = cox_df.sort_values(by=cluster_col)
    coefs  = cox_df['coef'].astype(float)
    cmap   = plt.cm.PiYG_r
    # NOTE(review): TwoSlopeNorm presumably requires vmin < 0 < vmax, i.e. the
    # coefficients must straddle zero -- confirm for runs with one-sided coefficients.
    norm    = TwoSlopeNorm(vmin=coefs.min(), vcenter=0, vmax=coefs.max())
    neutral = cmap(norm(0))[:3]

    colors        = pd.Series([cmap(norm(coef)) for coef in coefs], name=name)
    # Clusters that do not reach significance get the color of coefficient 0.
    colors_masked = pd.Series([cmap(norm(coef)) if p < p_th else neutral for p, coef in zip(cox_df['p'], coefs)], name=name_masked)

    labels = cox_df[cluster_col].astype(str)
    colors.index        = labels
    colors_masked.index = labels
    return colors, colors_masked


def get_col_colors(cox_os_clusters, cox_pfs_clusters, p_th, cluster_col=None):
    """Build clustermap color bars from Cox regression coefficients.

    Args:
        cox_os_clusters: DataFrame with the cluster column, 'coef' and 'p' for
            the overall-survival run, or None.
        cox_pfs_clusters: Same layout for the second survival run, or None.
            Only used when `cox_os_clusters` is given.
        p_th: p-value threshold; in the masked variant clusters with
            p >= p_th are drawn with the neutral (coefficient 0) color.
        cluster_col: Name of the cluster column. Defaults to the notebook-level
            `groupby` variable.

    Returns:
        (colors, colors_masked). Both are None without an OS frame, a Series
        with only the OS frame, and a two-column DataFrame with both frames.
    """
    if cox_os_clusters is None:
        return None, None

    if cluster_col is None:
        cluster_col = groupby

    colors, colors_masked = _cox_color_series(cox_os_clusters, cluster_col, p_th,
                                              name='Cox Coefficient Overall Survival',
                                              name_masked='Cox Coefficient Overall Survival')

    if cox_pfs_clusters is not None:
        # Each run is indexed by its own cluster labels so that concat aligns
        # the two color bars by cluster, even if the runs cover different clusters.
        # NOTE(review): the two labels below disagree ('Recurrence' vs 'Progression');
        # kept as-is because the intended wording is not clear from this file.
        pfs_colors, pfs_colors_masked = _cox_color_series(cox_pfs_clusters, cluster_col, p_th,
                                                          name='Cox Coefficient\nRecurrence Free Survival',
                                                          name_masked='Cox Coefficient\nProgression Free Survival')
        colors        = pd.concat([colors, pfs_colors], axis=1)
        colors_masked = pd.concat([colors_masked, pfs_colors_masked], axis=1)

    return colors, colors_masked


def plot_clustermap(all_data_rho, mask, x_label, y_label, directory, file_name, figsize, vcenter=0, annot=True, fmt='.2f', cox_os_clusters=None, cox_pfs_clusters=None,
                    col_linkage=None, row_linkage=None, fontsize_ticks=28, fontsize_labels=30, fontsize_annot=20, dendrogram_ratio=0.2, row_colors_same=False,
                    show=False, not_masked=False, p_th=0.05):
    """Draw a seaborn clustermap, optionally decorated with Cox color bars.

    When Cox frames are given, up to two figures are produced: one with the
    plain color bars (file name unchanged) and, unless `not_masked` is set,
    one where non-significant clusters are neutral (file name gets '_masked'
    before '.jpg'). Without Cox frames a single figure is produced.

    Args:
        all_data_rho: DataFrame of values to plot.
        mask: Cells to hide, forwarded to `sns.clustermap`.
        x_label, y_label: Heatmap axis labels.
        directory, file_name: Output location, used when `show` is False.
        figsize: (width, height); the final figure is 10% larger on each side.
        vcenter: Center of the diverging color scale. With 0 the scale is
            symmetric around zero, otherwise it spans the data min/max.
        annot, fmt, fontsize_annot: Cell annotation settings.
        cox_os_clusters, cox_pfs_clusters, p_th: Forwarded to `get_col_colors`.
        col_linkage, row_linkage: Precomputed linkages, or None to cluster.
        fontsize_ticks, fontsize_labels, dendrogram_ratio: Layout settings.
        row_colors_same: Also show the Cox color bars on the rows.
        show: Show the figure instead of saving and closing it.
        not_masked: Skip the masked variant.

    Returns:
        The ClusterGrid of the last figure drawn.
    """
    colors, colors_masked = get_col_colors(cox_os_clusters, cox_pfs_clusters, p_th)

    # The masked variant is only distinct when there are Cox colors to mask.
    variants = [('', colors)]
    if colors is not None and not not_masked:
        variants.append(('_masked', colors_masked))

    # Scale limits do not depend on the variant.
    vref = np.max(np.abs(all_data_rho.values))

    with rc_context({'figure.figsize': figsize}):
        sns.set_theme(style='white')

        for suffix, col_colors in variants:
            if vcenter == 0:
                norm = TwoSlopeNorm(vmin=-vref, vcenter=vcenter, vmax=vref)
            else:
                norm = TwoSlopeNorm(vmin=all_data_rho.values.min(), vcenter=vcenter, vmax=all_data_rho.values.max())

            row_colors = col_colors if row_colors_same else None

            g = sns.clustermap(all_data_rho, vmin=-vref, vmax=vref, method='ward', metric='euclidean', annot=annot, mask=mask, col_colors=col_colors, row_colors=row_colors,
                               col_linkage=col_linkage, row_linkage=row_linkage, fmt=fmt, norm=norm, cmap=sns.diverging_palette(250, 20, as_cmap=True),
                               dendrogram_ratio=dendrogram_ratio, annot_kws={"size": fontsize_annot}, yticklabels=True, xticklabels=True)

            # The color-bar axes only exist when colors were actually passed.
            if col_colors is not None:
                g.ax_col_colors.set_yticklabels(g.ax_col_colors.get_ymajorticklabels(), fontsize=fontsize_ticks)
            if row_colors is not None:
                g.ax_row_colors.set_xticklabels(g.ax_row_colors.get_xmajorticklabels(), fontsize=fontsize_ticks)

            g.ax_heatmap.set_ylabel('\n%s' % y_label, fontsize=fontsize_labels)
            g.ax_heatmap.set_xlabel('\n%s' % x_label, fontsize=fontsize_labels)
            g._figure.set_size_inches(figsize[0]*1.1, figsize[1]*1.1)
            g.ax_heatmap.set_xticklabels(g.ax_heatmap.get_xmajorticklabels(), fontsize=fontsize_ticks)
            g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_ymajorticklabels(), fontsize=fontsize_ticks)
            g.ax_cbar.tick_params(labelsize=fontsize_ticks)

            if show:
                plt.show()
            else:
                # Save through the figure itself rather than pyplot's current figure.
                g._figure.savefig('%s/%s' % (directory, file_name.replace('.jpg', '%s.jpg' % suffix)))
                plt.close(g._figure)

    return g

def spatial_correlations_figure(data_sc, leiden_clusters, coef_os_df, coef_pfs_df, xlabel, ylabel, directory=None, file_name=None, linkage=None, p_th=0.05, figsize=(15,15), fontsize=14, show=False):
    """Draw a clustermap of cluster-to-cluster spatial interactions.

    Args:
        data_sc: DataFrame of interactions; only the `leiden_clusters`
            columns are used and the input is not modified.
        leiden_clusters: Column labels to select from `data_sc`.
        coef_os_df, coef_pfs_df, p_th: Forwarded to `get_col_colors`; both
            frames may be None, in which case no color bars are drawn.
        xlabel, ylabel: Heatmap axis labels.
        directory, file_name: Output location; the figure is saved only when
            both are given.
        linkage: Precomputed column linkage, or None to cluster the columns.
        figsize, fontsize: Layout settings.
        show: Show the figure instead of closing it after saving.

    Returns:
        The seaborn ClusterGrid.
    """
    sns.set_theme(style='white')

    colors, _ = get_col_colors(coef_os_df, coef_pfs_df, p_th=p_th)
    data_sc = data_sc[leiden_clusters].copy(deep=True)
    if colors is not None:
        # Relabel positionally so rows/columns match the color-bar index.
        # NOTE(review): assumes data_sc rows/columns are in the same order as
        # the sorted Cox clusters -- confirm against spatial_correlations_all.
        data_sc.index   = colors.index
        data_sc.columns = colors.index

    # NOTE(review): Ward linkage is normally defined for Euclidean distances;
    # its use with metric='correlation' is kept from the original -- confirm intent.
    g = sns.clustermap(data_sc.T, row_colors=colors, col_colors=colors, col_linkage=linkage, cmap='rocket_r', method='ward', metric='correlation', figsize=figsize)
    g.ax_heatmap.set_ylabel(ylabel, fontsize=fontsize)
    g.ax_heatmap.set_xlabel(xlabel, fontsize=fontsize)

    save_figure = directory is not None and file_name is not None
    if save_figure:
        g._figure.savefig('%s/%s' % (directory, file_name))
    if show:
        plt.show()
    elif save_figure:
        plt.close(g._figure)

    return g

def plot_dendrogram(adata, groupby, directory=None, file_name=None, show=False):
    """Plot the scanpy dendrogram of `groupby` and optionally save it.

    Args:
        adata: AnnData object passed to `sc.pl.dendrogram`.
        groupby: Grouping key whose dendrogram is drawn.
        directory, file_name: Output location; the figure is saved and closed
            only when both are given.
        show: Forwarded to `sc.pl.dendrogram`.
    """
    fig = plt.figure(figsize=(15,5))
    fig.suptitle('Leiden Cluster Dendrogram')
    ax = fig.add_subplot(1, 1, 1)
    sc.pl.dendrogram(adata, groupby=groupby, ax=ax, show=show)
    if directory is not None and file_name is not None:
        # Save through the figure object: after a show() the pyplot "current
        # figure" may no longer be this one.
        fig.savefig(os.path.join(directory, file_name))
        plt.close(fig)


# Variables for run comparison

In [46]:
# Workspace path.
main_path = '/media/adalberto/Disk2/PhD_Workspace'

# Resolution and fold for the tile clustering and slide representations.
resolution     = 2.0
fold_number    = 0
groupby        = 'leiden_%s' % resolution

# Folder run.
meta_folder     = 'luad_overall_survival_nn250_fold%s_NYU_v3' % fold_number
meta_folder_pfs = 'luad_progression_free_survival_nn250_fold%s_NYU_v3_csNP' % fold_number
matching_field   = 'samples'
# meta_field       = 'luad'
meta_field       = 'os_event_ind'

##### HoverNet dataset annotations.
cell_names = ['cell neoplastic', 'cell inflammatory', 'cell connective', 'cell dead']
if 'luad' in meta_folder:
    dataset = 'NYU_LUADall_5x'
else:
    dataset = 'TCGAFFPE_LUADLUSC_5x_10pc'
magnification           = '20x'
annotation_restriction = 1

# Penalties for Cox regression and flag for usage.
use_cox        = True  # Flag if you want to use the cox coefficients to cross check with correlations.
alpha          = 1.0   # Alpha penalty for the folder to select on OS.
alpha_2        = None  # Alpha penalty for the folder to select on PFS.

# Pickle files.
# folds_pickle  = '%s/utilities/files/LUADLUSC/lungsubtype_Institutions.pkl' % main_path
folds_pickle = '%s/utilities/files/LUAD/overall_survival_TCGA_folds.pkl'  % main_path

# Tile representation files.
h5_complete_path   = '%s/results/BarlowTwins_3/TCGAFFPE_LUADLUSC_5x_60pc_250K/h224_w224_n3_zdim128_filtered/hdf5_TCGAFFPE_LUADLUSC_5x_60pc_he_complete_lungsubtype_survival_filtered.h5' % main_path
## LUAD run.
if 'luad' in meta_folder:
    h5_additional_path = '%s/results/BarlowTwins_3/TCGAFFPE_LUADLUSC_5x_60pc_250K/h224_w224_n3_zdim128_filtered/NYU300LUAD_Survival_5x_60pc/h224_w224_n3_zdim128/hdf5_NYU300LUAD_Survival_5x_60pc_he_train_overall_progression_free_surival_filtered.h5' % main_path
    h5_hist_anno_path  = '%s/datasets/NYUFFPE_Annotated_5x_60pc/he/patches_h224_w224/hdf5_NYUFFPE_Annotated_5x_60pc_he_combined.h5' % main_path
## LUSC run.
else:
    # Define both names in every branch so later cells never hit a NameError.
    h5_additional_path = None
    h5_hist_anno_path  = None

# Annotation files.
hovernet_csv     = '%s/datasets/HoverNet/%s/%s/%s_hovernet_annotations_5x.csv' % (main_path, dataset, magnification, dataset)
tcga_immune_csv  = '%s/utilities/files/TCGA/TCGA_immune_landscape.csv' % main_path

# Folder handling to save correlation and figures.
main_cluster_path = h5_complete_path.split('hdf5_')[0]
main_cluster_path = os.path.join(main_cluster_path, meta_folder)
adatas_path       = os.path.join(main_cluster_path, 'adatas')
figure_path        = os.path.join(main_cluster_path, 'leiden_%s_fold%s' % (str(resolution).replace('.','p'),fold_number))
interactions_path = os.path.join(figure_path, 'interactions')
figure_path        = os.path.join(figure_path, 'figures')
spatial_fig_path   = os.path.join(figure_path, 'spatial')
# Create each output folder independently: a partially created tree (e.g.
# 'figures' present but 'spatial' missing) must not raise FileExistsError.
for output_dir in (figure_path, interactions_path, spatial_fig_path):
    os.makedirs(output_dir, exist_ok=True)


### Cox Regression runs

In [47]:
# Load the per-cluster Cox coefficients of the selected runs.
# Both frames stay None when Cox cross-checking is disabled.
coef_os_df = coef_pfs_df = None
if use_cox:
    cox_columns  = [groupby, 'coef', 'p']
    alpha_os_tag = 'alpha_%s_l1ratio_0p0' % str(alpha).replace('.', 'p')

    # Overall survival run: <cluster path>/<run folder>/<stats file>.
    csv_cox = os.path.join(
        main_cluster_path,
        '%s_leiden_%s_%s_mintiles_100' % (meta_folder, resolution, alpha_os_tag),
        'leiden_%s_stats_all_folds.csv' % str(resolution).replace('.', 'p'),
    )
    cox_df     = pd.read_csv(csv_cox)
    coef_os_df = cox_df[cox_columns].copy(deep=True)

    if 'luad_overall_survival' in csv_cox:
        # The second run lives at the same path with the run folder and the
        # alpha penalty swapped for their PFS counterparts.
        alpha_pfs_tag = 'alpha_%s_l1ratio_0p0' % str(alpha_2).replace('.', 'p')
        csv_cox = csv_cox.replace(meta_folder, meta_folder_pfs).replace(alpha_os_tag, alpha_pfs_tag)

        cox_df      = pd.read_csv(csv_cox)
        coef_pfs_df = cox_df[cox_columns].copy(deep=True)

### Annotation files

In [48]:
# Immune landscape sample annotations.
immune_landscape_df = pd.read_csv(tcga_immune_csv)

# HoverNet Annotations.
hovernet_df  = pd.read_csv(hovernet_csv)
# Strip a file extension from the slide ids when present (detected on the
# first row). Every row is rewritten, so the new column always has the same
# length as the frame; ids without a '.' are left unchanged by the split.
slide_names = hovernet_df['slides'].astype(str)
if len(slide_names) > 0 and '.' in slide_names.iloc[0]:
    hovernet_df['slides'] = [slide.split('.')[0] for slide in slide_names]


### Representations: Slides and Tiles.

In [57]:
# Get representations for slide representation correlations.
# Fold
folds = load_existing_split(folds_pickle)
fold = folds[fold_number]

# Read cohort CSVs.
dataframes, complete_df, leiden_clusters   = read_csvs(adatas_path, matching_field, groupby, fold_number, fold, h5_complete_path, h5_additional_path, additional_as_fold=False, force_fold=None)
train_df, valid_df, test_df, additional_df = dataframes
# Drop the image extension so tile ids match the annotation files.
complete_df['tiles']   = complete_df['tiles'].apply(lambda x: x.split('.jpeg')[0])
if additional_df is not None:
    additional_df['tiles'] = additional_df['tiles'].apply(lambda x: x.split('.jpeg')[0])

# Get cluster interactions. They are cached as CSV: read if present, else compute and store.
interactions_suffix = '_%s__fold%s_%s_cluster_interactions' % (groupby.replace('.', 'p'), fold_number, meta_folder)
file_name     = h5_complete_path.split('/hdf5_')[1].split('.h5')[0] + interactions_suffix
file_path     = os.path.join(interactions_path, file_name + '.csv')
if os.path.isfile(file_path):
    frame_conn = pd.read_csv(file_path)
else:
    frame_conn = include_tile_connections_frame(frame=complete_df, groupby=groupby)
    frame_conn.to_csv(file_path, index=False)

# Same for the additional cohort. The path is only parsed inside the guard so
# runs without an additional cohort (h5_additional_path is None) do not fail.
if h5_additional_path is not None:
    file_name_add = h5_additional_path.split('/hdf5_')[1].split('.h5')[0] + interactions_suffix
    file_path_add = os.path.join(interactions_path, file_name_add + '.csv')
    if os.path.isfile(file_path_add):
        frame_conn_add = pd.read_csv(file_path_add)
    else:
        frame_conn_add = include_tile_connections_frame(frame=additional_df, groupby=groupby)
        frame_conn_add.to_csv(file_path_add, index=False)

# Cross reference HoverNet and Cluster annotations. NYU annotations belong to
# the additional cohort, anything else to the complete cohort.
hovernet_tiles_df = additional_df if 'NYU' in hovernet_csv else complete_df
annotated_df      = hovernet_tiles_df.merge(hovernet_df, how='inner', left_on=['slides', 'tiles'], right_on=['slides', 'tiles'])
cluster_anno_df   = annotated_df[annotated_df['annotated_20x_tile_count']>=annotation_restriction]

# Get representations for slide representation correlations.
# Note: complete_df is rebound here and no longer refers to the frame returned by read_csvs.
frames = build_cohort_representations(meta_folder, meta_field, matching_field, groupby, fold_number, folds_pickle, h5_complete_path, h5_additional_path, 'clr', 100)
complete_df, additional_complete_df, frame_clusters, frame_samples, features = frames
complete_df.columns            = complete_df.columns.astype(str)
if additional_complete_df is not None:
    additional_complete_df.columns = additional_complete_df.columns.astype(str)

# Read clustering file.
adata_train, h5ad_path = read_h5ad_reference(h5_complete_path, meta_folder, groupby, fold_number)

# Prepare manual annotations for histological subtypes.
if additional_df is not None:
    histo_complete_df  = create_histo_annotation_df(h5_hist_anno_path, additional_df)


### Correlations

In [62]:
pval_th       = 0.01

# Results of every analysis, keyed by analysis name. Each entry holds the
# output 'file_name' stem plus its 'data' and/or 'linkage'.
correlations_dict = dict()

# Shared file-name pieces: <h5 stem>_<groupby>__fold<k>_<run folder>.
fold_tag        = '_%s__fold%s_%s' % (groupby.replace('.', 'p'), fold_number, meta_folder)
complete_prefix = h5_complete_path.split('/hdf5_')[1].split('.h5')[0] + fold_tag
additional_prefix = None
if h5_additional_path is not None:
    additional_prefix = h5_additional_path.split('/hdf5_')[1].split('.h5')[0] + fold_tag

# Leiden Cluster Dendrogram
file_name = complete_prefix + '_leiden_dendrogram'
sc.tl.dendrogram(adata_train, groupby=groupby, cor_method='pearson', linkage_method='average', optimal_ordering=True)
correlations_dict[groupby] = dict()
correlations_dict[groupby]['file_name'] = file_name
# Derive the key from groupby so it follows the configured resolution.
correlations_dict[groupby]['linkage']   = adata_train.uns['dendrogram_%s' % groupby]['linkage']

# HoverNet Annotations
file_name = (additional_prefix if 'NYU' in hovernet_csv else complete_prefix) + '_hovernet'
# NOTE(review): the unfiltered annotated_df is passed here, so the
# annotation_restriction filter stored in cluster_anno_df is never applied -- confirm intent.
critical_coef, critical_ref, p_values = ks_test_cluster_purities(cluster_anno_df=annotated_df, fields=cell_names, groupby=groupby, fold_number=fold_number, directory=main_cluster_path,
                                                                 file_name=file_name, p_th=0.01, critical_values_flag=True)
mask  = (np.abs(critical_coef)<=critical_ref)
correlations_dict['hovernet'] = dict()
correlations_dict['hovernet']['file_name'] = file_name
correlations_dict['hovernet']['data']      = critical_coef, critical_ref, p_values, mask

# Immune Landscape
file_name = complete_prefix + '_immunelandscape'
all_data_rho, all_data_pval, mask, _ = correlate_clusters_annotation(slide_rep_df=complete_df, annotations_df=immune_landscape_df, purity_field=meta_field,
                                                                     matching_field='samples', corr_method='spearman', pval_th=pval_th, field_th=0.05*len(features),
                                                                     groupby=groupby, fold_number=fold_number, directory=main_cluster_path, file_name=file_name)
correlations_dict['immunelandscape'] = dict()
correlations_dict['immunelandscape']['file_name'] = file_name
correlations_dict['immunelandscape']['data']      = all_data_rho, all_data_pval, mask

# Tile Histological Subtype Annotation
if additional_complete_df is not None:
    file_name = additional_prefix + '_histsubtypes_anno'
    p_values, strength, mask = cluster_purity_hypergeom(histo_complete_df, frame_clusters, groupby, 'histtype', pval_th=pval_th, pvalue_as_strengh=False)
    strength.index = strength.index.astype(str)
    correlations_dict['tile_histsubtypes'] = dict()
    correlations_dict['tile_histsubtypes']['file_name'] = file_name
    correlations_dict['tile_histsubtypes']['data']      = p_values, strength, mask

# Spatial Correlations
# NOTE(review): spatial_corr_add is computed for both normalizations but never
# stored in correlations_dict or used in the cells visible here.
file_name     = complete_prefix + '_spatialcorr_icluster'
spatial_corr     = spatial_correlations_all(frame=frame_conn, leiden_clusters=leiden_clusters, groupby=groupby, normalize='cluster', include_background=False)
if h5_additional_path is not None:
    spatial_corr_add = spatial_correlations_all(frame=frame_conn_add, leiden_clusters=leiden_clusters, groupby=groupby, normalize='cluster', include_background=False)

correlations_dict['spatial_corr_cluster'] = dict()
correlations_dict['spatial_corr_cluster']['file_name'] = file_name
correlations_dict['spatial_corr_cluster']['data']      = spatial_corr

file_name     = complete_prefix + '_spatialcorr_iall'
spatial_corr     = spatial_correlations_all(frame=frame_conn, leiden_clusters=leiden_clusters, groupby=groupby, normalize='all', include_background=False)
if h5_additional_path is not None:
    spatial_corr_add = spatial_correlations_all(frame=frame_conn_add, leiden_clusters=leiden_clusters, groupby=groupby, normalize='all', include_background=False)

correlations_dict['spatial_corr_all'] = dict()
correlations_dict['spatial_corr_all']['file_name'] = file_name
correlations_dict['spatial_corr_all']['data']      = spatial_corr

# WSI Rep. Cluster Correlations
file_name     = complete_prefix + '_contentcorr'
all_data_rho, all_data_pval, mask = correlate_clusters_occurrance_annotation(complete_df, meta_field, groupby, fold_number, main_cluster_path, file_name,
                                                                             corr_method='spearman', pval_th=pval_th)

correlations_dict['content_corr'] = dict()
correlations_dict['content_corr']['file_name'] = file_name
correlations_dict['content_corr']['data']      = all_data_rho, all_data_pval, mask

### Figures
#### Correlations

In [63]:
# Leiden dendrogram.
file_name = correlations_dict[groupby]['file_name']
plot_dendrogram(adata_train, groupby, directory=figure_path, file_name=file_name+'.png', show=False)

# Arguments shared by every clustermap in this cell.
cox_frames = dict(cox_os_clusters=coef_os_df, cox_pfs_clusters=coef_pfs_df)

# Cluster purity (HoverNet cell annotations). The column linkage is kept for
# the cross-dendrogram figures.
file_name = correlations_dict['hovernet']['file_name']
critical_coef, critical_ref, p_values, mask = correlations_dict['hovernet']['data']
g = plot_clustermap(
    all_data_rho=np.round(critical_coef,2), mask=mask.values,
    x_label='Cluster', y_label='Cell Annotations',
    directory=figure_path, file_name=file_name+'.jpg', figsize=(60,30),
    **cox_frames
)
correlations_dict['hovernet']['linkage'] = g.dendrogram_col.linkage

# Immune landscape. The column linkage is kept as well.
file_name = correlations_dict['immunelandscape']['file_name']
all_data_rho, all_data_pval, mask = correlations_dict['immunelandscape']['data']
g = plot_clustermap(
    all_data_rho=all_data_rho, mask=mask.values,
    x_label='Cluster', y_label='Immune feature',
    directory=figure_path, file_name=file_name+'.jpg', figsize=(65,35),
    **cox_frames
)
correlations_dict['immunelandscape']['linkage'] = g.dendrogram_col.linkage

# Tile histological subtype annotations (only with an additional cohort).
if additional_complete_df is not None:
    file_name = correlations_dict['tile_histsubtypes']['file_name']
    p_values, strength, mask = correlations_dict['tile_histsubtypes']['data']
    g = plot_clustermap(
        all_data_rho=strength.transpose(), mask=mask.values.transpose(), vcenter=1,
        x_label='Cluster', y_label='Histological subtype', fmt='.1f',
        directory=figure_path, file_name=file_name+'.jpg', figsize=(60,30),
        **cox_frames
    )


In [64]:
# Cross-dendrogram figures: each heatmap is redrawn with its columns ordered
# by a linkage computed on a different analysis.

# Arguments shared by the clustermaps of each family in this cell.
hovernet_style = dict(x_label='Cluster', y_label='Cell Annotations', cox_os_clusters=coef_os_df, cox_pfs_clusters=coef_pfs_df,
                      directory=figure_path, figsize=(60,30))
immune_style   = dict(x_label='Cluster', y_label='Immune feature', cox_os_clusters=coef_os_df, cox_pfs_clusters=coef_pfs_df,
                      directory=figure_path, figsize=(65,35))
histo_style    = dict(vcenter=1, x_label='Cluster', y_label='Histological subtype', cox_os_clusters=coef_os_df, cox_pfs_clusters=coef_pfs_df,
                      fmt='.1f', directory=figure_path, figsize=(60,30))

# HoverNet ordered by the immune landscape linkage.
file_name = correlations_dict['hovernet']['file_name'] + '_cross_immune'
critical_coef, critical_ref, p_values, mask = correlations_dict['hovernet']['data']
g = plot_clustermap(all_data_rho=np.round(critical_coef,2), mask=mask.values, file_name=file_name+'.jpg',
                    col_linkage=correlations_dict['immunelandscape']['linkage'], **hovernet_style)

# Immune landscape ordered by the HoverNet linkage.
file_name = correlations_dict['immunelandscape']['file_name'] + '_cross_hovernet'
all_data_rho, all_data_pval, mask = correlations_dict['immunelandscape']['data']
g = plot_clustermap(all_data_rho=all_data_rho, mask=mask.values, file_name=file_name+'.jpg',
                    col_linkage=correlations_dict['hovernet']['linkage'], **immune_style)

# HoverNet ordered by the Leiden dendrogram linkage.
file_name = correlations_dict['hovernet']['file_name'] + '_cross_leiden'
critical_coef, critical_ref, p_values, mask = correlations_dict['hovernet']['data']
g = plot_clustermap(all_data_rho=np.round(critical_coef,2), mask=mask.values, file_name=file_name+'.jpg',
                    col_linkage=correlations_dict[groupby]['linkage'], **hovernet_style)

# Immune landscape ordered by the Leiden dendrogram linkage.
file_name = correlations_dict['immunelandscape']['file_name'] + '_cross_leiden'
all_data_rho, all_data_pval, mask = correlations_dict['immunelandscape']['data']
g = plot_clustermap(all_data_rho=all_data_rho, mask=mask.values, file_name=file_name+'.jpg',
                    col_linkage=correlations_dict[groupby]['linkage'], **immune_style)

# Tile annotations ordered by the immune landscape and the Leiden linkages
# (only with an additional cohort).
if additional_complete_df is not None:
    file_name = correlations_dict['tile_histsubtypes']['file_name'] + '_cross_immune'
    p_values, strength, mask = correlations_dict['tile_histsubtypes']['data']
    g = plot_clustermap(all_data_rho=strength.transpose(), mask=mask.values.transpose(), file_name=file_name+'.jpg',
                        col_linkage=correlations_dict['immunelandscape']['linkage'], **histo_style)

    file_name = correlations_dict['tile_histsubtypes']['file_name'] + '_cross_leiden'
    g = plot_clustermap(all_data_rho=strength.transpose(), mask=mask.values.transpose(), file_name=file_name+'.jpg',
                        col_linkage=correlations_dict[groupby]['linkage'], **histo_style)


#### Spatial Correlation Figures

In [65]:
# Spatial cluster correlations: one figure per normalization. The column
# linkage of each figure is stored for the cross-dendrogram figures.
ylabel = 'Cluster interaction with X cluster'
for corr_key, xlabel in [('spatial_corr_all', 'Cluster'),
                         ('spatial_corr_cluster', 'Cluster \n(interaction norm per cluster)')]:
    file_name = correlations_dict[corr_key]['file_name']
    data_sc   = correlations_dict[corr_key]['data'][leiden_clusters].copy(deep=True)
    g = spatial_correlations_figure(data_sc, leiden_clusters, coef_os_df, coef_pfs_df, xlabel=xlabel, ylabel=ylabel,
                                    directory=spatial_fig_path, file_name=file_name+'.jpg', p_th=0.05, figsize=(15,15), fontsize=14)
    correlations_dict[corr_key]['linkage'] = g.dendrogram_col.linkage

# WSI representation cluster content correlations, with the Cox color bars on
# both rows and columns.
file_name = correlations_dict['content_corr']['file_name']
all_data_rho, all_data_pval, mask = correlations_dict['content_corr']['data']
g = plot_clustermap(
    all_data_rho=all_data_rho, mask=mask.values,
    x_label='Cluster', y_label='Cluster',
    cox_os_clusters=coef_os_df, cox_pfs_clusters=coef_pfs_df,
    row_colors_same=True, annot=False,
    directory=spatial_fig_path, file_name=file_name+'.jpg', figsize=(30,30)
)
correlations_dict['content_corr']['linkage'] = g.dendrogram_col.linkage

In [66]:
# Cross dendrograms: spatial cluster correlations ordered by the immune
# landscape linkage and by the Leiden dendrogram linkage.
xlabel = 'Cluster'
ylabel = 'Cluster interaction with X cluster'
for linkage_key, suffix in [('immunelandscape', '_cross_immune'), (groupby, '_cross_leiden')]:
    linkage   = correlations_dict[linkage_key]['linkage']
    file_name = correlations_dict['spatial_corr_all']['file_name'] + suffix
    data_sc   = correlations_dict['spatial_corr_all']['data'][leiden_clusters].copy(deep=True)
    spatial_correlations_figure(data_sc, leiden_clusters, coef_os_df, coef_pfs_df, linkage=linkage, xlabel=xlabel, ylabel=ylabel,
                                directory=spatial_fig_path, file_name=file_name+'.jpg', p_th=0.05, figsize=(15,15), fontsize=14)

# Cross dendrograms: WSI representation cluster content ordered by the immune
# landscape, Leiden dendrogram and spatial ('all' normalization) linkages.
all_data_rho, all_data_pval, mask = correlations_dict['content_corr']['data']
for linkage_key, suffix in [('immunelandscape', '_cross_immune'), (groupby, '_cross_leiden'), ('spatial_corr_all', '_cross_spatialall')]:
    linkage   = correlations_dict[linkage_key]['linkage']
    file_name = correlations_dict['content_corr']['file_name'] + suffix
    # Pass the mask as a plain array, like every other plot_clustermap call in
    # this notebook, so masking is positional and independent of label dtypes.
    g = plot_clustermap(all_data_rho=all_data_rho, mask=mask.values, x_label='Cluster', y_label='Cluster', cox_os_clusters=coef_os_df, cox_pfs_clusters=coef_pfs_df,
                        row_colors_same=True, annot=False, col_linkage=linkage, directory=spatial_fig_path, file_name=file_name+'.jpg', figsize=(30,30))