In [None]:
import os
import numpy as np
import pandas as pd
import anndata as ad
import scanpy as sc
#import squidpy as sq
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl

import cell2location
import scvi

from matplotlib import rcParams
# Modify Matplotlib settings to remove grid lines
plt.rcParams["axes.grid"] = False
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text for PDFs

# Print date and time:
import datetime
e = datetime.datetime.now()
print ("Current date and time = %s" % e)

# File-system locations this notebook reads from and writes to.

# Working directory that every relative path below is resolved against.
# The machine-specific default can be overridden with the ANALYSIS_WDIR
# environment variable so the notebook can run unchanged on another host.
wdir = os.environ.get(
    "ANALYSIS_WDIR",
    "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks",
)
os.chdir(wdir)

# Output folders (relative to wdir)
RESULTS_FOLDERNAME = "foetal/results/Spatial/"
FIGURES_FOLDERNAME = "foetal/figures/Spatial/"

# exist_ok=True creates the folders when missing and is a no-op otherwise,
# without the separate "exists?" check.
for _folder in (RESULTS_FOLDERNAME, FIGURES_FOLDERNAME):
    os.makedirs(_folder, exist_ok=True)

# Folder scanpy saves figures into (used by the `save=` argument of sc.pl.*)
sc.settings.figdir = FIGURES_FOLDERNAME

sp_data_folder = "../files/Spatial/dev/"

# Folders for the reference regression model and the cell2location model.
# os.path.join avoids the doubled "/" an f-string produces here, because the
# folder names above already end with a slash.
ref_run_name = os.path.join(RESULTS_FOLDERNAME, 'reference_signatures')
run_name = os.path.join(RESULTS_FOLDERNAME, 'cell2location_map')

def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Write ``fig`` to ``folder/fname`` as an SVG (vector) image.

    ``fig.tight_layout()`` is applied before the file is written.
    """
    target_path = os.path.join(folder, fname)
    fig.tight_layout()
    fig.savefig(target_path, format='svg')

# Set other settings
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
sc.set_figure_params(dpi=150, fontsize=10, dpi_save=600)

# Cell2Location

In [None]:
adata = sc.read_h5ad(os.path.join(RESULTS_FOLDERNAME, 'concatenated_adata.h5ad'))

In [None]:
adata.obs

In [None]:
adata.var

In [None]:
HARMONY_FOLDERNAME = "foetal/results/Harmony/"

adata_sn = sc.read(os.path.join(HARMONY_FOLDERNAME, '{}.h5ad'.format('dev_harmony')))
adata_sn

In [None]:
adata_sn = adata_sn[adata_sn.obs['age']=='20w'].copy()
adata_sn.obs['C_scANVI'].value_counts()

In [None]:
adata_sn.var

In [None]:
# Fetch (Ensembl gene ID, gene name) pairs for human from BioMart,
# indexed by Ensembl gene ID.
annot = sc.queries.biomart_annotations(
    "hsapiens",
    ["ensembl_gene_id", "external_gene_name"],
).set_index("ensembl_gene_id")

# Re-key the annotation table by gene name; entries without a name fall back
# to their Ensembl ID so that every row has a usable 'Gene' value.
annot['ensembl_gene_id'] = annot.index
annot['Gene'] = annot['external_gene_name']
annot['Gene'] = annot['Gene'].fillna(annot['ensembl_gene_id'])
annot.index = annot['Gene']
# Keep only the first row for each repeated gene name.
annot = annot[~annot.index.duplicated(keep='first')]

# Copy the annotation columns onto the reference var table.
# NOTE(review): presumably adata_sn.var is indexed by gene names at this point,
# so the rows line up with annot's 'Gene' index - confirm.
adata_sn.var[annot.columns] = annot
adata_sn.var_names_make_unique()

# Switch the var index to Ensembl IDs, keeping the previous index in 'Gene'.
# Entries that received no Ensembl ID above keep their previous name as ID.
adata_sn.var['Gene'] = adata_sn.var.index
adata_sn.var['ensembl_gene_id'] = adata_sn.var['ensembl_gene_id'].fillna(adata_sn.var['Gene'])
adata_sn.var.index = adata_sn.var['ensembl_gene_id']
adata_sn.var_names_make_unique()
adata_sn.var

In [None]:
del adata_sn.raw

In [None]:
from cell2location.utils.filtering import filter_genes
selected = filter_genes(adata_sn, cell_count_cutoff=30, #cell_percentage_cutoff2=0.03, 
                        nonz_mean_cutoff=1.12)

In [None]:
# filter the object
adata_sn = adata_sn[:, selected].copy()
adata_sn.var

In [None]:
# prepare anndata for the regression model
cell2location.models.RegressionModel.setup_anndata(adata=adata_sn,
                        layer="counts",
                        # 10X reaction / sample / batch
                        batch_key='sample',
                        # cell type, covariate used for constructing signatures
                        labels_key='C_scANVI',
                        # multiplicative technical effects (platform, 3' vs 5', donor effect)
                        categorical_covariate_keys=['libbatch', 'type']
                       )

# create the regression model
from cell2location.models import RegressionModel
mod = RegressionModel(adata_sn)

# view anndata_setup as a sanity check
mod.view_anndata_setup()

In [None]:
%%time
mod.train(max_epochs=400, use_gpu=True)

In [None]:
mod.plot_history(20)

In [None]:
# In this section, we export the estimated cell abundance (summary of the posterior distribution).
adata_sn = mod.export_posterior(
    adata_sn, sample_kwargs={'num_samples': 5000, 'batch_size': 2500, 'use_gpu': True}
)

# Save model
mod.save(f"{ref_run_name}", overwrite=True)

# Save anndata object with results
adata_file = f"{ref_run_name}/sc.h5ad"
adata_sn.write(adata_file)
adata_file

In [None]:
mod.plot_QC()

In [None]:
#adata_file = f"{ref_run_name}/sc.h5ad"
#adata_sn = sc.read_h5ad(adata_file)
#mod = cell2location.models.RegressionModel.load(f"{ref_run_name}", adata_sn)

In [None]:
# export estimated expression in each cluster
if 'means_per_cluster_mu_fg' in adata_sn.varm.keys():
    inf_aver = adata_sn.varm['means_per_cluster_mu_fg'][[f'means_per_cluster_mu_fg_{i}'
                                    for i in adata_sn.uns['mod']['factor_names']]].copy()
else:
    inf_aver = adata_sn.var[[f'means_per_cluster_mu_fg_{i}'
                                    for i in adata_sn.uns['mod']['factor_names']]].copy()
inf_aver.columns = adata_sn.uns['mod']['factor_names']
inf_aver

In [None]:
inf_aver.to_csv(os.path.join(RESULTS_FOLDERNAME,'EstExpressionPerCluster.csv'), index=True)

# Cell2Location Spatial Mapping

In [None]:
# find shared genes and subset both anndata and reference signatures
intersect = np.intersect1d(adata.var_names, inf_aver.index)
adata = adata[:, intersect].copy()
inf_aver = inf_aver.loc[intersect, :].copy()

# prepare anndata for cell2location model
cell2location.models.Cell2location.setup_anndata(adata=adata, batch_key="sample")

In [None]:
adata

In [None]:
# create and train the model
mod2 = cell2location.models.Cell2location(
    adata, 
    cell_state_df=inf_aver,
    # the expected average cell abundance: tissue-dependent
    # hyper-prior which can be estimated from paired histology:
    N_cells_per_location=17,
    # hyperparameter controlling normalisation of
    # within-experiment variation in RNA detection:
    detection_alpha=20
)
mod2.view_anndata_setup()

In [None]:
import torch
torch.set_float32_matmul_precision('high')

In [None]:
mod2.train(max_epochs=15000,
          # train using full data (batch_size=None)
          batch_size=None,
          # use all data points in training because
          # we need to estimate cell abundance at all locations
          train_size=1,
          use_gpu=True, log_every_n_steps=1
         )

# plot ELBO loss history during training, removing first 1000 epochs from the plot
mod2.plot_history(1000)
plt.legend(labels=['full data training']);

In [None]:
# In this section, we export the estimated cell abundance (summary of the posterior distribution).
adata = mod2.export_posterior(
    adata, sample_kwargs={'num_samples': 5000, 'batch_size': mod2.adata.n_obs, 'use_gpu': True}
)

# Save model
mod2.save(f"{run_name}", overwrite=True)
# Save anndata object with results
adata.write(f"{run_name}/sp.h5ad")

In [None]:
mod2.plot_QC()

In [None]:
mod2.plot_spatial_QC_across_batches()

In [None]:
adata.var

In [None]:
#adata_file = f"{run_name}/sp.h5ad"
#adata = sc.read_h5ad(adata_file)
#adata

#mod2 = cell2location.models.Cell2location.load(f"{run_name}", adata)
#mod2

In [None]:
# add 5% quantile, representing confident cell abundance, 'at least this amount is present',
# to adata.obs with nice names for plotting
adata.obs[adata.uns['mod']['factor_names']] = adata.obsm['q05_cell_abundance_w_sf']
adata

In [None]:
def select_slide(adata, s, s_col='sample'):
    """
    Select the data for one slide from the spatial anndata object.

    :param adata: Anndata object with multiple spatial experiments
    :param s: name of selected experiment
    :param s_col: column in adata.obs listing experiment name for each location
    :return: a copy of ``adata`` restricted to the locations of ``s``, whose
        ``uns['spatial']`` holds only the library entry matching ``s``
    :raises IndexError: if no key of ``adata.uns['spatial']`` matches ``s``
    """

    # Take a real copy rather than a view: ``uns`` is reassigned below, and
    # writing to a view is best avoided (anndata is expected to warn and turn
    # the view into a copy anyway).
    slide = adata[adata.obs[s_col].isin([s]), :].copy()
    s_keys = list(slide.uns['spatial'].keys())

    if s in s_keys:
        # Prefer the exact key: a pure substring test would let e.g. 'A1'
        # pick up the library 'A10' when that key happens to come first.
        s_spatial = s
    else:
        # Fall back to the original behaviour: first key containing ``s``.
        matches = [k for k in s_keys if s in k]
        if not matches:
            raise IndexError(
                f"no entry in adata.uns['spatial'] matches sample {s!r}; "
                f"available keys: {s_keys}"
            )
        s_spatial = matches[0]

    slide.uns['spatial'] = {s_spatial: slide.uns['spatial'][s_spatial]}

    return slide

In [None]:
# Slide shown in this cell. Available samples:
# 'Dev16126_Ach_EnthMB_H', 'Dev16126_Quad_MB_H', 'Dev16126_Quad_MB2_H'
slide = select_slide(adata, 'Dev16126_Quad_MB_H')

# adata.obs columns to draw, one panel each.
# NOTE(review): the first fourteen are presumably the cell-type abundance
# columns copied into adata.obs earlier, the last three cluster labels - confirm.
panel_columns = [
    'ABI3BP GAS2 Fibroblasts 1',
    'ABI3BP GAS2 Fibroblasts 2',
    'NEGR1 SCN7A Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'FGF14 THBS4 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
    'vasEndothelial Cells',
    'lymEndothelial Cells',
    'Smooth Myocytes',
    'Nervous System Cells',
    'Satellite Cells',
    'Skeletal Myocytes',
    'Immune Cells',
    'Chondrocytes',
    'clusters',
    'joint_leiden',
    'joint_leiden_graph',
]

# Black panel background, applied only while this figure is drawn.
black_panel_rc = {'axes.facecolor': 'black', 'figure.figsize': [4.5, 5]}

# plot in spatial coordinates
with mpl.rc_context(black_panel_rc):
    sc.pl.spatial(
        slide,
        color=panel_columns,
        cmap='inferno',
        img_key='hires',
        ncols=4,
        size=1.3,
        # limit color scale at 99.2% quantile of cell abundance
        vmin=0,
        vmax='p99.2',
        # use 'Dev16126_Quad_MB_H_cell2loc.svg' for vector output
        save='Dev16126_Quad_MB_H_cell2loc.png',
    )

In [None]:
# Slide shown in this cell. Available samples:
# 'Dev16126_Ach_EnthMB_H', 'Dev16126_Quad_MB_H', 'Dev16126_Quad_MB2_H'
slide = select_slide(adata, 'Dev16126_Quad_MB2_H')

# adata.obs columns to draw, one panel each.
# NOTE(review): the first fourteen are presumably the cell-type abundance
# columns copied into adata.obs earlier, the last three cluster labels - confirm.
panel_columns = [
    'ABI3BP GAS2 Fibroblasts 1',
    'ABI3BP GAS2 Fibroblasts 2',
    'NEGR1 SCN7A Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'FGF14 THBS4 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
    'vasEndothelial Cells',
    'lymEndothelial Cells',
    'Smooth Myocytes',
    'Nervous System Cells',
    'Satellite Cells',
    'Skeletal Myocytes',
    'Immune Cells',
    'Chondrocytes',
    'clusters',
    'joint_leiden',
    'joint_leiden_graph',
]

# Black panel background, applied only while this figure is drawn.
black_panel_rc = {'axes.facecolor': 'black', 'figure.figsize': [4.5, 5]}

# plot in spatial coordinates
with mpl.rc_context(black_panel_rc):
    sc.pl.spatial(
        slide,
        color=panel_columns,
        cmap='inferno',
        img_key='hires',
        ncols=4,
        size=1.3,
        # limit color scale at 99.2% quantile of cell abundance
        vmin=0,
        vmax='p99.2',
        # use 'Dev16126_Quad_MB2_H_cell2loc.svg' for vector output
        save='Dev16126_Quad_MB2_H_cell2loc.png',
    )

In [None]:
# Slide shown in this cell. Available samples:
# 'Dev16126_Ach_EnthMB_H', 'Dev16126_Quad_MB_H', 'Dev16126_Quad_MB2_H'
slide = select_slide(adata, 'Dev16126_Ach_EnthMB_H')

# adata.obs columns to draw, one panel each.
# NOTE(review): the first fourteen are presumably the cell-type abundance
# columns copied into adata.obs earlier, the last three cluster labels - confirm.
panel_columns = [
    'ABI3BP GAS2 Fibroblasts 1',
    'ABI3BP GAS2 Fibroblasts 2',
    'NEGR1 SCN7A Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'FGF14 THBS4 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
    'vasEndothelial Cells',
    'lymEndothelial Cells',
    'Smooth Myocytes',
    'Nervous System Cells',
    'Satellite Cells',
    'Skeletal Myocytes',
    'Immune Cells',
    'Chondrocytes',
    'clusters',
    'joint_leiden',
    'joint_leiden_graph',
]

# Black panel background, applied only while this figure is drawn.
black_panel_rc = {'axes.facecolor': 'black', 'figure.figsize': [4.5, 5]}

# plot in spatial coordinates
with mpl.rc_context(black_panel_rc):
    sc.pl.spatial(
        slide,
        color=panel_columns,
        cmap='inferno',
        img_key='hires',
        ncols=4,
        size=1.3,
        # limit color scale at 99.2% quantile of cell abundance
        vmin=0,
        vmax='p99.2',
        # use 'Dev16126_Ach_EnthMB_H_cell2loc.svg' for vector output
        save='Dev16126_Ach_EnthMB_H_cell2loc.png',
    )

# Changing image parameters to enhance visualisation

In [None]:
# skimage.color is used below (rgb2hsv / hsv2rgb) and is imported explicitly:
# importing skimage.exposure alone does not guarantee that the `color`
# submodule is loaded in every scikit-image version.
import skimage.color
import skimage.exposure
from skimage import exposure

# Work on a copy so the original images in `adata` stay untouched and can be
# compared with the adjusted ones below.
adata_adjusted = adata.copy()

for sample_id in adata.obs['sample'].unique():
    # The images live in .uns keyed by sample name, so they are read directly;
    # there is no need to subset the observations first.
    img_png = adata.uns['spatial'][sample_id]['images']['hires']

    # Contrast stretch: clip to the 0.5th / 99.5th intensity percentiles
    p_low, p_high = np.percentile(img_png, (0.5, 99.5))
    img_rescale = skimage.exposure.rescale_intensity(img_png, in_range=(p_low, p_high))

    # Adjust colour in HSV space: small hue shift (wrapped into [0, 1)) and
    # saturation halved.
    img_hsv = skimage.color.rgb2hsv(img_rescale)
    img_hsv[:, :, 0] = (img_hsv[:, :, 0] + 0.01) % 1.0
    img_hsv[:, :, 1] = img_hsv[:, :, 1] * 0.5
    img_adjusted = skimage.color.hsv2rgb(img_hsv)

    # Store the adjusted image in the copied AnnData object only
    adata_adjusted.uns['spatial'][sample_id]['images']['hires'] = img_adjusted

# Plot the same 500x500 pixel window of the original and adjusted images side by side
for sample_id in adata.obs['sample'].unique():
    original_img = adata.uns['spatial'][sample_id]['images']['hires']
    adjusted_img = adata_adjusted.uns['spatial'][sample_id]['images']['hires']

    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    axes[0].imshow(original_img[500:1000, 500:1000])
    axes[0].set_title('Original Image')
    axes[1].imshow(adjusted_img[500:1000, 500:1000])
    axes[1].set_title('Adjusted Image')

    fig.suptitle(f'Sample: {sample_id}')
    plt.show()

In [None]:
# add 5% quantile, representing confident cell abundance, 'at least this amount is present',
# to adata.obs with nice names for plotting
adata_adjusted.obs[adata.uns['mod']['factor_names']] = adata_adjusted.obsm['q05_cell_abundance_w_sf']

In [None]:
# Now we use cell2location plotter that allows showing multiple cell types in one panel
from cell2location.plt import plot_spatial

# Groups of cell types drawn together in one figure (group name -> obs columns)
clust_labels_dict = {
    'fibroblasts1': ['ABI3BP GAS2 Fibroblasts 2',
                     'ABI3BP GAS2 Fibroblasts 1',
                     'FGF14 THBS4 Fibroblasts'],
    'fibroblasts2': ['NEGR1 SCN7A Fibroblasts',
                     'COL6A6 FNDC1 Fibroblasts',
                     'COL3A1 PI16 Fibroblasts'],
    'endothelial': ['lymEndothelial Cells',
                     'vasEndothelial Cells',
                     'Smooth Myocytes'],
    'immuneandmuscle': ['Satellite Cells',
                        'Skeletal Myocytes',
                        'Nervous System Cells',
                        'Immune Cells']
}

# Two renderings are saved per (cell group, sample):
# (file-name suffix, plot_spatial options specific to that rendering)
map_variants = [
    ('', dict(style='fast', max_color_quantile=0.95, img_alpha=0.7, adjust_text=True)),
    ('_black', dict(style='dark_background', img_alpha=0)),  # fast or dark_background
]

for cellgroup, cts in clust_labels_dict.items():
    clust_labels = cts
    clust_col = ['' + str(i) for i in clust_labels]  # in case column names differ from labels
    for samplename in adata.obs['sample'].unique():
        slide = select_slide(adata, samplename)
        # select_slide leaves a single library entry in uns['spatial']
        library = list(slide.uns['spatial'].values())[0]
        shared_kwargs = dict(
            adata=slide,
            color=clust_col, labels=clust_labels,
            # spot coordinates expressed in hires-image pixels
            coords=slide.obsm['spatial'] * library['scalefactors']['tissue_hires_scalef'],
            show_img=False,
            img=library['images']['hires'],
            circle_diameter=7, colorbar_position='right',
        )

        for suffix, variant_kwargs in map_variants:
            with mpl.rc_context({'figure.figsize': (20, 15)}):
                fig = plot_spatial(**shared_kwargs, **variant_kwargs)
                # Remove grid lines from the generated plot
                ax = fig.get_axes()[0]
                ax.grid(False)

                # The loop variable names the file. The earlier form
                # slide.obs["sample"][0] indexes a label-indexed Series by
                # position, which pandas has deprecated.
                fig.savefig(os.path.join(FIGURES_FOLDERNAME, f'{samplename}_cell2locMap_{cellgroup}{suffix}.png'), format='png')
                plt.close(fig)

In [None]:
# NOTE(review): this cell repeats the loop of the previous cell and writes the
# same files again; it relies on clust_labels_dict and plot_spatial defined
# there. Consider removing one of the two.

# Two renderings are saved per (cell group, sample):
# (file-name suffix, plot_spatial options specific to that rendering)
map_variants = [
    ('', dict(style='fast', max_color_quantile=0.95, img_alpha=0.7, adjust_text=True)),
    ('_black', dict(style='dark_background', img_alpha=0)),  # fast or dark_background
]

for cellgroup, cts in clust_labels_dict.items():
    clust_labels = cts
    clust_col = ['' + str(i) for i in clust_labels]  # in case column names differ from labels
    for samplename in adata.obs['sample'].unique():
        slide = select_slide(adata, samplename)
        # select_slide leaves a single library entry in uns['spatial']
        library = list(slide.uns['spatial'].values())[0]
        shared_kwargs = dict(
            adata=slide,
            color=clust_col, labels=clust_labels,
            # spot coordinates expressed in hires-image pixels
            coords=slide.obsm['spatial'] * library['scalefactors']['tissue_hires_scalef'],
            show_img=False,
            img=library['images']['hires'],
            circle_diameter=7, colorbar_position='right',
        )

        for suffix, variant_kwargs in map_variants:
            with mpl.rc_context({'figure.figsize': (20, 15)}):
                fig = plot_spatial(**shared_kwargs, **variant_kwargs)
                # Remove grid lines from the generated plot
                ax = fig.get_axes()[0]
                ax.grid(False)

                # The loop variable names the file. The earlier form
                # slide.obs["sample"][0] indexes a label-indexed Series by
                # position, which pandas has deprecated.
                fig.savefig(os.path.join(FIGURES_FOLDERNAME, f'{samplename}_cell2locMap_{cellgroup}{suffix}.png'), format='png')
                plt.close(fig)

In [None]:
# identify spot locations to crop near tissue
def get_crop_coord(slide, scale='tissue_hires_scalef'):
    """
    Return the extent of the spot coordinates after applying a scale factor.

    :param slide: object with spot coordinates in ``obsm['spatial']`` and the
        scale factors of its first ``uns['spatial']`` entry
    :param scale: name of the scale factor to multiply the coordinates by
    :return: ``(crop_x, crop_y)``, each a ``[min, max]`` list for that axis
    """
    scalefactors = list(slide.uns['spatial'].values())[0]['scalefactors']
    scaled_coords = slide.obsm['spatial'] * scalefactors[scale]

    lower = scaled_coords.min(axis=0)
    upper = scaled_coords.max(axis=0)

    # Padding added around the spots on each side (currently none).
    margin = 0
    x_range = [lower[0] - margin, upper[0] + margin]
    y_range = [lower[1] - margin, upper[1] + margin]

    return x_range, y_range

In [None]:
# select up to 5-6 clusters (the last colour is grey)
sel_clust = ['lymEndothelial Cells',
             'vasEndothelial Cells',
             'Smooth Myocytes',
             'Nervous System Cells',
             'Satellite Cells',
             'Skeletal Myocytes',
             #'Immune Cells'
            ]
sel_clust_col = ['' + str(i) for i in sel_clust]

for samplename in adata.obs['sample'].unique():
    slide = select_slide(adata, samplename)
    # select_slide leaves a single library entry in uns['spatial']
    library = list(slide.uns['spatial'].values())[0]
    #crop_x, crop_y = get_crop_coord(slide, scale='tissue_hires_scalef')
    with mpl.rc_context({'figure.figsize': (15, 15)}):
        fig = plot_spatial(adata=slide, color=sel_clust_col, labels=sel_clust,
                           # Scaled to hires-image pixels so the spots share the
                           # coordinate system of the hires image passed as `img`,
                           # as in the other plot_spatial cells of this notebook.
                           # NOTE(review): some cell2location versions may recompute
                           # coords from `adata` themselves - confirm.
                           coords=slide.obsm['spatial'] * library['scalefactors']['tissue_hires_scalef'],
                           show_img=True, img_alpha=0,
                           max_color_quantile=0.98,
                           #crop_x=crop_x, crop_y=crop_y,
                           style='dark_background', # fast or dark_background
                           img=library['images']['hires'],
                           circle_diameter=7, colorbar_position='right')
        # The loop variable names the file; slide.obs["sample"][0] indexes a
        # label-indexed Series by position, which pandas has deprecated.
        fig.savefig(os.path.join(FIGURES_FOLDERNAME, f'{samplename}_cell2locMap2_muscleifm_black.png'), format='png')
        plt.close(fig)

In [None]:
# select up to 5-6 clusters (the last colour is grey)
sel_clust = ['ABI3BP GAS2 Fibroblasts 2',
             'ABI3BP GAS2 Fibroblasts 1',
             'FGF14 THBS4 Fibroblasts',
             'COL6A6 FNDC1 Fibroblasts',
             'COL3A1 PI16 Fibroblasts',
             'NEGR1 SCN7A Fibroblasts']
sel_clust_col = ['' + str(i) for i in sel_clust]

for samplename in adata.obs['sample'].unique():
    slide = select_slide(adata, samplename)
    # select_slide leaves a single library entry in uns['spatial']
    library = list(slide.uns['spatial'].values())[0]
    #crop_x, crop_y = get_crop_coord(slide, scale='tissue_hires_scalef')
    with mpl.rc_context({'figure.figsize': (15, 15)}):
        fig = plot_spatial(adata=slide, color=sel_clust_col, labels=sel_clust,
                           # Scaled to hires-image pixels so the spots line up with
                           # the (faintly shown) hires image passed as `img`, as in
                           # the other plot_spatial cells of this notebook.
                           # NOTE(review): some cell2location versions may recompute
                           # coords from `adata` themselves - confirm.
                           coords=slide.obsm['spatial'] * library['scalefactors']['tissue_hires_scalef'],
                           show_img=True, img_alpha=0.1,
                           max_color_quantile=0.98,
                           #crop_x=crop_x, crop_y=crop_y,
                           style='fast', # fast or dark_background
                           img=library['images']['hires'],
                           circle_diameter=7, colorbar_position='right')
        # The loop variable names the file; slide.obs["sample"][0] indexes a
        # label-indexed Series by position, which pandas has deprecated.
        fig.savefig(os.path.join(FIGURES_FOLDERNAME, f'{samplename}_cell2locMap2_fibroblasts_white.png'), format='png')
        plt.close(fig)

In [None]:
# plot hne: one H&E image per sample, saved as SVG
for samplename in adata.obs['sample'].unique():
    slide = select_slide(adata, samplename)
    fig, ax = plt.subplots(
        figsize=(3, 5),
    )
    # color=None draws only the tissue image, without any spot overlay values
    sc.pl.spatial(
        slide,
        color=None,
        img_key='hires',
        ax=ax,
        title='H&E stain',
        #legend_loc=False,
        show=False
    )
    ax.xaxis.label.set_visible(False)
    ax.yaxis.label.set_visible(False)
    # Pass the Figure itself: savesvg is written for a figure object and
    # calls tight_layout() on it before saving. The pyplot module was passed
    # before, which only worked because it has functions of the same names
    # acting on the current figure.
    savesvg(f'{samplename}_spatial_hne.svg', fig)

In [None]:
cell_type_data = pd.DataFrame(adata.obs[['ABI3BP GAS2 Fibroblasts 1',
       'ABI3BP GAS2 Fibroblasts 2', 'COL3A1 PI16 Fibroblasts',
       'COL6A6 FNDC1 Fibroblasts', 'Chondrocytes', 'FGF14 THBS4 Fibroblasts',
       'Immune Cells', 'NEGR1 SCN7A Fibroblasts', 'Nervous System Cells',
       'Satellite Cells', 'Skeletal Myocytes', 'Smooth Myocytes',
       'lymEndothelial Cells', 'vasEndothelial Cells']])
cell_type_data

In [None]:
# pearson correlation of cell type abundance (by spot)
correlation_matrix = cell_type_data.corr()

# Reversed 'PuOr' diverging colormap, centred on zero correlation below
colormap = plt.get_cmap('PuOr')
inverted_colormap = colormap.reversed()

# sns.clustermap creates its own figure, so the size goes in through
# `figsize`; a preceding plt.figure() call would only leave an empty extra figure.
cluster_grid = sns.clustermap(correlation_matrix, xticklabels=cell_type_data.columns,
                              yticklabels=cell_type_data.columns, cmap=inverted_colormap,
                              center=0, figsize=(10, 8))

# Title and save through the clustermap's own figure. plt.title() would set
# the title of whichever axes is current, not of the figure as a whole.
clustermap_fig = cluster_grid.ax_heatmap.figure
clustermap_fig.suptitle('Cell Type Similarity', y=1.02)
clustermap_fig.savefig(os.path.join(FIGURES_FOLDERNAME, 'pearson_celltype_correlation_clustermap.svg'),
                       bbox_inches='tight')  # keeps the raised title inside the saved image
plt.show()

## Napari Interactive Viewer

In [None]:
# Build one squidpy ImageContainer per sample from the hires image stored in
# adata.uns['spatial'], keyed by sample name.
# NOTE(review): `sq` is only bound if squidpy has been imported; the
# `import squidpy as sq` line in the first cell is commented out, so on a
# fresh kernel this cell raises NameError. Re-enable that import before running.
image_dict = {}

for library_id in adata.obs['sample'].unique():
    # Only .uns is read from this per-sample subset
    adata_sample = adata[adata.obs['sample'] == library_id]
    img = sq.im.ImageContainer(
        adata_sample.uns['spatial'][library_id]['images']['hires'],
        scale=adata_sample.uns['spatial'][library_id]['scalefactors']['tissue_hires_scalef']
    )
    image_dict[library_id] = img

image_dict

In [None]:
# Split the AnnData object based on 'sample' column.
# observed=True restricts a categorical 'sample' column to the samples that
# actually occur and avoids pandas' warning about the changing default.
groups = adata.obs.groupby('sample', observed=True)

# Create a dictionary to store the split AnnData objects (sample name -> AnnData)
split_data = {}

# Iterate over the groups and create a separate AnnData object for each group
for group_name, group_indices in groups.indices.items():
    sample_adata = adata[group_indices, :].copy()

    # Keep only this sample's entry of uns['spatial']. Every spot in the group
    # carries the same sample name, so the key is the group name itself;
    # there is no need to loop over all spots to collect it.
    sample_adata.uns['spatial'] = {group_name: adata.uns['spatial'][group_name]}

    split_data[group_name] = sample_adata

# Access the split AnnData objects and their corresponding spatial data by group name
for group_name, split_adata in split_data.items():
    print(f"Group: {group_name}")
    print("AnnData Object:")
    print(split_adata)
    print("Spatial Data:")
    print(split_adata.uns['spatial'])

In [None]:
#adata.var['ENSEMBL'] = adata.var.index
#adata.var.index = adata.var['Gene']
#adata_adatavis

In [None]:
# Re-index each per-sample var table by the 'Gene' column, keeping the
# previous index in a new 'ENSEMBL' column.
# NOTE(review): not idempotent - running this cell a second time copies the
# gene names into 'ENSEMBL'. Assumes var has a 'Gene' column and that the
# current index holds Ensembl IDs - confirm.
for data in split_data.values():
    data.var['ENSEMBL'] = data.var.index
    data.var.index = data.var['Gene']
split_data['Dev16126_Quad_MB2_H'].var

In [None]:
viewer = image_dict['Dev16126_Quad_MB2_H'].interactive(split_data['Dev16126_Quad_MB2_H'])

# Tissue Region Identification by Clustering

In [None]:
adata.obs['sample'].value_counts()

In [None]:
adata.obsm['q05_cell_abundance_w_sf'] = adata_adjusted.obsm['q05_cell_abundance_w_sf'].copy()
adata.obsm['q05_cell_abundance_w_sf']

In [None]:
# compute KNN using the cell2location output
sc.pp.neighbors(adata, use_rep='q05_cell_abundance_w_sf',
                n_neighbors = 30)

# Cluster spots into regions using scanpy
sc.tl.leiden(adata, resolution=1, key_added='leiden_01_ct')

# add region as categorical variable
adata.obs["region_cluster"] = adata.obs["leiden_01_ct"].astype("category")

# compute UMAP using KNN graph based on the cell2location output
sc.tl.umap(adata, min_dist = 0.3, spread = 1)

# show regions in UMAP coordinates
with mpl.rc_context({'axes.facecolor':  'white',
                     'figure.figsize': [4, 4]}):
    sc.pl.umap(adata, color=['sample','region_cluster'], size=30,
               color_map = 'RdPu', ncols = 2, legend_loc='on data',
               legend_fontsize=10, save='combined_cell2loc_regionClusterUMAPs.svg')

In [None]:
for samplename in adata.obs['sample'].unique():
    slide = select_slide(adata, samplename)
    with mpl.rc_context({'figure.figsize': (5, 6)}):
        sc.pl.spatial(slide, color=['region_cluster'],
                      size=1.1, alpha=1,
                      img_key='hires', 
                      save=f'{samplename}_cell2loc_regionClustersfromCOMBINED.svg'
                    )

# Save for 10X Loupe Browser Exploration

In [None]:
# NOTE(review): this replaces the in-memory `adata` with the contents of
# "sp_full.h5ad", a file that no cell visible in this notebook writes (the
# cell2location results are saved as "sp.h5ad"). Confirm where sp_full.h5ad
# is produced and that it contains the 'region_cluster' column used below.
adata = sc.read_h5ad(f"{run_name}/sp_full.h5ad")
adata

In [None]:
# Write one CSV of region-cluster labels per sample
sample_of_spot = np.array(adata.obs['sample'])
for sample_id in np.unique(sample_of_spot):
    clusters = adata.obs[['region_cluster']].loc[sample_of_spot == sample_id]
    # Keep only the text after the last underscore of each obs name as the
    # barcode (presumably this strips a sample prefix - confirm).
    clusters.index = clusters.index.str.rsplit('_', n=1).str[-1]
    clusters.index.name = 'Barcode'
    csv_path = os.path.join(RESULTS_FOLDERNAME, f'{sample_id}_region_clusters.csv')
    clusters.to_csv(csv_path)

In [None]:
# for data in split_data.values():
#     # compute KNN using the cell2location output stored in adata.obsm
#     sc.pp.neighbors(data, use_rep='q05_cell_abundance_w_sf',
#                     n_neighbors = 30)

#     # Cluster spots into regions using scanpy
#     sc.tl.leiden(data, resolution=1, key_added='leiden_01_ct')

#     # add region as categorical variable
#     data.obs["region_cluster"] = data.obs["leiden_01_ct"].astype("category")
    
#     # compute UMAP using KNN graph based on the cell2location output
#     sc.tl.umap(data, min_dist = 0.3, spread = 1)

#     # show regions in UMAP coordinates
#     with mpl.rc_context({'axes.facecolor':  'white',
#                          'figure.figsize': [4, 4]}):
#         sc.pl.umap(data, color=['region_cluster'], size=30,
#                    color_map = 'RdPu', ncols = 2, legend_loc='on data',
#                    legend_fontsize=10, save=f'{data.obs["sample"][0]}_cell2loc_regionClusterUMAPs.svg')

#     # plot in spatial coordinates
#     with mpl.rc_context({'axes.facecolor':  'black',
#                          'figure.figsize': [4.5, 5]}):
#         sc.pl.spatial(data, color=['region_cluster'],
#                       size=1.3, alpha=1,
#                       img_key='hires',save=f'{data.obs["sample"][0]}_cell2loc_regionClusters.svg')

# for data in split_data.values():
#     name = data.obs['sample'][0]
#     s1 = data.obs[['region_cluster']]
#     s1.index = [x[20:] for x in s1.index]
#     s1.index.name = 'Barcode'
#     print(s1)
#     s1.to_csv(os.path.join(RESULTS_FOLDERNAME, f'{name}_region_clusters.csv'))

# Adding region names

Make a dictionary that maps each sample's cluster labels to region names, as below, and use it to re-annotate each sample.

In [None]:
# Per-sample lookup tables: integer cluster label -> histology-based region name
region_name_mappings = {
    'Dev16126_Quad_MB2_H': {
        0: 'Tendon (Throughout)', #
        1: 'Muscle LCT', #
        2: 'Tendon LCT (Inner, Throughout)',
        3: 'Skeletal Muscle',
        4: 'Tendon LCT (Outer, MTJ)',
        5: 'Tendon LCT (Inner, ENTH-MB)',
        6: 'Muscle LCT'
    },
    'Dev16126_Ach_EnthMB_H': {
        0: 'Tendon (Throughout)', #
        1: 'Tendon (ENTH)', #
        2: 'Tendon (MB-MTJ)',
        3: 'Tendon (ENTH-MB)',
        4: 'Tendon LCT (Outer)',
        5: 'Skeletal Muscle',
        6: 'Tendon LCT (Inner)'
    },
    'Dev16126_Quad_MB_H': {
        0: 'Skeletal Muscle', #
        1: 'Muscle LCT', #
        2: 'Tendon (ENTH-MB)',
        3: 'Tendon (MB-MTJ)',
        4: 'Tendon LCT (Outer, MTJ)',
        5: 'Tendon LCT (Outer, ENTH-MB)',
        6: 'Tendon LCT (Outer, MB-MTJ)'
    }
}

# Start from an empty label; spots of samples without a mapping keep ''.
adata.obs['region_name'] = ''

# Iterate over each sample and apply the custom mapping
for sample_name, region_names in region_name_mappings.items():
    mask = (adata.obs['sample'] == sample_name).to_numpy()
    cluster_values = adata.obs.loc[mask, 'clusters'].astype(int)
    # Series.map performs the dictionary lookup and also copes with a sample
    # that has no spots; cluster labels missing from the mapping become ''.
    region_name_values = cluster_values.map(region_names).fillna('')
    # Single .loc assignment: the chained form obs['region_name'][mask] = ...
    # may write to a temporary copy instead of adata.obs.
    adata.obs.loc[mask, 'region_name'] = region_name_values.to_numpy()

In [None]:
adata.obs[['sample','region_name']].value_counts()

In [None]:
for samplename in adata.obs['sample'].unique():
    slide = select_slide(adata, samplename)
    with mpl.rc_context({'figure.figsize': (5, 6)}):
        sc.pl.spatial(slide, color=['region_name'],
                      size=1.1, alpha=1,
                      img_key='hires',
                      save=f'_{samplename}_annotated_region_clusters_spatialmap2.svg'
                    )

# Cell abundance by clustering and histology-based annotation

In [None]:
from cell2location.plt.plot_heatmap import clustermap

# For every sample: average the per-observation cell-type abundance columns
# within each annotated region, normalise per cell type, and draw a dot plot.
for samplename in adata.obs['sample'].unique():
    slide = select_slide(adata, samplename)
    # NOTE(review): slide.X is not read again in this cell
    slide.X = slide.layers['normcounts'].copy()

    factor_names = slide.uns['mod']['factor_names']
    regions = slide.obs['region_name'].unique()

    # rows = cell types (factor names), columns = regions
    cell_type_region_abundance_df = pd.DataFrame(index=factor_names, columns=regions)
    for region in regions:
        mask = slide.obs['region_name'] == region
        region_mean = slide.obs.loc[mask, factor_names].mean(0)
        cell_type_region_abundance_df[region] = region_mean

    # Divide each row by its row total so every cell type sums to 1 across regions
    row_totals = cell_type_region_abundance_df.sum(1)
    cell_type_region_abundance_norm = cell_type_region_abundance_df.div(row_totals, axis=0)

    # Reset matplotlib to the rc-file defaults before styling this figure
    mpl.rc_file_defaults()
    mpl.rcParams['pdf.fonttype'] = 42 # enables correct plotting of text
    dotplot_path = os.path.join(
        FIGURES_FOLDERNAME, f"{samplename}_histology_annotation_ct_abundance_dotplot.svg"
    )
    with mpl.rc_context({'font.size': 8, 'axes.facecolor': "white"}):
        clustermap(
            cell_type_region_abundance_norm,
            figure_size=(4, 5),
            cmap='RdPu',
            log=False,
            fun_type='dotplot',
            cluster_rows=True,
            cluster_cols=True,
        )
        plt.savefig(dotplot_path, bbox_inches='tight')
        plt.show()
        

# Identifying groups of co-located cell types using matrix factorisation

What cell types can be assumed to co-locate?

Three types of fibroblasts are expected to co-locate and communicate with each other. They are likely to lie close to endothelial and nervous-system structures, as well as to immune cells.

In [None]:
from cell2location import run_colocation
# Training settings passed through to run_colocation
colocation_train_args = {
    'n_fact': np.arange(5, 15),   # values 5..14; results are later read back as res_dict['n_fact<k>']
    'n_restarts': 5,              # number of training restarts
    'sample_name_col': 'sample',  # column in adata.obs that identifies the sample
    'mode': 'normal',
    'n_type': 'restart',
    'n_iter': 20000,              # maximum number of training iterations
}

# NMF hyperparameters (can be adjusted)
colocation_model_kwargs = {
    'alpha': 0.01,
    'init': 'random',
    "nmf_kwd_args": {"tol": 0.000001},
}

# Folder the co-location results are exported to
colocation_export_args = {'path': f'{run_name}/CoLocatedComb/'}

# Note: `adata` is replaced by the object returned from run_colocation
res_dict, adata = run_colocation(
    adata,
    model_name='CoLocatedGroupsSklearnNMF',
    return_all=True,
    train_args=colocation_train_args,
    model_kwargs=colocation_model_kwargs,
    export_args=colocation_export_args,
)


In [None]:
# Dot plot of cell-type fractions for the model stored under 'n_fact12'
mod_ch = res_dict['n_fact12']['mod']
loadings_plot_kwargs = dict(
    fact_filt=mod_ch.fact_filt,
    loadings_attr='cell_type_fractions',
    gene_fact_name='cell_type_fractions',
    fun_type='dotplot',  # or heatmap
    cmap='RdPu',
    figsize=[10, 4],
)
mod_ch.plot_gene_loadings(mod_ch.var_names_read, mod_ch.var_names_read, **loadings_plot_kwargs)

n_fact12_svg = os.path.join(FIGURES_FOLDERNAME, 'n_fact12.svg')
plt.savefig(n_fact12_svg)
plt.show()

In [None]:
# Dot plot of cell-type fractions for the model stored under 'n_fact5'.
# mod_ch keeps pointing at this model for the cells that follow.
mod_ch = res_dict['n_fact5']['mod']
loadings_plot_kwargs = dict(
    fact_filt=mod_ch.fact_filt,
    loadings_attr='cell_type_fractions',
    gene_fact_name='cell_type_fractions',
    fun_type='dotplot',  # or heatmap
    cmap='RdPu',
    figsize=[10, 4],
)
mod_ch.plot_gene_loadings(mod_ch.var_names_read, mod_ch.var_names_read, **loadings_plot_kwargs)

n_fact5_svg = os.path.join(FIGURES_FOLDERNAME, 'n_fact5.svg')
plt.savefig(n_fact5_svg)
plt.show()

In [None]:
# Export the results of the selected co-location model into `adata`.
# NOTE: `mod_ch` is whichever model was assigned most recently; when the
# notebook is run top to bottom that is res_dict['n_fact5']['mod'].

# extract parameters into DataFrames
mod_ch.sample2df(node_name='nUMI_factors', ct_node_name = 'cell_type_factors')

# export results to scanpy object (both calls rebind `adata` to their return value)
adata = mod_ch.annotate_adata(adata) # as columns to .obs
adata = mod_ch.export2adata(adata, slot_name='mod_sklearn') # as a slot in .uns

# print the fraction of cells of each type located to each combination
mod_ch.print_gene_loadings(loadings_attr='cell_type_fractions',
                         gene_fact_name='cell_type_fractions')

In [None]:
# Show the column names of mod_ch.location_factors_df; the next cell passes them to sc.pl.spatial as `color`
mod_ch.location_factors_df.columns

In [None]:
# Columns to colour by: one panel per column of mod_ch.location_factors_df
factor_columns = mod_ch.location_factors_df.columns
dark_panel_rc = {'figure.figsize': (5, 7), 'axes.facecolor': 'black'}

for samplename in adata.obs['sample'].unique():
    slide = select_slide(adata, samplename)
    # plot cell density in each combination
    with mpl.rc_context(dark_panel_rc):
        sc.pl.spatial(
            slide,
            color=factor_columns,
            #color=mod_ch.location_factors_df.mean_nUMI_factorsfact_12.name,
            cmap='inferno',
            ncols=6,
            size=1.2,
            img_key='hires',
            alpha_img=0,          # tissue image fully transparent
            vmin=0,
            vmax='p98',
            #save=os.path.join(FIGURES_FOLDERNAME, f'{samplename}_nfact5_celldensity.svg')
        )

In [None]:
# Display adata to inspect it after the co-location results were exported into it
adata

In [None]:
# Write the AnnData object, including the results added so far, to the run folder
adata_file = f"{run_name}/sp_full.h5ad"
adata.write(filename=adata_file)

# Cell-type specific gene expression

In [None]:
# Reload the saved AnnData object so this section can start from disk
adata_file = f"{run_name}/sp_full.h5ad"
adata = sc.read_h5ad(filename=adata_file)

In [None]:
# Compute expected expression per cell type from the 'post_sample_q05' entry of mod2.samples
# NOTE(review): `adata` was just reloaded from disk; this assumes its rows and
# genes line up with the data mod2 was fitted on - confirm.
posterior_q05 = mod2.samples["post_sample_q05"]
expected_dict = mod2.module.model.compute_expected_per_cell_type(
    posterior_q05,
    mod2.adata_manager,
)

# Add to anndata layers: one layer per factor name, taken from expected_dict['mu']
expected_mu = expected_dict['mu']
for i, n in enumerate(mod2.factor_names_):
    adata.layers[n] = expected_mu[i]

In [None]:
# Display the layers of adata, which now include one layer per name in mod2.factor_names_
adata.layers

In [None]:
import matplotlib.pyplot as plt
import scanpy as sc
import numpy as np


def _as_dense(values):
    """Return ``values`` as a dense array (sparse matrices expose ``toarray``)."""
    return values.toarray() if hasattr(values, "toarray") else np.asarray(values)


def plot_genes_per_cell_type(slide, genes, ctypes):
    """Plot overall and cell-type-specific expression of genes on one slide.

    The figure has one row per gene. The first column shows the gene as
    ``sc.pl.spatial`` plots it without a ``layer`` argument; each following
    column shows the same gene taken from the layer named after one entry of
    ``ctypes``.

    Parameters
    ----------
    slide
        AnnData-like object holding a single sample. It must provide
        ``var["SYMBOL"]``, ``uns["mod"]["factor_names"]`` and one layer per
        name in ``uns["mod"]["factor_names"]`` and in ``ctypes``.
    genes
        Gene symbols to plot, looked up in ``slide.var["SYMBOL"]``.
    ctypes
        Layer names (cell types) to plot, one column each.

    Returns
    -------
    fig, axs
        The matplotlib figure and a 2-D array of axes with shape
        ``(len(genes), len(ctypes) + 1)``.

    Raises
    ------
    KeyError
        If any requested gene is absent from ``slide.var["SYMBOL"]``.
    """
    genes = list(genes)
    ctypes = list(ctypes)

    # Fail early with a clear message, before a (potentially very large)
    # figure is allocated and before quantiles are taken over empty arrays.
    known_symbols = set(slide.var["SYMBOL"])
    missing = [gene for gene in genes if gene not in known_symbols]
    if missing:
        raise KeyError(f"Genes not found in slide.var['SYMBOL']: {missing}")

    n_genes = len(genes)
    n_ctypes = len(ctypes)
    factor_names = slide.uns["mod"]["factor_names"]

    fig, axs = plt.subplots(
        nrows=n_genes, ncols=n_ctypes + 1, figsize=(4.5 * (n_ctypes + 1) + 2, 5 * n_genes + 1), squeeze=False
    )

    # Settings shared by every panel
    panel_kwargs = dict(
        cmap="magma",
        gene_symbols="SYMBOL",
        ncols=4,
        size=1.3,
        img_key="hires",
        vmin=0,
        show=False,
    )

    # plots of every gene
    for j, gene in enumerate(genes):
        # Column selector for this gene; computed once instead of once per cell type
        gene_mask = np.asarray(slide.var["SYMBOL"] == gene)

        # 99.2% quantile of the gene's expression in every cell-type layer
        quantile_across_ct = np.array(
            [np.quantile(_as_dense(slide.layers[n][:, gene_mask]), 0.992) for n in factor_names]
        ).flatten()
        # Shared colour limit: the second-largest per-cell-type quantile.
        # With a single factor there is no second-largest, so use that value.
        if quantile_across_ct.size >= 2:
            quantile_across_ct = np.partition(quantile_across_ct, -2)[-2]
        else:
            quantile_across_ct = quantile_across_ct.max()

        # First column: the gene without a layer, colour scale capped at its own 99.2% quantile
        sc.pl.spatial(
            slide,
            color=gene,
            vmax="p99.2",
            ax=axs[j, 0],
            **panel_kwargs,
        )

        # plots of every cell type, all sharing the same colour limit
        for i, ctype in enumerate(ctypes):
            sc.pl.spatial(
                slide,
                color=gene,
                layer=ctype,
                vmax=quantile_across_ct,
                ax=axs[j, i + 1],
                **panel_kwargs,
            )
            axs[j, i + 1].set_title(f"{gene} {ctype}")

    return fig, axs

In [None]:
# plot_genes_per_cell_type looks genes up in var['SYMBOL'], so expose the 'Gene' column under that name
adata.var['SYMBOL'] = adata.var['Gene']

In [None]:
# Genes and cell-type layers for the 'tendoncell' figure of sample Dev16126_Quad_MB2_H
genes = ['SCX', 'TNMD', 'FMOD', 'MKX', 'EGR1']
ctypes = [
    'ABI3BP GAS2 Fibroblasts 1',
    'ABI3BP GAS2 Fibroblasts 2',
    'FGF14 THBS4 Fibroblasts',
    'NEGR1 SCN7A Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
]
figure_path = os.path.join(FIGURES_FOLDERNAME, 'Dev16126_Quad_MB2_H_tendoncell_ByCellType.svg')

with mpl.rc_context({'axes.facecolor':  'black'}):
    # restrict to a single slide before plotting
    slide = select_slide(adata, 'Dev16126_Quad_MB2_H')
    plot_genes_per_cell_type(slide, genes, ctypes)
    plt.savefig(figure_path)
    plt.show()

In [None]:
# Genes and cell-type layers for the 'tendoncell' figure of sample Dev16126_Quad_MB_H
genes = ['SCX', 'TNMD', 'FMOD', 'MKX', 'EGR1']
ctypes = [
    'ABI3BP GAS2 Fibroblasts 1',
    'ABI3BP GAS2 Fibroblasts 2',
    'FGF14 THBS4 Fibroblasts',
    'NEGR1 SCN7A Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
]
figure_path = os.path.join(FIGURES_FOLDERNAME, 'Dev16126_Quad_MB_H_tendoncell_ByCellType.svg')

with mpl.rc_context({'axes.facecolor':  'black'}):
    # restrict to a single slide before plotting
    slide = select_slide(adata, 'Dev16126_Quad_MB_H')
    plot_genes_per_cell_type(slide, genes, ctypes)
    plt.savefig(figure_path)
    plt.show()

In [None]:
# Genes and cell-type layers for the 'tspc' figure of sample Dev16126_Quad_MB_H
ctypes = [
    'ABI3BP GAS2 Fibroblasts 1',
    'ABI3BP GAS2 Fibroblasts 2',
    'FGF14 THBS4 Fibroblasts',
    'NEGR1 SCN7A Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
]
genes = [
    'ITGB1',   # CD29
    'CD44',
    'PDGFRA',  # PDGFRα
    'VIM',     # Vimentin
    'NES',     # Nestin
    #'POU5F1',  # Oct4 (left out)
    'SOX9',
    'TWIST1',
    'ACTA2',
    'THY1',
    'MCAM',
    'RGS5',
]
figure_path = os.path.join(FIGURES_FOLDERNAME, 'Dev16126_Quad_MB_H_tspc_ByCellType.svg')

with mpl.rc_context({'axes.facecolor':  'black'}):
    # restrict to a single slide before plotting
    slide = select_slide(adata, 'Dev16126_Quad_MB_H')
    plot_genes_per_cell_type(slide, genes, ctypes)
    plt.savefig(figure_path)

In [None]:
# Genes and cell-type layers for the 'dividing' figure of sample Dev16126_Quad_MB2_H
genes = ['DIAPH3', 'MKI67', 'TOP2A', 'CENPK']
ctypes = [
    'ABI3BP GAS2 Fibroblasts 1',
    'ABI3BP GAS2 Fibroblasts 2',
    'FGF14 THBS4 Fibroblasts',
    'NEGR1 SCN7A Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
]
figure_path = os.path.join(FIGURES_FOLDERNAME, 'Dev16126_Quad_MB2_H_dividing_ByCellType.svg')

with mpl.rc_context({'axes.facecolor':  'black'}):
    # restrict to a single slide before plotting
    slide = select_slide(adata, 'Dev16126_Quad_MB2_H')
    plot_genes_per_cell_type(slide, genes, ctypes)
    plt.savefig(figure_path)
    plt.show()

In [None]:
# Genes and cell-type layers for the 'dividing' figure of sample Dev16126_Quad_MB_H
genes = ['DIAPH3', 'MKI67', 'TOP2A', 'CENPK']
ctypes = [
    'ABI3BP GAS2 Fibroblasts 1',
    'ABI3BP GAS2 Fibroblasts 2',
    'FGF14 THBS4 Fibroblasts',
    'NEGR1 SCN7A Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
]
figure_path = os.path.join(FIGURES_FOLDERNAME, 'Dev16126_Quad_MB_H_dividing_ByCellType.svg')

with mpl.rc_context({'axes.facecolor':  'black'}):
    # restrict to a single slide before plotting
    slide = select_slide(adata, 'Dev16126_Quad_MB_H')
    plot_genes_per_cell_type(slide, genes, ctypes)
    plt.savefig(figure_path)
    plt.show()

In [None]:
# Write the AnnData object, now carrying the per-cell-type layers, back to the run folder
adata_file = f"{run_name}/sp_full.h5ad"
adata.write(filename=adata_file)

In [None]:
# Reload the saved AnnData object from the run folder
adata_file = f"{run_name}/sp_full.h5ad"
adata = sc.read_h5ad(adata_file)

In [None]:
# List the per-observation annotation columns available in the reloaded object
adata.obs.columns

In [None]:
# Selected genes shown in the two endothelial cell-type layers of sample Dev16126_Quad_MB2_H
ctypes = ['vasEndothelial Cells', 'lymEndothelial Cells']
genes = ['EMCN', 'PECAM1', 'LOX', 'LTBP4', 'MFAP4', 'THSD4']

with mpl.rc_context({'axes.facecolor':  'black'}):
    # restrict to a single slide before plotting
    slide = select_slide(adata, 'Dev16126_Quad_MB2_H')
    plot_genes_per_cell_type(slide, genes, ctypes)
    # Saving is disabled. NOTE: the filename below is left over from the
    # 'dividing' figure of another sample; change it before re-enabling.
    #plt.savefig(os.path.join(FIGURES_FOLDERNAME, 'Dev16126_Quad_MB_H_dividing_ByCellType.svg'))
    plt.show()

In [None]:
# Selected genes shown in the fibroblast cell-type layers of sample Dev16126_Quad_MB2_H
ctypes = [
    'ABI3BP GAS2 Fibroblasts 1',
    'ABI3BP GAS2 Fibroblasts 2',
    'FGF14 THBS4 Fibroblasts',
    'NEGR1 SCN7A Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
]
genes = ['ELN', 'EMILIN1', 'FBLN5', 'LOX', 'LTBP4', 'MFAP4', 'THSD4']

with mpl.rc_context({'axes.facecolor':  'black'}):
    # restrict to a single slide before plotting
    slide = select_slide(adata, 'Dev16126_Quad_MB2_H')
    plot_genes_per_cell_type(slide, genes, ctypes)
    # Saving is disabled. NOTE: the filename below is left over from the
    # 'dividing' figure of another sample; change it before re-enabling.
    #plt.savefig(os.path.join(FIGURES_FOLDERNAME, 'Dev16126_Quad_MB_H_dividing_ByCellType.svg'))
    plt.show()

In [None]:
# PDGFRA and TPPP3 shown in the fibroblast cell-type layers of sample Dev16126_Quad_MB2_H
ctypes = [
    'ABI3BP GAS2 Fibroblasts 1',
    'ABI3BP GAS2 Fibroblasts 2',
    'FGF14 THBS4 Fibroblasts',
    'NEGR1 SCN7A Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
]
genes = [
    #'AXIN2',  (left out)
    'PDGFRA',
    'TPPP3',
]

with mpl.rc_context({'axes.facecolor':  'black'}):
    # restrict to a single slide before plotting
    slide = select_slide(adata, 'Dev16126_Quad_MB2_H')
    plot_genes_per_cell_type(slide, genes, ctypes)
    # Saving is disabled. NOTE: the filename below is left over from the
    # 'dividing' figure of another sample; change it before re-enabling.
    #plt.savefig(os.path.join(FIGURES_FOLDERNAME, 'Dev16126_Quad_MB_H_dividing_ByCellType.svg'))
    plt.show()