### Continuing with the Scimap framework
Plotting and spatial analysis, continued.

In [1]:
import sys
import os
from functions import glasbey
import sklearn
import matplotlib.pyplot as plt
import matplotlib as mpl
from anndata import AnnData
import anndata as ad
import scimap as sm
import pandas as pd
import numpy as np
import scanpy as sc
import seaborn as sns
import copy
import os

# read in saved anndata object
data_path = os.path.join('..', 'outputs', 'adata2.h5ad')

# Fail fast when the file is missing: every later cell needs `adata`, so
# continuing after a printed warning would only surface later as a NameError.
if not os.path.exists(data_path):
    raise FileNotFoundError(
        f"File not found: {os.path.abspath(data_path)}. Check path."
    )

adata = ad.read_h5ad(data_path)

Running SCIMAP  2.1.3


In [2]:
# Print a summary of the loaded AnnData object (dimensions and obs/uns keys)
# as a sanity check before running the spatial analysis.
print(adata)

AnnData object with n_obs × n_vars = 1187155 × 36
    obs: 'Image', 'Object ID', 'Classification', 'Parent', 'Centroid X µm', 'Centroid Y µm', 'Leiden', 'phenotype', 'Timepoint', 'spatial_pscore_Ant Pres Macrophage-Plasma B', 'spatial_pscore_Inhib Monocyte-Th1 Helper T', 'spatial_pscore_Act Macrophage-Prolif Cyt T', 'spatial_pscore_T reg-Plasma B', 'spatial_aggregate_radius'
    uns: 'spatial_pscore_Act Macrophage-Prolif Cyt T', 'spatial_pscore_Ant Pres Macrophage-Plasma B', 'spatial_pscore_Inhib Monocyte-Th1 Helper T', 'spatial_pscore_T reg-Plasma B'


### Neighbourhood analysis
Define spatial neighbourhoods, understand their compositions

In [3]:
# Build the neighbourhood matrix used to identify local cell clusters.
# Neighbours are searched per image ('Parent') within a radius of 100,
# expressed in the units of the centroid columns; a nearest-neighbour
# method is the alternative to the radius method.
# The result is requested under the label 'spatial_count'.
adata = sm.tl.spatial_count(
    adata,
    imageid='Parent',
    phenotype='phenotype',
    x_coordinate='Centroid X µm',
    y_coordinate='Centroid Y µm',
    method='radius',
    radius=100,
    label='spatial_count',
)

Identifying neighbours within 100 pixels of every cell


KeyboardInterrupt: 

In [None]:
# Cluster the 'spatial_count' results with the Leiden method; the cluster
# assignments are requested under the label 'neigh_leiden'.
adata = sm.tl.spatial_cluster(
    adata,
    df_name='spatial_count',
    label='neigh_leiden',
    method='leiden',
    resolution=0.6,
    n_pcs=10,
    random_state=42,  # fixed seed passed to the clustering step
)

In [None]:
# plot the slides coloured by neighbourhood
# 'neigh_leiden' is the label given to sm.tl.spatial_cluster earlier in this
# notebook. The previous hue, 'Phenotype', is not an obs column (the column is
# lowercase 'phenotype') and would not show neighbourhoods anyway.
neigh_col = 'neigh_leiden'

# The palette depends only on the total number of neighbourhoods, so build it
# once instead of once per slide.
neigh_palette = glasbey(adata.obs[neigh_col].nunique())

# observed=True skips 'Parent' categories that have no cells (relevant only
# when the column is categorical), avoiding empty figures.
for slide, group in adata.obs.groupby('Parent', observed=True):
    # plot the xy coordinates
    f, ax = plt.subplots(figsize=(10, 10))

    sns.scatterplot(
        x="Centroid X µm",
        y="Centroid Y µm",
        hue=neigh_col,
        legend="full",
        palette=neigh_palette,
        data=group,
        ax=ax,
        s=0.5
    ).set(title=f'{slide} Neighbourhood xy plot')

    sns.despine()
    # Place the legend outside the axes and enlarge its markers, since the
    # plotted points themselves are tiny (s=0.5).
    ax.legend(bbox_to_anchor=(1.05, 1), loc=2, markerscale=20, borderaxespad=0.)
    plt.show()
    # Release the figure so memory does not grow with the number of slides.
    plt.close(f)

In [None]:
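# visualize the results using voronoi plots
# (disabled: the column names below -- 'Area', 'CENTERX', 'CENTERY' -- are not
# obs columns of this dataset and must be replaced before re-enabling)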
# d = adata.obs
# for area in d['Area'].unique():
#     ncells = len(d[d['Area'] == area].index)
#     if ncells <= 5000:
#         print(area)
#         sm.pl.voronoi(adata, 
#                       imageid='Area',
#                       x_coordinate='CENTERX',
#                       y_coordinate='CENTERY',
#                       flip_y=False,
#                       subset=area,
#                       color_by='neigh_leiden', 
#                  voronoi_edge_color = 'black',
#                  voronoi_line_width = 0.3, 
#                  voronoi_alpha = 0.8, 
#                  size_max=3000,
#                  overlay_points=None, 
#                  plot_legend=True, 
#                  legend_size=6)

In [None]:
# Composition of each neighbourhood: one bar per 'neigh_leiden' cluster,
# stacked by 'phenotype'.
sm.pl.stacked_barplot(
    adata,
    y_axis='phenotype',
    x_axis='neigh_leiden',
    figsize=(12, 8),
)

In [11]:
# spatial scatter plot with plotly
import plotly.express as px
import plotly.io as pio

def plotly (adata,phenotype,image_id=None,x='CENTERX',y='CENTERY',size=2, **kwargs):
    """Show an interactive scatter plot of cells coloured by an obs column.

    Parameters
    ----------
    adata : object exposing an ``obs`` table with the columns named below.
    phenotype : name of the ``adata.obs`` column used to colour the points.
    image_id : currently unused; the per-image filter is not implemented,
        so all cells in ``adata`` are plotted.
    x, y : names of the ``adata.obs`` columns holding the coordinates.
    size : marker size applied to every point.
    **kwargs : forwarded unchanged to ``px.scatter``.

    Returns
    -------
    None. The figure is displayed via ``fig.show()``.
    """
    obs = adata.obs
    # Sort by the colour column so the points are grouped by category.
    points = pd.DataFrame(
        {'x': obs[x], 'y': obs[y], 'col': obs[phenotype]}
    ).sort_values(by=['col'])

    fig = px.scatter(points, x="x", y="y", color="col", **kwargs)
    fig.update_traces(
        marker=dict(size=size),
        selector=dict(mode='markers'),
        hoverlabel=dict(namelength=-1),
    )
    # The y axis is reversed; both axes use the 'g' tick format.
    fig.update_xaxes(tickformat='g')
    fig.update_yaxes(autorange="reversed", tickformat='g')
    # Fully transparent plot and paper backgrounds.
    transparent = 'rgba(0, 0, 0, 0)'
    fig.update_layout({'plot_bgcolor': transparent, 'paper_bgcolor': transparent})
    fig.show()

# Pass this dataset's centroid columns explicitly: the function's default
# 'CENTERX'/'CENTERY' names are not obs columns here and would raise a KeyError.
plotly (adata, phenotype='neigh_leiden', x='Centroid X µm', y='Centroid Y µm', size=8)

In [None]:
# Correlation plot of 'phenotype' groups across 'neigh_leiden' clusters,
# used to judge which clusters are similar.
sm.pl.groupCorrelation(
    adata,
    condition='neigh_leiden',
    groupBy='phenotype',
    figsize=(12, 8),
    cmap='PiYG',
)

In [None]:
# TODO: consolidate clusters to neighbourhoods
# rename_dict = {'RCN1': ['1'],
#                'RCN2': ['0', '4'],
#                 'RCN3': ['2'],
#                 'RCN4': ['3']}

# adata = sm.hl.rename(adata, rename=rename_dict, from_column='neigh_leiden', to_column='RCNs')

In [None]:
# Display the per-cell metadata table to inspect the obs columns now available.
adata.obs

In [None]:
# Stacked bar plot of composition per 'Timepoint' (DLN pre vs post).
# NOTE(review): this stacks by 'phenotype'; once the RCN column exists,
# y_axis presumably should point at it instead -- confirm.
sm.pl.stacked_barplot(
    adata,
    y_axis='phenotype',
    x_axis='Timepoint',
)

In [None]:
# TODO: once neighbourhoods consolidated, 
# add code to do the stuff in the Spatial LDA part of the tutorial notebook