# Visualization of spatial data for the BARtab/bartools paper

Upstream scripts: `data_preprocessing_m4_paper.py`

1. Visualize location of clones in tissue section

2. Visualize leiden clusters and marker gene expression

In [None]:
import stereo as st
import scanpy as sc
import utils_stereoseq as us
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import squidpy as sq

Reading in processed stereo-seq data at bin 50 with barcode information.

In [None]:
# Load the processed bin-50 Stereo-seq AnnData object that carries the barcode
# annotation. The path contains no placeholders, so a plain string literal is used.
adata = sc.read(
    filename="/dawson_genomics/Projects/BGI_spatial/plots_paper/input_data/mouse4_bin50_bc.h5ad",
)

In [None]:
# Display the AnnData summary as the cell output to check what was loaded.
adata

Compute the fraction of bins that have a barcode annotation (missing annotations are counted both as real NaN values and as the string "nan").

In [None]:
# Fraction of bins with a barcode, computed twice: once treating real missing
# values (NaN) as "no barcode", once treating the literal string "nan" as such.
n_bins = adata.shape[0]
barcode_column = adata.obs.barcode
n_missing = barcode_column.isna().sum()
n_nan_label = (barcode_column == "nan").sum()
1 - n_missing / n_bins, 1 - n_nan_label / n_bins

Get top 10 barcodes by number of bins detected.

In [None]:
# Rank barcodes by the number of bins they occur in and keep the ten most
# frequent. The "nan" label is dropped first because it would otherwise always
# be the most frequent entry.
barcode_labels = adata.obs["barcode"]
barcode_counts = barcode_labels[barcode_labels != "nan"].value_counts()
top10_barcodes = barcode_counts.head(10).index.values

Plot location of 10 most frequent barcodes.

Plot vector friendly

In [None]:
# Facet plot with one panel per top-10 barcode, saved as a PDF.
sc.set_figure_params(vector_friendly=True, transparent=True, dpi_save=300)
grid_figsize = (10, 5)  # width, height
# NOTE(review): the positional 5 is presumably the number of grid columns —
# confirm against utils_stereoseq.plot_barcode_grid.
us.plot_barcode_grid(adata, top10_barcodes, grid_figsize, 5, invert=True)
plt.savefig(
    "/dawson_genomics/Projects/BGI_spatial/plots_paper/10_most_frequent_barcodes_facet_m4_vector_friendly.pdf",
    transparent=True,
    bbox_inches='tight',
)

In [None]:
# Overview map of the tissue section: every bin is drawn as a light-grey square
# and the bins carrying one of the ten most frequent barcodes are drawn on top,
# coloured by barcode.
cmap = sns.color_palette("tab10", as_cmap=True)

figsize = (7, 7)
fig, ax = plt.subplots(figsize=figsize)
adata_top10_clones = adata[adata.obs["barcode"].isin(top10_barcodes), :]

# Keyword arguments shared by both layers. Passing a `size` variable together
# with sizes=(3.3, 3.3) (min == max) yields one uniform marker size.
square_kws = dict(linewidth=0, marker="s", sizes=(3.3, 3.3), ax=ax)

all_bins = adata.obs
sns.scatterplot(
    x=all_bins["x"],
    y=all_bins["y"],
    color=(0.9, 0.9, 0.9, 1),
    size=all_bins["cell_id"],
    legend=False,
    **square_kws,
)

clone_bins = adata_top10_clones.obs
sns.scatterplot(
    x=clone_bins["x"],
    y=clone_bins["y"],
    hue=clone_bins["barcode"],
    size=clone_bins["barcode"],
    palette=sns.color_palette("tab10"),
    **square_kws,
)

# Strip all axis decoration and flip the y axis.
sns.despine(top=True, right=True, left=True, bottom=True, offset=None, trim=False)
ax.grid(False)
ax.axis('equal')
ax.axis('off')
ax.invert_yaxis()
# Place the legend outside the axes, to the right of the plot.
ax.legend(frameon=False, loc='center left', bbox_to_anchor=(1, 0.5), markerscale=3)

fig.savefig(
    "/dawson_genomics/Projects/BGI_spatial/plots_paper/10_most_frequent_barcodes_m4_squares.pdf",
    transparent=True,
    bbox_inches='tight',
)
plt.show()

## Clustering

Visualize distribution of gene and UMI counts that were used for filtering in `data_preprocessing_m4_paper.py`

In [None]:
# Set the figure resolution to 100 dpi for the exploratory plots that follow.
sc.set_figure_params(dpi=100)

In [None]:
# 2x2 grid of histograms: one column per QC metric, the full range in the top
# row and a zoomed-in range (starting at 0) in the bottom row.
fig, axs = plt.subplots(2, 2, figsize=(20, 5))
zoom_limits = [("total_counts", 2000), ("n_genes_by_counts", 1000)]
for column, (metric, zoom_max) in enumerate(zoom_limits):
    values = adata.obs[metric]
    values.hist(bins=50, linewidth=0, ax=axs[0, column])
    values.hist(range=(0, zoom_max), linewidth=0, bins=51, ax=axs[1, column])

plt.tight_layout()

Loading filtered and clustered adata object. 

In [None]:
# Replace `adata` with the filtered and clustered object. Anything derived from
# the previous `adata` (e.g. `adata_top10_clones`) still refers to the old,
# unfiltered data. The path has no placeholders, so a plain string is used.
adata = sc.read(
    filename="/dawson_genomics/Projects/BGI_spatial/plots_paper/input_data/mouse4_bin50_bc_clustered.h5ad",
)

Plot UMI counts per bin.

In [None]:
# Spatial map of the total UMI count per bin, one square marker per bin.
# (The former `from matplotlib.cm import get_cmap` was unused here and fails to
# import on Matplotlib versions that removed that function, so it is dropped.)
figsize = (7, 7)
_, ax = plt.subplots(figsize=figsize)
# `size` together with sizes=(3.3, 3.3) (min == max) gives one uniform marker size.
sns.scatterplot(x=adata.obs["x"], y=adata.obs["y"], hue=adata.obs["total_counts"], linewidth=0, marker="s", sizes=(3.3, 3.3), ax=ax, size=adata.obs["total_counts"])
sns.despine(top=True, right=True, left=True, bottom=True, offset=None, trim=False)
plt.grid(False)
plt.axis('equal')
ax.invert_yaxis()
plt.axis('off')
plt.show()

In [None]:
# Variance ratio of the first 50 principal components, on a log scale.
sc.pl.pca_variance_ratio(adata, log=True, n_pcs=50)

Leiden clustering on UMAP at different resolutions

In [None]:
# One UMAP panel per Leiden clustering stored in adata.obs.
leiden_keys = ["leiden_1", "leiden_0.7", "leiden_0.5", "leiden_0.2"]
sc.pl.umap(adata, color=leiden_keys)

Leiden clustering on the tissue section at different resolutions

In [None]:
# Spatial map of the Leiden clusters, one panel per clustering resolution.
# (The former `from matplotlib.cm import get_cmap` was unused here and fails to
# import on Matplotlib versions that removed that function, so it is dropped.)
figsize = (7 * 4, 7)
_, axs = plt.subplots(1, 4, figsize=figsize)
for i, res in enumerate(["leiden_1", "leiden_0.7", "leiden_0.5", "leiden_0.2"]):
    # `size` together with sizes=(2.5, 2.5) (min == max) gives one uniform marker size.
    sns.scatterplot(x=adata.obs["x"], y=adata.obs["y"], hue=adata.obs[res], linewidth=0, marker="s", sizes=(2.5, 2.5), ax=axs[i], size=adata.obs[res])
    sns.despine(top=True, right=True, left=True, bottom=True, offset=None, trim=False, ax=axs[i])
    axs[i].grid(False)
    axs[i].axis('equal')
    axs[i].axis('off')
    axs[i].invert_yaxis()

Leiden clustering resolution 0.7 for paper.

In [None]:
# Spatial map of the Leiden clusters at resolution 0.7, saved as a PDF.
# (The former `from matplotlib.cm import get_cmap` was unused here and fails to
# import on Matplotlib versions that removed that function, so it is dropped.)
figsize = (7, 7)
_, ax = plt.subplots(figsize=figsize)
# `size` together with sizes=(3.3, 3.3) (min == max) gives one uniform marker size.
sns.scatterplot(x=adata.obs["x"], y=adata.obs["y"], hue=adata.obs["leiden_0.7"], linewidth=0, marker="s", sizes=(3.3, 3.3), ax=ax, size=adata.obs["leiden_0.7"])
sns.despine(top=True, right=True, left=True, bottom=True, offset=None, trim=False)
plt.grid(False)
plt.axis('equal')
plt.axis('off')
ax.invert_yaxis()
# Place the legend outside the axes, to the right of the plot.
ax.legend(
    frameon=False,
    loc='center left',
    bbox_to_anchor=(1, 0.5),
    markerscale=3
)

plt.savefig("/dawson_genomics/Projects/BGI_spatial/plots_paper/leiden_cluster_m4.pdf", transparent=True, bbox_inches='tight')

In [None]:
# UMAP coloured by the Leiden clusters at resolution 0.7, drawn on a square axes.
# (The former `from matplotlib.cm import get_cmap` was unused here and fails to
# import on Matplotlib versions that removed that function, so it is dropped.)
sc.set_figure_params(vector_friendly = True, transparent=True)
figsize = (7, 7)
fig, ax = plt.subplots(figsize=figsize)
sc.pl.umap(adata, color=["leiden_0.7"], ax=ax, legend_loc='None', frameon=False, size=15, title="")
ax.set_aspect('equal')
# Saving is currently disabled; uncomment to write the figure to disk.
# fig.savefig("/dawson_genomics/Projects/BGI_spatial/plots_paper/leiden_cluster_umap_m4.pdf", transparent=True, bbox_inches='tight')

Cluster 3 appears to be defined by low total counts (bins at the edge of the tissue section and at the edges of holes). The data is not normalized by counts per bin, since total counts can be biologically relevant in the spatial context.

In [None]:
# Distribution of total UMI counts per bin within each Leiden cluster (resolution 0.7).
sns.violinplot(data=adata.obs, x="leiden_0.7", y="total_counts")
plt.show()

Distribution of top10 clones across UMAP.

In [None]:
# UMAP restricted to the bins that carry one of the ten most frequent barcodes,
# coloured by barcode.
# (The former `from matplotlib.cm import get_cmap` was unused here and fails to
# import on Matplotlib versions that removed that function, so it is dropped.)
sc.set_figure_params(vector_friendly = True, transparent=True)
figsize = (7, 7)
fig, ax = plt.subplots(figsize=figsize)
# `adata` was reloaded from the clustered file after `adata_top10_clones` was
# first created, so that earlier view still points at the unfiltered object.
# Re-derive the subset from the current `adata` so the plotted bins and the UMAP
# embedding come from the same object.
# NOTE(review): assumes the clustered object keeps the obs["barcode"] column — confirm.
adata_top10_clones = adata[adata.obs["barcode"].isin(top10_barcodes), :]
sc.pl.umap(adata_top10_clones, color=["barcode"], ax=ax, legend_loc='None', frameon=False, size=15, title="")
ax.set_aspect('equal')
# NOTE(review): the disabled save below reuses the file name of the Leiden UMAP
# figure; choose a distinct name before re-enabling it.
# fig.savefig("/dawson_genomics/Projects/BGI_spatial/plots_paper/leiden_cluster_umap_m4.pdf", transparent=True, bbox_inches='tight')

## Cluster marker genes

In [None]:
# Plot the 10 top-ranked genes per group, each panel with its own y axis.
# NOTE(review): relies on rank_genes_groups results already stored in `adata`
# (presumably computed upstream) — confirm.
sc.pl.rank_genes_groups(adata, n_genes=10, sharey=False, fontsize=14)

In [None]:
# Genes shown in the dot plots below.
marker_genes = ["Cd74", "Hba-a1", "Elane", "Tagln", "Pf4", "Marco"]

In [None]:
# Dot plot of the marker genes per Leiden cluster (resolution 0.7), using the
# values stored in adata.raw, saved as a PDF.
fig = sc.pl.dotplot(
    adata,
    marker_genes,
    groupby='leiden_0.7',
    use_raw=True,
    return_fig=True,
)
fig.savefig(
    "/dawson_genomics/Projects/BGI_spatial/plots_paper/marker_gene_dotplot_m4.pdf",
    transparent=True,
    bbox_inches='tight',
)

In [None]:
# Same dot plot, but using adata.X instead of adata.raw (use_raw=False); saved
# as a PDF and then displayed.
fig = sc.pl.dotplot(
    adata,
    marker_genes,
    groupby='leiden_0.7',
    use_raw=False,
    return_fig=True,
)
fig.savefig(
    "/dawson_genomics/Projects/BGI_spatial/plots_paper/marker_gene_dotplot_m4_scaled.pdf",
    transparent=True,
    bbox_inches='tight',
)
fig.show()

In [None]:
# Turn adata.raw into a standalone AnnData object so that single genes can be
# selected by name for the spatial expression maps.
adata_raw = adata.raw.to_adata()

In [None]:
# Spatial expression maps: one panel per marker gene, each with its own colour
# bar, saved together as a single PDF.
cmap = sns.color_palette("viridis", as_cmap=True)
figsize = (7 * 6, 7)
_, axs = plt.subplots(1, 6, figsize=figsize)
for i, gene in enumerate(["Hba-a1", "Marco", "Cd74", "Tagln", "Pf4", "Elane"]):
    # Densify the gene's expression column once per panel and reuse it;
    # `.toarray()` is the explicit equivalent of the `.A` shorthand.
    expression = adata_raw[:, gene].X.toarray().flatten()
    # One normalisation drives both the marker colours and the colour bar, so
    # the two cannot drift apart.
    norm = plt.Normalize(expression.min(), expression.max())
    # `size` together with sizes=(2.5, 2.5) (min == max) gives one uniform
    # marker size; the seaborn legend is suppressed because a colour bar is
    # added instead.
    sns.scatterplot(
        x=adata.obs["x"],
        y=adata.obs["y"],
        hue=expression,
        size=expression,
        sizes=(2.5, 2.5),
        linewidth=0,
        marker="s",
        palette=cmap,
        hue_norm=norm,
        legend=False,
        ax=axs[i],
    )
    axs[i].set_title(gene)
    sns.despine(top=True, right=True, left=True, bottom=True, offset=None, trim=False, ax=axs[i])
    axs[i].grid(False)
    axs[i].axis('equal')
    axs[i].axis('off')
    axs[i].invert_yaxis()
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    axs[i].figure.colorbar(sm, ax=axs[i], fraction=0.03, pad=0.04, label="log counts")
plt.subplots_adjust(top=0.5)

plt.savefig("/dawson_genomics/Projects/BGI_spatial/plots_paper/marker_gene_spatial_m4.pdf", transparent=True, bbox_inches='tight')