In [None]:
from anndata import read_h5ad
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Download the MERSCOPE h5ad and insert your path here.
ad_path = '/allen/programs/celltypes/workgroups/hct/SEA-AD/MERSCOPE/MTG_PAPER_FINAL/MTG_Spatial_2024_07_26.h5ad'
# Fail early with an actionable message when the path above has not been
# pointed at a local copy, rather than surfacing whatever error the reader raises.
if not Path(ad_path).is_file():
    raise FileNotFoundError(
        f"MERSCOPE h5ad not found at {ad_path!r}; download it and set `ad_path` to your local copy."
    )
ad_mtg = read_h5ad(ad_path)
sample_barcode = '1175046730'  # barcode we'll use in the single-section examples

## Overview of the SEA-AD anndata object
First, a look at all sections, colored by Donor ID and organized by Continuous Pseudo-progression Score (CPS).

In [None]:
# Spatial overview of every section: one scatter call per donor, so that each
# donor takes the next colour of the property cycle and gets its own legend entry.
fig, ax = plt.subplots(figsize=(20, 10))
donors = ad_mtg.obs['Donor'].unique().tolist()
cm = plt.get_cmap('nipy_spectral')
n_donors = len(donors)
# Sample the colormap at evenly spaced positions, one per donor.
ax.set_prop_cycle(color=[cm(1.*idx/n_donors) for idx in range(n_donors)])
for donor_id in donors:
    donor_cells = ad_mtg[ad_mtg.obs['Donor'] == donor_id]
    tiled_xy = donor_cells.obsm['X_spatial_tiled']
    ax.scatter(tiled_xy[:, 0], tiled_xy[:, 1], s=.01, label=donor_id)
ax.axis('equal')
ax.legend(bbox_to_anchor=[1.0, 1.03], markerscale=10, ncol=2, title='Donor IDs')
ax.set_title('Spatial view of sections per donor')
ax.set_xlabel('x coordinate')
ax.set_ylabel('y coordinate')

## Donor metadata
Let's take a look at the various components of the .obs, which includes metadata about the donors as well as information on cell classes, types, and supertypes. For starters, you'll note that:
-  Specimen Barcode is the unique identifier that links together the individual sections across all of our data. Each imaged section has a unique specimen barcode.
- There can be multiple specimen barcodes per donor ID - that's because several sections could be sampled from a single donor.
- Metadata about a given donor is shared across all related specimen barcodes, so that no data is without appropriate associated metadata

In [None]:
# Related donor information: the donor and specimen barcode recorded on each row of .obs
display(ad_mtg.obs[['Donor', 'Barcode']].head())
# Barcodes relating to a single donor with several sections sampled
related_barcodes = ad_mtg.obs.loc[ad_mtg.obs['Donor'] == 'H21.33.006', 'Barcode'].unique()
print(list(related_barcodes))
# Metadata saved with each donor, shown for the first of that donor's barcodes.
# The subset was previously built but never displayed, so the cell showed no metadata.
single_specimen = ad_mtg.obs[ad_mtg.obs['Barcode'] == related_barcodes[0]]
display(single_specimen.head())


### Each donor has various Alzheimer's disease scoring information stored in the obs.
However, the information used in our analysis is the Continuous Pseudo-progression Score, or CPS for short, which assigns a uniquely calculated Alzheimer's Disease burden score described in detail in the publication. It's shown below per donor, so you can see how our donors span the range of CPS.


In [None]:
# Plot the CPS score ('donor_pseudotime' in .obs) by donor, donors ordered by increasing CPS.
fig = plt.figure()
cps = ad_mtg.obs.sort_values('donor_pseudotime')
# Plotting `cps` directly draws one marker per row of .obs on a categorical axis.
# Only the distinct (Donor, CPS) pairs are visible, so keep just those: the figure is
# the same, and drop_duplicates keeps first occurrences so the CPS ordering is preserved.
cps_by_donor = cps[['Donor', 'donor_pseudotime']].drop_duplicates()
ax = fig.add_subplot(111, xlabel="Donor ID", ylabel="CPS", title='Continuous Pseudo-progression Score by Donor ID')
ax.scatter(cps_by_donor['Donor'], cps_by_donor['donor_pseudotime'])
# Rotate via tick_params so the setting applies to the donor labels created by the plot,
# not only to the tick labels that happen to exist before any data is drawn.
ax.tick_params(axis='x', labelrotation=90)


### The obs also contains detailed cell typing information

In [None]:
# Plot the spatial subclass data for the example section (`sample_barcode`).
section_subset = ad_mtg[ad_mtg.obs['Barcode']==sample_barcode]
# Colours are paired with subclasses by position in the class/subclass-sorted list.
# Build that list from the full dataset, as the all-section UMAP cell does, rather than
# from this one section: if a subclass is absent here, a section-local list would shift
# every later subclass onto its neighbour's colour.
# NOTE(review): assumes uns['subclass_colors'] follows this sorted order - confirm.
subclasses = ad_mtg.obs.sort_values(by=['class', 'subclass'])['subclass'].unique().tolist()
colors = section_subset.uns['subclass_colors']
color_by_subclass = dict(zip(subclasses, colors))

section_labels = section_subset.obs['subclass']
# note the .obsm key used here - this is showing the data rotated so L2/3 is at the top
section_xy = section_subset.obsm['X_selected_cell_spatial_tiled']

fig, ax = plt.subplots()
for subc in subclasses:
    in_subclass = (section_labels == subc).to_numpy()
    if not in_subclass.any():
        # Subclass not present in this section: nothing to draw, no legend entry.
        continue
    ax.scatter(section_xy[in_subclass, 0], section_xy[in_subclass, 1],
               s=.1, label=subc, color=color_by_subclass[subc])
ax.axis('equal')
ax.legend(bbox_to_anchor=[1.0, 1.03], markerscale=5, ncol=2, title='Subclasses')
ax.set_title('Subclasses in Specimen Barcode '+ sample_barcode)
# TODO: axis labels here state microns; make the other spatial plots match.
ax.set_xlabel('x coordinate (um)')
ax.set_ylabel('y coordinate (um)')


### Critical to our analysis was also the "Used in analysis" field
This field (stored as `selected_cells` in the obs) records a critical filtering step taken to remove cells that were not used when comparing proportions between sections. More information on this can be found in the publication methods.

In [None]:
# Show, for all sections, the cells that WERE included in analysis
# (selected_cells == True), one scatter call and legend entry per donor.
donors = ad_mtg.obs['Donor'].unique().tolist()
fig, ax = plt.subplots(figsize=(20, 10))
# Give every donor its own colour, as the per-donor overview plot does. Without an
# explicit cycle the colours repeat once there are more donors than entries in
# matplotlib's default property cycle, making the legend ambiguous.
donor_cmap = plt.get_cmap('nipy_spectral')
ax.set_prop_cycle(color=[donor_cmap(1.*i/len(donors)) for i in range(len(donors))])
is_selected = ad_mtg.obs['selected_cells']==True
for donor_id in donors:
    analyzed_subset = ad_mtg[(ad_mtg.obs['Donor']==donor_id)&is_selected]
    selected_xy = analyzed_subset.obsm['X_selected_cell_spatial_tiled']
    ax.scatter(selected_xy[:, 0], selected_xy[:, 1], label=donor_id, s=.01, alpha=0.5)
ax.axis('equal')
ax.legend(ncol=2, markerscale=10, title='Donors')
ax.set_title('Selected cells for each section, by donor')
ax.set_xlabel('cell x coordinate')
ax.set_ylabel('cell y coordinate')


### Explore the selected cells for each section

In [None]:
# Used vs. unused cells for the example section, drawn from the
# 'X_selected_cell_spatial_tiled' coordinates stored in .obsm.
section_subset = ad_mtg[ad_mtg.obs['Barcode']==sample_barcode]
analyzed_cells = section_subset[section_subset.obs['selected_cells']==True]
not_analyzed = section_subset[section_subset.obs['selected_cells']==False]

fig, ax = plt.subplots()
# Included cells first (semi-transparent blue), excluded cells on top in red.
included_xy = analyzed_cells.obsm['X_selected_cell_spatial_tiled']
ax.scatter(included_xy[:, 0], included_xy[:, 1],
           color='blue', label='included in analysis', s=.01, alpha=0.5)
excluded_xy = not_analyzed.obsm['X_selected_cell_spatial_tiled']
ax.scatter(excluded_xy[:, 0], excluded_xy[:, 1],
           color='red', label='not included in analysis', s=.01)
ax.axis('equal')
ax.legend(markerscale=5)
ax.set_title('Cells used in analysis for Specimen Barcode '+sample_barcode)
ax.set_xlabel('cell x coordinate')
ax.set_ylabel('cell y coordinate')


## Spatial Gene Expression

If you're more interested in seeing the spatial layout of various genes, you can view those results easily with scanpy's plotting function (shown below for a single section)

In [None]:
# Spatial scatter of the example section, coloured by the expression of each sample gene.
import scanpy.pl as scp
section_subset = ad_mtg[ad_mtg.obs['Barcode']==sample_barcode]
# a few sample genes; one figure is drawn per gene, in this order
for gene in ('CUX2', 'RORB', 'LRRC4C'):
    scp.scatter(adata=section_subset, x='napari_x', y='napari_y', color=gene)


### UMAPS
As you've seen above, the .obsm contains plottable spatial data for each of our collected sections. It also contains the UMAPs calculated for various cell components. You can choose to view the UMAP for a single section, or for all sections, as shown below.

In [None]:
# UMAP of subclasses for all sections.
# Subclass names are ordered by class, then subclass; each one is paired with the
# colour at the same position in uns['subclass_colors'].
subclasses = ad_mtg.obs.sort_values(by=['class', 'subclass'])['subclass'].unique().tolist()
colors = ad_mtg.uns['subclass_colors']

fig, ax = plt.subplots()
for idx in range(len(subclasses)):
    subclass_name = subclasses[idx]
    subclass_cells = ad_mtg[ad_mtg.obs['subclass'] == subclass_name]
    umap_xy = subclass_cells.obsm['X_umap']
    ax.scatter(umap_xy[:, 0], umap_xy[:, 1], s=0.01, label=subclass_name, color=colors[idx])
ax.axis('equal')
ax.legend(bbox_to_anchor=[1.0, 1.03], markerscale=5, ncol=2, title='Subclasses')
ax.set_title('UMAP of subclasses for all sections')
ax.set_xlabel('UMAP 0')
ax.set_ylabel('UMAP 1')


In [None]:
# UMAP of all supertypes for all sections.
# Supertypes are ordered by class, subclass, then supertype; each one is paired with
# the colour at the same position in uns['cluster_colors']. No legend is drawn.
supertype_key = 'supertype_scANVI_leiden'
supertypes = ad_mtg.obs.sort_values(by=['class', 'subclass', supertype_key])[supertype_key].unique().tolist()
colors = ad_mtg.uns['cluster_colors']

fig, ax = plt.subplots()
for idx in range(len(supertypes)):
    supertype_name = supertypes[idx]
    supertype_cells = ad_mtg[ad_mtg.obs[supertype_key] == supertype_name]
    umap_xy = supertype_cells.obsm['X_umap']
    ax.scatter(umap_xy[:, 0], umap_xy[:, 1], s=0.01, label=supertype_name, color=colors[idx])
ax.axis('equal')
ax.set_title('UMAP of supertypes for all sections')
ax.set_xlabel('UMAP 0')
ax.set_ylabel('UMAP 1')


In [None]:
# UMAP of subclasses for the example section, drawn on top of all other sections in grey.
# Look up the subclass order and palette here instead of reusing `subclasses`/`colors`
# from earlier cells: the supertype cell that runs just before this one rebinds `colors`
# to uns['cluster_colors'], so on a top-to-bottom run the subclasses were being drawn
# with supertype colours.
subclasses = ad_mtg.obs.sort_values(by=['class', 'subclass'])['subclass'].unique().tolist()
subclass_colors = ad_mtg.uns['subclass_colors']

umap_xy = ad_mtg.obsm['X_umap']
subclass_labels = ad_mtg.obs['subclass']
in_section = (ad_mtg.obs['Barcode']==sample_barcode).to_numpy()

fig, ax = plt.subplots()
# Background: every other section's cells that carry a subclass label, as one grey layer.
# This is the same set of points the per-subclass grey loop drew, without re-slicing the
# whole dataset once per subclass.
background = ~in_section & subclass_labels.notna().to_numpy()
ax.scatter(umap_xy[background, 0], umap_xy[background, 1], s=0.1, color='grey', alpha=.5)
# Foreground: the example section, one scatter call (and legend entry) per subclass.
for i, subc in enumerate(subclasses):
    in_subclass = in_section & (subclass_labels==subc).to_numpy()
    ax.scatter(umap_xy[in_subclass, 0], umap_xy[in_subclass, 1], s=0.1,
               color=subclass_colors[i], label=subc)

ax.axis('equal')
ax.legend(bbox_to_anchor=[1.0, 1.03], markerscale=5, ncol=2, title='Subclasses')
ax.set_title('UMAP of subclasses for Specimen Barcode '+sample_barcode)
ax.set_xlabel('UMAP 0')
ax.set_ylabel('UMAP 1')

In [None]:
# UMAP of all supertypes for all sections.
# NOTE(review): this cell repeats the earlier all-sections supertype UMAP cell verbatim;
# confirm whether a single-section variant was intended here.
sort_keys = ['class', 'subclass', 'supertype_scANVI_leiden']
supertypes = ad_mtg.obs.sort_values(by=sort_keys)['supertype_scANVI_leiden'].unique().tolist()
colors = ad_mtg.uns['cluster_colors']

fig, ax = plt.subplots()
# Colours are taken from uns['cluster_colors'] by position in the sorted supertype list.
for position, supertype_label in enumerate(supertypes):
    members = ad_mtg[ad_mtg.obs['supertype_scANVI_leiden'] == supertype_label]
    coords = members.obsm['X_umap']
    ax.scatter(coords[:, 0], coords[:, 1], s=0.01, label=supertype_label, color=colors[position])
ax.axis('equal')
ax.set_title('UMAP of supertypes for all sections')
ax.set_xlabel('UMAP 0')
ax.set_ylabel('UMAP 1')
