In [1]:
import anndata as ad
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import ccf_polygons as cpoly
import ccf_plots as cplot

%matplotlib inline

sns.set_style('white')

In [3]:
# import warnings
# warnings.filterwarnings('ignore', category=UserWarning)
# warnings.filterwarnings('ignore', category=FutureWarning)

In [4]:
# Read the custom-CCF-annotated MERFISH file (TH/ZI-only subset) and display its summary.
h5ad_path = (
    "../data/merfish_638850_AIT17.custom_CCF_annotated_TH_ZI_only_2023-05-04_00-00-00/"
    "atlas_brain_638850_AIT17_custom_CCF_annotated_TH_ZI_only.h5ad"
)
adata = ad.read_h5ad(h5ad_path)
adata

In [None]:
# Copy 'cluster_label' into a 'cluster' column for compatibility with the new version
# (which component is meant is not shown here -- presumably the ccf_* helpers; confirm).
# TODO: more fields should be aligned eventually.
adata.obs['cluster'] = adata.obs['cluster_label']

In [5]:
# Mirror the first two 'spatial_cirro' columns into obs so that everything
# needed for plotting lives in one dataframe.
spatial_cirro = adata.obsm['spatial_cirro']
adata.obs['cirro_x'] = spatial_cirro[:, 0]
adata.obs['cirro_y'] = spatial_cirro[:, 1]

# define and view CCF polygons

## calc polygons

In [8]:
ccf_polygons = cpoly.get_ccf_polygons(adata.obs)

In [9]:
# Keep only the polygons whose "name" index level is listed in cpoly.CCF_TH_NAMES.
is_th_zi_name = ccf_polygons.index.isin(cpoly.CCF_TH_NAMES, level="name")
ccf_polygons_th_zi = ccf_polygons[is_th_zi_name]

In [10]:
# polygon for whole thalamus outline
th_outline_polygons = cpoly.get_outline_polygon(adata.obs)

In [11]:
# Three example sections, reused by later overlay cells via `sections`.
sections = [
    "1199651024",
    "1199651036",
    "1199651048",
]
cplot.plot_ccf_overlay(
    adata.obs,
    ccf_polygons,
    sections=sections,
)

In [12]:
cplot.plot_section_outline(th_outline_polygons, sections='1199651060', alpha=0.05)

### define thalamus subset (requires atlas download)

This section can be skipped for now; the result has already been saved.

In [13]:
# import urllib.request, json 
# with urllib.request.urlopen("http://api.brain-map.org/api/v2/structure_graph_download/1.json") as url:
#     atlas = json.load(url)['msg']

In [14]:
# from bg_atlasapi.bg_atlas import BrainGlobeAtlas
# atlas = BrainGlobeAtlas("allen_mouse_100um", brainglobe_dir='/scratch/brainglobe/')

# ccf_polygons_th_zi = {x: ccf_polygons[x] for x in ccf_polygons.keys() 
#                       #549: thalamus, 797: ZI
#                       if (549 in atlas.structures[x]['structure_id_path'] 
#                           and not 549==atlas.structures[x]['structure_id_path'][-1])
#                       or 797 in atlas.structures[x]['structure_id_path']
#                      }

## view thalamus subset with subclass labels

In [15]:
# Restrict to cells belonging to the three neuronal divisions listed below.
neuronal_divisions = [
    "3 PAL-sAMY-TH-HY-MB-HB neuronal",
    "2 Subpallium GABAergic",
    "4 CBX-MOB-other neuronal",
]
is_neuronal = adata.obs["division_id_label"].isin(neuronal_divisions)
adata_neuronal = adata[is_neuronal].copy()

# Store cluster_id as categorical strings (int -> str) to simplify querying.
cluster_id_str = adata_neuronal.obs['cluster_id'].astype(int).astype(str)
adata_neuronal.obs['cluster_id'] = pd.Categorical(cluster_id_str)

In [32]:
# Original author's note (appears truncated): "may need to tweak clustering params on fine"
no_foreground_cells = adata.obs.iloc[[], :]  # empty frame: no foreground cells
cplot.plot_ccf_overlay(
    no_foreground_cells,
    ccf_polygons_th_zi,
    sections=sections,
    highlight='all',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    min_group_count=0,
    legend=False,
)

In [17]:
# Subclasses with more than 300 cells in the neuronal subset.
subclass_counts = adata_neuronal.obs['subclass_label'].value_counts()
subclasses = subclass_counts[subclass_counts > 300]
subclasses

In [18]:
len(subclasses)

In [19]:
adata_neuronal.obs.loc[lambda df: df['subclass_label'].isin(subclasses.index), 'division_id_label'].value_counts().loc[lambda x: x>0]

In [20]:
adata_neuronal.obs.loc[lambda df: df['subclass_label'].isin(subclasses.index), 'class_label'].value_counts().loc[lambda x: x>0]

In [21]:
adata_neuronal.obs.loc[lambda df: df['subclass_label'].isin(subclasses.index), 'supertype_label'].unique().shape[0]

In [22]:
adata_neuronal.obs.loc[lambda df: df['subclass_label'].isin(subclasses.index), 'cluster_label'].unique().shape[0]

In [33]:
from colorcet import glasbey
from matplotlib.colors import rgb2hex

# One glasbey colour per retained subclass, keyed by subclass name.
subclass_colors = sns.color_palette(glasbey, n_colors=len(subclasses.index))
point_palette = dict(zip(subclasses.index, subclass_colors))

# Display the palette as hex codes, with spaces in the names replaced by underscores.
{name.replace(' ', '_'): rgb2hex(color) for name, color in point_palette.items()}

In [47]:
# Foreground: neuronal cells from the retained subclasses, coloured by subclass.
in_kept_subclass = adata_neuronal.obs['subclass_label'].isin(subclasses.index)
obs = adata_neuronal.obs.loc[in_kept_subclass]
cplot.plot_ccf_overlay(
    obs,
    ccf_polygons_th_zi,
    bg_cells=adata_neuronal.obs,
    point_hue='subclass_label',
    sections=sections,
    shape_palette='dark_outline',
    point_palette=point_palette,
    legend=False,
    highlight='all',
    min_group_count=30,
)

# Investigate prong 1 cluster-nucleus groups

In [48]:
nuclei_df = pd.read_csv("../resources/Prong 1 Vitessce links by nucleus.csv", index_col=0)

def get_obs_from_annotated_clusters(name, adata, nuclei_df):
    """Return the cells whose cluster is annotated for the nucleus ``name``.

    Parameters
    ----------
    name
        Index label of a row in ``nuclei_df``.
    adata
        Object exposing an ``obs`` DataFrame with a string ``'cluster_label'``
        column. The leading whitespace-delimited token of each label is
        treated as the cluster id (e.g. ``'1142 TH ...'`` -> ``'1142'``).
    nuclei_df
        DataFrame with an ``'annotated clusters'`` column holding a
        comma-separated list of cluster ids.

    Returns
    -------
    pandas.DataFrame
        The rows of ``adata.obs`` whose cluster id is listed for ``name``;
        empty when the annotation is missing (NaN) or blank.

    Raises
    ------
    KeyError
        If ``name`` is not in the index of ``nuclei_df``.
    """
    annotated = nuclei_df.loc[name, "annotated clusters"]
    if pd.isna(annotated):
        # No annotation for this nucleus: select nothing instead of failing on NaN.
        clusters = []
    else:
        # Tolerate "a,b", "a, b" and stray surrounding whitespace.
        clusters = [tok.strip() for tok in str(annotated).split(',') if tok.strip()]
    # Compare on the whole leading token rather than a fixed 4-character prefix,
    # so ids that are shorter or longer than 4 characters match correctly.
    cluster_ids = adata.obs['cluster_label'].astype(str).str.split(n=1).str[0]
    return adata.obs.loc[cluster_ids.isin(clusters)]

## VM

In [63]:
# Alternative selection kept from the original (disabled):
# obs = get_obs_from_annotated_clusters('VM', adata, nuclei_df)
is_vm_cluster = adata_neuronal.obs['cluster_label'] == '1142 TH Prkcd Grin2c Glut_13'
obs = adata_neuronal.obs.loc[is_vm_cluster]
cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=['VM', 'VAL'],
    legend='both',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
)

In [60]:
# Clusters within the supertype that have more than 5 cells.
is_vm_supertype = adata_neuronal.obs['supertype_label'] == 'TH Prkcd Grin2c Glut_13'
obs = adata_neuronal.obs.loc[is_vm_supertype]
supertype_cluster_counts = obs['cluster_label'].value_counts()
supertype_cluster_counts[supertype_cluster_counts > 5]

In [64]:
# remove 1140? annotated as VPMpc
clusters = ['1138', '1141', '1142']
# Select cells by the first four characters of their cluster label.
label_prefix = adata.obs['cluster_label'].str[:4]
obs = adata.obs.loc[label_prefix.isin(clusters)]
cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=['VM'],
    legend='cells',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    shape_palette='greyscale',
)

### diff exp

In [65]:
import scanpy as sc

In [66]:
# Wilcoxon test of cluster 1142 against reference cluster 1138, ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'cluster_id',
    groups=['1142'],
    reference='1138',
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [68]:
# Calb1 expression on section 1199651045 (nuclei VM/VAL, VM highlighted).
section, gene = '1199651045', "Calb1"
nuclei, highlight_nuclei = ['VM', 'VAL'], ['VM']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [69]:
# Necab1 expression on section 1199651045 (nuclei VM/VAL, VM highlighted).
section, gene = '1199651045', "Necab1"
nuclei, highlight_nuclei = ['VM', 'VAL'], ['VM']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [70]:
# Spon1 expression on section 1199651045 (nuclei VM/VAL, VM highlighted).
section, gene = '1199651045', "Spon1"
nuclei, highlight_nuclei = ['VM', 'VAL'], ['VM']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [71]:
# Cbln1 expression on section 1199651048 (nuclei VM/VAL, VM highlighted).
section, gene = '1199651048', "Cbln1"
nuclei, highlight_nuclei = ['VM', 'VAL'], ['VM']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [72]:
# Tmie expression on section 1199651048 (nuclei VM/VAL, VM highlighted).
section, gene = '1199651048', 'Tmie'
nuclei, highlight_nuclei = ['VM', 'VAL'], ['VM']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [73]:
# Galnt18 expression on section 1199651045 (nuclei VM/VAL, VM highlighted).
section, gene = '1199651045', 'Galnt18'
nuclei, highlight_nuclei = ['VM', 'VAL'], ['VM']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [74]:
# Tie-corrected Wilcoxon test for supertype 'TH Prkcd Grin2c Glut_13'
# (no explicit reference group is passed).
sc.tl.rank_genes_groups(
    adata_neuronal,
    'supertype_label',
    groups=['TH Prkcd Grin2c Glut_13'],
    method='wilcoxon',
    tie_correct=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [75]:
# Tie-corrected Wilcoxon test for cluster 1142 (no explicit reference group is passed).
sc.tl.rank_genes_groups(
    adata_neuronal,
    'cluster_id',
    groups=['1142'],
    method='wilcoxon',
    tie_correct=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [76]:
# ISH diff ex search
# https://mouse.brain-map.org/gene/show/87708

# Galnt18 expression on section 1199651048 (nuclei VM/VAL, VM highlighted).
section, gene = '1199651048', "Galnt18"
nuclei, highlight_nuclei = ['VM', 'VAL'], ['VM']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [77]:
# Section 1199651057; `gene` is not reassigned here, so it keeps the value
# left by the most recently executed cell that set it.
section = '1199651057'
nuclei, highlight_nuclei = ['VM', 'VAL'], ['VM']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [78]:
# Stxbp6 expression; `section` is not reassigned here, so it keeps the value
# left by the most recently executed cell that set it.
gene = "Stxbp6"
nuclei, highlight_nuclei = ['VM', 'VAL'], ['VM']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [79]:
# Spon1 expression on section 1199651048 (nuclei VM/VAL, VM highlighted).
section, gene = '1199651048', "Spon1"
nuclei, highlight_nuclei = ['VM', 'VAL'], ['VM']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [80]:
# Section 1199651045; `gene` is not reassigned here, so it keeps the value
# left by the most recently executed cell that set it.
section = '1199651045'
nuclei, highlight_nuclei = ['VM', 'VAL'], ['VM']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

### outlining VM / composition

In [81]:
import geopandas as gp

# One point geometry per neuronal cell, built from the transposed
# 'spatial_cirro' coordinate columns and indexed by cell name.
cirro_coords = adata_neuronal.obsm['spatial_cirro'].T
neuron_points = gp.GeoSeries.from_xy(*cirro_coords, index=adata_neuronal.obs_names)

In [83]:
import hdbscan

# Cells of cluster 1142 on section 1199651048. The query runs on `adata`, where
# cluster_id is compared as a number (only adata_neuronal had it converted to strings).
adata_vm = adata[adata.obs.query("section=='1199651048' & cluster_id==1142").index].copy()
df = adata_vm.obs
X = df[['cirro_x','cirro_y']].values

# Density-based spatial clustering of the cells' cirro coordinates.
df['spatial_cluster'] = hdbscan.HDBSCAN(min_samples=10).fit_predict(X)

# HDBSCAN assigns the label -1 to noise points. Drop them before ranking clusters
# by size; otherwise noise can be picked as one of the two "core" clusters.
cluster_sizes = df.loc[df['spatial_cluster'] >= 0, 'spatial_cluster'].value_counts()
core_clusters = cluster_sizes.index[:2]  # the two largest real clusters
df['spatial_group'] = df['spatial_cluster'].apply(lambda x: 'core' if x in core_clusters else 'other')

In [85]:
sns.scatterplot(data=df,  x='cirro_x', y='cirro_y', hue='spatial_cluster', s=4,  legend=True, palette='tab10')

In [86]:
# Polygon built from the 'core' cells, then used to flag every neuronal
# point that intersects it.
core_cells = df.query("spatial_group=='core'")
nucleus = cpoly.get_polygon_from_obs(core_cells)
subset = neuron_points.geometry.intersects(nucleus)

adata_neuronal.obs['in_nucleus'] = subset.values
adata_nucleus = adata_neuronal[subset]

In [87]:
# sc.pl.embedding(adata_nucleus, "spatial_cirro", color="supertype_label")
sc.pl.embedding(adata_nucleus, "spatial_cirro", color="cluster_label", s=20)

In [88]:
adata_neuronal.obs.query("section=='1199651048' & supertype_label=='TH Prkcd Grin2c Glut_13'").groupby(['cluster_label'])['in_nucleus'].apply(lambda x: x.value_counts())

In [89]:
# need to include VAL types ??

In [90]:
# Wilcoxon test of the 'core' spatial group against 'other', ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_vm,
    'spatial_group',
    groups=['core'],
    reference='other',
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_vm, groups=['core'], n_genes=20)

## anterior

In [91]:
nuclei = ['AD', 'AV', 'AMd', 'AMv']
section = '1199651060'
# cplot.plot_ccf_section(ccf_polygons_th_zi, section, highlight=nuclei, )
no_foreground_cells = adata.obs.loc[[], :]  # empty selection: background cells only
cplot.plot_ccf_overlay(
    no_foreground_cells,
    ccf_polygons_th_zi,
    sections=[section],
    highlight=nuclei,
    legend='both',
    bg_cells=adata_neuronal.obs,
    bg_shapes=True,
    min_group_count=0,
)

In [92]:
# Cluster ids 1169-1171 followed by 1095-1098, as strings.
clusters = [str(cluster_id) for cluster_id in [*range(1169, 1171 + 1), *range(1095, 1098 + 1)]]
# Select cells by the first four characters of their cluster label.
label_prefix = adata.obs['cluster_label'].str[:4]
obs = adata.obs.loc[label_prefix.isin(clusters)]

cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=nuclei,
    legend='cells',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    shape_palette='greyscale',
)

In [93]:
# Disabled alternative call kept from the original:
# cplot.plot_nucleus_cluster_comparison_slices('AD', adata, nuclei_df, ccf_polygons_th_zi)
obs = get_obs_from_annotated_clusters('AD', adata, nuclei_df)
cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei='AD',
)

In [94]:

# NOTE(review): this cell repeats the preceding cell verbatim; one of the two
# can likely be removed.
# cplot.plot_nucleus_cluster_comparison_slices('AD', adata, nuclei_df, ccf_polygons_th_zi)
obs = get_obs_from_annotated_clusters('AD', adata, nuclei_df)
cplot.plot_nucleus_cluster_comparison_slices(obs, ccf_polygons_th_zi, nuclei='AD')

### diff exp

#### AV

In [95]:
# Wilcoxon test for subclass 'AV Col27a1 Glut', ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'subclass_label',
    groups=['AV Col27a1 Glut'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

##### Col27a1

In [102]:
# Col27a1 expression on section 1199651060 (anterior nuclei, AD and AV highlighted).
section, gene = '1199651060', "Col27a1"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AD', 'AV']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=ant_nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

##### Sorcs3
Mathew's plots in brain1 suggested high expression exclusively in the '1097 AV Col27a1 Glut' cluster in the AV.

That does not appear to be the case in brain3...

In [103]:
# Mathew's plots in brain1 suggested high expression exclusively in the
# '1097 AV Col27a1 Glut' cluster in the AV.
# That does not appear to be the case in brain3...
section, gene = '1199651060', "Sorcs3"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AV']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=ant_nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

#### AD

In [104]:
# Wilcoxon test for subclass 'AD Serpinb7 Glut', ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'subclass_label',
    groups=['AD Serpinb7 Glut'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [105]:
# Syndig1l expression on section 1199651060 (anterior nuclei, AD highlighted).
section, gene = '1199651060', "Syndig1l"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AD']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=ant_nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

#### AM

In [106]:
# Wilcoxon test for supertype 'TH Prkcd Grin2c Glut_9', ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'supertype_label',
    groups=['TH Prkcd Grin2c Glut_9'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [107]:
# Cbln1 expression on section 1199651060 (anterior nuclei, AMd and AMv highlighted).
section, gene = '1199651060', "Cbln1"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AMd', 'AMv']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=ant_nuclei, highlight=highlight_nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [108]:
# Flag cells whose supertype label contains 'Glut_9', 'AD' or 'AV'; the flag is
# stored as the strings 'True' / 'False'.
# NOTE(review): these are substring matches, so 'Glut_9' would also match a label
# such as 'Glut_90' if one exists -- verify against the actual supertype labels.
supertype_labels = adata_neuronal.obs['supertype_label']
has_glut9 = supertype_labels.str.contains('Glut_9')
has_ad = supertype_labels.str.contains('AD')
has_av = supertype_labels.str.contains('AV')
adata_neuronal.obs['anterior_type'] = (has_glut9 | has_ad | has_av).astype(str)

In [110]:
# Wilcoxon test across the 'anterior_type' groups, ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'anterior_type',
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [111]:
# C1ql3 expression on section 1199651060 (anterior nuclei, no highlight argument).
section, gene = '1199651060', "C1ql3"
nuclei = ['AD', 'AV', 'AMd', 'AMv']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

In [112]:
# Shox2 expression on section 1199651060 (anterior nuclei, no highlight argument).
section, gene = '1199651060', "Shox2"
nuclei = ['AD', 'AV', 'AMd', 'AMv']
cplot.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    bg_shapes=False, show_outline=True, cmap='Blues',
);

## MD

In [113]:
# Single cluster compared against the MD nucleus, drawn in a fixed colour.
cluster = "1133 TH Prkcd Grin2c Glut_10"
obs = adata.obs.loc[adata.obs['cluster_label'] == cluster]
nuclei = ['MD']
palette = {cluster: "#bcbd22"}
cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=nuclei,
    legend='cells',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    shape_palette='dark_outline',
    point_palette=palette,
    s=5,
)

In [116]:
# Clusters annotated for MD; `nuclei` is not reassigned here, so it keeps the
# value left by the most recently executed cell that set it.
obs = get_obs_from_annotated_clusters('MD', adata, nuclei_df)

cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=nuclei,
    legend='cells',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    shape_palette='dark_outline',
    s=5,
)

### diff exp

In [117]:
# Wilcoxon test for cluster 1133, ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'cluster_id',
    groups=['1133'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

## other

In [118]:
# Clusters annotated for PT, compared against the PT nucleus.
obs = get_obs_from_annotated_clusters('PT', adata, nuclei_df)
cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei='PT',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
)

In [119]:
# Neuronal cells whose supertype label contains 'PVT-PT', compared against PT and PVT.
pvt_pt_query = "supertype_label.str.contains('PVT-PT')"
obs = adata_neuronal.obs.query(pvt_pt_query, engine='python')
cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=['PT', 'PVT'],
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    legend='both',
)

In [120]:
# Clusters annotated for PVT, compared against PT and PVT.
obs = get_obs_from_annotated_clusters('PVT', adata, nuclei_df)
cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=['PT', 'PVT'],
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    legend='both',
)

In [121]:
# Clusters annotated for LGd, compared against the three LGd subdivisions.
obs = get_obs_from_annotated_clusters('LGd', adata, nuclei_df)
cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=['LGd-co', 'LGd-ip', 'LGd-sh'],
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
)

In [122]:
# Clusters annotated for SMT, compared against the SMT nucleus.
obs = get_obs_from_annotated_clusters('SMT', adata, nuclei_df)
cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei='SMT',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
)

In [123]:
# Clusters annotated for LD, compared against the LD nucleus.
obs = get_obs_from_annotated_clusters('LD', adata, nuclei_df)
cplot.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei='LD',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
)