In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import anndata as ad
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import abc_load as abc
import ccf_polygons as cshapes
import ccf_plots as cplots

# from colorcet import glasbey
import scanpy as sc

%matplotlib inline

sns.set_style('white')

# Load TH+ZI subset of ABC atlas

In [3]:
# Load the TH+ZI subset of the ABC atlas as an AnnData object.
adata = abc.load_adata(
    transform='log2',
    subset_to_TH_ZI=True,
    cirro_names=True,
    flip_y=True,
    with_metadata=True,
    with_colors=True,
)

In [4]:
# Strip the leading numeric taxonomy ID (e.g. "0666 ") from the supertype and
# subclass labels so they can be matched by name alone.
#
# An anchored regex is used instead of a fixed-width slice so that this cell is
# idempotent: once the ID has been removed the pattern no longer matches,
# whereas a slice would chop further characters off the names on every re-run.
# NOTE(review): assumes no label name itself starts with digits followed by
# whitespace - confirm against the taxonomy.
adata.obs['supertype'] = pd.Categorical(
    adata.obs['supertype'].str.replace(r'^\d+\s+', '', regex=True)
)
adata.obs['subclass'] = pd.Categorical(
    adata.obs['subclass'].str.replace(r'^\d+\s+', '', regex=True)
)

# define and view CCF polygons

## calc polygons

In [5]:
# Compute the CCF polygons from the cell metadata table (adata.obs).
ccf_polygons = cshapes.get_ccf_polygons(adata.obs)

In [6]:
# Keep only the polygons whose "name" index level is listed in
# cshapes.CCF_TH_NAMES.
ccf_polygons_th_zi = ccf_polygons[ccf_polygons.index.isin(cshapes.CCF_TH_NAMES, level="name")]

In [7]:
# Sections shown in the overlay plots throughout the notebook.
# sections = ["1199651024", "1199651036", "1199651048"]
sections = [
    "C57BL6J-638850.32",
    "C57BL6J-638850.36",
    "C57BL6J-638850.40",
    "C57BL6J-638850.44",
]
cplots.plot_ccf_overlay(adata.obs, ccf_polygons_th_zi, sections=sections)

## view thalamic neurons subset with subclass labels

In [8]:
# Filter out non-neuronal cells.
# NOTE(review): filter_midbrain=True is also passed - confirm whether midbrain
# classes are meant to be kept or removed by this flag.
adata_neuronal = abc.filter_adata_by_class(
    adata,
    filter_nonneuronal=True,
    filter_midbrain=True,
)

In [9]:
# Store the first space-delimited token of each cluster label as a string
# 'cluster_id' column, to simplify querying.
adata_neuronal.obs['cluster_id'] = pd.Categorical(
    adata_neuronal.obs['cluster'].apply(lambda label: label.split(' ')[0])
)

In [10]:
# Background-only overlay: an empty foreground selection, with the neuronal
# cells passed as background cells.
# may need to tweak clustering params (original note was left unfinished)
cplots.plot_ccf_overlay(
    adata.obs.iloc[[], :],  # no foreground cells
    ccf_polygons_th_zi,
    sections=sections,
    highlight='all',
    # x_field='x_section', y_field='y_section',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    min_group_count=0,
    legend=False,
)

In [11]:
# Subclasses with more than 300 neuronal cells, with their cell counts.
subclasses = (
    adata_neuronal.obs['subclass']
    .value_counts()
    .loc[lambda counts: counts > 300]
)
print(f'{len(subclasses) = }')
subclasses

In [12]:
# Cell counts per class among the retained subclasses (zero counts hidden).
(
    adata_neuronal.obs
    .loc[lambda df: df['subclass'].isin(subclasses.index), 'class']
    .value_counts()
    .loc[lambda counts: counts > 0]
)

In [13]:
# Number of distinct supertypes among the retained subclasses.
n_supertypes = len(
    adata_neuronal.obs
    .loc[lambda df: df['subclass'].isin(subclasses.index), 'supertype']
    .unique()
)
print(f'{n_supertypes = }')

In [14]:
# Number of distinct clusters among the retained subclasses.
n_clusters = len(
    adata_neuronal.obs
    .loc[lambda df: df['subclass'].isin(subclasses.index), 'cluster']
    .unique()
)
print(f'{n_clusters = }')

In [15]:
tax_level = 'subclass'

# Build the label -> colour mapping from (label, colour) pairs taken off the
# same rows. Zipping the two columns' .unique() values independently only
# lines up when every label has its own distinct colour; a shared colour would
# shorten the colour list and silently shift or drop later labels.
point_palette = (
    adata_neuronal.obs[[tax_level, tax_level + '_color']]
    .drop_duplicates(subset=tax_level)  # first row seen for each label
    .set_index(tax_level)[tax_level + '_color']
    .to_dict()
)
point_palette

In [16]:
# Overlay cells from the retained subclasses, coloured by subclass, on top of
# all neuronal cells as background.
obs = adata_neuronal.obs.loc[lambda df: df['subclass'].isin(subclasses.index)]
cplots.plot_ccf_overlay(
    obs,
    ccf_polygons_th_zi,
    bg_cells=adata_neuronal.obs,
    point_hue='subclass',
    sections=sections,
    shape_palette='dark_outline',
    point_palette=point_palette,
    legend=False,
    highlight='all',
    # x_field='x_section', y_field='y_section',
    min_group_count=30,
)

# Investigate prong 1 cluster-nucleus groups

In [17]:
# "resources/Prong 1 Vitessce links by nucleus.csv" has the old taxonomy level
# IDs and so isn't currently useful
# keeping this cell around in case we ever update it with current taxonomy IDs

# nuclei_df = pd.read_csv("resources/Prong 1 Vitessce links by nucleus.csv", index_col=0)

# def get_obs_from_annotated_clusters(name, adata, nuclei_df):
#     clusters = nuclei_df.loc[name, "annotated clusters"].split(', ')
#     obs = adata.obs.loc[lambda df: df['cluster_id'].isin(clusters)]
#     return obs

# # obs = get_obs_from_annotated_clusters('VM', adata, nuclei_df)
# obs = adata_neuronal.obs.query("cluster=='1142 TH Prkcd Grin2c Glut_13'")

## VM

In [18]:
# Nuclei of interest and the candidate taxonomy groups for them.
nuclei = ['VM', 'VAL']

# from manual inspection of ABC Atlas cells in VM or VAL
supertypes = [
    'TH Prkcd Grin2c Glut_13',  # 0666
    'TH Prkcd Grin2c Glut_1',  # 0654
    'RE-Xi Nox4 Glut_2',  # 0669 - maybe bleedthrough from another AP position?
]

# NOTE(review): '2648 TH Prkcd Grin2c Glut_1' was listed twice; the duplicate
# is dropped here (it has no effect on .isin filtering). Confirm whether a
# different Glut_1 cluster was intended for the second entry.
clusters = [
    '2648 TH Prkcd Grin2c Glut_1',
    '2683 TH Prkcd Grin2c Glut_13',
    '2687 TH Prkcd Grin2c Glut_13',
    # '2684 TH Prkcd Grin2c Glut_13', <100 cells
    # '2685 TH Prkcd Grin2c Glut_13',
    # '2686 TH Prkcd Grin2c Glut_13'
]

In [19]:
# Plot the selected clusters against the VM / VAL polygons.
obs = adata_neuronal.obs.loc[lambda df: df['cluster'].isin(clusters)]
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=nuclei,
    legend='both',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    shape_palette='greyscale',
)

In [20]:
# Clusters of supertype 'TH Prkcd Grin2c Glut_13' that have more than 5 cells.
obs = adata_neuronal.obs.loc[lambda df: df['supertype'] == 'TH Prkcd Grin2c Glut_13']
# obs = adata_neuronal.obs[adata_neuronal.obs['supertype'].isin(supertypes)]
# obs
obs['cluster'].value_counts().loc[lambda counts: counts > 5]

In [21]:
# Select cells whose cluster label starts with one of these 4-character IDs
# and compare them with the VM polygon.
# remove 1140? annotated as VPMpc
clusters = ['2683', '2687', '2686']
obs = adata.obs.loc[lambda df: df['cluster'].str[:4].isin(clusters)]
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=['VM'],
    legend='cells',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    shape_palette='greyscale',
)

### diff exp

#### cluster DE

##### cluster 2687 vs 2683

In [24]:
# Wilcoxon DE of cluster 2687 against reference cluster 2683, ranked by
# absolute score; results are stored under the key '2687vs2683'.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'cluster_id',
    groups=['2687'],
    reference='2683',
    method='wilcoxon',
    rankby_abs=True,
    key_added='2687vs2683',
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20, key='2687vs2683')

In [25]:
de_df = sc.get.rank_genes_groups_df(
    adata_neuronal, group='2687', key='2687vs2683'
)

# First 5 genes in the DE table with a positive score (higher in the target
# group) and first 5 with a negative score (lower in the target group).
de_genes_pos_top5 = de_df.loc[de_df['scores'] > 0, 'names'].head(5).tolist()
de_genes_neg_top5 = de_df.loc[de_df['scores'] < 0, 'names'].head(5).tolist()

In [26]:
# Expression maps for the positive-score DE genes, highlighting VM.
section = 'C57BL6J-638850.40'
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VM']

for gene in de_genes_pos_top5:
    cplots.plot_expression_ccf(
        adata_neuronal,
        section,
        gene,
        ccf_polygons,
        nuclei=nuclei,
        bg_shapes=False,
        highlight=highlight_nuclei,
        cmap='Blues',
        show_outline=True,
    )

In [27]:
# Expression maps for the negative-score DE genes, highlighting VAL.
section = 'C57BL6J-638850.40'
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VAL']

for gene in de_genes_neg_top5:
    cplots.plot_expression_ccf(
        adata_neuronal,
        section,
        gene,
        ccf_polygons,
        nuclei=nuclei,
        bg_shapes=False,
        highlight=highlight_nuclei,
        cmap='Blues',
        show_outline=True,
    )

##### cluster 2687 vs rest

In [28]:
# Wilcoxon DE (tie-corrected) of cluster 2687 with no explicit reference
# group; results are stored under the key '2687vsRest'.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'cluster_id',
    groups=['2687'],
    method='wilcoxon',
    tie_correct=True,
    key_added='2687vsRest',
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20, key='2687vsRest')

In [29]:
de_df = sc.get.rank_genes_groups_df(
    adata_neuronal, group='2687', key='2687vsRest'
)

# First 5 gene names in the DE table for the target group.
de_genes_pos_top5 = de_df['names'].head(5).tolist()
de_genes_pos_top5

In [30]:
# ISH diff ex search
# Galnt18 - https://mouse.brain-map.org/gene/show/87708

# Expression maps for the top 2687-vs-rest genes, highlighting VM.
section = 'C57BL6J-638850.40'
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VM']

for gene in de_genes_pos_top5:
    cplots.plot_expression_ccf(
        adata_neuronal,
        section,
        gene,
        ccf_polygons,
        nuclei=nuclei,
        bg_shapes=False,
        highlight=highlight_nuclei,
        cmap='Blues',
        show_outline=True,
    )

#### supertype DE

In [31]:
# Wilcoxon DE (tie-corrected) for supertype 'TH Prkcd Grin2c Glut_13'.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'supertype',
    groups=['TH Prkcd Grin2c Glut_13'],
    method='wilcoxon',
    tie_correct=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

### outlining VM / composition

In [32]:
import geopandas as gp

# One point geometry per neuronal cell, built from its (cirro_x, cirro_y)
# coordinates and indexed by cell name.
neuron_points = gp.GeoSeries.from_xy(
    *adata_neuronal.obs[['cirro_x', 'cirro_y']].to_numpy().T,
    index=adata_neuronal.obs_names,
)

In [33]:
import hdbscan

# Spatially cluster the cells of transcriptomic cluster 2687 within one
# section, then label the two largest spatial clusters as the "core" group.
section = 'C57BL6J-638850.40'
adata_vm = adata_neuronal[
    adata_neuronal.obs.query(f"section=='{section}' & cluster_id=='2687'").index
].copy()
df = adata_vm.obs  # same object as adata_vm.obs, so new columns land there
X = df[['cirro_x', 'cirro_y']].values
df['spatial_cluster'] = hdbscan.HDBSCAN(min_samples=5).fit_predict(X)

# HDBSCAN assigns the label -1 to noise points. Noise is excluded before
# ranking cluster sizes; otherwise, whenever noise is one of the two most
# frequent labels, scattered outlier cells would be tagged as 'core'.
core_clusters = (
    df.loc[df['spatial_cluster'] >= 0, 'spatial_cluster']
    .value_counts()
    .index[:2]
)
df['spatial_group'] = (
    df['spatial_cluster']
    .isin(core_clusters)
    .map({True: 'core', False: 'other'})
)

In [34]:
# Scatter of the selected cells, coloured by spatial cluster label.
sns.scatterplot(
    data=df,
    x='cirro_x',
    y='cirro_y',
    hue='spatial_cluster',
    s=4,
    legend=True,
    palette='tab10',
)

In [35]:
# Build a polygon from the 'core' cells and flag every neuronal cell whose
# point intersects it.
# NOTE(review): neuron_points holds cells from all sections, not only
# `section`; the test is purely on (cirro_x, cirro_y) - confirm that cells from
# other sections cannot fall inside this polygon.
nucleus = cshapes.get_polygon_from_obs(df.query("spatial_group=='core'"))
subset = neuron_points.geometry.intersects(nucleus)
# .values drops the index, so this relies on neuron_points being in the same
# row order as adata_neuronal.obs.
adata_neuronal.obs['in_nucleus'] = subset.values
adata_nucleus = adata_neuronal[subset]

In [36]:
# Per cluster, count Glut_13 cells in this section inside vs outside the
# nucleus polygon.
(
    adata_neuronal.obs
    .query(f"section=='{section}' & supertype=='TH Prkcd Grin2c Glut_13'")
    .groupby(['cluster'])['in_nucleus']
    .apply(lambda flags: flags.value_counts())
)

In [37]:
# need to include VAL types ??

In [38]:
# Wilcoxon DE of the 'core' spatial group against 'other', ranked by absolute
# score.
sc.tl.rank_genes_groups(
    adata_vm,
    'spatial_group',
    groups=['core'],
    reference='other',
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_vm, groups=['core'], n_genes=20)

## anterior

In [39]:
# Background-only overlay of one section with the anterior nuclei highlighted.
nuclei = ['AD', 'AV', 'AMd', 'AMv']
section = 'C57BL6J-638850.44'
# cplot.plot_ccf_section(ccf_polygons_th_zi, section, highlight=nuclei, )
cplots.plot_ccf_overlay(
    adata.obs.loc[[], :],  # empty foreground selection
    ccf_polygons_th_zi,
    sections=[section],
    highlight=nuclei,
    legend='both',
    bg_cells=adata_neuronal.obs,
    bg_shapes=True,
    min_group_count=0,
)

In [40]:
# Anterior nuclei and their candidate taxonomy groups.
# Note: this rebinds `subclasses`, `supertypes` and `clusters`.
nuclei = ['AD', 'AMd', 'AMv', 'AV']

# from manual inspection of ABC Atlas cells in AD, AM, AV
subclasses = [
    'AD Serpinb7 Glut',  # 147, AD
    'AV Col27a1 Glut',  # 148, AV
    'TH Prkcd Grin2c Glut',  # 151, AM
]

supertypes = [
    'AD Serpinb7 Glut_1',  # 0641, AD
    'AV Col27a1 Glut_1',  # 0642, AV
    'TH Prkcd Grin2c Glut_9',  # 0662, AM
]

clusters = [
    '2613 AD Serpinb7 Glut_1',  # AD
    '2614 AD Serpinb7 Glut_1',  # AD
    '2615 AV Col27a1 Glut_1',  # AV
    '2616 AV Col27a1 Glut_1',  # AV
    '2674 TH Prkcd Grin2c Glut_9',  # AM
    '2675 TH Prkcd Grin2c Glut_9',  # AM
    '2676 TH Prkcd Grin2c Glut_9',  # AM
]

In [41]:
# Plot the selected clusters against the anterior nuclei polygons.
obs = adata.obs.loc[lambda df: df['cluster'].isin(clusters)]
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=nuclei,
    legend='cells',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    shape_palette='greyscale',
)

### diff exp

#### AV

In [42]:
# Wilcoxon DE for subclass 'AV Col27a1 Glut', ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'subclass',
    groups=['AV Col27a1 Glut'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

##### Col27a1

In [43]:
# Col27a1 expression in the anterior section, highlighting AD and AV.
# `section` set here is reused by the later single-gene plots.
section = 'C57BL6J-638850.44'

gene = "Col27a1"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AD', 'AV']
cplots.plot_expression_ccf(
    adata_neuronal,
    section,
    gene,
    ccf_polygons,
    nuclei=ant_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
    highlight=highlight_nuclei,
);

##### Sorcs3
Mathew's plots in brain1 suggested high expression exclusively in the '1097 AV Col27a1 Glut' cluster in the AV.

That does not appear to be the case in brain3...

In [44]:

# Mathew's plots in brain1 suggested high expression exclusively in the
# '1097 AV Col27a1 Glut' cluster in the AV.
# That does not appear to be the case in brain3...
gene = "Sorcs3"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AV']
cplots.plot_expression_ccf(
    adata_neuronal,
    section,
    gene,
    ccf_polygons,
    nuclei=ant_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
    highlight=highlight_nuclei,
);

#### AD

In [45]:
# Wilcoxon DE for subclass 'AD Serpinb7 Glut', ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'subclass',
    groups=['AD Serpinb7 Glut'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [46]:

# Syndig1l expression, highlighting AD.
gene = "Syndig1l"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AD']
cplots.plot_expression_ccf(
    adata_neuronal,
    section,
    gene,
    ccf_polygons,
    nuclei=ant_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
    highlight=highlight_nuclei,
);

#### AM

In [47]:
# Wilcoxon DE for supertype 'TH Prkcd Grin2c Glut_9', ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'supertype',
    groups=['TH Prkcd Grin2c Glut_9'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [48]:

# Cbln1 expression, highlighting AMd and AMv.
gene = "Cbln1"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AMd', 'AMv']
cplots.plot_expression_ccf(
    adata_neuronal,
    section,
    gene,
    ccf_polygons,
    nuclei=ant_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
    highlight=highlight_nuclei,
);

In [49]:
# Flag cells belonging to the anterior-nuclei types as the strings
# 'True' / 'False' (string-valued so it can be used as a grouping column).
#
# Matching is done on the exact AM supertype name and on the 'AD ' / 'AV '
# name prefixes. Plain substring matching ('AD', 'AV', 'Glut_9') also catches
# any other supertype that merely contains those letters or that is the 9th
# supertype of an unrelated subclass.
# NOTE(review): assumes all supertypes of the AD and AV subclasses start with
# 'AD ' / 'AV ' - confirm against the taxonomy.
adata_neuronal.obs['anterior_type'] = (
    (adata_neuronal.obs['supertype'] == 'TH Prkcd Grin2c Glut_9')
    | adata_neuronal.obs['supertype'].str.startswith('AD ', na=False)
    | adata_neuronal.obs['supertype'].str.startswith('AV ', na=False)
).astype(str)

In [50]:
# Wilcoxon DE across the 'anterior_type' groups, ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'anterior_type',
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [51]:

# C1ql3 expression across the anterior nuclei (no highlight).
gene = "C1ql3"
nuclei = ['AD', 'AV', 'AMd', 'AMv']
cplots.plot_expression_ccf(
    adata_neuronal,
    section,
    gene,
    ccf_polygons,
    nuclei=nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

In [52]:

# Shox2 expression across the anterior nuclei (no highlight).
gene = "Shox2"
nuclei = ['AD', 'AV', 'AMd', 'AMv']
cplots.plot_expression_ccf(
    adata_neuronal,
    section,
    gene,
    ccf_polygons,
    nuclei=nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

## MD

In [53]:
# Plot a single cluster against the MD polygon with a fixed point colour.
cluster = "2678 TH Prkcd Grin2c Glut_10"
obs = adata.obs.loc[lambda df: df['cluster'] == cluster]
nuclei = ['MD']
palette = {cluster: "#bcbd22"}
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=nuclei,
    legend='cells',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    shape_palette='dark_outline',
    point_palette=palette,
    s=5,
)

In [54]:
# Plot both Glut_10 clusters against the nuclei set in the previous cell.
# TODO: should probably remove 1132 (2677) from annotation
clusters = [
    "2677 TH Prkcd Grin2c Glut_10",
    "2678 TH Prkcd Grin2c Glut_10",
]
obs = adata.obs.loc[lambda df: df['cluster'].isin(clusters)]

cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons_th_zi,
    nuclei=nuclei,
    legend='cells',
    bg_cells=adata_neuronal.obs,
    bg_shapes=False,
    shape_palette='dark_outline',
    s=5,
)

### diff exp

In [55]:
# Wilcoxon DE for cluster 2678, ranked by absolute score.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'cluster_id',
    groups=['2678'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

## other

In [56]:
# obs = get_obs_from_annotated_clusters('PT', adata_neuronal, nuclei_df)
# cplots.plot_nucleus_cluster_comparison_slices(obs, ccf_polygons_th_zi, nuclei='PT', legend='cells', bg_cells=adata_neuronal.obs, bg_shapes=False, 
#                                        shape_palette='dark_outline', s=5)

In [57]:
# obs = adata_neuronal.obs.query("supertype.str.contains('PVT-PT')", engine='python')
# cplots.plot_nucleus_cluster_comparison_slices(obs, ccf_polygons_th_zi, nuclei=['PT','PVT'], bg_cells=adata_neuronal.obs, bg_shapes=False, legend='both')

In [None]:
# obs = get_obs_from_annotated_clusters('PVT', adata_neuronal, nuclei_df)
# cplots.plot_nucleus_cluster_comparison_slices(obs, ccf_polygons_th_zi, nuclei=['PT','PVT'], bg_cells=adata_neuronal.obs, bg_shapes=False, legend='both')

In [None]:
# obs = get_obs_from_annotated_clusters('LGd', adata_neuronal, nuclei_df)
# cplots.plot_nucleus_cluster_comparison_slices(obs, ccf_polygons_th_zi, nuclei=['LGd-co', 'LGd-ip', 'LGd-sh'], bg_cells=adata_neuronal.obs, bg_shapes=False)

In [None]:
# obs = get_obs_from_annotated_clusters('SMT', adata_neuronal, nuclei_df)
# cplots.plot_nucleus_cluster_comparison_slices(obs, ccf_polygons_th_zi, nuclei='SMT', bg_cells=adata_neuronal.obs, bg_shapes=False)

In [None]:
# obs = get_obs_from_annotated_clusters('LD', adata_neuronal, nuclei_df)
# cplots.plot_nucleus_cluster_comparison_slices(obs, ccf_polygons_th_zi, nuclei='LD', bg_cells=adata_neuronal.obs, bg_shapes=False)