In [4]:
import anndata as ad
import pandas as pd
import shapely
import shapely.plotting as splot
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import numpy as np

import ccf_plots as cplots
import abc_load as abc
get_ipython().run_line_magic('matplotlib', 'inline') 

# Load brain3 data

### alternate versions

In [5]:
# Load the realigned thalamus bounding-box metadata and expose the section
# coordinates under the 'cirro_x'/'cirro_y' column names used later on.
rename = {
    'x_section': 'cirro_x',
    'y_section': 'cirro_y',
}
# Column of `obs` that is matched against the thalamus structure names below.
# (This value used to be assigned twice in this cell; once is sufficient.)
ccf_label = 'parcellation_substructure_realigned'

obs = pd.read_parquet("/data/realigned-metadata/abc_realigned_metadata_thalamus-boundingbox.parquet")
obs = obs.rename(columns=rename)

# Classes excluded when subsetting to neurons in the preprocessing cells.
nn_classes = [
    "31 OPC-Oligo",
    "30 Astro-Epen",
    "33 Vascular",
    "34 Immune",
]

import nibabel
img = nibabel.load("/data/labels.nii.gz")
# could maybe keep the lazy dataobj and not convert to numpy?
ccf_polygons = np.array(img.dataobj)

In [3]:
# Alternate data source: combined ABC metadata (version "20230630"), with the
# realigned coordinates exposed under the 'cirro_x'/'cirro_y' column names.
rename = {
    'x_realigned': 'cirro_x',
    'y_realigned': 'cirro_y',
}

obs = abc.get_combined_metadata(version="20230630")
obs = obs.rename(columns=rename)
# This cell previously set ccf_label = 'parcellation_substructure' and then
# overwrote it before any use; only the effective value is kept.
# NOTE(review): confirm the realigned labels are the ones intended here.
ccf_label = 'parcellation_substructure_realigned'
# Classes excluded when subsetting to neurons in the preprocessing cells.
nn_classes = [
    "28 Astro-Epen",
    "29 Oligo",
    "31 Vascular",
    "32 Immune",
]

ccf_polygons = abc.get_ccf_labels_image(resampled=True).astype(int)

### preprocessing

In [7]:
# Thalamus substructure names, plus the same collection with the
# 'TH-unassigned' entry removed (order of the second list is not defined).
th_names = abc.get_thalamus_substructure_names()
th_subregion_names = list(set(th_names) - {'TH-unassigned'})

In [8]:
# Two independent subsets of the metadata table:
#   obs_neurons - every cell whose class is not one of nn_classes
#   obs_th      - every cell whose CCF label is a thalamus structure
obs_neurons = obs.loc[~obs['class'].isin(nn_classes)]
obs_th = obs.loc[obs[ccf_label].isin(th_names)]


In [9]:
# Thalamic cells that are not in any of the nn_classes.
obs_th_neurons = obs.loc[obs[ccf_label].isin(th_names) & ~obs['class'].isin(nn_classes)]
# helps plotting etc by trimming categories
obs_th_neurons = obs_th_neurons.assign(**{
    column: obs_th_neurons[column].cat.remove_unused_categories()
    for column in obs_th_neurons.select_dtypes(include=["category"]).columns
})

In [10]:
# Column used to select sections in the overlay plots.
section_col = 'z_section'
# Three sections used for the overview figures.
sections_3 = [5.6, 6.4, 7.2]
# Round the section coordinate to one decimal so that values such as 7.2 can
# be matched exactly, then list every section present.
obs_th_neurons[section_col] = obs_th_neurons[section_col].round(decimals=1)
sections = sorted(pd.unique(obs_th_neurons[section_col]))

## view thalamus subset with subclass labels

In [14]:
# Re-import the project modules so edits made to them on disk are picked up
# without restarting the kernel. The order is kept as written.
# NOTE(review): if ccf_plots imports abc_load, abc must be reloaded first - confirm.
from importlib import reload
reload(abc)
reload(cplots)

In [15]:
# may need to tweak clustering params on fine
# An empty selection is passed as the foreground; the thalamic neurons are
# supplied through bg_cells instead.
cplots.plot_ccf_overlay(
    obs_th.iloc[[], :],  # no foreground cells
    ccf_polygons,
    point_hue=ccf_label,
    sections=sections_3,
    highlight='all',
    section_col=section_col,
    # x_field='x_section', y_field='y_section',
    bg_cells=obs_th_neurons,
    bg_shapes=False,
    min_group_count=0,
    legend=False,
    axes=True,
)

In [16]:
# Cell counts per subclass, keeping only subclasses with more than 400 cells.
subclasses = (
    obs_th_neurons['subclass']
    .value_counts()
    .pipe(lambda counts: counts[counts > 400])
)
subclasses

In [17]:
# Number of subclasses that passed the count threshold.
subclasses.size

In [18]:
# Class membership of the cells in the retained subclasses (non-empty classes only).
(
    obs_th_neurons
    .loc[obs_th_neurons['subclass'].isin(subclasses.index), 'class']
    .value_counts()
    .pipe(lambda counts: counts[counts > 0])
)

In [19]:
# Number of distinct supertypes among cells of the retained subclasses.
len(
    obs_th_neurons
    .loc[obs_th_neurons['subclass'].isin(subclasses.index), 'supertype']
    .unique()
)

In [20]:
# Number of distinct clusters among cells of the retained subclasses.
len(
    obs_th_neurons
    .loc[obs_th_neurons['subclass'].isin(subclasses.index), 'cluster']
    .unique()
)

In [21]:
from colorcet import glasbey
from matplotlib.colors import rgb2hex

# One glasbey colour per retained subclass, keyed by subclass name.
point_palette = {
    name: color
    for name, color in zip(
        subclasses.index,
        sns.color_palette(glasbey, n_colors=len(subclasses.index)),
    )
}

# Same palette as hex strings, with spaces in the names replaced by '_'.
{name.replace(' ', '_'): rgb2hex(color) for name, color in point_palette.items()}

In [23]:
# Pick up on-disk edits to ccf_plots without restarting the kernel.
reload(cplots)

In [24]:
# NOTE(review): from here on `obs` is reused as a scratch selection, which
# replaces the full metadata table bound to `obs` in the loading cells;
# re-running the preprocessing cells afterwards would start from this subset.
obs = obs_th_neurons[obs_th_neurons['subclass'].isin(subclasses.index)]
cplots.plot_ccf_overlay(
    obs,
    ccf_polygons,
    bg_cells=obs_th_neurons,
    point_hue='subclass',
    point_palette=point_palette,
    shape_palette='dark_outline',
    sections=sections_3,
    section_col=section_col,
    highlight='all',
    legend=False,
    # x_field='x_section', y_field='y_section',
    min_group_count=30,
)

In [116]:
# Anterior nuclei: cells whose cluster name starts with one of the four-digit
# ids 1169-1171 or 1095-1098, coloured by supertype on section 8.0.
anterior_sec = 8.0
clusters = [str(cluster_id) for cluster_id in (*range(1169, 1172), *range(1095, 1099))]
obs = obs_th_neurons[obs_th_neurons['cluster'].str[:4].isin(clusters)]
nuclei = ['AD', 'AV', 'AMd', 'AMv']
cplots.plot_ccf_overlay(
    obs,
    ccf_polygons,
    bg_cells=obs_th_neurons,
    point_hue='supertype',
    sections=[anterior_sec],
    section_col=section_col,
    shape_palette='dark_outline',
    ccf_names=nuclei,
    legend='cells',
    bg_shapes=False,
)

In [117]:

# Same selection (`obs`, `nuclei`, `anterior_sec` from the previous cell),
# coloured by cluster instead of supertype and without a legend.
cplots.plot_ccf_overlay(
    obs,
    ccf_polygons,
    bg_cells=obs_th_neurons,
    point_hue='cluster',
    sections=[anterior_sec],
    section_col=section_col,
    shape_palette='dark_outline',
    ccf_names=nuclei,
    legend=False,
    bg_shapes=False,
)

In [138]:
# VM / VAL: clusters 1138, 1141 and 1142 on three sections.
# NOTE(review): this rebinds `sections`, which the preprocessing cell set to
# the sorted list of every section in the data.
sections = [7.2, 7.0, 6.8]
clusters = ['1138', '1141', '1142']
obs = obs_th_neurons[obs_th_neurons['cluster'].str[:4].isin(clusters)]
nuclei = ['VM', 'VAL']
cplots.plot_ccf_overlay(
    obs,
    ccf_polygons,
    bg_cells=obs_th_neurons,
    point_hue='cluster',
    sections=sections,
    section_col=section_col,
    shape_palette='dark_outline',
    ccf_names=nuclei,
    legend='cells',
    bg_shapes=False,
)

In [139]:
# PVT: cells whose subclass name contains 'PVT', coloured by supertype.
# NOTE(review): `subclasses` is rebound here from the per-subclass count
# Series built earlier to a plain list of name patterns.
pvt_sections = [7.0, 7.8]
nuclei = ['PVT']
subclasses = nuclei
obs = obs_th_neurons.loc[
    np.any(
        [obs_th_neurons['subclass'].str.contains(pattern).values for pattern in subclasses],
        axis=0,
    ),
    :,
]
cplots.plot_ccf_overlay(
    obs,
    ccf_polygons,
    bg_cells=obs_th_neurons,
    point_hue='supertype',
    sections=pvt_sections,
    section_col=section_col,
    shape_palette='dark_outline',
    ccf_names=nuclei,
    legend='cells',
    bg_shapes=False,
)

In [132]:
# RT: cells whose subclass name contains 'RT ZI', coloured by supertype.
rt_sections = [7.2, 7.8]
nuclei = ['RT']
subclasses = ['RT ZI']
obs = obs_th_neurons.loc[
    np.any(
        [obs_th_neurons['subclass'].str.contains(pattern).values for pattern in subclasses],
        axis=0,
    ),
    :,
]
cplots.plot_ccf_overlay(
    obs,
    ccf_polygons,
    bg_cells=obs_th_neurons,
    point_hue='supertype',
    sections=rt_sections,
    section_col=section_col,
    shape_palette='dark_outline',
    ccf_names=nuclei,
    legend='cells',
    bg_shapes=False,
)

# Investigate prong 1 cluster-nucleus groups

In [19]:
# Per-nucleus annotation table; the first CSV column becomes the index, and
# the "annotated clusters" column is read by get_obs_from_annotated_clusters.
nuclei_df = pd.read_csv(
    "resources/Prong 1 Vitessce links by nucleus.csv",
    index_col=0,
)

def get_obs_from_annotated_clusters(name, adata, nuclei_df):
    """Return the rows of ``adata.obs`` belonging to a nucleus' annotated clusters.

    Parameters
    ----------
    name
        Index label in ``nuclei_df`` identifying the nucleus.
    adata
        Object exposing an ``obs`` DataFrame with a ``'cluster_id'`` column.
    nuclei_df
        DataFrame indexed by nucleus name whose ``'annotated clusters'``
        column holds a comma-separated string of cluster ids.

    Returns
    -------
    pandas.DataFrame
        The subset of ``adata.obs`` whose ``'cluster_id'`` is one of the ids
        listed for ``name``.

    Raises
    ------
    KeyError
        If ``name`` is not in ``nuclei_df.index``.
    AttributeError
        If the annotation entry is not a string (e.g. a missing value).
    """
    annotation = nuclei_df.loc[name, "annotated clusters"]
    # Split on bare commas and strip each piece so that "a, b", "a,b" and
    # "a , b" are all accepted; a fixed ', ' separator would silently match
    # nothing when the spacing in the CSV differs. Empty pieces are dropped.
    clusters = [piece.strip() for piece in annotation.split(',')]
    clusters = [cluster for cluster in clusters if cluster]
    obs = adata.obs
    return obs.loc[obs['cluster_id'].isin(clusters)]

## VM

In [20]:
# Pick up on-disk edits to ccf_plots without restarting the kernel.
reload(cplots)

In [21]:
# obs = get_obs_from_annotated_clusters('VM', adata, nuclei_df)
# Single cluster, compared against the VM and VAL nuclei.
obs = obs_th_neurons[obs_th_neurons['cluster'] == '1142 TH Prkcd Grin2c Glut_13']
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei=['VM', 'VAL'],
    legend='both',
    bg_cells=obs_th_neurons,
    bg_shapes=False,
    shape_palette='greyscale',
)

In [22]:
# Clusters within the supertype that have more than 5 thalamic neurons.
obs = obs_th_neurons[obs_th_neurons['supertype'] == 'TH Prkcd Grin2c Glut_13']
obs['cluster'].value_counts().pipe(lambda counts: counts[counts > 5])

In [23]:
# remove 1140? annotated as VPMpc
# NOTE(review): `adata` is not created in any cell shown here; the earlier
# cell that plots these same clusters selects from obs_th_neurons instead.
# Confirm which table is intended.
clusters = ['1138', '1141', '1142']
obs = adata.obs.loc[lambda frame: frame['cluster'].str[:4].isin(clusters)]
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei=['VM'],
    legend='cells',
    bg_cells=obs_th_neurons,
    bg_shapes=False,
    shape_palette='greyscale',
)

### diff exp

In [24]:
import scanpy as sc

In [25]:
# Wilcoxon ranking of cluster 1142 against reference cluster 1138, ranked by
# absolute score; then plot the top 20 genes.
# NOTE(review): `adata_neuronal` is not created in any cell shown here -
# confirm it is loaded before this point.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'cluster_id',
    groups=['1142'],
    reference='1138',
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [26]:
# Calb1 on section C57BL6J-638850.40 (nuclei: VM, VAL; highlight: VM).
# `section` set here is reused by the following expression cells.
section = 'C57BL6J-638850.40'
gene = "Calb1"
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VM']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

In [27]:
# Necab1 on the current `section` (nuclei: VM, VAL; highlight: VM).
gene = "Necab1"
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VM']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

In [28]:
# Spon1 on the current `section` (nuclei: VM, VAL; highlight: VM).
gene = "Spon1"
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VM']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

In [29]:
# Cbln1 on the current `section` (nuclei: VM, VAL; highlight: VM).
gene = "Cbln1"
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VM']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

In [30]:
# Tmie on the current `section` (nuclei: VM, VAL; highlight: VM).
gene = 'Tmie'
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VM']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

In [31]:
# Galnt18 on the current `section` (nuclei: VM, VAL; highlight: VM).
gene = 'Galnt18'
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VM']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

In [32]:
# Tie-corrected Wilcoxon ranking for one supertype (no explicit reference
# group is passed); then plot the top 20 genes.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'supertype',
    groups=['TH Prkcd Grin2c Glut_13'],
    method='wilcoxon',
    tie_correct=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [33]:
# Tie-corrected Wilcoxon ranking for cluster 1142 (no explicit reference
# group is passed); then plot the top 20 genes.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'cluster_id',
    groups=['1142'],
    method='wilcoxon',
    tie_correct=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [34]:
# ISH diff ex search
# https://mouse.brain-map.org/gene/show/87708

# Galnt18 on the current `section` (nuclei: VM, VAL; highlight: VM).
gene = "Galnt18"
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VM']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

In [35]:
# Stxbp6 on the current `section` (nuclei: VM, VAL; highlight: VM).
gene = "Stxbp6"
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VM']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

In [36]:
# Spon1 on the current `section` (nuclei: VM, VAL; highlight: VM).
gene = "Spon1"
nuclei = ['VM', 'VAL']
highlight_nuclei = ['VM']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

### outlining VM / composition

In [39]:
import geopandas as gp
# One point geometry per thalamic neuron, built from the 'cirro_x'/'cirro_y'
# columns and indexed like obs_th_neurons so that results computed on the
# points line up row-for-row with that table.
# The original passed `obs_th_neurons_names`, which is not defined anywhere
# in the notebook; the table's own index is the matching set of labels.
# NOTE(review): confirm this is the index that was intended.
neuron_points = gp.GeoSeries.from_xy(
    obs_th_neurons['cirro_x'].values,
    obs_th_neurons['cirro_y'].values,
    index=obs_th_neurons.index,
)

In [45]:
import hdbscan
# Spatially cluster the cells of cluster 1142 on one section and label the
# two largest spatial clusters as the 'core' group.
# NOTE(review): the query uses 'section' and 'cluster_id' columns of
# obs_th_neurons, and `adata_neuronal` is not created in the cells shown
# here - confirm both exist.
section='C57BL6J-638850.40'
adata_vm = adata_neuronal[obs_th_neurons.query(f"section=='{section}' & cluster_id=='1142'").index].copy()
df = adata_vm.obs
X = df[['cirro_x','cirro_y']].values
df['spatial_cluster'] = hdbscan.HDBSCAN(min_samples=5).fit_predict(X)
# HDBSCAN assigns the label -1 to noise points. Noise is not a spatial
# cluster, so it is excluded before taking the two most populous labels;
# otherwise scattered outliers could be treated as part of the core.
core_clusters = (
    df['spatial_cluster']
    .value_counts()
    .drop(-1, errors='ignore')
    .index[:2]
)
df['spatial_group'] = (
    df['spatial_cluster']
    .isin(core_clusters)
    .map({True: 'core', False: 'other'})
)

In [46]:
# Scatter of the selected cells, coloured by their spatial cluster label.
sns.scatterplot(
    data=df,
    x='cirro_x',
    y='cirro_y',
    hue='spatial_cluster',
    palette='tab10',
    s=4,
    legend=True,
)

In [47]:
# Build a polygon from the 'core' cells, flag every thalamic neuron whose
# point intersects it, and subset the expression data to those neurons.
# NOTE(review): `cshapes` is not imported in any cell shown here - confirm
# which module provides get_polygon_from_obs.
nucleus = cshapes.get_polygon_from_obs(df.query("spatial_group=='core'"))
subset = neuron_points.geometry.intersects(nucleus)
# Assigned positionally (.values), so neuron_points must be in the same row
# order as obs_th_neurons.
obs_th_neurons['in_nucleus'] = subset.values
adata_nucleus = adata_neuronal[subset]

In [49]:
# Per cluster of the supertype on this section: how many cells fall inside
# versus outside the nucleus polygon.
(
    obs_th_neurons
    .query(f"section=='{section}' & supertype=='TH Prkcd Grin2c Glut_13'")
    .groupby(['cluster'])['in_nucleus']
    .apply(lambda in_nucleus: in_nucleus.value_counts())
)

In [50]:
# TODO: do the VAL types need to be included here?

In [51]:
# Wilcoxon ranking of the 'core' spatial group against 'other' within
# adata_vm, ranked by absolute score; then plot the top 20 genes.
sc.tl.rank_genes_groups(
    adata_vm,
    'spatial_group',
    groups=['core'],
    reference='other',
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_vm, groups=['core'], n_genes=20)

## anterior

In [54]:
# Anterior clusters (ids 1169-1171 and 1095-1098) compared against the
# anterior nuclei.
# `nuclei` is now set in this cell. It was previously read from whatever an
# earlier cell left behind (['VM', 'VAL'] when run top to bottom), while the
# anterior list was only defined in the following cell.
# NOTE(review): `adata` is not created in any cell shown here - confirm.
clusters = [str(x) for x in list(range(1169, 1171+1)) + list(range(1095, 1098+1))]
obs = adata.obs.loc[lambda df: df['cluster'].str[:4].isin(clusters)]
nuclei = ['AD', 'AV', 'AMd', 'AMv']

cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei=nuclei,
    legend='cells',
    bg_cells=obs_th_neurons,
    bg_shapes=False,
    shape_palette='greyscale',
)

In [56]:
# Anterior nuclei on section C57BL6J-638850.44; an empty selection is passed
# as the foreground and the thalamic neurons are supplied through bg_cells.
nuclei = ['AD', 'AV', 'AMd', 'AMv']
section = 'C57BL6J-638850.44'
# cplots.plot_ccf_section(ccf_polygons, section, highlight=nuclei, )
cplots.plot_ccf_overlay(
    adata.obs.loc[[], :],
    ccf_polygons,
    sections=[section],
    highlight=nuclei,
    legend='both',
    bg_cells=obs_th_neurons,
    bg_shapes=True,
    min_group_count=0,
)

In [58]:
# cplots.plot_nucleus_cluster_comparison_slices('AD', adata, nuclei_df, ccf_polygons)
# Cells from the clusters annotated for AD in nuclei_df.
obs = get_obs_from_annotated_clusters('AD', adata_neuronal, nuclei_df)
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei='AD',
)

### diff exp

#### AV

In [60]:
# Wilcoxon ranking for the 'AV Col27a1 Glut' subclass, ranked by absolute
# score; then plot the top 20 genes.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'subclass',
    groups=['AV Col27a1 Glut'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

##### Col27a1

In [61]:
# Col27a1 on section C57BL6J-638850.44 (nuclei: AD, AV, AMd, AMv;
# highlight: AD, AV). `section` set here is reused by the following cells.
section = 'C57BL6J-638850.44'

gene = "Col27a1"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AD', 'AV']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=ant_nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

##### Sorcs3
Mathew's plots in brain1 suggested high expression exclusively in the '1097 AV Col27a1 Glut' cluster in the AV.

That does not appear to be the case in brain3...

In [62]:

# Mathew's plots in brain1 suggested high expression exclusively in the
# '1097 AV Col27a1 Glut' cluster in the AV.
# That does not appear to be the case in brain3...
gene = "Sorcs3"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AV']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=ant_nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

#### AD

In [63]:
# Wilcoxon ranking for the 'AD Serpinb7 Glut' subclass, ranked by absolute
# score; then plot the top 20 genes.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'subclass',
    groups=['AD Serpinb7 Glut'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [64]:

# Syndig1l on the current `section` (nuclei: AD, AV, AMd, AMv; highlight: AD).
gene = "Syndig1l"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AD']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=ant_nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

#### AM

In [65]:
# Wilcoxon ranking for the 'TH Prkcd Grin2c Glut_9' supertype, ranked by
# absolute score; then plot the top 20 genes.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'supertype',
    groups=['TH Prkcd Grin2c Glut_9'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [66]:

# Cbln1 on the current `section` (nuclei: AD, AV, AMd, AMv;
# highlight: AMd, AMv).
gene = "Cbln1"
ant_nuclei = ['AD', 'AV', 'AMd', 'AMv']
highlight_nuclei = ['AMd', 'AMv']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=ant_nuclei,
    highlight=highlight_nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

In [67]:
# Flag cells whose supertype name contains 'Glut_9', 'AD' or 'AV'; the
# boolean is stored as the strings 'True' / 'False'.
# NOTE(review): the column is added to obs_th_neurons, but the next cell
# groups `adata_neuronal` by 'anterior_type' - confirm that column exists
# on adata_neuronal.obs as well.
obs_th_neurons['anterior_type'] = (obs_th_neurons['supertype'].str.contains('Glut_9') |
                                       obs_th_neurons['supertype'].str.contains('AD') |
                                       obs_th_neurons['supertype'].str.contains('AV')).astype(str)

In [68]:
# Wilcoxon ranking across the 'anterior_type' groups, ranked by absolute
# score; then plot the top 20 genes.
# NOTE(review): the preceding cell writes 'anterior_type' to obs_th_neurons;
# confirm the column is also present on adata_neuronal.obs.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'anterior_type',
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

In [69]:

# C1ql3 on the current `section` (nuclei: AD, AV, AMd, AMv; no highlight).
gene = "C1ql3"
nuclei = ['AD', 'AV', 'AMd', 'AMv']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

In [70]:

# Shox2 on the current `section` (nuclei: AD, AV, AMd, AMv; no highlight).
gene = "Shox2"
nuclei = ['AD', 'AV', 'AMd', 'AMv']
cplots.plot_expression_ccf(
    adata_neuronal, section, gene, ccf_polygons,
    nuclei=nuclei,
    bg_shapes=False,
    cmap='Blues',
    show_outline=True,
);

## MD

In [71]:
# MD: one cluster drawn in a fixed colour against the MD nucleus.
# NOTE(review): `adata` is not created in any cell shown here - confirm.
cluster = "1133 TH Prkcd Grin2c Glut_10"
nuclei = ['MD']
palette = {cluster: "#bcbd22"}
obs = adata.obs[adata.obs['cluster'] == cluster]
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei=nuclei,
    legend='cells',
    bg_cells=obs_th_neurons,
    bg_shapes=False,
    shape_palette='dark_outline',
    point_palette=palette,
    s=5,
)

In [72]:
# TODO: should probably remove 1132 from annotation
# Cells from the clusters annotated for MD; `nuclei` is the value set in the
# previous cell.
obs = get_obs_from_annotated_clusters('MD', adata_neuronal, nuclei_df)

cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei=nuclei,
    legend='cells',
    bg_cells=obs_th_neurons,
    bg_shapes=False,
    shape_palette='dark_outline',
    s=5,
)

### diff exp

In [73]:
# Wilcoxon ranking for cluster 1133, ranked by absolute score; then plot the
# top 20 genes.
sc.tl.rank_genes_groups(
    adata_neuronal,
    'cluster_id',
    groups=['1133'],
    method='wilcoxon',
    rankby_abs=True,
)
sc.pl.rank_genes_groups(adata_neuronal, n_genes=20)

## other

In [76]:
# Cells from the clusters annotated for PT, compared against the PT nucleus.
obs = get_obs_from_annotated_clusters('PT', adata_neuronal, nuclei_df)
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei='PT',
    legend='cells',
    bg_cells=obs_th_neurons,
    bg_shapes=False,
    shape_palette='dark_outline',
    s=5,
)

In [78]:
# Thalamic neurons whose supertype name contains 'PVT-PT', compared against
# the PT and PVT nuclei.
obs = obs_th_neurons.query(
    "supertype.str.contains('PVT-PT')",
    engine='python',
)
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei=['PT', 'PVT'],
    bg_cells=obs_th_neurons,
    bg_shapes=False,
    legend='both',
)

In [80]:
# Cells from the clusters annotated for PVT, compared against PT and PVT.
obs = get_obs_from_annotated_clusters('PVT', adata_neuronal, nuclei_df)
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei=['PT', 'PVT'],
    bg_cells=obs_th_neurons,
    bg_shapes=False,
    legend='both',
)

In [81]:
# Cells from the clusters annotated for LGd, compared against the three LGd
# subdivisions.
obs = get_obs_from_annotated_clusters('LGd', adata_neuronal, nuclei_df)
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei=['LGd-co', 'LGd-ip', 'LGd-sh'],
    bg_cells=obs_th_neurons,
    bg_shapes=False,
)

In [82]:
# Cells from the clusters annotated for SMT, compared against the SMT nucleus.
obs = get_obs_from_annotated_clusters('SMT', adata_neuronal, nuclei_df)
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei='SMT',
    bg_cells=obs_th_neurons,
    bg_shapes=False,
)

In [83]:
# Cells from the clusters annotated for LD, compared against the LD nucleus.
obs = get_obs_from_annotated_clusters('LD', adata_neuronal, nuclei_df)
cplots.plot_nucleus_cluster_comparison_slices(
    obs,
    ccf_polygons,
    nuclei='LD',
    bg_cells=obs_th_neurons,
    bg_shapes=False,
)