In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from thalamus_merfish_analysis import ccf_plots as cplots
from thalamus_merfish_analysis import ccf_images as cimg
from thalamus_merfish_analysis import abc_load as abc
# Equivalent to the `%matplotlib inline` line magic: render figures in the cell output.
get_ipython().run_line_magic('matplotlib', 'inline') 

## Load brain3 data

In [3]:
# Combined per-cell metadata table. The realigned variant is requested here
# only because it loads faster; the coordinates actually used for plotting
# are chosen by the separate `realigned` flag set further down.
obs = abc.get_combined_metadata(
    realigned=True,
    version="20230830",
    drop_unused=False,
)

In [4]:
# Show which metadata columns were loaded.
obs.columns

In [5]:
# Which alignment to use for the CCF label image and for cell coordinates
realigned = False
ccf_polygons = abc.get_ccf_labels_image(resampled=True, realigned=realigned)

# The CCF label column and the coordinate-column suffix both follow the
# alignment choice above.
ccf_label, coords = (
    ('parcellation_substructure_realigned', 'section')
    if realigned
    else ('parcellation_substructure', 'reconstructed')
)

### preprocessing

In [6]:
# Thalamus substructure names as returned by the loader
th_names = abc.get_thalamus_substructure_names()
# The same names without the 'TH-unassigned' entry. Sorted because iteration
# order of a set of strings can differ between kernel sessions, which would
# make any downstream ordering (legends, loops) non-reproducible.
# assumes all names are strings (sortable) -- TODO confirm
th_subregion_names = sorted(set(th_names).difference(['TH-unassigned']))

In [7]:
# Drop non-neuronal cells and some other outlier non-thalamus types;
# the midbrain filter is switched off for this call.
obs_neurons = abc.filter_adata_by_class(
    obs,
    filter_midbrain=False,
)

# Alternative (not used): drop only the non-neuronal classes
# nn_classes = [
#     "31 OPC-Oligo",
#     "30 Astro-Epen",
#     "33 Vascular",
#     "34 Immune",
# ]
# obs_neurons = obs[~obs['class'].isin(nn_classes)]

In [8]:

# Option A (not used): keep cells whose CCF label is a thalamus structure.
# This leaves out fiber tracts etc.
# obs_th = obs[obs[ccf_label].isin(th_names)]

# Option B (used): spatial subset from the thalamus outline with a small
# buffer (distance_px=5). Slower than the label-based filter.
obs_th = abc.label_thalamus_spatial_subset(
    obs,
    distance_px=5,
    realigned=realigned,
    filter_cells=True,
)

# Rows of the full table that are both neurons and inside the spatial subset
obs_th_neurons = obs.loc[obs_neurons.index.intersection(obs_th.index)]
# Stricter subset: additionally require a thalamus CCF label
obs_th_strict = obs_th_neurons[obs_th_neurons[ccf_label].isin(th_names)]


In [9]:
# Column that identifies which section each cell belongs to
section_col = 'z_section'
# Every section value present among the thalamic neurons, in ascending order
sections_all = sorted(obs_th_neurons[section_col].unique())
# example sections used for the overview figures (four are listed)
sections_GRC = [6.4, 7.2, 7.8, 8.0]

In [10]:
# Turn section coordinates into integer indices: divide by 0.2 and round to
# the nearest integer (0.2 is presumably the section spacing -- TODO confirm).
sections_int = np.rint(np.asarray(sections_all) / 0.2).astype(int)

# Region boundary image, computed only for the sections listed above
ccf_boundaries = cimg.sectionwise_label_erosion(
    ccf_polygons,
    distance_px=1,
    fill_val=0,
    return_edges=True,
    section_list=sections_int,
)

In [11]:
# Show the columns of the spatially filtered table.
obs_th.columns

## view CCF

In [12]:
# CCF shapes only: an empty row selection is passed as the foreground cells
cplots.plot_ccf_overlay(
    obs_th_neurons.iloc[[], :],
    ccf_polygons,
    sections=sections_GRC,
    point_hue=ccf_label,
    highlight='all',
    legend='ccf',
    section_col=section_col,
    x_col='x_' + coords,
    y_col='y_' + coords,
    bg_shapes=False,
    min_section_count=0,
    boundary_img=ccf_boundaries,
);

## View CCF as in ARA

I want to be able to generate the view you get from the ARA, but subset to just the thalamus.

Done:
1. Implemented a shape_palette='allen_reference_atlas' option that colors all CCF regions with the single salmon color that the ARA uses for many of the thalamic nuclei (#FE8084)

TODO
1. Confirm hex colors from ABC atlas metadata (currently taken with the Illustrator eyedropper from an ARA screenshot)
2. Add Allen Reference Atlas labels to plot on top of the centroid(?) of the nucleus
3. Use different salmon shades for the appropriate nuclei, as done in the actual ARA images

In [13]:
# Same shapes-only view as before (empty foreground selection), but with the
# 'allen_reference_atlas' shape palette.
cplots.plot_ccf_overlay(
    obs_th_neurons.iloc[[], :],
    ccf_polygons,
    sections=sections_GRC,
    point_hue=ccf_label,
    highlight='all',
    legend='ccf',
    shape_palette='allen_reference_atlas',
    section_col=section_col,
    x_col='x_' + coords,
    y_col='y_' + coords,
    bg_shapes=False,
    min_section_count=0,
    boundary_img=ccf_boundaries,
);

## view taxonomy labels over CCF

### select thalamus celltype subsets

In [14]:
# Subclasses with more than 100 cells in the strict thalamus subset
subclasses_all = (
    obs_th_strict['subclass']
    .value_counts()
    .pipe(lambda counts: counts[counts > 100])
    .index
)
print(len(subclasses_all))

# Supertypes with more than 20 cells in the strict thalamus subset
supertypes_all = (
    obs_th_strict['supertype']
    .value_counts()
    .pipe(lambda counts: counts[counts > 20])
    .index
)
print(len(supertypes_all))

In [15]:
# Class counts for the thalamic neurons that belong to the selected subclasses
obs_th_neurons[obs_th_neurons['subclass'].isin(subclasses_all)]['class'].value_counts()

In [16]:
# Distinct supertypes found among neurons of the selected subclasses
supertypes_from_subclasses = (
    obs_th_neurons
    .loc[obs_th_neurons['subclass'].isin(subclasses_all), 'supertype']
    .unique()
)
# ...and how many there are
len(supertypes_from_subclasses)

In [17]:
# Number of supertypes shared by the count-thresholded list and the list
# derived from the selected subclasses.
len(supertypes_all.intersection(supertypes_from_subclasses))

In [18]:
# Number of distinct clusters among neurons of the selected subclasses
len(obs_th_neurons.loc[obs_th_neurons['subclass'].isin(subclasses_all), 'cluster'].unique())

### Plots

In [19]:
# One color palette per taxonomy level, keyed by the level name
palettes = {
    level: abc.get_taxonomy_palette(level)
    for level in ('subclass', 'supertype', 'cluster')
}

In [20]:
# Keyword arguments shared by the overlay plots that follow
kwargs = {
    'bg_cells': obs_th_strict,
    'section_col': section_col,
    'x_col': 'x_' + coords,
    'y_col': 'y_' + coords,
    's': 3,
    'shape_palette': 'dark_outline',
    'boundary_img': ccf_boundaries,
}

In [21]:
# Thalamic neurons belonging to the selected subclasses.
# A cell-local name is used instead of rebinding `obs`: `obs` is the full
# metadata table that the filtering cells near the top of the notebook read,
# so overwriting it here would make re-running those cells silently operate
# on this subset.
obs_subclass = obs_th_neurons.loc[lambda df: df['subclass'].isin(subclasses_all)]

# Overlay colored by subclass on the example sections; the returned figures
# are kept so they can be saved afterwards.
subclass_figs = cplots.plot_ccf_overlay(obs_subclass, ccf_polygons,
                                        point_hue='subclass',
                                        sections=sections_GRC,
                                        point_palette=palettes['subclass'],
                                        legend=None, #'cells',
                                        **kwargs)

In [40]:
# Save each subclass figure as a PDF and as a 300-dpi PNG.
# NOTE(review): figures are matched to sections by position, which assumes
# plot_ccf_overlay returned exactly one figure per entry of sections_GRC, in
# order -- confirm.
for i, fig in enumerate(subclass_figs):
    # Integer section label (e.g. 7.2 -> 72), the same naming the
    # cluster-figure export uses; previously the float leaked into the
    # file name as e.g. 'z72.0'.
    sec_name = round(sections_GRC[i]*10)
    fig.savefig('/root/capsule/results/'+f'ccf_subclass_z{sec_name}.pdf',
                bbox_inches='tight')
    # The PNG previously used a 'ccf_cluster_' prefix even though it holds
    # the subclass figure.
    fig.savefig('/root/capsule/results/'+f'ccf_subclass_z{sec_name}_dpi300.png',
                bbox_inches='tight', dpi=300)

In [23]:
# TODO: this is not a good palette! maybe make supertypes of a subclass the same hue?
# Thalamic neurons of the count-thresholded supertypes. Bound to a cell-local
# name so the full metadata table `obs` is not overwritten.
obs_supertype = obs_th_neurons.loc[lambda df: df['supertype'].isin(supertypes_all)]
cplots.plot_ccf_overlay(obs_supertype, ccf_polygons,
                        point_hue='supertype', sections=sections_GRC,
                        point_palette=palettes['supertype'],
                        legend='cells',
                        **kwargs);

In [49]:
# Shared plotting arguments with a slightly smaller marker size (s=2.5).
# NOTE: this rebinds `kwargs`, so every later cell that passes **kwargs
# picks up these values when the notebook is run top to bottom.
kwargs = dict(
    bg_cells=obs_th_strict,
    section_col=section_col,
    x_col = 'x_'+coords,
    y_col = 'y_'+coords,
    s=2.5,
    shape_palette='dark_outline',
    boundary_img=ccf_boundaries
)
# Thalamic neurons of the selected subclasses. Bound to a cell-local name so
# the full metadata table `obs` is not overwritten.
obs_for_clusters = obs_th_neurons.loc[lambda df: df['subclass'].isin(subclasses_all)]
# Overlay colored by cluster; the figures are kept so they can be saved afterwards.
cluster_figs = cplots.plot_ccf_overlay(obs_for_clusters, ccf_polygons,
                                       point_hue='cluster',
                                       sections=sections_GRC,
                                       point_palette=palettes['cluster'],
                                       legend=None,
                                       **kwargs);

In [51]:
# Save each cluster figure as a PDF and as a 300-dpi PNG.
# NOTE(review): figures are matched to sections by position, which assumes
# plot_ccf_overlay returned exactly one figure per entry of sections_GRC, in
# order -- confirm.
for i, fig in enumerate(cluster_figs):
    # Round rather than truncate: a float product that lands just below an
    # integer (e.g. 28.999999999999996) would be mislabeled by int().
    sec_name = round(sections_GRC[i]*10)
    fig.savefig('/root/capsule/results/'+f'ccf_cluster_z{sec_name}.pdf',
                bbox_inches='tight')
    fig.savefig('/root/capsule/results/'+f'ccf_cluster_z{sec_name}_dpi300.png',
                bbox_inches='tight', dpi=300)

## specific nuclei examples

### ANT

#### supertypes

In [25]:
sections = [8.0]
clusters = [str(x) for x in list(range(2613, 2616+1)) + list(range(2674, 2676+1))]
obs = obs_th_neurons.loc[lambda df: df['cluster'].str[:4].isin(clusters)]
nuclei = ['AD', 'AV', 'AMd', 'AMv']

cplots.plot_ccf_overlay(obs, ccf_polygons,
                        point_hue='supertype', point_palette=palettes['supertype'],
                        sections=sections,
                        ccf_names=nuclei, legend='cells',
                        **kwargs);

In [26]:
# or plot all sections based on the selected cells
cplots.plot_ccf_overlay(obs, ccf_polygons,
                        point_hue='supertype', point_palette=palettes['supertype'],
                        sections=None,
                        ccf_names=nuclei, legend='cells',
                        **kwargs);

#### clusters

In [27]:
cplots.plot_ccf_overlay(obs, ccf_polygons,
                        point_hue='cluster', point_palette=palettes['cluster'],
                        sections=sections,
                        ccf_names=nuclei, legend='cells',
                        **kwargs);

### VAL + VM (clusters)

In [28]:

sections = [7.2, 7.0, 6.8]
clusters = ['2683','2684','2687']
# Neurons whose cluster label starts with one of the listed 4-character IDs.
# Bound to a cell-local name so the full metadata table `obs` is not overwritten.
obs_val_vm = obs_th_neurons.loc[lambda df: df['cluster'].str[:4].isin(clusters)]
nuclei = ['VM','VAL']
cplots.plot_ccf_overlay(obs_val_vm, ccf_polygons,
                        point_hue='cluster', point_palette=palettes['cluster'],
                        sections=sections,
                        ccf_names=nuclei, legend='cells',
                        **kwargs);

### PVT

#### supertypes in one subclass

In [29]:
sections = [6.6, 7.8, 8.2]
nuclei = ['PVT']
# subclasses annotated as PVT: keep rows whose subclass name contains any of
# the listed nucleus names. Bound to a cell-local name so the full metadata
# table `obs` is not overwritten.
obs_pvt = obs_th_neurons.loc[np.any([obs_th_neurons['subclass'].str.contains(x).values for x in nuclei], axis=0), :]
cplots.plot_ccf_overlay(obs_pvt, ccf_polygons,
                        point_hue='supertype', point_palette=palettes['supertype'],
                        sections=sections,
                        ccf_names=nuclei, legend='cells',
                        **kwargs);

### RT

#### supertypes in one subclass

In [30]:
sections = [7.2, 7.8]
nuclei = ['RT','ZI']
match = 'RT-ZI'
# Neurons whose subclass name contains 'RT-ZI'. Bound to a cell-local name so
# the full metadata table `obs` is not overwritten.
obs_rt = obs_th_neurons.loc[obs_th_neurons['subclass'].str.contains(match)]
cplots.plot_ccf_overlay(obs_rt, ccf_polygons,
                        point_hue='supertype', point_palette=palettes['supertype'],
                        sections=sections,
                        ccf_names=nuclei, legend='cells',
                        **kwargs);

## other nuclei from annotations

### PT

In [31]:
nucleus='PT'
# Cells from the clusters annotated for this nucleus. Bound to a cell-local
# name so the full metadata table `obs` is not overwritten.
obs_pt = abc.get_obs_from_annotated_clusters(nucleus, obs_th_neurons)

# No `sections` argument is given, so the plotting function's default applies
cplots.plot_ccf_overlay(obs_pt, ccf_polygons,
                        point_hue='supertype', point_palette=palettes['supertype'],
                        ccf_names=[nucleus],
                        **kwargs);

### LGd

In [32]:

# CCF subdivisions of LGd to draw
nuclei=['LGd-co', 'LGd-ip', 'LGd-sh']
# Cells from the clusters annotated as 'LGd'. Bound to a cell-local name so
# the full metadata table `obs` is not overwritten.
obs_lgd = abc.get_obs_from_annotated_clusters('LGd', obs_th_neurons)

cplots.plot_ccf_overlay(obs_lgd, ccf_polygons,
                        point_hue='cluster', point_palette=palettes['cluster'],
                        ccf_names=nuclei,
                        **kwargs);

### SMT

In [33]:
nucleus='SMT'
# Cells from the clusters annotated for this nucleus. Bound to a cell-local
# name so the full metadata table `obs` is not overwritten.
obs_smt = abc.get_obs_from_annotated_clusters(nucleus, obs_th_neurons)

cplots.plot_ccf_overlay(obs_smt, ccf_polygons,
                        point_hue='cluster', point_palette=palettes['cluster'],
                        ccf_names=[nucleus],
                        **kwargs);

### LD

In [34]:
nucleus='LD'
# Cells from the clusters annotated for this nucleus. Bound to a cell-local
# name so the full metadata table `obs` is not overwritten.
obs_ld = abc.get_obs_from_annotated_clusters(nucleus, obs_th_neurons)

cplots.plot_ccf_overlay(obs_ld, ccf_polygons,
                        point_hue='cluster', point_palette=palettes['cluster'],
                        ccf_names=[nucleus],
                        **kwargs);

### PVT-IMD overlap

In [35]:
# by annotation: cells from clusters annotated 'PVT' plus those annotated
# 'PVT IMD'. Bound to a cell-local name so the full metadata table `obs` is
# not overwritten.
obs_pvt_imd = pd.concat([
    abc.get_obs_from_annotated_clusters('PVT', obs_th_neurons),
    abc.get_obs_from_annotated_clusters('PVT IMD', obs_th_neurons)
])
nuclei = ['PVT','IMD']
# by subclass (excluding)
# NOTE(review): `obs_anno` is not defined anywhere in the visible notebook
# obs_pvt_imd = obs_th_neurons.loc[lambda df: df['subclass'].str.contains('PVT')]
# obs_pvt_imd = obs_pvt_imd.loc[obs_pvt_imd.index.difference(obs_anno.index)]

# by cluster
# clusters = ['2617','2629']
# obs_pvt_imd = obs_th_neurons.loc[lambda df: df['cluster'].str[:4].isin(clusters)]

cplots.plot_ccf_overlay(obs_pvt_imd, ccf_polygons,
                        min_group_count=0,min_section_count=20,
                        point_hue='cluster', point_palette=palettes['cluster'],
                        ccf_names=nuclei,
                        **kwargs);

## Clusters, no CCF overlay

In [36]:
# Thalamic neurons of the selected subclasses. Bound to a cell-local name so
# the full metadata table `obs` is not overwritten.
obs_clusters_only = obs_th_neurons.loc[lambda df: df['subclass'].isin(subclasses_all)]
# NOTE(review): presumably these nuclei are absent from the plotted section,
# which is what leaves the figure without CCF shapes -- confirm.
nuclei = ['AD', 'AV', 'AMd', 'AMv']
# Shared plotting arguments with a larger marker size (s=5).
# NOTE: this rebinds `kwargs` for any cell run after this one.
kwargs = dict(
    bg_cells=obs_th_strict,
    section_col=section_col,
    x_col = 'x_'+coords,
    y_col = 'y_'+coords,
    s=5,
    shape_palette='dark_outline',
    boundary_img=ccf_boundaries
)
# Single section: the second entry of sections_GRC
cplots.plot_ccf_overlay(obs_clusters_only, ccf_polygons,
                        point_hue='cluster', sections=[sections_GRC[1]],
                        point_palette=palettes['cluster'],
                        ccf_names=nuclei,
                        legend=None,
                        **kwargs);

## Gene expression, no CCF overlay

In [37]:
# Load the log2-transformed expression data restricted to the TH/ZI subset,
# with cell metadata and colors attached.
adata = abc.load_adata(
    transform='log2',
    subset_to_TH_ZI=True,
    with_metadata=True,
    flip_y=False,
    round_z=True,
    cirro_names=False,
    with_colors=True,
    realigned=False,
    loaded_metadata=None,
)

# Apply the non-neuronal, midbrain and "others" class filters
adata = abc.filter_adata_by_class(
    adata,
    filter_nonneuronal=True,
    filter_midbrain=True,
    filter_others=True,
)

In [38]:
# Gad2 expression on section 7.2.
# 'AD' is requested for nuclei/highlight but is not in this section, so the
# result is no ccf overlays.
cplots.plot_expression_ccf(
    adata,
    'Gad2',
    ccf_polygons,
    sections=[7.2],
    nuclei=['AD'],
    highlight=['AD'],
    s=0.5,
    cmap='Blues',
    show_outline=False,
    bg_shapes=False,
    axes=False,
    section_col='z_reconstructed',
    x_col='x_reconstructed',
    y_col='y_reconstructed',
    boundary_img=None,
)

In [39]:
# Slc17a7 expression on section 7.2.
# 'AD' is requested for nuclei/highlight but is not in this section, so the
# result is no ccf overlays.
cplots.plot_expression_ccf(
    adata,
    'Slc17a7',
    ccf_polygons,
    sections=[7.2],
    nuclei=['AD'],
    highlight=['AD'],
    s=0.5,
    cmap='Blues',
    show_outline=False,
    bg_shapes=False,
    axes=False,
    section_col='z_reconstructed',
    x_col='x_reconstructed',
    y_col='y_reconstructed',
    boundary_img=None,
)