In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from thalamus_merfish_analysis import ccf_plots as cplots
from thalamus_merfish_analysis import ccf_images as cimg
from thalamus_merfish_analysis import abc_load as abc
get_ipython().run_line_magic('matplotlib', 'inline') 

## Load brain3 data

In [3]:
# Load the combined cell metadata table for the 20230830 data version.
# realigned=True is requested here only because that variant loads faster.
obs = abc.get_combined_metadata(
    version="20230830",
    realigned=True,
    drop_unused=False,
)

In [4]:
# Choose which CCF registration to plot against. The CCF label column and the
# coordinate suffix used for the x/y columns must match that choice.
realigned = False
ccf_images = abc.get_ccf_labels_image(resampled=True, realigned=realigned)
ccf_label, coords = (
    ('parcellation_substructure_realigned', 'section')
    if realigned
    else ('parcellation_substructure', 'reconstructed')
)

### preprocessing

In [5]:
# Thalamus substructure names as returned by the loader.
th_names = abc.get_thalamus_substructure_names()
# Drop the catch-all 'TH-unassigned' label. The result is sorted so the list
# order is reproducible: iterating a set of strings directly yields an order
# that can change between kernel sessions (string hash randomization).
th_subregion_names = sorted(set(th_names).difference(['TH-unassigned']))

In [6]:
# Drop non-neuronal cells and some other outlier non-thalamus types.
obs_neurons = abc.filter_by_class_thalamus(obs, filter_midbrain=False)
# Spatially restrict to the thalamus boundaries (a buffer could be added here).
# No buffer argument is passed, so the "strict" subset is simply a second name
# for the same dataframe.
obs_th_neurons = obs_th_strict = abc.filter_by_thalamus_coords(
    obs_neurons, realigned=realigned
)
# If a buffer is added above, compute the strict subset separately instead:
# obs_th_strict = abc.filter_by_thalamus_coords(obs_neurons, realigned=realigned)

In [8]:
# Three example sections used for the compact figures below.
sections_3 = [6.4, 7.2, 8.0]
# Column holding each cell's section coordinate, and every distinct value of
# that column among the thalamus neurons, in ascending order.
section_col = 'z_section'
sections_all = sorted(obs_th_neurons[section_col].unique())

In [9]:
# Precompute the CCF region boundary images once; they are passed to the
# plotting calls below as `boundary_img`.
# Section coordinates are turned into integer indices by dividing by 0.2
# (presumably the section spacing -- TODO confirm).
sections_int = np.rint(np.asarray(sections_all) / 0.2).astype(int)
ccf_boundaries = cimg.sectionwise_label_erosion(
    ccf_images,
    section_list=sections_int,
    distance_px=1,
    fill_val=0,
    return_edges=True,
)
# Alternatively, skip the call above and use None so that the boundaries are
# calculated on the fly:
# ccf_boundaries = None

## view CCF

In [10]:
# Pass an empty selection of cells so that no foreground points are drawn and
# only the CCF shapes are shown for the three example sections.
cplots.plot_ccf_overlay(
    obs_th_neurons.iloc[[]],
    ccf_images,
    point_hue=ccf_label,
    sections=sections_3,
    section_col=section_col,
    x_col=f'x_{coords}',
    y_col=f'y_{coords}',
    legend='ccf',
    # highlight='all',  # not currently implemented
    min_section_count=0,
    boundary_img=ccf_boundaries
);

## view taxonomy labels over CCF

### select thalamus celltype subsets

In [11]:
# Cell counts per subclass within the thalamus, keeping subclasses with >100 cells.
(
    obs_th_strict['subclass']
    .value_counts()
    .pipe(lambda counts: counts[counts > 100])
)

In [20]:
# Taxonomy groups kept at each level are those with more than a minimum number
# of cells in the thalamus subset (150 / 20 / 5 for subclass / supertype / cluster).
# Notes from checking all subclasses with 100-500 cells:
#  - may also want to exclude the PRC-PAG subclass
#  - 103 PVHd-DMH Lhx6 Gaba is likely in ZI, but a bit unclear
subclasses_all = (
    obs_th_strict['subclass']
    .value_counts()
    .pipe(lambda counts: counts[counts > 150])
    .index
)
print(len(subclasses_all))

supertypes_all = (
    obs_th_strict['supertype']
    .value_counts()
    .pipe(lambda counts: counts[counts > 20])
    .index
)
print(len(supertypes_all))

clusters_all = (
    obs_th_strict['cluster']
    .value_counts()
    .pipe(lambda counts: counts[counts > 5])
    .index
)
print(len(clusters_all))

In [14]:
# Distinct supertypes found among cells whose subclass passed the count filter.
supertypes_from_subclasses = (
    obs_th_neurons
    .loc[obs_th_neurons['subclass'].isin(subclasses_all), 'supertype']
    .unique()
)
len(supertypes_from_subclasses)

In [15]:
# Number of supertypes shared by the count-filtered list and the
# subclass-derived list.
# The others (in the symmetric difference) were checked and none seem relevant,
# so it is probably safe to restrict by subclass, though that may miss a few
# one-off clusters.
supertypes_all.intersection(supertypes_from_subclasses).size

In [26]:
# Distinct clusters found among cells whose subclass passed the count filter,
# followed by a one-line summary of how many also appear in `clusters_all`.
clusters_from_subclasses = (
    obs_th_neurons
    .loc[obs_th_neurons['subclass'].isin(subclasses_all), 'cluster']
    .unique()
)
f"{clusters_from_subclasses.shape[0]} clusters in subclasses, {len(clusters_all.intersection(clusters_from_subclasses))} from sig cluster list."

In [31]:
# Report how many thalamus cells remain when filtering by the count-thresholded
# cluster list versus the count-thresholded subclass list.
f"""{len(obs_th_neurons.loc[lambda df: df['cluster'].isin(clusters_all)])} cells filtered by cluster, 
{len(obs_th_neurons.loc[lambda df: df['subclass'].isin(subclasses_all)])} filtered by subclass"""

### plotting defaults

In [None]:
# Extend the glasbey colors with a reversed copy of themselves (this assumes
# cplots.glasbey is a list-like whose `+` concatenates -- TODO confirm), then
# pair the colors with the retained clusters in order. zip stops at the
# shorter input, so at most one color per cluster is consumed.
glasbey2 = cplots.glasbey + cplots.glasbey[::-1]
cluster_palette_glasbey = {
    cluster: color for cluster, color in zip(clusters_all, glasbey2)
}
# Save the mapping to a CSV file under resources/.
pd.Series(cluster_palette_glasbey).to_csv("resources/cluster_palette_glasbey.csv")


In [38]:
# Taxonomy palette for each level, keyed by level name.
palettes = dict(
    (level, abc.get_taxonomy_palette(level))
    for level in ('subclass', 'supertype', 'cluster')
)

In [43]:

# Keyword arguments shared by the plot_ccf_overlay calls below. Individual
# calls can override an entry with dict(kwargs, key=value).
kwargs = {
    'bg_cells': obs_th_strict,
    'section_col': section_col,
    'x_col': f'x_{coords}',
    'y_col': f'y_{coords}',
    's': 3,
    'face_palette': None,
    'edge_color': 'grey',
    'boundary_img': ccf_boundaries,
}

In [21]:
# Exploring edge cell types: plot the cells of one borderline subclass,
# colored by supertype.
obs = obs_th_neurons.loc[lambda df: df['subclass']=="103 PVHd-DMH Lhx6 Gaba"]
point_hue='supertype'
# `kwargs` already carries a `bg_cells` entry, so passing bg_cells= as a
# separate keyword next to **kwargs raises
# "TypeError: got multiple values for keyword argument 'bg_cells'".
# Override the entry inside the dict instead.
cplots.plot_ccf_overlay(obs, ccf_images,
                        point_hue=point_hue, point_palette=palettes[point_hue],
                        **dict(kwargs, bg_cells=obs_th_neurons));

### subclass

In [22]:
# Cells from the count-filtered subclasses, colored by subclass, on the three
# example sections.
obs = obs_th_neurons.loc[obs_th_neurons['subclass'].isin(subclasses_all)]
cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    sections=sections_3,
    point_hue='subclass',
    point_palette=palettes['subclass'],
    legend='cells',
    **kwargs
);

### supertype

In [23]:
# Cells from the count-filtered supertypes, colored by supertype.
# TODO: this is not a good palette! Maybe give all supertypes of one subclass
# the same hue?
obs = obs_th_neurons.loc[obs_th_neurons['supertype'].isin(supertypes_all)]
cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    sections=sections_3,
    point_hue='supertype',
    point_palette=palettes['supertype'],
    legend='cells',
    **kwargs
);

### cluster

In [44]:
# Cells from the count-filtered clusters, colored with the glasbey cluster
# palette and drawn with a smaller marker size (s=2) than the shared default.
obs = obs_th_neurons.loc[obs_th_neurons['cluster'].isin(clusters_all)]
# obs = obs_th_neurons

figs = cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    sections=sections_3,
    point_hue='cluster',
    point_palette=cluster_palette_glasbey,
    legend=None,
    **{**kwargs, 's': 2}
)

# Save each returned figure as a PDF, numbered by its position in `figs`.
for fig_idx, section_fig in enumerate(figs):
    section_fig.savefig(f"/results/cluster_labels_section{fig_idx}.pdf", transparent=True)

In [21]:
# Same cluster-level view but with the taxonomy palette (not ideal), and with
# cells selected by subclass rather than by cluster.
obs = obs_th_neurons.loc[obs_th_neurons['subclass'].isin(subclasses_all)]
cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    sections=sections_3,
    point_hue='cluster',
    point_palette=palettes['cluster'],
    legend=None,
    **kwargs
);

## specific nuclei examples

### ANT

#### supertypes

In [45]:
# Anterior nuclei example: one section, cells chosen by cluster id.
# Cluster ids 2613-2616 and 2674-2676 (inclusive) are matched against the
# first four characters of each cell's cluster label.
sections = [8.0]
nuclei = ['AD', 'AV', 'AMd', 'AMv']
clusters = [*map(str, range(2613, 2617)), *map(str, range(2674, 2677))]
obs = obs_th_neurons.loc[obs_th_neurons['cluster'].str.slice(0, 4).isin(clusters)]

cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    sections=sections,
    ccf_names=nuclei,
    point_hue='supertype',
    point_palette=palettes['supertype'],
    legend='cells',
    **kwargs
);

In [47]:
# Reuses `obs`, `sections` and `nuclei` from the preceding cell; colors the
# same cells by cluster with the glasbey cluster palette.
figs = cplots.plot_ccf_overlay(obs, ccf_images,
                        point_hue='cluster', point_palette=cluster_palette_glasbey,
                        sections=sections,
                        ccf_names=nuclei, legend='cells',
                        **kwargs)
# Only the first returned figure is saved (a single section was requested).
# The path has no placeholders, so a plain string literal is used.
figs[0].savefig("/results/cluster_labels_anterior_nuclei.pdf", transparent=True)

In [23]:
# Alternative: pass sections=None so that the sections to plot are chosen
# from the selected cells rather than listed explicitly.
cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    sections=None,
    ccf_names=nuclei,
    point_hue='supertype',
    point_palette=palettes['supertype'],
    legend='cells',
    **kwargs
);

#### clusters

In [24]:
# Anterior nuclei cells colored by cluster, using the taxonomy cluster palette.
cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    sections=sections,
    ccf_names=nuclei,
    point_hue='cluster',
    point_palette=palettes['cluster'],
    legend='cells',
    **kwargs
);

### VAL + VM (clusters)

In [25]:

# VAL + VM example: three sections, cells chosen by the first four characters
# of their cluster label.
nuclei = ['VM','VAL']
sections = [7.2, 7.0, 6.8]
clusters = ['2683','2684','2687']
obs = obs_th_neurons.loc[obs_th_neurons['cluster'].str.slice(0, 4).isin(clusters)]
cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    sections=sections,
    ccf_names=nuclei,
    point_hue='cluster',
    point_palette=palettes['cluster'],
    legend='cells',
    **kwargs
);

### PVT

#### supertypes in one subclass

In [26]:
# PVT example: keep cells whose subclass name contains any of the listed
# nucleus names (i.e. subclasses annotated as PVT), colored by supertype.
nuclei = ['PVT']
sections = [6.6, 7.8, 8.2]
obs = obs_th_neurons.loc[
    lambda df: np.any(
        [df['subclass'].str.contains(name).values for name in nuclei],
        axis=0,
    )
]
cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    sections=sections,
    ccf_names=nuclei,
    point_hue='supertype',
    point_palette=palettes['supertype'],
    legend='cells',
    **kwargs
);

### RT

#### supertypes in one subclass

In [27]:
# RT example: keep cells whose subclass name contains 'RT-ZI', colored by
# supertype, with the RT and ZI regions named in the overlay.
match = 'RT-ZI'
nuclei = ['RT','ZI']
sections = [7.2, 7.8]
obs = obs_th_neurons.loc[lambda df: df['subclass'].str.contains(match)]
cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    sections=sections,
    ccf_names=nuclei,
    point_hue='supertype',
    point_palette=palettes['supertype'],
    legend='cells',
    **kwargs
);

## other nuclei from annotations

### PT

In [28]:
# PT: cells from the clusters annotated for this nucleus, colored by supertype.
nucleus = 'PT'
obs = abc.get_obs_from_annotated_clusters(nucleus, obs_th_neurons)

cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    ccf_names=[nucleus],
    point_hue='supertype',
    point_palette=palettes['supertype'],
    **kwargs
);

### LGd

In [29]:

# LGd: cells from the clusters annotated as 'LGd', colored by cluster, with the
# three LGd subdivisions named in the overlay.
nuclei = ['LGd-co', 'LGd-ip', 'LGd-sh']
obs = abc.get_obs_from_annotated_clusters('LGd', obs_th_neurons)

cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    ccf_names=nuclei,
    point_hue='cluster',
    point_palette=palettes['cluster'],
    **kwargs
);

### SMT

In [30]:
# SMT: cells from the clusters annotated for this nucleus, colored by cluster.
nucleus = 'SMT'
obs = abc.get_obs_from_annotated_clusters(nucleus, obs_th_neurons)

cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    ccf_names=[nucleus],
    point_hue='cluster',
    point_palette=palettes['cluster'],
    **kwargs
);

### LD

In [31]:
# LD: cells from the clusters annotated for this nucleus, colored by cluster.
nucleus = 'LD'
obs = abc.get_obs_from_annotated_clusters(nucleus, obs_th_neurons)

cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    ccf_names=[nucleus],
    point_hue='cluster',
    point_palette=palettes['cluster'],
    **kwargs
);

### PVT-IMD overlap

In [32]:
# Selection by annotation: stack the cells from clusters annotated 'PVT' and
# those annotated 'PVT IMD'.
nuclei = ['PVT','IMD']
obs = pd.concat([
    abc.get_obs_from_annotated_clusters(annotation, obs_th_neurons)
    for annotation in ('PVT', 'PVT IMD')
])

# Alternative selection, by subclass (excluding the annotated cells):
# obs = obs_th_neurons.loc[lambda df: df['subclass'].str.contains('PVT')]
# obs = obs.loc[obs.index.difference(obs_anno.index)]

# Alternative selection, by cluster:
# clusters = ['2617','2629']
# obs = obs_th_neurons.loc[lambda df: df['cluster'].str[:4].isin(clusters)]

cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    ccf_names=nuclei,
    point_hue='cluster',
    point_palette=palettes['cluster'],
    min_group_count=0,
    min_section_count=20,
    **kwargs
);

## subclass view of all sections

In [33]:
# Subclass view with no explicit `sections` argument, so it is not limited to
# the three example sections; min_group_count is set to 30.
obs = obs_th_neurons.loc[obs_th_neurons['subclass'].isin(subclasses_all)]
cplots.plot_ccf_overlay(
    obs,
    ccf_images,
    point_hue='subclass',
    point_palette=palettes['subclass'],
    min_group_count=30,
    **kwargs
);