# 4_metrics_nucleus_celltype_diversity

In [1]:
# Enable IPython's autoreload extension in mode 2 for this kernel session.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from thalamus_merfish_analysis import ccf_plots as cplot
from thalamus_merfish_analysis import ccf_images as cimg
from thalamus_merfish_analysis import ccf_registration as ccf
from thalamus_merfish_analysis import abc_load as abc
from thalamus_merfish_analysis import ccf_erode as cerd
from thalamus_merfish_analysis import diversity_metrics as divmet
from thalamus_merfish_analysis import diversity_plots as dplot

# Render matplotlib figures inline in the notebook output (same as `%matplotlib inline`)
get_ipython().run_line_magic("matplotlib", "inline")

## Load ABC Thalamus dataset

In [3]:
# Load the standard thalamus dataset, requesting only the `obs` data structure
obs = abc.load_standard_thalamus(
    data_structure='obs',
)

In [4]:
# Load the CCF label image volumes.
# `realigned` is kept as a named flag so it can be reused by later cells.
realigned = False
ccf_images = abc.get_ccf_labels_image(
    resampled=True,
    realigned=realigned,
)

In [5]:
# Use the `_reconstructed` coordinate space for every spatial column name
coords = '_reconstructed'
x_col, y_col, z_col = (f'{axis}{coords}' for axis in 'xyz')
# the section column is the same column as the z coordinate
section_col = z_col

## Define eroded CCF regions

In [6]:
# Label each cell by its eroded CCF structure, using the function's default erosion.
# NOTE(review): the original note states the default is erosion by 5px (5um) —
# confirm against `ccf_erode.label_cells_by_eroded_ccf`.
obs_erode = cerd.label_cells_by_eroded_ccf(
    obs,
    ccf_images,
)

In [7]:
# There is poor alignment in section 6.6 between PF celltypes and the PF CCF structure
# So, we'll set all cells in section 6.6 to 'unassigned' CCF structure.
# The section is matched with a tolerance (np.isclose) rather than exact float
# equality, so a tiny floating-point difference in `z_section` cannot silently
# leave the section's cells assigned.
obs_erode.loc[
    lambda df: np.isclose(df['z_section'], 6.6),
    cerd.ERODED_CCF_STRUCTURE_COL,
] = 'unassigned'

In [8]:
# Visualize the mismatch in section 6.6 (see `154 PF Fzd5 Glut` subclass in dark blue)
# with the plotting functions from the `2_view_thalamus_celltypes_ccf` notebook.
# Only 'subclass' is plotted; add 'supertype' to the list to view that level too.
taxonomy_level = ['subclass']
for level in taxonomy_level:
    sec66_fig = cplot.plot_ccf_overlay(
        obs_erode,
        ccf_images,
        point_hue=level,
        sections=[6.6],
        point_palette=abc.get_taxonomy_palette(level),
        legend='cells',
        section_col=section_col,
        x_col=x_col,
        y_col=y_col,
    )

## Calculate default set of metrics

In [9]:
# Column holding each cell's eroded CCF structure label.
# NOTE(review): presumably the same name as `cerd.ERODED_CCF_STRUCTURE_COL` —
# confirm, then reuse that constant instead of repeating the literal.
ccf_label = 'parcellation_structure_eroded'

# Compute the default diversity metrics with `ccf_label` as the region column,
# then preview the first 5 rows
th_ccf_metrics = divmet.calculate_diversity_metrics(
    obs_erode,
    ccf_label=ccf_label,
)
display(th_ccf_metrics.head(5))

## Plot default metrics

### Select regions to plot

In [10]:
# Rows of `th_ccf_metrics` to include in the plots below
# (the region list provided by `diversity_plots`)
regions_to_plot = dplot.TH_DIVERSITY_REGIONS

### Plot cluster count & frac on dual y-axis barplot

In [11]:
# Dual y-axis barplot of the 'cluster' metrics for the selected regions
fig = dplot.barplot_dual_y_count_frac(
    th_ccf_metrics.loc[regions_to_plot],
    'cluster',
    gt5_only=True,
)
# Uncomment to save the figure:
# fig.savefig("/results/nuclei_cluster_counts_barplot.pdf", transparent=True)

### Plot unique cell type count

In [12]:
# Plot the 'count' metric for the selected regions
fig = dplot.plot_metric_multiple_levels(
    th_ccf_metrics.loc[regions_to_plot],
    'count',
    ylabel="cell type count",
)

In [13]:
# Plot the 'count_gt5' metric for the selected regions
fig = dplot.plot_metric_multiple_levels(
    th_ccf_metrics.loc[regions_to_plot],
    'count_gt5',
    ylabel='cell type (with >5 cells) count',
)

### Plot: cell type count / # cell types

In [14]:
# Plot the 'frac' metric for the selected regions
fig = dplot.plot_metric_multiple_levels(
    th_ccf_metrics.loc[regions_to_plot],
    'frac',
    ylabel='cell type count / # cell types',
)

In [15]:
# Plot the 'frac_gt5' metric for the selected regions
fig = dplot.plot_metric_multiple_levels(
    th_ccf_metrics.loc[regions_to_plot],
    'frac_gt5',
    ylabel='cell type (with >5 cells) count / # cell types',
)

### Plot cell type counts, normalized to # of cells in each CCF region

This controls for differing cell densities & region sizes across CCF structures

There is a wide range of sizes (here, we mean # of cells) & cell densities across thalamic CCF structures:

In [16]:
# Report which selected region has the fewest / most cells, along with the counts
print(
    'thalamic structure with fewest cells:\n',
    th_ccf_metrics.loc[regions_to_plot, 'count_cells'].idxmin(),
    th_ccf_metrics.loc[regions_to_plot, 'count_cells'].min(),
)
print(
    'thalamic structure with most cells:\n',
    th_ccf_metrics.loc[regions_to_plot, 'count_cells'].idxmax(),
    th_ccf_metrics.loc[regions_to_plot, 'count_cells'].max(),
)


In [18]:
# Plot the 'count_cells' metric for the selected regions.
# `taxonomy_levels=None` is passed explicitly for this metric.
fig = dplot.plot_metric_multiple_levels(
    th_ccf_metrics.loc[regions_to_plot],
    'count_cells',
    taxonomy_levels=None,
    ylabel='cell count',
)

The effect of CCF structure size is weak, but present, and should be accounted for.

In [19]:
# effect of region size is weak but probably still present...
# Regress 'count_gt5_cluster' against 'count_cells' for the selected regions.
# `sns` comes from the imports cell at the top of the notebook, so it is not
# re-imported here; `data` is passed by keyword so the call does not depend on
# the positional order of regplot's arguments.
sns.regplot(
    data=th_ccf_metrics.loc[regions_to_plot],
    x='count_cells',
    y='count_gt5_cluster',
    color='#F99D20',
    label='cluster',
);

In [20]:
# Plot the 'count_norm2cells' metric for the selected regions
fig = dplot.plot_metric_multiple_levels(
    th_ccf_metrics.loc[regions_to_plot],
    'count_norm2cells',
    ylabel='cell type count / # cells',
)

## Display CCF structures shaded by metrics

We have also provided a function in `ccf_plots` that allows you to shade the 
CCF structures by a given metric.

We display 'cluster count / # cells' as a usage example.

In [21]:
# Reload the CCF image volumes, cast to int, for the metric-shaded plots.
# `realigned` is passed explicitly (as in the initial load) so these volumes
# match the ones used to label cells by eroded CCF structure, instead of
# silently depending on the function's default.
ccf_images = abc.get_ccf_labels_image(
    resampled=True,
    realigned=realigned,
).astype(int)

# pick the same 3 example sections used in the `2_view_thalamus_celltypes_ccf` notebook
sections_3 = [6.4, 7.2, 8.0]

In [22]:
# Shade the CCF structures in the example sections by the
# 'count_norm2cells_cluster' metric
figs = cplot.plot_metrics_ccf(
    ccf_images,
    th_ccf_metrics['count_norm2cells_cluster'],
    sections_3,
    vmin=0,
    vmax=0.15,
    cmap='Oranges',
    cb_label='cluster count / # cells',
)

## Calculate local diversity index

In [23]:
# Compute a local diversity metric named 'isi' from `divmet.inverse_simpsons_index`,
# with n_neighbors=15
local_isi_df = divmet.calculate_local_diversity_metric(
    obs_erode,
    divmet.inverse_simpsons_index,
    metric_name='isi',
    n_neighbors=15,
)
# Preview only the first rows instead of displaying the whole result
local_isi_df.head()

In [24]:
# Plot the 'local_isi_cluster' metric for each of the 3 example sections
sections_3 = [6.4, 7.2, 8.0]
metric_name = 'local_isi_cluster'
for section in sections_3:
    fig = dplot.plot_local_metric_ccf_section(
        obs_erode,
        local_isi_df,
        ccf_images,
        section,
        metric_name,
    )

## Proportions as stacked bar graphs

### Subclass proportions by region

In [25]:
# Stacked bar graph of 'subclass' proportions, with legend
fig = dplot.barplot_stacked_proportions(
    obs_erode,
    'subclass',
    th_ccf_metrics,
    legend=True,
)

### Supertype proportions by region

In [26]:
# Stacked bar graph of 'supertype' proportions, with legend
fig = dplot.barplot_stacked_proportions(
    obs_erode,
    'supertype',
    th_ccf_metrics,
    legend=True,
)

### Cluster proportions by region

In [27]:
# Stacked bar graph of 'cluster' proportions, drawn without a legend
fig = dplot.barplot_stacked_proportions(
    obs_erode,
    'cluster',
    th_ccf_metrics,
    legend=False,
)

In [28]:
#  with legend - uncomment to view the very long list of clusters
# fig = dplot.barplot_stacked_proportions(obs_erode, 'cluster', th_ccf_metrics, legend=True)