# Fig. S3 | River plots of subclasses vs. thalamic nuclei

River plots are also known as Sankey diagrams.

In [1]:
import sys
sys.path.append('/code/')
from thalamus_merfish_analysis import abc_load as abc
from thalamus_merfish_analysis import ccf_images as cimg
from thalamus_merfish_analysis import ccf_erode as cerd
from thalamus_merfish_analysis import diversity_plots as dplots
from thalamus_merfish_analysis.distance_metrics import TH_PARCELLATION_STRUCTURES

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
# Relative path of the directory that the river-plot HTML files are written to
results_dir = '../../results'

## Load thalamus data

In [3]:
# Only the cell metadata ('obs') is loaded for the thalamus dataset
obs_th = abc.load_standard_thalamus(
    data_structure='obs',
)

### Erode CCF region boundaries & re-label cell parcellations to account for slight misalignments

In [4]:
# CCF level at which cells are re-labeled after erosion
ccf_metrics_level = "structure"

# Load the CCF structures image and merge substructures into structures
# (esp. AMd + AMv -> AM; LGd-co + LGd-ip + LGd-sh -> LGd)
ccf_images = cerd.merge_substructures(
    abc.get_ccf_labels_image(),
    ccf_level='structure',
)

# Re-label each cell using the eroded CCF structures; the second return value
# is the name of the obs column that holds the eroded labels
obs_erode, ccf_label_eroded = cerd.label_cells_by_eroded_ccf(
    obs_th,
    ccf_images,
    ccf_level=ccf_metrics_level,
    distance_px=5,  # default is erosion by 5px (50um)
)

# Section 6.6 has poor alignment between PF celltypes and the PF CCF
# structure, so every cell in that section is marked as 'unassigned'
obs_erode.loc[obs_erode['z_section'] == 6.6, ccf_label_eroded] = 'unassigned'

### Load color palettes

In [5]:
# Published ABC Atlas taxonomy color palettes, keyed by taxonomy level
abc_palettes = {
    taxonomy_level: abc.get_taxonomy_palette(taxonomy_level)
    for taxonomy_level in (
        'neurotransmitter',
        'class',
        'subclass',
        'supertype',
        'cluster',
    )
}

## Fig. S3A | River plot for all subclasses vs all thalamic nuclei as shown in Fig. S1

In [6]:
# Subclasses used as the source categories of the Fig. S3A river plot,
# listed in ascending order of their numeric taxonomy prefix
subclasses_from_fig2 = [
    # GABAergic subclasses (093-109)
    '093 RT-ZI Gnb3 Gaba',
    '101 ZI Pax6 Gaba',
    '109 LGv-ZI Otx2 Gaba',
    # glutamatergic subclasses (145-190)
    '145 MH Tac2 Glut',
    '146 LH Pou4f1 Sox1 Glut',
    '147 AD Serpinb7 Glut',
    '148 AV Col27a1 Glut',
    '149 PVT-PT Ntrk1 Glut',
    '150 CM-IAD-CL-PCN Sema5b Glut',
    '151 TH Prkcd Grin2c Glut',
    '152 RE-Xi Nox4 Glut',
    '153 MG-POL-SGN Nts Glut',
    '154 PF Fzd5 Glut',
    '156 MB-ant-ve Dmrta2 Glut',
    '168 SPA-SPFm-SPFp-POL-PIL-PoT Sp9 Glut',
    '190 ND-INC Foxd2 Glut',
    # GABAergic subclasses (197-203)
    '197 SNr Six3 Gaba',
    '203 LGv-SPFp-SPFm Nkx2-2 Tcf7l2 Gaba',
]

In [7]:
# River plot of the selected subclasses (sources) against the thalamic
# parcellation structures (targets), colored by the subclass palette.
# target_col uses ccf_label_eroded (the column name returned by
# cerd.label_cells_by_eroded_ccf) rather than a hard-coded string, so the plot
# always reads the same column in which section 6.6 was set to 'unassigned'.
fig_subclasses_all = dplots.sankey_diagram(
    obs_erode,
    source_col='subclass',
    target_col=ccf_label_eroded,
    source_cats_to_plot=subclasses_from_fig2,
    target_cats_to_plot=TH_PARCELLATION_STRUCTURES,
    source_color_dict=abc_palettes['subclass'],
)

# write_html doesn't require a kaleido package install
# to save to an Illustrator-editable PDF, open the html in a browser and print to PDF
fig_subclasses_all.write_html(
    f"{results_dir}/figS3_river_plot_subclasses_all.html",
)

## Fig. S3B | River plot for subclasses 150 & 151 vs the 25 thalamic nuclei shown in Fig. 2

In [8]:
# River plot restricted to subclasses 150 and 151 (sources) against the
# regions listed in dplots.TH_DIVERSITY_REGIONS (targets).
# target_col uses ccf_label_eroded (the column name returned by
# cerd.label_cells_by_eroded_ccf) rather than a hard-coded string, so the plot
# always reads the same column in which section 6.6 was set to 'unassigned'.
fig_subclasses_150_151 = dplots.sankey_diagram(
    obs_erode,
    source_col='subclass',
    target_col=ccf_label_eroded,
    source_cats_to_plot=[
        '150 CM-IAD-CL-PCN Sema5b Glut',
        '151 TH Prkcd Grin2c Glut',
    ],
    target_cats_to_plot=dplots.TH_DIVERSITY_REGIONS,
    source_color_dict=abc_palettes['subclass'],
)

# Saved as HTML, which can be opened in a browser and printed to PDF
fig_subclasses_150_151.write_html(
    f"{results_dir}/figS3_river_plot_subclasses_150_151.html",
)