# Figure 3 | Differentially expressed genes distinguish adjacent cell types in the anterior thalamic nuclei.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import scanpy as sc

import sys
sys.path.append('/code/')
from thalamus_merfish_analysis import abc_load as abc
from thalamus_merfish_analysis import ccf_images as cimg
from thalamus_merfish_analysis import ccf_erode as cerode
from thalamus_merfish_analysis import ccf_plots as cplots

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from matplotlib import rcParams
rcParams['ps.fonttype'] = 42
rcParams['pdf.fonttype'] = 42
rcParams['font.size'] = 7

import colorcet as cc

import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline') 

In [3]:
# Output directory (relative path) that every savefig call in this notebook writes into
results_dir = '../../results'

## Load thalamus data

In [4]:
# Full thalamus dataset, requested as an anndata object
adata_th = abc.load_standard_thalamus(data_structure='adata')

# Left-hemisphere subset: select the rows where obs['left_hemisphere'] is set
# and copy, so the subset is an independent object rather than a view
left_hemi_mask = adata_th.obs['left_hemisphere']
adata_th_left = adata_th[left_hemi_mask].copy()

In [5]:
# All spatial obs columns used below share the `_reconstructed` suffix
coords = '_reconstructed'

x_col = f'x{coords}'
y_col = f'y{coords}'
z_col = f'z{coords}'
# sections are addressed through the z column, so both names refer to the same column
section_col = z_col

In [6]:
# CCF structures image, with substructures merged into structures
# (esp. AMd + AMv -> AM; LGd-co + LGd-ip + LGd-sh -> LG)
ccf_images = cerode.merge_substructures(abc.get_ccf_labels_image(),
                                        ccf_level='structure')

# The boundaries are reused several times in this notebook, so compute them once
# here, and only for the sections present in the thalamus dataset
sections_all = sorted(adata_th.obs[section_col].unique())
# presumably section coordinates are spaced 0.2 apart, so coordinate/0.2 gives
# the integer section index -- TODO confirm
sections_int = np.rint(np.array(sections_all) / 0.2).astype(int)

ccf_boundaries = cimg.sectionwise_label_erosion(
    ccf_images,
    distance_px=1,
    fill_val=0,
    return_edges=True,
    section_list=sections_int,
)

In [7]:
# Left-hemisphere-only CCF structures image, with substructures merged into
# structures (esp. AMd + AMv -> AM; LGd-co + LGd-ip + LGd-sh -> LG)
ccf_images_left = cerode.merge_substructures(
    abc.get_ccf_labels_image(subset_to_left_hemi=True),
    ccf_level='structure',
)

# Precompute the boundaries once, only for sections present in the
# left-hemisphere dataset.
# NOTE: this rebinds sections_all / sections_int to the left-hemisphere values.
sections_all = sorted(adata_th_left.obs[section_col].unique())
sections_int = np.rint(np.array(sections_all) / 0.2).astype(int)

ccf_boundaries_left = cimg.sectionwise_label_erosion(
    ccf_images_left,
    distance_px=1,
    fill_val=0,
    return_edges=True,
    section_list=sections_int,
)

In [8]:
# Point the ccf_plots module-level default region list at the thalamus names.
# NOTE: if thalamus_merfish_analysis.ccf_plots is reloaded after this cell has
# been run, cplots.CCF_REGIONS_DEFAULT will be reset to None and this cell
# needs to be re-run.
cplots.CCF_REGIONS_DEFAULT = abc.get_thalamus_names()

In [9]:
# Published ABC Atlas taxonomy color palettes, one entry per taxonomy level
taxonomy_levels = ('neurotransmitter', 'class', 'subclass', 'supertype', 'cluster')
abc_palettes = dict(zip(taxonomy_levels,
                        map(abc.get_taxonomy_palette, taxonomy_levels)))

# Override the cluster-level palette with a secondary palette that attempts to
# increase the color contrast between spatially neighboring clusters
# TODO: once released as package, this won't be accessible through a hardcode
palette_df = pd.read_csv('/code/thalamus_merfish_analysis/resources/cluster_palette_glasbey.csv')
# presumably 'Unnamed: 0' holds the cluster names and '0' the colors -- verify against the CSV
abc_palettes.update(cluster=dict(zip(palette_df['Unnamed: 0'], palette_df['0'])))

## Load data for just the Somatosensory cells

In [10]:
# get the cluster annotations for the somatosensory nuclei
nuclei_somatosensory = ['PO', 'VPM', 'VPL']
obs_annot = abc.get_obs_from_annotations(nuclei_somatosensory, 
                                         adata_th.obs, 
                                         taxonomy_level='cluster',
                                         include_shared_clusters=True)

# Drop the categories that no longer occur in the annotated subset.
# The column is replaced with `df[col] = ...` on purpose: `df.loc[:, col] = ...`
# writes into the existing column in place on recent pandas versions, which can
# leave the old categorical dtype (and its unused categories) untouched. That
# would make cluster_list_annot below contain every thalamus cluster.
obs_annot['cluster'] = obs_annot['cluster'].cat.remove_unused_categories()
# clusters found in the somatosensory annotations, used to filter the full anndata object
cluster_list_annot = sorted(obs_annot['cluster'].cat.categories)
# somatosensory subset of the full anndata object
adata_annot = adata_th[adata_th.obs['cluster'].isin(cluster_list_annot)].copy()
# reset both cluster and supertype categories so downstream category listings
# and grouped plots only see the levels present in the subset
for level in ['supertype','cluster']:
    adata_annot.obs[level] = adata_annot.obs[level].cat.remove_unused_categories()

# adata_annot.obs.loc[:,'cluster'] = adata_annot.obs.loc[:,'cluster'].cat.reorder_categories(cluster_list_annot, ordered=True)

In [11]:
# Sections (values of the section/z column) plotted for the somatosensory nuclei
# NOTE(review): 7.4 is absent between 7.2 and 7.6 -- confirm this is intentional
sections_somatosensory = [6.2, 6.4, 6.6, 6.8, 7.0, 7.2, 7.6]

## Fig. X? | Somatosensory subclasses

In [12]:
# keyword arguments shared by the annotated-cell overlay plots
kwargs_cluster_annotations = {
    'section_col': z_col,
    'x_col': x_col,
    'y_col': y_col,
    'point_size': 0.5,
    'figsize': (4, 2),
    'face_palette': None,
    'edge_color': 'silver',
}

# cells annotated to the somatosensory nuclei, colored at the subclass level
taxonomy_level = 'subclass'
nucleus = nuclei_somatosensory
nuclei_highlight = nuclei_somatosensory
sections_to_plot = sections_somatosensory
obs_annot = abc.get_obs_from_annotations(
    nucleus,
    adata_th.obs,
    taxonomy_level=taxonomy_level,
    include_shared_clusters=True,
)

plt.rcParams['font.size'] = 7
figs_annot = cplots.plot_ccf_overlay(
    obs_annot,
    ccf_images,
    boundary_img=ccf_boundaries,
    bg_cells=adata_th.obs,
    ccf_highlight=nuclei_highlight,
    point_hue=taxonomy_level,
    sections=sections_to_plot,
    point_palette=abc_palettes[taxonomy_level],
    legend='cells',
    **kwargs_cluster_annotations,
)

# figs_annot is indexed in the same order as sections_to_plot;
# each figure is saved as both a pdf and a 1200 dpi png
for i, sec in enumerate(sections_to_plot):
    section_fig = figs_annot[i]
    section_fig.savefig(f'{results_dir}/figX_{taxonomy_level}_annotations_z{sec}_somatosensory.pdf',
                        transparent=True, bbox_inches='tight')
    section_fig.savefig(f'{results_dir}/figX_{taxonomy_level}_annotations_z{sec}_somatosensory.png',
                        transparent=True, bbox_inches='tight', dpi=1200)

## Fig. X? | Somatosensory supertypes

In [13]:
# keyword arguments shared by the annotated-cell overlay plots
kwargs_cluster_annotations = {
    'section_col': z_col,
    'x_col': x_col,
    'y_col': y_col,
    'point_size': 0.5,
    'figsize': (4, 2),
    'face_palette': None,
    'edge_color': 'silver',
}

# cells annotated to the somatosensory nuclei, colored at the supertype level
taxonomy_level = 'supertype'
nucleus = nuclei_somatosensory
nuclei_highlight = nuclei_somatosensory
sections_to_plot = sections_somatosensory
obs_annot = abc.get_obs_from_annotations(
    nucleus,
    adata_th.obs,
    taxonomy_level=taxonomy_level,
    include_shared_clusters=True,
)

plt.rcParams['font.size'] = 7
figs_annot = cplots.plot_ccf_overlay(
    obs_annot,
    ccf_images,
    boundary_img=ccf_boundaries,
    bg_cells=adata_th.obs,
    ccf_highlight=nuclei_highlight,
    point_hue=taxonomy_level,
    sections=sections_to_plot,
    point_palette=abc_palettes[taxonomy_level],
    legend='cells',
    **kwargs_cluster_annotations,
)

# figs_annot is indexed in the same order as sections_to_plot;
# each figure is saved as both a pdf and a 1200 dpi png
for i, sec in enumerate(sections_to_plot):
    section_fig = figs_annot[i]
    section_fig.savefig(f'{results_dir}/figX_supertype_annotations_z{sec}_somatosensory.pdf',
                        transparent=True, bbox_inches='tight')
    section_fig.savefig(f'{results_dir}/figX_supertype_annotations_z{sec}_somatosensory.png',
                        transparent=True, bbox_inches='tight', dpi=1200)

## Fig. X? | Dotplot of differentially expressed genes (DEGs) in the somatosensory supertypes

### Fig. S2 | Exploratory DEG dotplot (5 genes per supertype)

In [14]:
# list every supertype category present in the somatosensory subset
supertypes_annot_all = adata_annot.obs['supertype'].cat.categories
display(supertypes_annot_all.tolist())

In [15]:
# Supertypes for which DEGs are computed (each one vs. the rest) in the
# exploratory supertype dotplot
supertypes_annot = [
    '0654 TH Prkcd Grin2c Glut_1', 
    '0659 TH Prkcd Grin2c Glut_6',
]

In [16]:
# Collect the 5 top-ranked genes (Wilcoxon) of each selected supertype,
# compared against the rest of adata_annot
deg_list_somatosensory_st = []
for st in supertypes_annot:
    print(st)
    sc.tl.rank_genes_groups(
        adata_annot,
        groupby='supertype',
        groups=[st],
        reference='rest',
        n_genes=5,
        method='wilcoxon',
    )
    top_genes = sc.get.rank_genes_groups_df(adata_annot, group=st)['names'].to_list()
    deg_list_somatosensory_st.extend(top_genes)

print(deg_list_somatosensory_st)

# gene-group brackets for the dotplot are set by hand: 5 consecutive genes per supertype
var_group_positions_st = [[0, 4], [5, 9]]
var_group_labels_st = [
    'Somatosensory supertype 654\nvs rest',
    'Posterior PO supertype 659\nvs rest',
]

# exploratory dotplot of the collected DEGs across all supertypes in adata_annot
dotplot_st_explore = sc.pl.dotplot(
    adata_annot,
    deg_list_somatosensory_st,
    groupby='supertype',
    dendrogram=False,
    cmap='Blues',
    var_group_positions=var_group_positions_st,
    var_group_labels=var_group_labels_st,
    var_group_rotation=0,
)

In [17]:
# Manually selected supertype DEGs whose spatial expression is plotted below
deg_list_somatosensory_st_manual = [
    'Gpr4',
    'St6galnac5',
    'Tnc',
]

## Fig. X? | Gene expression of supertype DEGs in the somatosensory nuclei

In [18]:
# highlight the somatosensory nuclei on every plotted section
nuclei_to_highlight = nuclei_somatosensory
sections_to_plot = sections_somatosensory

# keyword arguments shared by the expression plots (left-hemisphere boundaries)
kwargs_ATN_expr = {
    'boundary_img': ccf_boundaries_left,
    'section_col': section_col,
    'x_col': x_col,
    'y_col': y_col,
    'cmap': 'Blues',
    # 'cb_vmin_vmax': (0, 7),
    'point_size': 1.5,
    'edge_color': 'silver',
    'edgecolors': 'silver',
    'linewidths': 0.1,
    # 'figsize': (1.8, 1.8),
    'figsize': (8, 8),
}

# expression maps for the manually selected supertype DEGs
for gene in deg_list_somatosensory_st_manual:
    fig = cplots.plot_expression_ccf(
        adata_th_left,
        gene,
        ccf_images_left,
        sections=sections_to_plot,
        highlight=nuclei_to_highlight,
        # zoom_to_highlighted=True,
        **kwargs_ATN_expr,
    )
    # NOTE(review): only the first returned figure is written to disk, although
    # several sections are plotted -- confirm this is intended
    first_fig = fig[0]
    first_fig.savefig(f'{results_dir}/degs_somatosensory_supertypes_{gene}.pdf',
                      transparent=True, bbox_inches='tight', dpi=1200)
    first_fig.savefig(f'{results_dir}/degs_somatosensory_supertypes_{gene}.png',
                      transparent=True, bbox_inches='tight', dpi=1200)

## Fig. X? | Somatosensory clusters

In [19]:
# set kwargs for annotated cluster plots
kwargs_cluster_annotations = dict(
    section_col=z_col,
    x_col=x_col,
    y_col=y_col,
    point_size=0.5,
    figsize=(4, 2),
    face_palette=None,
    edge_color='silver'
    )

# get the cluster-level annotations for the somatosensory nuclei, once from the
# left-hemisphere obs and once from the full obs
sections_to_plot = sections_somatosensory
nuclei_highlight = nuclei_somatosensory
taxonomy_level = 'cluster'
# NOTE: obs_annot_clust_left is only referenced by the commented-out alternative below
obs_annot_clust_left = abc.get_obs_from_annotations(nuclei_somatosensory, 
                                                    adata_th_left.obs, 
                                                    taxonomy_level=taxonomy_level,
                                                    include_shared_clusters=True)
obs_annot_clust = abc.get_obs_from_annotations(nuclei_somatosensory, 
                                                adata_th.obs, 
                                                taxonomy_level=taxonomy_level,
                                                include_shared_clusters=True)

plt.rcParams.update({'font.size': 7})
# NOTE(review): this call combines the left-hemisphere label image
# (ccf_images_left) with the whole-brain boundaries and cells; the trailing
# comments mark the left-hemisphere alternatives -- confirm which combination
# is intended
figs_annot = cplots.plot_ccf_overlay(obs_annot_clust, # obs_annot_clust_left
                                    ccf_images_left,
                                    boundary_img=ccf_boundaries, #_left,
                                    bg_cells=adata_th.obs, #adata_th_left.obs, 
                                    ccf_highlight=nuclei_highlight,
                                    point_hue=taxonomy_level, 
                                    sections=sections_to_plot,
                                    point_palette=abc_palettes[taxonomy_level],
                                    legend='cells',
                                    **kwargs_cluster_annotations)
# saving is currently disabled for this plot
# figs_annot[0].savefig(f'{results_dir}/figX_cluster_annotations_z80_ATN.pdf', 
#                       transparent=True, bbox_inches='tight')
# figs_annot[0].savefig(f'{results_dir}/figX_cluster_annotations_z80_ATN.png', 
#                       transparent=True, bbox_inches='tight', dpi=1200)

In [20]:
# Clusters of interest for the somatosensory nuclei; the trailing comment on
# each entry names the nuclei the cluster is associated with
cluster_somat_clusters = [
    '2648 TH Prkcd Grin2c Glut_1', # VPL/PO
    '2649 TH Prkcd Grin2c Glut_1', # VPM
    '2663 TH Prkcd Grin2c Glut_6', # PO/Eth
]

In [21]:
# keyword arguments for annotated cluster plots: larger points on a smaller figure
kwargs_cluster_annotations = {
    'section_col': z_col,
    'x_col': x_col,
    'y_col': y_col,
    'point_size': 4,
    'figsize': (1.75, 1.5),
    'face_palette': None,
    'edge_color': 'silver',
}

## Fig. X? | Dotplot of differentially expressed genes (DEGs) in the somatosensory clusters

In [22]:
# # manual medial-to-lateral cluster ordering for dotplots
# clusters_med_to_lat_ATN = [
#     '2613 AD Serpinb7 Glut_1',
#     '2614 AD Serpinb7 Glut_1',
#     '2615 AV Col27a1 Glut_1',
#     '2616 AV Col27a1 Glut_1',
#     '2674 TH Prkcd Grin2c Glut_9',
#     '2676 TH Prkcd Grin2c Glut_9',
#     '2675 TH Prkcd Grin2c Glut_9',
# ]

# # reorder by manual medial-to-lateral ordering
# adata_annot.obs.loc[:,'cluster'] = adata_annot.obs.loc[:,'cluster'].cat.reorder_categories(clusters_med_to_lat_ATN, ordered=True)
# adata_annot.obs.loc[:,'cluster'].cat.categories

In [23]:
# Clusters of interest for the somatosensory nuclei.
# NOTE(review): this repeats the identical list assigned in an earlier cell, and
# the name is not referenced by the later cells visible here (they spell the
# cluster names out) -- consider keeping a single definition
cluster_somat_clusters = [
    '2648 TH Prkcd Grin2c Glut_1', 
    '2649 TH Prkcd Grin2c Glut_1',
    '2663 TH Prkcd Grin2c Glut_6',
]

### Exploratory dotplots for somatosensory cluster DEGs (5 genes per cluster vs. rest, and per neighboring cluster pair)

In [24]:
# Helper function to get the top DEGs for a given cluster
def get_rank_genes_list(adata, 
                        groupby='cluster', 
                        group='', 
                        reference='rest', 
                        n_genes=5):
    """Return the names of the top-ranked DEGs for a single group.

    Runs ``sc.tl.rank_genes_groups`` with the Wilcoxon method for ``group``
    against ``reference`` and reads the ranked gene names back with
    ``sc.get.rank_genes_groups_df``.

    Parameters
    ----------
    adata
        Annotated data object to rank genes on. It is modified: scanpy stores
        the ranking result on the object (under its default key), so each call
        replaces the result of the previous one.
    groupby : str, default 'cluster'
        Name of the ``adata.obs`` column that defines the groups.
    group : str
        Category of ``groupby`` to find DEGs for.
    reference : str, default 'rest'
        ``'rest'`` to compare against all other cells, or the name of another
        category of ``groupby`` to compare against.
    n_genes : int, default 5
        Number of top-ranked genes to keep.

    Returns
    -------
    list of str
        Gene names in the order given by scanpy's ranking.
    """
    # find DEGs; honor the caller's `groupby` instead of always using 'cluster'
    sc.tl.rank_genes_groups(adata, 
                            groupby=groupby, 
                            groups=[group],
                            reference=reference, 
                            n_genes=n_genes, 
                            method="wilcoxon")
    # get DEGs
    deg_list = sc.get.rank_genes_groups_df(adata, group=group).names.to_list()
    
    return deg_list

def create_var_group_positions(n_genes, deg_list):
    """Split the index range of ``deg_list`` into consecutive groups of ``n_genes``.

    Returns a list of ``[start, end]`` index pairs (both ends inclusive), one
    per group. The last group is shorter when ``len(deg_list)`` is not a
    multiple of ``n_genes``; an empty ``deg_list`` gives an empty list.
    """
    last_index = len(deg_list) - 1
    return [
        [first, min(first + n_genes - 1, last_index)]
        for first in range(0, len(deg_list), n_genes)
    ]

In [25]:
deg_cluster_vs_rest = []
n_genes = 5

# Rank each somatosensory cluster against the rest and print the accumulated
# gene list after every comparison.
# NOTE(review): the DEGs are ranked on adata_th while the dotplot below is drawn
# from adata_annot -- confirm that "rest" should be the whole thalamus
for cluster_name in (
    '2648 TH Prkcd Grin2c Glut_1',
    '2649 TH Prkcd Grin2c Glut_1',
    '2663 TH Prkcd Grin2c Glut_6',
):
    cluster_degs = get_rank_genes_list(adata_th,
                                       group=cluster_name,
                                       reference='rest',
                                       n_genes=n_genes)
    deg_cluster_vs_rest.extend(cluster_degs)
    print(deg_cluster_vs_rest)


# gene-group labels are written by hand, in the same order as the comparisons above
var_group_labels = [
    '2648 (VPL/PO) vs rest',
    '2649 (VPM) vs rest',
    '2663 (PO/Eth) vs rest',
]
var_group_positions = create_var_group_positions(n_genes, deg_cluster_vs_rest)

# dotplot
sc.pl.dotplot(
    adata_annot,
    deg_cluster_vs_rest,
    groupby='cluster',
    dendrogram=False,
    cmap='Blues',
    var_group_positions=var_group_positions,
    var_group_labels=var_group_labels,
    # var_group_rotation=0,
)

In [26]:
# Pairwise DEGs between the three somatosensory clusters. Every pair is ranked
# in both directions (A vs B, then B vs A) and the top n_genes of each
# comparison are appended in that order, so var_group_labels below must follow
# the same order.
deg_paired_clust_list = []
n_genes = 5

# 2648 vs 2649
deg_paired_clust_list.extend(get_rank_genes_list(adata_annot, 
                                               group='2648 TH Prkcd Grin2c Glut_1', 
                                               reference='2649 TH Prkcd Grin2c Glut_1',
                                               n_genes=n_genes))
print(deg_paired_clust_list)
# sc.pl.rank_genes_groups(adata_annot, group=cluster_list_annot[0])
# 2649 vs 2648
deg_paired_clust_list.extend(get_rank_genes_list(adata_annot, 
                                               group='2649 TH Prkcd Grin2c Glut_1', 
                                               reference='2648 TH Prkcd Grin2c Glut_1',
                                               n_genes=n_genes))
print(deg_paired_clust_list)

# 2648 vs 2663
deg_paired_clust_list.extend(get_rank_genes_list(adata_annot, 
                                               group='2648 TH Prkcd Grin2c Glut_1',
                                               reference='2663 TH Prkcd Grin2c Glut_6',
                                               n_genes=n_genes))
print(deg_paired_clust_list)
# 2663 vs 2648
deg_paired_clust_list.extend(get_rank_genes_list(adata_annot, 
                                               group='2663 TH Prkcd Grin2c Glut_6',
                                               reference='2648 TH Prkcd Grin2c Glut_1',
                                               n_genes=n_genes))
print(deg_paired_clust_list)

# 2649 vs 2663
deg_paired_clust_list.extend(get_rank_genes_list(adata_annot, 
                                               group='2649 TH Prkcd Grin2c Glut_1', 
                                               reference='2663 TH Prkcd Grin2c Glut_6',
                                               n_genes=n_genes))
# 2663 vs 2649 (the accumulated list is printed only once for this last pair)
deg_paired_clust_list.extend(get_rank_genes_list(adata_annot, 
                                               group='2663 TH Prkcd Grin2c Glut_6', 
                                               reference='2649 TH Prkcd Grin2c Glut_1',
                                               n_genes=n_genes))
print(deg_paired_clust_list)

# set gene group labels for dotplot: one bracket of n_genes genes per comparison
var_group_positions = create_var_group_positions(n_genes, deg_paired_clust_list)
var_group_labels = [
  '2648 (VPL/PO)\n vs \n2649 (VPM)',
  '2649 (VPM)\n vs \n2648 (VPL/PO)',
  '2648 (VPL/PO)\n vs \n2663 (PO/Eth)',
  '2663 (PO/Eth)\n vs \n2648 (VPL/PO)',
  '2649 (VPM)\n vs \n2663 (PO/Eth)',
  '2663 (PO/Eth)\n vs \n2649 (VPM)',
]

# dotplot of all pairwise DEGs across the clusters in adata_annot
sc.pl.dotplot(adata_annot,
              deg_paired_clust_list, 
              groupby='cluster', 
              dendrogram=False,
              cmap='Blues',
              var_group_positions=var_group_positions,
              var_group_labels=var_group_labels,
            #   var_group_rotation=0,
              )

### Curated dotplot of somatosensory cluster DEGs

In [27]:
# Curated marker genes, three per cluster, keyed by the label shown on the dotplot
marker_genes_by_cluster = {
    '2648 (VPL/PO)': ['Ramp3', 'Spon1', 'Tcf7l2'],
    '2649 (VPM)': ['Kcnab3', 'Pvalb', 'Scn4b'],
    '2663 (PO/Eth)': ['Gpr4', 'Calb1', 'St6galnac5'],
}

# flat gene list in plotting order
marker_genes_manual = [
    gene
    for cluster_genes in marker_genes_by_cluster.values()
    for gene in cluster_genes
]

# one bracket of 3 genes per cluster, labelled in the same order
var_group_positions = create_var_group_positions(n_genes=3, deg_list=marker_genes_manual)

var_group_labels = list(marker_genes_by_cluster)

In [28]:
# Curated dotplot; return_fig=True returns the plot object so it can be saved below
fig_curated = sc.pl.dotplot(
    adata_annot,
    marker_genes_manual,
    groupby='cluster',
    dendrogram=False,
    cmap='Blues',  # other colormap options: cc.cm.blues, cc.cm.CET_CBL3_r
    var_group_positions=var_group_positions,
    var_group_labels=var_group_labels,
    var_group_rotation=90,
    return_fig=True,
)

# only the pdf copy is written; the png export is disabled
curated_pdf_path = f'{results_dir}/degs_curated_dotplot_ATN.pdf'
fig_curated.savefig(curated_pdf_path, transparent=True, bbox_inches='tight')
# fig_curated.savefig(f'{results_dir}/degs_curated_dotplot_ATN.png',
#                     transparent=True, bbox_inches='tight')

In [29]:
# highlight the somatosensory nuclei on every plotted section
nuclei_to_highlight = nuclei_somatosensory
sections_to_plot = sections_somatosensory

# keyword arguments shared by the expression plots (left-hemisphere boundaries)
kwargs_ATN_expr = {
    'boundary_img': ccf_boundaries_left,
    'section_col': section_col,
    'x_col': x_col,
    'y_col': y_col,
    'cmap': 'Blues',
    # 'cb_vmin_vmax': (0, 7),
    'point_size': 1.5,
    'edge_color': 'silver',
    'edgecolors': 'silver',
    'linewidths': 0.1,
    # 'figsize': (1.8, 1.8),
    'figsize': (8, 8),
}

# position in marker_genes_manual at which each cluster's group of genes starts
group_headers = {
    0: '2648 (VPL/PO) genes',
    3: '2649 (VPM) genes',
    6: '2663 (PO/Eth) genes',
}

# expression maps for the curated cluster marker genes
for i, gene in enumerate(marker_genes_manual):
    # announce the cluster when its first gene comes up
    if i in group_headers:
        print(group_headers[i])

    fig = cplots.plot_expression_ccf(
        adata_th_left,
        gene,
        ccf_images_left,
        sections=sections_to_plot,
        highlight=nuclei_to_highlight,
        # zoom_to_highlighted=True,
        **kwargs_ATN_expr,
    )
    # fig is indexed in the same order as sections_to_plot;
    # each section is saved as both pdf and png
    for j, sec in enumerate(sections_to_plot):
        section_fig = fig[j]
        section_fig.savefig(f'{results_dir}/degs_somatosensory_clusters_{gene}_z{sec}.pdf',
                            transparent=True, bbox_inches='tight', dpi=1200)
        section_fig.savefig(f'{results_dir}/degs_somatosensory_clusters_{gene}_z{sec}.png',
                            transparent=True, bbox_inches='tight', dpi=1200)