In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pathlib
import anndata as ad
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

from shapely.geometry import Polygon, MultiLineString, box, Point, MultiPolygon
from shapely.ops import unary_union, polygonize
import shapely.plotting as splot
import shapely

from collections import defaultdict

from  geojson.geometry import GeometryCollection
import geojson

from thalamus_merfish_analysis.ccf_polygons import get_outline_polygon
from thalamus_merfish_analysis.ccf_plots import plot_section_outline
from thalamus_merfish_analysis import abc_load as abc
from thalamus_merfish_analysis import taxonomy_versions as tv

%matplotlib inline

In [3]:
import warnings
warnings.filterwarnings(action='ignore', category=FutureWarning, 
                        message=r".*is_categorical_dtype is deprecated*")

# Load brain1 + brain3 data

In [4]:
import os

# Folder that receives every figure saved by this notebook. Keep the trailing
# slash: file names are appended to this string directly when saving.
results_dir = '../results/md_annotation_figures/'
if not os.path.isdir(results_dir):
    os.makedirs(results_dir, exist_ok=True)

## Load & subset anndata

In [5]:
# Combined brain1 + brain3 AnnData that already carries the MD annotations
adata_h5ad_path = '../data/merfish_609882and638850_AIT17_annotated_TH_ZI_only_shared_genes_only_MD_annotations_2023-06-12_00-00-00/Brain_1_3_TH_ZI_MDannotations.h5ad'
adata = ad.read_h5ad(adata_h5ad_path)

In [6]:
# Show the AnnData summary (obs/var/uns/obsm keys) as the cell output
adata

In [7]:
# Taxonomy divisions that are treated as neuronal; cells from any other
# division are dropped so the rest of the notebook only deals with neurons
divisions_neuronal = [
    '2 Subpallium GABAergic',
    '3 PAL-sAMY-TH-HY-MB-HB neuronal',
    '4 CBX-MOB-other neuronal',
]

is_neuronal = adata.obs['division_id_label'].isin(divisions_neuronal)
adata_neuronal = adata[is_neuronal].copy()

In [8]:
# Split the neuronal cells per animal with .obs['brain'] (1 or 3).
# If .obs['brain'] is not available, the codebook names can be used instead:
#   codebook_brain1 = 'VZG147'
#   codebook_brain3 = 'wholebrain031822a'
brain_id = adata_neuronal.obs['brain']

adata_br3_neuronal = adata_neuronal[brain_id == 3].copy()
adata_br1_neuronal = adata_neuronal[brain_id == 1].copy()

In [9]:
# Copy the two 'spatial_cirro' coordinate columns from .obsm into .obs so that
# plotting code can read everything from the obs dataframe
cirro_xy = adata_br3_neuronal.obsm['spatial_cirro']
for col_idx, col_name in enumerate(['cirro_x', 'cirro_y']):
    adata_br3_neuronal.obs[col_name] = cirro_xy[:, col_idx]

## Load MD annotations

In [10]:
# Get the MD annotations from the .uns
# (.uns['MD_polygons'] is parsed with geojson.loads; the result is indexed by
# 'geometries' in the next cell)
gc = geojson.loads(adata.uns['MD_polygons'])

In [11]:
# Unpack the annotation geometries: one shapely polygon per geometry (the form
# expected by cells_in_polygon()) plus the matching napari z coordinate
md_geometries = gc['geometries']
MD_polys = [shapely.geometry.Polygon(geom['coordinates']) for geom in md_geometries]
MD_shapes_z = [geom['napari_z_coords'] for geom in md_geometries]

In [12]:
# Get per polygon metadata
# NOTE: the plotting functions below compare MD_polys_brain_id elementwise
# against a brain number (`== 3`), so it presumably is a numpy array with one
# entry per polygon - TODO confirm.
MD_polys_brain_id = adata.uns['MD_polys_brain']  # which brain it's for
MD_polys_L_R = adata.uns['MD_polys_left_right']  # whether it's a left or right MD

# Plotting functions

## def plot_clusters_in_annotation()

In [13]:
def plot_clusters_in_annotation(adata_neuronal, clusters, section, z_plane, 
                                legend=True, custom_colors=False,
                                cl_colors=None):
    '''Plot one brain3 section with selected clusters colored and the MD outlined.

    All cells of the section are drawn in grey, the cells of each requested
    cluster are drawn on top in that cluster's color, and the MD annotation
    polygon(s) for `z_plane` are drawn in black.

    Parameters
    ----------
    adata_neuronal : AnnData
        reads the obs columns 'section', 'cluster_label', 'is_in_MD',
        'napari_y_brain1and3', 'napari_x_brain1and3' and the uns entries
        'MD_polygons' and 'MD_polys_brain'
    clusters : list-like of str
        cluster labels (taxonomy v20230630) to plot; legend entries use the
        labels converted to v20230830
    section : str
        value matched against obs.section
    z_plane : int
        napari z coordinate used to select the MD polygon(s) to draw
    legend : bool, default=True
        whether to draw a legend below the plot
    custom_colors : bool, default=False
        if True, colors are looked up as cl_colors[cluster]; if False, a
        tab10 + Dark2 palette is assigned in order (and cycled if exhausted)
    cl_colors : mapping, default=None
        cluster label -> color; required when custom_colors is True

    Raises
    ------
    ValueError
        if custom_colors is True but cl_colors is None
    '''
    # ** only plotting brain3 **
    brain = 3
    coord_cols = ["napari_y_brain1and3", "napari_x_brain1and3"]
    msize = 2

    if custom_colors and cl_colors is None:
        raise ValueError('custom_colors=True requires cl_colors '
                         '(a mapping of cluster label -> color)')

    fig = plt.figure(figsize=(12,6))
    obs = adata_neuronal.obs

    # display all the cells in this section in grey
    mask_sec = (obs.section==section)
    xy_all = obs.loc[mask_sec, coord_cols].values
    plt.plot(xy_all[:,1], -xy_all[:,0], '.', color='grey', markersize=0.5)
    plt.gca().set_aspect('equal')

    # picked colormaps with large perceptual differences btwn adjacent colors
    if not custom_colors:
        rgb_cm = np.concatenate((np.asarray(plt.get_cmap('tab10').colors), 
                                 np.asarray(plt.get_cmap('Dark2').colors)))
        cl_colors = np.ones((rgb_cm.shape[0],4))  # RGBA, alpha stays 1
        cl_colors[:,:3] = rgb_cm

    MD_cluster_dict = tv.convert_taxonomy_labels(clusters, 
                                                 'cluster', 
                                                 label_format='id_label',
                                                 input_version='20230630', 
                                                 output_version='20230830',
                                                 output_as_dict=True)

    # in/out-of-MD membership does not depend on the cluster, so compute it once
    mdmask = obs.is_in_MD.values

    # plot the cells in the given clusters
    for i, cl in enumerate(clusters):
        if custom_colors:
            color = cl_colors[cl]
        else:
            # cycle through the palette instead of failing when there are
            # more clusters than palette entries
            color = cl_colors[i % len(cl_colors)]
        clmask = np.logical_and(mask_sec, (obs.cluster_label==cl))

        # cells of this cluster inside the MD annotation (carry the legend label)
        xy_cl_MD = obs.loc[np.logical_and(mdmask,clmask), coord_cols].values
        plt.scatter(xy_cl_MD[:,1], -xy_cl_MD[:,0], s=msize, 
                    alpha=1, facecolor=color, label=MD_cluster_dict[cl])

        # cells of this cluster outside the MD annotation; drawn fully opaque.
        # (The default-palette path used to draw these twice, alpha=0.4 and
        # then alpha=1 on top, which renders the same as one opaque layer.)
        xy_cl_other = obs.loc[np.logical_and(~mdmask,clmask), coord_cols].values
        plt.scatter(xy_cl_other[:,1], -xy_cl_other[:,0], s=msize,
                    alpha=1, facecolor=color)

    if legend:
        lgnd = plt.legend(ncols=3, loc='upper center', bbox_to_anchor=(0.5,0), 
                          fontsize=10, markerscale=7,frameon=False)
        for handle in lgnd.legend_handles:
            handle.set_sizes([16])

    # get the MD annotations from the .uns of the AnnData that was passed in
    gc = geojson.loads(adata_neuronal.uns['MD_polygons'])
    md_polys = [shapely.geometry.Polygon(poly['coordinates']) for poly in gc['geometries']]  #YZ coords stored as shapely polygons (expected by cells_in_polygon())
    md_shapes_z = np.array([int(poly['napari_z_coords']) for poly in gc['geometries']])  # Z coords
    md_polys_brain_id = np.asarray(adata_neuronal.uns['MD_polys_brain'])  # which brain it's for
    # Plot the right polygon(s) for this section
    poly_index = np.where( (md_shapes_z==z_plane) & 
                           (md_polys_brain_id==brain) )[0]
    for i in poly_index:
        outline = np.array(md_polys[i].exterior.coords)
        plt.plot(outline[:,1], -outline[:,0], 'k')

    # formatting
    plt.gca().set_aspect('equal')
    plt.xticks([])
    plt.yticks([])
    plt.box(False)

## def plot_cell_types_with_annotation()

In [14]:
def plot_cell_types_with_annotation(adata_neuronal, cell_types, tax_level,
                                    section, z_plane, legend=True, 
                                    custom_colors=False, label_color_dict=None):
    '''Plot one brain3 section with selected cell types colored and the MD outlined.

    Parameters
    ----------
    adata_neuronal : AnnData
        reads the obs columns 'section', 'napari_y_brain1and3',
        'napari_x_brain1and3', the taxonomy column for `tax_level`, and the
        uns entries 'MD_polygons' and 'MD_polys_brain'
    cell_types : list of str
        cell type labels (taxonomy v20230630) to plot
    tax_level : str
        taxonomy level; 'cluster' reads obs['cluster_label'], any other value
        reads obs[tax_level + '_id_label']
    section : str
        value matched against obs.section
    z_plane : int
        napari z coordinate used to select the MD polygon(s) to draw
    legend : bool, default=True
        whether to draw a legend (only drawn if at least one type was plotted)
    custom_colors : bool, default=False
        if True, colors come from label_color_dict; if False, the v20230830
        color palette from tv.get_color_dictionary is used
    label_color_dict : dict, default=None
        cell type label -> color; required when custom_colors is True

    Raises
    ------
    ValueError
        if custom_colors is True but label_color_dict is None
    '''
    # ** only plotting brain3 **
    brain = 3

    if custom_colors and label_color_dict is None:
        raise ValueError('custom_colors=True requires label_color_dict '
                         '(a mapping of cell type label -> color)')

    fig = plt.figure(figsize=(12,6))

    # define marker sizes
    msize_bkgd = 0.5
    msize_annot = 3

    # define colors
    th_bkgd_color = 'grey'

    coord_cols = ["napari_y_brain1and3", "napari_x_brain1and3"]
    obs = adata_neuronal.obs

    # display all the cells in this section in grey
    mask_sec = (obs.section==section)
    xy_all = obs.loc[mask_sec, coord_cols].values
    plt.plot(xy_all[:,1], -xy_all[:,0], '.', color=th_bkgd_color, 
             markersize=msize_bkgd)
    plt.gca().set_aspect('equal')

    # brain3 data in this notebook is v20230630, but we want to use v20230830
    # cell type labels and colors, so we need to do some conversions
    tax_labels_list = tv.convert_taxonomy_labels(cell_types, tax_level, 
                                                 label_format='id_label',
                                                 input_version='20230630', 
                                                 output_version='20230830',
                                                 output_as_dict=False)
    tax_labels_dict = dict(zip(cell_types, tax_labels_list))
    if not custom_colors:
        # get latest, v20230830, ABC Atlas color palette
        color_dict_v20230830 = tv.get_color_dictionary(tax_labels_list, 
                                                       tax_level, 
                                                       label_format='id_label',
                                                       version='20230830')
        # make old labels the key to the latest color dictionary
        # NOTE(review): this pairs by position, so it assumes
        # get_color_dictionary returns colors in the order of
        # tax_labels_list - confirm.
        label_color_dict = dict(zip(cell_types, 
                                    list(color_dict_v20230830.values())))

    # Plot specified cell types
    if tax_level=='cluster':
        groupby_col_name = tax_level+'_label'
    else:
        groupby_col_name = tax_level+'_id_label'

    n_plotted = 0
    for label, gb in obs.groupby(groupby_col_name, observed=True):
        # only display the requested cell types
        if label not in cell_types:
            continue

        # cells of this type that fall in the requested section
        xy_sec_celltype = gb.loc[gb.section==section, coord_cols].values
        plt.plot(xy_sec_celltype[:,1], -xy_sec_celltype[:,0], '.', 
                 color=label_color_dict[label], 
                 markersize=msize_annot, label=tax_labels_dict[label])
        n_plotted += 1

    # build the legend once, after all types are drawn, and honor `legend`
    if legend and n_plotted > 0:
        plt.legend(loc='upper center', bbox_to_anchor=(0.5,0.05), ncol=1,
                   fontsize=14, markerscale=7, frameon=False)

    # get the MD annotations from the .uns of the AnnData that was passed in
    gc = geojson.loads(adata_neuronal.uns['MD_polygons'])
    md_polys = [shapely.geometry.Polygon(poly['coordinates']) for poly in gc['geometries']]  #YZ coords stored as shapely polygons (expected by cells_in_polygon())
    md_shapes_z = np.array([int(poly['napari_z_coords']) for poly in gc['geometries']])  # Z coords
    md_polys_brain_id = np.asarray(adata_neuronal.uns['MD_polys_brain'])  # which brain it's for
    # Plot the right polygon(s) for this section
    poly_index = np.where( (md_shapes_z==z_plane) & 
                           (md_polys_brain_id==brain) )[0]
    for i in poly_index:
        outline = np.array(md_polys[i].exterior.coords)
        plt.plot(outline[:,1], -outline[:,0], 'k')

    # formatting
    plt.gca().set_aspect('equal')
    plt.xticks([])
    plt.yticks([])
    plt.box(False)

## def plot_expression_annotation()

In [15]:
from scipy import sparse

def plot_expression_annotation(adata_neuronal, gene, section, plate, 
                               annotation=True, section_outline=True):
    '''Scatter-plot one gene's expression in one brain3 section.

    Parameters
    ----------
    adata_neuronal : AnnData
        reads .X for `gene`, the obs columns 'section',
        'napari_y_brain1and3', 'napari_x_brain1and3', and (when
        annotation=True) the uns entries 'MD_polygons' and 'MD_polys_brain'
    gene : str
        var name of the gene to plot; also used as the figure title
    section : str
        value matched against obs.section
    plate : int
        compared against each polygon's integer napari z coordinate to select
        the MD polygon(s) to draw
    annotation : bool, default=True
        whether to draw the MD annotation polygon(s)
    section_outline : bool, default=True
        whether to draw the section outline computed from adata_neuronal.obs

    Returns
    -------
    fig
    '''
    # ** only plotting brain3 **
    brain = 3

    sec_mask = adata_neuronal.obs.section==section
    # The transcript counts may be stored in a sparse matrix, which cannot be
    # plotted directly: densify if needed, then flatten to a 1D array
    gene_exp = adata_neuronal[sec_mask][:,gene].X
    if sparse.issparse(gene_exp):
        gene_exp = gene_exp.toarray()
    gene_exp = np.asarray(gene_exp).flatten()

    # Plot figure                           
    fig = plt.figure(figsize=(9,6))

    colormap = 'Blues' #'viridis'
    marker_size = 0.5

    xy = adata_neuronal.obs.loc[sec_mask,["napari_y_brain1and3","napari_x_brain1and3"]].values
    sc = plt.scatter(xy[:,1], -xy[:,0], c=gene_exp,
                     cmap=colormap, s=marker_size)
    plt.title(gene)
    cbar = plt.colorbar(sc, fraction=0.025, pad=0.04)
    cbar.set_label('log2(CPV+1)')

    if annotation:
        # read the MD annotations from the AnnData that was passed in (same
        # approach as the other plotting functions) rather than from notebook
        # globals
        gc = geojson.loads(adata_neuronal.uns['MD_polygons'])
        md_polys = [shapely.geometry.Polygon(poly['coordinates']) for poly in gc['geometries']]
        md_shapes_z = np.array([int(poly['napari_z_coords']) for poly in gc['geometries']])
        md_polys_brain_id = np.asarray(adata_neuronal.uns['MD_polys_brain'])
        # select the right polygon(s) for this section
        poly_index = np.where( (md_shapes_z==plate) & 
                               (md_polys_brain_id==brain) )[0]
        for i in poly_index:
            outline = np.array(md_polys[i].exterior.coords)
            plt.plot(outline[:,1], -outline[:,0], 'k', alpha=0.6)

    if section_outline:
        # build the outline from the data that was passed in, not from the
        # notebook-level adata_br3_neuronal
        th_outline_polygons = get_outline_polygon(adata_neuronal.obs,
                                                  coordinate_type='napari')
        plot_section_outline(th_outline_polygons, sections=section, alpha=0.2)

    # formatting
    plt.gca().set_aspect('equal')
    plt.xticks([])
    plt.yticks([])
    plt.box(False)

    return fig

## def plot_PC_spatial_dist()

In [16]:
def plot_PC_spatial_dist(adata_neuronal, PC_distance, PC_id, section, plate, 
                         brain=3, annotation=True, section_outline=True, 
                         colormap='Oranges'):
    '''Scatter-plot a per-cell value for the in-MD cells of one section.

    Parameters
    ----------
    adata_neuronal : AnnData
        reads the obs columns 'section', 'is_in_MD', 'napari_y_brain1and3'
        and 'napari_x_brain1and3'
    PC_distance : array-like
        one value per cell of adata_neuronal; indexed with the section/MD mask
        and used as the marker color
    PC_id : int or str
        used only for the title, 'PC_<PC_id>'
    section : str
        value matched against obs.section
    plate : int
        compared against the integer values of the notebook-level MD_shapes_z
        to select the MD polygon(s) to draw
    brain : int, default=3
        compared against the notebook-level MD_polys_brain_id
    annotation : bool, default=True
        whether to draw the MD polygon(s) from the notebook-level MD_polys
    section_outline : bool, default=True
        currently has no effect (the outline drawing is disabled)
    colormap : str, default='Oranges'
        matplotlib colormap name

    Returns
    -------
    fig
    '''
    obs = adata_neuronal.obs

    # restrict to cells that are both in the requested section and inside MD
    in_section = obs.section==section
    sec_mask = np.logical_and(in_section, obs.is_in_MD.values)

    fig = plt.figure(figsize=(9,6))
    dot_size = 7

    coords = obs.loc[sec_mask, ["napari_y_brain1and3","napari_x_brain1and3"]].values
    points = plt.scatter(coords[:,1], -coords[:,0], 
                         c=PC_distance[sec_mask], 
                         cmap=colormap, s=dot_size)
    plt.title('PC_'+str(PC_id))
    cbar = plt.colorbar(points, fraction=0.025, pad=0.04)

    if annotation:
        # pick the polygon(s) that belong to this plate and brain
        polygon_z = np.array([int(z) for z in MD_shapes_z])
        wanted = np.where((polygon_z==plate) & (MD_polys_brain_id==brain))[0]
        for idx in wanted:
            outline = np.array(MD_polys[idx].exterior.coords)
            plt.plot(outline[:,1], -outline[:,0], 'k', alpha=0.6)

    # formatting
    plt.gca().set_aspect('equal')
    plt.xticks([])
    plt.yticks([])
    plt.box(False)

    return fig

## demonstrate plot_section_outline()

In [17]:
# Compute the outline polygons from the brain3 neurons (napari coordinates)
# and draw them for one example section
demo_section = '1199651060'
th_outline_polygons = get_outline_polygon(adata_br3_neuronal.obs, 
                                          coordinate_type='napari')
plot_section_outline(th_outline_polygons, sections=demo_section, alpha=0.2)

# Generate list of MD clusters

In [18]:
# Every cluster label carried by at least one brain3 cell inside the MD
# annotation, as a sorted array
br3_obs = adata_br3_neuronal.obs
md_cluster_labels = br3_obs.loc[br3_obs.is_in_MD, "cluster_label"].unique()
MD_clusters = np.asarray(sorted(md_cluster_labels))

In [19]:
# shorten the list to only those clusters that have more than `min_cells`
# cells inside the MD annotation. NOTE: only brain3 cells are counted here
# (adata_br3_neuronal), not both brains.
min_cells = 20

# these do not change per cluster, so pull them out of the loop
br3_in_md = adata_br3_neuronal.obs.is_in_MD.values
br3_cluster_labels = adata_br3_neuronal.obs.cluster_label

# collect into a list and convert once, rather than np.append-ing strings
# onto a float-typed empty array on every iteration
md_clusters_kept = []
for cluster in MD_clusters:
    mdclmask = np.logical_and(br3_in_md, (br3_cluster_labels==cluster).values)
    if np.sum(mdclmask)>min_cells:
        md_clusters_kept.append(cluster)
MD_cluster_minCells = np.array(md_clusters_kept)

MD_cluster_minCells

In [20]:
# Keep only the 'TH Prkcd' clusters. The remaining ones ('CM-IAD-CL-PCN') are
# likely interlaminar cells on the boundary of the MD rather than MD cells
prkcd_labels = list(filter(lambda cl: 'TH Prkcd' in cl, MD_cluster_minCells))
MD_cluster_minCells_Prkcd = np.array(prkcd_labels)
MD_cluster_minCells_Prkcd

# Cell types in MD plots

## MD clusters also present in other thalamic nuclei

In [21]:
# Preview the clusters plotted below: everything after the first four entries.
# NOTE(review): the cut at index 4 is positional, so it depends on the sort
# order and on the min_cells filter above - confirm it still selects the
# intended clusters if either changes.
MD_cluster_minCells[4:]

In [22]:
# The data carries v20230630 labels but the plots should use the v20230830
# ABC Atlas labels and color palette, so translate the labels first and then
# key the new colors by the old labels
MD_clusters_to_plot = MD_cluster_minCells[4:]

label_conversion_kwargs = dict(label_format='id_label',
                               input_version='20230630',
                               output_version='20230830')
MD_cluster_minCells_v20230830 = tv.convert_taxonomy_labels(
    MD_clusters_to_plot, 'cluster', output_as_dict=False, 
    **label_conversion_kwargs)
MD_cluster_dict = tv.convert_taxonomy_labels(
    MD_clusters_to_plot, 'cluster', output_as_dict=True, 
    **label_conversion_kwargs)

# latest, v20230830, ABC Atlas color palette
colors_v20230830 = tv.get_color_dictionary(MD_cluster_minCells_v20230830,
                                           'cluster',
                                           label_format='id_label',
                                           version='20230830')
# old cluster labels become the keys of the color dictionary
MD_cluster_color_dict = dict(zip(MD_clusters_to_plot, colors_v20230830.values()))
MD_cluster_color_dict

In [23]:
toms_section, z_plane = '1199651045', 12

# colors taken from the release (v20230830) palette built above
plot_clusters_in_annotation(adata_br3_neuronal, 
                            clusters=MD_clusters_to_plot,
                            section=toms_section, 
                            z_plane=z_plane,
                            custom_colors=True, 
                            cl_colors=MD_cluster_color_dict)

In [24]:
# same plot, but with the built-in matplotlib palette instead of custom colors
plot_clusters_in_annotation(adata_br3_neuronal, 
                            clusters=MD_clusters_to_plot,
                            section=toms_section, 
                            z_plane=z_plane, 
                            custom_colors=False)

## Clusters, subclasses used for manual MD annotation

### clusters

#### Top two clusters in MD (and elsewhere)

In [25]:
section, plate, z_plane = '1199651045', 75, 12
clusters_to_plot = [
    '1130 TH Prkcd Grin2c Glut_1',   # inside MD and also far away
    '1133 TH Prkcd Grin2c Glut_10',  # mostly in MD + a little above
]
plot_cell_types_with_annotation(adata_br3_neuronal, 
                                cell_types=clusters_to_plot, 
                                tax_level='cluster',
                                section=section, 
                                z_plane=z_plane, 
                                legend=True)

In [26]:
# tab10 followed by Dark2: palettes with large perceptual differences between
# adjacent colors, stored as RGBA rows with alpha fixed at 1
rgb_cm = np.concatenate((np.asarray(plt.get_cmap('tab10').colors), 
                         np.asarray(plt.get_cmap('Dark2').colors)))
group_colors = np.ones((rgb_cm.shape[0], 4))
group_colors[:, :3] = rgb_cm

# hardcoded, custom colors: palette rows 9 and 1, in cluster order
clusters_to_plot = [
    '1130 TH Prkcd Grin2c Glut_1',   # inside MD and also far away
    '1133 TH Prkcd Grin2c Glut_10',  # mostly in MD + a little above
]
md_cl_colors = group_colors[[9,1]] 
color_dict = {cl: color for cl, color in zip(clusters_to_plot, md_cl_colors)}

# Plot with custom colors
section, plate, z_plane = '1199651045', 75, 12

plot_cell_types_with_annotation(adata_br3_neuronal, 
                                cell_types=clusters_to_plot, 
                                tax_level='cluster',
                                section=section, 
                                z_plane=z_plane, 
                                legend=True, 
                                custom_colors=True,
                                label_color_dict=color_dict)

#### Top primarily-MD cluster

In [27]:
section, plate, z_plane = '1199651045', 75, 12
clusters_to_plot = ['1133 TH Prkcd Grin2c Glut_10']  #0663
plot_cell_types_with_annotation(adata_br3_neuronal, 
                                cell_types=clusters_to_plot, 
                                tax_level='cluster',
                                section=section, 
                                z_plane=z_plane, 
                                legend=True)

### supertypes

#### Top supertype

In [28]:
section, plate, z_plane = '1199651045', 75, 12
# only the top supertype here; '0270 TH Prkcd Grin2c Glut_1' is left out
supertypes_to_plot = ['0271 TH Prkcd Grin2c Glut_10']
plot_cell_types_with_annotation(adata_br3_neuronal, 
                                cell_types=supertypes_to_plot, 
                                tax_level='supertype',
                                section=section, 
                                z_plane=z_plane, 
                                legend=True)

#### All MD supertypes

In [29]:
section, plate, z_plane = '1199651045', 75, 12
supertypes_to_plot = [
    '0270 TH Prkcd Grin2c Glut_1',
    '0271 TH Prkcd Grin2c Glut_10',
]
plot_cell_types_with_annotation(adata_br3_neuronal, 
                                cell_types=supertypes_to_plot, 
                                tax_level='supertype',
                                section=section, 
                                z_plane=z_plane, 
                                legend=True)

# Gene expression in MD heatmaps

In [30]:
# reference section/plate used in the cell type plots (not plotted in this cell)
toms_section, toms_section_plate = '1199651045', 75

# section/plate actually plotted in this cell
section, plate = '1199651048', 73
gene = 'Slc17a7'
fig = plot_expression_annotation(adata_br3_neuronal, 
                                 gene=gene, 
                                 section=section, 
                                 plate=plate, 
                                 annotation=True)

In [31]:
section, plate = '1199651048', 73
gene = 'Scn4b'
fig = plot_expression_annotation(adata_br3_neuronal, 
                                 gene=gene, 
                                 section=section, 
                                 plate=plate, 
                                 annotation=True)

## DE genes - manually curated list

In [32]:
# Manually curated list of MD differentially expressed genes
# (provenance uncertain: "I think I got these from Mathew??")
# NOTE(review): 'Mop' does not look like a standard gene symbol; if it is not
# in adata.var_names it is silently dropped by the filter below - confirm the
# intended gene.
MD_DE_genes = ['Mop', 'Tnnt1', 'Necab1', 'Calb2', 'Prkcd', 'Slc17a7', 'Hs3st1',
               'Scn4b', 'Rasgrp1', 'Rgs4', 'Pcp4l1', 'Tspan9', 'Cnih3', 'Rgs16',
               'Cacng5', 'Kcnc2']

# keep only the genes that exist in the combined brain1 + brain3 AnnData
MD_DE_genes_br13 = [deg for deg in MD_DE_genes if deg in adata.var_names]
print('DE genes present in both brain1 & brain3:', MD_DE_genes_br13)

In [33]:
%matplotlib inline

section, plate = '1199651048', 73

# one expression figure per curated DE gene, saved into results_dir
for gene in MD_DE_genes_br13:
    fig = plot_expression_annotation(adata_br3_neuronal, 
                                     gene=gene, 
                                     section=section, 
                                     plate=plate, 
                                     annotation=True)
    fig_name = f'MD_DE_genes_expr_brain3_{gene}.png'
    fig.savefig(results_dir + fig_name, bbox_inches='tight')

## DE genes - scanpy's rank_genes_groups() list

In [34]:
# import scanpy as sc
# sc.tl.rank_genes_groups(adata_br3_neuronal, 'cluster_id', groups=['1133'], method='wilcoxon', rankby_abs=True)
# sc.pl.rank_genes_groups(adata_br3_neuronal, n_genes=20)

In [35]:
# using scanpy's rank_genes_groups()
# Genes that are not in adata.var_names are silently dropped by the filters
# below, so a misspelled symbol simply disappears from the plots.
MD_DE_genes_pos = ['Necab1', 'Ramp3', 'Stxbp6', 'Calb1', 'Shisa6', 'Tll1',
                   'Gpr4', 'Grin2c', 'Tox', 'Adra1b', 'A830036E02Rik']
MD_DE_genes_pos_br13 = [deg for deg in MD_DE_genes_pos if deg in adata.var_names]
print('DE genes present in both brain1 & brain3:', MD_DE_genes_pos_br13)

# 'Pou3f3' was previously typed as 'Rou3f3', which is not a gene symbol and
# was therefore always filtered out.
# NOTE(review): confirm 'Pou3f3' against the rank_genes_groups() output.
MD_DE_genes_neg = ['Hs3st4', 'Grm3', 'Sulf2', 'Adcy2', 'Grik1', 'Pou3f3', 
                   'Cnr1', 'Gfra1', 'Gsta4']
MD_DE_genes_neg_br13 = [deg for deg in MD_DE_genes_neg if deg in adata.var_names]
print('DE genes present in both brain1 & brain3:', MD_DE_genes_neg_br13)

### positive DE genes

In [36]:
section, plate = '1199651048', 73

# one expression figure per positive DE gene, saved into results_dir
for gene in MD_DE_genes_pos_br13:
    fig = plot_expression_annotation(adata_br3_neuronal, 
                                     gene=gene, 
                                     section=section, 
                                     plate=plate, 
                                     annotation=True)
    fig_name = f'MD_DE_genes_expr_brain3_{gene}.png'
    fig.savefig(results_dir + fig_name, bbox_inches='tight')

### negative DE genes

In [37]:
section, plate = '1199651048', 73

# one expression figure per negative DE gene, saved into results_dir
for gene in MD_DE_genes_neg_br13:
    fig = plot_expression_annotation(adata_br3_neuronal, 
                                     gene=gene, 
                                     section=section, 
                                     plate=plate)
    fig_name = f'MD_DE_genes_expr_brain3_{gene}.png'
    fig.savefig(results_dir + fig_name, bbox_inches='tight')

# 2D Histograms of MD

## Plotting Functions

### def plot_cell_type_spatial_distribution_in_md()

In [38]:
# from mpl_toolkits.axes_grid1 import ImageGrid
from mpl_toolkits.axes_grid1 import AxesGrid
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

def plot_cell_type_spatial_distribution_in_md(ad, cell_type_labels, 
                                              taxonomy_level_col, 
                                              sections=None, brain=3,
                                              binlimits=(-500,500), binsize=30, 
                                              n_cols=5, hist_range=(0,7)):
    '''
    Plot one 2D histogram per cell type of the in-MD cell positions.

    Parameters
    ----------
    ad : AnnData
        AnnData object with obs; reads the columns 'section', 
        'napari_z_brain1and3', 'brain', 'is_in_MD', 'napari_y_from_center',
        'napari_x_from_center' and `taxonomy_level_col`
    cell_type_labels : list of strings
        list of strings containing all cell type labels to be plotted
    taxonomy_level_col: str, {'cluster_label', 'supertype_id_label', 'class_id_label'}
        string for the ad.obs column name that contains the cell_type_labels
    sections : list of {strings, int}, default=None
        specifying sections by either section ID strings or napari z-coords;
        None uses every section in ad.obs
    brain : {1,3}
        specifies which brain, 1 or 3, to plot data for
    binlimits : tuple, default=(-500,500)
        sets 2D hist x & y bin limits
    binsize : int, default=30
        sets 2D hist bin size, in um
    n_cols : int, default=5
        sets # of subplot columns in the figure
    hist_range : tuple, default=(0,7)
        sets 2D hist colorbar limits
    
    Returns
    -------
    fig

    Raises
    ------
    ValueError
        if cell_type_labels or sections is empty
    TypeError
        if sections holds something other than strings or integers
    '''
    obs = ad.obs
    n_cell_types = len(cell_type_labels)
    if n_cell_types == 0:
        raise ValueError('cell_type_labels must contain at least one label')

    # enable specifying sections by either section ID strings or napari z-coords
    if sections is None:
        sections = obs.section.unique()
        sec_values = obs.section
    else:
        sections = list(sections)
        if len(sections) == 0:
            raise ValueError('sections must be None or a non-empty list')
        if isinstance(sections[0], str):
            sec_values = obs.section
        elif isinstance(sections[0], (int, np.integer)):
            # np.integer covers values taken from numpy arrays / pandas columns
            sec_values = obs.napari_z_brain1and3
        else:
            raise TypeError('sections must contain section ID strings or '
                            'integer napari z-coords')

    print('brain '+str(brain))

    # cells in the requested brain, sections and MD; identical for every
    # cell type, so computed once instead of once per subplot
    sec_mask = sec_values.isin(list(sections)).values
    br_sec_md_mask = np.logical_and.reduce([(obs.brain==brain).values,
                                            sec_mask,
                                            obs['is_in_MD'].values])
    bin_edges = np.arange(binlimits[0],binlimits[1],binsize)

    # set up subplot parameters
    n_rows = int(np.ceil(n_cell_types / n_cols))

    fig = plt.figure(figsize=(n_cols*3.1, n_rows*3))
    grid = AxesGrid(fig, 111,  # similar to subplot(121)
                    nrows_ncols=(n_rows,n_cols),
                    axes_pad=0.30,
                    share_all=True,
                    label_mode="1",
                    cbar_location="right",
                    cbar_mode="edge",
                    cbar_pad=0.25,
                    cbar_size="7%",
                    direction="row"  # plots all columns in one row before moving to next row
                    )

    for i, ax in enumerate(grid):
        # AxesGrid always creates n_rows*n_cols axes; hide the leftovers when
        # the number of cell types does not fill the last row
        if i >= n_cell_types:
            ax.set_visible(False)
            continue
        cell_type = cell_type_labels[i]

        # add the cell type filter to the shared brain/section/MD filter
        combomask = np.logical_and(br_sec_md_mask,
                                   (obs[taxonomy_level_col]==cell_type).values)
        cl_xy = obs.loc[combomask, 
                        ["napari_y_from_center", "napari_x_from_center"]
                       ].values

        # 2D histogram
        hh = ax.hist2d(cl_xy[:,1], -cl_xy[:,0], bins = [bin_edges, bin_edges],
                       vmin=hist_range[0], vmax=hist_range[1])
        # colorbar (only displays on axes specified by AxesGrid params)
        ax.cax.colorbar(hh[3], ax=ax,
                        label='cells per '+str(binsize)+'x'+str(binsize)+r'$\mu$'+'m pixel',
                        fraction=0.045, pad=0.04)

        # set up & format current subplot
        ax.set_title(cell_type)
        ax.set_aspect('equal')
        ax.set_xticks([])
        ax.set_yticks([])

        # Add scalebar to just the first 2D histogram
        if i==0:
            microns = 100
            scalebar = AnchoredSizeBar(ax.transData, microns, 
                                       str(microns)+r' $\mu$m', 
                                       'lower left', pad=0.1, sep=4, 
                                       color='white', size_vertical=20,
                                       frameon=False)
            ax.add_artist(scalebar)

    # throws warning, but ensures subplot titles length <= subplots' widths
    fig.tight_layout()
    plt.show()

    return fig

### def plot_gene_spatial_distribution_in_md()

In [39]:
def _gene_counts_in_cells(ad, gene, cell_mask=slice(None)):
    """Return a flat, dense array with the `ad.X` counts of `gene` for the selected cells."""
    counts = ad.X[cell_mask, ad.var_names==gene]
    # ad.X may be sparse or already dense; .toarray() is used rather than the
    # `.A` shortcut because not every sparse container provides `.A`
    if hasattr(counts, 'toarray'):
        counts = counts.toarray()
    return np.asarray(counts).flatten()


def plot_gene_spatial_distribution_in_md(ad, md_genes, sections=None, brain=3,
                                         method='sum',
                                         binlimits=(-500,500), binsize=30, 
                                         n_cols=5, hist_range=(0,7),
                                         threshold_pct=75):
    """Plot one 2D histogram per gene of its expression among MD cells.

    Cells are selected by brain, section and the `is_in_MD` flag, and binned
    by their `napari_x_from_center` / `napari_y_from_center` coordinates (the
    y-coords are negated for display).

    Parameters
    ----------
    ad : AnnData-like
        Needs `.X`, `.var_names` and the `.obs` columns `brain`, `section`,
        `napari_z_brain1and3`, `is_in_MD`, `napari_x_from_center` and
        `napari_y_from_center`.
    md_genes : sequence of str
        Genes to plot, one subplot each.
    sections : sequence of str or int, optional
        Sections to include. Strings are matched against `obs.section`,
        integers against `obs.napari_z_brain1and3`. None uses every section.
    brain : int
        Value of `obs.brain` to include.
    method : {'sum', 'threshold'}
        'sum' adds up the gene's counts per pixel; 'threshold' counts, per
        pixel, the cells whose count exceeds the gene's `threshold_pct`
        percentile (computed over all cells in `ad`).
    binlimits : (low, high)
        Range passed to np.arange to build the bin edges for both axes.
    binsize : number
        Bin width, in the same units as the coordinates.
    n_cols : int
        Number of subplot columns.
    hist_range : (vmin, vmax)
        Color limits shared by all histograms.
    threshold_pct : number
        Percentile used by method='threshold'. The default of 75 is the value
        this function has always computed with; the colorbar label used to
        claim the 95th percentile and now reports the value actually used.

    Returns
    -------
    matplotlib Figure

    Raises
    ------
    ValueError
        If `method` is not 'sum' or 'threshold'.
    TypeError
        If `sections` holds neither strings nor integers.
    """
    if method not in ('sum', 'threshold'):
        raise ValueError("method must be 'sum' or 'threshold', got "+repr(method))

    # enable specifying sections by either section ID strings or napari z-coords
    if sections is None:
        sections = ad.obs.section.unique()
        sec_values = ad.obs.section.values
    elif isinstance(sections[0], str):
        sec_values = ad.obs.section.values
    elif isinstance(sections[0], (int, np.integer)):
        # np.integer is accepted too, so z-coords taken from an array also work
        sec_values = ad.obs.napari_z_brain1and3.values
    else:
        raise TypeError("sections must contain section ID strings or integer "
                        "napari z-coords, got "+type(sections[0]).__name__)

    # cells in the brain, sections & MD we want; identical for every gene, so
    # it is computed once up front
    wanted_sections = set(sections)
    sec_mask = np.array([sec in wanted_sections for sec in sec_values], dtype=bool)
    sec_br_md_mask = np.logical_and(np.logical_and((ad.obs.brain==brain).values,
                                                   sec_mask),
                                    ad.obs.is_in_MD.values)

    # same bin edges on both axes; np.arange leaves out binlimits[1] itself
    bin_edges = np.arange(binlimits[0],binlimits[1],binsize)
    pixel_label = str(binsize)+'x'+str(binsize)+r'$\mu$'+'m pixel'

    # set up subplot parameters
    n_genes = len(md_genes)
    n_rows = int(np.ceil(n_genes / n_cols))
    
    fig = plt.figure(figsize=(n_cols*3.1, n_rows*3))
    grid = AxesGrid(fig, 111,  # similar to subplot(121)
                    nrows_ncols=(n_rows,n_cols),
                    axes_pad=0.30,
                    share_all=True,
                    label_mode="1",
                    cbar_location="right",
                    cbar_mode="edge",
                    cbar_pad=0.25,
                    cbar_size="7%",
                    direction="row"  # plots all columns in one row before moving to next row
                    )
    
    for i, ax in enumerate(grid):
        # the grid holds n_rows*n_cols axes, which can be more than n_genes;
        # hide the leftovers instead of indexing past the end of md_genes
        if i >= n_genes:
            ax.set_visible(False)
            continue
        gene = md_genes[i]
        
        # set combomask, gene_count_weights & label based on method we're using
        if method=='sum':
            # sum the gene counts (transcripts) per pixel using the 'weights'
            # input argument of hist2d()
            combomask = sec_br_md_mask
            gene_count_weights = _gene_counts_in_cells(ad, gene, combomask)
            count_label = 'summed transcripts\nper '+pixel_label
        else:
            # count the number of cells per pixel whose expression of this
            # gene is above its threshold_pct percentile; the percentile is
            # taken over every cell of `ad` so the mask lines up with ad.obs
            gene_counts = _gene_counts_in_cells(ad, gene)
            gene_mask = gene_counts > np.percentile(gene_counts, threshold_pct)
            combomask = np.logical_and(sec_br_md_mask, gene_mask)
            gene_count_weights = None
            count_label = ('cells with gene counts > '+str(threshold_pct)
                           +'th pct\nper '+pixel_label)

        gene_xy = ad.obs.loc[combomask, ["napari_y_from_center", 
                                         "napari_x_from_center"] ].values

        # 2D histogram
        hh = ax.hist2d(gene_xy[:,1], -gene_xy[:,0], weights=gene_count_weights,
                       bins = [bin_edges, bin_edges],
                       vmin=hist_range[0], vmax=hist_range[1])
        ax.cax.colorbar(hh[3], ax=ax, label=count_label,
                        fraction=0.045, pad=0.04)
        
        # set up & format current subplot
        ax.set_title(gene)
        ax.set_aspect('equal')
        ax.set_xticks([])
        ax.set_yticks([])

        # Add scalebar to just the first 2D histogram
        if i==0:
            microns = 100
            scalebar = AnchoredSizeBar(ax.transData, microns, 
                                       str(microns)+r' $\mu$m', 
                                       'lower left', pad=0.1, sep=4, 
                                       color='white', size_vertical=20,
                                       frameon=False)
            ax.add_artist(scalebar)
        
    # throws warning, but ensures subplot titles length <= subplots' widths
    fig.tight_layout()
    plt.show()

    return fig

### Plot aligned MD polygon outlines

Generate diagram to illustrate how we're arriving at the averaged 2D histogram plots

In [40]:
# centroid of every MD polygon; below, index 1 is plotted as x and index 0 as y
MD_centers = np.array([poly.centroid.coords[0] for poly in MD_polys])

# Show the brain3 L & R MD polygons aligned by centroid & overlaid
plt.figure(figsize=(6,4))
msize = 10
# multiplier for the centered x-coords: left polygons are mirrored so that
# they are flipped to match the right polygons
x_flip = {'right': 1, 'left': -1}

for ii, side in enumerate(MD_polys_L_R):
    # only plot brain3 for the EAB slides; entries that are neither 'right'
    # nor 'left' are not drawn
    if MD_polys_brain_id[ii]==1 or side not in x_flip:
        continue

    center = MD_centers[ii]
    outline = np.array(MD_polys[ii].exterior.coords)

    # centroid marker, expressed relative to the centroid itself
    # (i.e. (0, 0) for any finite centroid)
    plt.plot((center[1] - center[1]), 
             (center[0] - center[0]), "*k", markersize=msize)

    # negate y-coords so the polygons (made in napari using its unique coord
    # defaults) are displayed right-side-up using matplotlib
    plt.plot(x_flip[side] * (outline[:,1] - center[1]), 
             -(outline[:,0] - center[0]), 
             'k')

plt.axis('equal')
plt.box(False)
plt.xticks([])
plt.yticks([]);

## Supertypes in MD

In [41]:
# napari z-coords of the middle AP sections of MD, used to filter cells
MD_mid_z_range = list(range(7, 13))

# supertype labels plotted in this section
MD_supertype = [
    "0270 TH Prkcd Grin2c Glut_1",
    "0280 TH Prkcd Grin2c Glut_6",
    "0271 TH Prkcd Grin2c Glut_10",
    "0273 TH Prkcd Grin2c Glut_12",
    "0274 TH Prkcd Grin2c Glut_13",
]

In [42]:
# one set of supertype 2D histograms per brain:
# (brain ID, colorbar range, output file name)
supertype_figures = [
    (1, (0,10), 'MD_supertype_distributions_brain1only_midSecOnly.png'),
    (3, (0,7), 'MD_supertype_distributions_brain3only_midSecOnly.png'),
]

for brain_id, count_range, fig_name in supertype_figures:
    fig = plot_cell_type_spatial_distribution_in_md(
        adata_neuronal, MD_supertype, 'supertype_id_label',
        sections=MD_mid_z_range, brain=brain_id, n_cols=5,
        hist_range=count_range,
    )
    fig.savefig(results_dir+fig_name,bbox_inches='tight',dpi=300)

## Clusters in MD

### brain1 vs brain3 MD cluster spatial distribution

In [43]:
# napari z-coords of the middle AP sections of MD, used to filter cells
MD_mid_z_range = list(range(7, 13))

# one set of cluster 2D histograms per brain:
# (brain ID, colorbar range, output file name)
cluster_figures = [
    (1, (0,10), 'MD_cluster_distributions_brain1only_midSecOnly.png'),
    (3, (0,7), 'MD_cluster_distributions_brain3only_midSecOnly.png'),
]

for brain_id, count_range, fig_name in cluster_figures:
    fig = plot_cell_type_spatial_distribution_in_md(
        adata_neuronal, MD_cluster_minCells_Prkcd, 'cluster_label',
        sections=MD_mid_z_range, brain=brain_id, n_cols=5,
        hist_range=count_range,
    )
    fig.savefig(results_dir+fig_name,bbox_inches='tight',dpi=300)

### brain3 clusters, spatial distribution in MD

In [44]:
# napari z-coords of the middle AP sections of MD, used to filter cells
MD_mid_z_range = list(range(7, 13))

# only plot the clusters that have a reasonable # of cells in brain3 MD
min_cells = 100
clusters_above_min = []
for cluster in MD_cluster_minCells_Prkcd:
    in_brain3_md = np.logical_and(adata_neuronal.obs.brain==3,
                                  adata_neuronal.obs.is_in_MD.values)
    mdclb3mask = np.logical_and(in_brain3_md,
                                (adata_neuronal.obs.cluster_label==cluster).values)
    if np.sum(mdclb3mask)>min_cells:
        clusters_above_min.append(cluster)
# appended onto an empty array, so the result is an array even if nothing passes
MD_brain3_clusters = np.append(np.array([]), clusters_above_min)

# plot 2D histogram of cluster spatial distributions
fig_name = 'MD_cluster_distributions_brain3only_midSecOnly_6clusters.png'
fig = plot_cell_type_spatial_distribution_in_md(
    adata_neuronal, MD_brain3_clusters, 'cluster_label',
    sections=MD_mid_z_range, brain=3, n_cols=3,
    hist_range=(0,7),
)
fig.savefig(results_dir+fig_name,bbox_inches='tight')

## Gene expr in MD

In [45]:
# Manual list taken from looking at the plots in the
# "MD Figure > Gene expression in MD" subsection
MD_core_shell_genes = [
    # core
    'Slc17a7',
    'Scn4b',
    'Ramp3',
    'Rasgrp1',
    'Rgs4',
    'Pcp4l1',
    # shell
    'Necab1',
    'Stxbp6',
    'Calb1',
    'Tox',
]

### # of cells w/ gene count > thresh

In [46]:
# per-pixel count of brain3 MD cells above each gene's expression threshold
threshold_plot_kwargs = dict(
    sections=MD_mid_z_range,
    brain=3,
    method='threshold',
    binlimits=(-500,500),
    binsize=30,
    n_cols=5,
    hist_range=(0,7),
)
fig = plot_gene_spatial_distribution_in_md(adata_neuronal, MD_core_shell_genes,
                                           **threshold_plot_kwargs)

### summed gene counts

In [47]:
# per-pixel sum of each gene's counts over brain3 MD cells
sum_plot_kwargs = dict(
    sections=MD_mid_z_range,
    brain=3,
    method='sum',
    binlimits=(-500,500),
    binsize=30,
    n_cols=5,
    hist_range=(0,30),
)
fig = plot_gene_spatial_distribution_in_md(adata_neuronal, MD_core_shell_genes,
                                           **sum_plot_kwargs)

# PCA spatial distribution

In [48]:
# Mathew's PCA vectors. His description of the file:
# "top 50 PCs for those cells in "BRL_TH_subset_brain_1.h5ad" that have been 
# annotated as belonging to MD"
# The first CSV column is used as the row index.
pca_csv_path = 'resources/brain1_MD_PCAs_fromMathew.csv'
brain1_MD_PCAs = pd.read_csv(pca_csv_path, index_col=0)
brain1_MD_PCAs

In [49]:
# Genes shared between the two brains, in var_names order, with every name
# containing 'Blank' dropped
b13_joint_genes = list(filter(lambda name: 'Blank' not in name,
                              adata_br3_neuronal.var_names))
len(b13_joint_genes)

In [50]:
# Keep only the PCA rows of the genes shared between the two brains, ordered
# like b13_joint_genes. The dot products computed later pair column j of
# adata[:, b13_joint_genes].X with row j of this table, so the two must list
# the genes in the same order; a plain isin() filter would keep the CSV's row
# order instead, which is not guaranteed to match.
pca_gene_set = set(brain1_MD_PCAs.index)
MD_PCAs_b13_joint_genes = brain1_MD_PCAs.loc[
    [gene for gene in b13_joint_genes if gene in pca_gene_set]
]
MD_PCAs_b13_joint_genes

## brain3

In [51]:
# dense cell x gene count array for brain3, columns ordered like b13_joint_genes
brain3_joint_counts = adata_br3_neuronal[:,b13_joint_genes].X
cell_x_gene_brain3 = brain3_joint_counts.toarray()
# number of gene columns
len(cell_x_gene_brain3[0])

In [52]:
# projection of every brain3 cell onto the first PC only
PC_0 = MD_PCAs_b13_joint_genes['0'].values
cell_PC_0_dot_products = cell_x_gene_brain3.dot(PC_0)

# column labels of the leading PC vectors
pc_top10 = ['0','1','2','3','4','5','6','7','8','9']
pc_top5 = pc_top10[:5]

# Dot product between the gene expression vector of each cell and each of the
# top 10 PC vectors, transposed so that iterating yields one PC at a time
cell_PC_dot_products_b3 = np.dot(cell_x_gene_brain3,
                                 MD_PCAs_b13_joint_genes[pc_top10]).T

In [53]:
section = '1199651054'
plate = 69

# whether to draw the MD outline; also recorded in the output file name
show_MD_outline = True
fig_suffix = '_MDonly.png' if show_MD_outline else '_noMD.png'

# one figure per PC: spatial distribution of the cells' projections onto it
for i, pc in enumerate(cell_PC_dot_products_b3):
    fig = plot_PC_spatial_dist(adata_br3_neuronal[:,b13_joint_genes], pc, i,
                               section, plate, brain=3,
                               annotation=show_MD_outline, colormap='coolwarm')
    fig_name = 'pc_spatial_distribution_brain3_pc'+str(i)+fig_suffix
    fig.savefig(results_dir+fig_name,bbox_inches='tight')

## brain1

In [54]:
# dense cell x gene count array for brain1, columns ordered like b13_joint_genes
brain1_joint_counts = adata_br1_neuronal[:,b13_joint_genes].X
cell_x_gene_brain1 = brain1_joint_counts.toarray()
# number of gene columns
len(cell_x_gene_brain1[0])

In [55]:
# column labels of the leading PC vectors
pc_top10 = ['0','1','2','3','4','5','6','7','8','9']
pc_top5 = pc_top10[:5]

# Dot product between the gene expression vector of each brain1 cell and each
# of the top 10 PC vectors, transposed so that iterating yields one PC at a time
cell_PC_dot_products_b1 = np.dot(cell_x_gene_brain1,
                                 MD_PCAs_b13_joint_genes[pc_top10]).T

In [56]:
section = '1198980101'
plate = 73

# whether to draw the MD outline; also recorded in the output file name
show_MD_outline = True

# one figure per PC: spatial distribution of the brain1 cells' projections
for i, pc in enumerate(cell_PC_dot_products_b1):
    # brain=1 to match the brain1 data passed in; this call previously passed
    # brain=3, apparently carried over from the brain3 cell
    fig = plot_PC_spatial_dist(adata_br1_neuronal[:,b13_joint_genes], pc, i,
                               section, plate, brain=1,
                               annotation=show_MD_outline, colormap='Oranges')
    if show_MD_outline:
        fig_name = 'pc_spatial_distribution_brain1_pc'+str(i)+'.png'
    else:
        fig_name = 'pc_spatial_distribution_brain1_pc'+str(i)+'_noMD.png'
    fig.savefig(results_dir+fig_name,bbox_inches='tight')