In [1]:
import numpy as np
import pathlib
import anndata as ad
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

from shapely.geometry import Polygon, MultiLineString, box, Point, MultiPolygon
from shapely.ops import unary_union, polygonize
import shapely.plotting as splot
import shapely

from collections import defaultdict

from  geojson.geometry import GeometryCollection
import geojson

%matplotlib inline

# Set results write path

In [2]:
# Directory that every figure in this notebook is written to. Kept as a str
# (with trailing slash) because later cells build paths with `results_dir+fig_name`.
results_dir = '../results/md_annotation_figures/'
# Make sure the directory exists so fig.savefig() does not fail on a fresh checkout
pathlib.Path(results_dir).mkdir(parents=True, exist_ok=True)

# Load & subset anndata

In [3]:
# Read the combined brain1 + brain3 AnnData object that carries the MD annotations
adata = ad.read_h5ad(
    '../data/'
    'merfish_609882and638850_AIT17_annotated_TH_ZI_only_shared_genes_only_MD_annotations_2023-06-12_00-00-00/'
    'Brain_1_3_TH_ZI_MDannotations.h5ad'
)

In [4]:
# Show the AnnData summary as this cell's output
adata

In [5]:
# Neuronal divisions, used to restrict the analysis to neurons
divisions_neuronal = [
    '2 Subpallium GABAergic',
    '3 PAL-sAMY-TH-HY-MB-HB neuronal',
    '4 CBX-MOB-other neuronal',
]

# Independent copy holding only the cells whose division is in the list above
is_neuronal = adata.obs['division_id_label'].isin(divisions_neuronal)
adata_neuronal = adata[is_neuronal].copy()

In [6]:
# .obs['brain'] is 1 for brain1 cells and 3 for brain3 cells.
# If .obs['brain'] doesn't exist, the codebook names can be used instead:
# codebook_brain1 = 'VZG147'
# codebook_brain3 = 'wholebrain031822a'

brain_of_cell = adata_neuronal.obs['brain']
adata_br3_neuronal = adata_neuronal[brain_of_cell == 3].copy()
adata_br1_neuronal = adata_neuronal[brain_of_cell == 1].copy()

In [7]:
# nice to have everything for plotting in the obs dataframe
adata_br3_neuronal.obs['cirro_x'] = adata_br3_neuronal.obsm['spatial_cirro'][:,0]
adata_br3_neuronal.obs['cirro_y'] = adata_br3_neuronal.obsm['spatial_cirro'][:,1]

In [8]:
# Show the brain3 neuronal AnnData summary (now including cirro_x / cirro_y in .obs)
adata_br3_neuronal

# Load MD annotations

In [9]:
# Get the MD annotations from the .uns: parse the GeoJSON text stored under
# 'MD_polygons'. The result is indexed by 'geometries' in the next cell.
gc = geojson.loads(adata.uns['MD_polygons'])

In [10]:
# Pull the per-polygon geometry entries out of gc once, then split them into
# the outline (as a shapely Polygon, expected by cells_in_polygon()) and the
# napari z coordinate stored alongside it
md_geometries = gc['geometries']
MD_polys = [Polygon(geom['coordinates']) for geom in md_geometries]
MD_shapes_z = [geom['napari_z_coords'] for geom in md_geometries]

In [11]:
# Get per polygon metadata.
# Coerced to numpy arrays so that comparisons such as `MD_polys_brain_id==3`
# used by the plotting code are element-wise even if .uns returned a plain list.
MD_polys_brain_id = np.asarray(adata.uns['MD_polys_brain'])  # which brain it's for
MD_polys_L_R = np.asarray(adata.uns['MD_polys_left_right'])  # whether it's a left or right MD

# These are indexed in parallel with MD_polys / MD_shapes_z everywhere below
assert len(MD_polys) == len(MD_shapes_z) == len(MD_polys_brain_id) == len(MD_polys_L_R), \
    'MD polygon metadata arrays must have one entry per polygon'

# Plotting functions

## poly_from_points(), get_outline_polygon()

In [12]:
def poly_from_points(XY, min_points=0, allow_holes=False):
    '''
    Build a concave-hull outline (ratio=0.3) around a set of XY coordinates.
    Borrowed from Tom's code in brain3 ccf views

    Returns the hull as a shapely Polygon, or None when fewer than
    `min_points` coordinates are given or the hull is not exactly a Polygon.
    '''
    n_points = XY.shape[0]
    if n_points < min_points:
        return None

    hull = shapely.concave_hull(shapely.multipoints(XY),
                                ratio=0.3, allow_holes=allow_holes)
    return hull if type(hull) is shapely.Polygon else None

def get_outline_polygon(obs_data, min_points=50, coordinate_type='cirro'):
    '''Build one concave-hull outline Polygon per section.

    Parameters
    ----------
    obs_data : pandas.DataFrame
        Per-cell table with a 'section' column plus the coordinate columns
        for the requested `coordinate_type`.
    min_points : int
        Passed to poly_from_points(); sections with fewer cells get no outline.
    coordinate_type : {'cirro', 'napari'}
        'cirro' uses cirro_x / cirro_y as-is. 'napari' uses
        napari_x_brain1and3 / napari_y_brain1and3 with y negated.

    Returns
    -------
    defaultdict
        Maps section key -> shapely Polygon for every section where
        poly_from_points() returned a polygon.

    Raises
    ------
    ValueError
        If `coordinate_type` is not one of the supported values.
    '''
    coordinate_columns = {
        'napari': ['napari_x_brain1and3', 'napari_y_brain1and3'],
        'cirro': ['cirro_x', 'cirro_y'],
    }
    # Fail early with a clear message instead of hitting an unbound XY later
    if coordinate_type not in coordinate_columns:
        raise ValueError("coordinate_type must be 'cirro' or 'napari', got "
                         + repr(coordinate_type))
    xy_columns = coordinate_columns[coordinate_type]

    outline_polygons = defaultdict(dict)
    for section, df in obs_data.groupby('section'):
        XY = df[xy_columns].values
        if coordinate_type == 'napari':
            # negate y so napari coordinates display right-side-up
            XY = np.column_stack((XY[:, 0], -XY[:, 1]))
        poly = poly_from_points(XY, min_points=min_points)
        if poly is not None:
            # Depending on the environment, groupby may yield the key either
            # as a scalar or as a 1-tuple; normalise so lookups by section
            # always work.
            if isinstance(section, tuple) and len(section) == 1:
                section = section[0]
            outline_polygons[section] = poly
    return outline_polygons

## plot_shape(), plot_section_outline()

In [13]:
def plot_shape(poly, edgecolor='black', **kwargs):
    '''Displays shapely Polygon(s) using shapely's splot.plot_polygon()
    Borrowed from Tom's code in brain3 ccf views

    A GeometryCollection is drawn member by member; anything else is passed
    to plot_polygon() directly. Extra keyword arguments are forwarded.

    Returns the value returned by the last plot_polygon() call, or None if
    `poly` is an empty GeometryCollection (nothing drawn).
    '''
    # Initialised so an empty collection returns None instead of raising
    patch = None
    if isinstance(poly, shapely.GeometryCollection):
        for subpoly in poly.geoms:
            patch = splot.plot_polygon(subpoly, add_points=False,
                                       edgecolor=edgecolor, **kwargs)
    else:
        patch = splot.plot_polygon(poly, add_points=False,
                                   edgecolor=edgecolor, **kwargs)
    return patch

def plot_section_outline(outline_polygons, sections=None, axes=False, 
                         facecolor='none', edgecolor='black', alpha=0.05):
    ''' Displays the per-section outline_polygons from get_outline_polygon() for
    the specified sections.

    Parameters
    ----------
    outline_polygons : mapping of section -> shapely Polygon
    sections : None, str, or iterable of section keys
        None plots every section present in `outline_polygons`; a single
        string is treated as one section.
    axes : bool
        If False, the current axes are set to equal aspect and the box and
        ticks are hidden.
    facecolor, edgecolor, alpha
        Forwarded to plot_shape().
    '''
    if sections is None:
        # default to everything we have an outline for
        sections = list(outline_polygons.keys())
    elif isinstance(sections, str):
        sections = [sections]
    
    for section in sections:
        plot_shape(outline_polygons[section],facecolor=facecolor,
                   edgecolor=edgecolor,alpha=alpha)
        if not axes:
            plt.gca().set_aspect('equal')
            plt.box(False)
            plt.xticks([])
            plt.yticks([])

## plot_clusters_in_annotation()

In [14]:
def plot_clusters_in_annotation(adata_neuronal, clusters, section, plate, legend=True, brain=3):
    '''Scatter the cells of the given clusters in one section, with the MD
    annotation polygon(s) drawn on top.

    All cells in the section are drawn in light grey. For each cluster, cells
    flagged `is_in_MD` are drawn opaque and the remaining cells of that
    cluster are drawn with alpha=0.4 in the same colour.

    Parameters
    ----------
    adata_neuronal : AnnData
        Its .obs must provide section, cluster_label, is_in_MD and the
        napari_x/y_brain1and3 columns.
    clusters : iterable of cluster labels (consumed once)
    section : section ID compared against obs.section
    plate : int compared against the int-converted entries of MD_shapes_z
    legend : bool
        Whether to add a legend below the axes.
    brain : int, default 3
        Only polygons whose MD_polys_brain_id equals this value are drawn.

    Relies on the notebook-level MD_polys, MD_shapes_z and MD_polys_brain_id.
    Draws on a new figure and returns None.
    '''
    obs = adata_neuronal.obs
    napari_yx_cols = ["napari_y_brain1and3","napari_x_brain1and3"]

    fig = plt.figure(figsize=(12,6))
    
    # display all the cells in this section in grey
    mask_sec = (obs.section==section)
    xy_all = obs.loc[mask_sec, napari_yx_cols].values
    plt.plot(xy_all[:,1], -xy_all[:,0], '.', color='lightgrey', markersize=0.5)
    plt.gca().set_aspect('equal')

    # picked colormaps with large perceptual differences btwn adjacent colors
    rgb_cm = np.concatenate((np.asarray(plt.get_cmap('tab10').colors), 
                             np.asarray(plt.get_cmap('Dark2').colors)))
    cl_colors = np.ones((rgb_cm.shape[0],4))  # RGBA, fully opaque
    cl_colors[:,:3] = rgb_cm

    # the in-MD flag does not depend on the cluster, so compute it once
    mdmask = obs.is_in_MD.values
    msize = 2
    
    # plot the cells in the given clusters
    for color_iter, cl in enumerate(clusters):
        # wrap around the palette so a long cluster list cannot raise IndexError
        cl_color = cl_colors[color_iter % len(cl_colors)]
        clmask = np.logical_and(mask_sec, (obs.cluster_label==cl))

        # plot the in-MD cells darker
        xy_cl_MD = obs.loc[np.logical_and(mdmask,clmask), napari_yx_cols].values
        plt.scatter(xy_cl_MD[:,1], -xy_cl_MD[:,0], s=msize, 
                    alpha=1, facecolor=cl_color, label=cl)

        # plot the other cells lighter (no label, so one legend entry per cluster)
        xy_cl_other = obs.loc[np.logical_and(~mdmask,clmask), napari_yx_cols].values
        plt.scatter(xy_cl_other[:,1], -xy_cl_other[:,0], s=msize,
                    alpha=0.4, facecolor=cl_color)

    if legend:
        lgnd = plt.legend(ncols=3, loc='upper center', bbox_to_anchor=(0.5,0), fontsize=10)
        # enlarge the legend markers; the plotted markers are too small to read
        for handle in lgnd.legend_handles:
            handle.set_sizes([16])
        
    # select the right polygon(s) for this section
    plate_match = np.array([int(x) for x in MD_shapes_z])==plate
    brain_match = np.asarray(MD_polys_brain_id)==brain
    for i in np.where(plate_match & brain_match)[0]:
        outline = np.array(MD_polys[i].exterior.coords)
        # column 1 is plotted as x and negated column 0 as y, matching the cells
        plt.plot(outline[:,1], -outline[:,0], 'k')

    # formatting
    plt.gca().set_aspect('equal')
    plt.xticks([])
    plt.yticks([])
    plt.box(False)

## plot_expression_annotation()

In [15]:
from scipy import sparse

def plot_expression_annotation(adata_neuronal, gene, section, plate, 
                               annotation=True, section_outline=True, brain=3):
    '''Scatter one section's cells coloured by the expression of `gene`.

    Parameters
    ----------
    adata_neuronal : AnnData
        Its .obs must provide section and the napari_x/y_brain1and3 columns;
        `gene` must be one of its var_names.
    gene : str
        Gene whose values in .X colour the cells.
    section : section ID compared against obs.section
    plate : int compared against the int-converted entries of MD_shapes_z
    annotation : bool
        Draw the MD polygon(s) for this plate and brain.
    section_outline : bool
        Draw a concave-hull outline around this section's cells.
    brain : int, default 3
        Only polygons whose MD_polys_brain_id equals this value are drawn.

    Relies on the notebook-level MD_polys, MD_shapes_z and MD_polys_brain_id.

    Returns
    -------
    matplotlib Figure
    '''
    obs = adata_neuronal.obs
    sec_mask = obs.section==section

    # .X may be sparse; convert to a flat dense vector so it can be used as
    # the scatter colour values
    gene_X = adata_neuronal[sec_mask][:,gene].X
    if hasattr(gene_X, 'toarray'):
        gene_X = gene_X.toarray()
    gene_exp = np.asarray(gene_X).flatten()

    # Plot figure                           
    fig = plt.figure(figsize=(9,6))

    colormap = 'Blues' #'viridis'
    marker_size = 0.5
    
    xy = obs.loc[sec_mask,["napari_y_brain1and3","napari_x_brain1and3"]].values
    sc = plt.scatter(xy[:,1], -xy[:,0], c=gene_exp,
                cmap=colormap, s=marker_size)
    plt.title(gene)
    cbar = plt.colorbar(sc, fraction=0.025, pad=0.04)
    cbar.set_label('log2(CPV+1)')

    if annotation:
        # select the right polygon(s) for this section
        plate_match = np.array([int(x) for x in MD_shapes_z])==plate
        brain_match = np.asarray(MD_polys_brain_id)==brain
        for i in np.where(plate_match & brain_match)[0]:
            outline = np.array(MD_polys[i].exterior.coords)
            plt.plot(outline[:,1], -outline[:,0], 'k', alpha=0.6)

    if section_outline:
        # Build the outline from the data that was passed in, and only for the
        # section being plotted (rather than every section on every call)
        th_outline_polygons = get_outline_polygon(obs.loc[sec_mask],
                                                  coordinate_type='napari')
        plot_section_outline(th_outline_polygons, sections=[section], alpha=0.2)

    # formatting
    plt.gca().set_aspect('equal')
    plt.xticks([])
    plt.yticks([])
    plt.box(False)

    return fig

## plot_PC_spatial_dist()

In [16]:
def plot_PC_spatial_dist(adata_neuronal, PC_distance, PC_id, section, plate, 
                         brain=3, annotation=True, section_outline=True, 
                         colormap='Oranges'):
    '''Scatter the in-MD cells of one section coloured by a per-cell PC value.

    `PC_distance` holds one value per row of `adata_neuronal.obs`; only the
    cells that are in `section` and flagged `is_in_MD` are drawn. When
    `annotation` is True the MD polygon(s) matching `plate` and `brain` are
    overlaid. `section_outline` is accepted but not used by this function.

    Relies on the notebook-level MD_polys, MD_shapes_z and MD_polys_brain_id.
    Returns the matplotlib Figure.
    '''
    cell_obs = adata_neuronal.obs
    in_section_and_md = np.logical_and(cell_obs.section==section,
                                       cell_obs.is_in_MD.values)

    fig = plt.figure(figsize=(9,6))

    dot_size = 7
    yx = cell_obs.loc[in_section_and_md,
                      ["napari_y_brain1and3","napari_x_brain1and3"]].values
    sc = plt.scatter(yx[:,1], -yx[:,0], c=PC_distance[in_section_and_md],
                     cmap=colormap, s=dot_size)
    plt.title('PC_'+str(PC_id))
    cbar = plt.colorbar(sc, fraction=0.025, pad=0.04)

    if annotation:
        # pick the polygon(s) drawn on this plate for this brain
        plate_of_poly = np.array([int(x) for x in MD_shapes_z])
        poly_index = np.where( (plate_of_poly==plate) & 
                               (MD_polys_brain_id==brain) )[0]
        for i in poly_index:
            outline = np.array(MD_polys[i].exterior.coords)
            plt.plot(outline[:,1], -outline[:,0], 'k', alpha=0.6)

    # strip the axes decorations
    plt.gca().set_aspect('equal')
    plt.xticks([])
    plt.yticks([])
    plt.box(False)

    return fig

## Demonstrate plot_section_outline()

In [17]:
# set up whole-thalamus outline
# (one outline per brain3 section, built from the napari coordinates in .obs)
th_outline_polygons = get_outline_polygon(adata_br3_neuronal.obs, coordinate_type='napari')
# demo: draw the outline of a single section
plot_section_outline(th_outline_polygons, sections='1199651060', alpha=0.2)

# Generate list of MD clusters

In [18]:
# Sorted array of every cluster label that has at least one brain3 cell
# flagged as being in the MD
br3_obs = adata_br3_neuronal.obs
MD_clusters = np.asarray(sorted(br3_obs.loc[br3_obs.is_in_MD, "cluster_label"].unique()))

In [19]:
# shorten the list to only those clusters that have >N cells inside the MD.
# NOTE: only brain3 cells are counted here (adata_br3_neuronal), not both brains.
min_cells = 20
br3_in_MD = adata_br3_neuronal.obs.is_in_MD.values  # same for every cluster
clusters_with_min_cells = []
for cluster in MD_clusters:
    mdclmask = np.logical_and(br3_in_MD, (adata_br3_neuronal.obs.cluster_label==cluster).values)
    if np.sum(mdclmask)>min_cells:
        clusters_with_min_cells.append(cluster)
# build the array once instead of re-allocating it with np.append per cluster
MD_cluster_minCells = np.array(clusters_with_min_cells)

MD_cluster_minCells

In [20]:
# Keep only the clusters whose label contains 'TH Prkcd'. This removes the
# clusters that are likely from interlaminar cells on the boundary 
# of the MD ('CM-IAD-CL-PCN'), and not actually MD cells themselves
MD_cluster_minCells_Prkcd = np.array([cl for cl in MD_cluster_minCells if 'TH Prkcd' in cl])
MD_cluster_minCells_Prkcd

# Cell types in MD plots

## MD clusters also present in other thalamic nuclei

In [21]:
toms_section = '1199651045'
toms_section_plate = 75
# clusters are passed in reverse order
plot_clusters_in_annotation(adata_br3_neuronal, MD_cluster_minCells[::-1],
                            section=toms_section, plate=toms_section_plate)

## Clusters, subclasses used for manual MD annotation

In [22]:
# Palette: tab10 followed by Dark2 (large perceptual differences between
# adjacent colors), stored as RGBA rows with alpha fixed at 1
tab10_rgb = np.asarray(plt.get_cmap('tab10').colors)
dark2_rgb = np.asarray(plt.get_cmap('Dark2').colors)
rgb_cm = np.vstack((tab10_rgb, dark2_rgb))
group_colors = np.column_stack((rgb_cm, np.ones(rgb_cm.shape[0])))

In [23]:
# Centroid of each MD polygon, one row per entry of MD_polys
MD_centers = np.array([p.centroid.coords[0] for p in MD_polys])

In [24]:
section = '1199651045'
plate = 75

# Set up & format figure
fig = plt.figure(figsize=(12,8))
plt.axis('equal')
plt.box(False)
plt.xticks([])
plt.yticks([]);

# define colors
th_bkgd_color = 'grey'
md_boundary_color = 'dimgrey'
md_cl_colors = group_colors[[9,1]]  # hardcode colors so clusters are easy to see

# define marker sizes
msize_bkgd = 0.5
msize_boundary = 4
msize_md_cl = 4

obs_br3 = adata_br3_neuronal.obs
sec_mask = (obs_br3.section==section)
napari_yx_cols = ["napari_y_brain1and3","napari_x_brain1and3"]

# Neuronal divisions, plotted in grey to show the shape of the thalamus
divisions_to_plot = ['2 Subpallium GABAergic',
                    '3 PAL-sAMY-TH-HY-MB-HB neuronal',
                    '4 CBX-MOB-other neuronal']

# Which subclasses are helpful in distinguishing MD boundaries?
subclasses_to_plot = ['067 PVT-PT Ntrk1 Glut', # PVT (above MD)
                      '068 CM-IAD-CL-PCN Glut'  # below MD
                     ]

# Which supertypes are helpful?
supertypes_to_plot = ['0290 PF Fzd5 Glut_1', '0291 PF Fzd5 Glut_2', #PF (posterior end of MD)
                      '0273 TH Prkcd Grin2c Glut_12',  # midline
                      '0274 TH Prkcd Grin2c Glut_13',  # lateral/sides of MD
                      '0279 TH Prkcd Grin2c Glut_5',  # lateral/sides of MD
                      ]
# '0271 TH Prkcd Grin2c Glut_10',  # inside MD, includes clusters 1132, 1133

# Which clusters are helpful?
# too few cells to be worth plotting: '1132 TH Prkcd Grin2c Glut_10'
clusters_to_plot = ['1130 TH Prkcd Grin2c Glut_1', # inside MD and also far away
                    '1133 TH Prkcd Grin2c Glut_10' # mostly in MD + a little above
                   ]

# Background + boundary layers, drawn in this order (later layers on top).
# Each layer is every cell in this section whose label in `label_col` is listed.
single_color_layers = [
    ('division_id_label',  divisions_to_plot,  th_bkgd_color,     msize_bkgd),
    ('subclass_id_label',  subclasses_to_plot, md_boundary_color, msize_boundary),
    ('supertype_id_label', supertypes_to_plot, md_boundary_color, msize_boundary),
]
for label_col, labels, layer_color, layer_msize in single_color_layers:
    layer_mask = np.logical_and(sec_mask, obs_br3[label_col].isin(labels))
    xy_all = obs_br3.loc[layer_mask, napari_yx_cols].values
    plt.plot(xy_all[:,1], -xy_all[:,0], '.', color=layer_color, markersize=layer_msize)

# MD clusters, one color each. The color is tied to the cluster's position in
# clusters_to_plot, so it does not shift if a cluster has no cells.
for cl_color_iter, cluster in enumerate(clusters_to_plot):
    sec_cl_mask = np.logical_and(sec_mask, obs_br3.cluster_label==cluster)
    xy_all = obs_br3.loc[sec_cl_mask, napari_yx_cols].values
    plt.plot(xy_all[:,1], -xy_all[:,0], '.', color=md_cl_colors[cl_color_iter], markersize=msize_md_cl)

# Add the MD polygons on top
# select the right polygon(s) for this section
poly_index = np.where( (np.array([int(x) for x in MD_shapes_z])==plate) & 
                       (np.asarray(MD_polys_brain_id)==3) )[0]
for i in poly_index:
    md_outline = np.array(MD_polys[i].exterior.coords)
    plt.plot(md_outline[:,1], -md_outline[:,0], 'k')
    # mark the polygon centroid with a star
    plt.plot(MD_centers[i][1], -MD_centers[i][0], "*k", markersize=10)

fig_name = 'th_groups_used_for_MD_annotation_sec'+section+'.png'
fig.savefig(results_dir+fig_name,bbox_inches='tight')

# Gene expression in MD heatmaps

In [25]:
toms_section = '1199651045'
toms_section_plate = 75

# Expression of one gene in a brain3 section, with the MD outline overlaid
section, plate = '1199651048', 73
gene = 'Slc17a7'
fig = plot_expression_annotation(adata_br3_neuronal, gene,
                                 section=section, plate=plate, annotation=True)

In [26]:
# Same section as above, different gene
section, plate = '1199651048', 73
gene = 'Scn4b'
fig = plot_expression_annotation(adata_br3_neuronal, gene,
                                 section=section, plate=plate, annotation=True)

## DE genes - manually curated list

In [27]:
# Manually curated list (I think I got these from Mathew??)
# NOTE(review): genes not found in adata.var_names are dropped silently below,
# so a misspelled symbol would go unnoticed - confirm the spelling of each entry.
MD_DE_genes = [
    'Mop', 'Tnnt1', 'Necab1', 'Calb2',
    'Prkcd', 'Slc17a7', 'Hs3st1', 'Scn4b',
    'Rasgrp1', 'Rgs4', 'Pcp4l1', 'Tspan9',
    'Cnih3', 'Rgs16', 'Cacng5', 'Kcnc2',
]

# keep only the genes that are present in adata
MD_DE_genes_br13 = [gene_name for gene_name in MD_DE_genes
                    if gene_name in adata.var_names]
print('DE genes present in both brain1 & brain3:', MD_DE_genes_br13)

In [28]:
%matplotlib inline

section, plate = '1199651048', 73

# one saved figure per curated DE gene
for gene in MD_DE_genes_br13:
    fig = plot_expression_annotation(adata_br3_neuronal, gene,
                                     section=section, plate=plate,
                                     annotation=True)
    fig_name = 'MD_DE_genes_expr_brain3_{}.png'.format(gene)
    fig.savefig(results_dir + fig_name, bbox_inches='tight')

## DE genes - scanpy's rank_genes_groups() list

In [29]:
# import scanpy as sc
# sc.tl.rank_genes_groups(adata_br3_neuronal, 'cluster_id', groups=['1133'], method='wilcoxon', rankby_abs=True)
# sc.pl.rank_genes_groups(adata_br3_neuronal, n_genes=20)

In [30]:
# using scanpy's rank_genes_groups()
MD_DE_genes_pos = ['Necab1', 'Ramp3', 'Stxbp6', 'Calb1', 'Shisa6', 'Tll1',
                   'Gpr4', 'Grin2c', 'Tox', 'Adra1b', 'A830036E02Rik']
MD_DE_genes_pos_br13 = [deg for deg in MD_DE_genes_pos if deg in adata.var_names]
print('DE genes present in both brain1 & brain3:', MD_DE_genes_pos_br13)

# 'Pou3f3' was previously misspelled 'Rou3f3', which is not a gene symbol and
# was therefore silently filtered out by the var_names check below
MD_DE_genes_neg = ['Hs3st4', 'Grm3', 'Sulf2', 'Adcy2', 'Grik1', 'Pou3f3', 
                   'Cnr1', 'Gfra1', 'Gsta4']
MD_DE_genes_neg_br13 = [deg for deg in MD_DE_genes_neg if deg in adata.var_names]
print('DE genes present in both brain1 & brain3:', MD_DE_genes_neg_br13)

### positive DE genes

In [31]:
section, plate = '1199651048', 73

# one saved figure per positive DE gene
for gene in MD_DE_genes_pos_br13:
    fig = plot_expression_annotation(adata_br3_neuronal, gene,
                                     section=section, plate=plate,
                                     annotation=True)
    fig_name = 'MD_DE_genes_expr_brain3_{}.png'.format(gene)
    fig.savefig(results_dir + fig_name, bbox_inches='tight')

### negative DE genes

In [32]:
section, plate = '1199651048', 73

# one saved figure per negative DE gene (annotation left at its default)
for gene in MD_DE_genes_neg_br13:
    fig = plot_expression_annotation(adata_br3_neuronal, gene,
                                     section=section, plate=plate)
    fig_name = 'MD_DE_genes_expr_brain3_{}.png'.format(gene)
    fig.savefig(results_dir + fig_name, bbox_inches='tight')

# 2D Histograms of MD

## Plot aligned MD polygon outlines

Generate diagram to illustrate how we're arriving at the averaged 2D histogram plots

In [33]:
MD_centers = np.array([p.centroid.coords[0] for p in MD_polys])

# Show the brain3 L & R MD polygons aligned by centroid & overlaid
plt.figure(figsize=(6,4))

# Left polygons are mirrored in x so that they match the right polygons
x_sign_by_side = {'right': 1, 'left': -1}

for ii, side in enumerate(MD_polys_L_R):
    # only plot brain3 for the EAB slides
    if MD_polys_brain_id[ii]==1:
        continue

    msize = 10

    if side not in x_sign_by_side:
        continue

    # after centering, every centroid sits at the origin
    plt.plot(0.0, 0.0, "*k", markersize=msize)

    # shift the outline so its centroid is at the origin
    centered = np.array(MD_polys[ii].exterior.coords) - MD_centers[ii]

    # negate y-coords so the polygons (made in napari using its unique coord
    # defaults) are displayed right-side-up using matplotlib
    plt.plot(x_sign_by_side[side]*centered[:,1], -centered[:,0], 'k')

plt.axis('equal')
plt.box(False)
plt.xticks([])
plt.yticks([]);

## Clusters

### plot_cluster_spatial_distribution_in_md()

In [34]:
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

def plot_cluster_spatial_distribution_in_md(ad, md_clusters, 
                                            sections=None, brain=3,
                                            binlimits=(-500,500), binsize=30, 
                                            n_cols=5, hist_range=(0,7)):
    '''Plot one 2D histogram per cluster of where its in-MD cells sit,
    using the napari_x/y_from_center coordinates.

    Parameters
    ----------
    ad : AnnData
        Its .obs must provide brain, section, napari_z_brain1and3, is_in_MD,
        cluster_label, napari_x_from_center and napari_y_from_center.
    md_clusters : sequence of cluster labels, one subplot each
    sections : None, sequence of str, or sequence of int
        None uses every section. Strings are matched against obs.section,
        integers against obs.napari_z_brain1and3.
    brain : value matched against obs.brain
    binlimits : (low, high) passed to np.arange to build the bin edges
    binsize : bin width, in the same units as the coordinates
    n_cols : number of subplot columns
    hist_range : (vmin, vmax) of the histogram colour scale

    Returns
    -------
    matplotlib Figure

    Raises
    ------
    ValueError if `sections` is empty; TypeError if its entries are neither
    str nor int.
    '''
    obs = ad.obs

    # enable specifying sections by either section ID strings or napari z-coords
    if sections is None:
        sec_column = obs.section
        sections = sec_column.unique()
    else:
        sections = list(sections)
        if len(sections) == 0:
            raise ValueError('sections must not be empty')
        if isinstance(sections[0], str):
            sec_column = obs.section
        elif isinstance(sections[0], (int, np.integer)):
            # np.integer covers values coming from numpy arrays / ranges
            sec_column = obs.napari_z_brain1and3
        else:
            raise TypeError('sections must contain section ID strings or '
                            'integer napari z-coords')

    # brain / section / in-MD filter is the same for every cluster: build it once
    sec_mask = sec_column.isin(list(sections)).values
    br_sec_mask = np.logical_and((obs.brain==brain).values, sec_mask)
    br_sec_md_mask = np.logical_and(br_sec_mask, obs.is_in_MD.values)

    # note: np.arange excludes binlimits[1] itself
    bin_edges = np.arange(binlimits[0],binlimits[1],binsize)

    # set up subplot parameters
    n_clusters = len(md_clusters)
    n_rows = int(np.ceil(n_clusters / n_cols))
        
    fig = plt.figure(figsize=(n_cols*3.1, n_rows*3))
    
    for sp_counter, cluster in enumerate(md_clusters, start=1):
        # filter for cells in the brain, section, MD, and cluster we want
        combomask = np.logical_and(br_sec_md_mask,
                                   (obs.cluster_label==cluster).values)
        cl_xy = obs.loc[combomask, 
                        ["napari_y_from_center", "napari_x_from_center"]
                       ].values

        # set up & format current subplot
        ax = fig.add_subplot(n_rows, n_cols, sp_counter)
        ax.set_title(cluster)
        ax.set_aspect('equal')
        ax.set_xticks([])
        ax.set_yticks([])

        # 2D histogram (y negated so the display is right-side-up)
        hh = ax.hist2d(cl_xy[:,1], -cl_xy[:,0], bins = [bin_edges, bin_edges],
                       vmin=hist_range[0], vmax=hist_range[1])
        cb = plt.colorbar(hh[3], ax=ax,
                          label='cells per '+str(binsize)+'x'+str(binsize)+r'$\mu$'+'m pixel',
                          fraction=0.045, pad=0.04)

        # Add scalebar to just the first 2D histogram
        if sp_counter==1:
            microns = 100
            scalebar = AnchoredSizeBar(ax.transData, microns, 
                                       str(microns)+r' $\mu$m', 
                                       'lower left', pad=0.1, sep=4, 
                                       color='white', size_vertical=20,
                                       frameon=False)
            ax.add_artist(scalebar)
        
    # format whole figure 
    plt.subplots_adjust(hspace=0.5)
    plt.subplots_adjust(wspace=2.0)
    fig.tight_layout()
    fig.subplots_adjust(top=0.97)

    return fig

### brain1 vs brain3 MD cluster spatial distribution

In [35]:
# middle AP sections of MD for filtering purposes
MD_mid_z_range = [7,8,9,10,11,12]

# One figure of 2D histograms per brain; each brain gets its own upper limit
# for the colour scale
for brain_id, hist_max in ((1, 10), (3, 7)):
    brain_tag = 'brain'+str(brain_id)
    fig = plot_cluster_spatial_distribution_in_md(adata_neuronal,
                                                  MD_cluster_minCells_Prkcd, 
                                                  sections=MD_mid_z_range,
                                                  brain=brain_id,
                                                  n_cols=5,
                                                  hist_range=(0,hist_max))
    plt.suptitle(brain_tag+', mid-AP MD sections', fontsize=16, y=1.0)
    fig_name = 'MD_cluster_distributions_'+brain_tag+'only_midSecOnly.png'
    fig.savefig(results_dir+fig_name,bbox_inches='tight')

### brain3 clusters, spatial distribution in MD

In [36]:
# middle AP sections of MD for filtering purposes
MD_mid_z_range = [7,8,9,10,11,12]

# only plot the clusters that have a reasonable # of cells in brain3 MD
min_cells = 100
# brain3 + in-MD filter is the same for every cluster, so build it once
br3_md_mask = np.logical_and(adata_neuronal.obs.brain==3,
                             adata_neuronal.obs.is_in_MD.values)
brain3_clusters_kept = []
for cluster in MD_cluster_minCells_Prkcd:
    mdclb3mask = np.logical_and(br3_md_mask,
                                (adata_neuronal.obs.cluster_label==cluster).values)
    if np.sum(mdclb3mask)>min_cells:
        brain3_clusters_kept.append(cluster)
# build the array once instead of re-allocating it with np.append per cluster
MD_brain3_clusters = np.array(brain3_clusters_kept)

# plot 2D histogram of cluster spatial distributions
fig = plot_cluster_spatial_distribution_in_md(adata_neuronal,MD_brain3_clusters, 
                                              sections=MD_mid_z_range, brain=3,
                                              n_cols=3)
plt.suptitle('brain3, mid-AP MD sections', fontsize=16, y=1.0)
fig_name = 'MD_cluster_distributions_brain3only_midSecOnly_6clusters.png'
fig.savefig(results_dir+fig_name,bbox_inches='tight')

## Genes

In [37]:
# Manual list taken from looking at the plots in the
# "MD Figure > Gene expression in MD" subsection
MD_core_shell_genes = ['Slc17a7', 'Scn4b', 'Ramp3', 'Rasgrp1', 'Rgs4', 'Pcp4l1', #core
                    'Necab1', 'Stxbp6', 'Calb1', 'Tox'] #shell

### plot_gene_spatial_distribution_in_md()

In [38]:
def plot_gene_spatial_distribution_in_md(ad, md_genes, sections=None, brain=3,
                                         method='sum',
                                         binlimits=(-500,500), binsize=30, 
                                         n_cols=5, hist_range=(0,7),
                                         threshold_pct=75):
    '''Plot one 2D histogram per gene of its expression across the in-MD
    cells, using the napari_x/y_from_center coordinates.

    Parameters
    ----------
    ad : AnnData
        Its .obs must provide brain, section, napari_z_brain1and3, is_in_MD,
        napari_x_from_center and napari_y_from_center.
    md_genes : sequence of gene names (must be in ad.var_names), one subplot each
    sections : None, sequence of str, or sequence of int
        None uses every section. Strings are matched against obs.section,
        integers against obs.napari_z_brain1and3.
    brain : value matched against obs.brain
    method : {'sum', 'threshold'}
        'sum' adds up each gene's values in .X per pixel. 'threshold' counts,
        per pixel, the cells whose value exceeds the `threshold_pct`
        percentile of that gene over all cells in `ad`.
    binlimits : (low, high) passed to np.arange to build the bin edges
    binsize : bin width, in the same units as the coordinates
    n_cols : number of subplot columns
    hist_range : (vmin, vmax) of the histogram colour scale
    threshold_pct : percentile used by method='threshold' (default 75)

    Returns
    -------
    matplotlib Figure

    Raises
    ------
    ValueError for an unknown `method` or empty `sections`; TypeError if the
    entries of `sections` are neither str nor int; KeyError if a gene is not
    found exactly once in ad.var_names.
    '''
    if method not in ('sum', 'threshold'):
        raise ValueError("method must be 'sum' or 'threshold', got " + repr(method))

    obs = ad.obs

    # enable specifying sections by either section ID strings or napari z-coords
    if sections is None:
        sec_column = obs.section
        sections = sec_column.unique()
    else:
        sections = list(sections)
        if len(sections) == 0:
            raise ValueError('sections must not be empty')
        if isinstance(sections[0], str):
            sec_column = obs.section
        elif isinstance(sections[0], (int, np.integer)):
            # np.integer covers values coming from numpy arrays / ranges
            sec_column = obs.napari_z_brain1and3
        else:
            raise TypeError('sections must contain section ID strings or '
                            'integer napari z-coords')

    # brain / section / in-MD filter is the same for every gene: build it once
    sec_mask = sec_column.isin(list(sections)).values
    sec_br_md_mask = np.logical_and(np.logical_and((obs.brain==brain).values,
                                                   sec_mask),
                                    obs.is_in_MD.values)

    # note: np.arange excludes binlimits[1] itself
    bin_edges = np.arange(binlimits[0],binlimits[1],binsize)
    pixel_label = 'per '+str(binsize)+'x'+str(binsize)+r'$\mu$'+'m pixel'

    # set up subplot parameters
    n_genes = len(md_genes)
    n_rows = int(np.ceil(n_genes / n_cols))
        
    fig = plt.figure(figsize=(n_cols*3.1, n_rows*3))
    
    for sp_counter, gene in enumerate(md_genes, start=1):
        # per-cell values of this gene as a flat dense vector
        gene_cols = np.flatnonzero(ad.var_names==gene)
        if gene_cols.size != 1:
            raise KeyError('gene ' + repr(gene) + ' matched ' + str(gene_cols.size)
                           + ' entries in var_names; expected exactly 1')
        gene_counts = ad.X[:, gene_cols[0]]
        if hasattr(gene_counts, 'toarray'):  # sparse column -> dense
            gene_counts = gene_counts.toarray()
        gene_counts = np.asarray(gene_counts).ravel()

        # set combomask, gene_count_weights & label based on method we're using
        if method=='sum':
            # sum the gene counts (transcripts) per pixel using the 'weights'
            # input argument of hist2d()
            combomask = sec_br_md_mask
            gene_count_weights = gene_counts[combomask]
            count_label = 'summed transcripts\n'+pixel_label
        else:  # method=='threshold'
            # count the number of cells per pixel whose expression is above
            # the requested percentile for this gene (computed over all of `ad`)
            gene_count_thresh = np.percentile(gene_counts, threshold_pct)
            gene_mask = gene_counts > gene_count_thresh
            combomask = np.logical_and(sec_br_md_mask, gene_mask)
            gene_count_weights = None
            count_label = ('cells with gene counts > '+str(threshold_pct)
                           +'th pct\n'+pixel_label)

        gene_xy = obs.loc[combomask, ["napari_y_from_center", 
                                      "napari_x_from_center"] ].values

        # set up & format current subplot
        ax = fig.add_subplot(n_rows, n_cols, sp_counter)
        ax.set_title(gene)
        ax.set_aspect('equal')
        ax.set_xticks([])
        ax.set_yticks([])

        # 2D histogram (y negated so the display is right-side-up)
        hh = ax.hist2d(gene_xy[:,1], -gene_xy[:,0], weights=gene_count_weights,
                       bins = [bin_edges, bin_edges],
                       vmin=hist_range[0], vmax=hist_range[1])
        cb = plt.colorbar(hh[3], ax=ax, label=count_label,
                          fraction=0.045, pad=0.04)

        # Add scalebar to just the first 2D histogram
        if sp_counter==1:
            microns = 100
            scalebar = AnchoredSizeBar(ax.transData, microns, 
                                       str(microns)+r' $\mu$m', 
                                       'lower left', pad=0.1, sep=4, 
                                       color='white', size_vertical=20,
                                       frameon=False)
            ax.add_artist(scalebar)
        
    # format whole figure 
    plt.subplots_adjust(hspace=0.5)
    plt.subplots_adjust(wspace=2.0)
    fig.tight_layout()
    fig.subplots_adjust(top=0.97)

    return fig    

### # of cells w/ gene count > thresh

In [39]:
# 'threshold' method: per pixel, count the cells whose expression of the gene
# exceeds the percentile cut-off computed inside the function.
# Uses the mid-AP sections (MD_mid_z_range) of brain3.
fig = plot_gene_spatial_distribution_in_md(adata_neuronal, MD_core_shell_genes, 
                                           sections=MD_mid_z_range, brain=3,
                                           method='threshold',
                                           binlimits=(-500,500), binsize=30, 
                                           n_cols=5, hist_range=(0,7))

### summed gene counts

In [40]:
# 'sum' method: per pixel, add up the gene's values over the cells that fall
# in it. The colour scale upper limit is raised to 30 for these summed values.
fig = plot_gene_spatial_distribution_in_md(adata_neuronal, MD_core_shell_genes, 
                                           sections=MD_mid_z_range, brain=3,
                                           method='sum',
                                           binlimits=(-500,500), binsize=30, 
                                           n_cols=5, hist_range=(0,30))

# PCA spatial distribution

In [42]:
# Load Mathew's PCA vectors - description from Mathew: 
# "top 50 PCs for those cells in "BRL_TH_subset_brain_1.h5ad" that have been 
# annotated as belonging to MD"
# The first CSV column becomes the index; later cells match it against gene
# names and select PCs by string column labels ('0', '1', ...).
brain1_MD_PCAs = pd.read_csv('../resources/brain1_MD_PCAs_fromMathew.csv', index_col=0)
brain1_MD_PCAs

In [43]:
# List of genes (no Blanks!) shared between the two brains:
# every var_name that does not contain the substring 'Blank'
b13_joint_genes = [gene for gene in adata_br3_neuronal.var_names if 'Blank' not in gene]
len(b13_joint_genes)

In [44]:
# The cell x gene matrices below are built with columns in b13_joint_genes
# order, so the PC loadings must be in that same row order for the dot
# products to pair each gene with its own loading.
missing_pc_genes = [gene for gene in b13_joint_genes if gene not in brain1_MD_PCAs.index]
if missing_pc_genes:
    raise KeyError('genes missing from the PCA table: ' + ', '.join(missing_pc_genes))
MD_PCAs_b13_joint_genes = brain1_MD_PCAs.loc[b13_joint_genes]
MD_PCAs_b13_joint_genes

## brain3

In [45]:
# Dense cell x gene matrix for brain3, restricted to the joint genes
br3_joint_X = adata_br3_neuronal[:, b13_joint_genes].X
cell_x_gene_brain3 = np.array(sparse.csr_matrix.todense(br3_joint_X))
# number of gene columns
cell_x_gene_brain3.shape[1]

In [46]:
# Projection of every brain3 cell onto the first PC only
PC_0 = MD_PCAs_b13_joint_genes['0'].values
cell_PC_0_dot_products = np.dot(cell_x_gene_brain3, PC_0)

# Column labels of the leading PCs
pc_top5 = [str(k) for k in range(5)]
pc_top10 = [str(k) for k in range(10)]

# Dot product between each cell's gene expression vector and each of the top
# 10 PC vectors, transposed so that each row is one PC
cell_PC_dot_products_b3 = np.dot(cell_x_gene_brain3,
                                 MD_PCAs_b13_joint_genes[pc_top10]).T

In [47]:
section = '1199651054'
plate = 69

# whether to overlay the MD outline; also selects the file name suffix
show_MD_outline = True
fig_suffix = '_MDonly.png' if show_MD_outline else '_noMD.png'

adata_br3_joint = adata_br3_neuronal[:,b13_joint_genes]
# one saved figure per PC (rows of cell_PC_dot_products_b3)
for i, pc in enumerate(cell_PC_dot_products_b3):
    fig = plot_PC_spatial_dist(adata_br3_joint, pc, i,
                               section, plate, brain=3,
                               annotation=show_MD_outline, colormap='coolwarm')
    fig_name = 'pc_spatial_distribution_brain3_pc'+str(i)+fig_suffix
    fig.savefig(results_dir+fig_name,bbox_inches='tight')

## brain1

In [48]:
# Dense cell x gene matrix for brain1, restricted to the joint genes
br1_joint_X = adata_br1_neuronal[:, b13_joint_genes].X
cell_x_gene_brain1 = np.array(sparse.csr_matrix.todense(br1_joint_X))
# number of gene columns
cell_x_gene_brain1.shape[1]

In [49]:
# Column labels of the leading PCs
pc_top5 = [str(k) for k in range(5)]
pc_top10 = [str(k) for k in range(10)]

# Dot product between each brain1 cell's gene expression vector and each of
# the top 10 PC vectors, transposed so that each row is one PC
cell_PC_dot_products_b1 = np.dot(cell_x_gene_brain1,
                                 MD_PCAs_b13_joint_genes[pc_top10]).T

In [50]:
section = '1198980101'
plate = 73

# whether to overlay the MD outline; also selects the file name suffix
show_MD_outline = True

for i, pc in enumerate(cell_PC_dot_products_b1):
    # print(pc[:5])
    # brain=1 so that the overlaid MD polygon(s) are the brain1 annotations
    # for this plate, matching the brain1 cells being plotted
    fig = plot_PC_spatial_dist(adata_br1_neuronal[:,b13_joint_genes], pc, i,
                               section, plate, brain=1,
                               annotation=show_MD_outline, colormap='Oranges')
    if show_MD_outline:
        fig_name = 'pc_spatial_distribution_brain1_pc'+str(i)+'.png'
    else:
        fig_name = 'pc_spatial_distribution_brain1_pc'+str(i)+'_noMD.png'
    fig.savefig(results_dir+fig_name,bbox_inches='tight')