# Notebook to simulate HCR data

Modified from Mathew's pre-CodeOcean notebook to run in this capsule.

In [1]:
import numpy as np
import pandas as pd
import anndata as ad
import scanpy as sc
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

%matplotlib inline

## Load old brain1 & brain3 data assets

In [2]:
# Read the annotated TH/ZI-only MERFISH AnnData files for brain1 and brain3
adata_brain1 = ad.read_h5ad(
    "../../data/merfish_609882_AIT17.1_annotated_TH_ZI_only_2023-02-16_00-00-00/atlas_brain_609882_AIT17_1_annotated_TH_ZI_only.h5ad"
)
adata_brain3 = ad.read_h5ad(
    "../../data/merfish_638850_AIT17.custom_CCF_annotated_TH_ZI_only_2023-05-04_00-00-00/atlas_brain_638850_AIT17_custom_CCF_annotated_TH_ZI_only.h5ad"
)

In [3]:
# Keep only cells whose division label is one of the neuronal divisions
neuronal_divisions = [
    "3 PAL-sAMY-TH-HY-MB-HB neuronal",
    "2 Subpallium GABAergic",
    "4 CBX-MOB-other neuronal",
]
# Same filter applied to both brains; .copy() turns each view into a
# standalone AnnData object
adata_brain1, adata_brain3 = (
    adata[adata.obs["division_id_label"].isin(neuronal_divisions)].copy()
    for adata in (adata_brain1, adata_brain3)
)

In [4]:
# Display the summary repr of the neuronal-only brain1 AnnData
adata_brain1

In [5]:
# Inspect the 'section' categories at positions 2-23 (at most 22 IDs)
# NOTE(review): presumably the source of the hard-coded BRAIN1_SECTION_IDS
# list defined further down — confirm
adata_brain1.obs['section'].cat.categories[2:24]
# len(adata_brain1.obs['section'].cat.categories[2:24])

In [6]:
# Paired section IDs: column 0 is the brain1 section, column 1 is the
# brain3 section it is paired with
ALL_SECTION_PAIRS_BRAIN1_BRAIN3 = np.array(
    [
        # (brain1 id,   brain3 id)
        ('1198980077', '1199651060'),
        ('1198980089', '1199651054'),
        ('1198980101', '1199651048'),
        ('1198980108', '1199651045'),
        ('1198980114', '1199651042'),
        ('1198980120', '1199651039'),
        ('1198980134', '1199651033'),
        ('1198980146', '1199651027'),
        ('1198980152', '1199651024'),
    ],
    dtype=object,
)

In [7]:
# Brain1 section IDs plotted by hcrVis_multisection, one figure row each
BRAIN1_SECTION_IDS = np.array(
    [
        '1198980077', '1198980080',
        '1198980086', '1198980089',
        '1198980092', '1198980095',
        '1198980098', '1198980101',
        '1198980105', '1198980108',
        '1198980111', '1198980114',
        '1198980117', '1198980120',
        '1198980123', '1198980131',
        '1198980134', '1198980137',
        '1198980140', '1198980146',
        '1198980149', '1198980152',
    ],
    dtype=object,
)

In [8]:
# Column 0 of the pair table: the brain1 section ID of each pair
ALL_SECTION_PAIRS_BRAIN1_BRAIN3[:,0]

## Plotting functions

### def hcrVis()

In [9]:
def hcrVis(merData,genes, section = '1198980089'):
    """Plot separate and overlaid spatial expression of 3 genes for one section.

    Draws one row of four scatter plots of the cells in ``section``: the
    first gene in red, the second in green, the third in blue, and an RGB
    overlay of all three. Each gene is scaled by its own maximum within
    the section.

    Parameters
    ----------
    merData : AnnData
        Must provide ``obs["section"]`` and ``obsm["spatial_cirro"]``
        (x in column 0, y in column 1) and be indexable by gene name.
    genes : sequence of str
        Gene names; the first three are plotted. The sequence is NOT
        modified, so the same list can be reused across calls.
    section : str, optional
        Value of ``obs["section"]`` selecting the cells to plot.

    Raises
    ------
    IndexError
        If fewer than three genes are supplied.
    ValueError
        If no cells belong to ``section``.
    """
    # Copy the three gene names; indexing (rather than slicing) keeps the
    # IndexError for short inputs. Titles live in a local list so the
    # caller's list is never appended to.
    gene_names = [genes[k] for k in range(3)]
    labels = gene_names + ['Overlay']

    # Subset based on requested section
    merSection = merData[merData.obs["section"] == section]
    if merSection.n_obs == 0:
        raise ValueError("no cells found for section {!r}".format(section))

    # Normalise each gene to its within-section maximum (one column each)
    channels = []
    for gene in gene_names:
        expr = merSection[:, gene].X
        if hasattr(expr, "toarray"):
            # sparse expression matrices must be densified before concatenation
            expr = expr.toarray()
        expr = np.asarray(expr, dtype=float).reshape(-1, 1)
        peak = expr.max()
        # A gene with a maximum of 0 would otherwise give 0/0 = NaN colours
        channels.append(expr / peak if peak != 0 else expr)

    # Overlay uses all three channels; single-gene panels zero the other two
    colorRGB = np.concatenate(channels, axis=1)
    colors = []
    for k in range(3):
        single = np.zeros_like(colorRGB)
        single[:, k] = colorRGB[:, k]
        colors.append(single)
    colors.append(colorRGB)

    # For each channel, plot spatial expression
    _, axs = plt.subplots(1, 4, figsize=(24, 3), dpi=80)
    xy = merSection.obsm["spatial_cirro"]
    for ax, color, label in zip(axs, colors, labels):
        ax.scatter(xy[:, 0], xy[:, 1], s=30, marker='.', color=color)
        ax.set_title(label)
        ax.axis('equal')
        ax.axis('off')

### def hcrVis_multisection(merData, genes)

In [10]:
def hcrVis_multisection(merData, genes, section_ids=None):
    """Plot separate and overlaid expression of 3 genes, one row per section.

    Builds a grid with one row per section and four columns: the first
    gene in red, the second in green, the third in blue, and an RGB
    overlay. Each gene is scaled by its own maximum within each section.

    Parameters
    ----------
    merData : AnnData
        Must provide ``obs["section"]`` and ``obsm["spatial_cirro"]``
        (x in column 0, y in column 1) and be indexable by gene name.
    genes : sequence of str
        Exactly three gene names. The sequence is NOT modified.
    section_ids : sequence of str, optional
        Sections to plot, in row order. Defaults to ``BRAIN1_SECTION_IDS``.

    Raises
    ------
    ValueError
        If ``genes`` does not hold exactly three entries, or if a
        requested section contains no cells.
    """
    if len(genes) != 3:
        raise ValueError('must input exactly 3 genes')
    if section_ids is None:
        section_ids = BRAIN1_SECTION_IDS

    # Titles live in a local list so the caller's list is never appended to
    gene_names = list(genes)
    labels = gene_names + ['Overlay']

    n_cols = len(labels) # 3 genes + overlay
    n_rows = len(section_ids)

    # 75 inches tall for the default 22 sections; scale with the row count.
    # squeeze=False keeps axs 2-D even when only one section is requested.
    _, axs = plt.subplots(n_rows, n_cols, figsize=(20, 75 * n_rows / 22),
                          squeeze=False)

    for row_axs, sec_id in zip(axs, section_ids):

        # Subset based on requested section
        merSection = merData[merData.obs["section"] == sec_id]
        if merSection.n_obs == 0:
            raise ValueError("no cells found for section {!r}".format(sec_id))

        # Normalise each gene to its within-section maximum (one column each)
        channels = []
        for gene in gene_names:
            expr = merSection[:, gene].X
            if hasattr(expr, "toarray"):
                # sparse expression matrices must be densified first
                expr = expr.toarray()
            expr = np.asarray(expr, dtype=float).reshape(-1, 1)
            peak = expr.max()
            # A gene with a maximum of 0 would otherwise give NaN colours
            channels.append(expr / peak if peak != 0 else expr)

        # Overlay uses all channels; single-gene panels zero the other two
        colorRGB = np.concatenate(channels, axis=1)
        colors = []
        for k in range(3):
            single = np.zeros_like(colorRGB)
            single[:, k] = colorRGB[:, k]
            colors.append(single)
        colors.append(colorRGB)

        # For each channel, plot spatial expression
        xy = merSection.obsm["spatial_cirro"]
        for ax, color, label in zip(row_axs, colors, labels):
            ax.scatter(xy[:, 0], xy[:, 1], s=20, marker='.', color=color)
            ax.set_title(label)
            ax.set_aspect('equal', 'box')
            ax.axis('off')

## Hemi Thalamus HCR Bridging Dataset Gene Selections

### Round 1 Gene Set 

*(decided on 1/31/24)*

Pvalb

C1ql3

Slc17a7 (VGLUT1)

In [11]:
# Round 1 gene set, plotted for every section in BRAIN1_SECTION_IDS
geneList = ["Pvalb", "C1ql3", "Slc17a7"]
hcrVis_multisection(adata_brain1, geneList)

# Alternative kept for reference: one hcrVis row per paired brain1 section
# sections_brain1 = ALL_SECTION_PAIRS_BRAIN1_BRAIN3[:,0]
# for i, sec in enumerate(sections_brain1):
#     hcrVis(adata_brain1,geneList,sec)

#### would Gad2 be better than Pvalb?

Gad2 labels the ZI in addition to the TH, whereas Pvalb is limited to just the TH (it is not highly expressed in the ZI).

In [12]:
# Round 1 set with Gad2 in place of Pvalb
geneList = ["Gad2", "C1ql3", "Slc17a7"]
hcrVis_multisection(adata_brain1, geneList)

In [13]:
# Same comparison with Gad1 as the first gene
geneList = ["Gad1", "C1ql3", "Slc17a7"]
hcrVis_multisection(adata_brain1, geneList)

#### Would Calb2 be better than C1ql3 in the posterior?

Yes, probably best to go with Gad2, Calb2, Slc17a7 for this 1st round of genes

In [14]:
# Gad2 set with Calb2 in place of C1ql3
geneList = ["Gad2", "Calb2", "Slc17a7"]
hcrVis_multisection(adata_brain1, geneList)

#### Calb2 / C1ql3 bleedthrough test

In [15]:
# Calb2 (red) and C1ql3 (green) plotted together, with Slc17a7 in blue
geneList = ["Calb2", "C1ql3", "Slc17a7"]
hcrVis_multisection(adata_brain1, geneList)

#### Exploring other gene combos

In [16]:
geneList = ["Gad2", "Calb2", "Cnih3"]

# Brain1 section of each brain1/brain3 pair (column 0 of the pair table)
sections_brain1 = ALL_SECTION_PAIRS_BRAIN1_BRAIN3[:, 0]

# One figure row per paired section; the enumerate index was never used
for sec in sections_brain1:
    hcrVis(adata_brain1, geneList, sec)

In [17]:
geneList = ["C1ql3", "Syndig1l", "Ankrd6"]

# Reuses sections_brain1 from the previous cell; the enumerate index was
# never used, so iterate over the sections directly
for sec in sections_brain1:
    hcrVis(adata_brain1, geneList, sec)

In [18]:
geneList = ["C1ql3", "Calb2", "Slc17a7"]

# Reuses sections_brain1 defined in an earlier cell; the enumerate index
# was never used, so iterate over the sections directly
for sec in sections_brain1:
    hcrVis(adata_brain1, geneList, sec)

#### Would Slc17a6(VGLUT2) be better than Slc17a7(VGLUT1)?

Nope — Slc17a7 (VGLUT1) remains the better choice.

In [19]:
# Round 1 set with Slc17a6 (VGLUT2) in place of Slc17a7 (VGLUT1)
geneList = ["Pvalb", "C1ql3", "Slc17a6"]

# Brain1 section of each brain1/brain3 pair (column 0 of the pair table)
sections_brain1 = ALL_SECTION_PAIRS_BRAIN1_BRAIN3[:, 0]

# One figure row per paired section; the enumerate index was never used
for sec in sections_brain1:
    hcrVis(adata_brain1, geneList, sec)

## Other gene sets

In [20]:
# Figure out HCR probes to order: round 1 set on section 1198980089
geneList = ["Pvalb", "C1ql3", "Slc17a7"]
hcrVis(adata_brain1, geneList, "1198980089")

In [21]:
# Figure out HCR probes to order: Scn4b / Hs3st1 / Slc17a7
geneList = ["Scn4b", "Hs3st1", "Slc17a7"]
hcrVis(adata_brain1, geneList, "1198980089")

In [22]:
# Figure out HCR probes to order: Kirrel3 / C1ql3 / Calb2
geneList = ["Kirrel3", "C1ql3", "Calb2"]
hcrVis(adata_brain1, geneList, "1198980089")

In [23]:
# Figure out HCR probes to order: Necab1 / Scn4b / C1ql3
geneList = ["Necab1", "Scn4b", "C1ql3"]
hcrVis(adata_brain1, geneList, "1198980089")

In [24]:
# Kirrel3 / Scn4b / Cbln1 on section 1198980089
geneList = ["Kirrel3", "Scn4b", "Cbln1"]
hcrVis(adata_brain1, geneList, "1198980089")

In [25]:
# Slc17a7 / Kirrel3 / Syndig1l on section 1198980089
geneList = ["Slc17a7", "Kirrel3", "Syndig1l"]
hcrVis(adata_brain1, geneList, "1198980089")

In [26]:
# Calb2 / C1ql3 / Syndig1l on section 1198980120
geneList = ["Calb2", "C1ql3", "Syndig1l"]
hcrVis(adata_brain1, geneList, "1198980120")

### Developmentally relevant genes

In [27]:
# Observation: Pcdh8 and Cdh12 seem to form non-overlapping populations
geneList = ["Pcdh8", "Cdh12", "Tmem132c"]
hcrVis(adata_brain1, geneList, "1198980120")

In [28]:
# Observation: the Cbln genes seem to form a stepwise gradient
geneList = ["Cbln4", "Cbln1", "Cbln2"]
hcrVis(adata_brain1, geneList, "1198980089")

In [29]:
# Tmem intersection: Tmem132c / Deptor / Tmem255a on section 1198980120
geneList = ["Tmem132c", "Deptor", "Tmem255a"]
hcrVis(adata_brain1, geneList, "1198980120")

## Look specifically at MD

In [30]:
# def hcrMD(merData,genes):
#     """ Draw on MERSCOPE data to plot separate and overlaid expression of 3 genes. """
#     plt.figure(figsize=(12,9), dpi=80)
#     xBounds = [-84000, -82000]
#     yBounds = [87000, 88500]
#     spacer = 100
    
#     # Subset to approximately area MD
#     merSection = merData[merData.obs["section"] == '1198980089']
#     merSection = merSection[(merSection.obsm["spatial_cirro"][:,0] > xBounds[0]) & 
#                             (merSection.obsm["spatial_cirro"][:,0] < xBounds[1])]
#     merSection = merSection[(merSection.obsm["spatial_cirro"][:,1] > yBounds[0]) & 
#                             (merSection.obsm["spatial_cirro"][:,1] < yBounds[1])]
#     print(len(MerSection))
#     # Get normalized expression of each gene
#     gene1Norm = merSection[:,genes[0]].X / merSection[:,genes[0]].X.max()
#     gene2Norm = merSection[:,genes[1]].X / merSection[:,genes[1]].X.max()
#     gene3Norm = merSection[:,genes[2]].X / merSection[:,genes[2]].X.max()

#     # Convert each genes normalized expression into an RGB value
#     colorR = np.concatenate((gene1Norm, np.zeros([len(gene1Norm),2])),axis=1)
#     colorG = np.concatenate((np.zeros([len(gene1Norm),1]),gene2Norm,np.zeros([len(gene1Norm),1])),axis=1)
#     colorB = np.concatenate((np.zeros([len(gene1Norm),2]),gene3Norm),axis=1)
    
#     # Combine each gene for overlaid plot
#     colorRGB = np.concatenate((gene1Norm, gene2Norm, gene3Norm),axis=1)
#     colors = (colorR, colorG, colorB, colorRGB)
#     genes.append('Overlay') # Append for labeling purposes
    
#     # Plot
#     plt.figure(figsize=(16,16))

#     # For each channel, plot spatial expression
#     for count, color in enumerate(colors):
#         plt.subplot(2,2,count+1)
#         plt.subplots_adjust(wspace=0.01, hspace=0.1)
#         plt.scatter(merSection.obsm["spatial_cirro"][:,0],merSection.obsm["spatial_cirro"][:,1],
#                     s=75, color = color, edgecolors='black') #s=120
#         # Draw bounding box
#         plt.plot([xBounds[1] - spacer,xBounds[0] + spacer,xBounds[0] + spacer,xBounds[1] - spacer,xBounds[1] - spacer],
#                  [yBounds[1] - spacer,yBounds[1] - spacer,yBounds[0] + spacer,yBounds[0] + spacer,yBounds[1] - spacer],'k')
#         plt.title(genes[count],fontsize = 18, fontweight='bold')
#         plt.axis('off')
#         plt.axis('equal')

In [31]:
def hcrMDhorizontal(merData,genes, section='1198980089',
                    xBounds=(-83650, -82000), yBounds=(87200, 88300), spacer=100):
    """Plot separate and overlaid expression of 3 genes in a boxed sub-region.

    Restricts ``section`` to the cells whose ``spatial_cirro`` coordinates
    fall strictly inside ``xBounds`` x ``yBounds`` (per the original inline
    comment, approximately area MD for the default values), then draws one
    row of four panels: the first gene in red, the second in green, the
    third in blue, and an RGB overlay. Each gene is scaled by its own
    maximum within the selected cells. A black rectangle is drawn
    ``spacer`` units outside the bounds on every panel.

    Parameters
    ----------
    merData : AnnData
        Must provide ``obs["section"]`` and ``obsm["spatial_cirro"]``
        (x in column 0, y in column 1) and be indexable by gene name.
    genes : sequence of str
        Gene names; the first three are plotted. The sequence is NOT
        modified, so the same list can be reused across calls.
    section : str, optional
        Value of ``obs["section"]`` selecting the section.
    xBounds, yBounds : (low, high), optional
        Exclusive coordinate limits of the region to keep.
    spacer : number, optional
        Margin between the region bounds and the drawn rectangle.

    Raises
    ------
    IndexError
        If fewer than three genes are supplied.
    ValueError
        If no cells fall inside the requested section and bounds.
    """
    # Copy the three gene names; indexing (rather than slicing) keeps the
    # IndexError for short inputs. Titles live in a local list so the
    # caller's list is never appended to.
    gene_names = [genes[k] for k in range(3)]
    labels = gene_names + ['Overlay']

    # Subset to the requested section, then to the bounding region
    merSection = merData[merData.obs["section"] == section]
    xy = merSection.obsm["spatial_cirro"]
    inside = ((xy[:, 0] > xBounds[0]) & (xy[:, 0] < xBounds[1]) &
              (xy[:, 1] > yBounds[0]) & (xy[:, 1] < yBounds[1]))
    merSection = merSection[inside]
    if merSection.n_obs == 0:
        raise ValueError(
            "no cells found in section {!r} within x={!r}, y={!r}".format(
                section, tuple(xBounds), tuple(yBounds)))

    # Normalise each gene to its maximum within the region (one column each)
    channels = []
    for gene in gene_names:
        expr = merSection[:, gene].X
        if hasattr(expr, "toarray"):
            # sparse expression matrices must be densified before concatenation
            expr = expr.toarray()
        expr = np.asarray(expr, dtype=float).reshape(-1, 1)
        peak = expr.max()
        # A gene with a maximum of 0 would otherwise give 0/0 = NaN colours
        channels.append(expr / peak if peak != 0 else expr)

    # Overlay uses all three channels; single-gene panels zero the other two
    colorRGB = np.concatenate(channels, axis=1)
    colors = []
    for k in range(3):
        single = np.zeros_like(colorRGB)
        single[:, k] = colorRGB[:, k]
        colors.append(single)
    colors.append(colorRGB)

    # Corners of the rectangle drawn `spacer` outside the selection bounds
    x_lo, x_hi = xBounds[0] - spacer, xBounds[1] + spacer
    y_lo, y_hi = yBounds[0] - spacer, yBounds[1] + spacer
    box_x = [x_hi, x_lo, x_lo, x_hi, x_hi]
    box_y = [y_hi, y_hi, y_lo, y_lo, y_hi]

    # A single figure; the original also opened an unused, empty one first
    fig, axs = plt.subplots(1, 4, figsize=(32, 8))
    fig.subplots_adjust(wspace=0.01, hspace=0.1)

    # For each channel, plot spatial expression
    xy = merSection.obsm["spatial_cirro"]
    for ax, color, label in zip(axs, colors, labels):
        ax.scatter(xy[:, 0], xy[:, 1], s=50, color=color, edgecolors=None) #s=75
        # Draw bounding box
        ax.plot(box_x, box_y, 'k')
        ax.set_title(label, fontsize=18, fontweight='bold')
        ax.axis('off')
        ax.axis('equal')

In [32]:
# Remaining probes: Slc17a7 / Hs3st1 / Scn4b in the MD region
geneList = ["Slc17a7", "Hs3st1", "Scn4b"]
hcrMDhorizontal(adata_brain1, geneList);

In [33]:
# Try to mimic Phillips et al 2019 (sub Slc17a7 for Tnnt1)
# NOTE(review): this gene list does not contain Slc17a7 — confirm whether
# the comment or the gene list is the intended one
geneList = ["Hs3st1", "Necab1", "Calb2"]
hcrMDhorizontal(adata_brain1, geneList)

In [34]:
# Try to mimic Phillips et al 2019 (sub Slc17a7 for Tnnt1)
# NOTE(review): this gene list does not contain Slc17a7; the comment looks
# copied from a neighbouring cell — confirm the intended description
geneList = ["Syndig1l", "Kirrel3", "Scn4b"]
hcrMDhorizontal(adata_brain1, geneList)

In [35]:
# Remaining probes: Slc17a7 / Scn4b / C1ql3 in the MD region
geneList = ["Slc17a7", "Scn4b", "C1ql3"]
hcrMDhorizontal(adata_brain1, geneList)

In [36]:
# Try to mimic Phillips et al 2019, substituting Slc17a7 for Tnnt1
geneList = ["Slc17a7", "Necab1", "Calb2"]
hcrMDhorizontal(adata_brain1, geneList)

In [37]:
# Overlap of genes expressed in central MD
geneList = ["Slc17a7", "Hs3st1", "Clmp"]
hcrMDhorizontal(adata_brain1, geneList)

In [38]:
# Overlap of genes expressed broadly across MD
# NOTE(review): "Syndig1l" is listed twice, so the green and blue channels
# are identical — confirm whether a different third gene was intended
geneList = ["Necab1", "Syndig1l", "Syndig1l"]
hcrMDhorizontal(adata_brain1, geneList)

In [39]:
# (Frog face)
geneList = ["C1ql3", "Prkcd", "Scn4b"]
hcrMDhorizontal(adata_brain1, geneList)

In [40]:
# C1ql3 / Syndig1l / Ankrd6 in the MD region
geneList = ["C1ql3", "Syndig1l", "Ankrd6"]
hcrMDhorizontal(adata_brain1, geneList)