## Load Libraries & Data

In [None]:
import importlib
import matplotlib.pyplot as plt
import pathlib
import seaborn as sns
import scanpy as sc
import pandas as pd
import anndata
import os 
import numpy as np 

# Show the current working directory; the relative data path built below is resolved against it.
local_path = os.getcwd()
print(local_path)

In [None]:
# Reference dataset: the .h5ad file name is assembled from the descriptive
# parts below as "<region>_<dataset>_<technology>.<date>.h5ad".
dataset = "reference"
technology = "singleomeCR6"
region = "MTG"
date = "2022-04-08"
filename = os.path.join(
    "../../RNAseq",
    "ingest",
    "output",
    f"{region}_{dataset}_{technology}.{date}.h5ad",
)
print(filename)
adata_ref = sc.read_h5ad(filename)
    
    

In [None]:
# Spatial dataset, read from a fixed absolute path on the shared filesystem.
spatial_h5ad_path = "/allen/programs/celltypes/workgroups/hct/SEA-AD/MERSCOPE/MTG_PAPER_FINAL/MTG_Spatial_2024_07_26.h5ad"
adata_spatial_new = anndata.read_h5ad(spatial_h5ad_path)

In [None]:
# Display the column names of the spatial dataset's `obs` table.
adata_spatial_new.obs.columns

In [None]:
# Number of distinct values in the "Unique Donor ID" obs column.
len(adata_spatial_new.obs["Unique Donor ID"].unique())

In [None]:
# Number of distinct values in the "uwa" obs column.
len(adata_spatial_new.obs["uwa"].unique())

In [None]:
# Gene panel used downstream: every entry of the spatial dataset's var index
# whose name does not contain "Blank".
MERSCOPE_genes = [
    gene_name
    for gene_name in adata_spatial_new.var.index
    if "Blank" not in gene_name
]

## Compute Averages


In [None]:
def compute_averages(temp_adata, subclass_field="subclass", subset_genes_to = None):
    """Return the mean expression of every gene within each subclass.

    Parameters
    ----------
    temp_adata
        AnnData-like object. It must expose ``.obs`` and ``.var`` tables,
        an expression matrix ``.X`` (rows = observations, columns = genes)
        and support row selection with a boolean mask (``temp_adata[mask]``).
    subclass_field : str
        Name of the ``.obs`` column holding the group label of each row.
    subset_genes_to : list or None
        Gene names to keep in the output. ``None`` or an empty list keeps
        every gene. Genes are returned in the order they appear in
        ``temp_adata``, not in the order of this list.

    Returns
    -------
    pandas.DataFrame
        One row per distinct label (in order of first appearance) and one
        column per gene. Column labels come from ``var["gene_ids"]`` when
        that column exists, otherwise from the ``var`` index.

    Raises
    ------
    ValueError
        If ``subset_genes_to`` is non-empty but is not a ``list``.
    """
    # Validate up front so a bad argument fails before any averaging is done.
    if subset_genes_to and not isinstance(subset_genes_to, list):
        raise ValueError("please supply a list of genes to use for subsetting output")

    labels = temp_adata.obs[subclass_field]
    # Series.unique() returns a plain ndarray for object columns and an
    # extension array for categorical ones; list() handles both, whereas
    # calling .to_numpy() on the result fails for the ndarray case.
    subclass_name = list(labels.unique())

    # A sparse X yields a (1, n_genes) np.matrix from .mean(axis=0);
    # flatten so every group contributes a plain 1-D row.
    avg_exp = [
        np.asarray(temp_adata[labels == s_].X.mean(axis=0)).ravel()
        for s_ in subclass_name
    ]
    avg_values = np.vstack(avg_exp)

    if "gene_ids" in temp_adata.var.keys():
        gene_names = temp_adata.var["gene_ids"].values
    else:
        gene_names = temp_adata.var.index.values

    if subset_genes_to:
        # Set membership keeps the filter linear in the number of genes.
        wanted = set(subset_genes_to)
        keep = np.array([g in wanted for g in gene_names], dtype=bool)
        avg_values = avg_values[:, keep]
        gene_names = [g for g, kept in zip(gene_names, keep) if kept]

    return pd.DataFrame(data=avg_values, index=subclass_name, columns=gene_names)

In [None]:
# Per-subclass mean expression for the spatial data and for the reference
# data, both restricted to the MERSCOPE gene list.
avgnew = compute_averages(
    adata_spatial_new,
    subclass_field="subclass",
    subset_genes_to=MERSCOPE_genes,
)
avgsn = compute_averages(
    adata_ref,
    subclass_field="subclass",
    subset_genes_to=MERSCOPE_genes,
)

In [None]:
# Row order for both average tables (these rows become the heatmap y-axis).
new_subclass = [
    "Astro", "OPC", "Oligo", "Micro-PVM", "VLMC", "Endo",
    "Chandelier", "Pvalb", "Sst", "Lamp5_Lhx6", "Lamp5",
    "Sncg", "Pax6", "Vip", "Sst Chodl",
    "L2/3 IT", "L4 IT", "L5 IT", "L5 ET", "L5/6 NP",
    "L6 IT Car3", "L6 IT", "L6b", "L6 CT",
]

# Label-based row selection: raises KeyError if a listed subclass is missing.
avgnew = avgnew.loc[new_subclass]
avgsn = avgsn.loc[new_subclass]

In [None]:
# Column order for both average tables (these columns become the heatmap x-axis).
new_genes = [
    "SLC14A1", "ETNPPL", "PAX6", "RYR3", "GPC5", "FBXL7", "CACHD1", "NPAS3",
    "LUZP2", "GRM7",
    "NLGN1", "ITGA8", "TNR", "TMEM255A", "PDZD2", "PDE4B", "MOG", "CD22",
    "CD74", "CTSS", "DCN", "DLC1", "SNTB1", "EBF1", "LAMA4", "PRRX1",
    "PRKG1", "CDH6", "DACH1", "ID3", "NOSTRIN", "PALMD",

    "SLC32A1", "GAD2", "CNTNAP5", "KAZN", "UNC5B", "CNTN5", "HPSE2", "ZNF804A",
    "PVALB", "ANK1", "ASTN2", "SOX6", "BTBD11", "ZNF385D", "SULF1", "CALB1",
    "LHX6", "STXBP6", "GRIN3A", "PRRT4", "GRIP2", "DLX1", "PDGFD", "LAMP5",
    "EYA4", "RGS12", "EGFR", "NDNF", "VIP", "ROBO1",
    "L3MBTL4", "SORCS3", "NOS1", "NPY", "TACR1", "CHODL",

    "GRIN2A", "SATB2", "DCLK1", "ATRNL1", "CSMD1", "RBFOX3", "RBFOX1", "CUX2",
    "RFX3", "HS6ST3", "CLSTN2", "SLC24A2", "TSHZ2", "RORB", "LRRK1", "DCC",
    "CACNA2D3",
    "KCNIP4", "TAFA1", "GRM8", "NRG1", "SV2C", "CARTPT", "HCN1", "PEX5L",
    "SLIT3", "SORCS1",
    "HS3ST2", "TOX", "FEZF2", "HTR2C", "NXPH2", "TLL1", "COL11A1", "SMYD1",
    "NTNG2", "THEMIS", "ITGB8", "TH", "SEMA6D", "CBLN2", "TMEM132D", "ADAMTS3",
    "DGKG", "KIAA1217", "GRIK3", "SCUBE1", "ADAMTSL1", "SEMA3E",

    "MEIS2", "LRP1B", "KCNMB2", "FOXP2", "CA10",
    "ROBO2",
    "NFIA", "FRMPD4",
    "GRID2", "FGF13",
    "TENM2", "GALNTL6",
    "LRRC4C", "KIRREL3", "ASIC2", "RGS6",
    "NKAIN2", "GRIP1",
    "PLD5", "PLCB1", "FGF12",
    "HTR2A",
    "ZMAT4",
]

In [None]:

# Put the columns of both tables in the order given by `new_genes`;
# a gene missing from either table raises KeyError.
avgnew = avgnew[new_genes]
avgsn = avgsn[new_genes]

## Plotting at the subclass level

In [None]:
# Two stacked heatmaps (spatial averages on top, reference averages below),
# each showing per-gene z-scores of the subclass averages.
plt.figure(figsize=[60, 30])  # Total figure size
sns.set(font_scale=1.8)  # Font size large enough to read the gene labels

heatmap_panels = (
    ("MERFISH detection (z-score by gene)", avgnew),
    ("snRNASeq detection (z-score by gene)", avgsn),
)
for panel_number, (panel_title, averages) in enumerate(heatmap_panels, start=1):
    plt.subplot(2, 1, panel_number)
    plt.title(panel_title)
    # Standardise each column (gene) over the rows: subtract the column
    # mean, then divide by the column standard deviation.
    t = averages.values
    t = t - t.mean(axis=0)
    t = t / t.std(axis=0)
    out = sns.heatmap(
        t,
        vmin=-2,
        vmax=5,
        center=0,
        yticklabels=averages.index,
        xticklabels=averages.columns,
        cmap="RdBu_r",
    )

# plt.savefig("../plots_for_figures/new_detection_MERSCOPE_v_snRNASeq_subclass.pdf")
# plt.savefig("../plots_for_figures/new_detection_MERSCOPE_v_snRNASeq_subclass.svg")