In [None]:
%load_ext autoreload 
%autoreload 2
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib.colors import Colormap
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))


import pathlib
import pandas as pd
import numpy as np

import json
from shapely.geometry import Polygon


import geojson


import anndata as ad
import seaborn as sns

from cycler import cycler
import matplotlib
matplotlib.rcParams['svg.fonttype']='none'
matplotlib.rcParams['pdf.fonttype']=42

import scipy

## Import and organize the data



In [None]:
# Load the MTG MERSCOPE AnnData object; `adad.obs` supplies the per-cell
# metadata (Donor, selected_cells, ...) read by the cells below.
adad = ad.read_h5ad(
    filename="/allen/programs/celltypes/workgroups/hct/SEA-AD/MERSCOPE/MTG_PAPER_FINAL/MTG_Spatial_2024_07_26.h5ad"
)

In [None]:
# Print each donor ID found in the MERSCOPE object, one per line.
# A plain loop avoids building (and then having to suppress) a throwaway
# list of None values just to get the print side effect.
for donor_id in adad.obs["Donor"].unique():
    print(donor_id)

In [None]:
# Per-cell snRNA-seq metadata table; the cells below read its donor_name,
# class_scANVI, subclass_scANVI and supertype columns.
rnaseqprops = pd.read_csv(
    filepath_or_buffer="/allen/programs/celltypes/workgroups/hct/SEA-AD/MERSCOPE/proportion_analysis/MTG_AD_metadata_keepers.2022-04-13.csv"
)

In [None]:
# Restrict the snRNA-seq table to donors that also appear in MERSCOPE.
# Note: `shared_donors` holds every MERSCOPE donor ID; the intersection with
# the snRNA-seq donors is only taken when filtering `rnaseqprops` below.
shared_donors = adad.obs["Donor"].unique()

in_merscope = rnaseqprops["donor_name"].isin(shared_donors)
shared_rnaseq = rnaseqprops.loc[in_merscope, :]
print(shared_donors.shape)

In [None]:
# Supertypes counted as vulnerable ("affected") by the per-donor tallies,
# grouped here by the subclass prefix of each name.
vtypes = [
    # Lamp5
    "Lamp5_3", "Lamp5_5",
    # Sncg
    "Sncg_2", "Sncg_1", "Sncg_8",
    # Vip
    "Vip_2", "Vip_11", "Vip_13", "Vip_1", "Vip_12",
    # Sst
    "Sst_3", "Sst_19", "Sst_11", "Sst_20",
    "Sst_22", "Sst_23", "Sst_25", "Sst_2",
    # Pvalb
    "Pvalb_6", "Pvalb_5", "Pvalb_8", "Pvalb_3",
    "Pvalb_2", "Pvalb_15", "Pvalb_14",
    # L2/3 IT
    "L2/3 IT_1", "L2/3 IT_6", "L2/3 IT_7", "L2/3 IT_5", "L2/3 IT_13",
    "L2/3 IT_10", "L2/3 IT_8", "L2/3 IT_12", "L2/3 IT_3",
    # Astro / OPC / Oligo / Micro-PVM
    "Astro_2", "OPC_2", "Oligo_2", "Micro-PVM_3-SEAAD",
]


In [None]:
# snRNA-seq only, every donor in `rnaseqprops`.
# Builds one record per donor holding: the neuron total, vulnerable and
# non-vulnerable cell counts for each subclass, and a count for every
# supertype seen in that donor.
all_donor_records = []
for donor_name, donor_cells in rnaseqprops.groupby("donor_name"):
    record = {
        "donor": donor_name,
        # "neurons" = every cell whose class label is not "glia"
        "total_neurons": np.sum(donor_cells.class_scANVI != "glia"),
    }

    # NOTE(review): the subclass list is taken from the shared-donor subset,
    # and supertypes are read from `supertype_scANVI` (the shared-donor cell
    # uses `supertype_scANVI_leiden`) -- confirm both are intended.
    for subclass_name in shared_rnaseq.subclass_scANVI.unique():
        in_subclass = donor_cells.loc[donor_cells.subclass_scANVI == subclass_name, :]
        supertype_labels = in_subclass.supertype_scANVI

        # vulnerable vs. everything else within this subclass
        vulnerable_count = np.sum(supertype_labels.isin(vtypes))
        record[subclass_name + "*_v_RNASeq"] = vulnerable_count
        record[subclass_name + "*_nv_RNASeq"] = in_subclass.shape[0] - vulnerable_count

        # one column per supertype present in this donor/subclass
        for supertype_name in list(supertype_labels.unique()):
            matching = in_subclass.loc[supertype_labels == supertype_name, :]
            record[supertype_name + "*_RNAseq"] = matching.shape[0]

    all_donor_records.append(record)

all_donor_props = pd.DataFrame.from_records(all_donor_records)

In [None]:
# Donors present in both datasets: one record per donor with snRNA-seq and
# MERSCOPE counts side by side (neuron totals, vulnerable / non-vulnerable
# counts per subclass, and a count per observed supertype).
per_donor_records = []
for donor_name, rnaseq_cells in shared_rnaseq.groupby("donor_name"):
    print(donor_name)

    from_donor = adad.obs.Donor == donor_name
    # MERSCOPE cells are limited to those flagged in `selected_cells`
    merscope_cells = adad.obs.loc[np.logical_and(adad.obs["selected_cells"], from_donor), :].copy()

    record = {
        "donor": donor_name,
        # taken from the first MERSCOPE row of this donor
        "donor_pseudotime": adad.obs.loc[from_donor, "donor_pseudotime"].iloc[0],
        # "neurons" = every cell whose class label is not "glia"
        "total_neurons_RNASeq": np.sum(rnaseq_cells.class_scANVI != "glia"),
        "total_neurons_MERSCOPE": np.sum(merscope_cells["class"] != "glia"),
    }

    # donors without any selected MERSCOPE cell are left out of the table
    if merscope_cells.shape[0] == 0:
        print("no cells in this donor")
        continue

    for subclass_name in shared_rnaseq.subclass_scANVI.unique():
        # (cells of this subclass, vulnerable key suffix, non-vulnerable key
        # suffix, per-supertype key suffix) -- snRNA-seq first, then MERSCOPE.
        # The per-supertype snRNA-seq suffix is spelled "RNAseq", not "RNASeq".
        per_modality = (
            (rnaseq_cells.loc[rnaseq_cells.subclass_scANVI == subclass_name, :],
             "*_v_RNASeq", "*_nv_RNASeq", "*_RNAseq"),
            (merscope_cells.loc[merscope_cells.subclass == subclass_name, :],
             "*_v_MERSCOPE", "*_nv_MERSCOPE", "*_MERSCOPE"),
        )
        for cells, v_suffix, nv_suffix, supertype_suffix in per_modality:
            supertype_labels = cells.supertype_scANVI_leiden

            vulnerable_count = np.sum(supertype_labels.isin(vtypes))
            record[subclass_name + v_suffix] = vulnerable_count
            record[subclass_name + nv_suffix] = cells.shape[0] - vulnerable_count

            # one column per supertype present in this donor/subclass
            for supertype_name in list(supertype_labels.unique()):
                matching = cells.loc[supertype_labels == supertype_name, :]
                record[supertype_name + supertype_suffix] = matching.shape[0]

    per_donor_records.append(record)

per_donor_props = pd.DataFrame.from_records(per_donor_records)

# Article Figures

In [None]:

# Per-donor agreement between MERSCOPE and snRNA-seq: fraction of all neurons
# that belong to vulnerable ("affected") supertypes of a subclass.
plt.figure(figsize = [4,5])
for s in ["Sst"]:
    merscope_fraction = per_donor_props.loc[:, s+"*_v_MERSCOPE"] / per_donor_props.loc[:, "total_neurons_MERSCOPE"]
    rnaseq_fraction = per_donor_props.loc[:, s+"*_v_RNASeq"] / per_donor_props.loc[:, "total_neurons_RNASeq"]

    # Use log10 on BOTH axes so the plotted values match the axis labels
    # (np.log would put the x axis in natural-log units).
    sns.scatterplot(x = np.log10(merscope_fraction),
                    y = np.log10(rnaseq_fraction)) #hue = per_donor_props.loc[:,"donor_pseudotime"],

    # Pearson r is computed on the untransformed fractions, not on the logs.
    corr_values = scipy.stats.pearsonr(merscope_fraction, rnaseq_fraction)
    plt.title(s+ " affected types in MERSCOPE and RNASeq\n correlation = "+str(corr_values[0])[:5] )
    plt.xlabel("MERSCOPE log10(affected / total neurons)")
    plt.ylabel("RNASeq log10(affected / total neurons)")
    


# Focus on affected Sst supertypes over CPS

In [None]:
# Use the tab10 palette for the colour cycle (this changes the global rcParams).
matplotlib.rcParams['axes.prop_cycle'] = cycler(color=plt.cm.tab10.colors)

# log10 fraction of neurons in vulnerable Sst supertypes against donor
# pseudotime, one linear regression per modality (MERSCOPE drawn first).
plt.figure(figsize=[4, 5])

for s in ["Sst"]:
    for modality in ("MERSCOPE", "RNASeq"):
        affected = per_donor_props.loc[:, s + "*_v_" + modality]
        all_neurons = per_donor_props.loc[:, "total_neurons_" + modality]
        sns.regplot(
            x=per_donor_props.loc[:, "donor_pseudotime"],
            y=np.log10(affected / all_neurons),
            order=1,
            label=s + " " + modality,
        )

plt.title("Affected Sst loss over CPS")
plt.legend()
plt.ylabel("log10(affected cell count / all neurons)")
plt.xlabel("CPS")
plt.savefig("/allen/programs/celltypes/workgroups/hct/SEA-AD/MERSCOPE/MTG_PAPER_FINAL/SST_proportions_vs_CPS.svg")

In [None]:
# Per-cell transcript total over every feature whose name does not contain
# "Blank-" (presumably blank control probes -- confirm against the panel).
genes = [g for g in adad.var.index if "Blank-" not in g]
# If the "raw" layer is a scipy sparse matrix, .sum(axis=1) returns an
# (n_cells, 1) np.matrix; flatten to a 1-D array so the obs column holds
# plain per-cell scalars. For a dense layer this is a no-op.
adad.obs["transcript_counts"] = np.asarray(adad[:,genes].layers["raw"].sum(axis=1)).ravel()


In [None]:
# Mean per-cell transcript count across cells flagged in `selected_cells`.
selected_transcripts = adad.obs.loc[adad.obs.selected_cells, :].transcript_counts
print(f"mean transcripts in selected cells:{selected_transcripts.mean()!s}")

In [None]:
# Mean of the `volume` column across cells flagged in `selected_cells`.
selected_volume = adad.obs.loc[adad.obs.selected_cells, :].volume
print(f"mean volume in selected cells:{selected_volume.mean()!s}")