### Load needed libraries

In [None]:
import os
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np
import glob
import re
import copy
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import rpy2
%load_ext rpy2.ipython
import warnings
from scipy import stats as sp_stats
from helper_functions import *

# Global scanpy / matplotlib configuration shared by every figure cell below.
sc.settings.n_jobs = 32
sc.set_figure_params(
    scanpy=True,
    dpi=100,
    dpi_save=500,
    frameon=False,
    vector_friendly=True,
    figsize=(10, 10),
    format="png",
)

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# fonttype 42 is set so text in saved PDFs is embedded as TrueType; grids are off by default.
matplotlib.rcParams.update({
    "pdf.fonttype": 42,
    "axes.grid": False,
})

# Working directory at start-up; the input/ and output/ paths below are built from it.
pwd = os.getcwd()

### Load needed datasets/data files

In [None]:
# Every input file read by this notebook sits in the same folder.
_input_dir = os.path.join(pwd, "input", "Extended Data Figure 7")

# From Manuscript Supplementary Table 1
Donors = pd.read_excel(
    os.path.join(_input_dir, "Supplementary Table 1.xlsx"),
    sheet_name="SEA-AD_Cohort_Metadata",
)

# Results from one supertype self projection run from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
RNAseq_reference = pd.read_csv(
    os.path.join(_input_dir, "supertype_iterative_scANVI_results.2022-03-24.csv"),
    index_col=0,
)

# Results from one supertype self projection run for MERFISH genes from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
MERFISH_reference = pd.read_csv(
    os.path.join(_input_dir, "supertype_MERFISH_iterative_scANVI_results.2022-07-01.csv"),
    index_col=0,
)

# Great Apes metadata, from https://www.science.org/doi/10.1126/science.adf6812
great_ape_metadata = pd.read_csv(
    os.path.join(_input_dir, "Great_ApesMetadata_version101_20220321.csv"),
    index_col=0,
)

### Extended Data Figure 7a

In [None]:
# Restrict the cohort to donors flagged "Y" in the MERFISH2 column. The explicit
# copy makes the column assignments below write to an independent frame instead
# of to a slice of the full cohort table (avoids SettingWithCopy behaviour).
Donors = Donors.loc[Donors["MERFISH2"] == "Y", :].copy()

# True when the APOE genotype string contains a "4".
Donors["APOE4 Status"] = Donors["APOE Genotype"].str.contains("4")

# (label, boolean mask) pairs, evaluated in this order. A donor meeting exactly
# one criterion receives that criterion's label; a donor meeting more than one
# ends up as "Multiple".
comorbidity_criteria = [
    ("CAA", Donors["Overall CAA Score"].isin(["Severe"])),
    ("LBD", Donors["Highest Lewy Body Disease"].isin(["Neocortical (Diffuse)"])),
    ("LATE-NC", Donors["LATE"].isin(["LATE Stage 3"])),
    ("Arteriolosclerosis", Donors["Arteriolosclerosis"].isin(["Severe"])),
    ("Atherosclerosis", Donors["Atherosclerosis"].isin(["Severe"])),
    ("Microinfarcts", Donors["Total Microinfarcts (not observed grossly)"] > 6),
]

Donors["Co-morbidity"] = "None"
for label, meets_criterion in comorbidity_criteria:
    # Snapshot which donors already carry a label before this criterion is applied.
    already_labelled = Donors["Co-morbidity"] != "None"
    Donors.loc[meets_criterion & already_labelled, "Co-morbidity"] = "Multiple"
    Donors.loc[meets_criterion & ~already_labelled, "Co-morbidity"] = label

In [None]:
%%R -i Donors
# Code comes from Mike H

library(pheatmap)

vars=colnames(Donors)
ustage=unique(Donors[,"Overall AD neuropathological Change"])

# Split the donors by Overall AD neuropathological Change; the sections below reuse these subsets
adnc=Donors[,"Overall AD neuropathological Change"]
datnad=Donors[adnc=="Not AD",]
datlow=Donors[adnc=="Low",]
datmid=Donors[adnc=="Intermediate",]
dathigh=Donors[adnc=="High",]

# Sex counts, one column per group
adncgroups=list(datnad,datlow,datmid,dathigh)
mfmat=matrix(0,2,4,dimnames=list(c("Male","Female"),c("No AD","Low","Intermediate","High")))
for (j in seq_along(adncgroups)) {
  sexcounts=table(adncgroups[[j]][,"Sex"])
  # Entries are taken by position: the second table entry fills the "Male" row, the first the "Female" row
  # (assumes both sexes occur in every group so the positions are stable -- TODO confirm)
  mfmat[1,j]=sexcounts[2]
  mfmat[2,j]=sexcounts[1]
}

# Convert each column of counts into within-group proportions
mfper=sweep(mfmat,2,colSums(mfmat),"/")


pdf("output/Extended Data Figure 7a_barplot_Sex_by_Overall AD neuropathological Change.pdf",width=5,height=5)
par(mfrow=c(1,1))
barplot(mfper,horiz=TRUE,col=c("lightblue","antiquewhite2"))
dev.off()


# APOE4 carrier proportions within each Overall AD neuropathological Change group.
# Carriers and non-carriers are counted explicitly by value. The previous version built the
# matrix twice (the first copy was overwritten), read counts by position from table(), and
# hard-coded zero carriers for the "No AD" and "Low" columns; counting by value gives the same
# numbers when those groups have no carriers and stays correct if they do.
apoe4groups=list(datnad,datlow,datmid,dathigh)
mfmat=matrix(0,2,4,dimnames=list(c("APOE4","No APOE4"),c("No AD","Low","Intermediate","High")))
for (j in seq_along(apoe4groups)) {
  # Normalise to upper-case text so logical TRUE/FALSE and "True"/"False" strings are both handled;
  # missing values match neither and are left out of both counts (as table() would do)
  apoe4flag=toupper(as.character(apoe4groups[[j]][,"APOE4 Status"]))
  mfmat[1,j]=sum(apoe4flag %in% "TRUE")
  mfmat[2,j]=sum(apoe4flag %in% "FALSE")
}

# Convert each column of counts into within-group proportions
mfper=mfmat
for (j in 1:4) {
  mfper[,j]=mfper[,j]/sum(mfper[,j])
}

pdf("output/Extended Data Figure 7a_barplot_APOE4 Status_by_Overall AD neuropathological Change.pdf",width=5,height=5)
par(mfrow=c(1,1))
barplot(mfper,horiz=T,col=c("darkseagreen","gray90"))
dev.off()


# Dementia proportions within each Overall AD neuropathological Change group.
# The previous version filled a single cell of mfmat and never rebuilt mfper from it, so the
# saved figure re-plotted the proportions left over from the APOE4 section. The matrix is now
# filled for every group and mfper is derived from it before plotting.
coggroups=list(datnad,datlow,datmid,dathigh)
mfmat=matrix(0,2,4,dimnames=list(c("Dementia","No Dementia"),c("No AD","Low","Intermediate","High")))
for (j in seq_along(coggroups)) {
  # Case-insensitive match; assumes the column holds "Dementia" / "No dementia" -- TODO confirm
  # against Supplementary Table 1
  cogstatus=tolower(as.character(coggroups[[j]][,"Cognitive Status"]))
  mfmat[1,j]=sum(cogstatus %in% "dementia")
  mfmat[2,j]=sum(cogstatus %in% "no dementia")
}

# Convert each column of counts into within-group proportions
mfper=mfmat
for (j in 1:4) {
  mfper[,j]=mfper[,j]/sum(mfper[,j])
}


pdf("output/Extended Data Figure 7a_barplot_Cognitive Status_by_Overall AD neuropathological Change.pdf",width=5,height=5)
par(mfrow=c(1,1))
barplot(mfper,horiz=T,col=c("red","gray90"))
dev.off()

par(las=2)

# Braak
ubraak=unique(Donors[,"Braak"])
ustage=unique(Donors[,"Overall AD neuropathological Change"])

brakorder=c("Braak 0","Braak II","Braak III","Braak IV","Braak V","Braak VI")
stageorder=c("High","Intermediate","Low","Not AD")

# Donor counts: one row per ADNC stage, one column per Braak stage
brakmat=matrix(0,length(stageorder),length(brakorder),dimnames=list(stageorder,brakorder))
for (stage in stageorder) {
  instage=Donors[,"Overall AD neuropathological Change"]==stage
  for (braak in brakorder) {
    # Donors matching both labels; comparisons that evaluate to NA are not counted
    brakmat[stage,braak]=sum(instage & Donors[,"Braak"]==braak,na.rm=TRUE)
  }
}

# Column-wise proportions rounded to 2 decimals (the heatmap below displays the raw counts)
brakper=round(sweep(brakmat,2,colSums(brakmat),"/"),2)

pdf("output/Extended Data Figure 7a_heatmap_Braak_by_Overall AD neuropathological Change.pdf",width=6,height=4)
mpal=colorRampPalette(c("white","indianred","brown3","darkred"))(50)
pheatmap(brakmat,
         cluster_rows=FALSE,cluster_cols=FALSE,
         display_numbers=TRUE,legend=FALSE,
         number_color="black",fontsize_number=12,
         col=mpal)
dev.off()
 
# Thal
uthal=unique(Donors[,"Thal"])
thalorder=c("Thal 0","Thal 1","Thal 2","Thal 3","Thal 4","Thal 5")

# Donor counts: one row per ADNC stage, one column per Thal phase
thalmat=matrix(0,length(stageorder),length(thalorder),dimnames=list(stageorder,thalorder))
for (stage in stageorder) {
  instage=Donors[,"Overall AD neuropathological Change"]==stage
  for (thal in thalorder) {
    # Donors matching both labels; comparisons that evaluate to NA are not counted
    thalmat[stage,thal]=sum(instage & Donors[,"Thal"]==thal,na.rm=TRUE)
  }
}

# Column-wise proportions rounded to 2 decimals (the heatmap below displays the raw counts)
thalper=round(sweep(thalmat,2,colSums(thalmat),"/"),2)


pdf("output/Extended Data Figure 7a_heatmap_Thal_by_Overall AD neuropathological Change.pdf",width=6,height=4)
mpal=colorRampPalette(c("white","darkseagreen1","darkseagreen","darkseagreen4"))(50)
pheatmap(thalmat,
         cluster_rows=FALSE,cluster_cols=FALSE,
         display_numbers=TRUE,legend=FALSE,
         number_color="black",fontsize_number=12,
         col=mpal)
dev.off()



# CERAD Score
ucscore=unique(Donors[,"CERAD score"])
cscoreorder=c("Absent","Sparse","Moderate","Frequent")

# Donor counts: one row per ADNC stage, one column per CERAD score
cscoremat=matrix(0,length(stageorder),length(cscoreorder),dimnames=list(stageorder,cscoreorder))
for (stage in stageorder) {
  instage=Donors[,"Overall AD neuropathological Change"]==stage
  for (cscore in cscoreorder) {
    # Donors matching both labels; comparisons that evaluate to NA are not counted
    cscoremat[stage,cscore]=sum(instage & Donors[,"CERAD score"]==cscore,na.rm=TRUE)
  }
}

# Column-wise proportions rounded to 2 decimals (the heatmap below displays the raw counts)
cscoreper=round(sweep(cscoremat,2,colSums(cscoremat),"/"),2)


pdf("output/Extended Data Figure 7a_heatmap_CERAD score_by_Overall AD neuropathological Change.pdf",width=6,height=4)
mpal=colorRampPalette(c("white","skyblue1","deepskyblue2","deepskyblue4"))(50)
pheatmap(cscoremat,
         cluster_rows=FALSE,cluster_cols=FALSE,
         display_numbers=TRUE,legend=FALSE,
         number_color="black",fontsize_number=12,
         col=mpal)
dev.off()


# Age at Death per Overall AD neuropathological Change group: one horizontal boxplot per group
# on a shared age axis, with every donor overlaid as a jittered point.
pdf("output/Extended Data Figure 7a_boxplot_Age at Death_by_Overall AD neuropathological Change.pdf",width=6,height=8)
par(mfrow=c(4,2))
agerange=c(min(Donors[,"Age at Death"]),max(Donors[,"Age at Death"]))

agegroups=list("No AD"=datnad,"Low"=datlow,"Intermediate"=datmid,"High"=dathigh)
for (grouplabel in names(agegroups)) {
  ages=agegroups[[grouplabel]][,"Age at Death"]
  # outline=FALSE (previously misspelled "outine", so it had no effect) stops boxplot() from
  # drawing outlier points itself; the stripchart below already plots every donor
  boxplot(ages,col="gray90",outline=FALSE,ylim=c(agerange[1],agerange[2]),main=grouplabel,horizontal=TRUE)
  stripchart(ages,method="jitter",pch=19,col=4,vertical=FALSE,add=TRUE)
}
dev.off()

# Co-morbidities
umorb=unique(Donors[,"Co-morbidity"])

# NOTE(review): the Python cell above can also assign the label "CAA", which is not listed here;
# donors carrying only that label would be left out of these counts -- confirm this is intended
umorborder=c("None","LBD","Atherosclerosis","Arteriolosclerosis","Microinfarcts","LATE-NC","Multiple")

# Donor counts: one row per ADNC stage, one column per co-morbidity label
morbmat=matrix(0,length(stageorder),length(umorborder),dimnames=list(stageorder,umorborder))
for (stage in stageorder) {
  instage=Donors[,"Overall AD neuropathological Change"]==stage
  for (morb in umorborder) {
    # Donors matching both labels; comparisons that evaluate to NA are not counted
    morbmat[stage,morb]=sum(instage & Donors[,"Co-morbidity"]==morb,na.rm=TRUE)
  }
}

# Row-wise proportions (within each ADNC stage), rounded to 2 decimals
morbper=round(sweep(morbmat,1,rowSums(morbmat),"/"),2)

pdf("output/Extended Data Figure 7a_barplot_Co-morbidity_by_Overall AD neuropathological Change.pdf",width=6,height=8)
# Rows are reversed before transposing so the bars are drawn in the opposite order to stageorder
barplot(t(morbper[rev(seq_len(nrow(morbper))),]),horiz=TRUE)
dev.off()

### Extended Data Figure 7c-e

In [None]:
# Code to generate the spatial transcriptomics figures is in the Spatial Transcriptomics folder

### Extended Data Figure 7f

In [None]:
# Keep only the held-out scANVI calls and confidences from the MERFISH-panel run and
# prefix them so they can sit next to the same-named columns of the full-gene run.
merfish_columns = [
    "supertype_held_scANVI",
    "supertype_held_conf_scANVI",
    "subclass_held_scANVI",
    "subclass_held_conf_scANVI",
]
MERFISH_reference = MERFISH_reference.loc[:, merfish_columns].add_prefix("MERFISH_")

# Left-join the MERFISH-panel results and the reference subclass/cluster labels on the index.
RNAseq_reference = (
    RNAseq_reference
    .merge(MERFISH_reference, left_index=True, right_index=True, how="left")
    .merge(great_ape_metadata.loc[:, ["subclass", "cluster"]], how="left", left_index=True, right_index=True)
)

# Clusters whose cells are relabelled "Unknown" at the supertype level.
low_confidence = [
    "L2/3 IT_4", "L2/3 IT_9", "L2/3 IT_11",
    "L5 IT_4",
    "L5/6 NP_5",
    "Micro-PVM_3",
    "Pvalb_4", "Pvalb_11",
    "Sncg_7",
    "Sst_6", "Sst_8", "Sst_14", "Sst_15", "Sst_16", "Sst_17", "Sst_18", "Sst_21", "Sst_24", "Sst_26",
    "Vip_3", "Vip_7", "Vip_8", "Vip_10", "Vip_17", "Vip_20", "Vip_22",
]

# Work on an object-dtype copy so "Unknown" can be written even if it is not an existing
# category, then convert the finished column to categorical.
supertype = RNAseq_reference["cluster"].astype("object")
supertype[RNAseq_reference["cluster"].isin(low_confidence)] = "Unknown"
RNAseq_reference["supertype"] = supertype.astype("category")

In [None]:
# Subclass-level self projection: score the full-gene model first, then the MERFISH-panel model.
# get_scores comes from helper_functions; its result is used below as a table with an "f1" column.
subclass_full_cols = ["subclass", "subclass_held_scANVI", "subclass_held_conf_scANVI"]
df_self_proj = RNAseq_reference.loc[:, subclass_full_cols]
df_self_proj.columns = ["true", "pred", "conf"]
for label_col in ("true", "pred"):
    df_self_proj[label_col] = df_self_proj[label_col].astype("category")
result_subclass = get_scores(df=df_self_proj)
result_subclass["model_version"] = "Subclass (Full)"

subclass_merfish_cols = ["subclass", "MERFISH_subclass_held_scANVI", "MERFISH_subclass_held_conf_scANVI"]
df_self_proj = RNAseq_reference.loc[:, subclass_merfish_cols]
df_self_proj.columns = ["true", "pred", "conf"]
for label_col in ("true", "pred"):
    df_self_proj[label_col] = df_self_proj[label_col].astype("category")
result_merfish = get_scores(df=df_self_proj)
result_merfish["model_version"] = "Subclass (MERFISH)"

result = pd.concat([result_subclass, result_merfish], axis=0)

# One box per model version with the individual F1 values overlaid as points.
plt.rcParams["figure.figsize"] = (1.5,4)
f1_by_model = dict(data=result, x="model_version", y="f1")
ax = sns.boxplot(showfliers=False, palette="Greys", **f1_by_model)
ax = sns.stripplot(color="0.25", edgecolor="white", linewidth=0.5, alpha=0.5, **f1_by_model)

# Dashed reference line at F1 = 0.7.
ax.axhline(0.7, linestyle="dashed", color="black")
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
ax.set(xlabel="", ylabel="F1 Score")

subclass_pdf = os.path.join(pwd, "output", "Extended Data Figure 7f_boxplot_Subclass F1 scores with all genes versus MERFISH panel.pdf")
plt.savefig(subclass_pdf, bbox_inches="tight")
plt.show()

# Supertype-level self projection for the full-gene model and the MERFISH-panel model.
# Cells whose supertype was relabelled "Unknown" (low-confidence clusters) are excluded for BOTH
# models. Previously only the full-gene model was filtered, so the two boxes were scored on
# different sets of cells and the MERFISH-panel scores included "Unknown" as a true label.
# get_scores comes from helper_functions; its result is used below as a table with an "f1" column.
is_known_supertype = RNAseq_reference["supertype"] != "Unknown"
supertype_models = [
    ("Supertype (Full)", "supertype_held_scANVI", "supertype_held_conf_scANVI"),
    ("Supertype (MERFISH)", "MERFISH_supertype_held_scANVI", "MERFISH_supertype_held_conf_scANVI"),
]

supertype_scores = []
for model_version, pred_col, conf_col in supertype_models:
    # .copy() so the renaming and dtype changes below do not touch RNAseq_reference.
    df_self_proj = RNAseq_reference.loc[is_known_supertype, ["supertype", pred_col, conf_col]].copy()
    df_self_proj.columns = ["true", "pred", "conf"]
    df_self_proj["true"] = df_self_proj["true"].astype("category")
    df_self_proj["pred"] = df_self_proj["pred"].astype("category")
    model_scores = get_scores(df=df_self_proj)
    model_scores["model_version"] = model_version
    supertype_scores.append(model_scores)

result = pd.concat(supertype_scores, axis=0)

# One box per model version with the individual F1 values overlaid as points.
plt.rcParams["figure.figsize"] = (1.5,4)
ax = sns.boxplot(
    data=result,
    x="model_version",
    y="f1",
    showfliers=False,
    palette="Greys"
);
ax = sns.stripplot(
    data=result,
    x="model_version",
    y="f1",
    color="0.25",
    edgecolor="white",
    linewidth=0.5,
    alpha=0.5
);
# Dashed reference line at F1 = 0.7.
ax.axhline(0.7, linestyle="dashed", color="black")
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
ax.set(xlabel="", ylabel="F1 Score");
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 7f_boxplot_Supertype F1 scores with all genes versus MERFISH panel.pdf"), bbox_inches="tight")
plt.show()

### Extended Data Figure 7g

In [None]:
# Code to generate the spatial transcriptomics figures is in the Spatial Transcriptomics folder

### Extended Data Figure 7h

In [None]:
# Code to generate the spatial transcriptomics figures is in the Spatial Transcriptomics folder

### Extended Data Figure 7i

In [None]:
# Code to generate the spatial transcriptomics figures is in the Spatial Transcriptomics folder