### Load needed libraries

In [None]:
import os
import shutil
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import copy
from matplotlib import pyplot as plt
%matplotlib inline
import rpy2
%load_ext rpy2.ipython
import warnings
from scipy import stats as sp_stats
from helper_functions import *

# Global notebook configuration.
# NOTE: this silences *all* warnings for the rest of the notebook (including pandas
# deprecation / chained-assignment warnings).
warnings.filterwarnings("ignore")
sc.settings.n_jobs = 32
sc.set_figure_params(scanpy=True, dpi=500, dpi_save=500, frameon=False, vector_friendly=True, figsize=(10,10), format='png')
# Font type 42 embeds TrueType fonts so text in the exported PDFs stays editable.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams["axes.grid"] = False


# All input/output paths in this notebook are built relative to the working directory.
pwd = os.getcwd()

# Every figure below is saved under <pwd>/output; create it up front so a fresh
# checkout does not fail on the first savefig()/pdf() call.
os.makedirs(os.path.join(pwd, "output"), exist_ok=True)

### Load needed datasets/data files

In [None]:
# Directory layout: shared inputs live in <pwd>/input, figure-specific inputs in a sub-folder.
input_dir = os.path.join(pwd, "input")
figure_input_dir = os.path.join(input_dir, "Figure 1 and Extended Data Figure 1 and 2")

# Cluster order and colors from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
color_order = pd.read_csv(os.path.join(input_dir, "cluster_order_and_colors.csv"))

# Per-nucleus metadata tables (first CSV column is used as the row index).
# From https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/
RNAseq = pd.read_csv(
    os.path.join(figure_input_dir, "SEAAD_MTG_RNAseq_all-nuclei_metadata.2024-02-13.csv"),
    index_col=0,
)
ATACseq = pd.read_csv(
    os.path.join(figure_input_dir, "SEAAD_MTG_ATACseq_all-nuclei_metadata.2024-02-13.csv"),
    index_col=0,
)
# Order the Subclass categories as listed in the cluster order table
# (reorder_categories requires both label sets to match exactly).
subclass_order = color_order["subclass_label"].unique()
RNAseq["Subclass"] = RNAseq["Subclass"].astype("category").cat.reorder_categories(subclass_order)

# Donor-level cohort metadata.
# From Manuscript Supplementary Table 1
Donors = pd.read_excel(
    os.path.join(figure_input_dir, "Supplementary Table 1.xlsx"),
    sheet_name="SEA-AD_Cohort_Metadata",
)

# Longitudinal cognitive scores (tab-separated).
# [NOT PUBLIC] From ACT database (must be requested from ACT)
Memory = pd.read_csv(os.path.join(figure_input_dir, "long_mem_scores_2022-07-20.txt"), sep="\t")

# Quantitative neuropathology, indexed by the second CSV column; the leftover
# "Unnamed: 0" column is discarded.
# From https://portal.brain-map.org/explore/seattle-alzheimers-disease/seattle-alzheimers-disease-brain-cell-atlas-download
quant_neuropath = pd.read_csv(
    os.path.join(figure_input_dir, "sea-ad_all_mtg_quant_neuropath_bydonorid_081122.csv"),
    index_col=1,
).drop("Unnamed: 0", axis=1)

# From 00_build_input_data.py
nuclear_cytosolic = pd.read_csv(os.path.join(figure_input_dir, "nuclear_cytosolic.csv"), index_col=0)

### Figure 1b-c

In [None]:
# Flag donors carrying at least one APOE4 allele (genotype string contains "4").
Donors["APOE4 Status"] = Donors["APOE Genotype"].str.contains("4")

# Co-morbidity label per donor: "None" when no criterion below is met, the criterion's
# own label when exactly one is met, and "Multiple" when more than one is met.
# Criteria are evaluated in this order.
comorbidity_criteria = [
    ("CAA", Donors["Overall CAA Score"].isin(["Severe"])),
    ("LBD", Donors["Highest Lewy Body Disease"].isin(["Neocortical (Diffuse)"])),
    ("LATE-NC", Donors["LATE"].isin(["LATE Stage 3"])),
    ("Arteriolosclerosis", Donors["Arteriolosclerosis"].isin(["Severe"])),
    ("Atherosclerosis", Donors["Atherosclerosis"].isin(["Severe"])),
    ("Microinfarcts", Donors["Total Microinfarcts (not observed grossly)"] > 6),
]

Donors["Co-morbidity"] = "None"
for label, meets_criterion in comorbidity_criteria:
    # Donors that already carry a label from an earlier criterion become "Multiple";
    # previously unlabelled donors receive this criterion's label.
    already_labelled = Donors["Co-morbidity"] != "None"
    Donors.loc[meets_criterion & already_labelled, "Co-morbidity"] = "Multiple"
    Donors.loc[meets_criterion & ~already_labelled, "Co-morbidity"] = label



In [None]:
%%R -i Donors
# Code comes from Mike H

library(pheatmap)

vars=colnames(Donors)
ustage=unique(Donors[,"Overall AD neuropathological Change"])

# bracketed by Overall AD neuropathological Change
datnad=Donors[Donors[,"Overall AD neuropathological Change"]=="Not AD",]
datlow=Donors[Donors[,"Overall AD neuropathological Change"]=="Low",]
datmid=Donors[Donors[,"Overall AD neuropathological Change"]=="Intermediate",]
dathigh=Donors[Donors[,"Overall AD neuropathological Change"]=="High",]

# Count the rows of `dat` at every level of `column`.
# The level set (and its order) is taken from the full Donors table, so a level that is
# absent from one ADNC group is reported as 0 at a fixed position. A bare table() on the
# subset would drop the level and shift the positions, which previously had to be patched
# with hard-coded values.
level_counts=function(dat,column) {
  all_levels=names(table(Donors[,column]))
  as.vector(table(factor(dat[,column],levels=all_levels)))
}

# Build a 2 x 4 count matrix: rows are two levels of `column`, columns are the ADNC groups.
# `first` / `second` are the positions (in table() level order) stored in row 1 / row 2.
adnc_counts=function(column,row_labels,first,second) {
  groups=list(datnad,datlow,datmid,dathigh)
  mat=matrix(0,2,4,dimnames=list(row_labels,c("No AD","Low","Intermediate","High")))
  for (j in 1:4) {
    counts=level_counts(groups[[j]],column)
    mat[1,j]=counts[first]
    mat[2,j]=counts[second]
  }
  mat
}

# Convert counts to within-group fractions (every column sums to 1).
column_fractions=function(mat) {
  for (j in 1:ncol(mat)) {
    mat[,j]=mat[,j]/sum(mat[,j])
  }
  mat
}

# Sex. NOTE(review): positions follow table() level order; this assumes the labels sort as
# Female (1) < Male (2), as the original positional indexing did -- confirm against the data.
mfmat=adnc_counts("Sex",c("Male","Female"),2,1)
mfper=column_fractions(mfmat)

pdf("output/Figure 1b_barplot_Sex_by_Overall AD neuropathological Change.pdf",width=5,height=5)
par(mfrow=c(1,1))
barplot(mfper,horiz=T,col=c("lightblue","antiquewhite2"))
dev.off()


# APOE4 carrier status (level order FALSE (1) < TRUE (2)). Groups without any carrier now
# yield 0 from the data instead of relying on manually zeroed cells.
mfmat=adnc_counts("APOE4 Status",c("APOE4","No APOE4"),2,1)
mfper=column_fractions(mfmat)

pdf("output/Figure 1b_barplot_APOE4 Status_by_Overall AD neuropathological Change.pdf",width=5,height=5)
par(mfrow=c(1,1))
barplot(mfper,horiz=T,col=c("darkseagreen","gray90"))
dev.off()


# Cognitive status. NOTE(review): assumes the dementia label sorts before the no-dementia
# label, as the original positional indexing did -- confirm against the data. Groups without
# any dementia case are handled by level_counts() rather than by hard-coded counts.
mfmat=adnc_counts("Cognitive Status",c("Dementia","No Dementia"),1,2)
mfper=column_fractions(mfmat)


pdf("output/Figure 1c_barplot_Cognitive Status_by_Overall AD neuropathological Change.pdf",width=5,height=5)
par(mfrow=c(1,1))
barplot(mfper,horiz=T,col=c("red","gray90"))
dev.off()

par(las=2)

# Braak
ubraak=unique(Donors[,"Braak"])
ustage=unique(Donors[,"Overall AD neuropathological Change"])

brakorder=c("Braak 0","Braak II","Braak III","Braak IV","Braak V","Braak VI")
stageorder=c("High","Intermediate","Low","Not AD")

# Cross-tabulate donors: rows are ADNC stages (stageorder), columns are the requested
# levels of `column`. Donors whose value is not listed in `column_levels` are not counted.
stage_crosstab=function(column,column_levels) {
  counts=matrix(0,length(stageorder),length(column_levels),dimnames=list(stageorder,column_levels))
  stage_values=Donors[,"Overall AD neuropathological Change"]
  column_values=Donors[,column]
  for (s in stageorder) {
    for (v in column_levels) {
      counts[s,v]=sum(stage_values==s & column_values==v,na.rm=TRUE)
    }
  }
  counts
}

# Fraction of each column falling in every stage, rounded to two decimals.
column_share=function(counts) {
  round(sweep(counts,2,colSums(counts),"/"),2)
}

brakmat=stage_crosstab("Braak",brakorder)
# The *per matrices are computed for reference; the heatmaps below display raw counts.
brakper=column_share(brakmat)

pdf("output/Figure 1c_heatmap_Braak_by_Overall AD neuropathological Change.pdf",width=6,height=4)
mpal=colorRampPalette(c("white","indianred","brown3","darkred"))(50)
pheatmap(brakmat,cluster_rows=F,cluster_cols=F,display_numbers=T,legend=F,number_color = "black",fontsize_number=12,col=mpal)
dev.off()

# Thal
uthal=unique(Donors[,"Thal"])
thalorder=c("Thal 0","Thal 1","Thal 2","Thal 3","Thal 4","Thal 5")

thalmat=stage_crosstab("Thal",thalorder)
thalper=column_share(thalmat)

pdf("output/Figure 1c_heatmap_Thal_by_Overall AD neuropathological Change.pdf",width=6,height=4)
mpal=colorRampPalette(c("white","darkseagreen1","darkseagreen","darkseagreen4"))(50)
pheatmap(thalmat,cluster_rows=F,cluster_cols=F,display_numbers=T,legend=F,number_color = "black",fontsize_number=12,col=mpal)
dev.off()

# CERAD Score
ucscore=unique(Donors[,"CERAD score"])
cscoreorder=c("Absent","Sparse","Moderate","Frequent")

cscoremat=stage_crosstab("CERAD score",cscoreorder)
cscoreper=column_share(cscoremat)

pdf("output/Figure 1c_heatmap_CERAD score_by_Overall AD neuropathological Change.pdf",width=6,height=4)
mpal=colorRampPalette(c("white","skyblue1","deepskyblue2","deepskyblue4"))(50)
pheatmap(cscoremat,cluster_rows=F,cluster_cols=F,display_numbers=T,legend=F,number_color = "black",fontsize_number=12,col=mpal)
dev.off()


# Age at death per ADNC group: one horizontal boxplot per group on a shared age axis,
# with the individual donors overlaid as jittered points.
pdf("output/Figure 1b_boxplot_Age at Death_by_Overall AD neuropathological Change.pdf",width=6,height=8)
par(mfrow=c(4,2))
agerange=c(min(Donors[,"Age at Death"]),max(Donors[,"Age at Death"]))
age_groups=list("No AD"=datnad,"Low"=datlow,"Intermediate"=datmid,"High"=dathigh)
for (label in names(age_groups)) {
  ages=age_groups[[label]][,"Age at Death"]
  # outline=F (previously misspelled "outine", which boxplot() silently ignored) hides the
  # boxplot's own outlier marks; every donor is drawn by stripchart() below anyway.
  boxplot(ages,col = "gray90",outline=F,ylim=agerange,main=label,horizontal=T)
  stripchart(ages,method = "jitter",pch = 19,col = 4,vertical = FALSE, add = TRUE)
}
dev.off()

# Co-morbidities
umorb=unique(Donors[,"Co-morbidity"])

# Categories shown in the barplot, in stacking order.
umorborder=c("None","LBD","Atherosclerosis","Arteriolosclerosis","Microinfarcts","LATE-NC","Multiple")

# Donors whose label is not listed above are left out of the counts and therefore out of the
# per-stage fractions. The Python cell that builds "Co-morbidity" can also assign "CAA",
# so make any such omission visible instead of dropping donors silently.
unlisted=setdiff(umorb[!is.na(umorb)],umorborder)
if (length(unlisted)>0) {
  warning(paste0("Co-morbidity categories not shown in the barplot: ",paste(unlisted,collapse=", ")))
}

# Rows: ADNC stages (stageorder is defined with the Braak heatmap above); columns: categories.
morbmat=matrix(0,length(stageorder),length(umorborder),dimnames=list(stageorder,umorborder))

for (i in 1:length(stageorder)) {
  for (j in 1:length(umorborder)) {
    morbmat[i,j]=length(intersect(which(Donors[,"Overall AD neuropathological Change"]==stageorder[i]),which(Donors[,"Co-morbidity"]==umorborder[j])))
  }
}


# Fraction of each stage's (listed) donors per category, rounded to two decimals.
morbper=morbmat
for (i in 1:nrow(morbper)) {
  morbper[i,]=round(morbper[i,]/sum(morbper[i,]),2)
}

pdf("output/Figure 1c_barplot_Co-morbidity_by_Overall AD neuropathological Change.pdf",width=6,height=8)
# Rows are reversed so the stages are passed to barplot() in the opposite order of stageorder.
barplot(t(morbper[nrow(morbper):1,]),horiz=T)
dev.off()

### Extended Data Figure 1b

In [None]:
# Boxplots
plt.rcParams["figure.figsize"] = (3,4)
covariate = "Overall AD neuropathological Change"
# Fixed x-axis order, matching the category order used for the ADNC violin plots and PCA
# cells in this notebook. Without it the order follows first appearance in the data.
adnc_order = ["Not AD", "Low", "Intermediate", "High"]


def boxplot_by_covariate(metadata, y_values, covariate, order):
    """Save one boxplot (with overlaid swarm) per metric, grouped by `covariate`.

    Parameters
    ----------
    metadata : pandas.DataFrame
        One row per plotted unit; must contain `covariate` and every column in `y_values`.
        The metric columns are converted to float32 in place ("Unavailable" becomes NaN).
    y_values : list of str
        Metric columns to plot; each produces one PDF in <pwd>/output.
    covariate : str
        Column used for the x-axis groups (also part of the output file name).
    order : list of str
        Order of the x-axis groups.
    """
    for i in y_values:
        metadata[i] = metadata[i].replace("Unavailable", np.nan)
        metadata[i] = metadata[i].astype("float32")
        ax = sns.boxplot(data=metadata, x=covariate, y=i, order=order, showfliers=False, palette="tab20");
        ax = sns.swarmplot(data=metadata, x=covariate, y=i, order=order, color="0.25", edgecolor="white", linewidth=0.5);
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
        ax.set(xlabel="")

        plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 1b_boxplot_" + i + "_by_" + covariate + ".pdf"),  bbox_inches='tight')
        plt.show()


# Donor-level metrics: collapse the per-nucleus table to unique rows per donor.
batch_var = "Donor ID"
y_values = [
    "PMI",
    "Fresh Brain Weight",
    "Brain pH",
    "RIN",
]

to_get = copy.copy(y_values)
to_get.append(batch_var)
to_get.append(covariate)

metadata = RNAseq.loc[RNAseq[covariate] != "Reference", to_get].copy()
metadata = metadata.drop_duplicates().reset_index(drop=True)

boxplot_by_covariate(metadata, y_values, covariate, adnc_order)

# Library-level metrics.
batch_var = "library_prep"
y_values = [
    "Used in analysis",
    "quantification_fmol",
    "NeuN positive fraction on FANS",
]

to_get = copy.copy(y_values)
to_get.append(batch_var)
to_get.append(covariate)

metadata = RNAseq.loc[RNAseq[covariate] != "Reference", to_get].copy()
# Replace the per-nucleus "Used in analysis" flag by its mean per library (the fraction of
# nuclei kept), then collapse to unique rows per library.
used_fraction = metadata.loc[
    :,
    ["Used in analysis", "library_prep"]
].groupby("library_prep").mean()
metadata = metadata.drop(
    "Used in analysis",
    axis=1
).merge(
    used_fraction,
    left_on="library_prep",
    right_index=True,
    how="left"
).drop_duplicates().reset_index(drop=True)

boxplot_by_covariate(metadata, y_values, covariate, adnc_order)

In [None]:
# Principal component analysis on pre-sequencing metadata
# Per-nucleus rows restricted to non-reference donors profiled with 10Xv3.1 that have a
# recorded brain weight. `.copy()` makes this an independent frame, so the dtype and
# column assignments below do not write into a slice of RNAseq.
metadata = RNAseq.loc[
    (RNAseq["Neurotypical reference"] == False) & (RNAseq["method"] == "10Xv3.1") & (RNAseq["Fresh Brain Weight"] != "Unavailable"),
    [
        "Donor ID",
        "library_prep",
        "Overall AD neuropathological Change",
        "PMI",
        "Fresh Brain Weight",
        "Brain pH",
        "NeuN positive fraction on FANS",
        "library_input_ng",
        "quantification_fmol",
        "avg_size_bp",
        "RIN",
    ]
].copy()
# The first three columns are identifiers; everything after them is a numeric metric.
for i in metadata.columns[3:]:
    metadata[i] = metadata[i].astype("float32")
    
metadata["Donor ID"] = metadata["Donor ID"].astype("category")
# Human-readable names (same column order as the selection above); used as heatmap labels.
metadata.columns = [
        "Donor ID",
        "library_prep",
        "ADNC",
        "Post mortem interval",
        "Brain weight",
        "Brain pH",
        "NeuN positive by FACS",
        "Amplified cDNA concentration",
        "Library concentration",
        "Library Insert size",
        "RNA Quality (RIN)",
]
# Feature matrix: mean of every metric per donor.
metadata_X = metadata.loc[
    :,
    [
        "Donor ID",
        "Post mortem interval",
        "Brain weight",
        "Brain pH",
        "NeuN positive by FACS",
        "Amplified cDNA concentration",
        "Library concentration",
        "Library Insert size",
        "RNA Quality (RIN)",
    ]
].groupby(["Donor ID"]).mean()
# Donor annotations (ADNC), re-indexed to the row order of the feature matrix.
metadata_obs = metadata.loc[
    :,
    [
        "Donor ID",
        "ADNC",
    ]
].drop_duplicates().reset_index()
metadata_obs.index = metadata_obs["Donor ID"].copy()
metadata_obs = metadata_obs.drop("Donor ID", axis=1)
metadata_obs = metadata_obs.loc[metadata_X.index, :]
metadata_obs["ADNC"] = metadata_obs["ADNC"].astype("category")
metadata_obs["ADNC"] = metadata_obs["ADNC"].cat.reorder_categories(["Not AD", "Low", "Intermediate", "High"])
metadata_ad = ad.AnnData(X=metadata_X, obs=metadata_obs)
sc.pp.scale(metadata_ad)
sc.pp.pca(metadata_ad)
metadata_ad.obs["PC1"] = metadata_ad.obsm["X_pca"][:, 0]

# Violin plots
# scanpy writes the figure into <pwd>/figures with a "violin" prefix; move it to output.
sc.pl.violin(metadata_ad, keys=["PC1"], groupby="ADNC", size=5, rotation=90, palette="tab20", save="_pre-sequencing_metrics_PC1_by_ADNC.pdf")
os.rename(os.path.join(pwd, "figures", "violin_pre-sequencing_metrics_PC1_by_ADNC.pdf"), os.path.join(pwd, "output", "Extended Data Figure 1b_violinplot_pre-sequencing_metrics_PC1_by_ADNC.pdf"))
                       
# Heatmap
features = metadata_ad.var_names.to_series()
pc = 0
g = metadata_ad.varm['PCs'][:, pc]
o = np.argsort(g)
# Features with the seven lowest loadings on the PC plus the one with the highest loading.
sel = np.concatenate((o[:7], o[-1:])).tolist()
emb = metadata_ad.obsm['X_pca'][:, pc]
# Donors sorted by their score on the PC.
donors = np.argsort(emb)
# `sel` holds positions, so select with .iloc (integer keys on a label-indexed Series are
# a deprecated positional fallback). Copy the view before annotating it.
tempdata = metadata_ad[donors, features.iloc[sel].index.tolist()].copy()
tempdata.obs["Donor ID"] = tempdata.obs.index.astype("category")
tempdata.obs["Donor ID"] = tempdata.obs["Donor ID"].cat.reorder_categories(pd.Index(metadata_ad.obs.iloc[donors,].index))
# Translate each donor's ADNC category into a hex colour for the column colour bar.
donor_colors = tempdata.obs.loc[:, ["ADNC"]].copy()
donor_colors["ADNC"] = donor_colors["ADNC"].cat.rename_categories(
    {
        "Not AD": "#226894",
        "Low": "#aac0db",
        "Intermediate": "#e47733",
        "High": "#f0b382",
    },
)    
# Rows: features; columns: donors ordered by PC score.
tmp = pd.DataFrame(tempdata.X.T)
tmp.index = tempdata.var_names
ax = sns.clustermap(
    tmp,
    row_cluster=False,
    col_cluster=False,
    cmap="RdBu_r",
    figsize=(16,4),
    yticklabels=True,
    xticklabels=False,
    col_colors=donor_colors["ADNC"].to_list(),
    cbar_pos=(0.10, 0.18, 0.01, 0.4),
    **{"center": 0}
);
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 1b_heatmap_pre-sequencing_metics_PC1.pdf"), bbox_inches="tight")
plt.show()

### Extended Data Figure 1c

In [None]:
plt.rcParams["figure.figsize"] = (2.5,4)
covariate = "Overall AD neuropathological Change"
# (modality label, per-nucleus metadata table, [(metric column, log-scaled axis?)])
violin_panels = [
    ("RNAseq", RNAseq, [("Genes detected", False), ("Number of UMIs", True)]),
    ("ATACseq", ATACseq, [("Peaks detected", False), ("Number of UMIs", True)]),
]
for modality, nuclei, metrics in violin_panels:
    # Only nuclei from non-reference donors that were used in the analysis.
    adata = ad.AnnData(obs=nuclei.loc[(nuclei[covariate] != "Reference") & (nuclei["Used in analysis"] == True), :])
    # Cast first: the column comes from a CSV and may not be categorical yet. Unused
    # categories (e.g. a filtered-out "Reference") are dropped so the reorder is valid.
    adata.obs[covariate] = (
        adata.obs[covariate]
        .astype("category")
        .cat.remove_unused_categories()
        .cat.reorder_categories(["Not AD", "Low", "Intermediate", "High"])
    )
    for metric, log_scale in metrics:
        # scanpy saves to figures/violin<suffix>. The suffix now carries the modality so
        # that (a) the file written matches the one moved below and (b) the RNAseq and
        # ATACseq "Number of UMIs" figures cannot overwrite each other.
        suffix = "_" + modality + "_" + metric + "_by_" + covariate + ".pdf"
        sc.pl.violin(
            adata,
            keys=[metric],
            groupby=covariate,
            stripplot=False,
            log=log_scale,
            rotation=90,
            save=suffix,
            **{"palette": "tab20"}
        )
        os.rename(
            os.path.join(pwd, "figures", "violin" + suffix),
            os.path.join(pwd, "output", "Extended Data Figure 1c_violinplot" + suffix),
        )



### Extended Data Figure 1d

In [None]:
plt.rcParams["figure.figsize"] = (2.5,4)
covariate = "method"
# (modality label, per-nucleus metadata table, [(metric column, log-scaled axis?)])
violin_panels = [
    ("RNAseq", RNAseq, [("Genes detected", False), ("Number of UMIs", True)]),
    ("ATACseq", ATACseq, [("Peaks detected", False), ("Number of UMIs", True)]),
]
for modality, nuclei, metrics in violin_panels:
    # Nuclei used in the analysis, grouped by sequencing method.
    # NOTE(review): the `!= "Reference"` filter is applied to the "method" column here
    # (it was carried over from the ADNC cell) -- confirm whether reference donors should
    # instead be excluded via "Overall AD neuropathological Change".
    adata = ad.AnnData(obs=nuclei.loc[(nuclei[covariate] != "Reference") & (nuclei["Used in analysis"] == True), :])
    for metric, log_scale in metrics:
        # scanpy saves to figures/violin<suffix>. The suffix carries the modality so that
        # the file written matches the one moved below and the two "Number of UMIs"
        # figures cannot overwrite each other.
        suffix = "_" + modality + "_" + metric + "_by_" + covariate + ".pdf"
        sc.pl.violin(
            adata,
            keys=[metric],
            groupby=covariate,
            stripplot=False,
            log=log_scale,
            rotation=90,
            save=suffix,
            **{"palette": "tab20"}
        )
        # Outputs are labelled 1d to match this section (they were previously saved with
        # an "Extended Data Figure 1c" prefix).
        os.rename(
            os.path.join(pwd, "figures", "violin" + suffix),
            os.path.join(pwd, "output", "Extended Data Figure 1d_violinplot" + suffix),
        )

### Extended Data Figure 2a

In [None]:
# Run PCA on RNAseq library metrics
# Per-nucleus rows from non-reference donors profiled with 10Xv3.1. `.copy()` makes this an
# independent frame, so the assignments below do not write into a slice of RNAseq.
RNAseq_metrics = RNAseq.loc[
    (RNAseq["Neurotypical reference"] == False) & (RNAseq["method"] == "10Xv3.1"),
    [
        "Donor ID",
        "library_prep",
        "Overall AD neuropathological Change",
        "GEX_Fraction_of_transcriptomic_reads_in_cells",
        "GEX_Mean_raw_reads_per_cell",
        "GEX_Median_genes_per_cell",
        "GEX_Median_UMI_counts_per_cell",
        "GEX_Reads_mapped_confidently_to_genome",
        "GEX_Reads_mapped_confidently_to_intronic_regions",
        "GEX_Reads_mapped_confidently_to_exonic_regions",
        "GEX_Reads_mapped_confidently_to_intergenic_regions",
        "GEX_Reads_mapped_antisense_to_gene",
        "GEX_Total_genes_detected",
    ]
].copy()
RNAseq_metrics["Donor ID"] = RNAseq_metrics["Donor ID"].astype("category")
# Human-readable names (same column order as the selection above); used as heatmap labels.
RNAseq_metrics.columns = [
        "Donor ID",
        "library_prep",
        "ADNC",
        "Fraction of transcriptomic reads in cells",
        "Mean raw reads per cell",
        "Median genes per cell",
        "Median UMI counts per cell",
        "Fraction mapped uniquely to genome",
        "Fraction mapped uniquely to intronic regions",
        "Fraction mapped uniquely to exonic regions",
        "Fraction mapped uniquely to intergenic regions",
        "Fraction mapped antisense",
        "Total genes detected in the library",
]
# Feature matrix: mean of every metric per donor.
# NOTE(review): "Fraction mapped uniquely to exonic regions" is selected above but left
# out of the PCA features -- confirm this omission is intentional.
RNAseq_metrics_X = RNAseq_metrics.loc[
    :,
    [
        "Donor ID",
        "Fraction of transcriptomic reads in cells",
        "Mean raw reads per cell",
        "Median genes per cell",
        "Median UMI counts per cell",
        "Fraction mapped uniquely to genome",
        "Fraction mapped uniquely to intronic regions",
        "Fraction mapped uniquely to intergenic regions",
        "Fraction mapped antisense",
        "Total genes detected in the library",
    ]
].groupby(["Donor ID"]).mean()
# Donor annotations (ADNC), re-indexed to the row order of the feature matrix.
RNAseq_metrics_obs = RNAseq_metrics.loc[
    :,
    [
        "Donor ID",
        "ADNC",
    ]
].drop_duplicates().reset_index()
RNAseq_metrics_obs.index = RNAseq_metrics_obs["Donor ID"].copy()
RNAseq_metrics_obs = RNAseq_metrics_obs.drop("Donor ID", axis=1)
RNAseq_metrics_obs = RNAseq_metrics_obs.loc[RNAseq_metrics_X.index, :]
RNAseq_metrics_obs["ADNC"] = RNAseq_metrics_obs["ADNC"].astype("category")
RNAseq_metrics_obs["ADNC"] = RNAseq_metrics_obs["ADNC"].cat.reorder_categories(["Not AD", "Low", "Intermediate", "High"])
RNAseq_metrics_ad = ad.AnnData(X=RNAseq_metrics_X, obs=RNAseq_metrics_obs)
sc.pp.scale(RNAseq_metrics_ad)
sc.pp.pca(RNAseq_metrics_ad)
RNAseq_metrics_ad.obs["PC1"] = RNAseq_metrics_ad.obsm["X_pca"][:, 0]

# Plot the scaled values for the features
genes = RNAseq_metrics_ad.var_names.to_series()
pc = 0
g = RNAseq_metrics_ad.varm['PCs'][:, pc]
o = np.argsort(g)
# Features with the seven lowest loadings on the PC plus the one with the highest loading.
sel = np.concatenate((o[:7], o[-1:])).tolist()
emb = RNAseq_metrics_ad.obsm['X_pca'][:, pc]
# Donors sorted by their score on the PC.
donors = np.argsort(emb)
# `sel` holds positions, so select with .iloc (integer keys on a label-indexed Series are
# a deprecated positional fallback). Copy the view before annotating it.
tempdata = RNAseq_metrics_ad[donors, genes.iloc[sel].index.tolist()].copy()
tempdata.obs["Donor ID"] = tempdata.obs.index.astype("category")
tempdata.obs["Donor ID"] = tempdata.obs["Donor ID"].cat.reorder_categories(pd.Index(RNAseq_metrics_ad.obs.iloc[donors,].index))
# Translate each donor's ADNC category into a hex colour for the column colour bar.
donor_colors = tempdata.obs.loc[:, ["ADNC"]].copy()
donor_colors["ADNC"] = donor_colors["ADNC"].cat.rename_categories(
    {
        "Not AD": "#226894",
        "Low": "#aac0db",
        "Intermediate": "#e47733",
        "High": "#f0b382",
    },
)    
# Rows: features; columns: donors ordered by PC score.
tmp = pd.DataFrame(tempdata.X.T)
tmp.index = tempdata.var_names
ax = sns.clustermap(
    tmp,
    row_cluster=False,
    col_cluster=False,
    cmap="RdBu_r",
    figsize=(16,4),
    yticklabels=True,
    xticklabels=False,
    col_colors=donor_colors["ADNC"].to_list(),
    cbar_pos=(0.10, 0.18, 0.01, 0.4),
    **{"linewidths": 1, "linecolor": "lightgrey", "center": 0}
);
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 2a_heatmap_RNAseq_metrics_PC1.pdf"), bbox_inches="tight")
plt.show()

In [None]:
# Run PCA on ATACseq library metrics
# Per-nucleus rows from non-reference donors profiled with 10xATAC_v1.1. `.copy()` keeps
# this frame independent of ATACseq.
ATACseq_metrics = ATACseq.loc[
    (ATACseq["Neurotypical reference"] == False) & (ATACseq["method"] == "10xATAC_v1.1"),
    [
        "Donor ID",
        "library_prep",
        "Overall AD neuropathological Change",
        "ATAC_Mean_raw_read_pairs_per_cell",
        "ATAC_TSS_enrichment_score",
        "ATAC_Confidently_mapped_read_pairs",
        "ATAC_Fraction_of_genome_in_peaks",
        "ATAC_Fraction_of_high_quality_fragments_overlapping_TSS",
        "ATAC_Fraction_of_high_quality_fragments_overlapping_peaks",
        "ATAC_Fraction_of_transposition_events_in_peaks_in_cells",
        "ATAC_Median_high_quality_fragments_per_cell",
    ]
].copy()
# Human-readable names (same column order as the selection above); used as heatmap labels.
ATACseq_metrics.columns = [
        "Donor ID",
        "library_prep",
        "ADNC",
        "Mean raw read pairs per cell",
        "TSS enrichment score",
        "Fraction of uniquely mapped read pairs",
        "Fraction of genome in peaks",
        "Fraction of fragments overlapping TSS",
        "Fraction of fragments overlapping peaks",
        "Fraction of transposition events in peaks in cells",
        "Median fragments per cell",
]
# Feature matrix: mean of every metric per donor.
ATACseq_metrics_X = ATACseq_metrics.loc[
    :,
    [
        "Donor ID",
        "Mean raw read pairs per cell",
        "TSS enrichment score",
        "Fraction of uniquely mapped read pairs",
        "Fraction of genome in peaks",
        "Fraction of fragments overlapping TSS",
        "Fraction of fragments overlapping peaks",
        "Fraction of transposition events in peaks in cells",
        "Median fragments per cell",
    ]
].groupby(["Donor ID"]).mean()
# Donor annotations (ADNC), re-indexed to the row order of the feature matrix.
# reset_index(drop=True) discards the per-nucleus index whatever it is named (the previous
# reset_index().drop("index") only worked for an unnamed index).
ATACseq_metrics_obs = ATACseq_metrics.loc[
    :,
    [
        "Donor ID",
        "ADNC",
    ]
].drop_duplicates().reset_index(drop=True)
ATACseq_metrics_obs.index = ATACseq_metrics_obs["Donor ID"].copy()
ATACseq_metrics_obs = ATACseq_metrics_obs.drop("Donor ID", axis=1)
ATACseq_metrics_obs = ATACseq_metrics_obs.loc[ATACseq_metrics_X.index, :]
ATACseq_metrics_obs["ADNC"] = ATACseq_metrics_obs["ADNC"].astype("category")
ATACseq_metrics_obs["ADNC"] = ATACseq_metrics_obs["ADNC"].cat.reorder_categories(["Not AD", "Low", "Intermediate", "High"])
ATACseq_metrics_ad = ad.AnnData(X=ATACseq_metrics_X, obs=ATACseq_metrics_obs)

sc.pp.scale(ATACseq_metrics_ad)
sc.pp.pca(ATACseq_metrics_ad)
ATACseq_metrics_ad.obs["PC1"] = ATACseq_metrics_ad.obsm["X_pca"][:, 0]

# Plot the scaled values for the features
genes = ATACseq_metrics_ad.var_names.to_series()
pc = 0
g = ATACseq_metrics_ad.varm['PCs'][:, pc]
o = np.argsort(g)
# Features with the seven lowest loadings on the PC plus the one with the highest loading.
sel = np.concatenate((o[:7],o[-1:])).tolist()
emb = ATACseq_metrics_ad.obsm['X_pca'][:, pc]
# Donors sorted by their score on the PC.
donors = np.argsort(emb)
# `sel` holds positions, so select with .iloc (integer keys on a label-indexed Series are
# a deprecated positional fallback). Copy the view before annotating it.
tempdata = ATACseq_metrics_ad[donors, genes.iloc[sel].index.tolist()].copy()
tempdata.obs["Donor ID"] = tempdata.obs.index.astype("category")
tempdata.obs["Donor ID"] = tempdata.obs["Donor ID"].cat.reorder_categories(pd.Index(ATACseq_metrics_ad.obs.iloc[donors,].index))
# Translate each donor's ADNC category into a hex colour for the column colour bar.
donor_colors = tempdata.obs.loc[:, ["ADNC"]].copy()
donor_colors["ADNC"] = donor_colors["ADNC"].cat.rename_categories(
    {
        "Not AD": "#226894",
        "Low": "#aac0db",
        "Intermediate": "#e47733",
        "High": "#f0b382",
    },
)    
# Rows: features; columns: donors ordered by PC score.
tmp = pd.DataFrame(tempdata.X.T)
tmp.index = tempdata.var_names
ax = sns.clustermap(
    tmp,
    row_cluster=False,
    col_cluster=False,
    cmap="RdBu_r",
    figsize=(16,4),
    yticklabels=True,
    xticklabels=False,
    col_colors=donor_colors["ADNC"].to_list(),
    cbar_pos=(0.10, 0.18, 0.01, 0.4),
    **{"linewidths": 1, "linecolor": "lightgrey", "center": 0}
);
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 2a_heatmap_ATACseq_metrics_PC1.pdf"), bbox_inches="tight")
plt.show()

### Figure 1d

In [None]:
# Plot the RNAseq and ATACseq PCs against one another per donor
plt.rcParams["figure.figsize"] = (4,4)
rna_pc1 = RNAseq_metrics_ad.obsm["X_pca"][:, 0]
atac_pc1 = ATACseq_metrics_ad.obsm["X_pca"][:, 0]
# Shift both scores so the smallest value of each modality sits at 0, and key them by
# donor so the two modalities are paired by donor rather than by row position.
rna_shifted = pd.Series(rna_pc1 - rna_pc1.min(), index=RNAseq_metrics_ad.obs_names)
atac_shifted = pd.Series(atac_pc1 - atac_pc1.min(), index=ATACseq_metrics_ad.obs_names)
# Donors present in both modalities, in RNAseq order. When both objects already hold the
# same donors in the same order this selects everything and changes nothing.
shared_donors = rna_shifted.index[rna_shifted.index.isin(atac_shifted.index)]
x_values = rna_shifted.loc[shared_donors].to_numpy()
y_values = atac_shifted.loc[shared_donors].to_numpy()

ax = sns.scatterplot(
    x=x_values,
    y=y_values,
    hue=RNAseq_metrics_ad.obs.loc[shared_donors, "ADNC"], palette="tab20"
);
ax = sns.regplot(
    x=x_values,
    y=y_values,
    scatter=False,
    line_kws={"color": "lightgrey", "lw": 2}
);
ax.set_xlabel("snRNAseq library metrics");
ax.set_ylabel("snATACseq library metrics");
# Dashed reference lines at PC1 = 1.5 (RNAseq) and PC1 = 2.5 (ATACseq), expressed on the
# shifted axes. NOTE(review): the origin of these two cut-offs is not documented here.
ax.axvline(x=1.5 - rna_pc1.min(), **{"linestyle": "--", "color": "red"});
ax.axhline(y=2.5 - atac_pc1.min(), **{"linestyle": "--", "color": "red"});
ax.legend(bbox_to_anchor=(1.05, 1), ncol=1);
plt.savefig(os.path.join(pwd, "output", "Figure 1d_scatterplot_RNAseq_metrics_PC1_versus_ATACseq_metrics_PC1.pdf"), bbox_inches="tight")
plt.show()

### Figure 1e and Extended Data Figure 2b

In [None]:
# Assign every nucleus' donor to one of three groups used for the cognitive trajectories:
# "SA" (High ADNC and severely affected), "ADNC3" (High ADNC, not severely affected) and
# "ADNC0-2" (everything else).
RNAseq["Memory groups"] = "ADNC0-2"
RNAseq.loc[(RNAseq["Overall AD neuropathological Change"] == "High") & (RNAseq["Severely Affected Donor"] == "N"), "Memory groups"] = "ADNC3"
RNAseq.loc[(RNAseq["Overall AD neuropathological Change"] == "High") & (RNAseq["Severely Affected Donor"] == "Y"), "Memory groups"] = "SA"
RNAseq["Memory groups"] = RNAseq["Memory groups"].astype("category")

# NOTE: this cell rebinds Memory and is not idempotent -- re-running it halves `visit`
# again and repeats the merge. Re-run the data loading cell first.
# Keep rows with a donor name and an even visit number, then renumber visits (visit / 2).
# `.copy()` so the column assignments below act on an independent frame.
Memory = Memory.loc[~Memory["donor_name"].isna(),:]
Memory = Memory.loc[(Memory["visit"] % 2) == 0, :].copy()

Memory["visit"] = Memory["visit"] / 2
# Attach each donor's group (one row per donor) to the visit-level table.
donor_groups = RNAseq.loc[:, ["Donor ID", "Memory groups"]].drop_duplicates().reset_index(drop=True)
Memory = Memory.merge(donor_groups, left_on="donor_name", right_on="Donor ID", how="left")

# Visits counted backwards from each donor's last visit (last visit = 0, earlier < 0).
# Rows without a matching donor keep 0, as before.
last_visit = Memory.groupby("Donor ID")["visit"].max()
Memory["normalized_visit"] = (
    Memory["visit"] - Memory["Donor ID"].map(last_visit)
).where(Memory["Donor ID"].notna(), 0)



In [None]:
# Plot memory curves split by SA donors, other high pathology donors, and all other donors
plt.rcParams["figure.figsize"] = (8,8)
colors = {
    "ADNC0-2": "grey",
    "ADNC3": "goldenrod",
    "SA": "dodgerblue"
}
# Number of smooth() fits averaged per curve.
# NOTE(review): smooth() comes from helper_functions; it presumably resamples the data on
# every call (otherwise all K fits would be identical) -- confirm, and consider seeding the
# random generator so the figure is reproducible.
K = 1000

# One figure per cognitive domain score column.
for j in ["VSP_E", "LAN_E", "EXF_E", "MEM_E"]:
    # dropna(): rows without a matched donor have no group and cannot be plotted
    # (an empty selection would make x.min() fail).
    for i in Memory["Memory groups"].dropna().unique():
        data = Memory[Memory["Memory groups"] == i]
        x = data["normalized_visit"].to_numpy()
        y = data[j].to_numpy()
        xgrid = np.linspace(x.min(), x.max())
        # Shape (len(xgrid), K): one column per fit.
        smooths = np.stack([smooth(x, y, xgrid) for _ in range(K)]).T
        mean = np.nanmean(smooths, axis=1)
        # Band half-width: standard deviation across the K fits at every grid point.
        # (A scipy sem() result used to be computed here and immediately overwritten.)
        stderr = np.nanstd(smooths, axis=1, ddof=0)
        plt.fill_between(xgrid, mean - 2 * stderr, mean + 2 * stderr, color=colors[i], alpha=0.06)
        plt.plot(xgrid, mean, color=colors[i])
    plt.ylim((-1.5, 1.5));
    plt.xlabel("Visits until death");
    plt.ylabel("Cognitive score in " + j + " domain");
    # The memory domain is the main-figure panel; the other domains go to Extended Data.
    if j == "MEM_E":
        plt.savefig(os.path.join(pwd, "output", "Figure 1e_lmplot_Visits until death_versus_Cognitive score in " + j + " domain.pdf"), bbox_inches="tight");
    else:
        plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 2b_lmplot_Visits until death_versus_Cognitive score in " + j + " domain.pdf"), bbox_inches="tight");

    plt.show();

### Figure 1g

In [None]:
# Attach the donor-level "Severely Affected Donor" flag to the RNAseq PCA annotations.
# Missing flags in the Donors table are filled with "N" before merging; the left merge
# matches the obs index (donor IDs) against Donors["Donor ID"].
# NOTE: not idempotent -- re-running this cell merges the columns a second time.
RNAseq_metrics_ad.obs = RNAseq_metrics_ad.obs.merge(Donors.loc[:, ["Donor ID", "Severely Affected Donor"]].fillna("N"), how="left", left_index=True, right_on="Donor ID")
# The merge result carries the Donors row index; restore donor IDs as the obs index.
RNAseq_metrics_ad.obs.index = RNAseq_metrics_ad.obs["Donor ID"].copy()

In [None]:
# Plot RNAseq PC1 versus quantitative neuropathology
plt.rcParams["figure.figsize"] = (4,4)
rna_pc1 = RNAseq_metrics_ad.obsm["X_pca"][:, 0]
# Shift PC1 so its minimum is 0, then add 0.01 so every x is strictly positive, which the
# log(x) regression below requires.
library_metric = rna_pc1 - rna_pc1.min() + 0.01
# Layer 3 NeuN measurement per donor, looked up by donor ID in the PCA object's order.
neun_layer3 = quant_neuropath.loc[RNAseq_metrics_ad.obs_names, "number of NeuN positive cells per area_Layer3"].to_list()
ax = sns.scatterplot(
    x=library_metric,
    y=neun_layer3,
    hue=RNAseq_metrics_ad.obs["Severely Affected Donor"],
);
# Fit y ~ log(x). The previous `logistic=True` asks seaborn for a logistic regression,
# which assumes a binary y; the response here is a continuous measurement, and the +0.01
# offset above only serves a log(x) fit.
# NOTE(review): confirm logx is the fit used for the published panel.
ax = sns.regplot(
    x=library_metric,
    y=neun_layer3,
    scatter=False,
    logx=True,
    line_kws={"color": "lightgrey", "lw": 2}
);
ax.set_ylabel("number of NeuN positive cells per area_Layer3");
ax.set_xlabel("snRNAseq library metrics");
plt.savefig(os.path.join(pwd, "output", "Figure 1g_lmplot_RNAseq_metrics_PC1_versus_number of NeuN positive cells per area_Layer3.pdf"), bbox_inches="tight");
plt.show()

### Figure 1h

In [None]:
# Plot unnormalized expression of cytosolic (mitochondrial) and nuclear RNA species
plt.rcParams["figure.figsize"] = (2,4)
to_plot = ["MT-CO1", "MT-ND3", "MALAT1", "MEG3"]
group_column = "Severely Affected Donor"
# One boxplot (outliers hidden) per gene, split by the severely-affected flag.
for gene in to_plot:
    sns.boxplot(data=nuclear_cytosolic, x=group_column, y=gene, showfliers=False);
    plt.xlabel(group_column);
    figure_name = "Figure 1h_boxplot_" + gene + "_by_Severely Affected Donor.pdf"
    plt.savefig(os.path.join(pwd, "output", figure_name), bbox_inches="tight");
    plt.show();

### Figure 1i

In [None]:
# TODO: the code for this panel is not included yet; it needs to be provided by Giuseppe/Mariano

### Extended Data Figure 1c-e

In [None]:
# TODO: the code for these panels is not included yet; it needs to be provided by Giuseppe/Mariano

### Extended Data Figure 1f

In [None]:
plt.rcParams["figure.figsize"] = (20,5)
to_get = [
    "library_prep",
    "Subclass",
    "Severely Affected Donor",
    "Fraction mitochondrial UMIs",
]
# Nuclei from non-reference donors only.
is_study_donor = RNAseq["Overall AD neuropathological Change"] != "Reference"
metadata = RNAseq.loc[is_study_donor, to_get].copy()
# Flag nuclei whose mitochondrial UMI fraction exceeds 0.05.
metadata["Mitochondrial Flag"] = metadata["Fraction mitochondrial UMIs"].gt(0.05)
# Averaging the flag per library / subclass / donor group gives the fraction of flagged nuclei.
group_keys = ["library_prep", "Subclass", "Severely Affected Donor"]
metadata = metadata.groupby(group_keys).mean().reset_index()

sns.stripplot(
    data=metadata,
    x="Subclass",
    y="Mitochondrial Flag",
    hue="Severely Affected Donor",
    dodge=True,
    alpha=0.5,
);
plt.xticks(rotation=90, ha="right");
plt.ylabel("Fraction removed for the\nfraction of mitochondrial reads");
plt.legend(bbox_to_anchor=(1.01, 1.0), loc="upper left");
# NOTE(review): this section is titled "Extended Data Figure 1f" but the file is saved with
# a "Figure 1f" prefix -- confirm which label is intended.
output_name = "Figure 1f_stripplot_Fraction removed for the fraction of mitochondrial reads_by_Subclass_and_Severely Affected Donor.pdf"
plt.savefig(os.path.join(pwd, "output", output_name), bbox_inches="tight")
plt.show()

### Clean up

In [None]:
# Remove scanpy's scratch "figures" directory (its contents were moved to output above).
# Guarded so the cell can be re-run, or run when no scanpy figure was saved.
figures_dir = os.path.join(pwd, "figures")
if os.path.isdir(figures_dir):
    shutil.rmtree(figures_dir)