### Load needed libraries

In [None]:
import os
import shutil
import scanpy as sc
import pandas as pd
import numpy as np
import re
import copy
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
from scipy import stats as sp_stats

# Global scanpy / matplotlib configuration shared by every figure cell below.
sc.settings.n_jobs = 32
sc.set_figure_params(
    scanpy=True,
    dpi=500,
    dpi_save=500,
    frameon=False,
    vector_friendly=True,
    figsize=(10, 10),
    format="png",
)

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Set after sc.set_figure_params so these two values are the ones in effect.
matplotlib.rcParams.update({"pdf.fonttype": 42, "axes.grid": False})

# All input and output paths are built relative to the current working directory.
pwd = os.getcwd()

### Load needed datasets/data files

In [None]:
# Shared path prefixes for the inputs read in this cell.
input_dir = os.path.join(pwd, "input")
figure_dir = os.path.join(input_dir, "Figure 4 and Extended Data Figure 9")
a9_dir = os.path.join(figure_dir, "A9_RNAseq")
mathys_dir = os.path.join(figure_dir, "PFC_Mathys_2023")
green_dir = os.path.join(figure_dir, "PFC_Green_2023")

# Sub-folder holding the scCODA result tables for each dataset.
pseudoprogression_dir = "Continuous_Pseudo-progression_Score"
adnc_dir = "Overall_AD_neuropathological_Change_codes"

# File names shared by all three datasets.
neuronal_stem = "Neuronal: Glutamatergic Neuronal: GABAergic_Supertype"
non_neuronal_stem = "Non-neuronal and Non-neural_Supertype"
results_suffix = "_results.csv"
abundances_suffix = "_abundances.h5ad"

# Cluster order and colors from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
color_order = pd.read_csv(os.path.join(input_dir, "cluster_order_and_colors.csv"))

# [NOT PUBLIC] We are working to share this object; doing so requires amending DUAs with RADC and Sage Bionetworks.
public_adata = sc.read_h5ad(
    os.path.join(figure_dir, "PFC_SEAAD_and_public_datasets_singleomeCR6_UMIs_only_expanded_de_genes_2.2023-12-12.h5ad")
)

# [NOT PUBLIC] We are working to share these scores; doing so requires amending DUAs with RADC and Sage Bionetworks.
Signature_Scores = pd.read_csv(os.path.join(figure_dir, "Signature Scores.csv"), index_col=0)

# scCODA results from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
# Neuronal supertypes: one results table and one abundance object per dataset.
A9_RNAseq_neuronal = pd.read_csv(
    os.path.join(a9_dir, pseudoprogression_dir, neuronal_stem + results_suffix), index_col=0
)
A9_RNAseq_neuronal_adata = sc.read_h5ad(
    os.path.join(a9_dir, "objects", neuronal_stem + abundances_suffix)
)
Mathys_RNAseq_neuronal = pd.read_csv(
    os.path.join(mathys_dir, adnc_dir, neuronal_stem + results_suffix), index_col=0
)
Mathys_RNAseq_neuronal_adata = sc.read_h5ad(
    os.path.join(mathys_dir, "objects", neuronal_stem + abundances_suffix)
)
Green_RNAseq_neuronal = pd.read_csv(
    os.path.join(green_dir, adnc_dir, neuronal_stem + results_suffix), index_col=0
)
Green_RNAseq_neuronal_adata = sc.read_h5ad(
    os.path.join(green_dir, "objects", neuronal_stem + abundances_suffix)
)

# Non-neuronal supertypes: same layout as above.
A9_RNAseq_non_neuronal = pd.read_csv(
    os.path.join(a9_dir, pseudoprogression_dir, non_neuronal_stem + results_suffix), index_col=0
)
A9_RNAseq_non_neuronal_adata = sc.read_h5ad(
    os.path.join(a9_dir, "objects", non_neuronal_stem + abundances_suffix)
)
Mathys_RNAseq_non_neuronal = pd.read_csv(
    os.path.join(mathys_dir, adnc_dir, non_neuronal_stem + results_suffix), index_col=0
)
Mathys_RNAseq_non_neuronal_adata = sc.read_h5ad(
    os.path.join(mathys_dir, "objects", non_neuronal_stem + abundances_suffix)
)
Green_RNAseq_non_neuronal = pd.read_csv(
    os.path.join(green_dir, adnc_dir, non_neuronal_stem + results_suffix), index_col=0
)
Green_RNAseq_non_neuronal_adata = sc.read_h5ad(
    os.path.join(green_dir, "objects", non_neuronal_stem + abundances_suffix)
)

### Figure 4a

In [None]:
# Cells kept for every summary below: not labelled "Reference" in Cognitive Status
# and flagged "True" (a string) in "Used in analysis".
not_reference = public_adata.obs["Cognitive Status"] != "Reference"
used_in_analysis = public_adata.obs["Used in analysis"] == "True"
selected_cells = not_reference & used_in_analysis

# Empty table (11 datasets x 0 columns); one rank column per metric is added by later cells.
dataset_names = [
    "SEA-AD",
    "Green_2023",
    "Mathys_2023",
    "Cain_2022",
    "Yang_2022",
    "Morabito_2021",
    "Leng_2021",
    "Lau_2020",
    "Zhou_2020",
    "Olah_2020",
    "Mathys_2019",
]
ranks = pd.DataFrame(np.zeros((11, 0)), index=dataset_names)

In [None]:
# CERAD score: per-source fraction of donors at each level, as stacked horizontal bars.
cerad_col = "CERAD score"
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

# One row per (source, donor), then the normalized count of every level within each source.
donor_levels = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", cerad_col]].drop_duplicates()
df = (
    donor_levels[["Source", cerad_col]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction stage"})
)

# Rows labelled "Reference" or "" are dropped only after normalization,
# so they still count toward each source's denominator.
df = df[~df[cerad_col].isin(["Reference", ""])].copy()
df["SEA-AD"] = df["Source"] == "SEA-AD"
df[cerad_col] = (
    df[cerad_col]
    .cat.remove_unused_categories()
    .cat.reorder_categories(["Absent", "Sparse", "Moderate", "Frequent"])
)

plt.rcParams["figure.figsize"] = (2, 2)
# Wide layout: one row per source, one column per level, rows in display order.
df = df.pivot(index="Source", columns=cerad_col, values="Fraction stage")
df = df.loc[source_order].copy()
ax = df.plot.barh(stacked=True, cmap="YlGnBu", width=0.8)
ax.set_ylabel(None)
# Replace the automatic legend with an empty one.
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4a_barplot_cerad.pdf"), bbox_inches="tight")
plt.show()

# Braak stage: per-source fraction of donors at each stage, as stacked horizontal bars.
braak_col = "Braak"
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

# One row per (source, donor), then the normalized count of every stage within each source.
donor_levels = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", braak_col]].drop_duplicates()
df = (
    donor_levels[["Source", braak_col]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction stage"})
)

# Rows labelled "Reference" or "" are dropped only after normalization.
df = df[~df[braak_col].isin(["Reference", ""])].copy()
df["SEA-AD"] = df["Source"] == "SEA-AD"
# The existing category order is kept; only unused categories are removed.
df[braak_col] = df[braak_col].cat.remove_unused_categories()

plt.rcParams["figure.figsize"] = (2, 2)
df = df.pivot(index="Source", columns=braak_col, values="Fraction stage")
df = df.loc[source_order].copy()
ax = df.plot.barh(stacked=True, cmap="YlGnBu", width=0.8)
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4a_barplot_braak.pdf"), bbox_inches="tight")
plt.show()



# ADNC: per-source fraction of donors at each level, as stacked horizontal bars.
adnc_col = "Overall AD neuropathological Change"
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

# One row per (source, donor), then the normalized count of every level within each source.
donor_levels = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", adnc_col]].drop_duplicates()
df = (
    donor_levels[["Source", adnc_col]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction stage"})
)

# Rows labelled "Reference" or "" are dropped only after normalization.
df = df[~df[adnc_col].isin(["Reference", ""])].copy()
df["SEA-AD"] = df["Source"] == "SEA-AD"
df[adnc_col] = (
    df[adnc_col]
    .cat.remove_unused_categories()
    .cat.reorder_categories(["Not AD", "Low", "Intermediate", "High"])
)

plt.rcParams["figure.figsize"] = (2, 2)
df = df.pivot(index="Source", columns=adnc_col, values="Fraction stage")
df = df.loc[source_order].copy()
ax = df.plot.barh(stacked=True, cmap="YlGnBu", width=0.8)
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4a_barplot_adnc.pdf"), bbox_inches="tight")
plt.show()

# Fraction with APOE4: share of each source's donors whose "APOE4 Status" is "Y".
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

donor_status = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", "APOE4 Status"]].drop_duplicates()
df = (
    donor_status[["Source", "APOE4 Status"]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction APOE4+"})
)
# Keep only the "Y" row of each source; its proportion is the plotted value.
df = df[df["APOE4 Status"] == "Y"].copy()
df["SEA-AD"] = df["Source"] == "SEA-AD"

plt.rcParams["figure.figsize"] = (2, 2)
# The category order is reversed relative to source_order before plotting.
df["Source"] = df["Source"].cat.reorder_categories(source_order[::-1])
ax = sns.barplot(data=df, x="Fraction APOE4+", y="Source", hue="SEA-AD")
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4a_barplot_fraction apoe4.pdf"), bbox_inches="tight")
plt.show()

# Fraction with dementia: share of each source's donors whose "Cognitive Status" is "Dementia".
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

donor_status = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", "Cognitive Status"]].drop_duplicates()
df = (
    donor_status[["Source", "Cognitive Status"]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction dementia"})
)
# Keep only the "Dementia" row of each source; its proportion is the plotted value.
df = df[df["Cognitive Status"] == "Dementia"].copy()
df["SEA-AD"] = df["Source"] == "SEA-AD"

plt.rcParams["figure.figsize"] = (2, 2)
# The category order is reversed relative to source_order before plotting.
df["Source"] = df["Source"].cat.reorder_categories(source_order[::-1])
ax = sns.barplot(data=df, x="Fraction dementia", y="Source", hue="SEA-AD")
ax.set_ylabel(None)

plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4a_barplot_fraction dementia.pdf"), bbox_inches="tight")
plt.show()

# Severe co-morbidity: fraction of each source's donors with at least one
# co-pathology at the level listed below. Adds a string column
# "Severe co-morbidity" ("True" / "False" / "") to public_adata.obs.
severe_levels = {
    "Highest Lewy Body Disease": "Neocortical (Diffuse)",
    # Fixed: this used to be "Late Stage 3". The LATE categories used elsewhere in
    # this notebook are spelled "LATE Stage 3", so the old comparison never matched
    # and LATE-NC stage 3 donors were silently left out.
    "LATE": "LATE Stage 3",
    "Overall CAA Score": "Severe",
    "Atherosclerosis": "Severe",
    "Arteriolosclerosis": "Severe",
}
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

public_adata.obs["Severe co-morbidity"] = "False"
for comorbidity_col, severe_level in severe_levels.items():
    public_adata.obs.loc[public_adata.obs[comorbidity_col] == severe_level, "Severe co-morbidity"] = "True"

# These sources are blanked instead of scored, so they get no bar below
# (presumably they lack these annotations; confirm against the source metadata).
public_adata.obs.loc[public_adata.obs["Source"].isin(["Lau_2020", "Leng_2021", "Morabito_2021", "Yang_2022"]), "Severe co-morbidity"] = ""

# One row per (source, donor), then the normalized count of each flag value within each source.
donor_flags = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", "Severe co-morbidity"]].drop_duplicates()
df = (
    donor_flags[["Source", "Severe co-morbidity"]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction of donors with a severe co-morbidity"})
)
df = df[df["Severe co-morbidity"] == "True"].copy()
df["SEA-AD"] = df["Source"] == "SEA-AD"

plt.rcParams["figure.figsize"] = (2, 2)
# The category order is reversed relative to source_order before plotting.
df["Source"] = df["Source"].cat.reorder_categories(source_order[::-1])
ax = sns.barplot(data=df, x="Fraction of donors with a severe co-morbidity", y="Source", hue="SEA-AD")
ax.set_ylabel(None)

plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4a_barplot_fraction of donors with a severe co-morbidity.pdf"), bbox_inches="tight")
plt.show()

### Extended Data Figure 9a

In [None]:
# LBD: per-source fraction of donors at each Lewy body disease level, as stacked horizontal bars.
lbd_col = "Highest Lewy Body Disease"
lbd_not_identified = "Not Identified (olfactory bulb not assessed)"
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

df = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", lbd_col]].drop_duplicates()

# Collapse these three levels into the single "not identified" level before counting.
# This is done once here; the original repeated one of the merges after counting,
# where it could no longer match because the category had already been removed.
merged_levels = ["Not Identified (olfactory bulb assessed)", "Olfactory bulb only", "Amygdala-predominant"]
df.loc[df[lbd_col].isin(merged_levels), lbd_col] = lbd_not_identified
df[lbd_col] = df[lbd_col].cat.remove_unused_categories()

# Normalized count of every level within each source.
df = (
    df[["Source", lbd_col]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction stage"})
)

# Rows labelled "Reference" or "" are dropped only after normalization.
df = df[~df[lbd_col].isin(["Reference", ""])].copy()
df[lbd_col] = (
    df[lbd_col]
    .cat.remove_unused_categories()
    .cat.reorder_categories([lbd_not_identified, "Brainstem-predominant", "Limbic (Transitional)", "Neocortical (Diffuse)"])
)

plt.rcParams["figure.figsize"] = (2, 2)
# Wide layout: one row per source, one column per level, rows in display order.
df = df.pivot(index="Source", columns=lbd_col, values="Fraction stage")
df = df.loc[source_order].copy()
ax = df.plot.barh(stacked=True, cmap="YlGnBu", width=0.8)
ax.set_ylabel(None)
# Replace the automatic legend with an empty one.
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9a_barplot_lbd.pdf"), bbox_inches="tight")
plt.show()

# LATE-NC: per-source fraction of donors at each stage, as stacked horizontal bars.
late_col = "LATE"
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

# One row per (source, donor), then the normalized count of every stage within each source.
donor_levels = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", late_col]].drop_duplicates()
df = (
    donor_levels[["Source", late_col]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction stage"})
)

# Rows labelled "Reference" or "" are dropped only after normalization.
df = df[~df[late_col].isin(["Reference", ""])].copy()
df["SEA-AD"] = df["Source"] == "SEA-AD"
df[late_col] = (
    df[late_col]
    .cat.remove_unused_categories()
    .cat.reorder_categories(["Not Identified", "LATE Stage 1", "LATE Stage 2", "LATE Stage 3", "Unclassifiable"])
)

plt.rcParams["figure.figsize"] = (2, 2)
df = df.pivot(index="Source", columns=late_col, values="Fraction stage")
df = df.loc[source_order].copy()
ax = df.plot.barh(stacked=True, cmap="YlGnBu", width=0.8)
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9a_barplot_late.pdf"), bbox_inches="tight")
plt.show()

# CAA: per-source fraction of donors at each score, as stacked horizontal bars.
caa_col = "Overall CAA Score"
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

# One row per (source, donor), then the normalized count of every score within each source.
donor_levels = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", caa_col]].drop_duplicates()
df = (
    donor_levels[["Source", caa_col]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction stage"})
)

# Rows labelled "Reference" or "" are dropped only after normalization.
df = df[~df[caa_col].isin(["Reference", ""])].copy()
df["SEA-AD"] = df["Source"] == "SEA-AD"
df[caa_col] = (
    df[caa_col]
    .cat.remove_unused_categories()
    .cat.reorder_categories(["Not identified", "Mild", "Moderate", "Severe"])
)

plt.rcParams["figure.figsize"] = (2, 2)
df = df.pivot(index="Source", columns=caa_col, values="Fraction stage")
df = df.loc[source_order].copy()
ax = df.plot.barh(stacked=True, cmap="YlGnBu", width=0.8)
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9a_barplot_caa.pdf"), bbox_inches="tight")
plt.show()

# Atherosclerosis: per-source fraction of donors at each level, as stacked horizontal bars.
athero_col = "Atherosclerosis"
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

# One row per (source, donor), then the normalized count of every level within each source.
donor_levels = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", athero_col]].drop_duplicates()
df = (
    donor_levels[["Source", athero_col]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction stage"})
)

# Rows labelled "Reference" or "" are dropped only after normalization.
df = df[~df[athero_col].isin(["Reference", ""])].copy()
df["SEA-AD"] = df["Source"] == "SEA-AD"
df[athero_col] = (
    df[athero_col]
    .cat.remove_unused_categories()
    .cat.reorder_categories(["None", "Mild", "Moderate", "Severe"])
)

plt.rcParams["figure.figsize"] = (2, 2)
df = df.pivot(index="Source", columns=athero_col, values="Fraction stage")
df = df.loc[source_order].copy()
ax = df.plot.barh(stacked=True, cmap="YlGnBu", width=0.8)
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9a_barplot_athero.pdf"), bbox_inches="tight")
plt.show()

# Arteriolosclerosis: per-source fraction of donors at each level, as stacked horizontal bars.
arterio_col = "Arteriolosclerosis"
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

# One row per (source, donor), then the normalized count of every level within each source.
donor_levels = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", arterio_col]].drop_duplicates()
df = (
    donor_levels[["Source", arterio_col]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction stage"})
)

# Rows labelled "Reference" or "" are dropped only after normalization.
df = df[~df[arterio_col].isin(["Reference", ""])].copy()
df["SEA-AD"] = df["Source"] == "SEA-AD"
df[arterio_col] = (
    df[arterio_col]
    .cat.remove_unused_categories()
    .cat.reorder_categories(["None", "Mild", "Moderate", "Severe"])
)

plt.rcParams["figure.figsize"] = (2, 2)
df = df.pivot(index="Source", columns=arterio_col, values="Fraction stage")
df = df.loc[source_order].copy()
ax = df.plot.barh(stacked=True, cmap="YlGnBu", width=0.8)
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9a_barplot_arterio.pdf"), bbox_inches="tight")
plt.show()

# Fraction of female donors: share of each source's donors whose "Sex" is "Female".
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

donor_sex = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", "Sex"]].drop_duplicates()
df = (
    donor_sex[["Source", "Sex"]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction female"})
)
# Keep only the "Female" row of each source; its proportion is the plotted value.
df = df[df["Sex"] == "Female"].copy()
df["SEA-AD"] = df["Source"] == "SEA-AD"

plt.rcParams["figure.figsize"] = (2, 2)
# The category order is reversed relative to source_order before plotting.
df["Source"] = df["Source"].cat.reorder_categories(source_order[::-1])
ax = sns.barplot(data=df, x="Fraction female", y="Source", hue="SEA-AD")
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9a_barplot_fraction female.pdf"), bbox_inches="tight")
plt.show()

# Age at death: per-source fraction of donors in three age bins, as stacked horizontal bars.
age_labels = ["<80", "80-90", ">90"]
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

df = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", "Age at Death"]].drop_duplicates()

# "90+" is the only non-numeric label handled here; it is counted as 90.
# Everything else is parsed as a number and rounded to a whole year.
ages = df["Age at Death"].astype("object")
ages = ages.mask(ages == "90+", 90)
df["Age at Death"] = pd.to_numeric(ages).round(0).astype("int64")

# Left-closed bins: age < 80, 80 <= age < 90, age >= 90.
# pd.cut builds the labelled categorical directly. This avoids writing strings into
# an integer column (deprecated implicit upcast) and always carries all three
# categories, so an empty bin no longer makes the category reordering fail.
df["Binned age at death"] = pd.cut(
    df["Age at Death"],
    bins=[-np.inf, 80, 90, np.inf],
    right=False,
    labels=age_labels,
)

# Normalized count of every bin within each source.
df = (
    df[["Source", "Binned age at death"]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
    .rename(columns={"proportion": "Fraction age"})
)
df["Binned age at death"] = df["Binned age at death"].astype("str")

plt.rcParams["figure.figsize"] = (2, 2)
# Wide layout: one row per source, one column per bin, both in display order.
df = df.pivot(index="Source", columns="Binned age at death", values="Fraction age")
df = df.loc[source_order, age_labels].copy()
ax = df.plot.barh(stacked=True, cmap="YlGnBu", width=0.8)
ax.set_ylabel(None)
# The legend is replaced with an empty one, so repositioning it first (as the
# original did) had no visible effect and has been dropped.
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9a_barplot_age at death.pdf"), bbox_inches="tight")
plt.show()

### Figure 4b and Extended Data Figure 9b

In [None]:
# Number of donors: distinct (source, donor) pairs counted per source.
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

df = (
    public_adata.obs.loc[selected_cells, ["Source", "Donor ID"]]
    .drop_duplicates()["Source"]
    .value_counts(sort=False)
    .reset_index()
    .rename(columns={"count": "No. donors"})
)
df["SEA-AD"] = df["Source"] == "SEA-AD"
# Marker size: 5 for SEA-AD, 1 for every other source.
df["Size"] = df["SEA-AD"].map({True: 5, False: 1})
df = df.sort_values(by="Source", ascending=False)
print(f"Min: {df['No. donors'].min()} Max: {df['No. donors'].max()}")

# Figure 4b: one-dimensional strip with every source placed at y=1; tick labels and marks hidden.
plt.rcParams["figure.figsize"] = (2, 0.4)
ax = sns.scatterplot(data=df, x="No. donors", y=1, hue="SEA-AD", palette="tab10", size="Size")
ax.set(xticklabels=[], yticklabels=[])
ax.tick_params(bottom=False, left=False)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4b_metricplot_number of donors.pdf"), bbox_inches="tight")
plt.show()

# Extended Data Figure 9b: the same values as a horizontal bar per source.
plt.rcParams["figure.figsize"] = (2, 2)
df["Source"] = df["Source"].cat.reorder_categories(source_order[::-1])
ax = sns.barplot(data=df, x="No. donors", y="Source", hue="SEA-AD")
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9b_barplot_number of donors.pdf"), bbox_inches="tight")

# Rank sources by donor count: position 0 is the source with the most donors.
tmp = (
    df.sort_values(by="No. donors", ascending=False)
    .reset_index(drop=True)
    .reset_index()
    .set_index("Source", drop=False)
)
ranks.loc[tmp.index, "No. donors"] = tmp["index"]
plt.show()

# Post mortem interval: per-donor PMI distribution per source, then the per-source mean.
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

df = public_adata.obs.loc[selected_cells, ["Source", "Donor ID", "PMI"]].drop_duplicates()
df["Source"] = df["Source"].cat.reorder_categories(source_order[::-1])
df["SEA-AD"] = df["Source"] == "SEA-AD"
# Empty strings become NaN; every other value is converted to float one by one.
df["PMI"] = [np.float64(value) for value in df["PMI"].replace("", np.nan)]

# Extended Data Figure 9b: box per source (outliers hidden) with the donors overlaid as points.
plt.rcParams["figure.figsize"] = (2, 2)
ax = sns.boxplot(data=df, x="PMI", y="Source", hue="SEA-AD", showfliers=False)
ax = sns.stripplot(data=df, x="PMI", y="Source", color="grey", size=2, alpha=0.5, jitter=0.2)
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd,  "output", "Extended Data Figure 9b_barplot_PMI.pdf"), bbox_inches="tight")
plt.show()

# Collapse to the mean PMI of each source.
df = df[["Source", "PMI"]].groupby("Source").mean().reset_index()
df["SEA-AD"] = df["Source"] == "SEA-AD"
# Marker size: 5 for SEA-AD, 1 for every other source.
df["Size"] = df["SEA-AD"].map({True: 5, False: 1})
df = df.sort_values(by="Source", ascending=False)

plt.rcParams["figure.figsize"] = (2, 0.4)
print(f"Min: {df['PMI'].min()} Max: {df['PMI'].max()}")
# From here on "PMI" holds the negated mean, so the smallest interval sits furthest
# right on the strip and is ranked first below.
df["PMI"] = -df["PMI"]
ax = sns.scatterplot(data=df, x="PMI", y=1, hue="SEA-AD", palette="tab10", size="Size")
ax.set(xticklabels=[], yticklabels=[])
ax.tick_params(bottom=False, left=False)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4b_metricplot_PMI.pdf"), bbox_inches="tight")

# Rank on the negated values: position 0 is the source with the smallest mean PMI.
tmp = (
    df.sort_values(by="PMI", ascending=False)
    .reset_index(drop=True)
    .reset_index()
    .set_index("Source", drop=False)
)
ranks.loc[tmp.index, "PMI"] = tmp["index"]
plt.show()

# Cells in the PFC: number of selected cells per source.
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

plt.rcParams["figure.figsize"] = (2, 0.4)
df = (
    public_adata.obs.loc[selected_cells, ["Source"]]
    .value_counts(sort=False)
    .reset_index()
    .rename(columns={"count": "Cells in DLPFC"})
)
df["SEA-AD"] = df["Source"] == "SEA-AD"
# Marker size: 5 for SEA-AD, 1 for every other source.
df["Size"] = df["SEA-AD"].map({True: 5, False: 1})
print(f"Min: {df['Cells in DLPFC'].min()} Max: {df['Cells in DLPFC'].max()}")

# Figure 4b: one-dimensional strip with every source placed at y=1; tick labels and marks hidden.
ax = sns.scatterplot(data=df, x="Cells in DLPFC", y=1, hue="SEA-AD", palette="tab10", size="Size")
ax.set(xticklabels=[], yticklabels=[])
ax.tick_params(bottom=False, left=False)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4b_metricplot_cells in DLPFC.pdf"), bbox_inches="tight")
plt.show()

# Extended Data Figure 9b: the same values as a horizontal bar per source.
plt.rcParams["figure.figsize"] = (2, 2)
df["Source"] = df["Source"].cat.reorder_categories(source_order[::-1])
ax = sns.barplot(data=df, x="Cells in DLPFC", y="Source", hue="SEA-AD")
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9b_barplot_cells in DLPFC.pdf"), bbox_inches="tight")

# Rank sources by cell count: position 0 is the source with the most cells.
tmp = (
    df.sort_values(by="Cells in DLPFC", ascending=False)
    .reset_index(drop=True)
    .reset_index()
    .set_index("Source", drop=False)
)
ranks.loc[tmp.index, "Cells in DLPFC"] = tmp["index"]
plt.show()

# Cells per donor: cells are counted per (source, donor) pair, then averaged within each source.
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

plt.rcParams["figure.figsize"] = (2, 0.4)
cells_by_donor = public_adata.obs.loc[selected_cells, ["Source", "Donor ID"]].value_counts(sort=False).reset_index()
df = (
    cells_by_donor.drop(["Donor ID"], axis=1)
    .groupby("Source")
    .mean()
    .reset_index()
    .rename(columns={"count": "Cells per donor"})
)
df["SEA-AD"] = df["Source"] == "SEA-AD"
# Marker size: 5 for SEA-AD, 1 for every other source.
df["Size"] = df["SEA-AD"].map({True: 5, False: 1})
print(f"Min: {df['Cells per donor'].min()} Max: {df['Cells per donor'].max()}")

# Figure 4b: one-dimensional strip with every source placed at y=1; tick labels and marks hidden.
ax = sns.scatterplot(data=df, x="Cells per donor", y=1, hue="SEA-AD", palette="tab10", size="Size")
ax.set(xticklabels=[], yticklabels=[])
ax.tick_params(bottom=False, left=False)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4b_metricplot_cells per donor.pdf"), bbox_inches="tight")
plt.show()

# Extended Data Figure 9b: the same values as a horizontal bar per source.
plt.rcParams["figure.figsize"] = (2, 2)
df["Source"] = df["Source"].cat.reorder_categories(source_order[::-1])
ax = sns.barplot(data=df, x="Cells per donor", y="Source", hue="SEA-AD")
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9b_barplot_cells per donor.pdf"), bbox_inches="tight")

# Rank sources: position 0 is the source with the most cells per donor.
tmp = (
    df.sort_values(by="Cells per donor", ascending=False)
    .reset_index(drop=True)
    .reset_index()
    .set_index("Source", drop=False)
)
ranks.loc[tmp.index, "Cells per donor"] = tmp["index"]
plt.show()

# UMIs per cell: mean of "Number of UMIs" over the selected cells of each source.
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

plt.rcParams["figure.figsize"] = (2, 0.4)
df = (
    public_adata.obs.loc[selected_cells, ["Number of UMIs", "Source"]]
    .groupby("Source")
    .mean()
    .reset_index()
    .rename(columns={"Number of UMIs": "UMIs per cell"})
)
df["SEA-AD"] = df["Source"] == "SEA-AD"
# Marker size: 5 for SEA-AD, 1 for every other source.
df["Size"] = df["SEA-AD"].map({True: 5, False: 1})
print(f"Min: {df['UMIs per cell'].min()} Max: {df['UMIs per cell'].max()}")

# Figure 4b: one-dimensional strip with every source placed at y=1; tick labels and marks hidden.
ax = sns.scatterplot(data=df, x="UMIs per cell", y=1, hue="SEA-AD", palette="tab10", size="Size")
ax.set(xticklabels=[], yticklabels=[])
ax.tick_params(bottom=False, left=False)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4b_metricplot_umis per cell.pdf"), bbox_inches="tight")
plt.show()

# Extended Data Figure 9b: the same values as a horizontal bar per source.
plt.rcParams["figure.figsize"] = (2, 2)
df["Source"] = df["Source"].cat.reorder_categories(source_order[::-1])
ax = sns.barplot(data=df, x="UMIs per cell", y="Source", hue="SEA-AD")
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9b_barplot_umis per cell.pdf"), bbox_inches="tight")

# Rank sources: position 0 is the source with the highest mean UMI count.
tmp = (
    df.sort_values(by="UMIs per cell", ascending=False)
    .reset_index(drop=True)
    .reset_index()
    .set_index("Source", drop=False)
)
ranks.loc[tmp.index, "UMIs per cell"] = tmp["index"]
plt.show()

# Genes per cell: mean of "Genes detected" over the selected cells of each source.
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

plt.rcParams["figure.figsize"] = (2, 0.4)
df = (
    public_adata.obs.loc[selected_cells, ["Genes detected", "Source"]]
    .groupby("Source")
    .mean()
    .reset_index()
    .rename(columns={"Genes detected": "Genes per cell"})
)
df["SEA-AD"] = df["Source"] == "SEA-AD"
# Marker size: 5 for SEA-AD, 1 for every other source.
df["Size"] = df["SEA-AD"].map({True: 5, False: 1})
print(f"Min: {df['Genes per cell'].min()} Max: {df['Genes per cell'].max()}")

# Figure 4b: one-dimensional strip with every source placed at y=1; tick labels and marks hidden.
ax = sns.scatterplot(data=df, x="Genes per cell", y=1, hue="SEA-AD", palette="tab10", size="Size")
ax.set(xticklabels=[], yticklabels=[])
ax.tick_params(bottom=False, left=False)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4b_metricplot_genes per cell.pdf"), bbox_inches="tight")
plt.show()

# Extended Data Figure 9b: the same values as a horizontal bar per source.
plt.rcParams["figure.figsize"] = (2, 2)
df["Source"] = df["Source"].cat.reorder_categories(source_order[::-1])
ax = sns.barplot(data=df, x="Genes per cell", y="Source", hue="SEA-AD")
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9b_barplot_genes per cell.pdf"), bbox_inches="tight")

# Rank sources: position 0 is the source with the most genes detected per cell.
tmp = (
    df.sort_values(by="Genes per cell", ascending=False)
    .reset_index(drop=True)
    .reset_index()
    .set_index("Source", drop=False)
)
ranks.loc[tmp.index, "Genes per cell"] = tmp["index"]
plt.show()

# Fraction of mitochondrial UMIs per cell: mean over the selected cells of each source.
mt_col = "Fraction MT UMIs per cell"
source_order = ["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]

plt.rcParams["figure.figsize"] = (2, 0.4)
df = (
    public_adata.obs.loc[selected_cells, ["Fraction mitochondrial UMIs", "Source"]]
    .groupby("Source")
    .mean()
    .reset_index()
    .rename(columns={"Fraction mitochondrial UMIs": mt_col})
)
df["SEA-AD"] = df["Source"] == "SEA-AD"
# Marker size: 5 for SEA-AD, 1 for every other source.
df["Size"] = df["SEA-AD"].map({True: 5, False: 1})
print(f"Min: {df[mt_col].min()} Max: {df[mt_col].max()}")

# Figure 4b: the strip is drawn from negated values, so the smallest fraction sits
# furthest right. A negated copy is plotted; df itself keeps the original values.
negated = df.assign(**{mt_col: -df[mt_col]})
ax = sns.scatterplot(data=negated, x=mt_col, y=1, hue="SEA-AD", palette="tab10", size="Size")
ax.set(xticklabels=[], yticklabels=[])
ax.tick_params(bottom=False, left=False)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4b_metricplot_fraction mt umis per cell.pdf"), bbox_inches="tight")
plt.show()

# Extended Data Figure 9b: the un-negated values as a horizontal bar per source.
plt.rcParams["figure.figsize"] = (2, 2)
df["Source"] = df["Source"].cat.reorder_categories(source_order[::-1])
ax = sns.barplot(data=df, x=mt_col, y="Source", hue="SEA-AD")
ax.set_ylabel(None)
plt.legend([], [], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9b_barplot_fraction mt umis per cell.pdf"), bbox_inches="tight")

# Ascending sort: position 0 is the source with the lowest mitochondrial fraction.
tmp = (
    df.sort_values(by=mt_col, ascending=True)
    .reset_index(drop=True)
    .reset_index()
    .set_index("Source", drop=False)
)
ranks.loc[tmp.index, mt_col] = tmp["index"]
plt.show()

# Fraction of cells that passed QC
# Per dataset, the share of non-"Reference" cells whose "Used in analysis" value is "True".
plt.rcParams["figure.figsize"] = (2,0.4)
non_reference = public_adata.obs["Cognitive Status"] != "Reference"
df = (
    public_adata.obs.loc[non_reference, ["Source", "Used in analysis"]]
    .groupby("Source")
    .value_counts(normalize=True, sort=False)
    .reset_index()
)
df = df.loc[df["Used in analysis"] == "True", :].copy()
df = df.rename(columns={"proportion": "Fraction Passed QC"})
df["SEA-AD"] = df["Source"] == "SEA-AD"
# SEA-AD is drawn with a larger marker than the other datasets.
df["Size"] = 1
df.loc[df["SEA-AD"], "Size"] = 5
print(f'Min: {df["Fraction Passed QC"].min()} Max: {df["Fraction Passed QC"].max()}')

# One-row strip of points on a fixed 0-1 axis.
ax = sns.scatterplot(data=df, x="Fraction Passed QC", y=1, hue="SEA-AD", palette="tab10", size="Size")
ax.set(xticklabels=[], yticklabels=[], xlim=(0,1))
ax.tick_params(bottom=False, left=False)
plt.legend([],[], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4b_metricplot_fraction passed qc.pdf"), bbox_inches="tight")
plt.show()

# Bar plot of the same metric, one bar per dataset, in the reversed dataset order.
plt.rcParams["figure.figsize"] = (2,2)
df["Source"] = df["Source"].cat.reorder_categories(
    np.flip(["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"])
)
ax = sns.barplot(data=df, x="Fraction Passed QC", y="Source", hue="SEA-AD")
ax.set(ylabel=None)
plt.legend([],[], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 9b_barplot_fraction passed qc.pdf"), bbox_inches="tight")

# Rank datasets descending (0 = highest pass fraction) and record it in `ranks`.
tmp = (
    df.sort_values(by="Fraction Passed QC", ascending=False)
    .reset_index(drop=True)
    .reset_index()
)
tmp.index = tmp["Source"].copy()
ranks.loc[tmp.index, "Fraction Passed QC"] = tmp["index"]
plt.show()

### Figure 4c

In [None]:
# Replace public_adata with an independent copy restricted to the selected cells;
# the cells below operate on this subset.
public_adata = public_adata[selected_cells, :].copy()

In [None]:
# Box plot (with every point overlaid) of the signature scores per dataset.
# The "index" column holds the dataset name; it is made categorical in the reversed dataset order.
Signature_Scores["index"] = (
    Signature_Scores["index"]
    .astype("category")
    .cat.reorder_categories(
        np.flip(["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"])
    )
)
Signature_Scores["SEA-AD"] = Signature_Scores["index"] == "SEA-AD"

plt.rcParams["figure.figsize"] = (2,2)
score_axes = dict(data=Signature_Scores, x="value", y="index")
ax = sns.boxplot(hue="SEA-AD", palette="tab10", showfliers=False, **score_axes)
ax = sns.stripplot(color="grey", size=2, alpha=0.5, jitter=0.2, **score_axes)
ax.set(ylabel=None, xlabel="Supertype signature scores")
plt.legend([],[], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4c_boxplot_Supertype signature scores.pdf"), bbox_inches="tight")
plt.show()



In [None]:
# Mean supertype confidence for every (dataset, supertype) pair, shown per dataset.
df = (
    public_adata.obs.loc[:, ["Source", "Supertype", "Supertype confidence"]]
    .groupby(["Source", "Supertype"])
    .mean()
    .reset_index()
)
df["Source"] = (
    df["Source"]
    .astype("category")
    .cat.reorder_categories(
        np.flip(["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"])
    )
)
df["SEA-AD"] = df["Source"] == "SEA-AD"

plt.rcParams["figure.figsize"] = (2,2)
confidence_axes = dict(data=df, x="Supertype confidence", y="Source")
ax = sns.boxplot(hue="SEA-AD", palette="tab10", showfliers=False, **confidence_axes)
ax = sns.stripplot(color="grey", size=2, alpha=0.5, jitter=0.2, **confidence_axes)
ax.set(ylabel=None, xlabel="Supertype confidence")
plt.legend([],[], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4c_boxplot_Supertype confidence.pdf"), bbox_inches="tight")
plt.show()

### Figure 4d

In [None]:
plt.rcParams["figure.figsize"] = (8,8)

# Subclass label -> colour lookup taken from the cluster colour table
# (when a label occurs on several rows the last row wins).
subclass_colors = dict(zip(color_order["subclass_label"], color_order["subclass_color"]))

# Two UMAPs of the same object: coloured by dataset, then by subclass with on-data labels.
# scanpy saves into ./figures, so each PDF is moved into ./output afterwards.
umap_panels = (
    ("Source", {"title": None}),
    ("Subclass", {"legend_loc": "on data", "palette": subclass_colors}),
)
for panel_label, panel_kwargs in umap_panels:
    sc.pl.umap(
        public_adata,
        color=[panel_label],
        frameon=False,
        size=0.05,
        save=f"_Public {panel_label}.pdf",
        **panel_kwargs
    )
    os.rename(
        os.path.join(pwd, "figures", f"umap_Public {panel_label}.pdf"),
        os.path.join(pwd, "output", f"Figure 4d_umap_Public {panel_label}.pdf"),
    )



### Extended Data Figure 9c

In [None]:
# Groups of subclass labels, keyed by neighborhood name. The keys are also used
# below to build embedding names ("X_<key>_umap") and output file names.
# The trailing number on each line is the count of subclasses in that group.
neighborhoods = {
    "MGE": ["Sst Chodl", "Sst", "Pvalb", "Chandelier"], # 4
    "CGE": ["Lamp5", "Lamp5 Lhx6", "Pax6", "Sncg", "Vip"], # 5
    "IT": ["L2/3 IT", "L4 IT", "L5 IT", "L6 IT", "L6 IT Car3"], # 5
    "Deep Projecting": ["L5 ET", "L5/6 NP", "L6b", "L6 CT"], # 4
    "Glia": ["Oligodendrocyte", "OPC", "Astrocyte"], # 3
    "Vascular and Immune": ["Endothelial", "VLMC", "Microglia-PVM"] # 3
}

In [None]:
# MGE neighborhood embedding coloured by the per-cell "Signature Score".
mge_cells = public_adata.obs["Subclass"].isin(neighborhoods["MGE"])
sc.pl.embedding(
    public_adata[mge_cells, :],
    basis="X_MGE_umap",
    color=["Signature Score"],
    legend_loc="on data",
    frameon=False,
    cmap="RdBu_r",
    size=1,
    save="_Public MGE Signature Score.pdf"
)
# scanpy saves into ./figures; move the PDF into ./output under its figure name.
saved_pdf = os.path.join(pwd, "figures", "X_MGE_umap_Public MGE Signature Score.pdf")
final_pdf = os.path.join(pwd, "output", "Extended Data Figure 9c_umap_Public MGE Signature Score.pdf")
os.rename(saved_pdf, final_pdf)



### Extended Data Figure 9d

In [None]:
plt.rcParams["figure.figsize"] = (8,8)

# Supertype label -> colour lookup from the cluster colour table.
supertype_colors = dict(zip(color_order["cluster_label"], color_order["cluster_color"]))
# These two VLMC supertypes reuse the colours of other supertypes.
supertype_colors["VLMC_2-SEAAD"] = supertype_colors['Pericyte_2-SEAAD']
supertype_colors["VLMC_3-SEAAD"] = supertype_colors['Micro-PVM_4-SEAAD']

# For every neighborhood, plot its own embedding twice: coloured by supertype
# (with the palette above) and by dataset (default colours). Each PDF that scanpy
# writes into ./figures is moved into ./output.
for neighborhood, subclasses in neighborhoods.items():
    basis = "X_" + neighborhood + "_umap"
    for panel_label, panel_kwargs in (("Supertype", {"palette": supertype_colors}), ("Source", {})):
        save_suffix = "_Public " + neighborhood + " " + panel_label + ".pdf"
        sc.pl.embedding(
            public_adata[public_adata.obs["Subclass"].isin(subclasses), :],
            basis=basis,
            color=[panel_label],
            legend_loc="on data",
            frameon=False,
            size=0.5,
            save=save_suffix,
            **panel_kwargs
        )
        os.rename(
            os.path.join(pwd, "figures", basis + save_suffix),
            os.path.join(pwd, "output", "Extended Data Figure 9d_umap" + save_suffix),
        )



### Figure 4e

In [None]:
# Neuronal
# Keep only the supertypes ("Cell Type") that appear in all three results tables.
cell_types = np.intersect1d(
    np.intersect1d(
        A9_RNAseq_neuronal["Cell Type"].unique(),
        Mathys_RNAseq_neuronal["Cell Type"].unique(),
    ),
    Green_RNAseq_neuronal["Cell Type"].unique(),
)
A9_RNAseq_neuronal = A9_RNAseq_neuronal[A9_RNAseq_neuronal["Cell Type"].isin(cell_types)].copy()
Mathys_RNAseq_neuronal = Mathys_RNAseq_neuronal[Mathys_RNAseq_neuronal["Cell Type"].isin(cell_types)].copy()
Green_RNAseq_neuronal = Green_RNAseq_neuronal[Green_RNAseq_neuronal["Cell Type"].isin(cell_types)].copy()

# Per dataset: mean "Final Parameter" and "Inclusion probability" per supertype for
# that dataset's covariate, placed side by side (two columns per dataset).
df = pd.concat(
    [
        table.loc[table["Covariate"] == covariate, ["Cell Type", "Final Parameter", "Inclusion probability"]]
        .groupby("Cell Type")
        .mean()
        for table, covariate in (
            (A9_RNAseq_neuronal, "Continuous_Pseudo-progression_Score"),
            (Mathys_RNAseq_neuronal, "Overall_AD_neuropathological_Change_codes"),
            (Green_RNAseq_neuronal, "Overall_AD_neuropathological_Change_codes"),
        )
    ],
    axis=1
).reset_index()
df.columns = ["Cell Type", "SEA-AD", "Base Inclusion Probability", "Mathys_2023", "Mathys Inclusion Probability", "Green_2023", "Green Inclusion Probability"]

# Order the supertypes as they appear in the cluster colour table.
df["Cell Type"] = df["Cell Type"].astype("category")
ordered_cell_types = color_order.loc[color_order["cluster_label"].isin(df["Cell Type"]), "cluster_label"]
df["Cell Type"] = df["Cell Type"].cat.reorder_categories(ordered_cell_types)

# Heatmap: one row per dataset, one column per supertype.
df.index = df["Cell Type"].copy()
plt.rcParams["figure.figsize"] = (20,2)
effect_sizes = df.loc[df["Cell Type"].cat.categories, ["SEA-AD", "Mathys_2023", "Green_2023"]].T
ax = sns.heatmap(effect_sizes, cmap="RdBu_r", xticklabels=True, center=0, vmin=-1)
ax.set(xlabel=None, title="Effect size across AD")
plt.savefig(os.path.join(pwd, "output", "Figure 4e_heatmap_Neuronal supertype replication.pdf"), bbox_inches="tight")
plt.show()

# Non-neuronal
# Keep only the supertypes ("Cell Type") that appear in all three results tables.
cell_types = np.intersect1d(
    np.intersect1d(
        A9_RNAseq_non_neuronal["Cell Type"].unique(),
        Mathys_RNAseq_non_neuronal["Cell Type"].unique(),
    ),
    Green_RNAseq_non_neuronal["Cell Type"].unique(),
)
A9_RNAseq_non_neuronal = A9_RNAseq_non_neuronal[A9_RNAseq_non_neuronal["Cell Type"].isin(cell_types)].copy()
Mathys_RNAseq_non_neuronal = Mathys_RNAseq_non_neuronal[Mathys_RNAseq_non_neuronal["Cell Type"].isin(cell_types)].copy()
Green_RNAseq_non_neuronal = Green_RNAseq_non_neuronal[Green_RNAseq_non_neuronal["Cell Type"].isin(cell_types)].copy()

# Per dataset: mean "Final Parameter" and "Inclusion probability" per supertype for
# that dataset's covariate, placed side by side (two columns per dataset).
df = pd.concat(
    [
        table.loc[table["Covariate"] == covariate, ["Cell Type", "Final Parameter", "Inclusion probability"]]
        .groupby("Cell Type")
        .mean()
        for table, covariate in (
            (A9_RNAseq_non_neuronal, "Continuous_Pseudo-progression_Score"),
            (Mathys_RNAseq_non_neuronal, "Overall_AD_neuropathological_Change_codes"),
            (Green_RNAseq_non_neuronal, "Overall_AD_neuropathological_Change_codes"),
        )
    ],
    axis=1
).reset_index()
df.columns = ["Cell Type", "SEA-AD", "Base Inclusion Probability", "Mathys_2023", "Mathys Inclusion Probability", "Green_2023", "Green Inclusion Probability"]

# Order the supertypes as they appear in the cluster colour table.
df["Cell Type"] = df["Cell Type"].astype("category")
ordered_cell_types = color_order.loc[color_order["cluster_label"].isin(df["Cell Type"]), "cluster_label"]
df["Cell Type"] = df["Cell Type"].cat.reorder_categories(ordered_cell_types)

# Heatmap: one row per dataset, one column per supertype.
df.index = df["Cell Type"].copy()
plt.rcParams["figure.figsize"] = (4,2)
effect_sizes = df.loc[df["Cell Type"].cat.categories, ["SEA-AD", "Mathys_2023", "Green_2023"]].T
ax = sns.heatmap(effect_sizes, cmap="RdBu_r", xticklabels=True, center=0, vmin=-1)
ax.set(xlabel=None, title="Effect size across AD")
plt.savefig(os.path.join(pwd, "output", "Figure 4e_heatmap_Non-neuronal supertype replication.pdf"), bbox_inches="tight")
plt.show()

### Extended Data Figure 9e

In [None]:
# Green_2023
# Scatter of per-supertype effect sizes in SEA-AD (x) against Green_2023 (y),
# coloured by subclass, with the linear-regression r value in the title.

# Subclass label -> colour lookup, plus a grey entry for "Not Significant".
subclass_colors = color_order.loc[:, ["subclass_label", "subclass_color"]].drop_duplicates()
subclass_colors.index = subclass_colors["subclass_label"].copy()
subclass_colors = subclass_colors["subclass_color"].to_dict()
subclass_colors["Not Significant"] = "lightgrey"

# Restrict both results tables to the supertypes they share.
A9_RNAseq_neuronal = A9_RNAseq_neuronal.loc[A9_RNAseq_neuronal["Cell Type"].isin(Green_RNAseq_neuronal["Cell Type"]), :].copy()
Green_RNAseq_neuronal = Green_RNAseq_neuronal.loc[Green_RNAseq_neuronal["Cell Type"].isin(A9_RNAseq_neuronal["Cell Type"]), :].copy()

# Mean effect size and inclusion probability per supertype, SEA-AD first, Green_2023 second.
df = pd.concat(
    [
        A9_RNAseq_neuronal.loc[A9_RNAseq_neuronal["Covariate"] == "Continuous_Pseudo-progression_Score", ["Cell Type", "Final Parameter", "Inclusion probability"]].groupby("Cell Type").mean(), 
        Green_RNAseq_neuronal.loc[Green_RNAseq_neuronal["Covariate"] == "Overall_AD_neuropathological_Change_codes", ["Cell Type", "Final Parameter", "Inclusion probability"]].groupby("Cell Type").mean()
    ],
    axis=1
).reset_index()
df.columns = ["Cell Type", "Base Effect Size", "Base Inclusion Probability", "New Effect Size", "New Inclusion Probability"]

# Derive the subclass from the supertype label by stripping the trailing
# "_<n>", optional "_<n>" and optional "-SEAAD" suffix.
df["Subclass"] = [re.sub("^(.*)_[0-9]+(_[0-9]+)?(-SEAAD)?$", "\\1", i).replace("Lamp5_Lhx6", "Lamp5 Lhx6") for i in df["Cell Type"]]
df["Subclass"] = df["Subclass"].astype("category")
subclass_order = color_order.loc[color_order["subclass_label"].isin(df["Subclass"]), "subclass_label"].drop_duplicates().to_list()
subclass_order.append("Not Significant")
# set_categories (not reorder_categories) is required here: "Not Significant" is
# never a value of df["Subclass"] in this cell, and reorder_categories raises
# ValueError unless the new list is exactly the existing category set.
# NOTE(review): no row is ever assigned "Not Significant" here; if a significance
# filter on the inclusion probabilities was intended, it is missing - confirm.
df["Subclass"] = df["Subclass"].cat.set_categories(subclass_order)
slope, intercept, r_value, p_value, std_err = sp_stats.linregress(
    df["Base Effect Size"],
    df["New Effect Size"]
)

plt.rcParams["figure.figsize"] = (4,4)
ax = sns.scatterplot(
    data=df,
    x="Base Effect Size",
    y="New Effect Size",
    hue="Subclass",
    palette=subclass_colors
);
ax.set(xlabel="Effect size in SEA-AD", ylabel="Effect size in Green_2023", title="Correlation=" + str(np.round(r_value,2)));
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
# NOTE(review): the file name says "Figure 4e" although this cell sits under the
# "Extended Data Figure 9e" heading - confirm which is intended.
plt.savefig(os.path.join(pwd, "output", "Figure 4e_scatterplot_Neuronal supertype replication Green.pdf"), bbox_inches="tight")
plt.show()


# Mathys_2023
# Scatter of per-supertype effect sizes in SEA-AD (x) against Mathys_2023 (y),
# coloured by subclass, with the linear-regression r value in the title.

# Restrict both results tables to the supertypes they share.
A9_RNAseq_neuronal = A9_RNAseq_neuronal.loc[A9_RNAseq_neuronal["Cell Type"].isin(Mathys_RNAseq_neuronal["Cell Type"]), :].copy()
Mathys_RNAseq_neuronal = Mathys_RNAseq_neuronal.loc[Mathys_RNAseq_neuronal["Cell Type"].isin(A9_RNAseq_neuronal["Cell Type"]), :].copy()

# Mean effect size and inclusion probability per supertype, SEA-AD first, Mathys_2023 second.
df = pd.concat(
    [
        A9_RNAseq_neuronal.loc[A9_RNAseq_neuronal["Covariate"] == "Continuous_Pseudo-progression_Score", ["Cell Type", "Final Parameter", "Inclusion probability"]].groupby("Cell Type").mean(), 
        Mathys_RNAseq_neuronal.loc[Mathys_RNAseq_neuronal["Covariate"] == "Overall_AD_neuropathological_Change_codes", ["Cell Type", "Final Parameter", "Inclusion probability"]].groupby("Cell Type").mean()
    ],
    axis=1
).reset_index()
df.columns = ["Cell Type", "Base Effect Size", "Base Inclusion Probability", "New Effect Size", "New Inclusion Probability"]

# Derive the subclass from the supertype label by stripping the trailing
# "_<n>", optional "_<n>" and optional "-SEAAD" suffix.
df["Subclass"] = [re.sub("^(.*)_[0-9]+(_[0-9]+)?(-SEAAD)?$", "\\1", i).replace("Lamp5_Lhx6", "Lamp5 Lhx6") for i in df["Cell Type"]]
df["Subclass"] = df["Subclass"].astype("category")
subclass_order = color_order.loc[color_order["subclass_label"].isin(df["Subclass"]), "subclass_label"].drop_duplicates().to_list()
subclass_order.append("Not Significant")
# set_categories (not reorder_categories) is required here: "Not Significant" is
# never a value of df["Subclass"] in this cell, and reorder_categories raises
# ValueError unless the new list is exactly the existing category set.
# NOTE(review): no row is ever assigned "Not Significant" here; if a significance
# filter on the inclusion probabilities was intended, it is missing - confirm.
df["Subclass"] = df["Subclass"].cat.set_categories(subclass_order)
slope, intercept, r_value, p_value, std_err = sp_stats.linregress(
    df["Base Effect Size"],
    df["New Effect Size"]
)

plt.rcParams["figure.figsize"] = (4,4)
ax = sns.scatterplot(
    data=df,
    x="Base Effect Size",
    y="New Effect Size",
    hue="Subclass",
    palette=subclass_colors
);
ax.set(xlabel="Effect size in SEA-AD", ylabel="Effect size in Mathys_2023", title="Correlation=" + str(np.round(r_value,2)));
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
# NOTE(review): the file name says "Figure 4e" although this cell sits under the
# "Extended Data Figure 9e" heading - confirm which is intended.
plt.savefig(os.path.join(pwd, "output", "Figure 4e_scatterplot_Neuronal supertype replication Mathys.pdf"), bbox_inches="tight")
plt.show()

### Figure 4f

In [None]:
# Neuronal supertypes: for each dataset, the fraction of its donors in which
# each supertype was captured (at least one cell).

# Build the combined "<Source>__<Donor ID>" key before it is used below. This
# assignment was commented out, which left the groupby depending on the column
# already being present in obs; re-creating it is harmless if it is.
public_adata.obs["Source_Donor ID"] = public_adata.obs["Source"].astype("str") + "__" + public_adata.obs["Donor ID"].astype("str")
public_adata.obs["Supertype"] = public_adata.obs["Supertype"].cat.remove_unused_categories()

# Relative abundance of every supertype within each donor.
# NOTE(review): public_adata appears to have been subset with selected_cells in an
# earlier cell; confirm the mask still aligns with public_adata.obs here.
df = public_adata.obs.loc[selected_cells & (public_adata.obs["Class"] != 'Non-neuronal and Non-neural'), ["Supertype", "Source_Donor ID"]].groupby(["Source_Donor ID"]).value_counts(normalize=True, sort=False).reset_index()
df = df.loc[df["Supertype"].isin(public_adata.obs.loc[public_adata.obs["Class"] != 'Non-neuronal and Non-neural', "Supertype"].unique()), :]
# Split the combined key back into its two parts.
df["Source"] = [i[0] for i in df["Source_Donor ID"].str.split("__")]
df["Donor ID"] = [i[1] for i in df["Source_Donor ID"].str.split("__")]
df = df.rename(
    {
        "proportion": "Relative abundance",
    },
    axis=1
)
# Reduce the abundance to a captured / not captured flag per donor.
df["Relative abundance"] = df["Relative abundance"] > 0

plt.rcParams["figure.figsize"] = (2,2)
# Share of donors with flag True / False for each (dataset, supertype); keep the True share.
df = df.loc[:, ["Source", "Supertype", "Relative abundance"]].groupby(["Source", "Supertype"]).value_counts(normalize=True, sort=False).reset_index()
df = df.loc[df["Supertype"].isin(public_adata.obs.loc[public_adata.obs["Class"] != 'Non-neuronal and Non-neural', "Supertype"].unique()), :]
df = df.loc[df["Relative abundance"] == True, :].copy()
df = df.rename(
    {
        "proportion": "Fraction of donors a neuronal supertype was captured in"
    },
    axis=1
)
df["SEA-AD"] = df["Source"] == "SEA-AD"
df["Source"] = df["Source"].astype("category")
# Olah_2020 and Yang_2022 are added as (empty) categories so that the full dataset
# list can be used for ordering; presumably they have no rows here - verify.
df["Source"] = df["Source"].cat.add_categories(["Olah_2020", "Yang_2022"])
df["Source"] = df["Source"].cat.reorder_categories(np.flip(["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"]))
ax = sns.boxplot(data=df, x="Fraction of donors a neuronal supertype was captured in", y="Source", showfliers=False);
ax = sns.stripplot(data=df, x="Fraction of donors a neuronal supertype was captured in", y="Source", color="grey", size=2, alpha=0.5);
ax.set(ylabel=None);
plt.legend([],[], frameon=False);
plt.savefig(os.path.join(pwd, "output", "Figure 4f_Donor neuronal supertype fraction.pdf"), bbox_inches="tight")
plt.show()

# Non-neuronal supertypes: for each dataset, the fraction of its donors in which
# each supertype was captured (at least one cell).
public_adata.obs["Source_Donor ID"] = (
    public_adata.obs["Source"].astype("str") + "__" + public_adata.obs["Donor ID"].astype("str")
)
public_adata.obs["Supertype"] = public_adata.obs["Supertype"].cat.remove_unused_categories()

is_non_neuronal = public_adata.obs["Class"] == 'Non-neuronal and Non-neural'
non_neuronal_supertypes = public_adata.obs.loc[is_non_neuronal, "Supertype"].unique()

# Relative abundance of every supertype within each donor.
df = (
    public_adata.obs.loc[selected_cells & is_non_neuronal, ["Supertype", "Source_Donor ID"]]
    .groupby(["Source_Donor ID"])
    .value_counts(normalize=True, sort=False)
    .reset_index()
)
df = df.loc[df["Supertype"].isin(non_neuronal_supertypes), :]
# Split the combined "<Source>__<Donor ID>" key back into its two parts.
key_parts = df["Source_Donor ID"].str.split("__")
df["Source"] = [parts[0] for parts in key_parts]
df["Donor ID"] = [parts[1] for parts in key_parts]
df = df.rename(columns={"proportion": "Relative abundance"})
# Reduce the abundance to a captured / not captured flag per donor.
df["Relative abundance"] = df["Relative abundance"] > 0

plt.rcParams["figure.figsize"] = (2,2)
# Share of donors with flag True / False for each (dataset, supertype); keep the True share.
df = (
    df.loc[:, ["Source", "Supertype", "Relative abundance"]]
    .groupby(["Source", "Supertype"])
    .value_counts(normalize=True, sort=False)
    .reset_index()
)
df = df.loc[df["Supertype"].isin(non_neuronal_supertypes), :]
df = df.loc[df["Relative abundance"] == True, :].copy()
df = df.rename(columns={"proportion": "Fraction of donors a non-neuronal supertype was captured in"})
df["SEA-AD"] = df["Source"] == "SEA-AD"
df["Source"] = (
    df["Source"]
    .astype("category")
    .cat.reorder_categories(
        np.flip(["SEA-AD", "Green_2023", "Mathys_2023", "Cain_2022", "Yang_2022", "Morabito_2021", "Leng_2021", "Lau_2020", "Zhou_2020", "Olah_2020", "Mathys_2019"])
    )
)
# These three supertypes are left out of the plot.
excluded_supertypes = ["VLMC_2-SEAAD", "VLMC_3-SEAAD", "Pericyte_2-SEAAD"]
df = df.loc[~df["Supertype"].isin(excluded_supertypes), :]

fraction_axes = dict(data=df, x="Fraction of donors a non-neuronal supertype was captured in", y="Source")
ax = sns.boxplot(showfliers=False, **fraction_axes)
ax = sns.stripplot(color="grey", size=2, alpha=0.5, **fraction_axes)
ax.set(ylabel=None)
plt.legend([],[], frameon=False)
plt.savefig(os.path.join(pwd, "output", "Figure 4f_Donor non-neuronal supertype fraction.pdf"), bbox_inches="tight")
plt.show()

### Figure 4g

In [None]:
# SEA-AD
# Scatter of |effect size| (x axis) against the fraction of rows of the abundance
# object with a non-zero value for each supertype (y axis; rows are presumably
# donors, per the axis label - confirm).
neurons = copy.copy(A9_RNAseq_neuronal_adata)
results_table_neurons = copy.copy(A9_RNAseq_neuronal)
# Keep only supertypes present in both the abundance object and the results table.
neurons = neurons[:, np.intersect1d(neurons.var_names, results_table_neurons["Cell Type"])].copy()
glia = copy.copy(A9_RNAseq_non_neuronal_adata)
results_table_glia = copy.copy(A9_RNAseq_non_neuronal)
glia = glia[:, np.intersect1d(glia.var_names, results_table_glia["Cell Type"])].copy()
x = ((neurons.X > 0).sum(axis=0) / neurons.shape[0]).tolist()
x.extend(((glia.X > 0).sum(axis=0) / glia.shape[0]).tolist())
# Mean effect size per supertype, selected by the abundance columns' own labels so
# each value is paired with its fraction by name rather than by an implied sort
# order (a supertype lacking this covariate now raises KeyError instead of
# silently shifting the pairing).
neuron_effects = results_table_neurons.loc[results_table_neurons["Covariate"] == "Continuous_Pseudo-progression_Score", ["Cell Type", "Final Parameter"]].groupby("Cell Type").mean()
glia_effects = results_table_glia.loc[results_table_glia["Covariate"] == "Continuous_Pseudo-progression_Score", ["Cell Type", "Final Parameter"]].groupby("Cell Type").mean()
y = neuron_effects.loc[neurons.var_names, "Final Parameter"].to_list()
y.extend(glia_effects.loc[glia.var_names, "Final Parameter"].to_list())
y = np.abs(y)
plt.rcParams["figure.figsize"] = (1.5,3)
# The names are swapped on purpose: `x` (fraction) goes on the y axis, `y` (effect size) on the x axis.
ax = sns.scatterplot(
    y=x,
    x=y,
);
plt.axhline(0.75, linestyle="--", color="lightgrey")
ax.set(ylabel="Fraction of donors with a supertype", xlabel="Effect Size", title="SEA-AD", ylim=(0,1.1));
plt.savefig(os.path.join(pwd, "output", "Figure 4g_scatterplot_SEA-AD effect size versus zero inflation.pdf"), bbox_inches="tight")
plt.show()

# Green_2023
# Scatter of |effect size| (x axis) against the fraction of rows of the abundance
# object with a non-zero value for each supertype (y axis; rows are presumably
# donors, per the axis label - confirm).
neurons = copy.copy(Green_RNAseq_neuronal_adata)
results_table_neurons = copy.copy(Green_RNAseq_neuronal)
# Keep only supertypes present in both the abundance object and the results table.
neurons = neurons[:, np.intersect1d(neurons.var_names, results_table_neurons["Cell Type"])].copy()
glia = copy.copy(Green_RNAseq_non_neuronal_adata)
results_table_glia = copy.copy(Green_RNAseq_non_neuronal)
glia = glia[:, np.intersect1d(glia.var_names, results_table_glia["Cell Type"])].copy()
x = ((neurons.X > 0).sum(axis=0) / neurons.shape[0]).tolist()
x.extend(((glia.X > 0).sum(axis=0) / glia.shape[0]).tolist())
# Mean effect size per supertype, selected by the abundance columns' own labels so
# each value is paired with its fraction by name rather than by an implied sort
# order (a supertype lacking this covariate now raises KeyError instead of
# silently shifting the pairing).
neuron_effects = results_table_neurons.loc[results_table_neurons["Covariate"] == "Overall_AD_neuropathological_Change_codes", ["Cell Type", "Final Parameter"]].groupby("Cell Type").mean()
glia_effects = results_table_glia.loc[results_table_glia["Covariate"] == "Overall_AD_neuropathological_Change_codes", ["Cell Type", "Final Parameter"]].groupby("Cell Type").mean()
y = neuron_effects.loc[neurons.var_names, "Final Parameter"].to_list()
y.extend(glia_effects.loc[glia.var_names, "Final Parameter"].to_list())
y = np.abs(y)
plt.rcParams["figure.figsize"] = (1.5,3)
# The names are swapped on purpose: `x` (fraction) goes on the y axis, `y` (effect size) on the x axis.
ax = sns.scatterplot(
    y=x,
    x=y,
);
plt.axhline(0.75, linestyle="--", color="lightgrey")
ax.set(ylabel="Fraction of donors with a supertype", xlabel="Effect Size", title="Green_2023", ylim=(0,1.1));
plt.savefig(os.path.join(pwd, "output", "Figure 4g_scatterplot_Green effect size versus zero inflation.pdf"), bbox_inches="tight")
plt.show()


# Mathys_2023
# Scatter of |effect size| (x axis) against the fraction of rows of the abundance
# object with a non-zero value for each supertype (y axis; rows are presumably
# donors, per the axis label - confirm).
neurons = copy.copy(Mathys_RNAseq_neuronal_adata)
results_table_neurons = copy.copy(Mathys_RNAseq_neuronal)
# Keep only supertypes present in both the abundance object and the results table.
neurons = neurons[:, np.intersect1d(neurons.var_names, results_table_neurons["Cell Type"])].copy()
glia = copy.copy(Mathys_RNAseq_non_neuronal_adata)
results_table_glia = copy.copy(Mathys_RNAseq_non_neuronal)
glia = glia[:, np.intersect1d(glia.var_names, results_table_glia["Cell Type"])].copy()
x = ((neurons.X > 0).sum(axis=0) / neurons.shape[0]).tolist()
x.extend(((glia.X > 0).sum(axis=0) / glia.shape[0]).tolist())
# Mean effect size per supertype, selected by the abundance columns' own labels so
# each value is paired with its fraction by name rather than by an implied sort
# order (a supertype lacking this covariate now raises KeyError instead of
# silently shifting the pairing).
neuron_effects = results_table_neurons.loc[results_table_neurons["Covariate"] == "Overall_AD_neuropathological_Change_codes", ["Cell Type", "Final Parameter"]].groupby("Cell Type").mean()
glia_effects = results_table_glia.loc[results_table_glia["Covariate"] == "Overall_AD_neuropathological_Change_codes", ["Cell Type", "Final Parameter"]].groupby("Cell Type").mean()
y = neuron_effects.loc[neurons.var_names, "Final Parameter"].to_list()
y.extend(glia_effects.loc[glia.var_names, "Final Parameter"].to_list())
y = np.abs(y)
plt.rcParams["figure.figsize"] = (1.5,3)
# The names are swapped on purpose: `x` (fraction) goes on the y axis, `y` (effect size) on the x axis.
ax = sns.scatterplot(
    y=x,
    x=y,
);
plt.axhline(0.75, linestyle="--", color="lightgrey")
ax.set(ylabel="Fraction of donors with a supertype", xlabel="Effect Size", title="Mathys_2023", ylim=(0,1.1));
plt.savefig(os.path.join(pwd, "output", "Figure 4g_scatterplot_Mathys effect size versus zero inflation.pdf"), bbox_inches="tight")
plt.show()


### Clean up

In [None]:
# Delete the ./figures directory and everything in it; the plots saved there by
# scanpy were moved into ./output by the cells above.
# Raises if the directory does not exist (e.g. when this cell is run a second time).
shutil.rmtree(os.path.join(pwd, "figures"))