### Load needed libraries

In [None]:
import os
import shutil
import scanpy as sc
import pandas as pd
import numpy as np
import seaborn as sns
import re
import glob
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import warnings
from datetime import datetime
from helper_functions import *

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Scanpy configuration: worker count and figure defaults (500 dpi PNG,
# frameless, vector-friendly rendering, 10x10 inch canvas).
sc.settings.n_jobs = 32
sc.set_figure_params(
    scanpy=True,
    dpi=500,
    dpi_save=500,
    frameon=False,
    vector_friendly=True,
    figsize=(10, 10),
    format="png",
)

# Applied after set_figure_params so these two values are the ones in effect:
# pdf.fonttype 42 embeds TrueType fonts in saved PDFs, and grids are disabled.
matplotlib.rcParams.update({"pdf.fonttype": 42, "axes.grid": False})

# Every input/output path in this notebook is resolved against the directory
# the notebook was started from.
pwd = os.getcwd()


### Load needed datasets/data files

In [None]:
# Cluster order and colors from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
color_order = pd.read_csv(os.path.join(pwd, "input", "cluster_order_and_colors.csv"))

# From 00_build_input_data.py
adata = sc.read_h5ad(os.path.join(pwd, "input", "Figure 3 and Extended Data 8", "SEAAD_MTG_RNAseq_final-nuclei_no_data.2024-02-13.h5ad"))

# Subclass UMAP coordinates from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
Microglia_PVM_obs_names = pd.read_csv(os.path.join(pwd, "input", "Figure 3 and Extended Data 8", "Microglia-PVM", "obs_names.csv"), index_col=0).index
Microglia_PVM_umap = np.load(os.path.join(pwd, "input", "Figure 3 and Extended Data 8", "Microglia-PVM", "X_umap.npy"))

L5_IT_obs_names = pd.read_csv(os.path.join(pwd, "input", "Figure 3 and Extended Data 8", "L5 IT", "obs_names.csv"), index_col=0).index
L5_IT_umap = np.load(os.path.join(pwd, "input", "Figure 3 and Extended Data 8", "L5 IT", "X_umap.npy"))

# scCODA results from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
datasets = glob.glob(os.path.join(pwd, "input", "Figure 3 and Extended Data 8", "*", "*", "*.csv"))

### Figure 3a

In [None]:
# Build "Reference ..." versions of the Subclass and Supertype annotations in
# which every cell whose "Neurotypical reference" value is the string "False"
# is blanked out (NaN). The color palette of each source column is copied so
# the new column is drawn with the same colors.
for source_column, reference_column in (
    ("Subclass", "Reference Subclass"),
    ("Supertype", "Reference Supertype"),
):
    adata.obs[reference_column] = adata.obs[source_column].copy()
    not_reference = adata.obs["Neurotypical reference"] == "False"
    adata.obs.loc[not_reference, reference_column] = np.nan
    adata.uns[reference_column + "_colors"] = adata.uns[source_column + "_colors"].copy()

# fraction=1 keeps every cell; presumably used to randomize cell order before
# plotting - TODO confirm against the scanpy documentation.
sc.pp.subsample(adata, fraction=1)

In [None]:
# Subclass plots
plt.rcParams["figure.figsize"] = (8, 8)

# The UMAPs are picked up from <pwd>/figures (where the `save=` argument puts
# them) and moved into <pwd>/output under their final figure name.
figures_dir = os.path.join(pwd, "figures")
output_dir = os.path.join(pwd, "output")
# Create the destination up front so the move cannot fail on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)

# Styling shared by both whole-taxonomy panels.
subclass_umap_style = {
    "size": 1,
    "legend_loc": "on data",
    "frameon": False,
    "legend_fontsize": 16,
    "legend_fontweight": "bold",
    "title": "",
}

# (column to color by, panel-specific keyword arguments)
subclass_panels = [
    ("Reference Subclass", {"na_in_legend": False}),
    ("Subclass", {}),
]

for column, panel_kwargs in subclass_panels:
    saved_name = "umap_Whole Taxonomy_" + column + ".pdf"
    sc.pl.umap(
        adata,
        color=[column],
        save="_Whole Taxonomy_" + column + ".pdf",
        **subclass_umap_style,
        **panel_kwargs,
    )
    # os.replace overwrites an existing destination on every platform, so the
    # cell can be re-run (os.rename raises on Windows if the target exists).
    os.replace(
        os.path.join(figures_dir, saved_name),
        os.path.join(output_dir, "Figure 3a_" + saved_name),
    )


In [None]:
# L5 IT and Microglia-PVM supertype plots
plt.rcParams["figure.figsize"] = (8, 8)

# The UMAPs are picked up from <pwd>/figures (where the `save=` argument puts
# them) and moved into <pwd>/output under their final figure name.
figures_dir = os.path.join(pwd, "figures")
output_dir = os.path.join(pwd, "output")
# Create the destination up front so the move cannot fail on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)

# Styling shared by every supertype panel in this cell.
supertype_umap_style = {
    "size": 10,
    "legend_loc": "on data",
    "frameon": False,
    "legend_fontsize": 16,
    "legend_fontweight": "bold",
    "title": "",
}


def _plot_supertype_umaps(subset, label, panels):
    """Draw one UMAP per panel for `subset` and move each PDF into the output folder.

    `label` is the subclass name used in the file names; `panels` is a list of
    (column to color by, panel-specific keyword arguments) pairs.
    """
    for column, panel_kwargs in panels:
        saved_name = "umap_" + label + "_" + column + ".pdf"
        sc.pl.umap(
            subset,
            color=[column],
            save="_" + label + "_" + column + ".pdf",
            **supertype_umap_style,
            **panel_kwargs,
        )
        # os.replace overwrites an existing destination on every platform, so
        # the cell can be re-run (os.rename raises on Windows if it exists).
        os.replace(
            os.path.join(figures_dir, saved_name),
            os.path.join(output_dir, "Figure 3a_" + saved_name),
        )


# --- L5 IT ---
# Restrict to the L5 IT cells and swap in the subclass-specific UMAP
# coordinates (assumes X_umap.npy rows are in obs_names.csv order - TODO confirm).
sub = adata[L5_IT_obs_names, :].copy()
sub.obsm["X_umap"] = L5_IT_umap
sc.pp.subsample(sub, fraction=1)

_plot_supertype_umaps(
    sub,
    "L5 IT",
    [
        ("Reference Supertype", {"na_in_legend": False}),
        ("Supertype", {}),
    ],
)

# --- Microglia-PVM ---
sub = adata[Microglia_PVM_obs_names, :].copy()
sub.obsm["X_umap"] = Microglia_PVM_umap
# Only Micro-PVM_1 and Micro-PVM_2 are kept as reference supertypes; every
# other value is blanked out for the reference panel.
reference_micro_supertypes = ["Micro-PVM_1", "Micro-PVM_2"]
sub.obs.loc[~sub.obs["Reference Supertype"].isin(reference_micro_supertypes), "Reference Supertype"] = np.nan

sc.pp.subsample(sub, fraction=1)

# Colors of the two non-expanded microglia supertypes, in color_order row order.
non_expanded_palette = color_order.loc[
    color_order["cluster_label"].isin(reference_micro_supertypes), "cluster_color"
].to_list()

_plot_supertype_umaps(
    sub,
    "Microglia-PVM",
    [
        ("Reference Supertype", {"na_in_legend": False}),
        ("Supertype (non-expanded)", {"palette": non_expanded_palette}),
        ("Supertype", {}),
    ],
)


### Figure 3b and Extended Data Figure 8a

In [None]:
plot_size = {
    "A9_RNAseq": [-1.75, 1.75],
    "MTG_RNAseq": [-1.75, 1.75],
    "PFC_Mathys_2023": [-0.5, 0.5],
    "PFC_Green_2023": [-0.5, 0.5]

}
plot_order = {
    "Non-neuronal and Non-neural": color_order.loc[color_order["class_label"] == "Non-neuronal and Non-neural", "cluster_label"].to_list(),
    "Neuronal: Glutamatergic Neuronal: GABAergic": color_order.loc[color_order["class_label"] != "Non-neuronal and Non-neural", "cluster_label"].to_list(),
}
plot_colors = {
    "Non-neuronal and Non-neural": color_order.loc[color_order["class_label"] == "Non-neuronal and Non-neural", "cluster_color"].to_list(),
    "Neuronal: Glutamatergic Neuronal: GABAergic": color_order.loc[color_order["class_label"] != "Non-neuronal and Non-neural", "cluster_color"].to_list(),
}
plot_dims = {
    "Non-neuronal and Non-neural": (10,4),
    "Neuronal: Glutamatergic Neuronal: GABAergic": (30,4)
}

for i in datasets:
    region = i.split("/")[-3]
    test = i.split("/")[-2]
    population = i.split("/")[-1].split("_")[0]

    print("Region: " + region + " Test: " + test + " Population: " + population)
    
    results_table = pd.read_csv(i, index_col=0)

    missing_types = np.setdiff1d(plot_order[population], results_table["Cell Type"])
    for missing_type in missing_types:
        to_add = results_table.loc[results_table["Cell Type"] == results_table.loc[:, "Cell Type"].iloc[0], :]
        to_add["Cell Type"] = missing_type
        to_add["Final Parameter"] = 0
        to_add["SD"] = 0
        to_add["Inclusion probability"] = 0
        results_table = pd.concat([results_table, to_add], axis=0)

    extra_types = np.setdiff1d(results_table["Cell Type"], plot_order[population])

    if len(extra_types) > 0:
        print("WARNING: Removing extra types " + str(list(extra_types)))
        results_table = results_table.loc[~results_table["Cell Type"].isin(extra_types), :].copy()
    
    results_table = results_table.sort_values(by="Covariate")

    results_table["Credible"] = results_table["Final Parameter"] != 0
    try:
        results_table["Cell Type"] = results_table["Cell Type"].astype("category")
        results_table["Cell Type"] = results_table["Cell Type"].cat.reorder_categories(plot_order[population])

    except:
        pass

    plt.rcParams['figure.figsize'] = plot_dims[population]
    
    df = results_table.loc[:, ["Covariate", "Cell Type", "Final Parameter"]].groupby(["Covariate", "Cell Type"]).mean().reset_index()
    if (test == "Continuous_Pseudo-progression_Score" and region in ["MTG_RNAseq", "A9_RNAseq"]) or (test == "Overall_AD_neuropathological_Change_codes" and region in ["PFC_Mathys_2023", "PFC_Green_2023"]):
        ax = sns.heatmap(df.pivot(index="Covariate", columns="Cell Type", values="Final Parameter"), center=0, cmap="RdBu_r", vmin=plot_size[region][0], vmax=plot_size[region][1]);
        ax.set_ylabel("");
        ax.set_xlabel("");
        plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 8a_heatmap_" + region + "_" + population + "_along_" + test + ".pdf"), bbox_inches="tight")
        plt.show()

    if region in ["MTG_RNAseq", "A9_RNAseq"]:
        for covariate in np.unique(results_table["Covariate"]):
            if covariate not in ["Cognitive_Status[T.No dementia]", "Overall_AD_neuropathological_Change_codes", "Continuous_Pseudo-progression_Score"]:
                continue
    
            print("Covariate: " + covariate + " Test: " + test + " Population: " + population)
    
            df = results_table.loc[results_table["Covariate"] == covariate, :]
            
            # Flip the sign of the effect size
            if test == "Cognitive_Status" and covariate == "Cognitive_Status[T.No dementia]":
                df["Final Parameter"] = -1 * df["Final Parameter"]
                
            ax = sns.barplot(data=df, x="Cell Type", y="Final Parameter", palette=plot_colors[population] if population in plot_colors.keys() else None);
            ax.set_xlabel("");
            ax.set_ylabel("Effect size along\n" + covariate.replace("_", " "));
            ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=90)
            ax.set_ylim(plot_size[region]);
            plt.savefig(os.path.join(pwd, "output", "Figure 3b_barplot_" + region + "_" + population + "_along_" + test + ".pdf"), bbox_inches="tight")
            plt.show()

### Extended Data Figure 8b

In [None]:
# Imported here so the cell does not depend on `sp_stats` leaking in through
# `from helper_functions import *`.
from scipy import stats as sp_stats

# (x dataset, y dataset) pairs whose per-supertype CPS effect sizes are compared.
test_pairs = [
    ["MTG_RNAseq", "MTG_ATACseq"],
    ["MTG_RNAseq", "MTG_RNAseq_Donor"],
    ["MTG_RNAseq", "MTG_RNAseq_No_SA_Donors"],
    ["MTG_RNAseq", "MTG_RNAseq_PMI"],
]
test_var = "Continuous_Pseudo-progression_Score"
result_populations = [
    "Neuronal: Glutamatergic Neuronal: GABAergic",
    "Non-neuronal and Non-neural",
]


def _mean_effect_by_supertype(dataset):
    """Return the mean `test_var` effect size per cell type for one dataset.

    Reads the neuronal and non-neuronal scCODA result tables of `dataset`,
    keeps the rows whose covariate is `test_var`, and averages
    "Final Parameter" per "Cell Type". The result is a Series indexed by
    cell type.
    """
    results = pd.concat(
        [
            # Resolved against pwd, like every other input in this notebook.
            pd.read_csv(
                os.path.join(pwd, "input", "Figure 3 and Extended Data 8", dataset, test_var, population + "_Supertype_results.csv"),
                index_col=0,
            )
            for population in result_populations
        ],
        axis=0,
    )
    on_test_var = results.loc[results["Covariate"] == test_var, ["Final Parameter", "Cell Type"]]
    return on_test_var.groupby("Cell Type").mean().loc[:, "Final Parameter"]


for test_x, test_y in test_pairs:
    # Pair the two datasets by cell type: the inner join keeps only supertypes
    # present in both, and guarantees x and y are aligned row by row instead
    # of relying on both lists happening to have the same length and order.
    paired_effects = pd.concat(
        [_mean_effect_by_supertype(test_x), _mean_effect_by_supertype(test_y)],
        axis=1,
        join="inner",
        keys=["x", "y"],
    )
    effects_x = paired_effects["x"].to_list()
    effects_y = paired_effects["y"].to_list()

    # Only the correlation coefficient is reported (in the plot title).
    r_value = sp_stats.linregress(effects_x, effects_y).rvalue

    plt.rcParams["figure.figsize"] = (4, 4)
    ax = sns.regplot(x=effects_x, y=effects_y)
    ax.set(
        xlabel="Effect size on CPS\nin " + test_x.replace("MTG_", ""),
        ylabel="Effect size on CPS\nin " + test_y.replace("MTG_", ""),
        title="Comparison across supertypes, r=" + str(np.round(r_value, 2)),
    )
    plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 8b_regplot_" + test_x.replace("MTG_", "") + "_versus_" + test_y.replace("MTG_", "") + "_effect_size_CPS.pdf"), bbox_inches="tight")
    plt.show()

### Figure 3c

In [None]:
regions = ["MTG_RNAseq", "A9_RNAseq"]

# In all three tests (Cognitive Status, ADNC, and CPS)
# Defined once: the list does not depend on the region being plotted.
affected_supertypes = [
    "Lamp5_3",
    "Lamp5_5",
    "Sncg_1",
    "Sncg_8",
    "Vip_2",
    "Vip_11",
    "Vip_13",
    "Vip_1",
    "Sst_3",
    "Sst_19",
    "Sst_9",
    "Sst_11",
    "Sst_20",
    "Sst_23",
    "Sst_25",
    "Sst_2",
    "Pvalb_6",
    "Pvalb_5",
    "Pvalb_8",
    "Pvalb_3",
    "Pvalb_2",
    "Pvalb_15",
    "Pvalb_14",
    "Pvalb_10",
    "L2/3 IT_1",
    "L2/3 IT_6",
    "L2/3 IT_7",
    "L2/3 IT_5",
    "L2/3 IT_13",
    "L2/3 IT_10",
    "L2/3 IT_8",
    "L2/3 IT_12",
    "L2/3 IT_3",
    "Astro_2",
    "OPC_2",
    "Oligo_2",
    "Micro-PVM_3-SEAAD",
]

# subclass_label -> subclass_color lookup, also region-independent.
subclass_colors = (
    color_order.loc[:, ["subclass_label", "subclass_color"]]
    .drop_duplicates()
    .set_index("subclass_label")["subclass_color"]
    .to_dict()
)

# Patterns that strip the numeric supertype suffix (and, for glia, the
# optional second number and "-SEAAD" tag) to recover the subclass prefix.
neuronal_suffix = re.compile(r"_[0-9]{1,3}")
glial_suffix = re.compile(r"_[0-9]{1,3}(_[0-9])?(-SEAAD)?")


def _annotate_subclass(abundances, suffix_pattern, display_names):
    """Add a categorical var["Subclass"] derived from the supertype names.

    The suffix matched by `suffix_pattern` is removed from each var index
    entry, and the resulting categories are renamed via `display_names`.
    """
    abundances.var["Subclass"] = [suffix_pattern.sub("", supertype) for supertype in abundances.var.index]
    abundances.var["Subclass"] = (
        abundances.var["Subclass"].astype("category").cat.rename_categories(display_names)
    )


for region in regions:
    objects_dir = os.path.join(pwd, "input", "Figure 3 and Extended Data 8", region, "objects")
    neurons = sc.read_h5ad(os.path.join(objects_dir, "Neuronal: Glutamatergic Neuronal: GABAergic_Supertype_abundances.h5ad"))
    glia = sc.read_h5ad(os.path.join(objects_dir, "Non-neuronal and Non-neural_Supertype_abundances.h5ad"))

    _annotate_subclass(neurons, neuronal_suffix, {"Lamp5_Lhx6": "Lamp5 Lhx6"})
    _annotate_subclass(
        glia,
        glial_suffix,
        {
            "Astro": "Astrocyte",
            "Oligo": "Oligodendrocyte",
            "Endo": "Endothelial",
            "Micro-PVM": "Microglia-PVM",
        },
    )

    # (abundance object, label used in the file name, subclasses to plot)
    panels = [
        (neurons, "neuronal", ["Lamp5", "Sncg", "Vip", "Sst", "Pvalb", "L2/3 IT"]),
        (glia, "non-neuronal", ["Astrocyte", "Microglia-PVM", "Oligodendrocyte", "OPC"]),
    ]
    for abundances, panel_label, subclasses_to_plot in panels:
        # lmplots is not defined in this notebook; presumably it comes from
        # helper_functions - see that module for its argument semantics.
        ax = lmplots(
            abundances,
            feature_name="Continuous_Pseudo-progression_Score",
            figsize=(2.5, 4),
            y_scale="log_relative",
            to_plot="Subclass",
            cmap=subclass_colors,
            to_plot_filter=subclasses_to_plot,
            celltype_filter=affected_supertypes,
        )
        ax.set_ylabel("log(relative abundance)")
        ax.set_xlabel("Pseudoprogression")
        # An empty legend hides the per-subclass legend entries.
        plt.legend([], frameon=False)
        plt.savefig(os.path.join(pwd, "output", "Figure 3c_lmplplot_" + region + "_" + panel_label + "_affected_supertypes_relative_abundance_by_subclass_versus_CPS.pdf"), bbox_inches="tight")
        plt.show()


### Figure 3d

In [None]:
# Code to generate the spatial transcriptomics figures is in the Spatial Transcriptomics folder

### Clean up

In [None]:
# Remove the temporary "figures" directory the plots above were saved into
# before being moved to "output". Guarded so the cell can be re-run, or run
# when no figure was produced, without raising FileNotFoundError.
figures_dir = os.path.join(pwd, "figures")
if os.path.isdir(figures_dir):
    shutil.rmtree(figures_dir)