### Load needed libraries

In [None]:
import os
import shutil
import scanpy as sc
import pandas as pd
import numpy as np
import seaborn as sns
import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
from datetime import datetime
from helper_functions import *

# Global runtime and plotting configuration shared by every figure below.
sc.settings.n_jobs = 32
sc.set_figure_params(
    scanpy=True,
    dpi=100,
    dpi_save=500,
    frameon=False,
    vector_friendly=True,
    figsize=(10, 10),
    format="png",
)
# Silence every warning from this point on.
warnings.filterwarnings("ignore")

# Applied after set_figure_params so these two values are the ones in effect.
matplotlib.rcParams.update({"pdf.fonttype": 42, "axes.grid": False})

# Every input/output path in this notebook is built from the working directory.
pwd = os.getcwd()

### Load needed datasets/data files

In [None]:
# All inputs are resolved from the working directory captured in `pwd`, so every
# file in this cell is located the same way.
input_dir = os.path.join(pwd, "input")
ed5_dir = os.path.join(input_dir, "Extended Data Figure 5")

# Cluster order and colors from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
color_order = pd.read_csv(os.path.join(input_dir, "cluster_order_and_colors.csv"))
great_ape_colors = pd.read_csv(os.path.join(ed5_dir, "Great_Ape_subclass_cluster_colors.csv"))

# From https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/
adata = sc.read_h5ad(os.path.join(ed5_dir, "SEAAD_MTG_RNAseq_final-nuclei_no_data_signature_scores.2024-02-13.h5ad"))
# Reference subset: the flag is compared as the string "True", not as a boolean.
adata_ref = adata[adata.obs["Neurotypical reference"] == "True", :].copy()

# Results from one cluster self projection run from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
self_proj = pd.read_csv(os.path.join(ed5_dir, "cluster_iterative_scANVI_results.2022-03-21.csv"), index_col=0)

# Results from one supertype self projection run from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
supertype_self_proj = pd.read_csv(os.path.join(ed5_dir, "supertype_iterative_scANVI_results.2022-03-24.csv"), index_col=0)

# Coordinates from one self projection run for all IT Excitatory neurons and L5 IT
Excitatory_obs_names = pd.read_csv(os.path.join(ed5_dir, "Reference Excitatory obs_names.csv"), index_col=0).index
Excitatory_umap = np.load(os.path.join(ed5_dir, "Reference Excitatory X_umap.npy"))

L5_IT_obs_names = pd.read_csv(os.path.join(ed5_dir, "Reference L5 IT obs_names.csv"), index_col=0).index
L5_IT_umap = np.load(os.path.join(ed5_dir, "Reference L5 IT X_umap.npy"))

# Great Apes metadata, from https://www.science.org/doi/10.1126/science.adf6812
great_ape_metadata = pd.read_csv(os.path.join(ed5_dir, "Great_ApesMetadata_version101_20220321.csv"), index_col=0)

# Initial mapping and QC AnnData for SEA-AD Microglia from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
Micro_PVM = sc.read_h5ad(os.path.join(ed5_dir, "Micro-PVM_scANVI_no_data.2022-04-08.h5ad"))

# Subclass UMAP coordinates from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
Microglia_PVM_obs_names = pd.read_csv(os.path.join(ed5_dir, "Microglia-PVM", "obs_names.csv"), index_col=0).index
Microglia_PVM_umap = np.load(os.path.join(ed5_dir, "Microglia-PVM", "X_umap.npy"))

### Extended Data Figure 5b

In [None]:
# Attach the Great Apes cluster label to every reference nucleus. Rows are looked up
# by obs_names and assigned positionally (via a list), then stored as a categorical.
adata_ref.obs["cluster"] = great_ape_metadata.loc[adata_ref.obs_names, "cluster"].to_list()
adata_ref.obs["cluster"] = adata_ref.obs["cluster"].astype("category")

# Clusters that are relabelled "Unknown" at the supertype level.
low_confidence = [
    "L2/3 IT_4", "L2/3 IT_9", "L2/3 IT_11",
    "L5 IT_4",
    "L5/6 NP_5",
    "Micro-PVM_3",
    "Pvalb_4", "Pvalb_11",
    "Sncg_7",
    "Sst_6", "Sst_8", "Sst_14", "Sst_15", "Sst_16", "Sst_17", "Sst_18", "Sst_21", "Sst_24", "Sst_26",
    "Vip_3", "Vip_7", "Vip_8", "Vip_10", "Vip_17", "Vip_20", "Vip_22",
]

# "supertype" starts as a plain-object copy of "cluster" so that the new "Unknown"
# value can be written before the column is turned back into a categorical.
is_low_confidence = adata_ref.obs["cluster"].isin(low_confidence)
adata_ref.obs["supertype"] = adata_ref.obs["cluster"].astype("object")
adata_ref.obs.loc[is_low_confidence, "supertype"] = "Unknown"
adata_ref.obs["supertype"] = adata_ref.obs["supertype"].astype("category")

# Held-out labels: (obs column, table it is read from).
held_label_sources = (
    ("class_held", self_proj),
    ("subclass_held", self_proj),
    ("cluster_held", self_proj),
    ("supertype_held", supertype_self_proj),
)
for held_column, source_table in held_label_sources:
    # Assign first (pandas aligns on the index), then convert, so the categories
    # are derived from the values that actually landed in obs.
    adata_ref.obs[held_column] = source_table.loc[:, held_column]
    adata_ref.obs[held_column] = adata_ref.obs[held_column].astype("category")

# scANVI predictions: (label column, confidence column, table they are read from).
scanvi_sources = (
    ("class_held_scANVI", "class_held_conf_scANVI", self_proj),
    ("subclass_held_scANVI", "subclass_held_conf_scANVI", self_proj),
    ("cluster_held_scANVI", "cluster_held_conf_scANVI", self_proj),
    ("supertype_held_scANVI", "supertype_held_conf_scANVI", supertype_self_proj),
)
for label_column, conf_column, source_table in scanvi_sources:
    adata_ref.obs[label_column] = source_table.loc[:, label_column]
    adata_ref.obs[label_column] = adata_ref.obs[label_column].astype("category")
    adata_ref.obs[conf_column] = source_table.loc[:, conf_column]

# Rename the short class / subclass codes to the display names used in the plots.
class_display_names = {
    "exc": "Neuronal: Glutamatergic",
    "inh": "Neuronal: GABAergic",
    "glia": "Non-neuronal and Non-neural",
}
for class_column in ("class_held", "class_held_scANVI"):
    adata_ref.obs[class_column] = adata_ref.obs[class_column].cat.rename_categories(class_display_names)

subclass_display_names = {
    "Lamp5_Lhx6": "Lamp5 Lhx6",
    "Astro": "Astrocyte",
    "Oligo": "Oligodendrocyte",
    "Endo": "Endothelial",
    "Micro-PVM": "Microglia-PVM",
}
for subclass_column in ("subclass_held", "subclass_held_scANVI"):
    adata_ref.obs[subclass_column] = adata_ref.obs[subclass_column].cat.rename_categories(subclass_display_names)

In [None]:
# Class plots
# fraction=1 keeps every nucleus; presumably the call is here to randomise the
# drawing order of the points (TODO confirm).
sub = adata_ref.copy()
sc.pp.subsample(sub, fraction=1)

# Each class borrows the color of one representative subclass from color_order.
class_representatives = (
    ("Neuronal: Glutamatergic", "L5 IT"),
    ("Neuronal: GABAergic", "Sst"),
    ("Non-neuronal and Non-neural", "OPC"),
)
class_colors = {
    class_name: color_order.loc[color_order["subclass_label"] == subclass_name, "subclass_color"].iloc[0]
    for class_name, subclass_name in class_representatives
}
class_colors["Unknown"] = "#EEEEEE99"

plt.rcParams["figure.figsize"] = (8, 8)
sc.pl.umap(
    sub,
    color=["Class"],
    palette=class_colors,
    size=3,
    frameon=False,
    legend_loc="on data",
    legend_fontsize=16,
    legend_fontweight="bold",
    title="",
    save="_Reference Whole Taxonomy_Class.pdf",
)
# The saved PDF is picked up from figures/ and moved into output/ under its panel name.
os.rename(
    os.path.join(pwd, "figures", "umap_Reference Whole Taxonomy_Class.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 5b_umap_Reference Whole Taxonomy_Class.pdf"),
)

# True class vs. held-out scANVI prediction, handed to plot_confusion (from
# helper_functions) under the column names "true" / "pred" / "conf".
df_self_proj = sub.obs.loc[:, ["Class", "class_held_scANVI", "class_held_conf_scANVI"]]
df_self_proj.columns = ["true", "pred", "conf"]
_ = plot_confusion(df=df_self_proj, figsize=(1.5, 1.5))
plt.savefig(
    os.path.join(pwd, "output", "Extended Data Figure 5b_heatmap_Reference Whole Taxonomy_Class versus Class_scANVI.pdf"),
    bbox_inches="tight",
)
plt.show()

In [None]:
# Excitatory Subclass plots
# Excitatory_obs_names are looked up against obs["sample_id"], so obs is temporarily
# re-indexed by sample_id for the subset and then given its original names back.
obs_names = adata_ref.obs_names.copy()
adata_ref.obs.index = adata_ref.obs["sample_id"].copy()
try:
    sub = adata_ref[Excitatory_obs_names, :].copy()
finally:
    # Restore the original obs_names even when the lookup fails; otherwise
    # adata_ref would stay indexed by sample_id for every later cell / re-run.
    adata_ref.obs_names = obs_names.copy()
sub.obsm["X_umap"] = Excitatory_umap
sc.pp.subsample(sub, fraction=1)

# subclass label -> color lookup, plus a light grey for "Unknown".
subclass_colors = (
    color_order.loc[:, ["subclass_label", "subclass_color"]]
    .drop_duplicates()
    .set_index("subclass_label")["subclass_color"]
    .to_dict()
)
subclass_colors["Unknown"] = "#EEEEEE99"

plt.rcParams["figure.figsize"] = (8, 8)
sc.pl.umap(
    sub,
    color=["Subclass"],
    size=3,
    legend_loc="on data",
    frameon=False,
    palette=subclass_colors,
    legend_fontsize=16,
    legend_fontweight="bold",
    title="",
    save="_Reference Excitatory_Subclass.pdf",
)
os.rename(
    os.path.join(pwd, "figures", "umap_Reference Excitatory_Subclass.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 5b_umap_Reference Excitatory_Subclass.pdf"),
)

# True subclass vs. held-out scANVI prediction for the confusion heatmap.
df_self_proj = sub.obs.loc[:, ["Subclass", "subclass_held_scANVI", "subclass_held_conf_scANVI"]]
df_self_proj.columns = ["true", "pred", "conf"]
_ = plot_confusion(df=df_self_proj, figsize=(4, 4))
plt.savefig(
    os.path.join(pwd, "output", "Extended Data Figure 5b_heatmap_Reference Excitatory_Subclass versus Subclass_scANVI.pdf"),
    bbox_inches="tight",
)


# L5 IT Supertype plots

# L5_IT_obs_names are looked up against obs["sample_id"], so obs is temporarily
# re-indexed by sample_id for the subset and then given its original names back.
obs_names = adata_ref.obs_names.copy()
adata_ref.obs.index = adata_ref.obs["sample_id"].copy()
try:
    sub = adata_ref[L5_IT_obs_names, :].copy()
finally:
    # Restore the original obs_names even when the lookup fails; otherwise
    # adata_ref would stay indexed by sample_id for every later cell / re-run.
    adata_ref.obs_names = obs_names.copy()
sub.obsm["X_umap"] = L5_IT_umap

# Keep nuclei whose true cluster AND held-out supertype prediction are both L5 IT.
# na=False makes a missing label count as "not L5 IT" instead of producing a
# non-boolean mask that cannot be used for indexing.
sub = sub[sub.obs["cluster"].str.startswith("L5 IT", na=False), :].copy()
sub = sub[sub.obs["supertype_held_scANVI"].str.startswith("L5 IT", na=False), :].copy()


sc.pp.subsample(sub, fraction=1)

# cluster label -> color lookup, plus a light grey for "Unknown".
cluster_colors = (
    great_ape_colors.loc[:, ["cluster", "cluster_color"]]
    .drop_duplicates()
    .set_index("cluster")["cluster_color"]
    .to_dict()
)
cluster_colors["Unknown"] = "#EEEEEE99"


plt.rcParams["figure.figsize"] = (8, 8)

# One UMAP per obs column, drawn in this order. Label columns are colored with
# cluster_colors and annotated on the data; the confidence column is continuous
# and uses a colormap instead.
l5_it_panels = (
    ("cluster", True),
    ("cluster_held", True),
    ("cluster_held_scANVI", True),
    ("cluster_held_conf_scANVI", False),
    ("supertype_held", True),
    ("supertype_held_scANVI", True),
)
for l5_it_key, is_label_column in l5_it_panels:
    if is_label_column:
        l5_it_style = {"legend_loc": "on data", "palette": cluster_colors}
    else:
        l5_it_style = {"cmap": "YlGnBu_r"}

    l5_it_file = "Reference L5 IT_" + l5_it_key + ".pdf"
    sc.pl.umap(
        sub,
        color=[l5_it_key],
        size=20,
        frameon=False,
        legend_fontsize=16,
        legend_fontweight="bold",
        title="",
        save="_" + l5_it_file,
        **l5_it_style,
    )
    # Move the PDF from figures/ into output/ under its panel name.
    os.rename(
        os.path.join(pwd, "figures", "umap_" + l5_it_file),
        os.path.join(pwd, "output", "Extended Data Figure 5b_umap_" + l5_it_file),
    )

# Cluster-level confusion: true cluster vs. held-out scANVI cluster prediction.
df_self_proj = sub.obs.loc[:, ["cluster", "cluster_held_scANVI", "cluster_held_conf_scANVI"]]
df_self_proj.columns = ["true", "pred", "conf"]
_ = plot_confusion(df=df_self_proj, figsize=(4,4))
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 5b_heatmap_Reference L5 IT_cluster versus cluster_scANVI.pdf"), bbox_inches='tight')


# Supertype-level confusion, excluding nuclei whose supertype was set to "Unknown".
df_self_proj = sub.obs.loc[:, ["supertype", "supertype_held_scANVI", "supertype_held_conf_scANVI"]]
# Explicit copy: columns are reassigned below, which must not target a filtered view.
df_self_proj = df_self_proj.loc[df_self_proj["supertype"] != "Unknown", :].copy()
df_self_proj.columns = ["true", "pred", "conf"]
# Drop categories that no longer occur after the filter.
df_self_proj["true"] = df_self_proj["true"].cat.remove_unused_categories()
df_self_proj["pred"] = df_self_proj["pred"].cat.remove_unused_categories()
_ = plot_confusion(df=df_self_proj, figsize=(4,4))
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 5b_heatmap_Reference L5 IT_supertype versus supertype_scANVI.pdf"), bbox_inches='tight')



In [None]:
# F1 score boxplots
# Cluster level: true cluster vs. held-out scANVI prediction, scored by get_scores
# (from helper_functions).
df_self_proj = adata_ref.obs.loc[:, ["cluster", "cluster_held_scANVI", "cluster_held_conf_scANVI"]]
df_self_proj.columns = ["true", "pred", "conf"]
result_cluster = get_scores(df=df_self_proj)
result_cluster["model_version"] = "Clusters"

# NOTE(review): the supertype scores use "Supertype (non-expanded)" / "Supertype confidence"
# as the prediction, whereas the L5 IT supertype heatmap uses "supertype_held_scANVI" /
# "supertype_held_conf_scANVI". Confirm this difference is intentional.
df_self_proj = adata_ref.obs.loc[:, ["supertype", "Supertype (non-expanded)", "Supertype confidence"]]
# Explicit copy: columns are reassigned below, which must not target a filtered view.
df_self_proj = df_self_proj.loc[df_self_proj["supertype"] != "Unknown", :].copy()
df_self_proj.columns = ["true", "pred", "conf"]
# Drop categories that no longer occur after the "Unknown" rows were removed.
df_self_proj["true"] = df_self_proj["true"].cat.remove_unused_categories()
df_self_proj["pred"] = df_self_proj["pred"].cat.remove_unused_categories()
result_supertype = get_scores(df=df_self_proj)
result_supertype["model_version"] = "Supertypes"

result = pd.concat([result_cluster, result_supertype], axis=0)

plt.rcParams["figure.figsize"] = (1.5,4)
ax = sns.boxplot(
    data=result,
    x="model_version",
    y="f1",
    showfliers=False,
    palette="Greys"
);
# Individual scores are overlaid on the boxes.
ax = sns.stripplot(
    data=result,
    x="model_version",
    y="f1",
    color="0.25",
    edgecolor="white",
    linewidth=0.5,
    alpha=0.5
);
# Dashed reference line at F1 = 0.7.
ax.axhline(0.7, linestyle="dashed", color="black")
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
ax.set(xlabel="", ylabel="F1 Score");

plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 5b_boxplot_cluster_versus_supertype_F1_scores.pdf"), bbox_inches="tight")

### Extended Data Figure 5c

In [None]:
# Uncertainty is the complement of the scANVI supertype confidence.
Micro_PVM.obs["Uncertainty"] = 1 - Micro_PVM.obs["supertype_conf_scANVI"]

# supertype label -> color lookup, plus a light grey entry for "SEA-AD".
supertype_colors = (
    color_order.loc[:, ["cluster_label", "cluster_color"]]
    .drop_duplicates()
    .set_index("cluster_label")["cluster_color"]
    .to_dict()
)
supertype_colors["SEA-AD"] = "#EEEEEE99"

# Relabel the "Unknown" supertype category as "SEA-AD" so it matches the color key above.
Micro_PVM.obs["supertype"] = Micro_PVM.obs["supertype"].cat.rename_categories({"Unknown": "SEA-AD"})

plt.rcParams["figure.figsize"] = (8, 8)

# (obs column, output file stem, plot-specific keyword arguments), drawn in order.
microglia_label_style = {"palette": supertype_colors, "legend_loc": "on data"}
microglia_panels = (
    ("supertype", "Microglia_supertype.pdf", microglia_label_style),
    ("supertype_scANVI", "Microglia_supertype_scANVI.pdf", microglia_label_style),
    ("Uncertainty", "Microglia_Uncertainty.pdf", {"vmin": -0.05, "cmap": "YlGnBu"}),
)
for microglia_key, microglia_file, microglia_style in microglia_panels:
    sc.pl.umap(
        Micro_PVM,
        color=[microglia_key],
        size=20,
        frameon=False,
        title="",
        save="_" + microglia_file,
        **microglia_style,
    )
    # Move the PDF from figures/ into output/ under its panel name.
    os.rename(
        os.path.join(pwd, "figures", "umap_" + microglia_file),
        os.path.join(pwd, "output", "Extended Data Figure 5c_umap_" + microglia_file),
    )

# Reuse the supertype colors, in shuffled order, as an arbitrary palette for the
# leiden_merged clusters. The shuffle uses its own seeded generator so the colors
# are identical on every run and the global NumPy random state is left untouched.
colors = list(supertype_colors.values())
np.random.RandomState(100).shuffle(colors)
sc.pl.umap(
    Micro_PVM,
    color=["leiden_merged"],
    size=20,
    frameon=False,
    palette=colors,
    legend_loc="on data",
    title="",
    save="_Microglia_Cluster.pdf"
)
os.rename(os.path.join(pwd, "figures", "umap_Microglia_Cluster.pdf"), os.path.join(pwd, "output", "Extended Data Figure 5c_umap_Microglia_Cluster.pdf"))

# Continuous per-nucleus metrics on the microglia UMAP, drawn in this order:
# (obs column, output file stem, plot-specific keyword arguments).
# Only the doublet score panel pins the lower color limit.
microglia_qc_panels = (
    ("cluster_doublet_score", "Microglia_Cluster Doublet score.pdf", {"vmin": -0.05}),
    ("cluster_fraction_mito", "Microglia_Cluster Fraction of Mitochondrial UMIs.pdf", {}),
    ("cluster_donor_split", "Microglia_Cluster Donor entropy.pdf", {}),
    ("nFeature_RNA", "Microglia_Genes detected.pdf", {}),
)
for qc_key, qc_file, qc_style in microglia_qc_panels:
    sc.pl.umap(
        Micro_PVM,
        color=[qc_key],
        size=20,
        frameon=False,
        cmap="YlGnBu",
        title="",
        save="_" + qc_file,
        **qc_style,
    )
    # Move the PDF from figures/ into output/ under its panel name.
    os.rename(
        os.path.join(pwd, "figures", "umap_" + qc_file),
        os.path.join(pwd, "output", "Extended Data Figure 5c_umap_" + qc_file),
    )

### Extended Data Figure 5d

In [None]:
plt.rcParams["figure.figsize"] = (6, 4)

# Each class borrows the color of one representative subclass from color_order.
class_colors = {
    class_name: color_order.loc[color_order["subclass_label"] == subclass_name, "subclass_color"].iloc[0]
    for class_name, subclass_name in (
        ("Neuronal: Glutamatergic", "L5 IT"),
        ("Neuronal: GABAergic", "Sst"),
        ("Non-neuronal and Non-neural", "OPC"),
    )
}
class_colors["Unknown"] = "#EEEEEE99"

df = sc.get.obs_df(
    adata,
    [
        "library_prep",
        "Class",
        "Supertype confidence",
        "Supertype Signature Score",
        "Used in analysis",
        "Continuous Pseudo-progression Score",
        "Neurotypical reference",
    ],
)

# Keep nuclei used in the analysis that are not part of the neurotypical reference
# (the reference flag is stored as the string "False").
non_reference_used = (df["Used in analysis"] == True) & (df["Neurotypical reference"] == "False")

# Average the remaining columns per library and class; the other grouping keys are
# carried along so they stay available as columns. Groups without values are dropped.
grouping_columns = [
    "library_prep",
    "Class",
    "Continuous Pseudo-progression Score",
    "Used in analysis",
    "Neurotypical reference",
]
df = df.loc[non_reference_used, :].groupby(grouping_columns).mean().reset_index().dropna()

# One regression line per class; `ax` is whatever lmplot returns.
ax = sns.lmplot(
    data=df,
    x="Continuous Pseudo-progression Score",
    y="Supertype confidence",
    hue="Class",
    palette=class_colors,
    scatter_kws={"alpha": 0.2},
)
ax.set(ylim=(0.95, 1))
plt.ylabel("Supertype probabilities")
plt.xlabel("CPS")
plt.savefig(
    os.path.join(pwd, "output", "Extended Data Figure 5d_regplot_Supertype confidence across CPS_splitby_class.pdf"),
    bbox_inches="tight",
)
plt.show()

### Extended Data Figure 5e

In [None]:
# supertype label -> color lookup, plus a grey entry for "Unknown".
supertype_colors = (
    color_order.loc[:, ["cluster_label", "cluster_color"]]
    .drop_duplicates()
    .set_index("cluster_label")["cluster_color"]
    .to_dict()
)
supertype_colors["Unknown"] = "#EEEEEE"

# Keep only nuclei passing all four thresholds; Micro_PVM is replaced by the
# filtered copy from here on.
passes_thresholds = (
    (Micro_PVM.obs["cluster_doublet_score"] < 0.1)
    & (Micro_PVM.obs["cluster_fraction_mito"] < 0.05)
    & (Micro_PVM.obs["cluster_donor_split"] < 0.4)
    & (Micro_PVM.obs["nFeature_RNA"] > 1000)
)
Micro_PVM = Micro_PVM[passes_thresholds].copy()

plt.rcParams["figure.figsize"] = (8, 8)
sc.pl.umap(
    Micro_PVM,
    color=["supertype_scANVI"],
    palette=supertype_colors,
    legend_loc="on data",
    size=20,
    frameon=False,
    title="",
    save="_Microglia_subsetted_supertype_scANVI.pdf",
)
# Move the PDF from figures/ into output/ under its panel name.
os.rename(
    os.path.join(pwd, "figures", "umap_Microglia_subsetted_supertype_scANVI.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 5e_umap_Microglia_subsetted_supertype_scANVI.pdf"),
)


In [None]:
# Per-"leiden_merged2" sum of the "Neurotypical reference" column.
# NOTE(review): "leiden_merged2" is only added to sub.obs in the following cell, which
# also converts "Neurotypical reference" to booleans. This cell therefore appears to
# depend on that cell having been run first — confirm the intended cell order.
sub.obs.loc[:, ["leiden_merged2", "Neurotypical reference"]].groupby(["leiden_merged2"]).sum()

In [None]:
# Microglia-PVM nuclei with their precomputed UMAP coordinates.
sub = adata[Microglia_PVM_obs_names, :].copy()
sub.obsm["X_umap"] = Microglia_PVM_umap
sc.pp.subsample(sub, fraction=1)

# "Reference Supertype": the non-expanded supertype for reference nuclei and the
# single label "SEA-AD" for everything else (the flag is still a string here).
sub.obs["Reference Supertype"] = sub.obs["Supertype (non-expanded)"].copy()
sub.obs["Reference Supertype"] = sub.obs["Reference Supertype"].cat.add_categories(["SEA-AD"])
sub.obs.loc[sub.obs["Neurotypical reference"] == "False", "Reference Supertype"] = "SEA-AD"

# "leiden_merged2" is the expanded supertype with these categories renamed to numeric ids.
sub.obs["leiden_merged2"] = sub.obs["Supertype"].copy()
sub.obs["leiden_merged2"] = sub.obs["leiden_merged2"].cat.rename_categories(
    {
        "Micro-PVM_3-SEAAD": "0",
        "Micro-PVM_1": "7",
        "Micro-PVM_2_3-SEAAD": "9",
        "Micro-PVM_4-SEAAD": "10",
        "Micro-PVM_2": "85",
        "Micro-PVM_2_1-SEAAD": "116",
        "Monocyte": "135",
        "Lymphocyte": "200"
    },
)

splitby = "Supertype (non-expanded)"
fraction_columns = ["Max supertype fraction", "Fraction total reference"]

# From here on the reference flag is a boolean, so it can be summed and counted.
sub.obs["Neurotypical reference"] = sub.obs["Neurotypical reference"] == "True"

# Nuclei per (cluster, supertype) pair and per supertype overall. The per-supertype
# totals do not depend on the cluster, so they are computed once up front.
supertype_counts = sub.obs.groupby(["leiden_merged2", splitby]).size()
supertype_totals = sub.obs.groupby([splitby]).size()

# Share of all reference nuclei that falls into each cluster.
reference_counts = sub.obs.loc[:, ["leiden_merged2", "Neurotypical reference"]].groupby(["leiden_merged2"]).sum() / sub.obs["Neurotypical reference"].value_counts().loc[True]

# One row per cluster, collected first and turned into a DataFrame in one step
# (instead of repeatedly concatenating onto an empty frame).
cluster_ids = list(sub.obs["leiden_merged2"].cat.categories)
fraction_rows = []
for k in cluster_ids:
    # Largest fraction of any single supertype that sits in this cluster
    # (nanmax skips supertypes with zero nuclei, where the ratio is NaN).
    max_supertype_fraction = np.nanmax(supertype_counts[k] / supertype_totals)
    reference_fraction = reference_counts.loc[k, :].max()
    fraction_rows.append([max_supertype_fraction, reference_fraction])
fractions = pd.DataFrame(fraction_rows, index=cluster_ids, columns=fraction_columns)

# Broadcast the per-cluster values back onto every nucleus.
sub.obs = sub.obs.merge(fractions, left_on="leiden_merged2", right_index=True, how="left")

# supertype label -> color lookup, plus a light grey entry for "SEA-AD".
supertype_colors = (
    color_order.loc[:, ["cluster_label", "cluster_color"]]
    .drop_duplicates()
    .set_index("cluster_label")["cluster_color"]
    .to_dict()
)
supertype_colors["SEA-AD"] = "#EEEEEE99"

plt.rcParams["figure.figsize"] = (8, 8)


def _plot_microglia_umap(obs_key, file_stem, **umap_kwargs):
    """Draw one UMAP of `sub` colored by `obs_key`, then move the saved PDF into output/.

    `file_stem` is the shared tail of the file name; `umap_kwargs` are passed
    straight through to `sc.pl.umap`.
    """
    sc.pl.umap(
        sub,
        color=[obs_key],
        size=20,
        frameon=False,
        title="",
        save="_" + file_stem,
        **umap_kwargs,
    )
    os.rename(
        os.path.join(pwd, "figures", "umap_" + file_stem),
        os.path.join(pwd, "output", "Extended Data Figure 5e_umap_" + file_stem),
    )


# Label panels colored with the supertype palette.
_plot_microglia_umap(
    "Reference Supertype", "Microglia_supertype.pdf", palette=supertype_colors, legend_loc="on data"
)
_plot_microglia_umap(
    "Supertype (non-expanded)", "Microglia_supertype_scANVI.pdf", palette=supertype_colors, legend_loc="on data"
)

# Cluster panel: the supertype colors in a seeded, shuffled order.
np.random.seed(100)
colors = list(supertype_colors.values())
np.random.shuffle(colors)
_plot_microglia_umap("leiden_merged2", "Microglia_Cluster.pdf", palette=colors, legend_loc="on data")

# Continuous per-cluster fractions.
_plot_microglia_umap(
    "Max supertype fraction", "Microglia_Max supertype fraction.pdf", cmap="YlGnBu", vmin=-0.05
)
_plot_microglia_umap(
    "Fraction total reference", "Microglia_Fraction total reference.pdf", cmap="YlGnBu", vmin=-0.05
)

# Expanded supertype labels.
_plot_microglia_umap(
    "Supertype", "Microglia_supertype_scANVI_expanded.pdf", palette=supertype_colors, legend_loc="on data"
)

### Clean up

In [None]:
# Delete the "figures" directory under the working directory, the location the
# plots above were renamed out of.
figures_dir = os.path.join(pwd, "figures")
shutil.rmtree(figures_dir)