### Load needed libraries

In [None]:
import os
import shutil
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import re
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
from scipy import stats as sp_stats
from scipy import sparse as sp_sparse
from helper_functions import *

# Notebook-wide configuration: scanpy parallelism and figure defaults,
# warning suppression, matplotlib overrides, and the working directory that
# every input/output path below is built from.
sc.settings.n_jobs = 32
sc.set_figure_params(scanpy=True, dpi=100, dpi_save=500, frameon=False, vector_friendly=True, figsize=(10,10), format='png')
# Silences every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Applied after set_figure_params, so these values override whatever it set.
matplotlib.rcParams['pdf.fonttype'] = 42  # Type 42 (TrueType) fonts in saved PDFs
matplotlib.rcParams["axes.grid"] = False

pwd = os.getcwd()

# Every figure below is moved or saved into <pwd>/output; create it up front
# so a fresh checkout does not fail with FileNotFoundError on the first write.
os.makedirs(os.path.join(pwd, "output"), exist_ok=True)

### Load needed datasets/data files

In [None]:
# Cluster order and colors from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
color_order = pd.read_csv(os.path.join(pwd, "input", "cluster_order_and_colors.csv"))

# Every remaining input for this figure is read from the same folder.
ed6_input_dir = os.path.join(pwd, "input", "Extended Data Figure 6")

# ATAC-seq metadata, from 00_build_input_data.py
ATACseq = sc.read_h5ad(os.path.join(ed6_input_dir, "SEAAD_MTG_ATACseq_all-nuclei_no_data.2024-02-13.h5ad"))
# Rewrite names of the form <bases>-<token>-<digits> as <bases>-<token>.
ATACseq.obs_names = [re.sub("([ATGC]+)-([^-]+)-[0-9]+", "\\1-\\2", barcode) for barcode in ATACseq.obs_names]
# Nuclei whose method is "10xMulti" are labelled "paired"; all others "accessibility".
is_multiome = ATACseq.obs["method"] == "10xMulti"
ATACseq.obs["modality"] = "accessibility"
ATACseq.obs.loc[is_multiome, "modality"] = "paired"
# Suffix each observation name with its modality.
ATACseq.obs.index = ATACseq.obs.index + "_" + ATACseq.obs["modality"]

# RNA-seq metadata, from 00_build_input_data.py
RNAseq = sc.read_h5ad(os.path.join(ed6_input_dir, "SEAAD_MTG_RNAseq_final-nuclei_no_data.2024-02-13.h5ad"))
# Keep only nuclei whose method is 10Xv3.1 or 10Xv3.
is_10x_v3 = RNAseq.obs["method"].isin(["10Xv3.1", "10Xv3"])
RNAseq = RNAseq[is_10x_v3, :].copy()
RNAseq.obs["modality"] = "expression"
# Same name clean-up as for ATAC-seq, with a fixed "_expression" suffix.
RNAseq.obs_names = [re.sub("([ATGC]+)-([^-]+)-[0-9]+", "\\1-\\2", barcode) + "_expression" for barcode in RNAseq.obs_names]

# MultiVI metrics from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
RNAseq_MVI_QC_metrics = pd.read_csv(
    os.path.join(ed6_input_dir, "MultiVI_RNA Quality Control Score and Quality Control Clusters.csv"),
    index_col=0,
)

# MultiVI coordinates from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
MultiVI_obs_names = pd.read_csv(os.path.join(ed6_input_dir, "MultiVI_obs_names.csv"), index_col=0).index
MultiVI_umap = np.load(os.path.join(ed6_input_dir, "MultiVI_umap.npy"))


# MultiVI subclass coordinates from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
MultiVI_Sst_obs_names = pd.read_csv(os.path.join(ed6_input_dir, "MultiVI_Sst_obs_names.csv"), index_col=0).index
MultiVI_Sst_umap = np.load(os.path.join(ed6_input_dir, "MultiVI_Sst_umap.npy"))

### Extended Data Figure 6b

In [None]:
# Keep the RNA-seq nuclei that have MultiVI QC metrics and copy both metric
# columns onto them.
shared_barcodes = np.intersect1d(RNAseq.obs_names, RNAseq_MVI_QC_metrics.index)
RNAseq_sub = RNAseq[shared_barcodes, :].copy()
for qc_column in ("RNA Quality Control Score", "Quality Control Clusters"):
    RNAseq_sub.obs[qc_column] = RNAseq_MVI_QC_metrics.loc[RNAseq_sub.obs_names, qc_column].copy()

# Stack ATAC-seq and RNA-seq nuclei, then keep the ones that have MultiVI
# coordinates, in the order of the coordinate table, so the rows of the
# coordinate array line up with adata.
adata = ad.concat([ATACseq, RNAseq_sub], axis=0)
to_keep = MultiVI_obs_names.isin(adata.obs_names)
MultiVI_obs_names, MultiVI_umap = MultiVI_obs_names[to_keep], MultiVI_umap[to_keep, :]
adata = adata[MultiVI_obs_names, :].copy()
adata.obsm["X_umap"] = MultiVI_umap

In [None]:
# fraction=1 keeps every nucleus.
# NOTE(review): presumably used only to randomise row (and so draw) order - confirm.
sc.pp.subsample(adata, fraction=1)

plt.rcParams["figure.figsize"] = (5,5)
# One fixed color per modality label.
colors = {
    "expression": "red",
    "paired": "grey",
    "accessibility": "lightgrey"
}
sc.pl.umap(
    adata,
    color="modality",
    size=1,
    frameon=False,
    palette=colors,
    title="",
    save="_MultiVI_Modality.pdf"
)
# Move the saved figure from figures/ into output/. os.replace overwrites an
# existing destination on every platform, whereas os.rename raises
# FileExistsError on Windows when this cell is re-run.
os.replace(
    os.path.join(pwd, "figures", "umap_MultiVI_Modality.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 6b_umap_MultiVI_Modality.pdf"),
)

### Extended Data Figure 6c

In [None]:
# Convert every cluster ID to its str() form so the column can be matched
# against string IDs; missing values, if any, become the string "nan".
adata.obs["Quality Control Clusters"] = list(map(str, adata.obs["Quality Control Clusters"]))

In [None]:
# Four QC panels. Each one is saved by scanpy under figures/ and then moved
# into output/. os.replace is used for the move because it overwrites an
# existing destination on every platform, whereas os.rename raises
# FileExistsError on Windows when this cell is re-run.

# UMAP colored by the continuous RNA QC score.
plt.rcParams["figure.figsize"] = (5,5)
sc.pl.umap(
    adata,
    color="RNA Quality Control Score",
    size=1,
    frameon=False,
    cmap="YlGnBu",
    title="",
    sort_order=False,
    save="_MultiVI_RNA Quality Control Score.pdf"
)
os.replace(
    os.path.join(pwd, "figures", "umap_MultiVI_RNA Quality Control Score.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 6c_umap_MultiVI_RNA Quality Control Score.pdf"),
)

# Violin of the RNA QC score over all nuclei.
plt.rcParams["figure.figsize"] = (2,4)
sc.pl.violin(
    adata,
    keys=["RNA Quality Control Score"],
    stripplot=False,
    save="_MultiVI_RNA Quality Control Score.pdf"
)
os.replace(
    os.path.join(pwd, "figures", "violin_MultiVI_RNA Quality Control Score.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 6c_violin_MultiVI_RNA Quality Control Score.pdf"),
)

# UMAP restricted to the listed QC clusters. The same cluster IDs are the
# ones excluded when "Good Cells" is derived in the following cell.
plt.rcParams["figure.figsize"] = (5,5)
sc.pl.umap(
    adata,
    color="Quality Control Clusters",
    size=1,
    frameon=False,
    title="",
    legend_loc="on data",
    legend_fontsize=10,
    sort_order=False,
    na_in_legend=False,
    groups=['33', '22', '21', '6', '29', '10', '25', '36', '39', '47', '32', '37', '42', '43', '45', '46'],
    save="_MultiVI_Quality Control Clusters.pdf"
)
os.replace(
    os.path.join(pwd, "figures", "umap_MultiVI_Quality Control Clusters.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 6c_umap_MultiVI_Quality Control Clusters.pdf"),
)

# Violin of the RNA QC score, one violin per QC cluster.
plt.rcParams["figure.figsize"] = (10,2)
sc.pl.violin(
    adata,
    keys=["RNA Quality Control Score"],
    groupby="Quality Control Clusters",
    stripplot=False,
    rotation=90,
    save="_MultiVI_RNA Quality Control Score_by_Quality Control Clusters.pdf"
)
os.replace(
    os.path.join(pwd, "figures", "violin_MultiVI_RNA Quality Control Score_by_Quality Control Clusters.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 6c_violin_MultiVI_RNA Quality Control Score_by_Quality Control Clusters.pdf"),
)

In [None]:
# A nucleus counts as "good" when its QC cluster is not one of the listed IDs.
failed_qc_clusters = ['33', '22', '21', '6', '29', '10', '25', '36', '39', '47', '32', '37', '42', '43', '45', '46']
adata.obs["Good Cells"] = ~adata.obs["Quality Control Clusters"].isin(failed_qc_clusters)

adnc = "Overall AD neuropathological Change"
df = sc.get.obs_df(adata, ["Neurotypical reference", "modality", "library_prep", adnc, "Good Cells"])

# Keep accessibility nuclei whose "Neurotypical reference" flag is the string
# "False", then drop the two columns that were only needed for filtering.
is_accessibility = df["modality"] == "accessibility"
is_not_reference = df["Neurotypical reference"] == "False"
df = df.loc[is_accessibility & is_not_reference, :].drop(columns=["modality", "Neurotypical reference"])
df[adnc] = df[adnc].cat.remove_unused_categories()

# Mean of the boolean flag = fraction of good nuclei per (library, ADNC) pair;
# pairs with no nuclei come out as NaN and are dropped.
df = df.groupby(["library_prep", adnc]).mean().dropna().reset_index()

plt.rcParams["figure.figsize"] = (3,4)
ax = sns.boxplot(data=df, x=adnc, y="Good Cells", palette="tab20", showfliers=False)
# Overlay one point per library on top of the boxes.
ax = sns.swarmplot(data=df, x=adnc, y="Good Cells", color="0.25", edgecolor="white", linewidth=0.5)

ax.set_xlabel("");
ax.set_ylabel('Fraction of cells that pass QC per library');
ax.set_xticklabels(ax.get_xticklabels(), rotation=90);

plt.savefig(
    os.path.join(pwd, "output", "Extended Data Figure 6c_boxplot_Fraction of cells that pass QC by ADNC.pdf"),
    bbox_inches='tight',
);
plt.show()

### Extended Data Figure 6d

In [None]:
# Keep only nuclei whose "Used in analysis" flag is the string "True".
adata = adata[adata.obs["Used in analysis"] == "True", :].copy()

# "RNAseq Subclass" carries the Subclass label for every nucleus that is not
# accessibility-only and stays NaN otherwise. The column starts as object
# dtype: writing string labels into a float64 NaN column relies on silent
# upcasting, which pandas has deprecated (PDEP-6).
adata.obs["RNAseq Subclass"] = pd.Series(np.nan, index=adata.obs.index, dtype=object)
has_rna = adata.obs["modality"] != "accessibility"
adata.obs.loc[has_rna, "RNAseq Subclass"] = adata.obs.loc[has_rna, "Subclass"]

# Mapping of subclass label -> color, taken from the cluster color table.
subclass_colors = (
    color_order.loc[:, ["subclass_label", "subclass_color"]]
    .drop_duplicates()
    .set_index("subclass_label")["subclass_color"]
    .to_dict()
)


# Subclass labels on RNA-measured nuclei only.
plt.rcParams["figure.figsize"] = (5,5)
sc.pl.umap(
    adata,
    color="RNAseq Subclass",
    size=1,
    frameon=False,
    title="",
    legend_loc="on data",
    legend_fontsize=10,
    sort_order=False,
    na_in_legend=False,
    palette=subclass_colors,
    save="_RNAseq Subclass.pdf"
)
# os.replace overwrites an existing destination on every platform, whereas
# os.rename raises FileExistsError on Windows when this cell is re-run.
os.replace(
    os.path.join(pwd, "figures", "umap_RNAseq Subclass.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 6d_umap_RNAseq Subclass.pdf"),
)

# Subclass labels on all retained nuclei.
sc.pl.umap(
    adata,
    color="Subclass",
    size=1,
    frameon=False,
    title="",
    legend_loc="on data",
    legend_fontsize=10,
    sort_order=False,
    na_in_legend=False,
    palette=subclass_colors,
    save="_Subclass.pdf"
)
os.replace(
    os.path.join(pwd, "figures", "umap_Subclass.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 6d_umap_Subclass.pdf"),
)

### Extended Data Figure 6e

In [None]:
# Keep the RNA-seq nuclei that appear in the Sst MultiVI coordinate table.
sst_barcodes = np.intersect1d(RNAseq.obs_names, MultiVI_Sst_obs_names)
RNAseq_sub = RNAseq[sst_barcodes, :].copy()

# Stack ATAC-seq and RNA-seq nuclei, then keep the ones that have Sst
# coordinates, in the order of the coordinate table, so the rows of the
# coordinate array line up with adata.
adata = ad.concat([ATACseq, RNAseq_sub], axis=0)
to_keep = MultiVI_Sst_obs_names.isin(adata.obs_names)
MultiVI_Sst_obs_names, MultiVI_Sst_umap = MultiVI_Sst_obs_names[to_keep], MultiVI_Sst_umap[to_keep, :]
adata = adata[MultiVI_Sst_obs_names, :].copy()
adata.obsm["X_umap"] = MultiVI_Sst_umap

In [None]:
# fraction=1 keeps every nucleus.
# NOTE(review): presumably used only to randomise row (and so draw) order - confirm.
sc.pp.subsample(adata, fraction=1)

# "RNAseq Supertype" carries the Supertype label for every nucleus that is
# not accessibility-only and stays NaN otherwise. The column starts as object
# dtype: writing string labels into a float64 NaN column relies on silent
# upcasting, which pandas has deprecated (PDEP-6).
adata.obs["RNAseq Supertype"] = pd.Series(np.nan, index=adata.obs.index, dtype=object)
has_rna = adata.obs["modality"] != "accessibility"
adata.obs.loc[has_rna, "RNAseq Supertype"] = adata.obs.loc[has_rna, "Supertype"]

plt.rcParams["figure.figsize"] = (5,5)
# One fixed color per modality label.
colors = {
    "expression": "red",
    "paired": "grey",
    "accessibility": "lightgrey"
}

# Mapping of cluster label -> color, taken from the cluster color table.
cluster_colors = (
    color_order.loc[:, ["cluster_label", "cluster_color"]]
    .drop_duplicates()
    .set_index("cluster_label")["cluster_color"]
    .to_dict()
)

# Each figure is saved by scanpy under figures/ and then moved into output/.
# os.replace overwrites an existing destination on every platform, whereas
# os.rename raises FileExistsError on Windows when this cell is re-run.
sc.pl.umap(
    adata,
    color="modality",
    size=2,
    frameon=False,
    palette=colors,
    title="",
    save="_MultiVI Sst Modality.pdf"
)
os.replace(
    os.path.join(pwd, "figures", "umap_MultiVI Sst Modality.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 6e_umap_MultiVI Sst Modality.pdf"),
)

# Supertype labels on RNA-measured nuclei only.
sc.pl.umap(
    adata,
    color="RNAseq Supertype",
    size=1,
    frameon=False,
    title="",
    legend_loc="on data",
    legend_fontsize=10,
    sort_order=False,
    na_in_legend=False,
    palette=cluster_colors,
    save="_RNAseq Supertype.pdf"
)
os.replace(
    os.path.join(pwd, "figures", "umap_RNAseq Supertype.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 6e_umap_RNAseq Supertype.pdf"),
)

# Supertype labels on all nuclei.
plt.rcParams["figure.figsize"] = (5,5)
sc.pl.umap(
    adata,
    color="Supertype",
    size=1,
    frameon=False,
    title="",
    legend_loc="on data",
    legend_fontsize=10,
    sort_order=False,
    na_in_legend=False,
    palette=cluster_colors,
    save="_Supertype.pdf"
)
# NOTE(review): the destination name says "RNASupertype" although this panel
# plots "Supertype"; other outputs reuse the scanpy file name verbatim. Kept
# unchanged in case something depends on the existing name - confirm.
os.replace(
    os.path.join(pwd, "figures", "umap_Supertype.pdf"),
    os.path.join(pwd, "output", "Extended Data Figure 6e_umap_RNASupertype.pdf"),
)


### Clean up

In [None]:
# Delete the "figures" folder under the working directory; the figures saved
# there by the cells above have already been moved into output/.
figures_dir = os.path.join(pwd, "figures")
shutil.rmtree(figures_dir)