### Load needed libraries

In [None]:
import os
import shutil
import scanpy as sc
import pandas as pd
import numpy as np
import glob
import copy as cp
import seaborn as sns
from adjustText import adjust_text
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import warnings
from datetime import datetime
import scipy.stats as sp_stats
from helper_functions import *

# Global runtime / plotting configuration used by every figure cell below.
sc.settings.n_jobs = 32
sc.set_figure_params(
    scanpy=True,
    dpi=100,
    dpi_save=500,
    frameon=False,
    vector_friendly=True,
    figsize=(10, 10),
    format="png",
)
warnings.filterwarnings("ignore")

# Set after sc.set_figure_params so these two values are the ones left in rcParams.
matplotlib.rcParams.update({"pdf.fonttype": 42, "axes.grid": False})

# Every input/output path in this notebook is built relative to the launch directory.
pwd = os.getcwd()

### Load needed datasets/data files

In [None]:
# Cluster order and colors from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
color_order = pd.read_csv(os.path.join(pwd, "input", "cluster_order_and_colors.csv"))

# From 00_build_input_files.py
adata = sc.read_h5ad(
    os.path.join(pwd, "input", "Figure 7", "SEAAD_MTG_RNAseq_final-nuclei_limited.2024-02-13.h5ad")
)

# Subclass UMAP coordinates from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
# Each subclass folder holds the observation names (CSV index) and the matching UMAP array.
microglia_pvm_dir = os.path.join(pwd, "input", "Figure 7", "Microglia-PVM")
Microglia_PVM_obs_names = pd.read_csv(os.path.join(microglia_pvm_dir, "obs_names.csv"), index_col=0).index
Microglia_PVM_umap = np.load(os.path.join(microglia_pvm_dir, "X_umap.npy"))

astrocyte_dir = os.path.join(pwd, "input", "Figure 7", "Astrocyte")
Astrocyte_obs_names = pd.read_csv(os.path.join(astrocyte_dir, "obs_names.csv"), index_col=0).index
Astrocyte_umap = np.load(os.path.join(astrocyte_dir, "X_umap.npy"))

# Green et al (2023) labels from personal communication and Mathys et al (2023) labels from https://compbio.mit.edu/
# Green et al (2023) and Mathys et al (2023) mapping results from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#PFC/RNAseq/public_datasets/
green_labels = pd.read_csv(
    os.path.join(pwd, "input", "Figure 7", "Green_2023_author-annotation.csv"),
    index_col=0,
)
green_mapping = pd.read_csv(
    os.path.join(pwd, "input", "Figure 7", "Green_2023-cell-annotation.2024-03-27.csv"),
    index_col=0,
)

# The Mathys et al (2023) author labels are split across several CSV files in one folder;
# read them all and stack them row-wise into a single table.
# Sorted so the row order of the combined table does not depend on the order glob returns.
mathys_label_files = sorted(glob.glob(os.path.join(pwd, "input", "Figure 7", "Mathys_2023", "*.csv")))
if not mathys_label_files:
    # Fail here with a clear message instead of much later, when the labels are first used.
    raise FileNotFoundError(
        "No Mathys_2023 label CSV files found in " + os.path.join(pwd, "input", "Figure 7", "Mathys_2023")
    )
mathys_labels = pd.concat(
    [pd.read_csv(label_file, index_col=0) for label_file in mathys_label_files],
    axis=0,
)
mathys_mapping = pd.read_csv(os.path.join(pwd, "input", "Figure 7", "Mathys_2023-cell-annotation.2024-03-27.csv"), index_col=0)

# Mean expression table from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
mean_expression = sc.read_h5ad(os.path.join(pwd, "input", "Figure 7", "mean_expression.h5ad"))

# Cell type specific marker genes from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
effect_sizes_vs_all = sc.read_h5ad(os.path.join(pwd, "input", "Figure 7", "effect_sizes_vs_all.h5ad"))

# Effect size table from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
effect_size_table = pd.read_csv(os.path.join(pwd, "input", "Figure 7", "effect_size_table.csv"), index_col=0)

# Z-score each gene's mean expression across the rows belonging to one taxonomy level.
level = "Subclass"
expression_column = "Mean expression (natural log UMIs per 10k plus 1)"
rows_at_level = effect_size_table["Taxonomy Level"] == level
tmp = effect_size_table.loc[rows_at_level, ["Gene", expression_column]]
tmp = tmp.groupby(["Gene"])[expression_column].transform(lambda values: sp_stats.zscore(values, ddof=1))
# Undefined z-scores (NaN) are replaced by 0.
tmp = tmp.fillna(0)
# The assignment is aligned on the index, so only rows at `level` receive a value.
effect_size_table["Mean expression z-score"] = tmp

# GRN results from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
eRegulons = pd.read_csv(os.path.join(pwd, "input", "Figure 7", "eRegulon_metadata_filtered.csv"), index_col=0)

# One row per unique signature name (with any "_extended" removed), split on "_" into
# the TF, the direction and the target-gene count.
signature_names = [name.replace("_extended", "") for name in eRegulons["Gene_signature_name"].unique()]
TFs = pd.DataFrame(
    [[name, *name.split("_")] for name in signature_names],
    columns=["Gene_signature_name", "TF", "Direction", "Target Genes"],
)
# Remove every "(", ")" and "g" from the count field before converting it to an integer.
count_decoration = str.maketrans("", "", "()g")
TFs["Target Genes"] = [np.int32(count.translate(count_decoration)) for count in TFs["Target Genes"]]

### Figure 7a

In [None]:
# Restrict to the Microglia-PVM nuclei and attach their precomputed UMAP coordinates.
sub = adata[Microglia_PVM_obs_names, :].copy()
sub.obsm["X_umap"] = Microglia_PVM_umap
# NOTE(review): fraction=1 keeps every nucleus; presumably this is only here to shuffle the draw order — confirm.
sc.pp.subsample(sub, fraction=1)

plt.rcParams["figure.figsize"] = (8,8)
umap_options = dict(
    color=["Supertype"],
    size=20,
    palette="tab10",
    frameon=False,
    legend_loc="on data",
    title="",
    save="_Microglia-PVM_Supertype_tab10.pdf",
)
sc.pl.umap(sub, **umap_options)

# The saved figure is expected under ./figures with a "umap" prefix; move it to ./output under its panel name.
scanpy_figure = os.path.join(pwd, "figures", "umap_Microglia-PVM_Supertype_tab10.pdf")
panel_figure = os.path.join(pwd, "output", "Figure 7a_umap_Microglia-PVM_Supertype_tab10.pdf")
os.rename(scanpy_figure, panel_figure)


### Figure 7b

In [None]:
# Marker effect sizes restricted to the Microglia-PVM columns, as a DataFrame
# (rows are used as genes below, columns as supertypes).
microglia_effect_view = effect_sizes_vs_all[:, effect_sizes_vs_all.var["Subclass"] == "Microglia-PVM"].copy()
effect_sizes_microglia = pd.DataFrame(
    microglia_effect_view.X,
    index=microglia_effect_view.obs_names,
    columns=microglia_effect_view.var_names,
)

# Mean expression for the same columns.
microglia_expression_view = mean_expression[:, mean_expression.var["Subclass"] == "Microglia-PVM"].copy()
mean_expression_microglia = pd.DataFrame(
    microglia_expression_view.X,
    index=microglia_expression_view.obs_names,
    columns=microglia_expression_view.var_names,
)

# Z-score every gene across the supertypes (computed on the transposed table, then flipped back).
supertype_by_gene = mean_expression_microglia.T
z_score_microglia = ((supertype_by_gene - supertype_by_gene.mean(axis=0)) / supertype_by_gene.std(axis=0)).T


genes = []
subclass = "Microglia-PVM"
# Row-wise maxima do not change inside the loop, so compute them once.
effect_size_row_max = effect_sizes_microglia.max(axis=1)
mean_expression_row_max = mean_expression_microglia.max(axis=1)
for i in effect_sizes_microglia.columns:
    # Micro-PVM_3-SEAAD contributes up to 30 genes, every other supertype up to 3.
    gene_n = 30 if i == "Micro-PVM_3-SEAAD" else 3
    # Keep genes for which this supertype holds both the largest effect size and the largest mean expression.
    peaks_here = (
        (effect_size_row_max == effect_sizes_microglia.loc[:, i])
        & (mean_expression_row_max == mean_expression_microglia.loc[:, i])
    )
    ranked = effect_sizes_microglia.loc[peaks_here, :].sort_values(by=i, ascending=False)
    new_genes = ranked.iloc[:gene_n].index.to_list()
    # Within one supertype, order the selected genes by decreasing z-score.
    genes.extend(z_score_microglia.loc[new_genes, i].sort_values(ascending=False).index.to_list())

plt.rcParams["figure.figsize"] = (15,3)
sns.heatmap(
    z_score_microglia.loc[genes, :].T,
    cmap="RdBu_r",
    center=0,
    xticklabels=True,
);
plt.savefig(os.path.join(pwd, "output", "Figure 7b_heatmap_Microglia marker z scores.pdf"), bbox_inches="tight")


### Figure 7c

In [None]:
# Row order (SEA-AD supertypes) shared by both comparison heatmaps.
microglia_supertype_rows = [
    "Micro-PVM_1", "Micro-PVM_2", "Micro-PVM_2_1-SEAAD", "Micro-PVM_2_3-SEAAD",
    "Micro-PVM_3-SEAAD", "Micro-PVM_4-SEAAD", "Monocyte",
]

# Green_2023
# Pair each mapped cell with its author-assigned state, then compute, per state,
# the fraction of cells assigned to each SEA-AD supertype.
df = green_mapping.merge(
    green_labels.loc[:, ["cell", "state"]],
    how="inner",
    left_on="Original Cell ID",
    right_on="cell",
)
state_fractions = df.loc[:, ["Supertype", "state"]].groupby("state").value_counts(normalize=True).reset_index()
df = state_fractions.pivot(index="Supertype", columns="state", values="proportion")
df = df.fillna(0)
plt.rcParams["figure.figsize"] = (5,3)
green_state_columns = [
    "Macrophages", "Mic.2", "Mic.3", "Mic.9", "Mic.5", "Mic.4", "Mic.14", "Mic.16", "Mic.11",
    "Mic.1", "Mic.10", "Mic.8", "Mic.7", "Mic.12", "Mic.13", "Mic.6", "Mic.15", "Monocytes",
]
ax = sns.heatmap(
    df.loc[microglia_supertype_rows, green_state_columns],
    cmap="YlGnBu",
    xticklabels=True,
    yticklabels=True,
);
ax.set(xlabel="Green et al 2023", ylabel="SEA-AD");
plt.savefig(os.path.join(pwd, "output", "Figure 7c_heatmap_Green_Microglia_Comparison.pdf"), bbox_inches="tight")
plt.show()


# Mathys_2023
# Same computation, keyed on the alternative cell ID and the high-resolution author label.
df = mathys_mapping.merge(
    mathys_labels.loc[:, ["barcode", "cell_type_high_resolution"]],
    how="inner",
    left_on="Original Cell ID Alternative",
    right_on="barcode",
)
label_fractions = (
    df.loc[:, ["Supertype", "cell_type_high_resolution"]]
    .groupby("cell_type_high_resolution")
    .value_counts(normalize=True)
    .reset_index()
)
df = label_fractions.pivot(index="Supertype", columns="cell_type_high_resolution", values="proportion")
df = df.fillna(0)
plt.rcParams["figure.figsize"] = (3,3)
mathys_label_columns = ["CAMs", "Mic P2RY12", "Mic TPT1", "Mic MKI67"]
ax = sns.heatmap(
    df.loc[microglia_supertype_rows, mathys_label_columns],
    cmap="YlGnBu",
    xticklabels=True,
    yticklabels=True,
);
ax.set(xlabel="Mathys et al 2023", ylabel="SEA-AD");
plt.savefig(os.path.join(pwd, "output", "Figure 7c_heatmap_Mathys_Microglia_Comparison.pdf"), bbox_inches="tight")
plt.show()

### Figure 7d

In [None]:
# Column names used by both panels.
x_col = "Effect size across early pseudoprogression"
y_col = "Effect size across late pseudoprogression"

# Rows for both panels: Subclass-level Microglia-PVM entries with mean expression above 0.05
# and an expression z-score above 2.
is_microglia_marker = (
    (effect_size_table["Taxonomy Level"] == "Subclass") &
    (effect_size_table["Population"] == "Microglia-PVM") &
    (effect_size_table["Mean expression (natural log UMIs per 10k plus 1)"] > 0.05) &
    (effect_size_table["Mean expression z-score"] > 2)
)

# Microglia early
plt.rcParams["figure.figsize"] = (4,4)
df = effect_size_table.loc[is_microglia_marker, :]
df = df.loc[~df.duplicated()].copy()
ax = sns.scatterplot(data=df, x=x_col, y=y_col, alpha=0.1, size=0.5, color="grey")
# Vertical guides at early effect sizes of +1 and -1.
for guide in (1, -1):
    plt.axvline(guide, linestyle="--", color="lightgrey");
# Replace the automatic legend with an empty, frameless one.
plt.legend('',frameon=False);

red_genes = ["IRF1", "IRF7", "IFI16", "FCGR1A", "FCGR1B", "FCGR2A", "CD74", "HLA-DRB5", "IL1B", "JAK3", "NINJ1"]
blue_genes = ["C1QC", "C1QB", "CTSC", "LY86", "CSF1R", "FCGR3A"]
already_colored = red_genes + blue_genes
# Remaining genes with |early effect size| > 1.5; the 10 highest and 10 lowest are labelled in grey.
other_extremes = df.loc[(df[x_col].abs() > 1.5) & ~(df["Gene"].isin(already_colored)), :]
label_groups = [
    ("red", df.loc[df["Gene"].isin(red_genes), :].index),
    ("blue", df.loc[df["Gene"].isin(blue_genes), :].index),
    ("grey", other_extremes.sort_values(by=x_col, ascending=False).iloc[:10, :].index),
    ("grey", other_extremes.sort_values(by=x_col).iloc[:10, :].index),
]
texts = [
    plt.text(df.loc[i, x_col], df.loc[i, y_col], df.loc[i, "Gene"], size=8, color=label_color)
    for label_color, row_labels in label_groups
    for i in row_labels
]
adjust_text(texts, arrowprops=dict(arrowstyle="-"), color="grey");
plt.savefig(os.path.join(pwd, "output", "Figure 7d_scatterplot_Microglia_Early.pdf"), bbox_inches="tight")
plt.show()

# Microglia late
df = effect_size_table.loc[is_microglia_marker, :]
df = df.loc[~df.duplicated()].copy()
ax = sns.scatterplot(data=df, x=x_col, y=y_col, alpha=0.1, size=0.5, color="grey")
# Horizontal guides at late effect sizes of +1 and -1.
for guide in (1, -1):
    plt.axhline(guide, linestyle="--", color="lightgrey");
plt.legend('',frameon=False);

blue_genes = ["APOE", "CTSD", "CTSS", "LYZ", "CD9"]
already_colored = list(blue_genes)
# Same idea as above, ranked on the late effect size.
other_extremes = df.loc[(df[y_col].abs() > 1.5) & ~(df["Gene"].isin(already_colored)), :]
label_groups = [
    ("blue", df.loc[df["Gene"].isin(blue_genes), :].index),
    ("grey", other_extremes.sort_values(by=y_col, ascending=False).iloc[:10, :].index),
    ("grey", other_extremes.sort_values(by=y_col).iloc[:10, :].index),
]
texts = [
    plt.text(df.loc[i, x_col], df.loc[i, y_col], df.loc[i, "Gene"], size=8, color=label_color)
    for label_color, row_labels in label_groups
    for i in row_labels
]
adjust_text(texts, arrowprops=dict(arrowstyle="-"), color="grey");
plt.savefig(os.path.join(pwd, "output", "Figure 7d_scatterplot_Microglia_Late.pdf"), bbox_inches="tight")
plt.show()

### Figure 7e

In [None]:
plt.rcParams["figure.figsize"] = (4,4)

# Scatterplot
z_col = "Mean expression z-score"
early_col = "Effect size across early pseudoprogression"

# Subclass-level Microglia-PVM rows for the TFs that have a "+" direction eRegulon.
positive_tfs = TFs.loc[TFs["Direction"] == "+", "TF"]
df = effect_size_table.loc[
    (effect_size_table["Gene"].isin(positive_tfs))
    & (effect_size_table["Taxonomy Level"] == "Subclass")
    & (effect_size_table["Population"].isin(["Microglia-PVM"])),
    :
]
sns.scatterplot(data=df, x=z_col, y=early_col, alpha=0.5)

# Label the TFs with a z-score above 2 and an early effect size above 1.1.
labelled_rows = df.loc[(df[z_col] > 2) & (df[early_col] > 1.1), :]
texts = [
    plt.text(df.loc[i, z_col], df.loc[i, early_col], df.loc[i, "Gene"], size=8)
    for i in labelled_rows.index
]
adjust_text(texts, arrowprops=dict(arrowstyle="-", color="gray"));
plt.savefig(os.path.join(pwd, "output", "Figure 7e_scatterplot_Micro-PVM transcription factor regulators.pdf"), bbox_inches="tight")

# Signature names of the "+" eRegulons whose TF was labelled above.
MR = TFs.loc[
    TFs["TF"].isin(labelled_rows["Gene"]) & (TFs["Direction"] == "+"),
    "Gene_signature_name"
].to_list()


In [None]:
# KDEplot
# For every target gene of the selected eRegulons (MR), count the distinct (Gene, TF) pairs,
# i.e. how many of the selected TFs target that gene.
Downstream_Genes = eRegulons.loc[(eRegulons["Gene_signature_name"].isin(MR)), ["Gene", "TF"]].drop_duplicates().loc[:, "Gene"].value_counts()
# Keep only the genes whose count is greater than the median count plus one.
Downstream_Genes = Downstream_Genes[Downstream_Genes > Downstream_Genes.median() + 1].index

# Subclass-level Microglia-PVM rows for those genes, joined with each gene's mean
# TF2G_importance over the selected eRegulons (duplicate Gene/importance pairs dropped first).
df = effect_size_table.loc[
    (effect_size_table["Gene"].isin(Downstream_Genes)) & (effect_size_table["Taxonomy Level"] == "Subclass") & (effect_size_table["Population"].isin(["Microglia-PVM"])),
    :
].merge(
    eRegulons.loc[(eRegulons["Gene_signature_name"].isin(MR)), ["Gene", "TF2G_importance"]].drop_duplicates().groupby("Gene").mean(),
    left_on="Gene",
    right_on="Gene",
    how="left"
)
# Background set: the same population and level, all genes NOT in Downstream_Genes.
df_else = effect_size_table.loc[
    ~(effect_size_table["Gene"].isin(Downstream_Genes)) & (effect_size_table["Taxonomy Level"] == "Subclass") & (effect_size_table["Population"].isin(["Microglia-PVM"])),
    :
]
# Downsample the background (without replacement) to the same number of rows as df.
# NOTE(review): no seed is set in this cell, so the sampled background, the orange curve and
# the printed p-value can differ from run to run — confirm whether that is intended.
if df.shape[0] < df_else.shape[0]:
    df_else = df_else.loc[np.random.choice(df_else.index, size=df.shape[0], replace=False), :].copy()
    
# Background distribution first (orange), then the downstream targets on the same axes.
sns.kdeplot(
    data=df_else,
    x="Effect size across early pseudoprogression",
    color="orange"
);
sns.kdeplot(
    data=df,
    x="Effect size across early pseudoprogression"
);
# Two-sample t-test without the equal-variance assumption (equal_var=False); index [1] is the p-value.
print("p-value early: " + str(sp_stats.ttest_ind(df["Effect size across early pseudoprogression"], df_else["Effect size across early pseudoprogression"], equal_var=False)[1]))
plt.savefig(os.path.join(pwd, "output", "Figure 7e_kdeplot_Micro-PVM downstream targets.pdf"), bbox_inches="tight")

plt.show()

### Figure 7f

In [None]:
# Deltaplots
# Map each subclass label to its color.
subclass_colors = (
    color_order.loc[:, ["subclass_label", "subclass_color"]]
    .drop_duplicates()
    .set_index("subclass_label")["subclass_color"]
    .to_dict()
)

# The flag is compared with the string "False", so the column is presumably stored as text — confirm.
is_not_reference = adata.obs["Neurotypical reference"] == "False"
sub = adata[is_not_reference, :].copy()

# The "{title}" placeholder is passed through unformatted; presumably delta_plot fills it in — confirm in helper_functions.
save_template = os.path.join(pwd, "output", "Figure 7f_deltaplot_{title}_by non-neuronal Subclass.pdf")

for i in ["RUNX1", "CTSC", "JAK3"]:
    delta_plot_options = dict(
        adata=sub,
        genes=[i],
        groupby="Class",
        groupby_subset=["Non-neuronal and Non-neural"],
        plotby="Subclass",
        donor="Donor ID",
        across="Continuous Pseudo-progression Score",
        highlight=[],
        colormap=subclass_colors,
        title="Expression of " + i,
        legend=False,
        save=save_template,
    )
    ax = delta_plot(**delta_plot_options)
    plt.show();

### Figure 7g

In [None]:
# Restrict to the Astrocyte nuclei and attach their precomputed UMAP coordinates.
sub = adata[Astrocyte_obs_names, :].copy()
sub.obsm["X_umap"] = Astrocyte_umap
# NOTE(review): fraction=1 keeps every nucleus; presumably this is only here to shuffle the draw order — confirm.
sc.pp.subsample(sub, fraction=1)

plt.rcParams["figure.figsize"] = (8,8)
umap_options = dict(
    color=["Supertype"],
    size=20,
    palette="tab10",
    frameon=False,
    legend_loc="on data",
    title="",
    save="_Astrocyte_Supertype_tab10.pdf",
)
sc.pl.umap(sub, **umap_options)

# The saved figure is expected under ./figures with a "umap" prefix; move it to ./output under its panel name.
scanpy_figure = os.path.join(pwd, "figures", "umap_Astrocyte_Supertype_tab10.pdf")
panel_figure = os.path.join(pwd, "output", "Figure 7g_umap_Astrocyte_Supertype_tab10.pdf")
os.rename(scanpy_figure, panel_figure)


### Figure 7h

In [None]:
# Row order (SEA-AD astrocyte supertypes) shared by both comparison heatmaps.
astrocyte_supertype_rows = ["Astro_2", "Astro_1", "Astro_3", "Astro_4", "Astro_5", "Astro_6-SEAAD"]

# Green_2023
# Pair each mapped cell with its author-assigned state, then compute, per state,
# the fraction of cells assigned to each SEA-AD supertype.
df = green_mapping.merge(green_labels.loc[:, ["cell", "state"]], how="inner", left_on="Original Cell ID", right_on="cell")
df = df.loc[:, ["Supertype", "state"]].groupby("state").value_counts(normalize=True).reset_index().pivot(index="Supertype", columns="state", values="proportion")
# Supertype/state combinations that never occur are shown as 0.
df = df.fillna(0)
plt.rcParams["figure.figsize"] = (5,3)
ax = sns.heatmap(
    df.loc[
        astrocyte_supertype_rows,
        ["Ast.1", "Ast.2", "Ast.3", "Ast.6", "Ast.8", "Ast.9", "Ast.10", "Ast.7", "Ast.4", "Ast.5"]
    ],
    cmap="YlGnBu",
    xticklabels=True,
    yticklabels=True,
);
ax.set(xlabel="Green et al 2023", ylabel="SEA-AD");
plt.savefig(os.path.join(pwd, "output", "Figure 7h_heatmap_Green_Astrocyte_Comparison.pdf"), bbox_inches="tight")
plt.show()

# Mathys_2023
# Same computation, keyed on the alternative cell ID and the high-resolution author label.
df = mathys_mapping.merge(mathys_labels.loc[:, ["barcode", "cell_type_high_resolution"]], how="inner", left_on="Original Cell ID Alternative", right_on="barcode")
df = df.loc[:, ["Supertype", "cell_type_high_resolution"]].groupby("cell_type_high_resolution").value_counts(normalize=True).reset_index().pivot(index="Supertype", columns="cell_type_high_resolution", values="proportion")
df = df.fillna(0)
plt.rcParams["figure.figsize"] = (2,3)
ax = sns.heatmap(
    df.loc[
        astrocyte_supertype_rows,
        ["Ast GRM3", "Ast DPP10", "Ast CHI3L1"]
    ],
    cmap="YlGnBu",
    xticklabels=True,
    yticklabels=True,
);
ax.set(xlabel="Mathys et al 2023", ylabel="SEA-AD");
# This panel belongs to Figure 7h; it was previously written with a "Figure 7c" prefix
# carried over from the microglia cell.
plt.savefig(os.path.join(pwd, "output", "Figure 7h_heatmap_Mathys_Astrocyte_Comparison.pdf"), bbox_inches="tight")
plt.show()

### Figure 7i

In [None]:
# Marker effect sizes restricted to the Astrocyte columns, as a DataFrame
# (rows are used as genes below, columns as supertypes).
astrocyte_effect_view = effect_sizes_vs_all[:, effect_sizes_vs_all.var["Subclass"] == "Astrocyte"].copy()
effect_sizes_astrocyte = pd.DataFrame(
    astrocyte_effect_view.X,
    index=astrocyte_effect_view.obs_names,
    columns=astrocyte_effect_view.var_names,
)

# Mean expression for the same columns.
astrocyte_expression_view = mean_expression[:, mean_expression.var["Subclass"] == "Astrocyte"].copy()
mean_expression_astrocyte = pd.DataFrame(
    astrocyte_expression_view.X,
    index=astrocyte_expression_view.obs_names,
    columns=astrocyte_expression_view.var_names,
)

# Z-score every gene across the supertypes (computed on the transposed table, then flipped back).
supertype_by_gene = mean_expression_astrocyte.T
z_score_astrocyte = ((supertype_by_gene - supertype_by_gene.mean(axis=0)) / supertype_by_gene.std(axis=0)).T


genes = []
subclass = "Astrocyte"
# Row-wise maxima do not change inside the loop, so compute them once.
effect_size_row_max = effect_sizes_astrocyte.max(axis=1)
mean_expression_row_max = mean_expression_astrocyte.max(axis=1)
for i in effect_sizes_astrocyte.columns:
    # Astro_2 contributes up to 30 genes, every other supertype up to 3.
    gene_n = 30 if i == "Astro_2" else 3
    # Keep genes for which this supertype holds both the largest effect size and the largest mean expression.
    peaks_here = (
        (effect_size_row_max == effect_sizes_astrocyte.loc[:, i])
        & (mean_expression_row_max == mean_expression_astrocyte.loc[:, i])
    )
    ranked = effect_sizes_astrocyte.loc[peaks_here, :].sort_values(by=i, ascending=False)
    new_genes = ranked.iloc[:gene_n].index.to_list()
    # Within one supertype, order the selected genes by decreasing z-score.
    genes.extend(z_score_astrocyte.loc[new_genes, i].sort_values(ascending=False).index.to_list())

plt.rcParams["figure.figsize"] = (15,3)
sns.heatmap(
    z_score_astrocyte.loc[genes, :].T,
    cmap="RdBu_r",
    center=0,
    xticklabels=True,
);
plt.savefig(os.path.join(pwd, "output", "Figure 7i_heatmap_Astrocyte marker z scores.pdf"), bbox_inches="tight")


### Figure 7j

In [None]:
genes = pd.Series(effect_size_table["Gene"].unique())
# Define gene lists
gene_lists = {}

# Each family is a (gene-symbol prefix, disqualifying substring or None) pair: a gene is
# included when its symbol starts with the prefix and does not contain the substring.

# Cell adhesion, based on GO:0007155
cell_adhesion_families = [
    ("CNTN", "AP"),
    ("NINJ", None),
    ("NLGN", None),
    ("DLG", "AP"),
    ("ADGR", None),
    ("PKP", None),
    ("TENM", None),
    ("DSCAM", None),
    ("NECTIN", None),
    ("CADM", None),
    ("SIGLEC", None),
    ("CTNN", "IP"),
    ("ICAM", None),
    ("CEACAM", None),
    ("CLDN", None),
    ("CLSTN", None),
    ("CELSR", None),
    ("DSG", None),
    ("CDH", None),
    ("PCDH", None),
    ("ITG", "BP"),
]
gene_lists["Cell adhesion"] = []
for prefix, excluded in cell_adhesion_families:
    in_family = genes.str.startswith(prefix)
    if excluded is not None:
        in_family = in_family & ~(genes.str.contains(excluded))
    gene_lists["Cell adhesion"].extend(genes[in_family])
# Individually named genes. Every entry needs its own trailing comma: adjacent string
# literals are concatenated by Python, which previously merged "MADCAM1" and "L1CAM"
# into one bogus symbol and dropped both genes.
gene_lists["Cell adhesion"].extend([
    "DSC1", "DSC2", "DSC3",
    "DCHS1", "DCHS2",
    "FAT1", "FAT2", "FAT3", "FAT4",
    "SELE", "SELL", "SELP",
    "VCAM1", "PECAM1", "MADCAM1",
    "L1CAM", "NRCAM", "CHL1", "NFASC",
    "MAG", "F11R", "JAM2", "JAM3", "JAML",
    "VSIG1", "VSIG2", "IGSF11", "VSIG4", "ESAM",
    "CXADR", "PVR", "CD2", "HEPACAM", "HEPACAM2",
    "BCAM", "EPCAM", "ALCAM", "CERCAM", "MCAM",
    "UNC5A", "UNC5B", "UNC5C", "UNC5D",
    "VCL", "JUP", "MSN", "CD209",
])
# Keep only symbols present in the table and drop those containing "-AS", "-DT" or "-IT";
# np.intersect1d also sorts and de-duplicates, so this entry ends up as an array.
gene_lists["Cell adhesion"] = np.intersect1d(gene_lists["Cell adhesion"], genes[~(genes.str.contains("-AS")) & ~(genes.str.contains("-DT")) & ~(genes.str.contains("-IT"))])

# Axonal guidance, based on GO:0097485
axonal_guidance_families = [
    ("SEMA", None),    # Semaphorin
    ("UNC5", "50"),    # Netrin receptor
    ("EPH", None),     # Ephrin
    ("EFN", None),     # Ephrin
    ("LGI", None),     # Leucine Repeat (Slit-like)
    ("NOTCH", None),   # Notch
    ("DLL", None),     # Notch
    ("JAG", None),     # Notch
    ("SLIT", None),    # Slit
    ("LRTM", None),    # Slit-like
    ("ROBO", None),    # Robo
    ("PLXN", None),    # Plexin
    ("FLRT", None),    # Fibronectin Leucine Rich Transmembrane
    ("VEGF", None),    # Vascular endothelial growth factor
    ("NCAM", None),    # NCAM
    ("CNTN", "AP"),    # Contactin
    ("PTCH", None),    # Patch
    ("FEZ", None),     # FEZ
    ("ERBB", None),    # Erb-B2 Receptor Tyrosine Kinase
    ("FZD", None),     # Fzd
    ("NRP", None),     # Neuropilin
    ("BMP", None),     # BMP
    ("FGF", "P"),      # FGF
    ("NTN", None),     # Netrin
    ("LGR", None),     # LGRs
    ("WNT", None),     # Wnt
    ("NTRK", None),    # Ntrk
    ("NECTIN", None),  # Nectin
    ("DSCAM", None),   # DSCAM
    ("NRXN", None),    # Neurexin
    ("GFR", None),     # GDNF Receptor
]
# Unlike "Cell adhesion", this entry stays a plain list in family order and is not filtered further.
gene_lists["Axonal guidance"] = []
for prefix, excluded in axonal_guidance_families:
    in_family = genes.str.startswith(prefix)
    if excluded is not None:
        in_family = in_family & ~(genes.str.contains(excluded))
    gene_lists["Axonal guidance"].extend(genes[in_family])

In [None]:
# Column names and gene set used by both panels.
x_col = "Effect size across early pseudoprogression"
y_col = "Effect size across late pseudoprogression"
highlight_genes = np.union1d(gene_lists["Cell adhesion"], gene_lists["Axonal guidance"])

# Subclass-level Astrocyte rows with mean expression above 0.05.
is_expressed_astrocyte = (
    (effect_size_table["Taxonomy Level"] == "Subclass") &
    (effect_size_table["Population"] == "Astrocyte") &
    (effect_size_table["Mean expression (natural log UMIs per 10k plus 1)"] > 0.05)
)

# Astrocyte early
plt.rcParams["figure.figsize"] = (4,4)
# Plot the rows with z-score > 2 together with the cell adhesion / axonal guidance genes,
# dropping the exact duplicate rows the concatenation creates.
df = effect_size_table.loc[is_expressed_astrocyte & (effect_size_table["Mean expression z-score"] > 2), :]
df2 = effect_size_table.loc[is_expressed_astrocyte & effect_size_table["Gene"].isin(highlight_genes), :]
df = pd.concat([df, df2], axis=0)
df = df.loc[~df.duplicated()].copy()
ax = sns.scatterplot(data=df, x=x_col, y=y_col, alpha=0.1, size=0.5, color="grey")
# Vertical guides at early effect sizes of +1 and -1.
for guide in (1, -1):
    plt.axvline(guide, linestyle="--", color="lightgrey");
# Replace the automatic legend with an empty, frameless one.
plt.legend('',frameon=False);

already_colored = highlight_genes.tolist()
already_colored.extend(["GFAP"])
# Remaining genes with |early effect size| > 1.5; the 5 highest and 10 lowest are labelled in grey.
other_extremes = df.loc[(df[x_col].abs() > 1.5) & ~(df["Gene"].isin(already_colored)), :]
label_groups = [
    ("red", df.loc[(df["Gene"].isin(highlight_genes)) & (df[x_col] > 1.5), :].index),
    ("blue", df.loc[df["Gene"].isin(["GFAP"]), :].index),
    ("grey", other_extremes.sort_values(by=x_col, ascending=False).iloc[:5, :].index),
    ("grey", other_extremes.sort_values(by=x_col).iloc[:10, :].index),
]
texts = [
    plt.text(df.loc[i, x_col], df.loc[i, y_col], df.loc[i, "Gene"], size=8, color=label_color)
    for label_color, row_labels in label_groups
    for i in row_labels
]
adjust_text(texts, arrowprops=dict(arrowstyle="-"), color="grey");
plt.savefig(os.path.join(pwd, "output", "Figure 7j_scatterplot_Astrocyte_Early.pdf"), bbox_inches="tight")
plt.show()

# Astrocyte late
df = effect_size_table.loc[is_expressed_astrocyte & (effect_size_table["Mean expression z-score"] > 2), :]
df2 = effect_size_table.loc[is_expressed_astrocyte & effect_size_table["Gene"].isin(highlight_genes), :]
df = pd.concat([df, df2], axis=0)
df = df.loc[~df.duplicated()].copy()
ax = sns.scatterplot(data=df, x=x_col, y=y_col, alpha=0.1, size=0.5, color="grey")
# Horizontal guides at late effect sizes of +1 and -1.
for guide in (1, -1):
    plt.axhline(guide, linestyle="--", color="lightgrey");
plt.legend('',frameon=False);

already_colored = highlight_genes.tolist()
already_colored.extend(["APOE"])
# Ranked on the late effect size; here the highlighted genes use the absolute value,
# and 10 genes are labelled on each side.
other_extremes = df.loc[(df[y_col].abs() > 1.5) & ~(df["Gene"].isin(already_colored)), :]
label_groups = [
    ("red", df.loc[(df["Gene"].isin(highlight_genes)) & (df[y_col].abs() > 1.5), :].index),
    ("green", df.loc[df["Gene"].isin(["APOE"]), :].index),
    ("grey", other_extremes.sort_values(by=y_col, ascending=False).iloc[:10, :].index),
    ("grey", other_extremes.sort_values(by=y_col).iloc[:10, :].index),
]
texts = [
    plt.text(df.loc[i, x_col], df.loc[i, y_col], df.loc[i, "Gene"], size=8, color=label_color)
    for label_color, row_labels in label_groups
    for i in row_labels
]
adjust_text(texts, arrowprops=dict(arrowstyle="-"), color="grey");
plt.savefig(os.path.join(pwd, "output", "Figure 7j_scatterplot_Astrocyte_Late.pdf"), bbox_inches="tight")
plt.show()

### Figure 7k

In [None]:
# Deltaplots
# Map each subclass label to its color.
subclass_colors = (
    color_order.loc[:, ["subclass_label", "subclass_color"]]
    .drop_duplicates()
    .set_index("subclass_label")["subclass_color"]
    .to_dict()
)

# The flag is compared with the string "False", so the column is presumably stored as text — confirm.
is_not_reference = adata.obs["Neurotypical reference"] == "False"
sub = adata[is_not_reference, :].copy()

# The "{title}" placeholder is passed through unformatted; presumably delta_plot fills it in — confirm in helper_functions.
save_template = os.path.join(pwd, "output", "Figure 7k_deltaplot_{title}_by non-neuronal Subclass.pdf")

for i in ["APOE"]:
    delta_plot_options = dict(
        adata=sub,
        genes=[i],
        groupby="Class",
        groupby_subset=["Non-neuronal and Non-neural"],
        plotby="Subclass",
        donor="Donor ID",
        across="Continuous Pseudo-progression Score",
        highlight=[],
        colormap=subclass_colors,
        title="Expression of " + i,
        legend=False,
        save=save_template,
    )
    ax = delta_plot(**delta_plot_options)
    plt.show();

### Clean up

In [None]:
# Remove the ./figures scratch directory (the UMAP cells move their PDFs out of it into ./output).
figures_dir = os.path.join(pwd, "figures")
shutil.rmtree(figures_dir)