### Load needed libraries

In [None]:
import os
import shutil
import scanpy as sc
import pandas as pd
import numpy as np
import glob
import copy as cp
import seaborn as sns
from adjustText import adjust_text
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import warnings
from datetime import datetime
import scipy.stats as sp_stats
from helper_functions import *

# Scanpy runtime and figure defaults used by every plot in this notebook.
sc.settings.n_jobs = 32
sc.set_figure_params(
    scanpy=True,
    dpi=100,
    dpi_save=500,
    frameon=False,
    vector_friendly=True,
    figsize=(10, 10),
    format="png",
)

# Silence every Python warning for the remainder of the session.
warnings.filterwarnings("ignore")

# Matplotlib overrides, applied after scanpy's defaults so they take precedence:
# fonttype 42 embeds TrueType fonts in saved PDFs, and axis grids are turned off.
matplotlib.rcParams.update({"pdf.fonttype": 42, "axes.grid": False})

# Working directory; every input/output path below is built relative to it.
pwd = os.getcwd()

### Load needed datasets/data files

In [None]:
# All Figure 8 inputs live under <pwd>/input/Figure 8.
_figure8_input_dir = os.path.join(pwd, "input", "Figure 8")

# Cluster order and colors from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
color_order = pd.read_csv(os.path.join(pwd, "input", "cluster_order_and_colors.csv"))

# From 00_build_input_files.py
adata = sc.read_h5ad(
    os.path.join(_figure8_input_dir, "SEAAD_MTG_RNAseq_final-nuclei_limited.2024-02-13.h5ad")
)

# Subclass UMAP coordinates from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
# For each subclass: the index of obs_names.csv (cell identifiers) and the saved X_umap array.
_opc_dir = os.path.join(_figure8_input_dir, "OPC")
OPC_obs_names = pd.read_csv(os.path.join(_opc_dir, "obs_names.csv"), index_col=0).index
OPC_umap = np.load(os.path.join(_opc_dir, "X_umap.npy"))

_oligodendrocyte_dir = os.path.join(_figure8_input_dir, "Oligodendrocyte")
Oligodendrocyte_obs_names = pd.read_csv(
    os.path.join(_oligodendrocyte_dir, "obs_names.csv"), index_col=0
).index
Oligodendrocyte_umap = np.load(os.path.join(_oligodendrocyte_dir, "X_umap.npy"))

# Green et al (2023) labels from personal communication and Mathys et al (2023) labels from https://compbio.mit.edu/
# Green et al (2023) and Mathys et al (2023) mapping results from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#PFC/RNAseq/public_datasets/
# The green_labels assignment used to be fused onto the end of the comment line
# above, so it never executed and Figure 8c failed with a NameError.
green_labels = pd.read_csv(os.path.join(pwd, "input", "Figure 8","Green_2023_author-annotation.csv"), index_col=0)
green_mapping = pd.read_csv(os.path.join(pwd, "input", "Figure 8", "Green_2023-cell-annotation.2024-03-27.csv"), index_col=0)

# Mathys et al (2023) author labels are spread over several CSV files; stack them row-wise.
mathys_label_files = glob.glob(os.path.join(pwd, "input", "Figure 8", "Mathys_2023", "*.csv"))
if not mathys_label_files:
    # Fail here with a clear message instead of leaving a placeholder value
    # that only breaks later, when Figure 8c indexes into it.
    raise FileNotFoundError(
        "No Mathys_2023 label CSV files found in "
        + os.path.join(pwd, "input", "Figure 8", "Mathys_2023")
    )
mathys_labels = pd.concat(
    [pd.read_csv(label_file, index_col=0) for label_file in mathys_label_files],
    axis=0,
)
mathys_mapping = pd.read_csv(os.path.join(pwd, "input", "Figure 8", "Mathys_2023-cell-annotation.2024-03-27.csv"), index_col=0)

# Mean expression table from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
mean_expression = sc.read_h5ad(os.path.join(pwd, "input", "Figure 8", "mean_expression.h5ad"))

# Cell type specific marker genes from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
effect_sizes_vs_all = sc.read_h5ad(os.path.join(pwd, "input", "Figure 8", "effect_sizes_vs_all.h5ad"))

# Effect size table from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
effect_size_table = pd.read_csv(os.path.join(pwd, "input", "Figure 8", "effect_size_table.csv"), index_col=0)

# Add a per-gene z-score of mean expression across the populations of one taxonomy level.
level = "Subclass"
_mean_expression_column = "Mean expression (natural log UMIs per 10k plus 1)"
_rows_at_level = effect_size_table["Taxonomy Level"] == level
_expression_by_gene = effect_size_table.loc[
    _rows_at_level, ["Gene", _mean_expression_column]
].groupby(["Gene"])[_mean_expression_column]

# zscore with ddof=1 uses the sample standard deviation; NaN results are replaced with 0.
# The assignment aligns on the index, so rows at other taxonomy levels are left as NaN.
effect_size_table["Mean expression z-score"] = _expression_by_gene.transform(
    lambda values: sp_stats.zscore(values, ddof=1)
).fillna(0)

# GRN results from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
eRegulons = pd.read_csv(os.path.join(pwd, "input", "Figure 7", "eRegulon_metadata_filtered.csv"), index_col=0)

# Signature names with the "_extended" tag removed. Each remaining name is split
# on "_" into three fields: TF, Direction and Target Genes.
_signature_names = [
    signature.replace("_extended", "")
    for signature in eRegulons["Gene_signature_name"].unique()
]
TFs = pd.DataFrame(
    [[signature, *signature.split("_")] for signature in _signature_names],
    columns=["Gene_signature_name", "TF", "Direction", "Target Genes"],
)

# Delete every "(", ")" and "g" from the target-gene field before converting it to an integer.
_drop_count_decoration = str.maketrans("", "", "()g")
TFs["Target Genes"] = [
    np.int32(field.translate(_drop_count_decoration)) for field in TFs["Target Genes"]
]

### Figure 8a

In [None]:
# One supertype-colored UMAP per subclass. Each entry holds: the cells to plot,
# their precomputed UMAP coordinates, the supertype left out of the plot, the
# suffix handed to scanpy, the file scanpy writes, and the final output file name.
_umap_panels = (
    (
        OPC_obs_names,
        OPC_umap,
        "OPC_2_1-SEAAD",
        "_OPC_Supertype_tab10.pdf",
        "umap_OPC_Supertype_tab10.pdf",
        "Figure 8a_umap_OPC_Supertype_tab10.pdf",
    ),
    (
        Oligodendrocyte_obs_names,
        Oligodendrocyte_umap,
        "Oligo_2_1-SEAAD",
        "_Oligodendrocyte_Supertype_tab10.pdf",
        "umap_Oligodendrocyte_Supertype_tab10.pdf",
        "Figure 8a_umap_Oligodendrocyte_Supertype_tab10.pdf",
    ),
)

for _cells, _coordinates, _excluded_supertype, _save_suffix, _scanpy_file, _output_file in _umap_panels:
    sub = adata[_cells, :].copy()
    sub.obsm["X_umap"] = _coordinates
    _kept_cells = sub.obs["Supertype"] != _excluded_supertype
    sub = sub[_kept_cells, :].copy()
    # fraction=1 keeps every cell; presumably used only to shuffle the drawing
    # order of the points - TODO confirm.
    sc.pp.subsample(sub, fraction=1)

    plt.rcParams["figure.figsize"] = (8,8)
    sc.pl.umap(
        sub,
        color=["Supertype"],
        size=20,
        palette="tab10",
        frameon=False,
        legend_loc="on data",
        title="",
        save=_save_suffix,
    )
    # Move the file scanpy saved under ./figures into ./output with its figure-panel name.
    os.rename(
        os.path.join(pwd, "figures", _scanpy_file),
        os.path.join(pwd, "output", _output_file),
    )


### Figure 8b

In [None]:
# OPC
# Columns (var) to keep: OPC subclass, minus the excluded OPC_2_1-SEAAD supertype.
_not_excluded_opc = effect_sizes_vs_all.var["Supertype"] != "OPC_2_1-SEAAD"

_effect_subset = effect_sizes_vs_all[
    :, (effect_sizes_vs_all.var["Subclass"] == "OPC") & _not_excluded_opc
].copy()
effect_sizes_OPC = pd.DataFrame(
    _effect_subset.X,
    index=_effect_subset.obs_names,
    columns=_effect_subset.var_names,
)

# NOTE(review): the supertype part of this mask is read from effect_sizes_vs_all.var,
# not mean_expression.var; this presumably relies on both objects sharing the same
# var index - confirm.
_mean_subset = mean_expression[
    :, (mean_expression.var["Subclass"] == "OPC") & _not_excluded_opc
].copy()
mean_expression_OPC = pd.DataFrame(
    _mean_subset.X,
    index=_mean_subset.obs_names,
    columns=_mean_subset.var_names,
)

# Standardize each row of mean_expression_OPC across its columns.
_columns_as_rows = mean_expression_OPC.T
z_score_OPC = (
    (_columns_as_rows - _columns_as_rows.mean(axis=0)) / _columns_as_rows.std(axis=0)
).T

# For every column, pick the rows where that column holds both the largest effect
# size and the largest mean expression, keep the top gene_n by effect size, and
# append them ordered by z-score.
_largest_effect = effect_sizes_OPC.max(axis=1)
_largest_expression = mean_expression_OPC.max(axis=1)

genes = []
for supertype in effect_sizes_OPC.columns:
    gene_n = 30 if supertype == "OPC_2" else 3
    _is_marker = (_largest_effect == effect_sizes_OPC.loc[:, supertype]) & (
        _largest_expression == mean_expression_OPC.loc[:, supertype]
    )
    _ranked = effect_sizes_OPC.loc[_is_marker, :].sort_values(by=supertype, ascending=False)
    new_genes = _ranked.index[:gene_n].to_list()
    _by_z_score = z_score_OPC.loc[new_genes, supertype].sort_values(ascending=False)
    genes.extend(_by_z_score.index.to_list())

plt.rcParams["figure.figsize"] = (12,3)
sns.heatmap(
    z_score_OPC.loc[genes, :].T,
    cmap="RdBu_r",
    center=0,
    xticklabels=True,
)
plt.savefig(os.path.join(pwd, "output", "Figure 8b_heatmap_OPC marker z scores.pdf"), bbox_inches="tight")
plt.show()


# Oligodendrocyte
# Columns (var) to keep: Oligodendrocyte subclass, minus the excluded Oligo_2_1-SEAAD supertype.
_not_excluded_oligo = effect_sizes_vs_all.var["Supertype"] != "Oligo_2_1-SEAAD"

_effect_subset = effect_sizes_vs_all[
    :, (effect_sizes_vs_all.var["Subclass"] == "Oligodendrocyte") & _not_excluded_oligo
].copy()
effect_sizes_Oligodendrocyte = pd.DataFrame(
    _effect_subset.X,
    index=_effect_subset.obs_names,
    columns=_effect_subset.var_names,
)

# NOTE(review): the supertype part of this mask is read from effect_sizes_vs_all.var,
# not mean_expression.var; this presumably relies on both objects sharing the same
# var index - confirm.
_mean_subset = mean_expression[
    :, (mean_expression.var["Subclass"] == "Oligodendrocyte") & _not_excluded_oligo
].copy()
mean_expression_Oligodendrocyte = pd.DataFrame(
    _mean_subset.X,
    index=_mean_subset.obs_names,
    columns=_mean_subset.var_names,
)

# Standardize each row of mean_expression_Oligodendrocyte across its columns.
_columns_as_rows = mean_expression_Oligodendrocyte.T
z_score_Oligodendrocyte = (
    (_columns_as_rows - _columns_as_rows.mean(axis=0)) / _columns_as_rows.std(axis=0)
).T

# For every column, pick the rows where that column holds both the largest effect
# size and the largest mean expression, keep the top gene_n by effect size, and
# append them ordered by z-score.
_largest_effect = effect_sizes_Oligodendrocyte.max(axis=1)
_largest_expression = mean_expression_Oligodendrocyte.max(axis=1)

genes = []
for supertype in effect_sizes_Oligodendrocyte.columns:
    gene_n = 30 if supertype in ("Oligo_2", "Oligo_4") else 3
    _is_marker = (_largest_effect == effect_sizes_Oligodendrocyte.loc[:, supertype]) & (
        _largest_expression == mean_expression_Oligodendrocyte.loc[:, supertype]
    )
    _ranked = effect_sizes_Oligodendrocyte.loc[_is_marker, :].sort_values(by=supertype, ascending=False)
    new_genes = _ranked.index[:gene_n].to_list()
    _by_z_score = z_score_Oligodendrocyte.loc[new_genes, supertype].sort_values(ascending=False)
    genes.extend(_by_z_score.index.to_list())

plt.rcParams["figure.figsize"] = (20,3)
sns.heatmap(
    z_score_Oligodendrocyte.loc[genes, :].T,
    cmap="RdBu_r",
    center=0,
    xticklabels=True,
)
plt.savefig(os.path.join(pwd, "output", "Figure 8b_heatmap_Oligodendrocyte marker z scores.pdf"), bbox_inches="tight")

### Figure 8c

In [None]:
# Green et al_2023
# Requires `green_labels` (the Green et al. author annotations) to be in scope.
# Pair each mapped cell with its author-assigned state, then compute, within each
# state, the fraction of cells mapped to every supertype.
_green_cells = green_mapping.merge(
    green_labels.loc[:, ["cell", "state"]],
    how="inner",
    left_on="Original Cell ID",
    right_on="cell",
)
_green_fractions = (
    _green_cells.loc[:, ["Supertype", "state"]]
    .groupby("state")
    .value_counts(normalize=True)
    .reset_index()
)
# Supertype/state pairs that never occur get a proportion of 0.
df = _green_fractions.pivot(index="Supertype", columns="state", values="proportion").fillna(0)

# Fixed display order of heatmap rows and columns.
_supertype_rows = ["Oligo_1", "Oligo_2", "Oligo_4", "Oligo_3", "OPC_1", "OPC_2", "OPC_2_2-SEAAD"]
_green_state_columns = ["Oli.5", "Oli.11", "Oli.6", "Oli.9", "Oli.3", "Oli.8", "Oli.7", "Oli.4", "Oli.10", "Oli.12", "Oli.2", "MFOL", "Oli.1", "COP", "OPC.1", "OPC.2", "OPC.3"]

plt.rcParams["figure.figsize"] = (5,3)
ax = sns.heatmap(
    df.loc[_supertype_rows, _green_state_columns],
    cmap="YlGnBu",
    xticklabels=True,
    yticklabels=True,
)
ax.set(xlabel="Green et al 2023", ylabel="SEA-AD")
plt.savefig(os.path.join(pwd, "output", "Figure 8c_heatmap_Green_OPC_Oligo_Comparison.pdf"), bbox_inches="tight")
plt.show()

# Mathys_2023
# Pair each mapped cell with its author-assigned high-resolution cell type, then
# compute, within each author cell type, the fraction of cells mapped to every supertype.
_mathys_cells = mathys_mapping.merge(
    mathys_labels.loc[:, ["barcode", "cell_type_high_resolution"]],
    how="inner",
    left_on="Original Cell ID Alternative",
    right_on="barcode",
)
_mathys_fractions = (
    _mathys_cells.loc[:, ["Supertype", "cell_type_high_resolution"]]
    .groupby("cell_type_high_resolution")
    .value_counts(normalize=True)
    .reset_index()
)
# Supertype/cell-type pairs that never occur get a proportion of 0.
df = _mathys_fractions.pivot(
    index="Supertype", columns="cell_type_high_resolution", values="proportion"
).fillna(0)

# Fixed display order of heatmap rows and columns.
_supertype_rows = ["Oligo_1", "Oligo_2", "Oligo_4", "Oligo_3", "OPC_1", "OPC_2", "OPC_2_2-SEAAD"]
_mathys_type_columns = ["Oli", "OPC"]

plt.rcParams["figure.figsize"] = (1,3)
ax = sns.heatmap(
    df.loc[_supertype_rows, _mathys_type_columns],
    cmap="YlGnBu",
    xticklabels=True,
    yticklabels=True,
)
ax.set(xlabel="Mathys et al 2023", ylabel="SEA-AD")
plt.savefig(os.path.join(pwd, "output", "Figure 8c_heatmap_Mathys_OPC_Oligo_Comparison.pdf"), bbox_inches="tight")

### Figure 8d

In [None]:
# Gene groups that enter the per-cell "AB Score".
psen_genes = ["PSEN1", "PSEN2"]
beta_secretase_genes = ["BACE1", "BACE2"]
gamma_secretase_complex_genes = ["PSENEN", "NCSTN", "APH1A"]
# Columns averaged into the score; "PSEN" and "BACE" are the summed columns built below.
final_genes = ["PSEN", "BACE", "PSENEN", "NCSTN", "APH1A", "APP"]

# Pull the per-cell values of every gene, plus the Subclass annotation, out of adata.
vars_to_get = [
    "APP",
    *psen_genes,
    *beta_secretase_genes,
    *gamma_secretase_complex_genes,
    "Subclass",
]
df = sc.get.obs_df(adata, vars_to_get)

# Paralogs are summed into one column each before averaging.
df["PSEN"] = df[psen_genes].sum(axis=1)
df["BACE"] = df[beta_secretase_genes].sum(axis=1)
df["AB Score"] = df[final_genes].mean(axis=1)

plt.rcParams["figure.figsize"] = (6,4)
ax = sns.boxplot(
    data=df,
    x="Subclass",
    y="AB Score",
    showfliers=False,
)
plt.xticks(rotation=90)
ax.set(xlabel="", ylabel="Expression of beta- &\n gamma-secretase and APP")
plt.savefig(os.path.join(pwd, "output", "Figure 8d_boxplot_AB score_by_Subclass.pdf"), bbox_inches="tight")
plt.show()

### Figure 8e

In [None]:
# Every gene present in the effect size table; used to restrict external gene lists.
genes = pd.Series(effect_size_table["Gene"].unique())
# Define gene lists
gene_lists = {}

# Cholesterol biosynthesis
# From https://biocyc.org/HUMAN/NEW-IMAGE?type=PATHWAY&object=PWY66-341
# https://biocyc.org/HUMAN/NEW-IMAGE?type=PATHWAY&object=PWY66-3
# and https://biocyc.org/HUMAN/NEW-IMAGE?type=PATHWAY&object=PWY66-4
# NOTE(review): "HSD17B1" may be a typo for "HSD17B7" - confirm against the
# BioCyc pathways above. Left unchanged here.
gene_lists["Cholesterol biosynthesis"] = [
    "FDFT1",
    "SQLE",
    "LSS",
    "CYP51A1",
    "LBR",
    "TM7SF2",
    "MSMO1",
    "NSDHL",
    "HSD17B1",
    "EBP",
    "DHCR24",
    "SC5D",
    "DHCR7",
]

# Based on UniProt Keywords
uniprot_table = pd.read_csv(os.path.join(pwd, "input", "dbs", "uniprotkb_keyword_KW_0276_AND_reviewed_2024_02_06.tsv"), sep="\t")
# The first space-separated token of "Gene Names" is used as the gene symbol.
# Entries without any gene name are read as NaN and would raise AttributeError
# on .split(), so they are dropped first.
uniprot_symbols = [names.split(" ")[0] for names in uniprot_table["Gene Names"].dropna()]
# Keep only symbols that also occur in the effect size table (sorted, unique).
gene_lists["Fatty Acid metabolism"] = list(np.intersect1d(uniprot_symbols, genes))


In [None]:
# Figure 8e: early vs late pseudoprogression effect sizes for Oligodendrocyte and
# OPC genes, with selected genes labelled. The three panels share the helpers below.
plt.rcParams["figure.figsize"] = (4,4)

_EARLY = "Effect size across early pseudoprogression"
_LATE = "Effect size across late pseudoprogression"
_MEAN_EXPR = "Mean expression (natural log UMIs per 10k plus 1)"


def _subclass_effect_rows(population, extra_genes):
    """Return the effect_size_table rows plotted for one Subclass population.

    Rows must be at the "Subclass" taxonomy level, belong to `population` and
    have mean expression > 0.05. Of those, rows with a mean expression z-score
    > 2 are kept, followed by rows whose gene is in `extra_genes`; rows that are
    exact duplicates of an earlier row are removed.
    """
    expressed = (
        (effect_size_table["Taxonomy Level"] == "Subclass") &
        (effect_size_table["Population"] == population) &
        (effect_size_table[_MEAN_EXPR] > 0.05)
    )
    enriched = effect_size_table.loc[expressed & (effect_size_table["Mean expression z-score"] > 2), :]
    requested = effect_size_table.loc[expressed & effect_size_table["Gene"].isin(extra_genes), :]
    rows = pd.concat([enriched, requested], axis=0)
    return rows.loc[~rows.duplicated()].copy()


def _scatter_effect_sizes(rows, vertical_guides):
    """Draw the grey early-vs-late scatter with dashed guide lines at +1 and -1.

    Guides are vertical (on the early axis) when `vertical_guides` is true and
    horizontal (on the late axis) otherwise. Returns the seaborn axes.
    """
    # NOTE(review): `size=0.5` is passed to seaborn's `size` argument, not the
    # matplotlib marker size `s`; presumably the empty legend below exists to hide
    # the legend this creates. Kept as-is so the figure does not change.
    axes = sns.scatterplot(
        data=rows,
        x=_EARLY,
        y=_LATE,
        alpha=0.1,
        size=0.5,
        color="grey",
    )
    draw_guide = plt.axvline if vertical_guides else plt.axhline
    for position in (1, -1):
        draw_guide(position, linestyle="--", color="lightgrey")
    plt.legend('',frameon=False)
    return axes


def _label_genes(rows, index, color):
    """Place one gene-name text per index label at its (early, late) position."""
    return [
        plt.text(rows.loc[i, _EARLY], rows.loc[i, _LATE], rows.loc[i, "Gene"], size=8, color=color)
        for i in index
    ]


def _label_extremes(rows, column, exclude, n):
    """Label, in grey, the n highest and n lowest genes by `column`.

    Only rows with |column| > 1.5 whose gene is not in `exclude` are candidates.
    With fewer than 2*n candidates the two selections overlap and a gene is
    labelled twice, as in the original code.
    """
    candidates = rows.loc[(rows[column].abs() > 1.5) & ~(rows["Gene"].isin(exclude)), :]
    labels = _label_genes(rows, candidates.sort_values(by=column, ascending=False).iloc[:n, :].index, "grey")
    labels.extend(_label_genes(rows, candidates.sort_values(by=column).iloc[:n, :].index, "grey"))
    return labels


# Oligodendrocyte early
fatty_acid_genes = gene_lists["Fatty Acid metabolism"]
df = _subclass_effect_rows("Oligodendrocyte", fatty_acid_genes)
ax = _scatter_effect_sizes(df, vertical_guides=True)

already_colored = list(fatty_acid_genes) + ["NCSTN", "CR1", "MYRF", "PLLP"]
# The early-effect threshold is evaluated on df itself; it used to be read from
# effect_size_table, unlike every other mask in this cell.
texts = _label_genes(df, df.loc[df["Gene"].isin(fatty_acid_genes) & (df[_EARLY] > 1.5), :].index, "red")
texts.extend(_label_genes(df, df.loc[df["Gene"].isin(["MYRF", "PLLP"]), :].index, "blue"))
texts.extend(_label_genes(df, df.loc[df["Gene"].isin(["CR1", "NCSTN"]), :].index, "green"))
texts.extend(_label_extremes(df, _EARLY, already_colored, 10))
adjust_text(texts, arrowprops=dict(arrowstyle="-"), color="grey")
plt.savefig(os.path.join(pwd, "output", "Figure 8e_scatterplot_Oligodendrocyte_Early.pdf"), bbox_inches="tight")
plt.show()

# Oligodendrocyte late
cholesterol_genes = gene_lists["Cholesterol biosynthesis"]
df = _subclass_effect_rows("Oligodendrocyte", cholesterol_genes)
ax = _scatter_effect_sizes(df, vertical_guides=False)

already_colored = list(cholesterol_genes) + ["MOG", "MYRF", "OPALIN", "OMG", "MOBP", "PLLP", "CR1"]
texts = _label_genes(df, df.loc[df["Gene"].isin(cholesterol_genes), :].index, "red")
texts.extend(_label_genes(df, df.loc[df["Gene"].isin(["MOG", "MYRF", "OPALIN", "OMG", "MOBP", "PLLP"]), :].index, "blue"))
texts.extend(_label_genes(df, df.loc[df["Gene"].isin(["CR1"]), :].index, "green"))
texts.extend(_label_extremes(df, _LATE, already_colored, 15))
adjust_text(texts, arrowprops=dict(arrowstyle="-"), color="grey")
plt.savefig(os.path.join(pwd, "output", "Figure 8e_scatterplot_Oligodendrocyte_Late.pdf"), bbox_inches="tight")
plt.show()

# OPC early
opc_highlight_genes = ["DLL1", "DLL3", "CSPG4", "ASCL1", "OLIG1", "OLIG2"]
df = _subclass_effect_rows("OPC", opc_highlight_genes)
ax = _scatter_effect_sizes(df, vertical_guides=True)

already_colored = list(opc_highlight_genes)
texts = _label_genes(df, df.loc[df["Gene"].isin(opc_highlight_genes), :].index, "blue")
texts.extend(_label_extremes(df, _EARLY, already_colored, 10))
adjust_text(texts, arrowprops=dict(arrowstyle="-"), color="grey")
# This panel thresholds and labels on the early effect size, so it is saved as
# "Early"; the file was previously named "..._OPC_Late.pdf".
plt.savefig(os.path.join(pwd, "output", "Figure 8e_scatterplot_OPC_Early.pdf"), bbox_inches="tight")
plt.show()

### Figure 8f

In [None]:
plt.rcParams["figure.figsize"] = (4,4)
# Scatterplot
# OPC subclass rows of the effect size table, restricted to TFs that head a "+" eRegulon.
_activator_tfs = TFs.loc[TFs["Direction"] == "+", "TF"]
_opc_subclass_rows = (effect_size_table["Taxonomy Level"] == "Subclass") & (
    effect_size_table["Population"].isin(["OPC"])
)
df = effect_size_table.loc[effect_size_table["Gene"].isin(_activator_tfs) & _opc_subclass_rows, :]

sns.scatterplot(
    data=df,
    x="Mean expression z-score",
    y="Effect size across early pseudoprogression",
    alpha=0.5,
)

# Name the TFs with a z-score above 2.5 and an early effect size above 1.
_to_label = (df["Mean expression z-score"] > 2.5) & (df["Effect size across early pseudoprogression"] > 1)
texts = [
    plt.text(
        df.loc[row, "Mean expression z-score"],
        df.loc[row, "Effect size across early pseudoprogression"],
        df.loc[row, "Gene"],
        size=8,
    )
    for row in df.loc[_to_label, :].index
]
adjust_text(texts, arrowprops=dict(arrowstyle="-", color="gray"))
plt.savefig(os.path.join(pwd, "output", "Figure 8f_scatterplot_OPC transcription factor regulators.pdf"), bbox_inches="tight")

# "+" eRegulon signature names of the selected regulators, consumed by the KDE cell.
# NOTE(review): the thresholds here (z-score > 2, effect size > 1.1) differ from
# the labelling thresholds above (2.5 and 1) - confirm this is intended.
_is_regulator = (df["Mean expression z-score"] > 2) & (df["Effect size across early pseudoprogression"] > 1.1)
_regulator_genes = df.loc[_is_regulator, "Gene"]
MR = TFs.loc[
    TFs["TF"].isin(_regulator_genes) & (TFs["Direction"] == "+"),
    "Gene_signature_name",
].to_list()


In [None]:
# KDEplot
# Count, per target gene, how many distinct TFs among the selected regulators (MR)
# target it, and keep genes whose count exceeds the median count by more than 1.
# NOTE(review): MR holds signature names with "_extended" stripped, so this isin()
# presumably matches only the non-extended eRegulons - confirm.
Downstream_Genes = eRegulons.loc[(eRegulons["Gene_signature_name"].isin(MR)), ["Gene", "TF"]].drop_duplicates().loc[:, "Gene"].value_counts()
Downstream_Genes = Downstream_Genes[Downstream_Genes > Downstream_Genes.median() + 1].index

# OPC subclass rows for the downstream genes, with the mean TF2G importance per gene attached.
df = effect_size_table.loc[
    (effect_size_table["Gene"].isin(Downstream_Genes)) & (effect_size_table["Taxonomy Level"] == "Subclass") & (effect_size_table["Population"].isin(["OPC"])),
    :
].merge(
    eRegulons.loc[(eRegulons["Gene_signature_name"].isin(MR)), ["Gene", "TF2G_importance"]].drop_duplicates().groupby("Gene").mean(),
    left_on="Gene",
    right_on="Gene",
    how="left"
)
# Background: every other gene in the OPC subclass.
df_else = effect_size_table.loc[
    ~(effect_size_table["Gene"].isin(Downstream_Genes)) & (effect_size_table["Taxonomy Level"] == "Subclass") & (effect_size_table["Population"].isin(["OPC"])),
    :
]
if df.shape[0] < df_else.shape[0]:
    # Down-sample the background to the size of the target set. A fixed seed makes
    # the plotted curve and the printed p-value reproducible; the draw was
    # previously unseeded and changed on every run.
    df_else = df_else.sample(n=df.shape[0], replace=False, random_state=0)


sns.kdeplot(
    data=df,
    x="Effect size across early pseudoprogression"
);
sns.kdeplot(
    data=df_else,
    x="Effect size across early pseudoprogression",
    color="orange"
);
# Welch's t-test (unequal variances) between the two effect size distributions.
print("p-value early: " + str(sp_stats.ttest_ind(df["Effect size across early pseudoprogression"], df_else["Effect size across early pseudoprogression"], equal_var=False)[1]))
plt.savefig(os.path.join(pwd, "output", "Figure 8f_kdeplot_OPC downstream targets.pdf"), bbox_inches="tight")

plt.show()

### Figure 8g

In [None]:
# Deltaplots
# Map every subclass label to its color.
subclass_colors = (
    color_order.loc[:, ["subclass_label", "subclass_color"]]
    .drop_duplicates()
    .set_index("subclass_label")["subclass_color"]
    .to_dict()
)

# Cells whose "Neurotypical reference" annotation equals the string "False".
_not_reference = adata.obs["Neurotypical reference"] == "False"
sub = adata[_not_reference, :].copy()

# One delta plot per gene, limited to the "Non-neuronal and Non-neural" class.
for gene in ("NCSTN", "OMG", "DHCR24", "OLIG2"):
    ax = delta_plot(
        adata=sub,
        genes=[gene],
        groupby="Class",
        groupby_subset=["Non-neuronal and Non-neural"],
        plotby="Subclass",
        donor="Donor ID",
        across="Continuous Pseudo-progression Score",
        highlight=[],
        colormap=subclass_colors,
        title="Expression of " + gene,
        legend=False,
        # The "{title}" placeholder is passed through literally; presumably
        # delta_plot fills it in - confirm against helper_functions.
        save=os.path.join(pwd, "output", "Figure 8g_deltaplot_{title}_by non-neuronal Subclass.pdf"),
    )
    plt.show()

### Figure 8h

In [None]:
# Dot plot of the listed genes per Subclass, drawn from the `sub` subset
# left in scope by the Figure 8g cell.
_myelination_factors = ["IGF1", "IGF2", "PDGFA", "PDGFB", "PDGFC"]
sc.pl.dotplot(
    sub,
    var_names=_myelination_factors,
    groupby="Subclass",
    swap_axes=True,
    mean_only_expressed=True,
    smallest_dot=0.1,
    cmap="YlGnBu",
    figsize=(6,2),
    save="Myelination factors by Subclass.pdf",
)

# Move the file scanpy saved under ./figures into ./output with its figure-panel name.
_scanpy_dotplot = os.path.join(pwd, "figures", "dotplot_Myelination factors by Subclass.pdf")
_figure_8h_pdf = os.path.join(pwd, "output", "Figure 8h_dotplot_Myelination factors by Subclass.pdf")
os.rename(_scanpy_dotplot, _figure_8h_pdf)

### Figure 8i

In [None]:
# Deltaplots
# Map every subclass label to its color.
subclass_colors = (
    color_order.loc[:, ["subclass_label", "subclass_color"]]
    .drop_duplicates()
    .set_index("subclass_label")["subclass_color"]
    .to_dict()
)

# Cells whose "Neurotypical reference" annotation equals the string "False".
_not_reference = adata.obs["Neurotypical reference"] == "False"
sub = adata[_not_reference, :].copy()

# IGF1 delta plot with no class subset (groupby_subset=None).
for gene in ("IGF1",):
    ax = delta_plot(
        adata=sub,
        genes=[gene],
        groupby="Class",
        groupby_subset=None,
        plotby="Subclass",
        donor="Donor ID",
        across="Continuous Pseudo-progression Score",
        highlight=[],
        colormap=subclass_colors,
        title="Expression of " + gene,
        legend=False,
        # The "{title}" placeholder is passed through literally; presumably
        # delta_plot fills it in - confirm against helper_functions.
        save=os.path.join(pwd, "output", "Figure 8i_deltaplot_{title}_groupby Class_plotby Subclass.pdf"),
    )
    plt.show()

### Clean up

In [None]:
# Delete the ./figures directory; the scanpy plots saved there have already been moved to ./output.
_scanpy_figures_dir = os.path.join(pwd, "figures")
shutil.rmtree(_scanpy_figures_dir)