### Load needed libraries

In [None]:
import os
import shutil
import scanpy as sc
import pandas as pd
import numpy as np
import re
import seaborn as sns
from adjustText import adjust_text
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import warnings
from datetime import datetime
from scipy import stats as sp_stats
from helper_functions import *

# Working directory of the notebook; the "input", "figures" and "output"
# folders used below are all resolved relative to it.
pwd = os.getcwd()

# scanpy: worker count and default figure settings.
sc.settings.n_jobs = 32
sc.set_figure_params(
    scanpy=True,
    dpi=100,
    dpi_save=500,
    frameon=False,
    vector_friendly=True,
    figsize=(10,10),
    format='png',
)

# Silence every Python warning for the remainder of the notebook.
warnings.filterwarnings("ignore")

# matplotlib overrides, applied after scanpy's defaults so they take precedence:
# font type 42 for PDF output and no axis grid.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    "axes.grid": False,
})

### Load needed datasets/data files

In [None]:
# All Figure 6 / Extended Data Figure 11 inputs live under one folder.
fig6_input_dir = os.path.join(pwd, "input", "Figure 6 and Extended Data Figure 11")

# Cluster order and colors from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
color_order = pd.read_csv(os.path.join(pwd, "input", "cluster_order_and_colors.csv"))

# snRNA-seq object, from 00_build_input_files.py
Sst_Pvalb = sc.read_h5ad(os.path.join(fig6_input_dir, "SEAAD_MTG_RNAseq_final-nuclei_limited.2024-02-13.h5ad"))

# Neighborhood UMAP coordinates from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
# The object is restricted to (and ordered by) the observation names that
# accompany the coordinates before the embedding is attached.
Sst_Pvalb_obs_names = pd.read_csv(
    os.path.join(fig6_input_dir, "Sst_Pvalb", "obs_names.csv"),
    index_col=0,
).index
Sst_Pvalb_umap = np.load(os.path.join(fig6_input_dir, "Sst_Pvalb", "X_umap.npy"))
Sst_Pvalb = Sst_Pvalb[Sst_Pvalb_obs_names, :].copy()
Sst_Pvalb.obsm["X_umap"] = Sst_Pvalb_umap

# scCODA results from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
# Keep only the Continuous_Pseudo-progression_Score covariate and average
# "Final Parameter" per cell type, then attach it to every nucleus by Supertype.
results_table = pd.read_csv(
    os.path.join(
        fig6_input_dir,
        "MTG_RNAseq",
        "Continuous_Pseudo-progression_Score",
        "Neuronal: Glutamatergic Neuronal: GABAergic_Supertype_results.csv",
    ),
    index_col=0,
)
results_table = (
    results_table
    .loc[results_table["Covariate"] == "Continuous_Pseudo-progression_Score", ["Cell Type", "Final Parameter"]]
    .groupby("Cell Type")
    .mean()
)
Sst_Pvalb.obs = Sst_Pvalb.obs.merge(results_table, left_on="Supertype", right_index=True, how="left")

# Mean expression table from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
mean_expression = sc.read_h5ad(os.path.join(fig6_input_dir, "mean_expression.h5ad"))

# MERFISH object from https://sea-ad-spatial-transcriptomics.s3.amazonaws.com/index.html#middle-temporal-gyrus/
# Only cells flagged "Used in analysis" are kept; they receive the same
# per-supertype "Final Parameter" column as the snRNA-seq object.
MERFISH = sc.read_h5ad(os.path.join(pwd, "input", "SEAAD_MTG_MERFISH.2024-02-13.h5ad"))
MERFISH = MERFISH[MERFISH.obs["Used in analysis"] == True, :].copy()
MERFISH.obs = MERFISH.obs.merge(results_table, left_on="Supertype", right_index=True, how="left")

# Cell type specific marker genes from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
effect_sizes_vs_all = sc.read_h5ad(os.path.join(fig6_input_dir, "effect_sizes_vs_all.h5ad"))

# Effect size table from https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/Supplementary%20Information/
effect_size_table = pd.read_csv(os.path.join(fig6_input_dir, "effect_size_table.csv"), index_col=0)

# Per-gene z-score of mean expression across rows at the chosen taxonomy level.
# Rows at other levels get no value (NaN) because the assignment aligns on the
# index; NaN z-scores within the selected rows are replaced by 0.
level = "Supertype"
expression_column = "Mean expression (natural log UMIs per 10k plus 1)"
tmp = effect_size_table.loc[effect_size_table["Taxonomy Level"] == level, ["Gene", expression_column]]
tmp = (
    tmp
    .groupby(["Gene"])[expression_column]
    .transform(lambda x : sp_stats.zscore(x,ddof=1))
    .fillna(0)
)
effect_size_table["Mean expression z-score"] = tmp

### Figure 6a

In [None]:
# fraction=1 keeps every nucleus.
# NOTE(review): presumably this call is only here to shuffle the observation
# order so that no supertype is systematically drawn on top in the UMAPs - confirm.
sc.pp.subsample(Sst_Pvalb, fraction=1)

# Supertype -> color mapping for the Sst and Pvalb subclasses.
supertype_palette = (
    color_order
    .loc[color_order["subclass_label"].isin(["Sst", "Pvalb"]), ["cluster_label", "cluster_color"]]
    .set_index("cluster_label")["cluster_color"]
    .to_dict()
)

# scanpy saves into ./figures; the files are then moved into ./output.
# The output folder is created if needed, and os.replace overwrites an
# existing destination on every platform, so the cell can be re-run safely
# (os.rename raises on Windows when the destination already exists).
figures_dir = os.path.join(pwd, "figures")
output_dir = os.path.join(pwd, "output")
os.makedirs(output_dir, exist_ok=True)

plt.rcParams["figure.figsize"] = (8,8)

# UMAP colored by supertype, labels drawn on the embedding.
sc.pl.umap(
    Sst_Pvalb,
    color="Supertype",
    legend_loc="on data",
    frameon=False,
    size=40,
    palette=supertype_palette,
    save="_Sst_Pvalb_Supertype.pdf"
)
os.replace(
    os.path.join(figures_dir, "umap_Sst_Pvalb_Supertype.pdf"),
    os.path.join(output_dir, "Figure 6a_umap_Sst_Pvalb_Supertype.pdf"),
)

# UMAP colored by the per-supertype scCODA "Final Parameter" (effect size along CPS).
sc.pl.umap(
    Sst_Pvalb,
    color="Final Parameter",
    color_map="YlGnBu_r",
    sort_order=False,
    frameon=False,
    size=40,
    save="_Sst_Pvalb_Effect_Size along CPS.pdf"
)
os.replace(
    os.path.join(figures_dir, "umap_Sst_Pvalb_Effect_Size along CPS.pdf"),
    os.path.join(output_dir, "Figure 6a_umap_Sst_Pvalb_Effect_Size along CPS.pdf"),
)


### Extended Data Figure 11a

In [None]:
# Display order of the supertypes on both heatmap axes (Sst block first, then Pvalb).
supertype_display_order = [
    "Sst_1", "Sst_4", "Sst_5", "Sst_7", "Sst_10", "Sst_13", "Sst_12", "Sst_9",
    "Sst_19", "Sst_3", "Sst_11", "Sst_20", "Sst_22", "Sst_23", "Sst_25", "Sst_2",
    "Pvalb_6", "Pvalb_5", "Pvalb_8", "Pvalb_14", "Pvalb_15", "Pvalb_2", "Pvalb_3",
    "Pvalb_1", "Pvalb_7", "Pvalb_12", "Pvalb_10", "Pvalb_13", "Pvalb_9",
]
Sst_Pvalb.obs["Supertype"] = (
    Sst_Pvalb.obs["Supertype"]
    .astype("category")
    .cat.reorder_categories(supertype_display_order)
)
supertype_labels = Sst_Pvalb.obs["Supertype"].cat.categories

# Pairwise correlation between the supertype columns of the mean expression matrix.
cluster_mean_correlation = pd.DataFrame(mean_expression[:, supertype_labels].X).corr()

sns.heatmap(
    cluster_mean_correlation,
    cmap="RdBu_r",
    xticklabels=supertype_labels,
    yticklabels=supertype_labels,
);
plt.title("Correlation of cluster means");
plt.savefig(
    os.path.join(pwd, "output", "Extended Data Figure 11a_Sst_Pvalb_supertype_mean_expression_correlation.pdf"),
    bbox_inches="tight",
)

### Extended Data Figure 11b

In [None]:
# Supertypes flagged as having an abundance change.
affected_supertypes = [
    "Sst_3", "Sst_19", "Sst_9", "Sst_11", "Sst_20", "Sst_23", "Sst_25", "Sst_2",
    "Pvalb_6", "Pvalb_5", "Pvalb_8", "Pvalb_3", "Pvalb_2", "Pvalb_15", "Pvalb_14",
]
effect_sizes_vs_all.var["Abundance Change"] = effect_sizes_vs_all.var["Supertype"].isin(affected_supertypes)

plt.rcParams["figure.figsize"] = (6,6)

# Per-gene mean enrichment across the affected supertypes of each subclass.
is_affected = effect_sizes_vs_all.var["Abundance Change"] == True
Sst = effect_sizes_vs_all[:, (effect_sizes_vs_all.var["Subclass"] == "Sst") & is_affected].X.mean(axis=1).tolist()
Pvalb = effect_sizes_vs_all[:, (effect_sizes_vs_all.var["Subclass"] == "Pvalb") & is_affected].X.mean(axis=1).tolist()

df = pd.DataFrame([Sst, Pvalb], index=["Sst", "Pvalb"], columns=effect_sizes_vs_all.obs_names).T
# Positive product: same sign in both subclasses; negative product: opposite signs.
df["multiplied"] = df["Sst"] * df["Pvalb"]

ax = sns.scatterplot(
    data=df,
    x="Sst",
    y="Pvalb",
    color="lightgrey",
    alpha=0.5,
    rasterized=True
);
for draw_zero_line in (ax.axhline, ax.axvline):
    draw_zero_line(0, linestyle="--", color="grey");
ax.set(xlim=(-55,40), ylim=(-55,40));
plt.legend("", frameon=False);
plt.xlabel("Enrichment (Effect Size/SE) in vuln. Sst")
plt.ylabel("Enrichment (Effect Size/SE) in vuln. Pvalb")
plt.title("Correlation=" + str(np.round(sp_stats.pearsonr(Sst, Pvalb)[0],2)));

# Genes to annotate, as (sign of the Sst value, sort order of the product, count):
# the 7 largest products on each side of Sst = 0, followed by the
# 5 smallest products on each side.
label_rules = [
    (1, False, 7),
    (-1, False, 7),
    (1, True, 5),
    (-1, True, 5),
]
texts = []
for sst_sign, ascending, n_labels in label_rules:
    picked = (
        df.loc[(np.sign(df["Sst"]) == sst_sign), :]
        .sort_values(by=["multiplied"], ascending=ascending)
        .iloc[:n_labels]
        .index
    )
    texts.extend([plt.text(df.loc[gene, "Sst"], df.loc[gene, "Pvalb"], gene) for gene in picked])

# Nudge labels apart and connect them to their points.
adjust_text(texts, arrowprops=dict(arrowstyle="-", color="gray"));
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 11b_scatterplot_vulnerable_Sst_and_Pvalb_markers.pdf"), bbox_inches="tight");

### Figure 6b

In [None]:
# Supertype -> color of its parent subclass.
subclass_colors_by_supertype = color_order.loc[:, ["cluster_label", "subclass_color"]].drop_duplicates()
subclass_colors_by_supertype.index = subclass_colors_by_supertype["cluster_label"].copy()
subclass_colors_by_supertype = subclass_colors_by_supertype["subclass_color"].to_dict()


def _plot_effect_size_vs_depth(subclasses, title, output_name):
    """Plot per-supertype effect size against MERFISH cortical depth for one panel.

    Parameters
    ----------
    subclasses : list of str
        Values of MERFISH.obs["Subclass"] to include in the panel.
    title : str
        Panel title.
    output_name : str
        File name of the PDF written into the "output" folder.

    Returns
    -------
    (pandas.DataFrame, matplotlib Axes)
        The per-supertype medians of "Final Parameter" and "Depth from pia"
        that are drawn as points, and the axes of the panel.

    Reads the notebook globals MERFISH, subclass_colors_by_supertype and pwd.
    """
    plt.rcParams["figure.figsize"] = (2,4)

    # Cells of the requested subclasses with supertype confidence above 0.4.
    # An explicit copy is taken so that the column assignment below acts on an
    # independent frame rather than on a slice of MERFISH.obs.
    keep = MERFISH.obs["Subclass"].isin(subclasses) & (MERFISH.obs["Supertype confidence"] > 0.4)
    cells = MERFISH.obs.loc[keep, :].copy()
    cells["Supertype"] = cells["Supertype"].astype("category").cat.remove_unused_categories()

    # Every cell of a supertype shares one "Final Parameter" (merged per supertype),
    # so each hue level contributes the median depth with a standard-deviation spread.
    ax = sns.lineplot(
        data=cells,
        x="Final Parameter",
        y="Depth from pia",
        hue="Supertype",
        palette=subclass_colors_by_supertype,
        estimator=np.median,
        ci="sd",
    );

    # One point per supertype at its median effect size and median depth.
    medians = cells.loc[:, ["Supertype", "Final Parameter", "Depth from pia"]].groupby(["Supertype"]).median()
    ax = sns.scatterplot(
        data=medians,
        x="Final Parameter",
        y="Depth from pia",
        hue="Supertype",
        size=10,
        palette=subclass_colors_by_supertype,
    );
    ax.set_xlabel("Effect size across\npseudoprogression");
    ax.set_ylabel("Cortical depth");
    plt.title(title);

    # Reference lines: zero effect size and fixed depths.
    # NOTE(review): the depths presumably mark cortical layer boundaries - confirm.
    plt.axvline(x=0, color="grey", linewidth=0.5, linestyle="dashed");
    for depth in (500, 1000, 1700, 2150, 2850):
        plt.axhline(y=depth, color="grey", linewidth=0.5, linestyle="dashed");

    plt.xticks(rotation=90);
    # Both axes are inverted: depth increases downwards, effect size decreases to the right.
    plt.ylim(4000, 0);
    plt.xlim(0.5, -1.75);
    plt.legend('',frameon=False);
    plt.savefig(os.path.join(pwd, "output", output_name), bbox_inches="tight")
    plt.show();
    return medians, ax


# One panel per neuronal group: (subclasses, title, output file name).
figure_6b_panels = [
    (
        ["L2/3 IT", "L4 IT", "L5 IT", "L6 IT", "L6 IT Car3", "L5 ET", "L5/6 NP", "L6 CT", "L6b"],
        "Excitatory neurons",
        "Figure 6b_scatterplot_Excitatory_Effect_Size along CPS_versus_MERFISH_layer_depth.pdf",
    ),
    (
        ["Sst", "Pvalb"],
        "MGE inhibitory neurons",
        "Figure 6b_scatterplot_MGE_Effect_Size along CPS_versus_MERFISH_layer_depth.pdf",
    ),
    (
        ["Lamp5", "Sncg", "Pax6", "Vip"],
        "CGE inhibitory neurons",
        "Figure 6b_scatterplot_CGE_Effect_Size along CPS_versus_MERFISH_layer_depth.pdf",
    ),
]
for panel_subclasses, panel_title, panel_output_name in figure_6b_panels:
    # `df` and `ax` keep the values of the last panel, as in the unrolled version.
    df, ax = _plot_effect_size_vs_depth(panel_subclasses, panel_title, panel_output_name)

### Extended Data Figure 11d

In [None]:
# Code to generate the spatial transcriptomics figures is in the Spatial Transcriptomics folder

### Figure 6e

In [None]:
# Code to generate the spatial transcriptomics figures is in the Spatial Transcriptomics folder

### Figure 6f

In [None]:
# Code to generate the spatial transcriptomics figures is in the Spatial Transcriptomics folder

### Figure 6h,i

In [None]:
# Universe of gene symbols present in the effect size table.
genes = pd.Series(effect_size_table["Gene"].unique())

# Define gene lists: category name -> gene symbols.
gene_lists = {}

# Electron transport chain components, based on GO:0022900.
# Selected by symbol prefix; symbols containing "-AS" or "-DT" are excluded.
# (An earlier, commented-out variant also listed the "SDH" and "UQC" prefixes.)
complexes = ["NDUF", "COX", "ATP5"]
etc_name_allowed = ~(genes.str.contains("-AS")) & ~(genes.str.contains("-DT"))
gene_lists["Electron transport chain components"] = [
    gene
    for prefix in complexes
    for gene in genes[(genes.str.startswith(prefix)) & etc_name_allowed]
]

# Glycolytic enzymes
# From https://biocyc.org/HUMAN/NEW-IMAGE?type=PATHWAY&object=PWY66-400&detail-level=2
gene_lists["Glycolytic enzymes"] = [
    "HK1", "HK2", "HK3", "GCK",
    "GPI",
    "PFKL", "PFKM", "PFKP",
    "ALDOA", "ALDOB", "ALDOC",
    "TPI1",
    "GAPDH", "GAPDHS",
    "PGK1", "PGK2",
    "PGAM1", "PGAM2", "PGAM4",
    "BPGM",
    "ENO1", "ENO2", "ENO3", "ENO4",
    "PKLR",
    "PKM",
]

# Cholesterol biosynthesis
# From https://biocyc.org/HUMAN/NEW-IMAGE?type=PATHWAY&object=PWY66-341
# https://biocyc.org/HUMAN/NEW-IMAGE?type=PATHWAY&object=PWY66-3
# and https://biocyc.org/HUMAN/NEW-IMAGE?type=PATHWAY&object=PWY66-4
gene_lists["Cholesterol biosynthesis"] = [
    "FDFT1",
    "SQLE",
    "LSS",
    "CYP51A1",
    "LBR",
    "TM7SF2",
    "MSMO1",
    "NSDHL",
    "HSD17B1",
    "EBP",
    "DHCR24",
    "SC5D",
    "DHCR7",
]

# Based on UniProt Keywords: one reviewed-entry export per keyword.
uniprot_keyword_files = {
    "Fatty Acid metabolism": "uniprotkb_keyword_KW_0276_AND_reviewed_2024_02_06.tsv",
    "Sphingolipid metabolism": "uniprotkb_keyword_KW_0746_AND_reviewed_2024_02_06.tsv",
    "Steroid metabolism": "uniprotkb_keyword_KW_0753_AND_reviewed_2024_02_06.tsv",
    "Phospholipid metabolism": "uniprotkb_keyword_KW_1208_AND_reviewed_2024_02_06.tsv",
}
for list_name, file_name in uniprot_keyword_files.items():
    tmp = pd.read_csv(os.path.join(pwd, "input", "dbs", file_name), sep="\t")
    # "Gene Names" holds space-separated symbols; only the first one is used.
    # Entries with an empty "Gene Names" cell are read as NaN and are skipped
    # instead of raising AttributeError on .split().
    tmp = tmp["Gene Names"].dropna().str.split(" ").str[0].tolist()
    # Keep only symbols that are present in the dataset (sorted, unique).
    gene_lists[list_name] = list(np.intersect1d(tmp, genes))

# Lipid metabolism: union of cholesterol biosynthesis and the four keyword lists.
lipid_metabolism = gene_lists["Cholesterol biosynthesis"]
for list_name in uniprot_keyword_files:
    lipid_metabolism = np.union1d(lipid_metabolism, gene_lists[list_name])
gene_lists["Lipid metabolism"] = lipid_metabolism

# Ribosomal proteins, based on GO:0006412
# Symbols are matched from their start (re.match) up to the end of the string.
ribosomal_pattern = re.compile("RP[SL]([0-9]{1,2}[ABLXY]?[0-9]?|A|P[0-9]{1})$")
gene_lists["Ribosomal proteins"] = [
    gene for gene in genes if ribosomal_pattern.match(gene) is not None
]

# Eukaryotic initiation, elongation, and termination factors, based on GO:0006412
translation_factor_pattern = re.compile("^E[IET]F[0-9]{1,2}[A-Z]?[0-9]?[BL]?$")
gene_lists["Translation factors"] = [
    gene for gene in genes if translation_factor_pattern.match(gene) is not None
]

# DNA-templated transcription, based on GO:0006351
# (prefix, substring that disqualifies a symbol or None)
transcription_prefix_rules = (
    ("POLR2", None),
    ("GTF2", "RD"),
    ("MED", "OS"),
)
transcription_machinery = []
for prefix, excluded in transcription_prefix_rules:
    selected = genes.str.startswith(prefix)
    if excluded is not None:
        selected = selected & ~(genes.str.contains(excluded))
    transcription_machinery.extend(genes[selected])
transcription_machinery.extend([
    "TFB", # General transcription factor
    "CCNC", # Mediator
])
# Restrict to symbols present in the dataset whose name does not contain "-AS";
# the result is a sorted, unique array.
gene_lists["Transcription machinery"] = np.intersect1d(
    transcription_machinery,
    genes[~(genes.str.contains("-AS"))],
)

# Protein ubiquitination, based on UbiNet 2.0 (https://academic.oup.com/database/article/doi/10.1093/database/baab010/6162640)
# One list per E3 ligase category, restricted to genes present in the dataset.
tmp = pd.read_csv(os.path.join(pwd, "input", "dbs", "Categorized_human_E3_ligases.csv"))
for category in tmp["Category"].unique():
    category_members = tmp.loc[tmp["Category"] == category, "E3"]
    gene_lists["Ubiquitination-E3-" + category] = list(np.intersect1d(category_members, genes))

# Every symbol starting with "UBA" (Ubiquitin E1 ligases) plus the ubiquitin genes.
gene_lists["Ubiquitin"] = list(genes[(genes.str.startswith("UBA"))]) + [
    "UBB", "UBC", "RPS27A", # Ubiquitin
]
gene_lists["Ubiquitination-E1E2"] = [
    "UBB", "UBC", "RPS27A", # Ubiquitin
    "NAE1", "SAE1", "ATG7", # E1 ligases
    "UFC1", "BIRC6", "ATG3", # E2 Ligases
]

# Union of every list whose key contains "Ubiquitin" (the E3 categories,
# "Ubiquitin", "Ubiquitination-E1E2" and the accumulator itself).
gene_lists["Ubiquitination"] = []
for key in gene_lists.keys():
    if "Ubiquitin" not in key:
        continue
    gene_lists["Ubiquitination"] = np.union1d(gene_lists["Ubiquitination"], gene_lists[key]).tolist()

# Protein phosphorylation, based on KinHub (https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1433-7)
# One list per kinase group, restricted to genes present in the dataset.
tmp = pd.read_csv(os.path.join(pwd, "input", "dbs", "KinHub_database.csv"))
for group in tmp["Group"].unique():
    group_members = tmp.loc[tmp["Group"] == group, "HGNC Name"].to_list()
    gene_lists["Phosphorylation-" + group] = list(np.intersect1d(group_members, genes))

# Union of the per-group lists; the trailing hyphen in the key test keeps the
# accumulator itself out of the loop.
gene_lists["Phosphorylation"] = []
for key in gene_lists.keys():
    if "Phosphorylation-" not in key:
        continue
    gene_lists["Phosphorylation"] = np.union1d(gene_lists["Phosphorylation"], gene_lists[key]).tolist()
    
# Receptor classes from Guide to Pharmacology, selected by the "Type" column:
# voltage gated ion channels (vgic), ligand gated ion channels (lgic) and
# nuclear hormone receptors (nhr).
# NOTE(review): the key "Ligand gated ion chanels" is misspelled but is kept
# verbatim because code outside this section may look it up.
tmp = pd.read_csv(os.path.join(pwd, "input", "dbs", "GtP_receptors.tsv"), sep="\t")
gtp_receptor_types = (
    ("Voltage gated ion channels", "vgic"),
    ("Ligand gated ion chanels", "lgic"),
    ("Nuclear hormone receptors", "nhr"),
)
for list_name, receptor_type in gtp_receptor_types:
    symbols = tmp.loc[tmp["Type"] == receptor_type, "HGNC symbol"].dropna().unique()
    gene_lists[list_name] = list(np.intersect1d(symbols, genes))

# Transcription factors, from Animal TFDB: one list per family.
tmp = pd.read_csv(os.path.join(pwd, "input", "dbs", "AnimalTFDB.tsv"), sep="\t")
for family in tmp["Family"].unique():
    family_members = tmp.loc[tmp["Family"] == family, "Symbol"].to_list()
    gene_lists["Transcription factors-" + family] = list(np.intersect1d(family_members, genes))

# Union of the per-family lists; the trailing hyphen in the key test keeps the
# accumulator itself out of the loop.
gene_lists["Transcription factors"] = []
for key in gene_lists.keys():
    if "Transcription factors-" not in key:
        continue
    gene_lists["Transcription factors"] = np.union1d(gene_lists["Transcription factors"], gene_lists[key]).tolist()

# GWAS genes, from Bellenguez et al (2022).
tmp = pd.read_csv(os.path.join(pwd, "input", "dbs", "AD_GWAS.csv"))
gene_lists["GWAS"] = list(np.intersect1d(tmp["Gene"], genes))

# Cell adhesion, based on GO:0007155
# Families are selected by symbol prefix; the optional second element is a
# substring that disqualifies a symbol carrying that prefix.
cell_adhesion_prefix_rules = [
    ("CNTN", "AP"),
    ("NINJ", None),
    ("NLGN", None),
    ("DLG", "AP"),
    ("ADGR", None),
    ("PKP", None),
    ("TENM", None),
    ("DSCAM", None),
    ("NECTIN", None),
    ("CADM", None),
    ("SIGLEC", None),
    ("CTNN", "IP"),
    ("ICAM", None),
    ("CEACAM", None),
    ("CLDN", None),
    ("CLSTN", None),
    ("CELSR", None),
    ("DSG", None),
    ("CDH", None),
    ("PCDH", None),
    ("ITG", "BP"),
]
gene_lists["Cell adhesion"] = []
for prefix, excluded in cell_adhesion_prefix_rules:
    selected = genes.str.startswith(prefix)
    if excluded is not None:
        selected = selected & ~(genes.str.contains(excluded))
    gene_lists["Cell adhesion"].extend(genes[selected])

# Individually named genes. A comma was missing after "MADCAM1", which fused it
# with "L1CAM" into the single string "MADCAM1L1CAM" and dropped both genes.
gene_lists["Cell adhesion"].extend([
    "DSC1", "DSC2", "DSC3",
    "DCHS1", "DCHS2",
    "FAT1", "FAT2", "FAT3", "FAT4",
    "SELE", "SELL", "SELP",
    "VCAM1", "PECAM1", "MADCAM1",
    "L1CAM", "NRCAM", "CHL1", "NFASC",
    "MAG", "F11R", "JAM2", "JAM3", "JAML",
    "VSIG1", "VSIG2", "IGSF11", "VSIG4", "ESAM",
    "CXADR", "PVR", "CD2", "HEPACAM", "HEPACAM2",
    "BCAM", "EPCAM", "ALCAM", "CERCAM", "MCAM",
    "UNC5A", "UNC5B", "UNC5C", "UNC5D",
    "VCL", "JUP", "MSN", "CD209",
])

# Restrict to symbols present in the dataset whose name contains none of
# "-AS", "-DT" or "-IT"; the result is a sorted, unique array.
gene_lists["Cell adhesion"] = np.intersect1d(
    gene_lists["Cell adhesion"],
    genes[~(genes.str.contains("-AS")) & ~(genes.str.contains("-DT")) & ~(genes.str.contains("-IT"))],
)


# Actin-Spectrin-Septin cytoskeletal components and modulators, based on GO:0005200, GO:0003779, GO:0030507, and GO:0031105
# Families are selected by symbol prefix; the optional second element is a
# substring that disqualifies a symbol carrying that prefix.
actin_prefix_rules = [
    ("ACT", None), # Microfilament (Actin and Actinin)
    ("POTE", None), # Microfilament (POTEs)
    ("AFAP", None), # Microfilament (Actin associated proteins)
    ("CORO", None), # Microfilament (Coronin)
    ("FLN", None), # Microfilament (Filamin)
    ("LMOD", None), # Microfilament (Leiomodin)
    ("TMOD", None), # Microfilament (Tropomodulin)
    ("SNT", None), # Microfilament (Syntrophin)
    ("TNN", "K"), # Microfilament (Troponin)
    ("SSH", None), # Microfilament (Slingshot)
    ("PARV", None), # Microfilament (Parvin)
    ("MICAL", None), # Microfilament (F-Actin-Monooxygenase)
    ("TNS", None), # Microfilament (Tensin)
    ("PLEK", None), # Microfilament (Pleckstrin)
    ("SYNE", None), # Microfilament (Nesprin)
    ("SYNPO", None), # Microfilament (Synaptopodin)
    ("PFN", None), # Microfilament (Profilin)
    ("ARP", "PP"), # Microfilament (ARP2/3)
    ("FMN", None), # Microfilament (Formin)
    ("FSCN", None), # Microfilament (Fascin)
    ("SEPTIN", None), # Septin
    ("SPTA", None), # Spectrin
    ("SPTB", None), # Spectrin
    ("SORBS", None), # Sorbin
    ("KLHL", None), # Microfilament (Kelch like)
    ("PDLIM", None), # Microfilament (PDZ and LIM containing)
    ("WAS", "IR"), # Microfilament (Actin binders/remodelers)
    ("PHACTR", None), # Microfilament (Phosphatase and Actin Regulator 4)
    ("SHANK", None), # Microfilament (SH3 And Multiple Ankyrin Repeat Containing)
    ("EPB41", None), # Microfilament (Erythrocyte Membrane Protein 4.1)
    ("BAIAP", None), # Microfilament (Brain-specific angiogenesis Inhibitor)
    ("GMF", None), # Microfilament (Glial maturation factors)
    ("SHROOM", None), # Microfilament (SHROOM/Actin remodeler)
    ("GAS2", None), # Microfilament (Growth arrest specific 2)
    ("FGD", None), # Microfilament (FYVE, RhoGEF And PH Domain Containing)
]
gene_lists["Actin cytoskeleton"] = []
for prefix, excluded in actin_prefix_rules:
    selected = genes.str.startswith(prefix)
    if excluded is not None:
        selected = selected & ~(genes.str.contains(excluded))
    gene_lists["Actin cytoskeleton"].extend(genes[selected])

# Individually named genes. Two symbols were misspelled ("DTSN" for DSTN and
# "MOYZ1" for MYOZ1) and therefore silently removed by the intersection below.
gene_lists["Actin cytoskeleton"].extend([
    "ABL1", "ABL2",
    "VASP", "INF2", "FHDC1", "DIAPH1", "DIAPH2", "DIAPH3", # Microfilament (Filament end tracking)
    "PLS1", "LCP1", "PLS3", "TMSB4X", "TMSB4Y", # Microfilament (Cross-linker)
    "CAP1", "CAP2", "CAPG", "CAPZA1", "CAPZA2", "CAPZA3", "CAPZB", "TWF1", "TWF2", # Microfilament (Capping proteins)
    "GSN", "SCIN",  "CFL1", "CFL2", "DSTN", # Microfilament (Filament severing, depolymerizing)
    "ANK1", "ANK2", "ANK3", # Microfilament (Ankyrin)
    "CNN1", "CNN2", "CNN3", # Microfilament (Calponin)
    "TTN", "MYOZ1", "MYOZ2", "MYOZ3", "MYORG", "MYOM1", "MYOM2", "MYOM3", # Titin complex
    "VIL", "VIL1", "VILL", "SVIL", "AVIL", "EVL", "ENAH", "DMTN", "ABLIM1", "ABLIM2", "ABLIM3", "DBN1", "DBNL", "ALKBH4", "UTRN", # Actin binders/remodelers
    "WDR1", "COTL1", "ANLN", "ADD1", "ADD2", "ADD3", "MYOT", "MYPN", "PALLD", "DAAM1", "DAAM2", "TLN1", "TLN2", # Actin binders/remodelers
    "WIPF1", "WIPF2", "WIPF3", "IPP", "FAM107A", "FAM107B", "DMD", "EMD", "CSRP1", "CSRP2", "CSRP3", "JMY", # Actin binders/remodelers
    "EPB41L1", "EZR", "CTTN", "DAG1", "EPS8", "EPS8L1", "EPS8L2", "EPS8L3", "ENC1", "RDX", "MSN", "WHAMM", "CGN", # Actin binders/remodelers
    "CCDC88A", "MICAL3", "MAEA", "SETD3", "NEB", "NRAP", "PPP1R18", "SPIRE1", "SPIRE2", "MACF1", "DST", "PLEC", "TAGLN", "FKBP15",  # Actin binders/remodelers
])

# Restrict to symbols present in the dataset whose name contains neither "-AS"
# nor "-DT"; the result is a sorted, unique array.
gene_lists["Actin cytoskeleton"] = np.intersect1d(
    gene_lists["Actin cytoskeleton"],
    genes[~(genes.str.contains("-AS")) & ~(genes.str.contains("-DT"))],
)

# Intermediate filament/Neurofilament cytoskeleton, based on GO:0005200, GO:0045111, and GO:0019215
# Every symbol starting with "LMN" (Lamins) plus individually named genes.
intermediate_filament = list(genes[(genes.str.startswith("LMN"))]) + [
    "DES", "GFAP",  "VIM", "SYNC", "SYNM", # Desmin, GFAP, Vimentin, Syncoilin, Synemin
    "INA", "PRPH", "NEFL", "NEFM", "NEFH",  # Neurofilament and Peripherin
    "BFSP1", "BFSP2", "NES", # Beaded filaments
]
# Restrict to symbols present in the dataset whose name contains neither "-AS"
# nor "-DT"; the result is a sorted, unique array.
name_allowed = ~(genes.str.contains("-AS")) & ~(genes.str.contains("-DT"))
gene_lists["Intermediate filament"] = np.intersect1d(intermediate_filament, genes[name_allowed])

# Microtubule cytoskeleton, based on GO:0005200, GO:0015630, and GO:0008017
# Families selected by symbol prefix, in the order they are appended.
microtubule_prefixes = [
    "TUBA", "TUBB", "TUBD", "TUBE", "TUBG", # Tubulin
    "CROC", # Ciliary Rootlet Coiled-Coil, Rootletin Family
    "DCDC", # Doublecortin domain containing
    "SPEF", # Sperm flagellar protein
    "TPG", # Tubulin Polyglutamylase Complex
    "RMD", # Regulator Of Microtubule Dynamics
    "CKAP", # Cytoskeleton Associated Protein
    "EML", # Echinoderm Microtubule Associated Protein Like
    "HAUS", # HAUS Augmin Like Complex
    "MAST", # Microtubule Associated Serine/Threonine Kinase
    "MAPRE", # Microtubule Associated Protein RP/EB Family
    "DLGAP", # Discs Large Homolog Associated Protein
    "TBC", # Tubulin Folding Cofactor (symbols starting with "TBC1" are excluded below)
    "CAMSAP", # Calmodulin Regulated Spectrin Associated Protein Family
    "MAP1L", # Microtubule associated protein
    "NDE", # NudE Neurodevelopment Protein
    "NUDC", # NudC
    "CETN", # Centrin
    "TTLL", # Tubulin Polyglutamylase Complex
    "NEK", # NIMA Related Kinase
    "TPPP", # Tubulin Polymerization Promoting Protein Family
    "ODF", # Outer Dense Fiber Of Sperm Tails
    "KATN", # Katanin Regulatory Subunit
]
gene_lists["Microtubule cytoskeleton"] = []
for prefix in microtubule_prefixes:
    has_prefix = genes.str.startswith(prefix)
    if prefix == "TBC":
        has_prefix = has_prefix & ~(genes.str.startswith("TBC1"))
    gene_lists["Microtubule cytoskeleton"].extend(genes[has_prefix])
# NOTE(review): the literal list appended right after this section is missing
# commas after "MAP9", "TOGARAM2" and "CCSER1"; each of those strings is fused
# with the first symbol on the following line, so both symbols are lost.
# Individually named microtubule-associated genes.
# Every row must end with a comma: Python joins adjacent string literals, so a missing
# comma turns e.g. "MAP9" "SPC24" into the single (non-existent) symbol "MAP9SPC24",
# which the intersection with `genes` below then drops without any warning.
gene_lists["Microtubule cytoskeleton"].extend([
    "MAP1A", "MAP1B", "MAP1S", "MAP10", "MAP2", "MAPT", "MAP4", "MAP6", "MAP6D1", "MAP7", "MAP9", # Microtubule associated protein
    "SPC24", "SPC25", "NDC80", "NUF2", # Component Of NDC80 Kinetochore Complex
    "TOGARAM1", "TOGARAM2", # TOG Array Regulator Of Axonemal Microtubules
    "MTUS1", "MTUS2", # Microtubule Associated Scaffold Protein
    "NUMA1", # Nuclear Mitotic Apparatus Protein
    "SKA1", "SKA2", "SKA3", # Spindle And Kinetochore Associated Complex
    "DCX", # Doublecortin
    "SPATA4", "SPAG6", "SPAG17", # Microtubule related sperm proteins
    "APC", "APC2", # Adenomatous polyposis coli
    "PRC1", # Protein Regulator Of Cytokinesis
    "SAXO1", "SAXO2", # Stabilizer Of Axonemal Microtubules
    "CCDC69", "CEP57", "CEP57L1", "CRIPT", "MACO1", "CCDC181", "MAST2", "ALMS1", "CCDC66", # Microtubule binding proteins
    "CCDC170", "ABRAXAS1", "ABRAXAS2", "CCSAP", "CFAP157", "CEP295NL", "CEP295", "CCSER2", "CCSER1", # Microtubule binding proteins
    "FSD2", "MID1", "MID2", "CDK5RAP2", "NUSAP1", "MTCL1", "SPAST", "RGS14", "FIGN", "FIGNL1", "FIGNL2", # Microtubule binding proteins
    "NAV1", "NAV2", "NAV3", "RACGAP1", "ZNF207", "TPGS1", "TPGS2", "HDAC6", "FKBP4", "RCC2", "PCNT", # Microtubule binding proteins
    "RP1", "RP1L1", "FGF13", "CCDC66", "NDRG1", "MDM1", "SIRT2", "FAM110C", "CEP170", "MATCAP1", "MATCAP2", # Microtubule binding proteins
    "CSTPP1", "LRRC49", "MID1IP1", "INO80", "FSD1", "LZTS2", "BEX4", "FEZ1", "FEZ2", "NICN1", "NIN", "NINL", # Microtubule binding proteins
    "ARL2", "CLASP1", "CLASP2",
])
# Keep only symbols present in `genes`, excluding any containing "-AS" or "-IT".
# np.intersect1d also de-duplicates and sorts, so repeated entries above are harmless.
# NOTE(review): other lists in this notebook also exclude "-DT" symbols; confirm whether that is wanted here.
gene_lists["Microtubule cytoskeleton"] = np.intersect1d(gene_lists["Microtubule cytoskeleton"], genes[~(genes.str.contains("-AS")) & ~(genes.str.contains("-IT"))])

# Vesicle trafficking, based on https://www.sciencedirect.com/science/article/pii/S0896627310007816
# Motor-protein families are picked up by symbol prefix, in the order listed.
_motor_family_masks = [
    genes.str.startswith("KIF"), # Kinesin
    genes.str.startswith("DYN"), # Dynein
    genes.str.startswith("DNAH") & ~(genes.str.contains("OS")), # Dynein (symbols containing "OS" are skipped)
    genes.str.startswith("DNAI"), # Dynein
    genes.str.startswith("MYH"), # Myosin heavy chain
    genes.str.startswith("MYL"), # Myosin light chain (matches every symbol starting with MYL)
]
gene_lists["Vesicle trafficking"] = []
for _motor_mask in _motor_family_masks:
    gene_lists["Vesicle trafficking"].extend(genes[_motor_mask])

# Individually named genes
gene_lists["Vesicle trafficking"].extend([
    "MYO10", "MYO15A", "MYO15B", "MYO16", "MYO18A", # Myosin
    "MYO18B", "MYO19", "MYO1A", "MYO1B", "MYO1C", "MYO1D", "MYO1E", # Myosin
    "MYO1F", "MYO1G", "MYO1H", "MYO3A", "MYO3B", "MYO5A", "MYO5B", # Myosin
    "MYO5C", "MYO6", "MYO7A", "MYO7B", "MYO9A", "MYO9B", # Myosin
    "STARD9" # Others
])

# Restrict to symbols present in `genes` that contain neither "-AS" nor "-IT"
# (np.intersect1d returns a sorted, de-duplicated array).
_vesicle_allowed = genes[~(genes.str.contains("-AS")) & ~(genes.str.contains("-IT"))]
gene_lists["Vesicle trafficking"] = np.intersect1d(gene_lists["Vesicle trafficking"], _vesicle_allowed)

# Adaptor proteins/trafficking proteins, based on https://www.sciencedirect.com/science/article/pii/S0896627310007816, GO:0030705, and GO:0016192
# Families are matched by symbol prefix and appended in the order listed.
# Each entry is (prefix to match, prefixes that exclude a symbol from that match).
_adaptor_families = [
    ("TRAK", ()), # Milton
    ("RHOT", ()), # Miro
    ("GRIP", ()), # AMPA receptor
    ("SYT", ()), # Synaptotagmin, for TrkB+ vesicles
    ("RAB", ()), # RABs
    ("STX", ()), # Syntaxin
    ("SNAP", ("SNAPC",)), # SNARE
    ("VAMP", ()), # Synaptobrevin
    ("VAP", ()), # VAMP-associated protein
    ("CLIP", ()), # CAP-Gly Domain Containing Linker
    ("GIPC", ()), # GIPC PDZ Domain Containing Family
    ("AP1", ()), # Adapter Protein Complex 1
    ("AP2", ()), # Adapter Protein Complex 2
    ("AP3", ()), # Adapter Protein Complex 3
    ("AP4", ()), # Adapter Protein Complex 4
    ("DTN", ()), # Dystrobrevin
    ("BLOC", ()), # Biogenesis Of Lysosomal Organelles Complex
    ("BORC", ()), # BLOC-1 Related Complex
    ("SNX", ()), # Sorting nexin
    ("ARF", ()), # ADP Ribosylation Factor
    ("RIMS", ()), # Regulating Synaptic Membrane Exocytosis
    ("COP", ("COPS",)), # COPI
    ("SEC13", ()), # COPII
    ("SEC16", ()), # COPII
    ("SEC2", ()), # COPII
    ("SEC3", ()), # COPII
    ("PACSIN", ()), # Protein Kinase C And Casein Kinase Substrate In Neurons
    ("SYNGR", ()), # Synaptogyrin
    ("SNC", ()), # Synuclein
    ("COG", ()), # COG
    ("VTI", ()), # VTI
    ("HOOK", ()), # HOOK
    ("CYTH", ()), # Cytohesin
    ("TRAPP", ()), # TRAPP Complex
    ("GOLG", ("GOLGA6", "GOLGA7", "GOLGA8")), # Golgin
    ("LIN7", ()), # NMDA receptor cargo adaptor
    ("DCTN", ()), # Dynactin
    ("TMED", ()), # TMED
    ("UNC13", ()), # UNC13
    ("GOSR", ()), # Golgi SNAP
    ("WASHC", ()), # WASH complex
    ("SCAMP", ()), # SCAMP
    ("EXOC", ()), # Exocyst
    ("TBC1D", ()), # TBC1D Family
    ("GOLPH", ()), # Golgi Phosphoproteins
]
gene_lists["Trafficking adaptors"] = []
for _adaptor_prefix, _adaptor_skipped in _adaptor_families:
    _adaptor_mask = genes.str.startswith(_adaptor_prefix)
    for _skipped_prefix in _adaptor_skipped:
        _adaptor_mask = _adaptor_mask & ~(genes.str.startswith(_skipped_prefix))
    gene_lists["Trafficking adaptors"].extend(genes[_adaptor_mask])


# Individually named trafficking adaptor genes.
# Every row must end with a comma: Python joins adjacent string literals, so a missing
# comma (previously after "APLP1") produces a bogus symbol such as "APLP1ANKFY1" that
# the intersection with `genes` below drops without any warning.
gene_lists["Trafficking adaptors"].extend([
    "MADD", "PPFIA1", "PPFIA2", "SYP", # Pre-synaptic vesicle adaptor
    "KIFBP", "SYBU", "MGARP", # Mitochondria
    "MAPK8IP1", "MAPK8IP2", "MAPK8IP3", "APP", # Vesicles
    "HAP1", # GABA receptor vesicles
    "HTT", # BDNF vesicles
    "HSPA8", # Slow cargo
    "SPEN", # NR2B vesicles
    "GPHN", # Glycine vesicles
    "RILP", # NGF-TrkA Endosomes
    "BSN", "PCLO", "CPLX1", "CPLX2", "CPLX3", "CPLX4", # Active-zone-protein vesicles
    "DBNDD1", "DBNDD2", # Dystrobrevin
    "PEX5", "PEX14", # Peroxisomes
    "F8A1", "TMEM108", "TANC1", "TANC2", "STARD3", "STARD3NL",
    "FYCO1", "RUFY1", "RUFY2", "RUFY3", "RUFY4", # was "FYCOV1", which is not a gene symbol; FYCO1 is the presumed intent
    "CLN3", "CLN5", "BECN1", "BECN2", "ARL1", "SYCN", "TMEM87A",
    "VPS26A", "VPS26B", "VPS29", "VPS35", "VPS35L", # Retromer
    "VPS11", "VPS16", "VPS18", "VPS33A", "VPS8", "VPS39", "VPS41", # CORVET And HOPS Complexes
    "VPS33B", "VPS50", "VPS51", "VPS52", "VPS53", "VPS54",
    "CCDC93", "CCDC91", "EHBP1", "EHBP1L1", "RBSN", "EXPH5", "MLPH", "MYRIP", "OPTN",
    "BLTP1", "DNM1", "DNM2", "DNM3", "TMEM115", "APBA1", "APBA2", "APLP1",
    "ANKFY1", "RGP1", "SAR1A", "SAR1B", "SCRN1", "SCRN2", "SCRN3", "BAIAP3",
    "PHETA1", "PHETA2", "RHOBTB1", "RHOBTB2", "RHOBTB3", "RIN1", "RIN2", "RIN3",
    "ENTR1", "ITSN1", "ITSN2", "SCFD1", "SCFD2", "SORT1", "SORL1", "RIC1",
    "LAMP1", "GCC1", "GCC2", "AHI1", "LYST", "NBEA", "TMEM108", "LLGL1", "LLGL2",
    "STON1", "STON2", "ZFYVE16", "ZFYVE9", "ZFYVE27", "TFG", "RUBCN", "RUBCNL", "BICD1", "BICD2", "EEA1",
    "OTOF", "GBF1", "FAM91A1", "WDR11", "TMCC1", "TMCC2", "FCHSD1", "FCHSD2",
    "USP6NL", "NBAS", "PREB", "GOPC", "BRSK1", "BRSK2", "EIPR1", "TRIP11", "TRIP10",
    "DOC2A", "DOC2B", "C17orf75", "GGA1", "GGA2", "GGA3", "C9orf72", "PICK1",
    "RILP", "RILPL1", "RILPL2", "SYNRG", "DAB1", "DAB2", "GAPVD1", "LAMTOR1", "SURF4",
    "TOM1", "TOM1L2", "USO1", "SNPH", "NSF", "VTA1", "HIP1", "HIP1R",
])
# Keep only symbols present in `genes`, excluding any containing "-AS", "-DT" or "-IT".
# np.intersect1d also de-duplicates and sorts, so repeated entries above are harmless.
gene_lists["Trafficking adaptors"] = np.intersect1d(gene_lists["Trafficking adaptors"], genes[~(genes.str.contains("-AS")) & ~(genes.str.contains("-DT")) & ~(genes.str.contains("-IT"))])

# Axonal guidance, based on GO:0097485
# Families are matched by symbol prefix and appended in the order listed. Each entry is
# (prefix to match, substring whose presence excludes a symbol, or None for no exclusion).
# NOTE(review): unlike several other lists in this notebook, this one is not intersected
# with a "-AS"/"-IT"-free copy of `genes`; confirm that is intended.
_guidance_families = [
    ("SEMA", None), # Semaphorin
    ("UNC5", "50"), # Netrin receptor
    ("EPH", None), # Ephrin
    ("EFN", None), # Ephrin
    ("LGI", None), # Leucine Repeat (Slit-like)
    ("NOTCH", None), # Notch
    ("DLL", None), # Notch
    ("JAG", None), # Notch
    ("SLIT", None), # Slit
    ("LRTM", None), # Slit-like
    ("ROBO", None), # Robo
    ("PLXN", None), # Plexin
    ("FLRT", None), # Fibronectin Leucine Rich Transmembrane
    ("VEGF", None), # Vascular endothelial growth factor
    ("NCAM", None), # NCAM
    ("CNTN", "AP"), # Contactin
    ("PTCH", None), # Patched
    ("FEZ", None), # FEZ
    ("ERBB", None), # Erb-B2 Receptor Tyrosine Kinase
    ("FZD", None), # Fzd
    ("NRP", None), # Neuropilin
    ("BMP", None), # BMP
    ("FGF", "P"), # FGF
    ("NTN", None), # Netrin
    ("LGR", None), # LGRs
    ("WNT", None), # Wnt
    ("NTRK", None), # Ntrk
    ("NECTIN", None), # Nectin
    ("DSCAM", None), # DSCAM
    ("NRXN", None), # Neurexin
    ("GFR", None), # GDNF Receptor
]
gene_lists["Axonal guidance"] = []
for _guidance_prefix, _guidance_excluded in _guidance_families:
    _guidance_mask = genes.str.startswith(_guidance_prefix)
    if _guidance_excluded is not None:
        _guidance_mask = _guidance_mask & ~(genes.str.contains(_guidance_excluded))
    gene_lists["Axonal guidance"].extend(genes[_guidance_mask])

# Neuropeptide ligands and their receptors, based on https://elifesciences.org/articles/47889
# Both lists are fixed symbol lists, stored in the order written.
gene_lists["Neuropeptides"] = [
    "SST", "CORT", "NPY", "GRP", "CCK", "NTS",
    "NMB", "RLN1", "TRH", "ADCYAP1", "VIP", "CRH",
    "PNOC", "TAC1", "TAC3", "PDYN", "PTHLH", "PENK",
]

gene_lists["Neuropeptide receptors"] = [
    "NPY1R", "NPY2R", "NPY5R",
    "SSTR1", "SSTR2", "SSTR3", "SSTR4", "SSTR5",
    "VIPR1", "VIPR2",
    "TACR1", "TACR2", "TACR3",
    "OPRD1", "OPRM1", "OPRK1",
    "CRHR1", "CRHR2",
    "PTH1R", "OPRL1", "TRHR", "GRPR", "ADCYAP1R1",
    "NTSR1", "NTSR2", "NMBR",
]

# Myelin components and factors, from https://www.ncbi.nlm.nih.gov/books/NBK28221/#:~:text=The%20protein%20composition%20of%20CNS,are%20present%20to%20lesser%20extents.
gene_lists["Myelin components"] = [
    "MOG", "MAG", "MBP", "MOBP", "MPZ",
    "OMG", "CNP", "PLP1", "PLLP", "OPALIN",
]

# Re-myelination program, based on https://www.mdpi.com/2218-273X/11/2/283, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3892435/, and https://www.sciencedirect.com/science/article/pii/S0896627300805152, and https://linkinghub.elsevier.com/retrieve/pii/S0962892415001646
# NOTE(review): "DLL2", "ZFP488" and "ZFP191" may not be human gene symbols; verify them against `genes`.
gene_lists["Remyelination"] = [
    "OLIG1", "OLIG2", "SOX10", "SOX8", "CSPG4",
    "DLL1", "DLL2", "DLL3", "ASCL1", "HES5",
    "ID2", "ID4", "HDAC1", "HDAC2", "MYRF",
    "TCF7L2", "GPR17", "NKX2-2", "ZFP488", "ZFP191",
]

# Fc Receptors & MHCII
# Collect FCGR* and HLA-D* symbols plus CD74, then keep only those present in `genes`
# whose symbol contains none of "-AS", "-DT", "-IT" (result is a sorted, unique array).
_fc_mhcii_symbols = []
for _fc_prefix in ("FCGR", "HLA-D"):
    _fc_mhcii_symbols.extend(genes[genes.str.startswith(_fc_prefix)])
_fc_mhcii_symbols.extend(["CD74"])
_fc_mhcii_allowed = genes[~(genes.str.contains("-AS")) & ~(genes.str.contains("-DT")) & ~(genes.str.contains("-IT"))]
gene_lists["Fc receptors and MHCII"] = np.intersect1d(_fc_mhcii_symbols, _fc_mhcii_allowed)

# Human plaque induced genes, from https://www.sciencedirect.com/science/article/pii/S0092867420308151?via%3Dihub
# Fixed symbol list, stored in the order written.
gene_lists["Microglia plaque induced genes"] = [
    "AXL", "C1QA", "C1QB", "C1QC",
    "FCGR2A", "FCGR2B", "FCGR2C", "FCGR3A",
    "CSF1R", "CTSH", "CTSS", "CTSD",
    "CYBA", "GRN", "LY86", "LYZ",
    "HEXB", "LAPTM5", "OLFML3", "TYROBP",
]

# Interferon stimulated genes, from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4313732/
# Each entry is (prefix to match, substring whose presence excludes a symbol, or None).
_isg_families = [
    ("ISG", "L"),
    ("IFI", "L"),
    ("IRF", "BP"),
    ("JAK", "IP"),
    ("STAT", "H"),
    ("OAS", None),
]
gene_lists["Interferon stimulated genes"] = []
for _isg_prefix, _isg_excluded in _isg_families:
    _isg_mask = genes.str.startswith(_isg_prefix)
    if _isg_excluded is not None:
        _isg_mask = _isg_mask & ~(genes.str.contains(_isg_excluded))
    gene_lists["Interferon stimulated genes"].extend(genes[_isg_mask])

# Individually named genes
gene_lists["Interferon stimulated genes"].extend([
    "MX1", "MX2", "NINJ1", "IL1B", "IL6",
    "RNASEL", "EIF2AK2", "RSAD2", "BST2", "CH25H",
])

# Keep only symbols present in `genes` that contain none of "-AS", "-DT", "-IT"
# (np.intersect1d returns a sorted, de-duplicated array).
_isg_allowed = genes[~(genes.str.contains("-AS")) & ~(genes.str.contains("-DT")) & ~(genes.str.contains("-IT"))]
gene_lists["Interferon stimulated genes"] = np.intersect1d(gene_lists["Interferon stimulated genes"], _isg_allowed)

# Add genes from a UniProt keyword export (KW-0276, reviewed entries) to the existing
# "Fatty Acid metabolism" list. Only the text before the first space of each
# "Gene Names" field is used, and only symbols also present in `genes` are appended.
_uniprot_kw_path = os.path.join(pwd, "input", "dbs", "uniprotkb_keyword_KW_0276_AND_reviewed_2024_02_06.tsv")
_uniprot_kw_table = pd.read_csv(_uniprot_kw_path, sep="\t")
_uniprot_first_names = [_names.partition(" ")[0] for _names in _uniprot_kw_table["Gene Names"]]
gene_lists["Fatty Acid metabolism"].extend(np.intersect1d(_uniprot_first_names, genes))

# Supertypes flagged for the "Abundance Change" annotation (Sst first, then Pvalb).
affected_supertypes = (
    ["Sst_" + str(k) for k in (3, 19, 9, 11, 20, 23, 25, 2)]
    + ["Pvalb_" + str(k) for k in (6, 5, 8, 3, 2, 15, 14)]
)

# Boolean flag on the effect size table: is this population one of the flagged supertypes?
effect_size_table["Abundance Change"] = effect_size_table["Population"].isin(affected_supertypes)

# Same flag per nucleus, stored as the strings "True"/"False" so it can be joined to the subclass name.
_nucleus_is_affected = Sst_Pvalb.obs["Supertype"].isin(affected_supertypes)
Sst_Pvalb.obs["Abundance Change"] = _nucleus_is_affected.astype("str")
Sst_Pvalb.obs["Subclass"] = Sst_Pvalb.obs["Subclass"].astype("str")
Sst_Pvalb.obs["Subclass_Abundance Change"] = Sst_Pvalb.obs["Subclass"] + "_" + Sst_Pvalb.obs["Abundance Change"]

# Keep only nuclei whose "Neurotypical reference" value equals the string "False".
_not_reference = Sst_Pvalb.obs["Neurotypical reference"] == "False"
Sst_Pvalb = Sst_Pvalb[_not_reference, :].copy()

# Convert the two grouping labels to categoricals after subsetting.
for _label_column in ("Subclass", "Subclass_Abundance Change"):
    Sst_Pvalb.obs[_label_column] = Sst_Pvalb.obs[_label_column].astype("category")


In [None]:
# Scatterplot
# Mean effect sizes per gene across Sst supertypes, split by the boolean
# "Abundance Change" flag (True = supertype listed in affected_supertypes).
df = effect_size_table.loc[
    (effect_size_table["Taxonomy Level"] == "Supertype")
    & (effect_size_table["Population"].str.contains("Sst_"))
].copy()
mean_table = df.drop(["Population", "Taxonomy Level"], axis=1).groupby(["Gene", "Abundance Change"]).mean()
mean_table = mean_table.reset_index()
mean_table.index = mean_table["Gene"].copy()
# Rename the index so it does not clash with the "Gene" column in the groupby below
mean_table.index.name = "index"
# Row-to-row difference within each gene. The groupby above sorts its keys, so the False
# row precedes the True row and the surviving row is (True - False); the first row of
# each gene is NaN and is removed by dropna().
diff_table = mean_table.groupby(["Gene"]).diff().dropna().drop("Abundance Change", axis=1)
diff_table.columns = ["Diff_" + i for i in diff_table.columns]

# One row per gene, with "<metric>_True" / "<metric>_False" columns plus the "Diff_<metric>" columns
mean_table = mean_table.drop("Gene", axis=1).pivot(columns=["Abundance Change"])
mean_table.columns = [i[0] + "_" + str(i[1]) for i in mean_table.columns]
mean_table = mean_table.merge(diff_table, left_index=True, right_index=True, how="left")

df = mean_table.copy()

plt.rcParams["figure.figsize"] = (4,4)
# Drop genes whose mean expression is below 0.05 in both groups
tmp = df.loc[
    ~(
        (df["Mean expression (natural log UMIs per 10k plus 1)_True"] < 0.05) &
        (df["Mean expression (natural log UMIs per 10k plus 1)_False"] < 0.05)
    ),
    :
].copy()

indep = "Effect size across early pseudoprogression_True"
dep = "Effect size across early pseudoprogression_False"
hue = "Diff_Effect size across early pseudoprogression"

# Background: every retained gene in light grey
ax = sns.scatterplot(
    data=tmp,
    x=indep,
    y=dep,
    alpha=0.05,
    size=1,
    color="lightgrey",
);

# Gene lists drawn on top of the background, and their colors
colors = {
    "Electron transport chain components": "red",
    "Ribosomal proteins": "orange",
    "Ubiquitination": "blue",
    "Phosphorylation": "green",
}

# Text artists for the labelled genes. Starting from an explicit empty list replaces the
# previous bare try/except on an undefined name, which also hid unrelated errors.
# NOTE(review): the list is collected but never passed to adjust_text; confirm whether
# label de-overlapping was intended.
texts = []

for list_name, list_genes in gene_lists.items():
    if list_name not in colors:
        continue
    in_list = tmp.index.isin(list_genes)
    # Genes whose effect sizes differ by more than 1 between groups are drawn more opaque
    ax = sns.scatterplot(
        data=tmp.loc[(tmp[hue].abs() > 1) & in_list, :],
        x=indep,
        y=dep,
        alpha=0.3,
        size=1,
        color=colors[list_name],
    );
    ax = sns.scatterplot(
        data=tmp.loc[(tmp[hue].abs() <= 1) & in_list, :],
        x=indep,
        y=dep,
        alpha=0.05,
        size=1,
        color=colors[list_name],
    );
    if list_name in ["Electron transport chain components", "Ribosomal proteins"]:
        # Label up to 10 genes with the lowest y value among those with y < -1 and difference > 1
        to_label = tmp[(tmp[dep] < -1) & (tmp[hue] > 1) & in_list].sort_values(by=dep).index[:10]
        texts.extend([plt.text(tmp.loc[i, indep], tmp.loc[i, dep], i, size=8, color="grey") for i in to_label])
    if list_name in ["Ubiquitination", "Phosphorylation", "Lipid metabolism"]:
        # Label up to 10 genes with the lowest y value among those with x < -1 and difference < -1
        to_label = tmp[(tmp[indep] < -1) & (tmp[hue] < -1) & in_list].sort_values(by=dep).index[:10]
        texts.extend([plt.text(tmp.loc[i, indep], tmp.loc[i, dep], i, size=8, color="grey", horizontalalignment="right") for i in to_label])

# Dashed guides at y = x - 1 and y = x + 1 (the |difference| = 1 boundaries)
x = np.linspace(-4,4,100)
y = x - 1
plt.plot(x, y, linestyle="--", color="lightgrey");
y = x + 1
plt.plot(x, y, linestyle="--", color="lightgrey");
# Hide the legend that seaborn adds for the constant size=1
plt.legend('',frameon=False)
ax.set(xlim=(-4,4), ylim=(-4,4), xlabel="Mean early effect size in\nvulnerable Sst supertypes", ylabel="Mean early effect size in\n unaffected Sst supertypes");
plt.savefig(os.path.join(pwd, "output", "Figure 6i_scatterplot_vulnerable versus unaffected Sst supertypes early effect sizes.pdf"), bbox_inches="tight")
plt.show()

In [None]:
# Deltaplots
# One delta plot per gene, comparing the Sst/Pvalb subclasses split by the
# "Subclass_Abundance Change" label and highlighting the "*_True" groups.
# NOTE(review): the save path contains a literal "{title}" placeholder; presumably
# delta_plot fills it in with the plot title - confirm in helper_functions.
deltaplot_genes = ["MAPK8", "NGF", "MME", "LNX2", "ATP5MPL", "RPL4"]
deltaplot_save_path = os.path.join(pwd, "output", "Figure 6hi_deltaplot_{title}_in_Sst_Pvalb_splitby_subclass_groupby_Subclass_Abundance Change.pdf")
for gene_symbol in deltaplot_genes:
    ax = delta_plot(
        adata=Sst_Pvalb,
        genes=[gene_symbol],
        groupby="Subclass",
        groupby_subset=["Sst", "Pvalb"],
        plotby="Subclass_Abundance Change",
        donor="Donor ID",
        across="Continuous Pseudo-progression Score",
        highlight=["Sst_True", "Pvalb_True"],
        colormap=None,
        title="Expression of " + gene_symbol,
        legend=False,
        save=deltaplot_save_path
    )
    plt.show();

### Extended Data Figure 11h

In [None]:
# Mean effect sizes per gene across Pvalb supertypes, split by the boolean
# "Abundance Change" flag (True = supertype listed in affected_supertypes).
df = effect_size_table.loc[
    (effect_size_table["Taxonomy Level"] == "Supertype")
    & (effect_size_table["Population"].str.contains("Pvalb_"))
].copy()
mean_table = df.drop(["Population", "Taxonomy Level"], axis=1).groupby(["Gene", "Abundance Change"]).mean()
mean_table = mean_table.reset_index()
mean_table.index = mean_table["Gene"].copy()
# Rename the index so it does not clash with the "Gene" column in the groupby below
mean_table.index.name = "index"
# Row-to-row difference within each gene. The groupby above sorts its keys, so the False
# row precedes the True row and the surviving row is (True - False); the first row of
# each gene is NaN and is removed by dropna().
diff_table = mean_table.groupby(["Gene"]).diff().dropna().drop("Abundance Change", axis=1)
diff_table.columns = ["Diff_" + i for i in diff_table.columns]

# One row per gene, with "<metric>_True" / "<metric>_False" columns plus the "Diff_<metric>" columns
mean_table = mean_table.drop("Gene", axis=1).pivot(columns=["Abundance Change"])
mean_table.columns = [i[0] + "_" + str(i[1]) for i in mean_table.columns]
mean_table = mean_table.merge(diff_table, left_index=True, right_index=True, how="left")

df = mean_table.copy()

plt.rcParams["figure.figsize"] = (4,4)
# Drop genes whose mean expression is below 0.05 in both groups
tmp = df.loc[
    ~(
        (df["Mean expression (natural log UMIs per 10k plus 1)_True"] < 0.05) &
        (df["Mean expression (natural log UMIs per 10k plus 1)_False"] < 0.05)
    ),
    :
].copy()

# Plotted columns. These are defined before their first use so the cell does not depend
# on variables left behind by the Figure 6i cell.
indep = "Effect size across early pseudoprogression_True"
dep = "Effect size across early pseudoprogression_False"
hue = "Diff_Effect size across early pseudoprogression"

# Background: every retained gene in light grey
ax = sns.scatterplot(
    data=tmp,
    x=indep,
    y=dep,
    alpha=0.05,
    size=1,
    color="lightgrey",
);

# Gene lists drawn on top of the background, and their colors
colors = {
    "Electron transport chain components": "red",
    "Ribosomal proteins": "orange",
    "Ubiquitination": "blue",
    "Phosphorylation": "green",
}

for list_name, list_genes in gene_lists.items():
    if list_name not in colors:
        continue
    in_list = tmp.index.isin(list_genes)
    # Genes whose effect sizes differ by more than 1 between groups are drawn more opaque
    ax = sns.scatterplot(
        data=tmp.loc[(tmp[hue].abs() > 1) & in_list, :],
        x=indep,
        y=dep,
        alpha=0.3,
        size=1,
        color=colors[list_name],
    );
    ax = sns.scatterplot(
        data=tmp.loc[(tmp[hue].abs() <= 1) & in_list, :],
        x=indep,
        y=dep,
        alpha=0.05,
        size=1,
        color=colors[list_name],
    );

# Dashed guides at y = x - 1 and y = x + 1 (the |difference| = 1 boundaries)
x = np.linspace(-4,4,100)
y = x - 1
plt.plot(x, y, linestyle="--", color="lightgrey");
y = x + 1
plt.plot(x, y, linestyle="--", color="lightgrey");
# Hide the legend that seaborn adds for the constant size=1
plt.legend('',frameon=False)
ax.set(xlim=(-4,4), ylim=(-4,4), xlabel="Mean early effect size in\nvulnerable Pvalb supertypes", ylabel="Mean early effect size in\n unaffected Pvalb supertypes");
plt.savefig(os.path.join(pwd, "output", "Extended Data Figure 11h_scatterplot_vulnerable versus unaffected Pvalb supertypes early effect sizes.pdf"), bbox_inches="tight")
plt.show()

### Clean up

In [None]:
# Remove the "figures" directory under the working directory, if there is one.
# The existence check keeps this cell from raising FileNotFoundError when the directory
# was never created or the cell is run a second time.
figures_dir = os.path.join(pwd, "figures")
if os.path.isdir(figures_dir):
    shutil.rmtree(figures_dir)