### Load required libraries and data

In [None]:
import os
import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
from datetime import datetime
from helper_functions import *

# Directory the notebook is run from; the input CSVs are read from its "input" subfolder.
pwd = os.getcwd()

# Write Type 42 (TrueType) fonts into saved PDFs.
plt.rcParams.update({"pdf.fonttype": 42})

### Read in the MTG snRNAseq and snMultiome datasets

In [None]:
# Load the per-nucleus metadata table for the MTG snRNAseq/snMultiome data.
region = "MTG"
dataset = "RNAseq"
date = "2024-02-13"
metadata_csv = "SEAAD_" + region + "_" + dataset + "_final-nuclei_metadata." + date + ".csv"
RNAseq = pd.read_csv(os.path.join(pwd, "input", metadata_csv), index_col=0)

# Format metadata for the compositional test.
# Each covariate is copied into a column whose name has no spaces/parentheses so it
# can be referenced in the model formula. Categoricals are given an explicit level
# order (presumably so the first level acts as the reference — confirm in run_scCODA).
RNAseq["Cognitive_Status"] = (
    RNAseq["Cognitive Status"]
    .astype("category")
    .cat.reorder_categories(["No dementia", "Dementia"])
)

# ADNC is kept both as an ordered categorical and as integer codes rescaled by the
# largest code, so the highest observed level maps to 1.
adnc = (
    RNAseq["Overall AD neuropathological Change"]
    .astype("category")
    .cat.reorder_categories(["Not AD", "Low", "Intermediate", "High"])
)
RNAseq["Overall_AD_neuropathological_Change"] = adnc
adnc_codes = adnc.cat.codes
RNAseq["Overall_AD_neuropathological_Change_codes"] = adnc_codes / adnc_codes.max()

RNAseq["Continuous_Pseudo-progression_Score"] = RNAseq["Continuous Pseudo-progression Score"].copy()

# APOE4 status: True when the genotype string contains a "4".
# RNAseq is a plain DataFrame, so the column is read directly (there is no AnnData
# ``.obs`` accessor), and the categorical is built from the boolean derived here so
# that the [False, True] -> ["N", "Y"] relabelling below applies to it.
apoe4_carrier = RNAseq["APOE Genotype"].str.contains("4")
RNAseq["APOE4_Status"] = (
    apoe4_carrier
    .astype("category")
    .cat.reorder_categories([False, True])
    .cat.rename_categories(
        {
            False: "N",
            True: "Y",
        }
    )
)

RNAseq["Race_choice_White"] = (
    RNAseq["Race (choice=White)"]
    .astype("category")
    .cat.reorder_categories(["Unchecked", "Checked"])
)

# Age at death is cut into 5 equal-width bins; the bin codes are rescaled by the
# largest code in the same way as the ADNC codes above.
age_bins = pd.cut(RNAseq["Age at Death"], bins=5)
RNAseq["Age_at_Death_binned"] = age_bins
age_codes = age_bins.cat.codes
RNAseq["Age_at_Death_binned_codes"] = age_codes / age_codes.max()

# Min-max scale PMI to [0, 1] in place (overwrites the raw "PMI" column).
pmi_min = RNAseq["PMI"].min()
pmi_max = RNAseq["PMI"].max()
RNAseq["PMI"] = (RNAseq["PMI"] - pmi_min) / (pmi_max - pmi_min)

### Run scCODA on the MTG snRNAseq and snMultiome datasets

In [None]:
# Every model below is fit twice, once per grouping of the "Class" column:
# first the non-neuronal class, then the two neuronal classes together.
class_splits = (
    ("Non-neuronal and non-neural",),
    ("Neuronal: Glutamatergic", "Neuronal: GABAergic"),
)

# Primary model
# NOTE(review): the formula ends with " + "; presumably run_scCODA appends each
# entry of `tests` to it — confirm in helper_functions.
region = "MTG_RNAseq"
formula = "Sex + Age_at_Death_binned_codes + Race_choice_White + APOE4_Status + method + "
covariates = ["library_prep", "method", "Sex", "Age_at_Death_binned_codes", "APOE4_Status", "Race_choice_White", "Cognitive_Status", "Overall_AD_neuropathological_Change_codes", "Donor_Pseudo-progression"]
tests = ["Cognitive_Status", "Overall_AD_neuropathological_Change_codes", "Continuous Pseudo-progression Score"]
labels_keys = ["Supertype"]

for class_split in class_splits:
    run_scCODA(
        cell_count=RNAseq,
        random_effect="library_prep",
        split_key="Class",
        split_value=list(class_split),
        labels_keys=labels_keys,
        region=region,
        covariates=covariates,
        tests=tests,
        formula=formula,
    )

# Secondary models requested by reviewers

# (1) Donor ID instead of library_prep as the random effect; the formula is the
#     primary model's.
region = "MTG_RNAseq_Donor"
covariates = ["Donor ID", "method", "Sex", "Age_at_Death_binned_codes", "APOE4_Status", "Race_choice_White", "Cognitive_Status", "Overall_AD_neuropathological_Change_codes", "Donor_Pseudo-progression"]
tests = ["Continuous Pseudo-progression Score"]

for class_split in class_splits:
    run_scCODA(
        cell_count=RNAseq,
        random_effect="Donor ID",
        split_key="Class",
        split_value=list(class_split),
        labels_keys=labels_keys,
        region=region,
        covariates=covariates,
        tests=tests,
        formula=formula,
    )

# (2) Primary model with PMI added as a covariate.
region = "MTG_RNAseq_PMI"
formula = "Sex + Age_at_Death_binned_codes + Race_choice_White + APOE4_Status + method + PMI + "
covariates = ["library_prep", "method", "Sex", "Age_at_Death_binned_codes", "APOE4_Status", "Race_choice_White", "PMI", "Cognitive_Status", "Overall_AD_neuropathological_Change_codes", "Donor_Pseudo-progression"]
tests = ["Continuous Pseudo-progression Score"]

for class_split in class_splits:
    run_scCODA(
        cell_count=RNAseq,
        random_effect="library_prep",
        split_key="Class",
        split_value=list(class_split),
        labels_keys=labels_keys,
        region=region,
        covariates=covariates,
        tests=tests,
        formula=formula,
    )

# (3) Primary formula and covariates, restricted to rows whose
#     "Severely Affected Donor" value is "N".
# NOTE: this rebinds RNAseq to the filtered copy, so anything executed afterwards
# (including a re-run of this cell) only sees the retained rows.
region = "MTG_RNAseq_No_SA_Donors"
formula = "Sex + Age_at_Death_binned_codes + Race_choice_White + APOE4_Status + method + "
covariates = ["library_prep", "method", "Sex", "Age_at_Death_binned_codes", "APOE4_Status", "Race_choice_White", "Cognitive_Status", "Overall_AD_neuropathological_Change_codes", "Donor_Pseudo-progression"]
tests = ["Continuous Pseudo-progression Score"]
not_severely_affected = RNAseq["Severely Affected Donor"] == "N"
RNAseq = RNAseq.loc[not_severely_affected, :].copy()

for class_split in class_splits:
    run_scCODA(
        cell_count=RNAseq,
        random_effect="library_prep",
        split_key="Class",
        split_value=list(class_split),
        labels_keys=labels_keys,
        region=region,
        covariates=covariates,
        tests=tests,
        formula=formula,
    )

### Read in the snATACseq dataset

In [None]:
# Load the per-nucleus metadata table for the MTG snATACseq data.
region = "MTG"
dataset = "ATACseq"
date = "2024-02-13"
metadata_csv = "SEAAD_" + region + "_" + dataset + "_final-nuclei_metadata." + date + ".csv"
ATACseq = pd.read_csv(os.path.join(pwd, "input", metadata_csv), index_col=0)

# Keep only the rows whose "method" is 10xATAC_v1.1.
is_atac_v1_1 = ATACseq["method"] == "10xATAC_v1.1"
ATACseq = ATACseq.loc[is_atac_v1_1, :].copy()

# Format metadata for the compositional test
# (copies of the covariates under names without spaces/parentheses).
ATACseq["Continuous_Pseudo-progression_Score"] = ATACseq["Continuous Pseudo-progression Score"].copy()

ATACseq["Race_choice_White"] = (
    ATACseq["Race (choice=White)"]
    .astype("category")
    .cat.reorder_categories(["Unchecked", "Checked"])
)

# Age at death in 5 equal-width bins; bin codes are rescaled by the largest code.
age_bins = pd.cut(ATACseq["Age at Death"], bins=5)
ATACseq["Age_at_Death_binned"] = age_bins
age_codes = age_bins.cat.codes
ATACseq["Age_at_Death_binned_codes"] = age_codes / age_codes.max()

### Run scCODA on the MTG snATACseq dataset

In [None]:
# Primary model
# NOTE(review): `covariates` lists "APOE4_Status", "Cognitive_Status" and
# "Overall_AD_neuropathological_Change_codes", which the ATACseq formatting cell in
# this notebook does not derive — confirm the ATACseq table provides every column
# run_scCODA needs.
region = "MTG_ATACseq"
formula = "Sex + Age_at_Death_binned_codes + Race_choice_White + "
covariates = ["library_prep", "method", "Sex", "Age_at_Death_binned_codes", "APOE4_Status", "Race_choice_White", "Cognitive_Status", "Overall_AD_neuropathological_Change_codes", "Donor_Pseudo-progression"]
tests = ["Continuous Pseudo-progression Score"]
labels_keys = ["Supertype"]

# Fit on the snATACseq table. Passing RNAseq here would run the model on the RNAseq
# subset left over from the "No_SA_Donors" analysis while labelling the output as
# MTG_ATACseq.

# Non-neuronal class.
run_scCODA(
    cell_count=ATACseq,
    random_effect="library_prep",
    split_key="Class",
    split_value=["Non-neuronal and non-neural"],
    labels_keys=labels_keys,
    region=region,
    covariates=covariates,
    tests=tests,
    formula=formula,
)
# Neuronal classes (glutamatergic and GABAergic together).
run_scCODA(
    cell_count=ATACseq,
    random_effect="library_prep",
    split_key="Class",
    split_value=["Neuronal: Glutamatergic", "Neuronal: GABAergic"],
    labels_keys=labels_keys,
    region=region,
    covariates=covariates,
    tests=tests,
    formula=formula,
)