### Load required libraries and data

In [None]:
import os
import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
from datetime import datetime
from helper_functions import *

# Directory the notebook is run from; later cells resolve the "input" folder relative to it.
pwd = os.getcwd()
# Font type 42 makes matplotlib embed TrueType fonts in PDF output
# (presumably so figure text stays editable in vector-graphics tools).
plt.rcParams["pdf.fonttype"] = 42

### Read in the A9 snRNAseq and snMultiome datasets

In [None]:
# Both metadata tables come from the same dataset type and release date.
dataset = "RNAseq"
date = "2024-02-13"

# Pull the CPS values from the MTG dataset
region = "MTG"
MTG = pd.read_csv(
    os.path.join(pwd, "input", f"SEAAD_{region}_{dataset}_final-nuclei_metadata.{date}.csv"),
    index_col=0,
)
# The nuclei-level metadata repeats each donor's score on every row. Collapse it
# to one row per donor; otherwise the merge below is many-to-many and multiplies
# every A9 nucleus by the number of MTG nuclei from the same donor.
MTG = MTG.loc[:, ["Donor ID", "Continuous Pseudo-progression Score"]].drop_duplicates()

region = "A9"
RNAseq = pd.read_csv(
    os.path.join(pwd, "input", f"SEAAD_{region}_{dataset}_final-nuclei_metadata.{date}.csv"),
    index_col=0,
)
# Left join keeps every A9 nucleus (donors without an MTG score get NaN).
# validate="many_to_one" raises if a donor still maps to more than one score,
# instead of silently inflating the nucleus counts.
# NOTE: merging on a column replaces the original index with a RangeIndex.
RNAseq = RNAseq.merge(MTG, how="left", on="Donor ID", validate="many_to_one")

# Format metadata for the compositional test
# Copy of the score under a name without spaces.
RNAseq["Continuous_Pseudo-progression_Score"] = RNAseq["Continuous Pseudo-progression Score"].copy()

# Categorical with "Unchecked" ordered first.
RNAseq["Race_choice_White"] = RNAseq["Race (choice=White)"].astype("category")
RNAseq["Race_choice_White"] = RNAseq["Race_choice_White"].cat.reorder_categories(["Unchecked", "Checked"])

# Bin age into 5 equal-width intervals and rescale the bin codes by their maximum,
# so the top bin maps to 1.
RNAseq["Age_at_Death_binned"] = pd.cut(RNAseq["Age at Death"], bins=5)
RNAseq["Age_at_Death_binned_codes"] = RNAseq["Age_at_Death_binned"].cat.codes
RNAseq["Age_at_Death_binned_codes"] = RNAseq["Age_at_Death_binned_codes"] / RNAseq["Age_at_Death_binned_codes"].max()

### Run scCODA on the A9 snRNAseq and snMultiome datasets

In [None]:
# Primary model
region = "A9_RNAseq"
# The formula deliberately ends with " + "; presumably run_scCODA appends each
# entry of `tests` to it -- confirm against helper_functions.
formula = "Sex + Age_at_Death_binned_codes + Race_choice_White + "
# Use the formatted score column created for the compositional test
# ("Continuous_Pseudo-progression_Score"). The previous values did not match it:
# "Donor_Pseudo-progression" is not created anywhere in this notebook, and the
# test name was the raw, space-containing column.
covariates = [
    "library_prep",
    "Sex",
    "Age_at_Death_binned_codes",
    "Race_choice_White",
    "Continuous_Pseudo-progression_Score",
]
tests = ["Continuous_Pseudo-progression_Score"]
labels_keys = ["Supertype"]

# Run the same model twice: once on non-neuronal nuclei, once on both neuronal classes.
class_splits = [
    ["Non-neuronal and non-neural"],
    ["Neuronal: Glutamatergic", "Neuronal: GABAergic"],
]
for split_value in class_splits:
    run_scCODA(
        cell_count=RNAseq,
        random_effect="library_prep",
        split_key="Class",
        split_value=split_value,
        labels_keys=labels_keys,
        region=region,
        covariates=covariates,
        tests=tests,
        formula=formula,
    )