### Load required libraries

In [None]:
import os
import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
from datetime import datetime
from helper_functions import *

# Current working directory; the input file paths below are built relative to it.
pwd = os.getcwd()

# Write fonts into saved PDFs as Type 42 (TrueType), per matplotlib's `pdf.fonttype` setting.
plt.rcParams.update({"pdf.fonttype": 42})

### Read in the non-neuronal Mathys et al. (2023) dataset

In [None]:
region = "PFC_External"
dataset = "Mathys_2023_external"
date = "2023-12-11"

# Load the non-neuronal metadata table from
# <pwd>/input/<region>/PFC_<dataset>_metadata_keepers_non-neurons.<date>.csv,
# using the first CSV column as the index.
RNAseq = pd.read_csv(
    os.path.join(
        pwd,
        "input",
        region,
        region.replace("_External", "") + "_" + dataset + "_metadata_keepers_non-neurons." + date + ".csv",
    ),
    index_col=0,
)

# Format metadata for the compositional test
# reorder_categories() returns a new Series and pandas >= 2.0 no longer accepts
# `inplace=True`, so the reordered result is assigned to the column directly.
RNAseq["Cognitive_Status"] = (
    RNAseq["Cognitive Status"]
    .astype("category")
    .cat.reorder_categories(["No dementia", "Dementia"])
)

RNAseq["APOE4_Status"] = (
    RNAseq["APOE4 Status"]
    .astype("category")
    .cat.reorder_categories(["N", "Y"])
)

# Cut age at death into 5 equal-width bins, then divide the integer bin codes by
# their maximum so the covariate is scaled to at most 1.
# NOTE(review): a missing age would get code -1 and a negative scaled value —
# confirm the input has no missing ages.
RNAseq["Age_at_Death_binned"] = pd.cut(RNAseq["Age at Death"], bins=5)
RNAseq["Age_at_Death_binned_codes"] = RNAseq["Age_at_Death_binned"].cat.codes
RNAseq["Age_at_Death_binned_codes"] = RNAseq["Age_at_Death_binned_codes"] / RNAseq["Age_at_Death_binned_codes"].max()

# Order the neuropathological change levels from "Not AD" to "High", then scale
# their integer codes by the maximum code in the same way as the age bins.
RNAseq["Overall_AD_neuropathological_Change"] = RNAseq["Overall AD neuropathological Change"].astype("category")
RNAseq["Overall_AD_neuropathological_Change"] = RNAseq["Overall_AD_neuropathological_Change"].cat.reorder_categories(["Not AD", "Low", "Intermediate", "High"])
RNAseq["Overall_AD_neuropathological_Change_codes"] = RNAseq["Overall_AD_neuropathological_Change"].cat.codes
RNAseq["Overall_AD_neuropathological_Change_codes"] = RNAseq["Overall_AD_neuropathological_Change_codes"] / RNAseq["Overall_AD_neuropathological_Change_codes"].max()

### Run scCODA on the non-neuronal Mathys et al. (2023) dataset

In [None]:
# Settings for the non-neuronal compositional test.
region = "PFC_Mathys_2023"
labels_keys = ["Supertype"]

# Covariate(s) under test; also listed as the last entry of `covariates`.
tests = ["Overall_AD_neuropathological_Change_codes"]
covariates = [
    "Donor ID",
    "Sex",
    "Age_at_Death_binned_codes",
    "APOE4_Status",
    *tests,
]

# NOTE(review): the formula ends with a dangling " + " — presumably run_scCODA
# appends each entry of `tests` to it; confirm in helper_functions.
formula = "Sex + Age_at_Death_binned_codes + APOE4_Status + "

run_scCODA(
    cell_count=RNAseq,
    random_effect="Donor ID",
    split_key="Class",
    split_value=["Non-neuronal and Non-neural"],
    labels_keys=labels_keys,
    region=region,
    covariates=covariates,
    tests=tests,
    formula=formula,
    figsize=(20, 6),
)

### Read in the neuronal Mathys et al. (2023) dataset

In [None]:
region = "PFC_External"
dataset = "Mathys_2023_external"
date = "2023-12-11"

# Load the neuronal metadata table from
# <pwd>/input/<region>/PFC_<dataset>_metadata_keepers_neurons.<date>.csv,
# using the first CSV column as the index.
RNAseq = pd.read_csv(
    os.path.join(
        pwd,
        "input",
        region,
        region.replace("_External", "") + "_" + dataset + "_metadata_keepers_neurons." + date + ".csv",
    ),
    index_col=0,
)

# Format metadata for the compositional test
# reorder_categories() returns a new Series and pandas >= 2.0 no longer accepts
# `inplace=True`, so the reordered result is assigned to the column directly.
RNAseq["Cognitive_Status"] = (
    RNAseq["Cognitive Status"]
    .astype("category")
    .cat.reorder_categories(["No dementia", "Dementia"])
)

RNAseq["APOE4_Status"] = (
    RNAseq["APOE4 Status"]
    .astype("category")
    .cat.reorder_categories(["N", "Y"])
)

# Cut age at death into 5 equal-width bins, then divide the integer bin codes by
# their maximum so the covariate is scaled to at most 1.
# NOTE(review): a missing age would get code -1 and a negative scaled value —
# confirm the input has no missing ages.
RNAseq["Age_at_Death_binned"] = pd.cut(RNAseq["Age at Death"], bins=5)
RNAseq["Age_at_Death_binned_codes"] = RNAseq["Age_at_Death_binned"].cat.codes
RNAseq["Age_at_Death_binned_codes"] = RNAseq["Age_at_Death_binned_codes"] / RNAseq["Age_at_Death_binned_codes"].max()

# Order the neuropathological change levels from "Not AD" to "High", then scale
# their integer codes by the maximum code in the same way as the age bins.
RNAseq["Overall_AD_neuropathological_Change"] = RNAseq["Overall AD neuropathological Change"].astype("category")
RNAseq["Overall_AD_neuropathological_Change"] = RNAseq["Overall_AD_neuropathological_Change"].cat.reorder_categories(["Not AD", "Low", "Intermediate", "High"])
RNAseq["Overall_AD_neuropathological_Change_codes"] = RNAseq["Overall_AD_neuropathological_Change"].cat.codes
RNAseq["Overall_AD_neuropathological_Change_codes"] = RNAseq["Overall_AD_neuropathological_Change_codes"] / RNAseq["Overall_AD_neuropathological_Change_codes"].max()

### Run scCODA on the neuronal Mathys et al. (2023) dataset

In [None]:
# Settings for the neuronal compositional test.
region = "PFC_Mathys_2023"
# NOTE(review): the formula ends with a dangling " + " — presumably run_scCODA
# appends each entry of `tests` to it; confirm in helper_functions.
formula = "Sex + Age_at_Death_binned_codes + APOE4_Status + "
covariates = ["Donor ID", "Sex", "Age_at_Death_binned_codes", "APOE4_Status", "Overall_AD_neuropathological_Change_codes"]
tests = ["Overall_AD_neuropathological_Change_codes"]
labels_keys = ["Supertype"]
run_scCODA(
    # Pass the neuronal metadata table loaded into RNAseq; no `cell_count`
    # variable is defined in this notebook.
    cell_count=RNAseq,
    random_effect="Donor ID",
    split_key="Class",
    split_value=["Neuronal: Glutamatergic", "Neuronal: GABAergic"],
    labels_keys=labels_keys,
    region=region,
    covariates=covariates,
    tests=tests,
    formula=formula,
    figsize=(30, 6),
)