In [3]:
import glob
import os
import re

import numpy as np
import pandas as pd
import scanpy as sc

In [6]:
# Legacy covariate tokens -> final labels. These are applied as literal
# substring replacements (same matching scope as the original code), so a
# token embedded in a longer string is rewritten too.
_COVARIATE_RENAMES = {
    "age_at_death_binned_codes": "Age_at_Death_binned_codes",
    "sex": "Sex",
    "[T.M]": "[T.Male]",
    "ch_race___1": "Race_choice_White",
    "apoe4_status": "APOE4_Status",
    "ch_cognitivestatus_binary": "Cognitive_Status",
    "adneurochange_codes": "Overall_AD_neuropathological_Change_codes",
    "donor_pseudotime": "Continuous_Pseudo-progression_Score",
    "pmi_scaled": "PMI",
}

# Legacy cell type names -> final names. Note that "VLMC_2" is a prefix of
# "VLMC_2_1-SEAAD" / "VLMC_2_2-SEAAD"; _replace_literals matches the longest
# key first so the more specific entries win.
_CELLTYPE_RENAMES = {
    "Astro_Unknown_25": "Astro_6-SEAAD",
    "Oligo_Unknown_15": "Oligo_2_1-SEAAD",
    "OPC_Unknown_25": "OPC_2_2-SEAAD",
    "OPC_Unknown_18": "OPC_2_1-SEAAD",
    "VLMC_Unknown_24": "SMC-SEAAD",
    "VLMC_Unknown_32": "Pericyte_2-SEAAD",
    "Micro-PVM_Unknown_0": "Micro-PVM_3-SEAAD",
    "Micro-PVM_Unknown_9": "Micro-PVM_2_3-SEAAD",
    "Micro-PVM_Unknown_10": "Micro-PVM_4-SEAAD",
    "Micro-PVM_Unknown_116": "Micro-PVM_2_1-SEAAD",
    "Micro-PVM_Unknown_135": "Monocyte",
    "Micro-PVM_Unknown_200": "Lymphocyte",
    "VLMC_2": "Pericyte_1",
    "VLMC_2_1-SEAAD": "SMC-SEAAD",
    "VLMC_2_2-SEAAD": "Pericyte_2-SEAAD",
    "Micro-PVM_2_2-SEAAD": "Lymphocyte",
    "Micro-PVM_1_1-SEAAD": "Monocyte",
}

# Substitutions applied, in this order, to the file name of every output.
_FILENAME_RENAMES = (
    ("exc inh", "Neuronal: Glutamatergic Neuronal: GABAergic"),
    ("glia", "Non-neuronal and Non-neural"),
    ("supertype_scANVI_leiden", "Supertype"),
)

# Columns kept (in this order) in every rewritten CSV.
_RESULT_COLUMNS = [
    "Covariate",
    "Reference Cell Type",
    "Cell Type",
    "Final Parameter",
    "SD",
    "Inclusion probability",
]

# Exact-value relabelling of the "Sex" column of the h5ad objects.
_SEX_LABELS = {"M": "Male", "F": "Female"}


def _replace_literals(strings, mapping):
    """Replace every literal occurrence of a key of ``mapping`` by its value.

    ``strings`` is a pandas Series or Index of strings; an object of the same
    kind is returned. All keys are escaped, so "[T.M]" is matched verbatim and
    not as a regex character class (which is what pandas releases defaulting
    to ``regex=True`` would do). The substitution is a single pass with the
    longest key tried first, so a short key can neither pre-empt a longer key
    it is a prefix of, nor re-match text produced by an earlier replacement.
    """
    if not mapping:
        return strings
    longest_first = sorted(mapping, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(old) for old in longest_first))
    return strings.str.replace(
        pattern, lambda match: mapping[match.group(0)], regex=True
    )


def _renamed_path(path):
    """Return ``path`` with the legacy tokens in its file name replaced.

    Only the final path component is rewritten; directory names are left
    alone. ``path`` itself is returned unchanged when no token matches, so
    callers can detect "nothing to rename" with a plain comparison.
    """
    directory, filename = os.path.split(path)
    new_name = filename
    for old, new in _FILENAME_RENAMES:
        new_name = new_name.replace(old, new)
    if new_name == filename:
        return path
    return os.path.join(directory, new_name)


def _remove_if_renamed(path, outfile):
    """Delete the legacy file once, and only if the output went elsewhere."""
    if outfile != path:
        os.remove(path)


def _fix_results_csv(path):
    """Rewrite one results CSV with final covariate / cell type labels."""
    table = pd.read_csv(path, index_col=0)
    table = table.loc[:, _RESULT_COLUMNS].copy()

    table["Covariate"] = _replace_literals(table["Covariate"], _COVARIATE_RENAMES)
    for column in ("Cell Type", "Reference Cell Type"):
        table[column] = _replace_literals(table[column], _CELLTYPE_RENAMES)

    outfile = _renamed_path(path)
    table.to_csv(outfile)
    _remove_if_renamed(path, outfile)


def _fix_abundance_h5ad(path):
    """Rewrite one h5ad object with final obs column and var index labels."""
    adata = sc.read_h5ad(path)

    adata.obs.columns = _replace_literals(adata.obs.columns, _COVARIATE_RENAMES)

    # Exact-value mapping: values other than "M"/"F" (including already
    # relabelled ones) pass through untouched, so reruns are harmless.
    if "Sex" in adata.obs.columns:
        adata.obs["Sex"] = adata.obs["Sex"].map(
            lambda value: _SEX_LABELS.get(value, value)
        )

    # Keep only the covariates listed in the rename table (np.intersect1d
    # returns them sorted, as in the original implementation).
    columns_to_keep = np.intersect1d(
        adata.obs.columns, list(_COVARIATE_RENAMES.values())
    )
    adata.obs = adata.obs.loc[:, columns_to_keep].copy()

    adata.var.index = _replace_literals(adata.var.index, _CELLTYPE_RENAMES)

    outfile = _renamed_path(path)
    adata.write(outfile)
    _remove_if_renamed(path, outfile)


def quickfix(result_dir):
    """Relabel covariates, cell types and file names of saved result files.

    Every ``*.csv`` and ``*.h5ad`` file located exactly one directory below
    ``result_dir`` is rewritten:

    * CSV files are reduced to the columns in ``_RESULT_COLUMNS``; the
      "Covariate", "Cell Type" and "Reference Cell Type" columns are relabelled.
    * h5ad files get their ``obs`` columns renamed and restricted to the known
      covariates, "Sex" values "M"/"F" mapped to "Male"/"Female", and their
      ``var`` index relabelled.
    * Legacy tokens in the file name are replaced; when that changes the name
      the legacy file is deleted after the new one is written, otherwise the
      file is overwritten in place.

    Parameters
    ----------
    result_dir : str
        Directory whose immediate sub-directories hold the files to fix.

    Raises
    ------
    KeyError
        If a CSV lacks one of the columns in ``_RESULT_COLUMNS``.

    Notes
    -----
    The function modifies and deletes files on disk and prints the lists of
    files it found. It returns ``None``.
    """
    CSVs = glob.glob(os.path.join(result_dir, "*/*.csv"))
    print(CSVs)
    h5ads = glob.glob(os.path.join(result_dir, "*/*.h5ad"))
    print(h5ads)

    for csv_path in CSVs:
        _fix_results_csv(csv_path)

    for h5ad_path in h5ads:
        _fix_abundance_h5ad(h5ad_path)

In [11]:
# Relabel the CSV / h5ad outputs stored under the MTG_RNAseq_PMI result
# directory. NOTE: quickfix rewrites these files on disk and deletes the
# legacy-named copies, so rerunning this cell operates on the fixed files.
quickfix(
    result_dir="/allen/programs/celltypes/workgroups/hct/SEA-AD/MTG Manuscript/Single nucleus omics/03_Differential abundance analysis/output/MTG_RNAseq_PMI"
)

['/allen/programs/celltypes/workgroups/hct/SEA-AD/MTG Manuscript/Single nucleus omics/03_Differential abundance analysis/output/MTG_RNAseq_PMI/Continuous_Pseudo-progression_Score/Neuronal: Glutamatergic Neuronal: GABAergic_supertype_scANVI_leiden_results.csv', '/allen/programs/celltypes/workgroups/hct/SEA-AD/MTG Manuscript/Single nucleus omics/03_Differential abundance analysis/output/MTG_RNAseq_PMI/Continuous_Pseudo-progression_Score/Non-neuronal and Non-neural_supertype_scANVI_leiden_results.csv']
['/allen/programs/celltypes/workgroups/hct/SEA-AD/MTG Manuscript/Single nucleus omics/03_Differential abundance analysis/output/MTG_RNAseq_PMI/objects/Neuronal: Glutamatergic Neuronal: GABAergic_Supertype_abundances.h5ad', '/allen/programs/celltypes/workgroups/hct/SEA-AD/MTG Manuscript/Single nucleus omics/03_Differential abundance analysis/output/MTG_RNAseq_PMI/objects/Non-neuronal and Non-neural_Supertype_abundances.h5ad']
