## LUAD cell type fractions

In [None]:
import pertpy as pt

In [None]:
import scanpy as sc

In [None]:
import warnings

warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import mudata as mu

In [None]:
import sccoda.util.cell_composition_data as scc_dat
import sccoda.util.comp_ana as scc_ana
import sccoda.util.data_visualization as scc_viz

In [None]:
import altair as alt

In [None]:
import pandas as pd

In [None]:
# Input directory (pseudobulk output) and figure directory of the paired analysis.
path = "/data/projects/2023/LCBiome/nsclc_gender_atlas_tmp/out/011_analysis_paired_remove_xy/pseudobulk/"
resDir = "/data/projects/2023/LCBiome/nsclc_gender_atlas_tmp/out/011_analysis_paired_remove_xy/figures/"
# `path` already ends with "/", so the resulting file path contains "//".
input_path = "/".join([path, "paired_adata_clean.h5ad"])

In [None]:
adata = sc.read_h5ad(input_path)

In [None]:
# Merge alveolar macrophages into the general "Macrophage" label.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on `adata.obs[...]` modifies a temporary Series, which is not guaranteed to
# update `adata.obs` (it has no effect under pandas copy-on-write), and the
# resulting warning would be hidden by the warnings filter set above.
adata.obs["cell_type_major"] = adata.obs["cell_type_major"].replace(
    "Macrophage alveolar", "Macrophage"
)

In [None]:
# Drop the cell types that are excluded from the composition analysis.
excluded_cell_types = ["transitional club/AT2", "Ciliated", "other"]
keep_cells = ~adata.obs["cell_type_major"].isin(excluded_cell_types)
adata = adata[keep_cells]

In [None]:
adata.obs.cell_type_major.value_counts()

In [None]:
adata.obs.disease.value_counts()

In [None]:
adata

In [None]:
# Restrict the data to cells annotated as lung adenocarcinoma.
is_luad = adata.obs["disease"] == "lung adenocarcinoma"
adata = adata[is_luad]

In [None]:
adata

In [None]:
# Build the scCODA input: cell-level data is loaded with
# generate_sample_level=True, using donor_id as sample identifier and
# cell_type_major as cell type identifier.
sccoda_model = pt.tl.Sccoda()
load_options = {
    "type": "cell_level",
    "generate_sample_level": True,
    "cell_type_identifier": "cell_type_major",
    "sample_identifier": "donor_id",
    "covariate_obs": ["disease", "dataset", "sex"],
}
sccoda_data = sccoda_model.load(adata, **load_options)

# Inspect the container, the "coda" matrix and its sample annotation.
for shown in (sccoda_data, sccoda_data["coda"].X, sccoda_data["coda"].obs):
    print(shown)

In [None]:
# Keep only the samples whose sex is annotated as "male" or "female" and store
# a copy of them as a separate "coda_sex" modality.
sccoda_data.mod["coda_sex"] = sccoda_data["coda"][
    sccoda_data["coda"].obs["sex"].isin(["male", "female"])
].copy()
print(sccoda_data["coda_sex"])

In [None]:
# Set up the model on the "coda_sex" modality with sex as the only covariate;
# the reference cell type is left to the "automatic" setting.
sccoda_data = sccoda_model.prepare(
    sccoda_data,
    modality_key="coda_sex",
    formula="sex",
    reference_cell_type="automatic",
)
# Display the prepared modality.
sccoda_data["coda_sex"]

In [None]:
sccoda_data["coda_sex"]

In [None]:
sccoda_model.plot_boxplots(sccoda_data, modality_key="coda_sex", feature_name="sex", add_dots=False)
plt.show()

In [None]:
# Run MCMC (run_nuts) on the "coda_sex" modality, then display the modality.
sccoda_model.run_nuts(sccoda_data, modality_key="coda_sex")
sccoda_data["coda_sex"]

In [None]:
sccoda_model.summary(sccoda_data, modality_key="coda_sex")

In [None]:
sccoda_model.credible_effects(sccoda_data, modality_key="coda_sex")

In [None]:
#sccoda_model.plot_effects_barplot(sccoda_data, modality_key="coda_sex", parameter="Final Parameter")

In [None]:
# Re-evaluate the results with est_fdr=0.4 and print the updated summary.
# NOTE(review): the later cells and run_sccoda use est_fdr=0.1 — confirm that
# the more permissive 0.4 is intended here.
sccoda_model.set_fdr(sccoda_data, modality_key="coda_sex", est_fdr=0.4)
sccoda_model.summary(sccoda_data, modality_key="coda_sex")

In [None]:
sccoda_data

In [None]:
#path = "./sccoda_model/sccoda_model"
#sccoda_data.write_h5mu(path)

#sccoda_data_2 = mu.read_h5mu(path)

#sccoda_model.summary(sccoda_data_2, modality_key="coda_sex")

In [None]:
test_model = pt.tl.Sccoda()

In [None]:
test_model.get_intercept_df(sccoda_data, modality_key="coda_sex")

In [None]:
test_model.get_effect_df(sccoda_data, modality_key="coda_sex")

In [None]:
sccoda_data["coda_sex"].varm["intercept_df"]

In [None]:
import warnings

warnings.filterwarnings("ignore")

import arviz as az
import matplotlib.pyplot as plt
import pandas as pd
import pertpy as pt

In [None]:
sccoda_model.plot_boxplots(sccoda_data, modality_key="coda_sex", feature_name="sex", add_dots=False)
plt.show()

In [None]:
sccoda_model.plot_stacked_barplot(sccoda_data, modality_key="coda_sex", feature_name="sex")

In [None]:
def run_sccoda(subset, reference):
    """Fit scCODA for the sex covariate on ``subset`` and collect non-zero effects.

    Cells of ``subset`` are loaded with ``donor_id`` as sample identifier and
    ``cell_type_major`` as cell type identifier. Samples whose sex is "male" or
    "female" are stored as the "coda_sex" modality, and the model is fitted on
    that modality with sex treatment-coded against ``reference``. If any effect
    has a non-zero "Final Parameter", an effects bar plot is shown; otherwise a
    message is printed.

    Args:
        subset: AnnData object whose ``.obs`` provides the ``cell_type_major``,
            ``donor_id``, ``disease``, ``dataset`` and ``sex`` columns.
        reference: Level of ``sex`` used as baseline of the comparison.

    Returns:
        pandas.DataFrame with the columns "log2-fold change", "Cell Type",
        "Reference", "Comp. Group" and "Final Parameter", containing one row
        per cell type and comparison group with a non-zero "Final Parameter".
        The frame is empty when there is no such effect.
    """
    columns = ["log2-fold change", "Cell Type", "Reference", "Comp. Group", "Final Parameter"]
    modality = "coda_sex"

    model = pt.tl.Sccoda()
    # Load the data that was passed in, not the notebook-level `adata`.
    sccoda_data = model.load(
        subset,
        type="cell_level",
        generate_sample_level=True,
        cell_type_identifier="cell_type_major",
        sample_identifier="donor_id",
        covariate_obs=["disease", "dataset", "sex"],
    )

    # prepare()/run_nuts() below operate on "coda_sex", so the modality has to
    # be created first.
    coda = sccoda_data["coda"]
    sccoda_data.mod[modality] = coda[coda.obs["sex"].isin(["male", "female"])].copy()

    # Treatment coding makes `reference` the baseline level; the same term
    # prefixes the names of the fitted effects.
    covariate = f"C(sex, Treatment('{reference}'))"
    sccoda_data = model.prepare(
        sccoda_data,
        modality_key=modality,
        formula=covariate,
        reference_cell_type="automatic",
        automatic_reference_absence_threshold=0.1,
    )

    model.run_nuts(sccoda_data, modality_key=modality)
    model.set_fdr(sccoda_data, modality_key=modality, est_fdr=0.1)

    # Only levels present in the fitted modality have an effect table.
    fitted = sccoda_data[modality]
    comparison_groups = [g for g in fitted.obs["sex"].unique() if g != reference]

    frames = []
    for comp_group in comparison_groups:
        group_effects = fitted.varm[f"effect_df_{covariate}[T.{comp_group}]"][
            ["log2-fold change", "Final Parameter"]
        ]
        # Copy the filtered rows so the new columns are not written to a slice.
        group_effects = group_effects[group_effects["Final Parameter"] != 0].copy()
        group_effects["Cell Type"] = group_effects.index
        group_effects["Reference"] = reference
        group_effects["Comp. Group"] = comp_group
        frames.append(group_effects)

    if frames:
        effect_df = pd.concat(frames)[columns]
    else:
        effect_df = pd.DataFrame({column: [] for column in columns})

    if not effect_df.empty:
        fig = sccoda_model_plot = model.plot_effects_barplot(
            sccoda_data, modality_key=modality, return_fig=True, show=False
        )
        fig.set_size_inches(12, 4)
        fig.show()
    else:
        print(f"No significant effects for reference {reference}")

    return effect_df

In [None]:
# Rebuild the scCODA input from scratch, this time keeping only the dataset
# and sex covariates.
sccoda_model = pt.tl.Sccoda()
sample_covariates = ["dataset", "sex"]
sccoda_data = sccoda_model.load(
    adata,
    type="cell_level",
    generate_sample_level=True,
    cell_type_identifier="cell_type_major",
    sample_identifier="donor_id",
    covariate_obs=sample_covariates,
)

# Inspect the container, the "coda" matrix and its sample annotation.
print(sccoda_data)
coda_modality = sccoda_data["coda"]
print(coda_modality.X)
print(coda_modality.obs)

In [None]:
# Keep only the samples whose sex is annotated as "male" or "female" and store
# a copy of them as a separate "coda_sex" modality.
sccoda_data.mod["coda_sex"] = sccoda_data["coda"][
    sccoda_data["coda"].obs["sex"].isin(["male", "female"])
].copy()
print(sccoda_data["coda_sex"])

In [None]:
# Set up the model on the "coda_sex" modality with sex as the only covariate;
# the reference cell type is left to the "automatic" setting.
sccoda_data = sccoda_model.prepare(
    sccoda_data,
    modality_key="coda_sex",
    formula="sex",
    reference_cell_type="automatic",
)
# Display the prepared modality.
sccoda_data["coda_sex"]

In [None]:
# Run MCMC (run_nuts) on the "coda_sex" modality, then display the modality.
sccoda_model.run_nuts(sccoda_data, modality_key="coda_sex")
sccoda_data["coda_sex"]

In [None]:
sccoda_model.set_fdr(sccoda_data, modality_key="coda_sex", est_fdr=0.1)

In [None]:
# The model was prepared with formula="sex" and its fitted effect is stored as
# "effect_df_sex[T.male]", i.e. "female" is the baseline level. The comparison
# groups are therefore all sex levels other than "female".
reference_group = "female"
comparison_groups = [g for g in adata.obs["sex"].unique() if g != reference_group]

# Empty table with the columns used to collect the non-zero effects.
effect_df = pd.DataFrame(
    {"log2-fold change": [], "Cell Type": [], "Reference": [], "Comp. Group": [], "Final Parameter": []})

In [None]:
comparison_groups

In [None]:
effect_df

In [None]:
sccoda_data["coda_sex"].varm

In [None]:
sccoda_data["coda_sex"].varm["intercept_df"]

In [None]:
sccoda_data["coda_sex"].varm["effect_df_sex[T.male]"]

In [None]:
# "effect_df_sex[T.male]" holds the effect of male relative to the baseline
# level female, so the rows are labelled with Reference "female" and
# comparison group "male".
group_effects = sccoda_data["coda_sex"].varm["effect_df_sex[T.male]"][["log2-fold change", "Final Parameter"]]
# Keep cell types with a non-zero final parameter; copy the filtered rows so
# the new columns are not written to a slice of the varm table.
group_effects = group_effects[group_effects["Final Parameter"] != 0].copy()
group_effects["Cell Type"] = group_effects.index
group_effects["Reference"] = "female"
group_effects["Comp. Group"] = "male"
effect_df = pd.concat([effect_df, group_effects])
    

In [None]:
# Plot the effects when at least one was collected; otherwise report that
# nothing was found.
if not effect_df.empty:
    # The model was fitted on the "coda_sex" modality, so the plot has to read
    # from it instead of the default modality.
    fig = sccoda_model.plot_effects_barplot(
        sccoda_data, modality_key="coda_sex", return_fig=True, show=False
    )
    fig.set_size_inches(12, 4)
    fig.show()
else:
    # The fitted effect is "sex[T.male]", i.e. the reference level is female.
    print("No significant effects for reference female")
    