In [None]:
import pandas as pd
import scanpy as sc

In [None]:
import sccoda.util.cell_composition_data as scc_dat
import sccoda.util.data_visualization as scc_viz

In [None]:
# Input AnnData file. The file name suggests merged samples with doublets removed
# (not verified in this notebook).
adata_path = "/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/results/results_by_sample/merge_and_solo_samples/adata_nodoublet.h5ad"
adata = sc.read_h5ad(adata_path)

In [None]:
# Show the AnnData summary of the object that was just loaded.
adata

In [None]:
# Inspect the observation annotation table as loaded from disk.
adata.obs

In [None]:
# Inspect the variable annotation table as loaded from disk.
adata.var

In [None]:
# Per-sample metadata, one row per sample ID.
_SAMPLE_ROWS = [
    # (sample_id, group, condition, origin)
    ("10mix-ICI1", "10mix", "ICI", "tumor"),
    ("10mix-ICI2", "10mix", "ICI", "tumor"),
    ("11mix-ICI1", "11mix", "ICI", "tumor"),
    ("11mix-ICI2", "11mix", "ICI", "tumor"),
    ("GF-ICI1", "GF", "No ICI", "tumor"),
    ("GF-ICI2", "GF", "No ICI", "tumor"),
    ("GF-ICI1-plus", "GF-plus", "ICI", "tumor"),
    ("GF-ICI2-plus", "GF-plus", "ICI", "tumor"),
    ("10mix1", "10mix", "naive", "colon"),
    ("10mix2", "10mix", "naive", "colon"),
    ("11mix1", "11mix", "naive", "colon"),
    ("11mix2", "11mix", "naive", "colon"),
    ("GF1", "GF", "naive", "colon"),
    ("GF2", "GF", "naive", "colon"),
]

# Lookup used to annotate cells: sample ID -> {"group", "condition", "origin"}.
samples = {
    sample_id: {"group": group, "condition": condition, "origin": origin}
    for sample_id, group, condition, origin in _SAMPLE_ROWS
}

In [None]:
# Copy the per-sample metadata from `samples` onto every cell in `adata.obs`,
# keyed by the cell's `sample_id`. A sample ID missing from `samples` raises
# KeyError, so an incomplete table fails loudly instead of producing NaN.
# Each column is assigned exactly once (the original set `condition` twice).
for _field in ("group", "condition", "origin"):
    adata.obs[_field] = adata.obs["sample_id"].map(
        lambda sample_id, _field=_field: samples[sample_id][_field]
    )

In [None]:
# De-duplicate the variable names in place so each one identifies a single variable.
adata.var_names_make_unique()

In [None]:
# Sanity check: distinct condition labels produced by the sample mapping.
set(adata.obs.condition)

In [None]:
# Re-inspect the variable table after making the variable names unique.
adata.var

In [None]:
# Re-display the AnnData summary; obs should now list group, condition and origin.
adata

In [None]:
import mygene

In [None]:
# Annotate the variables with MyGene.info results, queried by the IDs stored in
# the var index (scopes="ensembl.gene").
mg = mygene.MyGeneInfo()
list_var = adata.var.index.unique()
ginfo = mg.querymany(list(list_var), scopes='ensembl.gene')

# A query can return several hits; keep one row per query (the last, as before).
df = pd.DataFrame(ginfo).drop_duplicates(subset='query', keep='last')

# Merge the hits ONTO adata.var and restore the original index afterwards.
# DataFrame.merge returns a fresh RangeIndex, so assigning the merge result
# directly would replace the var_names with 0..n-1. Because `df` is unique per
# query, a left merge keeps exactly the rows and order of adata.var.
var_index = adata.var.index
adata.var["query"] = var_index
var_annotated = adata.var.merge(df, on='query', how='left')
var_annotated.index = var_index

# Fall back to the queried ID wherever no symbol was returned.
var_annotated['symbol'] = var_annotated['symbol'].fillna(var_annotated['query'])
adata.var = var_annotated


In [None]:
# Manual annotation grouped by label: cell-type label -> Leiden cluster numbers.
_CLUSTERS_BY_CELL_TYPE = {
    "IFN T-cells": (0, 8),
    "Naive T-cells": (1, 3, 6, 12),
    "Exhausted T-cells": (2, 5, 7, 10),
    "Senescent-like T-cells": (4, 9),
    "Undefined": (11,),
    "Stress response T-cells": (13,),
}

# Flattened lookup, cluster ID (as string) -> cell-type label, in cluster order.
cl_annotation = {
    str(cluster): cell_type
    for cluster, cell_type in sorted(
        (cluster, cell_type)
        for cell_type, clusters in _CLUSTERS_BY_CELL_TYPE.items()
        for cluster in clusters
    )
}

In [None]:
# Leiden clustering at resolution 0.7; cluster labels are stored in adata.obs["leiden"].
# NOTE(review): cl_annotation hard-codes labels for cluster IDs "0"-"13". Those labels
# only apply if this run reproduces the same cluster numbering — confirm.
sc.tl.leiden(adata, resolution=0.7, key_added="leiden")

In [None]:
# UMAP coloured by Leiden cluster.
sc.pl.umap(adata, color=["leiden"])


In [None]:
# UMAP coloured by Ifng and Gzmb, with gene names resolved through the "symbol" column of adata.var.
sc.pl.umap(adata, color=["Ifng","Gzmb"], gene_symbols="symbol")

In [None]:
# Translate Leiden cluster IDs into cell-type labels.
# Cluster IDs without an entry in cl_annotation end up as NaN.
leiden_clusters = adata.obs["leiden"]
adata.obs["cell_type_anno"] = leiden_clusters.map(cl_annotation)

In [None]:
# UMAP coloured by the manually assigned cell-type label.
sc.pl.umap(adata, color=["cell_type_anno"])


In [None]:
# Distinct cell-type labels actually assigned (NaN here would indicate an unmapped cluster ID).
set(adata.obs["cell_type_anno"])

In [None]:
# Inspect the observation table after adding the cluster and cell-type columns.
adata.obs

In [None]:
# UMAP coloured by the sample's origin (values "tumor" / "colon" in `samples`).
sc.pl.umap(adata, color=["origin"])


In [None]:
# Re-display the AnnData summary after annotation.
adata

In [None]:
# Load a second AnnData object from a path relative to the working directory.
# NOTE(review): in the visible part of the notebook adata_qc is only displayed, never used — confirm it is still needed.
adata_qc = sc.read_h5ad("mdata_qc.h5ad")

In [None]:
# Show the AnnData summary of the QC object.
adata_qc

### Compositional analysis

In [None]:
# From here on the per-sample "condition" column is called "treatment".
adata.obs = adata.obs.rename(columns={"condition": "treatment"})

In [None]:
# Distinct sample IDs present in the data.
set(adata.obs.sample_id)

In [None]:
# Count cells per cell type within every (group, sample_id, treatment) combination.
cells_per_sample = adata.obs.groupby(["group", "sample_id", "treatment"])
cell_type_counts = cells_per_sample.apply(
    lambda sample_obs: sample_obs.value_counts("cell_type_anno", normalize=False)
)

# Long table with one row per combination and cell type; the count column is "n_cells".
frac_by_condition = cell_type_counts.reset_index(name="n_cells")
# "condition" is a plain-string copy of "group"; later filters use it.
frac_by_condition = frac_by_condition.assign(
    condition=lambda counts: counts["group"].astype(str)
)


In [None]:
# Drop the cell types that are excluded from the compositional analysis.
excluded_cell_types = ["Undefined", "Stress response T-cells"]
is_excluded = frac_by_condition["cell_type_anno"].isin(excluded_cell_types)
frac_by_condition = frac_by_condition[~is_excluded]

In [None]:
# Remove the rows of the "GF-plus" group ("condition" mirrors "group" here).
not_gf_plus = frac_by_condition["condition"] != "GF-plus"
frac_by_condition = frac_by_condition[not_gf_plus]

In [None]:
# Remove the rows whose treatment is "No ICI".
not_no_ici = frac_by_condition["treatment"] != "No ICI"
frac_by_condition = frac_by_condition[not_no_ici]

In [None]:
# Remove the rows whose treatment is "ICI".
# NOTE(review): the previous cell already removes "No ICI". According to the `samples`
# table the only other treatment value is "naive", so after this filter only "naive"
# rows remain: every row is then labelled experiment "2019" and the later
# `data_all_2021` subset is empty. Confirm both filters are meant to run together.
frac_by_condition = frac_by_condition[frac_by_condition['treatment'] != 'ICI']

In [None]:
# Relabel instead of mutating in place.
# - `df.col.replace(..., inplace=True)` acts on a column obtained through attribute
#   access on a filtered frame (chained assignment); under pandas copy-on-write it
#   does not modify `frac_by_condition` at all, so the result is assigned back.
# - "No ICI" is a treatment label, not a group label, and the original literal had a
#   trailing tab so it could never match; the replacement now targets `treatment`.
# If the earlier filter cells already removed the "GF-plus" / "No ICI" rows, both
# replacements are no-ops.
frac_by_condition = frac_by_condition.assign(
    group=lambda counts: counts["group"].replace("GF-plus", "GF"),
    treatment=lambda counts: counts["treatment"].replace("No ICI", "naive"),
)

In [None]:

# Label every row with its experiment: treatment "naive" -> "2019", anything else -> "2021".
# Vectorised replacement for the row-by-row iterrows/.loc loop; unlike the loop it also
# creates the "experiment" column when the frame is empty, so later cells do not hit a KeyError.
frac_by_condition["experiment"] = (
    frac_by_condition["treatment"].eq("naive").map({True: "2019", False: "2021"})
)

In [None]:
# Distinct values left in "condition" (the string copy of "group") after filtering.
set(frac_by_condition.condition)

In [None]:
# Inspect the filtered long-format count table.
frac_by_condition

In [None]:
# Wide count table: one row per (group, treatment, sample_id, experiment),
# one column per cell type, values taken from "n_cells".
# A sample/cell-type combination with no row in frac_by_condition comes out of
# pivot as NaN; a missing count means zero cells, so fill with 0 before the
# covariates are moved back into columns.
frac_pivot = (
    frac_by_condition.pivot(
        index=["group", "treatment", "sample_id", "experiment"],
        columns="cell_type_anno",
        values="n_cells",
    )
    .fillna(0)
    .reset_index()
)


In [None]:
# Build the scCODA data object: the listed columns are covariates,
# every remaining column of frac_pivot is passed along as data.
covariate_columns = ["group", "treatment", "sample_id", "experiment"]
data_all = scc_dat.from_pandas(frac_pivot, covariate_columns=covariate_columns)

In [None]:
import pandas as pd

In [None]:
# Fix the category order of the covariates. Values not listed for a column become NaN.
_category_order = {
    "group": ['10mix', '11mix', 'GF'],
    "treatment": ['ICI', 'naive'],
}
for _column, _categories in _category_order.items():
    data_all.obs[_column] = pd.Categorical(
        data_all.obs[_column], categories=_categories
    )

In [None]:
# NOTE(review): _sanitize is a private AnnData method (leading underscore); presumably it
# converts string annotation columns to categoricals — confirm, and prefer a public
# equivalent if the installed anndata version offers one.
data_all._sanitize()

In [None]:
# scCODA box plots of data_all, split by the "group" covariate.
scc_viz.boxplots(
    data_all,
    feature_name="group",
    figsize=(12, 5),
    cmap="inferno",
)


In [None]:
# scCODA box plots of data_all, split by the "treatment" covariate.
scc_viz.boxplots(
    data_all,
    feature_name="treatment",
    figsize=(12, 5),
    cmap="inferno",
)


In [None]:
# Define the 2019 subset before it is first used; previously this cell raised
# NameError on a fresh kernel because data_all_2019 was only created further down.
data_all_2019 = data_all[data_all.obs["experiment"] == "2019"]
data_all_2019.obs

In [None]:
# scCODA box plots of the 2019 subset, split by sample.
scc_viz.boxplots(
    data_all_2019,
    feature_name="sample_id",
    figsize=(12, 5),
    cmap="inferno",
)


In [None]:
# Marker-gene notes per T-cell state. They were free text inside a code cell
# (a SyntaxError on Run All); kept as a dict so the cell executes and the lists are reusable.
# NOTE(review): presumably these genes motivated the labels in cl_annotation — confirm.
marker_genes = {
    "Tnaive": ["Ltb", "Sell", "Lef1", "Nosip"],
    "Tex": ["Cxcr6", "Gzmb", "Havcr2", "Ctla4"],
    "Tsen": ["Plcg2", "Igkc", "Klrk1", "Slamf7"],
    "Tifn": ["Isg15", "Ifit3", "Ifit1", "Stat1"],
}


In [None]:
# Split the scCODA data by the "experiment" label.
experiment_label = data_all.obs["experiment"]
data_all_2019 = data_all[experiment_label == "2019"]
data_all_2021 = data_all[experiment_label == "2021"]

In [None]:
# scCODA stacked bar plot of the 2019 subset, one bar per group.
scc_viz.stacked_barplot(data_all_2019, feature_name="group")


In [None]:
# scCODA stacked bar plot of the 2021 subset, one bar per group.
scc_viz.stacked_barplot(data_all_2021, feature_name="group")


In [None]:
# scCODA stacked bar plot of the 2021 subset, one bar per sample.
scc_viz.stacked_barplot(data_all_2021, feature_name="sample_id")


In [None]:
# scCODA stacked bar plot of the 2019 subset, one bar per sample.
scc_viz.stacked_barplot(data_all_2019, feature_name="sample_id")


In [None]:
# scCODA stacked bar plot of all retained samples, one bar per treatment.
scc_viz.stacked_barplot(data_all, feature_name="treatment")


In [None]:
# scCODA stacked bar plot of all retained samples, one bar per sample.
scc_viz.stacked_barplot(data_all, feature_name="sample_id")