# 2. Celltype deconvolution by cell2location

In [None]:
import sys
import warnings

import cell2location
import matplotlib.pyplot as plt
import pandas as pd
import scanpy as sc
import seaborn as sns
from cell2location.utils import select_slide
from matplotlib import rcParams

# The project-local helpers live outside the notebook directory, so the module
# search path must be extended *before* `helper_functions` is imported.
sys.path.insert(1, "../../helper_functions")

# This later import rebinds `select_slide`, so the helper_functions version is
# the one used below (same precedence as the original import order).
from helper_functions import select_slide  # noqa: E402,F811

# Silence library warnings for the rest of the notebook.
warnings.filterwarnings("ignore")


# Font type 42 embeds TrueType fonts so text stays editable in exported PDFs.
rcParams["pdf.fonttype"] = 42

### Preparing reference signature

We have two options for a reference: Gouin paper and Chen paper. We don't want to merge them because they have a different reference and a different annotation.

To infer signatures, run: 

1. `../../scripts/submit_cell2location_reference_Gouin.sh`
2. `../../scripts/submit_cell2location_reference_Chen.sh`

## QC of the Gouin reference model

In [None]:
# Sample identifiers used by the per-sample loops further down.
samples = ["B22", "B24", "B60", "B154", "B156", "B175", "B178", "B4", "B42", "B123"]

In [None]:
# Load the saved Gouin reference RegressionModel from its model directory.
mod_ref = cell2location.models.RegressionModel.load("../Gouin_muscle_reference_model_subtype/")

In [None]:
# Export the posterior for the model's own AnnData
# (1000 posterior samples, drawn in batches of 2500).
adata_ref = mod_ref.export_posterior(
    mod_ref.adata,
    sample_kwargs=dict(num_samples=1000, batch_size=2500),
)

In [None]:
# Number of reference observations per value of the `subtype` annotation.
adata_ref.obs["subtype"].value_counts()

In [None]:
# Draw the reference model's built-in QC diagnostics.
mod_ref.plot_QC()

In [None]:
# Drop the reference model and its AnnData; neither is used in the cells below.
del mod_ref, adata_ref

### Mapping signatures to spots

To estimate the number of cells per spot, Visium samples were manually inspected in Loupe Browser. In the end, we settled on an estimate of 20 cells per spot.

However, we wanted to test multiple alpha values (regularization). Thus we ran the mapping for alphas 2, 20 and 200.

Mapping was performed with:
1. `../../scripts/submit_cell2location_visium_celltypes_N20_alpha20_chen.sh`
2. `../../scripts/submit_cell2location_visium_celltypes_N20_alpha20_gouin.sh`

For alpha values 2, 20 and 200

### Plot cell type abundances

In [None]:
# Column (cell type) order for the stacked bar plot of the Gouin reference.
# Order matters: the abundance table is re-indexed with this list and the
# entries of `color_order` are matched to it by position.
column_order = [
    # Epithelial subtypes
    "CDH12_Epithelial",
    "Cycling_Epithelial",
    "KRT6A_Epithelial",
    "KRT13_Epithelial",
    "UPK_Epithelial",
    # Fibroblast subtypes
    "ACTA2_Fibroblast",
    "FAP_Fibroblast",
    "PDGFRB_Fibroblast",
    "PDPN_Fibroblast",
    "Endothelial",
    # Myeloid / dendritic
    "Inflam_Macrophage",
    "MHCII_Macrophage",
    "Dendritic_cell",
    # B lineage
    "CD20_Bcell",
    "Plasma_Bcell",
    # T cells
    "CD8T",
    "Naive_Tcell",
    "Treg",
    "Normal_Smooth_muscle",
]

In [None]:
# Display names for the reference cell type labels; used to rename the
# abundance table columns so these strings end up in the figure legend.
cellt_newnames = {
    "CDH12_Epithelial": "Immune-infiltrated epithelial",  # fixed typo ("inflitrated")
    "Cycling_Epithelial": "Cycling epithelial",
    "KRT6A_Epithelial": "Basal epithelial",
    "KRT13_Epithelial": "Luminal epithelial (KRT13+)",
    "UPK_Epithelial": "Luminal epithelial (UPK)",
    "ACTA2_Fibroblast": "mCAF",
    "FAP_Fibroblast": "iCAF (FAP+)",
    "PDGFRB_Fibroblast": "General CAF",
    "PDPN_Fibroblast": "iCAF (PDPN+)",
    "Endothelial": "Endothelial",
    "Inflam_Macrophage": "Inflammatory macrophage",
    "MHCII_Macrophage": "Antigen-presenting macrophage",
    "Dendritic_cell": "Dendritic cell",
    "CD20_Bcell": "B cell",
    "Plasma_Bcell": "Plasma cell",
    "CD8T": "CD8+ T cell",
    "Naive_Tcell": "Naive T cell",
    "Treg": "Regulatory T cell",
    "Normal_Smooth_muscle": "Normal smooth muscle",
}

In [None]:
# Bar colors for the Gouin stacked bar plot: one hex color per entry of
# `column_order`, matched by position (passed as `color=` to the plot).
color_order = [
    "#a03704",
    "#d55607",
    "#f4811d",
    "#feb23f",
    "#feda7e",
    "#9f0245",
    "#d51965",
    "#e4429a",
    "#d280bd",
    "#11859a",
    "#95d6bb",
    "#c2e7c0",
    "#fedbcc",
    "#539ecd",
    "#dbe9f6",
    "#828282",
    "#adadad",
    "#d1d1d1",
    "#05712f",
]

### First for Gouin III reference

In [None]:
# For each alpha: load the merged Gouin-reference posteriors, save one spatial
# abundance figure per sample, then draw a stacked bar plot of the relative
# cell type abundance per sample.
sns.reset_defaults()
for alpha in [20]:
    print(f"Working on alpha {alpha}")
    adata_celltypes = sc.read_h5ad(
        f"../visium_model_alpha{alpha}_N20_Gouin_muscle_merged/posteriors_adata.h5ad"
    )

    # One single-row frame of relative abundances per sample; concatenated
    # once after the loop instead of growing a DataFrame on every iteration.
    abundance_rows = []
    for sample in samples:
        # Subset once and reuse it for both the spatial plot and the summary.
        slide = select_slide(adata_celltypes, sample)
        factor_names = slide.uns["mod"]["factor_names"]

        sc.pl.spatial(
            slide,
            cmap="magma",
            color=factor_names,
            ncols=3,
            size=1.5,
            vmin=0,
            wspace=0.3,
            img_key=None,
            show=False,
        )
        plt.savefig(
            f"../figures/{sample}_alpha{alpha}_N20_Gouin_muscle_merged_celltypes.svg",
            dpi=300,
        )
        plt.close()

        # Per-cell-type totals divided by the grand total -> fractions that
        # sum to 1 for this sample.
        abundances = slide.obs.loc[:, factor_names]
        row = pd.DataFrame(abundances.sum() / abundances.values.sum()).T
        row.index = [sample]
        abundance_rows.append(row)

    master = pd.concat(abundance_rows)
    # Fix the column order (colors are matched by position), then switch to
    # the display names for the legend.
    master = master[column_order].rename(columns=cellt_newnames)

    ax = master.plot.bar(stacked=True, width=0.9, color=color_order, figsize=(8, 6))
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    plt.legend(
        title="Cell type",
        bbox_to_anchor=(1.05, 1),
        loc="upper left",
        title_fontsize=19,
        fontsize=12,
    )
    plt.xlabel("Sample", size=19)
    plt.ylabel("Relative cell type abundance", size=19)
    plt.xticks(size=12, rotation=45)
    plt.yticks(size=12)
    plt.tight_layout()
    plt.savefig(
        f"suppfig_6E_celltypes_barplot_alpha{alpha}_N20_Gouin_muscle_merged.svg",
        dpi=300,
        bbox_inches="tight",
    )
    plt.show()
    plt.close()

    # Release the merged AnnData before the next (large) load.
    del adata_celltypes

### Chen reference

In [None]:
# For each alpha: load the merged Chen-reference posteriors, save one spatial
# abundance figure per sample, then draw a stacked bar plot of the relative
# cell type abundance per sample.

# Cell type columns summarised in the bar plot, in stacking order.
chen_celltypes = [
    "Epithelial cells",
    "iCAF",
    "mCAF",
    "Endothelial cells",
    "Myeloid cells",
    "Mast cells",
    "B cells",
    "T cells",
]
# One bar color per entry of `chen_celltypes`, matched by position.
chen_colors = [
    "#d55607",
    "#d51965",
    "#9f0245",
    "#11859a",
    "#95d6bb",
    "#c2e7c0",
    "#539ecd",
    "#adadad",
]

for alpha in [20]:
    print(f"Working on alpha {alpha}")
    adata_celltypes = sc.read_h5ad(
        f"../visium_model_alpha{alpha}_N20_Chen_merged/posteriors_adata.h5ad"
    )

    # One single-row frame of relative abundances per sample; concatenated
    # once after the loop instead of growing a DataFrame on every iteration.
    abundance_rows = []
    # Same sample list as the Gouin section (`samples`), rather than a
    # hard-coded copy that could drift out of sync.
    for sample in samples:
        # Subset once and reuse it for both the spatial plot and the summary.
        slide = select_slide(adata_celltypes, sample)

        sc.pl.spatial(
            slide,
            cmap="magma",
            color=slide.uns["mod"]["factor_names"],
            ncols=3,
            size=1.5,
            vmin=0,
            wspace=0.3,
            img_key=None,
            show=False,
        )
        plt.savefig(
            f"../figures/{sample}_alpha{alpha}_N20_Chen_merged_celltypes.svg", dpi=300
        )
        plt.close()

        # Per-cell-type totals divided by the grand total -> fractions that
        # sum to 1 for this sample.
        abundances = slide.obs.loc[:, chen_celltypes]
        row = pd.DataFrame(abundances.sum() / abundances.values.sum()).T
        row.index = [sample]
        abundance_rows.append(row)

    master = pd.concat(abundance_rows)

    ax = master.plot.bar(
        stacked=True,
        width=0.9,
        color=chen_colors,
        figsize=(8, 6),
    )
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    plt.legend(
        title="Cell type",
        bbox_to_anchor=(1.05, 1),
        loc="upper left",
        title_fontsize=19,
        fontsize=12,
    )
    plt.xlabel("Sample", size=19)
    plt.ylabel("Relative cell type abundance", size=19)
    plt.xticks(size=12, rotation=45)
    plt.yticks(size=12)
    plt.tight_layout()
    # bbox_inches="tight" matches the Gouin figure and keeps the legend, which
    # is anchored outside the axes, inside the saved image.
    plt.savefig(
        f"suppfig_6F_celltypes_barplot_alpha{alpha}_N20_Chen_merged.svg",
        dpi=300,
        bbox_inches="tight",
    )
    plt.show()
    plt.close()