In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import os
import warnings

# Suppress all future warnings
# NOTE(review): this silences every FutureWarning for the whole session, including
# deprecation notices from pandas/scanpy — remove the filter when debugging.
warnings.simplefilter(action="ignore", category=FutureWarning)

# Log the scanpy header (version information) for the record.
sc.logging.print_header()

In [None]:
# Move up one level when the notebook is started from inside the "notebooks"
# folder, so the relative paths used below resolve from its parent directory.
# The current folder name is compared exactly: the previous substring test on
# the full path also fired when any ancestor directory merely contained
# "notebooks" in its name.
print("Previous working directory: " + os.getcwd())
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("..")
print("Current working directory: " + os.getcwd())

In [None]:
# Load the dataset (path is relative to the current working directory).
adata = sc.read_h5ad("SCC_KCreanalysis_RNA_PP_sep2024.h5ad")

# Copy the UMAP coordinates into obsm["X_umap"] as a NumPy array.
# Assumes `adata.obsm['UMAP']` is a DataFrame with 'umap_1' and 'umap_2' columns.
adata.obsm["X_umap"] = adata.obsm["UMAP"][["umap_1", "umap_2"]].to_numpy()

# Display the AnnData summary to check the result.
adata

In [None]:
# Keep a copy of adata.X, as loaded, under layers['counts'].
# NOTE(review): the layer name implies raw counts — confirm that X is unnormalised in the input file.
adata.layers['counts'] = adata.X.copy()


In [None]:
# Store the UMAP coordinates under obsm["X_umap"]. The two coordinate columns are
# selected explicitly (as in the loading cell) so that column order or any extra
# columns in obsm["UMAP"] cannot change the stored embedding.
adata.obsm["X_umap"] = adata.obsm["UMAP"][["umap_1", "umap_2"]].to_numpy()

In [None]:
# UMAP of the full dataset coloured by the "ident" and "Level1_unnamed" obs columns.
sc.pl.umap(
    adata,
    color=["ident", "Level1_unnamed"],
    legend_loc="on data",
)

Expected output:<br>
![sample output](./images/umap_1.png)

In [None]:
# Read the per-lineage annotation tables (tab-separated, first column as index),
# stack them, and join them onto adata.obs by index.
# Paths are built with os.path.join: the previous 'data\...' literals relied on
# invalid escape sequences and a Windows-only separator.
annotation_tables = pd.concat(
    [
        pd.read_csv(os.path.join("data", file_name), sep="\t", index_col=0)
        for file_name in (
            "Endothelial_annotations.txt",
            "Fibroblast_annotations.txt",
            "Immune_annotations.txt",
            "KC_annotations.txt",
            "Melanocyte_annotations.txt",
        )
    ]
)

# The two integrated_snn_res.* columns are dropped before joining.
adata.obs = adata.obs.join(
    annotation_tables.drop(
        ["integrated_snn_res.0.1", "integrated_snn_res.0.4"], axis=1
    )
)

In [None]:
# Subset Immune Cells
# Take an explicit copy: the next cells add columns to idata.var and normalise
# idata in place, and writing to an AnnData view forces an implicit copy.
idata = adata[adata.obs.Level1_unnamed == 'immune'].copy()

In [None]:
# Flag genes whose name starts with "MT-" and compute QC metrics using that flag.
idata.var["mt"] = idata.var_names.str.startswith("MT-")
sc.pp.calculate_qc_metrics(
    idata,
    qc_vars=["mt"],
    percent_top=None,
    log1p=False,
    inplace=True,
)

# Normalise each cell to a total of 1e4, log1p-transform, then flag highly variable genes.
sc.pp.normalize_total(idata, target_sum=1e4)
sc.pp.log1p(idata)
sc.pp.highly_variable_genes(
    idata,
    min_mean=0.0125,
    max_mean=3,
    min_disp=0.5,
)

In [None]:
import re

# Remove genes matching the VDJ pattern (names starting with TRA/TRB or
# IGH/IGK/IGL followed by a segment letter) from the highly variable set.
# A single vectorised mask replaces the per-gene loop and its always-true
# `is not None` check; Series.str.contains applies the same regex search.
vdj_pattern = "^TR[AB][VDJ]|^IG[HKL][VDJC]"
idata.var.loc[
    idata.var_names.str.contains(vdj_pattern, regex=True), "highly_variable"
] = False

In [None]:
# Snapshot the current (all-gene) object in .raw, then keep only the genes
# flagged as highly variable.
idata.raw = idata
idata = idata[:, idata.var["highly_variable"]].copy()

In [None]:
# Scale (values clipped at 10) and run PCA.
sc.pp.scale(idata, max_value=10)
sc.tl.pca(idata, svd_solver="arpack")

# Build the neighbour graph, cluster with Leiden, and compute a UMAP embedding.
sc.pp.neighbors(idata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(idata)
sc.tl.umap(idata, min_dist=0.3)

In [None]:
# Display the summary of the immune AnnData object.
idata

In [None]:
# Immune UMAP coloured by Leiden cluster and a panel of marker genes.
sc.pl.umap(
    idata,
    color=[
        "leiden",
        "JCHAIN", "CD19", "CD3E", "CD8A", "CD8B", "CD4",
        "CD1C", "PCLAF", "CD14", "S100A9", "NCAM1",
        "KRT14", "MS4A1", "FCN1", "KIT",
    ],
    legend_loc="on data",
)

Expected output:<br>
![sample output](./images/umap_2.png)

In [None]:
# Rank genes per Leiden cluster with the Wilcoxon method.
sc.tl.rank_genes_groups(
    idata,
    groupby="leiden",
    method="wilcoxon",
)


In [None]:
# Dot plot of the top 5 ranked genes per Leiden cluster, scaled per gene.
sc.pl.rank_genes_groups_dotplot(
    idata,
    n_genes=5,
    groupby="leiden",
    standard_scale="var",
)

Expected output:<br>
![sample output](./images/heatmap_1.png)

In [None]:
# 5, 14 are doublets with keratinocytes?? Express KRT14

# Leiden cluster -> broad immune label, written as label -> clusters and inverted.
new_cell_dict = {
    cluster: label
    for label, clusters in {
        "Imm_T cell": ["0", "1", "2", "3", "4", "7", "8", "9", "12", "18"],
        "Imm_Myeloid": ["5", "10", "13", "16", "17"],
        "Imm_B cell": ["11"],
        "Imm_Plasma": ["15"],
        "Ambiguous": ["6", "14", "19"],
    }.items()
    for cluster in clusters
}
idata.obs["cell_type_PP"] = [
    new_cell_dict[cluster] for cluster in idata.obs["leiden"]
]

# Independent copies of the myeloid and T-cell subsets for sub-clustering.
mdata = idata[idata.obs["cell_type_PP"] == "Imm_Myeloid"].copy()
tdata = idata[idata.obs["cell_type_PP"] == "Imm_T cell"].copy()

In [None]:
# Export the immune obs table (including the index) as a tab-separated file.
idata.obs.to_csv(
    "Imm_annot_PP_adata_obs.txt",
    sep="\t",
    index=True,
)

Compute subclusters for the T cell and Myeloid cell groups

In [None]:
# Rebuild the myeloid object from .raw (the all-gene snapshot) and rerun HVG
# selection, scaling, PCA, neighbours, Leiden and UMAP on this subset.
mdata = mdata.raw.to_adata()
sc.pp.highly_variable_genes(mdata, min_mean=0.0125, max_mean=3, min_disp=0.5)

# Remove genes matching the VDJ pattern from the highly variable set. One
# vectorised mask replaces the per-gene loop and its always-true None check.
vdj_pattern = "^TR[AB][VDJ]|^IG[HKL][VDJC]"
mdata.var.loc[
    mdata.var_names.str.contains(vdj_pattern, regex=True), "highly_variable"
] = False

# Snapshot all genes in .raw before restricting to the highly variable ones.
mdata.raw = mdata
mdata = mdata[:, mdata.var["highly_variable"]].copy()

sc.pp.scale(mdata, max_value=10)
sc.tl.pca(mdata, svd_solver="arpack")
sc.pp.neighbors(mdata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(mdata)
sc.tl.umap(mdata, min_dist=0.3)

In [None]:
# Rebuild the T-cell object from .raw (the all-gene snapshot) and rerun HVG
# selection, scaling, PCA, neighbours, Leiden and UMAP on this subset.
tdata = tdata.raw.to_adata()
sc.pp.highly_variable_genes(tdata, min_mean=0.0125, max_mean=3, min_disp=0.5)

# Remove genes matching the VDJ pattern from the highly variable set. One
# vectorised mask replaces the per-gene loop and its always-true None check.
vdj_pattern = "^TR[AB][VDJ]|^IG[HKL][VDJC]"
tdata.var.loc[
    tdata.var_names.str.contains(vdj_pattern, regex=True), "highly_variable"
] = False

# Snapshot all genes in .raw before restricting to the highly variable ones.
tdata.raw = tdata
tdata = tdata[:, tdata.var["highly_variable"]].copy()

sc.pp.scale(tdata, max_value=10)
sc.tl.pca(tdata, svd_solver="arpack")
sc.pp.neighbors(tdata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(tdata)
sc.tl.umap(tdata, min_dist=0.3)

In [None]:
# List the obs column names available on the myeloid object.
mdata.obs_keys()

Analysis of myeloid cell data

In [None]:
# Myeloid UMAP coloured by Leiden cluster.
sc.pl.umap(
    mdata,
    color=["leiden"],
    legend_loc="on data",
)

Expected output:<br>
![sample output](./images/umap_3.png)

In [None]:
# Myeloid UMAP coloured by cluster, cancer status and selected marker genes.
sc.pl.umap(
    mdata,
    color=[
        "leiden",
        "cancer_status_corrected",
        "CD1C", "S100A9", "CD14", "FCGR3A", "IL15RA",
        "TREM2", "CD68", "CD8A", "JCHAIN", "CALML5",
    ],
    legend_loc="on data",
)

Expected output:<br>
![sample output](./images/umap_4.png)

In [None]:
# Same embedding with the default legend placement.
sc.pl.umap(
    mdata,
    color=["leiden", "cancer_status_corrected"],
)

Expected output:<br>
![sample output](./images/umap_5.png)

In [None]:
# Sub-cluster Leiden cluster 3 (result stored under "leiden_R"), then sub-cluster
# cluster 8 of that refined labelling.
sc.tl.leiden(mdata, restrict_to=("leiden", ["3"]), resolution=0.2)
sc.tl.leiden(mdata, restrict_to=("leiden_R", ["8"]), resolution=0.2)

sc.pl.umap(
    mdata,
    color="leiden_R",
    legend_loc="on data",
    legend_fontoutline=2,
)

Expected output:<br>
![sample output](./images/umap_6.png)

In [None]:
# Rank genes per myeloid Leiden cluster with the Wilcoxon method.
sc.tl.rank_genes_groups(
    mdata,
    groupby="leiden",
    method="wilcoxon",
)

In [None]:
# Compute the cluster dendrogram first, then plot the top 6 ranked genes per cluster.
sc.tl.dendrogram(mdata, groupby="leiden")
sc.pl.rank_genes_groups_dotplot(
    mdata,
    n_genes=6,
    groupby="leiden",
    standard_scale="var",
)

Expected output:<br>
![sample output](./images/heatmap_2.png)

In [None]:
# Marker panel (grouped by the categories named in the dict keys) per Leiden cluster.
sc.pl.dotplot(
    mdata,
    var_names={
        "General": ["ITGAM", "HLA-DRA"],
        "LC": ["CD207", "EPCAM", "ITGAX"],
        "Monocyte": ["CCR2", "CD14", "FCGR3A", "S100A8", "S100A9", "FCN1", "CX3CR1"],
        "DC1": ["XCR1", "CLEC9A"],
        "DC2": ["CLEC10A", "THBD"],
        "DC3": ["CSF3R", "CSF2RA"],
        "mRegDC": ["LAMP3", "CCR7"],
        "pDC": ["IL3RA", "KIT", "IRF8"],
        "Mac": ["C1QA", "FCGR1A", "MERTK", "TREM2", "SIGLEC1", "CD68", "LYVE1", "ARG1", "CD163"],
        "prolif": ["MKI67"],
        "IFN": ["ISG15", "IFI27", "STAT1"],
        "contamination": ["CD3D", "NCAM1"],
    },
    groupby="leiden",
    standard_scale="var",
    color_map="Blues",
)

Expected output:<br>
![sample output](./images/heatmap_3.png)

In [None]:
# 2  Unknown
# 11 Contamination
# 12 Potentially granular keratinocyte

# Leiden cluster -> label, written as label -> clusters and inverted.
new_dict = {
    cluster: label
    for label, clusters in {
        "Imm_LC": ["0", "3"],
        "Imm_CX3CR1+ Mono/Mac": ["1"],
        "Imm_CD169+ Mac": ["4", "5"],
        "Imm_CD14+ Mono": ["6", "7"],
        "Imm_mRegDC": ["9"],
        "Imm_LC KI67+": ["10"],
        "Ambiguous": ["2", "8", "11", "12"],
    }.items()
    for cluster in clusters
}
mdata.obs["cell_type_js"] = [new_dict[cluster] for cluster in mdata.obs.leiden]

In [None]:
# Same marker panel, grouped by the refined "leiden_R" clusters.
sc.pl.dotplot(
    mdata,
    var_names={
        "General": ["ITGAM", "HLA-DRA"],
        "LC": ["CD207", "EPCAM", "ITGAX"],
        "Monocyte": ["CCR2", "CD14", "FCGR3A", "S100A8", "S100A9", "FCN1", "CX3CR1"],
        "DC1": ["XCR1", "CLEC9A"],
        "DC2": ["CLEC10A", "THBD"],
        "DC3": ["CSF3R", "CSF2RA"],
        "mRegDC": ["LAMP3", "CCR7"],
        "pDC": ["IL3RA", "KIT", "IRF8"],
        "Mac": ["C1QA", "FCGR1A", "MERTK", "TREM2", "SIGLEC1", "CD68", "LYVE1", "ARG1", "CD163"],
        "prolif": ["MKI67"],
        "IFN": ["ISG15", "IFI27", "STAT1"],
        "contamination": ["CD3D", "NCAM1"],
    },
    groupby="leiden_R",
    standard_scale="var",
    color_map="Blues",
)

Expected output:<br>
![sample output](./images/heatmap_3.png)

In [None]:
# 2  Unknown
# 11 Contamination
# 12 Potentially granular keratinocyte

# Refined cluster ("leiden_R") -> label, written as label -> clusters and inverted.
new_dict = {
    cluster: label
    for label, clusters in {
        "Imm_LC": ["0", "3,0"],
        "Imm_CX3CR1+ Mono/Mac": ["1", "8,0"],
        "Imm_CD169+ Mac": ["4"],
        "Imm_TREM2+ Mac": ["5"],
        "Imm_CD14+ Mono": ["6"],
        "Imm_DC1/DC2": ["7"],
        "Imm_mRegDC": ["9"],
        "Imm_LC KI67+": ["10"],
        "Ambiguous": ["2", "8,1", "11", "12"],
    }.items()
    for cluster in clusters
}
mdata.obs["cell_type_PP"] = [new_dict[cluster] for cluster in mdata.obs.leiden_R]

sc.pl.umap(mdata, color=["cell_type_PP"])

Expected output:<br>
![sample output](./images/umap_7.png)

In [None]:
# Export the myeloid obs table (including the index) as a tab-separated file.
mdata.obs.to_csv(
    "Imm_myeloid_annot_PP_adata_obs.txt",
    sep="\t",
    index=True,
)

Verify the new cell labels by checking the gene markers

In [None]:
# Marker panel grouped by the assigned "cell_type_PP" labels.
sc.pl.dotplot(
    mdata,
    var_names={
        "General": ["ITGAM", "HLA-DRA"],
        "LC": ["CD207", "EPCAM", "ITGAX"],
        "Monocyte": ["CCR2", "CD14", "FCGR3A", "S100A8", "S100A9", "FCN1", "CX3CR1"],
        "DC1": ["XCR1", "CLEC9A"],
        "DC2": ["CLEC10A", "THBD"],
        "DC3": ["CSF3R", "CSF2RA"],
        "mRegDC": ["LAMP3", "CCR7"],
        "pDC": ["IL3RA", "KIT", "IRF8"],
        "Mac": ["C1QA", "FCGR1A", "MERTK", "TREM2", "SIGLEC1", "CD68", "LYVE1", "ARG1", "CD163"],
        "prolif": ["MKI67"],
        "IFN": ["ISG15", "IFI27", "STAT1"],
        "contamination": ["CD3D", "NCAM1"],
    },
    groupby="cell_type_PP",
    standard_scale="var",
    color_map="Blues",
)

Expected output:<br>
![sample output](./images/heatmap_5.png)

Analysis of T cell data

In [None]:
# T-cell UMAP coloured by Leiden cluster, labels drawn on the embedding.
sc.pl.umap(tdata, color=["leiden"], legend_loc="on data", legend_fontoutline=2)

Expected output:<br>
![sample output](./images/umap_8.png)

Split clusters 3, 9, 2 and 12 due to apparent subdivisions

In [None]:
# The first split starts from "leiden" and stores the result as "leiden_R"; each
# later split refines "leiden_R" in turn.
sc.tl.leiden(tdata, resolution=0.15, restrict_to=("leiden", ["3"]))
for split_cluster, split_resolution in (("9", 0.3), ("2", 0.3), ("12", 0.2)):
    sc.tl.leiden(
        tdata,
        resolution=split_resolution,
        restrict_to=("leiden_R", [split_cluster]),
    )

sc.pl.umap(
    tdata,
    color="leiden_R",
    legend_loc="on data",
    legend_fontoutline=2,
)

Expected output:<br>
![sample output](./images/umap_9.png)

In [None]:
# T-cell UMAP coloured by refined cluster and selected marker genes.
sc.pl.umap(
    tdata,
    color=[
        "leiden_R",
        "CD3E", "CD3D", "CD4", "CD8A", "CD8B",
        "CCR7", "SELL", "CD44", "CD27", "NKG7",
    ],
    legend_loc="on data",
    legend_fontoutline=2,
)

Expected output:<br>
![sample output](./images/umap_10.png)

In [None]:
# Rank genes per refined cluster and show the top 5 per cluster.
# NOTE(review): no `method` is passed here, whereas the myeloid rankings in this
# notebook use method="wilcoxon" — confirm that scanpy's default is intended.
sc.tl.rank_genes_groups(tdata, groupby="leiden_R")
sc.pl.rank_genes_groups_dotplot(
    tdata,
    n_genes=5,
    min_logfoldchange=1,
    dendrogram=False,
    standard_scale="var",
    color_map="viridis",
)

Expected output:<br>
![sample output](./images/heatmap_6.png)

In [None]:
# T-cell marker panel (including endothelial and fibroblast genes) per refined cluster.
sc.pl.dotplot(
    tdata,
    var_names={
        "T cell": ["CD3D", "CD3E", "CD4", "CD8B", "CD8A", "CCR7", "SELL", "CD44", "CD27"],
        "Trm": ["CD69", "ITGAE", "CRTAM", "S1PR1"],
        "Treg": ["FOXP3", "PDCD1", "CTLA4", "IL2RA"],
        "NK": ["NCAM1", "NCR1", "IFNG", "GZMB", "GNLY", "XCL2"],
        "ILC": ["IL7R", "XCL1", "RORC"],
        "prolif": ["MKI67"],
        "IFN": ["ISG15", "IFI27", "STAT1"],
        "Contamination": ["ITGAM", "S100A8"],
        "Endothelial": ["CLDN5", "PLVAP", "SPARCL1"],
        "Fibroblast": ["PDGFRA", "PDGFRB", "COL1A1", "COL1A2", "DCN"],
    },
    groupby="leiden_R",
    standard_scale="var",
    color_map="Blues",
)

Expected output:<br>
![sample output](./images/heatmap_7.png)

In [None]:
# Refined T-cell clusters again, for reference while assigning labels.
sc.pl.umap(
    tdata,
    color="leiden_R",
    legend_loc="on data",
    legend_fontoutline=2,
)

Expected output:<br>
![sample output](./images/umap_11.png)

In [None]:
# 6,7 Keratinocyte?
# 11 Fibroblast

# Refined cluster ("leiden_R") -> T-cell label.
new_dict = {
    "0": "Imm_CD4Tcm",
    "1": "Imm_CD8Tem",
    "2,0": "Imm_CD4Tcm",
    "2,1": "Ambiguous",
    "3,0": "Imm_Treg",
    "3,1": "Imm_PD-1+CTLA4+ CD4Tcm",
    "4": "Imm_CD8Tem",
    "7": "Imm_CD4Tcm",
    "8": "Ambiguous",
    "9,0": "Imm_PD-1+ CD8Tem",
    "9,1": "Imm_CD8Trm",
    "12,0": "Imm_CD8Trm",
    "12,1": "Imm_NK",
    "18": "Ambiguous",
}

# The mapping has no entry for several cluster ids (e.g. "5", "6", "10", "11"),
# so a plain new_dict[x] lookup raises KeyError as soon as one of them occurs.
# Such clusters are labelled "Ambiguous" and reported, which keeps the omission
# visible; mapped clusters are labelled exactly as before.
unmapped_clusters = sorted(set(tdata.obs.leiden_R.astype(str)) - set(new_dict))
if unmapped_clusters:
    print("Clusters without an explicit label, set to 'Ambiguous':", unmapped_clusters)
tdata.obs["cell_type_PP"] = [
    new_dict.get(x, "Ambiguous") for x in tdata.obs.leiden_R
]

sc.pl.umap(
    tdata,
    color=["cell_type_PP"],
)

Expected output:<br>
![sample output](./images/umap_12.png)

In [None]:
# Export the T-cell obs table (including the index) as a tab-separated file.
tdata.obs.to_csv(
    "Imm_Tcell_annot_PP_adata_obs.txt",
    sep="\t",
    index=True,
)

Verify labels with gene markers

In [None]:
# T-cell marker panel grouped by the assigned "cell_type_PP" labels.
sc.pl.dotplot(
    tdata,
    var_names={
        "T cell": ["CD3D", "CD3E", "CD4", "CD8B", "CD8A", "CCR7", "SELL", "CD44", "CD27"],
        "Trm": ["CD69", "ITGAE", "CRTAM", "S1PR1"],
        "Treg": ["FOXP3", "PDCD1", "CTLA4", "IL2RA"],
        "NK": ["NCAM1", "NCR1", "IFNG", "GZMB"],
        "ILC": ["IL7R", "XCL1", "RORC"],
        "prolif": ["MKI67"],
        "IFN": ["ISG15", "IFI27", "STAT1"],
        "Contamination": ["ITGAM", "S100A8"],
    },
    groupby="cell_type_PP",
    standard_scale="var",
    color_map="Blues",
)

Expected output:<br>
![sample output](./images/heatmap_8.png)

Remove ambiguous and recluster

In [None]:
# Keep the fully annotated myeloid object (mdata2 is read again when the labels
# are transferred to adata), then drop "Ambiguous" cells and rebuild from .raw.
mdata2 = mdata.copy()
mdata = mdata[mdata.obs["cell_type_PP"] != "Ambiguous"]
mdata = mdata.raw.to_adata()
sc.pp.highly_variable_genes(mdata, min_mean=0.0125, max_mean=3, min_disp=0.5)

# Remove genes matching the VDJ pattern from the highly variable set. One
# vectorised mask replaces the per-gene loop and its always-true None check.
vdj_pattern = "^TR[AB][VDJ]|^IG[HKL][VDJC]"
mdata.var.loc[
    mdata.var_names.str.contains(vdj_pattern, regex=True), "highly_variable"
] = False

# Snapshot all genes in .raw before restricting to the highly variable ones.
mdata.raw = mdata
mdata = mdata[:, mdata.var["highly_variable"]].copy()

sc.pp.scale(mdata, max_value=10)
sc.tl.pca(mdata, svd_solver="arpack")
sc.pp.neighbors(mdata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(mdata)
sc.tl.umap(mdata, min_dist=0.3)

In [None]:
# Keep the fully annotated T-cell object (tdata2 is read again when the labels
# are transferred to adata), then drop "Ambiguous" cells and rebuild from .raw.
tdata2 = tdata.copy()
tdata = tdata[tdata.obs["cell_type_PP"] != "Ambiguous"]
tdata = tdata.raw.to_adata()
sc.pp.highly_variable_genes(tdata, min_mean=0.0125, max_mean=3, min_disp=0.5)

# Remove genes matching the VDJ pattern from the highly variable set. One
# vectorised mask replaces the per-gene loop and its always-true None check.
vdj_pattern = "^TR[AB][VDJ]|^IG[HKL][VDJC]"
tdata.var.loc[
    tdata.var_names.str.contains(vdj_pattern, regex=True), "highly_variable"
] = False

# Snapshot all genes in .raw before restricting to the highly variable ones.
tdata.raw = tdata
tdata = tdata[:, tdata.var["highly_variable"]].copy()

sc.pp.scale(tdata, max_value=10)
sc.tl.pca(tdata, svd_solver="arpack")
sc.pp.neighbors(tdata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(tdata)
sc.tl.umap(tdata, min_dist=0.3)

In [None]:
# Reclustered myeloid UMAP: previous labels next to the new Leiden clusters.
sc.pl.umap(mdata, color=["cell_type_PP", "leiden"], legend_loc="on data", legend_fontoutline=2)

Expected output:<br>
![sample output](./images/umap_13.png)

In [None]:
# Rank genes per new myeloid Leiden cluster with the Wilcoxon method.
sc.tl.rank_genes_groups(
    mdata,
    groupby="leiden",
    method="wilcoxon",
)

In [None]:
# Top 10 ranked genes per cluster (minimum log fold change 1), without a dendrogram.
sc.pl.rank_genes_groups_dotplot(
    mdata,
    n_genes=10,
    min_logfoldchange=1,
    dendrogram=False,
    standard_scale="var",
    color_map="viridis",
)

Expected output:<br>
![sample output](./images/heatmap_9.png)

In [None]:
# Distribution of the two QC metrics per myeloid Leiden cluster.
sc.pl.violin(
    mdata,
    ["n_genes_by_counts", "total_counts"],
    groupby="leiden",
)

Expected output:<br>
![sample output](./images/violin_1.png)

In [None]:
# Marker panel per Leiden cluster of the reclustered myeloid object.
sc.pl.dotplot(
    mdata,
    var_names={
        "General": ["ITGAM", "HLA-DRA"],
        "LC": ["CD207", "EPCAM", "ITGAX"],
        "Monocyte": ["CCR2", "CD14", "FCGR3A", "S100A8", "S100A9", "FCN1", "CX3CR1"],
        "DC1": ["XCR1", "CLEC9A"],
        "DC2": ["CLEC10A", "THBD"],
        "DC3": ["CSF3R", "CSF2RA"],
        "mRegDC": ["LAMP3", "CCR7"],
        "pDC": ["IL3RA", "KIT", "IRF8"],
        "Mac": ["C1QA", "FCGR1A", "MERTK", "TREM2", "SIGLEC1", "CD68", "LYVE1", "ARG1", "CD163"],
        "prolif": ["MKI67"],
        "IFN": ["ISG15", "IFI27", "STAT1"],
        "contamination": ["CD3D", "NCAM1"],
    },
    groupby="leiden",
    standard_scale="var",
    color_map="Blues",
)

Expected output:<br>
![sample output](./images/heatmap_10.png)

In [None]:
# New Leiden cluster -> final myeloid label, written as label -> clusters and inverted.
new_dict = {
    cluster: label
    for label, clusters in {
        "Imm_LC": ["0", "2"],
        "Imm_CX3CR1+ Mono/Mac": ["1", "3"],
        "Imm_CD169+ Mac": ["4"],
        "Imm_mRegDC": ["5"],
        "Imm_TREM2+ Mac": ["6"],
        "Imm_CD14+ Mono": ["7"],
        "Imm_LC KI67+": ["8"],
        "Imm_DC1/DC2": ["9"],
        "Imm_IFN+ Mac": ["10"],
    }.items()
    for cluster in clusters
}
mdata.obs["cell_type_PP2"] = [new_dict[cluster] for cluster in mdata.obs.leiden]

In [None]:
# Compare the first-pass and final myeloid labels on the new embedding.
sc.pl.umap(mdata, color=["cell_type_PP", "cell_type_PP2"], legend_loc="on data")

Expected output:<br>
![sample output](./images/umap_14.png)

In [None]:
# Reclustered T-cell UMAP: previous labels next to the new Leiden clusters.
sc.pl.umap(tdata, color=["cell_type_PP", "leiden"], legend_loc="on data", legend_fontoutline=2)

Expected output:<br>
![sample output](./images/umap_15.png)

In [None]:
# Sub-cluster Leiden cluster 5 and store the refined labels under "leiden_R".
sc.tl.leiden(
    tdata,
    resolution=0.2,
    restrict_to=("leiden", ["5"]),
    key_added="leiden_R",
)
sc.pl.umap(tdata, color="leiden_R", legend_loc="on data", legend_fontoutline=2)

Expected output:<br>
![sample output](./images/umap_16.png)

In [None]:
# Rank genes per refined cluster (no `method` argument is passed) and show the
# top 5 per cluster.
sc.tl.rank_genes_groups(tdata, groupby="leiden_R")
sc.pl.rank_genes_groups_dotplot(
    tdata,
    n_genes=5,
    min_logfoldchange=1,
    dendrogram=False,
    standard_scale="var",
    color_map="viridis",
)

Expected output:<br>
![sample output](./images/heatmap_11.png)

In [None]:
# Distribution of the two QC metrics per refined T-cell cluster.
sc.pl.violin(
    tdata,
    ["n_genes_by_counts", "total_counts"],
    groupby="leiden_R",
)

Expected output:<br>
![sample output](./images/violin_2.png)

In [None]:
# T-cell marker panel (extended NK and proliferation genes) per refined cluster.
sc.pl.dotplot(
    tdata,
    var_names={
        "T cell": ["CD3D", "CD3E", "CD4", "CD8B", "CD8A", "CCR7", "SELL", "CD44", "CD27"],
        "Trm": ["CD69", "ITGAE", "CRTAM", "S1PR1"],
        "Treg": ["FOXP3", "PDCD1", "CTLA4", "IL2RA"],
        "NK": ["NCAM1", "NCR1", "IFNG", "GZMB", "GNLY", "XCL2", "FCGR3A", "NKG7", "CD160"],
        "ILC": ["IL7R", "XCL1", "RORC"],
        "prolif": ["MKI67", "TOP2A"],
        "IFN": ["ISG15", "IFI27", "STAT1"],
        "Contamination": ["ITGAM", "S100A8"],
    },
    groupby="leiden_R",
    standard_scale="var",
    color_map="Blues",
)

Expected output:<br>
![sample output](./images/heatmap_12.png)

In [None]:
# Refined cluster ("leiden_R") -> final T/NK label, written as label -> clusters
# and inverted.
# NOTE(review): cluster 8 is labelled "Imm_PD-1+CTLA4+ CD8Tcm" while the first-pass
# annotation used "Imm_PD-1+CTLA4+ CD4Tcm" — confirm which lineage is intended.
new_dict = {
    cluster: label
    for label, clusters in {
        "Imm_CD4Tcm": ["0", "1"],
        "Imm_CD8Tem": ["2", "3", "7"],
        "Imm_Treg": ["4"],
        "Imm_CD16- NK": ["5,0"],
        "Imm_NKT": ["5,1"],
        "Imm_CD16+ NK": ["5,2"],
        "Imm_PD-1+ CD8Tem": ["6"],
        "Imm_PD-1+CTLA4+ CD8Tcm": ["8"],
    }.items()
    for cluster in clusters
}
tdata.obs["cell_type_PP2"] = [new_dict[cluster] for cluster in tdata.obs.leiden_R]

sc.pl.umap(tdata, color=["cell_type_PP", "cell_type_PP2"])

Expected output:<br>
![sample output](./images/umap_17.png)

In [None]:
# Expression of FCGR3A (CD16), PDCD1 (PD-1) and CTLA4 on the T-cell UMAP.
sc.pl.umap(
    tdata,
    color=["FCGR3A", "PDCD1", "CTLA4"],
    color_map="Reds",
)

Expected output:<br>
![sample output](./images/umap_18.png)

In [None]:
# Possible that a subset of PD-1+ Tem cells are Trm cells

# T-cell marker panel grouped by the final "cell_type_PP2" labels.
sc.pl.dotplot(
    tdata,
    var_names={
        "T cell": ["CD3D", "CD3E", "CD4", "CD8B", "CD8A", "CCR7", "SELL", "CD44", "CD27"],
        "Trm": ["CD69", "ITGAE", "CRTAM", "S1PR1"],
        "Treg": ["FOXP3", "PDCD1", "CTLA4", "IL2RA"],
        "NK": ["NCAM1", "NCR1", "IFNG", "GZMB"],
        "ILC": ["IL7R", "XCL1", "RORC"],
        "prolif": ["MKI67"],
        "IFN": ["ISG15", "IFI27", "STAT1"],
        "Contamination": ["ITGAM", "S100A8"],
    },
    groupby="cell_type_PP2",
    standard_scale="var",
    color_map="Blues",
)

Apply the labels to the original adata

In [None]:
# Start from the level-1 labels (as plain strings) and overwrite them, for the
# cells present in each subset, with progressively finer annotations. Later
# updates take precedence, so the order of the sources matters.
#
# The Series is updated as a standalone object and assigned back afterwards.
# Calling .update() directly on adata.obs["cell_type_PP"] is a chained in-place
# modification: pandas flags it with a FutureWarning (silenced in this notebook)
# and it no longer reaches adata.obs once copy-on-write is active.
cell_type_pp = adata.obs["Level1_unnamed"].astype(str)
for refined_labels in (
    idata.obs["cell_type_PP"],
    mdata2.obs["cell_type_PP"],
    tdata2.obs["cell_type_PP"],
    mdata.obs["cell_type_PP2"],
    tdata.obs["cell_type_PP2"],
):
    cell_type_pp.update(refined_labels)
adata.obs["cell_type_PP"] = cell_type_pp

In [None]:
# Full-dataset UMAP: level-1 labels above the refined labels (one panel per row).
sc.pl.umap(
    adata,
    color=["Level1_unnamed", "cell_type_PP"],
    ncols=1,
)

Expected output:<br>
![sample output](./images/umap_19.png)

In [None]:
# Save the full dataset, now carrying the refined "cell_type_PP" column, to disk.
adata.write_h5ad("allintegratedClustered_Seurat_PP.h5ad")

In [None]:
# Collect the "cell_type_js" column of adata.obs into a one-column DataFrame.
# NOTE(review): within this notebook "cell_type_js" is only assigned on mdata.obs;
# this lookup presumably relies on the column arriving via the joined annotation
# tables — confirm, otherwise it raises KeyError.
df = pd.DataFrame(adata.obs["cell_type_js"])
df

In [None]:
# Write the label table as CSV into the data/ folder.
df.to_csv("data/allintegratedClustered_w_JS_imm_labels.csv")

In [None]:
# Save the immune, T-cell and myeloid objects. At this point tdata and mdata are
# the reclustered objects with "Ambiguous" cells removed, and all three hold only
# the highly variable genes in X (the all-gene snapshot is kept in .raw).
idata.write_h5ad("idata_Immune_PP.h5ad")
tdata.write_h5ad("tdata_Tcells_PP.h5ad")
mdata.write_h5ad("mdata_myeloid_PP.h5ad")