# SERPENTINE -- Combined Myeloid Cells Subclustering

## Environment Setup

In [None]:
# load packages
import sys
import scanpy as sc
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scvi
import seaborn as sns
import scipy
import decoupler as dc

In [None]:
import anndata2ri
import logging

import rpy2.rinterface_lib.callbacks as rcb
import rpy2.robjects as ro

rcb.logger.setLevel(logging.ERROR)
ro.pandas2ri.activate()
anndata2ri.activate()

%load_ext rpy2.ipython

In [None]:
# import helper functions
from helper_functions import save_markers, compute_signature_score

In [None]:
# remove warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# set up figure parameters
plt.rcParams['figure.figsize'] = (6.0, 4.0)
sc.settings.verbosity = 0
sc.settings.set_figure_params(
    dpi=120,
    facecolor="white",
    frameon=False,
    figsize=(4, 4),
    format="png",
    dpi_save=300
)

In [None]:
# set up dirs: project root and the folder receiving this notebook's figures
work_dir = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/"
fig_dir = os.path.join(work_dir, "figures", "combined", "myeloid", "preprocessing/")
# scanpy's `save=` output goes to the same folder as `fig_dir`
sc.settings.figdir = fig_dir

In [None]:
# read anndata object
adata_full = sc.read_h5ad(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_TCR_full-integrated_annot_22-03-24.h5ad"))

## Data Preparation

### Full Object Visualization

In [None]:
adata_full

In [None]:
adata_full.obs

In [None]:
sc.pl.umap(
    adata_full,
    color=["Annotation_1.0", "Prior_Annotation_2.0", "timepoint", "project", "subproject", "sample"],
    legend_loc="right margin",
    legend_fontsize=7,
    ncols=2
)

### Clean Anndata Object

In [None]:
# list the keys stored in .uns (call the method; the bare attribute only shows the bound method)
adata_full.uns_keys()

In [None]:
# remove full non-informative metadata
adata_full.obs.drop(columns=['leiden_res0_25', 'leiden_res0_5', 'leiden_res0_75', 'leiden_res1', 'leiden_res0_5_1', 'scVI_Annotation_1.0', 'harmony_Annotation_1.0', 'scGen_Annotation_1.0'], inplace=True)

In [None]:
adata_full.var.drop(columns=['HVG_cell_ranger', 'HVG_seurat', 'HVG_seurat_v3', 'HVG_model', 'highly_variable', 'excl_hv', 'orig_highly_variable'], inplace=True)

In [None]:
obsm_rem = ['harmony_umap', 'scGen_corrected_latent', 'scGen_umap', 'scVI_umap', 'X_pca_harmony', 'X_scVI']
for obsm in obsm_rem:
    del adata_full.obsm[obsm]

In [None]:
del adata_full.varm['PCs']

In [None]:
obsp_rem = ['harmony_neighbors_connectivities', 'harmony_neighbors_distances', 'scGen_neighbors_connectivities', 'scGen_neighbors_distances', 'scVI_neighbors_connectivities', 'scVI_neighbors_distances']
for obsp in obsp_rem:
    del adata_full.obsp[obsp]

### Subset Myeloid populations

In [None]:
# keep only Myeloid and pDC cells; take an explicit copy because the subset is
# normalised / log-transformed in place further down, which must not act on a view
adata = adata_full[adata_full.obs['Annotation_1.0'].isin(['Myeloid', 'pDC'])].copy()

In [None]:
# check number of cells
len(adata.obs_names)

In [None]:
# Myeloid subset shown on the UMAP coordinates inherited from the whole-object
# integration (i.e. before any Myeloid-specific re-integration).
# Plot the subset `adata`; plotting `adata_full` would just repeat the full-object figure.
sc.pl.umap(
    adata,
    color=["Annotation_1.0", "Prior_Annotation_2.0", "timepoint", "project", "subproject", "sample"],
    legend_loc="right margin",
    legend_fontsize=7,
    ncols=2,
    wspace=.5
)

## Myeloid-specific Preprocessing

### Normalization

In [None]:
# observe count dist
plt.figure(figsize=(6.0, 4.0))
sns.histplot(adata.obs["total_counts"], bins=100, kde=False)

In [None]:
# normalization
sc.pp.normalize_total(adata, target_sum=1e4)

In [None]:
# log-transform the data
sc.pp.log1p(adata)

In [None]:
# visualize the total-count distribution next to the shifted-logarithm distribution
# NOTE(review): the right panel reads the pre-existing "logcounts" layer, not the
# adata.X values produced by normalize_total/log1p in the cells above -- confirm
# that this layer reflects the normalization being inspected here.
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
p1 = sns.histplot(adata.obs["total_counts"], bins=100, kde=False, ax=axes[0])
axes[0].set_title("Total counts")
# per-cell sum over genes of the "logcounts" layer
p2 = sns.histplot(adata.layers["logcounts"].sum(1), bins=100, kde=False, ax=axes[1])
axes[1].set_title("Shifted logarithm")
plt.show()

### Compute Highly Variable Genes

In [None]:
# find variable genes
sc.pp.highly_variable_genes(adata, batch_key="sample", flavor="seurat", n_top_genes=2000, subset = False, inplace = True)

In [None]:
print(adata.var.highly_variable.value_counts())

In [None]:
# collect the genes that should not be taken into account for PCA

# mitochondrial genes are recognised by their "MT-" prefix
MT_genes = adata.var_names[adata.var_names.str.startswith("MT-")].tolist()
print(MT_genes[:10])

# merge unwanted genes (currently only the mitochondrial ones)
out_genes = MT_genes

# check there are matches
not set(out_genes).isdisjoint(adata.var_names)

In [None]:
# create boolean list indicating whether the genes are excluded as HVG
# (vectorised membership test: every gene is checked, including the last one,
# which an index loop over range(len - 1) would skip)
in_out_genes = adata.var_names.isin(out_genes).tolist()

adata.var["excl_hv"] = in_out_genes

len(out_genes)

In [None]:
# remove undesired genes as highly variable
# build the mask once over ALL genes (an index loop over range(len - 1) misses the last gene)
hv_excluded = adata.var["highly_variable"].astype(bool) & adata.var["excl_hv"].astype(bool)

# report each gene that is about to lose its HVG flag
for gene in adata.var_names[hv_excluded.to_numpy()]:
    print(gene + " found in HVG -- removed!")

adata.var.loc[hv_excluded, "highly_variable"] = False

In [None]:
(adata.var.highly_variable.value_counts())

In [None]:
# check highly variable genes
highly_variable_genes = adata.var.index[adata.var['highly_variable']]
highly_variable_genes

### Run PCA

In [None]:
sc.tl.pca(adata, use_highly_variable = True)

In [None]:
# visualize loadings
sc.pl.pca_loadings(adata, include_lowest=True, components=[1, 2, 3, 4, 5])

In [None]:
# select optimal number of PCs
sc.pl.pca_variance_ratio(adata, log=True, n_pcs=50)

In [None]:
# calculate the percent of variation associated with each PC
# (share of each PC's standard deviation in the summed standard deviations)
pc_std = adata.obsm['X_pca'].std(axis=0)
pct = pc_std / np.sum(pc_std) * 100

# calculate cumulative percents for each PC
cumu = np.cumsum(pct)

# metric 1: first PC with cumulative percent > 90% and own contribution < 5%.
# np.where returns 0-based positions, so add 1 to report a PC number / PC count.
co1 = np.where((cumu > 90) & (pct < 5))[0]
co1_index = int(co1[0]) + 1 if len(co1) > 0 else None
print(co1_index)

# metric 2: last position where the drop to the subsequent PC exceeds 0.05 (before: 0.1);
# report the subsequent PC, 1-based: 0-based position i -> PC number i + 2.
co2 = np.where((pct[:-1] - pct[1:]) > 0.05)[0]
co2_index = int(co2[-1]) + 2 if len(co2) > 0 else None
print(co2_index)

# usually, we would choose the minimum of these two metrics as the PCs covering the majority of the variation in the data.
pcs = min(co1_index, co2_index) if co1_index is not None and co2_index is not None else None

print("PCs covering the majority of the variation:", pcs)

In [None]:
# Kaiser rule --> Keep PC with an eigenvalue of >=1

# extract eigenvalues from PCA results
eigenvalues = adata.uns['pca']['variance']

# 1-based numbers of the principal components whose eigenvalue is >= 1
pcs_gt_1 = [i + 1 for i, val in enumerate(eigenvalues) if val >= 1]

# the matching eigenvalues themselves
eigenvalues_gt_1 = [eigenvalues[i - 1] for i in pcs_gt_1]

print(eigenvalues)
# default=0: report "no PC passes" instead of raising ValueError on an empty list
print(max(pcs_gt_1, default=0))

In [None]:
# 2/3 Variance Explanation
print(cumu)
# first PC at which the cumulative percent exceeds 2/3;
# +1 converts the 0-based array position into a PC number / PC count
int(np.where(cumu > 100 * 2 / 3)[0][0]) + 1

In [None]:
# define number of PCs
n_pcs=35

### Compute Neighbors & Non Linear Dim. Reduction

In [None]:
sc.pp.neighbors(adata, n_pcs=n_pcs)
sc.tl.umap(adata)

In [None]:
sc.pl.umap(adata, color=["sample", "Annotation_1.0"], wspace=1.5)

In [None]:
# check technical sources of variation
sc.pl.umap(
    adata,
    color=["pct_counts_mt", "pct_counts_ribo", "n_genes_by_counts", "total_counts", "doublet_score", "sample"],
    vmax="p99",
    #legend_loc="on data",
    frameon=False,
    cmap="PuOr",
    use_raw=False,
    ncols=2
)

In [None]:
sc.pl.umap(
    adata,
    color=["patient", "timepoint"],
    vmax="p99",
    #legend_loc="on data",
    frameon=False,
    use_raw=False,
    ncols=2
)

### Integration (scVI)

In [None]:
# define integration vars
batch_key = "subproject"

In [None]:
# create object specific to scVI with just HVG
adata_scvi = adata[:, adata.var["highly_variable"]].copy()

In [None]:
print(adata.shape)
print(adata_scvi.shape)

In [None]:
# prepare object
scvi.model.SCVI.setup_anndata(adata_scvi, 
                              layer="rawcounts", 
                              batch_key=batch_key,
                              #continuous_covariate_keys=["pct_counts_mt", "pct_counts_ribo", "n_genes_by_counts", "total_counts"],
                              #categorical_covariate_keys=["patient", "timepoint"]
                             )
adata_scvi

In [None]:
# create the model
model_scvi = scvi.model.SCVI(adata_scvi)
model_scvi

In [None]:
# visualize model
model_scvi.view_anndata_setup()

In [None]:
# find optimal number of epochs
max_epochs_scvi = np.min([round((20000 / adata_scvi.n_obs) * 400), 400])
max_epochs_scvi

In [None]:
# train the model with the epoch budget computed in the previous cell
# (cast to a plain int: np.min returns a NumPy scalar)
model_scvi.train(max_epochs=int(max_epochs_scvi))

In [None]:
# extract the scVI outputs from the trained model
adata_scvi.obsm["X_scVI"] = model_scvi.get_latent_representation() # latent embedding; the only output used in further steps
# NOTE(review): 10e4 equals 1e5, whereas normalize_total above uses target_sum=1e4 -- confirm the intended scale
adata_scvi.layers["scvi_normalized"] = model_scvi.get_normalized_expression(library_size=10e4) # would allow us to perform DE

In [None]:
# transfer scVI latent space to the full anndata object
adata.obsm["X_scVI"] = model_scvi.get_latent_representation()

In [None]:
# save the model
model_scvi.save(os.path.join(work_dir, "data", "models", "Combined_SCR_C02_Myeloid_scVI_integration_model_11-04-24"), overwrite=True)

In [None]:
# batch-corrected visualization (full)
sc.pp.neighbors(adata, use_rep="X_scVI")
sc.tl.umap(adata)
adata

In [None]:
# visualize integration
sc.pl.umap(adata, 
           color=["Annotation_1.0", 
                  "sample",  
                  "subproject", 
                  "project", 
                  "patient", 
                  "timepoint",
                  "Prior_Annotation_2.0"
                 ], 
           wspace=1, 
           ncols=2) 

In [None]:
# save integrated myeloid adata object
adata.write(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_Myeloid_scVI-integrated_11-04-24.h5ad"))

In [None]:
# read integrated myeloid adata object
adata = sc.read_h5ad(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_Myeloid_scVI-integrated_11-04-24.h5ad"))

In [None]:
# identfy technical sources of variation
sc.pl.umap(
    adata,
    color=["pct_counts_mt", "pct_counts_ribo", "n_genes_by_counts", "total_counts", "doublet_score", "sample"],
    vmax="p99",
    frameon=False,
    cmap="PuOr",
    use_raw=False,
    ncols=2,
    save="Techincal_Sources_of_Variation.png"
)

In [None]:
# visualize batches
sc.pl.umap(
    adata,
    color=["patient", "project", "timepoint", "subproject", "response", "sample"],
    vmax="p99",
    frameon=False,
    cmap="PuOr",
    use_raw=False,
    ncols=2,
    save="Batches.png"
)

In [None]:
# check specific Myeloid markers to evaluate integration
sc.pl.umap(
    adata,
    color=[
            "FCN1",       # Mono
            "CD14",       # CD14 Mono
            "FCGR3A",     # CD16 Mono
            "CD68",       # Macro
            "CD5L",       # TRM Kupffer-like TAM
            "MKI67",      # Prolif. TAM
            "CX3CR1",     # Reg-TAM
            "APOE",       # LA-TAM
            "IL1B",       # Inflam. TAM
            "ISG15",      # IFN-TAM
            "VCAN",       # Angio. TAM
            "SPP1",       # Infla, LA, Angio TAMs
            "C1QC",       # LA, Inflam, RTM TAMs
            "TREM2",      # cancer prognosis --> bad
            "S100A9",     # Monos (CD14), Inflam, angio, TAMs, cDC3
            "CLEC9A",     # cDC1
            "CD1C",       # cDC2
            "LILRA4",     # pDCs
            "TPSAB1",     # Mast
            
            
            
            #"NR4A1"       # tissue resident monos
    ],
    vmax="p99",
    legend_loc="on data",
    frameon=False,
    #cmap="Viridis",
    use_raw=False
)


### Clustering

In [None]:
# perform clustering over a sweep of resolutions; each result is stored in
# adata.obs under the key paired with its resolution
leiden_resolutions = {
    "leiden_res0_25": 0.25,
    "leiden_res0_5": 0.5,
    "leiden_res0_75": 0.75,
    "leiden_res1": 1.0,
    "leiden_res1_2": 1.2,
    "leiden_res1_4": 1.4,
    "leiden_res1_6": 1.6,
    "leiden_res1_8": 1.8,
    "leiden_res2": 2,
}
for obs_key, res in leiden_resolutions.items():
    sc.tl.leiden(adata, key_added=obs_key, resolution=res)

In [None]:
# visulize clustering
sc.pl.umap(
    adata,
    color=["leiden_res0_25", "leiden_res0_5", "leiden_res0_75", "leiden_res1",
          "leiden_res1_2", "leiden_res1_4", "leiden_res1_6", "leiden_res1_8", "leiden_res2"],
    legend_loc="on data"
)

In [None]:
# subcluster cluster 17 and 8(mix of kupffer)
sc.tl.leiden(adata, key_added="leiden_res1_1", resolution=0.2, restrict_to = ("leiden_res1", ["17"]))
sc.tl.leiden(adata, key_added="leiden_res1_1", resolution=0.2, restrict_to = ("leiden_res1_1", ["8"]))

In [None]:
# visulize clustering
sc.set_figure_params(figsize=(6, 6))
sc.pl.umap(
    adata,
    color=["leiden_res1", "leiden_res1_1"],
    legend_loc="on data"
)

In [None]:
# save clustered myeloid adata object
adata.write(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_Myeloid_scVI-integrated_clustered_18-04-24.h5ad"))

In [None]:
# read clustered myeloid adata object
adata = sc.read_h5ad(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_Myeloid_scVI-integrated_clustered_18-04-24.h5ad"))

## Level 2 Annotation

### Map Known Marker Genes

In [None]:
# define marker genes: flat, ordered list used for per-gene UMAP panels
# (one row per population; the list order is the plotting order)
marker_genes = [
    "FCN1", "LYZ",                      # Mono
    "CD14", "S100A9",                   # CD14 Mono (S100A9: also Inflam/Angio TAMs, cDC3)
    "FCGR3A", "HES4",                   # CD16 Mono
    "CD68", "SPP1", "C1QC", "TREM2",    # Macro / TAM (TREM2: bad cancer prognosis)
    "CD5L", "MARCO",                    # TRM Kupffer-like TAM
    "MKI67", "TOP2A",                   # Prolif. TAM
    "CX3CR1", "CD274",                  # Reg-TAM
    "APOE", "APOC1",                    # LA-TAM
    "IL1B", "IL6",                      # Inflam. TAM
    "ISG15", "IFIT1",                   # IFN-TAM
    "VCAN", "VEGFA",                    # Angio. TAM
    "CLEC9A", "BATF3",                  # cDC1
    "CD1C", "CLEC10A",                  # cDC2
    "CCR7", "LAMP3",                    # cDC3
    "LILRA4",                           # pDCs
    "TPSAB1",                           # Mast
    "FCGR3B", "CSF3R", "G0S2",          # neutrophil
]


In [None]:
# marker genes dictionary: population label -> marker genes
# (labels become the gene-group brackets in dotplot / matrixplot / stacked violin;
#  insertion order is the plotting order)
marker_genes_dict = dict([
    ("Mono",            ["FCN1", "LYZ"]),
    ("CD14 Mono",       ["CD14", "S100A9"]),
    ("CD16 Mono",       ["FCGR3A", "HES4"]),
    ("TAM",             ["CD68", "SPP1", "C1QC", "TREM2"]),   # Macro
    ("TRM-Kuppfer TAM", ["CD5L", "MARCO"]),                   # TRM Kupffer-like TAM (label spelling kept as-is)
    ("Prolif TAM",      ["MKI67", "TOP2A"]),
    ("Reg TAM",         ["CX3CR1", "CD274"]),
    ("LA TAM",          ["APOE", "APOC1"]),
    ("Inflam TAM",      ["IL1B", "IL6"]),
    ("IFN-TAM",         ["ISG15", "IFIT1"]),
    ("Angio TAM",       ["VCAN", "VEGFA"]),
    ("cDC1",            ["CLEC9A", "BATF3"]),
    ("cDC2",            ["CD1C", "CLEC10A"]),
    ("cDC3",            ["CCR7", "LAMP3"]),
    ("pDC",             ["LILRA4"]),
    ("Mast",            ["TPSAB1"]),
    # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9767679/
    ("Neutrophil",      ["FCGR3B", "CSF3R", "G0S2"]),
])

In [None]:
# plot the expression of marker genes
sc.pl.umap(
        adata,
        color=marker_genes,
        vmin=0,
        vmax="p99",  # set vmax to the 99th percentile of the gene count instead of the maximum, to prevent outliers from making expression in other cells invisible. Note that this can cause problems for extremely lowly expressed genes.
        sort_order=True,  # do not plot highest expression on top, to not get a biased view of the mean expression among cells
        frameon=True,
        use_raw=False,
        cmap="viridis",  # https://matplotlib.org/stable/tutorials/colors/colormaps.html
        save="Markers_UMAPs.png"
    )

In [None]:
# dotplot of marker genes
sc.pl.dotplot(
    adata,
    groupby="leiden_res1_1",
    var_names=marker_genes_dict,
    standard_scale="var", 
    use_raw=False,
    cmap="Blues",
    save="Clusters_Markers_Dotplot.png"
)

In [None]:
sc.pl.matrixplot(
    adata,
    groupby="leiden_res1_1",
    var_names=marker_genes_dict,
    standard_scale="var", 
    use_raw=False,
    cmap="Blues",
    save="Clusters_Markers_Matrixplot.png"
)

### Compute Marker Genes

In [None]:
# compute marker genes
sc.tl.rank_genes_groups(
    adata, groupby="leiden_res1_1", method="wilcoxon", key_added="dea_leiden_res1_1", use_raw=False#, layer="rawcounts" # do not use raw!
)

In [None]:
# save marker genes
save_markers(adata, "leiden_res1_1", os.path.join(work_dir, "data", "markers", "Combined", "Combined_Myeloid_res1_markers_18-04-24.xlsx"))

In [None]:
sc.tl.dendrogram(adata, groupby='leiden_res1_1')

In [None]:
# plot top 5 marker genes per cluster
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby="leiden_res1_1",
    standard_scale="var",
    n_genes=5,
    key="dea_leiden_res1_1",
    use_raw=False,
    cmap="Blues",
    save="Cluster_Marker_Genes_Dotplot.png"
)

### Carry Out Annotation

In [None]:
#sc.set_figure_params(figsize=(4, 4))
sc.pl.umap(
    adata,
    color=["leiden_res1_1"],
    legend_loc="on data",
    save="Clustering_res1_1_UMAP.png"
)

In [None]:
# cluster id (leiden_res1_1, including the "8,x" / "17,x" subclusters) -> level-2 cell type label
annotation = {
    "0":     "CD14 Mono",
    "1":     "Angio TAM",
    "2":     "cDC2",
    "3":     "CD14 CD16 Mono",
    "4":     "CD16 Mono",
    "5":     "LA TAM",
    "6":     "pDC",
    "7":     "CD14 Mono",
    "8,0":   "TRM Kupffer TAM",
    "8,1":   "Anti-Inflam TAM",
    "9":     "Angio TAM-like",
    "10":    "CD14 CD16 Mono",
    "11":    "LA TAM",
    "12":    "LA TAM",
    "13":    "Neutrophil",
    "14":    "cDC1",
    "15":    "Mast",
    "16":    "NOISE",
    "17,0":  "pDC-like",
    "17,1":  "pDC-like",
    "18":    "cDC3",
}

# fail loudly if a cluster has no label: .map() would silently turn it into NaN,
# and NaN-labelled cells are not removed by a later filter on the "NOISE" label
unmapped_clusters = set(adata.obs["leiden_res1_1"].astype(str).unique()) - set(annotation)
if unmapped_clusters:
    raise ValueError(
        f"Clusters without an Annotation_2.0 label: {sorted(unmapped_clusters)}"
    )

adata.obs["Annotation_2.0"] = adata.obs["leiden_res1_1"].map(annotation)

In [None]:
# remove NOISE cluster
adata = adata[~adata.obs['Annotation_2.0'].isin(["NOISE"])].copy()

### Visualize Annotation 2.0

In [None]:
# visualize annotation
sc.set_figure_params(figsize=(4, 4))
sc.pl.umap(
    adata,
    color=["Annotation_2.0"],
    title=["Annotation 2.0"],
    frameon=True,
    cmap="tab20",
    save="Annotation_2.0_UMAP.png"
)

In [None]:
# dotplot of marker genes
sc.pl.dotplot(
    adata,
    groupby="Annotation_2.0",
    var_names=marker_genes_dict,
    standard_scale="var", 
    use_raw=False,
    cmap="Blues",
    save="Annotation_2.0_Dotplot.png"
)

In [None]:
sc.pl.matrixplot(
    adata,
    groupby="Annotation_2.0",
    var_names=marker_genes_dict,
    standard_scale="var", 
    use_raw=False,
    cmap="Blues",
    save="Annotation_2.0_Matrixplot.png"
)
#plt.savefig(os.path.join(fig_dir,"Annotation_2.0_Matrixplot.png"), dpi=600, format="png", bbox_inches="tight")

In [None]:
sc.pl.stacked_violin(
    adata,
    groupby="Annotation_2.0",
    var_names=marker_genes_dict,
    standard_scale="var", 
    use_raw=False,
    cmap="Blues",
    save="Annotation_2.0_Stacked_Violin.png"
)

In [None]:
# correlation matrix
sc.pl.correlation_matrix(adata, "Annotation_2.0", save="Annotation_2.0_Correlation_Matrix.png")

### Save Annotated Anndata Object

In [None]:
# save it
adata.write(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_Myeloid_annotated_18-04-24.h5ad"))

In [None]:
# read annotated object
adata = sc.read_h5ad(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_Myeloid_annotated_18-04-24.h5ad"))

## Gene Signatures Scores

In [None]:
# change saving figures dir (used for the signature figures)
# NOTE(review): only `fig_dir` is reassigned; `sc.settings.figdir` was set to the
# preprocessing folder earlier and is not updated here, so scanpy `save=` output
# presumably still lands in the previous folder -- confirm this is intended.
# NOTE(review): "Myeloid" is capitalised here but lowercase ("myeloid") in the
# earlier figure path -- verify the directory name on a case-sensitive filesystem.
fig_dir = os.path.join(work_dir, "figures", "combined", "Myeloid", "signatures/")

In [None]:
# define signatures

signatures = {  

    "Pro-Inflam": ["C1QA","C1QC","CCL2","IL1B","CCL4","CCL7","CCL8","NFKB1","CD40", "CXCL2", "CXCL3", "CXCL9", "CXCL10","CXCL11","IDO1","NFKBIA", "TNF","CXCL8","G0S2","IL6","INHBA","S100A8","S100A9"],
    "Anti-Inflam": ["SELENOP", "MRC1", "CCL18","CD163", "CD209", "ARG1", "IL10", "CD274","CHIT1",  "RNASE1", "TREM2", "IL10", "ITGA4", "LGALS9", "MARCO", "TGFB2", "TGFB1", "CSF1R", "CSF1", "SPP1","TREM2"],
    "IFN-Response": ["CASP1", "CASP4", "CCL2", "CCL3", "CCL4", "CCL7", "CCL8",
         "CD274", "CD40", "CXCL2", "CXCL3", "CXCL9", "CXCL10", "CXCL11", "IDO1", "IFI6", "IFIT1", "IFIT2", "IFIT3", "IFITM1", "IFITM3", "IRF1", "IRF7", "ISG15", "LAMP3", "PDCD1LG2", "TNFSF10",
         "C1QA", "C1QC", "CD38", "IL4I1", "IFI44L", "STAT1", "IRF1", "IRF7"],
    "Lipid-Associated": ["ACP5", "APOE", "APOC1", "ATF1", "C1QA", "C1QB", "C1QC", "CCL18", "CD163", "CD36", 
         "CD63", "CHI3L1", "CTSB", "CTSD", "CTSL", "F13A1", "FABP5", "FOLR2", "GPNMB", "IRF3", 
         "LGALS3", "LIPA", "LPL", "MARCO", "MERTK", "MMP7", "MMP9", "MMP12", "MRC1", "NR1H3", 
         "NRF1", "NUPR1", "PLA2G7", "RNASE1", "SPARC", "SPP1", "TFDP2", "TREM2", "ZEB1", "FOS", "JUN", "HIF1A", "MAF", "MAFB", "NR1H3", "TCF4", "TFEC"],
    "Angiogenesis": ["ADAM8", "AREG", "BNIP3", "CCL2", "CCL4", "CCL20", "CD163", "CD300E", "CD44", "CD55", "CEBPB", "CLEC5A", "CTSB", "EREG", "FCN1", "FLT1", "FN1", "HES1", "IL1B", "IL1RN", "CXCL8", "MAF", "MIF", "NR1H3", "OLR1", "PPARG", "S100A8", "S100A9", "S100A12", "SERPINB2", "SLC2A1", "SPIC", "SPP1", "THBS1", "TIMP1", "VCAN", "VEGFA", "BACH1", "CEBPB", "FOSL2", "HIF1A", "KLF5", "MAF", "NFKB1", "NR1H3", "RUNX1", "SPIC", "TEAD1", "ZEB2"],
    "Regulatory": ["CCL2", "CD274", "CD40", "CD80", "CD86", "CHIT1", "CX3CR1", "HLA-A", "HLA-C", "HLA-DQA1", "HLA-DQB1", "HLA-DRA", "HLA-DRB1", "HLA-DRB5", "ICOSLG", "IL10", "ITGA4", "LGALS9", "MARCO", "MRC1", "TGFB2"],
    "Proliferation": ["CCNA2", "CDC45", "CDK1", "HIST1H4C", "HMGB1", "HMGN2", "MKI67", "RRM2", "STMN1", "TOP2A", "TUBA1B", "TUBB", "TYMS"],
    "TRM-Kupffer": ["C1QA", "C1QB", "C1QC", "CCL7", "CD163", "CD5L", "CD74", "CETP", "FOLR2", "HLA-DPA1", "HLA-DPB1", "HLA-DRB1", "MARCO", "MAF", "MS4A7", "SLC40A1", "VCAM1", "VSIG4"],
    "APM": ["HLA-A", "HLA-B", "HLA-C", "HLA-E", "HLA-G", "HLA-F", "HLA-DRA", "HLA-DRB1", "HLA-DQA1", "HLA-DQB1", "HLA-DQA2", "HLA-DQB2", "HLA-DPA1", "HLA-DPB1",
         "NLRC5", "CIITA", "PSME1", "PSME2", "PSMB8", "PSMB9", "PSMB10", "B2M", "HLA-DRB5",
         "HLA-DMA", "HLA-DMB", "HLA-DOA", "HLA-DOB", "ERAP1", "TAPBP", "TAP1", "TAP2"] # https://www.nature.com/articles/s41598-023-28167-1

}
# # Macrophage diversity in cancer revisited in the era of single-cell omics


In [None]:
# compute the signature scores on the single-cell object (plotting disabled)
# compute_signature_score is a project helper; presumably it stores the score
# under `score_name` -- verify against helper_functions

for signature, gene_set in signatures.items():
    compute_signature_score(adata, gene_set=gene_set, score_name=signature+"_Score", palette="viridis", plot=False)
    # report completion only after the score has actually been computed
    print(signature + " Done!")


In [None]:
sc.pl.heatmap(adata, var_names=signatures, groupby="Annotation_2.0", use_raw=False)

In [None]:
# create pseudobulks
pdata= dc.get_pseudobulk(
    adata,
    sample_col='sample',
    groups_col='Annotation_2.0',
    layer='rawcounts',
    mode='sum',
    min_cells=10,
    min_counts=1000,
)
pdata

In [None]:
pdata.obs

In [None]:
pdata[pdata.obs.Condition == "T0/-ICI"]

In [None]:
# compute the signature scores on the pseudobulk object and on the single-cell object
# (plotting disabled)
# NOTE(review): the single-cell scores were already computed in an earlier cell;
# the second call is kept so this cell still works when run on its own.

for signature, gene_set in signatures.items():
    compute_signature_score(pdata, gene_set=gene_set, score_name=signature+"_Score", palette="viridis", plot=False)
    compute_signature_score(adata, gene_set=gene_set, score_name=signature+"_Score", palette="viridis", plot=False)
    # report completion only after both scores have actually been computed
    print(signature + " Score Done!")


In [None]:
df_p = pdata.obs
df_p

In [None]:
df = adata.obs
df

In [None]:
# non-responder (PD) subsets of the cell-level and pseudobulk metadata
# explicit copies: new columns are added to df_pd further down, which must not
# happen on a filtered slice of the original frame (chained-assignment pitfall)
df_pd = df[df['response'] == 'PD'].copy()
df_p_pd = df_p[df_p['response'] == 'PD'].copy()

In [None]:
df_p.loc[df_p['Condition'] == 'T1/+ICI']

In [None]:
df_p

In [None]:
scores = list(signatures.keys())
print(scores)

for score in scores:

    from scipy.stats import mannwhitneyu, normaltest, ttest_ind, wilcoxon
    from statsmodels.stats.multitest import multipletests

    df_p = pdata.obs[pdata.obs['patient'].isin(['01', '02', '03', '08', '10'])]
    df = adata.obs[adata.obs['patient'].isin(['01', '02', '03', '08', '10'])]
    '''
    if score in ["Pro-Inflam", "Anti-Inflam", "Angiogenesis", "CD16 Mono", "CD14 Mono", "CD14 CD16 Mono", "Lipid-Associated" "TRM-Kupffer"]:
        df = df[df["Annotation_2.0"].isin(["Anti-Inflam TAM", "LA TAM", "TRM Kupffer TAM", "Angio TAM", "Angio TAM-like", "CD16 Mono", "CD14 Mono", "CD14 CD16 Mono"])]
        df_p = df_p[df_p["Annotation_2.0"].isin(["Anti-Inflam TAM", "LA TAM", "TRM Kupffer TAM", "Angio TAM", "Angio TAM-like", "CD16 Mono", "CD14 Mono", "CD14 CD16 Mono"])]
        df_pd = df_pd[df_pd["Annotation_2.0"].isin(["Anti-Inflam TAM", "LA TAM", "TRM Kupffer TAM", "Angio TAM", "Angio TAM-like", "CD16 Mono", "CD14 Mono", "CD14 CD16 Mono"])]
        df_p_pd = df_p_pd[df_p_pd["Annotation_2.0"].isin(["Anti-Inflam TAM", "LA TAM", "TRM Kupffer TAM", "Angio TAM", "Angio TAM-like", "CD16 Mono", "CD14 Mono", "CD14 CD16 Mono"])]

    elif score == "APM":
        df = df[df["Annotation_2.0"].isin(["cDC1", "cDC2", "cDC3"])]
        df_p = df_p[df_p["Annotation_2.0"].isin(["cDC1", "cDC2", "cDC3"])]
        df_pd = df_pd[df_pd["Annotation_2.0"].isin(["cDC1", "cDC2", "cDC3"])]
        df_p_pd = df_p_pd[df_p_pd["Annotation_2.0"].isin(["cDC1", "cDC2", "cDC3"])]
    '''
    '''
    subprojects = df_p['subproject'].unique()
    cell_types_per_subproject = {subproject: set(df_p[df_p['subproject'] == subproject]['Annotation_2.0']) for subproject in subprojects}
    common_cell_types = set.intersection(*cell_types_per_subproject.values())
    df_p = df_p[df_p['Annotation_2.0'].isin(common_cell_types)]
    print(common_cell_types)
    
    subprojects = df_p['subproject'].unique()
    common_cell_types = None
    for subproject in subprojects:
        subproject_data = df_p[df_p['subproject'] == subproject]
        cell_types_in_subproject = set(subproject_data['Annotation_2.0'])
        if common_cell_types is None:
            common_cell_types = cell_types_in_subproject
        else:
            common_cell_types = common_cell_types.intersection(cell_types_in_subproject)
    print(common_cell_types)
    df_p = df_p[df_p['Annotation_2.0'].isin(common_cell_types)]
    

    df_pd = df[df['response'] == 'PD']
    df_p_pd = df_p[df_p['response'] == 'PD']

    
    cell_types_to_remove = []
    for cell_type in df_p['Annotation_2.0'].unique(): 
        if cell_type not in df_p.loc[df_p['Condition'] == 'T0/-ICI', 'Annotation_2.0'].values:
            cell_types_to_remove.append(cell_type)
            print(cell_type, "not found!")
        if cell_type not in df_p.loc[df_p['Condition'] == 'T1/+ICI','Annotation_2.0'].values:
            cell_types_to_remove.append(cell_type)
            print(cell_type, "not found!")

    df_p = df_p[~df_p['Annotation_2.0'].isin(cell_types_to_remove)]
    '''
    
    ############################
    # T0 vs. T1 - All Patients #
    ############################
    
    # statistical test (T0 bvs T1)
    print("T0 vs. T1 - All Patients")
    # create arrays of each condition
    t0 = df_p.loc[(df_p.Condition == "T0/-ICI"), score+"_Score"].values
    t1 = df_p.loc[(df_p.Condition == "T1/+ICI"), score+"_Score"].values
    
    
    log_t0 = np.log(t0)
    log_t1 = np.log(t1)
    
    # pvalues with scipy:
    stat_results = [
      mannwhitneyu(t0, t1, alternative="two-sided")
    ]
    
    pvalues = [result.pvalue for result in stat_results]

    # Perform FDR correction using Benjamini-Hochberg procedure
    reject_null, corrected_p_values1, _, _ = multipletests(pvalues, method='fdr_bh')

    print(score + " Score:")
    print("T0/-ICI vs. T1/+ICI: \n", stat_results[0], "\n")
    print("Corrected p-value:", corrected_p_values1[0])

    ##################
    # T0 vs. T1 - PD #
    ##################

    # statistical test (T0 bvs T1)
    print("T0 vs. T1 - PD")
    # create arrays of each condition
    t0 = df_p_pd.loc[(df_p_pd.Condition == "T0/-ICI"), score+"_Score"].values
    t1 = df_p_pd.loc[(df_p_pd.Condition == "T1/+ICI"), score+"_Score"].values
    
    
    log_t0 = np.log(t0)
    log_t1 = np.log(t1)
    
    # pvalues with scipy:
    stat_results = [
      mannwhitneyu(t0, t1, alternative="two-sided")
    ]
    
    pvalues = [result.pvalue for result in stat_results]

    # Perform FDR correction using Benjamini-Hochberg procedure
    reject_null, corrected_p_values2, _, _ = multipletests(pvalues, method='fdr_bh')

    print(score + " Score:")
    print("T0/-ICI vs. T1/+ICI: \n", stat_results[0], "\n")
    print("Corrected p-value:", corrected_p_values2[0])
    
    #############
    # PD vs. SB #
    #############

    df_p = pdata.obs
    df = adata.obs
    
    # statistical test (PD bvs SD)
    print("PD vs. SD")
    # create arrays of each condition
    t0 = df_p.loc[(df_p.response == "PD"), score+"_Score"].values
    t1 = df_p.loc[(df_p.response == "SD"), score+"_Score"].values
    
    
    log_t0 = np.log(t0)
    log_t1 = np.log(t1)
    
    # pvalues with scipy:
    stat_results = [
      mannwhitneyu(t0, t1, alternative="two-sided")
    ]
    
    pvalues = [result.pvalue for result in stat_results]

    # Perform FDR correction using Benjamini-Hochberg procedure
    reject_null, corrected_p_values3, _, _ = multipletests(pvalues, method='fdr_bh')

    print(score + " Score:")
    print("PD vs. SD: \n", stat_results[0], "\n")
    print("Corrected p-value:", corrected_p_values3[0])
    
    ##############################
    # -ICI vs. +ICI/PD & +ICI/SD #
    ##############################
   
    # create arrays of each condition
    t0 = df_p.loc[(df_p.ICI_status == "-ICI"), score+"_Score"].values
    pd = df_p.loc[(df_p.ICI_status == "+ICI/PD"), score+"_Score"].values
    sd = df_p.loc[(df_p.ICI_status == "+ICI/SD"), score+"_Score"].values
    
    log_t0 = np.log(t0)
    log_pd = np.log(pd)
    log_sd = np.log(sd)
    
    # pvalues with scipy:
    stat_results = [
      mannwhitneyu(t0, pd, alternative="two-sided"),
      mannwhitneyu(pd, sd, alternative="two-sided"),
      mannwhitneyu(t0, sd, alternative="two-sided")
    ]
    
    pvalues = [result.pvalue for result in stat_results]

    # Perform FDR correction using Benjamini-Hochberg procedure
    reject_null, corrected_p_values4, _, _ = multipletests(pvalues, method='fdr_bh')

    print("-ICI vs. +ICI/PD: \n", stat_results[0], "\n")
    print("Corrected p-value:", corrected_p_values4[0])
    print("+ICI/PD vs. +ICI/SD: \n", stat_results[1], "\n")
    print("Corrected p-value:", corrected_p_values4[1])
    print("-ICI vs. +ICI/SD: \n", stat_results[2], "\n")
    print("Corrected p-value:", corrected_p_values4[2])


    # logarize scores for better visualization
    constant_df = abs(df[score+'_Score'].min()) + 1
    constant_df_pd = abs(df_pd[score+'_Score'].min()) + 1
    df[score+'_Score_positive'] = df[score+'_Score'] + constant_df
    df_pd[score+'_Score_positive'] = df_pd[score+'_Score'] + constant_df_pd
    # Compute log2 after adding the constant
    df['log2_'+score+'_Score'] = np.log2(df[score+'_Score_positive'])
    df_pd['log2_'+score+'_Score'] = np.log2(df_pd[score+'_Score_positive'])

    #########
    # PLOTS #
    #########
    
    # Create subplots
    fig, axes = plt.subplots(1, 5, figsize=(18, 3)) 

    def p_value_to_symbol(p_value):
        if p_value <= 1.00e-4:
            return "****"
        elif p_value <= 1.00e-3:
            return "***"
        elif p_value <= 1.00e-2:
            return "**"
        elif p_value <= 5.00e-2:
            return "*"
        else:
            return "ns"
    


    # second subplot
        
    pairs = [("T0/-ICI", "T1/+ICI")]
    #axes[1]=get_log_ax()
    hue_plot_params = {
            "data":df, 
            "x":"Condition", 
            "y":'log2_'+score+'_Score',            
            "inner":"box", 
            "hue":"Condition", 
            "palette":["coral","darkviolet"], 
            "edgecolor":"black", 
            "alpha":0.7
    }
    sns.violinplot(ax=axes[0], **hue_plot_params)
    axes[0].set_title("Timepoint (PD+SD)")
    axes[0].set_ylabel('log2('+score+" Score)")
    axes[0].set_xlabel("")
    ymax = df['log2_'+score+'_Score'].max()
    axes[0].plot([0, 0, 1, 1], [ymax+.4, ymax+.5, ymax+.5, ymax+.4], lw=1, color='black')
    axes[0].text(.5, ymax+.5, f" {p_value_to_symbol(corrected_p_values1[0])}", ha='center', va='bottom', size=9)

    
    # third subplot
        
    pairs = [("T0/-ICI", "T1/+ICI")]
    #axes[1]=get_log_ax()
    hue_plot_params = {
            "data":df_pd, 
            "x":"Condition", 
            "y":'log2_'+score+'_Score',            
            "inner":"box", 
            "hue":"Condition", 
            "palette":["coral","darkviolet"], 
            "edgecolor":"black", 
            "alpha":0.7
    }
    sns.violinplot(ax=axes[1], **hue_plot_params)
    axes[1].set_title("Timepoint (PD)")
    axes[1].set_ylabel("")
    axes[1].set_xlabel("")
    ymax = df_pd['log2_'+score+'_Score'].max()
    axes[1].plot([0, 0, 1, 1], [ymax+.4, ymax+.5, ymax+.5, ymax+.4], lw=1, color='black')
    axes[1].text(.5, ymax+.5, f" {p_value_to_symbol(corrected_p_values2[0])}", ha='center', va='bottom', size=9)

    
    # forth subplot
    
    pairs = [("PD", "SD")]
    #axes[1]=get_log_ax()
    hue_plot_params = {
            "data":df, 
            "x":"response", 
            "y":'log2_'+score+'_Score',            
            "inner":"box", 
            "hue":"response", 
            "palette":["mistyrose", "lavender"], 
            "edgecolor":"black", 
            "alpha":0.7
    }
    sns.violinplot(ax=axes[2], **hue_plot_params)
    axes[2].set_title("Response")
    axes[2].set_ylabel("")
    axes[2].set_xlabel("")
    ymax = df['log2_'+score+'_Score'].max()
    axes[2].plot([0, 0, 1, 1], [ymax+.4, ymax+.5, ymax+.5, ymax+.4], lw=1, color='black')
    axes[2].text(.5, ymax+.5, f" {p_value_to_symbol(corrected_p_values3[0])}", ha='center', va='bottom', size=9)

    
    # fifth subplot
        
    # prepare significance annotation
    pairs = [("-ICI", "+ICI/PD"), ("+ICI/PD", "+ICI/SD"), ("-ICI", "+ICI/SD")]
    #axes[0]=get_log_ax()
    hue_plot_parameters = {
            "data":df, 
            "x":"ICI_status", 
            "y":'log2_'+score+'_Score', 
            "inner":"box", 
            "hue":"ICI_status", 
            "palette":["red", "blue", "snow"], 
            "edgecolor":"black",
            "alpha":0.7, 
    }
    sns.violinplot(ax=axes[3], **hue_plot_parameters)
    axes[3].set_title("ICI Status")
    axes[3].set_ylabel("")
    axes[3].set_xlabel("")
    ymax = df['log2_'+score+'_Score'].max()
    axes[3].plot([0, 0, 2, 2], [ymax+1, ymax+1.1, ymax+1.1, ymax+1], lw=1, color='black')
    axes[3].text(1, ymax+1.1, f" {p_value_to_symbol(corrected_p_values4[0])}", ha='center', va='bottom', size=9)
    axes[3].plot([0, 0, 1, 1], [ymax+.7, ymax+.8, ymax+.8, ymax+.7], lw=1, color='black')
    axes[3].text(0.5, ymax+.8, f" {p_value_to_symbol(corrected_p_values4[1])}", ha='center', va='bottom', size=9)
    axes[3].plot([1, 1, 2, 2], [ymax+0.4, ymax+.5, ymax+.5, ymax+0.4], lw=1, color='black')
    axes[3].text(1.5, ymax+.5, f" {p_value_to_symbol(corrected_p_values4[2])}", ha='center', va='bottom', size=9)

    # first plot
    sc.pl.umap(
            adata,
            color=score+"_Score",
            vmin=0,
            vmax="p99",  
            sort_order=True,  
            frameon=True,
            use_raw=False,
            cmap="viridis",
            ax=axes[4],
            title=score+" Score"
        )
    
    # Adjust layout
    plt.tight_layout()

    plt.show()
    
    # Show plot
    fig.savefig(os.path.join(fig_dir,score+"_Score_ICI_status_Condition_Violin_UMAP.pdf"), dpi=600, format="pdf", bbox_inches="tight")

In [None]:
# Annotation_2.0 composition of df_p_pd: baseline (T0/-ICI) rows first, then all other rows
for baseline_rows in (df_p_pd.Condition == "T0/-ICI", df_p_pd.Condition != "T0/-ICI"):
    print(df_p_pd.loc[baseline_rows, "Annotation_2.0"].value_counts())

In [None]:
# For every subproject, collect the set of Annotation_2.0 labels observed in it
subprojects = df_p['subproject'].unique()
cell_types_per_subproject = {
    name: set(df_p.loc[df_p['subproject'] == name, 'Annotation_2.0'])
    for name in subprojects
}
cell_types_per_subproject

In [None]:
# Keep only the cell types that are present in every subproject
subprojects = df_p['subproject'].unique()
cell_types_per_subproject = {
    name: set(df_p.loc[df_p['subproject'] == name, 'Annotation_2.0'])
    for name in subprojects
}
# labels shared by all subprojects
common_cell_types = set.intersection(*cell_types_per_subproject.values())
df_p = df_p.loc[df_p['Annotation_2.0'].isin(common_cell_types)]

In [None]:
# Annotation_2.0 labels of the baseline (T0/-ICI) rows, selected in a single .loc call
df_p.loc[df_p['Condition'] == 'T0/-ICI', 'Annotation_2.0']

## Plot Proliferation

In [None]:
# Proliferation score per patient (values rescaled per variable, read from .X rather than .raw)
sc.pl.dotplot(
    adata,
    var_names="Proliferation_Score",
    groupby="patient",
    use_raw=False,
    standard_scale="var",
    cmap="RdYlBu",
)

In [None]:
# Proliferation score per Condition (values rescaled per variable, read from .X rather than .raw)
sc.pl.dotplot(
    adata,
    var_names="Proliferation_Score",
    groupby="Condition",
    use_raw=False,
    standard_scale="var",
    cmap="RdYlBu",
)

## Pro-Infl. vs. Anti-Infl. Scatter Plot

In [None]:
# (re)load the annotated myeloid object from <work_dir>/data/outputdata/combined
adata = sc.read_h5ad(
    os.path.join(
        work_dir,
        "data",
        "outputdata",
        "combined",
        "Combined_SCR_CO2_Myeloid_annotated_18-04-24.h5ad",
    )
)

In [None]:
# change saving figures dir: files written through fig_dir from here on go to
# <work_dir>/figures/TFM/Fig5/ (only fig_dir is changed here; sc.settings.figdir is not touched)
fig_dir = os.path.join(work_dir, "figures", "TFM", "Fig5/")

In [None]:
# list the distinct Annotation_2.0 labels present in the annotated object
adata.obs["Annotation_2.0"].unique()

In [None]:
# Build pseudobulk profiles by summing the 'rawcounts' layer per sample within each subproject.
# A TAM-only variant would pass this subset instead of the full object:
#   adata[adata.obs["Annotation_2.0"].isin(["LA TAM", "Angio-TAM", "Angio TAM-like", "Anti-Inflam TAM", "TRM Kupffer TAM"])]
pdata = dc.get_pseudobulk(
    adata,
    layer='rawcounts',
    mode='sum',
    sample_col='sample',  # timepoint + patient
    groups_col='subproject',
    min_cells=10,
    min_counts=1000,
)
pdata

In [None]:
# Gene signatures scored on the pseudobulk profiles.
# Every gene is listed exactly once per signature: "IL10" and "TREM2" used to appear twice in
# "Anti-Inflam", which could double-weight them depending on how the scoring helper averages genes.
signatures = {
    "Pro-Inflam": [
        "C1QA", "C1QC", "CCL2", "IL1B", "CCL4", "CCL7", "CCL8", "NFKB1", "CD40",
        "CXCL2", "CXCL3", "CXCL9", "CXCL10", "CXCL11", "IDO1", "NFKBIA", "TNF",
        "CXCL8", "G0S2", "IL6", "INHBA", "S100A8", "S100A9",
    ],
    "Anti-Inflam": [
        "SELENOP", "MRC1", "CCL18", "CD163", "CD209", "ARG1", "IL10", "CD274",
        "CHIT1", "RNASE1", "TREM2", "ITGA4", "LGALS9", "MARCO", "TGFB2", "TGFB1",
        "CSF1R", "CSF1", "SPP1",
    ],
}


In [None]:
# Score every signature on the pseudobulk object under the name "<signature>_Score"
# (without plotting).
for signature, gene_set in signatures.items():
    compute_signature_score(pdata, gene_set=gene_set, score_name=signature+"_Score", palette="viridis", plot=False)
    # report completion only after the score has actually been computed
    print(signature + " Done!")

In [None]:
# inspect the pseudobulk metadata table (the scatter plots below read their columns from it)
pdata.obs

In [None]:
# show the directory the following figures will be saved to
fig_dir

In [None]:
import seaborn as sns

sns.set_theme(style="white")

# Pro- vs. anti-inflammatory score per pseudobulk:
# colour = patient, marker = timepoint, point size = psbulk_n_cells
fig = sns.relplot(
    data=pdata.obs,
    x="Pro-Inflam_Score",
    y="Anti-Inflam_Score",
    hue="patient",
    style="timepoint",
    size="psbulk_n_cells",
    sizes=(40, 400),
    palette="muted",
    alpha=.5,
    height=5,
)
fig.savefig(
    os.path.join(fig_dir, "Anti_vs_Pro_Inflam_ScatterPlot_TAM.pdf"),
    dpi=600,
    format="pdf",
    bbox_inches="tight",
)

In [None]:
def rgb_to_hex(rgb):
    """Convert an RGB triplet of floats in [0, 1] into a ``#rrggbb`` hex string.

    Each channel is scaled to 0-255 and rounded to the nearest integer.
    Truncating with ``int()`` would turn a channel such as
    0.4352941176470587 (110.99999999999997 after scaling) into 110 instead
    of 111.

    Parameters
    ----------
    rgb : sequence of float
        Red, green and blue components; only the first three items are read.

    Returns
    -------
    str
        Lower-case hex colour string, e.g. ``'#e95c47'``.
    """
    red, green, blue = (int(round(rgb[i] * 255)) for i in range(3))
    return '#{:02x}{:02x}{:02x}'.format(red, green, blue)

# Five-colour palette; the trailing comment on each entry is the hex value rgb_to_hex returns for it
colors = [
    rgb_to_hex((0.9139561707035756, 0.36239907727797, 0.27935409457900806)),  # #e95c47
    rgb_to_hex((0.9934640522875817, 0.7477124183006535, 0.4352941176470587)),  # #fdbf6f
    rgb_to_hex((0.998077662437524, 0.9992310649750096, 0.7460207612456747)),  # #ffffbe
    rgb_to_hex((0.7477124183006538, 0.8980392156862746, 0.6274509803921569)),  # #bfe5a0
    rgb_to_hex((0.3280276816608997, 0.6805074971164936, 0.6802768166089965))   # #54aead
]
import seaborn as sns

sns.set_theme(style="white")

# One fixed colour per patient ID, assigned in order from `colors`
palette = dict(zip(['01', '02', '03', '08', '10'], colors))

# Same scatter as the default-palette version, drawn slightly larger with the patient palette.
# NOTE: it is saved under the same file name, so it replaces that earlier PDF.
fig = sns.relplot(
    data=pdata.obs,
    x="Pro-Inflam_Score",
    y="Anti-Inflam_Score",
    hue="patient",
    style="timepoint",
    size="psbulk_n_cells",
    sizes=(40, 400),
    palette=palette,
    alpha=.5,
    height=6,
)
fig.savefig(
    os.path.join(fig_dir, "Anti_vs_Pro_Inflam_ScatterPlot_TAM.pdf"),
    dpi=600,
    format="pdf",
    bbox_inches="tight",
)


## Thesis / Poster Plots

In [None]:
# Output locations and figure defaults for the thesis / poster panels
work_dir = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/"
fig_dir = os.path.join(work_dir, "figures", "TFM", "Fig5")
# scanpy's own `save=` output goes to the same folder as the manually saved figures
sc.settings.figdir = fig_dir
sc.set_figure_params(
    figsize=(3,3),
    frameon=False,
    format='pdf',
    dpi=120,
    dpi_save=600,
)

In [None]:
# load the annotated myeloid object from <work_dir>/data/outputdata/combined
adata = sc.read_h5ad(
    os.path.join(
        work_dir,
        "data",
        "outputdata",
        "combined",
        "Combined_SCR_CO2_Myeloid_annotated_18-04-24.h5ad",
    )
)

In [None]:
# count number of cells: displaying the object prints its summary, including n_obs (cells) x n_vars (genes)
adata

In [None]:
# Basic Annotation UMAP
# The cell count in the title is read from the object, so it stays correct
# if the input file or any upstream filtering changes.
sc.pl.umap(
        adata,
        color="Annotation_2.0",
        vmin=0,
        vmax="p99",  
        sort_order=False,  
        frameon=False,
        use_raw=False,
        palette="tab20",
        title=f"Myeloid Cells ({adata.n_obs})",
        legend_fontsize=8,
        legend_loc="right margin",
        save = "_Myeloid_Annotation_2.0.pdf"
)

In [None]:
# Marker genes per population / functional programme. The keys are passed as `var_names`
# to the marker dotplot, so their spelling matters ("TRM-Kupffer" was misspelled "TRM-Kuppfer").
# Note that CX3CR1 is listed under both "CD16 Mono" and "Regulatory".
marker_genes_dict = {

            "Mono":               ["FCN1", "LYZ"],       # Mono
            "CD14 Mono":          ["CD14", "S100A9"],      # CD14 Mono
            "CD16 Mono":          ["FCGR3A", "CX3CR1"],     # CD16 Mono
            "TAM":                ["CD68", "SPP1", "C1QC", "TREM2"],       # Macro
            "Pro-Inflam":         ["IL1B", "TNF", "NOS2", "CXCL10"],       # Inflam. TAM
            "Anti-Inflam":        ["IL10", "ARG1", "CD163", "MRC1"],       # Anti-Inflam. TAM
            "Lipid Met":          ["APOE", "APOC1"],       # LA-TAM
            "Angiogenesis":       ["VCAN", "VEGFA"],       # Angio. TAM
            "TRM-Kupffer":        ["CD5L", "MARCO"],       # TRM Kupffer-like TAM
            "Regulatory":         ["CX3CR1", "CD274"],     # Reg-TAM
            "IFN Response":       ["ISG15", "IFIT1"],      # IFN-TAM
            "Proliferation":      ["MKI67", "TOP2A"],      # Prolif. TAM
            "cDC1":               ["CLEC9A", "BATF3"],     # cDC1
            "cDC2":               ["CD1C", "CLEC10A"],       # cDC2
            "cDC3":               ["CCR7", "LAMP3"],       # cDC3
            "pDC":                ["LILRA4", "IL3RA"],     # pDCs
            "Mast":               ["TPSAB1"],     # Mast
            "Neutrophil":         ["FCGR3B", "CSF3R", "G0S2"] # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9767679/
    
}

In [None]:
# order cat values
order = ['CD14 Mono', 'CD16 Mono', 'CD14 CD16 Mono', 'Anti-Inflam TAM', 'LA TAM', 'TRM Kupffer TAM', 'Angio TAM', 'Angio TAM-like', 'cDC1', 'cDC2', 'cDC3', 'pDC', 'pDC-like', 'Mas', 'Neutrophil']
# NOTE(review): 'Mas' may be a typo for 'Mast' (the marker dictionary uses "Mast"), and 'Angio TAM'
# differs from the "Angio-TAM" spelling used in the pseudobulk cell -- confirm against the real labels.

# pd.Categorical silently converts every label that is not listed in `order` to NaN, which removes
# those cells from anything grouped by Annotation_2.0. Report mismatches in both directions
# (with print, because warnings are globally silenced at the top of this notebook).
present_labels = set(adata.obs['Annotation_2.0'].dropna().unique())
unlisted_labels = sorted(present_labels - set(order), key=str)
unused_entries = [label for label in order if label not in present_labels]
if unlisted_labels:
    print(f"Annotation_2.0 labels not in `order` (will become NaN): {unlisted_labels}")
if unused_entries:
    print(f"`order` entries not found in Annotation_2.0: {unused_entries}")

adata.obs['Annotation_2.0'] = pd.Categorical(adata.obs['Annotation_2.0'], categories=order)

In [None]:
# Marker-gene dotplot across the Annotation_2.0 groups
# (expression rescaled per gene, read from .X rather than .raw)
sc.pl.dotplot(
    adata,
    var_names=marker_genes_dict,
    groupby="Annotation_2.0",
    use_raw=False,
    standard_scale="var",
    cmap="Blues",
    save="Clusters_Markers_Dotplot.pdf",
)

In [None]:
# Feature UMAPs for six marker genes, three panels per row, without legend or colour bar
markers = ["FCN1", "CD14", "TPSAB1", "CD1C", "LILRA4", "FCGR3B"]
sc.pl.umap(
    adata,
    color=markers,
    ncols=3,
    cmap="Blues",
    vmin=0,
    vmax="p99",
    use_raw=False,
    sort_order=True,
    frameon=True,
    add_outline=False,
    legend_loc=None,
    colorbar_loc=None,
    save="Markers.pdf",
)