## 05_2. Manual Major Cell Type Annotation

<div style="text-align: left;">
    <p style="text-align: left;">Updated Time: 2024-02-12</p>
</div>

##### Load libraries

In [None]:
import os
import sys
import numpy as np
import anndata
import scanpy as sc
import pandas as pd
from matplotlib.pyplot import rc_context
import matplotlib.pyplot as plt
import seaborn as sns

import omicverse as ov
# Apply omicverse's plotting setup up front (presumably global figure defaults — confirm in the omicverse docs).
ov.plot_set()

import warnings
# NOTE(review): "ignore" silences every warning category for the rest of the session,
# including deprecation and convergence warnings; narrow the filter if those matter.
warnings.simplefilter("ignore")

##### Set working directory for analysis

In [None]:
# Project root for this analysis. The default is the original workstation path;
# set the EBV_WORKING_DIR environment variable to run the notebook on another
# machine without editing the code.
working_dir = os.environ.get('EBV_WORKING_DIR', '/media/bio/Disk/Research Data/EBV/omicverse')
os.chdir(working_dir)
updated_dir = os.getcwd()
print("Updated working directory: ", updated_dir)

from pathlib import Path
# Output folder for this step; the relative path resolves against the working
# directory set above. mkdir is a no-op when the folder already exists.
saving_dir = Path('Results/05.celltype_annotation')
saving_dir.mkdir(parents=True, exist_ok=True)

##### Reading in clustered AnnData object

In [None]:
# Load the object saved by the previous step; the path is relative to the
# current working directory.
clustered_h5ad = "Processed Data/scRNA_Reclustering_AutoAnnotation.h5ad"
adata = sc.read(clustered_h5ad)
# Bare expression: the notebook renders the object's summary as the cell output.
adata

In [None]:
from scipy.sparse import csr_matrix

# Re-wrap the expression matrix as a SciPy CSR sparse matrix and store it back on the object.
sparse_expression = csr_matrix(adata.X)
adata.X = sparse_expression


In [None]:
# Quick sanity check on the value range of the expression matrix:
# print its smallest and largest entries.
x_min = np.min(adata.X)
x_max = np.max(adata.X)
print(x_min, x_max)

##### Cell type annotation from marker genes

Based on the literature and existing knowledge, a feature dictionary was constructed by integrating the marker genes of each subpopulation obtained from the previous section, defining potential cell subtypes and their corresponding marker genes.

In [None]:
# Report how many cells fall into each 'leiden_1_0' cluster.
# One value_counts pass replaces filtering the whole obs table once per cluster.
# Iterating the categories keeps the original print order, and categories with
# no cells still report 0.
cluster_sizes = adata.obs['leiden_1_0'].value_counts(sort=False)
for i in adata.obs['leiden_1_0'].cat.categories:
    number = cluster_sizes.loc[i]
    print('the number of category {} is {}'.format(i, number))

The marker genes can be supplied as a list or as a dictionary. If they are given as a dictionary, the plot shows the marker genes grouped and labelled by the dictionary keys.

In [None]:
# Curated marker genes per major cell type. Insertion order is preserved, so the
# order of the entries here is the order in which the groups are iterated.
marker_genes_dict = dict(
    Epithelial=['EPCAM', 'KRT18', 'KRT19'],
    Fibroblasts=['FN1', 'COL1A1', 'CALD1'],
    T=['CD3D', 'CD4', 'CD8A'],
    NK=['NKG7', 'GNLY', 'KLRD1'],
    B=['CD79A', 'CD79B', 'MS4A1'],
    Myeloid=['TYROBP', 'C1QA', 'LYZ'],
    Plasma=['CD38', 'MZB1', 'JCHAIN'],
    Mast=['TPSAB1', 'CPA3', 'KIT'],
    pDC=['IRF7', 'IL3RA', 'LILRA4'],
    Neutrophils=['CSF3R', 'S100A8', 'S100A9'],
)

##### Dot plots

The dot plot provides a compact way of showing, per group, the fraction of cells expressing a gene (dot size) and the mean expression of the gene in those cells (color scale).

In [None]:
# Dot plot of the curated markers for every 'leiden_1_0' cluster.
# show=False keeps the figure open so it can be written to disk before it is displayed.
sc.pl.dotplot(adata, marker_genes_dict, 'leiden_1_0', dendrogram=True, use_raw=False, standard_scale='var', show=False)

# Build the output path from saving_dir (created in the setup cell) instead of
# repeating the folder literal, so the save location cannot drift from the mkdir.
plt.savefig(saving_dir / "05. Dotplot_Cluster_Curated_Markers.pdf", format="pdf", bbox_inches="tight")
plt.show()

Create a dictionary to map cluster to annotation label

In [None]:
# Manual assignment of each 'leiden_1_0' cluster id to a major cell type.
cluster2annotation = {
    '0': 'T',
    '1': 'B',
    '2': 'B',
    '3': 'T',
    '4': 'T',
    '5': 'T',
    '6': 'T',
    '7': 'T',
    '8': 'Myeloid',
    '9': 'NK',
    '10': 'Epithelial',
    '11': 'Plasma',
    '12': 'T',
    '13': 'B',
    '14': 'pDC',
    '15': 'B',
    '16': 'B',
    '17': 'Fibroblasts',
    '18': 'Mast',
    '19': 'Neutrophils',
    '20': 'Epithelial',
    '21': 'T',
    '22': 'B',
    '23': 'T',
}

# Fail fast when the clustering contains an id that has no label above.
# Series.map would otherwise turn those cells into NaN silently, and they would
# be carried into every downstream plot and into the saved object.
observed_clusters = adata.obs['leiden_1_0'].dropna().unique()
unmapped_clusters = sorted(
    (cluster for cluster in observed_clusters if cluster not in cluster2annotation),
    key=str,
)
if unmapped_clusters:
    raise ValueError(
        "No annotation defined for leiden_1_0 cluster(s): {}".format(unmapped_clusters)
    )

adata.obs['Cell_type'] = adata.obs['leiden_1_0'].map(cluster2annotation).astype('category')
# Fix the display order of the labels. reorder_categories raises ValueError if
# this list does not contain exactly the categories present in 'Cell_type'.
adata.obs['Cell_type'] = adata.obs['Cell_type'].cat.reorder_categories(['Epithelial', 'Fibroblasts', 'T','NK','B','Myeloid','Plasma','Mast','pDC','Neutrophils'])

In [None]:
# UMAP embedding coloured by the manually assigned major cell type.
fig,ax=plt.subplots(figsize = (5,5))
# Size the palette by the number of categories. .unique() would also count NaN
# as a value and would miss categories with no cells, which can leave the
# palette one colour off from the categories being coloured.
colors = sns.color_palette("Paired", n_colors=len(adata.obs['Cell_type'].cat.categories))
ov.pl.embedding(adata,
                basis='X_umap',
                color='Cell_type',
                frameon='small',
                palette=colors,
                show=False,
                ax=ax,)
# Blank out the title on the current axes.
plt.title('',fontsize=10)

# Build the output path from saving_dir (created in the setup cell) instead of
# repeating the folder literal.
plt.savefig(saving_dir / "05. UMAP_Major_Cell_Type.pdf", format="pdf", bbox_inches="tight")
plt.show()

#### Visualizing marker genes

Visualize marker genes using Heatmap

In [None]:
# Heatmap of the curated marker genes, grouped by the annotated major cell type.
ov.pl.marker_heatmap(
    adata,
    marker_genes_dict,
    groupby='Cell_type',
    color_map="RdBu_r",
    use_raw=False,
    standard_scale="var",
    expression_cutoff=0.0,
    fontsize=12,
    bbox_to_anchor=(7, -2),
    figsize=(8.5,3),
    spines=False,
    show_rownames=False,
    show_colnames=True,
)

# Build the output path from saving_dir (created in the setup cell) instead of
# repeating the folder literal, so the save location cannot drift from the mkdir.
plt.savefig(saving_dir / "05. Heatmap_Cell_type_Marker_Genes.pdf", format="pdf", bbox_inches="tight")

#### Visualize marker genes using stacked violin plots

In [None]:
# Bare expression as the cell's last line: the notebook renders the object's
# summary, which should now list the 'Cell_type' column added to obs above.
adata

In [None]:
# Re-check the value range of the expression matrix before saving:
# print its smallest and largest entries.
matrix_min = np.min(adata.X)
matrix_max = np.max(adata.X)
print(matrix_min, matrix_max)

#### Save AnnData object with celltype annotation

In [None]:
# Save the annotated object for the next step; the path is relative to the
# current working directory.
annotated_h5ad = "Processed Data/scRNA_Annotation.h5ad"
adata.write_h5ad(annotated_h5ad)


**<span style="font-size:16px;">Session information:</span>**

In [None]:
import sys
import platform
# importlib.metadata is the standard-library replacement for the deprecated
# pkg_resources API, so this cell no longer depends on setuptools.
from importlib import metadata as importlib_metadata

# Interpreter and host details, recorded so the run can be reproduced.
python_version = sys.version
os_info = platform.platform()
# platform.architecture() returns (bits, linkage); only the bits string is kept.
architecture = platform.architecture()[0]
cpu_info = platform.processor()

print("Python version:", python_version)
print("Operating system:", os_info)
print("System architecture:", architecture)
print("CPU info:", cpu_info)

# List every distribution installed in the environment, not only those imported
# by this notebook, which is why the label says "Installed".
print("\nInstalled packages and their versions:")
installed = set()
for dist in importlib_metadata.distributions():
    try:
        name = dist.metadata["Name"]
        version = dist.version
    except Exception:
        # A damaged *.dist-info folder must not abort the session report.
        continue
    if name:
        # Lower-case the name and collect into a set so duplicates collapse.
        installed.add((name.lower(), str(version)))

# Sorted output makes two session reports easy to diff.
for package_name, package_version in sorted(installed):
    print(package_name, package_version)