**<span style="color:darkred; font-size:22px;">06_1. Epithelial Cells  -- Cell Annotation</span>**

<div style="text-align: left;">
    <p style="text-align: left;">Updated Time: 2025-01-19</p>
</div>

##### Load libraries

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
import infercnvpy as cnv
import matplotlib.pyplot as plt
import seaborn as sns
import omicverse as ov
ov.plot_set()

import warnings
warnings.simplefilter("ignore")

##### Set working directory for analysis

In [None]:
from pathlib import Path

# Move into the project root so the relative paths used in later cells resolve.
cwd = '/media/bio/Disk/Research Data/EBV/omicverse'
os.chdir(cwd)
updated_dir = os.getcwd()
print("Updated working directory: ", updated_dir)

# Results folder for this notebook; created (with parents) if it does not exist yet.
saving_dir = Path('Results/06.Epithelial')
saving_dir.mkdir(parents=True, exist_ok=True)

#### Reading in annotated AnnData object

In [None]:
# Load the annotated AnnData object (path is relative to the working directory set above).
adata = sc.read_h5ad("Processed Data/scRNA_Annotation.h5ad")
# Bare expression: displays the AnnData summary as the cell output.
adata

##### Select Epithelial cells for downstream analysis

In [None]:
# Keep only cells annotated as 'Epithelial'; .copy() detaches the subset from `adata`.
is_epithelial = adata.obs['Cell_type'].isin(['Epithelial'])
adata_epi = adata[is_epithelial].copy()
# Quick look at the value range of the expression matrix.
print(np.min(adata_epi.X), np.max(adata_epi.X))

In [None]:
# Fix the category order of EBV_status, then report how many cells fall in each group.
ebv_order = ['Normal', 'Negative', 'Positive']
adata_epi.obs['EBV_status'] = adata_epi.obs['EBV_status'].cat.reorder_categories(ebv_order)
ebv_counts = adata_epi.obs['EBV_status'].value_counts()
for i in adata_epi.obs['EBV_status'].cat.categories:
  number = int(ebv_counts[i])
  print('the number of category {} is {}'.format(i,number))

### Infer CNV on all epithelial cells

##### Download the gencode.v44.annotation.gtf

In [None]:
# GENCODE v44 annotation, downloaded from:
# https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/gencode.v44.annotation.gtf.gz
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
gtf_file='/media/bio/Disk/Research Data/EBV/Data/gencode.v44.annotation.gtf.gz'

In [None]:
# The GTF file needs to match the genome annotation used for your single cell dataset.
# Genes are matched on the GTF 'gene_name' attribute and adata_epi is modified in place
# (inplace=True); the next cell inspects adata_epi.var to check the result.
cnv.io.genomic_position_from_gtf(gtf_file, adata=adata_epi, gtf_gene_id='gene_name', inplace=True)

In [None]:
# Preview the gene table after the GTF annotation step.
adata_epi.var.head()

##### Running infercnv

In [None]:
# Epithelial cells from the healthy-control samples (EBV_status == 'Normal')
# are supplied as the reference ("normal") cells.
cnv.tl.infercnv(adata_epi, reference_key='EBV_status', reference_cat=['Normal'], window_size=500)

Now we can plot the smoothed gene expression (the inferred CNV profile) by EBV status and chromosome.

In [None]:
# Chromosome heatmap with cells grouped by EBV status.
cnv.pl.chromosome_heatmap(adata_epi, groupby="EBV_status")
# Disabled: optional clipping of the stored CNV values to [-0.2, 0.2].
# adata_epi.obsm["X_cnv"].data = np.clip(adata_epi.obsm["X_cnv"].data, -0.2, 0.2)

##### Calculate CNV score for each cell (the per-sample ithcna/ithgex scores are currently disabled)

In [None]:
# Copy the obs index (cell barcodes) into a 'CellID' column so it can be used as a grouping key.
barcode_keys = adata_epi.obs.index
adata_epi.obs['CellID'] = barcode_keys
# Grouping by CellID yields one group per cell (presumably barcodes are unique — verify),
# so 'cnv_score' ends up as a per-cell value rather than a per-cluster one.
cnv.tl.cnv_score(adata_epi, groupby='CellID', key_added='cnv_score')
# Disabled: per-sample heterogeneity scores (ithcna / ithgex), grouped by 'orig.ident'.
# cnv.tl.ithcna(adata_epi, groupby='orig.ident', use_rep='X_cnv', key_added='ithcna', inplace=True)
# cnv.tl.ithgex(adata_epi, groupby='orig.ident', use_raw=None, layer=None, inplace=True, key_added='ithgex')

# Preview obs to confirm the new 'CellID' and 'cnv_score' columns.
adata_epi.obs.head()

##### Compare CNV scores across EBV status groups

In [None]:
# Note: this changes the default figure size globally for all later plots.
plt.rcParams['figure.figsize'] = [5, 5]
# Distribution of the per-cell CNV score in each EBV status group.
sc.pl.violin(adata_epi, 'cnv_score', groupby='EBV_status', jitter=0, multi_panel=True, show=False)

# Mean and standard deviation of the CNV score per EBV status group.
df = adata_epi.obs[['EBV_status', 'cnv_score']]
result = df.groupby('EBV_status')['cnv_score'].agg(['mean', 'std'])
print(result)

##### Clustering by CNV profiles and identifying tumor cells

In [None]:
# PCA -> neighbor graph -> Leiden clustering via the infercnvpy wrappers.
# The resulting cluster labels are read from obs['cnv_leiden'] in the following cells.
cnv.tl.pca(adata_epi)
cnv.pp.neighbors(adata_epi)
cnv.tl.leiden(adata_epi)

##### Calculate CNV score for each cnv_leiden cluster

In [None]:
# Cluster-level CNV score, stored under 'cnv_cluster' so the per-cell 'cnv_score' is not overwritten.
cnv.tl.cnv_score(adata_epi, groupby='cnv_leiden', key_added='cnv_cluster')

In [None]:
# Chromosome heatmap grouped by CNV Leiden cluster, with a dendrogram over the clusters.
cnv.pl.chromosome_heatmap(adata_epi, groupby="cnv_leiden", figsize=(10, 6), dendrogram=True, show=False)

# Save into the results folder defined at the top of the notebook (saving_dir)
# instead of repeating the directory as a string literal.
plt.savefig(saving_dir / '06.Epithelial.cnv.pl.chromosome_heatmap.pdf', format='pdf')
plt.show()

##### UMAP plot of CNV profiles

In [None]:
# UMAP embedding via infercnvpy; plotted with cnv.pl.umap in the cells below.
cnv.tl.umap(adata_epi)

In [None]:
# NOTE(review): bare attribute reference — this only displays the function's repr and
# plots nothing. Looks like a leftover from interactive exploration; consider deleting this cell.
sc.pl.umap

In [None]:
# Side-by-side CNV UMAPs: Leiden cluster labels (left) and per-cell CNV score (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(7, 3))

cnv.pl.umap(adata_epi, color="cnv_leiden", legend_loc="on data", legend_fontoutline=2, ax=ax1, show=False)
cnv.pl.umap(adata_epi, color="cnv_score", ax=ax2, show=False)

# Save into the results folder defined at the top of the notebook (saving_dir)
# instead of repeating the directory as a string literal.
plt.savefig(saving_dir / '06.Epithelial_cnv_leiden_score.pdf', format='pdf', bbox_inches='tight')
plt.show()

##### Compare CNV scores across cnv_leiden clusters

In [None]:
# Per-cell CNV score distribution within each CNV Leiden cluster.
sc.pl.violin(
    adata_epi,
    'cnv_score',
    groupby='cnv_leiden',
    xlabel=None,
    jitter=0,
    multi_panel=True,
    show=True,
    rotation=90,
    size=0.8,
)


In [None]:
# Mean and standard deviation of the per-cell CNV score for each CNV Leiden cluster.
df = adata_epi.obs[['cnv_leiden', 'cnv_score']]
result = df.groupby('cnv_leiden')['cnv_score'].agg(['mean', 'std'])
print(result)

Stacked bar plot of the EBV status composition (%) within each cnv_leiden cluster

In [None]:
# Cell counts per (CNV cluster, EBV status), one row per cluster.
grouped = (
    adata_epi.obs
    .groupby(['cnv_leiden', 'EBV_status'])
    .size()
    .unstack(fill_value=0)
)
# Convert each row to percentages so every bar sums to 100.
cross_tab = grouped.div(grouped.sum(axis=1), axis=0) * 100

ax = cross_tab.plot(kind='bar', stacked=True, figsize=(12, 4))
ax.legend(loc='best', bbox_to_anchor=(1.0, 0.5))
ax.grid(False)
plt.show()

In [None]:
# Clusters treated as CNV-normal; every other cluster is labelled "Tumor".
# NOTE(review): these IDs are hand-picked and tied to the current Leiden result —
# re-check them whenever the clustering is re-run.
normal_clusters = ["3", "6", "9", "13", "14", "19", "24"]
is_cnv_normal = adata_epi.obs["cnv_leiden"].isin(normal_clusters)
adata_epi.obs["cnv_status"] = np.where(is_cnv_normal, "Normal", "Tumor")

##### Compare CNV scores between CNV-defined Normal and Tumor cells

In [None]:
# Left: CNV score distribution per cnv_status; right: CNV UMAP coloured by cnv_status.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
sc.pl.violin(
    adata_epi,
    'cnv_score',
    groupby='cnv_status',
    jitter=0,
    multi_panel=True,
    ax=ax1,
    show=False,
)
cnv.pl.umap(adata_epi, color="cnv_status", ax=ax2, show=False)

# Mean and standard deviation of the CNV score for each cnv_status label.
df = adata_epi.obs[['cnv_status', 'cnv_score']]
result = df.groupby('cnv_status')['cnv_score'].agg(['mean', 'std'])
print(result)

##### Stacked bar plot of the cnv_status composition (%) within each sample (orig.ident)

In [None]:

# Cell counts per (sample, cnv_status), one row per sample (orig.ident).
grouped = (
    adata_epi.obs
    .groupby(['orig.ident', 'cnv_status'])
    .size()
    .unstack(fill_value=0)
)
# Convert each row to percentages so every bar sums to 100.
cross_tab = grouped.div(grouped.sum(axis=1), axis=0) * 100

ax = cross_tab.plot(kind='bar', stacked=True, figsize=(10, 6))
ax.legend(loc='best', bbox_to_anchor=(1.0, 0.5))
ax.grid(False)
plt.show()

##### Classification of epithelial cells

In [None]:
# Display the AnnData summary to check which obs columns are available before classification.
adata_epi

In [None]:
def define_new_variable(row):
    """Return the epithelial sub-type label for one cell.

    Parameters
    ----------
    row : mapping-like (e.g. one row of ``adata_epi.obs``)
        Must provide ``'EBV_status'``; ``'cnv_status'`` is only read when
        ``EBV_status`` is not ``'Normal'``.

    Returns
    -------
    str
        ``'Normal ECs'`` whenever ``EBV_status`` is ``'Normal'``; otherwise the
        label for the (cnv_status, EBV_status) pair. Any pair not listed in the
        table below falls back to ``'EBV+ CCs'``.
    """
    ebv_status = row['EBV_status']
    # Healthy-control cells are labelled without looking at their CNV status.
    if ebv_status == 'Normal':
        return 'Normal ECs'

    label_by_state = {
        ('Normal', 'Negative'): 'EBV- ECs',
        ('Normal', 'Positive'): 'EBV+ ECs',
        ('Tumor', 'Negative'): 'EBV- CCs',
    }
    return label_by_state.get((row['cnv_status'], ebv_status), 'EBV+ CCs')

# Label every cell row by row, then store the labels as an ordered categorical
# so downstream plots and tables follow this fixed order.
categories = ['Normal ECs', 'EBV- ECs', 'EBV+ ECs', 'EBV- CCs', 'EBV+ CCs']
epi_labels = adata_epi.obs.apply(define_new_variable, axis=1)
adata_epi.obs['Epi_celltype'] = pd.Categorical(epi_labels, categories=categories, ordered=True)

In [None]:
# Report the number of cells assigned to each epithelial sub-type.
epi_counts = adata_epi.obs['Epi_celltype'].value_counts()
for i in adata_epi.obs['Epi_celltype'].cat.categories:
  number = int(epi_counts[i])
  print('the number of category {} is {}'.format(i,number))

In [None]:
# Two-panel figure: CNV score per Leiden cluster (left, wider) and
# CNV UMAP coloured by epithelial sub-type (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 3.5), gridspec_kw={'width_ratios': [1.25, 1]})

sc.pl.violin(adata_epi, 'cnv_score', groupby='cnv_leiden', xlabel=None, ax=ax1,
             jitter=0, multi_panel=True, show=False, rotation=90, size=0.5)

cnv.pl.umap(adata_epi, color="Epi_celltype", ax=ax2, show=False)

# Drop the automatic panel title and place the legend inside the UMAP panel.
ax2.set_title('')
ax2.legend(loc='center', bbox_to_anchor=(0.25, 0.82), fontsize=8)

# Save into the results folder defined at the top of the notebook (saving_dir)
# instead of repeating the directory as a string literal.
plt.savefig(saving_dir / '06.Epithelial_cnv_leiden_violin_umap.pdf', format='pdf', bbox_inches='tight')
plt.show()

#### Save Epi AnnData object with CNV score

In [None]:
# Persist the epithelial subset, including the CNV results and the obs columns added above.
adata_epi.write_h5ad("Processed Data/scRNA_Epi_CNV.h5ad")


**<span style="font-size:16px;">Session information:</span>**

In [None]:
import sys
import platform
# importlib.metadata (stdlib) replaces the deprecated pkg_resources API.
from importlib import metadata

# Get Python version information
python_version = sys.version
# Get operating system information
os_info = platform.platform()
# Get system architecture information
architecture = platform.architecture()[0]
# Get CPU information
cpu_info = platform.processor()
# Print Session information
print("Python version:", python_version)
print("Operating system:", os_info)
print("System architecture:", architecture)
print("CPU info:", cpu_info)

# Print every distribution installed in the current environment with its version.
# This lists installed packages, not only those imported by the notebook,
# so the label says "Installed".
print("\nInstalled packages and their versions:")
installed_versions = {}
for dist in metadata.distributions():
    dist_name = dist.metadata.get("Name")
    # Skip broken installs that carry no name; keep the first hit per name,
    # which is the copy found earliest on sys.path.
    if dist_name:
        installed_versions.setdefault(dist_name.lower(), dist.version)
# Sorted output makes two session dumps easy to diff.
for dist_name in sorted(installed_versions):
    print(dist_name, installed_versions[dist_name])