# Cell Type Identification and Annotation

## Import required libraries

In [1]:
import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

## Configure Environment


In [2]:
# --- Scanpy runtime settings ---
# Verbosity 3 shows more output than the default level.
sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=100, figsize=(8, 8))
# Seed NumPy's global random number generator.
np.random.seed(42)

# --- Project identity and filesystem layout ---
# The two name parts are placeholders; they are combined into the prefix
# used when building data file names.
PROJ_NAME = ""
PROJ_DESCRIPTION = ""
FULL_PROJ_NAME = f"{PROJ_NAME}_{PROJ_DESCRIPTION}"

PROJECT_DIR = Path("/path/to/project")
OUTPUT_DIR = PROJECT_DIR.joinpath("output")

## Data Loading

In [None]:
# Read the normalized dataset from the output directory and report its size
print("Loading normalized data...")
normalized_path = OUTPUT_DIR / f"{FULL_PROJ_NAME}_normalized.h5ad"
adata = sc.read_h5ad(normalized_path)
n_cells, n_genes = adata.shape
print(f"Data shape: {n_cells} cells and {n_genes} genes")

## Remove Contaminating Cells

In [None]:
# Screen the clusters for glial contamination using a small marker panel
print("\nChecking for glial contamination...")
glial_markers = ['Atp1b2', 'Fabp7', 'Sostdc1', 'Timp3']

# Dot plot of the glial markers, one row per 'leiden_res1' cluster;
# standard_scale='var' standardizes expression per gene.
sc.pl.dotplot(adata, glial_markers, groupby='leiden_res1', standard_scale='var')
plt.show()

In [None]:
# Remove glial clusters (based on marker expression) and re-embed the
# remaining cells.
print("\nRemoving glial cells...")
glial_clusters = ['10']  # Update based on your marker analysis
if glial_clusters:
    # Cluster IDs are compared as strings, so normalise the column first.
    adata.obs['leiden_res1'] = adata.obs['leiden_res1'].astype(str)
    keep_mask = ~adata.obs['leiden_res1'].isin(glial_clusters)
    # .copy() materialises the subset. Without it `adata` is only a view of
    # the original object, and the in-place tools below would trigger an
    # implicit copy (with a warning) on their first write.
    adata = adata[keep_mask].copy()
    print(f"Remaining cells after glial removal: {adata.n_obs}")

    # Rerun dimensional reduction on neurons-only data
    print("\nRerunning dimensional reduction on neurons...")
    sc.tl.pca(adata)
    # NOTE(review): sc.tl.pca does not update 'X_pca_harmony'; that embedding
    # is just the row-subset of the one computed before glial removal. Re-run
    # the Harmony integration here if the neighbors graph should reflect the
    # recomputed PCA.
    if 'X_pca_harmony' in adata.obsm:
        neighbors_rep = 'X_pca_harmony'
    else:
        # Fall back to the freshly computed PCA instead of failing in
        # sc.pp.neighbors on a missing representation.
        neighbors_rep = 'X_pca'
        print("'X_pca_harmony' not found in adata.obsm; using 'X_pca' for neighbors.")
    sc.pp.neighbors(adata, use_rep=neighbors_rep)
    sc.tl.umap(adata)
    # Overwrites the previous clustering stored under the same key.
    sc.tl.leiden(adata, resolution=1, key_added='leiden_res1')

## Cell type annotation

In [None]:
# Analyze neuronal subtypes, these are example markers for DRG neurons
print("\nAnalyzing neuronal subtypes...")
neuronal_subtypes = {
    'mNP': ['P2rx3', 'Mrgprd', 'Gfra2'],
    'mNFa': ['Necab2', 'Nefh'],
    'mNFb': ['Fam19a1', 'Nefh'],
    'mPEPa': ['Smr2', 'Calca'],
    'mPEPb': ['Trpa1', 'Calca'],
    'pNF': ['Spp1', 'Nefh'],
    'pPEP': ['Th', 'Calca'],
}

# Flatten the per-subtype marker lists and drop duplicates (e.g. 'Nefh' and
# 'Calca' appear under several subtypes). dict.fromkeys keeps first-seen
# order, so the gene order on the plot is the same on every run and follows
# the subtype order above; a set would reorder genes between sessions.
all_markers = list(dict.fromkeys(
    gene for genes in neuronal_subtypes.values() for gene in genes
))

# Plot neuronal markers per 'leiden_res1' cluster
sc.pl.dotplot(
    adata,
    var_names=all_markers,
    groupby='leiden_res1',
    standard_scale='var'
)
plt.show()

In [None]:
# Differential expression: rank genes for each 'leiden_res1' cluster
print("\nFinding cluster markers...")
# pts=True also computes the percentage of cells expressing each gene.
sc.tl.rank_genes_groups(adata, groupby='leiden_res1', method='wilcoxon', pts=True)

# Show the 10 top-ranked genes per cluster, each panel with its own y-axis
sc.pl.rank_genes_groups(adata, n_genes=10, sharey=False)
plt.show()

In [None]:
# Get marker results as dataframe.
# The ranking above is run without `key_added`, so its results are stored
# under Scanpy's default key 'rank_genes_groups'; reading any other key
# raises a KeyError.
marker_results = sc.get.rank_genes_groups_df(
    adata,
    group=None,               # all clusters; adds a 'group' column
    key='rank_genes_groups',
    pval_cutoff=0.05,         # filter on adjusted p-value
    log2fc_min=0.25           # filter on minimum log2 fold change
)

# Show the first five rows for every cluster.
print("\nTop markers per cluster:")
print(marker_results.groupby('group').head(5))

In [None]:
# Assign cell types
print("\nAssigning cell types...")
# Each cluster ID must be its own string: a single entry such as '2,8'
# matches no cluster label and would leave both clusters as 'Unknown'.
neuron_assignments = {
    'mNP': ['0', '3'],    # Clusters showing mNP markers
    'mNFa': ['7'],        # Clusters showing mNFa markers
    'mNFb': ['2', '8'],   # Clusters showing mNFb markers
    'mPEPa': ['5'],       # Clusters showing mPEPa markers
    'mPEPb': ['6'],       # Clusters showing mPEPb markers
    'pNF': ['4', '9'],    # Clusters showing pNF markers
    'pPEP': ['1'],        # Clusters showing pPEP markers
}

# Invert to a cluster -> cell type lookup. If a cluster is listed under more
# than one cell type, the later entry wins.
cluster_to_celltype = {
    cluster: celltype
    for celltype, clusters in neuron_assignments.items()
    for cluster in clusters
}

# Create cell type assignments; clusters without an entry become 'Unknown'.
cluster_labels = adata.obs['leiden_res1'].astype(str)
adata.obs['celltype'] = cluster_labels.map(cluster_to_celltype).fillna('Unknown')

# Report clusters that received no annotation so mapping gaps are visible.
unassigned_clusters = sorted(set(cluster_labels) - set(cluster_to_celltype))
if unassigned_clusters:
    print(f"Clusters left as 'Unknown': {unassigned_clusters}")

In [None]:
# Final visualization: UMAP colored by the assigned cell type
print("\nGenerating final visualizations...")
sc.pl.umap(
    adata,
    color='celltype',
    legend_loc='on data',  # draw the labels on the embedding itself
)
plt.show()

## Other Visualization

In [None]:
# One UMAP panel per treatment group, colored by cell type.
# Missing treatment values are dropped: comparing against NaN selects no
# cells and would give an empty panel.
treatments = adata.obs['treatment'].dropna().unique()
n_panels = len(treatments)

# squeeze=False always returns a 2-D array of Axes, so the loop below also
# works when there is only one treatment (otherwise a bare Axes object is
# returned and cannot be iterated). Width scales at 4 inches per panel.
fig, axes = plt.subplots(
    1, n_panels, figsize=(4 * n_panels, 4), squeeze=False
)

for ax, treatment in zip(axes.ravel(), treatments):
    sc.pl.umap(
        adata[adata.obs['treatment'] == treatment],
        color='celltype',
        title=f'Treatment: {treatment}',
        show=False,  # draw into the provided axis instead of showing now
        ax=ax
    )

plt.tight_layout()
plt.show()

# Feature plots of key markers.
# Only the markers of the first subtype in `neuronal_subtypes` are plotted
# (at most four). dict.fromkeys removes duplicates while keeping the listed
# order, so the panel order is the same on every run; a set would not
# guarantee that.
first_subtype_markers = next(iter(neuronal_subtypes.values()))
feature_genes = list(dict.fromkeys(first_subtype_markers))[:4]
sc.pl.umap(
    adata,
    color=feature_genes,
    ncols=2
)
plt.show()

## Data Saving

In [None]:

# Write the annotated object to the output directory as an .h5ad file
annotated_name = f"{FULL_PROJ_NAME}_annotated.h5ad"
output_file = OUTPUT_DIR / annotated_name
print(f"\nSaving annotated data to: {output_file}")
adata.write(output_file)

print("Cell annotation complete!")