### Notebook for the manual annotation of cell states and the exclusion of stromal cells

- **Developed by:** Carlos Talavera-López Ph.D
- **Würzburg Institute for Systems Immunology - Faculty of Medicine - Julius-Maximilians-Universität Würzburg**
- v231127

### Import required modules

In [1]:
import anndata
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt

from SCCAF import SCCAF_assessment, plot_roc

### Set up working environment

In [2]:
# Set scanpy's logging verbosity to 3 and print the versions of the
# packages in use.
sc.settings.verbosity = 3
sc.logging.print_versions()

# Figure defaults applied through scanpy for the plots produced below.
sc.settings.set_figure_params(
    dpi=180,
    dpi_save=300,
    color_map='RdPu',
    format='svg',
    vector_friendly=True,
)

### Read in Healthy-CTRL dataset

In [None]:
# Load the AnnData object from the .h5ad file; the bare name on the last
# line makes the notebook display its summary.
adata = sc.read_h5ad(
    '../../../data/heart_mm_nuclei-23-0092_scANVI-Myeloid_ctl240502.raw.h5ad'
)
adata

### Visualise manifold with `scANVI` annotation

In [None]:
# Three UMAP panels in one row, coloured by sample, genotype and cell type.
sc.pl.umap(
    adata,
    color=['sample', 'genotype', 'cell_type'],
    ncols=3,
    size=1,
    legend_fontsize=5,
    frameon=False,
)

### Cluster manifold using `leiden` with resolution of `0.5`

In [None]:
# Build the neighbourhood graph on the `X_scANVI` representation.
sc.pp.neighbors(
    adata,
    n_neighbors=150,
    use_rep="X_scANVI",
    metric='minkowski',
)

# Leiden clustering at resolution 0.5 with a fixed random seed.
sc.tl.leiden(adata, random_state=1712, resolution=0.5)

# Show the new clusters next to genotype and the existing cell type labels.
sc.pl.umap(
    adata,
    color=['genotype', 'cell_type', 'leiden'],
    ncols=3,
    size=0.8,
    legend_fontsize=5,
    frameon=False,
)

### Evaluate clustering accuracy with `SCCAF`

In [None]:
# Cell types and Leiden clusters with the legend drawn on the embedding.
sc.pl.umap(
    adata,
    color=['cell_type', 'leiden'],
    legend_loc='on data',
    ncols=3,
    size=0.8,
    legend_fontsize=5,
    frameon=False,
)

In [None]:
# Run the SCCAF assessment of the Leiden labels against the matrix in
# adata.X. The six returned values are unpacked here; y_prob, y_test, clf,
# cvsm and acc feed the ROC plot in the next cell.
# NOTE(review): n = 100 is presumably a per-cluster cap on training cells —
# confirm against the SCCAF documentation.
(y_prob, y_pred, y_test, clf, cvsm, acc) = SCCAF_assessment(
    adata.X,
    adata.obs['leiden'],
    n=100,
)

In [None]:
# Set the figure size BEFORE plotting: rcParams only affect figures created
# after they are set, so assigning the size after plot_roc() could not
# change the ROC figure.
# NOTE(review): this is a global matplotlib setting and stays in effect for
# figures drawn by later cells as well.
plt.rcParams["figure.figsize"] = (16,6)

# ROC plot from the SCCAF assessment outputs computed in the previous cell.
plot_roc(y_prob, y_test, clf, cvsm = cvsm, acc = acc)
plt.show()

In [None]:
# Display the AnnData summary (bare expression as the cell's last line).
adata

### Rename clusters based on the cell type annotation

In [None]:
# Leiden clusters and cell types with on-data labels, used as the visual
# reference for the renaming below.
sc.pl.umap(
    adata,
    color=['leiden', 'cell_type'],
    legend_loc='on data',
    ncols=3,
    size=1,
    legend_fontsize=5,
    frameon=False,
)

In [None]:
# Show the categories of the `leiden` column.
adata.obs['leiden'].cat.categories

In [None]:
# Create `cell_states` by renaming the Leiden cluster categories.
# The original code first copied `leiden` into `cell_states` and then
# immediately overwrote it with renamed `cell_type` categories, so the
# cluster labels were never the ones being renamed.
#
# The new names are applied positionally, in the order of
# adata.obs['leiden'].cat.categories; pandas raises a ValueError if the
# number of names differs from the number of categories.
# NOTE(review): confirm that the order of this list matches the intended
# cluster-to-label assignment.
adata.obs['cell_states'] = adata.obs['leiden'].cat.rename_categories([
    'FB', 'EC1', 'Mono', 'CM1', 'EC4', 'PC', 'CM2', 'mesCM', 'EC8',
    'Myeloid1', 'EC10', 'MØ', 'lnEC', 'mesFB', 'CD8+T', 'B', 'Myeloid2',
    'CM3',
])
sc.pl.umap(adata, frameon = False, color = ['leiden', 'cell_type', 'C_scANVI'], size = 1, legend_fontsize = 5, ncols = 3, legend_loc = 'on data')

### Calculate cluster-specific marker genes using _Wilcoxon's Rank Sum Test_

In [None]:
# Work on a copy so the normalisation and log transform applied to
# `adata_log` in the next cell do not modify `adata`.
adata_log = adata.copy()
adata_log

In [None]:
# Normalise each cell to a total of 1e6 with `exclude_highly_expressed`
# enabled, then apply the log1p transform in place.
sc.pp.normalize_total(
    adata_log,
    target_sum=1e6,
    exclude_highly_expressed=True,
)
sc.pp.log1p(adata_log)

In [None]:
# Wilcoxon test per `cell_type` group, keeping 100 genes per group and
# using adata_log.X rather than .raw (use_raw = False).
sc.tl.rank_genes_groups(
    adata_log,
    'cell_type',
    method='wilcoxon',
    n_genes=100,
    use_raw=False,
)
result = adata_log.uns['rank_genes_groups']
groups = result['names'].dtype.names

# One column per (group, statistic) pair. The column suffix is the first
# letter of the statistic: _n (names), _l (logfoldchanges), _p (pvals_adj).
wilcox_markers = pd.DataFrame({
    f'{grp}_{stat[:1]}': result[stat][grp]
    for grp in groups
    for stat in ('names', 'logfoldchanges', 'pvals_adj')
})
wilcox_markers.head(10)

In [None]:
# Write the marker table as comma-separated text without the row index.
wilcox_markers.to_csv(
    '../../../data/heart_mm_nuclei-23-0092_Myeloid-CellTypes_ctl240502_WilcoxRST_markers.csv',
    sep=',',
    index=False,
)

### Export annotated object

In [None]:
# Display the AnnData summary.
# NOTE(review): the section heading says "Export annotated object", but this
# cell only displays `adata`; no write call is visible in this part of the
# notebook — confirm whether the export step is missing.
adata