### Notebook for the training of the _Yoshida et al 2021_ and _Nathan et al 2021_ PBMCs cell populations with `scNym`.

- **Developed by**: Carlos Talavera-López
- **Institute of Computational Biology - Computational Health Centre - Helmholtz Munich**
- v220713

### Load required modules

In [None]:
import time
import scnym
import anndata
import scipy as sp
import numpy as np
import pandas as pd
import scanpy as sc

### Set up working environment

In [None]:
# Set scanpy's logging verbosity to level 3 and print the versions of the
# loaded packages so the run can be reproduced later.
sc.settings.verbosity = 3
sc.logging.print_versions()

# Global figure defaults: on-screen and saved resolution, colour map, and
# SVG output with `vector_friendly` enabled.
sc.settings.set_figure_params(
    dpi=160,
    dpi_save=300,
    color_map='magma_r',
    format='svg',
    vector_friendly=True,
)

In [None]:
# Name of the scNym configuration preset used in this notebook.
config_name = "new_identity_discovery"
# Look the preset up in scNym's `CONFIGS` mapping.
config = scnym.api.CONFIGS[config_name]
# Set the preset's `domain_groupby` entry to the `domain_label` column name.
# NOTE(review): `config` itself is never passed to the `scnym_api` calls in
# this notebook (they pass the preset name as a string); presumably this
# assignment mutates the shared preset in place — confirm it takes effect.
config["domain_groupby"] = "domain_label"

### Read in reference object

In [None]:
# Location of the pre-scNym AnnData file (`.h5ad`) to load.
reference_path = '/home/cartalop/data/single_cell/lung/tb/caiy2020/CaiY2020_PBMCplus_TB_pre-scnym_ctl220713.h5ad'

# Load the object and display its summary as the cell output.
combined_object = sc.read_h5ad(reference_path)
combined_object

In [None]:
# Store the `cell_states` annotation as plain strings; this column is later
# used as the `groupby` labels for scNym training.
# NOTE(review): `.astype(str)` would turn missing values into the literal
# string 'nan' — confirm unlabelled cells are already marked explicitly.
combined_object.obs['cell_states'] = combined_object.obs['cell_states'].astype(str)

In [None]:
# Show the top-left 8 x 8 corner of the expression matrix as a dense array to
# eyeball the values before normalisation.
# Assumes `X` is stored as a sparse matrix (needed for `.todense()`).
combined_object.X[:8,:8].todense()

In [None]:
# Scale every cell to a total of 1e6 and then apply log1p, both in place on
# `combined_object`.
sc.pp.normalize_total(combined_object, target_sum = 1e6)
sc.pp.log1p(combined_object)

### Filter lowly expressed genes

In [None]:
# Drop genes whose total across cells is below `min_counts`, then display the
# object to see how many genes remain.
# NOTE(review): this runs after `normalize_total` + `log1p`, so the threshold
# is applied to normalised, log-transformed values rather than raw counts —
# confirm this ordering is intended.
sc.pp.filter_genes(combined_object, min_counts  = 3)
combined_object

### Train reference with `scNym`

- Record start time for `scNym` training

In [None]:
# Wall-clock timestamp (seconds since the epoch) taken just before training.
start_time = time.time()

- Train model

In [None]:
# Options for the scNym training run:
#   groupby        - `obs` column holding the cell-state labels
#   domain_groupby - `obs` column holding the domain labels
#   out_path       - directory given to scNym for its training outputs
#   config         - name of the scNym configuration preset
training_options = {
    'task': 'train',
    'groupby': 'cell_states',
    'domain_groupby': 'domain_label',
    'out_path': '/home/cartalop/data/lung/tb/Cai2020/scnym_model/',
    'config': 'new_identity_discovery',
}

# Train on the combined object.
scnym.api.scnym_api(adata=combined_object, **training_options)

- Record end time for `scNym` training

In [None]:
# Wall-clock timestamp (seconds since the epoch) taken right after training.
end_time = time.time()

- Compute the elapsed time

In [None]:
# Elapsed training time in seconds (difference of the two `time.time()` stamps).
total_time = end_time - start_time
print(f"Total time: {total_time}")

### Predict cell labels

In [None]:
from scnym.api import scnym_api

# The same directory is passed both as the trained model location and as the
# output path for the prediction run.
model_dir = '/home/cartalop/data/lung/tb/Cai2020/scnym_model/'

# Run prediction on the combined object; `key_added` is set to 'scNym'.
scnym_api(
    adata=combined_object,
    task='predict',
    trained_model=model_dir,
    out_path=model_dir,
    key_added='scNym',
    config='new_identity_discovery',
)


### Visualise label transfer and confidence using `X_scnym`

In [None]:
# Build the neighbourhood graph on the `X_scnym` representation.
sc.pp.neighbors(combined_object, n_neighbors=50, use_rep='X_scnym')

# Compute the UMAP layout with a fixed seed so the embedding is reproducible.
sc.tl.umap(combined_object, random_state=1712, min_dist=0.3, spread=5)

# Plot predicted labels, prediction confidence and the original annotation,
# with category names drawn on the embedding.
label_panels = ['scNym', 'scNym_confidence', 'cell_states']
sc.pl.umap(
    combined_object,
    color=label_panels,
    size=0.2,
    frameon=False,
    legend_loc='on data',
    legend_fontsize=4,
)

In [None]:
# Same embedding, coloured by predicted label, confidence and `study`.
study_panels = ['scNym', 'scNym_confidence', 'study']
sc.pl.umap(
    combined_object,
    color=study_panels,
    size=0.1,
    frameon=False,
    legend_fontsize=5,
)

In [None]:
# Display the object summary to review its annotations and embeddings before
# exporting.
combined_object

### Save object

In [None]:
# Assemble a fresh AnnData object for export. Only the slots listed here are
# passed on (X, obs, var, uns, obsm, obsp); nothing else from
# `combined_object` is handed to the constructor.
adata_export = anndata.AnnData(
    X=combined_object.X,
    obs=combined_object.obs,
    var=combined_object.var,
    uns=combined_object.uns,
    obsm=combined_object.obsm,
    obsp=combined_object.obsp,
)

# Display the summary of the export object as the cell output.
adata_export

In [None]:
# Destination of the post-scNym object.
export_path = '/home/cartalop/He111/single_cell/lung/tb/caiy2020/CaiY2020_PBMCplus_TB_post-scnym_ctl220713.h5ad'

# Write the export object to disk.
adata_export.write(export_path)