# Post-process after running Scenic plus

In [None]:
import os
import sys
import pickle
import tempfile
import logging as log
import warnings
from pathlib import Path

import scanpy as sc
from pycisTopic.cistopic_class import *
from pycisTopic.lda_models import *
from pycisTopic.clust_vis import *
from pycisTopic.topic_binarization import *
from pycisTopic.diff_features import *

from utils import load_cistopic_obj, save_cistopic_obj

In [None]:
# Configure the root logger to emit INFO-level records and above.
log.basicConfig(level=log.INFO)
# Ignore every FutureWarning raised from here on.
warnings.simplefilter("ignore", FutureWarning)

In [None]:
from IPython.display import display, Markdown

In [None]:
# Default scanpy figure settings for the plots produced in this notebook.
sc.settings.set_figure_params(
    dpi=150,
    frameon=False,
    figsize=(10, 10),
    facecolor="white",
)

## Parameters

In [None]:
# input infile_models: <scATAC/models/models_LDA.pkl>
# output cistopic_done: <done/cistopic.done>
# params cell_type_col: <cell_type>
# params work_dir: </lustre/scratch117/cellgen/team205/jp30/scenic_plus_test/fetal_lung_multiome>
# params tmp_dir: <~/mylustre/tmp_ray_spill>
# threads n_cores: <4>
# params topic_modeling_num_topic: <32>

In [None]:
# NOTE(review): the values below look like placeholders that are presumably
# overridden by the parameter header of this notebook -- confirm with the
# tooling that runs it.

# Pickled topic models; read further down with pickle.load.
infile_models = "/path/to/scATAC/models/models_LDA.pkl"

# Column of cistopic_obj.cell_data that is copied into the "celltype" column.
cell_type_col = "cell_type_obs_column"
# Root working directory; the scATAC/ and scRNA/ paths are derived from it.
work_dir = Path("/path/to/work_dir")
# NOTE(review): n_cores is not referenced in the visible part of this notebook.
n_cores = 4
# Passed to evaluate_models as select_model.
topic_modeling_num_topic = 32

# Freshly created temporary directory.
# NOTE(review): not referenced or removed in the visible part of this notebook.
tmp_dir = tempfile.mkdtemp()

In [None]:
# Directory layout below work_dir.
atac_path = work_dir.joinpath("scATAC")
qc_path = atac_path.joinpath("quality_control")
candidate_enhancer_path = atac_path.joinpath("candidate_enhancers")

# File locations derived from the directories above.
infile_rna_h5ad = work_dir.joinpath("scRNA", "anndata_metacells.h5ad")
file_cistopic_obj = str(atac_path.joinpath("cistopic_obj_filt.pkl"))

In [None]:
# Make sure the scATAC output directory exists. exist_ok=True keeps the cell
# safe to re-run and removes the check-then-create race of a separate
# os.path.exists() test (another job may create the directory in between).
os.makedirs(os.path.join(work_dir, 'scATAC'), exist_ok=True)

## 1) Load

In [None]:
log.info("load cistopic object")

# Load the cisTopic object stored as cistopic_obj_filt.pkl in the scATAC directory.
cistopic_obj = load_cistopic_obj(atac_path.joinpath("cistopic_obj_filt.pkl"))

In [None]:
# Log the string representation of the loaded object.
log.info("%s", cistopic_obj)

In [None]:
# When an annotation column is configured, copy it (as strings) into the
# fixed "celltype" column that the plotting and DAR cells refer to.
if cell_type_col:
    cell_annotation = cistopic_obj.cell_data[cell_type_col]
    cistopic_obj.cell_data["celltype"] = cell_annotation.astype(str)

log.info("%s", cistopic_obj.cell_data.columns)

In [None]:
log.info("load models...")

# NOTE: unpickling can execute arbitrary code; infile_models should only ever
# point at a model file produced by this pipeline.
with open(infile_models, mode="rb") as models_file:
    models = pickle.load(models_file)

## 2) Evaluate model

In [None]:
log.info("evaluate and select model")

# Evaluate the loaded models, keep the one selected via
# topic_modeling_num_topic, and save the metric plot in the scATAC directory.
# NOTE(review): the metric name 'Minmo_2011' is kept exactly as written;
# confirm it against the metric names accepted by pycisTopic.
model = evaluate_models(
    models,
    select_model=topic_modeling_num_topic,
    return_model=True,
    metrics=['Arun_2010', 'Cao_Juan_2009', 'Minmo_2011', 'loglikelihood'],
    plot_metrics=False,
    save=str(atac_path.joinpath("evaluate_topic_models.png")),
)

In [None]:
log.info("add model and save cistopic object")

# Attach the model returned by evaluate_models to the cisTopic object.
# The object is written to disk in the next cell.
cistopic_obj.add_LDA_model(model)

In [None]:
# Write the object back to the same file it was loaded from.
save_cistopic_obj(
    cistopic_obj,
    atac_path.joinpath("cistopic_obj_filt.pkl"),
)

In [None]:
# Log the string representation of the object after the model was added.
log.info("%s", cistopic_obj)

## 3) Plot UMAP

In [None]:
log.info("plot UMAP")

# Best effort: a failure is logged and the notebook carries on.
try:
    # The target='cell' argument was left disabled in this call.
    run_umap(cistopic_obj, scale=True)
except Exception as err:
    log.error(err)

In [None]:
# Plot the "celltype" annotation on the UMAP and save it as metadata.png.
# Errors are logged instead of stopping the notebook.
try:
    plot_metadata(
        cistopic_obj,
        reduction_name='UMAP',
        variables=['celltype'],
        save=str(atac_path.joinpath("metadata.png")),
    )
except Exception as err:
    log.error(err)

In [None]:
# We can also plot the cell-topic probabilities on the UMAP to visualize
# their cell type specificity. Errors are logged instead of stopping the
# notebook.
try:
    plot_topic(
        cistopic_obj,
        reduction_name='UMAP',
        save=str(atac_path.joinpath("topic_umap.png")),
    )
except Exception as err:
    log.error(err)

## 4) Binarise topics

In [None]:
# Progress marker for the topic binarisation step.
log.info("binarise topics")

In [None]:
# Binarise the topics with the 'otsu' method.
region_bin_topics_otsu = binarize_topics(cistopic_obj, method="otsu")

In [None]:
# Binarise the topics with the 'ntop' method, using ntop=3000.
region_bin_topics_top3k = binarize_topics(cistopic_obj, method="ntop", ntop=3000)

## 5) Calculate DARs

In [None]:
# Progress marker for the differentially accessible region (DAR) step.
log.info("calculate DARs")

In [None]:
# Impute accessibility without restricting cells or regions
# (both selections are None).
imputed_acc_obj = impute_accessibility(
    cistopic_obj,
    selected_cells=None,
    selected_regions=None,
    scale_factor=10 ** 6,
)

In [None]:
# Normalise the imputed accessibility scores.
normalized_imputed_acc_obj = normalize_scores(imputed_acc_obj, scale_factor=10 ** 4)

In [None]:
# Select highly variable regions from the normalised scores (no plot).
variable_regions = find_highly_variable_features(normalized_imputed_acc_obj, plot=False)

In [None]:
# Compute marker regions per "celltype" group, restricted to the variable
# regions. This is best effort: on failure an empty dict is stored so the
# save step further down still works.
try:
    markers_dict = find_diff_features(
        cistopic_obj,
        imputed_acc_obj,
        variable='celltype',
        var_features=variable_regions,
        split_pattern='-',
    )
except Exception:
    # Catch Exception rather than using a bare except, so that
    # KeyboardInterrupt/SystemExit still stop the notebook. log.exception
    # records the traceback, which keeps the real cause visible.
    markers_dict = {}
    log.exception("could not compute marker peaks... cell type info provided?")

## 6) Save object

In [None]:
# Progress marker for writing the result files.
log.info("save results")

In [None]:
# Make sure the output directory for the pickles below exists. exist_ok=True
# keeps the cell safe to re-run and removes the check-then-create race of a
# separate os.path.exists() test.
os.makedirs(os.path.join(work_dir, 'scATAC/candidate_enhancers'), exist_ok=True)

In [None]:
# Persist the otsu-binarised topics.
with candidate_enhancer_path.joinpath("region_bin_topics_otsu.pkl").open("wb") as out_file:
    pickle.dump(region_bin_topics_otsu, out_file)

In [None]:
# Persist the ntop-binarised topics.
with candidate_enhancer_path.joinpath("region_bin_topics_top3k.pkl").open("wb") as out_file:
    pickle.dump(region_bin_topics_top3k, out_file)

In [None]:
# Persist the marker dictionary (it may be empty if the DAR step failed).
with candidate_enhancer_path.joinpath("markers_dict.pkl").open("wb") as out_file:
    pickle.dump(markers_dict, out_file)

In [None]:
# Final progress marker: every cell above has run.
log.info("all done.")