# Plot pycistopic object using muon

In [20]:
from pathlib import Path

import scanpy as sc
from muon import atac as ac

import matplotlib.pyplot as plt

from utils import load_cistopic_obj

## Params

In [2]:
# Earlier hard-coded input locations, kept commented out for reference only;
# the object path is now derived from `work_dir` further down.
# obj_path = "/lustre/scratch117/cellgen/team205/jp30/tmp/tmp_cistopic_obj_01.pkl"
# obj_path = "/lustre/scratch126/cellgen/team205/jp30/fetal_heart/mono_macro_multiome/working_dir_all/scATAC/cistopic_obj.pkl"
# NOTE(review): `snakemake` is neither imported nor defined in this notebook;
# presumably it is injected by Snakemake when the notebook runs as a workflow
# step. Running this cell in a plain kernel would raise NameError.
work_dir = Path(str(snakemake.params.work_dir))

# Value passed as `color=` to the first UMAP plot at the end of the notebook.
cell_type = "celltype"

## 1) Load

In [None]:
# Location of the pickled cisTopic object inside the working directory.
obj_path = work_dir.joinpath("scATAC", "cistopic_obj_filt.pkl")

In [15]:
# Load the cisTopic object through the project helper imported from `utils`.
# NOTE(review): judging by the log lines the helper prints, it appears to read
# the pickle and a companion matrix file separately — confirm in `utils`.
cistopic_obj = load_cistopic_obj(obj_path)

load object without matrices from: /lustre/scratch126/cellgen/team205/jp30/fetal_heart/mono_macro_multiome/working_dir_all/scATAC/cistopic_obj.pkl
load fragment matrix from: /lustre/scratch126/cellgen/team205/jp30/fetal_heart/mono_macro_multiome/working_dir_all/scATAC/cistopic_obj.npz
restore binary matrix...


In [16]:
# Print the object's text summary as a quick sanity check after loading.
print(cistopic_obj)

CistopicObject from project cisTopic with n_cells × n_regions = 1899 × 14685


In [17]:
# Wrap the cisTopic data in an AnnData container. The fragment matrix is
# transposed so that its rows pair with `cell_data` (used as obs) and its
# columns pair with `region_data` (used as var).
ad_atac = sc.AnnData(
    X=cistopic_obj.fragment_matrix.T,
    obs=cistopic_obj.cell_data,
    var=cistopic_obj.region_data,
)

In [19]:
# Display the AnnData summary (dimensions and annotation columns).
ad_atac

AnnData object with n_obs × n_vars = 1899 × 14685
    obs: 'cisTopic_nr_acc', 'cisTopic_log_nr_acc', 'cisTopic_log_nr_frag', 'cisTopic_nr_frag', 'latent_RT_efficiency', 'latent_cell_probability', 'latent_scale', 'sangerID', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'scrublet_score', 'scrublet_leiden', 'cluster_scrublet_score', 'doublet_pval', 'doublet_bh_pval', 'n_genes', 'n_counts', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'HB_score', 'donor_by_library-prep', 'multiplexed', 'SOC | status', 'SOC | log_prob_singleton', 'SOC | log_prob_doublet', 'batch_key', '_scvi_batch', '_scvi_labels', '2Jan_coarse_grain', '2Jan_mid_grain', '2Jan_fine_grain', 'leiden_scVI', 'FACSgate', 'sex', 'week', 'trimester', 'cell_type2', 'ne

## 2) Run

In [7]:
# TF-IDF transform of the accessibility matrix; nothing is reassigned, so the
# call is relied upon to update `ad_atac` itself.
ac.pp.tfidf(ad_atac, scale_factor=1e4)

# Annotate variable features using mean / dispersion cut-offs.
# NOTE(review): confirm whether `ac.tl.lsi` takes this annotation into account;
# if it does not, this call only adds columns to `ad_atac.var`.
hvg_cutoffs = {"min_mean": 0.05, "max_mean": 1.5, "min_disp": 0.5}
sc.pp.highly_variable_genes(ad_atac, **hvg_cutoffs)

# Latent semantic indexing. The next cell reads its results from
# `obsm["X_lsi"]`, `varm["LSI"]` and `uns["lsi"]["stdev"]`.
ac.tl.lsi(ad_atac)

In [8]:
# Discard the leading LSI component, which is typically associated with the
# number of counts rather than with biology.
# WARNING: this cell is not idempotent — every re-run strips one more
# component. Re-run the LSI cell first if this one has to be executed again.
without_first = slice(1, None)
ad_atac.obsm["X_lsi"] = ad_atac.obsm["X_lsi"][:, without_first]
ad_atac.varm["LSI"] = ad_atac.varm["LSI"][:, without_first]
ad_atac.uns["lsi"]["stdev"] = ad_atac.uns["lsi"]["stdev"][without_first]

In [9]:
# Build the neighbourhood graph on the trimmed LSI embedding stored under
# `obsm["X_lsi"]`.
sc.pp.neighbors(ad_atac, use_rep="X_lsi")

# Cluster the cells; the "leiden" labels are used to colour the last plot.
sc.tl.leiden(ad_atac)

# Compute the UMAP embedding drawn by the plotting cells below.
sc.tl.umap(ad_atac)

In [None]:
# UMAP coloured by the annotation named in `cell_type`.
# The plot is best-effort so that a failure here (for example if that
# annotation is not available) does not abort the notebook, but the reason is
# reported instead of being silently discarded.
try:
    with plt.rc_context({"figure.figsize": (10, 10)}):
        sc.pl.umap(ad_atac, color=cell_type, size=100, alpha=0.6)
except Exception as err:
    print(f"Skipping UMAP coloured by {cell_type!r}: {type(err).__name__}: {err}")

In [None]:
# UMAP coloured by the Leiden clusters, with cluster labels drawn on the data.
# The plot is best-effort so that a failure here does not abort the notebook,
# but the reason is reported instead of being silently discarded.
try:
    with plt.rc_context({"figure.figsize": (10, 10)}):
        sc.pl.umap(ad_atac, color="leiden", size=100, alpha=0.6, legend_loc="on data")
except Exception as err:
    print(f"Skipping UMAP coloured by 'leiden': {type(err).__name__}: {err}")