# Map barcodes of cistopic object and add metadata

In [None]:
import os
import sys
import pickle
import re
import logging as log
from pathlib import Path

import pandas as pd
import scanpy as sc
from pycisTopic.cistopic_class import *

import seaborn as sns
from IPython.display import display, Markdown

from utils import match_barcodes_to, load_cistopic_obj, save_cistopic_obj

In [None]:
# Configure the root logger so that messages of level INFO and above are emitted.
log.basicConfig(level=log.INFO)

## Parameters

In [None]:
# Path of the annotated RNA AnnData file; it is read with sc.read_h5ad below.
infile_rna_h5ad = "/path/to/rna.h5ad"

# Name of the .obs column that is copied into a 'sample_id' column.
# A falsy value (e.g. "" or None) skips that step.
infile_rna_sample_id_col = "sample_id_obs_column"

# Name of the .obs column that is copied (as sanitized strings) into a
# 'celltype' column. A falsy value (e.g. "" or None) skips that step.
infile_rna_celltype_col = "cell_type_obs_column"

# Base working directory; the cisTopic object is read from and written to
# its "scATAC" subdirectory.
work_dir = Path("/path/to/work_dir")

In [None]:
# Build the Markdown summary of the run parameters first, then render it
# in the notebook output.
parameter_summary = f"""
**parameters:**
- **input file RNA:** *{infile_rna_h5ad}*
- **anndata sample ID column:** *{infile_rna_sample_id_col}*
- **anndata cell type column:** *{infile_rna_celltype_col}*
- **working directory for output files:** *{work_dir}*
"""
display(Markdown(parameter_summary))

Derive the paths used below from the working directory.

In [None]:
# Directory (below the working directory) that holds the pickled cisTopic object.
atac_path = work_dir / "scATAC"

## Load anndata with annotation

In [None]:
# Translation table (code point -> replacement text) for str.translate.
# It is applied to the cell type labels below to rewrite the characters
# '+', '/', ' ' and '.'.
_replacements = (
    ("+", "pos"),
    ("/", "or"),
    (" ", "_"),
    (".", "_"),
)
id_map = {ord(symbol): substitute for symbol, substitute in _replacements}

In [None]:
# Read the annotated RNA data; only its per-cell annotation table (.obs)
# is needed from here on.
adata = sc.read_h5ad(infile_rna_h5ad)
cell_data = adata.obs

# Expose the configured sample ID column under the fixed name 'sample_id'.
if infile_rna_sample_id_col:
    cell_data["sample_id"] = cell_data[infile_rna_sample_id_col]

# Expose the configured cell type column under the fixed name 'celltype',
# converted to strings and with the characters listed in id_map rewritten.
if infile_rna_celltype_col:
    cell_data["celltype"] = (
        cell_data[infile_rna_celltype_col]
        .astype(str)
        .str.translate(id_map)
    )

# Drop the name of the full AnnData object; cell_data still references
# the annotation table.
del adata

## Load cistopic object

In [None]:
# Load the cisTopic object from "cistopic_obj.pkl" in the scATAC directory
# using the project helper from utils.
cistopic_obj = load_cistopic_obj(atac_path / "cistopic_obj.pkl")

In [None]:
# Print the string representation of the freshly loaded object for inspection.
print(cistopic_obj)

Match the barcodes of the cisTopic object to the barcodes of the RNA annotation.

In [None]:
# Barcodes currently stored on the cisTopic object.
from_bc = cistopic_obj.cell_names
# Barcodes used as the index of the RNA annotation table, as a plain list.
to_bc = cell_data.index.tolist()

# Map the cisTopic barcodes onto the RNA barcodes with the helper from utils.
# NOTE(review): the result is later assigned as the new cell names and as the
# cell_data index, so it presumably has one entry per cisTopic cell in the
# original order — confirm against match_barcodes_to.
target_barcodes = match_barcodes_to(from_bc, to_bc)

In [None]:
# Show the first five mapped barcodes as a quick sanity check.
barcode_preview = ", ".join(target_barcodes[:5])
print(f"first 5 new barcodes: {barcode_preview}")

In [None]:
# Replace the barcodes on the cisTopic object with the matched ones. Both the
# cell name list and the index of the cell metadata table are updated so that
# the two keep referring to the same identifiers.
cistopic_obj.cell_names = target_barcodes
cistopic_obj.cell_data.index = target_barcodes

## Add annotation to cistopic object

In [None]:
# Attach the RNA-derived annotation table to the cisTopic object.
# NOTE(review): presumably rows are joined on the barcodes assigned above —
# confirm against the pycisTopic add_cell_data documentation.
cistopic_obj.add_cell_data(cell_data)

In [None]:
# Display the cell metadata table of the cisTopic object as the cell output.
cistopic_obj.cell_data

## Save cistopic object

In [None]:
# Print the string representation of the updated object before saving it.
print(cistopic_obj)

In [None]:
# Save the updated object with the project helper from utils. The target is
# the same "cistopic_obj.pkl" path the object was loaded from.
# NOTE(review): this presumably overwrites the original file — confirm that is intended.
save_cistopic_obj(cistopic_obj, atac_path / "cistopic_obj.pkl")