In [3]:
import numpy as np
import pandas as pd
import scanpy as sc
import scvelo as scv
import torch
import regvelo
from regvelo import REGVELOVI,sanity_check,prior_GRN,abundance_test,TFscreening
from velovi import preprocess_data # install this
import cellrank as cr

import mplscience
import anndata
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.spatial.distance import cdist

In [4]:
# Load the AnnData object from disk (path is relative to this notebook);
# the bare name on the last line displays its summary repr.
adata_path = '../../output/annotated_subset_with_latent.h5ad'
adata = sc.read_h5ad(adata_path)
adata

AnnData object with n_obs × n_vars = 51879 × 36579
    obs: 'sample_id', 'tissue', 'patient_id', 'author', 'barcode', 'n_genes', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_20_genes', 'pct_counts_mt', 'pct_counts_ribo', 'pct_counts_hb', 'scDblFinder_score', 'scDblFinder_class', 'doublet_score', 'predicted_doublet', 'leiden', 'developing_human_brain_anno', 'developing_human_brain_conf_score', 'human_adultaged_hippocampus_anno', 'human_adultaged_hippocampus_conf_score', 'human_longitudinal_hippocampus_anno', 'human_longitudinal_hippocampus_conf_score', 'gbmap_ref_model_anno', 'gbmap_ref_model_conf_score', 'gbmap_ref_hvg_anno', 'gbmap_ref_hvg_conf_score', 'cell_type', 'cas_cell_type_score_1', 'cas_cell_type_name_1', 'cas_cell_type_label_1', 'cas_cell_type_score_2', 'cas_cell_type_name_2', 'cas_cell_type_label_2', 'cas_cell_type_score_3', 'cas_cell_type_name_3', 'cas_cell_type_label_3', 'cnv_leiden', 'cnv_score', 'each_cell_type'

In [8]:
# Read the prior gene-regulatory-network edge table. Later cells use its
# 'source' and 'target' columns; the bare name on the last line displays it.
prior_net_path = '../../output/grn_co_tumor.csv'
prior_net = pd.read_csv(prior_net_path)
prior_net

Unnamed: 0,source,target,coef_mean,coef_abs,p,-logp
0,PRRX1,CHI3L1,1.248090,1.248090,2.586625e-13,12.587267
1,BACH2,EGFR,1.212983,1.212983,4.766684e-18,17.321784
2,ZSCAN31,GRM5,1.207882,1.207882,2.315785e-17,16.635302
3,KLF12,NRXN1,1.165955,1.165955,7.527354e-13,12.123358
4,SMAD1,SNTG1,1.035499,1.035499,2.419078e-08,7.616350
...,...,...,...,...,...,...
6185,VDR,HTR1E,-0.000121,0.000121,2.881267e-04,3.540417
6186,VDR,GASAL1,-0.000110,0.000110,7.627598e-10,9.117612
6187,ZBTB7C,CCKBR,0.000101,0.000101,3.766331e-04,3.424082
6188,MAFF,CCKBR,-0.000097,0.000097,1.814739e-04,3.741186


In [12]:
# Read the TF annotation file; every line is treated as one known TF name.
# NOTE(review): absolute, user-specific path — consider a configurable
# annotations directory so the notebook runs on other machines.
with open('/Users/jiehoonk/DevHub/mnt/annotations/_TF.txt', 'r') as tf_file:
    known_tfs = tf_file.read().splitlines()
print('Known TFs: ', len(known_tfs))

# Keep only the names that occur in adata.var_names (in adata's own order).
is_known_tf = adata.var_names.isin(known_tfs)
TF_list = adata.var_names[is_known_tf]
print('TFs in adata: ', len(TF_list))

Known TFs:  1892
TFs in adata:  1839


In [9]:
# Build a dense binary adjacency matrix over every gene in `adata`:
# W[i, j] == 1 iff `prior_net` has an edge whose source is gene i and whose
# target is gene j. Edges with a source or target missing from `adata.var`
# are skipped.
all_genes = adata.var.index.tolist()
n_genes = len(all_genes)

# Gene name -> row/column position; if a name repeats, the last position wins.
gene_to_index = {gene: idx for idx, gene in enumerate(all_genes)}

# Vectorised lookup of both endpoints of every edge (replaces a row-by-row
# iterrows() loop). Names absent from `adata` map to NaN, hence the float arrays.
source_pos = prior_net['source'].map(gene_to_index).to_numpy(dtype=float)
target_pos = prior_net['target'].map(gene_to_index).to_numpy(dtype=float)
edge_in_adata = ~(np.isnan(source_pos) | np.isnan(target_pos))

# NOTE(review): this allocates a dense (n_genes, n_genes) matrix. With the
# 36,579 genes shown for `adata` and 64-bit ints that is roughly 10 GB —
# consider subsetting genes first, or a smaller dtype / sparse matrix, if
# downstream code allows it.
W = np.zeros((n_genes, n_genes), dtype=int)
W[
    source_pos[edge_in_adata].astype(int),
    target_pos[edge_in_adata].astype(int),
] = 1

# Label rows (sources) and columns (targets) with gene names for inspection.
binary_matrix = pd.DataFrame(W, index=all_genes, columns=all_genes)
binary_matrix

Unnamed: 0,A1BG,A1BG-AS1,A1CF,A2M,A2M-AS1,A2ML1,A2ML1-AS1,A2ML1-AS2,A2MP1,A3GALT2,...,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1,ZZZ3
A1BG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A1BG-AS1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A1CF,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A2M,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A2M-AS1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYG11A,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZYG11B,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZYX,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZZEF1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
