# Graph construction

In [1]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning, module="ignite.handlers.checkpoint")

In [2]:
import itertools
import os
from itertools import chain

import anndata as ad
import networkx as nx
import pandas as pd
import scanpy as sc
import scglue
import seaborn as sns
from matplotlib import rcParams

  from torch.distributed.optim import ZeroRedundancyOptimizer


In [3]:
# Plot styling: apply scGLUE's publication parameters first, then override
# the default figure size with a 4 x 4 square.
scglue.plot.set_publication_params()
rcParams.update({"figure.figsize": (4, 4)})

In [4]:
# Tell scGLUE where to look for Bedtools. This is only required when Bedtools
# cannot be found automatically; the path below is machine-specific, so adjust
# it to match your own installation.
scglue.config.BEDTOOLS_PATH = "/usr/local/bin"

In [5]:
# Load the RNA and ATAC AnnData objects from their "_pp" h5ad files
# (presumably the outputs of an earlier preprocessing step).
rna, atac = (
    ad.read_h5ad(h5ad_path)
    for h5ad_path in (
        "../data/GSE126074/GSE126074-RNA_pp.h5ad",
        "../data/GSE126074/GSE126074-ATAC_pp.h5ad",
    )
)

In [6]:
# Display the RNA AnnData summary (dimensions and available annotation fields).
rna

AnnData object with n_obs × n_vars = 9190 × 28930
    obs: 'domain', 'protocol', 'dataset', 'cell_type'
    var: 'chrom', 'chromStart', 'chromEnd', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts', 'gene_id', 'gene_type', 'mgi_id', 'havana_gene', 'tag', 'genome', 'n_counts', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std'
    uns: 'cell_type_colors', 'hvg', 'log1p', 'neighbors', 'pca', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'connectivities', 'distances'

In [7]:
# Display the ATAC AnnData summary (dimensions and available annotation fields).
atac

AnnData object with n_obs × n_vars = 9190 × 241757
    obs: 'domain', 'protocol', 'dataset', 'cell_type'
    var: 'chrom', 'chromStart', 'chromEnd', 'genome', 'n_counts'
    uns: 'cell_type_colors', 'neighbors', 'umap'
    obsm: 'X_lsi', 'X_umap'
    obsp: 'connectivities', 'distances'

## Construct prior regulatory graph

In [8]:
# Build the RNA-anchored guidance graph linking RNA and ATAC variables.
# NOTE(review): `atac.var` shows a `highly_variable` column later in this
# notebook that is absent right after loading -- presumably this call adds it
# in place; confirm against the scglue documentation.
guidance = scglue.genomics.rna_anchored_guidance_graph(
    rna,
    atac,
)
guidance

window_graph:   0%|          | 0/28930 [00:00<?, ?it/s]

<networkx.classes.multidigraph.MultiDiGraph at 0x7fde187a2470>

In [9]:
# Validate the guidance graph against both datasets; the INFO log lines in the
# output list each check that is performed.
scglue.graph.check_graph(
    guidance,
    [rna, atac],
)

[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] check_graph: All checks passed!


In [10]:
# Peek at the first five rows of the ATAC variable (peak) annotation table.
atac.var.head(n=5)

Unnamed: 0_level_0,chrom,chromStart,chromEnd,genome,n_counts,highly_variable
peaks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
chr1:3005833-3005982,chr1,3005833,3005982,mm10,2,False
chr1:3094772-3095489,chr1,3094772,3095489,mm10,123,False
chr1:3119556-3120739,chr1,3119556,3120739,mm10,424,False
chr1:3121334-3121696,chr1,3121334,3121696,mm10,107,False
chr1:3134637-3135032,chr1,3134637,3135032,mm10,7,False


In [11]:
# Save the guidance graph as GraphML (the ".gz" suffix selects gzip output).
# The result directory is not created anywhere else in this notebook, so make
# sure it exists first; otherwise the write fails on a fresh checkout.
guidance_path = "../result/GSE126074/GLUE_model/guidance.graphml.gz"
os.makedirs(os.path.dirname(guidance_path), exist_ok=True)
nx.write_graphml(guidance, guidance_path)

In [None]:
# Save both AnnData objects under "_prior" filenames with gzip compression.
rna.write_h5ad(
    "../data/GSE126074/GSE126074-RNA_prior.h5ad",
    compression="gzip",
)
atac.write_h5ad(
    "../data/GSE126074/GSE126074-ATAC_prior.h5ad",
    compression="gzip",
)