In [None]:
import random
import numpy as np
import torch
import anndata as ad
import networkx as nx
import scanpy as sc
import scglue

In [None]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (the default generator plus the CUDA seeding entry points), then puts
    cuDNN into deterministic mode and switches off its benchmark mode.

    Args:
        seed: Seed value handed unchanged to each seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    # Favor reproducibility over cuDNN's autotuned kernel selection.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed(42)

In [None]:
# --- Load both modalities ----------------------------------------------------
# Both files must provide raw counts in ``layers['counts']``; it is read below.
rna = sc.read("./data/neurips-multiome/rna_hvg.h5ad")
atac = sc.read("./data/neurips-multiome/atac_hvf.h5ad")

# Work from raw counts: the "seurat_v3" HVG flavor expects count data in X.
rna.X = rna.layers['counts'].copy()
atac.X = atac.layers['counts'].copy()

# --- RNA: HVG selection, normalization, scaling, PCA -------------------------
sc.pp.highly_variable_genes(rna, n_top_genes=2000, flavor="seurat_v3")
sc.pp.normalize_total(rna)
sc.pp.log1p(rna)
sc.pp.scale(rna)
sc.tl.pca(rna, n_comps=100, svd_solver="auto")

# The normalization steps above transformed rna.X; put the raw counts back.
# atac.X has not been modified since it was set to counts above, so it needs
# no second reset (the previous duplicate assignment only copied the matrix
# again).
rna.X = rna.layers['counts'].copy()

# --- ATAC: LSI dimensionality reduction ---------------------------------------
scglue.data.lsi(atac, n_components=100, n_iter=15)

# Annotate RNA features from the GTF, keyed by its ``gene_name`` field.
scglue.data.get_gene_annotation(rna, gtf="./data/genes.gtf", gtf_by="gene_name")
# Not the last expression of the cell, so nothing is displayed; the lookup
# still raises if any of the three coordinate columns is missing.
rna.var.loc[:, ["chrom", "chromStart", "chromEnd"]].head()

# Derive ATAC coordinates from the peak names, split on ":" and "-" into
# chrom / start / end fields.
peak_fields = atac.var_names.str.split(r"[:-]")
atac.var["chrom"] = peak_fields.map(lambda fields: fields[0])
for column, position in (("chromStart", 1), ("chromEnd", 2)):
    atac.var[column] = peak_fields.map(
        lambda fields, i=position: fields[i]
    ).astype(int)

# Keep only the first occurrence of any duplicated ``var`` column name.
for adata in (rna, atac):
    adata.var = adata.var.loc[:, ~adata.var.columns.duplicated()]

# Cast object/category ``var`` columns to plain strings in both datasets.
# For RNA the "highly_variable" column is exempt; ATAC has no exemption.
for adata, exempt in ((rna, ("highly_variable",)), (atac, ())):
    for column in adata.var.columns:
        if column in exempt:
            continue
        if adata.var[column].dtype.name in ("object", "category"):
            adata.var[column] = adata.var[column].astype(str)

# Build the RNA-anchored guidance graph from the two annotated datasets and
# validate it against them before training.
guidance = scglue.genomics.rna_anchored_guidance_graph(rna, atac)
scglue.graph.check_graph(guidance, [rna, atac])

# PairedSCGLUEModel identifies paired cells through matching obs_names, which
# configure_dataset only records when use_obs_names=True (its default is
# False). Without the flag the paired model receives no pairing information.
# NOTE(review): assumes the same cell has the same obs_names entry in `rna`
# and `atac` — confirm against the input files.
scglue.models.configure_dataset(
    rna,
    "NB",
    use_highly_variable=True,
    use_layer="counts",
    use_rep="X_pca",
    use_batch='batch',
    use_obs_names=True,
)
scglue.models.configure_dataset(
    atac,
    "NB",
    use_highly_variable=True,
    use_rep="X_lsi",
    use_batch='batch',
    use_obs_names=True,
)

# Train the model; "glue" is passed through as the fit directory.
glue = scglue.models.fit_SCGLUE(
    {"rna": rna, "atac": atac},
    guidance,
    model=scglue.models.PairedSCGLUEModel,
    fit_kws={"directory": "glue"},
)

# Encode each modality with the trained model, store the result under
# obsm["latent"], and write the dataset to disk (RNA first, then ATAC).
outputs = (
    ("rna", rna, "./results/neurips-multiome-gluer.h5ad"),
    ("atac", atac, "./results/neurips-multiome-gluea.h5ad"),
)
for modality, adata, out_path in outputs:
    adata.obsm["latent"] = glue.encode_data(modality, adata)
    adata.write_h5ad(out_path)