In [None]:
import scanpy as sc
import muon as mo
import pandas as pd
import numpy as np
import torch
import random
import sys
sys.path.append("/home/zeng/zjy/software/MIMA") 
from src.model import Modality, MIMA  
import mudata as mu
from pathlib import Path

In [None]:
def set_seed(seed):
    """Seed every random number generator used here so runs are reproducible.

    The same ``seed`` is handed to Python's ``random`` module, NumPy's global
    RNG and PyTorch's CPU and CUDA seeding functions. cuDNN is then switched
    to deterministic mode with its benchmark mode turned off.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # cuDNN flags: give up the benchmark mode in exchange for determinism.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


set_seed(42)

In [None]:
# NeurIPS multiome (paired RNA + ATAC): train MIMA on both modalities and
# save the combined AnnData with the joint latent embedding in obsm['latent'].
rna = sc.read("./data/neurips-multiome/rna_hvg.h5ad")
atac = sc.read("./data/neurips-multiome/atac_hvf.h5ad")

# `batch_id` is copied back to each modality by index label and the latent
# matrix is attached to `adata` further down, so both modalities must list
# exactly the same cells in the same order. Fail here, before training,
# instead of ending up with NaN batch ids or latent rows on the wrong cells.
if not rna.obs_names.equals(atac.obs_names):
    raise ValueError(
        "rna and atac must contain the same cells in the same order "
        f"(rna: {rna.n_obs} cells, atac: {atac.n_obs} cells)"
    )

rna.var['modality'] = 'Gene Expression'
atac.var['modality'] = 'Peaks'

# Put the two feature sets side by side (cells stay on the obs axis) by
# concatenating the transposed objects and transposing the result back.
# NOTE(review): AnnData.concatenate is deprecated in favour of anndata.concat;
# it is kept because migrating changes how names/annotations are merged.
adata = rna.T.concatenate(atac.T).T

# Integer-coded batch label, shared with both modalities.
adata.obs['batch'] = adata.obs['batch'].astype('category')
adata.obs['batch_id'] = pd.Categorical(adata.obs['batch'].cat.codes)
rna.obs['batch_id'] = adata.obs['batch_id'].copy()
atac.obs['batch_id'] = adata.obs['batch_id'].copy()


mdata = mu.MuData({"rna": rna, "atac": atac})

RNA_mod = Modality(adata=mdata.mod["rna"], mod_name="rna", n_layers=2, n_hidden=300, z_dim=50, beta=0.01)
ATAC_mod = Modality(adata=mdata.mod["atac"], mod_name="atac", n_layers=2, n_hidden=300, z_dim=50, beta=0.01)
model = MIMA([RNA_mod, ATAC_mod], beta=0.001)

# All cells are passed as a single "paired" dataset; the modalities are copied
# so the objects held by `mdata` are not the ones handed to the model.
input_dict = {
    "paired": mu.MuData({
        "rna": mdata.mod["rna"].copy(),
        "atac": mdata.mod["atac"].copy()
    })
}

model.train_mima(
    data_dict=input_dict,
    n_epochs=50,
    train_size=0.9999,
    dataset_name='multiome',
    output_path="/home/zeng/zjy/software/MIMA"
)

# Request the GPU only when CUDA is actually available instead of
# unconditionally asking for one.
latent_dict = model.to_latent(input_dict, use_gpu=torch.cuda.is_available())
# NOTE(review): presumably "z_poe" is the joint (product-of-experts) embedding
# with one row per cell in input order - confirm against MIMA.to_latent.
adata.obsm['latent'] = latent_dict["paired"]["z_poe"]

out_path = Path('./results/neurips_multiome_mima.h5ad')
out_path.parent.mkdir(parents=True, exist_ok=True)
adata.write_h5ad(out_path)
print(f"保存完成：{out_path}")

In [None]:
# NeurIPS CITE-seq (paired RNA + protein): train MIMA on both modalities and
# save the combined AnnData with the joint latent embedding in obsm['latent'].
rna = sc.read("./data/neurips-cite/rna_hvg.h5ad")
protein = sc.read('./data/neurips-cite/protein.h5ad')

# `batch_id` is copied back to each modality by index label and the latent
# matrix is attached to `adata` further down, so both modalities must list
# exactly the same cells in the same order. Fail here, before training,
# instead of ending up with NaN batch ids or latent rows on the wrong cells.
if not rna.obs_names.equals(protein.obs_names):
    raise ValueError(
        "rna and protein must contain the same cells in the same order "
        f"(rna: {rna.n_obs} cells, protein: {protein.n_obs} cells)"
    )

# Put the two feature sets side by side (cells stay on the obs axis) by
# concatenating the transposed objects and transposing the result back.
# NOTE(review): AnnData.concatenate is deprecated in favour of anndata.concat;
# it is kept because migrating changes how names/annotations are merged.
adata = rna.T.concatenate(protein.T).T

# Integer-coded batch label, shared with both modalities.
adata.obs['batch'] = adata.obs['batch'].astype('category')
adata.obs['batch_id'] = pd.Categorical(adata.obs['batch'].cat.codes)
rna.obs['batch_id'] = adata.obs['batch_id'].copy()
protein.obs['batch_id'] = adata.obs['batch_id'].copy()


mdata = mu.MuData({"rna": rna, "protein": protein})

RNA_mod = Modality(adata=mdata.mod["rna"], mod_name="rna", n_layers=2, n_hidden=300, z_dim=50, beta=0.01)
PROTEIN_mod = Modality(adata=mdata.mod["protein"], mod_name="protein", n_layers=2, n_hidden=300, z_dim=50, beta=0.01)
model = MIMA([RNA_mod, PROTEIN_mod], beta=0.001)

# All cells are passed as a single "paired" dataset; the modalities are copied
# so the objects held by `mdata` are not the ones handed to the model.
input_dict = {
    "paired": mu.MuData({
        "rna": mdata.mod["rna"].copy(),
        "protein": mdata.mod["protein"].copy()
    })
}

model.train_mima(
    data_dict=input_dict,
    n_epochs=50,
    train_size=0.9999,
    dataset_name='cite',
    output_path="/home/zeng/zjy/software/MIMA"
)

# Request the GPU only when CUDA is actually available instead of
# unconditionally asking for one.
latent_dict = model.to_latent(input_dict, use_gpu=torch.cuda.is_available())
# NOTE(review): presumably "z_poe" is the joint (product-of-experts) embedding
# with one row per cell in input order - confirm against MIMA.to_latent.
adata.obsm['latent'] = latent_dict["paired"]["z_poe"]

out_path = Path('./results/neurips-cite-mima.h5ad')
out_path.parent.mkdir(parents=True, exist_ok=True)
adata.write_h5ad(out_path)
print(f"保存完成：{out_path}")