## multiDGD - gene2peak perturbation example

In [6]:
import multiDGD
import numpy as np
import anndata as ad

## Load a pre-trained model

See the [example_adata_bonemarrow](https://github.com/Center-for-Health-Data-Science/multiDGD/blob/main/tutorials/example_adata_bonemarrow.ipynb) tutorial.

In [19]:
import requests

# Download the pre-trained model checkpoint and its hyperparameters from figshare.
# Keys are the local file names to write; values are the figshare file ids.
figshare_url = 'https://api.figshare.com/v2/articles/23796198/files'
files = {
    'dgd_human_bonemarrow.pt':'41735907',
    'dgd_human_bonemarrow_hyperparameters.json':'41735904'
}

for file_name, file_id in files.items():
    file_url = f'{figshare_url}/{file_id}'
    # First request: file metadata, which carries the actual download URL.
    # A timeout keeps a stalled connection from hanging the notebook forever,
    # and raise_for_status() surfaces HTTP errors instead of failing later on
    # a missing 'download_url' key.
    metadata_response = requests.get(file_url, timeout=60)
    metadata_response.raise_for_status()
    file_response = metadata_response.json()
    file_download_url = file_response['download_url']
    # Second request: stream the payload to disk in chunks. The context manager
    # releases the connection even if writing fails; raise_for_status() prevents
    # an HTTP error page from being saved under the model's file name.
    with requests.get(file_download_url, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(file_name, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

# NOTE: './human_bonemarrow.h5ad' is not fetched by the loop above; it has to
# be present in the working directory already.
data = ad.read_h5ad('./human_bonemarrow.h5ad')
# Patch for broken data loader: export the train/val/test split labels to CSV.
data.obs['train_val_test'].to_csv('./_obs.csv')

In [3]:
# Read the AnnData object the model will be attached to.
data = ad.read_h5ad('./example_data.h5ad')

# Restore the model from its saved checkpoint.
# The original note here says the model "has to be trained on test data" —
# presumably the test-set representations must already be learned; TODO confirm.
# NOTE(review): the download loop in this notebook writes
# 'dgd_human_bonemarrow.pt' into the working directory, while this call looks in
# './models/' for 'dgd_bonemarrow_default' — verify which checkpoint is intended.
model = multiDGD.DGD.load(
    data=data,
    save_dir='./models/',
    model_name='dgd_bonemarrow_default',
)

Covariate model initialized as:

        Gaussian_mix_compture:
            Dimensionality: 2
            Number of components: 4
        
#######################
Training status
#######################
True


### Gene2Peak

This feature performs an in silico perturbation of the specified gene and reports the resulting changes in the model's predictions for all output features.

Currently, this is only supported on the test data. See the tutorial on training and testing an anndata object for details on the model and test data.

Right now, the only supported perturbation is silencing the given gene. Thus, a negative predicted change for a feature suggests that the feature is positively correlated with the gene's expression.

In [4]:
# Gene to perturb in silico.
gene_name = "ID2"

# Restrict to the cells labelled "test" in the train/val/test split column and
# take an independent copy so later steps do not operate on a view of `data`.
test_mask = data.obs["train_val_test"] == "test"
test_set = data[test_mask, :].copy()

In [5]:
# Run the gene2peak perturbation for `gene_name` on the test samples.
# The call returns a pair: the predicted changes and the samples of interest.
# NOTE(review): the saved output of this cell ends in a RuntimeError about
# tensors on cuda:0 and cpu — presumably the model and the test data end up on
# different devices; confirm and resolve before relying on the cells below.
predicted_changes, samples_of_interest = model.gene2peak(
    gene_name=gene_name,
    testset=test_set,
)

using 193 samples


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

In [6]:
# Pull the two components out of `predicted_changes` by position.
# Presumably index 0 holds the gene-expression (GEX) deltas and index 1 the
# chromatin-accessibility (ATAC) deltas — TODO confirm against gene2peak.
delta_gex, delta_atac = predicted_changes[0], predicted_changes[1]