## This notebook runs inference with a GEARS model trained on the Norman et al. 2019 dataset

- Download trained GEARS model and Dataloader from Dataverse
- Model is trained on Norman et al. 2019 (Science) dataset
- The examples below show how to make a perturbation outcome prediction and a genetic interaction (GI) prediction

In [None]:
import sys
# Append the parent directory to the module search path; presumably this lets the
# notebook import a local `gears` checkout one level up -- TODO confirm.
# Because the entry is appended (not inserted at the front), any `gears` found
# earlier on sys.path takes precedence over the one in '../'.
sys.path.append('../')

from gears import PertData, GEARS
from gears.utils import dataverse_download
from zipfile import ZipFile 

### Download saved model and dataloader

In [4]:
## Download dataloader from dataverse
dataverse_download('https://dataverse.harvard.edu/api/access/datafile/6979957', 'norman_umi_go.tar.gz')

## Extract and set up dataloader directory
import tarfile
with tarfile.open('norman_umi_go.tar.gz', 'r:gz') as tar:
    # The archive was fetched over the network, so extract it with the 'data'
    # filter when the running Python provides extraction filters: it rejects
    # members that would land outside the target directory (absolute paths,
    # '..' components, unsafe links). Interpreters without filter support keep
    # the original unfiltered extraction.
    if hasattr(tarfile, 'data_filter'):
        tar.extractall(filter='data')
    else:
        tar.extractall()

Found local copy...


In [5]:
## Download model from dataverse
dataverse_download('https://dataverse.harvard.edu/api/access/datafile/10457098', 'model.zip')

## Extract and set up model directory
# Bind the archive to a name other than `zip`: notebook cells share one global
# namespace, so `as zip` would shadow the builtin zip() in every later cell.
with ZipFile('model.zip', 'r') as model_archive:
    # Unpack into the current working directory.
    model_archive.extractall(path = './')

Found local copy...


### Load model and dataloader

In [6]:
# Where the downloaded dataset lives, and the names of the dataset / experiment.
data_path = './'
data_name = 'norman_umi_go'
model_name = 'gears_misc_umi_no_test'

# Build the PertData wrapper, load the extracted dataset directory, then set up
# the 'no_test' split (seed 1) and the dataloaders.
pert_data = PertData(data_path)
pert_data.load(data_path=f'{data_path}{data_name}')
pert_data.prepare_split(split='no_test', seed=1)
pert_data.get_dataloader(batch_size=32, test_batch_size=128)

# Create the GEARS model on CPU and load the checkpoint extracted to ./model_ckpt.
# NOTE(review): weight_bias_track=False presumably turns off Weights & Biases
# logging -- confirm against the GEARS API.
gears_model = GEARS(
    pert_data,
    device='cpu',
    weight_bias_track=False,
    proj_name='gears',
    exp_name=model_name,
)
gears_model.load_pretrained('./model_ckpt')

Found local copy...
Found local copy...
These perturbations are not in the GO graph and their perturbation can thus not be predicted
['RHOXF2BB+ctrl' 'LYL1+IER5L' 'ctrl+IER5L' 'KIAA1804+ctrl' 'IER5L+ctrl'
 'RHOXF2BB+ZBTB25' 'RHOXF2BB+SET']
Local copy of pyg dataset is detected. Loading...
Done!
Local copy of split is detected. Loading...
Done!
Creating dataloaders....
Done!


here1


In [8]:
# NOTE(review): this cell repeats the GEARS construction and checkpoint load that
# already ends the "Load model and dataloader" cell; it rebuilds `gears_model`
# from the same `pert_data` and `model_name`. Keep only if a fresh model object
# is intentionally wanted here.
gears_model = GEARS(
    pert_data,
    device='cpu',
    weight_bias_track=False,
    proj_name='gears',
    exp_name=model_name,
)
gears_model.load_pretrained('./model_ckpt')

### Make transcriptional outcome predictions

In [9]:
# Predict the outcome of perturbing CNN1 and CBL together.
# The argument is a list of perturbations, each given as a list of gene names.
# As the cell output shows, the result is a dict keyed by the joined gene names
# ('CNN1_CBL') whose value is a float32 array.
# NOTE(review): the array is presumably one predicted expression value per gene -- confirm.
gears_model.predict([['CNN1', 'CBL']])

{'CNN1_CBL': array([3.9031501e-03, 4.3718898e-01, 8.4673628e-02, ..., 3.5398278e+00,
        1.6265446e-08, 3.8483053e-01], dtype=float32)}

### Make GI outcome prediction

In [10]:
## Optional gene set: to reproduce the paper's results, download the same gene
## list used there. GI_predict also works when GI_genes_file is None.
dataverse_download(
    'https://dataverse.harvard.edu/api/access/datafile/6979958',
    'genes_with_hi_mean.npy',
)

## GI prediction for the CNN1 + CBL pair, restricted to the downloaded gene set.
gears_model.GI_predict(
    ['CNN1', 'CBL'],
    GI_genes_file='./genes_with_hi_mean.npy',
)

Downloading...
100%|██████████| 45.0k/45.0k [00:00<00:00, 245kiB/s] 


{'ts': TheilSenRegressor(fit_intercept=False, max_iter=1000,
                   max_subpopulation=100000.0, random_state=1000),
 'c1': 0.7979913890704244,
 'c2': 0.7735386501901355,
 'mag': 1.1113740596075303,
 'dcor': 0.8387027964205771,
 'dcor_singles': 0.7127935191939647,
 'dcor_first': 0.7813197735662497,
 'dcor_second': 0.7862153401509131,
 'corr_fit': 0.9263004394700668,
 'dominance': 0.013516186723244426,
 'eq_contr': 0.9937732497260564}