In [1]:
import sys
sys.path.append('../')

from gears import PertData, GEARS



In [None]:
import pickle
import torch
import networkx as nx

Load the data. We use the Norman dataset (`data_name = 'norman'`) as an example.

In [3]:
import os
# Create the nested directory up front; exist_ok=True means no error is raised
# if it already exists, so this cell can be re-run safely.
# NOTE(review): presumably the location PertData uses for its split files — confirm.
os.makedirs('./data/norman/splits/data', exist_ok=True)

In [4]:
# Create the PertData object rooted at ./data and load the 'norman' dataset.
pert_data = PertData('./data')
pert_data.load(data_name = 'norman')
# Prepare the 'simulation' split with a fixed seed, then build the dataloaders
# (batch size 32, test batch size 128).
pert_data.prepare_split(split = 'simulation', seed = 1)
pert_data.get_dataloader(batch_size = 32, test_batch_size = 128)

Found local copy...
Found local copy...
Found local copy...
These perturbations are not in the GO graph and their perturbation can thus not be predicted
['RHOXF2BB+ctrl' 'LYL1+IER5L' 'ctrl+IER5L' 'KIAA1804+ctrl' 'IER5L+ctrl'
 'RHOXF2BB+ZBTB25' 'RHOXF2BB+SET']
Local copy of pyg dataset is detected. Loading...
Done!
Local copy of split is detected. Loading...
Simulation split test composition:
combo_seen0:9
combo_seen1:43
combo_seen2:19
unseen_single:36
Done!


here1


Creating dataloaders....
Done!


In [None]:
# Load the pickled graph stored in BioGRID_graph.pkl (path is relative to the
# notebook's working directory).
# NOTE(review): presumably a networkx graph, given the `G_nx` name and the
# `.edges(data=True)` call made on it later — confirm.
# SECURITY: pickle.load can execute arbitrary code while unpickling; only open
# files from a trusted source.
with open("BioGRID_graph.pkl", "rb") as f:
    G_nx = pickle.load(f)

# Mapping used to translate graph node labels into integer indices.
# NOTE(review): presumably perturbation name -> node index — verify against PertData.
node_map = pert_data.node_map_pert

def nx_to_gears_graph(G_nx, node_map):
    """Turn a graph's edge list into index and weight tensors.

    Edges are kept only when both endpoints are keys of ``node_map``; every
    other edge is dropped.

    Args:
        G_nx: Graph-like object whose ``edges(data=True)`` yields
            ``(u, v, attr_dict)`` triples.
        node_map: Mapping from node label to integer node index.

    Returns:
        A pair ``(edge_index, edge_weight)``. ``edge_index`` is a
        ``torch.LongTensor`` with two rows (mapped source indices, then
        mapped target indices) and one column per kept edge.
        ``edge_weight`` is a ``torch.FloatTensor`` with one entry per kept
        edge, taken from the edge's ``'weight'`` attribute and defaulting to
        1.0 when that attribute is absent.
    """
    sources, targets, weights = [], [], []
    for src, dst, attrs in G_nx.edges(data=True):
        # Skip edges that touch a node without an index in node_map.
        if src not in node_map or dst not in node_map:
            continue
        sources.append(node_map[src])
        targets.append(node_map[dst])
        weights.append(attrs.get('weight', 1.0))

    # Row 0 holds sources, row 1 holds targets: each column is one edge.
    edge_index = torch.LongTensor([sources, targets])
    edge_weight = torch.FloatTensor(weights)
    return edge_index, edge_weight

# Build the edge-index and edge-weight tensors from the loaded graph; the names
# match the G_go / G_go_weight keyword arguments they are later passed as.
G_go, G_go_weight = nx_to_gears_graph(G_nx, node_map)

Create a model object. If you use [wandb](https://wandb.ai), you can track model training and evaluation by setting `weight_bias_track` to `True` and specifying a `proj_name` and `exp_name` of your choice.

In [8]:
import os
# NOTE(review): this path repeats the 'data' segment ('data/data/norman'), unlike
# the './data' root given to PertData — presumably a workaround for where GEARS
# writes its files; confirm it is intentional.
os.makedirs('data/data/norman', exist_ok=True)

In [9]:
# Instantiate GEARS on the CPU with Weights & Biases tracking switched off.
gears_model = GEARS(pert_data, device = 'cpu', # cuda:7 not available
                        weight_bias_track = False, 
                        proj_name = 'pertnet_1_0', 
                        exp_name = 'pertnet_1_0')
# Initialize the network, passing the custom edge tensors built earlier.
# NOTE(review): presumably these override the graph GEARS would otherwise
# construct itself — confirm against model_initialize.
gears_model.model_initialize(
    hidden_size = 64,
    G_go = G_go,
    G_go_weight = G_go_weight
)

You can list the tunable parameters accepted by `model_initialize` via:

In [10]:
# Bare last expression: the returned dict of parameter descriptions is shown as the cell output.
gears_model.tunable_parameters()

{'hidden_size': 'hidden dimension, default 64',
 'num_go_gnn_layers': 'number of GNN layers for GO graph, default 1',
 'num_gene_gnn_layers': 'number of GNN layers for co-expression gene graph, default 1',
 'decoder_hidden_size': 'hidden dimension for gene-specific decoder, default 16',
 'num_similar_genes_go_graph': 'number of maximum similar K genes in the GO graph, default 20',
 'num_similar_genes_co_express_graph': 'number of maximum similar K genes in the co expression graph, default 20',
 'coexpress_threshold': 'pearson correlation threshold when constructing coexpression graph, default 0.4',
 'uncertainty': 'whether or not to turn on uncertainty mode, default False',
 'uncertainty_reg': 'regularization term to balance uncertainty loss and prediction loss, default 1',
 'direction_lambda': 'regularization term to balance direction loss and prediction loss, default 1'}

Train your model:

Note: For the sake of the demo, we train for a single epoch. To train the full model, set `epochs = 20`.

In [11]:
# Demo run: a single epoch with a learning rate of 1e-3.
gears_model.train(epochs = 1, lr = 1e-3)

Start Training...
Epoch 1 Step 1 Train Loss: 0.5168
Epoch 1 Step 51 Train Loss: 0.4161
Epoch 1 Step 101 Train Loss: 0.3633
Epoch 1 Step 151 Train Loss: 0.5028
Epoch 1 Step 201 Train Loss: 0.4465
Epoch 1 Step 251 Train Loss: 0.5855
Epoch 1 Step 301 Train Loss: 0.4471
Epoch 1 Step 351 Train Loss: 0.4266
Epoch 1 Step 401 Train Loss: 0.4478
Epoch 1 Step 451 Train Loss: 0.4614
Epoch 1 Step 501 Train Loss: 0.3947
Epoch 1 Step 551 Train Loss: 0.4357
Epoch 1 Step 601 Train Loss: 0.5128
Epoch 1 Step 651 Train Loss: 0.4784
Epoch 1 Step 701 Train Loss: 0.4481
Epoch 1 Step 751 Train Loss: 0.5780
Epoch 1 Step 801 Train Loss: 0.4769
Epoch 1 Step 851 Train Loss: 0.4084
Epoch 1 Step 901 Train Loss: 0.4370
Epoch 1 Step 951 Train Loss: 0.4889
Epoch 1 Step 1001 Train Loss: 0.4384
Epoch 1 Step 1051 Train Loss: 0.4466
Epoch 1 Step 1101 Train Loss: 0.6302
Epoch 1 Step 1151 Train Loss: 0.4552
Epoch 1 Step 1201 Train Loss: 0.4885
Epoch 1 Step 1251 Train Loss: 0.4568
Epoch 1 Step 1301 Train Loss: 0.4950
Epoch 

Save and load pretrained models:

In [None]:
# Save the model under 'test_model_BioGRID_1_0', then load it back from the same name.
gears_model.save_model('test_model_BioGRID_1_0')
gears_model.load_pretrained('test_model_BioGRID_1_0')

Make predictions for new perturbations:

In [None]:
# Each inner list is one perturbation to predict: a single gene (FEV) and a
# two-gene combination (FEV + AHR).
gears_model.predict([['FEV'], ['FEV', 'AHR']])

{'FEV': array([-5.7163749e-11,  5.9469450e-02,  4.8302099e-02, ...,
         3.4583302e+00,  1.1393473e-03, -7.8322862e-18], dtype=float32),
 'FEV_AHR': array([-8.48831075e-11,  9.95547995e-02,  7.28517845e-02, ...,
         3.35909700e+00,  1.02890411e-03, -1.29426535e-17], dtype=float32)}

Gene list can be found here:

In [None]:
# Show only the first five entries of the gene list.
gears_model.gene_list[:5]

['RP11-34P13.8', 'RP11-54O7.3', 'SAMD11', 'PERM1', 'HES4']