In [1]:
import warnings
warnings.filterwarnings("ignore")

## Link Prediction and Edge Weights

In [24]:
import torch
import json
import numpy as np

from gdata import GData
from trainer import Trainer
from evaluator import Evaluator
from gat import LPGAT
from soft_yen import soft_yen_refactored

In [3]:
# Embedding size: passed to GData here and to the LPGAT constructor later on.
emb_d = 6

# Category names handed to GData together with the node and edge CSV files
# (presumably the node types present in the graph -- confirm in gdata.py).
node_types = ['gene', 'disease', 'drug', 'cell', 'phenotype', 'pathway']

gd = GData(
    'new_nodes_full.csv',
    'new_edges_full.csv',
    node_types,
    emb_d,
    dummy_emb=True,
)

# NOTE(review): 0.1 is presumably a hold-out fraction -- confirm against
# GData.get_tg. The returned object exposes the feature matrix as `ds.x`.
ds = gd.get_tg(0.1)

In [4]:
# --- optimisation settings ---
num_ep = 5_000              # intended number of training epochs
lr = 1e-3                   # learning rate given to the Adam optimiser

# --- model dimensions ---
input_dim = ds.x.size(1)    # second dimension of the feature matrix ds.x
hidden_dim = out_dim = 64   # both forwarded to the LPGAT constructor

# --- sampling settings ---
neg2pos = 1.0               # forwarded to Trainer (presumably negatives per positive)
num_neg_eval = 1_000_000    # forwarded to tr.train as its second argument

In [7]:
# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so the notebook still runs on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Build the LPGAT model from the configured dimensions and move it to `device`.
model = LPGAT(input_dim, hidden_dim, emb_d, out_dim).to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

# Two loss functions are handed to the Trainer: binary cross-entropy on raw
# logits and mean squared error. Presumably the first scores link
# classification and the second the embedding output -- confirm in trainer.py.
criterion_cls = torch.nn.BCEWithLogitsLoss()
criterion_emb = torch.nn.MSELoss()

In [8]:
# Hand the model, optimiser, both loss functions, the neg2pos setting, the
# dataset and the target device to a single Trainer instance.
tr = Trainer(
    model,
    optimizer,
    criterion_emb,
    criterion_cls,
    neg2pos,
    ds,
    device,
)

In [9]:
# Pass the configured `num_ep` instead of repeating the literal 5000, so that
# changing the setting actually changes the run.
# NOTE(review): the first argument is presumably the epoch count (the training
# log is keyed by epoch and the old literal equalled num_ep) -- confirm in
# Trainer.train.
tr.train(num_ep, num_neg_eval)

epoch: 0 || losses: 5.496936321258545 0.6873712539672852 || prec: 0.788113534450531 || spec: 0.5060629844665527
epoch: 10 || losses: 4.946714401245117 0.5234881043434143 || prec: 0.9605477452278137 || spec: 0.6155419945716858
epoch: 20 || losses: 4.564230442047119 0.41073647141456604 || prec: 0.9317918419837952 || spec: 0.8059830069541931
epoch: 30 || losses: 4.339101314544678 0.2930484414100647 || prec: 0.9395274519920349 || spec: 0.9008449912071228
epoch: 40 || losses: 4.155532360076904 0.2508847117424011 || prec: 0.8987804651260376 || spec: 0.9427729845046997
epoch: 50 || losses: 3.963477611541748 0.22649098932743073 || prec: 0.9258397817611694 || spec: 0.9379519820213318
epoch: 60 || losses: 3.793639898300171 0.21247290074825287 || prec: 0.9276103973388672 || spec: 0.9386190176010132
epoch: 70 || losses: 3.667573928833008 0.20321522653102875 || prec: 0.9216566681861877 || spec: 0.9470779895782471
epoch: 80 || losses: 3.580294132232666 0.20510248839855194 || prec: 0.9454975724220276

In [10]:
# Obtain scores from the trained model; in the recorded run this call also
# printed a precision value.
# NOTE(review): presumably one score per entry of gd.edges_list, in the same
# order -- the export cell indexes both in lockstep. Confirm in
# Trainer.predict_scores.
scores = tr.predict_scores()

precision: 0.9340959191322327


In [14]:
# Save every edge together with its score to edges_weighted.json.

# Convert each score to a plain Python number so it can be JSON-serialised.
w0 = [score.item() for score in scores]

# Pair the first two fields of the i-th edge with the i-th score.
edges_weighted = [
    (edge[0], edge[1], w0[idx])
    for idx, edge in enumerate(gd.edges_list)
]

with open('edges_weighted.json', 'w') as out_file:
    json.dump(edges_weighted, out_file, indent=6)

## Path Generation and Evaluation

In [17]:
from igraph import Graph

In [18]:
# Read the weighted edge list back from edges_weighted.json; each entry is a
# three-element list whose last element is the edge weight.
with open('edges_weighted.json') as edges_file:
    edges_weighted = json.load(edges_file)

In [26]:
# Create a graph with gd.k vertices and add the edges from gd.edges_list.
G = Graph(n=gd.k)
G.add_edges(gd.edges_list)

# The last column of the loaded rows holds the edge weights.
# NOTE(review): this assumes the rows of edges_weighted are in the same order
# as gd.edges_list, since the edges come from one and the weights from the other.
w0 = np.array(edges_weighted)[:, -1]
G.es['weight'] = w0.copy()

In [27]:
# CSV file handed to the Evaluator (presumably the disease hypotheses to
# evaluate against -- confirm in evaluator.py).
hypotheses_csv = 'Hypothesis_diseases.csv'
ev = Evaluator(hypotheses_csv)

In [28]:
# Evaluate on the weighted graph, using soft_yen_refactored as the path routine
# and a penalty coefficient of 0.95; the call returns two results.
r1, r2 = ev.evaluate(
    G,
    gd.node_id_dict,
    soft_yen_refactored,
    penalty_coef=0.95,
)

In [29]:
# Arithmetic mean of the first evaluation result; the bare last expression is
# what the notebook displays.
r1_sum = sum(r1)
r1_sum / len(r1)

2.671875