In [1]:
import argparse
import dgl
import ast
from model import MCDHGN
import numpy as np
import pickle
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GATConv
import logging 
import datetime
from model import MySampler
from utils import EarlyStopping, setup_seed, generate_traning_batch,HeteroDotProductPredictor
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score, auc, precision_recall_curve,roc_curve

In [2]:
# Use one seed for every random number generator the notebook touches
# (PyTorch CPU, PyTorch CUDA, NumPy, Python's `random`, and DGL).
SEED = 42
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)
random.seed(SEED)
dgl.random.seed(SEED)
# Switch on cuDNN's deterministic flag as part of the reproducibility setup.
torch.backends.cudnn.deterministic = True

In [3]:
# Meta-paths used for sampling (see the commented-out MySampler call below).
# NOTE(review): presumably each inner list is a sequence of edge-type names
# in the heterogeneous graph — confirm against MySampler.
meta_paths = [['gg'],['ga','ag'],['gb','bg'],['gc','cg'],['gd','dg'],['ge','eg'],['gf','fg'],['gh','hg'],['gi','ig']]
graph_path = './data/network/hetero/new_9nodes_graph.bin'
graphs,_ = dgl.data.utils.load_graphs(graph_path)
g = graphs[0]
# Input features of the 'Gene' nodes.
features = g.ndata['feature']['Gene']
# To keep runs reproducible, fixing the random seeds is not enough: the
# intermediate random-walk sampling results and the train / validation / test
# split are also loaded from disk instead of being regenerated.
# The sampling results can be regenerated with the code below.
# gene_ids = g.nodes('Gene')
# my_sampler = MySampler(g, meta_paths, 128)
# _,gs = my_sampler.sample_blocks(gene_ids)
# print(gs)
# One pre-sampled block file is loaded per meta-path, so the count follows
# `meta_paths` rather than a hard-coded number.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
gs = []
for i in range(len(meta_paths)):
    loadpath = './Intermediate/blocks/{}gs.bin'.format(i)
    with open(loadpath,'rb') as f:
        gs.append(pickle.load(f))
# Another train / validation / test split can be generated with the code below.
# postivefile = './data/label/now_pos427.pkl'
# negtivefile = './data/label/now_neg427.pkl'
# train_batch,test_mask,test_label = generate_traning_batch(postivefile,negtivefile)
test_mask = torch.load('./Intermediate/label_set/mydatatest_mask.pt')
test_label = torch.load('./Intermediate/label_set/mydatatest_label.pt')
with open('./Intermediate/label_set/train_batch.pkl','rb') as f:
    train_batch = pickle.load(f)

In [4]:
# Choose the compute device: the first CUDA GPU when one is visible,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(device))
else:
    # `device` must be bound on this branch too; the `.to(device)` calls
    # below would otherwise raise NameError on a CPU-only machine.
    device = torch.device('cpu')
    print('No GPU available, using CPU instead.')
# Move every sampled block and the Gene node features onto the chosen device.
gs1 = [block.to(device) for block in gs]
features = features.to(device)

def evaluate(g, model, mask, label):
    """Run `model` in eval mode and score its predictions on the `mask` nodes.

    The model is called as ``model(g, features)`` where ``features`` is the
    module-level feature tensor, and is left in eval mode on return.

    Args:
        g: Graph input forwarded unchanged to the model.
        model: Network returning a 4-tuple whose second element holds the
            per-node class scores.
        mask: Index (or mask) selecting the rows of the predictions to score.
        label: Ground-truth class tensor for the selected rows.

    Returns:
        Tuple ``(val_loss, acc, auc, aupr, pred, alpha, beta)``: the
        unweighted NLL loss (a tensor), accuracy, ROC-AUC, average precision,
        the full prediction tensor moved to the CPU, and the third and fourth
        model outputs passed through untouched.
    """
    model.eval()
    with torch.no_grad():
        _, pred, alpha, beta = model(g, features)
        masked_logits = pred[mask]

        # Loss and accuracy are computed from the log-softmax of the logits.
        log_probs = F.log_softmax(masked_logits, dim=1)
        val_loss = F.nll_loss(log_probs, label)
        y_true = label.cpu().numpy()
        y_pred = np.argmax(log_probs.cpu().numpy(), axis=1)
        acc = accuracy_score(y_true, y_pred)

        # Ranking metrics score each sample by the sigmoid of its class-1 logit.
        # NOTE(review): this is not the softmax probability used for the loss,
        # and the two can rank samples differently; generate_curve scores the
        # same way, so confirm before changing either.
        pos_scores = torch.sigmoid(masked_logits).cpu().numpy()[:, 1]
        roc_auc = roc_auc_score(y_true, pos_scores)
        aupr = average_precision_score(y_true, pos_scores)
    return val_loss, acc, roc_auc, aupr, pred.cpu(), alpha, beta
def generate_curve(fpred, test_mask, test_label):
    """Build ROC and precision-recall curves for the test nodes.

    Args:
        fpred: Prediction tensor for all nodes; column 1 is taken as the
            positive-class logit.
        test_mask: Index (or mask) selecting the test rows of `fpred`.
        test_label: Ground-truth labels for the selected rows.

    Returns:
        Tuple ``(myauc, myaupr, fpr, tpr, p, r)``: area under the ROC curve,
        area under the precision-recall curve, and the arrays that define
        both curves.
    """
    y_true = test_label.cpu()
    selector = test_mask.cpu()
    # Element-wise sigmoid of the logits, then keep the class-1 column of the
    # selected rows as the ranking score.
    scores = torch.sigmoid(fpred.cpu()).detach().numpy()
    pos_scores = scores[selector][:, 1]
    fpr, tpr, _ = roc_curve(y_true, pos_scores)
    p, r, _ = precision_recall_curve(y_true, pos_scores)
    return auc(fpr, tpr), auc(r, p), fpr, tpr, p, r

There are 2 GPU(s) available.
Device name: Tesla V100S-PCIE-32GB


In [5]:
test_label = test_label.to(device)
# Per-class weights for the training loss: class 1 counts 1.904x as much as class 0.
# NOTE(review): presumably derived from the class imbalance of the labels — confirm.
loss_weight = torch.tensor([1.0, 1.904]).to(device)
model = MCDHGN(
    num_meta_paths=len(gs1),
    in_size=features.shape[1],
    hidden_size=256,
    out_size=2,
    num_heads=[4],
    dropout=0.4,
).to(device)

optimizer = torch.optim.Adam(
    model.parameters(), lr=0.01, weight_decay=0.0001)

# Only the first split of train_batch is used, and its train and validation
# parts are merged into one training set.
train_mask,val_mask,train_label,val_label = train_batch[0]
x_train = torch.cat((train_mask,val_mask),dim=0)
y_train = torch.cat((train_label,val_label),dim =0).to(device)

# Metrics of the best checkpoint so far (best = highest test AUPR).
best_auc = 0
best_acc = 0
best_aupr = 0
for epoch in range(1000):
    # evaluate() leaves the model in eval mode, so re-enable training mode
    # at the start of every epoch.
    model.train()
    _,pred,_,_ = model(gs1,features)
    log_probs = F.log_softmax(pred[x_train],dim=1)
    loss = F.nll_loss(log_probs, y_train,weight=loss_weight)
    optimizer.zero_grad()
    loss.backward()
    # Clip the global gradient norm to 1.0 before the optimizer step.
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    optimizer.step()
    # Evaluate every 10th epoch and keep the predictions of the best one.
    # NOTE(review): the checkpoint is selected on the test set itself, so the
    # reported best_* metrics are optimistically biased; an unbiased estimate
    # would need a held-out validation split.
    if epoch%10==0:
        test_loss,test_acc,test_auc,test_aupr,output,alpha,beta= evaluate(gs1,model,test_mask,test_label)
        if test_aupr>best_aupr:
            best_auc = test_auc
            best_aupr = test_aupr
            best_acc=test_acc
            fpred = output
            fbeta = beta
        print(epoch,test_auc,test_aupr,test_acc)
# Curves and areas for the best checkpoint's predictions.
myauc,myaupr,fpr,tpr,p,r = generate_curve(fpred,test_mask,test_label)
print(myauc,myaupr)

0 0.6249895885390638 0.43792229766222146 0.44891640866873067
10 0.5071630851241047 0.34413896278679545 0.6408668730650154
20 0.7194111277694487 0.5730146149634641 0.695046439628483
30 0.7681575878727303 0.6307111542419118 0.653250773993808
40 0.7616296018657338 0.6129125170906872 0.6996904024767802
50 0.8012972680326503 0.6847626639768907 0.7414860681114551
60 0.7926661669165418 0.6757923788086279 0.6873065015479877
70 0.8141970681326003 0.7214071303595105 0.7678018575851393
80 0.7954356155255705 0.6872938753723831 0.6888544891640866
90 0.8160919540229885 0.7318726557630653 0.760061919504644
100 0.8162064800932868 0.7304386064015758 0.7275541795665634
110 0.8235986173579878 0.7358267426219179 0.7445820433436533
120 0.8262639513576545 0.7317914252258212 0.7337461300309598
130 0.8177681992337165 0.7233530763873729 0.7708978328173375
140 0.8266804097951023 0.7361211919800152 0.7492260061919505
150 0.8354780942861902 0.7491559600637833 0.7476780185758514
160 0.8386744127936032 0.7534633071