In [1]:
# -*- coding: utf-8 -*-
import torch
import numpy as np
import sys, copy, math, time, pdb, warnings, traceback
import pickle
import scipy.io as sio
import scipy.sparse as ssp
import os.path
import random
import argparse
from shutil import copy, rmtree, copytree
from torch.optim.lr_scheduler import ReduceLROnPlateau
from util_functions import *
from preprocessing_Dataset1 import *
from train_eval import *
from models import *
from torch_geometric.data import Data, Dataset
import traceback
import warnings
import sys
import xlwt
from torchsummary import summary
import gc
from sklearn.metrics import precision_recall_curve,roc_curve,roc_auc_score,f1_score,precision_score,recall_score,auc



if __name__ == '__main__':
    # Script entry point: builds the subgraph dataset for the chosen dataset,
    # runs K-fold cross-validation of gGATLDA on it, prints the per-metric
    # mean/std and stores the pooled predictions plus the final model on disk.

    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--no-train', action='store_true', default=False)
    parser.add_argument('--dataset', help='dataset name')
    parser.add_argument('--use-features', action='store_true', default=False)
    args = parser.parse_args()

    # Seed every random number generator used by the pipeline and force
    # deterministic cuDNN kernels so that runs are repeatable.
    seed = 2341
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    hop = 1

    if not args.no_train:
        # Construct model
        print('training.....')
        data_combo = (args.dataset, '', '')
        u_features, v_features, net, labels, u_indices, v_indices, num_list = load_data(args.dataset)
        print('preprocessing end.')
        adj = torch.tensor(net)
        if not args.use_features:
            # Side information is only forwarded to the subgraph extraction
            # when explicitly requested on the command line.
            u_features, v_features = None, None
        all_indices = (u_indices, v_indices)
        print('begin constructing all_graphs')
        all_graphs = extracting_subgraphs(net, all_indices, labels, hop, u_features, v_features, hop*2+1)
        mydataset = MyDataset(all_graphs, root='data/{}{}/{}/train'.format(*data_combo))
        print('constructing all_graphs end.')

        all_results = []
        # (printed label) in the order the summary lines are emitted.
        metric_names = ('auc', 'aupr', 'f1-score', 'recall', 'accuracy', 'precision')

        for count in range(1):
            # NOTE(review): 515 looks like the node feature width produced by
            # extracting_subgraphs for Dataset1 - confirm before reusing this
            # script with another dataset.
            model = gGATLDA(515, side_features=args.use_features, n_side_features=515)
            #model=model.cuda()
            print('########',count,' training.'+'#########')

            # K-fold cross-validation
            # NOTE(review): the same model instance keeps training across all
            # folds, so later folds are tested on samples seen during earlier
            # folds. Left unchanged because the saved model.pth relies on it;
            # re-create the model inside the fold loop for unbiased CV scores.
            K = 5
            truth = []
            predict = []
            fold_scores = {name: [] for name in metric_names}
            for i in range(K):
                print('*'*25,i+1,'*'*25)

                train_graphs, test_graphs = get_k_fold_data(K, i, mydataset)
                (test_auc, f1, accuracy, recall, precision,
                 fold_auc, fold_aupr, one_truth, one_predict) = train_multiple_epochs(
                    train_graphs, test_graphs, model, adj)
                truth.extend(one_truth)
                predict.extend(one_predict)
                fold_scores['auc'].append(fold_auc)
                fold_scores['aupr'].append(fold_aupr)
                fold_scores['f1-score'].append(f1)
                fold_scores['recall'].append(recall)
                fold_scores['accuracy'].append(accuracy)
                fold_scores['precision'].append(precision)

            print('#'*10,'Final k-fold cross validation results','#'*10)
            # Report the number of folds (K); the loop index would be K-1 here.
            cv_summary = {}
            for name in metric_names:
                mean_value = np.mean(fold_scores[name])
                std_value = np.std(fold_scores[name])
                cv_summary[name] = (mean_value, std_value)
                print('The %d-fold CV %s: %f +/- %f' % (K, name, mean_value, std_value))

            all_results.append([truth, predict])

        # Create the output directory up front so a long training run is not
        # lost to a missing folder when the results are written.
        os.makedirs('results', exist_ok=True)
        np.save('results/log_truth_Dataset1_CV1.npy', np.array(truth))
        np.save('results/log_predict_Dataset1_CV1.npy', np.array(predict))
        torch.save(model, 'model.pth')

    print("All end...")





ModuleNotFoundError: No module named 'util_functions'

In [None]:
import torch
import math
import torch.nn as nn
from torch_geometric.nn import MessagePassing,GATConv
import torch.nn.functional as F
from torch.nn import Linear, Conv1d,AdaptiveMaxPool2d
from torch_geometric.nn import GCNConv, RGCNConv, global_sort_pool, global_add_pool, global_max_pool,global_mean_pool
from torch_geometric.utils import dropout_adj
from util_functions import *
import pdb
import time
from torch.autograd import Variable
from ranger import Ranger
from ranger import RangerVA
from ranger import RangerQH



class gGATLDA(torch.nn.Module):
    """Graph network made of one GCN layer followed by a stack of attention layers.

    The input node features go through ``conv1`` (a ``GCNConv`` to 16
    channels) and then through the four layers in ``convs`` (8-head layers
    for the first three, a single-head 2-channel layer last), each followed
    by an ELU.

    Args:
        in_features: Width of the node feature vectors in ``data.x``.
        gconv: Layer class used for the attention layers; it is called with
            ``heads``/``dropout`` keywords, as ``GATConv`` accepts.
        latent_dim: Only ``latent_dim[0]`` (output width per head of the
            first attention layer) and ``sum(latent_dim)`` (input width of
            ``lin1``) are read. The following layers hard-code an input of
            ``16 * 8``, so ``latent_dim[0]`` has to stay 16.
        side_features: Accepted for interface compatibility; not used here.
        n_side_features: Accepted for interface compatibility; not used here.
    """

    def __init__(self, in_features, gconv=GATConv, latent_dim=(16, 16, 16, 16), side_features=False, n_side_features=0):
        super(gGATLDA, self).__init__()
        self.convs = torch.nn.ModuleList()
        self.conv1 = GCNConv(in_features, 16)
        self.convs.append(gconv(16, latent_dim[0], heads=8, dropout=0.2))
        self.convs.append(gconv(16*8, 16, heads=8, dropout=0.2))
        self.convs.append(gconv(16*8, 16, heads=8, dropout=0.2))
        self.convs.append(gconv(16*8, 2, heads=1, dropout=0.2))
        # conv2, lin1 and lin2 are not used by forward(); they are kept so
        # that parameter initialisation order and saved checkpoints stay
        # compatible with earlier versions of the model.
        self.conv2 = gconv(in_channels=2*8, out_channels=2, dropout=0.2, heads=1, concat=True)
        self.lin1 = Linear(3*sum(latent_dim), 8)
        self.lin2 = Linear(2*4*16, 8)

    def forward(self, data):
        """Return log-probabilities for every target pair in the batch.

        Args:
            data: Batch object exposing ``x`` (node features) and
                ``edge_index``.

        Returns:
            ``log_softmax`` over dim 1 of the concatenated embeddings of the
            nodes flagged in column 0 and column 1 of ``data.x``.
        """
        x, edge_index = data.x, data.edge_index
        x = F.elu(self.conv1(x, edge_index))
        for conv in self.convs:
            x = F.elu(conv(x, edge_index))
        # Rows whose first / second input feature equals 1 are selected;
        # presumably these mark the target lncRNA and disease node of each
        # subgraph - verify against the subgraph extraction code. The
        # concatenation requires both masks to select the same number of rows.
        users = data.x[:, 0] == 1
        items = data.x[:, 1] == 1
        x = torch.cat([x[users], x[items]], 1)
        return F.log_softmax(x, dim=1)

    def predict(self, data):
        """Alias of :meth:`forward`, kept for callers that expect ``predict``."""
        out = self.forward(data)
        return out









In [None]:
from __future__ import division
from __future__ import print_function

import numpy as np
import scipy.sparse as sp
import scipy.io as scio
import pickle as pkl
import os
import h5py
import pandas as pd
import random
import pdb
import math
from random import randint,sample
from sklearn.model_selection import KFold

def load_data(dataset):
    """Load the association matrix and similarity features and sample training pairs.

    Reads ``raw_data/<dataset>/training_test_dataset.mat`` (keys
    ``interMatrix`` and ``lncSim``) and ``raw_data/<dataset>/disSim.xlsx``.
    All non-zero entries of the association matrix become positive samples
    (in shuffled order) and an equally sized random subset of the remaining
    entries becomes the negative samples.

    Returns:
        A tuple ``(u_features, v_features, net, labels, u_idx, v_idx,
        num_list)`` in which the feature matrices and ``net`` carry an extra
        leading all-zero row (and, for ``net``, column), the index arrays are
        shifted by one accordingly, ``labels`` holds 1 for the positives
        followed by 0 for the negatives, and ``num_list`` contains the number
        of rows of the two similarity matrices.
    """
    print("Loading lncRNAdisease dataset")
    mat_path = 'raw_data/' + dataset + '/training_test_dataset.mat'
    mat_content = scio.loadmat(mat_path)
    net = mat_content['interMatrix']

    # lncRNA features and disease features
    lnc_sim = mat_content['lncSim']
    dis_sim_path = 'raw_data/' + dataset + '/disSim.xlsx'
    dis_sim = np.array(pd.read_excel(dis_sim_path, header=0))

    num_list = [len(lnc_sim), len(dis_sim)]

    # Each entity is described by its similarity row plus its association row.
    u_features = np.hstack((lnc_sim, net))
    v_features = np.hstack((net.T, dis_sim))

    # Prepend one all-zero row so that feature row k belongs to 1-based index k.
    zero_row_width = u_features.shape[0] + v_features.shape[0]
    u_features = np.vstack((np.zeros((1, zero_row_width), int), u_features))
    v_features = np.vstack((np.zeros((1, zero_row_width), int), v_features))

    num_lncRNAs = net.shape[0]
    num_diseases = net.shape[1]

    # Positive samples: every non-zero entry, visited in random order.
    pos_rows, pos_cols, _ = sp.find(net)
    n_pos = len(pos_rows)
    pos_order = random.sample(range(n_pos), n_pos)
    sample_pos = (pos_rows[pos_order], pos_cols[pos_order])
    print("the number of all positive sample:",len(sample_pos[0]))

    print("sampling negative links for train and test")
    # Negative samples: as many entries as positives, drawn from the
    # complement of the association matrix.
    complement = np.ones((num_lncRNAs, num_diseases)) - net
    neg_rows, neg_cols, _ = sp.find(complement)
    neg_order = random.sample(range(len(neg_rows)), n_pos)
    sample_neg = [neg_rows[neg_order], neg_cols[neg_order]]
    print("the number of all negative sample:", len(sample_neg[0]))

    labels = np.hstack([[1]*len(sample_pos[0]), [0]*len(sample_neg[0])])

    # Pad the association matrix with a leading zero row and zero column.
    top_row = np.zeros((1, net.shape[1]), int)
    print(top_row.shape)
    net = np.vstack([top_row, net])
    print("old net:",net.shape)
    left_col = np.zeros((net.shape[0], 1), int)
    net = np.hstack([left_col, net])
    print("new net:",net.shape)

    # Shift the sample coordinates to match the padded matrix.
    u_idx = np.hstack([sample_pos[0], sample_neg[0]]) + 1
    v_idx = np.hstack([sample_pos[1], sample_neg[1]]) + 1

    return u_features, v_features, net, labels, u_idx, v_idx,num_list

def load_predict_data(dataset, case_disease='renal carcinoma'):
    """Load a dataset and list the unobserved lncRNA pairs of one disease.

    Reads ``raw_data/<dataset>/training_test_dataset.mat`` (keys
    ``interMatrix`` and ``lncSim``), ``disSim.xlsx``, ``lncRNA_Name.txt`` and
    ``disease_Name.txt`` from the same folder.

    Args:
        dataset: Name of the sub-folder of ``raw_data`` to read.
        case_disease: Disease to build candidate pairs for. It is matched
            against the lines of ``disease_Name.txt`` ignoring surrounding
            whitespace, so a missing newline on the last line does not
            prevent a match.

    Returns:
        ``(u_features, v_features, net_new, labels, u_idx, v_idx,
        class_values, lncRNA_name, disease_name)``. ``net_new`` is the
        association matrix with an extra leading zero row and column;
        ``u_idx``/``v_idx`` are the 1-based coordinates of every zero entry in
        the column of ``case_disease`` and ``labels`` holds those entries.
        The name lists start with an empty-list placeholder so that position
        k corresponds to 1-based index k, and keep the raw file lines
        (including line terminators).

    Raises:
        ValueError: If ``case_disease`` is not listed in ``disease_Name.txt``.
    """
    print("Loading lncRNAdisease dataset")
    path_dataset = 'raw_data/' + dataset + '/training_test_dataset.mat'
    data = scio.loadmat(path_dataset)
    net = data['interMatrix']
    num_lncRNAs = net.shape[0]
    num_diseases = net.shape[1]

    # Shift the matrix by one row and one column (index 0 stays all-zero).
    net_new = np.zeros((num_lncRNAs+1, num_diseases+1), dtype=np.int32)
    net_new[1:, 1:] = net

    u_features = data['lncSim']
    disSim_path = 'raw_data/' + dataset + '/disSim.xlsx'
    disSim_data = pd.read_excel(disSim_path, header=0)
    v_features = np.array(disSim_data)

    u_features = np.hstack((u_features, net))
    v_features = np.hstack((net.T, v_features))
    # Prepend an all-zero row so feature row k belongs to 1-based index k.
    a = np.zeros((1, u_features.shape[0]+v_features.shape[0]), int)
    b = np.zeros((1, v_features.shape[0]+u_features.shape[0]), int)
    u_features = np.vstack((a, u_features))
    v_features = np.vstack((b, v_features))

    # Loading lncRNA names and disease names; the context managers make sure
    # the files are closed even if reading fails.
    with open('raw_data/' + dataset+'/lncRNA_Name.txt','r') as f:
        lncRNA_name = [[]] + f.readlines()
    with open('raw_data/' + dataset+'/disease_Name.txt','r') as f:
        disease_name = [[]] + f.readlines()
    print("lncRNA_name:",len(lncRNA_name))

    # Locate the first line naming the requested disease (skipping the
    # placeholder at position 0).
    wanted = case_disease.strip()
    idx = None
    for position, name in enumerate(disease_name):
        if position > 0 and name.strip() == wanted:
            idx = position
            break
    if idx is None:
        raise ValueError(
            "disease %r not found in raw_data/%s/disease_Name.txt" % (wanted, dataset))

    # Candidate pairs: every lncRNA without a recorded link to the disease.
    u_idx = [i for i in range(1, net_new.shape[0]) if net_new[i][idx] == 0]
    v_idx = [idx] * len(u_idx)
    labels = [net_new[i][idx] for i in u_idx]
    class_values = np.array([0, 1], dtype=float)

    return u_features, v_features, net_new, labels, u_idx, v_idx, class_values,lncRNA_name,disease_name




In [None]:
import time
import os
import math
import multiprocessing as mp
import numpy as np
import networkx as nx
import torch
import torch.nn.functional as F
from torch import tensor
from torch.optim import Adam
from sklearn.model_selection import StratifiedKFold
from torch_geometric.data import DataLoader, DenseDataLoader as DenseLoader
from tqdm import tqdm
import pdb
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from util_functions import PyGGraph_to_nx
from sklearn.metrics import roc_auc_score,accuracy_score
from sklearn import metrics
import random
from ranger import Ranger
from ranger import RangerVA
from ranger import RangerQH
from torch.optim import *

# Evaluation device: the GPU when PyTorch can see one, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

def get_k_fold_data(k,i,data):
    """Split a balanced dataset into the training and validation part of fold ``i``.

    ``data`` is expected to hold all positive samples in its first half and
    all negative samples in its second half. The class size is derived from
    ``len(data)`` instead of being fixed to one particular dataset; for the
    original 1242-sample dataset the result is unchanged. The same index
    window is cut out of both halves, so every fold stays balanced.

    Args:
        k: Number of folds, must be greater than 1.
        i: Zero-based index of the fold used for validation.
        data: Sliceable collection supporting ``len`` and ``+`` on slices.

    Returns:
        ``(data_train, data_valid)`` - positives followed by negatives in
        both parts.
    """
    assert k>1
    # With an odd number of samples the trailing one is ignored.
    class_size = len(data) // 2
    data_pos = data[0:class_size]
    data_neg = data[class_size:2*class_size]

    start = i*class_size//k
    end = (i+1)*class_size//k

    data_valid = data_pos[start:end] + data_neg[start:end]
    data_train_pos = data_pos[0:start] + data_pos[end:class_size]
    data_train_neg = data_neg[0:start] + data_neg[end:class_size]
    data_train = data_train_pos + data_train_neg
    return data_train,data_valid



def train_multiple_epochs(train_graphs, test_graphs, model,  adj):
    """Train ``model`` on one fold and score it on the held-out graphs.

    The model is optimised for 50 epochs with ``Ranger`` (learning rate
    0.001, batch size 64). After every epoch the mean training loss and the
    training AUC are printed. Once training is finished the test graphs are
    scored; the scores are min-max scaled and thresholded at 0.8 to obtain
    hard labels for F1, accuracy, recall and precision, while AUC and AUPR
    are computed from the raw scores.

    Args:
        train_graphs: Graphs used for optimisation.
        test_graphs: Graphs used for the final evaluation.
        model: Network to train; it is updated in place.
        adj: Not used by this function; kept for interface compatibility.

    Returns:
        ``(test_auc, f1, accuracy, recall, precision, auc_score, aupr_score,
        truth, predict)`` where ``truth`` and ``predict`` are the label and
        score rows returned by ``evaluate``.
    """
    print("starting train...")
    learning_rate = 0.001
    batch_size = 64
    epochs = 50
    train_loader = DataLoader(train_graphs, batch_size, shuffle=True, num_workers=0)
    test_loader = DataLoader(test_graphs, batch_size, shuffle=True, num_workers=0)
    optimizer = Ranger(model.parameters(), lr=learning_rate, weight_decay=0)
    # Batches have to live on the same device as the model weights.
    model_device = next(model.parameters()).device
    start_epoch = 1
    pbar = tqdm(range(start_epoch, epochs + start_epoch))

    for epoch in pbar:
        total_loss = 0

        model.train()
        for data in train_loader:
            data = data.to(model_device)
            optimizer.zero_grad()
            out = model(data)
            loss = F.cross_entropy(out, data.y.view(-1).long())
            loss.backward()
            # Weight the batch loss by the batch size so the epoch figure is
            # a per-graph average.
            total_loss += loss.item()*num_graphs(data)
            optimizer.step()
        train_loss = total_loss/len(train_loader.dataset)
        train_auc = evaluate(model,train_loader,1)
        print('\n Epoch: {:03d}, Loss: {:.5f}, Train Auc: {:.5f}'.format(epoch, train_loss, train_auc))

    test_auc, one_pred_result = evaluate(model,test_loader,2)
    truth = one_pred_result[1]
    predict = one_pred_result[0]

    # Min-max scale the scores to [0, 1] before applying the decision
    # threshold. When every score is identical the range is zero; nothing
    # can exceed the threshold then, so all samples are labelled negative
    # instead of dividing by zero.
    alpha = 0.8
    scores = np.asarray(predict)
    vmin = np.min(scores)
    score_range = np.max(scores) - vmin
    if score_range > 0:
        scaled = (scores - vmin) / score_range
    else:
        scaled = np.zeros(scores.shape)
    predict_f1 = [int(item > alpha) for item in scaled]

    f1 = metrics.f1_score(truth,predict_f1)
    accuracy = metrics.accuracy_score(truth,predict_f1)
    recall = metrics.recall_score(truth,predict_f1)
    precision = metrics.precision_score(truth,predict_f1)
    fpr, tpr, _ = metrics.roc_curve(truth,predict,pos_label=1)
    auc_score = metrics.auc(fpr,tpr)
    pr_precision, pr_recall, _ = metrics.precision_recall_curve(truth,predict,pos_label=1)
    aupr_score = metrics.auc(pr_recall,pr_precision)
    print('f1:',f1)
    print('accuracy:',accuracy)
    print('recall:',recall)
    print('precision:',precision)
    print('auc:',auc_score)
    print('aupr:',aupr_score)
    print('test_auc:',test_auc)
    return test_auc, f1,accuracy,recall,precision,auc_score,aupr_score,truth,predict

def evaluate(model,loader,flag):
    """Score every batch of ``loader`` with ``model`` and compute the ROC AUC.

    Column 1 of the model output is used as the score of the positive class.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        loader: Iterable of batches exposing ``y`` and accepted by ``model``.
        flag: ``1`` returns only the AUC. Any other value returns a pair; the
            collected scores and labels are included only when ``flag == 2``.

    Returns:
        ``auc`` when ``flag == 1``; otherwise ``(auc, one_pred_result)`` where
        ``one_pred_result`` is a 2-row array (scores, labels) for
        ``flag == 2`` and an empty list for any other value.
    """
    model.eval()
    # Run on whatever device holds the model weights; fall back to the
    # module-level default for a model without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    # Seed with empty float tensors so an empty loader still yields tensors
    # and the result dtype follows the usual promotion rules.
    score_chunks = [torch.Tensor()]
    label_chunks = [torch.Tensor()]
    with torch.no_grad():
        for data in loader:
            data = data.to(target_device)
            pred = model(data)
            # Bring results back to the CPU so they can be concatenated and
            # handed to NumPy / scikit-learn regardless of the model device.
            score_chunks.append(pred[:,1].detach().cpu())
            label_chunks.append(data.y.detach().cpu())

    predictions = torch.cat(score_chunks, 0).numpy()
    labels = torch.cat(label_chunks, 0).numpy()

    one_pred_result = []
    if flag==2:
        one_pred_result = np.vstack((predictions,labels))

    fpr,tpr,_ = metrics.roc_curve(labels,predictions,pos_label=1)
    auc = metrics.auc(fpr,tpr)
    if flag==1:
        return auc
    return auc,one_pred_result


def num_graphs(data):
    """Return how many graphs ``data`` represents.

    When ``data.batch`` is set, ``data.num_graphs`` is reported; otherwise
    the size of the first dimension of ``data.x`` is returned.
    """
    if data.batch is None:
        return data.x.size(0)
    return data.num_graphs




