In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import time

### Load the adjacency matrix and the weight matrix of the gene network and the disease network

In [None]:
# Adjacency ("adj") and weight ("wei") matrices of the gene (lncRNA) network
# and of the disease network.
#
# Forward slashes are used on purpose: in a normal string literal "\h", "\l"
# and "\d" are invalid escape sequences (deprecated, a SyntaxWarning on recent
# Python versions), and backslash-separated paths only resolve on Windows.
# NOTE(review): torch.load unpickles the file content; only load trusted files.
gene_adj = torch.load('./heterogeneous network/lncRNA-disease/lnc_adj.pth')

gene_wei = torch.load('./heterogeneous network/lncRNA-disease/lnc_wei.pth')

dis_adj = torch.load('./heterogeneous network/lncRNA-disease/dis_adj.pth')

dis_wei = torch.load('./heterogeneous network/lncRNA-disease/dis_wei.pth')

### load train sample, validation sample and test sample

In [None]:
def _read_sample_pairs(path):
    """Read one whitespace-separated ``(gene index, disease index)`` pair per line.

    Parameters
    ----------
    path : str
        Text file whose lines each start with two integer columns.

    Returns
    -------
    list[tuple[int, int]]
        The pairs in file order.

    Raises
    ------
    IndexError, ValueError
        If a line has fewer than two columns or a column is not an integer.
    """
    pairs = []

    # The context manager closes the file even if a line fails to parse.
    with open(path) as sample_file:

        for line in sample_file:

            # split() without arguments already discards the trailing newline.
            columns = line.split()

            pairs.append((int(columns[0]), int(columns[1])))

    return pairs


# Forward slashes are required here: with backslashes, "\train..." and
# "\test..." contain a TAB escape and "\valid..." a vertical-tab escape, so the
# file names were silently corrupted.
train_sample = _read_sample_pairs('./heterogeneous network/lncRNA-disease/train_lncdis.txt')

valid_sample = _read_sample_pairs('./heterogeneous network/lncRNA-disease/valid_lncdis.txt')

test_sample = _read_sample_pairs('./heterogeneous network/lncRNA-disease/test_lncdis.txt')

print(len(train_sample),len(valid_sample),len(test_sample))

### Load the conduit type group of every train, validation and test sample
0 denotes a scarce conduit, 1 denotes a unilateral conduit and 2 denotes a bilateral conduit.

In [None]:
def _read_group_ids(path):
    """Read one integer conduit-group id per line of ``path``.

    Returns the ids as a list in file order. ``int`` raises ``ValueError`` for
    a line that is not an integer (including an empty line).
    """
    # The context manager closes the file even if a line fails to parse.
    with open(path) as group_file:

        return [int(line) for line in group_file]


# Forward slashes avoid the invalid "\h" / "\l" escape sequences of the
# backslash form and also resolve on non-Windows systems.
train_group = _read_group_ids('./heterogeneous network/lncRNA-disease/ld_train group.txt')

valid_group = _read_group_ids('./heterogeneous network/lncRNA-disease/ld_valid group.txt')

test_group = _read_group_ids('./heterogeneous network/lncRNA-disease/ld_test group.txt')

print(len(train_group), len(valid_group), len(test_group))

### Store all data into the package structure to make it easier for CGNN to call the data

In [None]:
# Every package has the layout
#     [[gene_adj, gene_wei], [dis_adj, dis_wei], samples, conduit_groups]
# The validation and test packages hold the matrices as loaded, each with its
# own inner lists.
validation_package, test_package = (
    [[gene_adj, gene_wei], [dis_adj, dis_wei], samples, groups]
    for samples, groups in ((valid_sample, valid_group), (test_sample, test_group))
)

# The training package holds CUDA copies of the four matrices.
train_package = [
    [matrix.cuda() for matrix in (gene_adj, gene_wei)],
    [matrix.cuda() for matrix in (dis_adj, dis_wei)],
    train_sample,
    train_group,
]

In [None]:
def get_node_degree(adj_matrix,node):
    """Return the sum of row ``node`` of ``adj_matrix`` as a 0-dim tensor."""
    neighbour_row = adj_matrix[node,:]

    degree = torch.sum(neighbour_row)

    return degree

### Stage1: node learning
in_size is the dimension of the embedding of the previous layer ($l-1$).

out_size is the dimension of the embedding of the current layer ($l$).

pos is the index of network ($0$ denotes gene-gene network, $1$ denotes disease-disease network).

old_node_embedding is the embedding of the previous layer ($l-1$).

use_divce selects the device of the result: $1$ means CUDA, any other value means CPU.

In [None]:
class node_learning(nn.Module):
    """Stage 1 of a CGNN layer: update the node embeddings of one network.

    Two independent pairs of bias-free linear maps are held: pair 1 is used
    for ``pos == 0`` and pair 2 for any other ``pos``. For every node the new
    embedding is

        relu(self_update(h_node) + nei_update(weight_row_node @ H))

    where the neighbour term is only added for nodes whose adjacency row sums
    to a value greater than zero.
    """

    def __init__(self,
                 in_size,
                 out_size):
        """Create the linear maps from ``in_size`` to ``out_size`` features."""
        super(node_learning,self).__init__()

        # Creation order is kept so parameter order and random initialisation
        # are unchanged.
        self.nei_update1 = nn.Linear(in_size,out_size,bias=False)

        self.self_node_update1 = nn.Linear(in_size,out_size,bias=False)

        self.nei_update2 = nn.Linear(in_size,out_size,bias=False)

        self.self_node_update2 = nn.Linear(in_size,out_size,bias=False)

    def forward(self,
                pos,
                out_size,
                package,
                old_node_embedding,
                use_divce):
        """Compute the next-layer embeddings of every node of network ``pos``.

        Parameters
        ----------
        pos : int
            Index of the network; selects ``package[pos]``,
            ``old_node_embedding[pos]`` and the pair of linear maps.
        out_size : int
            Expected width of the result; must equal the ``out_size`` the
            layer was built with.
        package : sequence
            ``package[pos][0]`` is the adjacency matrix and ``package[pos][1]``
            the weight matrix of network ``pos``.
        old_node_embedding : sequence of tensors
            Previous-layer embeddings, one ``(nodes, in_size)`` tensor per
            network.
        use_divce : int
            ``1`` returns the result on CUDA, anything else on the CPU.

        Returns
        -------
        torch.Tensor
            ``(nodes, out_size)`` tensor of new node embeddings.

        Raises
        ------
        ValueError
            If ``out_size`` differs from the output width of the linear maps.
        """
        if pos == 0:

            nei_update, self_update = self.nei_update1, self.self_node_update1

        else:

            nei_update, self_update = self.nei_update2, self.self_node_update2

        embedding = old_node_embedding[pos]

        adj_matrix, weight_matrix = package[pos][0], package[pos][1]

        total_node = embedding.shape[0]

        # Self term for all nodes at once.
        self_information = self_update(embedding)

        if self_information.shape[1] != out_size:

            raise ValueError('out_size (%d) does not match the layer output width (%d)'
                             % (out_size, self_information.shape[1]))

        # Nodes with at least one neighbour (row sum of the adjacency matrix > 0).
        degree = torch.sum(adj_matrix[:total_node], dim=1)

        connected = torch.nonzero(degree > 0).reshape(-1)

        # Only the weight rows of connected nodes are read, so whatever is
        # stored in the rows of isolated nodes never enters the computation
        # or its gradients.
        nei_gather = torch.mm(weight_matrix[connected], embedding)

        nei_information = nei_update(nei_gather)

        # Out-of-place scatter-add of the neighbour term onto the connected rows.
        total_information = F.relu(self_information.index_add(0, connected, nei_information))

        if use_divce == 1:

            return total_information.cuda()

        return total_information.cpu()

### Stage2: conduit node learning
pre_size is the dimension of the embedding of the previous layer ($l-1$).

next_size is the dimension of the embedding of the current layer ($l$).

node_embedding is a list containing the embedding of gene node and disease node.

list[0] stores embeddings of gene nodes and list[1] stores embeddings of disease nodes.

use_divce selects the device of the result: $1$ means CUDA, any other value means CPU.

In [None]:
class conduit_node_learning(nn.Module):
    """Stage 2 of a CGNN layer: build one conduit embedding per sample.

    For a sample ``(g, d)`` of conduit group ``t`` (0, 1 or 2) the embedding is

        sigmoid(left_t(h_g)) * c + sigmoid(right_t(h_d)) * c,
        c = tanh(conduit_t(h_g + h_d))

    where ``h_g`` / ``h_d`` are the embeddings of gene node ``g`` and disease
    node ``d``, and every group has its own three linear maps.
    """

    def __init__(self,
                 pre_size,
                 next_size):
        """Create the per-group gate and conduit maps (``pre_size`` -> ``next_size``)."""
        super(conduit_node_learning,self).__init__()

        # Creation order is kept so parameter order and random initialisation
        # are unchanged.
        self.left_gate_update1 = nn.Linear(pre_size,next_size,bias=False)

        self.right_gate_update1 = nn.Linear(pre_size,next_size,bias=False)

        self.left_gate_update2 = nn.Linear(pre_size,next_size,bias=False)

        self.right_gate_update2 = nn.Linear(pre_size,next_size,bias=False)

        self.left_gate_update3 = nn.Linear(pre_size,next_size,bias=False)

        self.right_gate_update3 = nn.Linear(pre_size,next_size,bias=False)

        self.conduit_update1 = nn.Linear(pre_size,next_size,bias=True)

        self.conduit_update2 = nn.Linear(pre_size,next_size,bias=True)

        self.conduit_update3 = nn.Linear(pre_size,next_size,bias=True)

    def forward(self,
                package,
                next_size,
                node_embedding,
                use_divce):
        """Compute the conduit embedding of every sample in ``package``.

        Parameters
        ----------
        package : sequence
            ``package[2]`` is the list of ``(gene index, disease index)``
            samples and ``package[3]`` the conduit group (0, 1 or 2) of each
            sample.
        next_size : int
            Width of the returned embeddings.
        node_embedding : sequence of tensors
            ``node_embedding[0]`` holds the gene node embeddings and
            ``node_embedding[1]`` the disease node embeddings.
        use_divce : int
            ``1`` allocates the result on CUDA, anything else on the CPU.

        Returns
        -------
        torch.Tensor
            ``(len(package[2]), next_size)`` tensor of conduit embeddings.

        Raises
        ------
        ValueError
            If a sample has a group id other than 0, 1 or 2.
        IndexError
            If ``package[3]`` has fewer entries than ``package[2]``.
        """
        conduit_sample = package[2]

        total_sample = len(conduit_sample)

        if use_divce == 1:

            new_conduit_embedding = torch.zeros((total_sample,next_size)).cuda()

        else:

            new_conduit_embedding = torch.zeros((total_sample,next_size)).cpu()

        if total_sample == 0:

            return new_conduit_embedding

        conduit_group = [package[3][j] for j in range(total_sample)]

        # An unknown id used to surface as an AttributeError on an int.
        unknown_group = set(conduit_group) - {0, 1, 2}

        if unknown_group:

            raise ValueError('unknown conduit group id(s): %s'
                             % sorted(unknown_group, key=str))

        left_embedding, right_embedding = node_embedding[0], node_embedding[1]

        gene_index = torch.tensor([pair[0] for pair in conduit_sample],
                                  dtype=torch.long,
                                  device=left_embedding.device)

        dis_index = torch.tensor([pair[1] for pair in conduit_sample],
                                 dtype=torch.long,
                                 device=right_embedding.device)

        group_index = torch.tensor(conduit_group, device=left_embedding.device)

        # Endpoint embeddings of every sample, gathered once.
        left_input = left_embedding[gene_index]

        right_input = right_embedding[dis_index]

        gather_information = torch.add(left_input, right_input)

        group_layers = ((self.left_gate_update1, self.right_gate_update1, self.conduit_update1),
                        (self.left_gate_update2, self.right_gate_update2, self.conduit_update2),
                        (self.left_gate_update3, self.right_gate_update3, self.conduit_update3))

        for group_id, (left_update, right_update, conduit_update) in enumerate(group_layers):

            rows = torch.nonzero(group_index == group_id).reshape(-1)

            # Layers of a group that does not occur stay unused.
            if rows.numel() == 0:

                continue

            left_gate = torch.sigmoid(left_update(left_input[rows]))

            right_gate = torch.sigmoid(right_update(right_input[rows]))

            combined_feature = torch.tanh(conduit_update(gather_information[rows]))

            new_conduit_embedding[rows] = left_gate*combined_feature + right_gate*combined_feature

        return new_conduit_embedding

### The framework of CGNN
The dimensions of each layer are the same as those in the paper.

gene_node_embedding is the initial embedding of gene node.

dis_node_embedding is the initial embedding of disease node.

use_divce selects the device used for the computation: $1$ means CUDA, any other value means CPU.

output_thred denotes whether the intermediate conduit node embeddings are returned together with the prediction ($1$ means yes).

In [None]:
class conduitGNN(nn.Module):
    """Three stacked CGNN layers with two fusion steps.

    Each layer runs node learning on the gene network and on the disease
    network, then conduit node learning on the sample pairs. The conduit
    embeddings of the three layers (widths 64, 32 and 1) are fused by
    ``fuse_1`` and ``fuse_2``, each followed by a sigmoid, into one value per
    sample.
    """

    def __init__(self):
        super(conduitGNN,self).__init__()

        # Creation order is kept so parameter order and random initialisation
        # are unchanged.
        self.node_update_layer1 = node_learning(256,128)
        self.conduit_layer1 = conduit_node_learning(128,64)
        self.node_update_layer2 = node_learning(128,64)
        self.conduit_layer2 = conduit_node_learning(64,32)
        self.node_update_layer3 = node_learning(64,32)
        self.conduit_layer3 = conduit_node_learning(32,1)
        self.fuse_1 = nn.Linear(64,32,bias=True)
        self.fuse_2 = nn.Linear(32,1,bias=True)

    def _update_nodes(self, node_layer, out_size, package, node_embedding, use_divce):
        """Run ``node_layer`` on network 0 (gene) and network 1 (disease)."""
        return [node_layer(pos, out_size, package, node_embedding, use_divce)
                for pos in (0, 1)]

    def forward(self,
                gene_node_embedding,
                dis_node_embedding,
                package,
                use_divce,
                output_thred):
        """Predict one value in (0, 1) for every sample of ``package``.

        Parameters
        ----------
        gene_node_embedding, dis_node_embedding : torch.Tensor
            Initial node embeddings; the first node-learning layer expects 256
            features per node.
        package : sequence
            ``[[gene_adj, gene_wei], [dis_adj, dis_wei], samples, groups]``.
        use_divce : int
            ``1`` moves the embeddings to CUDA and computes there, any other
            value uses the CPU.
        output_thred : int
            ``1`` additionally returns the intermediate embeddings.

        Returns
        -------
        torch.Tensor or (dict, torch.Tensor)
            The ``(samples, 1)`` prediction; with ``output_thred == 1`` a dict
            with ``conduit_embedding_1``, ``conduit_embedding_2`` and
            ``fused_embedding1`` comes first.
        """
        if use_divce == 1:

            gene_node_embedding = gene_node_embedding.cuda()

            dis_node_embedding = dis_node_embedding.cuda()

        else:

            gene_node_embedding = gene_node_embedding.cpu()

            dis_node_embedding = dis_node_embedding.cpu()

        # A local list: forward no longer depends on (or mutates) a
        # module-level ``init_node_embedding`` that a later cell has to create.
        init_node_embedding = [gene_node_embedding, dis_node_embedding]

        ########## layer 1
        node_embedding_1 = self._update_nodes(self.node_update_layer1,
                                              128,
                                              package,
                                              init_node_embedding,
                                              use_divce)

        conduit_embedding_1 = self.conduit_layer1(package,
                                                  64,
                                                  node_embedding_1,
                                                  use_divce)

        ########## layer 2
        node_embedding_2 = self._update_nodes(self.node_update_layer2,
                                              64,
                                              package,
                                              node_embedding_1,
                                              use_divce)

        conduit_embedding_2 = self.conduit_layer2(package,
                                                  32,
                                                  node_embedding_2,
                                                  use_divce)

        fused_embedding1 = torch.sigmoid(self.fuse_1(conduit_embedding_1)+conduit_embedding_2)

        ########## layer 3
        node_embedding_3 = self._update_nodes(self.node_update_layer3,
                                              32,
                                              package,
                                              node_embedding_2,
                                              use_divce)

        conduit_embedding_3 = self.conduit_layer3(package,
                                                  1,
                                                  node_embedding_3,
                                                  use_divce)

        fused_embedding2 = torch.sigmoid(self.fuse_2(fused_embedding1)+conduit_embedding_3)

        ########## result
        output = fused_embedding2

        if output_thred == 1:

            output_dict = {'conduit_embedding_1': conduit_embedding_1,
                           'conduit_embedding_2': conduit_embedding_2,
                           'fused_embedding1': fused_embedding1}

            return output_dict, output

        return output

In [None]:
def Com_acc(output,lab):
    """Accuracy of thresholded predictions.

    Parameters
    ----------
    output : torch.Tensor
        Predicted probabilities, shape ``(N, 1)`` or ``(N,)``; values
        ``>= 0.5`` count as class 1.
    lab : torch.Tensor
        The ``N`` reference labels (0 or 1).

    Returns
    -------
    torch.Tensor
        0-dim tensor with the fraction of matching predictions.
    """
    # Both sides are flattened so a 1-D ``output`` is compared element-wise
    # instead of being broadcast against an (N, 1) label column (N x N result).
    pred = output.ge(0.5).float().reshape(-1)

    target = lab.reshape(-1)

    return (pred == target).float().mean()

In [None]:
def Com_recall(output,lab):
    """Recall of thresholded predictions on the positive (label 1) samples.

    Parameters
    ----------
    output : torch.Tensor
        Predicted probabilities; values ``>= 0.5`` count as class 1.
    lab : torch.Tensor or array-like
        Reference labels, one per prediction.

    Returns
    -------
    numpy.float64
        Fraction of label-1 samples predicted as 1; ``nan`` when there is no
        label-1 sample.
    """
    # detach() + cpu() make the conversion work for CUDA tensors and for
    # tensors that track gradients.
    pred = output.detach().ge(0.5).reshape(-1).cpu().numpy()

    if torch.is_tensor(lab):

        truth = lab.detach().reshape(-1).cpu().numpy()

    else:

        truth = np.asarray(lab).reshape(-1)

    posi_index = np.where(truth == 1)[0]

    total_posi = posi_index.shape[0]

    if total_posi == 0:

        # Undefined recall: return NaN explicitly instead of dividing by zero.
        return np.float64('nan')

    hit = np.count_nonzero(pred[posi_index])

    return np.float64(hit)/total_posi

In [None]:
def regu(model=None, fuse_coef=0.02, conduit_coef=0.005):
    """Weighted sum of the L2 norms of selected weight matrices.

    A parameter whose name contains ``'weight'`` contributes
    ``fuse_coef * ||W||_2`` if its name contains ``'fuse'`` and
    ``conduit_coef * ||W||_2`` if its name contains ``'conduit_update'``
    (a name matching both contributes twice).

    Parameters
    ----------
    model : torch.nn.Module, optional
        Model whose parameters are regularised; defaults to the module-level
        ``conduit_GNN``.
    fuse_coef, conduit_coef : float
        Coefficients of the two parameter families.

    Returns
    -------
    torch.Tensor
        0-dim regularisation loss (a zero tensor when no parameter matches).
    """
    if model is None:

        # Backward-compatible default: the model created by the notebook.
        model = conduit_GNN

    reg_loss = 0

    for name,param in model.named_parameters():

        if 'weight' not in name:

            continue

        if 'fuse' in name:

            reg_loss = reg_loss + fuse_coef*torch.norm(param,p=2)

        if 'conduit_update' in name:

            reg_loss = reg_loss + conduit_coef*torch.norm(param,p=2)

    if not torch.is_tensor(reg_loss):

        # Nothing matched: still hand back a tensor so callers can use
        # tensor methods such as .cuda() on the result.
        reg_loss = torch.zeros(())

    return reg_loss

### load initial node embedding for gene and disease

In [None]:
# Initial node embeddings of the gene (lncRNA) nodes and of the disease nodes.
# Forward slashes avoid the invalid "\h", "\l" and "\d" escape sequences of the
# backslash form and also resolve on non-Windows systems.
# NOTE(review): torch.load unpickles the file content; only load trusted files.
gene_node_embedding = torch.load('./heterogeneous network/lncRNA-disease/lnc_node_embedding.pth')

dis_node_embedding = torch.load('./heterogeneous network/lncRNA-disease/disForlnc_node_embedding.pth')

### load train label, validation label and test label

In [None]:
def _read_labels(path):
    """Read one integer label per line of ``path`` and return them as a list.

    ``int`` raises ``ValueError`` for a line that is not an integer
    (including an empty line).
    """
    # The context manager closes the file even if a line fails to parse.
    with open(path) as label_file:

        return [int(line) for line in label_file]


# Forward slashes avoid the invalid "\h" / "\l" escape sequences of the
# backslash form and also resolve on non-Windows systems.
train_label = _read_labels('./heterogeneous network/lncRNA-disease/ld_train_label.txt')

valid_label = _read_labels('./heterogeneous network/lncRNA-disease/ld_valid_label.txt')

test_label = _read_labels('./heterogeneous network/lncRNA-disease/ld_test_label.txt')

# The training labels live on the GPU; validation and test labels stay on the CPU.
tensor_train_label = torch.tensor(train_label).float().cuda()

tensor_vali_label = torch.tensor(valid_label).float()

tensor_test_label = torch.tensor(test_label).float()

print(len(train_label),len(valid_label),len(test_label))

### initializing CGNN model
init_node_embedding is a list to store initial embedding of gene and disease in conduitGNN.

In [None]:
# Build the model and move it to the GPU.
conduit_GNN = conduitGNN()
conduit_GNN = conduit_GNN.cuda()

# Module-level two-slot list (slot 0: gene, slot 1: disease) reserved for the
# initial node embeddings.
init_node_embedding = ['', '']

# Adam over all model parameters and binary cross-entropy on probabilities.
optm = torch.optim.Adam(params=conduit_GNN.parameters(), lr=0.01)
BCE = nn.BCELoss()

# List the names of all trainable parameters.
for parameter_name, _ in conduit_GNN.named_parameters():
    print(parameter_name)

### Run the following code to train the model and report the train accuracy, the validation accuracy, and the test accuracy and recall.

In [None]:
valid_acc_list,test_acc_list = [],[]
test_recall_list = []
# exper_num = ''
total_start_time = time.time()
max_valid_acc = 0

for epoch in range(50):

    # ---- one full-batch training step on the GPU ----
    conduit_GNN.train()
    conduit_GNN.cuda()

    start_time = time.time()

    train_output_matrix = conduit_GNN(gene_node_embedding,
                                      dis_node_embedding,
                                      train_package,
                                      use_divce = 1,
                                      output_thred = 0)#

    # Binary cross-entropy plus the L2 regularisation term from regu().
    loss = BCE(train_output_matrix.reshape(-1),tensor_train_label.cuda()) + regu().cuda()

    optm.zero_grad()

    loss.backward()

    optm.step()

    # Accuracy of the predictions made before this step's parameter update.
    train_acc = Com_acc(train_output_matrix,tensor_train_label.cuda())

    end_time = time.time()

    print('Epoch : %d, time : %d, loss : %.4f, train acc : %.4f'%\
          (epoch, int(end_time-start_time), loss.item(), train_acc.item()))

    if epoch > 10:#

        # ---- evaluation on the CPU ----
        conduit_GNN.cpu()

        conduit_GNN.eval()

        # No gradients are needed for evaluation; no_grad() avoids building
        # (and keeping alive) an autograd graph for these forward passes.
        with torch.no_grad():

            vali_output_matrix = conduit_GNN(gene_node_embedding,
                                             dis_node_embedding,
                                             validation_package,
                                             use_divce = 0,
                                             output_thred = 0)#

        validation_acc = Com_acc(vali_output_matrix,tensor_vali_label)

        valid_acc_list.append(validation_acc)

        print('Epoch : %d, validation acc : %.4f'%\
              (epoch, validation_acc.item()))

        # The test set is only scored when the validation accuracy improves.
        if validation_acc > max_valid_acc:

            max_valid_acc = validation_acc

#             state = {'CGNN':conduit_GNN.state_dict(),'optimizer':optm.state_dict(),'epoch':epoch}

#             torch.save(state, exper_num+' '+str(epoch)+' model parameter.pth')

            with torch.no_grad():

                test_output_matrix = conduit_GNN(gene_node_embedding,
                                                 dis_node_embedding,
                                                 test_package,
                                                 use_divce = 0,
                                                 output_thred = 0)#

            test_acc = Com_acc(test_output_matrix,tensor_test_label)#

            test_recall = Com_recall(test_output_matrix,tensor_test_label)

            test_acc_list.append(test_acc)

            test_recall_list.append(test_recall)

            print('Epoch : %d, test acc : %.4f, test recall : %.4f'\
                  %(epoch, test_acc.item(), test_recall))
#             torch.save(test_output_matrix, exper_num+' '+str(epoch)+' test probability output.pth')

total_end_time = time.time()
print('total time : %d'%int(total_end_time-total_start_time))