In [None]:
import os
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

### Select subgraph of PPI

In [None]:
# Name of the PPI subgraph to work on; it is spliced into the name of every
# per-subgraph data file loaded in the cells below.
graph_num = 'graph1'

### Load the node list of the subgraph
Each entry is the index of the node in the original (full) graph.

In [None]:
# Directory holding the PPI subgraph files. Built with os.path.join so the
# notebook is not tied to Windows path separators.
ppi_dir = os.path.join('.', 'homogeneous network', 'PPI')

# One integer node index per line; blank lines are skipped. The context
# manager closes the file even when a line fails to parse.
with open(os.path.join(ppi_dir, graph_num + ' graph node.txt')) as file_graph_node:
    graph_node = [int(line) for line in file_graph_node if line.strip()]

# Number of nodes in the selected subgraph.
node_num = len(graph_node)

### Load the adjacency matrix, normalized adjacency matrix and degree matrix

In [None]:
# Directory holding the PPI subgraph files. Built with os.path.join so the
# notebook is not tied to Windows path separators.
ppi_dir = os.path.join('.', 'homogeneous network', 'PPI')

# NOTE(review): unlike the .txt files, these file names have no space between
# graph_num and the rest of the name; kept exactly as in the original, confirm
# against the files on disk.

# Adjacency matrix of the subgraph.
ppi_matrix = torch.load(os.path.join(ppi_dir, graph_num + 'ppi adj matrix.pth'))

# Normalized adjacency matrix of the subgraph.
norm_ppi_matrix = torch.load(os.path.join(ppi_dir, graph_num + 'ppi norm matrix.pth'))

# Degree matrix of the subgraph (its diagonal is read into degree_list later).
degree_matrix = torch.load(os.path.join(ppi_dir, graph_num + 'degree matrix.pth'))

### load train sample and test sample

In [None]:
# Directory holding the PPI subgraph files. Built with os.path.join so the
# notebook is not tied to Windows path separators.
ppi_dir = os.path.join('.', 'homogeneous network', 'PPI')


def _read_sample_pairs(path):
    """Read one node-index pair per line and return a list of ``(int, int)`` tuples.

    The first two whitespace-separated tokens of every non-blank line are
    parsed. The file is closed even if a line is malformed.
    """
    with open(path) as sample_file:
        rows = (line.split() for line in sample_file if line.strip())
        return [(int(row[0]), int(row[1])) for row in rows]


train_sample = _read_sample_pairs(os.path.join(ppi_dir, graph_num + ' train_sample.txt'))

test_sample = _read_sample_pairs(os.path.join(ppi_dir, graph_num + ' test_sample.txt'))

print(len(train_sample),len(test_sample))

### load conduit type group for train sample and test sample
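0 denotes a 'LeftLeaning' (LL) conduit and 1 denotes a 'RightLeaning' (RL) conduit.

Note: `conduit_node_learning.forward` below selects the LL weights when the group value is 1 and the RL weights when it is 2, so check which encoding the group files actually use.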

In [None]:
# Directory holding the PPI subgraph files. Built with os.path.join so the
# notebook is not tied to Windows path separators.
ppi_dir = os.path.join('.', 'homogeneous network', 'PPI')

# One integer conduit-type code per line, aligned with the sample lists.
# Blank lines are skipped and the files are closed even if parsing fails.
with open(os.path.join(ppi_dir, graph_num + ' train_group.txt')) as file_train_group:
    train_group = [int(line) for line in file_train_group if line.strip()]

with open(os.path.join(ppi_dir, graph_num + ' test_group.txt')) as file_test_group:
    test_group = [int(line) for line in file_test_group if line.strip()]

print(len(train_group),len(test_group))

In [None]:
# Collect the diagonal entry of the degree matrix for every subgraph node,
# kept as a plain Python list so it can be indexed by node id.
degree_list = [degree_matrix[idx, idx] for idx in range(node_num)]

print(len(degree_list))

### Store all data into the package structure to make it easier for CGNN to call the data

In [None]:
# Package layout (shared by both lists):
#   [0] adjacency matrix, [1] normalized adjacency matrix,
#   [2] conduit samples,  [3] conduit groups, [4] per-node degree list.
# The training package keeps its matrices on the GPU ...
train_packge = [
    ppi_matrix.cuda(),
    norm_ppi_matrix.cuda(),
    train_sample,
    train_group,
    degree_list,
]

# ... while the test package keeps them on the CPU.
test_package = [
    ppi_matrix.cpu(),
    norm_ppi_matrix.cpu(),
    test_sample,
    test_group,
    degree_list,
]

In [None]:
def get_node_degree(adj_matrix,node):
    """Return the sum of row ``node`` of ``adj_matrix`` as a 0-d tensor."""
    neighbour_row = adj_matrix[node, :]
    return neighbour_row.sum()

### Stage1: node learning
in_size is the dimension of the embedding of the previous layer ($l-1$).

out_size is the dimension of the embedding of the current layer ($l$).

old_node_embedding is the node embedding of the previous layer ($l-1$).

use_divce denotes whether CUDA is used (1 = CUDA, any other value = CPU).

In [None]:
class node_learning(nn.Module):
    """Stage 1 of CGNN: compute new node embeddings from the previous layer.

    Every node combines a linear map of its own previous-layer embedding with
    a linear map of the aggregated embeddings (its row of the normalized
    adjacency matrix times all previous-layer embeddings), followed by ReLU.
    Two independent weight sets exist: set 1 is used for nodes whose entry in
    the degree list equals 1, set 2 for every other node.
    """

    def __init__(self,
                 in_size,
                 out_size):
        """Create the four bias-free linear maps (``in_size`` -> ``out_size``)."""
        super(node_learning, self).__init__()

        # Weight set 1: nodes whose degree-list entry equals 1.
        self.nei_update1 = nn.Linear(in_size, out_size, bias=False)
        self.self_node_update1 = nn.Linear(in_size, out_size, bias=False)

        # Weight set 2: all remaining nodes.
        self.nei_update2 = nn.Linear(in_size, out_size, bias=False)
        self.self_node_update2 = nn.Linear(in_size, out_size, bias=False)

    def forward(self,
                out_size,
                package,
                old_node_embedding,
                use_divce):
        """Return the ``(num_nodes, out_size)`` embedding matrix of this layer.

        Args:
            out_size: width of the produced embeddings.
            package: data package; ``package[0]`` is the adjacency matrix,
                ``package[1]`` the normalized adjacency matrix and
                ``package[4]`` the per-node degree list.
            old_node_embedding: ``(num_nodes, in_size)`` embeddings of the
                previous layer.
            use_divce: 1 to allocate the result on CUDA, anything else for CPU.
        """
        total_node = old_node_embedding.shape[0]

        new_node_embedding = torch.zeros((total_node, out_size))
        if use_divce == 1:
            new_node_embedding = new_node_embedding.cuda()
        else:
            new_node_embedding = new_node_embedding.cpu()

        # Read the graph from the package that was passed in, not from a
        # notebook-level global, so the module is self-contained.
        adj_matrix, norm_adj_matrix, degree_list = package[0], package[1], package[4]

        for node in range(total_node):
            node_feature = old_node_embedding[node, :]

            if degree_list[node] == 1:
                self_information = self.self_node_update1(node_feature)

                if torch.sum(adj_matrix[node, :]) == 0:
                    # Empty adjacency row: only the node's own embedding is used.
                    total_information = F.relu(self_information)
                else:
                    nei_gather = torch.mm(norm_adj_matrix[node, :].reshape(1, -1),
                                          old_node_embedding)
                    total_information = F.relu(self.nei_update1(nei_gather) +
                                               self_information)
            else:
                self_information = self.self_node_update2(node_feature)
                nei_gather = torch.mm(norm_adj_matrix[node, :].reshape(1, -1),
                                      old_node_embedding)
                total_information = F.relu(self.nei_update2(nei_gather) +
                                           self_information)

            # The neighbour branches yield shape (1, out_size); flatten so the
            # row assignment never depends on implicit shape squeezing.
            new_node_embedding[node, :] = torch.add(
                new_node_embedding[node, :], total_information.reshape(-1))

        return new_node_embedding

### Stage2: conduit node learning
pre_size is the dimension of the embedding of the previous layer ($l-1$).

next_size is the dimension of the embedding of the current layer ($l$).

node_embedding is the node embedding of $l$-th layer.

use_divce denotes whether CUDA is used (1 = CUDA, any other value = CPU).

In [None]:
class conduit_node_learning(nn.Module):
    """Stage 2 of CGNN: build one conduit embedding per node pair.

    For each sample ``(left, right)`` the two node embeddings are summed and
    passed through a tanh-activated linear map. The result is scaled by a
    sigmoid gate computed from the left node plus a sigmoid gate computed
    from the right node. Group 1 uses ``left_gate1`` / ``right_gate1`` /
    ``LL_conduit_update``; group 2 uses ``left_gate2`` / ``right_gate2`` /
    ``RL_conduit_update``.
    """

    def __init__(self,
                 pre_size,
                 next_size):
        """Create the gate and update maps (``pre_size`` -> ``next_size``)."""
        super(conduit_node_learning,self).__init__()

        # Gates used for group 1.
        self.left_gate1 = nn.Linear(pre_size,next_size,bias=False)
        self.right_gate1 = nn.Linear(pre_size,next_size,bias=False)

        # Gates used for group 2.
        self.left_gate2 = nn.Linear(pre_size,next_size,bias=False)
        self.right_gate2 = nn.Linear(pre_size,next_size,bias=False)

        # Combined-feature maps: LL for group 1, RL for group 2.
        self.LL_conduit_update = nn.Linear(pre_size,next_size,bias=True)
        self.RL_conduit_update = nn.Linear(pre_size,next_size,bias=True)

    def forward(self,
                package,
                next_size,
                node_embedding,
                use_divce):
        """Return the ``(num_samples, next_size)`` conduit embedding matrix.

        Args:
            package: data package; ``package[2]`` holds the node-index pairs
                and ``package[3]`` the group code of each pair.
            next_size: width of the produced conduit embeddings.
            node_embedding: node embeddings indexed by the pair entries.
            use_divce: 1 to allocate the result on CUDA, anything else for CPU.

        Raises:
            ValueError: if a group code is neither 1 nor 2.
        """
        conduit_sample = package[2]
        conduit_group = package[3]

        new_conduit_embedding = torch.zeros((len(conduit_sample),next_size))
        if use_divce == 1:
            new_conduit_embedding = new_conduit_embedding.cuda()
        else:
            new_conduit_embedding = new_conduit_embedding.cpu()

        for j, pair in enumerate(conduit_sample):
            left_feature = node_embedding[pair[0],:]
            right_feature = node_embedding[pair[1],:]
            gather_information = torch.add(left_feature,right_feature)

            group = conduit_group[j]
            if group == 1:
                left_gate = torch.sigmoid(self.left_gate1(left_feature))
                right_gate = torch.sigmoid(self.right_gate1(right_feature))
                combined_feature = torch.tanh(self.LL_conduit_update(gather_information))
            elif group == 2:
                left_gate = torch.sigmoid(self.left_gate2(left_feature))
                right_gate = torch.sigmoid(self.right_gate2(right_feature))
                combined_feature = torch.tanh(self.RL_conduit_update(gather_information))
            else:
                # Previously this fell through and crashed on ``int.reshape``;
                # fail with a message that names the offending sample instead.
                raise ValueError(
                    'unsupported conduit group %r for sample %d; expected 1 or 2'
                    % (group, j))

            conduit_embedding = left_gate*combined_feature + right_gate*combined_feature

            new_conduit_embedding[j,:] += conduit_embedding.reshape(-1)

        return new_conduit_embedding

### The framework of CGNN
The dimensions of each layer are the same as those in paper.

pre_node_embedding is the initial embedding of node.

use_divce denotes whether CUDA is used (1 = CUDA, any other value = CPU).

output_thred denotes whether the intermediate conduit node embeddings are returned together with the output (1 = yes).

In [None]:
class conduitGNN(nn.Module):
    """Three-layer CGNN that alternates node updates and conduit updates.

    The conduit embeddings of the three layers are fused step by step
    (linear map of the previous fusion + next conduit embedding, then
    sigmoid) into one value per sample.
    """

    def __init__(self):
        """Build the three node layers, three conduit layers and two fusion maps."""
        super(conduitGNN,self).__init__()

        # Layer 1: node 256 -> 128, conduit 128 -> 64.
        self.node_update_layer1 = node_learning(256, 128)
        self.conduit_layer1 = conduit_node_learning(128, 64)
        # Layer 2: node 128 -> 64, conduit 64 -> 32.
        self.node_update_layer2 = node_learning(128, 64)
        self.conduit_layer2 = conduit_node_learning(64, 32)
        # Layer 3: node 64 -> 32, conduit 32 -> 1.
        self.node_update_layer3 = node_learning(64, 32)
        self.conduit_layer3 = conduit_node_learning(32, 1)
        # Fusion maps that bring an earlier conduit embedding to the next width.
        self.fuse_1 = nn.Linear(64, 32, bias=True)
        self.fuse_2 = nn.Linear(32, 1, bias=True)

    def forward(self,
                pre_node_embedding,
                package,
                use_divce,
                output_thred):
        """Run the network on one data package.

        Args:
            pre_node_embedding: initial node embeddings.
            package: data package; ``package[2]`` holds the conduit samples.
            use_divce: 1 to run on CUDA, anything else for CPU.
            output_thred: 1 to also return the intermediate embeddings.

        Returns:
            The ``(num_samples, 1)`` output tensor, or
            ``(output_dict, output)`` when ``output_thred == 1``.
        """
        sample_count = len(package[2])

        if use_divce == 1:
            output = torch.zeros((sample_count, 1)).cuda()
            pre_node_embedding = pre_node_embedding.cuda()
        else:
            output = torch.zeros((sample_count, 1)).cpu()
            pre_node_embedding = pre_node_embedding.cpu()

        # ---- layer 1 ----
        node_embedding_1 = self.node_update_layer1(
            128, package, pre_node_embedding, use_divce)
        conduit_embedding_1 = self.conduit_layer1(
            package, 64, node_embedding_1, use_divce)

        # ---- layer 2 ----
        node_embedding_2 = self.node_update_layer2(
            64, package, node_embedding_1, use_divce)
        conduit_embedding_2 = self.conduit_layer2(
            package, 32, node_embedding_2, use_divce)

        # Fuse the conduit embeddings of layers 1 and 2.
        projected_1 = self.fuse_1(conduit_embedding_1)
        fused_embedding1 = torch.sigmoid(projected_1 + conduit_embedding_2)

        # ---- layer 3 ----
        node_embedding_3 = self.node_update_layer3(
            32, package, node_embedding_2, use_divce)
        conduit_embedding_3 = self.conduit_layer3(
            package, 1, node_embedding_3, use_divce)

        # Fuse the previous fusion with the conduit embedding of layer 3.
        projected_2 = self.fuse_2(fused_embedding1)
        fused_embedding2 = torch.sigmoid(projected_2 + conduit_embedding_3)

        output += fused_embedding2

        if output_thred != 1:
            return output

        output_dict = {
            'conduit_embedding_1': conduit_embedding_1,
            'conduit_embedding_2': conduit_embedding_2,
            'fused_embedding1': fused_embedding1,
        }
        return output_dict, output

In [None]:
def Com_acc(output,lab):
    """Return the accuracy of ``output`` thresholded at 0.5 against ``lab``.

    ``lab`` is reshaped to a column so it lines up with the column-shaped
    ``output``; the result is a 0-d float tensor.
    """
    predicted = (output >= 0.5).float()
    target = lab.reshape(-1, 1)
    hits = predicted == target
    return hits.float().mean()

In [None]:
def Com_recall(output,lab):
    """Return the recall of ``output`` thresholded at 0.5 against ``lab``.

    Args:
        output: tensor of scores; it is flattened, so a column is fine.
        lab: labels (tensor or array-like) where 1 marks a positive sample.

    Returns:
        ``numpy.float64`` fraction of positive samples predicted as 1, or
        NaN when ``lab`` contains no positive sample.
    """
    # Detach and move to the CPU first so CUDA / grad-tracking tensors can be
    # converted to NumPy as well.
    pred = output.detach().cpu().ge(0.5).float().reshape(-1).numpy()

    if torch.is_tensor(lab):
        labels = lab.detach().cpu().numpy()
    else:
        labels = np.asarray(lab)
    labels = labels.reshape(-1)

    positive_mask = labels == 1
    positive_count = int(positive_mask.sum())

    if positive_count == 0:
        # Same value the 0/0 division used to produce, without the warning.
        return np.float64('nan')

    hits = np.sum(pred[positive_mask] == labels[positive_mask], dtype=np.float64)
    return hits / positive_count

In [None]:
def regu(model=None, weight=0.005):
    """Return the weighted sum of L2 norms of selected weight tensors.

    A parameter is included when its name contains ``'weight'`` and either
    ``'conduit_update'`` or ``'node_update'``; each such tensor contributes
    ``weight * ||param||_2`` once.

    Args:
        model: module whose parameters are regularized. Defaults to the
            notebook-level ``conduit_GNN``, so ``regu()`` keeps working.
        weight: multiplier applied to every norm.

    Returns:
        The accumulated penalty (the int 0 when no parameter matches).
    """
    if model is None:
        model = conduit_GNN

    reg_loss = 0

    for name, param in model.named_parameters():
        if 'weight' not in name:
            continue
        if 'conduit_update' in name or 'node_update' in name:
            reg_loss += weight * torch.norm(param, p=2)

    return reg_loss

### initializing CGNN model

In [None]:
# Model lives on the GPU; Adam optimizes all of its parameters.
conduit_GNN = conduitGNN().cuda()
optm = torch.optim.Adam(conduit_GNN.parameters(), lr=0.01)

# Binary cross-entropy on the model's sigmoid output.
BCE = nn.BCELoss()

# List every learnable parameter name (regu() selects parameters by name).
for param_name, _ in conduit_GNN.named_parameters():
    print(param_name)

### load initial node embedding for protein
select initial embedding matrix of subgraph using graph_node

In [None]:
# Built with os.path.join so the path is not tied to Windows separators.
all_node_embedding = torch.load(
    os.path.join('.', 'homogeneous network', 'PPI', 'all_node_embedding.pth'))

# graph_node holds original-graph indices, so it selects the rows that belong
# to the chosen subgraph.
graph_node_tensor = torch.tensor(graph_node)

graph_node_embedding = all_node_embedding[graph_node_tensor,:]

print(graph_node_embedding.shape)

### load train label and test label

In [None]:
# Directory holding the PPI subgraph files. Built with os.path.join so the
# notebook is not tied to Windows path separators.
ppi_dir = os.path.join('.', 'homogeneous network', 'PPI')

# One integer label per line. Blank lines are skipped and the files are
# closed even if parsing fails.
with open(os.path.join(ppi_dir, graph_num + ' train_label.txt')) as file_train_label:
    train_label = [int(line) for line in file_train_label if line.strip()]

with open(os.path.join(ppi_dir, graph_num + ' test_label.txt')) as file_test_label:
    test_label = [int(line) for line in file_test_label if line.strip()]

# Float tensors, as required by BCELoss and the metric helpers.
tensor_all_train_label = torch.tensor(train_label).float()

tensor_all_test_label = torch.tensor(test_label).float()

print(len(train_label),len(test_label))

### Run the following code to get the train acc and test acc, recall.

In [None]:
# Full-batch training for 30 epochs. After epoch 4 the model is evaluated on
# the test package whenever the training accuracy beats every earlier epoch.
train_acc_list,test_acc_list = [],[]
test_recall_list = []
test_epoch = []
# exper_num = ''
total_start_time = time.time()

for epoch in range(30):

    conduit_GNN.train()
    conduit_GNN.cuda()

    train_start_time = time.time()

    # use_divce=1 (CUDA), output_thred=0 (return only the output tensor).
    train_output_matrix = conduit_GNN(graph_node_embedding,
                                      train_packge,
                                      1,
                                      0)

    train_acc = Com_acc(train_output_matrix,tensor_all_train_label.cuda())

    # Best accuracy of the *previous* epochs; in epoch 0 it is the current
    # accuracy itself, so the strict ">" below can never fire there.
    max_train_acc = max(train_acc_list) if train_acc_list else train_acc.item()

    train_acc_list.append(train_acc.item())

    train_end_time = time.time()

    print('Epoch : %d, train time : %d, train acc : %.4f'%(epoch, int(train_end_time-train_start_time), train_acc.item()))

    if epoch > 4 and train_acc.item() > max_train_acc:

        # Evaluation runs on the CPU with the CPU-resident test package.
        conduit_GNN.cpu()
        conduit_GNN.eval()

        test_start_time = time.time()

        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory and time.
        with torch.no_grad():
            test_output_matrix = conduit_GNN(graph_node_embedding,
                                             test_package,
                                             0,
                                             0)

        test_acc = Com_acc(test_output_matrix,tensor_all_test_label)
        test_recall = Com_recall(test_output_matrix,tensor_all_test_label)

        test_acc_list.append(test_acc.item())
        test_recall_list.append(test_recall)
        test_epoch.append(epoch)

        test_end_time = time.time()
        print('Epoch :',epoch,'test time',int(test_end_time-test_start_time),'test acc',test_acc.item(),'test recall',test_recall)

#         torch.save(train_output_matrix, '.\\'+graph_num+'\\'+exper_num+' '+str(epoch)+'train output.pth')

#         state = {'CGNN':conduit_GNN.state_dict(),'optimizer':optm.state_dict(),'epoch':epoch}
#         torch.save(state, '.\\'+graph_num+'\\'+exper_num+' '+str(epoch)+'model parameter.pth')

#         torch.save(test_output_matrix,'.\\'+graph_num+'\\'+exper_num+' '+str(epoch)+'test output.pth')
        del test_output_matrix

    loss_start_time = time.time()

    # Back on the GPU in training mode for the parameter update.
    conduit_GNN.train()
    conduit_GNN.cuda()

    # BCE on the flattened outputs plus the L2 penalty from regu().
    loss = BCE(train_output_matrix.reshape(-1),tensor_all_train_label.cuda()) + regu().cuda()
    optm.zero_grad()
    loss.backward()
    optm.step()

    loss_end_time = time.time()

    # Print the scalar value rather than the tensor repr with its grad_fn.
    print('Epoch :',epoch,'loss: ',loss.item(),'loss time: ',int(loss_end_time - loss_start_time))
    del train_output_matrix

    torch.cuda.empty_cache()

total_end_time = time.time()
print('total time ',int(total_end_time - total_start_time))