In [1]:
import sys, os, argparse, warnings, time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from data import *
from util import *
from module import DeepMOI

In [4]:
def batch_idx(train_idx, minibatch):
    """To obtain batch index.
    train_idx (list): index of training dataset.
    minibatch (int): sample's number in each batch.

    Return:
        batch_idx (list): the element is list, i.e., index for each batch.
    """
    batch_idx, m = [], 0
    while True:
        if (m+1)*minibatch < len(train_idx):
            batch_idx.append(train_idx[m*minibatch:(m+1)*minibatch])
        else:
            batch_idx.append(train_idx[m*minibatch:])
            break
        m += 1
    return batch_idx


def data_split(labels, test_size):
    """To split dataset into training dataset and testing dataset.
    Args:
        labels (numpy): The labels of samples.
        test_size (float, 0-1): The proportion of test data.
    
    Return:
        (list) index of training data, index of testing data
    """
    test_number = int(len(labels) * test_size)
    idx = list(range(len(labels)))
    np.random.shuffle(idx)
    return idx[test_number:], idx[:test_number]



In [2]:
omics_files = ['./data/LGG/rna.csv.gz', './data/LGG/met.csv.gz']
label_file = './data/LGG/label.csv'
add_file = './data/LGG/mirna.csv.gz'
network_file = 'default'

print('[INFO] Reading dataset. There are {} omics data in total.\n'.format(len(omics_files)))
omics = read_omics(omics_files=omics_files, label_file=label_file, add_file=add_file)
graph, labels, add_features, id_mapping = build_graph(omics=omics, label_file=label_file, add_file=add_file, network_file=network_file)
omic_features = graph.ndata['h']

[INFO] Reading dataset. There are 2 omics data in total.

[INFO] The overlaping genes number between omics and ppi dataset is: 14803



In [3]:
pathways = read_pathways(id_mapping=id_mapping, file='./Pathway/pathway_genes.gmt')

# 模型参数

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
import dgl.nn.pytorch as dglnn
from dgl.nn import Set2Set

class DeepMOI(nn.Module):
    def __init__(self, in_dim, pathway, add_features=None):
        """
        in_dim: == omics' number
        hidden_dim: == 
        """
        super(DeepMOI, self).__init__()
        
        hidden_dim1 = 256
        hidden_dim2 = 64
        
        # GNN-1
        self.gin_lin1 = torch.nn.Linear(in_dim, in_dim*3)
        self.conv1 = dglnn.GINConv(self.gin_lin1, 'sum')
        self.sns1 = Set2Set(in_dim*3, 2, 1)
        
        # GNN-2
        self.gin_lin2 = torch.nn.Linear(in_dim*3, in_dim*8)
        self.conv2 = dglnn.GINConv(self.gin_lin2)
        self.sns2 = Set2Set(in_dim*8, 2, 1)
        
        # GNN-3
        self.gin_lin3 = torch.nn.Linear(in_dim*8, in_dim*16)
        self.conv3 = dglnn.GINConv(self.gin_lin3)
        self.sns3 = Set2Set(in_dim*16, 2, 1)
        
        # GNN-4
        self.gin_lin4 = torch.nn.Linear(in_dim*16, in_dim*32)
        self.conv4 = dglnn.GINConv(self.gin_lin4)
        self.sns4 = Set2Set(in_dim*32, 2, 1)
 
        self.ln_gpool = nn.Linear(in_dim*3*2 + in_dim*8*2 + in_dim*16*2 + in_dim*32*2, 1)
        
        # MLP
        if add_features == None:
            self.lin1 = nn.Linear(len(pathway), hidden_dim1)  # not including clinical features 
        else:
            add_num = add_features.shape[1]
            self.lin1 = nn.Linear(len(pathway) + add_num, hidden_dim1)  # including clinical features    
        self.lin2 = nn.Linear(hidden_dim1, hidden_dim2)
        self.lin3 = nn.Linear(hidden_dim2, 1)
        
        # other args
        self.pathway = pathway


    def forward(self, g, h, c=None):
        
        # subnetwork1: GRL layers
        h1 = F.relu(self.conv1(g, h))
        h2 = F.relu(self.conv2(g, h1))
        h3 = F.relu(self.conv3(g, h2))
        h4 = F.relu(self.conv4(g, h3))
        # subnetwork2: patyway layers
        with g.local_scope():
            g.ndata['h1'] = h1
            g.ndata['h2'] = h2
            g.ndata['h3'] = h3
            g.ndata['h4'] = h4
            # global pooling with Set2Set: output dim = 2*node_dim
            subgraphs = [dgl.node_subgraph(g, n) for n in self.pathway.values()]
            graphs_ = dgl.batch(subgraphs)
            readout1 =  self.sns1(graphs_, graphs_.ndata['h1'])
            readout2 =  self.sns2(graphs_, graphs_.ndata['h2'])
            readout3 =  self.sns3(graphs_, graphs_.ndata['h3'])
            readout4 =  self.sns4(graphs_, graphs_.ndata['h4'])
            readout = torch.stack([readout1, readout2, readout3, readout4], dim=1)
            readout = readout.reshape(readout.shape[0], -1)
            print(readout.shape)
            # compute pathway score
            x = self.ln_gpool(readout)
            
            # additional features
            if c != None:
                x = torch.cat([x, c], dim=0)
            
            # reshape
            x = x.reshape(1,-1)  # add features
            
            # linear-1
            x = nn.Tanh()(self.lin1(x))
            
            x = nn.Dropout(p=0.2)(x)
            
            # linear-2
            x = nn.ReLU()(self.lin2(x))

            # linear-3
            logit = nn.Sigmoid()(self.lin3(x))

            return logit

In [6]:
model = DeepMOI(in_dim=len(omics_files), pathway=pathways, add_features=add_features)

i = 0
model(g=graph, h=omic_features[:, i, :], c=add_features[i])

RuntimeError: stack expects each tensor to be equal size, but got [857, 12] at entry 0 and [857, 32] at entry 1

In [78]:
print('# Model parameters:', sum(param.numel() for param in model.parameters()))

# Model parameters: 590820


In [83]:
# to device
device = 'cuda'
graph = graph.to(device)
omic_features = omic_features.to(device)
if add_features != None:
    add_features = add_features.to(device)

In [107]:
model = model.to(device)

In [108]:
i = 0
model(g=graph, h=omic_features[:, i, :], c=add_features[i])

torch.Size([857, 384])


tensor([[0.5165]], device='cuda:0', grad_fn=<SigmoidBackward>)

In [102]:
logit = model(g=graph, h=omic_features[:, i, :], c=add_features[i])

torch.Size([857, 384])


In [104]:
logit.to(device='cpu')

tensor([[0.4264]], grad_fn=<CopyBackwards>)

# 固定部分参数

In [75]:
# 固定部分参数

for k,v in model.named_parameters():
    if k.startswith('lin'):
        v.requires_grad = False
        
n = 0
for param in model.parameters():
    if param.requires_grad == True:
        n += param.numel()
print(n)

152163


In [40]:
# 固定部分参数

for k,v in model.named_parameters():
    if k.startswith('lin'):
        v.requires_grad = False
        
n = 0
for param in model.parameters():
    if param.requires_grad == True:
        n += param.numel()
print(n)

227


In [42]:
for k,v in model.named_parameters():
    if  k.startswith('lin'):
        v.requires_grad = True
    else:
        v.requires_grad = False

n = 0
for param in model.parameters():
    if param.requires_grad == True:
        n += param.numel()
print(n)

438657


# Sequential