In [1]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import DataLoader   #包装可迭代的数据
import torch.utils.data as Data 

from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc  ###计算roc和auc

In [2]:
#用pytorch构建神经网络
#为了在PyTorch中定义一个神经网络，我们创建一个继承自nn.Module的类。我们在init函数中定义网络
#的层，并在forward函数中指定数据将如何通过网络。为了加速神经网络的操作，如果有GPU的话，我
#们把它移到GPU上

device="cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

class CnncNet(nn.Module):
    def __init__(self):
        super(CnncNet,self).__init__()
        #self.flatten=nn.Flatten()
        self.conv=nn.Sequential(
            nn.Conv2d(in_channels=1,out_channels=32,kernel_size=3,padding=1),    # 输入(1,1,32,32)     输出 (1,32,32,32)  第一个1代表批量大小
            nn.ReLU(),   
            nn.Conv2d(in_channels=32,out_channels=32,kernel_size=(3,3)),     #输入(1,32,32,32)  输出(1,32,30,30)
            nn.ReLU(),
            nn.MaxPool2d(2,2),         #输出 (1,32,15,15)
            nn.Dropout2d(0.25),        ## 表示每个神经元有0.25的可能性不被激活  Dropout只能用在训练部分而不能用在测试部分
    
            nn.Conv2d(in_channels=32,out_channels=64,kernel_size=(3,3),padding=(1,1)),     #输入(1,32,15,15)  #输出(1,64,15,15)
            nn.ReLU(),
            nn.Conv2d(in_channels=64,out_channels=64,kernel_size=(3,3)),      #输入(1,64,15,15)   输出(1,64,13,13)
            nn.ReLU(),
            nn.MaxPool2d(2,2),     #输入(1,64,13,13)   输出(1,64,6,6)
            nn.Dropout2d(0.25),
    
            nn.Conv2d(in_channels=64,out_channels=128,kernel_size=(3,3),padding=(1,1)),   #输入(1,64,6,6)   输出(1,128,6,6)
            nn.ReLU(),
            nn.Conv2d(in_channels=128,out_channels=128,kernel_size=(3,3)),   #输入(1,128,6,6)   输出(1,128,4,4)
            nn.ReLU(),
            nn.MaxPool2d(2,2),        #输入(1,128,4,4)   输出(1,128,2,2)
            nn.Dropout2d(0.25),
            nn.Flatten(),      #结果为 (1,512)
            #后续添加的层
            nn.Linear(in_features=512,out_features=512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=512,out_features=1),    #out_feaures=1是将二分类输出为一个神经元，若是必须得2个，后续在修改，标签也要修改
            nn.Sigmoid()   
            
        )
        #self.FC1=nn.Linear(512,512)
        #self.FC2=nn.Linear(512,1)
        #self.Sg=nn.Sigmoid()
        #self.Rl=nn.ReLU()
        #self.dr=nn.Dropout(0.5)
    def forward(self,x):
        logits=self.conv(x)
        #x=logits.reshape(logits.shape[0],-1)
        #x=self.FC1(x)
        #x=self.Rl(x)
        #x=self.dr(x)
        #x=self.FC2(x)
        #logits=self.Sg(x)
        
        return logits

Using cuda device


In [3]:
#Interaction_df=pd.read_csv("../data/yellowModule_interactions_short 2.tsv",sep='\t')  #1
#Interaction_df.columns=["node1","node2","label"]
#Interaction_df.shape[0]

In [13]:
key_genes=["UBE2C", "TPX2", "FOXM1", "CEP55", "NEK2", "CDCA8", "CDK1", "CCNB1","CKS2"]
key_gene_df=pd.DataFrame(columns=["node1","node2"])
print(key_genes)

['UBE2C', 'TPX2', 'FOXM1', 'CEP55', 'NEK2', 'CDCA8', 'CDK1', 'CCNB1', 'CKS2']


In [14]:
key_gene_df

Unnamed: 0,node1,node2


In [15]:
for i in range(8) :
    k=i+1
    for  a in range(k,9):
         key_gene_df=key_gene_df._append({"node1":key_genes[i],"node2":key_genes[a]},ignore_index=True)

In [16]:
 key_gene_df

Unnamed: 0,node1,node2
0,UBE2C,TPX2
1,UBE2C,FOXM1
2,UBE2C,CEP55
3,UBE2C,NEK2
4,UBE2C,CDCA8
5,UBE2C,CDK1
6,UBE2C,CCNB1
7,UBE2C,CKS2
8,TPX2,FOXM1
9,TPX2,CEP55


In [17]:
#读取表达矩阵
expression_df=pd.read_csv("../data/brown_moduel_final_expression_matrix_protein_coding_genes.csv")  #这里是获取表达矩阵  222是简化版的表达矩阵，去掉了未包含基因的表达  #1
expression_df=expression_df.iloc[:,1:]   #处理后的表达矩阵  #1
gene_index=expression_df.columns   #这里将表达矩阵的基因索引单独存放在一个array中   #1
gene_index=np.array(gene_index)     #1
expression_matrix=expression_df.values  #这里将df形式的表达矩阵转为array，方便用numpy进行运算   #1
expression_matrix.shape

(508, 777)

In [18]:
#将转变图像的代码封装成函数形式   #1
def transform_image(gene1_name,gene2_name,gene_list,expression_matrix):   #这里传入的gene_list是表达矩阵中基因的顺序(不能修改)，传入的expression_matrix是array类型的
    index_1=np.where(gene_list==gene1_name)#这里是为了得到具有相互作用的两个基因在基因表达矩阵中对应的索引，有了索引，才能得到该基因在所有样本中的表达情况
    index_2=np.where(gene_list==gene2_name)
    if(index_1[0].size>0 and index_2[0].size>0):
        gene_1_expression=expression_matrix[:,index_1].reshape(508)   #这里是获取某一个相互关系的一个端点基因的表达情况   这里的1166是样本数，reshape是为了降低维度
       
        gene_2_expression=expression_matrix[:,index_2].reshape(508)   #这里是获取某一个相互关系的第二个端点基因的表达情况
        #这里通过numpy.histogram2d方法进行转换，将两个基因的表达情况转变为图像
        H,x_edges,y_edges=np.histogram2d(gene_1_expression,gene_2_expression,bins=32)
        H=H.T
        print(H.sum())
        #H=(np.log10(H/1166+10**-4)+4)/4      #这里是对每一个基因对图像的值做类似降维的处理，防止值过大
        print(H.sum())
        #print(H)
        return H
    else :
        return 0 

In [19]:
#创建模型
model=CnncNet().to(device)
#加载参数
model.load_state_dict(torch.load("../brownModule_WGCNA_CNN_accuracy_77.9_roc_0.97.pth"))

<All keys matched successfully>

In [20]:
key_gene_df["score"]=0
key_gene_df

Unnamed: 0,node1,node2,score
0,UBE2C,TPX2,0
1,UBE2C,FOXM1,0
2,UBE2C,CEP55,0
3,UBE2C,NEK2,0
4,UBE2C,CDCA8,0
5,UBE2C,CDK1,0
6,UBE2C,CCNB1,0
7,UBE2C,CKS2,0
8,TPX2,FOXM1,0
9,TPX2,CEP55,0


In [22]:
for i in range(36):
    a=key_gene_df.iloc[i][0]
    b=key_gene_df.iloc[i][1]
    image=transform_image(a,b,gene_index,expression_matrix)
    image=image.reshape(1,1,32,32)
    image=torch.from_numpy(image)
    image=image.to(torch.float32)
    image=image.to(device)
    score=model(image)
    score=score.to("cpu")
    score=score.detach().numpy() 
    print(type(score))
    key_gene_df.iat[i,2]=score

508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.ndarray'>
508.0
508.0
<class 'numpy.nd

In [23]:
key_gene_df

Unnamed: 0,node1,node2,score
0,UBE2C,TPX2,0.999552
1,UBE2C,FOXM1,0.991565
2,UBE2C,CEP55,0.998596
3,UBE2C,NEK2,0.02794
4,UBE2C,CDCA8,0.610739
5,UBE2C,CDK1,0.737134
6,UBE2C,CCNB1,0.996337
7,UBE2C,CKS2,0.98461
8,TPX2,FOXM1,0.999982
9,TPX2,CEP55,0.740432


In [24]:
#将结果写入文件
key_gene_df.to_csv('20250428_KeyGene_regulatory_Network.csv',sep=',')