In [1]:
# result_w2v ['BiLSTM', 'XGBoost', 'CatBoost', 'LightGBM', 'AdaBoost', 'ExtraTree']
# result_sec ['LSTM_Attention_CNN', 'BiLSTM_Attention_CNN', 'XGBoost', 'CatBoost', 'LightGBM', 'ExtraTree']
# result_loc ['CNN', 'LSTM', 'BiLSTM', 'LSTM_Attention_CNN', 'BiLSTM_Attention_CNN', 'SVM']
# result_com ['CNN', 'LSTM', 'BiLSTM', 'LSTM_Attention_CNN', 'BiLSTM_Attention_CNN', 'SVM']

In [2]:
from features import ensembleFeature
import pandas as pd
import numpy as np
from utils import metricsCal
from thundersvm import SVC
from sklearn.model_selection import KFold

In [3]:
##将序列转化为词向量用到的函数
def fa_seq(filepath):
    f = open(filepath,'r')
    x = []
    for i in f:
        x.append(i)
    seq = []
    for i in range(len(x)):
        if i%2==1:
            seq.append(x[i][:-1])
    return seq

In [4]:
train_seq = fa_seq("data/Mouse/mouse_train.fasta")
trainLabel = np.append(np.ones(int(len(train_seq)/2)),np.zeros(int(len(train_seq)-len(train_seq)/2)),axis=0)

In [5]:
trainData_EIIP = ensembleFeature.EIIP(train_seq)

trainData_PseEIIP = ensembleFeature.PseEIIP(train_seq)

trainData_PCP = ensembleFeature.PCP(train_seq)

trainData_NCPA = ensembleFeature.NCPA(train_seq)
trainData_NCPA = trainData_NCPA.reshape(trainData_NCPA.shape[0],-1)

trainData_DBPF = ensembleFeature.DBPF(train_seq)
trainData_DBPF = trainData_DBPF.reshape(trainData_DBPF.shape[0],-1)

trainData_com = np.concatenate(  (trainData_EIIP,trainData_PseEIIP,trainData_PCP,trainData_NCPA,trainData_DBPF),axis=1)
trainData = trainData_com.reshape(trainData_com.shape[0],1,-1)

In [35]:
test_seq = fa_seq("data/Mouse/mouse_indep.fasta")
testLabel = np.append(np.ones(int(len(test_seq)/2)),np.zeros(int(len(test_seq)-len(test_seq)/2)),axis=0)

testData_EIIP = ensembleFeature.EIIP(test_seq)

testData_PseEIIP = ensembleFeature.PseEIIP(test_seq)

testData_PCP = ensembleFeature.PCP(test_seq)

testData_NCPA = ensembleFeature.NCPA(test_seq)
testData_NCPA = testData_NCPA.reshape(testData_NCPA.shape[0],-1)

testData_DBPF = ensembleFeature.DBPF(test_seq)
testData_DBPF = testData_DBPF.reshape(testData_DBPF.shape[0],-1)

testData_com = np.concatenate(  (testData_EIIP,testData_PseEIIP,testData_PCP,testData_NCPA,testData_DBPF),axis=1)
testData = testData_com.reshape(testData_com.shape[0],1,-1)

In [6]:
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F

def attention(query, key, value, mask=None, dropout=None):  # q,k,v: [batch, h, seq_len, d_k]
    d_k = query.size(-1)  # query的维度
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)  # 打分机制 [batch, h, seq_len, seq_len]
    p_atten = F.softmax(scores, dim=-1)  # 对最后一个维度归一化得分, [batch, h, seq_len, seq_len]
    if dropout is not None:
        p_atten = dropout(p_atten)
    return torch.matmul(p_atten, value), p_atten  # [batch, h, seq_len, d_k] 作矩阵的乘法

class PositionalEncoding(nn.Module):

    def __init__(self, dim1, dim2, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()

        #if dim % 2 != 0:
        #    raise ValueError("Cannot use sin/cos positional encoding with "
        #                     "odd dim (got dim={:d})".format(dim))

        """
        构建位置编码pe
        pe公式为：
        PE(pos,2i/2i+1) = sin/cos(pos/10000^{2i/d_{model}})
        """
        pe = torch.zeros(max_len, dim2)  # max_len 是解码器生成句子的最长的长度，假设是 10
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term0 = torch.exp((torch.arange(0, dim2, 2, dtype=torch.float) * -(math.log(10000.0) / dim2)))
        div_term1 = torch.exp((torch.arange(1, dim2, 2, dtype=torch.float) * -(math.log(10000.0) / dim2)))
        
        pe[:, 0::2] = torch.sin(position.float() * div_term0)
        pe[:, 1::2] = torch.cos(position.float() * div_term1)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
        #self.drop_out = nn.Dropout(p=dropout)
        self.dim2 = dim2
        self.bm1 = nn.BatchNorm1d(dim1,eps=1e-05)

    def forward(self, emb, step=None):
        emb = emb * math.sqrt(self.dim2)
        if step is None:
            emb = torch.tensor(emb) + self.pe[:,:emb.shape[1]]
        else:
            emb = emb + self.pe[step]
        #emb = self.drop_out(emb)
        emb = self.bm1(emb.to(torch.float32))
        return emb
    
def clones(module, N):  #定义clones方法
    return nn.ModuleList([copy.deepcopy(module)
                          for _ in range(N)])  #让原来变量不影响,且克隆module N次

class SelfAttention(nn.Module):  #多头注意力机制

    def __init__(self,embedding_dim, dropout=0.1):
        super(SelfAttention, self).__init__()
        self.linears = clones(nn.Linear(embedding_dim, embedding_dim), 4)  #克隆四份Linear网络层
        self.dropout = nn.Dropout(p=dropout)  #定义Dropout层

    def forward(self,query,key,value,mask=None):  # q,k,v: [batch, seq_len, embedding_dim]
        nbatches = query.shape[0]  #批数量
        query, key, value = [
            l(x) for l, x in zip(self.linears,
                            (query.to(torch.float32),
                             key.to(torch.float32),
                             value.to(torch.float32)))
        ]  #获取zip的query,key,value权重矩阵
        attn, p_atten = attention(query,key,value,mask=mask,dropout=self.dropout)
        out = self.linears[-1](attn)  #得到最后一层线性层的输出
        return out,p_atten  #返回out结果

In [7]:
from sklearn.utils import shuffle
from utils import metricsCal
from torch.utils.data import DataLoader,TensorDataset
import copy
from torch.autograd import Variable
from sklearn.model_selection import KFold

def train(model,data,label,epoch,train_device,model_dir,batch_size):
    if os.path.exists(model_dir+'model.pt'):
        model_train = torch.load(model_dir+'/model.pt')
    else:
        model_train = model
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model_train.parameters(),lr=0.003)  #改变学习率
    dataX = torch.Tensor(data).clone().detach()
    label = torch.Tensor(label).clone().detach()
    train_data = TensorDataset(dataX, label)
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    running_loss = 0.0
    model_train = model_train.to(train_device)
    for batch_idx,data in enumerate(train_loader,0):
        inputs,target = data
        #inputs = inputs.reshape(inputs.shape[0], 1, inputs.shape[1])
        inputs = inputs.to(train_device)
        target = target.to(train_device)
        target = target.reshape(target.shape[0],1)
        optimizer.zero_grad()
        outputs = model_train(inputs)
        loss = criterion(outputs,target)
        loss.backward()
        optimizer.step()
        #print(type(loss))
        running_loss += loss.item()
        if batch_idx == len(dataX)//batch_size:
            #print('[%d, %5d] epoch loss: %.3f' %(epoch+1,batch_idx+1,running_loss))
            print(running_loss)
    save_model(model_train,model_dir)
    model_train = torch.load(model_dir+'/model.pt')
    #device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")  # 选择设备
    th,_,_,_,_,_,_,_,_,_ = metricsCal.evaluate(model_train,train_loader,train_device)
    return running_loss,th

def test(data,label,best_auc,test_device,model_dir,batch_size,th):
    model_test = load_model(model_dir)
    #model_test.eval()
    #model_test.to(test_device)
    data = torch.Tensor(data).clone().detach()#torch.Tensor(data)
    label = torch.Tensor(label).clone().detach()#.requires_grad_(True)torch.Tensor(label)
    test_data = TensorDataset(data,label)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    #device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")  # 选择设备
    _,_,_,_,Sen,Spe, Acc, mcc, AUC = metricsCal.evaluate(model_test,test_loader,test_device,False,th)
    
    print('Accuracy on test set: %d %%' %Acc)
    print('Sensitivity on test set: %d %%' %Sen)
    print('Speciality on test set: %d %%' %Spe)
    print('MCC on test set: %.3f' %mcc)
    print('auc on test set: %.3f' %AUC)
    if(AUC > best_auc):
        torch.save(model_test,model_dir+'model_best.pt')
    return Sen,Spe,Acc, mcc, AUC

def independTest(data,label,test_device,model_dir,batch_size,th):
    model_test = load_bestModel(model_dir)
    data = torch.Tensor(data).clone().detach()#torch.Tensor(data)
    label = torch.Tensor(label).clone().detach()#.requires_grad_(True)torch.Tensor(label)
    test_data = TensorDataset(data,label)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    #device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")  # 选择设备
    _,_,_,_,Sen,Spe, Acc, mcc, AUC = metricsCal.evaluate(model_test,test_loader,test_device,False,th)
    print(Acc,mcc,AUC)
    print('Accuracy on test set: %d %%' %Acc)
    print('Sensitivity on test set: %d %%' %Sen)
    print('Speciality on test set: %d %%' %Spe)
    print('MCC on test set: %.3f' %mcc)
    print('auc on test set: %.3f' %AUC)
    return Acc, mcc, AUC

def load_model(model_dir):
    if os.path.exists(model_dir+'model.pt'):
        model_load = torch.load(model_dir+'model.pt')
    return model_load

def save_model(model_save,model_dir):
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    torch.save(model_save, model_dir+'model.pt')

def load_bestModel(model_dir):
    if os.path.exists(model_dir+'model_best.pt'):
        model_load = torch.load(model_dir+'model_best.pt')#,map_location='cuda:0')
    return model_load

In [8]:
#CNN
class Model_CNN(nn.Module):
    def __init__(self, dim1, dim2, dropout=0.1):
        super(Model_CNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1,3,kernel_size=5,stride=2,padding=2)
        self.conv2 = nn.Conv2d(3,3,kernel_size=5,stride=2,padding=2)
        #self.conv3 = nn.Conv2d(3,3,kernel_size=5,stride=2,padding=2)
        
        k1 = int((dim1+2*2-5)/2)+1
        k2 = int((dim2+2*2-5)/2)+1
        k1 = int((k1+2*2-5)/2)+1
        k2 = int((k2+2*2-5)/2)+1
        #k1 = int((k1+2*2-5)/2)+1
        #k2 = int((k2+2*2-5)/2)+1
        
        self.fn1 = nn.Linear(3*k1*k2,128)
        self.fn2 = nn.Linear(128,1)
        self.ac = nn.Sigmoid()
        self.bm1 = nn.BatchNorm1d(dim1)
        
        self.relu = nn.ReLU()#inplace=True)
        self.drop = nn.Dropout(p=dropout)

    def forward(self, x):
        
        x1 = x.contiguous().view(-1,1,x.shape[1],x.shape[2])
        x1 = self.conv1(x1)
        x1 = self.relu(x1)
        x1 = self.conv2(x1)
        x1 = self.relu(x1)
        #x1 = self.conv3(x1)
        #x1 = self.relu(x1)
        x1 = x1.contiguous().view(x1.shape[0],-1)
        x1 = self.fn1(x1)
        x1 = self.relu(x1)
        x1 = self.drop(x1)
        x1 = self.fn2(x1)
        out = self.ac(x1)
        return out

#LSTM
class Model_LSTM(nn.Module):
    def __init__(self, dim1, dim2, dropout=0.1):
        super(Model_LSTM, self).__init__()
        self.posi = PositionalEncoding(dim1,dim2,dropout)
        self.lstm = nn.LSTM(input_size=dim2,hidden_size=dim2,batch_first=True,bidirectional=False)
        self.bm1 = nn.BatchNorm1d(dim1)
        self.conv1 = nn.Conv2d(1,1,kernel_size=5,stride=2,padding=2)
        k1 = int((dim1+2*2-5)/2)+1
        k2 = int((dim2+2*2-5)/2)+1
        self.fn1 = nn.Linear(1*k1*k2,128)
        #self.fn1 = nn.Linear(dim1*dim2,128)
        self.fn2 = nn.Linear(128,1)
        self.ac = nn.Sigmoid()
        
        self.relu = nn.ReLU()#inplace=True)
        self.drop = nn.Dropout(p=dropout)

    def forward(self, x):
        #所有输入，如果只有一维，需要变成1*n维
        x1 = self.posi(x)
        x1,(h_n,c_n) = self.lstm(x1)
        x1 = self.relu(x1)
        x1 = self.bm1(x1)
        
        x1 = x1.contiguous().view(-1,1,x1.shape[1],x1.shape[2])
        x1 = self.conv1(x1)
        x1 = self.relu(x1)
        x1 = x1.contiguous().view(x1.shape[0],-1)
        x1 = self.fn1(x1)
        x1 = self.relu(x1)
        x1 = self.drop(x1)
        x1 = self.fn2(x1)
        out = self.ac(x1)
        return out

#BiLSTM
class Model_BiLSTM(nn.Module):
    def __init__(self, dim1, dim2, dropout=0.1):
        super(Model_BiLSTM, self).__init__()
        self.posi = PositionalEncoding(dim1,dim2,dropout)
        self.lstm = nn.LSTM(input_size=dim2,hidden_size=dim2,batch_first=True,bidirectional=True)
        self.bm1 = nn.BatchNorm1d(dim1)
        self.conv1 = nn.Conv2d(1,1,kernel_size=5,stride=2,padding=2)
        k1 = int((dim1+2*2-5)/2)+1
        k2 = int((dim2+2*2-5)/2)+1
        self.fn1 = nn.Linear(1*k1*k2,128)
        self.fn2 = nn.Linear(128,1)
        self.ac = nn.Sigmoid()
        
        self.relu = nn.ReLU()#inplace=True)
        self.drop = nn.Dropout(p=dropout)

    def forward(self, x):
        #所有输入，如果只有一维，需要变成1*n维
        x1 = self.posi(x)
        x1,(h_n,c_n) = self.lstm(x1)
        x1 = x1[:,:,0:869]+x1[:,:,869:1738]
        x1 = self.relu(x1)
        x1 = self.bm1(x1)
        x1 = x1.contiguous().view(-1,1,x1.shape[1],x1.shape[2])
        x1 = self.conv1(x1)
        x1 = self.relu(x1)
        x1 = x1.contiguous().view(x1.shape[0],-1)
        x1 = self.fn1(x1)
        x1 = self.relu(x1)
        x1 = self.drop(x1)
        x1 = self.fn2(x1)
        out = self.ac(x1)
        return out
    
#Attention
class Model_Attention(nn.Module):
    def __init__(self, dim1, dim2, dropout=0.1):
        super(Model_Attention, self).__init__()
        self.posi = PositionalEncoding(dim1,dim2,dropout)
        self.self_A = SelfAttention(dim2)
        self.self_B = SelfAttention(dim1)
        self.bm1 = nn.BatchNorm1d(dim1)
        self.conv1 = nn.Conv2d(1,1,kernel_size=5,stride=2,padding=2)
        k1 = int((dim1+2*2-5)/2)+1
        k2 = int((dim2+2*2-5)/2)+1
        self.fn1 = nn.Linear(1*k1*k2,128)
        self.fn2 = nn.Linear(128,1)
        self.ac = nn.Sigmoid()
        self.bm1 = nn.BatchNorm1d(dim1)
        
        self.relu = nn.ReLU()#inplace=True)
        self.drop = nn.Dropout(p=dropout)

    def forward(self, x):
        #所有输入，如果只有一维，需要变成1*n维
        x1 = self.posi(x)
        if x1.shape[1] == 1:
            x1 = x1.view(x1.shape[0],x1.shape[2],x1.shape[1])
            x1,attn = self.self_B(x1,x1,x1)
        else:
            x1,attn = self.self_A(x1,x1,x1)
        if x1.shape[2]==1:
            x1 = x1.view(x1.shape[0],x1.shape[2],x1.shape[1])
        x1 = self.bm1(x1)
        x1 = self.relu(x1)
        
        x1 = x1.contiguous().view(-1,1,x1.shape[1],x1.shape[2])
        x1 = self.conv1(x1)
        x1 = self.relu(x1)
        x1 = x1.contiguous().view(x1.shape[0],-1)
        x1 = self.fn1(x1)
        x1 = self.relu(x1)
        x1 = self.drop(x1)
        x1 = self.fn2(x1)
        out = self.ac(x1)
        return out

#LSTM_Attention_CNN
class Model_LSTM_Attention_CNN(nn.Module):
    def __init__(self, dim1, dim2, dropout=0.1):
        super(Model_LSTM_Attention_CNN, self).__init__()
        self.posi = PositionalEncoding(dim1,dim2,dropout)
        self.self_A = SelfAttention(dim2)
        self.self_B = SelfAttention(dim1)
        self.lstm = nn.LSTM(input_size=dim2,hidden_size=dim2,batch_first=True,bidirectional=False)
        self.bm1 = nn.BatchNorm1d(dim1)
        self.conv1 = nn.Conv2d(1,1,kernel_size=5,stride=2,padding=2)
        k1 = int((dim1+2*2-5)/2)+1
        k2 = int((dim2+2*2-5)/2)+1
        self.fn1 = nn.Linear(1*k1*k2,128)
        self.fn2 = nn.Linear(128,1)
        self.ac = nn.Sigmoid()
        self.bm1 = nn.BatchNorm1d(dim1)
        
        self.relu = nn.ReLU()#inplace=True)
        self.drop = nn.Dropout(p=dropout)
        #self.encoder = nn.Sequential()

    def forward(self, x):
        
        #所有输入，如果只有一维，需要变成1*n维
        x1 = self.posi(x)
        x1,(h_n,c_n) = self.lstm(x1)
        x1 = self.relu(x1)
        if x1.shape[1] == 1:
            x1 = x1.view(x1.shape[0],x1.shape[2],x1.shape[1])
            x1,attn = self.self_B(x1,x1,x1)
        else:
            x1,attn = self.self_A(x1,x1,x1)
        if x1.shape[2]==1:
            x1 = x1.view(x1.shape[0],x1.shape[2],x1.shape[1])
        x1 = self.bm1(x1)
        x1 = self.relu(x1)
        
        x1 = x1.contiguous().view(-1,1,x1.shape[1],x1.shape[2])
        x1 = self.conv1(x1)
        x1 = self.relu(x1)
        x1 = x1.contiguous().view(x1.shape[0],-1)
        x1 = self.fn1(x1)
        x1 = self.relu(x1)
        x1 = self.drop(x1)
        x1 = self.fn2(x1)
        out = self.ac(x1)
        return out
    
#BiLSTM_Attention_CNN
class Model_BiLSTM_Attention_CNN(nn.Module):
    def __init__(self, dim1, dim2, dropout=0.1):
        super(Model_BiLSTM_Attention_CNN, self).__init__()
        self.posi = PositionalEncoding(dim1,dim2,dropout)
        self.self_A = SelfAttention(dim2)
        self.self_B = SelfAttention(dim1)
        self.lstm = nn.LSTM(input_size=dim2,hidden_size=dim2,batch_first=True,bidirectional=True)
        self.bm1 = nn.BatchNorm1d(dim1)
        self.conv1 = nn.Conv2d(1,1,kernel_size=5,stride=2,padding=2)
        k1 = int((dim1+2*2-5)/2)+1
        k2 = int((dim2+2*2-5)/2)+1
        self.fn1 = nn.Linear(1*k1*k2,128)
        self.fn2 = nn.Linear(128,1)
        self.ac = nn.Sigmoid()
        self.bm1 = nn.BatchNorm1d(dim1)
        
        self.relu = nn.ReLU()#inplace=True)
        self.drop = nn.Dropout(p=dropout)

    def forward(self, x):
        
        #所有输入，如果只有一维，需要变成1*n维
        x1 = self.posi(x)
        x1,(h_n,c_n) = self.lstm(x1)
        x1 = x1[:,:,0:869]+x1[:,:,869:1738]
        x1 = self.relu(x1)
        if x1.shape[1] == 1:
            x1 = x1.view(x1.shape[0],x1.shape[2],x1.shape[1])
            x1,attn = self.self_B(x1,x1,x1)
        else:
            x1,attn = self.self_A(x1,x1,x1)
        if x1.shape[2]==1:
            x1 = x1.view(x1.shape[0],x1.shape[2],x1.shape[1])
        x1 = self.bm1(x1)
        x1 = self.relu(x1)
        
        x1 = x1.contiguous().view(-1,1,x1.shape[1],x1.shape[2])
        x1 = self.conv1(x1)
        x1 = self.relu(x1)
        x1 = x1.contiguous().view(x1.shape[0],-1)
        x1 = self.fn1(x1)
        x1 = self.relu(x1)
        x1 = self.drop(x1)
        x1 = self.fn2(x1)
        out = self.ac(x1)
        return out

In [13]:
def independResult(data,label,test_device,model_dir,batch_size,th=0.5):
    model_test = load_bestModel(model_dir)
    data = torch.Tensor(data).clone().detach()#torch.Tensor(data)
    label = torch.Tensor(label).clone().detach()#.requires_grad_(True)torch.Tensor(label)
    test_data = TensorDataset(data,label)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    #device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")  # 选择设备
    
    return evaluate_result(model_test,test_loader,test_device,False)

def evaluate_result(model, dataloader, device, is_train=True, threshold=0.5):
    model.eval()
    y_true = torch.tensor([],dtype=torch.int)
    y_score = torch.tensor([])
    #for data in tqdm(dataloader):
    model = model.to(device)
    for data in dataloader:
        #if not isinstance(model, Res_Net):
        if 1==1:
            inputs,y = data
            inputs = inputs.to(device)
            out = model(inputs)
        out = out.squeeze(dim=-1)
        #out = torch.sigmoid(out)
        y_true = torch.cat((y_true, y.int().detach().cpu()))
        y_score = torch.cat((y_score, out.detach().cpu()))  #detach去除梯度，然后cpu()，然后cat将其连接起来
    y_true = y_true.numpy()
    y_score = y_score.numpy()
    return y_score, y_true

In [37]:
X_All = []
Label = []

In [85]:
trainData = trainData_com.reshape(trainData_com.shape[0],1,-1)
testData = testData_com.reshape(testData_com.shape[0],1,-1)

In [91]:
import os
import math
#先做交叉验证，看看多少个epoch合适
rows = 10
row = -1
max_epochs = 3
max_patience = 25
batch_size = 256

# X_BiLSTM_Attention_CNN = []
# test_BiLSTM_Attention_CNN = []

#然后全部all_in 一个模型出来
#%env CUDA_LAUNCH_BLOCKING=1
kf = KFold(10,True,0)
x = 0
#第三重循环
for i,[train_index, test_index] in enumerate(kf.split(trainData)):

    X_train = trainData[train_index]
    X_test = trainData[test_index]
    Y_train = trainLabel[train_index]
    Y_test = trainLabel[test_index]
    
    model_dir = "Model/Model_LSTM_Attention_CNN_com/KFold_" + str(i)+"/"
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  #选择设备
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    y_pred,y_true = independResult(X_test,Y_test,device,model_dir,batch_size)
    #X_BiLSTM_Attention_CNN.append(y_pred)
    
    test_pred,_ = independResult(testData,testLabel,device,model_dir,batch_size)
    #test_BiLSTM_Attention_CNN.append(test_pred)
    #Label.append(Y_test)
    x += roc_auc_score(testLabel,test_pred)
    



In [92]:
x

7.586042

In [105]:
from sklearn.decomposition import FastICA
ica = FastICA(n_components= 100).fit(trainData_loc)
trainData = ica.transform(trainData_loc)
testData = ica.transform(testData)

NameError: name 'trainData_loc' is not defined

In [47]:
kf = KFold(10,True,0)
from thundersvm import SVC
X_SVM = []
test_SVM = []
for i,[train_index, test_index] in enumerate(kf.split(trainData)):

    X_train = trainData[train_index]
    X_test = trainData[test_index]
    Y_train = trainLabel[train_index]
    Y_test = trainLabel[test_index]
    
    clf_svm = SVC(C=7.16281896075926, gamma=0.03935769127227975)
    
    clf_svm.fit(X_train,Y_train)

    X_SVM.append(   np.column_stack(#(clf_rf.predict_proba(X_test)[:,1],
               # clf_et.predict_proba(X_test)[:,1]))
               # clf_lgbm.predict_proba(X_test)[:,1],
               # clf_et.predict_proba(X_test)[:,1],
               # clf_gb.predict_proba(X_test)[:,1],
               clf_svm.decision_function(X_test)))
    
    test_SVM.append(   np.column_stack(#(clf_rf.predict_proba(X_test)[:,1],
               # clf_et.predict_proba(X_test)[:,1]))
               # clf_lgbm.predict_proba(X_test)[:,1],
               # clf_et.predict_proba(X_test)[:,1],
               # clf_gb.predict_proba(X_test)[:,1],
               clf_svm.decision_function(testData)))
    
    #Label.append(Y_test)



In [49]:
len(X_SVM)

10

In [60]:
Y_kk = np.hstack((Label[0],Label[1],Label[2],Label[3],Label[4],Label[5],Label[6],Label[7],Label[8],Label[9]))

valData_CNN = np.hstack((X_CNN[0],X_CNN[1],X_CNN[2],X_CNN[3],X_CNN[4],X_CNN[5],X_CNN[6],X_CNN[7],X_CNN[8],X_CNN[9]))
test_CNN = np.vstack((test_CNN[0],test_CNN[1],test_CNN[2],test_CNN[3],test_CNN[4],test_CNN[5],test_CNN[6],test_CNN[7],test_CNN[8],test_CNN[9]))

valData_LSTM = np.hstack((X_LSTM[0],X_LSTM[1],X_LSTM[2],X_LSTM[3],X_LSTM[4],X_LSTM[5],X_LSTM[6],X_LSTM[7],X_LSTM[8],X_LSTM[9]))
test_LSTM = np.vstack((test_LSTM[0],test_LSTM[1],test_LSTM[2],test_LSTM[3],test_LSTM[4],test_LSTM[5],test_LSTM[6],test_LSTM[7],test_LSTM[8],test_LSTM[9]))

valData_BiLSTM = np.hstack((X_BiLSTM[0],X_BiLSTM[1],X_BiLSTM[2],X_BiLSTM[3],X_BiLSTM[4],X_BiLSTM[5],X_BiLSTM[6],X_BiLSTM[7],X_BiLSTM[8],X_BiLSTM[9]))
test_BiLSTM = np.vstack((test_BiLSTM[0],test_BiLSTM[1],test_BiLSTM[2],test_BiLSTM[3],test_BiLSTM[4],test_BiLSTM[5],test_BiLSTM[6],test_BiLSTM[7],test_BiLSTM[8],test_BiLSTM[9]))

valData_LSTM_Attention_CNN = np.hstack((X_LSTM_Attention_CNN[0],X_LSTM_Attention_CNN[1],X_LSTM_Attention_CNN[2],X_LSTM_Attention_CNN[3],
                                        X_LSTM_Attention_CNN[4],X_LSTM_Attention_CNN[5],X_LSTM_Attention_CNN[6],X_LSTM_Attention_CNN[7],
                                        X_LSTM_Attention_CNN[8],X_LSTM_Attention_CNN[9]))
test_LSTM_Attention_CNN = np.vstack((test_LSTM_Attention_CNN[0],test_LSTM_Attention_CNN[1],test_LSTM_Attention_CNN[2],test_LSTM_Attention_CNN[3],
                                     test_LSTM_Attention_CNN[4],test_LSTM_Attention_CNN[5],test_LSTM_Attention_CNN[6],test_LSTM_Attention_CNN[7],
                                     test_LSTM_Attention_CNN[8],test_LSTM_Attention_CNN[9]))

valData_BiLSTM_Attention_CNN = np.hstack((X_BiLSTM_Attention_CNN[0],X_BiLSTM_Attention_CNN[1],X_BiLSTM_Attention_CNN[2],X_BiLSTM_Attention_CNN[3],X_BiLSTM_Attention_CNN[4],X_BiLSTM_Attention_CNN[5],X_BiLSTM_Attention_CNN[6],X_BiLSTM_Attention_CNN[7],X_BiLSTM_Attention_CNN[8],X_BiLSTM_Attention_CNN[9]))
test_BiLSTM_Attention_CNN = np.vstack((test_BiLSTM_Attention_CNN[0],test_BiLSTM_Attention_CNN[1],test_BiLSTM_Attention_CNN[2],test_BiLSTM_Attention_CNN[3],test_BiLSTM_Attention_CNN[4],test_BiLSTM_Attention_CNN[5],test_BiLSTM_Attention_CNN[6],test_BiLSTM_Attention_CNN[7],test_BiLSTM_Attention_CNN[8],test_BiLSTM_Attention_CNN[9]))

valData_SVM = np.hstack((X_SVM[0],X_SVM[1],X_SVM[2],X_SVM[3],X_SVM[4],X_SVM[5],X_SVM[6],X_SVM[7],X_SVM[8],X_SVM[9]))
test_SVM = np.vstack((test_SVM[0],test_SVM[1],test_SVM[2],test_SVM[3],test_SVM[4],test_SVM[5],test_SVM[6],test_SVM[7],test_SVM[8],test_SVM[9]))

#pd.DataFrame(np.append(valData_kk.reshape(-1,1),Y_kk.reshape(-1,1),axis=1)).to_csv("",header=["pred","true"])

In [66]:
pd.DataFrame(np.vstack((valData_CNN,valData_LSTM,valData_BiLSTM,valData_LSTM_Attention_CNN,valData_BiLSTM_Attention_CNN,valData_SVM,Y_kk)).T).to_csv("Result/com_result_val.csv",header=['CNN','LSTM','BiLSTM','LSTM_Attention_CNN','BiLSTM_Attention_CNN','SVM','True'])

In [96]:
# from sklearn.metrics import roc_auc_score
# roc_auc_score(np.array(pd.read_csv("Result/com_result_val.csv").iloc[:,-1]),np.mean(pd.read_csv("Result/com_result_val.csv").iloc[:,1:6],axis=1))

0.8219631852869096

In [103]:
pd.DataFrame(np.vstack((test_CNN,test_LSTM,test_BiLSTM,test_LSTM_Attention_CNN,test_BiLSTM_Attention_CNN,test_SVM,testLabel)).T).to_csv("Result/com_result_test.csv",header=ml)

In [101]:
kl = ['CNN', 'LSTM', 'BiLSTM', 'LSTM_Attention_CNN', 'BiLSTM_Attention_CNN', 'SVM']
ml = []
for i in range(len(kl)):
    for j in range(10):
        ml.append(str(kl[i])+"_KFold_"+str(j) )
ml.append("True")

In [104]:
pd.read_csv("Result/com_result_test.csv")

Unnamed: 0.1,Unnamed: 0,CNN_KFold_0,CNN_KFold_1,CNN_KFold_2,CNN_KFold_3,CNN_KFold_4,CNN_KFold_5,CNN_KFold_6,CNN_KFold_7,CNN_KFold_8,...,SVM_KFold_1,SVM_KFold_2,SVM_KFold_3,SVM_KFold_4,SVM_KFold_5,SVM_KFold_6,SVM_KFold_7,SVM_KFold_8,SVM_KFold_9,True
0,0,0.441461,0.506231,0.296314,0.605879,0.399027,0.408772,0.738035,0.513551,0.386520,...,0.174909,0.164790,0.097380,0.081283,0.141068,0.109408,0.182129,0.158753,0.095854,1.0
1,1,0.787726,0.694683,0.722888,0.751069,0.750623,0.796548,0.744546,0.698297,0.729899,...,0.558885,0.477677,0.474592,0.500053,0.522074,0.498110,0.527881,0.445533,0.548559,1.0
2,2,0.347455,0.262559,0.153083,0.301671,0.536042,0.299819,0.147712,0.213025,0.242467,...,-0.244139,-0.379522,-0.259928,-0.284804,-0.290708,-0.353717,-0.344595,-0.339721,-0.342710,1.0
3,3,0.311753,0.386325,0.374023,0.478871,0.264240,0.490883,0.451172,0.508316,0.440429,...,-0.152357,-0.070179,-0.163960,-0.149150,-0.164307,-0.157344,-0.184704,-0.093335,-0.162654,1.0
4,4,0.871074,0.696823,0.712239,0.651063,0.681810,0.701787,0.702683,0.904438,0.866531,...,0.205045,0.174488,0.266517,0.201796,0.222621,0.233738,0.261191,0.225162,0.194357,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1995,0.385140,0.167128,0.111940,0.280566,0.207912,0.368389,0.272384,0.397744,0.158749,...,-0.386169,-0.433405,-0.413225,-0.383125,-0.387552,-0.495238,-0.385498,-0.379884,-0.410556,0.0
1996,1996,0.977430,0.778175,0.792142,0.931308,0.927704,0.949077,0.953119,0.973709,0.964726,...,0.545376,0.491098,0.511091,0.424792,0.492508,0.459950,0.532656,0.495004,0.510312,0.0
1997,1997,0.063835,0.073194,0.050260,0.084390,0.145980,0.108324,0.075576,0.117494,0.114896,...,-0.557687,-0.552244,-0.545294,-0.537657,-0.570403,-0.489198,-0.549688,-0.511380,-0.517350,0.0
1998,1998,0.591777,0.341324,0.204929,0.361646,0.286397,0.459233,0.217524,0.286685,0.335422,...,-0.110476,-0.181320,-0.163894,-0.037015,-0.200439,-0.166484,-0.134078,-0.182023,-0.086352,0.0
