In [1]:
import sys
import os
# 添加模块所在的文件夹到 sys.path
folder_path = "src/"
sys.path.append(folder_path)

# 导入模块
#from DataCombine_ALL import GetData
from hmddv32_data import GetData
from compareLinearModels import Model
import numpy as np
import time
import random
from torch.backends import cudnn
import tensorly as tl
import torch
import pickle
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc, f1_score, precision_recall_curve, average_precision_score
from scipy.special import expit

class Experiments(object):
    """Repeated k-fold cross-validation harness for the CTF tensor model.

    For every fold the positive test triplets are zeroed in a copy of the
    association tensor, the model reconstructs the tensor, and the
    reconstruction is scored on the held-out positive and negative triplets.
    """

    def __init__(self, drug_drug_data, model_name='NCTF', times=5, folds=5, negs=10, **kwargs):
        """Store the dataset, the model factory and the run configuration.

        Args:
            drug_drug_data: Dataset object. ``CV_triplet`` reads its ``X``,
                ``S1``, ``S2``, ``posidx``, ``poscv``, ``negidx`` and
                ``negcv`` attributes.
            model_name: Name handed to ``Model`` to select the algorithm.
            times: Number of cross-validation repetitions.
            folds: Number of folds per repetition.
            negs: Negative-sampling setting; in this class it is only used
                to build output paths.
            **kwargs: Model hyper-parameters. ``CV_triplet`` requires the
                keys ``r``, ``mu``, ``eta``, ``alpha``, ``beta``, ``lam``,
                ``tol`` and ``max_iter``.
        """
        super().__init__()
        self.drug_drug_data = drug_drug_data
        self.model = Model(model_name)
        self.parameters = kwargs
        self.times = times
        self.folds = folds
        self.negs = negs

    def CV_triplet(self):
        """Run ``times`` x ``folds`` cross-validation and persist the results.

        Side effects per fold: the factor matrices are pickled under
        ``CTF_embeds/<negs>n/``, the predictions are pickled under
        ``pred_score_pkl/`` and written as CSV under ``pred_score_csv/``.
        After all folds the per-fold metric table is written under
        ``compareTF/``. Missing output directories are created.

        Returns:
            numpy.ndarray of shape (1, 7): the metrics averaged over every
            evaluated fold and rounded to 4 decimals, ordered as
            (aupr, auc, f1_score, accuracy, recall, specificity, precision).
        """
        fix_seed(2024)
        np.random.seed(2024)
        k_folds = self.folds
        params = self.parameters
        data = self.drug_drug_data
        metrics_tensor_all = np.zeros((1, 7))
        df = pd.DataFrame(columns=['j', 'times', 'folds', 'aupr', 'auc', 'f1_score',
                                   'accuracy', 'recall', 'specificity', 'precision'])
        j = 0  # running row id over all (repetition, fold) pairs

        for i in range(self.times):
            # After the transpose every column is one index triplet, so
            # tuple(matrix) can be used directly as a fancy index into X.
            index_matrix = data.posidx[i].numpy().T
            poscv = data.poscv[i].numpy()
            neg_matrix = data.negidx[i].numpy().T
            negcv = data.negcv[i].numpy()
            metrics_tensor = np.zeros((1, 7))
            since = time.time()

            for k in range(k_folds):
                pos_test = index_matrix[:, np.where(poscv == k)[0]]
                neg_test = neg_matrix[:, np.where(negcv == k)[0]]

                # Hide this fold's positive test entries from the model.
                train_tensor = np.array(data.X, copy=True)
                train_tensor[tuple(pos_test)] = 0
                # np.asmatrix replaces np.mat, which NumPy 2.0 removed.
                # NOTE(review): the model presumably expects np.matrix
                # similarity inputs - confirm against compareLinearModels.
                S1 = np.asmatrix(data.S1)
                S2 = np.asmatrix(data.S2)

                predict_tensor, M, C, D = self.model()(
                    train_tensor, S1, S2,
                    r=params['r'],
                    mu=params['mu'], eta=params['eta'],
                    alpha=params['alpha'], beta=params['beta'],
                    lam=params['lam'],
                    tol=params['tol'], max_iter=params['max_iter'],
                )

                # Persist the factor matrices of this fold.
                fname = f'CTF_embeds/{self.negs}n/factors_{i}_times_{k}_fold.pkl'
                print(fname)
                os.makedirs(os.path.dirname(fname), exist_ok=True)
                with open(fname, 'wb') as f:
                    pickle.dump([M, C, D], f)
                del M, C, D

                # Test set: this fold's positives followed by its negatives.
                idxs_test = tuple(np.concatenate((pos_test, neg_test), axis=1).astype(np.int32))
                labels_test = np.concatenate((
                    np.ones(pos_test.shape[1], dtype=np.float32),
                    np.zeros(neg_test.shape[1], dtype=np.float32),
                ))
                preds = predict_tensor[idxs_test].flatten()

                # Store the raw predictions and ground truth of this fold.
                fname = f'pred_score_pkl/CTF_hmddv3.2_{i}_times_{k}_foldscores.pkl'
                print(fname)
                os.makedirs(os.path.dirname(fname), exist_ok=True)
                with open(fname, 'wb') as f:
                    pickle.dump([predict_tensor, idxs_test, labels_test, preds], f)

                n_test = len(idxs_test[0])
                results = pd.DataFrame({
                    'time': [i] * n_test,   # repetition index
                    'fold': [k] * n_test,   # fold index (was wrongly written as i)
                    'm1': idxs_test[0],
                    'm2': idxs_test[1],
                    'd': idxs_test[2],
                    'true_label': labels_test,
                    'pred_score': preds,
                })
                fname = f'pred_score_csv/CTF_hmddv3.2_{i}_times_{k}_foldscores.csv'
                os.makedirs(os.path.dirname(fname), exist_ok=True)
                results.to_csv(fname, index=False)

                metrics = self.get_metrics_1(labels_test, preds)
                metrics_row = np.asarray(metrics)
                metrics_tensor = metrics_tensor + metrics_row
                metrics_tensor_all = metrics_tensor_all + metrics_row
                df.loc[j] = [j, i, k, *metrics]
                j = j + 1

            result = np.around(metrics_tensor / k_folds, decimals=4)
            print('Times:\t', i + 1, ':\t', result)
            time_elapsed = time.time() - since
            print(time_elapsed // 60, time_elapsed % 60)

        os.makedirs('compareTF', exist_ok=True)
        fname = os.path.join('compareTF', f'CTF_hmddv3.2_{self.negs}neg_results_new.csv')
        df.to_csv(fname, index=False)
        print(j)
        results_1 = np.around(metrics_tensor_all / j, decimals=4)
        print('final:\t', results_1)
        return results_1

    def get_metrics_1(self, real_score, predict_score):
        """Score binary predictions over 999 quantile-style thresholds.

        Thresholds are taken from the sorted distinct prediction values at
        positions ``floor(n_distinct * t / 1000)`` for ``t = 1..999``. AUC and
        AUPR are trapezoidal areas over the resulting ROC / PR points; the
        remaining metrics are reported at the threshold with the highest F1.

        Args:
            real_score: 1-D array-like of ground-truth labels (0 or 1).
            predict_score: 1-D array-like of scores, same length.

        Returns:
            Tuple ``(aupr, auc, f1_score, accuracy, recall, specificity,
            precision)`` of floats.

        Raises:
            ValueError: If the inputs are empty or differ in length.
        """
        labels = np.asarray(real_score, dtype=np.float64).ravel()
        scores = np.asarray(predict_score, dtype=np.float64).ravel()
        if labels.size == 0 or labels.size != scores.size:
            raise ValueError('real_score and predict_score must be non-empty and of equal length')

        # NOTE(review): this reseeds the global NumPy RNG on every call. It is
        # kept because later folds may depend on that RNG state - confirm
        # before removing.
        np.random.seed(2024)

        distinct_scores = np.unique(scores)  # sorted ascending
        picks = (distinct_scores.size * np.arange(1, 1000) / 1000).astype(int)
        thresholds = distinct_scores[picks]

        # Row t holds the 0/1 predictions obtained with thresholds[t].
        predicted = (scores[np.newaxis, :] >= thresholds[:, np.newaxis]).astype(np.float64)

        n_samples = labels.size
        TP = predicted @ labels
        FP = predicted.sum(axis=1) - TP
        FN = labels.sum() - TP
        TN = n_samples - TP - FP - FN

        fpr = FP / (FP + TN)
        tpr = TP / (TP + FN)
        # Sort points by (fpr, tpr); the first point is replaced by (0, 0)
        # and (1, 1) is appended before integrating.
        roc_order = np.lexsort((tpr, fpr))
        x_roc = np.concatenate(([0.0], fpr[roc_order][1:], [1.0]))
        y_roc = np.concatenate(([0.0], tpr[roc_order][1:], [1.0]))
        auc_value = 0.5 * np.dot(np.diff(x_roc), y_roc[:-1] + y_roc[1:])

        recall_list = tpr
        precision_list = TP / (TP + FP)
        # Sort by recall ascending, ties by precision descending; the first
        # point is replaced by (0, 1) and (1, 0) is appended.
        pr_order = np.lexsort((-precision_list, recall_list))
        x_pr = np.concatenate(([0.0], recall_list[pr_order][1:], [1.0]))
        y_pr = np.concatenate(([1.0], precision_list[pr_order][1:], [0.0]))
        aupr_value = 0.5 * np.dot(np.diff(x_pr), y_pr[:-1] + y_pr[1:])

        # n + TP - TN equals 2*TP + FP + FN, the usual F1 denominator.
        f1_score_list = 2 * TP / (n_samples + TP - TN)
        accuracy_list = (TP + TN) / n_samples
        specificity_list = TN / (TN + FP)

        best = np.argmax(f1_score_list)
        return (aupr_value, auc_value, f1_score_list[best], accuracy_list[best],
                recall_list[best], specificity_list[best], precision_list[best])

def fix_seed(seed):
    """Seed every random number generator used by the experiments.

    Sets the ``PYTHONHASHSEED`` and ``CUBLAS_WORKSPACE_CONFIG`` environment
    variables, seeds ``random``, NumPy and torch (CPU and CUDA) and switches
    cuDNN to deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn.deterministic = True
    cudnn.benchmark = False

    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

if __name__ == '__main__':
    # Entry point: run the CTF cross-validation for each negative-sampling
    # setting and collect one summary row per setting.
    fix_seed(2024)
    since = time.time()

    ### Split
    times = 5
    folds = 5
    r = 57
    # Reference NCTF settings for (mu, eta, alpha, beta, lam):
    #   0.75, 0.125, 0.25, 0.25, 0.001  - old combination, reused
    #   0.5, 0.75, 0.5, 0.125, 0.001    - new combination, not used
    hyper_params = dict(r=r, mu=0.5, eta=0.2, alpha=0.5, beta=0.5, lam=0.5,
                        tol=1e-6, max_iter=100)
    df = pd.DataFrame(columns=['neg', 'aupr', 'auc', 'f1_score', 'accuracy',
                               'recall', 'specificity', 'precision'])

    # A wider sweep kept for reference: [1, 2, 4, 6, 8, 10]
    for j, neg in enumerate([1]):
        folder = f'/data/hmddv32_neg/{neg}n'
        drug_drug_data = GetData(miRNA_num=351, disease_num=325, filefolder=folder,
                                 signal=21, neg=neg)
        experiment = Experiments(drug_drug_data, model_name='CTF', times=times,
                                 folds=folds, negs=neg, **hyper_params)
        # CV_triplet returns a (1, 7) array; row 0 carries the seven metrics.
        summary = experiment.CV_triplet()[0]
        aupr, auc_value, f1_score, accuracy, recall, specificity, precision = summary
        df.loc[j] = [neg, aupr, auc_value, f1_score, accuracy, recall, specificity, precision]
        print(f"neg={neg}")
        print(f"auc={auc_value}\taupr={aupr}\tf1={f1_score}\tacc={accuracy}\trecall={recall}\tspe={specificity}\tpre={precision}\n")

    df.to_csv('CTF_1negResults.csv', index=False)  # do not write the row index
    time_elapsed = time.time() - since
    print(*divmod(time_elapsed, 60))


  from pandas.core.computation.check import NUMEXPR_INSTALLED


(351, 351)
(351, 351)
0
1
2
3
4
5
6
7
8
9
(351, 351, 325)
14679.0
CTF_embeds/1n/factors_0_times_0_fold.pkl
pred_score_pkl/CTF_hmddv3.2_0_times_0_foldscores.pkl
CTF_embeds/1n/factors_0_times_1_fold.pkl
pred_score_pkl/CTF_hmddv3.2_0_times_1_foldscores.pkl
CTF_embeds/1n/factors_0_times_2_fold.pkl
pred_score_pkl/CTF_hmddv3.2_0_times_2_foldscores.pkl
CTF_embeds/1n/factors_0_times_3_fold.pkl
pred_score_pkl/CTF_hmddv3.2_0_times_3_foldscores.pkl
CTF_embeds/1n/factors_0_times_4_fold.pkl
pred_score_pkl/CTF_hmddv3.2_0_times_4_foldscores.pkl
Times:	 1 :	 [[0.9438 0.9166 0.8706 0.8737 0.8496 0.8978 0.8927]]
5.0 7.599952936172485
CTF_embeds/1n/factors_1_times_0_fold.pkl
pred_score_pkl/CTF_hmddv3.2_1_times_0_foldscores.pkl
CTF_embeds/1n/factors_1_times_1_fold.pkl
pred_score_pkl/CTF_hmddv3.2_1_times_1_foldscores.pkl
CTF_embeds/1n/factors_1_times_2_fold.pkl
pred_score_pkl/CTF_hmddv3.2_1_times_2_foldscores.pkl
CTF_embeds/1n/factors_1_times_3_fold.pkl
pred_score_pkl/CTF_hmddv3.2_1_times_3_foldscores.pkl


In [1]:
import sys
import os
# 添加模块所在的文件夹到 sys.path
folder_path = "/mnt/sda/liupei/NCTF_new/src/"
sys.path.append(folder_path)

# 导入模块
from hmddv32_data import GetData
from compareLinearModels import Model
import numpy as np
import time
import random
from torch.backends import cudnn
import tensorly as tl
import torch
import pickle
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc, f1_score, precision_recall_curve, average_precision_score
from scipy.special import expit

class Experiments(object):
    """Repeated k-fold cross-validation harness for the TDRC tensor model.

    For every fold the positive test triplets are zeroed in a copy of the
    association tensor, the model reconstructs the tensor, and the
    reconstruction is scored on the held-out positive and negative triplets.
    """

    def __init__(self, drug_drug_data, model_name='NCTF', times=5, folds=5, negs=10, **kwargs):
        """Store the dataset, the model factory and the run configuration.

        Args:
            drug_drug_data: Dataset object. ``CV_triplet`` reads its ``X``,
                ``S1``, ``S2``, ``posidx``, ``poscv``, ``negidx`` and
                ``negcv`` attributes.
            model_name: Name handed to ``Model`` to select the algorithm.
            times: Number of cross-validation repetitions.
            folds: Number of folds per repetition.
            negs: Negative-sampling setting; in this class it is only used
                to build the name of the summary CSV.
            **kwargs: Model hyper-parameters. ``CV_triplet`` requires the
                keys ``r``, ``mu``, ``eta``, ``alpha``, ``beta``, ``lam``,
                ``tol`` and ``max_iter``.
        """
        super().__init__()
        self.drug_drug_data = drug_drug_data
        self.model = Model(model_name)
        self.parameters = kwargs
        self.times = times
        self.folds = folds
        self.negs = negs

    def CV_triplet(self):
        """Run ``times`` x ``folds`` cross-validation and persist the results.

        Side effects per fold: the predictions are pickled under
        ``pred_score_pkl/`` and written as CSV under ``pred_score_csv/``.
        After all folds the per-fold metric table is written under
        ``compareTF/``. Missing output directories are created.

        Returns:
            numpy.ndarray of shape (1, 7): the metrics averaged over every
            evaluated fold and rounded to 4 decimals, ordered as
            (aupr, auc, f1_score, accuracy, recall, specificity, precision).
        """
        fix_seed(2024)
        np.random.seed(2024)
        k_folds = self.folds
        params = self.parameters
        data = self.drug_drug_data
        metrics_tensor_all = np.zeros((1, 7))
        df = pd.DataFrame(columns=['j', 'times', 'folds', 'aupr', 'auc', 'f1_score',
                                   'accuracy', 'recall', 'specificity', 'precision'])
        j = 0  # running row id over all (repetition, fold) pairs

        for i in range(self.times):
            # After the transpose every column is one index triplet, so
            # tuple(matrix) can be used directly as a fancy index into X.
            index_matrix = data.posidx[i].numpy().T
            poscv = data.poscv[i].numpy()
            neg_matrix = data.negidx[i].numpy().T
            negcv = data.negcv[i].numpy()
            metrics_tensor = np.zeros((1, 7))
            since = time.time()

            for k in range(k_folds):
                pos_test = index_matrix[:, np.where(poscv == k)[0]]
                neg_test = neg_matrix[:, np.where(negcv == k)[0]]

                # Hide this fold's positive test entries from the model.
                train_tensor = np.array(data.X, copy=True)
                train_tensor[tuple(pos_test)] = 0
                # np.asmatrix replaces np.mat, which NumPy 2.0 removed.
                # NOTE(review): the model presumably expects np.matrix
                # similarity inputs - confirm against compareLinearModels.
                S1 = np.asmatrix(data.S1)
                S2 = np.asmatrix(data.S2)

                predict_tensor = self.model()(
                    train_tensor, S1, S2,
                    r=params['r'],
                    mu=params['mu'], eta=params['eta'],
                    alpha=params['alpha'], beta=params['beta'],
                    lam=params['lam'],
                    tol=params['tol'], max_iter=params['max_iter'],
                )

                # Test set: this fold's positives followed by its negatives.
                idxs_test = tuple(np.concatenate((pos_test, neg_test), axis=1).astype(np.int32))
                labels_test = np.concatenate((
                    np.ones(pos_test.shape[1], dtype=np.float32),
                    np.zeros(neg_test.shape[1], dtype=np.float32),
                ))
                preds = predict_tensor[idxs_test].flatten()

                # Store the raw predictions and ground truth of this fold.
                fname = f'pred_score_pkl/TDRC_hmddv3.2_{i}_times_{k}_foldscores.pkl'
                print(fname)
                os.makedirs(os.path.dirname(fname), exist_ok=True)
                with open(fname, 'wb') as f:
                    pickle.dump([predict_tensor, idxs_test, labels_test, preds], f)

                n_test = len(idxs_test[0])
                results = pd.DataFrame({
                    'time': [i] * n_test,   # repetition index
                    'fold': [k] * n_test,   # fold index (was wrongly written as i)
                    'm1': idxs_test[0],
                    'm2': idxs_test[1],
                    'd': idxs_test[2],
                    'true_label': labels_test,
                    'pred_score': preds,
                })
                fname = f'pred_score_csv/TDRC_hmddv3.2_{i}_times_{k}_foldscores.csv'
                os.makedirs(os.path.dirname(fname), exist_ok=True)
                results.to_csv(fname, index=False)

                metrics = self.get_metrics_1(labels_test, preds)
                metrics_row = np.asarray(metrics)
                metrics_tensor = metrics_tensor + metrics_row
                metrics_tensor_all = metrics_tensor_all + metrics_row
                df.loc[j] = [j, i, k, *metrics]
                j = j + 1

            result = np.around(metrics_tensor / k_folds, decimals=4)
            print('Times:\t', i + 1, ':\t', result)
            time_elapsed = time.time() - since
            print(time_elapsed // 60, time_elapsed % 60)

        os.makedirs('compareTF', exist_ok=True)
        fname = os.path.join('compareTF', f'TDRC_hmddv3.2_{self.negs}neg_results_new.csv')
        df.to_csv(fname, index=False)
        print(j)
        results_1 = np.around(metrics_tensor_all / j, decimals=4)
        print('final:\t', results_1)
        return results_1

    def get_metrics_1(self, real_score, predict_score):
        """Score binary predictions over 999 quantile-style thresholds.

        Thresholds are taken from the sorted distinct prediction values at
        positions ``floor(n_distinct * t / 1000)`` for ``t = 1..999``. AUC and
        AUPR are trapezoidal areas over the resulting ROC / PR points; the
        remaining metrics are reported at the threshold with the highest F1.

        Args:
            real_score: 1-D array-like of ground-truth labels (0 or 1).
            predict_score: 1-D array-like of scores, same length.

        Returns:
            Tuple ``(aupr, auc, f1_score, accuracy, recall, specificity,
            precision)`` of floats.

        Raises:
            ValueError: If the inputs are empty or differ in length.
        """
        labels = np.asarray(real_score, dtype=np.float64).ravel()
        scores = np.asarray(predict_score, dtype=np.float64).ravel()
        if labels.size == 0 or labels.size != scores.size:
            raise ValueError('real_score and predict_score must be non-empty and of equal length')

        # NOTE(review): this reseeds the global NumPy RNG on every call. It is
        # kept because later folds may depend on that RNG state - confirm
        # before removing.
        np.random.seed(2024)

        distinct_scores = np.unique(scores)  # sorted ascending
        picks = (distinct_scores.size * np.arange(1, 1000) / 1000).astype(int)
        thresholds = distinct_scores[picks]

        # Row t holds the 0/1 predictions obtained with thresholds[t].
        predicted = (scores[np.newaxis, :] >= thresholds[:, np.newaxis]).astype(np.float64)

        n_samples = labels.size
        TP = predicted @ labels
        FP = predicted.sum(axis=1) - TP
        FN = labels.sum() - TP
        TN = n_samples - TP - FP - FN

        fpr = FP / (FP + TN)
        tpr = TP / (TP + FN)
        # Sort points by (fpr, tpr); the first point is replaced by (0, 0)
        # and (1, 1) is appended before integrating.
        roc_order = np.lexsort((tpr, fpr))
        x_roc = np.concatenate(([0.0], fpr[roc_order][1:], [1.0]))
        y_roc = np.concatenate(([0.0], tpr[roc_order][1:], [1.0]))
        auc_value = 0.5 * np.dot(np.diff(x_roc), y_roc[:-1] + y_roc[1:])

        recall_list = tpr
        precision_list = TP / (TP + FP)
        # Sort by recall ascending, ties by precision descending; the first
        # point is replaced by (0, 1) and (1, 0) is appended.
        pr_order = np.lexsort((-precision_list, recall_list))
        x_pr = np.concatenate(([0.0], recall_list[pr_order][1:], [1.0]))
        y_pr = np.concatenate(([1.0], precision_list[pr_order][1:], [0.0]))
        aupr_value = 0.5 * np.dot(np.diff(x_pr), y_pr[:-1] + y_pr[1:])

        # n + TP - TN equals 2*TP + FP + FN, the usual F1 denominator.
        f1_score_list = 2 * TP / (n_samples + TP - TN)
        accuracy_list = (TP + TN) / n_samples
        specificity_list = TN / (TN + FP)

        best = np.argmax(f1_score_list)
        return (aupr_value, auc_value, f1_score_list[best], accuracy_list[best],
                recall_list[best], specificity_list[best], precision_list[best])

def fix_seed(seed):
    """Seed every random number generator used by the experiments.

    Sets the ``PYTHONHASHSEED`` and ``CUBLAS_WORKSPACE_CONFIG`` environment
    variables, seeds ``random``, NumPy and torch (CPU and CUDA) and switches
    cuDNN to deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn.deterministic = True
    cudnn.benchmark = False

    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

if __name__ == '__main__':
    # Entry point: run the TDRC cross-validation for each negative-sampling
    # setting and collect one summary row per setting.
    fix_seed(2024)
    since = time.time()

    ### Split
    times = 5
    folds = 5
    r = 57
    # Reference NCTF settings for (mu, eta, alpha, beta, lam):
    #   0.75, 0.125, 0.25, 0.25, 0.001  - old combination, reused
    #   0.5, 0.75, 0.5, 0.125, 0.001    - new combination, not used
    hyper_params = dict(r=r, mu=0.125, eta=0.25, alpha=2.0, beta=0.125, lam=0.001,
                        tol=1e-6, max_iter=100)
    df = pd.DataFrame(columns=['neg', 'aupr', 'auc', 'f1_score', 'accuracy',
                               'recall', 'specificity', 'precision'])

    # A wider sweep kept for reference: [1, 2, 4, 6, 8, 10]
    for j, neg in enumerate([1]):
        folder = f'/data/hmddv32_neg/{neg}n'
        drug_drug_data = GetData(miRNA_num=351, disease_num=325, filefolder=folder,
                                 signal=21, neg=neg)
        experiment = Experiments(drug_drug_data, model_name='TDRC_mm', times=times,
                                 folds=folds, negs=neg, **hyper_params)
        # CV_triplet returns a (1, 7) array; row 0 carries the seven metrics.
        summary = experiment.CV_triplet()[0]
        aupr, auc_value, f1_score, accuracy, recall, specificity, precision = summary
        df.loc[j] = [neg, aupr, auc_value, f1_score, accuracy, recall, specificity, precision]
        print(f"neg={neg}")
        print(f"auc={auc_value}\taupr={aupr}\tf1={f1_score}\tacc={accuracy}\trecall={recall}\tspe={specificity}\tpre={precision}\n")

    df.to_csv('TDRC_1negResults.csv', index=False)  # do not write the row index
    time_elapsed = time.time() - since
    print(*divmod(time_elapsed, 60))


  from pandas.core.computation.check import NUMEXPR_INSTALLED


(351, 351)
(351, 351)
0
1
2
3
4
5
6
7
8
9
(351, 351, 325)
14679.0
pred_score_pkl/TDRC_hmddv3.2_0_times_0_foldscores.pkl
pred_score_pkl/TDRC_hmddv3.2_0_times_1_foldscores.pkl
pred_score_pkl/TDRC_hmddv3.2_0_times_2_foldscores.pkl
pred_score_pkl/TDRC_hmddv3.2_0_times_3_foldscores.pkl
pred_score_pkl/TDRC_hmddv3.2_0_times_4_foldscores.pkl
Times:	 1 :	 [[0.9739 0.9615 0.9247 0.9267 0.8993 0.9542 0.9516]]
5.0 0.10560131072998047
pred_score_pkl/TDRC_hmddv3.2_1_times_0_foldscores.pkl
pred_score_pkl/TDRC_hmddv3.2_1_times_1_foldscores.pkl
pred_score_pkl/TDRC_hmddv3.2_1_times_2_foldscores.pkl
pred_score_pkl/TDRC_hmddv3.2_1_times_3_foldscores.pkl
pred_score_pkl/TDRC_hmddv3.2_1_times_4_foldscores.pkl
Times:	 2 :	 [[0.9724 0.9585 0.9232 0.9253 0.8975 0.9532 0.9505]]
5.0 0.29433178901672363
pred_score_pkl/TDRC_hmddv3.2_2_times_0_foldscores.pkl
pred_score_pkl/TDRC_hmddv3.2_2_times_1_foldscores.pkl
pred_score_pkl/TDRC_hmddv3.2_2_times_2_foldscores.pkl
pred_score_pkl/TDRC_hmddv3.2_2_times_3_foldscores.pk

In [2]:
import sys
import os
# 添加模块所在的文件夹到 sys.path
folder_path = "src/"
sys.path.append(folder_path)


from hmddv32_data import GetData
from compareLinearModels import Model
import numpy as np
import time
import random
from torch.backends import cudnn
import tensorly as tl
import torch
import pickle
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc, f1_score, precision_recall_curve, average_precision_score
from scipy.special import expit

class Experiments(object):
    """Repeated k-fold cross-validation harness for the CP tensor model.

    For every fold the positive test triplets are zeroed in a copy of the
    association tensor, the model reconstructs the tensor, and the
    reconstruction is scored on the held-out positive and negative triplets.
    """

    def __init__(self, drug_drug_data, model_name='NCTF', times=5, folds=5, negs=10, **kwargs):
        """Store the dataset, the model factory and the run configuration.

        Args:
            drug_drug_data: Dataset object. ``CV_triplet`` reads its ``X``,
                ``S1``, ``S2``, ``posidx``, ``poscv``, ``negidx`` and
                ``negcv`` attributes.
            model_name: Name handed to ``Model`` to select the algorithm.
            times: Number of cross-validation repetitions.
            folds: Number of folds per repetition.
            negs: Negative-sampling setting; in this class it is only used
                to build the name of the summary CSV.
            **kwargs: Model hyper-parameters. ``CV_triplet`` requires the
                keys ``r``, ``mu``, ``eta``, ``alpha``, ``beta``, ``lam``,
                ``tol`` and ``max_iter``.
        """
        super().__init__()
        self.drug_drug_data = drug_drug_data
        self.model = Model(model_name)
        self.parameters = kwargs
        self.times = times
        self.folds = folds
        self.negs = negs

    def CV_triplet(self):
        """Run ``times`` x ``folds`` cross-validation and persist the results.

        Side effects per fold: the predictions are pickled under
        ``pred_score_pkl/`` and written as CSV under ``pred_score_csv/``.
        After all folds the per-fold metric table is written under
        ``compareTF/``. Missing output directories are created.

        Returns:
            numpy.ndarray of shape (1, 7): the metrics averaged over every
            evaluated fold and rounded to 4 decimals, ordered as
            (aupr, auc, f1_score, accuracy, recall, specificity, precision).
        """
        fix_seed(2024)
        np.random.seed(2024)
        k_folds = self.folds
        params = self.parameters
        data = self.drug_drug_data
        metrics_tensor_all = np.zeros((1, 7))
        df = pd.DataFrame(columns=['j', 'times', 'folds', 'aupr', 'auc', 'f1_score',
                                   'accuracy', 'recall', 'specificity', 'precision'])
        j = 0  # running row id over all (repetition, fold) pairs

        for i in range(self.times):
            # After the transpose every column is one index triplet, so
            # tuple(matrix) can be used directly as a fancy index into X.
            index_matrix = data.posidx[i].numpy().T
            poscv = data.poscv[i].numpy()
            neg_matrix = data.negidx[i].numpy().T
            negcv = data.negcv[i].numpy()
            metrics_tensor = np.zeros((1, 7))
            since = time.time()

            for k in range(k_folds):
                pos_test = index_matrix[:, np.where(poscv == k)[0]]
                neg_test = neg_matrix[:, np.where(negcv == k)[0]]

                # Hide this fold's positive test entries from the model.
                train_tensor = np.array(data.X, copy=True)
                train_tensor[tuple(pos_test)] = 0
                # np.asmatrix replaces np.mat, which NumPy 2.0 removed.
                # NOTE(review): the model presumably expects np.matrix
                # similarity inputs - confirm against compareLinearModels.
                S1 = np.asmatrix(data.S1)
                S2 = np.asmatrix(data.S2)

                predict_tensor = self.model()(
                    train_tensor, S1, S2,
                    r=params['r'],
                    mu=params['mu'], eta=params['eta'],
                    alpha=params['alpha'], beta=params['beta'],
                    lam=params['lam'],
                    tol=params['tol'], max_iter=params['max_iter'],
                )

                # Test set: this fold's positives followed by its negatives.
                idxs_test = tuple(np.concatenate((pos_test, neg_test), axis=1).astype(np.int32))
                labels_test = np.concatenate((
                    np.ones(pos_test.shape[1], dtype=np.float32),
                    np.zeros(neg_test.shape[1], dtype=np.float32),
                ))
                preds = predict_tensor[idxs_test].flatten()

                # Store the raw predictions and ground truth of this fold.
                fname = f'pred_score_pkl/CP_hmddv3.2_{i}_times_{k}_foldscores.pkl'
                print(fname)
                os.makedirs(os.path.dirname(fname), exist_ok=True)
                with open(fname, 'wb') as f:
                    pickle.dump([predict_tensor, idxs_test, labels_test, preds], f)

                n_test = len(idxs_test[0])
                results = pd.DataFrame({
                    'time': [i] * n_test,   # repetition index
                    'fold': [k] * n_test,   # fold index (was wrongly written as i)
                    'm1': idxs_test[0],
                    'm2': idxs_test[1],
                    'd': idxs_test[2],
                    'true_label': labels_test,
                    'pred_score': preds,
                })
                fname = f'pred_score_csv/CP_hmddv3.2_{i}_times_{k}_foldscores.csv'
                os.makedirs(os.path.dirname(fname), exist_ok=True)
                results.to_csv(fname, index=False)

                metrics = self.get_metrics_1(labels_test, preds)
                metrics_row = np.asarray(metrics)
                metrics_tensor = metrics_tensor + metrics_row
                metrics_tensor_all = metrics_tensor_all + metrics_row
                df.loc[j] = [j, i, k, *metrics]
                j = j + 1

            result = np.around(metrics_tensor / k_folds, decimals=4)
            print('Times:\t', i + 1, ':\t', result)
            time_elapsed = time.time() - since
            print(time_elapsed // 60, time_elapsed % 60)

        os.makedirs('compareTF', exist_ok=True)
        fname = os.path.join('compareTF', f'CP_hmddv3.2_{self.negs}neg_results_new.csv')
        df.to_csv(fname, index=False)
        print(j)
        results_1 = np.around(metrics_tensor_all / j, decimals=4)
        print('final:\t', results_1)
        return results_1

    def get_metrics_1(self, real_score, predict_score):
        """Score binary predictions over 999 quantile-style thresholds.

        Thresholds are taken from the sorted distinct prediction values at
        positions ``floor(n_distinct * t / 1000)`` for ``t = 1..999``. AUC and
        AUPR are trapezoidal areas over the resulting ROC / PR points; the
        remaining metrics are reported at the threshold with the highest F1.

        Args:
            real_score: 1-D array-like of ground-truth labels (0 or 1).
            predict_score: 1-D array-like of scores, same length.

        Returns:
            Tuple ``(aupr, auc, f1_score, accuracy, recall, specificity,
            precision)`` of floats.

        Raises:
            ValueError: If the inputs are empty or differ in length.
        """
        labels = np.asarray(real_score, dtype=np.float64).ravel()
        scores = np.asarray(predict_score, dtype=np.float64).ravel()
        if labels.size == 0 or labels.size != scores.size:
            raise ValueError('real_score and predict_score must be non-empty and of equal length')

        # NOTE(review): this reseeds the global NumPy RNG on every call. It is
        # kept because later folds may depend on that RNG state - confirm
        # before removing.
        np.random.seed(2024)

        distinct_scores = np.unique(scores)  # sorted ascending
        picks = (distinct_scores.size * np.arange(1, 1000) / 1000).astype(int)
        thresholds = distinct_scores[picks]

        # Row t holds the 0/1 predictions obtained with thresholds[t].
        predicted = (scores[np.newaxis, :] >= thresholds[:, np.newaxis]).astype(np.float64)

        n_samples = labels.size
        TP = predicted @ labels
        FP = predicted.sum(axis=1) - TP
        FN = labels.sum() - TP
        TN = n_samples - TP - FP - FN

        fpr = FP / (FP + TN)
        tpr = TP / (TP + FN)
        # Sort points by (fpr, tpr); the first point is replaced by (0, 0)
        # and (1, 1) is appended before integrating.
        roc_order = np.lexsort((tpr, fpr))
        x_roc = np.concatenate(([0.0], fpr[roc_order][1:], [1.0]))
        y_roc = np.concatenate(([0.0], tpr[roc_order][1:], [1.0]))
        auc_value = 0.5 * np.dot(np.diff(x_roc), y_roc[:-1] + y_roc[1:])

        recall_list = tpr
        precision_list = TP / (TP + FP)
        # Sort by recall ascending, ties by precision descending; the first
        # point is replaced by (0, 1) and (1, 0) is appended.
        pr_order = np.lexsort((-precision_list, recall_list))
        x_pr = np.concatenate(([0.0], recall_list[pr_order][1:], [1.0]))
        y_pr = np.concatenate(([1.0], precision_list[pr_order][1:], [0.0]))
        aupr_value = 0.5 * np.dot(np.diff(x_pr), y_pr[:-1] + y_pr[1:])

        # n + TP - TN equals 2*TP + FP + FN, the usual F1 denominator.
        f1_score_list = 2 * TP / (n_samples + TP - TN)
        accuracy_list = (TP + TN) / n_samples
        specificity_list = TN / (TN + FP)

        best = np.argmax(f1_score_list)
        return (aupr_value, auc_value, f1_score_list[best], accuracy_list[best],
                recall_list[best], specificity_list[best], precision_list[best])

def fix_seed(seed):
    """Seed every random number generator this file relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    CUDA generators), switches cuDNN to deterministic mode with auto-tuning
    disabled, and exports the environment variables ``PYTHONHASHSEED`` and
    ``CUBLAS_WORKSPACE_CONFIG``.

    Args:
        seed: Integer seed applied to all generators.
    """
    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting
    # it here presumably only matters for child processes -- confirm.
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'CUBLAS_WORKSPACE_CONFIG': ':4096:8',
    })

    # Apply the same seed to each generator in turn.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Deterministic cuDNN kernels, no benchmark-based algorithm selection.
    cudnn.benchmark = False
    cudnn.deterministic = True

if __name__ == '__main__':
    # Script entry point: run the repeated cross-validation of the 'CP' model
    # for every negative-sampling setting in the sweep, collect one summary
    # row per setting and write the table to 'CP_1negResults.csv'.
    fix_seed(2024)
    since = time.time()

    ### Split
    times, folds = 5, 5
    r = 57
    # Hyper-parameter sets tried earlier, kept for reference:
    #mu,eta,alpha,beta,lam=0.75,0.125,0.25,0.25,0.001 ### old NCTF parameter combination, to be reused
    #mu,eta,alpha,beta,lam=0.5,0.75,0.5,0.125,0.001 ### new NCTF parameter combination, not used
    hyper_params = dict(r=r, mu=0.125, eta=0.25, alpha=0.125, beta=0.25,
                        lam=0.001, tol=1e-6, max_iter=100)

    df = pd.DataFrame(columns=['neg','aupr', 'auc', 'f1_score', 'accuracy', 'recall', 'specificity', 'precision'])

    # The full sweep would be [1,2,4,6,8,10]; only neg=1 is evaluated here.
    for j, neg in enumerate([1]):
        folder = '/mnt/sda/liupei/NCTF_new/data/hmddv32_neg/'+str(neg)+'n'
        drug_drug_data = GetData(miRNA_num=351, disease_num=325,filefolder=folder,signal=21,neg=neg)
        experiment = Experiments(drug_drug_data, model_name='CP',
                                 times=times, folds=folds, negs=neg,
                                 **hyper_params)

        # CV_triplet returns a (1, 7) array; its single row is the averaged metrics.
        summary_row = experiment.CV_triplet()[0]
        aupr, auc_value, f1_score, accuracy, recall, specificity, precision = summary_row
        df.loc[j] = [neg, aupr, auc_value, f1_score, accuracy, recall, specificity, precision]

        print(f"neg={neg}")
        print(f"auc={auc_value}\taupr={aupr}\tf1={f1_score}\tacc={accuracy}\trecall={recall}\tspe={specificity}\tpre={precision}\n")

    # index=False: do not write the row index to the CSV.
    df.to_csv('CP_1negResults.csv',index=False)

    # Report total wall-clock time as (whole minutes, remaining seconds).
    time_elapsed = time.time() - since
    print(*divmod(time_elapsed, 60))


(351, 351)
(351, 351)
0
1
2
3
4
5
6
7
8
9
(351, 351, 325)
14679.0
pred_score_pkl/CP_hmddv3.2_0_times_0_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_0_times_1_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_0_times_2_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_0_times_3_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_0_times_4_foldscores.pkl
Times:	 1 :	 [[0.9548 0.9398 0.8719 0.8731 0.8633 0.8829 0.882 ]]
15.0 31.834007024765015
pred_score_pkl/CP_hmddv3.2_1_times_0_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_1_times_1_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_1_times_2_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_1_times_3_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_1_times_4_foldscores.pkl
Times:	 2 :	 [[0.9558 0.9419 0.8721 0.8736 0.8614 0.8858 0.8838]]
15.0 26.73488140106201
pred_score_pkl/CP_hmddv3.2_2_times_0_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_2_times_1_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_2_times_2_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_2_times_3_foldscores.pkl
pred_score_pkl/CP_hmddv3.2_

In [3]:
import sys
import os
# 添加模块所在的文件夹到 sys.path
folder_path = "/mnt/sda/liupei/NCTF_new/src/"
sys.path.append(folder_path)

# 导入模块
from hmddv32_data import GetData
from compareLinearModels import Model
import numpy as np
import time
import random
from torch.backends import cudnn
import tensorly as tl
import torch
import pickle
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc, f1_score, precision_recall_curve, average_precision_score
from scipy.special import expit

class Experiments(object):
    """Repeated k-fold cross-validation driver for a tensor-completion model.

    The data object passed in must expose ``X`` (the association tensor),
    ``S1`` and ``S2`` (side-information matrices) and, per repetition ``i``,
    ``posidx[i]``/``poscv[i]`` and ``negidx[i]``/``negcv[i]`` (index triplets
    and their fold assignment for positive and negative samples).
    """

    def __init__(self, drug_drug_data, model_name='NCTF', times=5, folds=5, negs = 10,**kwargs):
        """Store the data, build the model wrapper and keep the hyper-parameters.

        Args:
            drug_drug_data: Data container (see class docstring).
            model_name: Name forwarded to ``Model`` to select the method.
            times: Number of cross-validation repetitions.
            folds: Number of folds per repetition.
            negs: Negative-sampling setting; only used in the summary file name.
            **kwargs: Model hyper-parameters; ``CV_triplet`` reads ``r``,
                ``mu``, ``eta``, ``alpha``, ``beta``, ``lam``, ``tol`` and
                ``max_iter``.
        """
        super().__init__()
        self.drug_drug_data = drug_drug_data
        self.model = Model(model_name)
        self.parameters = kwargs
        self.times = times
        self.folds = folds
        self.negs = negs

    def CV_triplet(self):
        """Run ``times`` x ``folds`` cross-validation and return averaged metrics.

        For every fold the held-out positive entries are zeroed in a copy of
        ``X``, the model is fitted, and the scores of the held-out positives
        and negatives are evaluated with ``get_metrics_1``.

        Side effects: writes one pickle and one CSV of predictions per fold
        (``pred_score_pkl/`` and ``pred_score_csv/``), writes the per-fold
        metric table to ``compareTF/``, and prints progress information.

        Returns:
            A ``(1, 7)`` array with the metrics averaged over all folds of all
            repetitions, rounded to 4 decimals, in the order aupr, auc, f1,
            accuracy, recall, specificity, precision.
        """
        fix_seed(2024)
        k_folds = self.folds
        np.random.seed(2024)
        metrics_tensor_all = np.zeros((1, 7))
        j = 0
        df = pd.DataFrame(columns=['j', 'times', 'folds','aupr', 'auc', 'f1_score', 'accuracy', 'recall', 'specificity', 'precision'])

        # Create the output folders up front so a missing directory does not
        # abort the run after the first (expensive) model fit.
        for out_dir in ('pred_score_pkl', 'pred_score_csv', 'compareTF'):
            os.makedirs(out_dir, exist_ok=True)

        for i in range(self.times):
            # Index triplets are stored row-wise; transpose to (n_modes, n_samples).
            index_matrix = self.drug_drug_data.posidx[i].numpy().T
            poscv = self.drug_drug_data.poscv[i].numpy()
            neg_matrix = self.drug_drug_data.negidx[i].numpy().T
            negcv = self.drug_drug_data.negcv[i].numpy()
            metrics_tensor = np.zeros((1, 7))
            since = time.time()
            for k in range(k_folds):
                pos_cols = np.where(poscv == k)[0]
                neg_cols = np.where(negcv == k)[0]

                # Hide the held-out positives of fold k from the training tensor.
                train_tensor = np.array(self.drug_drug_data.X, copy=True)
                trainpos_index = tuple(index_matrix[:, pos_cols])
                train_tensor[trainpos_index] = 0
                # np.asmatrix is the NumPy>=2.0-compatible spelling of np.mat.
                S1 = np.asmatrix(self.drug_drug_data.S1)
                S2 = np.asmatrix(self.drug_drug_data.S2)

                predict_tensor = self.model()(train_tensor, S1, S2,
                                              r=self.parameters['r'],
                                              mu=self.parameters['mu'], eta=self.parameters['eta'],
                                              alpha=self.parameters['alpha'], beta=self.parameters['beta'],
                                              lam=self.parameters['lam'],
                                              tol=self.parameters['tol'], max_iter=self.parameters['max_iter']
                                              )

                # Test set = held-out positives (same entries that were zeroed
                # above) followed by the negatives assigned to fold k.
                posIndex_test = torch.tensor(index_matrix[:, pos_cols], dtype=torch.int).T
                negIndex_test = torch.tensor(neg_matrix[:, neg_cols], dtype=torch.int).T
                idxs_test = tuple(torch.cat((posIndex_test, negIndex_test), dim=0).numpy().T)

                poslabel_test = torch.ones(posIndex_test.shape[0])
                neglabel_test = torch.zeros(negIndex_test.shape[0])
                labels_test = torch.cat((poslabel_test, neglabel_test), dim=0)
                true_labels = labels_test.cpu().numpy()

                # Predicted scores of the test entries.
                preds = predict_tensor[idxs_test].flatten()

                # Store predictions and true labels of this repetition/fold.
                fname='pred_score_pkl/'+'TFAI_hmddv3.2_'+str(i)+'_times_'+str(k)+'_foldscores.pkl'
                print(fname)
                with open(fname, 'wb') as f:
                    pickle.dump([predict_tensor,idxs_test,true_labels,preds], f)

                n_test = len(idxs_test[0])
                results = pd.DataFrame({
                    'time': [i] * n_test,   # repetition index
                    'fold': [k] * n_test,   # fold index (was wrongly written as i)
                    'm1': idxs_test[0],
                    'm2': idxs_test[1],
                    'd': idxs_test[2],
                    'true_label': true_labels,
                    'pred_score': preds
                })
                # Save as a CSV file.
                fname='pred_score_csv/'+'TFAI_hmddv3.2_'+str(i)+'_times_'+str(k)+'_foldscores.csv'
                results.to_csv(fname, index=False)

                metrics = self.get_metrics_1(true_labels, preds)
                metrics_tensor = metrics_tensor + metrics
                metrics_tensor_all = metrics_tensor_all + metrics
                aupr, auc_value, f1_value, accuracy, recall, specificity, precision = metrics
                df.loc[j] = [j, i, k, aupr, auc_value, f1_value, accuracy, recall, specificity, precision]
                j=j+1

            result = np.around(metrics_tensor / k_folds, decimals=4)
            print('Times:\t',i+1,':\t',result)
            time_elapsed = time.time() - since
            print(time_elapsed // 60, time_elapsed % 60)

        fname = os.path.join('compareTF', 'TFAI_hmddv3.2_'+str(self.negs)+'neg_results_new.csv')
        df.to_csv(fname, index=False)  # index=False: do not write the row index
        print(j)
        results_1 = np.around(metrics_tensor_all / j, decimals=4)
        print('final:\t',results_1)
        return results_1

    def get_metrics_1(self, real_score, predict_score):
        """Compute threshold-sweep classification metrics for one test set.

        Up to 999 thresholds are taken from the sorted distinct prediction
        scores at evenly spaced positions. AUC and AUPR are trapezoidal areas
        over the resulting ROC / precision-recall points; the remaining
        metrics are reported at the threshold with the highest F1.

        Args:
            real_score: 1-D array-like of true labels (1 positive, 0 negative).
            predict_score: 1-D array-like of prediction scores, same length.

        Returns:
            Tuple ``(aupr, auc, f1_score, accuracy, recall, specificity,
            precision)``.
        """
        # np.asmatrix replaces np.mat, which NumPy 2.0 removed; the matrix
        # type is kept because the code below relies on `*` being a matrix
        # product and on axis sums staying 2-D.
        real_score = np.asmatrix(real_score)
        predict_score = np.asmatrix(predict_score)
        # Kept on purpose: this resets NumPy's global RNG after every fold,
        # which later model fits may depend on for reproducible results.
        np.random.seed(2024)
        sorted_predict_score = np.array(sorted(list(set(np.array(predict_score).flatten()))))
        sorted_predict_score_num = len(sorted_predict_score)
        thresholds = sorted_predict_score[
            (np.array([sorted_predict_score_num]) * np.arange(1, 1000) / np.array([1000])).astype(int)]
        thresholds = np.asmatrix(thresholds)
        thresholds_num = thresholds.shape[1]

        # One row per threshold, binarised in place (score >= threshold -> 1).
        predict_score_matrix = np.tile(predict_score, (thresholds_num, 1))
        negative_index = np.where(predict_score_matrix < thresholds.T)
        positive_index = np.where(predict_score_matrix >= thresholds.T)
        predict_score_matrix[negative_index] = 0
        predict_score_matrix[positive_index] = 1

        # Confusion-matrix counts per threshold, each a (thresholds_num, 1) matrix.
        n_samples = len(real_score.T)
        TP = predict_score_matrix * real_score.T
        FP = predict_score_matrix.sum(axis=1) - TP
        FN = real_score.sum() - TP
        TN = n_samples - TP - FP - FN

        fpr = FP / (FP + TN)
        tpr = TP / (TP + FN)
        ROC_dot_matrix = np.asmatrix(sorted(np.column_stack((fpr, tpr)).tolist())).T
        # The first sorted point is overwritten with (0, 0) and (1, 1) is
        # appended so the curve spans the whole FPR range.
        ROC_dot_matrix.T[0] = [0, 0]
        ROC_dot_matrix = np.c_[ROC_dot_matrix, [1, 1]]
        x_ROC = ROC_dot_matrix[0].T
        y_ROC = ROC_dot_matrix[1].T

        # Trapezoidal rule written as a (1 x n) * (n x 1) matrix product.
        auc_area = 0.5 * (x_ROC[1:] - x_ROC[:-1]).T * (y_ROC[:-1] + y_ROC[1:])

        recall_list = tpr
        precision_list = TP / (TP + FP)
        # Sort by recall ascending and, for equal recall, precision descending
        # (achieved by sorting on the negated precision).
        PR_dot_matrix = np.asmatrix(sorted(np.column_stack((recall_list, -precision_list)).tolist())).T
        PR_dot_matrix[1, :] = -PR_dot_matrix[1, :]
        PR_dot_matrix.T[0] = [0, 1]
        PR_dot_matrix = np.c_[PR_dot_matrix, [1, 0]]
        x_PR = PR_dot_matrix[0].T
        y_PR = PR_dot_matrix[1].T
        aupr = 0.5 * (x_PR[1:] - x_PR[:-1]).T * (y_PR[:-1] + y_PR[1:])

        # n + TP - TN == 2*TP + FP + FN, i.e. the usual F1 denominator.
        f1_score_list = 2 * TP / (n_samples + TP - TN)
        accuracy_list = (TP + TN) / n_samples
        specificity_list = TN / (TN + FP)

        # Report the operating point with the best F1 (first one on ties).
        max_index = np.argmax(f1_score_list)
        best_f1 = f1_score_list[max_index, 0]
        accuracy = accuracy_list[max_index, 0]
        specificity = specificity_list[max_index, 0]
        recall = recall_list[max_index, 0]
        precision = precision_list[max_index, 0]

        return aupr[0, 0], auc_area[0, 0], best_f1, accuracy, recall, specificity, precision

def fix_seed(seed):
    """Make subsequent runs repeatable by seeding all random sources.

    Covers Python's ``random``, NumPy's global generator and PyTorch's CPU
    and CUDA generators, puts cuDNN into deterministic, non-benchmarking
    mode, and sets the ``PYTHONHASHSEED`` and ``CUBLAS_WORKSPACE_CONFIG``
    environment variables.

    Args:
        seed: Integer seed shared by every generator.
    """
    hash_seed = str(seed)
    os.environ['PYTHONHASHSEED'] = hash_seed
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

    # Python / NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, current CUDA device, then all CUDA devices.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN: deterministic kernels, no auto-tuner.
    cudnn.deterministic, cudnn.benchmark = True, False

if __name__ == '__main__':
    # Script entry point: evaluate the 'TFAI_CP_within_mod' model with
    # repeated cross-validation for each negative-sampling setting and save
    # one summary row per setting to 'TFAI_1negResults.csv'.
    fix_seed(2024)
    since = time.time()

    ### Split
    times, folds = 5, 5
    r = 57
    # Hyper-parameter sets tried earlier, kept for reference:
    #mu,eta,alpha,beta,lam=0.75,0.125,0.25,0.25,0.001 ### old NCTF parameter combination, to be reused
    #mu,eta,alpha,beta,lam=0.5,0.75,0.5,0.125,0.001 ### new NCTF parameter combination, not used
    hyper_params = dict(r=r, mu=0.125, eta=0.25, alpha=2.0, beta=0.125,
                        lam=0.001, tol=1e-6, max_iter=100)

    df = pd.DataFrame(columns=['neg','aupr', 'auc', 'f1_score', 'accuracy', 'recall', 'specificity', 'precision'])

    # The full sweep would be [1,2,4,6,8,10]; only neg=1 is evaluated here.
    for j, neg in enumerate([1]):
        folder = '/data/hmddv32_neg/'+str(neg)+'n'
        drug_drug_data = GetData(miRNA_num=351, disease_num=325,filefolder=folder,signal=21,neg=neg)
        experiment = Experiments(drug_drug_data, model_name='TFAI_CP_within_mod',
                                 times=times, folds=folds, negs=neg,
                                 **hyper_params)

        # CV_triplet returns a (1, 7) array; its single row is the averaged metrics.
        summary_row = experiment.CV_triplet()[0]
        aupr, auc_value, f1_score, accuracy, recall, specificity, precision = summary_row
        df.loc[j] = [neg, aupr, auc_value, f1_score, accuracy, recall, specificity, precision]

        print(f"neg={neg}")
        print(f"auc={auc_value}\taupr={aupr}\tf1={f1_score}\tacc={accuracy}\trecall={recall}\tspe={specificity}\tpre={precision}\n")

    # index=False: do not write the row index to the CSV.
    df.to_csv('TFAI_1negResults.csv',index=False)

    # Report total wall-clock time as (whole minutes, remaining seconds).
    time_elapsed = time.time() - since
    print(*divmod(time_elapsed, 60))


(351, 351)
(351, 351)
0
1
2
3
4
5
6
7
8
9
(351, 351, 325)
14679.0
pred_score_pkl/TFAI_hmddv3.2_0_times_0_foldscores.pkl
pred_score_pkl/TFAI_hmddv3.2_0_times_1_foldscores.pkl
pred_score_pkl/TFAI_hmddv3.2_0_times_2_foldscores.pkl
pred_score_pkl/TFAI_hmddv3.2_0_times_3_foldscores.pkl
pred_score_pkl/TFAI_hmddv3.2_0_times_4_foldscores.pkl
Times:	 1 :	 [[0.9287 0.8913 0.8509 0.8545 0.8296 0.8793 0.8792]]
5.0 10.45041036605835
pred_score_pkl/TFAI_hmddv3.2_1_times_0_foldscores.pkl
pred_score_pkl/TFAI_hmddv3.2_1_times_1_foldscores.pkl
pred_score_pkl/TFAI_hmddv3.2_1_times_2_foldscores.pkl
pred_score_pkl/TFAI_hmddv3.2_1_times_3_foldscores.pkl
pred_score_pkl/TFAI_hmddv3.2_1_times_4_foldscores.pkl
Times:	 2 :	 [[0.9287 0.8904 0.8512 0.8519 0.8473 0.8565 0.8553]]
5.0 10.268566370010376
pred_score_pkl/TFAI_hmddv3.2_2_times_0_foldscores.pkl
pred_score_pkl/TFAI_hmddv3.2_2_times_1_foldscores.pkl
pred_score_pkl/TFAI_hmddv3.2_2_times_2_foldscores.pkl
pred_score_pkl/TFAI_hmddv3.2_2_times_3_foldscores.pkl
p