In [49]:
import numpy as np
from Bio import SeqIO
import json
from sklearn.model_selection import train_test_split
import feature as fe 
import copy 
from tensorflow.keras import *
import seaborn as sns 
import matplotlib
import matplotlib.pyplot as plt 
import pandas as pd 

from sklearn import metrics

def metricsEvaluate(y_true,y_pred,p_threshold=0.5):
    """
    Binarize predicted scores at a threshold and report binary-classification metrics.

    input:
        y_true: ground-truth class labels (0/1), array-like
        y_pred: predicted probabilities (or already-binarized 0/1 labels), array-like;
            the caller's array is never modified
        p_threshold: decision threshold; scores >= p_threshold are assigned class 1,
            scores below it are assigned class 0
    output: [ACC, Sensitivity, Specificity, MCC]
        Sensitivity is nan when y_true has no positive samples and Specificity is
        nan when y_true has no negative samples (the ratio is undefined).

    The four metrics are also printed.
    """
    # Binarize with a single comparison into a new array. Two sequential in-place
    # masks (>= then <=) would reset freshly assigned 1s back to 0 whenever
    # p_threshold >= 1, and would require y_pred to be an ndarray.
    y_label = (np.asarray(y_pred) >= p_threshold).astype(int)

    # labels=[0, 1] pins the matrix to 2x2 so the four-way unpack below also works
    # when only one class occurs in y_true and y_label.
    confusion_matrix = metrics.confusion_matrix(y_true=y_true, y_pred=y_label, labels=[0, 1])
    tn, fp, fn, tp = confusion_matrix.ravel()

    # Guard the denominators explicitly instead of relying on a 0/0 RuntimeWarning.
    Sensitivity = tp/(tp+fn) if (tp+fn) else float('nan')
    Specificity = tn/(tn+fp) if (tn+fp) else float('nan')
    MCC = metrics.matthews_corrcoef(y_true,y_label)
    ACC = metrics.accuracy_score(y_true,y_label)
    print('模型的ACC为:{:.2f},\n模型的敏感性为:{:.2f},\n特异性为:{:.2f},\nMCC系数为:{:.2f}'.format(ACC,Sensitivity,Specificity,MCC))
    return [ACC,Sensitivity,Specificity,MCC]

In [31]:
def func(x):
    """Encode a predicted class label as an integer: 1 for 'BCE', 0 for anything else."""
    is_bce = x == 'BCE'
    return 1 if is_bce else 0

In [83]:
# Read the four prediction CSVs and encode each row's label with func
# (1 for 'BCE', 0 otherwise).
# NOTE(review): the food-positive file is read through column 'Prob\xa0' while the
# other three files use 'PIP or Non-PIP\xa0' -- confirm this matches the CSV headers.
foodPositive = pd.read_csv('ibce-EL_foodPositive.csv')['Prob\xa0'].apply(func).values
foodNegative = pd.read_csv('ibce-EL_foodNegative.csv')['PIP or Non-PIP\xa0'].apply(func).values
milkPositive = pd.read_csv('ibce-ELMilkPositive.csv')['PIP or Non-PIP\xa0'].apply(func).values
milkNegative = pd.read_csv('ibce-ELMilkNegative.csv')['PIP or Non-PIP\xa0'].apply(func).values

# Predictions: positive-file rows first, then negative-file rows.
foodPred = np.concatenate((foodPositive, foodNegative))
milkPred = np.concatenate((milkPositive, milkNegative))

# Ground truth in the same order: 1.0 for every positive-file row, 0.0 for every
# negative-file row.
foodTrue = np.concatenate((np.ones(len(foodPositive)), np.zeros(len(foodNegative))))
milkTrue = np.concatenate((np.ones(len(milkPositive)), np.zeros(len(milkNegative))))



In [43]:
# Sanity check: each prediction vector must have the same shape as its truth vector.
tuple(arr.shape for arr in (foodPred, foodTrue, milkPred, milkTrue))

((4782,), (4782,), (464,), (464,))

In [95]:
# Metrics over the combined milk set (positive file rows followed by negative file rows).
metricsEvaluate(y_true=milkTrue, y_pred=milkPred)

模型的ACC为:0.46,
模型的敏感性为:0.77,
特异性为:0.14,
MCC系数为:-0.12


[0.46120689655172414,
 0.7689075630252101,
 0.13716814159292035,
 -0.12081629635996015]

In [96]:
# Metrics over the combined food set (positive file rows followed by negative file rows).
metricsEvaluate(y_true=foodTrue, y_pred=foodPred)

模型的ACC为:0.62,
模型的敏感性为:0.80,
特异性为:0.39,
MCC系数为:0.21


[0.6179422835633626,
 0.7996999249812453,
 0.3889413988657845,
 0.20782822480170962]

In [97]:
# Accuracy on the food positive file alone (fraction of its rows encoded as 1).
# NOTE(review): foodPositive holds the model's 0/1 calls yet is bound to y_true, with
# the all-ones ground truth bound to y_pred. ACC is symmetric in its arguments, so the
# ACC value is valid; the sensitivity/specificity/MCC printed by this cell are not
# meaningful with the roles reversed -- consider swapping the arguments.
metricsEvaluate(y_true=foodPositive, y_pred=np.ones(len(foodPositive)))

模型的ACC为:0.80,
模型的敏感性为:1.00,
特异性为:0.00,
MCC系数为:0.00


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


[0.7996999249812453, 1.0, 0.0, 0.0]

In [101]:
# Accuracy on the food negative file alone (fraction of its rows encoded as 0).
# NOTE(review): foodNegative holds the model's 0/1 calls yet is bound to y_true, with
# the all-zeros ground truth bound to y_pred. ACC is symmetric in its arguments, so the
# ACC value is valid; the sensitivity/specificity/MCC printed by this cell are not
# meaningful with the roles reversed -- consider swapping the arguments.
metricsEvaluate(y_true=foodNegative, y_pred=np.zeros(len(foodNegative)))

模型的ACC为:0.39,
模型的敏感性为:0.00,
特异性为:1.00,
MCC系数为:0.00


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


[0.3889413988657845, 0.0, 1.0, 0.0]

In [103]:
# Accuracy on the milk positive file alone (fraction of its rows encoded as 1).
# NOTE(review): milkPositive holds the model's 0/1 calls yet is bound to y_true, with
# the all-ones ground truth bound to y_pred. ACC is symmetric in its arguments, so the
# ACC value is valid; the sensitivity/specificity/MCC printed by this cell are not
# meaningful with the roles reversed -- consider swapping the arguments.
metricsEvaluate(y_true=milkPositive, y_pred=np.ones(len(milkPositive)))

模型的ACC为:0.77,
模型的敏感性为:1.00,
特异性为:0.00,
MCC系数为:0.00


[0.7689075630252101, 1.0, 0.0, 0.0]

In [105]:
# Accuracy on the milk negative file alone (fraction of its rows encoded as 0).
# The first argument previously read `milkfNegative`, a name that is never assigned in
# this notebook and raises NameError on a fresh kernel; `milkNegative` is the array
# loaded from 'ibce-ELMilkNegative.csv'.
# NOTE(review): milkNegative holds the model's 0/1 calls yet is passed as y_true, with
# the all-zeros ground truth as y_pred. ACC is symmetric in its arguments, so the ACC
# value is valid; the sensitivity/specificity/MCC printed by this cell are not
# meaningful with the roles reversed -- consider swapping the arguments.
metricsEvaluate(milkNegative,np.zeros((milkNegative.shape[0],)))

模型的ACC为:0.14,
模型的敏感性为:0.00,
特异性为:1.00,
MCC系数为:0.00


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


[0.13716814159292035, 0.0, 1.0, 0.0]