# Evaluator

In [22]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from functools import reduce
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix, auc

- 性能指标的计算公式

$Sensitivity(Recall) = \frac{TP}{TP+FN}$

$Specificity = \frac{TN}{TN+FP}$

$Tpr = \frac{TP}{TP+FN}$

$Fpr = \frac{FP}{FP+TN}$

In [23]:
class Evaluator:
    """Compare several scoring methods against binary ground-truth labels.

    The evaluator thresholds each score vector over a range of cut-offs,
    builds a confusion table per scoring method, and plots Sn-Sp and ROC
    curves onto the current matplotlib axes.

    Required keyword arguments:
        wam_scores, bn_scores, svm_scores: one score per sample for each
            method (array-like; compared element-wise against thresholds).
        labels: ground-truth labels; the confusion matrix is computed for
            the label set ``[0, 1]``.
    """

    # Suffix carried by every entry of ``score_names``; removed to build
    # the legend labels.
    _SCORE_SUFFIX = '_scores'

    def __init__(self, **kwgs):
        """Store the score vectors and the labels.

        Raises:
            KeyError: if any of the required keyword arguments is missing.
        """
        self.score_names = ['wam_scores', 'bn_scores', 'svm_scores']
        missing = [key for key in self.score_names + ['labels']
                   if key not in kwgs]
        if missing:
            raise KeyError('missing required keyword argument(s): {}'
                           .format(', '.join(missing)))
        self.Multi_scores = {key: kwgs[key] for key in self.score_names}
        self.labels = kwgs['labels']
        # Kept for backward compatibility; no method of this class fills it.
        self.Conf_mas = []

    @classmethod
    def _display_name(cls, score_name):
        """Return the upper-cased legend label for *score_name*.

        Only an exact trailing ``'_scores'`` is removed. ``str.rstrip`` is
        deliberately not used: it strips a *set of characters*, which would
        also eat trailing letters of the method name itself.
        """
        suffix = cls._SCORE_SUFFIX
        if score_name.endswith(suffix):
            score_name = score_name[:-len(suffix)]
        return score_name.upper()

    def Sn_Sp_Curves(self, T_range=np.arange(0, 10, 0.5)):
        """Plot sensitivity against specificity for every scoring method.

        Args:
            T_range: iterable of thresholds at which scores are binarised.

        Draws onto the current matplotlib figure; the caller is responsible
        for creating and showing it.
        """
        # Always rebuild the tables: they depend on the T_range passed in.
        tables = self.Confusion_table(T_range)
        lw = 2

        for score_name, conf_tb in tables.items():
            rates = self.Cal_Sn_Sp(conf_tb)
            label = 'Sn-Sp curve for {}'.format(self._display_name(score_name))
            plt.plot(rates['Specificity'], rates['Sensitivity'],
                     lw=lw, label=label)

        # Anti-diagonal reference line.
        plt.plot([1, 0], [0, 1], color='navy', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Specificity')
        plt.ylabel('Sensitivity')
        plt.title('Sn-Sp Curve')
        plt.legend(loc="lower left", fontsize=8)

    def ROC_Curves(self, T_range=np.arange(0, 10, 0.5)):
        """Plot the ROC curve (with its AUC) for every scoring method.

        Args:
            T_range: iterable of thresholds at which scores are binarised.

        Draws onto the current matplotlib figure; the caller is responsible
        for creating and showing it.
        """
        # Always rebuild the tables: they depend on the T_range passed in.
        tables = self.Confusion_table(T_range)
        lw = 2

        for score_name, conf_tb in tables.items():
            rates = self.Cal_Tpr_Fpr(conf_tb)
            Tpr = rates['Tpr']
            Fpr = rates['Fpr']
            roc_auc = auc(Fpr, Tpr)
            label = 'ROC curve for {} (area = {:.2})'.format(
                self._display_name(score_name), roc_auc)
            # False positive rate on the x-axis, true positive rate on the
            # y-axis.
            plt.plot(Fpr, Tpr, lw=lw, label=label)

        # Diagonal reference line.
        plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic')
        plt.legend(loc="lower right", fontsize=8)

    def Confusion_table(self, T_range):
        """Build one confusion table per scoring method.

        A sample is predicted positive (1) when its score is strictly
        greater than the threshold, otherwise negative (0).

        Args:
            T_range: iterable of thresholds.

        Returns:
            dict mapping each name in ``score_names`` to a DataFrame with
            columns ``Threshold``, ``TN``, ``FP``, ``FN``, ``TP`` and one
            row per threshold (zero rows for an empty ``T_range``).
        """
        y_true = self.labels
        thresholds = list(T_range)
        count_columns = ('TN', 'FP', 'FN', 'TP')

        stats_Multi_T = {}
        for name in self.score_names:
            scores = np.asarray(self.Multi_scores[name])
            # With labels=[0, 1], ravel() yields (tn, fp, fn, tp).
            # Builtin ``int`` replaces the removed ``np.int`` alias.
            rows = [
                confusion_matrix(y_true, (scores > T).astype(int),
                                 labels=[0, 1]).ravel()
                for T in thresholds
            ]
            # reshape keeps the (n, 4) layout even when there are no rows.
            counts = np.array(rows, dtype=np.int64).reshape(-1, 4)

            table = pd.DataFrame({'Threshold': thresholds})
            for position, column in enumerate(count_columns):
                table[column] = counts[:, position]
            stats_Multi_T[name] = table
        return stats_Multi_T

    def Cal_Sn_Sp(self, conf_tb):
        """Compute sensitivity and specificity for each row of *conf_tb*.

        Sensitivity = TP / (TP + FN); Specificity = TN / (TN + FP).

        Args:
            conf_tb: DataFrame with columns ``Threshold``, ``TN``, ``FP``,
                ``FN``, ``TP``. The two rate columns are added to it in
                place.

        Returns:
            A DataFrame with columns ``Threshold``, ``Sensitivity`` and
            ``Specificity``. A zero denominator produces NaN rather than
            raising.
        """
        positives = conf_tb['TP'] + conf_tb['FN']
        negatives = conf_tb['TN'] + conf_tb['FP']
        conf_tb['Sensitivity'] = conf_tb['TP'] / positives
        conf_tb['Specificity'] = conf_tb['TN'] / negatives
        return conf_tb[['Threshold', 'Sensitivity', 'Specificity']]

    def Cal_Tpr_Fpr(self, conf_tb):
        """Compute true and false positive rates for each row of *conf_tb*.

        Tpr = TP / (TP + FN); Fpr = FP / (TN + FP).

        Args:
            conf_tb: DataFrame with columns ``Threshold``, ``TN``, ``FP``,
                ``FN``, ``TP``. The two rate columns are added to it in
                place.

        Returns:
            A DataFrame with columns ``Threshold``, ``Tpr`` and ``Fpr``.
            A zero denominator produces NaN rather than raising.
        """
        positives = conf_tb['TP'] + conf_tb['FN']
        negatives = conf_tb['TN'] + conf_tb['FP']
        conf_tb['Tpr'] = conf_tb['TP'] / positives
        conf_tb['Fpr'] = conf_tb['FP'] / negatives
        return conf_tb[['Threshold', 'Tpr', 'Fpr']]