In [17]:
import sys
sys.path.append("/afs/cs.stanford.edu/u/awni/scr/ecg-master/ecg")
import predict
import numpy as np
import sklearn.metrics as skm
import math

In [2]:
# Directory of the saved prediction run that every cell below analyses.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where this
# location is readable; consider making it configurable.
prediction_folder = "/deep/u/pranavsr/saved/predictions/1503713063/"
# Unpacks four values from the project-local loader. Judging by the names these are the
# inputs, ground-truth labels, predicted probabilities and the label processor; their
# shapes are defined by predict.load_predictions -- TODO confirm against that module.
x, gt, probs, processor = predict.load_predictions(prediction_folder)

In [3]:
def c_statistic_with_95p_confidence_interval(cstat, num_positives, num_negatives, z_alpha_2=1.96):
    """
    Calculates the half-width of a confidence interval for an ROC AUC (c-statistic), using the
    Hanley & McNeil standard-error approximation described under "Confidence Interval for AUC" here:
      https://ncss-wpengine.netdna-ssl.com/wp-content/themes/ncss/pdf/Procedures/PASS/Confidence_Intervals_for_the_Area_Under_an_ROC_Curve.pdf
    Args:
        cstat: the c-statistic (equivalent to area under the ROC curve); must lie in [0, 1].
        num_positives: number of positive examples in the set; must be at least 1.
        num_negatives: number of negative examples in the set; must be at least 1.
        z_alpha_2 (optional): the critical value for an N% confidence interval, e.g., 1.96 for 95%,
            2.326 for 98%, 2.576 for 99%, etc.
    Returns:
        The confidence interval half-width, e.g., the Y in X ± Y. With the default z_alpha_2 this
        is the 95% interval; any other critical value yields the interval at that level instead.
    Raises:
        ValueError: if cstat is outside [0, 1] or if either count is smaller than 1.
    """
    # Written so that a NaN cstat still propagates as NaN instead of raising.
    if cstat < 0 or cstat > 1:
        raise ValueError("cstat must lie in [0, 1], got {!r}".format(cstat))
    if num_positives < 1 or num_negatives < 1:
        raise ValueError(
            "num_positives and num_negatives must both be >= 1, got {!r} and {!r}".format(
                num_positives, num_negatives))

    # Work in plain floats: counts often arrive as fixed-width numpy integers, whose product
    # can overflow, and Python 2 would floor-divide integer inputs.
    cstat = float(cstat)
    n_pos = float(num_positives)
    n_neg = float(num_negatives)

    q1 = cstat / (2 - cstat)
    q2 = 2 * cstat**2 / (1 + cstat)
    numerator = cstat * (1 - cstat) \
        + (n_pos - 1) * (q1 - cstat**2) \
        + (n_neg - 1) * (q2 - cstat**2)
    # Every term is algebraically non-negative for cstat in [0, 1]; clamp so floating-point
    # round-off can never hand math.sqrt a tiny negative value.
    numerator = max(numerator, 0.0)
    standard_error_auc = math.sqrt(numerator / (n_pos * n_neg))
    return z_alpha_2 * standard_error_auc

In [51]:
# Remove size-1 dimensions from the ground-truth labels so they can be compared
# element-wise against a class index. The set-level cell reduces gts over axis=1, so the
# result is presumably (examples, steps) -- TODO confirm.
gts = gt.squeeze()

def roc_auc(gts, probs, index):
    """
    One-vs-rest ROC AUC for a single class, scoring every label position on its own.

    Args:
        gts: array of class labels; positions equal to `index` are the positives.
        probs: 3-D array of scores whose last axis is indexed by class id
            (read as probs[:, :, index]).
        index: id of the class to evaluate.
    Returns:
        Tuple (n_pos, n_neg, auc): number of positive positions, number of remaining
        positions, and the ROC AUC of the class scores against the binary labels.
    """
    is_target = (gts == index)
    # Same 0/1 encoding, in the labels' own dtype, that filling a zeros_like array gives.
    binary_labels = is_target.astype(gts.dtype)
    n_pos = np.sum(binary_labels == 1)
    n_neg = binary_labels.size - n_pos
    # Flatten labels and scores so they are compared position by position.
    class_scores = probs[:, :, index].ravel()
    auc = skm.roc_auc_score(binary_labels.ravel(), class_scores)
    return n_pos, n_neg, auc

print "AUC Scores (sequence level)"
macro_average = 0.0; total = 0.0
for idx, cname in processor.int_to_class.items():
    pos, neg, auc = roc_auc(gts, probs, idx)
    total += pos
    macro_average += pos * auc
    conf = c_statistic_with_95p_confidence_interval(auc, pos, neg)
    print "{: <8}\t{:.3f}\t{:.3f}\t{:.3f}".format(cname, auc, auc-conf,auc+conf)
print "Average", macro_average / total


AUC Scores (sequence level)
AF      	0.975	0.968	0.982
AVB     	0.989	0.984	0.994
BIGEMINY	0.998	0.995	1.002
EAR     	0.908	0.883	0.932
IVR     	0.995	0.987	1.002
JUNCTIONAL	0.985	0.978	0.992
NOISE   	0.985	0.979	0.992
SINUS   	0.976	0.973	0.980
SVT     	0.972	0.959	0.985
TRIGEMINY	0.999	0.995	1.002
VT      	0.995	0.981	1.009
WENCKEBACH	0.982	0.972	0.991
0.978633555005


In [53]:
def roc_auc_set(gts, probs, index):
    """
    One-vs-rest ROC AUC for a single class with one label and one score per row.

    A row is positive when any entry of `gts` along axis 1 equals `index`; its score is
    the largest value of probs[..., index] along axis 1.

    Args:
        gts: array of class labels, reduced over axis 1.
        probs: array of scores whose last axis is indexed by class id.
        index: id of the class to evaluate.
    Returns:
        Tuple (pos, neg, auc): number of positive rows, number of remaining rows, and the
        ROC AUC of the per-row scores against the per-row labels.
    """
    row_scores = probs[..., index].max(axis=1)
    row_has_class = (gts == index).any(axis=1)
    n_pos = row_has_class.sum()
    n_neg = row_has_class.size - n_pos
    auc = skm.roc_auc_score(row_has_class, row_scores)
    return n_pos, n_neg, auc

print "AUC Scores (set level)"
macro_average = 0.0; total = 0.0
for idx, cname in processor.int_to_class.items():
    pos, neg, auc = roc_auc_set(gts, probs, idx)
    total += pos
    macro_average += pos * auc
    conf = c_statistic_with_95p_confidence_interval(auc, pos, neg)
    print "{: <8}\t{:.3f}\t{:.3f}\t{:.3f}".format(cname, auc, auc-conf, min(1, auc+conf))
print "Average", macro_average / total

AUC Scores (set level)
AF      	0.959	0.924	0.994
AVB     	0.981	0.954	1.000
BIGEMINY	0.997	0.981	1.000
EAR     	0.935	0.863	1.000
IVR     	0.986	0.958	1.000
JUNCTIONAL	0.980	0.949	1.000
NOISE   	0.958	0.914	1.000
SINUS   	0.981	0.968	0.994
SVT     	0.940	0.884	0.996
TRIGEMINY	0.997	0.979	1.000
VT      	0.981	0.935	1.000
WENCKEBACH	0.981	0.948	1.000
Average 0.974487907945
