<img src="prscatter.png" width="300">

# Precision/recall and ROC curve area comparison interactive chart

AUPR (area under the precision recall curve) and AUROC
(area under the receiver operating characteristic curve) are two metrics
that are sometimes used to evaluate the merits of ranking methods.

AUPR tends to be oriented towards the "web search analogy":
*An incorrect answer ranked first is a search response not
relevant to the user's goals which will frustrate the user.*

AUROC tends to be oriented towards the "bomber/radar analogy":
*A correct answer ranked last represents a bomber that was not targeted
and got past the gunners.*

This visualization compares these metrics.  We compare all possible
rankings of responses where there are a fixed number of right answers
and a fixed number of wrong answers.  In this case a ranking of
`1110001` means there are 4 right answers and 3 wrong answers and the
ranking ranked the first 3 right answers high but the last right answer
lower than all the wrong answers.

First we show a chart which displays the PR and ROC curves and their
corresponding numeric areas.

Use "Kernel --> Runall" above to run all the cells.  Then look to the interactive chart at the bottom.

In [1]:
from __future__ import print_function
from jp_doodle import dual_canvas

# Basic computational definitions:

def precision(ntrue, alltrue, count, length=None):
    """Return the fraction of the first `count` ranked answers that are right.

    `ntrue` is the number of right answers among those first `count`
    answers.  `alltrue` and `length` are accepted but not used here.
    """
    # Multiply by 1.0 first so the division is a float division.
    right_as_float = ntrue * 1.0
    return right_as_float / count

def recall(ntrue, alltrue, count, length=None):
    """Return the fraction of all right answers found so far.

    `ntrue` is the number of right answers seen so far and `alltrue` is the
    total number of right answers.  `count` and `length` are accepted but
    not used here.
    """
    # Multiply by 1.0 first so the division is a float division.
    found_as_float = ntrue * 1.0
    return found_as_float / alltrue

def false_positive_rate(ntrue, alltrue, count, length):
    """Return the fraction of all wrong answers that have been seen so far.

    Of the first `count` ranked answers, `ntrue` are right, so
    `count - ntrue` are wrong.  The whole ranking has `length` answers of
    which `alltrue` are right, so `length - alltrue` are wrong in total.
    """
    return (count - ntrue) * 1.0 / (length - alltrue)

def precision_recall_points(choice):
    """Return one (precision, recall) pair per position of a ranking.

    `choice` is a sequence of truthy (right) / falsy (wrong) flags, best
    rank first.  The pair at position i describes the first i+1 answers.
    """
    total_right = sum(1 for flag in choice if flag)
    points = []
    right_so_far = 0
    for seen, flag in enumerate(choice, 1):
        right_so_far += 1 if flag else 0
        points.append((
            precision(right_so_far, total_right, seen),
            recall(right_so_far, total_right, seen),
        ))
    return points

def ROC_points(choice):
    """Return one (recall, false positive rate) pair per position of a ranking.

    `choice` is a sequence of truthy (right) / falsy (wrong) flags, best
    rank first.  At least one flag must be falsy, otherwise the false
    positive rate has no denominator and the assertion below fails.
    """
    length = len(choice)
    alltrue = sum(1 for flag in choice if flag)
    assert length > alltrue, "no negatives? " + repr((length, alltrue, choice))
    points = []
    right_so_far = 0
    for seen, flag in enumerate(choice, 1):
        if flag:
            right_so_far += 1
        true_positive_rate = recall(right_so_far, alltrue, seen)
        fp_rate = false_positive_rate(right_so_far, alltrue, seen, length)
        points.append((true_positive_rate, fp_rate))
    return points

# Worked example: four ranked answers, best rank first; only the third is wrong.
example = [True, True, False, True]
print("for example", example)
# One (precision, recall) pair is printed per rank position.
print("precision recall curve is", precision_recall_points(example))
# One (recall, false positive rate) pair is printed per rank position.
print("ROC curve is", ROC_points(example))

for example [True, True, False, True]
precision recall curve is [(1.0, 0.3333333333333333), (1.0, 0.6666666666666666), (0.6666666666666666, 0.6666666666666666), (0.75, 1.0)]
ROC curve is [(0.3333333333333333, 0.0), (0.6666666666666666, 0.0), (0.6666666666666666, 1.0), (1.0, 1.0)]


In [2]:
def pr_area(curve):
    """Return the trapezoid-rule area under a curve of (height, x) pairs.

    Each element of `curve` is unpacked as (precision, recall): precision is
    the height and recall is the horizontal position.  The curve is taken to
    start from an implicit point with precision 1.0 at recall 0.0.  An empty
    curve has area 0.
    """
    # Prepend the implicit starting point, then sum one trapezoid per
    # consecutive pair of points.
    points = [(1.0, 0.0)] + list(curve)
    total = 0
    for (left_height, left_x), (right_height, right_x) in zip(points, points[1:]):
        width = right_x - left_x
        mean_height = 0.5 * (right_height + left_height)
        total += width * mean_height
    return total


class Ranking:

    """Ranking object encapsulation.

    Holds one ranking of answers together with its precision/recall and ROC
    curves and the area under each.

    Attributes set by the constructor:
        c          the correctness flags as given, best rank first
        pr_curve   (precision, recall) pairs from precision_recall_points
        roc_curve  (recall, false positive rate) pairs from ROC_points
        aupr       area under the precision/recall curve
        auroc      area under the ROC curve
        str        the ranking as a string of "1" (right) and "0" (wrong)
    """

    def __init__(self, order_correctness):
        """Compute both curves and their areas for `order_correctness`.

        `order_correctness` is a sequence of truthy/falsy flags, best rank
        first.  ROC_points asserts that at least one flag is falsy.
        """
        self.c = order_correctness
        self.pr_curve = precision_recall_points(order_correctness)
        self.roc_curve = ROC_points(order_correctness)
        self.aupr = pr_area(self.pr_curve)
        # pr_area starts its curve at height 1.0, which is right for
        # precision but wrong for ROC: the ROC curve starts at the origin.
        # Using pr_area here over-counted the area whenever the top ranked
        # answer was wrong, so the ROC area has its own helper.
        self.auroc = self._roc_area(self.roc_curve)
        self.str = "".join("1" if x else "0" for x in order_correctness)

    @staticmethod
    def _roc_area(roc_curve):
        """Return the trapezoid-rule area under (recall, fpr) ROC points.

        The false positive rate is the horizontal axis and recall is the
        height.  The curve starts at the origin (recall 0 at fpr 0).
        """
        area = 0.0
        last_tpr = 0.0
        last_fpr = 0.0
        for (tpr, fpr) in roc_curve:
            base = fpr - last_fpr
            height = 0.5 * (tpr + last_tpr)
            area += base * height
            (last_tpr, last_fpr) = (tpr, fpr)
        return area

    def plot_on_frame(self, frame):
        """Redraw `frame` with both curves as filled polygons plus labels.

        The frame is reset first, so anything previously drawn on it is
        replaced.
        """
        frame.reset_frame()
        frame.lower_left_axes(0, 0, 1, 1)
        # PR polygon: x is recall, y is precision.  It starts at (0, 1) and is
        # closed along the bottom through (1, 0) and (0, 0).
        recall_precision = (
            [(0, 1)] +
            [(recall, precision) for (precision, recall) in self.pr_curve] +
            [(1, 0), (0, 0)])
        frame.polygon(points=recall_precision, color="rgba(244,0,0,0.5)")
        # ROC polygon: roc_curve holds (recall, fpr) pairs; x is the false
        # positive rate and y is recall.  It starts at the origin and is
        # closed at (1, 0).
        fpr_recall = (
            [(0, 0)] +
            [(fpr, tpr) for (tpr, fpr) in self.roc_curve] +
            [(1, 0)])
        frame.polygon(points=fpr_recall, color="rgba(200,0,200,0.5)")
        # Only the first 50 characters of the ranking string are shown.
        frame.text(y=1, x=0.1, text=self.str[:50])
        frame.text(y=-0.2, x=0, text="aupr %4.2f"%self.aupr, color="red")
        frame.text(y=-0.3, x=0, text="auroc %4.2f"%self.auroc, color="magenta")

    def plot(self):
        """Draw this ranking on a new snapshot canvas, display it, return it.

        The canvas is created with the snapshot file name "prplot.png".
        """
        demo = dual_canvas.SnapshotCanvas("prplot.png", width=520, height=320)
        # NOTE(review): the arguments look like a pixel rectangle followed by
        # the model coordinate range mapped onto it -- confirm with jp_doodle.
        frame = demo.frame_region(0, 0, 300, 300, -0.2, -0.2, 1.2, 1.2)
        self.plot_on_frame(frame)
        demo.fit()
        demo.display_all()
        return demo
        
# Sample ranking of 12 answers (6 right, 6 wrong) whose two top ranked answers
# are wrong; plot() draws its curves on a new canvas and displays it.
r = Ranking([False, False, True, True, True, True, True, False, False, False, True, False])
#r = Ranking([True, True, True, False, False] * 15)
#print(r.pr_curve)
#print(r.roc_curve)
#print(r.aupr, r.auroc)
demo = r.plot()

VGFiKGNoaWxkcmVuPShWQm94KGNoaWxkcmVuPShTbmFwc2hvdENhbnZhcyhzdGF0dXM9dSdOb3QgeWV0IHJlbmRlcmVkJyksIEpTUHJveHlXaWRnZXQoc3RhdHVzPXUnTm90IHlldCByZW5kZXLigKY=


In [3]:
# Second sample: 10 right answers packed into the first 16 ranks, followed by
# a long tail of 200 wrong answers.
#r = Ranking([False] + [False,True] * 5 + [True] * 10 + [False] * 200)
r = Ranking([True, True, False] + [True, False] * 5 + [True] * 3 + [False] * 200)
#r = Ranking([False, False, True, True, True] * 15)
d = r.plot()

VGFiKGNoaWxkcmVuPShWQm94KGNoaWxkcmVuPShTbmFwc2hvdENhbnZhcyhzdGF0dXM9dSdOb3QgeWV0IHJlbmRlcmVkJyksIEpTUHJveHlXaWRnZXQoc3RhdHVzPXUnTm90IHlldCByZW5kZXLigKY=


# The interactive chart

Now we combine the above chart with a clickable scatter plot
that shows the pairing of AUPR and AUROC values for all possible
rankings where there are 6 right answers and 10 wrong answers.

Each possible ranking corresponds to a circle in the scatter plot.
Click on a circle in the scatter plot to view the detailed
curves associated with that ranking.

In [4]:
# Generate all possible rankings where `chosen` of `number` are correct answers.
import itertools
# Maps a running index (0, 1, 2, ...) to the Ranking built for one combination.
rankings = {}
number = 16
chosen = 6
if 0:
    # Disabled alternative configuration; change the condition to enable it.
    number = 18
    chosen = 5
# Each combination is the tuple of rank positions that hold a correct answer.
# The name is historical: `chosen` positions are selected, not necessarily 7.
choose7 = itertools.combinations(range(number), chosen)
# Maximum number of rankings to build; a falsy value disables the cap.
limit = 200000
for (k, x) in enumerate(choose7):
    # k counts from 0, so stopping at k == limit builds exactly `limit`
    # rankings (the previous `k > limit` test let one extra through).
    if limit and k >= limit:
        break
    y = [False] * number
    for i in x:
        y[i] = True
    #print (x, y)
    rankings[k] = Ranking(y)
#rankings.keys()

In [5]:
def scatter_plotter(rankings):
    """Build the clickable AUPR-versus-AUROC scatter chart and return its canvas.

    `rankings` must support `len`, `.values()` and lookup by the key
    `len(rankings) - 1`; that entry is the one shown initially.  Every value
    gets one circle at (aupr, auroc).  Clicking a circle redraws the detail
    frame for its ranking; hovering updates the two text labels.

    The returned canvas carries the two frames as its `pr_frame` and
    `scatter_frame` attributes.  It is not displayed here.
    """
    import random
    canvas = dual_canvas.SnapshotCanvas("prscatter.png", width=820, height=620)
    detail_frame = canvas.frame_region(0, 0, 300, 300, -0.2, -0.2, 1.2, 1.2)
    points_frame = canvas.frame_region(320, 0, 620, 300, -0.2, -0.2, 1.2, 1.2)

    # Axes, axis labels and the green diagonal where aupr equals auroc.
    points_frame.lower_left_axes(0, 0, 1, 1)
    points_frame.text(0.5, -0.2, "aupr", color="red", align="center")
    points_frame.text(-0.2, 0.5, "auroc", color="magenta", degrees=90, align="center")
    points_frame.line(0, 0, 1, 1, color="green")

    # Named text objects so the hover callback can change them later.
    initial = rankings[len(rankings) - 1]
    ranking_label = points_frame.text(0.2, 1, initial.str, name=True)
    stats_label = points_frame.text(0.2, 1.1, "Scatter plot", name=True)
    initial.plot_on_frame(detail_frame)

    # Circles are added in random order.
    shuffled = list(rankings.values())
    random.shuffle(shuffled)
    for ranking in shuffled:
        marker = points_frame.circle(
            x=ranking.aupr,
            y=ranking.auroc,
            color="rgba(0,125,254,0.3)",
            r=5,
            name=True,
        )

        # Default arguments bind this iteration's objects to each callback.
        # NOTE(review): the first parameter presumably absorbs the event
        # argument passed by the canvas -- confirm against jp_doodle.
        def plot_for_circle(c=marker, r=ranking):
            r.plot_on_frame(detail_frame)

        def hover_circle(c=marker, r=ranking):
            ranking_label.change(text="ranking %s" % (r.str,))
            stats_label.change(text="aupr=%4.2f, auroc=%4.2f" % (r.aupr, r.auroc))

        marker.on("click", plot_for_circle)
        marker.on("mouseover", hover_circle)

    canvas.fit(margin=40)
    canvas.pr_frame = detail_frame
    canvas.scatter_frame = points_frame
    return canvas
    
# Build the scatter chart for the rankings generated above, then display it.
d = scatter_plotter(rankings)
#d.element.invisible_canvas.show()
d.display_all()

VGFiKGNoaWxkcmVuPShWQm94KGNoaWxkcmVuPShTbmFwc2hvdENhbnZhcyhzdGF0dXM9dSdOb3QgeWV0IHJlbmRlcmVkJyksIEpTUHJveHlXaWRnZXQoc3RhdHVzPXUnTm90IHlldCByZW5kZXLigKY=


Click on the blue circles on the right to compare different rankings by AUPR and AUROC.

The most interesting comparisons diverge strongly from the green 45 degree line.