Reference: https://gist.github.com/bwhite/3726239

Args:
    * GT (Ground truth) : NxM sparse matrix, where N is the number of users and M is the number of items
    * Pred (Prediction) : an Nxk array of item IDs, ranked by decreasing relevance.
    * R: Iterator of relevance scores (list or numpy) in rank order
            (first element is the first item)
            shape: Nxk
    * r: relevance scores (list or numpy array) in rank order for a specific query
            (first element is the first item)
            shape: kx1
    where N is the number of queries, and k is the number of recommended items
    
Note:
'R' and 'r' encode both the ground truth and the prediction: the values are the ground-truth relevance scores, and their order is the predicted ranking.

In [66]:
import numpy as np

In [161]:
def reciprocal_rank(r):
    """Return the reciprocal of the rank of the first relevant item.

    The first element of ``r`` is rank 1. Relevance is binary: any nonzero
    entry counts as relevant.

    Args:
        r: relevance scores of a single query, in rank order.

    Returns:
        ``1 / rank`` of the first nonzero entry, or ``0.`` if no entry is
        nonzero.
    """
    hit_positions = np.nonzero(np.asarray(r))[0]
    if hit_positions.shape[0] == 0:
        # No relevant item anywhere in the ranking.
        return 0.
    # Positions are 0-based, ranks are 1-based.
    return 1. / (hit_positions[0] + 1)


def batch_mean_reciprocal_rank(R):
    """Return the mean reciprocal rank over all queries in ``R``.

    The mean is taken across queries, so the result is a single scalar
    rather than one value per query. Relevance is binary (nonzero is
    relevant); a query with no relevant item contributes 0 to the mean.

    Args:
        R: 2-D array-like of relevance scores, one row per query, each row
            in rank order.

    Returns:
        The sum of per-query reciprocal ranks divided by the number of rows.
    """
    relevance = np.asarray(R)
    is_hit = relevance != 0
    # Rows with no hit are excluded from the sum but still counted in the
    # denominator below.
    has_hit = is_hit.any(axis=1)
    # argmax on a boolean row gives the 0-based position of the first True.
    first_hit = is_hit.argmax(axis=1)
    return np.sum(1. / (first_hit[has_hit] + 1)) / relevance.shape[0]
#     R = (np.asarray(r).nonzero()[0] for r in R)
#     return np.mean([1. / (r[0] + 1) if r.shape[0] else 0. for r in R])

In [162]:
# Demo: reciprocal rank of single queries, then the mean over several queries.
# The trailing comment on each print is the expected output.
r = [0, 0, 0, 1]
print(reciprocal_rank(r))  # 0.25

r = [1, 0, 0]
print(reciprocal_rank(r))  # 1.0

# No relevant item at all -> score is 0.
r = [0, 0, 0]
print(reciprocal_rank(r))  # 0.0

rs = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
print(batch_mean_reciprocal_rank(rs))  # 0.611111111111111

# The all-zero first row adds nothing to the sum but still counts in the mean.
rs = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]])
print(batch_mean_reciprocal_rank(rs))  # 0.5

# Only the first relevant item in each row matters.
rs = [[0, 0, 0, 1], [1, 0, 0, 1], [1, 0, 0, 1]]
print(batch_mean_reciprocal_rank(rs))  # 0.75

0.25
1.0
0.0
0.611111111111111
0.5
0.75


In [57]:
def precision_at_k(r, k):
    """Return precision at ``k`` for a single query.

    Relevance is binary: any nonzero entry counts as relevant.

    Args:
        r: relevance scores of a single query, in rank order.
        k: number of top-ranked items to consider; must be >= 1.

    Returns:
        The fraction of the first ``k`` entries that are nonzero.

    Raises:
        ValueError: if ``r`` has fewer than ``k`` entries.
    """
    assert k >= 1
    top_k = np.asarray(r)[:k]
    # Slicing truncates silently, so a short input shows up as a short slice.
    if top_k.shape[0] != k:
        raise ValueError('Relevance score length < k')
    return np.mean(top_k != 0)


def batch_precision_at_k(R, k):
    """Return precision at ``k`` for every query in ``R``.

    Relevance is binary: any nonzero entry counts as relevant.

    Args:
        R: 2-D array-like of relevance scores, one row per query, each row
            in rank order.
        k: number of top-ranked items to consider; must be >= 1.

    Returns:
        1-D array with one precision value per row.

    Raises:
        ValueError: if the rows have fewer than ``k`` columns.
    """
    assert k >= 1
    top_k = np.asarray(R)[:, :k]
    # Slicing truncates silently, so too few columns shows up in the shape.
    if top_k.shape[1] != k:
        raise ValueError('At least one relevance score length < k')
    return (top_k != 0).mean(axis=1)

In [58]:
# Demo: precision@k for one query and for a batch of identical queries.
# The trailing comment on each print is the expected output.
r = [0, 0, 1]
R = [[0, 0, 1],[0, 0, 1], [0, 0, 1]]

print(precision_at_k(r, 1))  # 0.0

print(precision_at_k(r, 2))  # 0.0

print(precision_at_k(r, 3))  # 0.33333333333333331

print(batch_precision_at_k(R, 3))  # [0.33333333 0.33333333 0.33333333]

# The two calls below are left disabled because k exceeds the input length
# and they raise ValueError.
# print(precision_at_k(r, 4))  # ValueError: Relevance score length < k

# print(batch_precision_at_k(R, 4))  # At least one relevance score length < k

0.0
0.0
0.3333333333333333
[0.33333333 0.33333333 0.33333333]


In [59]:
def average_precision(r):
    """Return the average precision (area under the PR curve) of one query.

    Relevance is binary: any nonzero entry counts as relevant. The score is
    the mean of precision@rank taken at every rank that holds a relevant item.

    Args:
        r: relevance scores of a single query, in rank order.

    Returns:
        The average precision, or ``0.`` if no entry is relevant.
    """
    relevant = np.asarray(r) != 0
    precisions = []
    for rank, is_relevant in enumerate(relevant, start=1):
        # Only ranks that hold a relevant item contribute to the average.
        if is_relevant:
            precisions.append(precision_at_k(relevant, rank))
    if len(precisions) == 0:
        return 0.
    return np.mean(precisions)


def batch_mean_average_precision(R):
    """Return the mean average precision (MAP) over all queries in ``R``.

    The mean is taken across queries, so the result is a single scalar
    rather than one value per query.

    Args:
        R: iterable of per-query relevance scores, each in rank order.

    Returns:
        The mean of ``average_precision`` over the queries.
    """
    per_query_scores = list(map(average_precision, R))
    return np.mean(per_query_scores)

In [60]:
# Demo: average precision of one query, then MAP over batches of queries.
# The trailing comment on each print is the expected output.
r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
print(average_precision(r))  # 0.78333333333333333

# A batch holding a single query gives that query's average precision.
R = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]]
print(batch_mean_average_precision(R))  # 0.78333333333333333

# The second query has no relevant item, so it scores 0 and halves the mean.
R = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]
print(batch_mean_average_precision(R))  # 0.39166666666666666

0.7833333333333333
0.7833333333333333
0.39166666666666666


In [61]:
def dcg_at_k(r, k, method=0):
    """Return the discounted cumulative gain (DCG) at ``k`` for one query.

    Relevance is positive real values; binary relevance also works.

    Args:
        r: relevance scores of a single query, in rank order.
        k: number of top-ranked items to consider.
        method: discount scheme.
            If 0, the weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...].
            If 1, the weights are [1.0, 0.6309, 0.5, 0.4307, ...].

    Returns:
        The DCG of the first ``k`` scores, or ``0.`` when there are none.

    Raises:
        ValueError: if ``method`` is not 0 or 1 (only checked when there is
            at least one score to weigh).
    """
    # np.asfarray was removed in NumPy 2.0; an explicit float dtype is the
    # equivalent conversion.
    scores = np.asarray(r, dtype=float)[:k]
    if not scores.size:
        return 0.
    if method == 0:
        # Ranks 1 and 2 both get weight 1; rank i >= 2 is divided by log2(i).
        return scores[0] + np.sum(scores[1:] / np.log2(np.arange(2, scores.size + 1)))
    if method == 1:
        # Rank i is divided by log2(i + 1).
        return np.sum(scores / np.log2(np.arange(2, scores.size + 2)))
    raise ValueError('method must be 0 or 1.')


def batch_dcg_at_k(R, k, method=0):
    """Return the discounted cumulative gain (DCG) at ``k`` for every query.

    Relevance is positive real values; binary relevance also works.

    Args:
        R: 2-D array-like of relevance scores, one row per query, each row
            in rank order.
        k: number of top-ranked items to consider.
        method: discount scheme.
            If 0, the weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...].
            If 1, the weights are [1.0, 0.6309, 0.5, 0.4307, ...].

    Returns:
        1-D array with one DCG value per row. When there is nothing to score
        (no rows, or no columns after truncation) every entry is 0.

    Raises:
        ValueError: if ``method`` is not 0 or 1 (only checked when there is
            at least one score to weigh).
    """
    # np.asfarray was removed in NumPy 2.0; an explicit float dtype is the
    # equivalent conversion.
    scores = np.asarray(R, dtype=float)[:, :k]
    n_queries = scores.shape[0]
    depth = scores.shape[1]
    if scores.size == 0:
        # One zero per query, not one per element: the result must keep the
        # (n_queries,) shape even when k truncates every row to nothing.
        return np.zeros(n_queries)
    if method == 0:
        # Ranks 1 and 2 both get weight 1; rank i >= 2 is divided by log2(i).
        discounts = np.log2(np.arange(2, depth + 1))
        return scores[:, 0] + np.sum(scores[:, 1:] / discounts, axis=1)
    if method == 1:
        # Rank i is divided by log2(i + 1).
        discounts = np.log2(np.arange(2, depth + 2))
        return np.sum(scores / discounts, axis=1)
    raise ValueError('method must be 0 or 1')

In [62]:
def ndcg_at_k(r, k, method=0):
    """Return the normalized discounted cumulative gain (NDCG) at ``k``.

    Relevance is positive real values; binary relevance also works. The DCG
    of ``r`` is divided by the DCG of the same scores sorted in decreasing
    order.

    Args:
        r: relevance scores of a single query, in rank order.
        k: number of top-ranked items to consider.
        method: discount scheme.
            If 0, the weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...].
            If 1, the weights are [1.0, 0.6309, 0.5, 0.4307, ...].

    Returns:
        The NDCG value, or ``0.`` when the ideal DCG is zero.
    """
    ideal_order = sorted(r, reverse=True)
    ideal_dcg = dcg_at_k(ideal_order, k, method)
    if ideal_dcg:
        return dcg_at_k(r, k, method) / ideal_dcg
    # A zero ideal DCG would otherwise divide by zero.
    return 0.


def batch_ndcg_at_k(R, k, method=0):
    """Return the normalized discounted cumulative gain (NDCG) at ``k`` per query.

    Each row's DCG is divided by the DCG of the same row sorted in
    decreasing order.

    Args:
        R: 2-D array-like of relevance scores, one row per query, each row
            in rank order. Nested lists are accepted as well as arrays.
        k: number of top-ranked items to consider.
        method: discount scheme, passed through to ``batch_dcg_at_k``
            (0 or 1).

    Returns:
        1-D array with one NDCG value per row.
    """
    # Convert up front so that nested lists work: unary minus and
    # take_along_axis below both need an ndarray.
    scores = np.asarray(R)
    ideal_order = np.argsort(-scores, axis=1)
    ideal_dcg = batch_dcg_at_k(
        np.take_along_axis(scores, ideal_order, axis=1), k, method)
    # Replace a zero ideal DCG with a small positive value to avoid dividing
    # by zero; with non-negative relevance the numerator is then 0 as well,
    # so those rows score 0.
    ideal_dcg = np.where(ideal_dcg == 0., 1e-5, ideal_dcg)
    return batch_dcg_at_k(scores, k, method) / ideal_dcg

In [63]:
# Demo: the batch DCG of three identical rows should equal the single-query
# DCG of that row.
R = [[3, 2, 3, 0, 0, 1, 2, 2, 3, 0],[3, 2, 3, 0, 0, 1, 2, 2, 3, 0],[3, 2, 3, 0, 0, 1, 2, 2, 3, 0]]
R = np.array(R)
a = batch_dcg_at_k(R, 10, method=1)
r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
b = dcg_at_k(r, 10, method=1)
print(a)
print(b)

[8.3187531 8.3187531 8.3187531]
8.318753101481006


In [64]:
# Demo: batch NDCG versus single-query NDCG. The third row is all zeros, so
# its ideal DCG is zero.
R = [[3, 2, 3, 0, 0, 1, 2, 2, 3, 0],[3, 2, 3, 0, 0, 1, 2, 2, 3, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
R = np.array(R)
a = batch_ndcg_at_k(R, 10, method=1)
r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
b = ndcg_at_k(r, 10, method=1)
print(a)
print(b)

[0.91680888 0.91680888 0.        ]
0.9168088790321769
