In [9]:
import numpy as np

* Ground_truth : NxM sparse matrix, where N is the number of users and M is the number of items
* Prediction : an Nxk array of item IDs, ranked by decreasing relevance.

DCG and nDCG for a specific query

In [65]:
def dcg_at_k(r, k, method=0):
    """Discounted cumulative gain (DCG) over the first ``k`` relevance scores.

    Args:
        r: Sequence of relevance scores, in ranked order (index 0 is the
            top-ranked position).
        k: Number of leading positions to take into account.
        method: Discount scheme.
            0 -- the first score is taken as-is and the score at 1-based
                 position ``i >= 2`` is divided by ``log2(i)``.
            1 -- the score at 1-based position ``i`` is divided by
                 ``log2(i + 1)``.

    Returns:
        The DCG value; ``0.`` when no score falls inside the first ``k``
        positions.

    Raises:
        ValueError: If at least one score is evaluated and ``method`` is
            neither 0 nor 1.
    """
    # np.asfarray was removed in NumPy 2.0; asarray with an explicit float
    # dtype performs the same conversion on every NumPy version.
    scores = np.asarray(r, dtype=float)[:k]
    if scores.size:
        if method == 0:
            # Positions 2..n are discounted by log2(position).
            discounts = np.log2(np.arange(2, scores.size + 1))
            return scores[0] + np.sum(scores[1:] / discounts)
        elif method == 1:
            # Positions 1..n are discounted by log2(position + 1).
            discounts = np.log2(np.arange(2, scores.size + 2))
            return np.sum(scores / discounts)
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.

def ndcg_at_k(r, k, method=0):
    """Normalized DCG over the first ``k`` positions of ``r``.

    The DCG of ``r`` is divided by the DCG of the same scores sorted in
    decreasing order (the best achievable ordering).

    Args:
        r: Sequence of relevance scores, in ranked order.
        k: Number of leading positions to take into account.
        method: Discount scheme forwarded to ``dcg_at_k`` (0 or 1).

    Returns:
        ``DCG / ideal DCG``, or ``0.`` when the ideal DCG is zero.
    """
    ideal_order = sorted(r, reverse=True)
    ideal_dcg = dcg_at_k(ideal_order, k, method)
    if ideal_dcg:
        return dcg_at_k(r, k, method) / ideal_dcg
    # Nothing relevant to rank: define nDCG as 0 instead of dividing by zero.
    return 0.

As I understand it, the list `r` already encodes both the ground truth (the relevance scores) and the prediction (the order in which those scores are ranked). For the batch versions below, assume the input is a matrix of shape N×k in which each element is a relevance score.

In [97]:
def batch_dcg_at_k(R, k, method=0):
    """Row-wise discounted cumulative gain over the first ``k`` columns.

    Args:
        R: 2-D array-like of relevance scores; each row is scored
            independently and column 0 is the top-ranked position.
        k: Number of leading columns to take into account.
        method: Discount scheme.
            0 -- column 0 is taken as-is and the score at 1-based position
                 ``i >= 2`` is divided by ``log2(i)``.
            1 -- the score at 1-based position ``i`` is divided by
                 ``log2(i + 1)``.

    Returns:
        1-D float array with one DCG value per row of ``R``. When no column
        is left after truncation (e.g. ``k == 0``) the result is all zeros,
        still with one entry per row.

    Raises:
        ValueError: If there is at least one score to evaluate and ``method``
            is neither 0 nor 1.
    """
    # np.asfarray was removed in NumPy 2.0; asarray with an explicit float
    # dtype performs the same conversion on every NumPy version.
    R = np.asarray(R, dtype=float)[:, :k]
    # Use the row count (not R.size, the total element count) so the
    # zero fallback keeps one entry per row.
    n_rows, n_cols = R.shape[0], R.shape[1]
    if n_rows and n_cols:
        if method == 0:
            # Positions 2..n are discounted by log2(position).
            discounts = np.log2(np.arange(2, n_cols + 1))
            return R[:, 0] + np.sum(R[:, 1:] / discounts, axis=1)
        elif method == 1:
            # Positions 1..n are discounted by log2(position + 1).
            discounts = np.log2(np.arange(2, n_cols + 2))
            return np.sum(R / discounts, axis=1)
        else:
            raise ValueError('method must be 0 or 1')
    return np.zeros(n_rows)

In [98]:
# Sanity check: every row of the batch holds the same ranking as `r`, so each
# batch DCG value should equal the single-query DCG.
r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
R = np.array([r, r, r])
a = batch_dcg_at_k(R, 10, method=1)
b = dcg_at_k(r, 10, method=1)
print(a)
print(b)

[8.3187531 8.3187531 8.3187531]
8.318753101481006


In [141]:
def batch_ndcg_at_k(R, k, method=0):
    """Row-wise normalized DCG over the first ``k`` columns.

    Each row's DCG is divided by the DCG of that row's scores sorted in
    decreasing order (the best achievable ordering).

    Args:
        R: 2-D array-like of relevance scores; each row is scored
            independently and column 0 is the top-ranked position.
        k: Number of leading columns to take into account.
        method: Discount scheme forwarded to ``batch_dcg_at_k`` (0 or 1).

    Returns:
        1-D float array with one nDCG value per row. Rows whose ideal DCG is
        zero get exactly 0, matching ``ndcg_at_k``.
    """
    # Convert up front: negation below would fail on a plain list and wrap
    # around on unsigned-integer arrays.
    R = np.asarray(R, dtype=float)
    # Sort each row in decreasing order to obtain the ideal ranking.
    ideal = np.take_along_axis(R, np.argsort(-R, axis=1), axis=1)
    batch_dcg_max = batch_dcg_at_k(ideal, k, method)
    batch_dcg = batch_dcg_at_k(R, k, method)
    # Divide only where the ideal DCG is non-zero; the remaining entries keep
    # the 0 from `out`, so no epsilon substitute is needed.
    return np.divide(
        batch_dcg,
        batch_dcg_max,
        out=np.zeros_like(batch_dcg_max),
        where=batch_dcg_max != 0.,
    )

In [142]:
# Sanity check: the first two rows repeat `r`, so their batch nDCG should equal
# the single-query nDCG; the last row has no relevant items at all.
r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
R = np.array([r, r, [0] * len(r)])
a = batch_ndcg_at_k(R, 10, method=1)
b = ndcg_at_k(r, 10, method=1)
print(a)
print(b)

[0.91680888 0.91680888 0.        ]
0.9168088790321769
