/
metrics.py
39 lines (32 loc) · 1.16 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import numpy as np
def RecallPrecision_ATk(test_data, r, k):
"""
test_data should be a list? cause users may have different amount of pos items. shape (test_batch, k)
pred_data : shape (test_batch, k) NOTE: pred_data should be pre-sorted
k : top-k
"""
right_pred = r[:, :k].sum(1)
precis_n = k
recall_n = np.array([len(test_data[i]) for i in range(len(test_data))])
recall = np.sum(right_pred/recall_n)
precis = np.sum(right_pred)/precis_n
return {'recall': recall, 'precision': precis}
def NDCGatK_r(test_data, r, k):
"""
Normalized Discounted Cumulative Gain
rel_i = 1 or 0, so 2^{rel_i} - 1 = 1 or 0
"""
assert len(r) == len(test_data)
pred_data = r[:, :k]
test_matrix = np.zeros((len(pred_data), k))
for i, items in enumerate(test_data):
length = k if k <= len(items) else len(items)
test_matrix[i, :length] = 1
max_r = test_matrix
idcg = np.sum(max_r * 1./np.log2(np.arange(2, k + 2)), axis=1)
dcg = pred_data*(1./np.log2(np.arange(2, k + 2)))
dcg = np.sum(dcg, axis=1)
idcg[idcg == 0.] = 1.
ndcg = dcg/idcg
ndcg[np.isnan(ndcg)] = 0.
return np.sum(ndcg)