In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np

from metrics import discounted_cumulative_gain, find_precision_k, find_recall_k, mean_reciprocal_rank

In [42]:
# Degenerate rankings: (1) no prediction at all, (2) predictions that share no
# document with y_true. DCG, precision@k and recall@k are asserted to be 0 in
# both cases. MRR is asserted to be 0.01 rather than 0 -- presumably a floor
# applied inside mean_reciprocal_rank; TODO confirm against metrics.py.

for y_pred in ([], ["e", "f", "g", "h"]):
    # Fixtures are rebuilt for every case so that each case starts from
    # fresh objects.
    y_true = ["a", "b", "c", "d"]
    y_score = {"a": 4, "b": 2, "c": 2, "d": 1}
    k = 3

    assert discounted_cumulative_gain(y_score, y_true, y_pred, k) == 0
    assert find_precision_k(y_pred, y_true, k) == 0
    assert find_recall_k(y_pred, y_true, k) == 0
    assert mean_reciprocal_rank(y_pred, y_true) == 0.01

# Truncation: y_pred_long starts with the same entries as y_pred and only adds
# two more at the end, so every metric must agree between the two lists.

y_true = ["a", "b", "c", "d"]
y_pred = ["a", "f", "c", "h"]
y_pred_long = y_pred + ["d", "b"]

y_score = {"a": 4, "b": 2, "c": 2, "d": 1}

k = 3
assert (
    discounted_cumulative_gain(y_score, y_true, y_pred, k)
    == discounted_cumulative_gain(y_score, y_true, y_pred_long, k)
)
assert (
    find_precision_k(y_pred, y_true, k)
    == find_precision_k(y_pred_long, y_true, k)
)
assert (
    find_recall_k(y_pred, y_true, k)
    == find_recall_k(y_pred_long, y_true, k)
)
assert (
    mean_reciprocal_rank(y_pred, y_true)
    == mean_reciprocal_rank(y_pred_long, y_true)
)

# Ranking quality: y_pred_bad is y_pred_good with an irrelevant document "e"
# pushed in front, so every metric must be strictly lower for it.
# `k` is not reassigned here; it is still the 3 set for the truncation check.

y_true = ["a", "b", "c", "d"]
y_pred_good = ["a", "b", "d"]
y_pred_bad = ["e", "a", "b", "d"]

y_score = {"a": 4, "b": 2, "c": 2, "d": 1}

assert (
    discounted_cumulative_gain(y_score, y_true, y_pred_good, k)
    > discounted_cumulative_gain(y_score, y_true, y_pred_bad, k)
)
assert (
    find_precision_k(y_pred_good, y_true, k)
    > find_precision_k(y_pred_bad, y_true, k)
)
assert (
    find_recall_k(y_pred_good, y_true, k)
    > find_recall_k(y_pred_bad, y_true, k)
)
assert (
    mean_reciprocal_rank(y_pred_good, y_true)
    > mean_reciprocal_rank(y_pred_bad, y_true)
)

# A cut-off larger than the prediction list (k = 5 against 3 predictions) must
# not raise. Each bare assert additionally requires the returned value to be
# truthy, i.e. non-zero.

k = 5
assert discounted_cumulative_gain(y_score, y_true, y_pred_good, k)
assert find_precision_k(y_pred_good, y_true, k)
assert find_recall_k(y_pred_good, y_true, k)
assert mean_reciprocal_rank(y_pred_good, y_true)

# Normalisation by the ideal DCG: the absolute size of a score must not matter.
# The single relevant document "a" is ranked first in y_pred, so the result has
# to be the same whether "a" is scored 5 or 1.

y_true = ["a"]
y_pred = ["a", "b", "d"]

y_score_high = {
    "a": 5,
    "e": 8,  # "e" is neither relevant nor predicted, so it must have no impact
}
y_score_low = {
    "a": 1,
}
k = 3

A = discounted_cumulative_gain(y_score_high, y_true, y_pred, k)
B = discounted_cumulative_gain(y_score_low, y_true, y_pred, k)
np.testing.assert_almost_equal(A, B)

# Relative scores: both relevant documents are predicted in the listed order,
# so both score tables must give a DCG of 1. "c" carries a score in both
# tables but is neither relevant nor predicted.

y_true = ["a", "b"]
y_pred = ["a", "b"]

y_score_high = {
    "a": 5,
    "b": 3,
    "c": 5,
}
y_score_low = {
    "a": 3,
    "b": 2,
    "c": 5,
}
k = 3

A = discounted_cumulative_gain(y_score_high, y_true, y_pred, k)
B = discounted_cumulative_gain(y_score_low, y_true, y_pred, k)
# Compared with a tolerance, like the check above: both values are computed
# floats, and exact `==` would make the test depend on rounding.
np.testing.assert_almost_equal(A, 1)
np.testing.assert_almost_equal(B, 1)

# Perfect recall: both relevant documents appear among the five predictions,
# so recall is 1, while precision is not (three predictions are irrelevant).
# `k` is assigned here, but the two calls below pass the literal 5.
k = 5
y_true = ["a", "b"]
y_pred = ["a", "e", "f", "g", "b"]

assert find_recall_k(y_pred, y_true, 5) == 1
assert find_precision_k(y_pred, y_true, 5) != 1

# Earlier recall assertions, kept as regression checks:
# (predictions, relevant documents, cut-off, expected recall)
_recall_cases = [
    ([1, 2, 3], [1], 1, 1),
    ([1, 2, 3], [9], 1, 0),
    ([1, 2, 3], [1], 3, 1),
    ([1, 2, 3], [3, 1], 1, 1/2),
    ([1, 2, 3], [3, 1, 4], 2, 1/3),
]
for _pred, _relevant, _cutoff, _expected in _recall_cases:
    assert find_recall_k(_pred, _relevant, k=_cutoff) == _expected

# Perfect precision: every prediction is relevant, but only two of the five
# relevant documents are predicted, so recall stays below 1.
# The calls pass the literal 5 again; `k = 2` is only set, not used here, and
# stays bound for the statements that follow this section.
k = 2
y_true = ["a", "e", "f", "g", "b"]
y_pred = ["a", "g"]

assert find_precision_k(y_pred, y_true, 5) == 1
assert find_recall_k(y_pred, y_true, 5) != 1

# Precision with a cut-off of 3: one hit in three predictions, a single
# correct prediction, and two correct predictions in either order.
assert find_precision_k([1, 2, 3], [1], 3) == 1/3
assert find_precision_k([1], [1], 3) == 1
assert (
    find_precision_k([1, 2], [2, 1], 3)
    == find_precision_k([2, 1], [1, 2], 3)
    == 1
)

# Cut-off for every check in this section, stated explicitly. 2 is the value
# that the preceding precision section leaves in `k`, so results are unchanged.
# NOTE(review): y_pred_good / y_pred_bad hold three entries each; confirm that
# a cut-off of 2 (rather than 3) is what these checks are meant to use.
k = 2

# An irrelevant leading result ("d") must not hide the ordering of the
# relevant ones: ranking the higher-scored "a" before "w" has to score higher.

y_true = ["a", "w"]
y_pred_good = ["d", "a", "w"]
y_pred_bad = ["d", "w", "a"]

y_score = {"a": 5, "w": 3}

assert (
    discounted_cumulative_gain(y_score, y_true, y_pred_good, k)
    > discounted_cumulative_gain(y_score, y_true, y_pred_bad, k)
)


# The only relevant document in first place is the ideal ranking, so the
# result must be 1. Compared with a tolerance because it is a computed float.

y_true = ["a"]
y_pred = ["a", "b", "d"]

y_score = {"a": 5}
np.testing.assert_almost_equal(
    discounted_cumulative_gain(y_score, y_true, y_pred, k), 1
)


# The order in which y_true lists the relevant documents must not matter.

y_true_unsorted = ["a", "c", "b"]
y_true_sorted = ["a", "b", "c"]
y_pred = ["a", "b", "w"]
y_score = {"a": 3, "b": 2, "c": 1}

# Tolerance comparison: the two calls may process y_true in a different order,
# which can change float rounding without the result being wrong.
np.testing.assert_almost_equal(
    discounted_cumulative_gain(y_score, y_true_sorted, y_pred, k),
    discounted_cumulative_gain(y_score, y_true_unsorted, y_pred, k),
)



In [41]:
# Same prediction lists as the a=5 / w=3 ordering check, but here "a" is
# scored 1 and "w" is scored 3.
# NOTE(review): `k` is not set in this cell; the call uses whatever value the
# kernel currently holds.
# NOTE(review): with these scores y_pred_bad ranks the higher-scored "w" ahead
# of "a", so a score-sensitive DCG would presumably favour y_pred_bad and the
# inequality below looks inverted -- confirm the intended expectation.
y_score = {"a": 1, "w": 3}
y_true = ["a", "w"]

y_pred_good = ["d", "a", "w"]
y_pred_bad = ["d", "w", "a"]

assert (
    discounted_cumulative_gain(y_score, y_true, y_pred_good, k)
    > discounted_cumulative_gain(y_score, y_true, y_pred_bad, k)
)

In [17]:
# Ad-hoc inspection call with a cut-off of 6. y_score_low, y_true and y_pred
# are not defined in this cell, so the result depends on which earlier cells
# ran last. The returned value is shown as the cell output.
discounted_cumulative_gain(y_score_low, y_true, y_pred, 6)


[3]
1.3333333333333333
3.0


0.4444444444444444

In [24]:
def compute_dcg(y_scoring, k):
    """Average the rank-discounted gains of the first ``k`` entries.

    The entry at 1-based rank ``r`` contributes ``gain / (log2(r) + 1)``, so
    the first entry is undiscounted. The contributions are averaged, not
    summed.

    NOTE(review): textbook DCG sums the contributions and discounts by
    ``log2(r + 1)``; confirm that this averaged variant is intended.

    Parameters
    ----------
    y_scoring : sliceable sequence of numbers
        Gains in ranked order (best-ranked first).
    k : int
        Cut-off; only ``y_scoring[:k]`` is considered.

    Returns
    -------
    float
        Mean discounted gain of the considered entries, or ``0.0`` when there
        are none (empty input or ``k == 0``). Previously that case produced
        NaN together with a NumPy RuntimeWarning.

    Relies on ``np`` (NumPy) being imported at notebook level.
    """
    top_k = y_scoring[:k]
    if len(top_k) == 0:
        # np.mean of an empty list is NaN; an empty ranking has no gain.
        return 0.0
    discounted = [
        gain / (np.log2(rank) + 1)
        for rank, gain in enumerate(top_k, start=1)
    ]
    return np.mean(discounted)

In [28]:
# One gain of 5 at rank 1 followed by two zero gains. compute_dcg averages the
# three discounted gains, giving (5/1 + 0 + 0) / 3; dividing by 5 yields ~1/3.
compute_dcg([5, 0, 0], 3)/5

0.33333333333333337