[MRG] Add DCG and NDCG (scikit-learn#7739)
* Add DCG and NDCG ranking functions
davidgasquez authored and Sundrique committed Jun 14, 2017
1 parent ea86b8e commit 92bef6c
Showing 3 changed files with 126 additions and 1 deletion.
4 changes: 4 additions & 0 deletions sklearn/metrics/__init__.py
@@ -12,6 +12,8 @@
from .ranking import precision_recall_curve
from .ranking import roc_auc_score
from .ranking import roc_curve
from .ranking import dcg_score
from .ranking import ndcg_score

from .classification import accuracy_score
from .classification import classification_report
@@ -113,4 +115,6 @@
    'v_measure_score',
    'zero_one_loss',
    'brier_score_loss',
    'dcg_score',
    'ndcg_score'
]
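With these exports in place, both metrics become importable from sklearn.metrics. A usage sketch, reusing the doctest values from the patch (assumes a build containing this commit):

from sklearn.metrics import dcg_score, ndcg_score

y_true = [1, 0, 2]
y_score = [[0.15, 0.55, 0.2], [0.7, 0.2, 0.1], [0.06, 0.04, 0.9]]
print(ndcg_score(y_true, y_score, k=2))  # 1.0 for this perfect ranking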
90 changes: 89 additions & 1 deletion sklearn/metrics/ranking.py
@@ -26,11 +26,12 @@

from ..utils import assert_all_finite
from ..utils import check_consistent_length
from ..utils import column_or_1d, check_array
from ..utils import column_or_1d, check_array, check_X_y
from ..utils.multiclass import type_of_target
from ..utils.extmath import stable_cumsum
from ..utils.sparsefuncs import count_nonzero
from ..exceptions import UndefinedMetricWarning
from ..preprocessing import LabelBinarizer

from .base import _average_binary_score

@@ -763,3 +764,90 @@ def label_ranking_loss(y_true, y_score, sample_weight=None):
    loss[np.logical_or(n_positives == 0, n_positives == n_labels)] = 0.

    return np.average(loss, weights=sample_weight)


def dcg_score(y_true, y_score, k=5):
    """Discounted cumulative gain (DCG) at rank k.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        Ground truth (true relevance labels).
    y_score : array, shape = [n_samples]
        Predicted scores.
    k : int
        Rank; only the k highest-scored items contribute.

    Returns
    -------
    score : float

    References
    ----------
    .. [1] `Wikipedia entry for the Discounted Cumulative Gain
           <https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_
    """
    order = np.argsort(y_score)[::-1]
    y_true = np.take(y_true, order[:k])

    gain = 2 ** y_true - 1

    discounts = np.log2(np.arange(len(y_true)) + 2)
    return np.sum(gain / discounts)
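For intuition, the same computation can be traced by hand outside the library. A minimal sketch; relevance and scores are illustrative values, not from the patch:

import numpy as np

# Hypothetical relevance labels and model scores for four items.
relevance = np.array([3, 2, 0, 1])
scores = np.array([0.9, 0.8, 0.7, 0.6])

k = 3
order = np.argsort(scores)[::-1]       # indices sorted by descending score
top = np.take(relevance, order[:k])    # relevance of the k best-scored items

gain = 2 ** top - 1                    # exponential gain, as in the patch
discounts = np.log2(np.arange(k) + 2)  # log2(rank + 1), ranks starting at 1
dcg = np.sum(gain / discounts)         # DCG@3 for this toy example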


def ndcg_score(y_true, y_score, k=5):
    """Normalized discounted cumulative gain (NDCG) at rank k.

    Normalized Discounted Cumulative Gain (NDCG) measures the performance of
    a recommendation system based on the graded relevance of the recommended
    entities. It varies from 0.0 to 1.0, with 1.0 representing the ideal
    ranking of the entities.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        Ground truth (true labels represented as integers).
    y_score : array, shape = [n_samples, n_classes]
        Predicted probabilities.
    k : int
        Rank; only the k highest-scored items contribute.

    Returns
    -------
    score : float

    Examples
    --------
    >>> y_true = [1, 0, 2]
    >>> y_score = [[0.15, 0.55, 0.2], [0.7, 0.2, 0.1], [0.06, 0.04, 0.9]]
    >>> ndcg_score(y_true, y_score, k=2)
    1.0
    >>> y_score = [[0.9, 0.5, 0.8], [0.7, 0.2, 0.1], [0.06, 0.04, 0.9]]
    >>> ndcg_score(y_true, y_score, k=2)
    0.66666666666666663

    References
    ----------
    .. [1] `Kaggle entry for the Normalized Discounted Cumulative Gain
           <https://www.kaggle.com/wiki/NormalizedDiscountedCumulativeGain>`_
    """
    y_score, y_true = check_X_y(y_score, y_true)

    # Make sure we use all the labels (max of the array length and the
    # highest label value in the array).
    lb = LabelBinarizer()
    lb.fit(range(max(max(y_true) + 1, len(y_true))))
    binarized_y_true = lb.transform(y_true)

    if binarized_y_true.shape != y_score.shape:
        raise ValueError("y_true and y_score have different value ranges")

    scores = []

    # For each sample, compute the ratio of the achieved DCG to the ideal
    # DCG (the DCG of a perfect ranking of the true labels).
    for y_value_true, y_value_score in zip(binarized_y_true, y_score):
        actual = dcg_score(y_value_true, y_value_score, k)
        best = dcg_score(y_value_true, y_value_true, k)
        scores.append(actual / best)

    return np.mean(scores)
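To make the binarization step concrete, a small sketch of what LabelBinarizer produces for the doctest's y_true (assuming scikit-learn is importable):

from sklearn.preprocessing import LabelBinarizer

y_true = [1, 0, 2]
lb = LabelBinarizer()
lb.fit(range(max(max(y_true) + 1, len(y_true))))  # classes 0, 1, 2
print(lb.transform(y_true))
# [[0 1 0]
#  [1 0 0]
#  [0 0 1]]
# Each row is a one-hot relevance vector, so dividing the per-sample DCG of
# the prediction by the DCG of the row itself yields a value in [0, 1].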
33 changes: 33 additions & 0 deletions sklearn/metrics/tests/test_ranking.py
@@ -29,6 +29,7 @@
from sklearn.metrics import label_ranking_loss
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import ndcg_score

from sklearn.exceptions import UndefinedMetricWarning

@@ -697,6 +698,38 @@ def check_zero_or_all_relevant_labels(lrap_score):
[[0.5], [0.5], [0.5], [0.5]]), 1.)


def test_ndcg_score():
    # Check perfect ranking
    y_true = [1, 0, 2]
    y_score = [
        [0.15, 0.55, 0.2],
        [0.7, 0.2, 0.1],
        [0.06, 0.04, 0.9]
    ]
    perfect = ndcg_score(y_true, y_score)
    assert_equal(perfect, 1.0)

    # Check bad ranking with a small K
    y_true = [0, 2, 1]
    y_score = [
        [0.15, 0.55, 0.2],
        [0.7, 0.2, 0.1],
        [0.06, 0.04, 0.9]
    ]
    short_k = ndcg_score(y_true, y_score, k=1)
    assert_equal(short_k, 0.0)

    # Check a random scoring
    y_true = [2, 1, 0]
    y_score = [
        [0.15, 0.55, 0.2],
        [0.7, 0.2, 0.1],
        [0.06, 0.04, 0.9]
    ]
    average_ranking = ndcg_score(y_true, y_score, k=2)
    assert_almost_equal(average_ranking, 0.63092975)


def check_lrap_error_raised(lrap_score):
    # Raise value error if not appropriate format
    assert_raises(ValueError, lrap_score,
