Pringled · Pringled · Oct 11, 2025 · Oct 11, 2025 · Oct 11, 2025
diff --git a/README.md b/README.md
@@ -44,11 +44,13 @@ diversified_result = diversify(
     diversity=0.5 # Diversity parameter (higher values prioritize diversity)
 )
 
-# Get the indicices of the diversified result
+# Get the indices of the diversified result
 diversified_indices = diversified_result.indices
 ```
 
-The returned `DiversificationResult` can be used to access the diversified `indices`, as well as the `marginal gains` of the selected strategy and other useful info. The strategies are extremely fast and scalable: this example runs in 0.0001s.
+The returned `DiversificationResult` can be used to access the diversified `indices`, as well as the `selection_scores` of the selected strategy and other useful info. The strategies are extremely fast and scalable: this example runs in milliseconds.
+
+The `diversity` parameter tunes the trade-off between relevance and diversity: 0.0 focuses purely on relevance (no diversification), while 1.0 maximizes diversity, potentially at the cost of relevance.
 
 ## Supported Strategies
 
@@ -64,7 +66,7 @@ The following table describes the supported strategies, how they work, their tim
 
 ## Motivation
 
-Traditional retrieval systems rank results purely by relevance (how closely each item matches the query) While effective, this can lead to redundancy: top results often look nearly identical, which can create a poor user experience.
+Traditional retrieval systems rank results purely by relevance (how closely each item matches the query). While effective, this can lead to redundancy: top results often look nearly identical, which can create a poor user experience.
 
 Diversification techniques like MMR, MSD, COVER, and DPP help balance relevance and variety.
 Each new item is chosen not only because it’s relevant, but also because it adds new information that wasn’t already covered by earlier results.

diff --git a/src/pyversity/datatypes.py b/src/pyversity/datatypes.py
@@ -28,15 +28,15 @@ class DiversificationResult:
     Attributes
     ----------
         indices: Diversified item indices.
-        marginal_gains: Marginal gains/relevance scores for the diversified items.
+        selection_scores: Selection scores for the diversified items.
         strategy: Diversification strategy used.
         diversity: Diversity parameter used in the strategy.
         parameters: Additional parameters used in the strategy.
 
     """
 
     indices: np.ndarray
-    marginal_gains: np.ndarray
+    selection_scores: np.ndarray
     strategy: Strategy
     diversity: float
     parameters: dict | None = None
diff --git a/src/pyversity/pyversity.py b/src/pyversity/pyversity.py
@@ -25,7 +25,7 @@ def diversify(
     :param diversity: Diversity parameter (range of [0, 1]). Higher values prioritize diversity and lower values prioritize relevance.
     :param **kwargs: Additional keyword arguments passed to the specific strategy function.
     :return: A DiversificationResult containing the selected item indices,
-      their marginal gains, the strategy used, and the parameters.
+      their selection scores, the strategy used, and the parameters.
     :raises ValueError: If the provided strategy is not recognized.
     """
     if strategy == Strategy.MMR:

diff --git a/src/pyversity/strategies/cover.py b/src/pyversity/strategies/cover.py
@@ -14,22 +14,22 @@ def cover(
     normalize: bool = True,
 ) -> DiversificationResult:
     """
-    Select a subset of items that balances relevance and coverage.
+    Select a subset of items that balances relevance and coverage/diversity.
 
     This strategy chooses `k` items by combining pure relevance with
     diversity-driven coverage using a concave submodular formulation.
 
     :param embeddings: 2D array of shape (n_samples, n_features).
     :param scores: 1D array of relevance scores for each item.
     :param k: Number of items to select.
-    :param diversity: Trade-off between relevance and coverage in [0, 1] (inverse of theta parameter).
+    :param diversity: Trade-off between relevance and coverage/diversity in [0, 1] (inverse of theta parameter).
                       1.0 = pure diversity, 0.0 = pure relevance.
     :param gamma: Concavity parameter in (0, 1]; lower values emphasize diversity.
     :param metric: Similarity metric to use. Default is Metric.COSINE.
     :param normalize: Whether to normalize embeddings before computing similarity.
     :return: A DiversificationResult containing the selected item indices,
-      their marginal gains, the strategy used, and the parameters.
-    :raises ValueError: If theta is not in [0, 1].
+      their selection scores, the strategy used, and the parameters.
+    :raises ValueError: If diversity is not in [0, 1].
     :raises ValueError: If gamma is not in (0, 1].
     """
     # Validate parameters
@@ -53,7 +53,7 @@ def cover(
         # Nothing to select: return empty arrays
         return DiversificationResult(
             indices=np.empty(0, np.int32),
-            marginal_gains=np.empty(0, np.float32),
+            selection_scores=np.empty(0, np.float32),
             strategy=Strategy.COVER,
             diversity=diversity,
             parameters=params,
@@ -69,7 +69,7 @@ def cover(
         gains = relevance_scores[topk].astype(np.float32, copy=False)
         return DiversificationResult(
             indices=topk,
-            marginal_gains=gains,
+            selection_scores=gains,
             strategy=Strategy.COVER,
             diversity=diversity,
             parameters=params,
@@ -106,7 +106,7 @@ def cover(
 
     return DiversificationResult(
         indices=selected_indices,
-        marginal_gains=marginal_gains,
+        selection_scores=marginal_gains,
         strategy=Strategy.COVER,
         diversity=diversity,
         parameters=params,

diff --git a/src/pyversity/strategies/dpp.py b/src/pyversity/strategies/dpp.py
@@ -24,7 +24,7 @@ def dpp(
 
     This strategy selects a diverse and relevant subset of `k` items by
     maximizing the determinant of a kernel matrix that balances item relevance
-    and pairwise similarity.
+    and pairwise similarity.
 
     :param embeddings: 2D array of shape (n_samples, n_features).
     :param scores: 1D array of relevance scores for each item.
@@ -33,7 +33,7 @@ def dpp(
                       Higher values increase the emphasis on diversity.
     :param scale: Optional scaling factor for the beta parameter to adjust relevance influence.
     :return: A DiversificationResult containing the selected item indices,
-      their marginal gains, the strategy used, and the parameters.
+      their selection scores, the strategy used, and the parameters.
     :raises ValueError: If diversity is not in [0, 1].
     """
     if not (0.0 <= float(diversity) <= 1.0):
@@ -49,7 +49,7 @@ def dpp(
         # Nothing to select: return empty arrays
         return DiversificationResult(
             indices=np.empty(0, np.int32),
-            marginal_gains=np.empty(0, np.float32),
+            selection_scores=np.empty(0, np.float32),
             strategy=Strategy.DPP,
             diversity=diversity,
             parameters={"scale": scale},
@@ -80,8 +80,8 @@ def dpp(
         marginal_gains[step] = best_score
         selected_mask[best_index] = True
 
-        if step == top_k - 1 or best_score <= 0.0:
-            # No more items to select or no positive gain
+        if step == top_k - 1:
+            # No more items to select
             step += 1
             break
 
@@ -106,7 +106,7 @@ def dpp(
 
     return DiversificationResult(
         indices=selected_indices[:step],
-        marginal_gains=marginal_gains[:step],
+        selection_scores=marginal_gains[:step],
         strategy=Strategy.DPP,
         diversity=diversity,
         parameters={"scale": scale},

diff --git a/src/pyversity/strategies/mmr.py b/src/pyversity/strategies/mmr.py
@@ -27,7 +27,7 @@ def mmr(
     :param metric: Similarity metric to use. Default is Metric.COSINE.
     :param normalize: Whether to normalize embeddings before computing similarity.
     :return: A DiversificationResult containing the selected item indices,
-      their marginal gains, the strategy used, and the parameters.
+      their selection scores, the strategy used, and the parameters.
     """
     return greedy_select(
         "mmr",

diff --git a/src/pyversity/strategies/msd.py b/src/pyversity/strategies/msd.py
@@ -27,7 +27,7 @@ def msd(
     :param metric: Similarity metric to use. Default is Metric.COSINE.
     :param normalize: Whether to normalize embeddings before computing similarity.
     :return: A DiversificationResult containing the selected item indices,
-      their marginal gains, the strategy used, and the parameters.
+      their selection scores, the strategy used, and the parameters.
     """
     return greedy_select(
         "msd",

diff --git a/src/pyversity/strategies/utils.py b/src/pyversity/strategies/utils.py
@@ -33,7 +33,7 @@ def greedy_select(
     :param diversity: Trade-off parameter in [0, 1]. Inverse of lambda parameter.
                   1.0 = pure diversity, 0.0 = pure relevance.
     :return: A DiversificationResult containing the selected item indices,
-      their marginal gains, the strategy used, and the parameters.
+      their selection scores, the strategy used, and the parameters.
     :raises ValueError: If diversity is not in [0, 1].
     :raises ValueError: If input shapes are inconsistent.
     """
@@ -55,7 +55,7 @@ def greedy_select(
         # Nothing to select: return empty arrays
         return DiversificationResult(
             indices=np.empty(0, np.int32),
-            marginal_gains=np.empty(0, np.float32),
+            selection_scores=np.empty(0, np.float32),
             strategy=Strategy.MMR if strategy == "mmr" else Strategy.MSD,
             diversity=diversity,
             parameters=params,
@@ -110,7 +110,7 @@ def greedy_select(
 
     return DiversificationResult(
         indices=selected_indices,
-        marginal_gains=marginal_gains,
+        selection_scores=marginal_gains,
         strategy=Strategy.MMR if strategy == "mmr" else Strategy.MSD,
         diversity=diversity,
         parameters=params,

diff --git a/src/pyversity/utils.py b/src/pyversity/utils.py
@@ -23,10 +23,10 @@ def prepare_inputs(embeddings: np.ndarray, scores: np.ndarray, k: int) -> tuple[
     """
     Prepare relevance scores and embeddings.
 
-    :param embeddings: Array of shape embeddings.
+    :param embeddings: Array of embeddings.
     :param scores: Array of relevance scores.
     :param k: Number of top elements to consider.
-    :return: Tuple of relevances, embeddings, k_clamped, early_exit.
+    :return: Tuple of embeddings, relevances, k_clamped, early_exit.
     :raises ValueError: If input shapes are inconsistent.
     """
     relevance_scores = np.asarray(scores, dtype=np.float32).reshape(-1)

diff --git a/tests/test_strategies.py b/tests/test_strategies.py
@@ -14,7 +14,7 @@ def test_mmr() -> None:
     res = mmr(emb, scores, k=3, diversity=0.0, metric=Metric.COSINE, normalize=True)
     expected = np.array([1, 3, 2], dtype=np.int32)
     assert np.array_equal(res.indices, expected)
-    assert np.allclose(res.marginal_gains, scores[expected])
+    assert np.allclose(res.selection_scores, scores[expected])
 
     # Strong diversity (diversity=1): avoid near-duplicate
     emb = np.array([[1.0, 0.0], [0.999, 0.001], [0.0, 1.0]], dtype=np.float32)
@@ -63,7 +63,7 @@ def test_cover() -> None:
     res = cover(emb, scores, k=2, diversity=0.0)
     expected = np.array([1, 2], dtype=np.int32)
     assert np.array_equal(res.indices, expected)
-    assert np.allclose(res.marginal_gains, scores[expected])
+    assert np.allclose(res.selection_scores, scores[expected])
 
     # Balanced coverage (diversity=0.5, gamma=0.5): picks diverse set
     res = cover(emb, scores, k=2, diversity=0.5, gamma=0.5)
@@ -88,24 +88,24 @@ def test_dpp() -> None:
     # Strong diversity (diversity=1)
     res = dpp(emb, scores, k=2, diversity=1.0)
     assert 1 <= res.indices.size <= 2
-    assert np.all(res.marginal_gains >= -1e-7)
-    assert np.all(res.marginal_gains[:-1] + 1e-7 >= res.marginal_gains[1:])
+    assert np.all(res.selection_scores >= -1e-7)
+    assert np.all(res.selection_scores[:-1] + 1e-7 >= res.selection_scores[1:])
 
     # Balanced (diversity=0.5)
     res = dpp(emb, scores, k=2, diversity=0.5)
     assert 1 <= res.indices.size <= 2
-    assert np.all(res.marginal_gains >= -1e-7)
-    assert np.all(res.marginal_gains[:-1] + 1e-7 >= res.marginal_gains[1:])
+    assert np.all(res.selection_scores >= -1e-7)
+    assert np.all(res.selection_scores[:-1] + 1e-7 >= res.selection_scores[1:])
 
     # Low diversity (diversity=0.0): more relevance-driven
     res = dpp(emb, scores, k=2, diversity=0.0)
     assert 1 <= res.indices.size <= 2
-    assert np.all(res.marginal_gains >= -1e-7)
-    assert np.all(res.marginal_gains[:-1] + 1e-7 >= res.marginal_gains[1:])
+    assert np.all(res.selection_scores >= -1e-7)
+    assert np.all(res.selection_scores[:-1] + 1e-7 >= res.selection_scores[1:])
 
     # Early exit on empty input
     res = dpp(np.empty((0, 3), dtype=np.float32), np.array([], dtype=np.float32), k=3)
-    assert res.indices.size == 0 and res.marginal_gains.size == 0
+    assert res.indices.size == 0 and res.selection_scores.size == 0
 
 
 @pytest.mark.parametrize(
@@ -129,4 +129,4 @@ def test_diversify(strategy: Strategy, fn: Callable[..., DiversificationResult],
     res_disp = diversify(embeddings=emb, scores=scores, k=2, strategy=strategy, **kwargs)
 
     assert np.array_equal(res_direct.indices, res_disp.indices)
-    assert np.allclose(res_direct.marginal_gains, res_disp.marginal_gains)
+    assert np.allclose(res_direct.selection_scores, res_disp.selection_scores)