Merge branch 'master' of github.com:NicolasHug/Surprise

NicolasHug · Oct 20, 2017 · 1a12f8a · 1a12f8a
2 parents 47e719b + 9cb442e
commit 1a12f8a
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 7 deletions.
diff --git a/.github/issue_template.md b/.github/issue_template.md
@@ -1,6 +1,8 @@
-<!-- Before submitting an issue, make sure it hasn't already been addressed by
+<!-- IMPORTANT PLEASE READ!!!
+Before submitting an issue, make sure it hasn't already been addressed by
 checking the past issues and the documentation (FAQ, getting started / advanced
-usage guide, etc.).
+usage guide, etc.). In order to let me help you more efficiently, please fill
+the different fields below.
 
 Also, please keep in mind that I develop and maintain this software on my
 **free time**. So please, before asking for help, show me that you have already

diff --git a/surprise/dataset.py b/surprise/dataset.py
@@ -670,26 +670,35 @@ def build_testset(self):
         return [(self.to_raw_uid(u), self.to_raw_iid(i), r)
                 for (u, i, r) in self.all_ratings()]
 
-    def build_anti_testset(self):
+    def build_anti_testset(self, fill=None):
         """Return a list of ratings that can be used as a testset in the
         :meth:`test() <surprise.prediction_algorithms.algo_base.AlgoBase.test>`
         method.
 
         The ratings are all the ratings that are **not** in the trainset, i.e.
         all the ratings :math:`r_{ui}` where the user :math:`u` is known, the
         item :math:`i` is known, but the rating :math:`r_{ui}`  is not in the
-        trainset. As :math:`r_{ui}` is unknown, it is assumed to be equal to
-        the mean of all ratings :meth:`global_mean
-        <surprise.dataset.Trainset.global_mean>`.
+        trainset. As :math:`r_{ui}` is unknown, it is either replaced by the
+        :code:`fill` value or assumed to be equal to the mean of all ratings
+        :meth:`global_mean <surprise.dataset.Trainset.global_mean>`.
+
+        Args:
+            fill(float): The value to fill unknown ratings with. If
+                :code:`None`, the global mean of all ratings :meth:`global_mean
+                <surprise.dataset.Trainset.global_mean>` will be used.
+
+        Returns:
+            A list of tuples ``(uid, iid, fill)`` where ids are raw ids.
         """
+        fill = self.global_mean if fill is None else float(fill)
 
         anti_testset = []
         for u in self.all_users():
             for i in self.all_items():
                 user_items = [j for (j, _) in self.ur[u]]
                 if i not in user_items:
                     r_ui = (self.to_raw_uid(u), self.to_raw_iid(i),
-                            self.global_mean)
+                            fill)
                     anti_testset.append(r_ui)
         return anti_testset
 

diff --git a/tests/test_dataset.py b/tests/test_dataset.py
@@ -197,3 +197,26 @@ def test_load_form_df():
     trainset = data.build_full_trainset()
     with pytest.raises(ValueError):
         trainset.to_inner_uid('10000')
+
+
+def test_build_anti_testset():
+    ratings_dict = {'itemID': [1, 2, 3, 4, 5, 6, 7, 8, 9],
+                    'userID': [1, 2, 3, 4, 5, 6, 7, 8, 9],
+                    'rating': [1, 2, 3, 4, 5, 6, 7, 8, 9]}
+    df = pd.DataFrame(ratings_dict)
+
+    reader = Reader(rating_scale=(1, 5))
+    data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)
+    data.split(2)
+    trainset, __testset = next(data.folds())
+    # fill with some specific value
+    for fillvalue in (0, 42., -1):
+        anti = trainset.build_anti_testset(fill=fillvalue)
+        for (u, i, r) in anti:
+            assert r == fillvalue
+    # fill with global_mean
+    anti = trainset.build_anti_testset(fill=None)
+    for (u, i, r) in anti:
+        assert r == trainset.global_mean
+    expect = trainset.n_users * trainset.n_items
+    assert trainset.n_ratings + len(anti) == expect