Skip to content

Commit

Permalink
[FIX] NMF: Handle 0 ratings and avoid ZeroDivisionError (#367)
Browse files Browse the repository at this point in the history
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
  • Loading branch information
VesnaT and NicolasHug committed Aug 13, 2022
1 parent 22eb126 commit a6b84d7
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
10 changes: 6 additions & 4 deletions surprise/prediction_algorithms/matrix_factorization.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -700,15 +700,17 @@ class NMF(AlgoBase):
for u in trainset.all_users():
n_ratings = len(trainset.ur[u])
for f in range(self.n_factors):
user_denom[u, f] += n_ratings * reg_pu * pu[u, f]
pu[u, f] *= user_num[u, f] / user_denom[u, f]
if pu[u, f] != 0: # Can happen if user only has 0 ratings
user_denom[u, f] += n_ratings * reg_pu * pu[u, f]
pu[u, f] *= user_num[u, f] / user_denom[u, f]

# Update item factors
for i in trainset.all_items():
n_ratings = len(trainset.ir[i])
for f in range(self.n_factors):
item_denom[i, f] += n_ratings * reg_qi * qi[i, f]
qi[i, f] *= item_num[i, f] / item_denom[i, f]
if qi[i, f] != 0:
item_denom[i, f] += n_ratings * reg_qi * qi[i, f]
qi[i, f] *= item_num[i, f] / item_denom[i, f]

self.bu = bu
self.bi = bi
Expand Down
18 changes: 18 additions & 0 deletions tests/test_NMF.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import pytest
import pandas as pd


from surprise import NMF
from surprise.model_selection import cross_validate
from surprise import Reader
from surprise import Dataset


def test_NMF_parameters(u1_ml100k, pkf):
Expand Down Expand Up @@ -75,3 +79,17 @@ def test_NMF_parameters(u1_ml100k, pkf):
algo = NMF(n_factors=1, n_epochs=1, init_high=.5, random_state=1)
rmse_init_high = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']
assert rmse_default != rmse_init_high


def test_NMF_zero_ratings():
    """Check that NMF can be fitted on data that contains 0 ratings.

    Non-regression test for https://github.com/NicolasHug/Surprise/pull/367

    The rating scale spans negative and positive values, and user 2's only
    rating is 0. The test makes no assertion on the fitted model: it passes
    as long as ``fit`` completes without raising.
    """
    rating_reader = Reader(rating_scale=(-10, 10))

    # Column order expected by ``Dataset.load_from_df`` below.
    ordered_columns = ['userID', 'itemID', 'rating']
    frame = pd.DataFrame({
        'itemID': [0, 0, 0, 0, 1, 1],
        'userID': [0, 1, 2, 3, 3, 4],
        'rating': [-10, 10, 0, -5, 0, 5],
    })

    dataset = Dataset.load_from_df(frame[ordered_columns], rating_reader)
    full_trainset = dataset.build_full_trainset()

    # Two epochs so the factor update step runs more than once.
    model = NMF(n_factors=4, n_epochs=2)
    model.fit(full_trainset)

0 comments on commit a6b84d7

Please sign in to comment.