Skip to content

Commit

Permalink
Merge pull request #130 from ODemidenko/master
Browse files Browse the repository at this point in the history
fixes #127
  • Loading branch information
NicolasHug committed Jan 26, 2018
2 parents 8aae602 + a915ed7 commit c89a353
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 30 deletions.
41 changes: 22 additions & 19 deletions surprise/similarities.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -55,23 +55,24 @@ def cosine(n_x, yr, min_support):
"""

# sum (r_xy * r_x'y) for common ys
cdef np.ndarray[np.int_t, ndim=2] prods
cdef np.ndarray[np.double_t, ndim=2] prods
# number of common ys
cdef np.ndarray[np.int_t, ndim=2] freq
# sum (r_xy ^ 2) for common ys
cdef np.ndarray[np.int_t, ndim=2] sqi
cdef np.ndarray[np.double_t, ndim=2] sqi
# sum (r_x'y ^ 2) for common ys
cdef np.ndarray[np.int_t, ndim=2] sqj
cdef np.ndarray[np.double_t, ndim=2] sqj
# the similarity matrix
cdef np.ndarray[np.double_t, ndim=2] sim

cdef int xi, xj, ri, rj
cdef int xi, xj
cdef double ri, rj
cdef int min_sprt = min_support

prods = np.zeros((n_x, n_x), np.int)
prods = np.zeros((n_x, n_x), np.double)
freq = np.zeros((n_x, n_x), np.int)
sqi = np.zeros((n_x, n_x), np.int)
sqj = np.zeros((n_x, n_x), np.int)
sqi = np.zeros((n_x, n_x), np.double)
sqj = np.zeros((n_x, n_x), np.double)
sim = np.zeros((n_x, n_x), np.double)

for y, y_ratings in iteritems(yr):
Expand Down Expand Up @@ -137,7 +138,8 @@ def msd(n_x, yr, min_support):
# the similarity matrix
cdef np.ndarray[np.double_t, ndim=2] sim

cdef int xi, xj, ri, rj
cdef int xi, xj
cdef double ri, rj
cdef int min_sprt = min_support

sq_diff = np.zeros((n_x, n_x), np.double)
Expand Down Expand Up @@ -201,27 +203,28 @@ def pearson(n_x, yr, min_support):
# number of common ys
cdef np.ndarray[np.int_t, ndim=2] freq
# sum (r_xy * r_x'y) for common ys
cdef np.ndarray[np.int_t, ndim=2] prods
cdef np.ndarray[np.double_t, ndim=2] prods
# sum (rxy ^ 2) for common ys
cdef np.ndarray[np.int_t, ndim=2] sqi
cdef np.ndarray[np.double_t, ndim=2] sqi
# sum (rx'y ^ 2) for common ys
cdef np.ndarray[np.int_t, ndim=2] sqj
cdef np.ndarray[np.double_t, ndim=2] sqj
# sum (rxy) for common ys
cdef np.ndarray[np.int_t, ndim=2] si
cdef np.ndarray[np.double_t, ndim=2] si
# sum (rx'y) for common ys
cdef np.ndarray[np.int_t, ndim=2] sj
cdef np.ndarray[np.double_t, ndim=2] sj
# the similarity matrix
cdef np.ndarray[np.double_t, ndim=2] sim

cdef int xi, xj, ri, rj
cdef int xi, xj
cdef double ri, rj
cdef int min_sprt = min_support

freq = np.zeros((n_x, n_x), np.int)
prods = np.zeros((n_x, n_x), np.int)
sqi = np.zeros((n_x, n_x), np.int)
sqj = np.zeros((n_x, n_x), np.int)
si = np.zeros((n_x, n_x), np.int)
sj = np.zeros((n_x, n_x), np.int)
prods = np.zeros((n_x, n_x), np.double)
sqi = np.zeros((n_x, n_x), np.double)
sqj = np.zeros((n_x, n_x), np.double)
si = np.zeros((n_x, n_x), np.double)
sj = np.zeros((n_x, n_x), np.double)
sim = np.zeros((n_x, n_x), np.double)

for y, y_ratings in iteritems(yr):
Expand Down
36 changes: 25 additions & 11 deletions tests/test_similarities.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@

import surprise.similarities as sims


n_x = 7
n_x = 8
yr_global = {
0: [(0, 3), (1, 3), (2, 3), (5, 1), (6, 2)], # noqa
1: [(0, 4), (1, 4), (2, 4), ], # noqa
2: [ (2, 5), (3, 2), (4, 3) ], # noqa
3: [ (1, 1), (2, 4), (3, 2), (4, 3), (5, 3), (6, 4)], # noqa
4: [ (1, 5), (2, 1), (5, 2), (6, 3)], # noqa
}
0: [(0, 3), (1, 3), (2, 3), (5, 1), (6, 1.5), (7, 3)], # noqa
1: [(0, 4), (1, 4), (2, 4), ], # noqa
2: [ (2, 5), (3, 2), (4, 3) ], # noqa
3: [(1, 1), (2, 4), (3, 2), (4, 3), (5, 3), (6, 3.5), (7, 2)], # noqa
4: [(1, 5), (2, 1), (5, 2), (6, 2.5), (7, 2.5)], # noqa
}


def test_cosine_sim():
Expand Down Expand Up @@ -53,8 +52,12 @@ def test_cosine_sim():
assert sim[0, 3] == 0
assert sim[0, 4] == 0

# non constant and different ratings: cosine sim must be in ]0, 1[
assert 0 < sim[5, 6] < 1
# check floating-point support and computation correctness
dot_product56 = 1 * 1.5 + 3 * 3.5 + 2 * 2.5
assert sim[5, 6] == (dot_product56 /
((1 ** 2 + 3 ** 2 + 2 ** 2) *
(1.5 ** 2 + 3.5 ** 2 + 2.5 ** 2)) ** 0.5
)

# ensure min_support is taken into account. Only users 1 and 2 have at
# least 4 common ratings.
Expand Down Expand Up @@ -138,12 +141,23 @@ def test_pearson_sim():
assert sim[0, 3] == 0
assert sim[0, 4] == 0

# almost same ratings (just with an offset of 1)
# nearly identical ratings (differing only by a constant offset of 0.5)
assert sim[5, 6] == 1

# ratings vary in the same direction
assert sim[2, 5] > 0

# check floating-point support and computation correctness
mean6 = (1.5 + 3.5 + 2.5) / 3
var6 = (1.5 - mean6) ** 2 + (3.5 - mean6) ** 2 + (2.5 - mean6) ** 2
mean7 = (3 + 2 + 2.5) / 3
var7 = (3 - mean7) ** 2 + (2 - mean7) ** 2 + (2.5 - mean7) ** 2
num = sum([((1.5 - mean6) * (3 - mean7)),
((3.5 - mean6) * (2 - mean7)),
((2.5 - mean6) * (2.5 - mean7))
])
assert sim[6, 7] == num / (var6 * var7) ** 0.5

# ensure min_support is taken into account. Only users 1 and 2 have at
# least 4 common ratings.
sim = sims.pearson(n_x, yr, min_support=4)
Expand Down

0 comments on commit c89a353

Please sign in to comment.