Skip to content

Commit

Permalink
Optimize SVDpp training loop by reducing Python / C-API interactions
Browse files: browse the repository at this point in the history
- Adds a build-time dependency on a C++ compiler: the matrix_factorization extension is now generated and compiled as C++.
  • Loading branch information
ProfHercules committed Jan 23, 2022
1 parent 14a91d1 commit 8fbd660
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 24 deletions.
24 changes: 14 additions & 10 deletions setup.py
Expand Up @@ -58,7 +58,7 @@
else: else:
USE_CYTHON = True USE_CYTHON = True


__version__ = '1.1.1' __version__ = '1.1.2'


here = path.abspath(path.dirname(__file__)) here = path.abspath(path.dirname(__file__))


Expand Down Expand Up @@ -87,16 +87,20 @@
Extension( Extension(
'surprise.prediction_algorithms.matrix_factorization', 'surprise.prediction_algorithms.matrix_factorization',
['surprise/prediction_algorithms/matrix_factorization' + ext], ['surprise/prediction_algorithms/matrix_factorization' + ext],
include_dirs=[np.get_include()],
language="c++"), # generate and compile C++ code
Extension(
'surprise.prediction_algorithms.optimize_baselines',
['surprise/prediction_algorithms/optimize_baselines' + ext],
include_dirs=[np.get_include()]),
Extension(
'surprise.prediction_algorithms.slope_one',
['surprise/prediction_algorithms/slope_one' + ext],
include_dirs=[np.get_include()]),
Extension(
'surprise.prediction_algorithms.co_clustering',
['surprise/prediction_algorithms/co_clustering' + ext],
include_dirs=[np.get_include()]), include_dirs=[np.get_include()]),
Extension('surprise.prediction_algorithms.optimize_baselines',
['surprise/prediction_algorithms/optimize_baselines' + ext],
include_dirs=[np.get_include()]),
Extension('surprise.prediction_algorithms.slope_one',
['surprise/prediction_algorithms/slope_one' + ext],
include_dirs=[np.get_include()]),
Extension('surprise.prediction_algorithms.co_clustering',
['surprise/prediction_algorithms/co_clustering' + ext],
include_dirs=[np.get_include()]),
] ]


if USE_CYTHON: if USE_CYTHON:
Expand Down
59 changes: 45 additions & 14 deletions surprise/prediction_algorithms/matrix_factorization.pyx
Expand Up @@ -14,6 +14,10 @@ from .algo_base import AlgoBase
from .predictions import PredictionImpossible from .predictions import PredictionImpossible
from ..utils import get_rng from ..utils import get_rng


from libcpp.map cimport map as mapcpp
from libcpp.vector cimport vector as vectorcpp
from cython.operator import dereference, postincrement



class SVD(AlgoBase): class SVD(AlgoBase):
"""The famous *SVD* algorithm, as popularized by `Simon Funk """The famous *SVD* algorithm, as popularized by `Simon Funk
Expand Down Expand Up @@ -441,25 +445,51 @@ class SVDpp(AlgoBase):
(trainset.n_items, self.n_factors)) (trainset.n_items, self.n_factors))
u_impl_fdb = np.zeros(self.n_factors, np.double) u_impl_fdb = np.zeros(self.n_factors, np.double)


cdef mapcpp[int, vectorcpp[int]] Iu_items
cdef mapcpp[int, double] Iu_len_sqrts

for i in range(trainset.n_users):
for j, _ in trainset.ur[i]:
Iu_items[i].push_back(j)

cdef mapcpp[int, vectorcpp[int]].iterator it = Iu_items.begin()

while(it != Iu_items.end()):
Iu_len_sqrts[dereference(it).first] = 1.0 / np.sqrt(dereference(it).second.size())
postincrement(it)

cdef int facts = self.n_factors

cdef double err_qif_sqrt = 0.0

for current_epoch in range(self.n_epochs): for current_epoch in range(self.n_epochs):
if self.verbose: if self.verbose:
print(" processing epoch {}".format(current_epoch)) print(" processing epoch {}".format(current_epoch))

for u, i, r in trainset.all_ratings(): for u, i, r in trainset.all_ratings():


# items rated by u. This is COSTLY # items rated by u.
Iu = [j for (j, _) in trainset.ur[u]] Iu = Iu_items[u]
sqrt_Iu = np.sqrt(len(Iu)) sqrt_Iu = Iu_len_sqrts[u]


# compute user implicit feedback # compute user implicit feedback
u_impl_fdb = np.zeros(self.n_factors, np.double) f = 0
for j in Iu: while f < facts:
for f in range(self.n_factors): u_impl_fdb[f] = 0
u_impl_fdb[f] += yj[j, f] / sqrt_Iu f += 1


for j in Iu:
f = 0
while f < facts:
u_impl_fdb[f] += yj[j, f] * sqrt_Iu
f += 1

# compute current error # compute current error
dot = 0 # <q_i, (p_u + sum_{j in Iu} y_j / sqrt{|Iu|})> dot = 0
for f in range(self.n_factors): f = 0
while f < facts:
dot += qi[i, f] * (pu[u, f] + u_impl_fdb[f]) dot += qi[i, f] * (pu[u, f] + u_impl_fdb[f])
f += 1


err = r - (global_mean + bu[u] + bi[i] + dot) err = r - (global_mean + bu[u] + bi[i] + dot)


Expand All @@ -468,15 +498,16 @@ class SVDpp(AlgoBase):
bi[i] += lr_bi * (err - reg_bi * bi[i]) bi[i] += lr_bi * (err - reg_bi * bi[i])


# update factors # update factors
for f in range(self.n_factors): f = 0
while f < facts:
puf = pu[u, f] puf = pu[u, f]
qif = qi[i, f] qif = qi[i, f]
pu[u, f] += lr_pu * (err * qif - reg_pu * puf) pu[u, f] += lr_pu * (err * qif - reg_pu * puf)
qi[i, f] += lr_qi * (err * (puf + u_impl_fdb[f]) - qi[i, f] += lr_qi * (err * (puf + u_impl_fdb[f]) - reg_qi * qif)
reg_qi * qif) err_qif_sqrt = err * qif * sqrt_Iu
for j in Iu: for j in Iu:
yj[j, f] += lr_yj * (err * qif / sqrt_Iu - yj[j, f] += lr_yj * (err_qif_sqrt - reg_yj * yj[j, f])
reg_yj * yj[j, f]) f += 1


self.bu = bu self.bu = bu
self.bi = bi self.bi = bi
Expand Down

0 comments on commit 8fbd660

Please sign in to comment.