"""
The :mod:`matrix_factorization` module includes some algorithms using matrix
factorization.
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
cimport numpy as np # noqa
import numpy as np
from six.moves import range
from .algo_base import AlgoBase
from .predictions import PredictionImpossible
from ..utils import get_rng
from libcpp.map cimport map as mapcpp
from libcpp.vector cimport vector as vectorcpp
from cython.operator import dereference, postincrement
import cython
class SVD(AlgoBase):
"""The famous *SVD* algorithm, as popularized by `Simon Funk
<http://sifter.org/~simon/journal/20061211.html>`_ during the Netflix
Prize. When baselines are not used, this is equivalent to Probabilistic
Matrix Factorization :cite:`salakhutdinov2008a` (see :ref:`note
<unbiased_note>` below).
The prediction :math:`\\hat{r}_{ui}` is set as:
.. math::
\hat{r}_{ui} = \mu + b_u + b_i + q_i^Tp_u
If user :math:`u` is unknown, then the bias :math:`b_u` and the factors
:math:`p_u` are assumed to be zero. The same applies for item :math:`i`
with :math:`b_i` and :math:`q_i`.
For details, see equation (5) from :cite:`Koren:2009`. See also
:cite:`Ricci:2010`, section 5.3.1.
    To estimate all the unknowns, we minimize the following regularized
    squared error:
.. math::
\sum_{r_{ui} \in R_{train}} \left(r_{ui} - \hat{r}_{ui} \\right)^2 +
\lambda\\left(b_i^2 + b_u^2 + ||q_i||^2 + ||p_u||^2\\right)
The minimization is performed by a very straightforward stochastic gradient
descent:
.. math::
b_u &\\leftarrow b_u &+ \gamma (e_{ui} - \lambda b_u)\\\\
b_i &\\leftarrow b_i &+ \gamma (e_{ui} - \lambda b_i)\\\\
p_u &\\leftarrow p_u &+ \gamma (e_{ui} \\cdot q_i - \lambda p_u)\\\\
q_i &\\leftarrow q_i &+ \gamma (e_{ui} \\cdot p_u - \lambda q_i)
where :math:`e_{ui} = r_{ui} - \\hat{r}_{ui}`. These steps are performed
over all the ratings of the trainset and repeated ``n_epochs`` times.
Baselines are initialized to ``0``. User and item factors are randomly
initialized according to a normal distribution, which can be tuned using
the ``init_mean`` and ``init_std_dev`` parameters.
You also have control over the learning rate :math:`\gamma` and the
regularization term :math:`\lambda`. Both can be different for each
kind of parameter (see below). By default, learning rates are set to
``0.005`` and regularization terms are set to ``0.02``.
.. _unbiased_note:
.. note::
You can choose to use an unbiased version of this algorithm, simply
predicting:
.. math::
\hat{r}_{ui} = q_i^Tp_u
This is equivalent to Probabilistic Matrix Factorization
(:cite:`salakhutdinov2008a`, section 2) and can be achieved by setting
the ``biased`` parameter to ``False``.
Args:
n_factors: The number of factors. Default is ``100``.
        n_epochs: The number of iterations of the SGD procedure. Default is
            ``20``.
biased(bool): Whether to use baselines (or biases). See :ref:`note
<unbiased_note>` above. Default is ``True``.
init_mean: The mean of the normal distribution for factor vectors
initialization. Default is ``0``.
init_std_dev: The standard deviation of the normal distribution for
factor vectors initialization. Default is ``0.1``.
lr_all: The learning rate for all parameters. Default is ``0.005``.
reg_all: The regularization term for all parameters. Default is
``0.02``.
lr_bu: The learning rate for :math:`b_u`. Takes precedence over
``lr_all`` if set. Default is ``None``.
lr_bi: The learning rate for :math:`b_i`. Takes precedence over
``lr_all`` if set. Default is ``None``.
lr_pu: The learning rate for :math:`p_u`. Takes precedence over
``lr_all`` if set. Default is ``None``.
lr_qi: The learning rate for :math:`q_i`. Takes precedence over
``lr_all`` if set. Default is ``None``.
reg_bu: The regularization term for :math:`b_u`. Takes precedence
over ``reg_all`` if set. Default is ``None``.
reg_bi: The regularization term for :math:`b_i`. Takes precedence
over ``reg_all`` if set. Default is ``None``.
reg_pu: The regularization term for :math:`p_u`. Takes precedence
over ``reg_all`` if set. Default is ``None``.
reg_qi: The regularization term for :math:`q_i`. Takes precedence
over ``reg_all`` if set. Default is ``None``.
random_state(int, RandomState instance from numpy, or ``None``):
Determines the RNG that will be used for initialization. If
int, ``random_state`` will be used as a seed for a new RNG. This is
useful to get the same initialization over multiple calls to
``fit()``. If RandomState instance, this same instance is used as
RNG. If ``None``, the current RNG from numpy is used. Default is
``None``.
verbose: If ``True``, prints the current epoch. Default is ``False``.
Attributes:
pu(numpy array of size (n_users, n_factors)): The user factors (only
exists if ``fit()`` has been called)
qi(numpy array of size (n_items, n_factors)): The item factors (only
exists if ``fit()`` has been called)
bu(numpy array of size (n_users)): The user biases (only
exists if ``fit()`` has been called)
bi(numpy array of size (n_items)): The item biases (only
exists if ``fit()`` has been called)
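
    A minimal usage sketch (assuming the standard Surprise workflow and the
    built-in ``ml-100k`` dataset, used here purely for illustration)::

        from surprise import SVD, Dataset, accuracy
        from surprise.model_selection import train_test_split

        # Load the movielens-100k dataset and split off a test set.
        data = Dataset.load_builtin('ml-100k')
        trainset, testset = train_test_split(data, test_size=.25)

        # Fit SVD with a fixed seed so factor initialization is reproducible.
        # Per-parameter learning rates (e.g. lr_pu) would take precedence
        # over lr_all here.
        algo = SVD(n_factors=100, n_epochs=20, lr_all=.005, random_state=0)
        algo.fit(trainset)

        # Evaluate on the held-out ratings.
        predictions = algo.test(testset)
        accuracy.rmse(predictions)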
"""
def __init__(self, n_factors=100, n_epochs=20, biased=True, init_mean=0,
init_std_dev=.1, lr_all=.005,
reg_all=.02, lr_bu=None, lr_bi=None, lr_pu=None, lr_qi=None,
reg_bu=None, reg_bi=None, reg_pu=None, reg_qi=None,
random_state=None, verbose=False):
self.n_factors = n_factors
self.n_epochs = n_epochs
self.biased = biased
self.init_mean = init_mean
self.init_std_dev = init_std_dev
self.lr_bu = lr_bu if lr_bu is not None else lr_all
self.lr_bi = lr_bi if lr_bi is not None else lr_all
self.lr_pu = lr_pu if lr_pu is not None else lr_all
self.lr_qi = lr_qi if lr_qi is not None else lr_all
self.reg_bu = reg_bu if reg_bu is not None else reg_all
self.reg_bi = reg_bi if reg_bi is not None else reg_all
self.reg_pu = reg_pu if reg_pu is not None else reg_all
self.reg_qi = reg_qi if reg_qi is not None else reg_all
self.random_state = random_state
self.verbose = verbose
AlgoBase.__init__(self)
def fit(self, trainset):
AlgoBase.fit(self, trainset)
self.sgd(trainset)
return self
@cython.boundscheck(False) # Deactivate bounds checking
@cython.wraparound(False) # Deactivate negative indexing.
def sgd(self, trainset):
        # OK, let's breathe. I've seen so many different implementations of
        # this algorithm that I'm just not sure anymore of what it should do.
        # I've implemented the version described in the BellKor papers (RS
        # Handbook, etc.). MyMediaLite also does it this way. In his post,
        # however, Funk seems to implicitly say that the algo looks like this
        # (see the pseudocode below):
# for f in range(n_factors):
# for _ in range(n_iter):
# for u, i, r in all_ratings:
# err = r_ui - <p[u, :f+1], q[i, :f+1]>
# update p[u, f]
# update q[i, f]
# which is also the way https://github.com/aaw/IncrementalSVD.jl
# implemented it.
#
# Funk: "Anyway, this will train one feature (aspect), and in
# particular will find the most prominent feature remaining (the one
# that will most reduce the error that's left over after previously
# trained features have done their best). When it's as good as it's
# going to get, shift it onto the pile of done features, and start a
# new one. For efficiency's sake, cache the residuals (all 100 million
# of them) so when you're training feature 72 you don't have to wait
# for predictRating() to re-compute the contributions of the previous
# 71 features. You will need 2 Gig of ram, a C compiler, and good
# programming habits to do this."
        # A note on cythonization: I haven't dived into the details, but
        # accessing 2D arrays like pu using just one of the indices, like
        # pu[u], is not efficient. That's why the old (cleaner) version can't
        # be used anymore: we need to compute the dot products by hand and
        # update the user and item factors by iterating over all factors...
# user biases
cdef np.ndarray[np.double_t] bu
# item biases
cdef np.ndarray[np.double_t] bi
# user factors
cdef np.ndarray[np.double_t, ndim=2] pu
# item factors
cdef np.ndarray[np.double_t, ndim=2] qi
cdef int u, i, f
cdef double r, err, dot, puf, qif
cdef double global_mean = self.trainset.global_mean
cdef double lr_bu = self.lr_bu
cdef double lr_bi = self.lr_bi
cdef double lr_pu = self.lr_pu
cdef double lr_qi = self.lr_qi
cdef double reg_bu = self.reg_bu
cdef double reg_bi = self.reg_bi
cdef double reg_pu = self.reg_pu
cdef double reg_qi = self.reg_qi
rng = get_rng(self.random_state)
bu = np.zeros(trainset.n_users, np.double)
bi = np.zeros(trainset.n_items, np.double)
pu = rng.normal(self.init_mean, self.init_std_dev,
(trainset.n_users, self.n_factors))
qi = rng.normal(self.init_mean, self.init_std_dev,
(trainset.n_items, self.n_factors))
if not self.biased:
global_mean = 0
cdef int facts = self.n_factors
cdef bint biased = self.biased
for current_epoch in range(self.n_epochs):
if self.verbose:
print("Processing epoch {}".format(current_epoch))
for u, i, r in trainset.all_ratings():
# compute current error
dot = 0 # <q_i, p_u>
f = 0
while f < facts:
dot += qi[i, f] * pu[u, f]
f += 1
err = r - (global_mean + bu[u] + bi[i] + dot)
# update biases
if biased:
bu[u] += lr_bu * (err - reg_bu * bu[u])
bi[i] += lr_bi * (err - reg_bi * bi[i])
# update factors
f = 0
while f < facts:
puf = pu[u, f]
qif = qi[i, f]
pu[u, f] += lr_pu * (err * qif - reg_pu * puf)
qi[i, f] += lr_qi * (err * puf - reg_qi * qif)
f += 1
self.bu = bu
self.bi = bi
self.pu = pu
self.qi = qi
def estimate(self, u, i):
# Should we cythonize this as well?
known_user = self.trainset.knows_user(u)
known_item = self.trainset.knows_item(i)
if self.biased:
est = self.trainset.global_mean
if known_user:
est += self.bu[u]
if known_item:
est += self.bi[i]
if known_user and known_item:
est += np.dot(self.qi[i], self.pu[u])
else:
if known_user and known_item:
est = np.dot(self.qi[i], self.pu[u])
else:
                raise PredictionImpossible('User and/or item is unknown.')
return est
class SVDpp(AlgoBase):
"""The *SVD++* algorithm, an extension of :class:`SVD` taking into account
implicit ratings.
The prediction :math:`\\hat{r}_{ui}` is set as:
.. math::
\hat{r}_{ui} = \mu + b_u + b_i + q_i^T\\left(p_u +
|I_u|^{-\\frac{1}{2}} \sum_{j \\in I_u}y_j\\right)
Where the :math:`y_j` terms are a new set of item factors that capture
implicit ratings. Here, an implicit rating describes the fact that a user
:math:`u` rated an item :math:`j`, regardless of the rating value.
If user :math:`u` is unknown, then the bias :math:`b_u` and the factors
:math:`p_u` are assumed to be zero. The same applies for item :math:`i`
with :math:`b_i`, :math:`q_i` and :math:`y_i`.
For details, see section 4 of :cite:`Koren:2008:FMN`. See also
:cite:`Ricci:2010`, section 5.3.1.
    Just as for :class:`SVD`, the parameters are learned using SGD on the
regularized squared error objective.
Baselines are initialized to ``0``. User and item factors are randomly
initialized according to a normal distribution, which can be tuned using
the ``init_mean`` and ``init_std_dev`` parameters.
You have control over the learning rate :math:`\gamma` and the
regularization term :math:`\lambda`. Both can be different for each
kind of parameter (see below). By default, learning rates are set to
``0.005`` and regularization terms are set to ``0.02``.
Args:
n_factors: The number of factors. Default is ``20``.
        n_epochs: The number of iterations of the SGD procedure. Default is
            ``20``.
init_mean: The mean of the normal distribution for factor vectors
initialization. Default is ``0``.
init_std_dev: The standard deviation of the normal distribution for
factor vectors initialization. Default is ``0.1``.
lr_all: The learning rate for all parameters. Default is ``0.007``.
reg_all: The regularization term for all parameters. Default is
``0.02``.
lr_bu: The learning rate for :math:`b_u`. Takes precedence over
``lr_all`` if set. Default is ``None``.
lr_bi: The learning rate for :math:`b_i`. Takes precedence over
``lr_all`` if set. Default is ``None``.
lr_pu: The learning rate for :math:`p_u`. Takes precedence over
``lr_all`` if set. Default is ``None``.
lr_qi: The learning rate for :math:`q_i`. Takes precedence over
``lr_all`` if set. Default is ``None``.
lr_yj: The learning rate for :math:`y_j`. Takes precedence over
``lr_all`` if set. Default is ``None``.
reg_bu: The regularization term for :math:`b_u`. Takes precedence
over ``reg_all`` if set. Default is ``None``.
reg_bi: The regularization term for :math:`b_i`. Takes precedence
over ``reg_all`` if set. Default is ``None``.
reg_pu: The regularization term for :math:`p_u`. Takes precedence
over ``reg_all`` if set. Default is ``None``.
reg_qi: The regularization term for :math:`q_i`. Takes precedence
over ``reg_all`` if set. Default is ``None``.
reg_yj: The regularization term for :math:`y_j`. Takes precedence
over ``reg_all`` if set. Default is ``None``.
random_state(int, RandomState instance from numpy, or ``None``):
Determines the RNG that will be used for initialization. If
int, ``random_state`` will be used as a seed for a new RNG. This is
useful to get the same initialization over multiple calls to
``fit()``. If RandomState instance, this same instance is used as
RNG. If ``None``, the current RNG from numpy is used. Default is
``None``.
verbose: If ``True``, prints the current epoch. Default is ``False``.
Attributes:
pu(numpy array of size (n_users, n_factors)): The user factors (only
exists if ``fit()`` has been called)
qi(numpy array of size (n_items, n_factors)): The item factors (only
exists if ``fit()`` has been called)
yj(numpy array of size (n_items, n_factors)): The (implicit) item
factors (only exists if ``fit()`` has been called)
bu(numpy array of size (n_users)): The user biases (only
exists if ``fit()`` has been called)
bi(numpy array of size (n_items)): The item biases (only
exists if ``fit()`` has been called)
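
    A minimal usage sketch (assuming the standard Surprise workflow;
    ``cross_validate`` is shown purely for illustration)::

        from surprise import Dataset, SVDpp
        from surprise.model_selection import cross_validate

        # Load the movielens-100k dataset.
        data = Dataset.load_builtin('ml-100k')

        # SVD++ is much slower than SVD because of the implicit-feedback
        # terms, hence the small number of factors.
        algo = SVDpp(n_factors=20, n_epochs=20, random_state=0)
        cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=3,
                       verbose=True)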
"""
def __init__(self, n_factors=20, n_epochs=20, init_mean=0, init_std_dev=.1,
lr_all=.007, reg_all=.02, lr_bu=None, lr_bi=None, lr_pu=None,
lr_qi=None, lr_yj=None, reg_bu=None, reg_bi=None, reg_pu=None,
reg_qi=None, reg_yj=None, random_state=None, verbose=False):
self.n_factors = n_factors
self.n_epochs = n_epochs
self.init_mean = init_mean
self.init_std_dev = init_std_dev
self.lr_bu = lr_bu if lr_bu is not None else lr_all
self.lr_bi = lr_bi if lr_bi is not None else lr_all
self.lr_pu = lr_pu if lr_pu is not None else lr_all
self.lr_qi = lr_qi if lr_qi is not None else lr_all
self.lr_yj = lr_yj if lr_yj is not None else lr_all
self.reg_bu = reg_bu if reg_bu is not None else reg_all
self.reg_bi = reg_bi if reg_bi is not None else reg_all
self.reg_pu = reg_pu if reg_pu is not None else reg_all
self.reg_qi = reg_qi if reg_qi is not None else reg_all
self.reg_yj = reg_yj if reg_yj is not None else reg_all
self.random_state = random_state
self.verbose = verbose
AlgoBase.__init__(self)
def fit(self, trainset):
AlgoBase.fit(self, trainset)
self.sgd(trainset)
return self
@cython.boundscheck(False) # Deactivate bounds checking
@cython.wraparound(False) # Deactivate negative indexing.
def sgd(self, trainset):
# user biases
cdef np.ndarray[np.double_t] bu
# item biases
cdef np.ndarray[np.double_t] bi
# user factors
cdef np.ndarray[np.double_t, ndim=2] pu
# item factors
cdef np.ndarray[np.double_t, ndim=2] qi
# item implicit factors
cdef np.ndarray[np.double_t, ndim=2] yj
cdef int u, i, j, f
cdef double r, err, dot, puf, qif, sqrt_Iu, _
cdef double global_mean = self.trainset.global_mean
cdef np.ndarray[np.double_t] u_impl_fdb
cdef double lr_bu = self.lr_bu
cdef double lr_bi = self.lr_bi
cdef double lr_pu = self.lr_pu
cdef double lr_qi = self.lr_qi
cdef double lr_yj = self.lr_yj
cdef double reg_bu = self.reg_bu
cdef double reg_bi = self.reg_bi
cdef double reg_pu = self.reg_pu
cdef double reg_qi = self.reg_qi
cdef double reg_yj = self.reg_yj
bu = np.zeros(trainset.n_users, np.double)
bi = np.zeros(trainset.n_items, np.double)
rng = get_rng(self.random_state)
pu = rng.normal(self.init_mean, self.init_std_dev,
(trainset.n_users, self.n_factors))
qi = rng.normal(self.init_mean, self.init_std_dev,
(trainset.n_items, self.n_factors))
yj = rng.normal(self.init_mean, self.init_std_dev,
(trainset.n_items, self.n_factors))
u_impl_fdb = np.zeros(self.n_factors, np.double)
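        # Precompute, for each inner user id u, the list of items rated by u
        # and the normalization term 1 / sqrt(|I_u|), caching both in C++
        # containers so they are not rebuilt for every rating inside the
        # epoch loop below.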
cdef mapcpp[int, vectorcpp[int]] Iu_items
cdef mapcpp[int, double] Iu_len_sqrts
        for u in range(trainset.n_users):
            for j, _ in trainset.ur[u]:
                Iu_items[u].push_back(j)
        cdef mapcpp[int, vectorcpp[int]].iterator it = Iu_items.begin()
        while it != Iu_items.end():
            Iu_len_sqrts[dereference(it).first] = (
                1.0 / np.sqrt(dereference(it).second.size()))
            postincrement(it)
cdef int facts = self.n_factors
cdef double err_qif_sqrt = 0.0
for current_epoch in range(self.n_epochs):
if self.verbose:
print(" processing epoch {}".format(current_epoch))
for u, i, r in trainset.all_ratings():
# items rated by u.
Iu = Iu_items[u]
sqrt_Iu = Iu_len_sqrts[u]
# compute user implicit feedback
f = 0
while f < facts:
u_impl_fdb[f] = 0
f += 1
for j in Iu:
f = 0
while f < facts:
u_impl_fdb[f] += yj[j, f] * sqrt_Iu
f += 1
# compute current error
dot = 0
f = 0
while f < facts:
dot += qi[i, f] * (pu[u, f] + u_impl_fdb[f])
f += 1
err = r - (global_mean + bu[u] + bi[i] + dot)
# update biases
bu[u] += lr_bu * (err - reg_bu * bu[u])
bi[i] += lr_bi * (err - reg_bi * bi[i])
# update factors
f = 0
while f < facts:
puf = pu[u, f]
qif = qi[i, f]
pu[u, f] += lr_pu * (err * qif - reg_pu * puf)
qi[i, f] += lr_qi * (err * (puf + u_impl_fdb[f]) - reg_qi * qif)
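                    # err * qif * sqrt_Iu is constant over the inner loop on
                    # j, so compute it once per factor.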
err_qif_sqrt = err * qif * sqrt_Iu
for j in Iu:
yj[j, f] += lr_yj * (err_qif_sqrt - reg_yj * yj[j, f])
f += 1
self.bu = bu
self.bi = bi
self.pu = pu
self.qi = qi
self.yj = yj
def estimate(self, u, i):
est = self.trainset.global_mean
if self.trainset.knows_user(u):
est += self.bu[u]
if self.trainset.knows_item(i):
est += self.bi[i]
if self.trainset.knows_user(u) and self.trainset.knows_item(i):
Iu = len(self.trainset.ur[u]) # nb of items rated by u
u_impl_feedback = (sum(self.yj[j] for (j, _)
in self.trainset.ur[u]) / np.sqrt(Iu))
est += np.dot(self.qi[i], self.pu[u] + u_impl_feedback)
return est
class NMF(AlgoBase):
"""A collaborative filtering algorithm based on Non-negative Matrix
Factorization.
This algorithm is very similar to :class:`SVD`. The prediction
:math:`\\hat{r}_{ui}` is set as:
.. math::
\hat{r}_{ui} = q_i^Tp_u,
where user and item factors are kept **positive**. Our implementation
follows that suggested in :cite:`NMF:2014`, which is equivalent to
:cite:`Zhang96` in its non-regularized form. Both are direct applications
of NMF for dense matrices :cite:`NMF_algo`.
The optimization procedure is a (regularized) stochastic gradient descent
with a specific choice of step size that ensures non-negativity of factors,
provided that their initial values are also positive.
    At each step of the SGD procedure, the factors :math:`f` of user :math:`u`
    and item :math:`i` are updated as follows:
.. math::
p_{uf} &\\leftarrow p_{uf} &\cdot \\frac{\\sum_{i \in I_u} q_{if}
\\cdot r_{ui}}{\\sum_{i \in I_u} q_{if} \\cdot \\hat{r_{ui}} +
\\lambda_u |I_u| p_{uf}}\\\\
q_{if} &\\leftarrow q_{if} &\cdot \\frac{\\sum_{u \in U_i} p_{uf}
\\cdot r_{ui}}{\\sum_{u \in U_i} p_{uf} \\cdot \\hat{r_{ui}} +
\lambda_i |U_i| q_{if}}\\\\
where :math:`\lambda_u` and :math:`\lambda_i` are regularization
parameters.
This algorithm is highly dependent on initial values. User and item factors
are uniformly initialized between ``init_low`` and ``init_high``. Change
    them at your own risk!
A biased version is available by setting the ``biased`` parameter to
``True``. In this case, the prediction is set as
.. math::
\hat{r}_{ui} = \mu + b_u + b_i + q_i^Tp_u,
still ensuring positive factors. Baselines are optimized in the same way as
in the :class:`SVD` algorithm. While yielding better accuracy, the biased
    version seems highly prone to overfitting, so you may want to reduce the
number of factors (or increase regularization).
Args:
n_factors: The number of factors. Default is ``15``.
        n_epochs: The number of iterations of the SGD procedure. Default is
            ``50``.
biased(bool): Whether to use baselines (or biases). Default is
``False``.
reg_pu: The regularization term for users :math:`\lambda_u`. Default is
``0.06``.
reg_qi: The regularization term for items :math:`\lambda_i`. Default is
``0.06``.
reg_bu: The regularization term for :math:`b_u`. Only relevant for
biased version. Default is ``0.02``.
reg_bi: The regularization term for :math:`b_i`. Only relevant for
biased version. Default is ``0.02``.
lr_bu: The learning rate for :math:`b_u`. Only relevant for biased
version. Default is ``0.005``.
lr_bi: The learning rate for :math:`b_i`. Only relevant for biased
version. Default is ``0.005``.
        init_low: Lower bound for random initialization of factors. Must be
            greater than or equal to ``0`` to ensure non-negative factors.
            Default is ``0``.
init_high: Higher bound for random initialization of factors. Default
is ``1``.
random_state(int, RandomState instance from numpy, or ``None``):
Determines the RNG that will be used for initialization. If
int, ``random_state`` will be used as a seed for a new RNG. This is
useful to get the same initialization over multiple calls to
``fit()``. If RandomState instance, this same instance is used as
RNG. If ``None``, the current RNG from numpy is used. Default is
``None``.
verbose: If ``True``, prints the current epoch. Default is ``False``.
Attributes:
pu(numpy array of size (n_users, n_factors)): The user factors (only
exists if ``fit()`` has been called)
qi(numpy array of size (n_items, n_factors)): The item factors (only
exists if ``fit()`` has been called)
bu(numpy array of size (n_users)): The user biases (only
exists if ``fit()`` has been called)
bi(numpy array of size (n_items)): The item biases (only
exists if ``fit()`` has been called)
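
    A minimal usage sketch (assuming the standard Surprise workflow)::

        from surprise import NMF, Dataset, accuracy
        from surprise.model_selection import train_test_split

        data = Dataset.load_builtin('ml-100k')
        trainset, testset = train_test_split(data, test_size=.25)

        # Factors are initialized uniformly in [init_low, init_high) and
        # stay non-negative throughout training.
        algo = NMF(n_factors=15, n_epochs=50, random_state=0)
        algo.fit(trainset)
        accuracy.rmse(algo.test(testset))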
"""
def __init__(self, n_factors=15, n_epochs=50, biased=False, reg_pu=.06,
reg_qi=.06, reg_bu=.02, reg_bi=.02, lr_bu=.005, lr_bi=.005,
init_low=0, init_high=1, random_state=None, verbose=False):
self.n_factors = n_factors
self.n_epochs = n_epochs
self.biased = biased
self.reg_pu = reg_pu
self.reg_qi = reg_qi
self.lr_bu = lr_bu
self.lr_bi = lr_bi
self.reg_bu = reg_bu
self.reg_bi = reg_bi
self.init_low = init_low
self.init_high = init_high
self.random_state = random_state
self.verbose = verbose
if self.init_low < 0:
            raise ValueError('init_low should be non-negative')
AlgoBase.__init__(self)
def fit(self, trainset):
AlgoBase.fit(self, trainset)
self.sgd(trainset)
return self
def sgd(self, trainset):
# user and item factors
cdef np.ndarray[np.double_t, ndim=2] pu
cdef np.ndarray[np.double_t, ndim=2] qi
# user and item biases
cdef np.ndarray[np.double_t] bu
cdef np.ndarray[np.double_t] bi
# auxiliary matrices used in optimization process
cdef np.ndarray[np.double_t, ndim=2] user_num
cdef np.ndarray[np.double_t, ndim=2] user_denom
cdef np.ndarray[np.double_t, ndim=2] item_num
cdef np.ndarray[np.double_t, ndim=2] item_denom
cdef int u, i, f
cdef double r, est, l, dot, err
cdef double reg_pu = self.reg_pu
cdef double reg_qi = self.reg_qi
cdef double reg_bu = self.reg_bu
cdef double reg_bi = self.reg_bi
cdef double lr_bu = self.lr_bu
cdef double lr_bi = self.lr_bi
cdef double global_mean = self.trainset.global_mean
# Randomly initialize user and item factors
rng = get_rng(self.random_state)
pu = rng.uniform(self.init_low, self.init_high,
size=(trainset.n_users, self.n_factors))
qi = rng.uniform(self.init_low, self.init_high,
size=(trainset.n_items, self.n_factors))
bu = np.zeros(trainset.n_users, np.double)
bi = np.zeros(trainset.n_items, np.double)
if not self.biased:
global_mean = 0
for current_epoch in range(self.n_epochs):
if self.verbose:
print("Processing epoch {}".format(current_epoch))
# (re)initialize nums and denoms to zero
user_num = np.zeros((trainset.n_users, self.n_factors))
user_denom = np.zeros((trainset.n_users, self.n_factors))
item_num = np.zeros((trainset.n_items, self.n_factors))
item_denom = np.zeros((trainset.n_items, self.n_factors))
            # Compute numerators and denominators for user and item factors
for u, i, r in trainset.all_ratings():
# compute current estimation and error
dot = 0 # <q_i, p_u>
for f in range(self.n_factors):
dot += qi[i, f] * pu[u, f]
est = global_mean + bu[u] + bi[i] + dot
err = r - est
# update biases
if self.biased:
bu[u] += lr_bu * (err - reg_bu * bu[u])
bi[i] += lr_bi * (err - reg_bi * bi[i])
# compute numerators and denominators
for f in range(self.n_factors):
user_num[u, f] += qi[i, f] * r
user_denom[u, f] += qi[i, f] * est
item_num[i, f] += pu[u, f] * r
item_denom[i, f] += pu[u, f] * est
# Update user factors
for u in trainset.all_users():
n_ratings = len(trainset.ur[u])
for f in range(self.n_factors):
user_denom[u, f] += n_ratings * reg_pu * pu[u, f]
pu[u, f] *= user_num[u, f] / user_denom[u, f]
# Update item factors
for i in trainset.all_items():
n_ratings = len(trainset.ir[i])
for f in range(self.n_factors):
item_denom[i, f] += n_ratings * reg_qi * qi[i, f]
qi[i, f] *= item_num[i, f] / item_denom[i, f]
self.bu = bu
self.bi = bi
self.pu = pu
self.qi = qi
def estimate(self, u, i):
# Should we cythonize this as well?
known_user = self.trainset.knows_user(u)
known_item = self.trainset.knows_item(i)
if self.biased:
est = self.trainset.global_mean
if known_user:
est += self.bu[u]
if known_item:
est += self.bi[i]
if known_user and known_item:
est += np.dot(self.qi[i], self.pu[u])
else:
if known_user and known_item:
est = np.dot(self.qi[i], self.pu[u])
else:
                raise PredictionImpossible('User and/or item is unknown.')
return est