# multi-thread_NMF.py -- 76 lines (65 loc), 2.37 KB
import _thread
import threading
import time

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import rand

import mv100
def rmse(rm, erm):
    """Return the root-mean-square error over the observed entries of ``rm``.

    Only the positions reported by ``rm.nonzero()`` contribute; every other
    position of ``erm`` is ignored.

    Args:
        rm: Reference rating matrix. Must support ``nonzero()`` and
            ``[rows, cols]`` indexing (e.g. a SciPy sparse matrix or a 2-D
            NumPy array).
        erm: Estimated rating matrix, indexable at the same positions.

    Returns:
        The RMSE as a ``float``. When ``rm`` has no non-zero entries there is
        nothing to compare, so ``0.0`` is returned instead of dividing by
        zero.
    """
    rows, cols = rm.nonzero()
    if len(rows) == 0:
        return 0.0
    # Sample both matrices only at the observed positions instead of
    # materialising the full difference matrix and reading it one element at
    # a time. np.asarray(...).ravel() flattens the 1 x N np.matrix that SciPy
    # sparse matrices return for this kind of indexing.
    observed = np.asarray(rm[rows, cols], dtype=float).ravel()
    predicted = np.asarray(erm[rows, cols], dtype=float).ravel()
    residuals = observed - predicted
    return float(np.sqrt(np.mean(residuals ** 2)))
def updateP(rows, cols, p, q_const, error_matrix, flag, learning_rate=None):
    """Apply one gradient pass to the rows of ``p``, in place.

    For every ``(row, col)`` pair, ``p[row, :]`` is incremented by
    ``error_matrix[row, col] * q_const[:, col].T * step``.

    Args:
        rows: Row indices of the entries to visit (must support ``len``).
        cols: Column indices, paired element-wise with ``rows``.
        p: Matrix updated in place, one row per visited entry.
        q_const: Matrix whose columns are read; it is never written here.
        error_matrix: Residual matrix, read at ``[row, col]``.
        flag: Completion signal. If it has a callable ``set`` attribute
            (e.g. a ``threading.Event``) that is called once the pass is
            done. Any other value (such as a plain ``bool``) is ignored,
            because rebinding a parameter is invisible to the caller.
        learning_rate: Step size. When ``None`` the module-level ``lr`` is
            used, which is what existing six-argument callers rely on.
    """
    step = lr if learning_rate is None else learning_rate
    total = len(rows)
    for done, (row, col) in enumerate(zip(rows, cols), start=1):
        rui = error_matrix[row, col]
        # Plain indexing (rather than getcol/transpose) gives the same result
        # for SciPy sparse matrices and also works for dense NumPy arrays.
        p[row, :] += rui * q_const[:, col].T * step
        if done % 10000 == 0:
            # Percentage of the actual number of entries, not of a
            # hard-coded 80,000.
            print("P updating finished {}%!".format(100 * done / total))
    signal_done = getattr(flag, "set", None)
    if callable(signal_done):
        signal_done()
def updateQ(rows, cols, p_const, q, error_matrix, flag, learning_rate=None):
    """Apply one gradient pass to the columns of ``q``, in place.

    For every ``(row, col)`` pair, ``q[:, col]`` is incremented by
    ``error_matrix[row, col] * p_const[row, :].T * step``.

    Args:
        rows: Row indices of the entries to visit (must support ``len``).
        cols: Column indices, paired element-wise with ``rows``.
        p_const: Matrix whose rows are read; it is never written here.
        q: Matrix updated in place, one column per visited entry.
        error_matrix: Residual matrix, read at ``[row, col]``.
        flag: Completion signal. If it has a callable ``set`` attribute
            (e.g. a ``threading.Event``) that is called once the pass is
            done. Any other value (such as a plain ``bool``) is ignored,
            because rebinding a parameter is invisible to the caller.
        learning_rate: Step size. When ``None`` the module-level ``lr`` is
            used, which is what existing six-argument callers rely on.
    """
    step = lr if learning_rate is None else learning_rate
    total = len(rows)
    for done, (row, col) in enumerate(zip(rows, cols), start=1):
        rui = error_matrix[row, col]
        # Plain indexing (rather than getrow/transpose) gives the same result
        # for SciPy sparse matrices and also works for dense NumPy arrays.
        q[:, col] += rui * p_const[row, :].T * step
        if done % 10000 == 0:
            # Percentage of the actual number of entries, not of a
            # hard-coded 80,000.
            print("Q updating finished {}%!".format(100 * done / total))
    signal_done = getattr(flag, "set", None)
    if callable(signal_done):
        signal_done()
# Training script: factorise the training rating matrix as p (m x k) times
# q (k x n), updating p and q in two worker threads per epoch.
train_list = mv100.mv1002list("./ml-100k/u5_fix.base")
test_list = mv100.mv1002list("./ml-100k/u5.test")
test_rm = csr_matrix(mv100.creat_matrix(test_list))
train_rm = csr_matrix(mv100.creat_matrix(train_list))
print("in the movie-100k datasets, there are {} users and {} items !".format(train_rm.shape[0], train_rm.shape[1]))

m = train_rm.shape[0]  # number of users
n = train_rm.shape[1]  # number of items
k = 50  # number of latent factors (hyper-parameter)
# Every factor starts at sqrt(3 / k), so each initial estimate p.dot(q)[u, i]
# equals k * (3 / k) = 3.
p = csr_matrix(np.full((m, k), (3 / k) ** 0.5))  # the first matrix
q = csr_matrix(np.full((k, n), (3 / k) ** 0.5))  # the second matrix
epochs = 30
lr = 0.0001  # learning rate; updateP / updateQ read it as a module global
rows, cols = train_rm.nonzero()

for epoch in range(epochs):
    time1 = time.time()
    erm = p.dot(q)  # estimated rating matrix
    error_matrix = train_rm - erm
    # Real snapshots, not aliases: updateP mutates p while updateQ reads
    # p_const (and vice versa), so both workers must see the factors as they
    # were at the start of the epoch.
    p_const = p.copy()
    q_const = q.copy()
    # The last argument is the workers' completion flag; completion is
    # detected with join() below, so a plain placeholder is passed.
    workers = [
        threading.Thread(target=updateP, args=(rows, cols, p, q_const, error_matrix, False)),
        threading.Thread(target=updateQ, args=(rows, cols, p_const, q, error_matrix, False)),
    ]
    for worker in workers:
        worker.start()
    # Wait for both updates to finish instead of sleeping a fixed 10000 s.
    # NOTE: on a GIL-based CPython build these CPU-bound workers mostly
    # interleave rather than run in parallel.
    for worker in workers:
        worker.join()
    time2 = time.time()
    erm = p.dot(q)
    # NOTE(review): assumes test_rm has the same shape as train_rm — confirm
    # against mv100.creat_matrix.
    print("rmse loss on training set is {}\n"
          "rmse loss on test set is {}\n"
          "for this epoch using {} seconds".format(rmse(train_rm, erm), rmse(test_rm, erm), time2 - time1))