-
Notifications
You must be signed in to change notification settings - Fork 0
/
PMF.py
145 lines (131 loc) · 5.78 KB
/
PMF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Probabilistic Matrix Factorization Algorithm
# Created by August
# 2018/10/13
# Sources:
# 1. Salakhutdinov R, Mnih A. Probabilistic Matrix Factorization[C]// International Conference on Neural
#    Information Processing Systems. Curran Associates Inc. 2007:1257-1264.
# 2. https://blog.csdn.net/shenxiaolu1984/article/details/50372909
from re import split
import numpy as np
from collections import defaultdict
from Evaluation import Evaluate
from time import strftime, localtime, time
import os.path
class PMF(object):
    """Matrix-factorization rating predictor trained with stochastic gradient descent.

    Rating records are read from text files in which every non-blank line
    holds at least three whitespace-separated numeric fields: user, item and
    rating.  Users and items are mapped to row indices of the factor matrices
    ``P`` (users) and ``Q`` (items); a rating is predicted as the dot product
    of the corresponding rows.

    Typical use: ``PMF().execute()``.
    """

    # Default data set locations, used when the constructor is called without
    # explicit paths.
    DEFAULT_RATINGS_PATH = r"C:\Users\August\PycharmProjects\RecommendAlgorithm\dataset\doubanTest\ratings.txt"
    DEFAULT_TRAIN_PATH = r"C:\Users\August\PycharmProjects\RecommendAlgorithm\dataset\doubanTest\train.txt"
    DEFAULT_TEST_PATH = r"C:\Users\August\PycharmProjects\RecommendAlgorithm\dataset\doubanTest\test.txt"

    def __init__(self, ratings_file_path=None, train_file_path=None, test_file_path=None):
        """Load the rating, training and testing records and set default state.

        :param ratings_file_path: file with all rating records; defaults to
            ``DEFAULT_RATINGS_PATH`` when ``None``.
        :param train_file_path: file with the training records; defaults to
            ``DEFAULT_TRAIN_PATH`` when ``None``.
        :param test_file_path: file with the testing records; defaults to
            ``DEFAULT_TEST_PATH`` when ``None``.
        """
        print("-------------------------------PMF---------------------------------")
        if ratings_file_path is None:
            ratings_file_path = self.DEFAULT_RATINGS_PATH
        if train_file_path is None:
            train_file_path = self.DEFAULT_TRAIN_PATH
        if test_file_path is None:
            test_file_path = self.DEFAULT_TEST_PATH
        # get all rating records, training data set and testing data set from local file
        self.ratings = self.load_data(ratings_file_path)
        self.train = self.load_data(train_file_path)
        self.test = self.load_data(test_file_path)
        # hyper-parameters; init_algorithm() overwrites these before training
        self.factors = 10
        self.iter = 20
        self.learningRate = 0.05
        self.lambda_u = 0.01
        self.lambda_v = 0.001
        # factor matrices, replaced by numpy arrays in init_algorithm()
        self.P = []
        self.Q = []
        # test records collected by evaluate()
        self.test_uir = []
        # user / item name -> row index in P / Q
        self.user = {}
        self.item = {}
        # nested lookups: [user][item] -> rating / predicted rating
        self.ratings_ui = defaultdict(dict)
        self.test_ui = defaultdict(dict)
        self.predict = defaultdict(dict)

    def load_data(self, filename):
        """Read rating records from ``filename``.

        Blank lines are skipped and fields may be separated by any run of
        whitespace.  Only the first three fields of a line are used.

        :param filename: path of the text file to read.
        :return: list of ``numpy`` float arrays ``[user, item, rating]``.
        :raises ValueError: if a non-blank line has fewer than three fields or
            a field is not numeric.
        """
        records = []
        with open(filename) as f:
            for line_no, line in enumerate(f, 1):
                fields = line.split()
                if not fields:
                    continue
                if len(fields) < 3:
                    raise ValueError(
                        "%s:%d: expected 3 fields, got %d" % (filename, line_no, len(fields)))
                records.append(np.array(fields[:3], dtype=float))
        return records

    def init_algorithm(self):
        """Set the hyper-parameters, index users/items and initialise ``P`` and ``Q``.

        Every distinct user and item found in ``self.ratings`` receives one
        stable row index, assigned in order of first appearance.
        """
        # set the parameters up
        self.factors = 40
        self.iter = 10
        self.learningRate = 0.01
        self.lambda_u = 0.01
        self.lambda_v = 0.01
        print("The parameters in this algorithm are as following:")
        print("Number of potential factors: " + str(self.factors))
        print("Maximum number of iterations: " + str(self.iter))
        print("Learning rate: " + str(self.learningRate))
        print("Lambda U: " + str(self.lambda_u))
        print("Lambda V: " + str(self.lambda_v))
        print("-------------------------------MF---------------------------------")
        print("Init the algorithm...")
        # fill the user array and the item array
        for user_name, item_name, rating in self.ratings:
            # Membership must be tested on the keys (names), not on the
            # values (indices); otherwise a repeated name would be re-indexed
            # past the last row of P / Q.
            if user_name not in self.user:
                self.user[user_name] = len(self.user)
            if item_name not in self.item:
                self.item[item_name] = len(self.item)
            self.ratings_ui[user_name][item_name] = rating
        # initialize matrix P and matrix Q randomly, entries in [0, 0.5)
        self.P = np.random.rand(len(self.user), self.factors) / 2
        self.Q = np.random.rand(len(self.item), self.factors) / 2
        print("Init successfully!")
        print("-------------------------------MF---------------------------------")

    def run_algorithm(self):
        """Train ``P`` and ``Q`` with SGD over ``self.train`` for ``self.iter`` epochs.

        After each epoch the sum of squared prediction errors observed during
        that epoch is printed.

        :raises KeyError: if a training record names a user or item that is
            missing from ``self.user`` / ``self.item``.
        """
        print("Run the algorithm...")
        # iteration process of Matrix Factorization Algorithm
        for iter_curr in range(1, self.iter + 1):
            loss = 0.0
            for user_name, item_name, rating in self.train:
                user_id = self.user[user_name]
                item_id = self.item[item_name]
                # Copy the rows: indexing returns views, and the in-place
                # update of P below would otherwise leak into the Q update.
                p = self.P[user_id].copy()
                q = self.Q[item_id].copy()
                error = rating - p.dot(q)
                # squared error, so positive and negative residuals do not cancel
                loss += error ** 2
                self.P[user_id] += self.learningRate * (error * q - self.lambda_u * p)
                self.Q[item_id] += self.learningRate * (error * p - self.lambda_v * q)
            print("Iteration %s: Loss %s" % (str(iter_curr), loss))
        print("Finished!")
        print("-------------------------------MF---------------------------------")

    def evaluate(self):
        """Predict every test rating, write the predictions to a file and print RMSE / MAE.

        Predictions are rounded to the nearest integer value.  One line
        ``user item rating prediction`` per test record is written to a
        time-stamped file under ``../Result/`` (created if missing).

        :raises KeyError: if a test record names a user or item that is
            missing from ``self.user`` / ``self.item``.
        """
        out_predict = []
        for user_name, item_name, rating in self.test:
            user_id = self.user[user_name]
            item_id = self.item[item_name]
            self.test_ui[user_name][item_name] = rating
            self.test_uir.append(np.array([user_name, item_name, rating], dtype=float))
            predicted = round(self.P[user_id].dot(self.Q[item_id]), 0)
            self.predict[user_name][item_name] = predicted
            out_predict.append(str(user_name) + ' ' + str(item_name) + ' ' + str(rating) + ' '
                               + str(predicted) + '\n')
        # output to file
        out_path = "../Result/"
        current_time = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        out_filename = "PMF_Predict_for_Rating" + "@" + current_time + ".txt"
        # exist_ok avoids the check-then-create race
        os.makedirs(out_path, exist_ok=True)
        out_file = os.path.join(out_path, out_filename)
        with open(out_file, 'w') as f:
            f.writelines(out_predict)
        # report the real location of the output file
        print("The predict result has been output to " + out_file)
        evaluate = Evaluate(self.test_uir, self.predict)
        rmse = evaluate.RMSE()
        mae = evaluate.MAE()
        print("Evaluate:")
        print("RMSE: %s, MAE: %s" % (rmse, mae))

    def execute(self):
        """Run the full pipeline: initialise, train, then evaluate."""
        self.init_algorithm()
        self.run_algorithm()
        self.evaluate()