Skip to content

Commit

Permalink
acceleration for neighborhood-based methods
Browse files Browse the repository at this point in the history
  • Loading branch information
Coder-Yu committed Apr 11, 2017
1 parent ab74464 commit a83027a
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 19 deletions.
2 changes: 1 addition & 1 deletion algorithm/rating/ItemKNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def computeCorr(self):
if i1 <> i2:
if self.itemSim.contains(i1,i2):
continue
sim = qmath.similarity(self.dao.col(i1),self.dao.col(i2),self.sim)
sim = qmath.similarity(self.dao.sCol(i1),self.dao.sCol(i2),self.sim)
self.itemSim.set(i1,i2,sim)
print 'item '+i1+' finished.'
print 'The item correlation has been figured out.'
Expand Down
2 changes: 1 addition & 1 deletion algorithm/rating/UserKNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def computeCorr(self):
if u1 <> u2:
if self.userSim.contains(u1,u2):
continue
sim = qmath.similarity(self.dao.row(u1),self.dao.row(u2),self.sim)
sim = qmath.similarity(self.dao.sRow(u1),self.dao.sRow(u2),self.sim)
self.userSim.set(u1,u2,sim)
print 'user '+u1+' finished.'
print 'The user correlation has been figured out.'
Expand Down
4 changes: 4 additions & 0 deletions baseclass/IterativeRecommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ def updateLearningRate(self,iter):
def predict(self,u,i):
    """Return the predicted rating of user ``u`` for item ``i``.

    When the DAO knows both the user and the item, the prediction is the
    dot product of the corresponding rows of ``self.P`` and ``self.Q``.
    Otherwise it falls back to the user's mean, then the item's mean, and
    finally the global mean stored on the DAO.
    """
    dao = self.dao
    knows_user = dao.containsUser(u)
    knows_item = dao.containsItem(i)

    if knows_user and knows_item:
        user_vec = self.P[dao.getUserId(u)]
        item_vec = self.Q[dao.getItemId(i)]
        return user_vec.dot(item_vec)
    if knows_user:
        # Item is not known to the DAO: use this user's mean.
        return dao.userMeans[u]
    if knows_item:
        # User is not known to the DAO: use this item's mean.
        return dao.itemMeans[i]
    # Neither is known.
    return dao.globalMean

Expand Down
10 changes: 5 additions & 5 deletions config/CoFactor.conf
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
ratings=../dataset/FilmTrust/trainset.txt
ratings=../dataset/FilmTrust/trainset2.txt
ratings.setup=-columns 0 1 2
recommender=CoFactor
evaluation.setup=-testSet ../dataset/FilmTrust/testset.txt
evaluation.setup=-testSet ../dataset/FilmTrust/testset2.txt
item.ranking=on -topN 50
num.factors=5
num.max.iter=5
num.max.iter=30
learnRate=-init 0.05 -max 1
reg.lambda=-u 0.03 -i 0.03 -b 0.03 -s 0.1
CoFactor=-k 3 -gamma 0.03 -filter 10
reg.lambda=-u 0.01 -i 0.01 -b 0.01 -s 0.1
CoFactor=-k 5 -gamma 0.03 -filter 10
output.setup=on -dir ../results/
12 changes: 6 additions & 6 deletions config/PMF.conf
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
ratings=../dataset/FilmTrust/trainset.txt
ratings=../dataset/FilmTrust/trainset2.txt
ratings.setup=-columns 0 1 2
recommender=PMF
evaluation.setup=-testSet ../dataset/FilmTrust/testset.txt
item.ranking=on -topN 10
evaluation.setup=-testSet ../dataset/FilmTrust/testset2.txt
item.ranking=on -topN 50
num.factors=5
num.max.iter=5
learnRate=-init 0.1 -max 1
reg.lambda=-u 0.03 -i 0.03 -b 0.1 -s 0.1
num.max.iter=30
learnRate=-init 0.05 -max 1
reg.lambda=-u 0.01 -i 0.01 -b 0.1 -s 0.1
output.setup=on -dir ../results/
10 changes: 5 additions & 5 deletions config/SVD.conf
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
ratings=../dataset/FilmTrust/trainset.txt
ratings=../dataset/FilmTrust/trainset2.txt
ratings.setup=-columns 0 1 2
recommender=SVD
evaluation.setup=-testSet ../dataset/FilmTrust/testset.txt
evaluation.setup=-testSet ../dataset/FilmTrust/testset2.txt
item.ranking=on -topN 10
num.factors=5
num.max.iter=10
learnRate=-init 0.005 -max 1
reg.lambda=-u 0.05 -i 0.05 -b 0.05 -s 0.1
num.max.iter=30
learnRate=-init 0.01 -max 1
reg.lambda=-u 0.02 -i 0.02 -b 0.02 -s 0.1
output.setup=on -dir ../results/
6 changes: 6 additions & 0 deletions data/rating.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,12 @@ def row(self,u):
def col(self,c):
    """Return ``trainingMatrix.col`` for the internal id of item ``c``."""
    matrix = self.trainingMatrix
    return matrix.col(self.getItemId(c))

def sRow(self,u):
    """Return ``trainingMatrix.sRow`` for the internal id of user ``u``."""
    matrix = self.trainingMatrix
    return matrix.sRow(self.getUserId(u))

def sCol(self,c):
    """Return ``trainingMatrix.sCol`` for the internal id of item ``c``."""
    matrix = self.trainingMatrix
    return matrix.sCol(self.getItemId(c))

def rating(self,u,c):
    """Return ``trainingMatrix.elem`` for the internal ids of ``u`` and ``c``."""
    matrix = self.trainingMatrix
    return matrix.elem(self.getUserId(u), self.getItemId(c))

Expand Down
15 changes: 15 additions & 0 deletions structure/new_sparseMatrix.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
#class Triple(object):


class SparseMatrix():
Expand All @@ -16,6 +17,20 @@ def __init__(self,triple):
self.elemNum = len(triple)
self.size = (len(self.matrix_User),len(self.matrix_Item))

def sRow(self,r):
    """Return the stored entry of ``self.matrix_User`` for row id ``r``.

    An empty dict is returned when ``r`` has no entry. A single
    ``dict.get`` replaces the deprecated ``has_key`` check (removed in
    Python 3) and avoids looking the key up twice.
    """
    return self.matrix_User.get(r, {})

def sCol(self,c):
    """Return the stored entry of ``self.matrix_Item`` for column id ``c``.

    An empty dict is returned when ``c`` has no entry. A single
    ``dict.get`` replaces the deprecated ``has_key`` check (removed in
    Python 3) and avoids looking the key up twice.
    """
    return self.matrix_Item.get(c, {})



def row(self,r):
if not self.matrix_User.has_key(r):
return np.zeros((1,self.size[1]))
Expand Down
18 changes: 17 additions & 1 deletion tool/qmath.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,22 @@ def common(x1,x2):
new_x2 = x2[common]
return new_x1,new_x2

def cosine_sp(x1,x2):
    """Cosine similarity of two sparse vectors given as dicts.

    Only keys present in both ``x1`` and ``x2`` contribute, to the dot
    product and to both norms alike.

    Returns 0 when the denominator is zero, e.g. when the dicts share no
    keys.
    """
    total = 0
    denom1 = 0
    denom2 = 0
    for k in x1:
        # `in` replaces dict.has_key, which is deprecated and gone in Python 3.
        if k in x2:
            total += x1[k] * x2[k]
            denom1 += x1[k] ** 2
            denom2 += x2[k] ** 2
    denominator = sqrt(denom1) * sqrt(denom2)
    # Test for zero explicitly: dividing NumPy scalars by zero yields NaN
    # instead of raising ZeroDivisionError, so a try/except would miss it.
    if denominator == 0:
        return 0
    return (total + 0.0) / denominator


def cosine(x1,x2):
#find common ratings
new_x1, new_x2 = common(x1,x2)
Expand Down Expand Up @@ -67,7 +83,7 @@ def similarity(x1,x2,sim):
if sim == 'euclidean':
return euclidean(x1,x2)
else:
return cosine(x1, x2)
return cosine_sp(x1, x2)


def normalize(vec,maxVal,minVal):
Expand Down

0 comments on commit a83027a

Please sign in to comment.