
Merge pull request #22 from Ykid/master

fix indexing function and add neighborhood model
2 parents ae57f46 + 339145b commit 8fbfab30857e6c446fb6ec8e15cca14283678137 @ChrisRackauckas committed Aug 4, 2013
Showing with 438 additions and 51 deletions.
  1. +14 −17 config.py
  2. +99 −20 utils/ImplicitFeedbackFunctions.py
  3. +1 −1 utils/Model.py
  4. +45 −13 utils/SVDModel.py
  5. +279 −0 utils/neighborhoodFunctions.py
31 config.py
@@ -1,16 +1,15 @@
################### Select Models ##################
models = [
- ['basicFM','FM','Basic',['2']],
- #['basicMovTag','FM','BasicMovieTag',['2']],
- #['nearNeib', 'FM', 'NearestNeighbor',['2']],
- #['rmtThresh5t','FM','RelatedMovieTagThreshold',['2']],
- #['rmtThresh2','FM','RelatedMovieTagThreshold2',['2']],
- #['userHist','FM','UserHistory',['2']],
- #['userSocial','FM','UserSocial',['2']]
-
- ['basicSVD','SVD','Basic',[]],
+ #['basicFM','FM','Basic',['2']],
+ #['bmt','FM','BasicMovieTag',['2']],
+ #['amt','FM','AdjustedMovieTag',['2']]
+ #['nn', 'FM', 'NearestNeighbor',['2']]
+ #['rmt','FM','RelatedMovieTag',['2']]
+
+ #['basicSVD','SVD','Basic',[]]
['ImplicitFeedbackSVD','SVD','ImplicitFeedback',[]]
+ #['NeighborhoodMovieTag', 'SVD' , 'Neighborhood' , ['MovieTag']]
]
# Defining models:
@@ -45,8 +44,7 @@
################### Select Parts ##################
LAPTOP_TEST = True # uses small data set to run features on laptop
-
-TRIALS = 1
+TRIALS = 1
PRE_PROCESS = True
# ---- ---- PreProcess Selection ---- ---- #
TEST_SUBSET = True # uses small data set
@@ -61,11 +59,11 @@
# ---- ---- ---- ---- ----- ---- ---- ---- #
SETUP_MODELS = True
RUN_MODELS = True
-SETUP_HYBRID = True
-RUN_HYBRID = True
-SETUP_SYNTHESIZE = True
-RUN_SYNTHESIZE = True
-POST_PROCESS = True
+SETUP_HYBRID = False
+RUN_HYBRID = False
+SETUP_SYNTHESIZE = False
+RUN_SYNTHESIZE = False
+POST_PROCESS = False
################## Select Bootstrap Parameters ##################
@@ -88,7 +86,6 @@
SVD_REGULARIZATION_ITEM = '.004'
SVD_REGULARIZATION_USER = '.004'
SVD_REGULARIZATION_GLOBAL = '.001'
-
SVD_REGULARIZATION_FEEDBACK = '.004'
SVD_NUM_FACTOR = '64'
SVD_ACTIVE_TYPE = '0'
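Each entry in the models list above is a four-element list: a model tag, the model type ('FM' or 'SVD'), the feature set name, and a misc list of extra options (for the new Neighborhood entry, ['MovieTag'] is what SVDModel checks when choosing the tag file paths). A minimal sketch of how such an entry might be unpacked by a driver loop; the loop below is hypothetical and only illustrates the layout:

# Hypothetical driver sketch (not part of this commit): unpack one config entry.
for configModel in models:
    tag, modelType, featureSet, misc = configModel
    print('model %s: type=%s features=%s misc=%s' % (tag, modelType, featureSet, misc))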
119 utils/ImplicitFeedbackFunctions.py
@@ -1,16 +1,48 @@
-def reIndex_Implicit(fin):
- print("Reindexing Origin Data Set and Building the Correspondence Dics")
- fi = open( fin, 'r' ) #training set
- #extract from input file
+import sys
+'''
+Program input requirements:
+ 1. reIndex_Implicit: each line of ftrain, fCV and ftest should be "UserID \t MovieID \t rating \n"
+ 2. translate: reindexes an input file using the correspondence dictionaries returned by reIndex_Implicit
+    input file format: "UserID \t MovieID \t [optional third column] \n"
+ 3. userfeedback, usergroup and mkfeature are helpers called by mkImplicitFeatureFile
+    ftrain format:  "userid \t itemid \t rate \n"
+    fgtrain format: "userid \t itemid \t rate \n", grouped by user
+    output format:  "rate \t user group size \t number of implicit feedback items \t fid1:fvalue1, fid2:fvalue2 ... \n"
+'''
+
+def reIndex_Implicit(ftrain,fCV,ftest,ftrainOut,fCVOut,ftestOut):
+ print("Reindexing Data Sets and Building the Correspondence Dics")
+ bootTrainFile = open(ftrain, 'r')
+ bootCVFile = open(fCV , 'r')
+ bootTestFile = open(ftest , 'r')
+ tmpTrainFile = open(ftrainOut, 'w')
+ tmpTestFile = open(ftestOut, 'w')
+ tmpCVFile = open(fCVOut, 'w')
+
+ ############# Write reindexed tmp files #############
+ trainLines = bootTrainFile.readlines()
+ CVLines = bootCVFile.readlines()
+ testLines = bootTestFile.readlines()
+
+ fullInput = []
+ fullInput.append(trainLines)
+ fullInput.append(CVLines)
+ fullInput.append(testLines)
+
uidDic={}
iidDic={}
newuid=1
newiid=1
ctr=0 # is the counter of the total number.
sum=0.0
- for line in fi:
- arr = line.split()
+ #Build dictionary
+
+ for line in trainLines:
+ arr = line.rsplit('\t')
uid = int(arr[0].strip())
iid = int(arr[1].strip())
rating = int(float(arr[2].strip()))
@@ -26,10 +58,61 @@ def reIndex_Implicit(fin):
if iid not in iidDic:
iidDic[iid]=newiid
newiid+=1
-
- fi.close()
- #calculate different parameter.
+
+ for line in CVLines:
+ arr = line.rsplit('\t')
+ uid = int(arr[0].strip())
+ iid = int(arr[1].strip())
+ #this part for reindexing the user ID
+ if uid not in uidDic:
+ uidDic[uid]=newuid
+ newuid+=1
+ #this part for reindexing the item ID
+ if iid not in iidDic:
+ iidDic[iid]=newiid
+ newiid+=1
+
+ for line in testLines:
+ arr = line.rsplit('\t')
+ uid = int(arr[0].strip())
+ iid = int(arr[1].strip())
+ #this part for reindexing the user ID
+ if uid not in uidDic:
+ uidDic[uid]=newuid
+ newuid+=1
+ #this part for reindexing the item ID
+ if iid not in iidDic:
+ iidDic[iid]=newiid
+ newiid+=1
+
+ #Re-index
+ for line in trainLines:
+ arr = line.split()
+ uid = int(arr[0].strip())
+ iid = int(arr[1].strip())
+ rating = int(float(arr[2].strip()))
+ tmpTrainFile.write('%d\t%d\t%d\n' %(uidDic[uid],iidDic[iid],rating))
+ for line in CVLines:
+ arr = line.split()
+ uid = int(arr[0].strip())
+ iid = int(arr[1].strip())
+ rating = int(float(arr[2].strip()))
+ tmpCVFile.write('%d\t%d\t%d\n' %(uidDic[uid],iidDic[iid],rating))
+ for line in testLines:
+ arr = line.split()
+ uid = int(arr[0].strip())
+ iid = int(arr[1].strip())
+ rating = int(float(arr[2].strip()))
+ tmpTestFile.write('%d\t%d\t%d\n' %(uidDic[uid],iidDic[iid],rating))
+
avg=sum/ctr
+ #Close files
+ bootTrainFile.close()
+ bootTestFile.close()
+ bootCVFile.close()
+ tmpTrainFile.close()
+ tmpTestFile.close()
+ tmpCVFile.close()
print("Finished")
return(uidDic,iidDic,avg)
@@ -57,32 +140,29 @@ def translate(fin,fout,Udic,ItemDic):
fo.close()
print("Translation Finished.")
-
def userfeedback(fname):
fi = open(fname,'r')
feedback = {}
for line in fi:
attr = line.strip().split('\t')
- uid = int(attr[0])-1
- iid = int(attr[1])-1
+ uid = int(attr[0])-1 #uid actually starts from 0
+ iid = int(attr[1])-1 #iid (movie id) actually starts from 0
if uid in feedback:
feedback[uid].append(iid)
else:
feedback[uid] = [iid]
fi.close()
return feedback
-#group num and order of the grouped training data
-
-
+#usergroup finds each user's group size and the user order in the grouped training data file
def usergroup(fname):
fi = open(fname,'r')
userorder = []
groupnum = {}
lastuid = -1
for line in fi:
attr = line.strip().split('\t')
- uid = int(attr[0])-1
+ uid = int(attr[0])-1 #uid actually starts from 0
if uid in groupnum:
groupnum[uid] += 1
else:
@@ -93,10 +173,8 @@ def usergroup(fname):
fi.close()
return userorder,groupnum
-#make implict feedback feature, one line for a user, wihch is in the order of the grouped training data
-#the output format:rate \t number of user group \t number of user implicit feedback \t fid1:fvalue1, fid2:fvalue2 ... \n
-
+#mkfeature computes the implicit feedback feature values for each user
def mkfeature(fout,userorder,groupnum,feedback):
fo = open(fout,'w')
for uid in userorder:
@@ -108,8 +186,9 @@ def mkfeature(fout,userorder,groupnum,feedback):
fo.write('\n')
+
+#make implicit feedback features
def mkImplicitFeatureFile(ftrain,fgtrain,fout):
- '''usage:<training_file> <grouped training_file> <output>'''
feedback = userfeedback(ftrain)
userorder,groupnum = usergroup(fgtrain)
#make features and print them out in file fout
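The helpers above are meant to be chained: reIndex_Implicit reindexes the bootstrapped train/CV/test files and returns the correspondence dictionaries, translate applies those dictionaries to other files such as the user history, and mkImplicitFeatureFile turns the training file plus its user-grouped copy into the implicit feedback feature file. A minimal usage sketch with placeholder file names (the paths below are assumptions for illustration, not paths from the repository):

import ImplicitFeedbackFunctions as IFF

# Reindex the three data splits and get the user/item correspondence dictionaries.
Udic, ItemDic, avg = IFF.reIndex_Implicit('boot_train.txt', 'boot_cv.txt', 'boot_test.txt',
                                          'tmp_train.txt', 'tmp_cv.txt', 'tmp_test.txt')
# Apply the same reindexing to the user history file.
IFF.translate('user_history.txt', 'user_history_reindexed.txt', Udic, ItemDic)
# Build the implicit feedback feature file from the training file and its user-grouped copy;
# each output line is "rate \t user group size \t number of implicit feedback items \t fid1:fvalue1, ...".
IFF.mkImplicitFeatureFile('tmp_train.txt', 'tmp_train_grouped.txt', 'features_train.txt')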
2 utils/Model.py
@@ -17,7 +17,6 @@ def __init__(self,configModel,utils,strTrial):
'_CV' + '_t' + strTrial
self.featTest = utils.MODEL_FEATURED_PATH + self.tag + \
'_test' + '_t' + strTrial
-
self.tmpTrain = utils.MODEL_TMP_PATH + self.tag + \
'_train' + '_t' + strTrial
self.tmpCV = utils.MODEL_TMP_PATH + self.tag + \
@@ -41,6 +40,7 @@ def __init__(self,configModel,utils,strTrial):
self.userSocialPath = utils.USER_SOCIAL_PATH
self.userHistoryPath= utils.USER_HISTORY_PATH
+
def prependUserMovieToPredictions(self,idsPath,fixPath,savePath):
### Takes in a column of ratings as toFix
### Takes in user and movie id's through idsPath
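The comments above describe prependUserMovieToPredictions: it takes a column of predicted ratings (fixPath) and the user and movie ids from idsPath and writes them out together. A standalone sketch written from those comments rather than from the method body; the helper name and exact output layout below are assumptions:

# Illustrative sketch only: pair each prediction with the uid/mid from the ids file.
def prepend_user_movie(ids_path, fix_path, save_path):
    with open(ids_path) as ids, open(fix_path) as preds, open(save_path, 'w') as out:
        for id_line, pred in zip(ids, preds):
            uid, mid = id_line.split()[:2]
            out.write('%s\t%s\t%s\n' % (uid, mid, pred.strip()))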
58 utils/SVDModel.py
@@ -1,18 +1,20 @@
from Model import Model
import ImplicitFeedbackFunctions as IFF #IFF for implicitFeedbackFunctions
+import neighborhoodFunctions as NF
class SVDModel(Model):
### Construct ###
def __init__(self,configModel,utils,config,strTrial):
- Model.__init__(self,configModel,utils,strTrial)
+ #Set up the model parameters and file paths.
+ Model.__init__(self,configModel,utils,strTrial)
self.configPath = utils.MODEL_CONFIG_PATH + self.tag + \
'_t' + strTrial
- self.userHistoryReindexPath= utils.MODEL_TMP_PATH + self.tag + \
- '_userHistoryReindex' + '_t' + strTrial
-
+
### Baidu Specific ###
### Implicit Feedback Files ###
+ self.userHistoryReindexPath= utils.MODEL_TMP_PATH + self.tag + \
+ '_userHistoryReindex' + '_t' + strTrial
#The following 3 files are implicit feature files
self.ImfeatTrain = utils.MODEL_FEATURED_PATH + self.tag + \
'_Imtrain' + '_t' + strTrial
@@ -31,6 +33,17 @@ def __init__(self,configModel,utils,config,strTrial):
### End Implicit Feature Files ###
self.regularizationFeedback = config.SVD_REGULARIZATION_FEEDBACK
+
+
+ ### Neighborhood Model Files ###
+ if len(self.misc) > 0:
+ if self.misc[0] == "MovieTag":
+ self.TagFilePath = self.movieTagPath
+ self.TagFileReindexPath = utils.MODEL_TMP_PATH + self.tag + \
+ '_' + self.misc[0] + '_t' + strTrial
+ self.ShareTagPath = utils.MODEL_TMP_PATH + self.tag + \
+ '_share_' + self.misc[0] + '_t' + strTrial
+ ### End Neighborhood Model Files ###
### End Baidu Specific ###
self.numIter = config.SVD_NUM_ITER
@@ -49,6 +62,10 @@ def __init__(self,configModel,utils,config,strTrial):
self.SVDFeatureSVDPPRandOrder = utils.SVDFEATURE_SVDPP_RANDORDER
self.formatType = 0
self.numUserFeedback = 0
+ self.numUser= 0
+ self.numMovie= 0
+ self.numGlobal = 0
+ self.avg= 0
self.originDataSet = utils.ORIGINAL_DATA_PATH
# 0 is the default value
@@ -179,7 +196,8 @@ def reIndex(self):
def dataConvert(self):
import os
- if self.featureSet == 'Basic':
+ if self.featureSet == 'Basic' or \
+ self.featureSet == 'Neighborhood':
os.system(self.SVDBufferPath + ' ' +
self.featTrain + ' ' + self.runTrain)
os.system(self.SVDBufferPath + ' ' +
@@ -237,6 +255,8 @@ def setupFeatures(self):
### Baidu Specific Features ###
if self.featureSet == 'ImplicitFeedback':
self.setupImplicitFeatures()
+ if self.featureSet == 'Neighborhood':
+ self.NeighborhoodSetup()
### End Baidu Specific Features ###
def basicConvert(self,fin,fout):
@@ -256,17 +276,10 @@ def basicConvert(self,fin,fout):
def setupImplicitFeatures(self):
import os
-
#reindex the training files and build two dicts
- Udic,ItemDic,avg=IFF.reIndex_Implicit(self.originDataSet)
+ Udic,ItemDic,avg=IFF.reIndex_Implicit(self.bootTrain, self.bootCV, self.bootTest, self.tmpTrain, self.tmpCV, self.tmpTest)
#reindex the history
IFF.translate(self.userHistoryPath, self.userHistoryReindexPath, Udic, ItemDic)
- #reindex CV file
- IFF.translate(self.bootCV, self.tmpCV, Udic, ItemDic)
- #reindex Testfile
- IFF.translate(self.bootTest, self.tmpTest, Udic, ItemDic)
- #reindex the training
- IFF.translate(self.bootTrain,self.tmpTrain,Udic,ItemDic)
#make group training files
os.system(self.SVDFeatureSVDPPRandOrder +' '+ self.tmpTrain + ' ' + self.tmpLineOrder)
@@ -319,3 +332,22 @@ def fixRun(self):
' name_pred=' + self.predTestTmp)
self.prependUserMovieToPredictions(self.bootCV,self.predCVTmp,self.predCV)
self.prependUserMovieToPredictions(self.bootTest,self.predTestTmp,self.predTest)
+
+
+ def NeighborhoodSetup(self):
+ #reindex the boot train, movie-tag, test and CV files; returns user count, movie count and average rating
+ NSnoUser,NSnoMovie,NSAvg = NF.reIndex(self.bootTrain, self.TagFilePath, self.bootTest, self.bootCV, self.tmpTrain, self.TagFileReindexPath, self.tmpTest, self.tmpCV)
+
+ #build the file of movie pairs that share enough tags
+ NF.share(self.TagFileReindexPath,self.ShareTagPath)
+
+ #build the neighborhood feature files; the return value is the global feature count
+ NumGlobal = NF.neighborhood(self.tmpTrain, self.ShareTagPath, self.tmpTest, self.featTrain, self.featTest)
+ NF.neighborhood(self.tmpCV, self.ShareTagPath, self.tmpTest, self.featCV, self.featTest)
+
+ # set the parameters.
+ self.numUser = NSnoUser
+ self.numMovie = NSnoMovie
+ self.numGlobal = NumGlobal + 1
+ self.avg = NSAvg
+
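NeighborhoodSetup chains three helpers from neighborhoodFunctions (reIndex, share, neighborhood) and stores the counts they return; numGlobal is the returned global feature count plus one. A rough sketch of the same pipeline called directly on the module, with placeholder file names (the paths below are assumptions for illustration, not the framework's own paths):

import neighborhoodFunctions as NF

# 1. Reindex the boot train / movie-tag / test / CV files; returns user count, movie count and rating average.
noUser, noMovie, avg = NF.reIndex('boot_train.txt', 'movie_tag.txt', 'boot_test.txt', 'boot_cv.txt',
                                  'tmp_train.txt', 'movie_tag_reindexed.txt', 'tmp_test.txt', 'tmp_cv.txt')
# 2. Keep only the movie pairs that share enough tags.
NF.share('movie_tag_reindexed.txt', 'share_tag.txt')
# 3. Write the neighborhood feature files; the return value feeds numGlobal (plus one) in NeighborhoodSetup.
numGlobal = NF.neighborhood('tmp_train.txt', 'share_tag.txt', 'tmp_test.txt',
                            'feat_train.txt', 'feat_test.txt')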
279 utils/neighborhoodFunctions.py
@@ -0,0 +1,279 @@
+import sys
+#reIndex takes the original training set, movie-tag set, prediction set and CV set as input.
+#The TrainFile is in the format "uid \t mid \t rating"
+#The PredFile is in the format "uid \t mid \t rating"
+#The MovieTagFile is in the format "mid \t tag1,tag2,..."
+#The CVFile is in the format "uid \t mid \t rating"
+#For output:
+#The TrainFileReindex is in the format "uid \t mid \t rating", with reindexed IDs
+#The PredFileReindex is in the format "uid \t mid \t rating", with reindexed IDs
+#The MovieTagFileReindex is in the format "mid \t tag1,tag2,...", with reindexed IDs
+#The CVFileReindex is in the format "uid \t mid \t rating", with reindexed IDs
+def reIndex(fin,gin,hin,CVfin,fout,gout,hout,CVfout):
+ print("Run Neighborhood Model, Start Reindexing")
+
+ TrainFile =open(fin,'r')
+ MovieTagFile =open(gin,'r')
+ PredFile =open(hin,'r')
+ CVFile =open(CVfin,'r')
+ TrainFileReindex =open(fout,'w')
+ MovieTagFileReindex =open(gout,'w')
+ PredFileReindex =open(hout,'w')
+ CVFileReindex =open(CVfout,'w')
+
+ uidDic={} #Key is original uid. Corresponding value is reindexed uid
+ midDic={} #Key is original mid. Corresponding value is reindexed mid
+ tidDic={} #Key is original tid. Corresponding value is reindexed tid
+ mtlDic={} #Key is mid. Corresponding value is a list of the movie's tags
+
+ newuid=0
+ newmid=0
+ newtid=0
+ ctr =0 #ctr keeps the count of ratings in the training file
+ sum =0.0 #sum of all ratings in the training file (float so avg is not truncated by integer division)
+#this part reindexes the training file
+ for line in TrainFile:
+ arr=line.split()
+ uid=int(arr[0].strip())
+ mid=int(arr[1].strip())
+ rating=int(float(arr[2].strip()))
+ sum +=rating
+ ctr += 1
+
+ if uid not in uidDic:
+ uidDic[uid]=newuid
+ newuid+=1
+
+ if mid not in midDic:
+ midDic[mid]=newmid
+ newmid+=1
+
+ TrainFileReindex.write('%d\t%d\t%d\n' %(uidDic[uid],midDic[mid],rating))
+
+#this part reindexes the CV file
+ for line in CVFile:
+ arr=line.split()
+ uid=int(arr[0].strip())
+ mid=int(arr[1].strip())
+ rating=int(float(arr[2].strip()))
+
+ if uid not in uidDic:
+ uidDic[uid]=newuid
+ newuid+=1
+
+ if mid not in midDic:
+ midDic[mid]=newmid
+ newmid+=1
+
+ CVFileReindex.write('%d\t%d\t%d\n' %(uidDic[uid],midDic[mid],rating))
+
+
+#this part reindexes the prediction file
+ for line in PredFile:
+ arr=line.split()
+ uid=int(arr[0].strip())
+ mid=int(arr[1].strip())
+ rating=int(float(arr[2].strip()))
+
+ if uid not in uidDic:
+ uidDic[uid]=newuid
+ newuid+=1
+
+ if mid not in midDic:
+ midDic[mid]=newmid
+ newmid+=1
+
+ PredFileReindex.write('%d\t%d\t%d\n' %(uidDic[uid],midDic[mid],rating))
+
+
+#this part reindexes the movie-tag file
+ for line in MovieTagFile:
+ arr=line.split()
+ mid=int(arr[0].strip())
+
+ if mid in midDic:
+ Tag=(arr[1].strip())
+ mtlDic[midDic[mid]]=list()
+ TagList=Tag.split(',')
+
+ for tid in TagList:
+ if tid not in tidDic:
+ tidDic[tid]=newtid
+ newtid+=1
+ mtlDic[midDic[mid]].append(tidDic[tid])
+
+ MovieTagFileReindex.write(str(midDic[mid])+'\t')
+ for tag in mtlDic[midDic[mid]]:
+ MovieTagFileReindex.write(str(tag))
+ if tag !=mtlDic[midDic[mid]][-1]:
+ MovieTagFileReindex.write(',')
+
+ MovieTagFileReindex.write('\n')
+
+
+ noUser =len(uidDic)
+ noMovie=len(midDic)
+ avg =sum/ctr
+
+ TrainFileReindex.close()
+ MovieTagFileReindex.close()
+ PredFileReindex.close()
+ CVFileReindex.close()
+ TrainFile.close()
+ MovieTagFile.close()
+ PredFile.close()
+ CVFile.close()
+ print("Reindexing Finished")
+ return(noUser,noMovie,avg)
+
+#share reads the reindexed movie-tag file and finds movie pairs that have a certain number of tags in common
+#The input is the movie-tag file (after reindexing), in the format "mid \t tag1,tag2,..."
+#The output is in the format "mid1 \t mid2", where mid1 and mid2 share enough tags in common
+def share(fin,fout):
+ print("Generating Share Tag Files.")
+
+ fi=open(fin,'r')
+ fo=open(fout,'w')
+ mtlDic={}
+
+#this part constructs the dictionary: movie id as key, tag list as value
+ for line in fi:
+
+ arr=line.split()
+ mid=int(arr[0].strip())
+ tag=(arr[1].strip())
+ taglist=tag.split(',')
+
+ mtlDic[mid]=list()
+
+ for tid in taglist:
+ mtlDic[mid].append(tid)
+
+
+#this part writes the file of movie pairs which share enough tags
+ for mid in mtlDic:
+ for i in range(mid+1,len(mtlDic)+1):
+ a_set=set(mtlDic[mid])
+
+ if i in mtlDic:
+ b_set=set(mtlDic[i])
+ c_set=a_set.intersection(b_set)
+
+ if len(c_set)>=10:
+ fo.write('%s\t%s\n' %(mid,i))
+
+ fo.close()
+ fi.close()
+ print("Generation Finished")
+
+#neighborhood takes the user-movie-rating files (training and test sets) and the shared-tag movie pair file as input, and builds the neighborhood sets for the given users and movies.
+#TrainingFile is in the format "uid \t mid \t rating"
+#ShareTag is in the format "mid1 \t mid2"
+#TestFile is in the format "uid \t mid \t rating"
+def neighborhood(fin,gin,hin,fout,gout):
+ print("Generating Neighborhood for" + ' ' + fin + ' and ' + hin + '.' )
+ TrainingFile =open(fin,'r') #refers to the file of user-movie-rating training set
+ ShareTag =open(gin,'r') #refers to the file of sharing-tag movie pair
+ TestFile =open(hin,'r') #refers to the file of user-movie-rating test set
+ TrainingFile_reformated =open(fout,'w') #the reformatted training set
+ TestFile_reformated =open(gout,'w') #the reformatted test set
+
+ MovieNbhood={} #dictionary: movie id -> list of movies that share tags with it
+ UserMovieDic={} #dictionary: user id -> list of movies the user rated
+ RatingDic={} #dictionary: (uid, mid) tuple -> rating
+ AvgDic={} #dictionary: user id -> the user's average rating
+ IndexCorresDic={} #dictionary: (movie i, movie j) tuple -> global feature index
+
+#first, build a dictionary mapping each movie to the list of movies that share tags with it
+
+ for line in ShareTag:
+ arr=line.split()
+ mid=int(arr[0].strip())
+ MovNeighbor=int(arr[1].strip())
+
+ if mid not in MovieNbhood:
+ MovieNbhood[mid]=list()
+ MovieNbhood[mid].append(MovNeighbor)
+ else:
+ MovieNbhood[mid].append(MovNeighbor)
+
+#then, build the user -> movie-list dictionary, the (uid, mid) -> rating dictionary, and the user -> average-rating dictionary
+
+ for line in TrainingFile:
+ arr=line.split()
+ uid=int(arr[0].strip())
+ mid=int(arr[1].strip())
+ rating=int(float(arr[2].strip()))
+ RatingDic[(uid,mid)]=rating
+
+ if uid not in UserMovieDic:
+ UserMovieDic[uid]=list()
+ UserMovieDic[uid].append(mid)
+ AvgDic[uid]=float(rating) #store as float so the running average is not truncated by integer division
+ else:
+ AvgDic[uid]=(AvgDic[uid]*len(UserMovieDic[uid])+rating)/(len(UserMovieDic[uid])+1)
+ UserMovieDic[uid].append(mid)
+
+#now build the neighborhood features for the training set
+
+ b=0 #total no. of neighbors
+ for uid in UserMovieDic:
+ for mid in UserMovieDic[uid]:
+ TrainingFile_reformated.write('%d\t' %RatingDic[(uid,mid)])
+ a=0 #no. of neighbors of that particular movie
+ a_list=list()
+ if mid in MovieNbhood:
+ for movie in MovieNbhood[mid]:
+ if movie in UserMovieDic[uid]: # if the user watched that movie in the neighborhood of the mid
+ b=b+1
+ a=a+1
+ c=RatingDic[(uid,movie)]-AvgDic[uid] # c is the distance between rating and AVG
+ a_list.append(c)
+ IndexCorresDic[(mid,movie)]=b # b is the index of the global feature.
+
+
+ TrainingFile_reformated.write('%d\t1\t1\t' %a)
+ for i in range(0,len(a_list)):
+ TrainingFile_reformated.write('%d:%f\t' %(b-a+i+1,a_list[i]))
+
+
+ else:
+ TrainingFile_reformated.write('0\t1\t1\t')
+ TrainingFile_reformated.write('%d:1\t%d:1\n' %(uid,mid))
+
+
+#then build the neighborhood features for the test set
+ for line in TestFile:
+ arr=line.split()
+ uid=int(arr[0].strip())
+ mid=int(arr[1].strip())
+ rating=int(float(arr[2].strip()))
+ TestFile_reformated.write('%d\t' %rating)
+ a=0
+ a_list=list()
+ b_list=list()
+
+ if mid in MovieNbhood and uid in UserMovieDic:
+ for movie in MovieNbhood[mid]:
+ if (mid,movie) in IndexCorresDic and movie in UserMovieDic[uid]:
+ c=RatingDic[(uid,movie)]-AvgDic[uid]
+ a_list.append(c)
+ m=IndexCorresDic[(mid,movie)]
+ b_list.append(m)
+ a=a+1
+ TestFile_reformated.write('%d\t1\t1\t' %a)
+ for i in range(0,len(a_list)):
+ TestFile_reformated.write('%d:%f\t' %(b_list[i],a_list[i]))
+ else:
+ TestFile_reformated.write('0\t1\t1\t')
+ TestFile_reformated.write('%d:1\t%d:1\n' %(uid,mid))
+
+ TrainingFile.close()
+ ShareTag.close()
+ TestFile.close()
+ TrainingFile_reformated.close()
+ TestFile_reformated.close()
+ print("Generation Finished")
+
+ return(b)
+
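share() keeps a movie pair only when the two movies' tag sets have at least 10 tags in common. A tiny self-contained illustration of that rule; the movie ids and tag values below are invented for the example:

# Toy illustration of the >= 10 shared-tag rule used in share().
tags_a = set(str(t) for t in range(1, 13))   # movie 5 has tags 1..12
tags_b = set(str(t) for t in range(3, 20))   # movie 9 has tags 3..19
shared = tags_a.intersection(tags_b)         # tags 3..12 -> 10 shared tags
if len(shared) >= 10:
    print('5\t9')                            # this pair would be written to the share file

neighborhood() then uses those pairs to find, for each (user, movie) rating, the neighboring movies the user has already rated, and writes one feature line per rating that begins with the rating, the neighbor count and two 1s, followed by the index:value neighborhood features.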
