
Commit

Add more algorithms to the redesign, add matrix optimizations, and clean up code

bjw4ph committed Feb 26, 2018
1 parent 00bfbf9 commit 10a3aff
Showing 21 changed files with 525 additions and 332 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,4 +1,5 @@
.project
.pydevproject
*.pyc
-./SimulationResults/*
+./SimulationResults/*
+/SimulationResults/
13 changes: 7 additions & 6 deletions DiffList/DiffListClasses.py
@@ -5,29 +5,30 @@ def __init__(self, alg_name):
        BaseDiffList.__init__(self, alg_name)
        self.name = 'CoTheta'

-    def update_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
-        self.diff[alg_name] += reward_manager.getL2Diff(user.CoTheta[:reward_manager.context_dimension], alg.getCoTheta(user.id)[:reward_manager.context_dimension])
+    def update_class_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
+        diff = reward_manager.getL2Diff(user.CoTheta[:reward_manager.context_dimension], alg.getCoTheta(user.id)[:reward_manager.context_dimension])
+        self.diff[alg_name] += diff

class ThetaDiffList(BaseDiffList):
    def __init__(self, alg_name):
        BaseDiffList.__init__(self, alg_name)
        self.name = 'Theta'

-    def update_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
+    def update_class_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
        self.diff[alg_name] += reward_manager.getL2Diff(user.theta, alg.getTheta(user.id))

class WDiffList(BaseDiffList):
    def __init__(self, alg_name):
        BaseDiffList.__init__(self, alg_name)
        self.name = 'W'

-    def update_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
-        self.diff[alg_name] += reward_manager.getL2Diff(reward_manager.W.T[user.id], alg.getW(u.id))
+    def update_class_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
+        self.diff[alg_name] += reward_manager.getL2Diff(reward_manager.W.T[user.id], alg.getW(user.id))

class VDiffList(BaseDiffList):
    def __init__(self, alg_name):
        BaseDiffList.__init__(self, alg_name)
        self.name = 'V'

-    def update_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
+    def update_class_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
        self.diff[alg_name] += reward_manager.getL2Diff(reward_manager.articles[pickedArticle.id].featureVector, alg.getV(pickedArticle.id))
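
Aside, not part of the commit: each list above accumulates a per-iteration parameter-estimation error through the manager's getL2Diff helper, whose implementation is not shown in this diff. A minimal sketch of what such a helper is assumed to compute, namely the Euclidean (L2) distance between the true and estimated vectors:

import numpy as np

def getL2Diff(x, y):
    # Assumed behaviour (the real helper is not shown in this commit):
    # Euclidean distance between the true parameter vector and the estimate.
    return np.linalg.norm(np.asarray(x) - np.asarray(y))
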
8 changes: 6 additions & 2 deletions DiffList/DiffManager.py
@@ -20,7 +20,11 @@ def add_algorithm(self, alg_name, pref_dict):
            else:
                self.lists_dict['W'] = WDiffList(alg_name)

-        # if pref_dict['CanEstimateV']:
+        if pref_dict['CanEstimateV']:
+            if self.lists_dict.has_key('V'):
+                self.lists_dict['V'].add(alg_name)
+            else:
+                self.lists_dict['V'] = VDiffList(alg_name)

    def initial_write(self, f):
        for value in self.lists_dict.values():
@@ -33,7 +37,7 @@ def iteration_write(self, f):
    def update_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
        for value in self.lists_dict.values():
            if value.includes(alg_name):
-                value.update_parameters(alg_name, reward_manager, user, alg, pickedArticle, reward, noise)
+                value.update_class_parameters(alg_name, reward_manager, user, alg, pickedArticle, reward, noise)

    def append_to_lists(self, userSize):
        for value in self.lists_dict.values():
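
Aside, not part of the commit: add_algorithm registers an algorithm in one diff list per capability flag, so V-estimation error is now tracked for any algorithm that reports CanEstimateV. A hypothetical registration call, assuming a no-argument DiffManager constructor; the algorithm name and flag values are made up:

from DiffList.DiffManager import DiffManager

diff_manager = DiffManager()
diff_manager.add_algorithm('HypotheticalAlg', {
    'CanEstimateUserPreference': False,
    'CanEstimateCoUserPreference': True,
    'CanEstimateW': True,
    'CanEstimateV': True,
})
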
60 changes: 11 additions & 49 deletions RewardManager.py
@@ -9,14 +9,14 @@
import matplotlib.pyplot as plt

class RewardManager():
-    def __init__(self, arg_dict, reward_type = 'linear', reward_options = {}):
+    def __init__(self, arg_dict, reward_type = 'linear'):
        for key in arg_dict:
            setattr(self, key, arg_dict[key])
        #self.W, self.W0 = self.constructAdjMatrix(self.sparseLevel)
        if(reward_type == 'social_linear'):
            self.reward = SocialLinearReward(self.k, self.W)
        else:
-            self.reward = LinearReward(self.k, reward_options)
+            self.reward = LinearReward(self.k)

    def batchRecord(self, iter_):
        print "Iteration %d"%iter_, "Pool", len(self.articlePool)," Elapsed time", datetime.datetime.now() - self.startTime
@@ -38,18 +38,10 @@ def runAlgorithms(self, algorithms, diffLists):
tim_ = []
BatchCumlateRegret = {}
AlgRegret = {}
ThetaDiffList = {}
CoThetaDiffList = {}
WDiffList = {}
VDiffList = {}
CoThetaVDiffList = {}
RDiffList ={}
RVDiffList = {}

ThetaDiff = {}
CoThetaDiff = {}
WDiff = {}
VDiff = {}
CoThetaVDiff = {}
RDiff ={}
RVDiff = {}
@@ -90,19 +82,6 @@ def runAlgorithms(self, algorithms, diffLists):

        #Testing
        for iter_ in range(self.testing_iterations):
-            # prepare to record theta estimation error
-            # for alg_name, alg in algorithms.items():
-            #     if alg.CanEstimateUserPreference:
-            #         ThetaDiff[alg_name] = 0
-            #     if alg.CanEstimateCoUserPreference:
-            #         CoThetaDiff[alg_name] = 0
-            #     if alg.CanEstimateW:
-            #         WDiff[alg_name] = 0
-            #     if alg.CanEstimateV:
-            #         VDiff[alg_name] = 0
-            #         CoThetaVDiff[alg_name] = 0
-            #         RVDiff[alg_name] = 0
-            #         RDiff[alg_name] = 0

            for u in self.users:
                self.regulateArticlePool() # select random articles
@@ -115,22 +94,15 @@ def runAlgorithms(self, algorithms, diffLists):
                OptimalReward += noise

                for alg_name, alg in algorithms.items():
-                    if alg_name == 'linUCB' or alg_name == 'CoLin':
-                        recommendation = alg.createRecommendation(self.articlePool, u.id, self.k)
+                    recommendation = alg.createRecommendation(self.articlePool, u.id, self.k)

-                        pickedArticle = recommendation.articles[0]
-                        reward, rewardList = self.reward.getRecommendationReward(u, recommendation, noise)
-                        if (self.testing_method=="online"):
-                            #alg.updateParameters(pickedArticle, reward, u.id)
-                            alg.updateRecommendationParameters(recommendation, rewardList, u.id)
-
-                    else:
-                        pickedArticle = alg.decide(self.articlePool, u.id)
-                        reward = self.reward.getReward(u, pickedArticle) + noise
-                        if (self.testing_method=="online"): # for batch test, do not update while testing
-                            alg.updateParameters(pickedArticle, reward, u.id)
-                        if alg_name =='CLUB':
-                            n_components= alg.updateGraphClusters(u.id,'False')
+                    pickedArticle = recommendation.articles[0]
+                    reward, rewardList = self.reward.getRecommendationReward(u, recommendation, noise)
+                    if (self.testing_method=="online"):
+                        #alg.updateParameters(pickedArticle, reward, u.id)
+                        alg.updateRecommendationParameters(recommendation, rewardList, u.id)
+                    if alg_name =='CLUB':
+                        n_components= alg.updateGraphClusters(u.id,'False')

                    regret = OptimalReward - reward
                    AlgRegret[alg_name].append(regret)
@@ -142,17 +114,7 @@ def runAlgorithms(self, algorithms, diffLists):

                    # #update parameter estimation record
                    diffLists.update_parameters(alg_name, self, u, alg, pickedArticle, reward, noise)
-                    # if alg.CanEstimateUserPreference:
-                    #     ThetaDiff[alg_name] += self.getL2Diff(u.theta, alg.getTheta(u.id))
-                    # if alg.CanEstimateCoUserPreference:
-                    #     CoThetaDiff[alg_name] += self.getL2Diff(u.CoTheta[:self.context_dimension], alg.getCoTheta(u.id)[:self.context_dimension])
-                    # if alg.CanEstimateW:
-                    #     WDiff[alg_name] += self.getL2Diff(self.W.T[u.id], alg.getW(u.id))
-                    # if alg.CanEstimateV:
-                    #     VDiff[alg_name] += self.getL2Diff(self.articles[pickedArticle.id].featureVector, alg.getV(pickedArticle.id))
-                    #     CoThetaVDiff[alg_name] += self.getL2Diff(u.CoTheta[self.context_dimension:], alg.getCoTheta(u.id)[self.context_dimension:])
-                    #     RVDiff[alg_name] += abs(u.CoTheta[self.context_dimension:].dot(self.articles[pickedArticle.id].featureVector[self.context_dimension:]) - alg.getCoTheta(u.id)[self.context_dimension:].dot(alg.getV(pickedArticle.id)[self.context_dimension:]))
-                    #     RDiff[alg_name] += reward-noise - alg.getCoTheta(u.id).dot(alg.getV(pickedArticle.id))

            if 'syncCoLinUCB' in algorithms:
                algorithms['syncCoLinUCB'].LateUpdate()
            diffLists.append_to_lists(userSize)
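
Aside, not part of the commit: after this change the testing loop no longer special-cases 'linUCB' and 'CoLin'; every algorithm is expected to expose the same recommendation interface. A minimal sketch of that contract with a toy algorithm standing in for the real ones; the method names follow the diff, the bodies are illustrative:

class Recommendation(object):
    # Stand-in for the project's recommendation object; it only needs to
    # carry the list of recommended articles.
    def __init__(self, articles):
        self.articles = articles

class ToyAlgorithm(object):
    def createRecommendation(self, pool, userID, k):
        # Score the candidate pool however the algorithm likes; return the top k.
        return Recommendation(pool[:k])

    def updateRecommendationParameters(self, recommendation, rewardList, userID):
        # Update internal estimates from the k observed rewards.
        for article, reward in zip(recommendation.articles, rewardList):
            pass  # parameter update would go here
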
26 changes: 4 additions & 22 deletions Rewards/LinearReward.py
@@ -2,7 +2,7 @@
import numpy as np

class LinearReward(Reward):
-    def __init__(self, k, reward_dict):
+    def __init__(self, k, reward_dict={}):
        Reward.__init__(self, k)
        for key in reward_dict:
            setattr(self, key, reward_dict[key])
@@ -15,35 +15,17 @@ def getReward(self, user, pickedArticle):
        # return np.dot(user.CoTheta, pickedArticle.featureVector)
        ###########
        # Should get
-        return np.dot(self.getTheta(user), pickedArticle.featureVector)
+        return np.dot(user.theta, pickedArticle.featureVector)
        #return eval(self.reward_function)

    def getRecommendationReward(self, user, recommendation, noise):
-        print "get linear recommendation reward"
        total = 0
        rewardList = []
        for i in recommendation.articles:
            articleReward = self.getReward(user, i) + noise
            total += articleReward
            rewardList.append(articleReward)
-        print "Total: " + str(total)
        return (total/self.k), rewardList

-    # def getOptimalRecommendationReward(self, user, articlePool, k):
-    #     total = 0
-    #     local_pool = articlePool
-    #     for x in range(k):
-    #         articleReward, articlePicked = self.getOptimalReward(user, local_pool)
-    #         total += articleReward
-    #         local_pool.remove(articlePicked)
-    #     return total/k
-
-    # def getOptimalReward(self, user, articlePool):
-    #     maxReward = float('-inf')
-    #     maxx = None
-    #     for x in articlePool:
-    #         reward = self.getReward(user, x)
-    #         if reward > maxReward:
-    #             maxReward = reward
-    #             maxx = x
-    #     return maxReward, x
+    def get_user_features(self, user):
+        return user.theta
31 changes: 22 additions & 9 deletions Rewards/Reward.py
@@ -1,3 +1,5 @@
+import numpy as np
+
class Reward():
    def __init__(self, k):
        self.k = k
@@ -12,12 +14,23 @@ def getOptimalRecommendationReward(self, user, articlePool, k):
            #local_pool.remove(articlePicked)
        return total/k

-    def getOptimalReward(self, user, articlePool, exclude = []):
-        maxReward = float('-inf')
-        maxx = None
-        for x in articlePool:
-            reward = self.getReward(user, x)
-            if reward > maxReward and x not in exclude:
-                maxReward = reward
-                maxx = x
-        return maxReward, x
+    def getOptimalReward(self, user, articlePool, exclude = []):
+        art_features = np.empty([len(articlePool), len(articlePool[0].featureVector)])
+        for i in range(len(articlePool)):
+            art_features[i, :] = articlePool[i].featureVector
+        user_features = self.get_user_features(user)
+        reward_matrix = np.dot(art_features, user_features)
+        pool_position = np.argmax(reward_matrix)
+        return reward_matrix[pool_position], articlePool[pool_position]
+
+    # ### Broadcasting Here #######
+    # def getOptimalReward(self, user, articlePool, exclude = []):
+    #     maxReward = float('-inf')
+    #     maxx = None
+    #     for x in articlePool:
+    #         reward = self.getReward(user, x)
+    #         if reward > maxReward and x not in exclude:
+    #             #if reward > maxReward:
+    #             maxReward = reward
+    #             maxx = x
+    #     return maxReward, x
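
Aside, not part of the commit: the new getOptimalReward replaces the per-article Python loop with a single matrix-vector product followed by argmax, which is the matrix optimization mentioned in the commit message (note that the rewritten version does not use the exclude argument). A self-contained sketch showing that the two formulations pick the same article; the Article stand-in and the random data are illustrative:

import numpy as np

np.random.seed(0)

class Article(object):
    def __init__(self, featureVector):
        self.featureVector = featureVector

pool = [Article(np.random.rand(5)) for _ in range(20)]
user_features = np.random.rand(5)

# Vectorized form: stack the features, one dot product scores every article.
art_features = np.array([a.featureVector for a in pool])
rewards = art_features.dot(user_features)
best = int(np.argmax(rewards))

# Loop form (the commented-out version kept above), without exclusions.
loop_best = max(range(len(pool)), key=lambda i: pool[i].featureVector.dot(user_features))

assert best == loop_best
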
26 changes: 3 additions & 23 deletions Rewards/SocialLinearReward.py
@@ -15,33 +15,13 @@ def getRecommendationReward(self, user, recommendation, noise, cotheta = False):
        total = 0
        rewardList = []
        for i in recommendation.articles:
-            if cotheta is False:
-                articleReward = np.dot(user.CoTheta, i.featureVector) + noise
-            else:
-                articleReward = np.dot(cotheta, i.featureVector)
+            articleReward = np.dot(user.CoTheta, i.featureVector) + noise
            total += articleReward
            rewardList.append(articleReward)
        return (total/self.k), rewardList

-    # def getOptimalRecommendationReward(self, user, articlePool, k):
-    #     total = 0
-    #     prev_selections = []
-    #     for x in range(k):
-    #         articleReward, articlePicked = self.getOptimalReward(user, articlePool, prev_selections)
-    #         total += articleReward
-    #         prev_selections.append(articlePicked)
-    #         #local_pool.remove(articlePicked)
-    #     return total/k
-
-    # def getOptimalReward(self, user, articlePool, exclude = []):
-    #     maxReward = float('-inf')
-    #     maxx = None
-    #     for x in articlePool:
-    #         reward = self.getReward(user, x)
-    #         if reward > maxReward and x not in exclude:
-    #             maxReward = reward
-    #             maxx = x
-    #     return maxReward, x
+    def get_user_features(self, user):
+        return user.CoTheta

    def constructLaplacianMatrix(self, W, Gepsilon):
        G = W.copy()
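
Aside, not part of the commit: with get_user_features defined here and in LinearReward above, the shared getOptimalReward in the base class can score articles against whichever user vector the subclass supplies. A condensed sketch of that hook pattern; class and attribute names follow the diff, the bodies are trimmed to the relevant parts:

import numpy as np

class Reward(object):
    def getOptimalReward(self, user, articlePool):
        art_features = np.array([a.featureVector for a in articlePool])
        rewards = np.dot(art_features, self.get_user_features(user))
        best = np.argmax(rewards)
        return rewards[best], articlePool[best]

class LinearReward(Reward):
    def get_user_features(self, user):
        return user.theta        # individual preference vector

class SocialLinearReward(Reward):
    def get_user_features(self, user):
        return user.CoTheta      # socially propagated preference vector
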

2 comments on commit 10a3aff

@MareoRaft

This looks great! Is this working or still in progress?

@huazhengwang (Collaborator)


@MareoRaft We are almost finished with the redesign and will merge the update to the master branch soon :)
