
Commit

Add more algorithms to the redesign, add matrix optimizations, and clean up code

bjw4ph committed Feb 26, 2018
1 parent 00bfbf9 commit 10a3aff
Showing 21 changed files with 525 additions and 332 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,4 +1,5 @@
.project
.pydevproject
*.pyc
-./SimulationResults/*
+./SimulationResults/*
+/SimulationResults/
13 changes: 7 additions & 6 deletions DiffList/DiffListClasses.py
@@ -5,29 +5,30 @@ def __init__(self, alg_name):
        BaseDiffList.__init__(self, alg_name)
        self.name = 'CoTheta'

-    def update_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
-        self.diff[alg_name] += reward_manager.getL2Diff(user.CoTheta[:reward_manager.context_dimension], alg.getCoTheta(user.id)[:reward_manager.context_dimension])
+    def update_class_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
+        diff = reward_manager.getL2Diff(user.CoTheta[:reward_manager.context_dimension], alg.getCoTheta(user.id)[:reward_manager.context_dimension])
+        self.diff[alg_name] += diff

class ThetaDiffList(BaseDiffList):
    def __init__(self, alg_name):
        BaseDiffList.__init__(self, alg_name)
        self.name = 'Theta'

-    def update_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
+    def update_class_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
        self.diff[alg_name] += reward_manager.getL2Diff(user.theta, alg.getTheta(user.id))

class WDiffList(BaseDiffList):
    def __init__(self, alg_name):
        BaseDiffList.__init__(self, alg_name)
        self.name = 'W'

-    def update_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
-        self.diff[alg_name] += reward_manager.getL2Diff(reward_manager.W.T[user.id], alg.getW(u.id))
+    def update_class_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
+        self.diff[alg_name] += reward_manager.getL2Diff(reward_manager.W.T[user.id], alg.getW(user.id))

class VDiffList(BaseDiffList):
    def __init__(self, alg_name):
        BaseDiffList.__init__(self, alg_name)
        self.name = 'V'

-    def update_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
+    def update_class_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
        self.diff[alg_name] += reward_manager.getL2Diff(reward_manager.articles[pickedArticle.id].featureVector, alg.getV(pickedArticle.id))
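
Aside, not part of the commit: each list above accumulates a per-iteration parameter-estimation error through the manager's getL2Diff helper, whose implementation is not shown in this diff. A minimal sketch of what such a helper is assumed to compute, namely the Euclidean (L2) distance between the true and estimated vectors:

import numpy as np

def getL2Diff(x, y):
    # Assumed behaviour (the real helper is not shown in this commit):
    # Euclidean distance between the true parameter vector and the estimate.
    return np.linalg.norm(np.asarray(x) - np.asarray(y))
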
8 changes: 6 additions & 2 deletions DiffList/DiffManager.py
@@ -20,7 +20,11 @@ def add_algorithm(self, alg_name, pref_dict):
            else:
                self.lists_dict['W'] = WDiffList(alg_name)

-        # if pref_dict['CanEstimateV']:
+        if pref_dict['CanEstimateV']:
+            if self.lists_dict.has_key('V'):
+                self.lists_dict['V'].add(alg_name)
+            else:
+                self.lists_dict['V'] = VDiffList(alg_name)

    def initial_write(self, f):
        for value in self.lists_dict.values():
@@ -33,7 +37,7 @@ def iteration_write(self, f):
    def update_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
        for value in self.lists_dict.values():
            if value.includes(alg_name):
-                value.update_parameters(alg_name, reward_manager, user, alg, pickedArticle, reward, noise)
+                value.update_class_parameters(alg_name, reward_manager, user, alg, pickedArticle, reward, noise)

    def append_to_lists(self, userSize):
        for value in self.lists_dict.values():
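
Aside, not part of the commit: add_algorithm registers an algorithm in one diff list per capability flag, so V-estimation error is now tracked for any algorithm that reports CanEstimateV. A hypothetical registration call, assuming a no-argument DiffManager constructor; the algorithm name and flag values are made up:

from DiffList.DiffManager import DiffManager

diff_manager = DiffManager()
diff_manager.add_algorithm('HypotheticalAlg', {
    'CanEstimateUserPreference': False,
    'CanEstimateCoUserPreference': True,
    'CanEstimateW': True,
    'CanEstimateV': True,
})
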
60 changes: 11 additions & 49 deletions RewardManager.py
@@ -9,14 +9,14 @@
import matplotlib.pyplot as plt

class RewardManager():
-    def __init__(self, arg_dict, reward_type = 'linear', reward_options = {}):
+    def __init__(self, arg_dict, reward_type = 'linear'):
        for key in arg_dict:
            setattr(self, key, arg_dict[key])
        #self.W, self.W0 = self.constructAdjMatrix(self.sparseLevel)
        if(reward_type == 'social_linear'):
            self.reward = SocialLinearReward(self.k, self.W)
        else:
-            self.reward = LinearReward(self.k, reward_options)
+            self.reward = LinearReward(self.k)

    def batchRecord(self, iter_):
        print "Iteration %d"%iter_, "Pool", len(self.articlePool)," Elapsed time", datetime.datetime.now() - self.startTime
@@ -38,18 +38,10 @@ def runAlgorithms(self, algorithms, diffLists):
tim_ = []
BatchCumlateRegret = {}
AlgRegret = {}
ThetaDiffList = {}
CoThetaDiffList = {}
WDiffList = {}
VDiffList = {}
CoThetaVDiffList = {}
RDiffList ={}
RVDiffList = {}

ThetaDiff = {}
CoThetaDiff = {}
WDiff = {}
VDiff = {}
CoThetaVDiff = {}
RDiff ={}
RVDiff = {}
@@ -90,19 +82,6 @@ def runAlgorithms(self, algorithms, diffLists):

        #Testing
        for iter_ in range(self.testing_iterations):
-            # prepare to record theta estimation error
-            # for alg_name, alg in algorithms.items():
-            #     if alg.CanEstimateUserPreference:
-            #         ThetaDiff[alg_name] = 0
-            #     if alg.CanEstimateCoUserPreference:
-            #         CoThetaDiff[alg_name] = 0
-            #     if alg.CanEstimateW:
-            #         WDiff[alg_name] = 0
-            #     if alg.CanEstimateV:
-            #         VDiff[alg_name] = 0
-            #         CoThetaVDiff[alg_name] = 0
-            #         RVDiff[alg_name] = 0
-            #         RDiff[alg_name] = 0

            for u in self.users:
                self.regulateArticlePool() # select random articles
@@ -115,22 +94,15 @@ def runAlgorithms(self, algorithms, diffLists):
                OptimalReward += noise

                for alg_name, alg in algorithms.items():
-                    if alg_name == 'linUCB' or alg_name == 'CoLin':
-                        recommendation = alg.createRecommendation(self.articlePool, u.id, self.k)
+                    recommendation = alg.createRecommendation(self.articlePool, u.id, self.k)

-                        pickedArticle = recommendation.articles[0]
-                        reward, rewardList = self.reward.getRecommendationReward(u, recommendation, noise)
-                        if (self.testing_method=="online"):
-                            #alg.updateParameters(pickedArticle, reward, u.id)
-                            alg.updateRecommendationParameters(recommendation, rewardList, u.id)
-
-                    else:
-                        pickedArticle = alg.decide(self.articlePool, u.id)
-                        reward = self.reward.getReward(u, pickedArticle) + noise
-                        if (self.testing_method=="online"): # for batch test, do not update while testing
-                            alg.updateParameters(pickedArticle, reward, u.id)
-                        if alg_name =='CLUB':
-                            n_components= alg.updateGraphClusters(u.id,'False')
+                    pickedArticle = recommendation.articles[0]
+                    reward, rewardList = self.reward.getRecommendationReward(u, recommendation, noise)
+                    if (self.testing_method=="online"):
+                        #alg.updateParameters(pickedArticle, reward, u.id)
+                        alg.updateRecommendationParameters(recommendation, rewardList, u.id)
+                    if alg_name =='CLUB':
+                        n_components= alg.updateGraphClusters(u.id,'False')

                    regret = OptimalReward - reward
                    AlgRegret[alg_name].append(regret)
@@ -142,17 +114,7 @@ def runAlgorithms(self, algorithms, diffLists):

                    # #update parameter estimation record
                    diffLists.update_parameters(alg_name, self, u, alg, pickedArticle, reward, noise)
-                    # if alg.CanEstimateUserPreference:
-                    #     ThetaDiff[alg_name] += self.getL2Diff(u.theta, alg.getTheta(u.id))
-                    # if alg.CanEstimateCoUserPreference:
-                    #     CoThetaDiff[alg_name] += self.getL2Diff(u.CoTheta[:self.context_dimension], alg.getCoTheta(u.id)[:self.context_dimension])
-                    # if alg.CanEstimateW:
-                    #     WDiff[alg_name] += self.getL2Diff(self.W.T[u.id], alg.getW(u.id))
-                    # if alg.CanEstimateV:
-                    #     VDiff[alg_name] += self.getL2Diff(self.articles[pickedArticle.id].featureVector, alg.getV(pickedArticle.id))
-                    #     CoThetaVDiff[alg_name] += self.getL2Diff(u.CoTheta[self.context_dimension:], alg.getCoTheta(u.id)[self.context_dimension:])
-                    #     RVDiff[alg_name] += abs(u.CoTheta[self.context_dimension:].dot(self.articles[pickedArticle.id].featureVector[self.context_dimension:]) - alg.getCoTheta(u.id)[self.context_dimension:].dot(alg.getV(pickedArticle.id)[self.context_dimension:]))
-                    #     RDiff[alg_name] += reward-noise - alg.getCoTheta(u.id).dot(alg.getV(pickedArticle.id))

            if 'syncCoLinUCB' in algorithms:
                algorithms['syncCoLinUCB'].LateUpdate()
            diffLists.append_to_lists(userSize)
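
Aside, not part of the commit: after this change the testing loop no longer special-cases 'linUCB' and 'CoLin'; every algorithm is expected to expose the same recommendation interface. A minimal sketch of that contract with a toy algorithm standing in for the real ones; the method names follow the diff, the bodies are illustrative:

class Recommendation(object):
    # Stand-in for the project's recommendation object; it only needs to
    # carry the list of recommended articles.
    def __init__(self, articles):
        self.articles = articles

class ToyAlgorithm(object):
    def createRecommendation(self, pool, userID, k):
        # Score the candidate pool however the algorithm likes; return the top k.
        return Recommendation(pool[:k])

    def updateRecommendationParameters(self, recommendation, rewardList, userID):
        # Update internal estimates from the k observed rewards.
        for article, reward in zip(recommendation.articles, rewardList):
            pass  # parameter update would go here
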
26 changes: 4 additions & 22 deletions Rewards/LinearReward.py
@@ -2,7 +2,7 @@
import numpy as np

class LinearReward(Reward):
-    def __init__(self, k, reward_dict):
+    def __init__(self, k, reward_dict={}):
        Reward.__init__(self, k)
        for key in reward_dict:
            setattr(self, key, reward_dict[key])
@@ -15,35 +15,17 @@ def getReward(self, user, pickedArticle):
        # return np.dot(user.CoTheta, pickedArticle.featureVector)
        ###########
        # Should get
-        return np.dot(self.getTheta(user), pickedArticle.featureVector)
+        return np.dot(user.theta, pickedArticle.featureVector)
        #return eval(self.reward_function)

    def getRecommendationReward(self, user, recommendation, noise):
-        print "get linear recommendation reward"
        total = 0
        rewardList = []
        for i in recommendation.articles:
            articleReward = self.getReward(user, i) + noise
            total += articleReward
            rewardList.append(articleReward)
-        print "Total: " + str(total)
        return (total/self.k), rewardList

-    # def getOptimalRecommendationReward(self, user, articlePool, k):
-    #     total = 0
-    #     local_pool = articlePool
-    #     for x in range(k):
-    #         articleReward, articlePicked = self.getOptimalReward(user, local_pool)
-    #         total += articleReward
-    #         local_pool.remove(articlePicked)
-    #     return total/k
-
-    # def getOptimalReward(self, user, articlePool):
-    #     maxReward = float('-inf')
-    #     maxx = None
-    #     for x in articlePool:
-    #         reward = self.getReward(user, x)
-    #         if reward > maxReward:
-    #             maxReward = reward
-    #             maxx = x
-    #     return maxReward, x
+    def get_user_features(self, user):
+        return user.theta
31 changes: 22 additions & 9 deletions Rewards/Reward.py
@@ -1,3 +1,5 @@
+import numpy as np
+
class Reward():
    def __init__(self, k):
        self.k = k
@@ -12,12 +14,23 @@ def getOptimalRecommendationReward(self, user, articlePool, k):
            #local_pool.remove(articlePicked)
        return total/k

-    def getOptimalReward(self, user, articlePool, exclude = []):
-        maxReward = float('-inf')
-        maxx = None
-        for x in articlePool:
-            reward = self.getReward(user, x)
-            if reward > maxReward and x not in exclude:
-                maxReward = reward
-                maxx = x
-        return maxReward, x
+    def getOptimalReward(self, user, articlePool, exclude = []):
+        art_features = np.empty([len(articlePool), len(articlePool[0].featureVector)])
+        for i in range(len(articlePool)):
+            art_features[i, :] = articlePool[i].featureVector
+        user_features = self.get_user_features(user)
+        reward_matrix = np.dot(art_features, user_features)
+        pool_position = np.argmax(reward_matrix)
+        return reward_matrix[pool_position], articlePool[pool_position]
+
+    # ### Broadcasting Here #######
+    # def getOptimalReward(self, user, articlePool, exclude = []):
+    #     maxReward = float('-inf')
+    #     maxx = None
+    #     for x in articlePool:
+    #         reward = self.getReward(user, x)
+    #         if reward > maxReward and x not in exclude:
+    #             #if reward > maxReward:
+    #             maxReward = reward
+    #             maxx = x
+    #     return maxReward, x
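
Aside, not part of the commit: the new getOptimalReward replaces the per-article Python loop with a single matrix-vector product followed by argmax, which is the matrix optimization mentioned in the commit message (note that the rewritten version does not use the exclude argument). A self-contained sketch showing that the two formulations pick the same article; the Article stand-in and the random data are illustrative:

import numpy as np

np.random.seed(0)

class Article(object):
    def __init__(self, featureVector):
        self.featureVector = featureVector

pool = [Article(np.random.rand(5)) for _ in range(20)]
user_features = np.random.rand(5)

# Vectorized form: stack the features, one dot product scores every article.
art_features = np.array([a.featureVector for a in pool])
rewards = art_features.dot(user_features)
best = int(np.argmax(rewards))

# Loop form (the commented-out version kept above), without exclusions.
loop_best = max(range(len(pool)), key=lambda i: pool[i].featureVector.dot(user_features))

assert best == loop_best
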
26 changes: 3 additions & 23 deletions Rewards/SocialLinearReward.py
@@ -15,33 +15,13 @@ def getRecommendationReward(self, user, recommendation, noise, cotheta = False):
        total = 0
        rewardList = []
        for i in recommendation.articles:
-            if cotheta is False:
-                articleReward = np.dot(user.CoTheta, i.featureVector) + noise
-            else:
-                articleReward = np.dot(cotheta, i.featureVector)
+            articleReward = np.dot(user.CoTheta, i.featureVector) + noise
            total += articleReward
            rewardList.append(articleReward)
        return (total/self.k), rewardList

-    # def getOptimalRecommendationReward(self, user, articlePool, k):
-    #     total = 0
-    #     prev_selections = []
-    #     for x in range(k):
-    #         articleReward, articlePicked = self.getOptimalReward(user, articlePool, prev_selections)
-    #         total += articleReward
-    #         prev_selections.append(articlePicked)
-    #         #local_pool.remove(articlePicked)
-    #     return total/k
-
-    # def getOptimalReward(self, user, articlePool, exclude = []):
-    #     maxReward = float('-inf')
-    #     maxx = None
-    #     for x in articlePool:
-    #         reward = self.getReward(user, x)
-    #         if reward > maxReward and x not in exclude:
-    #             maxReward = reward
-    #             maxx = x
-    #     return maxReward, x
+    def get_user_features(self, user):
+        return user.CoTheta

    def constructLaplacianMatrix(self, W, Gepsilon):
        G = W.copy()
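
Aside, not part of the commit: with get_user_features defined here and in LinearReward above, the shared getOptimalReward in the base class can score articles against whichever user vector the subclass supplies. A condensed sketch of that hook pattern; class and attribute names follow the diff, the bodies are trimmed to the relevant parts:

import numpy as np

class Reward(object):
    def getOptimalReward(self, user, articlePool):
        art_features = np.array([a.featureVector for a in articlePool])
        rewards = np.dot(art_features, self.get_user_features(user))
        best = np.argmax(rewards)
        return rewards[best], articlePool[best]

class LinearReward(Reward):
    def get_user_features(self, user):
        return user.theta        # individual preference vector

class SocialLinearReward(Reward):
    def get_user_features(self, user):
        return user.CoTheta      # socially propagated preference vector
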

2 comments on commit 10a3aff

@MareoRaft

This looks great! Is this working or still in progress?

@huazhengwang (Collaborator)


@MareoRaft We are almost finished with the redesign and will merge the update to the master branch soon :)
