diff --git a/DiffList/DiffListClasses.py b/DiffList/DiffListClasses.py index 7343e2e..9232093 100644 --- a/DiffList/DiffListClasses.py +++ b/DiffList/DiffListClasses.py @@ -15,7 +15,7 @@ def __init__(self, alg_name): self.name = 'Theta' def update_class_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise): - self.diff[alg_name] += reward_manager.getL2Diff(user.theta, alg.getTheta(user.id)) + self.diff[alg_name] += reward_manager.getL2Diff(user.theta[:reward_manager.context_dimension], alg.getTheta(user.id)) class WDiffList(BaseDiffList): def __init__(self, alg_name): diff --git a/RewardManager.py b/RewardManager.py index 64bc370..6c93e8e 100644 --- a/RewardManager.py +++ b/RewardManager.py @@ -95,12 +95,13 @@ def runAlgorithms(self, algorithms, diffLists): for alg_name, alg in algorithms.items(): recommendation = alg.createRecommendation(self.articlePool, u.id, self.k) - - pickedArticle = recommendation.articles[0] - reward, rewardList = self.reward.getRecommendationReward(u, recommendation, noise) + + # Assuming that the user will always be selecting one item for each iteration + #pickedArticle = recommendation.articles[0] + reward, pickedArticle = self.reward.getRecommendationReward(u, recommendation, noise) if (self.testing_method=="online"): - #alg.updateParameters(pickedArticle, reward, u.id) - alg.updateRecommendationParameters(recommendation, rewardList, u.id) + alg.updateParameters(pickedArticle, reward, u.id) + #alg.updateRecommendationParameters(recommendation, rewardList, u.id) if alg_name =='CLUB': n_components= alg.updateGraphClusters(u.id,'False') diff --git a/Rewards/LinearReward.py b/Rewards/LinearReward.py index 5bdedc7..3b177de 100644 --- a/Rewards/LinearReward.py +++ b/Rewards/LinearReward.py @@ -18,14 +18,5 @@ def getReward(self, user, pickedArticle): return np.dot(user.theta, pickedArticle.featureVector) #return eval(self.reward_function) - def getRecommendationReward(self, user, recommendation, noise): - total = 0 - rewardList = [] - for i in recommendation.articles: - articleReward = self.getReward(user, i) + noise - total += articleReward - rewardList.append(articleReward) - return (total/self.k), rewardList - def get_user_features(self, user): return user.theta diff --git a/Rewards/Reward.py b/Rewards/Reward.py index add748e..ab9dbd9 100644 --- a/Rewards/Reward.py +++ b/Rewards/Reward.py @@ -23,14 +23,13 @@ def getOptimalReward(self, user, articlePool, exclude = []): pool_position = np.argmax(reward_matrix) return reward_matrix[pool_position], articlePool[pool_position] - # ### Broadcasting Here ####### - # def getOptimalReward(self, user, articlePool, exclude = []): - # maxReward = float('-inf') - # maxx = None - # for x in articlePool: - # reward = self.getReward(user, x) - # if reward > maxReward and x not in exclude: - # #if reward > maxReward: - # maxReward = reward - # maxx = x - # return maxReward, x \ No newline at end of file + def getRecommendationReward(self, user, recommendation, noise): + max_reward = float('-inf') + max_article = None + for i in recommendation.articles: + articleReward = self.getReward(user, i) + noise + if articleReward > max_reward: + max_reward = articleReward + max_article = i + return max_reward, max_article + diff --git a/Rewards/SocialLinearReward.py b/Rewards/SocialLinearReward.py index 3a03d78..e1a3329 100644 --- a/Rewards/SocialLinearReward.py +++ b/Rewards/SocialLinearReward.py @@ -11,14 +11,14 @@ def getReward(self, user, pickedArticle): # How to conditionally change return np.dot(user.CoTheta, pickedArticle.featureVector) - def getRecommendationReward(self, user, recommendation, noise, cotheta = False): - total = 0 - rewardList = [] - for i in recommendation.articles: - articleReward = np.dot(user.CoTheta, i.featureVector) + noise - total += articleReward - rewardList.append(articleReward) - return (total/self.k), rewardList + # def getRecommendationReward(self, user, recommendation, noise, cotheta = False): + # total = 0 + # rewardList = [] + # for i in recommendation.articles: + # articleReward = np.dot(user.CoTheta, i.featureVector) + noise + # total += articleReward + # rewardList.append(articleReward) + # return (total/self.k), rewardList def get_user_features(self, user): return user.CoTheta diff --git a/Simulation.py b/Simulation.py index 3477fcd..8257abf 100644 --- a/Simulation.py +++ b/Simulation.py @@ -152,7 +152,21 @@ def generate_algorithms(alg_dict, W, system_params): rewardManagerDict['Gepsilon'] = 1 user['default_file'] = os.path.join(sim_files_folder, "users_"+str(n_users)+"context_"+str(context_dimension)+"latent_"+str(latent_dimension)+ "Ugroups" + str(UserGroups)+".json") - if user.has_key('collaborative') and user['collaborative']: + # Override User type + if gen.has_key('collaborative'): + if gen['collaborative']: + use_coUsers = True + reward_type = 'social_linear' + else: + use_coUsers = False + reward_type = 'linear' + else: + use_coUsers = user.has_key('collaborative') and user['collaborative'] + reward_type = reco['type'] if reco.has_key('type') else 'linear' + + + #if user.has_key('collaborative') and user['collaborative']: + if use_coUsers: UM = CoUserManager(context_dimension+latent_dimension, user, argv={'l2_limit':1, 'sparseLevel': n_users, 'matrixNoise': rewardManagerDict['matrixNoise']}) else: UM = UserManager(context_dimension+latent_dimension, user, argv={'l2_limit':1}) @@ -171,7 +185,7 @@ def generate_algorithms(alg_dict, W, system_params): if article.has_key('save') and article['save']: AM.saveArticles(articles, articlesFilename, force=False) rewardManagerDict['k'] = reco['k'] if reco.has_key('k') else 1 - reward_type = reco['type'] if reco.has_key('type') else 'linear' + #reward_type = reco['type'] if reco.has_key('type') else 'linear' #PCA pca_articles(articles, 'random') diff --git a/config.yaml b/config.yaml index beec188..aa8b46a 100644 --- a/config.yaml +++ b/config.yaml @@ -8,7 +8,7 @@ # System level settings to be used in all algorithms general: - testing_iterations: 100 # (int) Number of iterations to testing and refine model upon + testing_iterations: 300 # (int) Number of iterations to testing and refine model upon training_iterations: 0 # (int) Number of iterations to train a model context_dimension: 16 # (int) The number of features which the algorithm knows about each article hidden_dimension: 0 # (int) The number of features hidden from the algorithms @@ -16,6 +16,7 @@ general: batch_size: 1 # (int) testing_method: 'online' # (string) online: update the model after each iterations plot: True # (True/False) Should plots be created + collaborative: True # (True/False) connection between collaborative user and social_linear rewards # Different settings for user objects user: @@ -24,7 +25,7 @@ user: theta_func: featureUniform # (string) load: no # (yes/no) Load the theta for each user from file specified by file_name save: no # (yes/no Save the thetas for each user to be loaded for future simulations - collaborative: yes # (yes/no) Is the user able to access information about other users + collaborative: no # (yes/no) Is the user able to access information about other users, overridden by general collaborative setting file_name: test.json # (string) File from which to load user thetas # Different settings for article arms @@ -38,7 +39,7 @@ article: # Reward to determine the accuracy of choices made by the algorithm reward: k: 1 # (int) number of articles to recommend for each user each iteration - type: social_linear # (string) linear/ social_linear: determines which reward function is used + type: linear # (string) linear/ social_linear: determines which reward function is used, overridden by general collaborative setting # Parameters for the different algorithms alg: @@ -66,6 +67,7 @@ alg: CoLin: alpha: 0.3 lambda_: 0.1 + use_alpha_t: False # Use a theoretical alpha_t value parameters: Theta: False CoTheta: True @@ -80,7 +82,7 @@ alg: W: False V: False HLinUCB: - alpha: 0.1 + alpha: 0.3 alpha2: 0.1 lambda_: 0.1 parameters: @@ -88,42 +90,42 @@ alg: CoTheta: True W: False V: True - UCBPMF: - sigma : 0.5 - sigmaU : 1 - sigmaV : 1 - alpha : 0.1 - parameters: - Theta: False - CoTheta: False - W: False - V: False - FactorUCB: - alpha: 0.05 - alpha2: 0.025 - lambda_: 0.1 - parameters: - Theta: False - CoTheta: True - W: False - V: True - CLUB: - alpha: 0.1 - alpha2: 0.5 - lambda_: 0.1 - cluster_init : 'Erdos-Renyi' - parameters: - Theta: False - CoTheta: False - W: False - V: False - PTS: - particle_num : 10 - sigma : 0.5 - sigmaU : 1 - sigmaV : 1 - parameters: - Theta: False - CoTheta: False - W: False - V: False + # UCBPMF: + # sigma : 0.5 + # sigmaU : 1 + # sigmaV : 1 + # alpha : 0.1 + # parameters: + # Theta: False + # CoTheta: False + # W: False + # V: False + # FactorUCB: + # alpha: 0.05 + # alpha2: 0.025 + # lambda_: 0.1 + # parameters: + # Theta: False + # CoTheta: True + # W: False + # V: True + # CLUB: + # alpha: 0.1 + # alpha2: 0.5 + # lambda_: 0.1 + # cluster_init : 'Erdos-Renyi' + # parameters: + # Theta: False + # CoTheta: False + # W: False + # V: False + # PTS: + # particle_num : 10 + # sigma : 0.5 + # sigmaU : 1 + # sigmaV : 1 + # parameters: + # Theta: False + # CoTheta: False + # W: False + # V: False diff --git a/lib/BaseAlg.py b/lib/BaseAlg.py index 68d05a9..d944caa 100644 --- a/lib/BaseAlg.py +++ b/lib/BaseAlg.py @@ -21,10 +21,7 @@ def decide(self, pool_articles, userID, exclude = []): return pool_articles[len(exclude)] def createRecommendation(self, pool_articles, userID, k): - articles = [] - for x in range(k): - articlePicked = self.decide(pool_articles, userID, articles) - articles.append(articlePicked) + articles = self.decide(pool_articles, userID, k) recommendation = Recommendation(k, articles) return recommendation diff --git a/lib/CLUB.py b/lib/CLUB.py index 2b75ada..17dbe73 100644 --- a/lib/CLUB.py +++ b/lib/CLUB.py @@ -67,22 +67,25 @@ def __init__(self, arg_dict): N_components, components = connected_components(g) - def decide(self,pool_articles,userID, exclude = []): + def decide(self,pool_articles,userID, k = 1): self.users[userID].updateParametersofClusters(self.clusters,userID,self.Graph, self.users) - maxPTA = float('-inf') - articlePicked = None + articles = [] + for i in range(k): + maxPTA = float('-inf') + articlePicked = None - for x in pool_articles: - x_pta = self.users[userID].getProb(self.alpha, x.contextFeatureVector[:self.dimension],self.time) - # pick article with highest Prob - if maxPTA < x_pta: - articlePicked = x.id - featureVectorPicked = x.contextFeatureVector[:self.dimension] - picked = x - maxPTA = x_pta + for x in pool_articles: + x_pta = self.users[userID].getProb(self.alpha, x.contextFeatureVector[:self.dimension],self.time) + # pick article with highest Prob + if maxPTA < x_pta and x not in articles: + articlePicked = x.id + featureVectorPicked = x.contextFeatureVector[:self.dimension] + picked = x + maxPTA = x_pta + articles.append(picked) self.time +=1 - return picked + return articles def updateParameters(self, articlePicked, click,userID): self.users[userID].updateParameters(articlePicked.contextFeatureVector[:self.dimension], click, self.alpha_2) def updateGraphClusters(self,userID, binaryRatio): diff --git a/lib/CoLin.py b/lib/CoLin.py index d427514..531638e 100644 --- a/lib/CoLin.py +++ b/lib/CoLin.py @@ -89,13 +89,13 @@ def decide_old(self, pool_articles, userID, exclude = []): for x in pool_articles: x_pta = self.USERS.getProb(self.alpha, x, userID) # pick article with highest Prob - if maxPTA < x_pta and x not in exclude: + if maxPTA < x_pta: articlePicked = x maxPTA = x_pta - return articlePicked + return [articlePicked] - def decide(self, pool_articles, userID, exclude = []): + def decide(self, pool_articles, userID, k = 1): # MEAN art_features = np.empty([len(pool_articles), len(pool_articles[0].contextFeatureVector)]) for i in range(len(pool_articles)): @@ -110,11 +110,20 @@ def decide(self, pool_articles, userID, exclude = []): TempFeatureM.T[userID] = pool_articles[i].contextFeatureVector art_temp_features[i, :] = vectorize(TempFeatureM) var_matrix = np.sqrt(np.dot(np.dot(art_temp_features, self.USERS.CCA), art_temp_features.T)) - self.USERS.calculateAlphaT() - pta_matrix = mean_matrix + self.USERS.alpha_t*np.diag(var_matrix) + #self.USERS.calculateAlphaT() + if self.use_alpha_t: - pool_position = np.argmax(pta_matrix) - return pool_articles[pool_position] + self.USERS.calculateAlphaT() + pta_matrix = mean_matrix + self.USERS.alpha_t*np.diag(var_matrix) + else: + pta_matrix = mean_matrix + self.alpha*np.diag(var_matrix) + + pool_positions = np.argsort(pta_matrix)[(k*-1):] + articles = [] + for i in range(k): + articles.append(pool_articles[pool_positions[i]]) + return articles + #return pool_articles[pool_position] def updateParameters(self, articlePicked, click, userID, update='Inv'): self.USERS.updateParameters(articlePicked, click, userID, update) diff --git a/lib/GOBLin.py b/lib/GOBLin.py index 4860ba1..a4dd886 100644 --- a/lib/GOBLin.py +++ b/lib/GOBLin.py @@ -20,6 +20,7 @@ def __init__(self, featureDimension, lambda_, userNum, W): self.AInv = np.linalg.inv(self.A) self.theta = np.dot(self.AInv , self.b) + print np.kron(W, np.identity(n=featureDimension)) self.STBigWInv = sqrtm( np.linalg.inv(np.kron(W, np.identity(n=featureDimension))) ) self.STBigW = sqrtm(np.kron(W, np.identity(n=featureDimension))) def updateParameters(self, articlePicked, click, userID, update): @@ -55,12 +56,12 @@ class GOBLinAlgorithm(CoLinUCBAlgorithm): def __init__(self, arg_dict): CoLinUCBAlgorithm.__init__(self, arg_dict) self.USERS = GOBLinSharedStruct(self.dimension, self.lambda_, self.n_users, self.W) - self.estimates['CanEstimateCoUserPreference'] = False + #self.estimates['CanEstimateCoUserPreference'] = False def getLearntParameters(self, userID): thetaMatrix = matrixize(self.USERS.theta, self.dimension) return thetaMatrix.T[userID] - def decide(self, pool_articles, userID, exclude = []): + def decide(self, pool_articles, userID, k = 1): # MEAN art_features = np.empty([len(pool_articles), len(pool_articles[0].contextFeatureVector)*self.n_users]) for i in range(len(pool_articles)): @@ -72,8 +73,14 @@ def decide(self, pool_articles, userID, exclude = []): var_matrix = np.sqrt(np.dot(np.dot(CoFeaV, self.USERS.AInv), CoFeaV.T).clip(0)) pta_matrix = mean_matrix + self.alpha*np.diag(var_matrix) - pool_position = np.argmax(pta_matrix) - return pool_articles[pool_position] + + pool_positions = np.argsort(pta_matrix)[(k*-1):] + articles = [] + for i in range(k): + articles.append(pool_articles[pool_positions[i]]) + return articles + # pool_position = np.argmax(pta_matrix) + # return pool_articles[pool_position] #inherite from CoLinUCB_SelectUserAlgorithm # class GOBLin_SelectUserAlgorithm(CoLinUCB_SelectUserAlgorithm): diff --git a/lib/LinUCB.py b/lib/LinUCB.py index 8cb7e75..59a88bd 100644 --- a/lib/LinUCB.py +++ b/lib/LinUCB.py @@ -16,6 +16,7 @@ def __init__(self, featureDimension, lambda_, init="zero"): self.time = 0 def updateParameters(self, articlePicked_FeatureVector, click): + change = np.outer(articlePicked_FeatureVector,articlePicked_FeatureVector) self.A += np.outer(articlePicked_FeatureVector,articlePicked_FeatureVector) self.b += articlePicked_FeatureVector*click self.AInv = np.linalg.inv(self.A) @@ -91,7 +92,7 @@ def decide_old(self, pool_articles, userID, exclude = []): return articlePicked - def decide(self, pool_articles, userID, exclude = []): + def decide(self, pool_articles, userID, k = 1): # MEAN art_features = np.empty([len(pool_articles), len(pool_articles[0].contextFeatureVector[:self.dimension])]) for i in range(len(pool_articles)): @@ -103,8 +104,12 @@ def decide(self, pool_articles, userID, exclude = []): var_matrix = np.sqrt(np.dot(np.dot(art_features, self.users[userID].AInv), art_features.T).clip(0)) pta_matrix = mean_matrix + self.alpha*np.diag(var_matrix) - pool_position = np.argmax(pta_matrix) - return pool_articles[pool_position] + + pool_positions = np.argsort(pta_matrix)[(k*-1):] + articles = [] + for i in range(k): + articles.append(pool_articles[pool_positions[i]]) + return articles def getProb(self, pool_articles, userID): means = [] @@ -117,7 +122,6 @@ def getProb(self, pool_articles, userID): def updateParameters(self, articlePicked, click, userID): self.users[userID].updateParameters(articlePicked.contextFeatureVector[:self.dimension], click) - ##### SHOULD THIS BE CALLED GET COTHETA ##### def getCoTheta(self, userID): diff --git a/lib/PTS.py b/lib/PTS.py index 877e399..2d62d6a 100644 --- a/lib/PTS.py +++ b/lib/PTS.py @@ -86,25 +86,25 @@ def __init__(self, arg_dict): # n is number of users self.particles.append(PTSParticleStruct(self.dimension, self.n, self.itemNum, self.sigma, self.sigmaU, self.sigmaV, 1.0/self.particle_num)) self.time = 0 - def decide(self, pool_articles, userID, exclude = []): + def decide(self, pool_articles, userID, k = 1): #Sample a Particle d = np.random.choice(self.particle_num, p = [p.weight for p in self.particles]) p = self.particles[d] #For PTS-B - - - maxPTA = float('-inf') - articlePicked = None - - for x in pool_articles: - x_pta = p.users[userID].U.dot(p.articles[x.id].V) - # pick article with highest Prob - # print x_pta - if maxPTA < x_pta: - articlePicked = x - maxPTA = x_pta - return articlePicked + articles = [] + for i in range(k): + maxPTA = float('-inf') + articlePicked = None + + for x in pool_articles: + x_pta = p.users[userID].U.dot(p.articles[x.id].V) + # pick article with highest Prob + if maxPTA < x_pta and x not in articles: + articlePicked = x + maxPTA = x_pta + articles.append(articlePicked) + return articles def updateParameters(self, articlePicked, click, userID): self.time += 1 diff --git a/lib/UCBPMF.py b/lib/UCBPMF.py index e3cdb59..6339c29 100644 --- a/lib/UCBPMF.py +++ b/lib/UCBPMF.py @@ -86,19 +86,20 @@ def __init__(self, arg_dict): # n is number of users self.articles.append(UCBPMFArticleStruct(i, self.dimension, self.sigma, self.sigmaV,)) self.time = 0 - def decide(self, pool_articles, userID, exclude = []): - - maxPTA = float('-inf') - articlePicked = None - - for x in pool_articles: - x_pta = self.users[userID].getProb(self.alpha, self.articles[x.id]) - # pick article with highest Prob - # print x_pta - if maxPTA < x_pta: - articlePicked = x - maxPTA = x_pta - return articlePicked + def decide(self, pool_articles, userID, k = 1): + articles = [] + for i in range(k): + maxPTA = float('-inf') + articlePicked = None + + for x in pool_articles: + x_pta = self.users[userID].getProb(self.alpha, self.articles[x.id]) + # pick article with highest Prob + if maxPTA < x_pta and x not in articles: + articlePicked = x + maxPTA = x_pta + articles.append(articlePicked) + return articles def updateParameters(self, articlePicked, click, userID): self.time += 1 diff --git a/lib/factorUCB.py b/lib/factorUCB.py index dd405a9..1942410 100644 --- a/lib/factorUCB.py +++ b/lib/factorUCB.py @@ -138,21 +138,23 @@ def __init__(self, arg_dict, init='random', window_size = 1, max_window_size = 1 self.window = [] self.time = 0 - def decide(self, pool_articles, userID, exclude = []): - maxPTA = float('-inf') - articlePicked = None - - for x in pool_articles: - self.articles[x.id].V[:self.context_dimension] = x.contextFeatureVector[:self.context_dimension] - x_pta = self.USERS.getProb(self.alpha, self.alpha2, self.articles[x.id], userID) - - # pick article with highest Prob - # print x_pta - if maxPTA < x_pta: - articlePicked = x - maxPTA = x_pta - - return articlePicked + def decide(self, pool_articles, userID, k = 1): + articles = [] + for i in range(k): + maxPTA = float('-inf') + articlePicked = None + + for x in pool_articles: + self.articles[x.id].V[:self.context_dimension] = x.contextFeatureVector[:self.context_dimension] + x_pta = self.USERS.getProb(self.alpha, self.alpha2, self.articles[x.id], userID) + + # pick article with highest Prob + # print x_pta + if maxPTA < x_pta and x not in articles: + articlePicked = x + maxPTA = x_pta + articles.append(articlePicked) + return articles def getProb(self, pool_articles, userID): means = [] diff --git a/lib/hLinUCB.py b/lib/hLinUCB.py index 9c9fbe0..ec05284 100644 --- a/lib/hLinUCB.py +++ b/lib/hLinUCB.py @@ -122,26 +122,23 @@ def __init__(self, arg_dict, init="zero", window_size = 1, max_window_size = 50) self.max_window_size = max_window_size self.window = [] self.time = 0 - # self.estimates['CanEstimateUserPreference'] = False - # self.estimates['CanEstimateCoUserPreference'] = True - # self.estimates['CanEstimateW'] = False - # self.estimates['CanEstimateV'] = True - def decide(self, pool_articles, userID, exclude = []): - maxPTA = float('-inf') - articlePicked = None - - for x in pool_articles: - self.articles[x.id].V[:self.context_dimension] = x.contextFeatureVector[:self.context_dimension] - x_pta = self.users[userID].getProb(self.alpha, self.alpha2, self.articles[x.id]) - - # pick article with highest Prob - # print x_pta - if maxPTA < x_pta: - articlePicked = x - maxPTA = x_pta - - return articlePicked + def decide(self, pool_articles, userID, k = 1): + articles = [] + for i in range(k): + maxPTA = float('-inf') + articlePicked = None + for x in pool_articles: + self.articles[x.id].V[:self.context_dimension] = x.contextFeatureVector[:self.context_dimension] + x_pta = self.users[userID].getProb(self.alpha, self.alpha2, self.articles[x.id]) + + # pick article with highest Prob + # print x_pta + if maxPTA < x_pta and x not in articles: + articlePicked = x + maxPTA = x_pta + articles.append(articlePicked) + return articles def getProb(self, pool_articles, userID): means = [] diff --git a/util_functions.py b/util_functions.py index b25e73d..a1b9956 100644 --- a/util_functions.py +++ b/util_functions.py @@ -28,6 +28,7 @@ def createCoLinDict(specific, general, W, system_params): 'dimension': system_params['context_dim'], 'alpha': 0.3, 'lambda_': 0.1, + 'use_alpha_t': False, 'n_users': system_params['n_users'], 'parameters': { 'Theta': False, @@ -147,7 +148,7 @@ def update_dict(a, b): for j in a[i]: if j in b['parameters']: c[i][j] = a[i][j] - elif i in b: + else: c[i] = a[i] return c