In [3]:
import numpy as np
import matplotlib.pyplot as plt 
%matplotlib inline

In [11]:
class player(object):
    """Bandit learner for a single player choosing among ``num_arm`` arms.

    For every arm the player keeps a pull count, a running mean of observed
    rewards (``prefer``) and an upper-confidence index (``ucb``).  Arms that
    have never been pulled keep the large sentinel ``inf`` as their index so
    they are ranked ahead of every explored arm.
    """

    def __init__(self, num_arm):
        """Create a learner with no observations for ``num_arm`` arms."""
        self.num_arm = num_arm
        self.count = np.zeros(num_arm)
        self.prefer = np.zeros(num_arm)
        self.inf = 1000000
        # Kept under a name different from the rankings() method: an instance
        # attribute called `rankings` would shadow the method and make
        # `player.rankings()` fail with "ndarray is not callable".
        self.ucb = np.ones(num_arm) * self.inf

    def update(self, reward, arm_index, round):
        """Record ``reward`` for ``arm_index`` and refresh that arm's index.

        Args:
            reward: observed reward for the pulled arm.
            arm_index: index of the arm that was pulled.
            round: current round number used in the exploration bonus.
                Values below 1 are treated as 1 so the logarithm stays
                non-negative and the bonus is never NaN.
        """
        self.count[arm_index] += 1
        pulls = self.count[arm_index]
        # Incremental sample mean: new_mean = old_mean + (x - old_mean) / n.
        self.prefer[arm_index] += (reward - self.prefer[arm_index]) / pulls
        bonus = np.sqrt(3 * np.log(max(round, 1)) / (2 * pulls))
        self.ucb[arm_index] = self.prefer[arm_index] + bonus

    def rankings(self):
        """Return arm indices sorted from highest to lowest index value.

        A stable sort is used so that ties (e.g. several unexplored arms) are
        broken deterministically in favour of the lower arm index.
        """
        return np.argsort(-self.ucb, kind="stable")

In [5]:
class arm(object):
    """One arm of the market: per-player reward means plus a ranking of players."""

    def __init__(self, num_player, mean, var):
        """Store the arm's parameters and start with an all-zero ranking.

        Args:
            num_player: number of players; sets the length of the initial ranking.
            mean: indexable collection; ``mean[p]`` is the reward mean for player ``p``.
            var: spread parameter handed to ``np.random.normal`` in gene_reward.
        """
        self.num_player, self.mean, self.var = num_player, mean, var
        # Placeholder ranking (all zeros) until set_prefer() supplies a real one.
        self.ranking = np.zeros(num_player)

    def set_prefer(self, ranking_list):
        """Replace the stored ranking with an array built from ``ranking_list``."""
        new_ranking = np.array(ranking_list)
        self.ranking = new_ranking

    def rankings(self):
        """Return the currently stored ranking array."""
        return self.ranking

    def gene_reward(self, player_num):
        """Draw one normally distributed reward for player ``player_num``."""
        centre = self.mean[player_num]
        # NOTE(review): the second positional argument of np.random.normal is
        # `scale` (a standard deviation), so `var` acts as a std dev here —
        # confirm this is intended.
        return np.random.normal(centre, self.var)

In [8]:
class platform(object):
    """Central platform that matches players to arms with Gale-Shapley.

    The platform only needs each player and each arm to expose a
    ``rankings()`` method: players return arm indices and arms return player
    indices, both ordered from most to least preferred.
    """

    def __init__(self, num_players, num_arms):
        """Remember the market size and start the round counter at 0."""
        self.num_players = num_players
        self.num_arms = num_arms
        self.round = 0

    def update_round(self):
        """Advance the round counter by one."""
        self.round += 1

    def GS(self, players, arms):
        """Run player-proposing deferred acceptance and return the matching.

        Args:
            players: sequence of ``num_players`` objects whose ``rankings()``
                returns ``num_arms`` arm indices, best first.
            arms: sequence of ``num_arms`` objects whose ``rankings()``
                returns ``num_players`` player indices, best first.

        Returns:
            A list with one entry per arm.  Each entry is a list holding the
            index of the player matched to that arm, or an empty list when
            the arm received no acceptable proposal.  Players who are
            rejected by every arm on their list stay unmatched.

        Raises:
            ValueError: if an arm's ranking contains none of the players that
                proposed to it (the ranking is not a full permutation).
        """
        # Integer tables: the entries are used as indices further down.
        players_to_arms = np.zeros([self.num_players, self.num_arms], dtype=int)
        arms_to_players = np.zeros([self.num_arms, self.num_players], dtype=int)
        for i in range(self.num_players):
            players_to_arms[i] = players[i].rankings()
        for i in range(self.num_arms):
            arms_to_players[i] = arms[i].rankings()

        # trajectory[p]      -> position in p's preference list of the next arm to try
        # matching_or_not[p] -> True while p is tentatively held by some arm
        # matching_buffer[a] -> players currently competing for arm a
        #                       (the held player plus this round's new proposers)
        trajectory = np.zeros(self.num_players, dtype=int)
        matching_or_not = np.zeros(self.num_players, dtype=bool)
        matching_buffer = [[] for _ in range(self.num_arms)]

        while True:
            # Only free players that still have an arm left to try may propose;
            # stopping on this condition also terminates when there are more
            # players than arms.
            proposers = [
                p for p in range(self.num_players)
                if not matching_or_not[p] and trajectory[p] < self.num_arms
            ]
            if not proposers:
                break

            # First, every free player proposes to its next-best arm.
            for p in proposers:
                p_choice = players_to_arms[p][trajectory[p]]
                matching_buffer[p_choice].append(p)

            # Second, each arm keeps only its most preferred candidate.
            for a in range(self.num_arms):
                candidates = matching_buffer[a]
                if not candidates:
                    continue

                # Walk the arm's preference order; the first candidate found wins.
                a_choice = next(
                    (int(c) for c in arms_to_players[a] if c in candidates),
                    None,
                )
                if a_choice is None:
                    raise ValueError(
                        "arm %d does not rank any of its proposers %s"
                        % (a, candidates)
                    )

                for p_ in candidates:
                    if p_ == a_choice:
                        matching_or_not[p_] = True
                    else:
                        # Rejected (or displaced) players move on to their next arm.
                        matching_or_not[p_] = False
                        trajectory[p_] += 1

                # Keep the buffer a list so later rounds can append to it.
                matching_buffer[a] = [a_choice]

        return matching_buffer


In [9]:
# Market size: number of players and number of arms.
NUM_PLAYERS, NUM_ARMS = 3, 3
# TODO(review): still an empty placeholder. Presumably meant to hold one row of
# per-player reward means per arm (arm.gene_reward indexes `mean` by player) —
# confirm and fill in before running the simulation.
MEAN_ARMS = [[]]

In [12]:
# One learner per player. The constructor argument is the number of ARMS the
# learner tracks, so it must be NUM_ARMS (the original passed NUM_PLAYERS,
# which only worked because both constants happen to be 3).
players = [player(NUM_ARMS) for _ in range(NUM_PLAYERS)]

# TODO(review): placeholder spread for the reward noise — the original cell
# called arm() with no arguments (a TypeError), so no value was ever chosen.
ARM_VAR = 1.0

# One arm per row of MEAN_ARMS; each row is passed as that arm's `mean`.
# While MEAN_ARMS is still the [[]] placeholder this builds a single arm with
# no means, matching the single append in the original cell.
arms = [arm(NUM_PLAYERS, arm_means, ARM_VAR) for arm_means in MEAN_ARMS]