In [1]:
import numpy as np
np.set_printoptions(formatter={'float': lambda x: "{0:0.4f}".format(x)})
import jax.numpy as jnp
from jax import grad
from itertools import combinations_with_replacement, product, chain
from collections import Counter
from scipy.special import loggamma

In [2]:
def multinomial(*counts):
    """Return the multinomial coefficient ``sum(counts)! / (c_1! * c_2! * ...)``.

    The value is built with exact integer arithmetic as a running product of
    binomial coefficients, so the result is a Python ``int`` with no
    floating-point rounding, however large it gets.

    Args:
        *counts: non-negative integer group sizes (Python or numpy integers).

    Returns:
        The number of distinct orderings of ``sum(counts)`` items split into
        groups of the given sizes. Returns 1 when called with no counts and
        0 when any count is negative (no such arrangement exists).
    """
    sizes = [int(c) for c in counts]
    if any(size < 0 for size in sizes):
        return 0
    result = 1
    placed = 0
    for size in sizes:
        # Multiply in C(placed + size, size) one factor at a time; every
        # intermediate quotient is itself an integer, so ``//`` is exact.
        for i in range(1, size + 1):
            placed += 1
            result = result * placed // i
    return result

def binomial(n, k):
    """Return ``n`` choose ``k``, expressed as the two-group multinomial (k, n - k)."""
    chosen = k
    remaining = n - k
    return multinomial(chosen, remaining)

The following action-graph game representation stores two numpy arrays for each action: a table of all opponent-configurations over the neighborhood `self.config_tables[action]`, and a table of the action's payoff in each of these configurations `self.payoff_tables[action]`. Configurations are represented by a vector of counts for the number of players choosing each action in the neighborhood (the out-of-neighborhood count is implicit). The ordering of the configuration vector corresponds to the ordering of the neighboring actions in `self.action_graph[action]`. The ordering of the configuration and payoff tables match.

Consider the 15-player lemonade-stand game below. Note that this lemonade-game variant has fewer edges than the one from the video: each action's neighborhood includes both adjacent locations for players of the same type, but only the matching location for players of the other type. The diagonal edges from the video are not present. This means, for example, that action *L3* has neighborhood *L2, L3, L4, H3*.

The configuration table for action *L3* has shape (1320, 4), where the number of configurations corresponds to the `1320 = stars_and_bars(9, 4 - 1) * stars_and_bars(5, 2 - 1)` arrangements of 9 lemonade opponents and 5 hot-dog opponents over the neighborhood. Each configuration is a four-vector giving the number of opponents choosing each of *(L2, L3, L4, H3)*. At index 1008 of the configuration table, the configuration vector is `[0 7 2 5]`, and at index 1008 of the payoff table is the number `3.9994`. This means that when the 14 opponents jointly play `<0xL2, 7xL3, 2xL4, 5xH3>`, with 0 lemonade and 0 hot dog players choosing actions outside the neighborhood, a player selecting *L3* will get a payoff of `3.9994`.

Your first task is to implement a `deviation_payoffs()` method for this `ActionGraphGame` class. The method takes as input a role-symmetric mixed-strategy profile (where each row gives the mixed strategy for one role) and outputs a deviation payoff for a player of the specified role using the specified action. To assist with the deviation payoff calculation, you will implement a helper function to compute the probability of a configuration under a given role-symmetric mixed-strategy profile, which in turn has a helper function to compute the number of asymmetric repetitions of a role-symmetric configuration.

In [3]:
class ActionGraphGame:
    """Role-symmetric action-graph game with tabulated per-action payoffs.

    For every action the game stores two row-aligned numpy arrays:

    * ``config_tables[action]``: one row per opponent configuration over the
      action's neighborhood. Column ``i`` is the number of opponents playing
      ``action_graph[action][i]``; opponents outside the neighborhood are
      implicit.
    * ``payoff_tables[action]``: the payoff to ``action`` in each of those
      configurations.

    Roles are the player types (for example lemonade stands and hot-dog
    stands). Role-symmetric mixed-strategy profiles are arrays of shape
    ``(num_roles, max(num_actions.values()))``: row ``r`` holds the mixed
    strategy of ``roles[r]`` over ``action_sets[roles[r]]`` (in that order),
    zero-padded on the right when a role has fewer actions than the widest
    role.
    """

    def __init__(self, json_data, payoff_function):
        """Build the configuration and payoff tables for every action.

        Args:
            json_data: mapping with keys ``"roles"`` (list of role names),
                ``"players"`` (role -> player count), ``"actions"``
                (role -> list of action names) and ``"action_graph"``
                (action -> list of neighboring actions).
            payoff_function: callable ``(action, config_dict) -> payoff``
                where ``config_dict`` maps each neighbor of ``action`` to the
                number of opponents playing it.
        """
        self.roles = list(json_data["roles"])
        self.num_roles = len(self.roles)
        self.num_players = dict(json_data["players"])
        self.action_sets = dict(json_data["actions"])
        self.num_actions = {role: len(actions) for role, actions in self.action_sets.items()}
        self.action_graph = dict(json_data["action_graph"])
        self.total_actions = len(self.action_graph)
        # Assumes roles don't share actions: the first role listing the action wins.
        self.action_roles = {
            action: [role for role in self.roles if action in self.action_sets[role]][0]
            for action in self.action_graph
        }
        self.action_indices = {
            role: {action: index for index, action in enumerate(self.action_sets[role])}
            for role in self.roles
        }
        self.role_config_indices = {
            action: {role: self._get_role_config_indices(action, role) for role in self.roles}
            for action in self.action_graph
        }
        self.neighbor_masks = {
            action: self._get_neighbor_prof_mask(action) for action in self.action_graph
        }

        self.config_tables = {}
        self.payoff_tables = {}
        for action, neighbors in self.action_graph.items():
            configs, payoffs = self._build_tables(action, neighbors, payoff_function)
            self.config_tables[action] = configs
            self.payoff_tables[action] = payoffs

    def _build_tables(self, action, neighbors, payoff_function):
        """Enumerate all opponent configurations for ``action`` and their payoffs."""
        role_configs = []
        for role in self.roles:
            # ``None`` stands for "any action of this role outside the neighborhood".
            role_actions = [a for a in self.action_sets[role] if a in neighbors] + [None]
            role_opponents = self.num_opponents(action, role)
            role_configs.append(combinations_with_replacement(role_actions, role_opponents))

        config_rows = []
        payoffs = []
        for config in product(*role_configs):
            counts = Counter(chain(*config))
            config_rows.append([counts[n] for n in neighbors])
            payoffs.append(payoff_function(action, {n: counts[n] for n in neighbors}))
        return np.array(config_rows), np.array(payoffs)

    def _get_role_neighborhood(self, action, role):
        """Return the neighbors of ``action`` that belong to ``role``.

        The result follows the order of ``action_sets[role]``. That is the
        same order in which ``neighbor_masks`` selects probabilities from a
        profile row, so counts and probabilities line up even when action
        names do not sort in action-set order (e.g. ``"L9"`` and ``"L10"``).
        """
        neighborhood = set(self.action_graph[action])
        return [act for act in self.action_sets[role] if act in neighborhood]

    def regret(self, sym_prof):
        """Return the largest deviation gain of any role/action under ``sym_prof``."""
        return jnp.max(self.deviation_gains(sym_prof))

    def _get_role_config_indices(self, action, role):
        """Return the configuration-vector columns of ``action`` that belong to ``role``."""
        neighborhood = self.action_graph[action]
        return [neighborhood.index(act) for act in self._get_role_neighborhood(action, role)]

    def _get_neighbor_prof_mask(self, action):
        """Return a boolean mask over profile entries lying in ``action``'s neighborhood."""
        neighborhood = set(self.action_graph[action])
        mask = np.zeros([self.num_roles, max(self.num_actions.values())], dtype=bool)
        for r, role in enumerate(self.roles):
            for n, node in enumerate(self.action_sets[role]):
                if node in neighborhood:
                    mask[r, n] = True
        return mask

    def num_opponents(self, action, role):
        """Return how many opponents play ``role`` when the deviator plays ``action``.

        The deviating player is excluded from the count of its own role.
        """
        if role == self.action_roles[action]:
            return self.num_players[role] - 1
        return self.num_players[role]

    def uniform_profile(self):
        """Return the profile in which every role mixes uniformly over its actions."""
        prof = np.zeros([self.num_roles, max(self.num_actions.values())])
        for r, role in enumerate(self.roles):
            n = self.num_actions[role]
            # Fill only the role's own actions; padding columns stay at zero.
            prof[r, :n] = 1.0 / n
        return prof

    def random_profile(self):
        """Return a profile with each role's strategy drawn from a flat Dirichlet."""
        prof = np.zeros([self.num_roles, max(self.num_actions.values())])
        for r, role in enumerate(self.roles):
            n = self.num_actions[role]
            prof[r, :n] = np.random.dirichlet(np.ones(n))
        return prof

    def deviation_gains(self, role_sym_prof):
        """Return, per role and action, the non-negative gain from deviating.

        The gain is ``max(0, deviation payoff - expected payoff of the role's
        current mixture)``. The result has the same shape as a profile;
        padding columns are zero.
        """
        gain = np.zeros([self.num_roles, max(self.num_actions.values())])
        for r, role in enumerate(self.roles):
            n = self.num_actions[role]
            deviation = self.deviation_payoffs(role_sym_prof, role)
            expected_utility = jnp.dot(deviation, role_sym_prof[r][:n])
            gain[r, :n] = jnp.maximum(0, deviation - expected_utility)
        return gain

    def deviation_payoffs(self, role_sym_prof, role):
        """Return the deviation payoff of every action of ``role``, in action-set order."""
        return np.array(
            [self.deviation_payoff(role_sym_prof, role, act) for act in self.action_sets[role]],
            dtype=float,
        )

    def deviation_payoff(self, role_sym_prof, role, action):
        """Return the expected payoff to ``action`` when opponents play ``role_sym_prof``.

        ``role`` is not used by the computation (the tables are keyed by
        ``action``); it is kept so the signature stays unchanged.
        """
        return sum(
            self.config_prob(role_sym_prof, config, action) * payoff
            for config, payoff in zip(self.config_tables[action], self.payoff_tables[action])
        )

    def config_prob(self, role_sym_prof, opp_config, action):
        """Return the probability of ``opp_config`` under ``role_sym_prof``.

        For each role, every in-neighborhood probability is raised to the
        number of opponents playing that action, and the leftover probability
        mass is raised to the number of opponents outside the neighborhood.
        The product is then scaled by the number of asymmetric arrangements
        that collapse to this role-symmetric configuration.
        """
        prob = 1
        mask = self.neighbor_masks[action]
        for r, role in enumerate(self.roles):
            role_counts = opp_config[self.role_config_indices[action][role]]
            neighborhood_probs = role_sym_prof[r][mask[r]]
            outside_prob = 1 - sum(neighborhood_probs)
            outside_count = self.num_opponents(action, role) - sum(role_counts)
            prob *= np.prod(neighborhood_probs ** role_counts)
            prob *= outside_prob ** outside_count
        prob *= self.repetitions(opp_config, action)
        return prob

    def repetitions(self, opp_config, action):
        """Return the number of asymmetric profiles matching ``opp_config``.

        Each role contributes one multinomial coefficient over its
        in-neighborhood counts plus its out-of-neighborhood remainder.
        """
        reps = 1
        for role in self.roles:
            role_counts = opp_config[self.role_config_indices[action][role]]
            outside_count = self.num_opponents(action, role) - sum(role_counts)
            reps *= multinomial(*role_counts, outside_count)
        return reps

The ``ActionGraphGame.__init__`` method takes as input a dictionary in JSON-serializable format with the following information:
* names for each of the game's roles
* the number of players for each role
* the set of actions for each role
* the action graph, represented by a mapping of actions to neighbors

It also takes a payoff function which can generate a payoff for any configuration. This function takes as input an action and a dictionary representation of the neighborhood's opponent-configuration, for example ``"L2", {"L1":0, "L2":8, "L3":1, "H2":2}`` would ask for the payoff to action *L2* when the neighborhood configuration is ``<0xL1, 8xL2, 1xL3, 2xH2>``, with 0 lemonade and 3 hot dog players choosing actions outside the neighborhood. 

Moderately-configurable examples for the lemonade-stand game appear below.

In [4]:
# Game size: number of players per role and number of beach locations.
LEMONADE_PLAYERS = 10
HOT_DOG_PLAYERS = 5
LOCATIONS = 8

# Each action neighbors the same-role stands at the adjacent locations (and
# itself) plus the other-role stand at the same location. The location bound
# is derived from LOCATIONS so the graph stays valid when LOCATIONS changes.
lemonade_settings = {
    "roles": ["lemonade", "hot_dogs"],
    "players": {"lemonade": LEMONADE_PLAYERS, "hot_dogs": HOT_DOG_PLAYERS},
    "actions": {
        "lemonade": ["L" + str(i) for i in range(LOCATIONS)],
        "hot_dogs": ["H" + str(i) for i in range(LOCATIONS)],
    },
    "action_graph": {
        **{
            "L" + str(i): ["L" + str(j) for j in range(i - 1, i + 2) if 0 <= j < LOCATIONS] + ["H" + str(i)]
            for i in range(LOCATIONS)
        },
        **{
            "H" + str(i): ["H" + str(j) for j in range(i - 1, i + 2) if 0 <= j < LOCATIONS] + ["L" + str(i)]
            for i in range(LOCATIONS)
        },
    },
}

# Payoff-shape parameters used by lemonade_payoffs.
LEMONADE_MULTIPLIER = 2.0
HOT_DOG_BONUS = 5.0
NEIGHBOR_DISCOUNT = 0.9


def lemonade_payoffs(action, opp_config):
    """Return the payoff to ``action`` given opponent counts in its neighborhood.

    Args:
        action: action name, a role letter (``"L"`` or ``"H"``) followed by
            the location number, e.g. ``"L3"`` or ``"H10"``.
        opp_config: mapping from each neighbor of ``action`` to the number of
            opponents playing it.

    Returns:
        The payoff as a float.
    """
    role = action[0]
    # Parse every digit after the role letter so locations >= 10 work too.
    location = int(action[1:])
    payoff = (LOCATIONS+2)*2 - ((LOCATIONS-1)/2 - location)**2  # center of the beach is more popular
    if role == "L" and opp_config["H"+str(location)] > 0:
        # scale the lemonade stand payoff if there's a hot dog stand at the same location
        payoff *= LEMONADE_MULTIPLIER
    if role == "H":
        # hot dog stand gets a bonus for each co-located lemonade stand
        payoff += HOT_DOG_BONUS * opp_config["L"+str(location)]
    payoff /= opp_config[action] + 1  # competitors at the same location split the payoff
    # nearby same-role competitors on either side also reduce the payoff
    if location > 0:
        payoff *= NEIGHBOR_DISCOUNT**opp_config[role + str(location-1)]
    if location < LOCATIONS-1:
        payoff *= NEIGHBOR_DISCOUNT**opp_config[role + str(location+1)]
    return payoff

# Build the default lemonade-stand game. Construction enumerates every
# neighborhood configuration of every action and evaluates lemonade_payoffs
# on each one to fill the payoff tables.
lemonade_game = ActionGraphGame(lemonade_settings, lemonade_payoffs)

In [5]:
# Inspect the neighborhood and table shape of an edge action (H7) and an
# interior action (L3).
print("H7 neighborhood:", lemonade_game.action_graph["H7"])
print("H7 config-table shape:", lemonade_game.config_tables["H7"].shape, "\n")
print("L3 neighborhood:", lemonade_game.action_graph["L3"])
print("L3 config-table shape:", lemonade_game.config_tables["L3"].shape, "\n")
# Rows 1000..1008 of the L3 tables (the slice end is exclusive); the two
# tables are row-aligned, so each payoff belongs to the configuration above it.
print(lemonade_game.config_tables["L3"][1000:1009,:], "\n")
print(lemonade_game.payoff_tables["L3"][1000:1009], "\n")

# New since the version from class:
# per-role column indices into an L3 configuration vector ...
print(lemonade_game.role_config_indices["L3"], "\n")
# ... and the boolean mask selecting L3's neighbors inside a profile array.
print(lemonade_game.neighbor_masks["L3"])

H7 neighborhood: ['H6', 'H7', 'L7']
H7 config-table shape: (165, 3) 

L3 neighborhood: ['L2', 'L3', 'L4', 'H3']
L3 config-table shape: (1320, 4) 

[[0 8 1 1]
 [0 8 1 0]
 [0 8 0 5]
 [0 8 0 4]
 [0 8 0 3]
 [0 8 0 2]
 [0 8 0 1]
 [0 8 0 0]
 [0 7 2 5]] 

[3.9500 1.9750 4.3889 4.3889 4.3889 4.3889 4.3889 2.1944 3.9994] 

{'lemonade': [0, 1, 2], 'hot_dogs': [3]} 

[[False False  True  True  True False False False]
 [False False False  True False False False False]]


In [6]:
# Example usage of the new attributes:
A = "L3"
R = "lemonade"
# One opponent configuration over L3's neighborhood.
opp_config = lemonade_game.config_tables[A][345]
print(opp_config)
# Only the entries of that configuration that belong to role R.
print(opp_config[lemonade_game.role_config_indices[A][R]])
prof = lemonade_game.random_profile()
print(prof)
mask = lemonade_game.neighbor_masks[A]
print(mask.astype(int))
# Masking the whole profile returns the in-neighborhood probabilities of all
# roles as one flat array ...
print(prof[mask])
# ... while masking a single row returns them for that role only.
print(prof[0][mask[0]])

[3 5 1 2]
[3 5 1]
[[0.0428 0.0872 0.0059 0.0365 0.2073 0.0888 0.2048 0.3267]
 [0.1179 0.2833 0.1102 0.0551 0.1147 0.1887 0.0391 0.0909]]
[[0 0 1 1 1 0 0 0]
 [0 0 0 1 0 0 0 0]]
[0.0059 0.0365 0.2073 0.0551]
[0.0059 0.0365 0.2073]


In [7]:
#TODO: test your deviation payoffs functions incrementally as you develop them!
# Deviation gains of every role/action at the uniform profile; as the last
# expression of the cell, the resulting array is displayed.
prof = lemonade_game.uniform_profile()
lemonade_game.deviation_gains(prof)

No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


array([[0.0000, 0.0000, 1.8963, 3.2750, 3.2750, 1.8963, 0.0000, 0.0000],
       [0.0000, 0.0000, 1.9631, 3.3628, 3.3628, 1.9631, 0.0000, 0.0000]])

On the default lemonade game, I get the following results for `deviation_payoffs` on the uniform profile:

`hot_dogs: [10.3429, 13.9965, 16.7958, 18.1954, 18.1954, 16.7958, 13.9965, 10.3429]`

`lemonade: [6.0297, 9.4787, 12.2362, 13.6149, 13.6149, 12.2362, 9.4787, 6.0297]`

In [8]:
# Print the deviation payoffs of each role at the uniform profile so they can
# be compared against the reference values quoted above.
prof = lemonade_game.uniform_profile()
print("hot_dogs:", lemonade_game.deviation_payoffs(prof, "hot_dogs"))
print("lemonade:", lemonade_game.deviation_payoffs(prof, "lemonade"))

hot_dogs: [10.3429 13.9965 16.7958 18.1954 18.1954 16.7958 13.9965 10.3429]
lemonade: [6.0297 9.4787 12.2362 13.6149 13.6149 12.2362 9.4787 6.0297]


Now you should adapt your `Nash_local_search` methods from previous classes, modify them to work on AGGs, and compute the role-symmetric mixed-strategy Nash equilibria of the lemonade game!

In [19]:
def normalize(w):
    """Scale ``w`` so that it sums to 1 along its last axis.

    A 1-D weight vector becomes a single probability distribution. A 2-D
    role-symmetric array (one row per role) has every row normalized
    independently, so each role's mixed strategy sums to 1 instead of the
    whole array summing to 1.

    Args:
        w: array of non-negative weights with at least one dimension.

    Returns:
        An array of the same shape whose last-axis slices each sum to 1.
    """
    return w / jnp.sum(w, axis=-1, keepdims=True)

def regret_matching(sym_game, iterations=200, initial_mixture=None, initial_weight=1):
    """Run regret matching from a starting mixture and return the final profile.

    Args:
        sym_game: game object providing ``uniform_profile()`` and
            ``deviation_gains(profile)``.
        iterations: number of update steps to perform.
        initial_mixture: starting profile; the game's uniform profile is used
            when this is ``None``.
        initial_weight: multiplier applied to the starting profile to seed
            the accumulated gains.

    Returns:
        The profile after the last update (the starting profile itself when
        ``iterations`` is 0).
    """
    start = sym_game.uniform_profile() if initial_mixture is None else initial_mixture
    accumulated = start * initial_weight
    profile = start
    for _ in range(iterations):
        # Add this round's deviation gains, then turn the running totals back
        # into a profile with ``normalize``.
        accumulated = accumulated + sym_game.deviation_gains(profile)
        profile = normalize(accumulated)
    return profile


In [None]:
def is_epsilon_equilibrium(sym_game, sym_prof, epsilon=0.001):
    """Report whether the regret of ``sym_prof`` in ``sym_game`` is strictly below ``epsilon``."""
    profile_regret = sym_game.regret(sym_prof)
    return profile_regret < epsilon
    
def filter_regrets(sym_game, candidate_equilibria, epsilon=1e-2):
    """Return the candidates that pass ``is_epsilon_equilibrium`` at ``epsilon``.

    The relative order of the surviving candidates is preserved.
    """
    return [
        candidate
        for candidate in candidate_equilibria
        if is_epsilon_equilibrium(sym_game, candidate, epsilon)
    ]


def filter_unique(candidate_equilibria, min_dist=1e-2):
    """Drop candidates that duplicate an earlier candidate.

    A candidate is kept only if it differs from every previously kept
    profile by more than ``min_dist`` in at least one entry.

    Args:
        candidate_equilibria: sequence of profile arrays of equal shape.
        min_dist: absolute per-entry tolerance below which two profiles are
            considered the same equilibrium.

    Returns:
        A new list with the first representative of each distinct profile,
        in their original order (empty when the input is empty).
    """
    unique_equilibria = []
    for candidate in candidate_equilibria:
        # rtol=0 makes ``min_dist`` a pure absolute tolerance.
        is_duplicate = any(
            np.allclose(candidate, kept, rtol=0, atol=min_dist)
            for kept in unique_equilibria
        )
        if not is_duplicate:
            unique_equilibria.append(candidate)
    return unique_equilibria

def Nash_local_search(sym_game, method=regret_matching, restarts=10, eps = 1e-3, **search_kwds):
    """Search for equilibria by running a local method from random restarts.

    Args:
        sym_game: game object providing ``random_profile()`` plus whatever
            ``method`` and ``filter_regrets`` require of it.
        method: search routine called as
            ``method(sym_game, initial_mixture=..., **search_kwds)``.
        restarts: number of random starting profiles to try.
        eps: regret threshold handed to ``filter_regrets``; candidates that
            do not pass at this threshold are discarded.
        **search_kwds: extra keyword arguments forwarded to ``method``.

    Returns:
        The result of ``filter_unique`` applied to the candidates that
        survive the regret filter.
    """
    candidates = [
        method(sym_game, initial_mixture=sym_game.random_profile(), **search_kwds)
        for _ in range(restarts)
    ]
    # Apply the caller's threshold rather than filter_regrets' own default.
    candidates = filter_regrets(sym_game, candidates, eps)
    return filter_unique(candidates)

# Single-restart search on the lemonade game; ``iterations`` is forwarded to
# regret_matching through **search_kwds.
Nash_local_search(lemonade_game, regret_matching, restarts = 1, eps = 1e-3, iterations = 50)