In [13]:
import math

import jax
import jax.numpy as jnp
import numpy as np
from scipy.special import loggamma

np.set_printoptions(formatter={'float': lambda x: "{0:0.4f}".format(x)})

Since we're working with symmetric games, we'll be looking for symmetric equilibria, which means we can represent profiles by storing a single mixed strategy that will be used by all players. This simplifies the following functions for generating profiles, and will also require us to re-write some of our other functions.

In [14]:
def uniform_mixture(num_actions):
    """Return the symmetric mixed strategy that weights every action equally.

    The result is a 1-D float array of length `num_actions` whose entries are
    all `1 / num_actions`.
    """
    equal_weights = np.full(num_actions, 1.0)
    return equal_weights / num_actions

def random_mixture(num_actions):
    """Sample a mixed strategy over `num_actions` actions.

    The draw comes from a Dirichlet distribution whose concentration
    parameters are all 1, using NumPy's global random state.
    """
    concentration = [1 for _ in range(num_actions)]
    return np.random.dirichlet(concentration)

# Show one sampled mixture over four actions.
print(random_mixture(num_actions=4))

[0.2860 0.0410 0.2303 0.4427]


The following functions compute binomial or multinomial coefficients, using the loggamma function to avoid intermediate overflows.

In [15]:
def multinomial(*counts):
    """Return the multinomial coefficient: sum(counts) choose (c_1, c_2, ...).

    The value is built as a product of binomial coefficients,
    C(c_1, c_1) * C(c_1 + c_2, c_2) * ..., using exact integer arithmetic.
    A floating-point evaluation such as exp(loggamma(...)) rounds to the wrong
    integer once the result exceeds float precision and fails outright when it
    overflows to infinity; Python integers have neither problem.

    Parameters:
        *counts: non-negative integers (Python or NumPy integer scalars).

    Returns:
        The coefficient as a Python int; 1 when no counts are given.

    Raises:
        ValueError: if any count is negative.
        TypeError: if any count is not an integer.
    """
    coefficient = 1
    placed = 0
    for count in counts:
        placed += count
        # Ways to choose which of the `placed` slots hold this group.
        coefficient *= math.comb(placed, count)
    return coefficient

def binomial(n, k):
    """Return the binomial coefficient n choose k as an exact Python int.

    Exact integer arithmetic is used so the result stays correct for values
    beyond float precision; this matters because the result is used to size
    arrays.

    Parameters:
        n: total number of items (integer).
        k: number of items chosen (integer).

    Returns:
        n choose k, or 0 when `k` is negative or larger than `n`.
    """
    if k < 0 or k > n:
        # No way to choose; also keeps math.comb from raising on negative k.
        return 0
    return math.comb(n, k)

The `SymGame` class represents a symmetric game by storing a configurations array, a payoffs array, and a repeats array. Since we're using multiple arrays, it makes sense to package them into an object that also provides other methods. In particular, we ask the game to compute its own `deviation_payoffs`, `deviation_gains`, `total_gain`, and `regret`, and to check a candidate equilibrium. Some of these are left for you to implement.

In [30]:
from itertools import combinations_with_replacement as CwR

class SymGame:
    """Symmetric game stored as a table of opponent configurations.

    Attributes:
        num_players: number of players in the game.
        num_actions: number of actions available to every player.
        configs: int array [num_actions, num_configs]; entry (a, c) is how many
            of the other `num_players - 1` players choose action a in
            configuration c.
        payoffs: float array [num_actions, num_configs]; column c holds
            `payoff_func(configs[:, c])`.
        repeats: int array [num_configs]; the multinomial coefficient of each
            configuration's counts.
    """

    def __init__(self, num_players, num_actions, payoff_func):
        """Enumerate every opponent configuration and tabulate its payoffs.

        `payoff_func` receives one column of `configs` (the opponents' action
        counts) and must return one payoff per action.
        """
        self.num_players = num_players
        self.num_actions = num_actions
        num_opponents = num_players - 1
        num_configs = binomial(num_opponents + num_actions - 1, num_opponents)
        table_shape = [num_actions, num_configs]
        self.configs = np.zeros(table_shape, dtype=int)
        self.payoffs = np.zeros(table_shape, dtype=float)
        self.repeats = np.zeros(num_configs, dtype=int)

        opponent_choices = CwR(range(num_actions), num_opponents)
        for col, chosen_actions in enumerate(opponent_choices):
            # View into the table: incrementing it fills self.configs in place.
            counts = self.configs[:, col]
            for action in chosen_actions:
                counts[action] += 1
            self.payoffs[:, col] = payoff_func(counts)
            self.repeats[col] = multinomial(*counts)

    def deviation_payoffs(self, sym_prof):
        """Return, for each action, its payoff weighted over all configurations.

        Each configuration is weighted by `repeats * prod(sym_prof ** counts)`,
        i.e. the probability of that configuration when every opponent plays
        `sym_prof` independently.
        """
        column = np.reshape(sym_prof, [sym_prof.shape[0], 1])
        config_likelihood = np.prod(column ** self.configs, axis=0)
        weighted = (self.repeats * config_likelihood) * self.payoffs
        return np.sum(weighted, axis=1)

    def deviation_gains(self, sym_prof):
        """Return each action's gain over the profile's expected payoff, floored at 0."""
        dev_pays = self.deviation_payoffs(sym_prof)
        baseline = jnp.dot(dev_pays, sym_prof)
        return jnp.maximum(0, dev_pays - baseline)

    def total_gain(self, sym_prof):
        """Return the sum of all deviation gains at `sym_prof`."""
        gains = self.deviation_gains(sym_prof)
        return jnp.sum(gains)

    def regret(self, sym_prof):
        """Return the largest deviation gain at `sym_prof`."""
        gains = self.deviation_gains(sym_prof)
        return jnp.max(gains)

    def is_epsilon_equilibrium(self, sym_prof, epsilon=0.001):
        """Report whether the regret of `sym_prof` is strictly below `epsilon`."""
        return self.regret(sym_prof) < epsilon

    def __repr__(self):
        return f"Symmetric Game: P={self.num_players}, A={self.num_actions}"

This shows an example of how we can implement a payoff function and use it to create a game. This is the game from Appendix A in the paper and the examples in the video.

In [17]:
def payoffs_from_appendix(opp_config):
    """Payoff function for the example game, given the opponents' action counts.

    For action i (0-based) with `c` opponents playing it:
      * c == 0 (nobody else plays it): payoff is i + 1;
      * 0 < c < total opponents: payoff is -(i + 1);
      * every opponent plays it: payoff is 0.

    `opp_config` is a 1-D integer array; a float array of the same length is
    returned.
    """
    num_opponents = sum(opp_config)
    stakes = np.arange(1, len(opp_config) + 1, dtype=float)
    unplayed = opp_config == 0
    shared = opp_config < num_opponents
    return np.where(unplayed, stakes, np.where(shared, -stakes, 0.0))

# Build the 3-player, 3-action example game, then show its summary line
# followed by the configs, payoffs, and repeats tables (one per print line).
game_from_appendix = SymGame(num_players=3, num_actions=3, payoff_func=payoffs_from_appendix)
print(game_from_appendix)
print(
    game_from_appendix.configs,
    game_from_appendix.payoffs,
    game_from_appendix.repeats,
    sep="\n",
)

Symmetric Game: P=3, A=3
[[2 1 1 0 0 0]
 [0 1 0 2 1 0]
 [0 0 1 0 1 2]]
[[0.0000 -1.0000 -1.0000 1.0000 1.0000 1.0000]
 [2.0000 -2.0000 2.0000 0.0000 -2.0000 2.0000]
 [3.0000 3.0000 -3.0000 3.0000 -3.0000 0.0000]]
[1 2 2 1 2 1]


Use this as a starting point for testing your `deviation_payoffs` and `deviation_gains` methods.

In [37]:
# Evaluate the example game at one hand-picked symmetric profile and print
# every diagnostic that SymGame exposes for it.
sym_game = game_from_appendix
# Alternative profiles to try: uncomment one of these instead of the fixed
# profile below.
# sym_prof = random_mixture(sym_game.num_actions)
# sym_prof = uniform_mixture(sym_game.num_actions)
sym_prof = np.array([.1, .5, .4])
print(sym_game)
print("profile:", sym_prof)
print("deviation payoffs:", sym_game.deviation_payoffs(sym_prof))
print("deviation gains:", sym_game.deviation_gains(sym_prof))
print("total gain:", sym_game.total_gain(sym_prof))
print("regret:", sym_game.regret(sym_prof))
# Uses is_epsilon_equilibrium's default epsilon.
print("equilibrium?", sym_game.is_epsilon_equilibrium(sym_prof))

Symmetric Game: P=3, A=3
profile: [0.1000 0.5000 0.4000]
deviation payoffs: [0.6300 -0.5000 -0.3600]
deviation gains: [0.9610 0.0000 0.0000]
total gain: 0.96099997
regret: 0.96099997
equilibrium? False


These helper functions map a vector onto the probability simplex of the same dimension. `simplex_normalize` rescales the entries so that they sum to one; it assumes all entries in the array are non-negative (and not all zero). `simplex_project` accepts arbitrary real entries and returns the projection of the vector onto the simplex.

In [19]:
def simplex_normalize(array):
    """Divide `array` by the sum of its entries so that the result sums to one.

    No validation is performed: the entries are expected to be non-negative
    with a non-zero sum.
    """
    total = np.sum(array)
    return array / total

# Clipping bound for simplex_project: the reciprocal of the float resolution
# reported by np.finfo.
_SIMPLEX_BIG = 1 / np.finfo(float).resolution
def simplex_project(array):
    """Return the projection of `array` onto the probability simplex.

    The last axis is treated as the vector to project; any leading axes are
    handled as a batch, and the result has the same shape as the input.
    Every entry of the input is shifted by one per-vector offset and negative
    results are clamped to zero.
    NOTE(review): this looks like the standard sort-and-threshold Euclidean
    projection onto the simplex; confirm against the original source.
    NaN inputs are not rejected (the check below is disabled).
    """
    array = np.asarray(array, float)
    #check(not np.isnan(array).any(), "can't project nan onto simplex: {}", array)
    # This fails for really large values, so we clip the array so that every
    # element has absolute value at most _SIMPLEX_BIG
    array = np.clip(array, -_SIMPLEX_BIG, _SIMPLEX_BIG)
    size = array.shape[-1]
    # Entries of each vector in descending order (sort the negation ascending).
    sort = -np.sort(-array, -1)
    # rho[..., k]: the offset that makes the k+1 largest entries sum to one.
    rho = (1 - sort.cumsum(-1)) / np.arange(1, size + 1)
    # Last position k where the shifted k-th largest entry is still positive:
    # argmax on the reversed mask finds the first True from the end.
    inds = size - 1 - np.argmax((rho + sort > 0)[..., ::-1], -1)
    # Flatten the batch axes in place so one offset can be picked per row.
    rho.shape = (-1, size)
    lam = rho[np.arange(rho.shape[0]), inds.flat]
    # Restore the batch shape with a trailing length-1 axis for broadcasting.
    lam.shape = array.shape[:-1] + (1,)
    # Apply the offset and clamp entries that fall below zero.
    return np.maximum(array + lam, 0)

The following functions that you've implemented before will all need to be updated slightly to work with symmetric profiles and our `SymGame` data structure.

In [20]:
def normalize(w):
    """Scale the weight vector `w` by its `jnp.sum` so the entries sum to one."""
    total = jnp.sum(w)
    return w / total

def regret_matching(sym_game, iterations=200, initial_mixture=None, initial_weight=1):
    """Iterate a regret-matching style update on a symmetric profile.

    A running total of deviation gains is kept, seeded with
    `initial_mixture * initial_weight`. Each iteration adds the game's
    deviation gains at the current profile and sets the next profile to the
    normalized running total.

    Parameters:
        sym_game: game object providing `num_actions` and `deviation_gains`.
        iterations: number of update steps.
        initial_mixture: starting profile; the uniform mixture when None.
        initial_weight: scale applied to the starting profile in the total.

    Returns:
        The profile after the final iteration.
    """
    if initial_mixture is None:
        initial_mixture = uniform_mixture(sym_game.num_actions)
    profile = initial_mixture
    cumulative_gains = initial_mixture * initial_weight
    for _ in range(iterations):
        cumulative_gains = cumulative_gains + sym_game.deviation_gains(profile)
        profile = normalize(cumulative_gains)
    return profile

# Run regret matching on the example game from the hand-picked profile.
regret_matching(sym_game, iterations=2000, initial_mixture=sym_prof)


Array([0.3333, 0.3333, 0.3333], dtype=float32)

In [21]:
def replicator_dynamics(sym_game, iterations=200, initial_mixture=None, min_payoff=None):
    """Iterate discrete-time replicator dynamics on a symmetric profile.

    Each step multiplies every action's probability by its deviation payoff
    (shifted by `min_payoff` so the multipliers are non-negative) and
    renormalizes.

    Parameters:
        sym_game: game object providing `num_actions`, `payoffs`, and
            `deviation_payoffs`.
        iterations: number of update steps.
        initial_mixture: starting profile; the uniform mixture when None.
        min_payoff: value subtracted from every deviation payoff; defaults to
            the smallest entry of `sym_game.payoffs`.

    Returns:
        The profile after the final iteration.
    """
    if initial_mixture is None:
        initial_mixture = uniform_mixture(sym_game.num_actions)
    if min_payoff is None:
        min_payoff = sym_game.payoffs.min()

    curr_profile = initial_mixture
    for _ in range(iterations):
        # Build a new array rather than subtracting in place, so the array
        # returned by the game object is never mutated.
        shifted_pays = sym_game.deviation_payoffs(curr_profile) - min_payoff
        curr_profile = normalize(shifted_pays * curr_profile)
    return curr_profile

# Run replicator dynamics on the example game from the hand-picked profile.
replicator_dynamics(sym_game, iterations=200, initial_mixture=sym_prof)

Array([0.3333, 0.3333, 0.3333], dtype=float32)

In [22]:
def gradient_descent(sym_game, iterations=200, initial_mixture=None, step_size=0.001):
    """Minimize the game's total gain by projected gradient descent.

    Each step moves the profile against the JAX gradient of
    `sym_game.total_gain` and projects the result back onto the simplex.

    Parameters:
        sym_game: game object providing `num_actions` and `total_gain`.
        iterations: number of descent steps.
        initial_mixture: starting profile; the uniform mixture when None.
        step_size: multiplier applied to the gradient at every step.

    Returns:
        The profile after the final projection (or the starting profile when
        `iterations` is 0).
    """
    if initial_mixture is None:
        initial_mixture = uniform_mixture(sym_game.num_actions)

    gain_gradient = jax.grad(sym_game.total_gain)
    profile = initial_mixture
    for _ in range(iterations):
        descent_step = step_size * gain_gradient(profile)
        profile = simplex_project(profile - descent_step)
    return profile

# Run projected gradient descent on the example game from the hand-picked profile.
gradient_descent(sym_game, iterations=1000, initial_mixture=sym_prof)

array([0.3322, 0.3325, 0.3353])

In [1]:
def filter_regrets(sym_game, candidate_equilibria, epsilon=1e-2):
    """Keep only the candidates that `sym_game` accepts as epsilon-equilibria.

    Returns a new list, in the original order, of every profile for which
    `sym_game.is_epsilon_equilibrium(profile, epsilon)` is truthy.
    """
    return [
        profile
        for profile in candidate_equilibria
        if sym_game.is_epsilon_equilibrium(profile, epsilon)
    ]


def filter_unique(candidate_equilibria, min_dist=1e-2):
    """Remove candidates that duplicate an earlier candidate.

    Candidates are scanned in order. A candidate is kept only if it differs
    from every already-kept profile by more than `min_dist` in at least one
    coordinate; otherwise it is treated as a duplicate and dropped.

    Parameters:
        candidate_equilibria: sequence of profiles (array-likes of equal length).
        min_dist: absolute per-coordinate tolerance below which two profiles
            count as the same equilibrium.

    Returns:
        A new list of the kept profiles in first-seen order (empty for empty
        input). The kept entries are the original objects, not copies.
    """
    unique_equilibria = []
    for candidate in candidate_equilibria:
        # rtol=0 so that min_dist alone defines "the same profile".
        is_duplicate = any(
            np.allclose(kept, candidate, rtol=0, atol=min_dist)
            for kept in unique_equilibria
        )
        if not is_duplicate:
            unique_equilibria.append(candidate)
    return unique_equilibria

def Nash_local_search(sym_game, method=gradient_descent, restarts=10, **search_kwds):
    """Run a local search from several random starting profiles and filter the results.

    Parameters:
        sym_game: game object passed to `method` and to `filter_regrets`.
        method: search function called as
            `method(sym_game, initial_mixture=..., **search_kwds)`.
        restarts: number of random starting mixtures to try.
        **search_kwds: extra keyword arguments forwarded to `method`.

    Returns:
        The search end points after passing them through `filter_regrets` and
        then `filter_unique`, both with their default thresholds.
    """
    end_points = [
        method(
            sym_game,
            initial_mixture=random_mixture(sym_game.num_actions),
            **search_kwds,
        )
        for _ in range(restarts)
    ]
    low_regret = filter_regrets(sym_game, end_points)
    return filter_unique(low_regret)

# Search the current game with gradient descent from random restarts.
Nash_local_search(sym_game, method=gradient_descent, iterations=1000, step_size=0.0001)

NameError: name 'gradient_descent' is not defined

Test your implementations! Here's a start, but you should add more.

In [11]:
# Compare the three search methods on a random 6-player, 4-action game whose
# payoffs are drawn uniformly from [0, 1).
P = 6
A = 4
sym_game = SymGame(P, A, lambda prof: np.random.uniform(0,1,prof.shape))
rm_eq = Nash_local_search(sym_game, regret_matching, 10, iterations=200, initial_weight=1)
# The keyword must match replicator_dynamics' parameter name `min_payoff`.
rd_eq = Nash_local_search(sym_game, replicator_dynamics, 10, iterations=200, min_payoff=None)
gd_eq = Nash_local_search(sym_game, gradient_descent, 10, iterations=200, step_size=0.01)
# Report each method's own count next to its own results.
print("regret matching found", len(rm_eq), "equilibria:", rm_eq)
print("replicator dynamics found", len(rd_eq), "equilibria:", rd_eq)
print("gradient descent found", len(gd_eq), "equilibria:", gd_eq)

NameError: name 'Nash_local_search' is not defined

In [52]:
sym_game = game_from_appendix
rm_eq = Nash_local_search(sym_game, regret_matching, 10, iterations=200, initial_weight=1)
rd_eq = Nash_local_search(sym_game, replicator_dynamics, 10, iterations=200, min_payoffs=None)
gd_eq = Nash_local_search(sym_game, gradient_descent, 10, iterations=200, step_size=0.01)
print("regret matching found", len(rm_eq), "equilibria:", rm_eq)
print("replicator dynamics found", len(rm_eq), "equilibria:", rd_eq)
print("gradient descent found", len(rm_eq), "equilibria:", gd_eq)

TypeError: can't multiply sequence by non-int of type 'SymGame'