In [4]:
import pulp
import numpy as np

In [18]:
# Load every payoff array from the archive and summarize each game's dimensions.
games = np.load("games.npz")
for game_name in sorted(games.keys()):
    G = games[game_name]
    # The last axis has one entry per player; the leading axes give each
    # player's number of actions.
    A, P = G.shape[:-1], G.shape[-1]
    print(game_name + ":", P, "players,", A, "actions")

bach_or_stravinsky: 2 players, (2, 2) actions
chicken: 2 players, (2, 2) actions
five_by_five: 2 players, (5, 5) actions
four_players: 4 players, (2, 2, 2, 2) actions
hawk_dove: 2 players, (2, 2) actions
matching_pennies: 2 players, (2, 2) actions
penalty_kick: 2 players, (2, 2) actions
pk_3_actions: 2 players, (3, 3) actions
prisoners_dilemma: 2 players, (2, 2) actions
robot_escape: 2 players, (2, 2) actions
rock_paper_scissors: 2 players, (3, 3) actions
rps_both_hate_ties: 2 players, (3, 3) actions
rps_p1_likes_rock: 2 players, (3, 3) actions
rps_p2_dislikes_ties: 2 players, (3, 3) actions
two_by_three_by_four: 3 players, (2, 3, 4) actions


In [19]:
# Split the penalty-kick payoff array into one matrix per player
# (index 0 on the last axis for player 1, index 1 for player 2).
pk = games["penalty_kick"]
pk_p1_pays, pk_p2_pays = pk[:,:,0], pk[:,:,1]
print(pk_p1_pays, pk_p2_pays, sep="\n")

[[0.3 0.9]
 [0.6 0.2]]
[[0.7 0.1]
 [0.4 0.8]]


The penalty kick game is zero-sum because we can apply a positive affine transformation to player 2's payoffs that makes them equal to -(player 1's payoffs).

In [7]:
# Apply the affine map with slope m = 1 and intercept b = -1 to player 2's
# payoffs, then confirm the result matches the negation of player 1's payoffs.
pk_p2_transformed = -1 + 1*pk_p2_pays
print(np.allclose(-pk_p1_pays, pk_p2_transformed))

True


The `check_zero_sum` function should check whether there is a positive affine transformation, i.e. a slope `m > 0` and an intercept `b`, such that `m*u_2 + b = -u_1` holds at every action profile. If such a transformation exists, return `(m, b)`; otherwise return `None`.

In [8]:
def check_zero_sum(game):
    """Check whether a 2-player game is zero-sum up to a positive affine transformation.

    Searches for a slope ``m > 0`` and an intercept ``b`` such that
    ``m*u_2 + b == -u_1`` (within ``np.allclose`` tolerance) at every action
    profile, where ``u_1 = game[:, :, 0]`` and ``u_2 = game[:, :, 1]``.

    Parameters
    ----------
    game : np.ndarray
        3-dimensional payoff array; ``game[i, j, p]`` is read as the payoff of
        player ``p`` when the first player plays ``i`` and the second plays ``j``.

    Returns
    -------
    tuple of (float, float) or None
        ``(m, b)`` if such a transformation exists, otherwise ``None``.
        When player 2's payoffs are constant the slope is not unique; ``m = 1``
        is returned in that case (provided player 1's payoffs are constant too).

    Raises
    ------
    AssertionError
        If ``game`` is not 3-dimensional.
    """
    assert game.ndim == 3, "check_zero_sum() only works on 2-player games."

    u1 = np.asarray(game[:, :, 0], dtype=float).ravel()
    u2 = np.asarray(game[:, :, 1], dtype=float).ravel()

    # With no action profiles there is nothing to violate; the identity map works.
    if u2.size == 0:
        return (1.0, 0.0)

    u1_mean = u1.mean()
    u2_mean = u2.mean()

    if np.allclose(u2, u2[0]):
        # m*u_2 + b is constant for every m, so a solution exists only if u_1
        # is constant as well; any positive slope then works, so pick m = 1.
        m = 1.0
    else:
        # Least-squares slope of -u_1 regressed on u_2. If an exact affine
        # relation exists, this is its slope.
        u2_dev = u2 - u2_mean
        m = -np.dot(u1 - u1_mean, u2_dev) / np.dot(u2_dev, u2_dev)
    b = -u1_mean - m * u2_mean

    # Accept only a strictly positive slope whose fit is exact (up to tolerance).
    if m > 0 and np.allclose(m * u2 + b, -u1):
        return (float(m), float(b))
    return None

In [9]:
# Show both players' payoff matrices for the 3-action penalty-kick game
# (player 1's matrix first, then player 2's).
pk3 = games["pk_3_actions"]
print(pk3[:,:,0], pk3[:,:,1], sep="\n")

[[0.3 0.9 0.8]
 [0.6 0.2 0.6]
 [0.4 0.4 0.1]]
[[0.7 0.1 0.2]
 [0.4 0.8 0.4]
 [0.6 0.6 0.9]]


The following linear programs identify Nash equilibrium strategies for both players in the 3-action penalty kick game.

In [10]:
# Player 1's LP, written out term by term: choose probabilities for the three
# row actions of pk3 so as to minimize v_2, an upper bound on the expected
# payoff read from pk3[..., 1] (player 2's payoffs) for every column action.
p1_lp = pulp.LpProblem("Penalty_Kick", pulp.LpMinimize) # create LP object
p_kl = pulp.LpVariable("Pr_KL", 0, 1) # lower bound = 0, upper bound = 1
p_kr = pulp.LpVariable("Pr_KR", 0, 1) # lower bound = 0, upper bound = 1
p_ks = pulp.LpVariable("Pr_KS", 0, 1) # lower bound = 0, upper bound = 1
v2 = pulp.LpVariable("v_2") # no upper/lower bounds on utility

# The bare expression becomes the objective (shown under MINIMIZE in the printout).
p1_lp += v2
# The three probabilities must form a distribution.
p1_lp += p_kl + p_kr + p_ks == 1
# One constraint per column j of pk3: v_2 >= sum_i pk3[i, j, 1] * Pr(row i).
p1_lp += v2 >= pk3[0,0,1]*p_kl + pk3[1,0,1]*p_kr + pk3[2,0,1]*p_ks
p1_lp += v2 >= pk3[0,1,1]*p_kl + pk3[1,1,1]*p_kr + pk3[2,1,1]*p_ks
p1_lp += v2 >= pk3[0,2,1]*p_kl + pk3[1,2,1]*p_kr + pk3[2,2,1]*p_ks
print(p1_lp)


Penalty_Kick:
MINIMIZE
1*v_2 + 0
SUBJECT TO
_C1: Pr_KL + Pr_KR + Pr_KS = 1

_C2: - 0.7 Pr_KL - 0.4 Pr_KR - 0.6 Pr_KS + v_2 >= 0

_C3: - 0.1 Pr_KL - 0.8 Pr_KR - 0.6 Pr_KS + v_2 >= 0

_C4: - 0.2 Pr_KL - 0.4 Pr_KR - 0.9 Pr_KS + v_2 >= 0

VARIABLES
Pr_KL <= 1 Continuous
Pr_KR <= 1 Continuous
Pr_KS <= 1 Continuous
v_2 free Continuous



In [11]:

# Player 2's LP, built with comprehensions instead of hand-written terms:
# choose probabilities for the three column actions of pk3 so as to minimize
# v_1, an upper bound on the expected payoff read from pk3[..., 0]
# (player 1's payoffs) for every row action.
p2_lp = pulp.LpProblem("Penalty_Kick", pulp.LpMinimize) # create LP object
p2_vars = [pulp.LpVariable("Pr_" + a, 0, 1) for a in ["JL","JR","DJ"]]
v1 = pulp.LpVariable("v_1")

# The bare expression becomes the objective (shown under MINIMIZE in the printout).
p2_lp += v1
# The three probabilities must form a distribution.
p2_lp += pulp.lpSum(p2_vars) == 1
# One constraint per row a of pk3: v_1 >= sum_j pk3[a, j, 0] * Pr(column j).
for a in range(3):
    p2_lp += v1 >= pulp.lpSum(prob*util for prob,util in zip(p2_vars, pk3[a,:,0]))
print(p2_lp)

Penalty_Kick:
MINIMIZE
1*v_1 + 0
SUBJECT TO
_C1: Pr_DJ + Pr_JL + Pr_JR = 1

_C2: - 0.8 Pr_DJ - 0.3 Pr_JL - 0.9 Pr_JR + v_1 >= 0

_C3: - 0.6 Pr_DJ - 0.6 Pr_JL - 0.2 Pr_JR + v_1 >= 0

_C4: - 0.1 Pr_DJ - 0.4 Pr_JL - 0.4 Pr_JR + v_1 >= 0

VARIABLES
Pr_DJ <= 1 Continuous
Pr_JL <= 1 Continuous
Pr_JR <= 1 Continuous
v_1 free Continuous



In [40]:
# Solve each player's LP with CBC (msg=0 is passed to keep the solver quiet),
# then collect the optimized probabilities into one strategy vector per player.
p1_lp.solve(solver=pulp.PULP_CBC_CMD(msg=0))
p1_strat = np.array([var.varValue for var in (p_kl, p_kr, p_ks)])

p2_lp.solve(solver=pulp.PULP_CBC_CMD(msg=0))
p2_strat = np.array([var.varValue for var in p2_vars])

# The strategy profile lists player 1's mixed strategy first, then player 2's.
prof = [p1_strat, p2_strat]
print(prof)

[array([0.4, 0.6, 0. ]), array([0.7, 0.3, 0. ])]


Generalize the approach demonstrated above to identify and return a Nash equilibrium in any 2-player zero-sum game.

In [41]:
def _minimax_strategy(opponent_payoffs, value_name):
    """Solve one player's LP: minimize the opponent's best-response payoff.

    ``opponent_payoffs[i, j]`` is the opponent's payoff when this player takes
    its own action ``i`` and the opponent takes action ``j``. Returns this
    player's optimal mixed strategy as a 1-D array over its own actions.
    """
    n_own, n_opp = opponent_payoffs.shape

    lp = pulp.LpProblem("Game", pulp.LpMinimize) # create LP object
    probs = [pulp.LpVariable("A_" + str(a), 0, 1) for a in range(n_own)]
    value = pulp.LpVariable(value_name) # opponent's utility; unbounded

    lp += value                   # objective
    lp += pulp.lpSum(probs) == 1  # probabilities form a distribution
    # One constraint per OPPONENT action: the value must be at least the
    # opponent's expected payoff for that action against this mixed strategy.
    for opp_action in range(n_opp):
        lp += value >= pulp.lpSum(
            prob * float(util)
            for prob, util in zip(probs, opponent_payoffs[:, opp_action])
        )

    lp.solve(solver=pulp.PULP_CBC_CMD(msg=0))
    return np.array([var.varValue for var in probs])


def two_player_zero_sum_Nash(game):
    """Compute a Nash equilibrium of a 2-player zero-sum game by linear programming.

    Each player independently picks the mixed strategy that minimizes the
    other player's best-response payoff. This yields a Nash equilibrium when
    the game is zero-sum (or zero-sum up to a positive affine transformation
    of one player's payoffs); that property is assumed, not checked here.

    Parameters
    ----------
    game : np.ndarray
        3-dimensional payoff array; ``game[i, j, p]`` is the payoff of player
        ``p`` (0 or 1) when player 1 plays ``i`` and player 2 plays ``j``.
        The two players may have different numbers of actions.

    Returns
    -------
    list of np.ndarray
        ``[p1_strat, p2_strat]``: one probability vector per player, with
        lengths ``game.shape[0]`` and ``game.shape[1]``.

    Raises
    ------
    AssertionError
        If ``game`` is not 3-dimensional.
    """
    assert game.ndim == 3, "two_player_zero_sum_Nash() only works on 2-player games."

    # Player 1 bounds player 2's payoffs; rows are already player 1's actions.
    p1_strat = _minimax_strategy(game[:, :, 1], "v_2")

    # Player 2 bounds player 1's payoffs; transpose so that rows are player 2's
    # own actions, matching the helper's orientation.
    p2_strat = _minimax_strategy(game[:, :, 0].T, "v_1")

    prof = [p1_strat, p2_strat]

    return prof