/
repeated_game.py
202 lines (171 loc) · 6.33 KB
/
repeated_game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
"""
Algorithms for repeated game.
"""
import numpy as np
from scipy.spatial import ConvexHull
from numba import jit
def AS(g, delta, tol=1e-12, max_iter=500, u=np.zeros(2)):
"""
Using AS algorithm to compute the set of payoff pairs of all pure-strategy
subgame-perfect equilibria with public randomization for any repeated
two-player games with perfect monitoring and discounting, following
Abreu and Sannikov (2014).
Parameters
----------
g : NormalFormGame
NormalFormGame instance with 2 players.
delta : scalar(float)
The discounting factor.
tol : scalar(float), optional(default=1e-12)
Tolerance for convergence checking.
max_iter : scalar(int), optional(default=500)
Maximum number of iterations.
u : ndarray(float, ndim=1)
The initial threat points.
Returns
-------
hull : scipy.spatial.ConvexHull
The convex hull of feasible payoff pairs.
"""
best_dev_gains = _best_dev_gains(g, delta)
C = np.empty((4, 2))
IC = np.empty(2)
action_profile_payoff = np.empty(2)
# array for checking if payoff is inside the polytope or not
# the last entry is set to be 1
extended_payoff = np.ones(3)
# array to store new points of C in each intersection
# at most 4 new points will be generated
new_pts = np.empty((4, 2))
# array to store the points of W
# the length of v is limited by |A1|*|A2|*4
W_new = np.empty((np.prod(g.nums_actions)*4, 2))
W_old = np.empty((np.prod(g.nums_actions)*4, 2))
# count the new points generated in each iteration
n_new_pt = 0
# initialization
payoff_pts = \
g.payoff_profile_array.reshape(np.prod(g.nums_actions), 2)
W_new[:np.prod(g.nums_actions)] = payoff_pts
n_new_pt = np.prod(g.nums_actions)
n_iter = 0
while True:
W_old[:n_new_pt] = W_new[:n_new_pt]
n_old_pt = n_new_pt
hull = ConvexHull(W_old[:n_old_pt])
W_new, n_new_pt = \
R(delta, g.nums_actions, g.payoff_arrays,
best_dev_gains, hull.points, hull.vertices,
hull.equations, u, IC, action_profile_payoff,
extended_payoff, new_pts, W_new)
n_iter += 1
if n_iter >= max_iter:
break
# check convergence
if n_new_pt == n_old_pt:
if np.linalg.norm(W_new[:n_new_pt] - W_old[:n_new_pt]) < tol:
break
# update threat points
update_u(u, W_new[:n_new_pt])
hull = ConvexHull(W_new[:n_new_pt])
return hull
@jit()
def _best_dev_gains(g, delta):
"""
Calculate the normalized payoff gains from deviating from the current
action to the best response for each player.
"""
best_dev_gains0 = (1-delta)/delta * \
(np.max(g.payoff_arrays[0], 0) - g.payoff_arrays[0])
best_dev_gains1 = (1-delta)/delta * \
(np.max(g.payoff_arrays[1], 0) - g.payoff_arrays[1])
return best_dev_gains0, best_dev_gains1
@jit(nopython=True)
def R(delta, nums_actions, payoff_arrays, best_dev_gains, points,
vertices, equations, u, IC, action_profile_payoff,
extended_payoff, new_pts, W_new, tol=1e-10):
"""
Updating the payoff convex hull by iterating all action pairs.
Using the R operator proposed by Abreu and Sannikov 2014.
"""
n_new_pt = 0
for a0 in range(nums_actions[0]):
for a1 in range(nums_actions[1]):
action_profile_payoff[0] = payoff_arrays[0][a0, a1]
action_profile_payoff[1] = payoff_arrays[1][a1, a0]
IC[0] = u[0] + best_dev_gains[0][a0, a1]
IC[1] = u[1] + best_dev_gains[1][a1, a0]
# check if payoff is larger than IC
if (action_profile_payoff >= IC).all():
# check if payoff is inside the convex hull
extended_payoff[:2] = action_profile_payoff
if (np.dot(equations, extended_payoff) <= tol).all():
W_new[n_new_pt] = action_profile_payoff
n_new_pt += 1
continue
new_pts, n = find_C(new_pts, points, vertices, IC, tol)
for i in range(n):
W_new[n_new_pt] = \
delta * new_pts[i] + (1-delta) * action_profile_payoff
n_new_pt += 1
return W_new, n_new_pt
@jit(nopython=True)
def find_C(C, points, vertices, IC, tol):
"""
Find all the intersection points between the current polytope
and the two IC constraints. It is done by iterating simplex
counterclockwise.
"""
# record the number of intersections for each IC.
n_IC = [0, 0]
weights = np.empty(2)
# vertices is ordered counterclockwise
for i in range(len(vertices)-1):
intersect(C, n_IC, weights, IC,
points[vertices[i]], points[vertices[i+1]], tol)
intersect(C, n_IC, weights, IC,
points[vertices[-1]], points[vertices[0]], tol)
# check the case that IC is a interior point of the polytope
n = n_IC[0] + n_IC[1]
if (n_IC[0] == 1 & n_IC[1] == 1):
C[2, 0] = IC[0]
C[2, 1] = IC[1]
n += 1
return C, n
@jit(nopython=True)
def intersect(C, n_IC, weights, IC, pt0, pt1, tol):
"""
Find the intersection points of a simplex and IC constraints.
"""
for i in range(2):
if (abs(pt0[i] - pt1[i]) < tol):
None
else:
weights[i] = (pt0[i] - IC[i]) / (pt0[i] - pt1[i])
# intersection of IC[j]
if (0 < weights[i] <= 1):
x = (1 - weights[i]) * pt0[1-i] + weights[i] * pt1[1-i]
# x has to be strictly higher than IC[1-j]
# if it is equal, then it means IC is one of the vertex
# it will be added to C in below
if x - IC[1-i] > tol:
C[n_IC[0]+n_IC[1], i] = IC[i]
C[n_IC[0]+n_IC[1], 1-i] = x
n_IC[i] += 1
elif x - IC[1-i] > -tol:
C[n_IC[0]+n_IC[1], i] = IC[i]
C[n_IC[0]+n_IC[1], 1-i] = x
n_IC[i] += 1
# to avoid duplication when IC is a vertex
break
return C, n_IC
@jit(nopython=True)
def update_u(u, v):
"""
Update the threat points.
"""
for i in range(2):
v_min = v[:, i].min()
if u[i] < v_min:
u[i] = v_min
return u