In [16]:
import sympy as sym

import numpy as np

import repeated_play

In [17]:
import itertools

In [18]:
def trnsf_transition_m_memory_two(player, analytical=True):
    """Build the 4x4 transition matrix induced by a four-entry strategy.

    Row ``i`` holds ``player[i]`` followed by ``1 - player[i]`` in two
    adjacent columns: columns 0-1 for rows 0 and 2, columns 2-3 for rows
    1 and 3. All other entries are 0.

    Parameters
    ----------
    player : sequence
        Indexable with at least four entries (``player[0]`` .. ``player[3]``).
    analytical : bool, default True
        When equal to ``True`` the result is a ``sympy.Matrix``; otherwise
        it is a ``numpy.ndarray``.

    Returns
    -------
    sympy.Matrix or numpy.ndarray
        The 4x4 matrix described above.
    """
    rows = []
    for state in range(4):
        entry = player[state]
        pair = [entry, (1 - entry)]
        # Even rows fill the left half, odd rows the right half.
        rows.append(pair + [0, 0] if state % 2 == 0 else [0, 0] + pair)

    if analytical == True:
        return sym.Matrix(rows)
    return np.array(rows)

In [19]:
def trnsf_transition_m_memory_three(player, analytical=True):
    """Build a 4x4 transition matrix from the first four entries of ``player``.

    NOTE(review): despite the name, this produces exactly the same 4x4
    layout as ``trnsf_transition_m_memory_two``; it looks like a placeholder
    for a larger memory-three matrix -- confirm before relying on it.

    Parameters
    ----------
    player : sequence
        Indexable with at least four entries.
    analytical : bool, default True
        When equal to ``True`` a ``sympy.Matrix`` is returned, otherwise a
        ``numpy.ndarray``.
    """
    a, b, c, d = player[0], player[1], player[2], player[3]
    entries = [
        [a, (1 - a), 0, 0],
        [0, 0, b, (1 - b)],
        [c, (1 - c), 0, 0],
        [0, 0, d, (1 - d)],
    ]
    # Pick the container lazily so only the requested backend is touched.
    build = sym.Matrix if analytical == True else np.array
    return build(entries)

In [20]:
# Sympy symbols for the four payoff parameters R, S, T, P.
r, s, t, p = sym.symbols("R, S, T, P")

In [21]:
# Symbols rendered as \tilde{p}_1..\tilde{p}_4; used below as the entries of the symbolic transition matrix.
q1, q2, q3, q4 = sym.symbols(r"\tilde{p}_1, \tilde{p}_2, \tilde{p}_3, \tilde{p}_4")

In [22]:
# Symbols p_1..p_4 for the entries of the `player` strategy list defined further down.
p1, p2, p3, p4 = sym.symbols(r"p_1, p_2, p_3, p_4")

### Algebra

In [23]:
# Hand-entered expression in P, R, p_2, p_4; compared against `lhs` in the inequality solved below.
# NOTE(review): presumably transcribed from a condition derived elsewhere -- confirm its origin.
rhs = (2 * p + r * (p4 - 3)) / (p - 1) - p2

In [24]:
# Hand-entered counterpart of `rhs`, also in P, R, p_2, p_4.
# NOTE(review): presumably transcribed from a condition derived elsewhere -- confirm its origin.
lhs = (p * (p2 - 1) - p2 - 1) / r - p4

In [25]:
lhs

-p_4 + (P*(p_2 - 1) - p_2 - 1)/R

In [26]:
(lhs).factor().collect(p2).collect(p4)

(-P - R*p_4 + p_2*(P - 1) - 1)/R

In [27]:
# Solve rhs < lhs for p_2, then take the right-hand side of the returned relation and group it by R and P.
sym.solve(rhs < lhs, p2).rhs.collect(r).collect(p)

-(P**2 + R**2*(p_4 - 3) + R*(P*(p_4 + 2) - p_4) - 1)/(R*(P - 1))

### $n=2$

In [28]:
# Symbolic (sympy) transition matrix with the \tilde{p}_i symbols as entries.
M = trnsf_transition_m_memory_two([q1, q2, q3, q4], analytical=True)

In [29]:
print(sym.latex(M))

\left[\begin{matrix}\tilde{p}_1 & 1 - \tilde{p}_1 & 0 & 0\\0 & 0 & \tilde{p}_2 & 1 - \tilde{p}_2\\\tilde{p}_3 & 1 - \tilde{p}_3 & 0 & 0\\0 & 0 & \tilde{p}_4 & 1 - \tilde{p}_4\end{matrix}\right]


Then the payoff of the self-reactive player in the general repeated prisoner's dilemma is 


$\pi(q, p) = a_R \cdot R + a_S \cdot S + a_T \cdot T + a_P \cdot P.$

Here, the coefficients are, for example:

$a_R = v_{CC} \, p_{CC} \, q_{CC} + v_{CD} \, p_{CD} \, q_{CD} + v_{DC} \, p_{DC} \, q_{DC} + v_{DD} \, p_{DD} \, q_{DD}.$

In [30]:
# All 16 four-entry tuples over {0, 1}, in itertools.product order ((0,0,0,0) first, (1,1,1,1) last).
pure_self_reactive = list(itertools.product([0, 1], repeat=4))

In [31]:
player = [p1, p2, p3, p4]

In [32]:
# First pure strategy in product order, i.e. (0, 0, 0, 0).
coplayer = pure_self_reactive[0]

In [33]:
M = trnsf_transition_m_memory_two(coplayer, analytical=False)

In [34]:
# Keep only the first entry of what stationary_distribution returns (a later cell iterates over all of them).
ss = repeated_play.stationary_distribution(M)[0]

In [35]:
# Sum of the first two stationary weights.
# NOTE(review): presumably the co-player's cooperation rate -- confirm the state ordering.
rho_q = ss[0] + ss[1]

In [37]:
# Average of the player's four entries, weighted by the stationary distribution `ss`.
rho_p = sum([ss[i] * player[i] for i in range(4)])

In [42]:
# Payoff assembled from products of the two rates: R weighted by rho_p*rho_q, S by rho_p*(1-rho_q),
# T by (1-rho_p)*rho_q and P by (1-rho_p)*(1-rho_q); factored for display.
(rho_p * rho_q * r + rho_p * (1 - rho_q) * s + (1 - rho_p) * rho_q  * t 
 + (1 - rho_p) * (1 - rho_q)  * p).factor()

-1.0*(P*p_4 - P - S*p_4)

In [209]:
# Numerical check: the payoff assembled from the 4-state stationary
# distribution must match the one obtained from the 16-entry memory-two game
# built by repeated_play.
# NOTE: this cell rebinds r, s, t, p and p1..p4 to numbers, replacing the
# sympy symbols defined earlier in the notebook.
r, s, t, p = 3, 0, 5, 1

q_v1, q_v2, q_v3, q_v4 = pure_self_reactive[1]
coplayer = [q_v1, q_v2, q_v3, q_v4]

# Fixed seed so the random player strategy is reproducible.
np.random.seed(0)
p1, p2, p3, p4 = np.random.random(4)
player = [p1, p2, p3, p4]

M = trnsf_transition_m_memory_two(coplayer, analytical=False)
ss = repeated_play.stationary_distribution(M)[0]

rho_q = ss[0] + ss[1]
rho_p = sum(ss[k] * player[k] for k in range(4))

# Stationary weight of each combination of player entry / co-player entry.
x1 = sum(ss[k] * player[k] * coplayer[k] for k in range(4))
x2 = sum(ss[k] * player[k] * (1 - coplayer[k]) for k in range(4))
x3 = sum(ss[k] * (1 - player[k]) * coplayer[k] for k in range(4))
x4 = sum(ss[k] * (1 - player[k]) * (1 - coplayer[k]) for k in range(4))

# S is attached to x2 and T to x3 here.
payoff = x1 * r + x2 * s + x3 * t + x4 * p

M2 = repeated_play.transition_matrix_repeated_game(
    [p1, p2, p1, p2, p3, p4, p3, p4] * 2,
    [q_v1, q_v1, q_v2, q_v2] * 2 + [q_v3, q_v3, q_v4, q_v4] * 2,
    memory="two",
    analytical=False,
)

ss2 = repeated_play.stationary_distribution(M2, analytical=False)

assert np.isclose(payoff, sum(ss2 @ np.array([r, s, t, p] * 4)))

In [405]:
# Same numerical check as the previous cell, but with S and T swapped in both
# the 4-state payoff (payoff2) and the 16-entry payoff vector.
# NOTE: this cell rebinds r, s, t, p and p1..p4 to numbers, replacing the
# sympy symbols defined earlier in the notebook.
r, s, t, p = 3, 0, 5, 1

q_v1, q_v2, q_v3, q_v4 = pure_self_reactive[1]
coplayer = [q_v1, q_v2, q_v3, q_v4]

# Fixed seed so the random player strategy is reproducible.
np.random.seed(0)
p1, p2, p3, p4 = np.random.random(4)
player = [p1, p2, p3, p4]

M = trnsf_transition_m_memory_two(coplayer, analytical=False)
ss = repeated_play.stationary_distribution(M)[0]

rho_q = ss[0] + ss[1]
rho_p = sum(ss[k] * player[k] for k in range(4))

# Stationary weight of each combination of player entry / co-player entry.
x1 = sum(ss[k] * player[k] * coplayer[k] for k in range(4))
x2 = sum(ss[k] * player[k] * (1 - coplayer[k]) for k in range(4))
x3 = sum(ss[k] * (1 - player[k]) * coplayer[k] for k in range(4))
x4 = sum(ss[k] * (1 - player[k]) * (1 - coplayer[k]) for k in range(4))

# S is attached to x3 and T to x2 here (swapped relative to `payoff`).
payoff2 = x1 * r + x3 * s + x2 * t + x4 * p

M2 = repeated_play.transition_matrix_repeated_game(
    [p1, p2, p1, p2, p3, p4, p3, p4] * 2,
    [q_v1, q_v1, q_v2, q_v2] * 2 + [q_v3, q_v3, q_v4, q_v4] * 2,
    memory="two",
    analytical=False,
)

ss2 = repeated_play.stationary_distribution(M2, analytical=False)

assert np.isclose(payoff2, sum(ss2 @ np.array([r, t, s, p] * 4)))

### Analytical Expressions

In [44]:
# Rebind p_1..p_4 to sympy symbols (the numeric check cells above overwrote them with floats).
p1, p2, p3, p4 = sym.symbols(r"p_1, p_2, p_3, p_4")

# The first entry is fixed to 1; the remaining three stay symbolic.
player = [1, p2, p3, p4]

# Rebind the payoff parameters to symbols as well.
r, s, t, p = sym.symbols("R, S, T, P")

In [45]:
# For every pure four-entry co-player strategy, and for every stationary
# distribution returned for its transition matrix, record the factored
# payoff expression together with the strategy's index.
payoffs = []

for i, coplayer in enumerate(pure_self_reactive):
    M = trnsf_transition_m_memory_two(coplayer, analytical=False)
    states = repeated_play.stationary_distribution(M)

    for ss in states:
        # Inner index is named `k` so it cannot be confused with the outer `i`.
        x1 = sum(ss[k] * player[k] * coplayer[k] for k in range(4))
        x2 = sum(ss[k] * player[k] * (1 - coplayer[k]) for k in range(4))
        x3 = sum(ss[k] * (1 - player[k]) * coplayer[k] for k in range(4))
        x4 = sum(ss[k] * (1 - player[k]) * (1 - coplayer[k]) for k in range(4))

        # S is attached to x3 and T to x2.
        payoff = (x1 * r + x3 * s + x2 * t + x4 * p).factor()

        payoffs.append((i, payoff))
    


In [46]:
len(payoffs)

25

In [47]:
pure_self_reactive[3]

(0, 0, 1, 1)

In [48]:
payoffs

[(0, -1.0*(P*p_4 - P - T*p_4)),
 (1,
  -0.666666666666667*(0.5*P*p_2 + 0.5*P*p_3 - 1.0*P - 0.5*R*p_4 + 0.5*S*p_4 - 0.5*S - 0.5*T*p_2 - 0.5*T*p_3)),
 (2, -1.0*(P*p_4 - P - T*p_4)),
 (3,
  -0.5*(0.5*P*p_2 - 0.5*P - 0.5*R*p_3 - 0.5*R*p_4 + 0.5*S*p_3 + 0.5*S*p_4 - 1.0*S - 0.5*T*p_2 - 0.5*T)),
 (4, -0.5*(P*p_3 - P - R*p_2 + S*p_2 - S - T*p_3)),
 (4, -1.0*(P*p_4 - P - T*p_4)),
 (5, -0.5*(P*p_3 - P - R*p_2 + S*p_2 - S - T*p_3)),
 (6,
  0.666666666666667*(0.5*R*p_2 + 0.5*R*p_3 - 0.5*S*p_2 - 0.5*S*p_3 + 1.0*S + 0.5*T)),
 (6, -1.0*(P*p_4 - P - T*p_4)),
 (7,
  0.666666666666667*(0.5*R*p_2 + 0.5*R*p_3 - 0.5*S*p_2 - 0.5*S*p_3 + 1.0*S + 0.5*T)),
 (8, 1.0*R),
 (8, -1.0*(P*p_4 - P - T*p_4)),
 (9,
  -0.666666666666667*(0.5*P*p_2 + 0.5*P*p_3 - 1.0*P - 0.5*R*p_4 + 0.5*S*p_4 - 0.5*S - 0.5*T*p_2 - 0.5*T*p_3)),
 (9, 1.0*R),
 (10, 1.0*R),
 (10, -1.0*(P*p_4 - P - T*p_4)),
 (11, 1.0*R),
 (12, -0.5*(P*p_3 - P - R*p_2 + S*p_2 - S - T*p_3)),
 (12, 1.0*R),
 (12, -1.0*(P*p_4 - P - T*p_4)),
 (13, -0.5*(P*p_3 - P - R

In [None]:
# TODO: unfinished cell -- it originally held only a bare `for`, which is a SyntaxError and stops Run All.

In [41]:
# Strategy indices whose payoff expression equals the one stored in payoffs[0].
[idx for idx, expr in payoffs if expr == payoffs[0][1]]

[0, 2, 4, 6, 8, 10, 12, 14]

In [42]:
payoffs[0][1].simplify().collect(p)

P*(1.0 - 1.0*p_4) + 1.0*T*p_4

In [43]:
# Strategy indices whose payoff expression equals the one stored in payoffs[1].
[idx for idx, expr in payoffs if expr == payoffs[1][1]]

[1, 9]

In [37]:
# Hand-written closed form for payoffs[1][1]; the next cell shows their difference factors to 0.
wrt2 =  -(p * (- 2 + p2  + p3 ) - r * p4 + s * (p4 - 1) - t * (p2 + p3)) / 3

In [38]:
(wrt2 - payoffs[1][1]).factor()

0

In [39]:
wrt2

-P*(p_2 + p_3 - 2)/3 + R*p_4/3 - S*(p_4 - 1)/3 + T*(p_2 + p_3)/3

In [116]:
# Strategy indices whose payoff expression equals the one stored in payoffs[3].
[idx for idx, expr in payoffs if expr == payoffs[3][1]]

[3]

In [155]:
# Hand-written closed form for payoffs[3][1]; the next cell shows their difference factors to 0.
wrt3 = ( p * (1 - p2) + r * (p3 + p4) - s * (p3 + p4 - 2) + t * (p2 + 1)) / 4

In [156]:
(wrt3 - payoffs[3][1]).factor()

0

In [157]:
wrt3

P*(1 - p_2)/4 + R*(p_3 + p_4)/4 - S*(p_3 + p_4 - 2)/4 + T*(p_2 + 1)/4

In [120]:
# Strategy indices whose payoff expression equals the one stored in payoffs[4].
[idx for idx, expr in payoffs if expr == payoffs[4][1]]

[4, 5, 12, 13]

In [121]:
sym.solve(wrt3 - r, p3 + p4)[0]

(P*p_2 - P + 4*R - 2*S - T*p_2 - T)/(R - S)

In [46]:
# Hand-written closed form for payoffs[4][1] (and payoffs[6][1]); the cells below show both differences expand to 0.
wrt4 = (p * (- p3 + 1) + r * p2 - s * (p2 - 1) + t * p3) / 2

In [47]:
(wrt4 - payoffs[4][1]).expand()

0

In [48]:
wrt4

P*(1 - p_3)/2 + R*p_2/2 - S*(p_2 - 1)/2 + T*p_3/2

In [49]:
(wrt4 - payoffs[6][1]).expand()

0

In [51]:
# Strategy indices whose payoff expression equals the one stored in payoffs[7].
[idx for idx, expr in payoffs if expr == payoffs[7][1]]

[6, 7]

In [52]:
# Hand-written closed form for payoffs[7][1]; the next cell shows their difference factors to 0.
wrt5 = (r * (p2 + p3) - s * (p2 + p3 - 2) + t) / 3

In [53]:
(wrt5 - payoffs[7][1]).factor()

0

In [54]:
wrt5

R*(p_2 + p_3)/3 - S*(p_2 + p_3 - 2)/3 + T/3

### Conditions

In [55]:
sym.solve(payoffs[0][1] - r, p4)[0]

(P - R)/(P - T)

In [56]:
sym.solve(payoffs[0][1] - r, p4)[0].subs({s:0, t:1})

(P - R)/(P - 1)

In [61]:
sym.solve(wrt2 - r, p2 + p3)[0].collect(r).collect(s)

(2*P + R*(p_4 - 3) + S*(1 - p_4))/(P - T)

In [62]:
sym.solve(wrt2 - r, p2 + p3)[0].collect(r).collect(s).subs({s:0, t:1})

(2*P + R*(p_4 - 3))/(P - 1)

In [63]:
sym.solve(wrt5 - r, p2 + p3)[0]

(3*R - 2*S - T)/(R - S)

In [64]:
sym.solve(wrt5 - r, p2 + p3)[0].subs({s:0, t:1})

(3*R - 1)/R

In [68]:
sym.solve(wrt3 - r, p3 + p4)[0].collect(p).collect(t)

(P*(p_2 - 1) + 4*R - 2*S + T*(-p_2 - 1))/(R - S)

In [69]:
print(sym.latex(sym.solve(wrt3 - r, p3 + p4)[0].collect(p).collect(t)))

\frac{P \left(p_{2} - 1\right) + 4 R - 2 S + T \left(- p_{2} - 1\right)}{R - S}


In [70]:
sym.solve(wrt3 - r, p3 + p4)[0].collect(p).collect(t).subs({s:0, t:1})

(P*(p_2 - 1) + 4*R - p_2 - 1)/R

In [71]:
print(sym.latex(sym.solve(wrt3 - r, p3 + p4)[0].collect(p).collect(t).subs({s:0, t:1})))

\frac{P \left(p_{2} - 1\right) + 4 R - p_{2} - 1}{R}


In [72]:
wrt4

P*(1 - p_3)/2 + R*p_2/2 - S*(p_2 - 1)/2 + T*p_3/2

In [73]:
sym.solve(wrt4 - r, p2)[0]

(P*p_3 - P + 2*R - S - T*p_3)/(R - S)

In [75]:
sym.solve(wrt4 - r, p2)[0].subs({t:1, s:0})

(P*p_3 - P + 2*R - p_3)/R

### Comparing conditions

In [122]:
sym.solve(wrt3 - r, p3)

[(P*p_2 - P - R*p_4 + 4*R + S*p_4 - 2*S - T*p_2 - T)/(R - S)]

In [149]:
# Value of p_2 at which wrt3 equals R, specialised to T = 1, S = 0. Overwrites the `lhs` from the Algebra section.
lhs = sym.solve(wrt3 - r, p2)[0].subs({t:1, s:0})

In [150]:
lhs

(P + R*p_3 + R*p_4 - 4*R + 1)/(P - 1)

In [151]:
lhs.collect(r)

(P + R*(p_3 + p_4 - 4) + 1)/(P - 1)

In [152]:
# Value of p_2 at which wrt2 equals R, specialised to T = 1, S = 0. Overwrites the `rhs` from the Algebra section.
rhs = sym.solve(wrt2 - r, p2)[0].subs({t:1, s:0})

In [154]:
rhs

(-P*p_3 + 2*P + R*p_4 - 3*R + p_3)/(P - 1)

In [153]:
(lhs - rhs).factor()

(p_3 - 1)*(P + R - 1)/(P - 1)

In [91]:
(lhs - rhs).factor()

(p_3 - 1)*(P + R - 1)/(P - 1)

In [96]:
sym.solve(wrt3 - r, p3 + p4)[0]

(P*p_2 - P + 4*R - 2*S - T*p_2 - T)/(R - S)

In [99]:
sym.solve(wrt2 - r, p2 + p3)[0]

(2*P + R*p_4 - 3*R - S*p_4 + S)/(P - T)

In [138]:
# Rebind lhs: value of p_2 at which wrt4 equals R, specialised to T = 1, S = 0.
lhs = sym.solve(wrt4 - r, p2)[0].subs({t:1, s:0})

In [139]:
# Rebind rhs: value of p_2 at which wrt3 equals R, specialised to T = 1, S = 0.
rhs = sym.solve(wrt3 - r, p2)[0].subs({t:1, s:0})

In [141]:
(lhs - rhs).factor()

(P**2*p_3 - P**2 + P*R - 2*P*p_3 + P - R**2*p_3 - R**2*p_4 + 4*R**2 - 3*R + p_3)/(R*(P - 1))