In [1]:
import itertools

import numpy as np

import tqdm

import math

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import os

os.chdir('..')

In [4]:
from importlib.machinery import SourceFileLoader

eq = SourceFileLoader("eq", "src/equilibria.py").load_module()

from eq import *

In [5]:
from importlib.machinery import SourceFileLoader

main = SourceFileLoader("main", "src/main.py").load_module()

from main import *

In [6]:
import tqdm

In [7]:
import axelrod as axl

In [8]:
from axelrod.action import Action

C, D = Action.C, Action.D

In this notebook we evaluate the result **Appendix A: Shortest-Memory Player Sets the Rules of the Game** from [Press & Dyson](https://www.pnas.org/doi/abs/10.1073/pnas.1206569109).


We will initially check that we can get the **memory-one representation of a memory-two strategy**.

In [9]:
set_of_memory_one_s = list(itertools.product([0, 1], repeat=4))

In [10]:
set_of_memory_two_s = list(itertools.product([0, 1], repeat=16))

#### Approach: simulations

In [11]:
def simulate_play(p, q, turns=10 * 5, noise=0):
    
    states_to_actions = {(C, C): 0, 
                     (C, D): 0, 
                     (D, C): 0, 
                     (D, D):0}

    states_counts = {(C, C): 0, 
                     (C, D): 0, 
                     (D, C): 0, 
                     (D, D):0}

    player = axl.MemoryTwoPlayer([p[0], p[1], p[4], p[5],
                                  p[2], p[3], p[6], p[7],
                                  p[8], p[9], p[12], p[13], 
                                  p[10], p[11], p[14], p[15]])

    coplayer = axl.MemoryTwoPlayer([q[0], q[1], q[4], q[5],
                                    q[2], q[3], q[6], q[7],
                                    q[8], q[9], q[12], q[13], 
                                    q[10], q[11], q[14], q[15]])
    
    
    match = axl.Match(players=[player, coplayer], turns=turns, noise=noise)
    
    _ = match.play()
  
    for i, history in enumerate(match.result[2:-1]):

        states_counts[history[::-1]] += 1
        action = match.result[i + 2 +1][-1]

        if action == C:
            states_to_actions[history[::-1]] += 1
        
        
    strategy = []
    states = [(C, C), (C, D), (D, C), (D, D)]

    for state in states:
        if states_counts[state] > 0:
            strategy.append(states_to_actions[state] / states_counts[state])
        else:
            strategy.append(0)
    
    return strategy, states_counts, states_to_actions

In [12]:
def simulate_play_mem_two(p, q, turns=10 * 5, noise=0):
    
    states_to_actions = {((C, C), (C, C)): 0, 
                         ((C, C), (C, D)): 0, 
                         ((C, C), (D, C)): 0, 
                         ((C, C), (D, D)):0,
                         ((C, D), (C, C)): 0,
                         ((C, D), (C, D)): 0,
                         ((C, D), (D, C)): 0,
                         ((C, D), (D, D)): 0,
                         ((D, C), (C, C)): 0,
                         ((D, C), (C, D)): 0,
                         ((D, C), (D, C)): 0,
                         ((D, C), (D, D)): 0,
                         ((D, D), (C, C)): 0,
                         ((D, D), (C, D)): 0,
                         ((D, D), (D, C)): 0,
                         ((D, D), (D, D)): 0}

    states_counts = {((C, C), (C, C)): 0, 
                     ((C, C), (C, D)): 0, 
                     ((C, C), (D, C)): 0, 
                     ((C, C), (D, D)): 0,
                     ((C, D), (C, C)): 0,
                     ((C, D), (C, D)): 0,
                     ((C, D), (D, C)): 0,
                     ((C, D), (D, D)): 0,
                     ((D, C), (C, C)): 0,
                     ((D, C), (C, D)): 0,
                     ((D, C), (D, C)): 0,
                     ((D, C), (D, D)): 0,
                     ((D, D), (C, C)): 0,
                     ((D, D), (C, D)): 0,
                     ((D, D), (D, C)): 0,
                     ((D, D), (D, D)): 0}

    player = axl.MemoryTwoPlayer([p[0], p[1], p[4], p[5],
                                  p[2], p[3], p[6], p[7],
                                  p[8], p[9], p[12], p[13], 
                                  p[10], p[11], p[14], p[15]])

    coplayer = axl.MemoryTwoPlayer([q[0], q[1], q[4], q[5],
                                    q[2], q[3], q[6], q[7],
                                    q[8], q[9], q[12], q[13], 
                                    q[10], q[11], q[14], q[15]])
    
    
    match = axl.Match(players=[player, coplayer], turns=turns, noise=noise)
    
    _ = match.play()
  
    for i, (previous, history) in enumerate(zip(match.result[1:-2], match.result[2:-1])):

        states_counts[(previous[::-1], history[::-1])] += 1
        action = match.result[i + 2 + 1][-1]

        if action == C:
            states_to_actions[(previous[::-1], history[::-1])] += 1
        
    
    return states_counts, states_to_actions

In [13]:
def invariant_distributions(M):
    """Returns all the invariant distributions in case of absorbing states."""
    
    stationaries = []
    
    eigenvalues, eigenvectors = np.linalg.eig(M.T)
    
    for index in np.where(eigenvalues == np.max(eigenvalues))[0]:
        
        eigenvectors_one = eigenvectors[:, index]

        stationary = eigenvectors_one / eigenvectors_one.sum()

        stationaries.append(stationary.real)
        
    return stationaries


def calculate_ss_for_mem_one(player, coplayer):
    """
    Invariant distribution for memory-one.
    """
    M = calculate_M(player, coplayer)
    ss = invariant_distributions(M)
    
    return ss

The initial check with just ALLC as the player.

In [14]:
b, c, player = 2, 1, [1, 1, 1, 1]

for i, coplayer in enumerate(set_of_memory_one_s):

    mem_one_representation, _, _ = simulate_play(player * 4, coplayer * 4, turns=1000)

    ss = calculate_ss_for_mem_one(player, coplayer)

    ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)
    
    check = False
    
    for ss1 in ss_estimated:
        for ss2 in ss:
            if (np.isclose(ss1, ss2, atol=10**-3).all()):
                check = True

    assert check, f"{ss} and {ss_estimated}. player: {player} coplayer:{coplayer}"

Secondly we check all of them.

In [15]:
b, c = 2, 1

for player in tqdm.tqdm_notebook(set_of_memory_one_s):

    for i, coplayer in enumerate(set_of_memory_one_s):

        mem_one_representation, a, b = simulate_play(player * 4, coplayer * 4, turns=1000, noise=0.0)

        ss_estimated = calculate_ss_for_mem_one(player, coplayer)

        ss = calculate_ss_for_mem_one(player, mem_one_representation)

        check = False

        for ss1 in ss_estimated:
            for ss2 in ss:
                if (np.isclose(ss1, ss2, atol=10**-3).all()):
                    check = True
                    
        # there are going to be some failures and I am just printing them. 
        if check == False:
            print(f"player: {player} coplayer:{coplayer}")

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))

player: (0, 1, 0, 1) coplayer:(0, 1, 1, 0)
player: (0, 1, 0, 1) coplayer:(1, 0, 1, 1)
player: (0, 1, 1, 0) coplayer:(0, 1, 1, 1)
player: (0, 1, 1, 0) coplayer:(1, 0, 1, 1)
player: (1, 0, 0, 1) coplayer:(0, 1, 0, 1)
player: (1, 0, 1, 0) coplayer:(0, 1, 1, 1)



### Approach: numerical

In [16]:
def calculate_ss_for_mem_two(player, coplayer):
    """
    Invariant distribution for memory-two.
    """
    M = calculate_M_memory_two(player, coplayer)
    ss = invariant_distributions(M)
    
    return ss

**Sanity check:** A memory-one strategy written as a memory two strategy is still the same.

In [17]:
b, c = 2, 1

np.random.seed(1)

for _ in range(1000):
    
    p1, p2, p3, p4 = np.random.random(4)
    
    q1, q2, q3, q4 = np.random.random(4)
    
    ss_one = calculate_ss_for_mem_one([p1, p2, p3, p4], [q1, q2, q3, q4])
    
    ss_two = calculate_ss_for_mem_two([p1, p2, p3, p4] * 4,
                                      [q1, q2, q3, q4] * 4)
    
    assert np.isclose(
        ss_one @ np.array([b - c, -c, b, 0]), ss_two @ np.array([b - c, -c, b, 0] * 4) 
    )

Now we can loop over all the possible memory-one strategies, expressed as memory-two strategies, and compare the results. Thus, verify Press and Dyson.

In [18]:
def old_expected_memory_one_strategy(player, coplayer, index=0):
    """
    Returns the expected memory-one strategy for a memory-two
    strategy from the player's prespective.
    
    This uses the method by Press and Dyson.
    """
    states = ['CC', 'CD', 'DC', 'DD']
    
    states_in_two_bits = ["".join(i[-2:]) for i in list(itertools.product(['C', 'D'], repeat=4))]

    ss = calculate_ss_for_mem_two(player, coplayer)
    
    ss = ss[index]
    
    ss *= ([coplayer[0], coplayer[2], coplayer[1], coplayer[3]] * 4)
    
    strategy = [math.ceil(sum([ss[i] for i in range(16) if states_in_two_bits[i] == state]))
                for state in states]

    return (strategy[0], strategy[2], strategy[1], strategy[-1])    

In [19]:
def expected_memory_one_strategy(player, coplayer, index=0):
    """
    Returns the expected memory-one strategy for a memory-two
    strategy from the player's prespective.
    
    This uses the method by Press and Dyson.
    """
    states = ['CC', 'CD', 'DC', 'DD']
    
    states_in_two_bits = ["".join(i[-2:]) for i in list(itertools.product(['C', 'D'], repeat=4))]

    ss = calculate_ss_for_mem_two(player, coplayer)
    
    ss = ss[index]
    
    num = ss * [coplayer[0],  coplayer[2],  coplayer[1],  coplayer[3],
                 coplayer[4],  coplayer[6],  coplayer[5],  coplayer[7],
                 coplayer[8],  coplayer[10], coplayer[9],  coplayer[11],
                 coplayer[12], coplayer[14], coplayer[13], coplayer[15]]
    
#     print(num)
    
    strategy = [(sum([num[i] for i in range(16) if states_in_two_bits[i] == state]) 
                 / sum([ss[i] for i in range(16) if states_in_two_bits[i] == state]))
                for state in states]

    return [np.nan_to_num(strategy[i]) for i in [0, 2, 1, -1]]   

In [22]:
# b, c, player = 2, 1, [1, 1, 1, 1]

# for i, coplayer in enumerate(set_of_memory_one_s):

#     mem_one_representation = expected_memory_one_strategy(player * 4, coplayer * 4)

#     mem_one_representation2 = old_expected_memory_one_strategy(player * 4, coplayer * 4)
    
#     print(mem_one_representation, mem_one_representation2)

In [23]:
b, c, player = 2, 1, [1, 1, 1, 1]

for i, coplayer in enumerate(set_of_memory_one_s):

    mem_one_representation = expected_memory_one_strategy(player * 4, coplayer * 4)

    ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)

    ss = calculate_ss_for_mem_one(player, coplayer)

    assert (np.isclose(ss[0],
                       ss_estimated[0], atol=10**-3).all()), f"{ss} and {ss_estimated}. player: {player} coplayer:{coplayer}"

In [24]:
b, c = 2, 1

for player in tqdm.tqdm_notebook(set_of_memory_one_s):

    for i, coplayer in enumerate(set_of_memory_one_s):

        mem_one_representation = expected_memory_one_strategy(player * 4, coplayer * 4, index=-1)

        ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)

        ss = calculate_ss_for_mem_one(player, coplayer)


        check = False

        for ss1 in ss_estimated:
            for ss2 in ss:
                if (np.isclose(ss1, ss2, atol=10**-3).all()):
                    check = True

        # there are going to be some failures and I am just printing them. 
        if check == False:
            print(f"player: {player} coplayer:{coplayer}")

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))

player: (0, 1, 0, 1) coplayer:(1, 0, 1, 1)
player: (0, 1, 1, 0) coplayer:(0, 1, 1, 1)
player: (0, 1, 1, 0) coplayer:(1, 0, 1, 1)
player: (1, 0, 1, 0) coplayer:(0, 1, 1, 1)



These strategies are subset of the above. Let's explore.

The pairs in discussion:

1. player: (0, 1, 0, 1) coplayer:(0, 1, 1, 0). simulations fail here because one state (CD) is very visited.
2. (+) player: (0, 1, 0, 1) coplayer:(1, 0, 1, 1). cycling behavior
3. (+) player: (0, 1, 1, 0) coplayer:(0, 1, 1, 1). cycling behavior
4. (+) player: (0, 1, 1, 0) coplayer:(1, 0, 1, 1). cycling behavior
5. player: (1, 0, 0, 1) coplayer:(0, 1, 0, 1). simulations fail here because one state (CD) is very visited.
6. (+) player: (1, 0, 1, 0) coplayer:(0, 1, 1, 1). cycling behavior
    

In [32]:
pairs = [[(0, 1, 0, 1), (0, 1, 1, 0)],
    [(0, 1, 0, 1), (1, 0, 1, 1)],
    [(0, 1, 1, 0), (0, 1, 1, 1)],
    [(0, 1, 1, 0), (1, 0, 1, 1)],
    [(1, 0, 0, 1), (0, 1, 0, 1)],
    [(1, 0, 1, 0), (0, 1, 1, 1)]]

In [33]:
player = pairs[1][0]

coplayer = pairs[1][1]

In [34]:
mem_one_representation = expected_memory_one_strategy(player * 4, coplayer * 4)

mem_one_representation

[1.0, 0.0, 0.0, 1.0]

In [35]:
calculate_ss_for_mem_one(player, mem_one_representation)

[array([0., 1., 0., 0.])]

In [36]:
mem_one_sim_representation, a, b = simulate_play(player * 4, coplayer * 4, turns=2000, noise=0.0)

mem_one_sim_representation

[1.0, 0.0, 0, 1.0]

In [37]:
calculate_ss_for_mem_one(player, mem_one_sim_representation)

[array([0., 1., 0., 0.])]

In [38]:
a

{(C, C): 665, (C, D): 666, (D, C): 0, (D, D): 666}

In [39]:
b

{(C, C): 665, (C, D): 0, (D, C): 0, (D, D): 666}

In [40]:
calculate_ss_for_mem_one(player, coplayer)

[array([ 0.33333333, -0.        ,  0.33333333,  0.33333333])]

In [41]:
calculate_ss_for_mem_one(player, coplayer)[0] @ np.array([2 - 1, -1, 2, 0])

1.0000000000000002

In [42]:
calculate_ss_for_mem_one(player, mem_one_representation)[0] @ np.array([2 - 1, -1, 2, 0])

-1.0

In [43]:
calculate_ss_for_mem_one(player, coplayer)[0], calculate_ss_for_mem_one(player, mem_one_representation)[0]

(array([ 0.33333333, -0.        ,  0.33333333,  0.33333333]),
 array([0., 1., 0., 0.]))

In [44]:
M = calculate_M(player, coplayer)

M

array([[0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0]])

In [45]:
M = calculate_M(player, mem_one_representation)

M

array([[0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.]])

In [46]:
_, states_counts, states_to_actions = simulate_play(player * 4, coplayer * 4, turns=1000, noise=0.0)

In [47]:
states_counts, states_to_actions

({(C, C): 332, (C, D): 333, (D, C): 0, (D, D): 332},
 {(C, C): 332, (C, D): 0, (D, C): 0, (D, D): 332})

In [48]:
_, states_counts2, states_to_actions2 = simulate_play(player * 4, mem_one_representation * 4, turns=1000, noise=0.0)

In [49]:
states_counts2, states_to_actions2

({(C, C): 332, (C, D): 333, (D, C): 0, (D, D): 332},
 {(C, C): 332, (C, D): 0, (D, C): 0, (D, D): 332})

### Against Pure Memory-Two strategies

In [106]:
coplayer = set_of_memory_two_s[12]
coplayer

(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0)

In [107]:
player = [1, 1, 1, 1]

In [108]:
ss = calculate_ss_for_mem_two(player * 4, coplayer)

In [109]:
ss[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [65]:
player, counts, actions =  simulate_play(player * 4, coplayer, turns = 10000)

In [66]:
counts, actions

({(C, C): 1999, (C, D): 1999, (D, C): 2000, (D, D): 3999},
 {(C, C): 0, (C, D): 1999, (D, C): 0, (D, D): 1999})

In [67]:
counts2, actions2 = simulate_play_mem_two(player * 4, coplayer, turns = 10000)

In [84]:
first = [((C, C), (C, C)),
((C, D), (C, C)),
((D, C), (C, C)),
((D, D), (C, C))]

second = [((C, C), (C, D)),
((C, D), (C, D)),
((D, C), (C, D)),
((D, D), (C, D))]

third = [((C, C), (D, C)),
((C, D), (D, C)),
((D, C), (D, C)),
((D, D), (D, C))]

fourth = [((C, C), (D, D)),
((C, D), (D, D)),
((D, C), (D, D)),
((D, D), (D, D))]

In [85]:
sum([counts2[i] for i in first]), sum([counts2[i] for i in second])

(1969, 2706)

In [86]:
sum([counts2[i] for i in third]), sum([counts2[i] for i in fourth])

(1322, 4000)

In [87]:
sum([actions2[i] for i in first]), sum([actions2[i] for i in second])

(1322, 692)

In [88]:
sum([actions2[i] for i in third]), sum([actions2[i] for i in fourth])

(1322, 1339)

In [89]:
sum([actions2[i] for i in first]) / sum([counts2[i] for i in first])

0.6714068054850177

In [90]:
sum([actions2[i] for i in second]) / sum([counts2[i] for i in second])

0.2557280118255728

In [91]:
sum([actions2[i] for i in third]) / sum([counts2[i] for i in third])

1.0

In [92]:
sum([actions2[i] for i in fourth]) / sum([counts2[i] for i in fourth])

0.33475

In [93]:
for a, b in zip(actions.values(), counts.values()):
    print(a / b)

0.0
1.0
0.0
0.49987496874218557


In [95]:
ss = calculate_ss_for_mem_one((1, 1, 1, 1), [0.5217391304347826, 0, 0.5, 0])
ss

[array([0.51111111, 0.48888889, 0.        , 0.        ])]

In [None]:
calculate_ss_for_mem_two([1, 1, 1, 1] * 4, )

In [597]:
ss = calculate_ss_for_mem_two(player * 4, coplayer)

ss = ss[0]

In [598]:
num = ss * [coplayer[0],  coplayer[2],  coplayer[1],  coplayer[3],
             coplayer[4],  coplayer[6],  coplayer[5],  coplayer[7],
             coplayer[8],  coplayer[10], coplayer[9],  coplayer[11],
             coplayer[12], coplayer[14], coplayer[13], coplayer[15]]

In [599]:
ss

array([ 0.25,  0.25, -0.  , -0.  ,  0.25,  0.25, -0.  , -0.  , -0.  ,
       -0.  , -0.  , -0.  , -0.  , -0.  , -0.  , -0.  ])

In [600]:
num

array([ 0.,  0., -0., -0.,  0.,  0., -0., -0., -0., -0., -0., -0., -0.,
       -0., -0., -0.])

In [590]:
coplayer

(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1)

In [591]:
ss

array([ 0.25,  0.25, -0.  , -0.  ,  0.25,  0.25, -0.  , -0.  , -0.  ,
       -0.  , -0.  , -0.  , -0.  , -0.  , -0.  , -0.  ])

In [592]:
expected_memory_one_strategy(player * 4, coplayer)

[0.0, 0.0, 0.0, 0.0]

In [593]:
calculate_ss_for_mem_one(player, [0, 0, 0, 0])

[array([0., 1., 0., 0.])]

In [594]:
ss = calculate_ss_for_mem_two(player * 4, coplayer)

In [595]:
ss

[array([ 0.25,  0.25, -0.  , -0.  ,  0.25,  0.25, -0.  , -0.  , -0.  ,
        -0.  , -0.  , -0.  , -0.  , -0.  , -0.  , -0.  ])]

In [None]:

    
#     print(num)
    
    strategy = [(sum([num[i] for i in range(16) if states_in_two_bits[i] == state]) 
                 / sum([ss[i] for i in range(16) if states_in_two_bits[i] == state]))
                for state in states]

    return [np.nan_to_num(strategy[i]) for i in [0, 2, 1, -1]]  

In [112]:
b, c = 2, 1

states = ['CC', 'CD', 'DC', 'DD']

states_in_two_bits = ["".join(i[-2:]) for i in list(itertools.product(['C', 'D'], repeat=4))]

np.random.seed(0)

count = 0

for player in set_of_memory_one_s[-2:]:

    for coplayer in tqdm.tqdm_notebook(set_of_memory_two_s):

        mem_one_representation = expected_memory_one_strategy(player * 4, coplayer)

        ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)

        ss = calculate_ss_for_mem_two(player * 4, coplayer)
        

        ss = [[sum([s[i] for i in range(16) if states_in_two_bits[i] == state])
                for state in states] for s in ss]
        
        check = False

        for ss1 in ss_estimated:
            for ss2 in ss:
                if (np.isclose(ss1, ss2, atol=10**-2).all()):
                    check = True

#         there are going to be some failures and I am just printing them. 
        if check == False:
            count += 1
#             print(f"ss: {ss} ss estimated:{ss_estimated}")
#         print(mem_one_representation)

HBox(children=(FloatProgress(value=0.0, max=65536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=65536.0), HTML(value='')))




In [606]:
count

18432

### Against Generic Memory-Two strategies

In [481]:
def expected_memory_one_strategy(player, coplayer, index=0):
    """
    Returns the expected memory-one strategy for a memory-two
    strategy from the player's prespective.
    
    This uses the method by Press and Dyson.
    """
    states = ['CC', 'CD', 'DC', 'DD']
    
    states_in_two_bits = ["".join(i[-2:]) for i in list(itertools.product(['C', 'D'], repeat=4))]

    ss = calculate_ss_for_mem_two(player, coplayer)
    
    ss = ss[index]
    
    num = ss * [coplayer[0],  coplayer[2],  coplayer[1],  coplayer[3],
                 coplayer[4],  coplayer[6],  coplayer[5],  coplayer[7],
                 coplayer[8],  coplayer[10], coplayer[9],  coplayer[11],
                 coplayer[12], coplayer[14], coplayer[13], coplayer[15]] / ss
    
    num = np.nan_to_num(num)
    
    strategy = [np.average([num[i] for i in range(16) if states_in_two_bits[i] == state],
                 weights = [ss[i] for i in range(16) if states_in_two_bits[i] == state])
                for state in states
                ]

    return [np.nan_to_num(strategy[i]) for i in [0, 2, 1, -1]]   

In [482]:
b, c = 2, 1

states = ['CC', 'CD', 'DC', 'DD']

states_in_two_bits = ["".join(i[-2:]) for i in list(itertools.product(['C', 'D'], repeat=4))]

np.random.seed(0)

for player in tqdm.tqdm_notebook(set_of_memory_one_s[-1:]):

    for coplayer in [np.random.random(16) for _ in range(3)]:

        mem_one_representation = expected_memory_one_strategy(player * 4, coplayer)

        ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)

        ss = calculate_ss_for_mem_two(player * 4, coplayer)
        

        ss = [[sum([s[i] for i in range(16) if states_in_two_bits[i] == state])
                for state in states] for s in ss]
        
        check = False

        for ss1 in ss_estimated:
            for ss2 in ss:
                if (np.isclose(ss1, ss2, atol=10**-3).all()):
                    check = True

        # there are going to be some failures and I am just printing them. 
        if check == False:
            print(f"ss: {ss} ss estimated:{ss_estimated}")

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




ZeroDivisionError: Weights sum to zero, can't be normalized

In [470]:
b, c = 2, 1

states = ['CC', 'CD', 'DC', 'DD']

states_in_two_bits = ["".join(i[-2:]) for i in list(itertools.product(['C', 'D'], repeat=4))]

np.random.seed(0)

for player in tqdm.tqdm_notebook(set_of_memory_one_s):

    for coplayer in [np.random.random(16) for _ in range(1)]:

        mem_one_representation, _, _ = simulate_play(player * 4, coplayer, turns=100000)

        ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)

        ss = calculate_ss_for_mem_two(player * 4, coplayer)
        

        ss = [[sum([s[i] for i in range(16) if states_in_two_bits[i] == state])
                for state in states] for s in ss]
        
        check = False

        for ss1 in ss_estimated:
            for ss2 in ss:
                if (np.isclose(ss1, ss2, atol=10**-2).all()):
                    check = True

        # there are going to be some failures and I am just printing them. 
        if check == False:
            print(f"ss: {ss} ss estimated:{ss_estimated}")

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))

ss: [[0.05203331873512497, 0.011837472059824572, 0.924291737145226, 0.011837472059824428]] ss estimated:[array([0.04197997, 0.0092847 , 0.93945063, 0.0092847 ])]



In [504]:
player

(1, 1, 1, 1)

In [515]:
coplayer 

array([0.5488135 , 0.71518937, 0.60276338, 0.54488318, 0.4236548 ,
       0.64589411, 0.43758721, 0.891773  , 0.96366276, 0.38344152,
       0.79172504, 0.52889492, 0.56804456, 0.92559664, 0.07103606,
       0.0871293 ])

In [516]:
mem_one_representation, a, b = simulate_play(player * 4, coplayer, turns=100000)

In [517]:
mem_one_representation

[0.6815416722170419, 0, 0.6633779367330579, 0]

In [535]:
ss = calculate_ss_for_mem_two(player * 4, coplayer)

ss = ss[-1]

num = ss * [coplayer[0],  coplayer[2],  coplayer[1],  coplayer[3],
             coplayer[4],  coplayer[6],  coplayer[5],  coplayer[7],
             coplayer[8],  coplayer[10], coplayer[9],  coplayer[11],
             coplayer[12], coplayer[14], coplayer[13], coplayer[15]] / ss

num = np.nan_to_num(num)

sums = [[num[i] for i in range(16) if states_in_two_bits[i] == state] for state in states]

weights = [[ss[i] for i in range(16) if states_in_two_bits[i] == state] for state in states]

strategy = []

for s, w in zip(sums, weights):
    if sum(w) > 0:
        strategy.append(np.average(s, weights=w))
    else:
        strategy.append(0)


In [536]:
num

array([0.5488135 , 0.60276338, 0.        , 0.        , 0.4236548 ,
       0.43758721, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        ])

In [537]:
ss

array([ 0.46055114,  0.21562985, -0.        , -0.        ,  0.21562985,
        0.10818917, -0.        , -0.        , -0.        , -0.        ,
       -0.        , -0.        , -0.        , -0.        , -0.        ,
       -0.        ])

In [531]:
[strategy[i] for i in [0, 2, 1, -1]]

[0.5089011831526044, 0, 0.5475773897117798, 0]

In [532]:
calculate_ss_for_mem_one(player, [strategy[i] for i in [0, 2, 1, -1]])

[array([0.52718777, 0.47281223, 0.        , 0.        ])]

In [524]:
calculate_ss_for_mem_one(player, mem_one_representation)

[array([0.67565027, 0.32434973, 0.        , 0.        ])]

In [538]:
mem_one_representation

[0.6815416722170419, 0, 0.6633779367330579, 0]

In [521]:
[[ss[i] for i in range(16) if states_in_two_bits[i] == state] for state in states]

[[0.4605511376861959, 0.2156298474861497, -0.0, -0.0],
 [0.2156298474861498, 0.10818916734150454, -0.0, -0.0],
 [-0.0, -0.0, -0.0, -0.0],
 [-0.0, -0.0, -0.0, -0.0]]

In [528]:
ss = calculate_ss_for_mem_two(player * 4, coplayer)


ss = [[sum([s[i] for i in range(16) if states_in_two_bits[i] == state])
        for state in states] for s in ss]

In [529]:
ss

[[0.6761809851723456, 0.32381901482765435, 0.0, 0.0]]

In [None]:
player * 4

In [None]:
a

In [489]:
ss = calculate_ss_for_mem_two(player * 4, coplayer)

In [490]:
ss = ss[0]

In [477]:
num = ss * [coplayer[0],  coplayer[2],  coplayer[1],  coplayer[3],
             coplayer[4],  coplayer[6],  coplayer[5],  coplayer[7],
             coplayer[8],  coplayer[10], coplayer[9],  coplayer[11],
             coplayer[12], coplayer[14], coplayer[13], coplayer[15]] / ss

TypeError: can't multiply sequence by non-int of type 'list'

In [478]:
num = np.nan_to_num(num)

In [433]:
num

array([0.86385561, 0.51737911, 0.        , 0.        , 0.71685968,
       0.56542131, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        ])

In [434]:
(ss[0] + ss[4] + ss[8] + ss[12])

0.46683779678902637

In [437]:
(coplayer[0] + coplayer[4] + coplayer[8] + coplayer[12]) / 4

0.6227220750665419

In [444]:
(coplayer[0] + coplayer[4]) / 2

0.7903576435579125

In [314]:
ss[4]

0.24584937361787787

In [319]:
(ss[0] + ss[4]) 

0.4083663289503512

In [333]:
rep = [np.average([num[0], num[4]], weights=[ss[0], ss[4]]),
0,
np.average([num[1], num[5]], weights=[ss[1], ss[5]]),
0]

In [331]:
np.average([num[1], num[5]], weights=[ss[1], ss[5]])

0.5593879171249415

In [327]:
np.mean?

In [317]:
(num[0] * ss[0] + num[4] * ss[4]) / 0.5

0.4492310263824385

In [267]:
0.7926341470297388 / 1.5

0.5284227646864925

In [277]:
rep = [sum([num[i] for i in range(16) if states_in_two_bits[i] == state]) / 2
                for state in states]

In [334]:
ss_estimated = calculate_ss_for_mem_one(player, rep)

In [335]:
ss_estimated

[array([0.55420414, 0.44579586, 0.        , 0.        ])]

In [285]:
rep = [rep[0], rep[2], rep[1], rep[-1]]

In [336]:
calculate_ss_for_mem_one(player,mem_one_representation)

[array([ 0.40802224,  0.59197776, -0.        , -0.        ])]

In [283]:
mem_one_representation

[0.5140786351099994, 0, 0.42704419059071097, 0]

In [227]:
num = ss * [coplayer[0],  coplayer[2],  coplayer[1],  coplayer[3],
             coplayer[4],  coplayer[6],  coplayer[5],  coplayer[7],
             coplayer[8],  coplayer[10], coplayer[9],  coplayer[11],
             coplayer[12], coplayer[14], coplayer[13], coplayer[15]]

In [225]:
[sum([ss[i] for i in range(16) if states_in_two_bits[i] == state]) for state in states]

[0.46683779678902637, 0.5331622032109736, 0.0, 0.0]

In [None]:


    ss = calculate_ss_for_mem_two(player, coplayer)
    
    ss = ss[index]
    
    num = ss * [coplayer[0],  coplayer[2],  coplayer[1],  coplayer[3],
                 coplayer[4],  coplayer[6],  coplayer[5],  coplayer[7],
                 coplayer[8],  coplayer[10], coplayer[9],  coplayer[11],
                 coplayer[12], coplayer[14], coplayer[13], coplayer[15]]
    
    strategy = [(sum([num[i] for i in range(16) if states_in_two_bits[i] == state]) 
                 / sum([ss[i] for i in range(16) if states_in_two_bits[i] == state]))
                for state in states]

    return [np.nan_to_num(strategy[i]) for i in [0, 2, 1, -1]]  

In [141]:
ss = calculate_ss_for_mem_two(player * 4, coplayer)

In [200]:
np.isclose(ss1, ss2, atol=10**-1)

array([ True,  True,  True,  True])

In [144]:
ss

array([ 0.46055114,  0.21562985, -0.        , -0.        ,  0.21562985,
        0.10818917, -0.        , -0.        , -0.        , -0.        ,
       -0.        , -0.        , -0.        , -0.        , -0.        ,
       -0.        ])

In [134]:
[0.67624029, 0.32375971, 0.        , 0.        ]

[0.67624029, 0.32375971, 0.0, 0.0]

### Reactive case

In [28]:
set_of_reactive_s = [(0, 0, 0, 0), (1, 0, 1, 0), (0, 1, 0, 1), (1, 1, 1, 1)]

In [29]:
def simulate_play_reactive(p, q, turns=10 * 5, noise=0):
    
    states_to_actions = {(C, C): 0, 
                     (C, D): 0, 
                     (D, C): 0, 
                     (D, D):0}

    states_counts = {(C, C): 0, 
                     (C, D): 0, 
                     (D, C): 0, 
                     (D, D):0}

    player = axl.MemoryTwoPlayer([p[0], p[1], p[4], p[5],
                                  p[2], p[3], p[6], p[7],
                                  p[8], p[9], p[12], p[13], 
                                  p[10], p[11], p[14], p[15]])

    coplayer = axl.MemoryTwoPlayer([q[0], q[1], q[4], q[5],
                                    q[2], q[3], q[6], q[7],
                                    q[8], q[9], q[12], q[13], 
                                    q[10], q[11], q[14], q[15]])
    
    
    match = axl.Match(players=[player, coplayer], turns=turns, noise=noise)
    
    _ = match.play()
  
    for i, history in enumerate(match.result[2:-1]):

        states_counts[history[::-1]] += 1
        action = match.result[i + 2 +1][-1]

        if action == C:
            states_to_actions[history[::-1]] += 1
        
        
    strategy = []
    states = [(C, C), (C, D), (D, C), (D, D)]
    
    denominator_p1 = (states_counts[(C, C)] + states_counts[(D, C)])
    denominator_p2 = (states_counts[(C, D)] + states_counts[(D, D)])

    if denominator_p1 > 0:
        p_1 = ((states_to_actions[(C, C)] +  states_to_actions[(D, C)]) 
               / denominator_p1)
    else:
        p_1 = 0
        
    if denominator_p2 > 0:
        p_2 = ((states_to_actions[(C, D)] +  states_to_actions[(D, D)]) 
               / denominator_p2)
    else:
        p_2 = 0
    
    strategy = [p_1, p_2, p_1, p_2]
    
    return strategy, states_counts, states_to_actions

In [149]:
player = (1, 0, 1, 0)

coplayer = (1, 0, 1, 1)

In [150]:
mem_one_sim_representation, a, b = simulate_play_reactive(player * 4, coplayer * 4, turns=1000, noise=0.0)

mem_one_sim_representation

[1.0, 0, 1.0, 0]

In [30]:
b, c, player = 2, 1, [1, 1, 1, 1]

for i, coplayer in enumerate(set_of_memory_one_s):

    mem_one_representation, _, _ = simulate_play_reactive(player * 4, coplayer * 4, turns=1000)

    ss_estimated = calculate_ss_for_mem_one(player, coplayer)

    ss = calculate_ss_for_mem_one(player, mem_one_representation)
    
    check = False
    
    for ss1 in ss_estimated:
        for ss2 in ss:
            if (np.isclose(ss1, ss2, atol=10**-3).all()):
                check = True

    assert check, f"{ss} and {ss_estimated}. player: {player} coplayer:{coplayer}"

In [31]:
b, c = 2, 1

for player in tqdm.notebook.tqdm(set_of_reactive_s):

    for i, coplayer in enumerate(set_of_memory_one_s):

        mem_one_representation, a, b = simulate_play_reactive(player * 4, coplayer * 4, turns=1000, noise=0.0)

        ss_estimated = calculate_ss_for_mem_one(player, coplayer)

        ss = calculate_ss_for_mem_one(player, mem_one_representation)

        check = False

        for ss1 in ss_estimated:
            for ss2 in ss:
                if (np.isclose(ss1, ss2, atol=10**-3).all()):
                    check = True
                    
        # there are going to be some failures and I am just printing them. 
        if check == False:
            print(f"player: {player} coplayer:{coplayer}")

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

player: (1, 0, 1, 0) coplayer:(0, 0, 0, 1)
player: (1, 0, 1, 0) coplayer:(0, 1, 1, 0)
player: (1, 0, 1, 0) coplayer:(0, 1, 1, 1)
player: (0, 1, 0, 1) coplayer:(0, 0, 1, 0)
player: (0, 1, 0, 1) coplayer:(0, 1, 1, 0)
player: (0, 1, 0, 1) coplayer:(1, 0, 0, 1)
player: (0, 1, 0, 1) coplayer:(1, 0, 1, 1)



In [718]:
b, c = 2, 1

for player in tqdm.notebook.tqdm(set_of_memory_one_s):

    for i, coplayer in enumerate(set_of_memory_one_s):

        mem_one_representation, a, b = simulate_play_reactive(player * 4, coplayer * 4, turns=1000, noise=0.0)

        ss_estimated = calculate_ss_for_mem_one(player, coplayer)

        ss = calculate_ss_for_mem_one(player, mem_one_representation)

        check = False

        for ss1 in ss_estimated:
            for ss2 in ss:
                if (np.isclose(ss1, ss2, atol=10**-3).all()):
                    check = True
                    
        # there are going to be some failures and I am just printing them. 
        if check == False:
            print(f"player: {player} coplayer:{coplayer}")

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))

player: (0, 0, 0, 1) coplayer:(0, 1, 1, 0)
player: (0, 0, 0, 1) coplayer:(1, 0, 0, 1)
player: (0, 0, 0, 1) coplayer:(1, 0, 1, 1)
player: (0, 0, 1, 0) coplayer:(0, 0, 0, 1)
player: (0, 0, 1, 0) coplayer:(0, 0, 1, 1)
player: (0, 0, 1, 0) coplayer:(1, 0, 0, 1)
player: (0, 1, 0, 1) coplayer:(0, 0, 1, 0)
player: (0, 1, 0, 1) coplayer:(0, 1, 1, 0)
player: (0, 1, 0, 1) coplayer:(1, 0, 0, 1)
player: (0, 1, 0, 1) coplayer:(1, 0, 1, 1)
player: (0, 1, 1, 0) coplayer:(0, 1, 1, 1)
player: (0, 1, 1, 0) coplayer:(1, 0, 1, 1)
player: (0, 1, 1, 1) coplayer:(0, 0, 1, 0)
player: (0, 1, 1, 1) coplayer:(0, 1, 1, 0)
player: (1, 0, 0, 1) coplayer:(0, 0, 0, 1)
player: (1, 0, 0, 1) coplayer:(0, 0, 1, 0)
player: (1, 0, 1, 0) coplayer:(0, 0, 0, 1)
player: (1, 0, 1, 0) coplayer:(0, 1, 1, 0)
player: (1, 0, 1, 0) coplayer:(0, 1, 1, 1)
player: (1, 0, 1, 1) coplayer:(0, 1, 1, 0)
player: (1, 0, 1, 1) coplayer:(0, 1, 1, 1)



In [675]:
player = (1, 0, 1, 0)

coplayer = (0, 0, 0, 1)

In [676]:
mem_one_sim_representation, a, b = simulate_play_reactive(player * 4, coplayer * 4, turns=1000, noise=0.0)

mem_one_sim_representation

[0.0, 0.5, 0.0, 0.5]

In [677]:
calculate_ss_for_mem_one(player, coplayer)[0] @ np.array([2 - 1, -1, 2, 0])

0.33333333333333365

In [678]:
calculate_ss_for_mem_one(player, [0, .5, 0, .5])[0] @ np.array([2 - 1, -1, 2, 0])

0.3333333333333337

In [624]:
_, states_counts, states_to_actions = simulate_play_reactive(player * 4, coplayer * 4, turns=1000, noise=0.0)

In [625]:
states_counts, states_to_actions

({(C, C): 0, (C, D): 332, (D, C): 333, (D, D): 332},
 {(C, C): 0, (C, D): 0, (D, C): 0, (D, D): 332})

In [626]:
_, states_counts2, states_to_actions2 = simulate_play_reactive(player * 4,
                                                               mem_one_sim_representation * 4,
                                                               turns=1000, noise=0.0)

In [627]:
states_counts2, states_to_actions2

({(C, C): 112, (C, D): 220, (D, C): 221, (D, D): 444},
 {(C, C): 0, (C, D): 112, (D, C): 0, (D, D): 220})

In [681]:
player = (1, 0, 1, 0)

coplayer = (1, 1, 0, 0)

In [682]:
mem_one_sim_representation, a, b = simulate_play_reactive(player * 4, coplayer * 4, turns=1000, noise=0.0)

mem_one_sim_representation

[1.0, 0, 1.0, 0]

In [717]:
b, c = 2, 1

for player in tqdm.notebook.tqdm(set_of_reactive_s):

    for i, coplayer in enumerate([[1, 1, 0, 0]]):

        mem_one_representation, a, b = simulate_play_reactive(player * 4, coplayer * 4, turns=1000, noise=0.0)

        ss_estimated = calculate_ss_for_mem_one(player, coplayer)

        ss = calculate_ss_for_mem_one(player, mem_one_representation)
        

        check = False

        for ss1 in ss_estimated:
            for ss2 in ss:
                if (np.isclose(ss1, ss2, atol=10**-3).all()):
                    check = True
                    
        # there are going to be some failures and I am just printing them. 
        if check == False:
            print(f"player: {player} coplayer:{coplayer}")

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




In [32]:
b, c = 2, 1

for player in tqdm.tqdm_notebook([list(np.random.random(4)) for _ in range(4)]):

    for i, coplayer in enumerate([list(np.random.random(16)) for _ in range(4)]):

        mem_one_representation = mem_one_representation, _, _ = simulate_play(player * 4,
                                                                              coplayer,
                                                                              turns=50000)

        ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)

        ss = calculate_ss_for_mem_two(player * 4, coplayer)
        
        


        check = False

        for ss1 in ss_estimated:
            for ss2 in ss:
                if (np.isclose(ss1, ss2, atol=10**-3).all()):
                    check = True

        # there are going to be some failures and I am just printing them. 
        if check == False:
            print(f"ss: {ss} ss estimated:{ss_estimated}")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

ss: [array([0.28249833, 0.21413846, 0.30671966, 0.19664356])] ss estimated:[array([0.24382379, 0.25278602, 0.2253425 , 0.2780477 ])]
ss: [array([0.28041569, 0.20666903, 0.25456095, 0.25835433])] ss estimated:[array([0.20418999, 0.30505052, 0.2052697 , 0.2854898 ])]
ss: [array([0.37447293, 0.1028332 , 0.40322664, 0.11946723])] ss estimated:[array([0.32715309, 0.15981096, 0.35213686, 0.16089909])]
ss: [array([0.14298384, 0.38797706, 0.18535141, 0.28368768])] ss estimated:[array([0.24118128, 0.25605536, 0.22292718, 0.27983618])]
ss: [array([0.21587723, 0.3199096 , 0.14375366, 0.3204595 ])] ss estimated:[array([0.17785658, 0.37572217, 0.12623522, 0.32018603])]
ss: [array([0.30039121, 0.21648931, 0.14888638, 0.3342331 ])] ss estimated:[array([0.25314236, 0.19923049, 0.2689789 , 0.27864826])]
ss: [array([0.24921178, 0.10233588, 0.43531183, 0.2131405 ])] ss estimated:[array([0.26996709, 0.16336837, 0.29501578, 0.27164875])]
ss: [array([0.11775896, 0.46025246, 0.10458849, 0.3174001 ])] ss esti

In [711]:
b, c = 2, 1

for player in tqdm.tqdm_notebook([list(np.random.random(416)) for _ in range(4)]):

    for i, coplayer in enumerate([list(np.random.random(4)) for _ in range(4)]):

        mem_one_representation, a, b = simulate_play_reactive(player * 4, coplayer * 4, turns=20000, noise=0.0)

        ss_estimated = calculate_ss_for_mem_one(player, coplayer)

        ss = calculate_ss_for_mem_one(player, mem_one_representation)
        
        print(ss_estimated, ss)

        check = False

        for ss1 in ss_estimated:
            for ss2 in ss:
                if (np.isclose(ss1, ss2, atol=10**-2).all()):
                    check = True
                    
        # there are going to be some failures and I am just printing them. 
        if check == False:
                    print(f"ss: {ss} ss estimated:{ss_estimated}")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

[array([0.14984473, 0.22006878, 0.08235775, 0.54772874])] [array([0.09027957, 0.27148401, 0.13243796, 0.50579847])]
ss: [array([0.09027957, 0.27148401, 0.13243796, 0.50579847])] ss estimated:[array([0.14984473, 0.22006878, 0.08235775, 0.54772874])]
[array([0.24727822, 0.33418998, 0.29265229, 0.12587952])] [array([0.29468897, 0.28618813, 0.24235692, 0.17676598])]
ss: [array([0.29468897, 0.28618813, 0.24235692, 0.17676598])] ss estimated:[array([0.24727822, 0.33418998, 0.29265229, 0.12587952])]
[array([0.50595803, 0.13285844, 0.10752844, 0.25365509])] [array([0.45641576, 0.18243204, 0.15922067, 0.20193154])]
ss: [array([0.45641576, 0.18243204, 0.15922067, 0.20193154])] ss estimated:[array([0.50595803, 0.13285844, 0.10752844, 0.25365509])]
[array([0.43234028, 0.19872293, 0.17284275, 0.19609405])] [array([0.44426192, 0.18808138, 0.16230193, 0.20535476])]
ss: [array([0.44426192, 0.18808138, 0.16230193, 0.20535476])] ss estimated:[array([0.43234028, 0.19872293, 0.17284275, 0.19609405])]
[arr