In [1]:
import itertools

import numpy as np

import tqdm

import math

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import os

os.chdir('..')

In [6]:
import os

# NOTE(review): the previous cell already runs `os.chdir('..')`, so on a fresh
# "Run All" the working directory moves up twice -- confirm that is intended.
os.chdir('..')

from importlib.machinery import SourceFileLoader

# Load the project module from its path relative to the current working directory.
eq = SourceFileLoader("eq", "src/numerical_equilibria_n_bit_vs_n_bit.py").load_module()

# NOTE(review): names used later without a local definition (e.g. `calculate_M`,
# `calculate_M_memory_two`, `invariant_distribution`) presumably come from the
# star imports below -- confirm which module provides each.
from eq import *

from importlib.machinery import SourceFileLoader

main = SourceFileLoader("main", "src/main.py").load_module()

from main import *

In [7]:
import axelrod as axl

from axelrod.action import Action

C, D = Action.C, Action.D

In this notebook we evaluate the result **Appendix A: Shortest-Memory Player Sets the Rules of the Game** from [Press & Dyson](https://www.pnas.org/doi/abs/10.1073/pnas.1206569109).

We will initially check that we can get the **memory-one representation of a memory-two strategy**.

### Memory-one

In [8]:
# code for approach "simulations"

def simulate_play(p, q, turns=10 * 5, noise=0):
    """
    Estimate a four-entry strategy from one simulated match.

    `p` and `q` are 16-entry strategies. Both are re-ordered and handed to
    `axl.MemoryTwoPlayer`, and a single `axl.Match` is played. Every round
    from the third up to the second-to-last is keyed by its reversed outcome
    pair, and we record whether the last entry of the following round's
    outcome is `C` (the co-player's action, assuming `match.result` holds
    (player, co-player) pairs).

    Returns `(strategy, states_counts, states_to_actions)`: the cooperation
    frequency per state in the order CC, CD, DC, DD (the integer 0 for a
    state that was never visited), the number of visits per state, and the
    number of visits that were followed by a cooperation.
    """
    # NOTE(review): the default `turns` is 10 * 5 = 50, while some callers in this
    # notebook pass 10 ** 5 -- confirm which default was intended.
    states = [(C, C), (C, D), (D, C), (D, D)]
    states_to_actions = {state: 0 for state in states}
    states_counts = {state: 0 for state in states}

    # Position k of the list given to MemoryTwoPlayer takes entry order[k].
    order = [0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15]
    player = axl.MemoryTwoPlayer([p[k] for k in order])
    coplayer = axl.MemoryTwoPlayer([q[k] for k in order])

    match = axl.Match(players=[player, coplayer], turns=turns, noise=noise)
    match.play()

    rounds = match.result
    for history, following in zip(rounds[2:-1], rounds[3:]):
        state = history[::-1]
        states_counts[state] += 1
        if following[-1] == C:
            states_to_actions[state] += 1

    strategy = [
        states_to_actions[state] / states_counts[state] if states_counts[state] > 0 else 0
        for state in states
    ]

    return strategy, states_counts, states_to_actions


def simulate_play_mem_two(p, q, turns=10 * 5, noise=0):
    """
    Estimate a four-entry strategy from one simulated match, counting
    two-round states.

    `p` and `q` are 16-entry strategies. Both are re-ordered and handed to
    `axl.MemoryTwoPlayer`, and a single `axl.Match` is played. Each pair of
    consecutive rounds (starting from the second round) forms a key
    `(previous[::-1], history[::-1])`; we count how often each key occurs and
    how often the last entry of the next round's outcome is `C`.

    The 16 two-round keys are then pooled by their most recent round, in the
    order CC, CD, DC, DD, to give four cooperation frequencies.

    Returns `(strategy, states_counts, states_to_actions)`. A pooled state
    that was never visited contributes the integer 0. The visit threshold is
    `> 0`, matching `simulate_play`; it used to be `> 1`, which reported 0
    for a state visited exactly once.
    """
    states = [(C, C), (C, D), (D, C), (D, D)]

    # All (previous round, last round) keys, in the same order as the
    # hand-written tables this replaces.
    two_round_states = list(itertools.product(states, repeat=2))
    states_to_actions = {key: 0 for key in two_round_states}
    states_counts = {key: 0 for key in two_round_states}

    # Position k of the list given to MemoryTwoPlayer takes entry order[k].
    order = [0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15]
    player = axl.MemoryTwoPlayer([p[k] for k in order])
    coplayer = axl.MemoryTwoPlayer([q[k] for k in order])

    match = axl.Match(players=[player, coplayer], turns=turns, noise=noise)
    match.play()

    rounds = match.result
    for previous, history, following in zip(rounds[1:-2], rounds[2:-1], rounds[3:]):
        key = (previous[::-1], history[::-1])
        states_counts[key] += 1
        if following[-1] == C:
            states_to_actions[key] += 1

    strategy = []
    for last in states:
        # Pool every key whose most recent round is `last`.
        group = [(previous, last) for previous in states]
        visits = sum(states_counts[key] for key in group)
        if visits > 0:
            strategy.append(sum(states_to_actions[key] for key in group) / visits)
        else:
            strategy.append(0)

    return strategy, states_counts, states_to_actions

Using either of the above functions gives the same result.

In [9]:
for _ in tqdm.tqdm_notebook(range(20)):
    # Fresh random 16-entry strategies for both players on every repetition.
    p, q = np.random.random(16), np.random.random(16)

    expected_mem_one = simulate_play(p, q, turns=10 ** 5, noise=0)[0]
    expected_mem_two = simulate_play_mem_two(p, q, turns=10 ** 5, noise=0)[0]

    # The two estimators should agree up to simulation noise.
    assert np.allclose(expected_mem_one, expected_mem_two, atol=10**-1)

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




In [22]:
# code for "numerical" approach

def invariant_distributions(M):
    """
    Return every invariant distribution of the transition matrix `M`.

    The left eigenvectors of `M` (eigenvectors of `M.T`) whose eigenvalue
    equals the largest one are each normalised to sum to 1. When the chain
    has several absorbing sets the leading eigenvalue is repeated, and one
    distribution is returned per repetition.

    Leading eigenvalues are matched with `np.isclose` rather than `==`,
    because the repeated eigenvalues computed by `np.linalg.eig` can differ
    in the last bits.

    Returns a list of real-valued 1-D arrays.
    """
    eigenvalues, eigenvectors = np.linalg.eig(M.T)

    # Compare on the real part so the maximum is well defined even when
    # `eig` returns a complex array.
    leading = np.max(eigenvalues.real)

    stationaries = []
    for index in np.flatnonzero(np.isclose(eigenvalues, leading)):
        eigenvector = eigenvectors[:, index]
        stationaries.append((eigenvector / eigenvector.sum()).real)

    return stationaries


def calculate_ss_for_mem_one(player, coplayer):
    """
    Invariant distribution of the memory-one game between two strategies.

    Builds the transition matrix with `calculate_M` and returns whatever
    `invariant_distribution` produces for it.
    """
    # NOTE(review): this calls `invariant_distribution` (singular), which is not
    # defined in this notebook -- presumably it comes from the star imports and
    # is distinct from the local `invariant_distributions`; confirm.
    return invariant_distribution(calculate_M(player, coplayer))


def calculate_ss_for_mem_two(player, coplayer):
    """
    Invariant distribution of the memory-two game between two strategies.

    Builds the transition matrix with `calculate_M_memory_two` and returns
    whatever `invariant_distribution` produces for it.
    """
    # NOTE(review): this calls `invariant_distribution` (singular), which is not
    # defined in this notebook -- presumably it comes from the star imports and
    # is distinct from the local `invariant_distributions`; confirm.
    return invariant_distribution(calculate_M_memory_two(player, coplayer))


def expected_memory_one_strategy(player, coplayer, index=0):
    """
    Collapse a 16-entry co-player strategy into a four-entry one.

    Follows the method by Press and Dyson. Entry `index` of the result of
    `calculate_ss_for_mem_two(player, coplayer)` is used as the weights. For
    each one-round state (CC, CD, DC, DD) the re-ordered co-player entries
    are averaged over the two-round states that end in it, weighted by that
    distribution.

    A state with zero total weight yields NaN, which `np.nan_to_num` turns
    into 0. The four values are returned with the CD and DC entries swapped.
    """
    # Label each of the 16 two-round states by its last two letters.
    last_round_labels = [
        "".join(history[-2:]) for history in itertools.product(['C', 'D'], repeat=4)
    ]

    stationary = calculate_ss_for_mem_two(player, coplayer)[index]

    # Swap the two middle entries inside every block of four entries.
    swapped = [coplayer[4 * block + offset]
               for block in range(4) for offset in (0, 2, 1, 3)]

    strategy = []
    for state in ['CC', 'CD', 'DC', 'DD']:
        members = [i for i in range(16) if last_round_labels[i] == state]
        weighted = sum([swapped[i] * stationary[i] for i in members])
        total = sum([stationary[i] for i in members])
        strategy.append(weighted / total)

    return [np.nan_to_num(strategy[i]) for i in [0, 2, 1, -1]]


def expected_memory_one_strategy_ch(player, coplayer, index=0):
    """
    Collapse a 16-entry co-player strategy into a four-entry one.

    Variant of `expected_memory_one_strategy` (method by Press and Dyson)
    that also swaps the second and third blocks of four co-player entries,
    in addition to swapping the two middle entries inside each block.

    `index` selects which invariant distribution to use when
    `calculate_ss_for_mem_two` returns several (a list or 2-D array). It was
    previously ignored, which made the function fail on a list result. A
    single 1-D distribution is used as is.

    A state with zero total weight yields NaN, which `np.nan_to_num` turns
    into 0. The four values are returned with the CD and DC entries swapped.
    """
    # Label each of the 16 two-round states by its last two letters.
    last_round_labels = [
        "".join(history[-2:]) for history in itertools.product(['C', 'D'], repeat=4)
    ]

    stationary = np.asarray(calculate_ss_for_mem_two(player, coplayer))
    if stationary.ndim > 1:
        # Several invariant distributions were returned: pick the requested one.
        stationary = stationary[index]

    # Blocks are taken in the order 0, 2, 1, 3 and, inside each block, the
    # entries in the order 0, 2, 1, 3.
    swapped = [coplayer[4 * block + offset]
               for block in (0, 2, 1, 3) for offset in (0, 2, 1, 3)]

    strategy = []
    for state in ['CC', 'CD', 'DC', 'DD']:
        members = [i for i in range(16) if last_round_labels[i] == state]
        weighted = sum([swapped[i] * stationary[i] for i in members])
        total = sum([stationary[i] for i in members])
        strategy.append(weighted / total)

    return [np.nan_to_num(strategy[i]) for i in [0, 2, 1, -1]]

In [3]:
# The two sets of pure (0/1) strategies that the checks below enumerate.

import itertools

# Every 0/1 tuple of length 4: 2 ** 4 = 16 strategies.
set_of_memory_one_s = list(itertools.product([0, 1], repeat=4))

# Every 0/1 tuple of length 16: 2 ** 16 = 65536 strategies.
set_of_memory_two_s = list(itertools.product([0, 1], repeat=16))

In [4]:
set_of_memory_one_s 

[(0, 0, 0, 0),
 (0, 0, 0, 1),
 (0, 0, 1, 0),
 (0, 0, 1, 1),
 (0, 1, 0, 0),
 (0, 1, 0, 1),
 (0, 1, 1, 0),
 (0, 1, 1, 1),
 (1, 0, 0, 0),
 (1, 0, 0, 1),
 (1, 0, 1, 0),
 (1, 0, 1, 1),
 (1, 1, 0, 0),
 (1, 1, 0, 1),
 (1, 1, 1, 0),
 (1, 1, 1, 1)]

**Sanity check:** A memory-one strategy written as a memory two strategy is still the same.

In [24]:
b, c = 2, 1

# Payoff per one-round state, and the same vector repeated for the 16
# two-round states.
payoffs_one = np.array([b - c, -c, b, 0])
payoffs_two = np.array([b - c, -c, b, 0] * 4)

np.random.seed(1)

for _ in range(1000):
    p1, p2, p3, p4 = np.random.random(4)
    q1, q2, q3, q4 = np.random.random(4)
    memory_one_p, memory_one_q = [p1, p2, p3, p4], [q1, q2, q3, q4]

    ss_one = calculate_ss_for_mem_one(memory_one_p, memory_one_q)
    ss_two = calculate_ss_for_mem_two(memory_one_p * 4, memory_one_q * 4)

    # Both representations must give the same expected payoff.
    assert np.isclose(ss_one @ payoffs_one, ss_two @ payoffs_two)

Now we can loop over all the possible memory-one strategies, expressed as memory-two strategies, and compare the results, thereby checking the result of Press and Dyson.

The initial check with just ALLC as the player.

In [13]:
# simulations

b, c, player = 2, 1, [1, 1, 1, 1]

for i, coplayer in enumerate(set_of_memory_one_s):
    mem_one_representation = simulate_play_mem_two(player * 4, coplayer * 4, turns=1000)[0]

    ss = calculate_ss_for_mem_one(player, coplayer)
    ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)

    # Passes as soon as any estimated distribution matches any exact one.
    check = any(
        np.isclose(ss1, ss2, atol=10**-3).all()
        for ss1 in ss_estimated
        for ss2 in ss
    )

    assert check, f"{ss} and {ss_estimated}. player: {player} coplayer:{coplayer}"

In [14]:
# numerical

b, c, player = 2, 1, [1, 1, 1, 1]

for i, coplayer in enumerate(set_of_memory_one_s):
    mem_one_representation = expected_memory_one_strategy(player * 4, coplayer * 4)

    ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)
    ss = calculate_ss_for_mem_one(player, coplayer)

    # Only the first invariant distribution of each side is compared here.
    exact_first, estimated_first = ss[0], ss_estimated[0]
    assert np.allclose(exact_first, estimated_first, atol=10**-3), f"{ss} and {ss_estimated}. player: {player} coplayer:{coplayer}"

All strategies.

In [15]:
b, c = 2, 1

for player in tqdm.tqdm_notebook(set_of_memory_one_s):
    for i, coplayer in enumerate(set_of_memory_one_s):
        # Note: this unpacking rebinds `b` (and `a`) to the simulation counters.
        mem_one_representation, a, b = simulate_play(
            player * 4, coplayer * 4, turns=1000, noise=0.0
        )

        ss_estimated = calculate_ss_for_mem_one(player, coplayer)
        ss = calculate_ss_for_mem_one(player, mem_one_representation)

        check = any(
            np.isclose(ss1, ss2, atol=10**-3).all()
            for ss1 in ss_estimated
            for ss2 in ss
        )

        # Some pairs are expected to fail; they are only printed.
        if not check:
            print(f"player: {player} coplayer:{coplayer}")

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))

player: (0, 1, 0, 1) coplayer:(0, 1, 1, 0)
player: (0, 1, 0, 1) coplayer:(1, 0, 1, 1)
player: (0, 1, 1, 0) coplayer:(0, 1, 1, 1)
player: (0, 1, 1, 0) coplayer:(1, 0, 1, 1)
player: (1, 0, 0, 1) coplayer:(0, 1, 0, 1)
player: (1, 0, 1, 0) coplayer:(0, 1, 1, 1)



In [25]:
b, c = 2, 1

for player in tqdm.tqdm_notebook(set_of_memory_one_s):
    for i, coplayer in enumerate(set_of_memory_one_s):
        mem_one_representation = expected_memory_one_strategy_ch(
            player * 4, coplayer * 4, index=0
        )

        ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)
        ss = calculate_ss_for_mem_one(player, coplayer)

        check = any(
            np.isclose(ss1, ss2, atol=10**-3).all()
            for ss1 in ss_estimated
            for ss2 in ss
        )

        # Some pairs are expected to fail; they are only printed.
        if not check:
            print(f"player: {player} coplayer:{coplayer}")

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




The pairs in discussion:

1. player: (0, 1, 0, 1) coplayer:(0, 1, 1, 0). simulations fail here because one state (CD) is very visited.
2. (+) player: (0, 1, 0, 1) coplayer:(1, 0, 1, 1). cycling behavior
3. (+) player: (0, 1, 1, 0) coplayer:(0, 1, 1, 1). cycling behavior
4. (+) player: (0, 1, 1, 0) coplayer:(1, 0, 1, 1). cycling behavior
5. player: (1, 0, 0, 1) coplayer:(0, 1, 0, 1). simulations fail here because one state (CD) is very visited.
6. (+) player: (1, 0, 1, 0) coplayer:(0, 1, 1, 1). cycling behavior
    

In [26]:
pairs = [[(0, 1, 0, 1), (0, 1, 1, 0)],
    [(0, 1, 0, 1), (1, 0, 1, 1)],
    [(0, 1, 1, 0), (0, 1, 1, 1)],
    [(0, 1, 1, 0), (1, 0, 1, 1)],
    [(1, 0, 0, 1), (0, 1, 0, 1)],
    [(1, 0, 1, 0), (0, 1, 1, 1)]]

In [27]:
player = pairs[1][0]

coplayer = pairs[1][1]

In [28]:
mem_one_representation = expected_memory_one_strategy(player * 4, coplayer * 4)

mem_one_representation

[1.0, 0.0, 0.0, 1.0]

In [29]:
calculate_ss_for_mem_one(player, mem_one_representation)

[array([0., 1., 0., 0.])]

In [30]:
mem_one_sim_representation, a, b = simulate_play(player * 4, coplayer * 4, turns=2000, noise=0.0)

mem_one_sim_representation

[1.0, 0.0, 0, 1.0]

In [31]:
calculate_ss_for_mem_one(player, mem_one_sim_representation)

[array([0., 1., 0., 0.])]

In [32]:
a

{(C, C): 665, (C, D): 666, (D, C): 0, (D, D): 666}

In [33]:
b

{(C, C): 665, (C, D): 0, (D, C): 0, (D, D): 666}

In [40]:
4 / (16 ** 2)

0.015625

The numerical approach can verify all cases except 4. Thus, out of 256 checks only 4 cannot be verified (about 1.6%).

However so far we have only focused on memory-two strategies that are memory-one strategies in essence. 

In [283]:
# numerical

b, c = 2, 1

states = ['CC', 'CD', 'DC', 'DD']

# Label each of the 16 two-round states by its last two letters.
states_in_two_bits = [
    "".join(history[-2:]) for history in itertools.product(['C', 'D'], repeat=4)
]

np.random.seed(0)

count = 0

for player in set_of_memory_one_s[:1]:
    for coplayer in tqdm.tqdm_notebook(set_of_memory_two_s):
        mem_one_representation = expected_memory_one_strategy(player * 4, coplayer)

        ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)

        # Sum each 16-state distribution onto the four one-round states.
        ss = [
            [sum([s[i] for i in range(16) if states_in_two_bits[i] == state])
             for state in states]
            for s in calculate_ss_for_mem_two(player * 4, coplayer)
        ]

        check = any(
            np.isclose(ss1, ss2, atol=10**-2).all()
            for ss1 in ss_estimated
            for ss2 in ss
        )

        # Some pairs are expected to fail; they are only counted.
        if not check:
            count += 1

HBox(children=(FloatProgress(value=0.0, max=65536.0), HTML(value='')))




In [285]:
count / (len(set_of_memory_two_s)) #* (len(set_of_memory_one_s)))

0.28125

In [None]:
# NOTE(review): this cell was labelled "simulations", but it calls
# `expected_memory_one_strategy` (the numerical method) and never calls a
# simulate_* function. It repeats the numerical count above, looping over every
# memory-one player instead of only the first -- confirm what was intended.

b, c = 2, 1

states = ['CC', 'CD', 'DC', 'DD']

# Label each of the 16 two-round states by its last two letters.
states_in_two_bits = ["".join(i[-2:]) for i in list(itertools.product(['C', 'D'], repeat=4))]

np.random.seed(0)

count = 0

for player in tqdm.tqdm_notebook(set_of_memory_one_s):

    for coplayer in set_of_memory_two_s:

        mem_one_representation = expected_memory_one_strategy(player * 4, coplayer)

        ss_estimated = calculate_ss_for_mem_one(player, mem_one_representation)

        ss = calculate_ss_for_mem_two(player * 4, coplayer)
        

        # Sum each 16-state distribution onto the four one-round states.
        ss = [[sum([s[i] for i in range(16) if states_in_two_bits[i] == state])
                for state in states] for s in ss]
        
        check = False

        # `check` becomes True when any estimated distribution matches any exact one.
        for ss1 in ss_estimated:
            for ss2 in ss:
                if (np.isclose(ss1, ss2, atol=10**-2).all()):
                    check = True

        # Some pairs are expected to fail; they are only counted.
        if check == False:
            count += 1

### Reactive strategies

In [191]:
set_of_reactive_s = [(0, 0, 0, 0), (1, 0, 1, 0), (0, 1, 0, 1), (1, 1, 1, 1)]

In [235]:
def simulate_play_reactive(p, q, turns=10 * 5, noise=0):
    """
    Estimate a two-parameter strategy `[p_1, p_2, p_1, p_2]` from one
    simulated match.

    `p` and `q` are 16-entry strategies. Both are re-ordered and handed to
    `axl.MemoryTwoPlayer`, and a single `axl.Match` is played. Every round
    from the third up to the second-to-last is keyed by its reversed outcome
    pair, and we record whether the last entry of the following round's
    outcome is `C`.

    `p_1` pools the keys (C, C) and (C, D); `p_2` pools (D, C) and (D, D).
    A pool that was never visited gives the integer 0.

    Returns `(strategy, states_counts, states_to_actions)`.
    """
    states = [(C, C), (C, D), (D, C), (D, D)]
    states_to_actions = {state: 0 for state in states}
    states_counts = {state: 0 for state in states}

    # Position k of the list given to MemoryTwoPlayer takes entry order[k].
    order = [0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15]
    player = axl.MemoryTwoPlayer([p[k] for k in order])
    coplayer = axl.MemoryTwoPlayer([q[k] for k in order])

    match = axl.Match(players=[player, coplayer], turns=turns, noise=noise)
    match.play()

    rounds = match.result
    for history, following in zip(rounds[2:-1], rounds[3:]):
        state = history[::-1]
        states_counts[state] += 1
        if following[-1] == C:
            states_to_actions[state] += 1

    def cooperation_rate(first, second):
        # Pooled cooperation frequency over two keys; 0 when neither was visited.
        visits = states_counts[first] + states_counts[second]
        if visits > 0:
            return (states_to_actions[first] + states_to_actions[second]) / visits
        return 0

    p_1 = cooperation_rate((C, C), (C, D))
    p_2 = cooperation_rate((D, C), (D, D))

    return [p_1, p_2, p_1, p_2], states_counts, states_to_actions



def simulate_play_reactive_two(p, q, turns=10 * 5, noise=0):
    """
    Estimate a two-parameter strategy from one simulated match, counting
    two-round states.

    `p` and `q` are 16-entry strategies. Both are re-ordered and handed to
    `axl.MemoryTwoPlayer`, and a single `axl.Match` is played. Each pair of
    consecutive rounds (starting from the second round) forms a key
    `(previous[::-1], history[::-1])`; we count how often each key occurs and
    how often the last entry of the next round's outcome is `C`.

    The keys are pooled into two groups by their most recent round: the
    first group holds keys ending in (C, C) or (C, D), the second those
    ending in (D, C) or (D, D). Each group gives one cooperation frequency
    (the integer 0 if the group was never visited).

    Two defects of the earlier hand-written version are fixed:
    - the second group listed ((D, C), (D, C)) twice and omitted
      ((C, D), (D, C)), so one state was double counted and one ignored;
    - a group visited exactly once was reported as 0 (`> 1` threshold),
      unlike `simulate_play_reactive`, which uses `> 0`.

    Returns `(strategy * 2, states_counts, states_to_actions)`, i.e. the two
    frequencies repeated to give four entries.
    """
    states = [(C, C), (C, D), (D, C), (D, D)]

    # All (previous round, last round) keys, in the same order as the
    # hand-written tables this replaces.
    two_round_states = list(itertools.product(states, repeat=2))
    states_to_actions = {key: 0 for key in two_round_states}
    states_counts = {key: 0 for key in two_round_states}

    # Keys pooled by their most recent round.
    first = [(previous, last) for last in [(C, C), (C, D)] for previous in states]
    second = [(previous, last) for last in [(D, C), (D, D)] for previous in states]

    # Position k of the list given to MemoryTwoPlayer takes entry order[k].
    order = [0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15]
    player = axl.MemoryTwoPlayer([p[k] for k in order])
    coplayer = axl.MemoryTwoPlayer([q[k] for k in order])

    match = axl.Match(players=[player, coplayer], turns=turns, noise=noise)
    match.play()

    rounds = match.result
    for previous, history, following in zip(rounds[1:-2], rounds[2:-1], rounds[3:]):
        key = (previous[::-1], history[::-1])
        states_counts[key] += 1
        if following[-1] == C:
            states_to_actions[key] += 1

    strategy = []
    for group in [first, second]:
        visits = sum(states_counts[key] for key in group)
        if visits > 0:
            strategy.append(sum(states_to_actions[key] for key in group) / visits)
        else:
            strategy.append(0)

    return strategy * 2, states_counts, states_to_actions

In [237]:
for _ in tqdm.tqdm_notebook(range(20)):
    # Fresh random 16-entry strategies for both players on every repetition.
    p, q = np.random.random(16), np.random.random(16)

    expected_reactive_one = simulate_play_reactive(p, q, turns=10 ** 5, noise=0)[0]
    expected_reactive_two = simulate_play_reactive_two(p, q, turns=10 ** 5, noise=0)[0]

    # The two estimators should agree up to simulation noise.
    assert np.allclose(expected_reactive_one, expected_reactive_two, atol=10**-1)

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




In [286]:
def expected_one_bit_reactive_strategy(player, coplayer, index=0):
    """
    Collapse a four-entry co-player strategy into two values, repeated as
    `[v_C, v_D, v_C, v_D]`.

    Follows the method by Press and Dyson. Entry `index` of the result of
    `calculate_ss_for_mem_one(player, coplayer)` is used as the weights. The
    four one-round states are grouped by their final letter ('C' or 'D'),
    and within each group the co-player entries (with the two middle ones
    swapped) are averaged, weighted by that distribution.

    A group with zero total weight yields NaN, which `np.nan_to_num` turns
    into 0.
    """
    # Final letter of each of the four one-round states: C, D, C, D.
    final_letters = [outcome[-1] for outcome in itertools.product(['C', 'D'], repeat=2)]

    stationary = calculate_ss_for_mem_one(player, coplayer)[index]

    swapped = [coplayer[k] for k in (0, 2, 1, 3)]

    strategy = []
    for letter in ['C', 'D']:
        members = [i for i in range(4) if final_letters[i] == letter]
        weighted = sum([swapped[i] * stationary[i] for i in members])
        total = sum([stationary[i] for i in members])
        strategy.append(weighted / total)

    return [np.nan_to_num(strategy[i]) for i in [0, 1, 0, 1]]


def expected_one_bit_reactive_strategy_vs_memory_two(player, coplayer, index=0):
    """
    Collapse a 16-entry co-player strategy into two values, repeated as
    `[v_C, v_D, v_C, v_D]`.

    Follows the method by Press and Dyson. Entry `index` of the result of
    `calculate_ss_for_mem_two(player, coplayer)` is used as the weights. The
    16 two-round states are grouped by their final letter ('C' or 'D'), and
    within each group the re-ordered co-player entries are averaged,
    weighted by that distribution.

    A group with zero total weight yields NaN, which `np.nan_to_num` turns
    into 0.
    """
    # Final letter of each of the 16 two-round states.
    final_letters = [history[-1] for history in itertools.product(['C', 'D'], repeat=4)]

    stationary = calculate_ss_for_mem_two(player, coplayer)[index]

    # Blocks are taken in the order 0, 2, 1, 3 and, inside each block, the
    # entries in the order 0, 2, 1, 3.
    swapped = [coplayer[4 * block + offset]
               for block in (0, 2, 1, 3) for offset in (0, 2, 1, 3)]

    strategy = []
    for letter in ['C', 'D']:
        members = [i for i in range(16) if final_letters[i] == letter]
        weighted = sum([swapped[i] * stationary[i] for i in members])
        total = sum([stationary[i] for i in members])
        strategy.append(weighted / total)

    return [np.nan_to_num(strategy[i]) for i in [0, 1, 0, 1]]



def expected_two_bit_reactive_strategy(player, coplayer, index=0):
    """
    Collapse a 16-entry co-player strategy into four values, one per pair of
    letters taken from positions 2 and 4 of the two-round state.

    Follows the method by Press and Dyson. Entry `index` of the result of
    `calculate_ss_for_mem_two(player, coplayer)` is used as the weights. The
    16 two-round states are grouped by the label formed from their second
    and last letters (CC, CD, DC, DD), and within each group the co-player
    entries (two middle entries of every block of four swapped) are
    averaged, weighted by that distribution.

    Returns a NumPy array of four values; NaN from a zero-weight group is
    turned into 0 by `np.nan_to_num`.
    """
    # Label each two-round state by its second and last letters.
    labels = [history[1] + history[-1] for history in itertools.product(['C', 'D'], repeat=4)]

    stationary = calculate_ss_for_mem_two(player, coplayer)[index]

    # Swap the two middle entries inside every block of four entries.
    swapped = [coplayer[4 * block + offset]
               for block in range(4) for offset in (0, 2, 1, 3)]

    strategy = []
    for state in ['CC', 'CD', 'DC', 'DD']:
        members = [i for i in range(16) if labels[i] == state]
        weighted = sum([swapped[i] * stationary[i] for i in members])
        total = sum([stationary[i] for i in members])
        strategy.append(weighted / total)

    return np.nan_to_num(strategy)

In [287]:
# reactive trying to estimate memory-two (which are memory-ones)

b, c = 2, 1

count = 0

for player in tqdm.notebook.tqdm(set_of_reactive_s):
    for i, coplayer in enumerate(set_of_memory_one_s):
        estimated_reactive = expected_one_bit_reactive_strategy_vs_memory_two(
            player * 4, coplayer * 4
        )

        ss_estimated = calculate_ss_for_mem_one(player, coplayer)
        ss = calculate_ss_for_mem_one(player, estimated_reactive)

        check = any(
            np.isclose(ss1, ss2, atol=10**-3).all()
            for ss1 in ss_estimated
            for ss2 in ss
        )

        # Some pairs are expected to fail; they are counted and printed.
        if not check:
            count += 1
            print(f"player: {player} coplayer:{coplayer}")

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

player: (0, 0, 0, 0) coplayer:(0, 0, 0, 1)
player: (0, 0, 0, 0) coplayer:(0, 0, 1, 1)
player: (0, 0, 0, 0) coplayer:(0, 1, 0, 1)
player: (0, 0, 0, 0) coplayer:(0, 1, 1, 1)
player: (0, 0, 0, 0) coplayer:(1, 0, 0, 1)
player: (0, 0, 0, 0) coplayer:(1, 0, 1, 1)
player: (0, 0, 0, 0) coplayer:(1, 1, 0, 1)
player: (0, 0, 0, 0) coplayer:(1, 1, 1, 1)
player: (1, 0, 1, 0) coplayer:(0, 0, 0, 1)
player: (1, 0, 1, 0) coplayer:(0, 0, 1, 0)
player: (1, 0, 1, 0) coplayer:(0, 0, 1, 1)
player: (1, 0, 1, 0) coplayer:(0, 1, 1, 1)
player: (1, 0, 1, 0) coplayer:(1, 0, 1, 0)
player: (1, 0, 1, 0) coplayer:(1, 0, 1, 1)
player: (0, 1, 0, 1) coplayer:(0, 0, 1, 0)
player: (0, 1, 0, 1) coplayer:(0, 1, 1, 0)
player: (0, 1, 0, 1) coplayer:(1, 0, 1, 1)
player: (0, 1, 0, 1) coplayer:(1, 1, 0, 0)
player: (0, 1, 0, 1) coplayer:(1, 1, 0, 1)
player: (0, 1, 0, 1) coplayer:(1, 1, 1, 0)
player: (0, 1, 0, 1) coplayer:(1, 1, 1, 1)
player: (1, 1, 1, 1) coplayer:(0, 0, 1, 0)
player: (1, 1, 1, 1) coplayer:(0, 0, 1, 1)
player: (1,

In [230]:
states = ['C', 'D']

states_in_two_bits = ["".join(i[-1:]) for i in list(itertools.product(['C', 'D'], repeat=4))]

In [231]:
p = (1, 1, 1, 1)

q = (0, 1, 1, 0)

In [220]:
ss = calculate_ss_for_mem_two(p * 4, q * 4)

ss = ss[0]

In [221]:
ss

array([0. , 0.5, 0. , 0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0. , 0. , 0. ])

In [222]:
coplayer = q * 4

coplayer = [coplayer[0],  coplayer[2],  coplayer[1],  coplayer[3],
             coplayer[4],  coplayer[6],  coplayer[5],  coplayer[7],
             coplayer[8],  coplayer[10], coplayer[9],  coplayer[11],
             coplayer[12], coplayer[14], coplayer[13], coplayer[15]]

In [223]:
coplayer

[0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0]

In [224]:
[[ss[i] for i in range(16) if states_in_two_bits[i] == state] for state in states]

[[0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]

In [225]:
[[coplayer[i] * ss[i] for i in range(16) if states_in_two_bits[i] == state] for state in states]

[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]

In [226]:
[sum([ss[i] for i in range(16) if states_in_two_bits[i] == state]) for state in states]

[0.5, 0.5]

In [227]:
[(sum([coplayer[i] * ss[i] for i in range(16) if states_in_two_bits[i] == state]) 
                 / sum([ss[i] for i in range(16) if states_in_two_bits[i] == state]))
                for state in states]

[0.0, 1.0]

In [228]:
simulate_play_reactive(p * 4, q * 4, turns=1000, noise=0.0)

([0.0, 1.0, 0.0, 1.0],
 {(C, C): 498, (C, D): 0, (D, C): 499, (D, D): 0},
 {(C, C): 0, (C, D): 0, (D, C): 499, (D, D): 0})

In [232]:
b, c = 2, 1

for player in tqdm.notebook.tqdm(set_of_reactive_s):
    for i, coplayer in enumerate(set_of_memory_one_s):
        # Note: this unpacking rebinds `b` (and `a`) to the simulation counters.
        mem_one_representation, a, b = simulate_play_reactive(
            player * 4, coplayer * 4, turns=1000, noise=0.0
        )

        ss_estimated = calculate_ss_for_mem_one(player, coplayer)
        ss = calculate_ss_for_mem_one(player, mem_one_representation)

        check = any(
            np.isclose(ss1, ss2, atol=10**-3).all()
            for ss1 in ss_estimated
            for ss2 in ss
        )

        # Some pairs are expected to fail; they are only printed.
        if not check:
            print(f"player: {player} coplayer:{coplayer}")

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

player: (0, 0, 0, 0) coplayer:(0, 0, 0, 1)
player: (0, 0, 0, 0) coplayer:(0, 0, 1, 1)
player: (0, 0, 0, 0) coplayer:(1, 0, 0, 1)
player: (0, 0, 0, 0) coplayer:(1, 0, 1, 1)
player: (0, 0, 0, 0) coplayer:(1, 1, 0, 1)
player: (0, 0, 0, 0) coplayer:(1, 1, 1, 1)
player: (1, 0, 1, 0) coplayer:(0, 0, 0, 1)
player: (1, 0, 1, 0) coplayer:(0, 0, 1, 0)
player: (1, 0, 1, 0) coplayer:(0, 0, 1, 1)
player: (1, 0, 1, 0) coplayer:(0, 1, 1, 0)
player: (1, 0, 1, 0) coplayer:(0, 1, 1, 1)
player: (0, 1, 0, 1) coplayer:(0, 0, 1, 0)
player: (0, 1, 0, 1) coplayer:(0, 1, 1, 0)
player: (0, 1, 0, 1) coplayer:(1, 0, 0, 1)
player: (0, 1, 0, 1) coplayer:(1, 0, 1, 1)
player: (0, 1, 0, 1) coplayer:(1, 1, 0, 0)
player: (0, 1, 0, 1) coplayer:(1, 1, 0, 1)
player: (0, 1, 0, 1) coplayer:(1, 1, 1, 0)
player: (0, 1, 0, 1) coplayer:(1, 1, 1, 1)
player: (1, 1, 1, 1) coplayer:(0, 0, 1, 0)
player: (1, 1, 1, 1) coplayer:(0, 0, 1, 1)
player: (1, 1, 1, 1) coplayer:(0, 1, 1, 0)
player: (1, 1, 1, 1) coplayer:(0, 1, 1, 1)



In [299]:
p = (1, 0, 1, 0)

q = (0, 0, 0, 1)

In [300]:
calculate_ss_for_mem_one(p, q)

[array([-0.        ,  0.33333333,  0.33333333,  0.33333333])]

In [301]:
simulate_play_reactive(p * 4, q * 4, turns=1000, noise=0.0)

([0.0, 0.4992481203007519, 0.0, 0.4992481203007519],
 {(C, C): 0, (C, D): 332, (D, C): 333, (D, D): 332},
 {(C, C): 0, (C, D): 0, (D, C): 0, (D, D): 332})

In [302]:
calculate_ss_for_mem_one(p, [0.0, 0.5, 0.0, 0.5])

[array([0.11111111, 0.22222222, 0.22222222, 0.44444444])]

In [303]:
# NOTE(review): elsewhere in this notebook `estimated_mem_one_sim` is bound to the
# list returned by `simulate_play`, not to a function, so this call raises the
# TypeError recorded below. Confirm which function was meant to be called here,
# or remove the cell.
estimated_mem_one_sim(p, q)

TypeError: 'list' object is not callable

In [138]:
def two_bit_reactive(p1, p2, p3, p4):
    """
    Expand four values into a 16-entry list.

    The layout is `[p1, p2, p1, p2, p3, p4, p3, p4]` repeated twice.
    """
    first_half = [p1, p2] * 2
    second_half = [p3, p4] * 2
    return (first_half + second_half) * 2

In [139]:
set_of_two_bit_reactive_s = [two_bit_reactive(*p) for p in set_of_memory_one_s]

In [146]:
# reactive trying to estimate memory-two (which are memory-ones)

b, c = 2, 1

count = 0

for player in tqdm.notebook.tqdm(set_of_two_bit_reactive_s[:1]):
    for i, coplayer in enumerate(set_of_memory_two_s):
        # Collapse the co-player to four values, then expand them back to 16 entries.
        estimated_reactive = expected_two_bit_reactive_strategy(player, coplayer)
        estimated_reactive = two_bit_reactive(*estimated_reactive)

        ss = calculate_ss_for_mem_two(player, coplayer)
        ss_estimated = calculate_ss_for_mem_two(player, estimated_reactive)

        check = any(
            np.isclose(ss1, ss2, atol=10**-3).all()
            for ss1 in ss_estimated
            for ss2 in ss
        )

        # Some pairs are expected to fail; they are only counted.
        if not check:
            count += 1

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




In [152]:
count, len(set_of_memory_two_s)

(32768, 65536)

### Fully stochastic strategies

### or just pure strategies with epsilon error.

### One bit can reduce two bits to one bit

In [288]:
p = [1, 1, 1, 1] * 4

np.random.seed(1)

q = np.random.random(16)

In [289]:
ss = calculate_ss_for_mem_two(p, q)
ss = ss[0]

In [290]:
states = ['CC', 'CD', 'DC', 'DD']

states_in_two_bits = ["".join(i[-2:]) for i in list(itertools.product(['C', 'D'], repeat=4))]

[sum([ss[i] for i in range(16) if states_in_two_bits[i] == state]) for state in states]

[0.3317488801082833, 0.6682511198917169, 0.0, 0.0]

In [291]:
estimated_mem_one = expected_memory_one_strategy_ch(p, q)

In [292]:
estimated_mem_one

[0.4049699504318982, 0.0, 0.295398760584166, 0.0]

In [293]:
ss_estimated = calculate_ss_for_mem_one(p, estimated_mem_one)

In [294]:
ss_estimated

[array([ 0.33174888,  0.66825112, -0.        , -0.        ])]

In [295]:
estimated_mem_one_sim, a, b = simulate_play(p, q, turns=10 ** 5)

In [296]:
estimated_mem_one_sim

[0.4044790593369295, 0, 0.2933028851176883, 0]

In [297]:
calculate_ss_for_mem_one(p, estimated_mem_one_sim)

[array([ 0.3299899,  0.6700101, -0.       , -0.       ])]