In [10]:
from imports import *
from information_conditions import Information_Conditions
from base_ecopg import BaseEcologicalPublicGood
from helper_functions import *
from simulation_and_results_functions import *

In [1]:
def calculate_avg_value_given_policy(policy, mae):
    Vio = mae.Vio(policy)
    obsdist = mae.obsdist(policy)
    agents = 0
    states = 1
    avg_value = jnp.einsum(Vio, [agents, states], obsdist , [agents, states], [agents])
    return avg_value

In [10]:
list_of_determinisic_strategy = list(list(strat) for strat in itertools.product([0,1], repeat = 4))

In [None]:
WSLS = [1,0,0,1]
GT = [1,0,0,0]
ALLC = [1,1,1,1]
TFT = [1,0,1,0]

ALLD = [0,0,0,0]
ReverseGT = [0,0,0,1]

strategy_set_p1_only_action =  {
    'WSLS': WSLS,
    'GT': GT,
    'ALLC': ALLC,
    'ALLD': ALLD,
    'ReverseGT': ReverseGT
}
strategy_set_p2_only_action =  {
    'WSLS': WSLS,
    'GT': GT,
    'ALLC': ALLC,
    'ALLD': ALLD,
    'ReverseGT': ReverseGT
}



In [19]:
def add_degraded_state_policies(strategy):
        
        strategy_propserous_and_degraded_state = strategy.copy()

        for i in [0, 2, 4, 6]:

            strategy_propserous_and_degraded_state.insert(i, 0.5)
        
        return strategy_propserous_and_degraded_state



In [2]:
def create_policy_from_strategy(agent_1_strategy, agent_2_strategy):


    agent_1_strategy = [[x, 1-x] for x in agent_1_strategy]
    agent_2_strategy = [[x, 1-x] for x in agent_2_strategy]

    policy = np.array([agent_1_strategy, agent_2_strategy])

    return policy


In [3]:
def metgame_reward_matrix(strategy_set_p1, strategy_set_p2, mae):
    p1_reward_matrix = np.zeros((len(strategy_set_p1), len(strategy_set_p2)))
    p2_reward_matrix = np.zeros((len(strategy_set_p1), len(strategy_set_p2)))

    for i, p1_strategy in enumerate(strategy_set_p1.values()):
        for j, p2_strategy in enumerate(strategy_set_p2.values()):
            policy = create_policy_from_strategy(p1_strategy, p2_strategy)
            values = calculate_avg_value_given_policy(policy, mae)
            value_p1 = values[0]
            value_p2 = values[1]
            p1_reward_matrix[i,j] = value_p1
            p2_reward_matrix[i,j] = value_p2

    return p1_reward_matrix, p2_reward_matrix

In [None]:


mode = 'only_action_history_information'
ecopg = BaseEcologicalPublicGood()

information_condition_instance = Information_Conditions(ecopg, mode=mode)
mae_ecopg = POstratAC(env=information_condition_instance, learning_rates=0.05, discount_factors= 0.98)

p1_reward_matrix_only_action, p2_reward_matrix_only_action = metgame_reward_matrix(strategy_set_p1_only_action, strategy_set_p2_only_action, mae_ecopg)
       

p1_reward_matrix_only_action_df = pd.DataFrame(p1_reward_matrix_only_action, index = list(strategy_set_p1_only_action.keys()), columns= list(strategy_set_p2_only_action.keys()))
p2_reward_matrix_only_action_df = pd.DataFrame(p2_reward_matrix_only_action, index = list(strategy_set_p1_only_action.keys()), columns= list(strategy_set_p2_only_action.keys()))

with pd.ExcelWriter('reward_matrix_only_action.xlsx') as excel_file:
    p1_reward_matrix_only_action_df.to_excel(excel_file, sheet_name='p1_reward_matrix', index=True, header=True)
    p2_reward_matrix_only_action_df.to_excel(excel_file, sheet_name='p2_reward_matrix', index=True, header=True)


In [6]:
def calculate_nash_equilibria(p1_reward_matrix, p2_reward_matrix, strategy_set_p1, strategy_set_p2):

    game = nashpy.Game(p1_reward_matrix, p2_reward_matrix)

    p1_list_of_strategies = list(strategy_set_p1.keys())
    p2_list_of_strategies = list(strategy_set_p2.keys())

    # equilibria = game.support_enumeration()
    equilibria = game.vertex_enumeration()


    # pure_equilibria = []
    # for eq in equilibria:
    #     if all((prob == 1.0 or prob == 0.0) for prob in eq[0]) and all((prob == 1.0 or prob == 0.0) for prob in eq[1]):
    #         pure_equilibria.append(eq)

    print(" Nash Equilibria")
    list_of_eq_strategies = []
    for eq in equilibria:
        # print("Player 1 strategy:", eq[0].astype(bool), "Player 2 strategy:", eq[1])

        p1_active_strategy = np.array(p1_list_of_strategies)[eq[0].astype(bool)]
        p2_active_strategy = np.array(p2_list_of_strategies)[eq[1].astype(bool)]

        # print("P1:", p1_active_strategy, "P2:", p2_active_strategy)
        list_of_eq_strategies.append({"P1" : p1_active_strategy, "P2:":p2_active_strategy})

    return equilibria, list_of_eq_strategies
   

In [65]:
def replicator_dynamics(p1_reward_matrix, p2_reward_matrix, strategy_set_p1, strategy_set_p2):

    game = nashpy.Game(p1_reward_matrix, p2_reward_matrix)

    p1_list_of_strategies = list(strategy_set_p1.keys())
    p2_list_of_strategies = list(strategy_set_p2.keys())

    dynamics = game.replicator_dynamics()

    return dynamics

In [91]:
calculate_nash_equilibria(p1_reward_matrix_only_action, p2_reward_matrix_only_action, strategy_set_p1_only_action, strategy_set_p2_only_action)


Pure Nash Equilibria
P1: ['WSLS'] P2: ['WSLS']
P1: ['WSLS'] P2: ['GT']
P1: ['WSLS'] P2: ['ALLC']
P1: ['GT'] P2: ['WSLS']
P1: ['GT'] P2: ['GT']
P1: ['GT'] P2: ['ALLC']
P1: ['ALLC'] P2: ['WSLS']
P1: ['ALLC'] P2: ['GT']
P1: ['ALLC'] P2: ['ALLC']
P1: ['ReverseGT'] P2: ['ReverseGT']
P1: ['WSLS' 'ReverseGT'] P2: ['WSLS' 'ReverseGT']


In [55]:
strategy_set_p1_both_state_and_action = {key: add_degraded_state_policies(value) for key, value in strategy_set_p1_only_action.items()}
strategy_set_p2_both_state_and_action = {key: add_degraded_state_policies(value) for key, value in strategy_set_p2_only_action.items()}




In [57]:
print('strategy_set_p2_both_state_and_action: ', strategy_set_p2_both_state_and_action)


strategy_set_p2_both_state_and_action:  {'WSLS': [0.5, 1, 0.5, 0, 0.5, 0, 0.5, 1], 'GT': [0.5, 1, 0.5, 0, 0.5, 0, 0.5, 0], 'ALLC': [0.5, 1, 0.5, 1, 0.5, 1, 0.5, 1], 'ALLD': [0.5, 0, 0.5, 0, 0.5, 0, 0.5, 0], 'ReverseGT': [0.5, 0, 0.5, 0, 0.5, 0, 0.5, 1]}


In [50]:

mode = 'both_state_and_action_information'
ecopg = BaseEcologicalPublicGood()

information_condition_instance = Information_Conditions(ecopg, mode=mode)
mae_ecopg = POstratAC(env=information_condition_instance, learning_rates=0.05, discount_factors= 0.98)

p1_reward_matrix_both_state_and_action, p2_reward_matrix_both_state_and_action = metgame_reward_matrix(strategy_set_p1_both_state_and_action, strategy_set_p2_both_state_and_action, mae_ecopg)
       

p1_reward_matrix_both_state_and_action_df = pd.DataFrame(p1_reward_matrix_both_state_and_action, index = list(strategy_set_p1_both_state_and_action.keys()), columns= list(strategy_set_p2_both_state_and_action.keys()))
p2_reward_matrix_both_state_and_action_df = pd.DataFrame(p2_reward_matrix_both_state_and_action, index = list(strategy_set_p2_both_state_and_action.keys()), columns= list(strategy_set_p2_both_state_and_action.keys()))

with pd.ExcelWriter('reward_matrix_both_state_and_action.xlsx') as excel_file:
    p1_reward_matrix_both_state_and_action_df.to_excel(excel_file, sheet_name='p1_reward_matrix', index=True, header=True)
    p2_reward_matrix_both_state_and_action_df.to_excel(excel_file, sheet_name='p2_reward_matrix', index=True, header=True)

NameError: name 'strategy_set_p1_both_state_and_action' is not defined

In [90]:
calculate_nash_equilibria(p1_reward_matrix_both_state_and_action, p2_reward_matrix_both_state_and_action, strategy_set_p1_both_state_and_action, strategy_set_p2_both_state_and_action)


Pure Nash Equilibria
P1: ['WSLS'] P2: ['WSLS']
P1: ['WSLS'] P2: ['GT']
P1: ['WSLS'] P2: ['ALLC']
P1: ['GT'] P2: ['WSLS']
P1: ['GT'] P2: ['GT']
P1: ['GT'] P2: ['ALLC']
P1: ['ALLC'] P2: ['WSLS']
P1: ['ALLC'] P2: ['GT']
P1: ['ALLC'] P2: ['ALLC']
P1: ['ReverseGT'] P2: ['ReverseGT']
P1: ['WSLS' 'ReverseGT'] P2: ['WSLS' 'ReverseGT']


In [13]:
determinstic_strategy_itertools = itertools.product([0,1], repeat = 4)

determinisic_strategy_lists = list(list(strat) for strat in determinstic_strategy_itertools)
all_determinstic_strategy_dictionary = {str(strat):strat for strat in determinisic_strategy_lists}

In [14]:
strategy_set_p1_only_action = all_determinstic_strategy_dictionary
strategy_set_p2_only_action = all_determinstic_strategy_dictionary


mode = 'only_action_history_information'
ecopg = BaseEcologicalPublicGood()

information_condition_instance = Information_Conditions(ecopg, mode=mode)
mae_ecopg = POstratAC(env=information_condition_instance, learning_rates=0.05, discount_factors= 0.98)

p1_reward_matrix_only_action, p2_reward_matrix_only_action = metgame_reward_matrix(strategy_set_p1_only_action, strategy_set_p2_only_action, mae_ecopg)
       

p1_reward_matrix_only_action_df = pd.DataFrame(p1_reward_matrix_only_action, index = list(strategy_set_p1_only_action.keys()), columns= list(strategy_set_p2_only_action.keys()))
p2_reward_matrix_only_action_df = pd.DataFrame(p2_reward_matrix_only_action, index = list(strategy_set_p1_only_action.keys()), columns= list(strategy_set_p2_only_action.keys()))

# with pd.ExcelWriter('reward_matrix_only_action.xlsx') as excel_file:
#     p1_reward_matrix_only_action_df.to_excel(excel_file, sheet_name='p1_reward_matrix', index=True, header=True)
#     p2_reward_matrix_only_action_df.to_excel(excel_file, sheet_name='p2_reward_matrix', index=True, header=True)

In [15]:
equlibria, list_of_eq_strategies = calculate_nash_equilibria(p1_reward_matrix_only_action, p2_reward_matrix_only_action, strategy_set_p1_only_action, strategy_set_p2_only_action)


 Nash Equilibria


KeyboardInterrupt: 

In [69]:
np.printoptions(precision=5, suppress=True)


<contextlib._GeneratorContextManager at 0x1ae675f7230>

In [90]:
replicator = replicator_dynamics(p1_reward_matrix_only_action, p2_reward_matrix_only_action, strategy_set_p1_only_action, strategy_set_p2_only_action)
with np.printoptions(precision=5, suppress=True):
        print(replicator[999])
        
for k in strategy_set_p1_only_action:
    if np.round(replicator[999][k]) != 0:
        print(k)


[-0.      -0.      -0.      -0.      -0.      -0.      -0.      -0.
 -0.      -0.       0.00029  0.99971 -0.      -0.      -0.      -0.     ]


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [64]:
print(len(list_of_eq_strategies))

#possible to exploit symmetry in the game? to eliminate some nash/nash combinations?

1000


In [20]:
import pygambit 
import numpy as np         # pygambit is imported as “gambit”
# 2×2 Prisoner’s Dilemma directly from payoff arrays


payoffs_row =  p1_reward_matrix_only_action
payoffs_col  = np.transpose(p2_reward_matrix_only_action)
game = pygambit.Game.from_arrays(payoffs_row, payoffs_col)


# …or load a file that is already in .nfg / .efg / .gbt format
# g = gbt.read_nfg("example.nfg")

# Compute pure strategy Nash equilibria
result = pygambit.nash.enumpure_solve(game)
# result = pygambit.nash.enummixed_solve(game)

print(len(result.equilibria))
print(result.equilibria[0])
# for i in result.equilibria:
#     print(i)  
#     print("---")

14
[[Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(1, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1)], [Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(1, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1), Rational(0, 1)]]
