
# Information Conditions - Environmental State and Action Histories

> Plot learning trajectories under different information conditions of the Ecological Public Goods Game: a) only the environmental state history is observable, b) only the action history is observable, c) both the environmental state and action histories are observable, d) no information is observable.


In [1]:
%run imports.ipynb
%run information_conditions.ipynb import Information_Conditions
%run base_ecopg.ipynb import ecopg
%run helper_functions.ipynb import *

In [2]:
# Summarise, for each information condition, the final cooperation level reached by
# the POstratAC_eps learner from a set of sampled initial conditions.
#
# Original note: "degraded choice is false" - presumably refers to the disabled
# degraded-state adjustment inside the inner loop below (TODO confirm).


def classify(avg_coop):
    """Label a run by its (already rounded) average cooperation level.

    Returns "Defection" for values below 0.1, "Cooperation" for values above 0.9
    and "Mixed" for everything else; the boundary values 0.1 and 0.9 themselves
    fall into "Mixed".
    """
    if avg_coop < 0.1:
        return "Defection"
    elif avg_coop > 0.9:
        return "Cooperation"
    else:
        return "Mixed"


# Sample-count argument handed to lhs_sampling; identical for every mode, so set once.
num_samples = 8

for mode in ['both_state_and_action_information', 'only_action_history_information', 'only_state_information', 'no_information']:

    # Build the environment for this information condition and the learner acting in it
    information_condition_instance = Information_Conditions(ecopg, mode=mode)
    mae = POstratAC_eps(env=information_condition_instance, learning_rates=0.1, discount_factors=0.9)

    # One (rounded average cooperation, trajectory length) pair per initial condition
    avg_coop_time_pairs = []
    initial_conditions_list = lhs_sampling(mae.Q, num_samples, mae.N)

    print(f"\nMode: {mode}")

    # Monte Carlo simulations: one learning trajectory per sampled initial condition
    for initial_condition in initial_conditions_list:

        # Disabled adjustment, kept from the original. Per the original note it was meant
        # "to make sure all of them start at the same position in the degraded state":
        # initial_condition = make_degraded_state_cooperation_probablity_zero(initial_condition, information_condition_instance.Oset[0])
        xtraj, fixedpointreached = mae.trajectory(initial_condition, Tmax=10000, tolerance=1e-5)
        final_point = xtraj[-1]

        # Only the first element of the helper's return value is used
        avg_coop_across_states = get_average_cooperativeness(policy=final_point, obsdist=mae.obsdist(final_point), mode = mode, Oset = mae.env.Oset[0])[0]

        # NOTE(review): this is the length of the returned trajectory. `fixedpointreached`
        # is not consulted, so runs that stopped without reaching a fixed point are not
        # distinguished from converged ones in the summary below.
        time_to_reach = xtraj.shape[0]

        # Store cooperativeness and time as pairs (cooperativeness rounded to 2 decimals)
        avg_coop_time_pairs.append((round(avg_coop_across_states, 2), time_to_reach))

    # Create DataFrame for processing
    df = pd.DataFrame(avg_coop_time_pairs, columns=["AverageCooperation", "TimeToReach"])
    total_count = len(df)

    # Keep the unrounded mean in the variable; round to 3 decimals only for display
    average_cooperation_across_initial_conditions = df['AverageCooperation'].mean()
    print("Mean Final Cooperation Across Initial Conditions ", np.round(average_cooperation_across_initial_conditions, 3))

    # Add a classification column
    df['Classification'] = df['AverageCooperation'].apply(classify)

    # Overall summary per class: median trajectory length and share of all runs (in %)
    summary = df.groupby('Classification')['TimeToReach'].agg(
        MedianTimetoReach='median',
        Percentage= lambda x: round((len(x) / total_count) * 100,1)
    ).reset_index()

    print(summary)



Mode: both_state_and_action_information
Mean Final Cooperation Across Initial Conditions  0.139
  Classification  MedianTimetoReach  Percentage
0    Cooperation            10000.0        13.9
1      Defection            10000.0        86.1

Mode: only_action_history_information
Mean Final Cooperation Across Initial Conditions  0.065
  Classification  MedianTimetoReach  Percentage
0      Defection            10000.0        80.6
1          Mixed            10000.0        19.4

Mode: only_state_information
Mean Final Cooperation Across Initial Conditions  0.0
  Classification  MedianTimetoReach  Percentage
0      Defection            10000.0       100.0

Mode: no_information
Mean Final Cooperation Across Initial Conditions  0.084
  Classification  MedianTimetoReach  Percentage
0      Defection            10000.0        69.4
1          Mixed            10000.0        30.6


Extracting Final Strategies

In [None]:
# Summarise, for each information condition, the final cooperation level reached by
# the POstratAC learner from a set of sampled initial conditions.
#
# Original note: "degraded choice is false" - presumably refers to the disabled
# degraded-state adjustment inside the inner loop below (TODO confirm).


def classify(avg_coop):
    """Label a run by its (already rounded) average cooperation level.

    Returns "Defection" for values below 0.1, "Cooperation" for values above 0.9
    and "Mixed" for everything else; the boundary values 0.1 and 0.9 themselves
    fall into "Mixed".
    """
    if avg_coop < 0.1:
        return "Defection"
    elif avg_coop > 0.9:
        return "Cooperation"
    else:
        return "Mixed"


# Sample-count argument handed to lhs_sampling; identical for every mode, so set once.
num_samples = 8

for mode in ['both_state_and_action_information', 'only_action_history_information', 'only_state_information', 'no_information']:

    # Build the environment for this information condition and the learner acting in it
    information_condition_instance = Information_Conditions(ecopg, mode=mode)
    mae = POstratAC(env=information_condition_instance, learning_rates=0.1, discount_factors=0.9)

    # One (rounded average cooperation, trajectory length) pair per initial condition
    avg_coop_time_pairs = []
    initial_conditions_list = lhs_sampling(mae.Q, num_samples, mae.N)

    print(f"\nMode: {mode}")

    # Monte Carlo simulations: one learning trajectory per sampled initial condition
    for initial_condition in initial_conditions_list:

        # Disabled adjustment, kept from the original. Per the original note it was meant
        # "to make sure all of them start at the same position in the degraded state":
        # initial_condition = make_degraded_state_cooperation_probablity_zero(initial_condition, information_condition_instance.Oset[0])
        xtraj, fixedpointreached = mae.trajectory(initial_condition, Tmax=10000, tolerance=1e-5)
        final_point = xtraj[-1]

        # Only the first element of the helper's return value is used
        avg_coop_across_states = get_average_cooperativeness(policy=final_point, obsdist=mae.obsdist(final_point), mode = mode, Oset = mae.env.Oset[0])[0]

        # NOTE(review): this is the length of the returned trajectory. `fixedpointreached`
        # is not consulted, so runs that stopped without reaching a fixed point are not
        # distinguished from converged ones in the summary below.
        time_to_reach = xtraj.shape[0]

        # Store cooperativeness and time as pairs (cooperativeness rounded to 2 decimals)
        avg_coop_time_pairs.append((round(avg_coop_across_states, 2), time_to_reach))

    # Create DataFrame for processing
    df = pd.DataFrame(avg_coop_time_pairs, columns=["AverageCooperation", "TimeToReach"])
    total_count = len(df)

    # Keep the unrounded mean in the variable; round to 3 decimals only for display
    average_cooperation_across_initial_conditions = df['AverageCooperation'].mean()
    print("Mean Final Cooperation Across Initial Conditions ", np.round(average_cooperation_across_initial_conditions, 3))

    # Add a classification column
    df['Classification'] = df['AverageCooperation'].apply(classify)

    # Overall summary per class: median trajectory length and share of all runs (in %)
    summary = df.groupby('Classification')['TimeToReach'].agg(
        MedianTimetoReach='median',
        Percentage= lambda x: round((len(x) / total_count) * 100,1)
    ).reset_index()

    print(summary)



Mode: both_state_and_action_information
Mean Final Cooperation Across Initial Conditions  0.0
  Classification  MedianTimetoReach  Percentage
0      Defection                1.0       100.0

Mode: only_action_history_information
Mean Final Cooperation Across Initial Conditions  0.065
  Classification  MedianTimetoReach  Percentage
0      Defection            10000.0        80.6
1          Mixed            10000.0        19.4

Mode: only_state_information
Mean Final Cooperation Across Initial Conditions  0.0
  Classification  MedianTimetoReach  Percentage
0      Defection                1.0       100.0

Mode: no_information
Mean Final Cooperation Across Initial Conditions  0.084
  Classification  MedianTimetoReach  Percentage
0      Defection            10000.0        69.4
1          Mixed            10000.0        30.6


: 

In [4]:

# Collect the final strategies reached by the POstratAC_eps learner for a chosen set
# of information conditions. Relies on the classes and helpers loaded by the %run
# lines at the top of the notebook.

# Information conditions processed by this cell
modes = ['only_action_history_information']

# Per-mode result containers, keyed by the mode name
results_unique = {}  # output of get_unique_arrays on the rounded final points
results_all = {}     # every rounded final point, one per initial condition
obdsist_all = {}     # mae.obsdist evaluated at each unrounded final point

for mode in modes:
    # Environment for this information condition and the learner acting in it
    information_condition_instance = Information_Conditions(ecopg, mode=mode)
    mae = POstratAC_eps(
        env=information_condition_instance,
        learning_rates=0.1,
        discount_factors=0.9,
    )

    # Initial conditions from Latin hypercube sampling
    num_samples = 8
    initial_conditions_list = lhs_sampling(mae.Q, num_samples, mae.N)

    print(f"\nMode: {mode}")

    # Accumulators for this mode
    final_point_list = []
    obsdist_list = []

    # Monte Carlo simulations: one learning trajectory per initial condition
    for initial_condition in initial_conditions_list:

        # Unlike the summary cells above, the degraded-state adjustment is applied here
        initial_condition = make_degraded_state_cooperation_probablity_zero(
            initial_condition,
            information_condition_instance.Oset[0],
        )

        xtraj, fixedpointreached = mae.trajectory(
            initial_condition, Tmax=10000, tolerance=1e-5
        )
        # Last point of the trajectory is taken as the final strategy
        final_point = xtraj[-1]

        # Round to 1 decimal place so that near-identical end points compare equal
        rounded_point = np.round(final_point, decimals=1)

        # Observation distribution is computed from the unrounded final point
        obsdist = mae.obsdist(final_point)
        obsdist_list.append(obsdist)

        final_point_list.append(rounded_point)

    # Save results for the current mode
    results_unique[mode] = get_unique_arrays(final_point_list)
    results_all[mode] = final_point_list
    obdsist_all[mode] = obsdist_list

# NOTE(review): the hard-coded index 3 assumes at least four unique points were found
print(results_unique['only_action_history_information'][3])



Mode: only_action_history_information
[[[0.2 0.8]
  [0.1 0.9]
  [0.  1. ]
  [0.  1. ]]

 [[0.1 0.9]
  [0.2 0.8]
  [0.4 0.6]
  [0.1 0.9]]]
