In [1]:

import json
import logging
import os
import shutil
import plotly.graph_objects as go
import plotly.express as px
import numpy as np

from po.scripts.poker_pop_server.evaluators.evaluator_utils import make_get_policy_fn, eval_policy_matchup
from po.scripts.poker_pop_server.utils.policy_config_keys import POKER_ARCH1_MODEL_CONFIG_KEY
from po.rllib.envs.opnspl.measure_nashconv_eval_callback import measure_nash_conv_nonlstm
import itertools
from po.rllib.common.cloud_storage import maybe_download_object, connect_storage_client
from po.rllib.sac.sac_policy import SACDiscreteTFPolicy
from po.rllib.ppo.ppo_stratego_model_policy import PPOStrategoModelTFPolicy
from po.rllib.common.stratego_preprocessor import STRATEGO_PREPROCESSOR, StrategoDictFlatteningPreprocessor
from ray.rllib.agents.trainer import with_common_config, with_base_config
from ray.rllib.models.catalog import MODEL_DEFAULTS
import ray
from ray.rllib.utils import try_import_tf
import json
import os
from po.rllib.envs.opnspl.poker_multiagent_env import POKER_ENV, KUHN_POKER, LEDUC_POKER, PARTIALLY_OBSERVABLE, PokerMultiAgentEnv
from po.rllib.common.sac_stratego_model import SAC_STRATEGO_MODEL
from po.scripts.population_server.utils.metanash import get_fp_metanash_for_latest_payoff_table, get_fp_metanash_for_payoff_table
from mprl.utility_services.payoff_table import PayoffTable, PolicySpec
import pandas as pd
import multiprocessing
from itertools import repeat

# TensorFlow module handle obtained through RLlib's import helper.
tf = try_import_tf()

# Object-storage connection settings, read from the environment.
# Each value is None when the corresponding variable is unset.
MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT")
MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY")
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY")
BUCKET_NAME = os.environ.get("MINIO_BUCKET_NAME")

# Observation mode constant from the poker environment module.
OBSERVATION_MODE = PARTIALLY_OBSERVABLE

# Policy class evaluated by this notebook, its class name, and the storage key
# of the model config it is built with.
POLICY_CLASS = SACDiscreteTFPolicy
POLICY_CLASS_NAME = POLICY_CLASS.__name__
MODEL_CONFIG_KEY = POKER_ARCH1_MODEL_CONFIG_KEY

# Host name of the manager server (identifier spelling kept as-is for compatibility).
MANAGER_SEVER_HOST = "localhost"

# Module-level logger.
logger = logging.getLogger(__name__)


def get_stats_for_single_payoff_table(payoff_table_key, experiment_name, poker_game_version, model_config_key):
    """Compute per-generation exploitability statistics for one payoff table.

    The model config and the payoff table are fetched from object storage, a
    single policy instance is built, and then, for every n from 1 to the
    payoff table's size, a fictitious-play metanash over the first n policies
    is computed and its exploitability measured.

    Args:
        payoff_table_key: Storage object name of the dill-serialized payoff table.
        experiment_name: Label copied into every output row.
        poker_game_version: Poker game version, used as the env config
            ``'version'`` and passed to the exploitability measurement.
        model_config_key: Storage object name of the JSON model config.

    Returns:
        A dict of equal-length lists with exactly one entry per generation,
        keyed by ``'payoff_table_key'``, ``'experiment_name'``,
        ``'num_policies'``, ``'exploitability'``, ``'total_steps'`` and
        ``'total_episodes'``. The last two are running sums of the counts
        parsed from each policy's tags.

    Raises:
        IndexError: If a policy has no tag containing ``"timesteps: "`` or
            ``"episodes: "``.
        ValueError: If the text after such a prefix is not an integer.
    """
    poker_env_config = {
        'version': poker_game_version,
    }

    storage_client = connect_storage_client(endpoint=MINIO_ENDPOINT,
                                            access_key=MINIO_ACCESS_KEY,
                                            secret_key=MINIO_SECRET_KEY)

    # If you use ray for more than just this single example fn, you'll need to move ray.init to the top of your main()
    ray.init(address=os.getenv('RAY_HEAD_NODE'), ignore_reinit_error=True, local_mode=True)

    model_config_file_path, _ = maybe_download_object(storage_client=storage_client,
                                                      bucket_name=BUCKET_NAME,
                                                      object_name=model_config_key,
                                                      force_download=False)

    with open(model_config_file_path, 'r') as config_file:
        model_config = json.load(fp=config_file)

    # A throwaway env instance is only needed to read the observation/action spaces.
    example_env = PokerMultiAgentEnv(env_config=poker_env_config)

    obs_space = example_env.observation_space
    act_space = example_env.action_space

    preprocessor = StrategoDictFlatteningPreprocessor(obs_space=obs_space)
    # Dedicated graph and CPU-only session ('GPU': 0) for this policy.
    graph = tf.Graph()
    sess = tf.Session(config=tf.ConfigProto(device_count={'GPU': 0}), graph=graph)

    def fetch_logits(policy):
        return {
            "behaviour_logits": policy.model.last_output(),
        }

    _policy_cls = POLICY_CLASS.with_updates(
        extra_action_fetches_fn=fetch_logits
    )

    with graph.as_default():
        with sess.as_default():
            policy = _policy_cls(
                obs_space=preprocessor.observation_space,
                action_space=act_space,
                config=with_common_config({
                    'model': with_base_config(base_config=MODEL_DEFAULTS, extra_config=model_config),
                    'env': POKER_ENV,
                    'env_config': poker_env_config,
                    'custom_preprocessor': STRATEGO_PREPROCESSOR}))

    def set_policy_weights(weights_key):
        # Download (if not already cached) and load the weights stored under `weights_key`.
        weights_file_path, _ = maybe_download_object(storage_client=storage_client,
                                                     bucket_name=BUCKET_NAME,
                                                     object_name=weights_key,
                                                     force_download=False)
        policy.load_model_weights(weights_file_path)

    def read_int_tag(tags, prefix):
        # Take the first tag containing `prefix` and parse what follows its first len(prefix) characters.
        return int([tag for tag in tags if prefix in tag][0][len(prefix):])

    payoff_table_local_path, _ = maybe_download_object(storage_client=storage_client,
                                                       bucket_name=BUCKET_NAME,
                                                       object_name=payoff_table_key,
                                                       force_download=False)

    payoff_table = PayoffTable.from_dill_file(dill_file_path=payoff_table_local_path)

    exploitability_per_generation = []
    total_steps_per_generation = []
    total_episodes_per_generation = []
    num_policies_per_generation = []

    # Running sums of the per-policy step/episode counts across generations.
    running_total_steps = 0
    running_total_episodes = 0

    for i, n_policies in enumerate(range(1, payoff_table.size() + 1)):
        # NOTE(review): the env config dict is passed as an accepted *model config key*;
        # this looks like it should be [model_config_key] -- confirm against
        # get_fp_metanash_for_payoff_table before changing. Kept as in the original.
        metanash_probs = get_fp_metanash_for_payoff_table(payoff_table=payoff_table,
                                                          fp_iters=40000,
                                                          accepted_opponent_policy_class_names=[POLICY_CLASS_NAME],
                                                          accepted_opponent_model_config_keys=[poker_env_config],
                                                          add_payoff_matrix_noise_std_dev=0.000,
                                                          mix_with_uniform_dist_coeff=None,
                                                          only_first_n_policies=n_policies,
                                                          p_or_lower_rounds_to_zero=0.0)

        policy_weights_keys = payoff_table.get_ordered_keys_in_payoff_matrix()

        # zip() stops at the shorter sequence, so only keys that have a probability are included.
        policy_dict = {key: prob for key, prob in zip(policy_weights_keys, metanash_probs)}

        exploitability_this_gen = measure_nash_conv_nonlstm(rllib_policy=policy,
                                                            poker_game_version=poker_game_version,
                                                            policy_mixture_dict=policy_dict,
                                                            set_policy_weights_fn=set_policy_weights)

        print(f"{n_policies} policies, {exploitability_this_gen} exploitability")

        latest_policy_tags = payoff_table.get_policy_for_index(i).tags
        running_total_steps += read_int_tag(latest_policy_tags, "timesteps: ")
        running_total_episodes += read_int_tag(latest_policy_tags, "episodes: ")

        exploitability_per_generation.append(exploitability_this_gen)
        total_steps_per_generation.append(running_total_steps)
        total_episodes_per_generation.append(running_total_episodes)
        num_policies_per_generation.append(n_policies)

    # Build the output once, after the loop. Doing this inside the loop would
    # re-append every earlier generation on each iteration and duplicate rows.
    num_generations = len(exploitability_per_generation)
    return {
        'payoff_table_key': [payoff_table_key] * num_generations,
        'experiment_name': [experiment_name] * num_generations,
        'num_policies': num_policies_per_generation,
        'exploitability': exploitability_per_generation,
        'total_steps': total_steps_per_generation,
        'total_episodes': total_episodes_per_generation,
    }

def get_exploitability_stats_over_time_for_payoff_table_all_same_poker_version(payoff_table_keys, exp_names, poker_game_version, model_config_key, num_processes=16):
    """Evaluate several payoff tables in parallel and merge the results into one DataFrame.

    Each (payoff table key, experiment name) pair is handed to
    ``get_stats_for_single_payoff_table`` in a worker process, all with the
    same poker game version and model config key.

    Args:
        payoff_table_keys: Iterable of payoff table storage keys.
        exp_names: Iterable of experiment names, paired positionally with
            ``payoff_table_keys`` (pairing stops at the shorter of the two).
        poker_game_version: Poker game version shared by every table.
        model_config_key: Model config storage key shared by every table.
        num_processes: Upper bound on worker processes. The pool never gets
            more workers than there are tables to evaluate.

    Returns:
        A ``pandas.DataFrame`` whose columns are the keys of the per-table
        stats dicts, with the per-table lists concatenated in input order.
        Empty input yields an empty DataFrame.
    """
    jobs = list(zip(payoff_table_keys, exp_names, repeat(poker_game_version), repeat(model_config_key)))

    if jobs:
        # Do not start more workers than there is work for them.
        worker_count = max(1, min(num_processes, len(jobs)))
        with multiprocessing.Pool(processes=worker_count) as pool:
            results = pool.starmap(get_stats_for_single_payoff_table, jobs)
    else:
        results = []

    combined_stats = {}
    for result in results:
        for key, val in result.items():
            combined_stats.setdefault(key, []).extend(val)

    return pd.DataFrame(combined_stats)


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be

ModuleNotFoundError: No module named 'pyspiel'

In [None]:
# (payoff table storage key, experiment label) pairs for the Kuhn poker runs.
# The keys are storage object names and are reproduced verbatim, spelling included.
kuhn_experiment_payoff_tables_and_names = [
    (
        "poker_ps/kuhn_psro_squential_explore_coeff_0/poker-learner-1_pid_168295_09_29_35AM_May-23-2020/payoff_tables/payoff_table_58_polices_0_pending_poker-learner-1_pid_168295_10_57_45PM_May-23-2020.dill",
        "Kuhn Seq PSRO Expl 0.0",
    ),
    (
        "poker_ps/kuhn_psro_squential_explore_coeff_0p1/poker-learner-1_pid_170498_09_29_55AM_May-23-2020/payoff_tables/payoff_table_56_polices_0_pending_poker-learner-1_pid_170498_10_46_58PM_May-23-2020.dill",
        "Kuhn Seq PSRO Expl 0.1",
    ),
]
    

In [None]:
# Split the (storage key, label) pairs into two parallel tuples.
kuhn_payoff_table_keys, kuhn_exp_names = zip(
    *kuhn_experiment_payoff_tables_and_names
)

# Evaluate every Kuhn payoff table and gather the per-generation stats in one DataFrame.
kuhn_perf_df = get_exploitability_stats_over_time_for_payoff_table_all_same_poker_version(
    kuhn_payoff_table_keys,
    kuhn_exp_names,
    poker_game_version="kuhn_poker",
    model_config_key=POKER_ARCH1_MODEL_CONFIG_KEY,
)


In [None]:
# Exploitability against population size for the Kuhn runs, one line per experiment.
fig = px.line(
    kuhn_perf_df,
    x="num_policies",
    y="exploitability",
    color="experiment_name",
    title="Exploitability over time",  # fixed typo: was "Exploibility"
    render_mode="svg",
)
fig.show()

In [5]:


# (payoff table storage key, experiment label) pairs for the Leduc poker runs.
# The keys are storage object names and are reproduced verbatim, spelling included.
leduc_experiment_payoff_tables_and_names = [
    (
        "poker_ps/leduc_psro_squential_explore_coeff_0/poker-learner-1_pid_137104_09_12_46AM_May-23-2020/payoff_tables/payoff_table_90_polices_0_pending_poker-learner-1_pid_137104_04_40_13AM_May-26-2020.dill",
        "Leduc Seq PSRO Expl 0.0",
    ),
    (
        "poker_ps/leduc_psro_squential_explore_coeff_0p1/poker-learner-1_pid_141254_09_13_38AM_May-23-2020/payoff_tables/payoff_table_24_polices_0_pending_poker-learner-1_pid_141254_11_20_51PM_May-23-2020.dill",
        "Leduc Seq PSRO Expl 0.1",
    ),
]
    

In [6]:
# Split the (storage key, label) pairs into two parallel tuples.
leduc_payoff_table_keys, leduc_exp_names = zip(
    *leduc_experiment_payoff_tables_and_names
)

# Evaluate every Leduc payoff table and gather the per-generation stats in one DataFrame.
leduc_perf_df = get_exploitability_stats_over_time_for_payoff_table_all_same_poker_version(
    leduc_payoff_table_keys,
    leduc_exp_names,
    poker_game_version="leduc_poker",
    model_config_key=POKER_ARCH1_MODEL_CONFIG_KEY,
)


2020-05-30 19:15:15,550	ERROR worker.py:1428 -- Calling ray.init() again after it has already been called.

[33mWARN: Box bound precision lowered by casting to float32[0m

2020-05-30 19:15:15,573	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor
2020-05-30 19:15:15,715	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Box(33,)
Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vf_observation (InputLayer)  [(None, 30)]              0         
_________________________________________________________________
twin_vf_fc_0 (Dense)         (None, 40)                1240      
___________

19 policies, 0.819706929876769 exploitability
20 policies, 0.7818787387689884 exploitability
21 policies, 0.7619052322117581 exploitability
22 policies, 0.7600882366089301 exploitability
23 policies, 0.751277620647232 exploitability
24 policies, 0.7193328619354168 exploitability
25 policies, 0.7139441413566245 exploitability
26 policies, 0.7009118592578034 exploitability
27 policies, 0.707780861987236 exploitability
28 policies, 0.7138713694857135 exploitability
29 policies, 0.6921403831663034 exploitability
30 policies, 0.690202059842426 exploitability
31 policies, 0.6931149762546837 exploitability
32 policies, 0.6842320452092192 exploitability
33 policies, 0.6770957431091358 exploitability
34 policies, 0.6671562606473854 exploitability
35 policies, 0.6604554766669317 exploitability
36 policies, 0.6495775240007442 exploitability
37 policies, 0.6428253115107566 exploitability
38 policies, 0.6275349430794468 exploitability
39 policies, 0.6117898193419141 exploitability
40 policies, 0.61

In [7]:
# Exploitability against the total_episodes column for the Leduc runs, one line per experiment.
fig = px.line(
    leduc_perf_df,
    x="total_episodes",
    y="exploitability",
    color="experiment_name",
    title="Exploitability over time",  # fixed typo: was "Exploibility"
    render_mode="svg",
)
fig.show()

In [None]:
# (payoff table storage key, experiment label) pairs for the Kuhn poker runs
# whose payoff tables are stored under "latest.dill".
google_cloud_kuhn_experiment_payoff_tables_and_names = [
    (
        "kuhn_poker_naive_3_workers_poker_ps/kuhn_psro_naive/naive-1-3-3-kuhn-poker_pid_430_12_40_08PM_May-31-2020/payoff_tables/latest.dill",
        "kuhn_naive_psro",
    ),
    (
        "kuhn_poker_rect_3_workers_poker_ps/kuhn_psro_rectified/rect-1-3-3-kuhn-poker_pid_430_12_40_27PM_May-31-2020/payoff_tables/latest.dill",
        "kuhn_rectified",
    ),
    (
        "kuhn_poker_singles_3_workers_poker_ps/kuhn_single_policies/singles-1-3-3-kuhn-poker_pid_431_12_40_58PM_May-31-2020/payoff_tables/latest.dill",
        "kuhn_singles",
    ),
    (
        "kuhn_poker_subsets_3_workers_poker_ps/kuhn_subsets/subsets-1-3-3-kuhn-poker_pid_430_12_41_07PM_May-31-2020/payoff_tables/latest.dill",
        "kuhn_subsets",
    ),
]

# Split the (storage key, label) pairs into two parallel tuples.
gc_kuhn_table_keys, gc_kuhn_exp_names = zip(
    *google_cloud_kuhn_experiment_payoff_tables_and_names
)

# Evaluate every listed payoff table and gather the per-generation stats in one DataFrame.
gc_kuhn_perf_df = get_exploitability_stats_over_time_for_payoff_table_all_same_poker_version(
    gc_kuhn_table_keys,
    gc_kuhn_exp_names,
    poker_game_version="kuhn_poker",
    model_config_key=POKER_ARCH1_MODEL_CONFIG_KEY,
)


[33mWARN: Box bound precision lowered by casting to float32[0m


[33mWARN: Box bound precision lowered by casting to float32[0m


[33mWARN: Box bound precision lowered by casting to float32[0m



{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Box(13,)


2020-05-31 18:28:36,643	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Box(13,)


2020-05-31 18:28:36,654	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Box(13,)


2020-05-31 18:28:36,668	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vf_observation (InputLayer)  [(None, 11)]              0         
_________________________________________________________________
twin_vf_fc_0 (Dense)         (None, 40)                480       
_________________________________________________________________
twin_vf_fc_1 (Dense)         (None, 40)                1640      
_________________________________________________________________
twin_vf_fc_2 (Dense)         (None, 40)                1640      
_________________________________________________________________
twin_vf_fc_q_out (Dense)     (None, 2)                 82        
Total params: 3,842
Trainable params: 3,842
Non-trainable params: 0
_________________________________________________________________
None
Model: "model"
_________________________________________________________________
Model: "model_2"
Layer (type)        

Box(13,)
Total params: 3,842


2020-05-31 18:28:36,861	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


Trainable params: 3,842
Non-trainable params: 0
_________________________________________________________________
None
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vf_observation (InputLayer)  [(None, 11)]              0         
_________________________________________________________________
{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_acti

2020-05-31 18:28:36,872	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


main_vf_fc_1 (Dense)         (None, 40)                1640      
_________________________________________________________________
main_vf_fc_2 (Dense)         (None, 40)                1640      
_________________________________________________________________
main_vf_fc_q_out (Dense)     (None, 2)                 82        
Total params: 3,842
Trainable params: 3,842
Non-trainable params: 0
_________________________________________________________________
None
{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': No

2020-05-31 18:28:36,891	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vf_observation (InputLayer)  [(None, 11)]              0         
_________________________________________________________________
twin_vf_fc_0 (Dense)         (None, 40)                480       
Model: "model_5"
_________________________________________________________________
_________________________________________________________________
twin_vf_fc_1 (Dense)         (None, 40)                1640      
Layer (type)                 Output Shape              Param #   
_________________________________________________________________
twin_vf_fc_2 (Dense)         (None, 40)                1640      
vf_observation (InputLayer)  [(None, 11)]              0         
_________________________________________________________________
twin_vf_fc_q_out (Dense)     (None, 2)                 82        
__________________________________________

Layer (type)                 Output Shape              Param #   
vf_observation (InputLayer)  [(None, 11)]              0         
_________________________________________________________________
main_vf_fc_0 (Dense)         (None, 40)                480       
_________________________________________________________________
main_vf_fc_1 (Dense)         (None, 40)                1640      
_________________________________________________________________
main_vf_fc_2 (Dense)         (None, 40)                1640      
_________________________________________________________________
main_vf_fc_q_out (Dense)     (None, 2)                 82        
Total params: 3,842
Trainable params: 3,842
Non-trainable params: 0
_________________________________________________________________
None


2020-05-31 18:28:42,341	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.


1 policies, 0.3644301917537757 exploitability


2020-05-31 18:28:42,415	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.


1 policies, 0.4119692911197338 exploitability


2020-05-31 18:28:42,535	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.


1 policies, 0.3378489864554763 exploitability
2 policies, 0.22360588712940216 exploitability
2 policies, 0.4006391349741132 exploitability
2 policies, 0.3572169873877059 exploitability
3 policies, 0.39914163943422576 exploitability
3 policies, 0.10345805709434311 exploitability
3 policies, 0.3562423033173112 exploitability
4 policies, 0.3179631156470245 exploitability
4 policies, 0.2772515067143104 exploitability
4 policies, 0.1632361639463875 exploitability
5 policies, 0.2661812108612055 exploitability
5 policies, 0.17521397173365147 exploitability
5 policies, 0.22243002270987658 exploitability
6 policies, 0.23232368420260804 exploitability
6 policies, 0.12673275685002036 exploitability
6 policies, 0.185257453635883 exploitability
7 policies, 0.19640093096182384 exploitability
7 policies, 0.1662005825214526 exploitability
7 policies, 0.14019225668556812 exploitability
8 policies, 0.1651887300360247 exploitability
8 policies, 0.15298261295113832 exploitability
8 policies, 0.10527236611

In [None]:
# Exploitability against the total_episodes column for the 3-worker Kuhn runs, one line per experiment.
fig = px.line(
    gc_kuhn_perf_df,
    x="total_episodes",
    y="exploitability",
    color="experiment_name",
    title="Exploitability over time Kuhn 3 workers",
    render_mode="svg",
)
fig.show()

In [2]:
# Inject a CSS rule that forces elements with the `.container` class to 100% width.
# `display` and `HTML` are imported from the public `IPython.display` module;
# pulling `display` from `IPython.core.display` is deprecated in newer IPython.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [7]:
# NOTE(review): this whole cell is commented out; any output kept with it
# presumably comes from an earlier execution — confirm before relying on it.
# The variable names below say "kuhn", but every payoff-table key and the
# `poker_game_version` argument refer to Leduc poker, so if this cell is
# re-enabled it would store Leduc results in `gc_kuhn_perf_df`.
# google_cloud_kuhn_experiment_payoff_tables_and_names = [
#     ("leduc_poker_pipe_3_workers_poker_ps/leduc_pipeline_psro/pipe-1-3-3-leduc-poker_pid_430_09_03_49AM_Jun-01-2020/payoff_tables/latest.dill","leduc_pipe_1"),
#     ("leduc_poker_pipe_3_workers_poker_ps/leduc_pipeline_psro/pipe-2-3-3-leduc-poker_pid_430_09_04_00AM_Jun-01-2020/payoff_tables/latest.dill","leduc_pipe_2"),
#     ("leduc_poker_pipe_3_workers_poker_ps/leduc_pipeline_psro/pipe-3-3-3-leduc-poker_pid_431_09_04_01AM_Jun-01-2020/payoff_tables/latest.dill","leduc_pipe_3"),
#     ("leduc_poker_rect_3_workers_poker_ps/leduc_psro_rectified/rect-1-3-3-leduc-poker_pid_429_09_04_14AM_Jun-01-2020/payoff_tables/latest.dill","leduc_rect_1"),
#     ("leduc_poker_rect_3_workers_poker_ps/leduc_psro_rectified/rect-2-3-3-leduc-poker_pid_430_09_04_24AM_Jun-01-2020/payoff_tables/latest.dill","leduc_rect_2"),
#     ("leduc_poker_rect_3_workers_poker_ps/leduc_psro_rectified/rect-3-3-3-leduc-poker_pid_430_09_04_45AM_Jun-01-2020/payoff_tables/latest.dill","leduc_rect_3"),
#     ("leduc_poker_naive_3_workers_poker_ps/leduc_psro_naive/naive-1-3-3-leduc-poker_pid_430_09_03_04AM_Jun-01-2020/payoff_tables/latest.dill","leduc_naive_1"),
#     ("leduc_poker_naive_3_workers_poker_ps/leduc_psro_naive/naive-2-3-3-leduc-poker_pid_430_09_03_16AM_Jun-01-2020/payoff_tables/latest.dill","leduc_naive_2"),
#     ("leduc_poker_naive_3_workers_poker_ps/leduc_psro_naive/naive-3-3-3-leduc-poker_pid_430_09_03_34AM_Jun-01-2020/payoff_tables/latest.dill","leduc_naive_3")

# ]

# gc_kuhn_table_keys, gc_kuhn_exp_names = zip(*google_cloud_kuhn_experiment_payoff_tables_and_names)

# gc_kuhn_perf_df = get_exploitability_stats_over_time_for_payoff_table_all_same_poker_version(
#     payoff_table_keys=gc_kuhn_table_keys,
#     exp_names=gc_kuhn_exp_names,
#     poker_game_version="leduc_poker",
#     model_config_key=POKER_ARCH1_MODEL_CONFIG_KEY
# )


[33mWARN: Box bound precision lowered by casting to float32[0m


[33mWARN: Box bound precision lowered by casting to float32[0m


[33mWARN: Box bound precision lowered by casting to float32[0m


[33mWARN: Box bound precision lowered by casting to float32[0m


[33mWARN: Box bound precision lowered by casting to float32[0m


[33mWARN: Box bound precision lowered by casting to float32[0m


[33mWARN: Box bound precision lowered by casting to float32[0m


[33mWARN: Box bound precision lowered by casting to float32[0m


[33mWARN: Box bound precision lowered by casting to float32[0m



{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Box(33,)


2020-06-01 19:09:53,979	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Box(33,)


2020-06-01 19:09:54,136	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40

2020-06-01 19:09:54,145	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor
2020-06-01 19:09:54,146	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Box(33,)


2020-06-01 19:09:54,162	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Box(33,)
{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hidd

2020-06-01 19:09:54,170	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}


2020-06-01 19:09:54,166	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


Box(33,)


2020-06-01 19:09:54,185	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Box(33,)


2020-06-01 19:09:54,204	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vf_observation (InputLayer)  [(None, 30)]              0         
_________________________________________________________________
twin_vf_fc_0 (Dense)         (None, 40)                1240      
_________________________________________________________________
twin_vf_fc_1 (Dense)         (None, 40)                1640      
_________________________________________________________________
twin_vf_fc_2 (Dense)         (None, 40)                1640      
_________________________________________________________________
twin_vf_fc_q_out (Dense)     (None, 3)                 123       
Total params: 4,643
Trainable params: 4,643
Non-trainable params: 0
_________________________________________________________________
None
Model: "model"
_________________________________________________________________
Layer (type)                 Output S

2020-06-01 19:09:54,522	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vf_observation (InputLayer)  [(None, 30)]              0         
_________________________________________________________________
twin_vf_fc_0 (Dense)         (None, 40)                1240      
_________________________________________________________________
twin_vf_fc_1 (Dense)         (None, 40)                1640      
_________________________________________________________________
twin_vf_fc_2 (Dense)         (None, 40)                1640      
_________________________________________________________________
twin_vf_fc_q_out (Dense)     (None, 3)                 123       
Total params: 4,643
Trainable params: 4,643
Model: "model_2"
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shap

Trainable params: 4,643
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
pi_observation (InputLayer)  [(None, 30)]              0         
Non-trainable params: 0
_________________________________________________________________
None
_________________________________________________________________
pi_observation (InputLayer)  [(None, 30)]              0         
None
Model: "model"
Model: "model_2"
pi_fc_1 (Dense)              (None, 40)                1640      
_________________________________________________________________
Model: "model_1"
_________________________________________________________________
Trainable params: 4,643
pi_fc_0 (Dense)              (None, 40)                1240      
_________________________________________________________________
_________________________________________________________________
_________________________________________________________________
Layer (type

_________________________________________________________________
Trainable params: 4,643
Total params: 4,643
pi_fc_1 (Dense)              (None, 40)                1640      
_________________________________________________________________
pi_fc_unmasked_logits (Dense (None, 3)                 123       
Total params: 4,643
Trainable params: 4,643
_________________________________________________________________
pi_fc_0 (Dense)              (None, 40)                1240      
Trainable params: 4,643
Non-trainable params: 0
_________________________________________________________________
Non-trainable params: 0
pi_fc_2 (Dense)              (None, 40)                1640      
vf_observation (InputLayer)  [(None, 30)]              0         
_________________________________________________________________
Model: "model_1"
Total params: 4,643
_________________________________________________________________
pi_fc_1 (Dense)              (None, 40)                1640      
___________

2020-06-01 19:09:54,715	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


Non-trainable params: 0
Model: "model_1"


2020-06-01 19:09:54,715	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


Non-trainable params: 0
_________________________________________________________________
main_vf_fc_0 (Dense)         (None, 40)                1240      
vf_observation (InputLayer)  [(None, 30)]              0         
_________________________________________________________________
_________________________________________________________________
_________________________________________________________________
Total params: 4,643
_________________________________________________________________
main_vf_fc_1 (Dense)         (None, 40)                1640      
None
_________________________________________________________________
twin_vf_fc_0 (Dense)         (None, 40)                1240      
Trainable params: 4,643
Model: "model_1"
_________________________________________________________________
main_vf_fc_2 (Dense)         (None, 40)                1640      
Non-trainable params: 0
_________________________________________________________________
twin_vf_fc_1 (Dense)        

2020-06-01 19:09:54,745	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


_________________________________________________________________
main_vf_fc_1 (Dense)         (None, 40)                1640      
Non-trainable params: 0
None
{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Non-trainable params: 0
_________________________________________________________________
Box(33,)
None
main_vf_fc_1 (Dense)         (None, 40)                1640      
____________________________________

2020-06-01 19:09:54,750	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


None
_________________________________________________________________
Model: "model_3"
{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Box(33,)
{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn

2020-06-01 19:09:54,760	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
main_vf_fc_q_out (Dense)     (None, 3)                 123       
Total params: 4,643


2020-06-01 19:09:54,768	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


Trainable params: 4,643
Non-trainable params: 0
pi_observation (InputLayer)  [(None, 30)]              0         
_________________________________________________________________
None
_________________________________________________________________
pi_fc_0 (Dense)              (None, 40)                1240      
main_vf_fc_q_out (Dense)     (None, 3)                 123       
Total params: 4,643
Trainable params: 4,643
{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'graysc

2020-06-01 19:09:54,794	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


_________________________________________________________________
None
pi_fc_2 (Dense)              (None, 40)                1640      
_________________________________________________________________
pi_fc_unmasked_logits (Dense (None, 3)                 123       
{'conv_activation': 'relu', 'conv_filters': [], 'use_lstm': False, 'max_seq_len': 20, 'custom_preprocessor': 'stratego_preprocessor', 'custom_model': 'sac_stratego_model', 'custom_options': {'mask_invalid_actions': True, 'observation_mode': 'partially_observable', 'q_fn': True, 'fake_lstm': False, 'use_lstm': False}, 'fcnet_activation': 'tanh', 'fcnet_hiddens': [40, 40, 40], 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'state_shape': None, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_action_dist': None}
Box(33,)


2020-06-01 19:09:54,805	INFO catalog.py:363 -- Using custom preprocessor stratego_preprocessor


Total params: 4,643
Trainable params: 4,643
Non-trainable params: 0
_________________________________________________________________
None
Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vf_observation (InputLayer)  [(None, 30)]              0         
_________________________________________________________________
main_vf_fc_0 (Dense)         (None, 40)                1240      
_________________________________________________________________
main_vf_fc_1 (Dense)         (None, 40)                1640      
_________________________________________________________________
main_vf_fc_2 (Dense)         (None, 40)                1640      
_________________________________________________________________
main_vf_fc_q_out (Dense)     (None, 3)                 123       
Total params: 4,643
Trainable params: 4,643
Non-trainable params: 0
__________________________________________________

Non-trainable params: 0
_________________________________________________________________
twin_vf_fc_q_out (Dense)     (None, 3)                 123       
_________________________________________________________________
pi_fc_0 (Dense)              (None, 40)                1240      
Total params: 4,643
None
_________________________________________________________________
Trainable params: 4,643
Total params: 4,643
Model: "model_3"
pi_fc_1 (Dense)              (None, 40)                1640      
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
None
Model: "model_3"
pi_observation (InputLayer)  [(None, 30)]              0         
_________________________________________________________________
____________________________________________________

Non-trainable params: 0
_________________________________________________________________
main_vf_fc_0 (Dense)         (None, 40)                1240      
_________________________________________________________________
_________________________________________________________________
twin_vf_fc_0 (Dense)         (None, 40)                1240      
None
_________________________________________________________________
None
main_vf_fc_1 (Dense)         (None, 40)                1640      
vf_observation (InputLayer)  [(None, 30)]              0         
_________________________________________________________________
Model: "model_4"
main_vf_fc_1 (Dense)         (None, 40)                1640      
Model: "model_3"
_________________________________________________________________
_________________________________________________________________
twin_vf_fc_1 (Dense)         (None, 40)                1640      
_________________________________________________________________
________

Trainable params: 4,643
_________________________________________________________________
main_vf_fc_0 (Dense)         (None, 40)                1240      
_________________________________________________________________
main_vf_fc_1 (Dense)         (None, 40)                1640      
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
None
main_vf_fc_2 (Dense)         (None, 40)                1640      
_________________________________________________________________
main_vf_fc_q_out (Dense)     (None, 3)                 123       
Total params: 4,643
Trainable params: 4,643
Non-trainable params: 0
_________________________________________________________________
None


2020-06-01 19:10:02,296	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.
2020-06-01 19:10:03,072	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.
2020-06-01 19:10:03,175	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.
2020-06-01 19:10:03,405	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.
2020-06-01 19:10:03,505	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environment variable.
2020-06-01 19:10:03,518	INFO tf_run_builder.py:92 -- Executing TF run without tracing. To dump TF timeline traces to disk, set the TF_TIMELINE_DIR environm

1 policies, 2.9374850478021273 exploitability
1 policies, 2.653782920508414 exploitability
1 policies, 3.066534679987807 exploitability
1 policies, 3.074998869772367 exploitability
1 policies, 2.957693296405573 exploitability
1 policies, 3.037660387070674 exploitability
1 policies, 3.1543033616375658 exploitability
1 policies, 2.999997654584079 exploitability
1 policies, 3.4583411164274986 exploitability
2 policies, 2.979169793580082 exploitability
2 policies, 1.4908850095293888 exploitability
2 policies, 1.6788976073216095 exploitability
2 policies, 2.011005858413469 exploitability
2 policies, 2.039313971763035 exploitability
2 policies, 1.834529209056181 exploitability
2 policies, 3.015247828497479 exploitability
2 policies, 2.571364389119915 exploitability
2 policies, 2.0466764640548027 exploitability
3 policies, 3.0513811180907915 exploitability
3 policies, 1.599272362898265 exploitability
3 policies, 2.957589753225438 exploitability
3 policies, 1.2482805139387887 exploitability
3 

21 policies, 0.6647079411675371 exploitability
21 policies, 1.0192720063871876 exploitability
21 policies, 1.0023541287322477 exploitability
21 policies, 0.9574264018863472 exploitability
21 policies, 1.1512243956268229 exploitability
21 policies, 1.3755090980239415 exploitability
21 policies, 1.5122641861045196 exploitability
22 policies, 0.9712953890334635 exploitability
22 policies, 0.9903251272791094 exploitability
22 policies, 0.6426449713384652 exploitability
22 policies, 0.9762983004457171 exploitability
22 policies, 1.1259424624863035 exploitability
22 policies, 1.3401382258799333 exploitability
22 policies, 1.4972847688252173 exploitability
23 policies, 0.953002914738782 exploitability
23 policies, 0.9762197628234159 exploitability
23 policies, 0.9456022488465611 exploitability
23 policies, 1.3248049926891938 exploitability
23 policies, 1.1378681494231786 exploitability
23 policies, 1.4291522823981913 exploitability
24 policies, 0.9157823175234896 exploitability
24 policies, 0

53 policies, 0.6089420075152792 exploitability
53 policies, 0.6425002109997187 exploitability
53 policies, 0.6176685075222941 exploitability
54 policies, 0.6318998985693998 exploitability
54 policies, 0.6224179845818658 exploitability


In [9]:
# NOTE(review): this alias is commented out, yet the cells that follow read
# `gc_leduc_perf_df`. No live assignment of that name is visible in this part
# of the notebook, so a fresh "Restart & Run All" would presumably fail with a
# NameError — confirm, then restore an explicit assignment before sharing.
# gc_leduc_perf_df = gc_kuhn_perf_df

In [14]:
# Plot exploitability against total episodes for the Leduc runs, drawing one
# line per value of the "experiment_name" column. Exact duplicate rows are
# dropped first so repeated records are not plotted twice.
fig = px.line(
    gc_leduc_perf_df.drop_duplicates(),
    x="total_episodes",
    y="exploitability",
    color="experiment_name",
    # The title names Leduc to match the frame being plotted (it is not Kuhn data).
    title="Exploitability over time Leduc 3 workers",
    render_mode="svg",
)
fig.show()

In [12]:
# Write the Leduc exploitability frame to a CSV file on the user's Desktop
# (the path is given relative to the home directory via `~`).
gc_leduc_perf_df.to_csv(
    path_or_buf="~/Desktop/gc_leduc_jun_1.csv",
)