In [1]:
import ray
import time
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.dqn import DQNTrainer
from ray.rllib.agents.ppo.ppo_tf_policy import PPOTFPolicy
from ray.rllib.agents.dqn.dqn_tf_policy import DQNTFPolicy
from rlcard.rllib_utils.random_policy import RandomPolicy
from rlcard.rllib_utils.model import ParametricActionsModel
from ray.rllib.models import ModelCatalog
from rlcard.rllib_utils.rlcard_wrapper import RLCardWrapper
from rlcard.rllib_utils.custom_metrics import PlayerScoreCallbacks
from ray.tune.registry import register_env

In [2]:
# Decide which RLcard environment to use
# rlcard_env_id = 'blackjack'
# rlcard_env_id = 'doudizhu'
# rlcard_env_id = 'gin-rummy'
rlcard_env_id = 'leduc-holdem'
# rlcard_env_id = 'limit-holdem'
# rlcard_env_id = 'mahjong'
# rlcard_env_id = 'no-limit-holdem'
# rlcard_env_id = 'simple-doudizhu'
# rlcard_env_id = 'uno'
# rlcard_env_id = 'scopone'

In [3]:
env_config = {
    "rlcard_env_id": rlcard_env_id,
}

env_config_eval = {
    "rlcard_env_id": rlcard_env_id,
    "randomize_agents_eval": [1]
}

In [4]:
ray.init(num_cpus=4)

2020-11-18 07:59:09,704	INFO resource_spec.py:212 -- Starting Ray with 4.0 GiB memory available for workers and up to 2.02 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-11-18 07:59:11,318	INFO services.py:1165 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


{'node_ip_address': '10.92.120.113',
 'raylet_ip_address': '10.92.120.113',
 'redis_address': '10.92.120.113:6379',
 'object_store_address': 'tcp://127.0.0.1:65144',
 'raylet_socket_name': 'tcp://127.0.0.1:64663',
 'webui_url': 'localhost:8265',
 'session_dir': 'C:\\Users\\chiappal\\AppData\\Local\\Temp\\ray\\session_2020-11-18_07-59-09_672995_3588'}

In [5]:
# Register env and model to be used by rllib
rlcard_environment = lambda _: RLCardWrapper(env_config)
register_env(rlcard_env_id, rlcard_environment)
ModelCatalog.register_custom_model("parametric_model_tf", ParametricActionsModel)

In [6]:
env_tmp = rlcard_environment(None)
policy_class = PPOTFPolicy
policy_config = {
    "model": {
        "custom_model": "parametric_model_tf",
        "fcnet_hiddens": [256, 256],
        "fcnet_activation": "relu"
    },
}

policies = {
    "ppo_policy_1": (policy_class,
                     env_tmp.observation_space,
                     env_tmp.action_space,
                     policy_config),
    "ppo_policy_2": (policy_class,
                     env_tmp.observation_space,
                     env_tmp.action_space,
                     policy_config),
    "rand_policy": (RandomPolicy,
                    env_tmp.observation_space,
                    env_tmp.action_space,
                    {}),
}

In [12]:
trainer_class = PPOTrainer
trainer_config = {
    "env": rlcard_env_id,
    "multiagent": {
        "policies_to_train": ['ppo_policy_1', 'ppo_policy_2'],
        "policies": policies,
        "policy_mapping_fn": lambda agent_id: "ppo_policy_1" if agent_id == "player_1" else "ppo_policy_2",
    },
    "num_workers": 2,
    "evaluation_num_workers": 1,
    "evaluation_config": {
        "env_config": env_config_eval
    },
    "evaluation_num_episodes": 100,
    "evaluation_interval": 2,
    "callbacks": PlayerScoreCallbacks
}

start = time.time()
trainer = trainer_class(trainer_config)
for i in range(20):
    res = trainer.train()

#     trainer_eval.set_weights(trainer.get_weights(["ppo_policy_1"]))
#     res = trainer_eval.train()

    policy_rewards = sorted(['{}: {}'.format(k, v) for k, v in res['policy_reward_mean'].items()])
    print("Iteration {}. policy_reward_mean: {}".format(i, policy_rewards))

stop = time.time()
train_duration = time.strftime('%H:%M:%S', time.gmtime(stop-start))
print('Training finished ({}), check the results in ~/ray_results/<dir>/'.format(train_duration))

In [13]:
res = tune.run(
    trainer_class,
    name="2020-11-18",  # This is used to specify the logging directory.
    stop={
        "training_iteration": 10,
#         "episodes_total": 10000
    },
    verbose=100,
    config=trainer_config,
    local_dir="./outputs",
    checkpoint_freq=10,
    checkpoint_at_end=True,
    restore=None
)

2020-11-18 08:23:15,274	ERROR syncer.py:46 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_leduc-holdem_ec3d3_00000,RUNNING,


[2m[36m(pid=18668)[0m 2020-11-18 08:23:38,487	INFO trainer.py:585 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=18668)[0m 2020-11-18 08:23:38,487	INFO trainer.py:612 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=18668)[0m 2020-11-18 08:24:37,351	INFO trainable.py:181 -- _setup took 58.876 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=21164)[0m   ret = umr_sum(arr, axis, dtype, out, keepdims)
[2m[36m(pid=18668)[0m   ret = umr_sum(arr, axis, dtype, out, keepdims)
Result for PPO_leduc-holdem_ec3d3_00000:
  callback_ok: true
  custom_metrics:
    player_1_score_max: 7.0
    player_1_score_mean: -0.03236040609137056
    player_1_score_min: -7.0
    player_2_score_max: 7.0
    player_2_score_mean: 0.03236040609137056
    player_2_score_min: -7.0
  date: 2020-11-18

2020-11-18 08:25:19,893	ERROR trial_runner.py:350 -- Trial Runner checkpointing failed.
Traceback (most recent call last):
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 348, in step
    self.checkpoint()
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 279, in checkpoint
    os.rename(tmp_file_name, self.checkpoint_file)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\.tmp_checkpoint' -> 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\experiment_state-2020-11-18_08-23-15.json'


Trial name,status,loc,iter,total time (s),ts,reward
PPO_leduc-holdem_ec3d3_00000,RUNNING,10.92.120.113:18668,1,41.0477,4006,0


[2m[36m(pid=12192)[0m   ret = umr_sum(arr, axis, dtype, out, keepdims)
Result for PPO_leduc-holdem_ec3d3_00000:
  callback_ok: true
  custom_metrics:
    player_1_score_max: 7.0
    player_1_score_mean: 0.03080168776371308
    player_1_score_min: -7.0
    player_2_score_max: 7.0
    player_2_score_mean: -0.03080168776371308
    player_2_score_min: -7.0
  date: 2020-11-18_08-25-49
  done: false
  episode_len_mean: 3.3755274261603376
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1185
  episodes_total: 2761
  evaluation:
    custom_metrics:
      player_1_score_max: 7.0
      player_1_score_mean: 0.12
      player_1_score_min: -7.0
      player_2_score_max: 7.0
      player_2_score_mean: -0.12
      player_2_score_min: -7.0
    episode_len_mean: 3.95
    episode_reward_max: 0.0
    episode_reward_mean: 0.0
    episode_reward_min: 0.0
    episodes_this_iter: 100
    hist_stats:
      episode_lengths:
      - 5
      - 6
      - 2
  

2020-11-18 08:25:57,333	ERROR trial_runner.py:350 -- Trial Runner checkpointing failed.
Traceback (most recent call last):
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 348, in step
    self.checkpoint()
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 279, in checkpoint
    os.rename(tmp_file_name, self.checkpoint_file)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\.tmp_checkpoint' -> 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\experiment_state-2020-11-18_08-23-15.json'


Trial name,status,loc,iter,total time (s),ts,reward
PPO_leduc-holdem_ec3d3_00000,RUNNING,10.92.120.113:18668,2,70.5598,8010,0


Result for PPO_leduc-holdem_ec3d3_00000:
  callback_ok: true
  custom_metrics:
    player_1_score_max: 7.0
    player_1_score_mean: 0.17578125
    player_1_score_min: -7.0
    player_2_score_max: 7.0
    player_2_score_mean: -0.17578125
    player_2_score_min: -7.0
  date: 2020-11-18_08-26-26
  done: false
  episode_len_mean: 3.912109375
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1024
  episodes_total: 3785
  experiment_id: 5300f5713d56410ba5dac6bf4a8f25f6
  experiment_tag: '0'
  hostname: CRDWCL01169
  info:
    learner:
      ppo_policy_1:
        cur_kl_coeff: 0.44999998807907104
        cur_lr: 4.999999873689376e-05
        entropy: 0.8403305411338806
        entropy_coeff: 0.0
        kl: 0.015164325945079327
        model: {}
        policy_loss: -0.02859964780509472
        total_loss: 10.162201881408691
        vf_explained_var: 0.15490694344043732
        vf_loss: 10.183978080749512
      ppo_policy_2:
        cur_kl_co

2020-11-18 08:26:27,242	ERROR trial_runner.py:350 -- Trial Runner checkpointing failed.
Traceback (most recent call last):
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 348, in step
    self.checkpoint()
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 279, in checkpoint
    os.rename(tmp_file_name, self.checkpoint_file)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\.tmp_checkpoint' -> 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\experiment_state-2020-11-18_08-23-15.json'


Trial name,status,loc,iter,total time (s),ts,reward
PPO_leduc-holdem_ec3d3_00000,RUNNING,10.92.120.113:18668,3,99.4098,12016,0


Result for PPO_leduc-holdem_ec3d3_00000:
  callback_ok: true
  custom_metrics:
    player_1_score_max: 7.0
    player_1_score_mean: 0.1846076458752515
    player_1_score_min: -7.0
    player_2_score_max: 7.0
    player_2_score_mean: -0.1846076458752515
    player_2_score_min: -7.0
  date: 2020-11-18_08-26-56
  done: false
  episode_len_mean: 4.027162977867203
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 994
  episodes_total: 4779
  evaluation:
    custom_metrics:
      player_1_score_max: 7.0
      player_1_score_mean: 0.09
      player_1_score_min: -7.0
      player_2_score_max: 7.0
      player_2_score_mean: -0.09
      player_2_score_min: -7.0
    episode_len_mean: 4.18
    episode_reward_max: 0.0
    episode_reward_mean: 0.0
    episode_reward_min: 0.0
    episodes_this_iter: 100
    hist_stats:
      episode_lengths:
      - 2
      - 4
      - 2
      - 4
      - 1
      - 6
      - 6
      - 1
      - 4
      - 1
      - 4


2020-11-18 08:27:00,621	ERROR trial_runner.py:350 -- Trial Runner checkpointing failed.
Traceback (most recent call last):
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 348, in step
    self.checkpoint()
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 279, in checkpoint
    os.rename(tmp_file_name, self.checkpoint_file)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\.tmp_checkpoint' -> 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\experiment_state-2020-11-18_08-23-15.json'


Trial name,status,loc,iter,total time (s),ts,reward
PPO_leduc-holdem_ec3d3_00000,RUNNING,10.92.120.113:18668,4,128.407,16019,0


Result for PPO_leduc-holdem_ec3d3_00000:
  callback_ok: true
  custom_metrics:
    player_1_score_max: 7.0
    player_1_score_mean: 0.26597938144329897
    player_1_score_min: -7.0
    player_2_score_max: 7.0
    player_2_score_mean: -0.26597938144329897
    player_2_score_min: -7.0
  date: 2020-11-18_08-27-30
  done: false
  episode_len_mean: 4.134020618556701
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 970
  episodes_total: 5749
  experiment_id: 5300f5713d56410ba5dac6bf4a8f25f6
  experiment_tag: '0'
  hostname: CRDWCL01169
  info:
    learner:
      ppo_policy_1:
        cur_kl_coeff: 0.44999998807907104
        cur_lr: 4.999999873689376e-05
        entropy: 0.7333629131317139
        entropy_coeff: 0.0
        kl: 0.009003384970128536
        model: {}
        policy_loss: -0.020643673837184906
        total_loss: 10.632890701293945
        vf_explained_var: 0.24375443160533905
        vf_loss: 10.649480819702148
      ppo_pol

2020-11-18 08:27:31,434	ERROR trial_runner.py:350 -- Trial Runner checkpointing failed.
Traceback (most recent call last):
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 348, in step
    self.checkpoint()
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 279, in checkpoint
    os.rename(tmp_file_name, self.checkpoint_file)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\.tmp_checkpoint' -> 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\experiment_state-2020-11-18_08-23-15.json'


Trial name,status,loc,iter,total time (s),ts,reward
PPO_leduc-holdem_ec3d3_00000,RUNNING,10.92.120.113:18668,5,158.341,20027,0


Result for PPO_leduc-holdem_ec3d3_00000:
  callback_ok: true
  custom_metrics:
    player_1_score_max: 7.0
    player_1_score_mean: 0.027214514407684097
    player_1_score_min: -7.0
    player_2_score_max: 7.0
    player_2_score_mean: -0.027214514407684097
    player_2_score_min: -7.0
  date: 2020-11-18_08-27-58
  done: false
  episode_len_mean: 4.273212379935966
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 937
  episodes_total: 6686
  evaluation:
    custom_metrics:
      player_1_score_max: 7.0
      player_1_score_mean: -0.065
      player_1_score_min: -7.0
      player_2_score_max: 7.0
      player_2_score_mean: 0.065
      player_2_score_min: -7.0
    episode_len_mean: 4.13
    episode_reward_max: 0.0
    episode_reward_mean: 0.0
    episode_reward_min: 0.0
    episodes_this_iter: 100
    hist_stats:
      episode_lengths:
      - 6
      - 1
      - 6
      - 5
      - 5
      - 4
      - 5
      - 1
      - 1
      - 2
    

2020-11-18 08:28:03,682	ERROR trial_runner.py:350 -- Trial Runner checkpointing failed.
Traceback (most recent call last):
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 348, in step
    self.checkpoint()
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 279, in checkpoint
    os.rename(tmp_file_name, self.checkpoint_file)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\.tmp_checkpoint' -> 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\experiment_state-2020-11-18_08-23-15.json'


Trial name,status,loc,iter,total time (s),ts,reward
PPO_leduc-holdem_ec3d3_00000,RUNNING,10.92.120.113:18668,6,185.708,24033,0


Result for PPO_leduc-holdem_ec3d3_00000:
  callback_ok: true
  custom_metrics:
    player_1_score_max: 7.0
    player_1_score_mean: 0.027777777777777776
    player_1_score_min: -7.0
    player_2_score_max: 7.0
    player_2_score_mean: -0.027777777777777776
    player_2_score_min: -7.0
  date: 2020-11-18_08-28-31
  done: false
  episode_len_mean: 4.1928721174004195
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 954
  episodes_total: 7640
  experiment_id: 5300f5713d56410ba5dac6bf4a8f25f6
  experiment_tag: '0'
  hostname: CRDWCL01169
  info:
    learner:
      ppo_policy_1:
        cur_kl_coeff: 0.44999998807907104
        cur_lr: 4.999999873689376e-05
        entropy: 0.6488468050956726
        entropy_coeff: 0.0
        kl: 0.01049336139112711
        model: {}
        policy_loss: -0.01556729432195425
        total_loss: 10.230477333068848
        vf_explained_var: 0.263813853263855
        vf_loss: 10.241321563720703
      ppo_poli

2020-11-18 08:28:32,132	ERROR trial_runner.py:350 -- Trial Runner checkpointing failed.
Traceback (most recent call last):
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 348, in step
    self.checkpoint()
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 279, in checkpoint
    os.rename(tmp_file_name, self.checkpoint_file)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\.tmp_checkpoint' -> 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\experiment_state-2020-11-18_08-23-15.json'


Trial name,status,loc,iter,total time (s),ts,reward
PPO_leduc-holdem_ec3d3_00000,RUNNING,10.92.120.113:18668,7,213.495,28034,0


Result for PPO_leduc-holdem_ec3d3_00000:
  callback_ok: true
  custom_metrics:
    player_1_score_max: 7.0
    player_1_score_mean: -0.06321243523316063
    player_1_score_min: -7.0
    player_2_score_max: 7.0
    player_2_score_mean: 0.06321243523316063
    player_2_score_min: -7.0
  date: 2020-11-18_08-28-57
  done: false
  episode_len_mean: 4.147150259067358
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 965
  episodes_total: 8605
  evaluation:
    custom_metrics:
      player_1_score_max: 7.0
      player_1_score_mean: 0.695
      player_1_score_min: -7.0
      player_2_score_max: 7.0
      player_2_score_mean: -0.695
      player_2_score_min: -7.0
    episode_len_mean: 4.05
    episode_reward_max: 0.0
    episode_reward_mean: 0.0
    episode_reward_min: 0.0
    episodes_this_iter: 100
    hist_stats:
      episode_lengths:
      - 1
      - 4
      - 4
      - 3
      - 7
      - 6
      - 4
      - 5
      - 6
      - 1
      

2020-11-18 08:29:01,439	ERROR trial_runner.py:350 -- Trial Runner checkpointing failed.
Traceback (most recent call last):
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 348, in step
    self.checkpoint()
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 279, in checkpoint
    os.rename(tmp_file_name, self.checkpoint_file)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\.tmp_checkpoint' -> 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\experiment_state-2020-11-18_08-23-15.json'


Trial name,status,loc,iter,total time (s),ts,reward
PPO_leduc-holdem_ec3d3_00000,RUNNING,10.92.120.113:18668,8,238.429,32035,0


Result for PPO_leduc-holdem_ec3d3_00000:
  callback_ok: true
  custom_metrics:
    player_1_score_max: 7.0
    player_1_score_mean: -0.06387225548902195
    player_1_score_min: -7.0
    player_2_score_max: 7.0
    player_2_score_mean: 0.06387225548902195
    player_2_score_min: -7.0
  date: 2020-11-18_08-29-26
  done: false
  episode_len_mean: 3.9880239520958085
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1002
  episodes_total: 9607
  experiment_id: 5300f5713d56410ba5dac6bf4a8f25f6
  experiment_tag: '0'
  hostname: CRDWCL01169
  info:
    learner:
      ppo_policy_1:
        cur_kl_coeff: 0.44999998807907104
        cur_lr: 4.999999873689376e-05
        entropy: 0.5934880375862122
        entropy_coeff: 0.0
        kl: 0.00944802537560463
        model: {}
        policy_loss: -0.02367359772324562
        total_loss: 9.285711288452148
        vf_explained_var: 0.32868272066116333
        vf_loss: 9.305133819580078
      ppo_polic

2020-11-18 08:29:27,560	ERROR trial_runner.py:350 -- Trial Runner checkpointing failed.
Traceback (most recent call last):
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 348, in step
    self.checkpoint()
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 279, in checkpoint
    os.rename(tmp_file_name, self.checkpoint_file)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\.tmp_checkpoint' -> 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\experiment_state-2020-11-18_08-23-15.json'


Trial name,status,loc,iter,total time (s),ts,reward
PPO_leduc-holdem_ec3d3_00000,RUNNING,10.92.120.113:18668,9,263.83,36036,0


Result for PPO_leduc-holdem_ec3d3_00000:
  callback_ok: true
  custom_metrics:
    player_1_score_max: 7.0
    player_1_score_mean: -0.14990328820116053
    player_1_score_min: -7.0
    player_2_score_max: 7.0
    player_2_score_mean: 0.14990328820116053
    player_2_score_min: -7.0
  date: 2020-11-18_08-29-53
  done: true
  episode_len_mean: 3.881044487427466
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1034
  episodes_total: 10641
  evaluation:
    custom_metrics:
      player_1_score_max: 7.0
      player_1_score_mean: 0.0
      player_1_score_min: -7.0
      player_2_score_max: 7.0
      player_2_score_mean: 0.0
      player_2_score_min: -7.0
    episode_len_mean: 3.34
    episode_reward_max: 0.0
    episode_reward_mean: 0.0
    episode_reward_min: 0.0
    episodes_this_iter: 100
    hist_stats:
      episode_lengths:
      - 1
      - 4
      - 7
      - 4
      - 2
      - 4
      - 6
      - 5
      - 1
      - 1
      - 5


2020-11-18 08:29:57,490	ERROR trial_runner.py:350 -- Trial Runner checkpointing failed.
Traceback (most recent call last):
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 348, in step
    self.checkpoint()
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 279, in checkpoint
    os.rename(tmp_file_name, self.checkpoint_file)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\.tmp_checkpoint' -> 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\experiment_state-2020-11-18_08-23-15.json'


Trial name,status,loc,iter,total time (s),ts,reward
PPO_leduc-holdem_ec3d3_00000,RUNNING,10.92.120.113:18668,10,289.829,40044,0


2020-11-18 08:29:58,608	ERROR tune.py:334 -- Trial Runner checkpointing failed.
Traceback (most recent call last):
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\tune.py", line 332, in run
    runner.checkpoint(force=True)
  File "c:\users\chiappal\appdata\local\continuum\miniconda3\envs\rl\lib\site-packages\ray\tune\trial_runner.py", line 279, in checkpoint
    os.rename(tmp_file_name, self.checkpoint_file)
FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\.tmp_checkpoint' -> 'C:\\Users\\chiappal\\Documents\\rl_project\\rlcard\\outputs\\2020-11-18\\experiment_state-2020-11-18_08-23-15.json'


Trial name,status,loc,iter,total time (s),ts,reward
PPO_leduc-holdem_ec3d3_00000,TERMINATED,,10,289.829,40044,0
