In [1]:
%cd ..

/home/jovyan/personal_project/RL-tarot


In [2]:
"""
An example of learning a Deep-Q Agent on French Tarot Game
"""
import os
import time

import tensorflow as tf

import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.agents.dqn_agent import DQNAgent
from rlcard.utils.logger import Logger
from rlcard.utils.utils import set_global_seed, time_difference_good_format

# Identifier of this experiment run; it is embedded in every output path below
record_number = 11

# One environment for training, a second one used only for evaluation
env = rlcard.make('tarot')
eval_env = rlcard.make('tarot')

# Schedule, expressed in training episodes:
#   evaluate_every     - evaluate (and checkpoint) every N episodes
#   evolve_model_every - move on to the next self-play generation every N episodes
#   evaluate_num       - number of games played per evaluation
evaluate_every = 500
evolve_model_every = 5000
evaluate_num = 1000

episode_num = 100000

# Index of the current self-play generation, and the number of evaluation
# rounds that fit in the whole run
self_play = 1
total_self_play_eval = episode_num // evaluate_every

# Initial replay-memory size and number of steps used for collecting
# normalization statistics
memory_init_size = 5000
norm_step = 1000

# Where the logs, CSV files and learning-curve figures are written
root_path = './experiments/tarot_dqn_self_played_v{}/'.format(record_number)
log_path_random = '{}log_random.txt'.format(root_path)
csv_path_random = '{}performance_random.csv'.format(root_path)
log_path_opponent = '{}log_opponent.txt'.format(root_path)
csv_path_opponent = '{}performance_opponent.csv'.format(root_path)
figure_path_random = '{}figures_random/'.format(root_path)
figure_path_opponent = '{}figures_opponent/'.format(root_path)

# Model save path.
# One checkpoint folder is prepared per self-play index up front.
# exist_ok=True makes the creation idempotent and avoids the check-then-create
# race of testing os.path.exists first; makedirs also creates any missing
# parent directories.
os.makedirs('rlcard/models/pretrained', exist_ok=True)
for self_play_init in range(1, total_self_play_eval + 1):
    model_folder_path = 'rlcard/models/pretrained/self_played_{}/tarot_v{}'.format(
        str(record_number),
        str(record_number * 10000 + self_play_init))
    os.makedirs(model_folder_path, exist_ok=True)

# Checkpoint prefix of the current self-play generation
model_path = 'rlcard/models/pretrained/self_played_{}/tarot_v{}/model'.format(
    str(record_number),
    str(record_number * 10000 + self_play))

# Set a global seed
set_global_seed(0)

random_agent = RandomAgent(action_num=eval_env.action_num)


def _show_progress(message, previous_width):
    """Redraw the one-line progress display and return the new line width.

    The line is rewritten in place with a carriage return. Padding it to the
    width of the previous message blanks out leftover characters when the new
    message is shorter; without that, '59 seconds' followed by '4 seconds' is
    displayed as '4 secondss'.

    Args:
        message: text to display, without the leading carriage return.
        previous_width: length of the message displayed by the previous call
            (0 for the first call).

    Returns:
        The length of ``message``, to be passed to the next call.
    """
    print('\r' + message.ljust(previous_width), end='')
    return len(message)


with tf.compat.v1.Session() as sess:
    # Set agents
    # global_step is not read anywhere in this script; it is kept so that the
    # set of graph variables seen by the initializer and the Saver below stays
    # the same as before.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=78,  # env.action_num,
                     replay_memory_size=20000,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=env.state_shape,
                     mlp_layers=[512, 1024, 512])

    # NOTE(review): this binds the very same object as `agent`, not a frozen
    # copy, so every game "against the last agent" is played against the
    # network that is currently being trained.
    opponent_agent = agent

    sess.run(tf.compat.v1.global_variables_initializer())

    saver = tf.compat.v1.train.Saver()

    env.set_agents([agent] + [opponent_agent] * (env.player_num - 1))
    eval_env.set_agents([agent] + [random_agent] * (env.player_num - 1))

    # Count the number of steps
    step_counter = 0

    # Init a Logger to plot the learning curve against random
    logger_random = Logger(xlabel='timestep', ylabel='reward', legend='DQN on TAROT against Random',
                           legend_hist='Histogram of last evaluations against Random', log_path=log_path_random,
                           csv_path=csv_path_random)
    # Init a Logger to plot the learning curve against last opponent
    logger_opponent = Logger(xlabel='timestep', ylabel='reward', legend='DQN on TAROT against last agent',
                             legend_hist='Histogram of last evaluations against last agent', log_path=log_path_opponent,
                             csv_path=csv_path_opponent)

    total_game_played = 0
    seconds = time.time()
    progress_width = 0

    # (reward list, taking list) of the most recent evaluation, keyed by the
    # evaluation label. Keeping the two lists together guarantees that the
    # final histograms pair each reward list with its own taking list.
    last_evaluation = {}

    for episode in range(episode_num):
        progress_width = _show_progress(
            'EPISODE {} - Number of game played {} - {}'.format(
                episode, total_game_played, time_difference_good_format(seconds, time.time())),
            progress_width)

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)
        total_game_played += 1

        # Feed player 0's transitions into the agent memory, and train the
        # agent once the memory-initialisation and normalisation steps are over
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1
            if step_counter > memory_init_size + norm_step:
                agent.train()

        # Evaluate the performance.
        if episode % evaluate_every == 0:
            # Save the model under the current self-play index
            model_path = 'rlcard/models/pretrained/self_played_{}/tarot_v{}/model'.format(
                str(record_number),
                str(record_number * 10000 + self_play))

            saver.save(sess, model_path)

            # (progress label, log title, log name, opponent, logger, figure folder)
            evaluations = (
                ('Random', 'Random', 'random',
                 random_agent, logger_random, figure_path_random),
                ('Opponent', 'Last Agent', 'last agent',
                 opponent_agent, logger_opponent, figure_path_opponent),
            )
            for label, title, name, opponent, logger, figure_path in evaluations:
                eval_env.set_agents([agent] + [opponent] * (env.player_num - 1))
                rewards = []
                takings = []
                for eval_episode in range(evaluate_num):
                    progress_width = _show_progress(
                        'EPISODE {} - Eval {} {} over {} - Number of game played {} - {}'.format(
                            episode, label, eval_episode, evaluate_num, total_game_played,
                            time_difference_good_format(seconds, time.time())),
                        progress_width)
                    _, payoffs = eval_env.run(is_training=False)
                    total_game_played += 1
                    # Only player 0 (the learning agent) is tracked
                    rewards.append(payoffs[0])
                    takings.append(eval_env.game.players[0].taking)

                average_reward = float(sum(rewards)) / evaluate_num

                logger.log('\n########## Evaluation Against {} - Episode {} ##########'.format(title, episode))
                logger.log('Timestep: {} Average reward against {} is {}'.format(env.timestep, name,
                                                                                 average_reward))

                # Add point to logger (x axis is the training environment's timestep)
                logger.add_point(x=env.timestep, y=average_reward)

                # Make plot
                logger.make_plot(save_path=figure_path + str(episode) + '.png')
                logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                                      save_path_2=figure_path + str(episode) + '_freq.png',
                                      reward_list=rewards, taking_list=takings)

                last_evaluation[label] = (rewards, takings)

        if episode % evolve_model_every == 0:
            # Go to the next self-play generation: later checkpoints are
            # written under the next model index
            self_play += 1

            opponent_agent = agent
            env.set_agents([agent] + [opponent_agent] * (env.player_num - 1))

    # Make the final plots, each logger with the data of its own last evaluation
    for label, logger, figure_path in (('Random', logger_random, figure_path_random),
                                       ('Opponent', logger_opponent, figure_path_opponent)):
        rewards, takings = last_evaluation[label]
        logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
        logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                              save_path_2=figure_path + str(episode) + '_freq.png',
                              reward_list=rewards, taking_list=takings)


Instructions for updating:
Colocations handled automatically by placer.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use tf.cast instead.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


EPISODE 0 - Eval Random 999 over 1000 - Number of game played 1000 - 36 seconds
########## Evaluation Against Random - Episode 0 ##########
Timestep: 79 Average reward against random is -0.87
EPISODE 0 - Eval Opponent 999 over 1000 - Number of game played 2000 - 2 minutes and 2 seconds
########## Evaluation Against Last Agent - Episode 0 ##########
Timestep: 79 Average reward against last agent is 0.209
EPISODE 500 - Eval Random 999 over 1000 - Number of game played 3500 - 3 minutes and 57 seconds
########## Evaluation Against Random - Episode 500 ##########
Timestep: 40000 Average reward against random is -1.301
EPISODE 500 - Eval Opponent 999 over 1000 - Number of game played 4500 - 5 minutes and 21 seconds
########## Evaluation Against Last Agent - Episode 500 ##########
Timestep: 40000 Average reward against last agent is -0.138
EPISODE 1000 - Eval Random 999 over 1000 - Number of game played 6000 - 8 minutes and 11 seconds
########## Evaluation Against Random - Episode 1000 ######

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 2500 - Eval Random 999 over 1000 - Number of game played 13500 - 21 minutes and 31 seconds
########## Evaluation Against Random - Episode 2500 ##########
Timestep: 200803 Average reward against random is 0.585
EPISODE 2500 - Eval Opponent 999 over 1000 - Number of game played 14500 - 22 minutes and 58 seconds
########## Evaluation Against Last Agent - Episode 2500 ##########
Timestep: 200803 Average reward against last agent is -2.436
EPISODE 3000 - Eval Random 999 over 1000 - Number of game played 16000 - 25 minutes and 59 seconds
########## Evaluation Against Random - Episode 3000 ##########
Timestep: 241158 Average reward against random is 1.288
EPISODE 3000 - Eval Opponent 999 over 1000 - Number of game played 17000 - 27 minutes and 24 seconds
########## Evaluation Against Last Agent - Episode 3000 ##########
Timestep: 241158 Average reward against last agent is 2.512


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 3500 - Eval Random 999 over 1000 - Number of game played 18500 - 30 minutes and 20 seconds
########## Evaluation Against Random - Episode 3500 ##########
Timestep: 281405 Average reward against random is 1.456
EPISODE 3500 - Eval Opponent 999 over 1000 - Number of game played 19500 - 31 minutes and 48 seconds
########## Evaluation Against Last Agent - Episode 3500 ##########
Timestep: 281405 Average reward against last agent is -0.43
EPISODE 4000 - Eval Random 999 over 1000 - Number of game played 21000 - 34 minutes and 44 seconds
########## Evaluation Against Random - Episode 4000 ##########
Timestep: 321581 Average reward against random is 1.408
EPISODE 4000 - Eval Opponent 999 over 1000 - Number of game played 22000 - 36 minutes and 9 seconds
########## Evaluation Against Last Agent - Episode 4000 ##########
Timestep: 321581 Average reward against last agent is 2.48


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 4500 - Eval Random 999 over 1000 - Number of game played 23500 - 39 minutes and 4 seconds
########## Evaluation Against Random - Episode 4500 ##########
Timestep: 361808 Average reward against random is 0.86
EPISODE 4500 - Eval Opponent 999 over 1000 - Number of game played 24500 - 40 minutes and 29 seconds
########## Evaluation Against Last Agent - Episode 4500 ##########
Timestep: 361808 Average reward against last agent is 2.525


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 5000 - Eval Random 999 over 1000 - Number of game played 26000 - 43 minutes and 26 seconds
########## Evaluation Against Random - Episode 5000 ##########
Timestep: 402140 Average reward against random is 1.604
EPISODE 5000 - Eval Opponent 999 over 1000 - Number of game played 27000 - 44 minutes and 57 seconds
########## Evaluation Against Last Agent - Episode 5000 ##########
Timestep: 402140 Average reward against last agent is -0.773
EPISODE 5500 - Eval Random 999 over 1000 - Number of game played 28500 - 47 minutes and 53 seconds
########## Evaluation Against Random - Episode 5500 ##########
Timestep: 442337 Average reward against random is 0.823
EPISODE 5500 - Eval Opponent 999 over 1000 - Number of game played 29500 - 49 minutes and 22 seconds
########## Evaluation Against Last Agent - Episode 5500 ##########
Timestep: 442337 Average reward against last agent is -2.221
EPISODE 6000 - Eval Random 999 over 1000 - Number of game played 31000 - 52 minutes and 16 seconds
#######

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 10000 - Eval Random 999 over 1000 - Number of game played 51000 - 1 hour, 26 minutes and 50 seconds
########## Evaluation Against Random - Episode 10000 ##########
Timestep: 802349 Average reward against random is 1.492
EPISODE 10000 - Eval Opponent 999 over 1000 - Number of game played 52000 - 1 hour, 28 minutes and 14 seconds
########## Evaluation Against Last Agent - Episode 10000 ##########
Timestep: 802349 Average reward against last agent is 2.842


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 10500 - Eval Random 999 over 1000 - Number of game played 53500 - 1 hour, 31 minutes and 9 seconds
########## Evaluation Against Random - Episode 10500 ##########
Timestep: 842579 Average reward against random is 1.01
EPISODE 10500 - Eval Opponent 999 over 1000 - Number of game played 54500 - 1 hour, 32 minutes and 34 seconds
########## Evaluation Against Last Agent - Episode 10500 ##########
Timestep: 842579 Average reward against last agent is 2.503


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 11000 - Eval Random 999 over 1000 - Number of game played 56000 - 1 hour, 35 minutes and 28 seconds
########## Evaluation Against Random - Episode 11000 ##########
Timestep: 882719 Average reward against random is 2.234
EPISODE 11000 - Eval Opponent 999 over 1000 - Number of game played 57000 - 1 hour, 36 minutes and 54 seconds
########## Evaluation Against Last Agent - Episode 11000 ##########
Timestep: 882719 Average reward against last agent is 2.69


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 11500 - Eval Random 999 over 1000 - Number of game played 58500 - 1 hour, 39 minutes and 49 seconds
########## Evaluation Against Random - Episode 11500 ##########
Timestep: 922963 Average reward against random is 2.508
EPISODE 11500 - Eval Opponent 999 over 1000 - Number of game played 59500 - 1 hour, 41 minutes and 14 seconds
########## Evaluation Against Last Agent - Episode 11500 ##########
Timestep: 922963 Average reward against last agent is 2.629


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 12000 - Eval Random 999 over 1000 - Number of game played 61000 - 1 hour, 44 minutes and 11 seconds
########## Evaluation Against Random - Episode 12000 ##########
Timestep: 963252 Average reward against random is 2.421
EPISODE 12000 - Eval Opponent 999 over 1000 - Number of game played 62000 - 1 hour, 45 minutes and 43 seconds
########## Evaluation Against Last Agent - Episode 12000 ##########
Timestep: 963252 Average reward against last agent is 2.022
EPISODE 12500 - Eval Random 999 over 1000 - Number of game played 63500 - 1 hour, 48 minutes and 40 seconds
########## Evaluation Against Random - Episode 12500 ##########
Timestep: 1003616 Average reward against random is 2.134
EPISODE 12500 - Eval Opponent 999 over 1000 - Number of game played 64500 - 1 hour, 50 minutes and 18 seconds
########## Evaluation Against Last Agent - Episode 12500 ##########
Timestep: 1003616 Average reward against last agent is -2.668
EPISODE 13000 - Eval Random 999 over 1000 - Number of game played

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 13500 - Eval Random 999 over 1000 - Number of game played 68500 - 1 hour, 57 minutes and 34 seconds
########## Evaluation Against Random - Episode 13500 ##########
Timestep: 1084086 Average reward against random is 2.187
EPISODE 13500 - Eval Opponent 999 over 1000 - Number of game played 69500 - 1 hour, 59 minutes and 22 seconds
########## Evaluation Against Last Agent - Episode 13500 ##########
Timestep: 1084086 Average reward against last agent is -1.589
EPISODE 14000 - Eval Random 999 over 1000 - Number of game played 71000 - 2 hours, 2 minutes and 18 seconds
########## Evaluation Against Random - Episode 14000 ##########
Timestep: 1124322 Average reward against random is 2.245
EPISODE 14000 - Eval Opponent 999 over 1000 - Number of game played 72000 - 2 hours, 4 minutes and 5 seconds
########## Evaluation Against Last Agent - Episode 14000 ##########
Timestep: 1124322 Average reward against last agent is -1.74
EPISODE 14500 - Eval Random 999 over 1000 - Number of game play

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 16500 - Eval Random 999 over 1000 - Number of game played 83500 - 2 hours, 24 minutes and 53 seconds
########## Evaluation Against Random - Episode 16500 ##########
Timestep: 1325358 Average reward against random is 1.613
EPISODE 16500 - Eval Opponent 999 over 1000 - Number of game played 84500 - 2 hours, 26 minutes and 21 seconds
########## Evaluation Against Last Agent - Episode 16500 ##########
Timestep: 1325358 Average reward against last agent is 1.62
EPISODE 17000 - Eval Random 999 over 1000 - Number of game played 86000 - 2 hours, 29 minutes and 15 seconds
########## Evaluation Against Random - Episode 17000 ##########
Timestep: 1365817 Average reward against random is 2.112
EPISODE 17000 - Eval Opponent 999 over 1000 - Number of game played 87000 - 2 hours, 31 minutes and 3 seconds
########## Evaluation Against Last Agent - Episode 17000 ##########
Timestep: 1365817 Average reward against last agent is -2.057
EPISODE 17500 - Eval Random 999 over 1000 - Number of game p

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 19000 - Eval Random 999 over 1000 - Number of game played 96000 - 2 hours, 47 minutes and 21 seconds
########## Evaluation Against Random - Episode 19000 ##########
Timestep: 1527024 Average reward against random is 2.292
EPISODE 19000 - Eval Opponent 999 over 1000 - Number of game played 97000 - 2 hours, 49 minutes and 1 second
########## Evaluation Against Last Agent - Episode 19000 ##########
Timestep: 1527024 Average reward against last agent is -0.195
EPISODE 19500 - Eval Random 999 over 1000 - Number of game played 98500 - 2 hours, 51 minutes and 56 seconds
########## Evaluation Against Random - Episode 19500 ##########
Timestep: 1567205 Average reward against random is 2.006
EPISODE 19500 - Eval Opponent 999 over 1000 - Number of game played 99500 - 2 hours, 53 minutes and 28 seconds
########## Evaluation Against Last Agent - Episode 19500 ##########
Timestep: 1567205 Average reward against last agent is -0.415
EPISODE 20000 - Eval Random 999 over 1000 - Number of game

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


Instructions for updating:
Use standard file APIs to delete files with this prefix.
EPISODE 20500 - Eval Random 999 over 1000 - Number of game played 103500 - 3 hours, 0 minute and 39 seconds
########## Evaluation Against Random - Episode 20500 ##########
Timestep: 1647380 Average reward against random is 1.894
EPISODE 20500 - Eval Opponent 999 over 1000 - Number of game played 104500 - 3 hours, 2 minutes and 3 seconds
########## Evaluation Against Last Agent - Episode 20500 ##########
Timestep: 1647380 Average reward against last agent is 2.444


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 21000 - Eval Random 999 over 1000 - Number of game played 106000 - 3 hours, 4 minutes and 58 seconds
########## Evaluation Against Random - Episode 21000 ##########
Timestep: 1687425 Average reward against random is 1.911
EPISODE 21000 - Eval Opponent 999 over 1000 - Number of game played 107000 - 3 hours, 6 minutes and 41 seconds
########## Evaluation Against Last Agent - Episode 21000 ##########
Timestep: 1687425 Average reward against last agent is -3.448
EPISODE 21500 - Eval Random 999 over 1000 - Number of game played 108500 - 3 hours, 9 minutes and 36 seconds
########## Evaluation Against Random - Episode 21500 ##########
Timestep: 1727558 Average reward against random is 1.864
EPISODE 21500 - Eval Opponent 999 over 1000 - Number of game played 109500 - 3 hours, 11 minutes and 14 seconds
########## Evaluation Against Last Agent - Episode 21500 ##########
Timestep: 1727558 Average reward against last agent is -4.096
EPISODE 22000 - Eval Random 999 over 1000 - Number of gam

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 23000 - Eval Random 999 over 1000 - Number of game played 116000 - 3 hours, 23 minutes and 9 seconds
########## Evaluation Against Random - Episode 23000 ##########
Timestep: 1847812 Average reward against random is 1.458
EPISODE 23000 - Eval Opponent 999 over 1000 - Number of game played 117000 - 3 hours, 24 minutes and 34 seconds
########## Evaluation Against Last Agent - Episode 23000 ##########
Timestep: 1847812 Average reward against last agent is 2.396


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 23500 - Eval Random 999 over 1000 - Number of game played 118500 - 3 hours, 27 minutes and 29 seconds
########## Evaluation Against Random - Episode 23500 ##########
Timestep: 1887900 Average reward against random is 0.976
EPISODE 23500 - Eval Opponent 999 over 1000 - Number of game played 119500 - 3 hours, 28 minutes and 54 seconds
########## Evaluation Against Last Agent - Episode 23500 ##########
Timestep: 1887900 Average reward against last agent is 2.379


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 24000 - Eval Random 999 over 1000 - Number of game played 121000 - 3 hours, 31 minutes and 50 seconds
########## Evaluation Against Random - Episode 24000 ##########
Timestep: 1928024 Average reward against random is 1.866
EPISODE 24000 - Eval Opponent 999 over 1000 - Number of game played 122000 - 3 hours, 33 minutes and 14 seconds
########## Evaluation Against Last Agent - Episode 24000 ##########
Timestep: 1928024 Average reward against last agent is 2.173


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 24500 - Eval Random 999 over 1000 - Number of game played 123500 - 3 hours, 36 minutes and 11 seconds
########## Evaluation Against Random - Episode 24500 ##########
Timestep: 1968271 Average reward against random is 1.686
EPISODE 24500 - Eval Opponent 999 over 1000 - Number of game played 124500 - 3 hours, 37 minutes and 36 seconds
########## Evaluation Against Last Agent - Episode 24500 ##########
Timestep: 1968271 Average reward against last agent is 2.593


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 25000 - Eval Random 999 over 1000 - Number of game played 126000 - 3 hours, 40 minutes and 34 seconds
########## Evaluation Against Random - Episode 25000 ##########
Timestep: 2008476 Average reward against random is 2.091
EPISODE 25000 - Eval Opponent 999 over 1000 - Number of game played 127000 - 3 hours, 42 minutes and 18 seconds
########## Evaluation Against Last Agent - Episode 25000 ##########
Timestep: 2008476 Average reward against last agent is -2.162
EPISODE 25500 - Eval Random 999 over 1000 - Number of game played 128500 - 3 hours, 45 minutes and 15 seconds
########## Evaluation Against Random - Episode 25500 ##########
Timestep: 2049055 Average reward against random is 2.021
EPISODE 25500 - Eval Opponent 999 over 1000 - Number of game played 129500 - 3 hours, 46 minutes and 39 seconds
########## Evaluation Against Last Agent - Episode 25500 ##########
Timestep: 2049055 Average reward against last agent is 2.332


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 26000 - Eval Random 999 over 1000 - Number of game played 131000 - 3 hours, 49 minutes and 36 seconds
########## Evaluation Against Random - Episode 26000 ##########
Timestep: 2089582 Average reward against random is 0.843
EPISODE 26000 - Eval Opponent 999 over 1000 - Number of game played 132000 - 3 hours, 51 minutes and 0 second
########## Evaluation Against Last Agent - Episode 26000 ##########
Timestep: 2089582 Average reward against last agent is 2.5


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 26500 - Eval Random 999 over 1000 - Number of game played 133500 - 3 hours, 53 minutes and 57 seconds
########## Evaluation Against Random - Episode 26500 ##########
Timestep: 2130056 Average reward against random is 1.108
EPISODE 26500 - Eval Opponent 999 over 1000 - Number of game played 134500 - 3 hours, 55 minutes and 26 seconds
########## Evaluation Against Last Agent - Episode 26500 ##########
Timestep: 2130056 Average reward against last agent is -1.22
EPISODE 27000 - Eval Random 999 over 1000 - Number of game played 136000 - 3 hours, 58 minutes and 24 seconds
########## Evaluation Against Random - Episode 27000 ##########
Timestep: 2170445 Average reward against random is 1.193
EPISODE 27000 - Eval Opponent 999 over 1000 - Number of game played 137000 - 3 hours, 59 minutes and 49 seconds
########## Evaluation Against Last Agent - Episode 27000 ##########
Timestep: 2170445 Average reward against last agent is 2.264


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 27500 - Eval Random 999 over 1000 - Number of game played 138500 - 4 hours, 2 minutes and 48 seconds
########## Evaluation Against Random - Episode 27500 ##########
Timestep: 2210749 Average reward against random is 1.113
EPISODE 27500 - Eval Opponent 999 over 1000 - Number of game played 139500 - 4 hours, 4 minutes and 12 seconds
########## Evaluation Against Last Agent - Episode 27500 ##########
Timestep: 2210749 Average reward against last agent is 2.177


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 28000 - Eval Random 999 over 1000 - Number of game played 141000 - 4 hours, 7 minutes and 11 seconds
########## Evaluation Against Random - Episode 28000 ##########
Timestep: 2250853 Average reward against random is 1.923
EPISODE 28000 - Eval Opponent 999 over 1000 - Number of game played 142000 - 4 hours, 8 minutes and 35 seconds
########## Evaluation Against Last Agent - Episode 28000 ##########
Timestep: 2250853 Average reward against last agent is 2.714


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 28500 - Eval Random 999 over 1000 - Number of game played 143500 - 4 hours, 11 minutes and 34 seconds
########## Evaluation Against Random - Episode 28500 ##########
Timestep: 2291005 Average reward against random is 1.654
EPISODE 28500 - Eval Opponent 999 over 1000 - Number of game played 144500 - 4 hours, 12 minutes and 58 seconds
########## Evaluation Against Last Agent - Episode 28500 ##########
Timestep: 2291005 Average reward against last agent is 2.518


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 29000 - Eval Random 999 over 1000 - Number of game played 146000 - 4 hours, 15 minutes and 56 seconds
########## Evaluation Against Random - Episode 29000 ##########
Timestep: 2331114 Average reward against random is 1.424
EPISODE 29000 - Eval Opponent 999 over 1000 - Number of game played 147000 - 4 hours, 17 minutes and 30 seconds
########## Evaluation Against Last Agent - Episode 29000 ##########
Timestep: 2331114 Average reward against last agent is -2.49
EPISODE 29500 - Eval Random 999 over 1000 - Number of game played 148500 - 4 hours, 20 minutes and 28 seconds
########## Evaluation Against Random - Episode 29500 ##########
Timestep: 2371179 Average reward against random is 1.639
EPISODE 29500 - Eval Opponent 999 over 1000 - Number of game played 149500 - 4 hours, 22 minutes and 4 seconds
########## Evaluation Against Last Agent - Episode 29500 ##########
Timestep: 2371179 Average reward against last agent is -2.573
EPISODE 30000 - Eval Random 999 over 1000 - Number of g

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 30500 - Eval Random 999 over 1000 - Number of game played 153500 - 4 hours, 29 minutes and 26 seconds
########## Evaluation Against Random - Episode 30500 ##########
Timestep: 2451363 Average reward against random is 2.186
EPISODE 30500 - Eval Opponent 999 over 1000 - Number of game played 154500 - 4 hours, 31 minutes and 11 seconds
########## Evaluation Against Last Agent - Episode 30500 ##########
Timestep: 2451363 Average reward against last agent is -1.89
EPISODE 31000 - Eval Random 999 over 1000 - Number of game played 156000 - 4 hours, 34 minutes and 11 seconds
########## Evaluation Against Random - Episode 31000 ##########
Timestep: 2491390 Average reward against random is 2.152
EPISODE 31000 - Eval Opponent 999 over 1000 - Number of game played 157000 - 4 hours, 35 minutes and 36 seconds
########## Evaluation Against Last Agent - Episode 31000 ##########
Timestep: 2491390 Average reward against last agent is 2.5


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 31500 - Eval Random 999 over 1000 - Number of game played 158500 - 4 hours, 38 minutes and 35 seconds
########## Evaluation Against Random - Episode 31500 ##########
Timestep: 2531495 Average reward against random is 2.083
EPISODE 31500 - Eval Opponent 999 over 1000 - Number of game played 159500 - 4 hours, 40 minutes and 16 seconds
########## Evaluation Against Last Agent - Episode 31500 ##########
Timestep: 2531495 Average reward against last agent is -1.618
EPISODE 32000 - Eval Random 999 over 1000 - Number of game played 161000 - 4 hours, 43 minutes and 16 seconds
########## Evaluation Against Random - Episode 32000 ##########
Timestep: 2571518 Average reward against random is 1.706
EPISODE 32000 - Eval Opponent 999 over 1000 - Number of game played 162000 - 4 hours, 44 minutes and 41 seconds
########## Evaluation Against Last Agent - Episode 32000 ##########
Timestep: 2571518 Average reward against last agent is 2.678


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 32500 - Eval Random 999 over 1000 - Number of game played 163500 - 4 hours, 47 minutes and 41 seconds
########## Evaluation Against Random - Episode 32500 ##########
Timestep: 2611533 Average reward against random is 1.568
EPISODE 32500 - Eval Opponent 999 over 1000 - Number of game played 164500 - 4 hours, 49 minutes and 5 secondss
########## Evaluation Against Last Agent - Episode 32500 ##########
Timestep: 2611533 Average reward against last agent is 2.571


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 33000 - Eval Random 999 over 1000 - Number of game played 166000 - 4 hours, 52 minutes and 4 secondss
########## Evaluation Against Random - Episode 33000 ##########
Timestep: 2651540 Average reward against random is 1.576
EPISODE 33000 - Eval Opponent 999 over 1000 - Number of game played 167000 - 4 hours, 53 minutes and 28 seconds
########## Evaluation Against Last Agent - Episode 33000 ##########
Timestep: 2651540 Average reward against last agent is 2.654


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 33500 - Eval Random 999 over 1000 - Number of game played 168500 - 4 hours, 56 minutes and 26 seconds
########## Evaluation Against Random - Episode 33500 ##########
Timestep: 2691594 Average reward against random is 2.094
EPISODE 33500 - Eval Opponent 999 over 1000 - Number of game played 169500 - 4 hours, 58 minutes and 3 secondss
########## Evaluation Against Last Agent - Episode 33500 ##########
Timestep: 2691594 Average reward against last agent is -1.547
EPISODE 34000 - Eval Random 999 over 1000 - Number of game played 171000 - 5 hours, 1 minute and 3 secondss
########## Evaluation Against Random - Episode 34000 ##########
Timestep: 2731744 Average reward against random is 1.568
EPISODE 34000 - Eval Opponent 999 over 1000 - Number of game played 172000 - 5 hours, 2 minutes and 27 seconds
########## Evaluation Against Last Agent - Episode 34000 ##########
Timestep: 2731744 Average reward against last agent is 2.456


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 34500 - Eval Random 999 over 1000 - Number of game played 173500 - 5 hours, 5 minutes and 26 seconds
########## Evaluation Against Random - Episode 34500 ##########
Timestep: 2771834 Average reward against random is 1.007
EPISODE 34500 - Eval Opponent 999 over 1000 - Number of game played 174500 - 5 hours, 6 minutes and 50 seconds
########## Evaluation Against Last Agent - Episode 34500 ##########
Timestep: 2771834 Average reward against last agent is 2.532


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 35000 - Eval Random 999 over 1000 - Number of game played 176000 - 5 hours, 9 minutes and 49 seconds
########## Evaluation Against Random - Episode 35000 ##########
Timestep: 2811874 Average reward against random is 2.004
EPISODE 35000 - Eval Opponent 999 over 1000 - Number of game played 177000 - 5 hours, 11 minutes and 13 seconds
########## Evaluation Against Last Agent - Episode 35000 ##########
Timestep: 2811874 Average reward against last agent is 2.511


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 35500 - Eval Random 999 over 1000 - Number of game played 178500 - 5 hours, 14 minutes and 12 seconds
########## Evaluation Against Random - Episode 35500 ##########
Timestep: 2851879 Average reward against random is 1.57
EPISODE 35500 - Eval Opponent 999 over 1000 - Number of game played 179500 - 5 hours, 15 minutes and 37 seconds
########## Evaluation Against Last Agent - Episode 35500 ##########
Timestep: 2851879 Average reward against last agent is 2.551


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 36000 - Eval Random 999 over 1000 - Number of game played 181000 - 5 hours, 18 minutes and 37 seconds
########## Evaluation Against Random - Episode 36000 ##########
Timestep: 2891944 Average reward against random is 1.647
EPISODE 36000 - Eval Opponent 999 over 1000 - Number of game played 182000 - 5 hours, 20 minutes and 2 secondss
########## Evaluation Against Last Agent - Episode 36000 ##########
Timestep: 2891944 Average reward against last agent is 2.569


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 36500 - Eval Random 999 over 1000 - Number of game played 183500 - 5 hours, 23 minutes and 2 secondss
########## Evaluation Against Random - Episode 36500 ##########
Timestep: 2931977 Average reward against random is 0.934
EPISODE 36500 - Eval Opponent 999 over 1000 - Number of game played 184500 - 5 hours, 24 minutes and 27 seconds
########## Evaluation Against Last Agent - Episode 36500 ##########
Timestep: 2931977 Average reward against last agent is 2.175


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 37000 - Eval Random 999 over 1000 - Number of game played 186000 - 5 hours, 27 minutes and 28 seconds
########## Evaluation Against Random - Episode 37000 ##########
Timestep: 2971934 Average reward against random is -0.053
EPISODE 37000 - Eval Opponent 999 over 1000 - Number of game played 187000 - 5 hours, 28 minutes and 54 seconds
########## Evaluation Against Last Agent - Episode 37000 ##########
Timestep: 2971934 Average reward against last agent is -3.204
EPISODE 37500 - Eval Random 999 over 1000 - Number of game played 188500 - 5 hours, 31 minutes and 55 seconds
########## Evaluation Against Random - Episode 37500 ##########
Timestep: 3011890 Average reward against random is 1.108
EPISODE 37500 - Eval Opponent 999 over 1000 - Number of game played 189500 - 5 hours, 33 minutes and 19 seconds
########## Evaluation Against Last Agent - Episode 37500 ##########
Timestep: 3011890 Average reward against last agent is 2.469


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 38000 - Eval Random 999 over 1000 - Number of game played 191000 - 5 hours, 36 minutes and 19 seconds
########## Evaluation Against Random - Episode 38000 ##########
Timestep: 3051898 Average reward against random is 0.768
EPISODE 38000 - Eval Opponent 999 over 1000 - Number of game played 192000 - 5 hours, 37 minutes and 48 seconds
########## Evaluation Against Last Agent - Episode 38000 ##########
Timestep: 3051898 Average reward against last agent is -3.699
EPISODE 38500 - Eval Random 999 over 1000 - Number of game played 193500 - 5 hours, 40 minutes and 48 seconds
########## Evaluation Against Random - Episode 38500 ##########
Timestep: 3091947 Average reward against random is 0.905
EPISODE 38500 - Eval Opponent 999 over 1000 - Number of game played 194500 - 5 hours, 42 minutes and 13 seconds
########## Evaluation Against Last Agent - Episode 38500 ##########
Timestep: 3091947 Average reward against last agent is 2.306


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 39000 - Eval Random 999 over 1000 - Number of game played 196000 - 5 hours, 45 minutes and 15 seconds
########## Evaluation Against Random - Episode 39000 ##########
Timestep: 3132043 Average reward against random is 1.576
EPISODE 39000 - Eval Opponent 999 over 1000 - Number of game played 197000 - 5 hours, 46 minutes and 44 seconds
########## Evaluation Against Last Agent - Episode 39000 ##########
Timestep: 3132043 Average reward against last agent is -1.801
EPISODE 39500 - Eval Random 999 over 1000 - Number of game played 198500 - 5 hours, 49 minutes and 48 seconds
########## Evaluation Against Random - Episode 39500 ##########
Timestep: 3172149 Average reward against random is 0.747
EPISODE 39500 - Eval Opponent 999 over 1000 - Number of game played 199500 - 5 hours, 51 minutes and 14 seconds
########## Evaluation Against Last Agent - Episode 39500 ##########
Timestep: 3172149 Average reward against last agent is -2.074
EPISODE 40000 - Eval Random 999 over 1000 - Number of 

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 43000 - Eval Random 999 over 1000 - Number of game played 216000 - 6 hours, 21 minutes and 32 seconds
########## Evaluation Against Random - Episode 43000 ##########
Timestep: 3452749 Average reward against random is 1.292
EPISODE 43000 - Eval Opponent 999 over 1000 - Number of game played 217000 - 6 hours, 22 minutes and 57 seconds
########## Evaluation Against Last Agent - Episode 43000 ##########
Timestep: 3452749 Average reward against last agent is 2.314


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 43500 - Eval Random 999 over 1000 - Number of game played 218500 - 6 hours, 26 minutes and 3 secondss
########## Evaluation Against Random - Episode 43500 ##########
Timestep: 3492778 Average reward against random is 0.489
EPISODE 43500 - Eval Opponent 999 over 1000 - Number of game played 219500 - 6 hours, 27 minutes and 28 seconds
########## Evaluation Against Last Agent - Episode 43500 ##########
Timestep: 3492778 Average reward against last agent is 2.621


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 44000 - Eval Random 999 over 1000 - Number of game played 221000 - 6 hours, 30 minutes and 35 seconds
########## Evaluation Against Random - Episode 44000 ##########
Timestep: 3532875 Average reward against random is 0.697
EPISODE 44000 - Eval Opponent 999 over 1000 - Number of game played 222000 - 6 hours, 32 minutes and 4 secondss
########## Evaluation Against Last Agent - Episode 44000 ##########
Timestep: 3532875 Average reward against last agent is -3.367
EPISODE 44500 - Eval Random 999 over 1000 - Number of game played 223500 - 6 hours, 35 minutes and 11 seconds
########## Evaluation Against Random - Episode 44500 ##########
Timestep: 3572879 Average reward against random is 0.112
EPISODE 44500 - Eval Opponent 999 over 1000 - Number of game played 224500 - 6 hours, 36 minutes and 38 seconds
########## Evaluation Against Last Agent - Episode 44500 ##########
Timestep: 3572879 Average reward against last agent is -3.719
EPISODE 45000 - Eval Random 999 over 1000 - Number of 

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 46000 - Eval Random 999 over 1000 - Number of game played 231000 - 6 hours, 48 minutes and 52 seconds
########## Evaluation Against Random - Episode 46000 ##########
Timestep: 3692709 Average reward against random is 0.358
EPISODE 46000 - Eval Opponent 999 over 1000 - Number of game played 232000 - 6 hours, 50 minutes and 19 seconds
########## Evaluation Against Last Agent - Episode 46000 ##########
Timestep: 3692709 Average reward against last agent is -3.07
EPISODE 46500 - Eval Random 999 over 1000 - Number of game played 233500 - 6 hours, 53 minutes and 29 seconds
########## Evaluation Against Random - Episode 46500 ##########
Timestep: 3732675 Average reward against random is 0.46
EPISODE 46500 - Eval Opponent 999 over 1000 - Number of game played 234500 - 6 hours, 54 minutes and 56 seconds
########## Evaluation Against Last Agent - Episode 46500 ##########
Timestep: 3732675 Average reward against last agent is -1.652
EPISODE 47000 - Eval Random 999 over 1000 - Number of ga

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 48500 - Eval Random 999 over 1000 - Number of game played 243500 - 7 hours, 11 minutes and 58 seconds
########## Evaluation Against Random - Episode 48500 ##########
Timestep: 3892566 Average reward against random is 0.383
EPISODE 48500 - Eval Opponent 999 over 1000 - Number of game played 244500 - 7 hours, 13 minutes and 24 seconds
########## Evaluation Against Last Agent - Episode 48500 ##########
Timestep: 3892566 Average reward against last agent is -2.281
EPISODE 49000 - Eval Random 999 over 1000 - Number of game played 246000 - 7 hours, 16 minutes and 33 seconds
########## Evaluation Against Random - Episode 49000 ##########
Timestep: 3932443 Average reward against random is -0.067
EPISODE 49000 - Eval Opponent 999 over 1000 - Number of game played 247000 - 7 hours, 17 minutes and 58 seconds
########## Evaluation Against Last Agent - Episode 49000 ##########
Timestep: 3932443 Average reward against last agent is 2.323


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 49500 - Eval Random 999 over 1000 - Number of game played 248500 - 7 hours, 21 minutes and 7 secondss
########## Evaluation Against Random - Episode 49500 ##########
Timestep: 3972289 Average reward against random is 0.905
EPISODE 49500 - Eval Opponent 999 over 1000 - Number of game played 249500 - 7 hours, 22 minutes and 31 seconds
########## Evaluation Against Last Agent - Episode 49500 ##########
Timestep: 3972289 Average reward against last agent is 2.204


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 50000 - Eval Random 999 over 1000 - Number of game played 251000 - 7 hours, 25 minutes and 41 seconds
########## Evaluation Against Random - Episode 50000 ##########
Timestep: 4012230 Average reward against random is 0.455
EPISODE 50000 - Eval Opponent 999 over 1000 - Number of game played 252000 - 7 hours, 27 minutes and 8 secondss
########## Evaluation Against Last Agent - Episode 50000 ##########
Timestep: 4012230 Average reward against last agent is -2.412
EPISODE 50500 - Eval Random 999 over 1000 - Number of game played 253500 - 7 hours, 30 minutes and 21 seconds
########## Evaluation Against Random - Episode 50500 ##########
Timestep: 4052092 Average reward against random is -0.823
EPISODE 50500 - Eval Opponent 999 over 1000 - Number of game played 254500 - 7 hours, 31 minutes and 46 seconds
########## Evaluation Against Last Agent - Episode 50500 ##########
Timestep: 4052092 Average reward against last agent is 2.348


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 51000 - Eval Random 999 over 1000 - Number of game played 256000 - 7 hours, 34 minutes and 54 seconds
########## Evaluation Against Random - Episode 51000 ##########
Timestep: 4091984 Average reward against random is -1.341
EPISODE 51000 - Eval Opponent 999 over 1000 - Number of game played 257000 - 7 hours, 36 minutes and 17 seconds
########## Evaluation Against Last Agent - Episode 51000 ##########
Timestep: 4091984 Average reward against last agent is 2.637


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 51500 - Eval Random 999 over 1000 - Number of game played 258500 - 7 hours, 39 minutes and 25 seconds
########## Evaluation Against Random - Episode 51500 ##########
Timestep: 4131879 Average reward against random is 0.513
EPISODE 51500 - Eval Opponent 999 over 1000 - Number of game played 259500 - 7 hours, 40 minutes and 49 seconds
########## Evaluation Against Last Agent - Episode 51500 ##########
Timestep: 4131879 Average reward against last agent is 2.311


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 52000 - Eval Random 999 over 1000 - Number of game played 261000 - 7 hours, 43 minutes and 55 seconds
########## Evaluation Against Random - Episode 52000 ##########
Timestep: 4171763 Average reward against random is -0.903
EPISODE 52000 - Eval Opponent 999 over 1000 - Number of game played 262000 - 7 hours, 45 minutes and 21 seconds
########## Evaluation Against Last Agent - Episode 52000 ##########
Timestep: 4171763 Average reward against last agent is -4.142
EPISODE 52500 - Eval Random 999 over 1000 - Number of game played 263500 - 7 hours, 48 minutes and 28 seconds
########## Evaluation Against Random - Episode 52500 ##########
Timestep: 4211617 Average reward against random is 0.304
EPISODE 52500 - Eval Opponent 999 over 1000 - Number of game played 264500 - 7 hours, 49 minutes and 55 seconds
########## Evaluation Against Last Agent - Episode 52500 ##########
Timestep: 4211617 Average reward against last agent is -3.845
EPISODE 53000 - Eval Random 999 over 1000 - Number of

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 53500 - Eval Random 999 over 1000 - Number of game played 268500 - 7 hours, 57 minutes and 36 seconds
########## Evaluation Against Random - Episode 53500 ##########
Timestep: 4291432 Average reward against random is 1.417
EPISODE 53500 - Eval Opponent 999 over 1000 - Number of game played 269500 - 7 hours, 59 minutes and 0 secondds
########## Evaluation Against Last Agent - Episode 53500 ##########
Timestep: 4291432 Average reward against last agent is 2.703


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 54000 - Eval Random 999 over 1000 - Number of game played 271000 - 8 hours, 2 minutes and 12 seconds
########## Evaluation Against Random - Episode 54000 ##########
Timestep: 4331425 Average reward against random is 1.749
EPISODE 54000 - Eval Opponent 999 over 1000 - Number of game played 272000 - 8 hours, 3 minutes and 36 seconds
########## Evaluation Against Last Agent - Episode 54000 ##########
Timestep: 4331425 Average reward against last agent is 2.523


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 54500 - Eval Random 999 over 1000 - Number of game played 273500 - 8 hours, 6 minutes and 47 seconds
########## Evaluation Against Random - Episode 54500 ##########
Timestep: 4371409 Average reward against random is 2.17
EPISODE 54500 - Eval Opponent 999 over 1000 - Number of game played 274500 - 8 hours, 8 minutes and 11 seconds
########## Evaluation Against Last Agent - Episode 54500 ##########
Timestep: 4371409 Average reward against last agent is 2.611


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 55000 - Eval Random 999 over 1000 - Number of game played 276000 - 8 hours, 11 minutes and 24 seconds
########## Evaluation Against Random - Episode 55000 ##########
Timestep: 4411444 Average reward against random is 1.998
EPISODE 55000 - Eval Opponent 999 over 1000 - Number of game played 277000 - 8 hours, 12 minutes and 52 seconds
########## Evaluation Against Last Agent - Episode 55000 ##########
Timestep: 4411444 Average reward against last agent is 0.619
EPISODE 55500 - Eval Random 999 over 1000 - Number of game played 278500 - 8 hours, 16 minutes and 7 secondss
########## Evaluation Against Random - Episode 55500 ##########
Timestep: 4451472 Average reward against random is 1.768
EPISODE 55500 - Eval Opponent 999 over 1000 - Number of game played 279500 - 8 hours, 17 minutes and 34 seconds
########## Evaluation Against Last Agent - Episode 55500 ##########
Timestep: 4451472 Average reward against last agent is 0.808
EPISODE 56000 - Eval Random 999 over 1000 - Number of ga

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 57000 - Eval Random 999 over 1000 - Number of game played 286000 - 8 hours, 30 minutes and 6 secondss
########## Evaluation Against Random - Episode 57000 ##########
Timestep: 4571551 Average reward against random is -0.324
EPISODE 57000 - Eval Opponent 999 over 1000 - Number of game played 287000 - 8 hours, 31 minutes and 30 seconds
########## Evaluation Against Last Agent - Episode 57000 ##########
Timestep: 4571551 Average reward against last agent is 2.488


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 57500 - Eval Random 999 over 1000 - Number of game played 288500 - 8 hours, 34 minutes and 45 seconds
########## Evaluation Against Random - Episode 57500 ##########
Timestep: 4611643 Average reward against random is 2.02
EPISODE 57500 - Eval Opponent 999 over 1000 - Number of game played 289500 - 8 hours, 36 minutes and 17 seconds
########## Evaluation Against Last Agent - Episode 57500 ##########
Timestep: 4611643 Average reward against last agent is 0.213
EPISODE 58000 - Eval Random 999 over 1000 - Number of game played 291000 - 8 hours, 39 minutes and 34 seconds
########## Evaluation Against Random - Episode 58000 ##########
Timestep: 4651801 Average reward against random is 0.991
EPISODE 58000 - Eval Opponent 999 over 1000 - Number of game played 292000 - 8 hours, 41 minutes and 1 secondds
########## Evaluation Against Last Agent - Episode 58000 ##########
Timestep: 4651801 Average reward against last agent is -1.402
EPISODE 58500 - Eval Random 999 over 1000 - Number of ga

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 59500 - Eval Random 999 over 1000 - Number of game played 298500 - 8 hours, 53 minutes and 37 seconds
########## Evaluation Against Random - Episode 59500 ##########
Timestep: 4771677 Average reward against random is -1.11
EPISODE 59500 - Eval Opponent 999 over 1000 - Number of game played 299500 - 8 hours, 55 minutes and 1 secondds
########## Evaluation Against Last Agent - Episode 59500 ##########
Timestep: 4771677 Average reward against last agent is 2.056
EPISODE 60000 - Eval Random 999 over 1000 - Number of game played 301000 - 8 hours, 58 minutes and 16 seconds
########## Evaluation Against Random - Episode 60000 ##########
Timestep: 4811529 Average reward against random is -1.645
EPISODE 60000 - Eval Opponent 999 over 1000 - Number of game played 302000 - 8 hours, 59 minutes and 41 seconds
########## Evaluation Against Last Agent - Episode 60000 ##########
Timestep: 4811529 Average reward against last agent is 2.45


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 60500 - Eval Random 999 over 1000 - Number of game played 303500 - 9 hours, 2 minutes and 55 seconds
########## Evaluation Against Random - Episode 60500 ##########
Timestep: 4851281 Average reward against random is -2.076
EPISODE 60500 - Eval Opponent 999 over 1000 - Number of game played 304500 - 9 hours, 4 minutes and 20 seconds
########## Evaluation Against Last Agent - Episode 60500 ##########
Timestep: 4851281 Average reward against last agent is -6.696
EPISODE 61000 - Eval Random 999 over 1000 - Number of game played 306000 - 9 hours, 7 minutes and 36 seconds
########## Evaluation Against Random - Episode 61000 ##########
Timestep: 4891044 Average reward against random is -1.296
EPISODE 61000 - Eval Opponent 999 over 1000 - Number of game played 307000 - 9 hours, 9 minutes and 2 secondss
########## Evaluation Against Last Agent - Episode 61000 ##########
Timestep: 4891044 Average reward against last agent is -5.742
EPISODE 61500 - Eval Random 999 over 1000 - Number of ga

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 62500 - Eval Random 999 over 1000 - Number of game played 313500 - 9 hours, 21 minutes and 45 seconds
########## Evaluation Against Random - Episode 62500 ##########
Timestep: 5010699 Average reward against random is 1.19
EPISODE 62500 - Eval Opponent 999 over 1000 - Number of game played 314500 - 9 hours, 23 minutes and 10 seconds
########## Evaluation Against Last Agent - Episode 62500 ##########
Timestep: 5010699 Average reward against last agent is 2.775


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 63000 - Eval Random 999 over 1000 - Number of game played 316000 - 9 hours, 26 minutes and 28 seconds
########## Evaluation Against Random - Episode 63000 ##########
Timestep: 5050660 Average reward against random is 2.044
EPISODE 63000 - Eval Opponent 999 over 1000 - Number of game played 317000 - 9 hours, 27 minutes and 56 seconds
########## Evaluation Against Last Agent - Episode 63000 ##########
Timestep: 5050660 Average reward against last agent is 0.535
EPISODE 63500 - Eval Random 999 over 1000 - Number of game played 318500 - 9 hours, 31 minutes and 14 seconds
########## Evaluation Against Random - Episode 63500 ##########
Timestep: 5090658 Average reward against random is 0.998
EPISODE 63500 - Eval Opponent 999 over 1000 - Number of game played 319500 - 9 hours, 32 minutes and 40 seconds
########## Evaluation Against Last Agent - Episode 63500 ##########
Timestep: 5090658 Average reward against last agent is -0.795
EPISODE 64000 - Eval Random 999 over 1000 - Number of g

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 64500 - Eval Random 999 over 1000 - Number of game played 323500 - 9 hours, 40 minutes and 48 seconds
########## Evaluation Against Random - Episode 64500 ##########
Timestep: 5170698 Average reward against random is 2.017
EPISODE 64500 - Eval Opponent 999 over 1000 - Number of game played 324500 - 9 hours, 42 minutes and 17 seconds
########## Evaluation Against Last Agent - Episode 64500 ##########
Timestep: 5170698 Average reward against last agent is 1.116
EPISODE 65000 - Eval Random 999 over 1000 - Number of game played 326000 - 9 hours, 45 minutes and 41 seconds
########## Evaluation Against Random - Episode 65000 ##########
Timestep: 5210772 Average reward against random is 1.05
EPISODE 65000 - Eval Opponent 999 over 1000 - Number of game played 327000 - 9 hours, 47 minutes and 7 secondss
########## Evaluation Against Last Agent - Episode 65000 ##########
Timestep: 5210772 Average reward against last agent is 0.114
EPISODE 65500 - Eval Random 999 over 1000 - Number of gam

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 66000 - Eval Random 999 over 1000 - Number of game played 331000 - 9 hours, 55 minutes and 22 seconds
########## Evaluation Against Random - Episode 66000 ##########
Timestep: 5290985 Average reward against random is 2.238
EPISODE 66000 - Eval Opponent 999 over 1000 - Number of game played 332000 - 9 hours, 56 minutes and 50 seconds
########## Evaluation Against Last Agent - Episode 66000 ##########
Timestep: 5290985 Average reward against last agent is 1.54
EPISODE 66500 - Eval Random 999 over 1000 - Number of game played 333500 - 10 hours, 0 minute and 18 secondss
########## Evaluation Against Random - Episode 66500 ##########
Timestep: 5331179 Average reward against random is 2.016
EPISODE 66500 - Eval Opponent 999 over 1000 - Number of game played 334500 - 10 hours, 1 minute and 47 seconds
########## Evaluation Against Last Agent - Episode 66500 ##########
Timestep: 5331179 Average reward against last agent is 1.265
EPISODE 67000 - Eval Random 999 over 1000 - Number of game

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 67500 - Eval Random 999 over 1000 - Number of game played 338500 - 10 hours, 10 minutes and 1 seconds
########## Evaluation Against Random - Episode 67500 ##########
Timestep: 5411279 Average reward against random is 0.053
EPISODE 67500 - Eval Opponent 999 over 1000 - Number of game played 339500 - 10 hours, 11 minutes and 27 seconds
########## Evaluation Against Last Agent - Episode 67500 ##########
Timestep: 5411279 Average reward against last agent is -2.56
EPISODE 68000 - Eval Random 999 over 1000 - Number of game played 341000 - 10 hours, 14 minutes and 55 seconds
########## Evaluation Against Random - Episode 68000 ##########
Timestep: 5451146 Average reward against random is 0.064
EPISODE 68000 - Eval Opponent 999 over 1000 - Number of game played 342000 - 10 hours, 16 minutes and 19 seconds
########## Evaluation Against Last Agent - Episode 68000 ##########
Timestep: 5451146 Average reward against last agent is 2.727


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 68500 - Eval Random 999 over 1000 - Number of game played 343500 - 10 hours, 19 minutes and 43 seconds
########## Evaluation Against Random - Episode 68500 ##########
Timestep: 5491066 Average reward against random is 0.719
EPISODE 68500 - Eval Opponent 999 over 1000 - Number of game played 344500 - 10 hours, 21 minutes and 10 seconds
########## Evaluation Against Last Agent - Episode 68500 ##########
Timestep: 5491066 Average reward against last agent is -1.451
EPISODE 69000 - Eval Random 999 over 1000 - Number of game played 346000 - 10 hours, 24 minutes and 31 seconds
########## Evaluation Against Random - Episode 69000 ##########
Timestep: 5531003 Average reward against random is -0.11
EPISODE 69000 - Eval Opponent 999 over 1000 - Number of game played 347000 - 10 hours, 25 minutes and 55 seconds
########## Evaluation Against Last Agent - Episode 69000 ##########
Timestep: 5531003 Average reward against last agent is 2.466


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 69500 - Eval Random 999 over 1000 - Number of game played 348500 - 10 hours, 29 minutes and 11 seconds
########## Evaluation Against Random - Episode 69500 ##########
Timestep: 5570928 Average reward against random is 0.68
EPISODE 69500 - Eval Opponent 999 over 1000 - Number of game played 349500 - 10 hours, 30 minutes and 37 seconds
########## Evaluation Against Last Agent - Episode 69500 ##########
Timestep: 5570928 Average reward against last agent is -1.916
EPISODE 70000 - Eval Random 999 over 1000 - Number of game played 351000 - 10 hours, 33 minutes and 55 seconds
########## Evaluation Against Random - Episode 70000 ##########
Timestep: 5610869 Average reward against random is 1.801
EPISODE 70000 - Eval Opponent 999 over 1000 - Number of game played 352000 - 10 hours, 35 minutes and 21 seconds
########## Evaluation Against Last Agent - Episode 70000 ##########
Timestep: 5610869 Average reward against last agent is 0.751
EPISODE 70500 - Eval Random 999 over 1000 - Number o

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 71000 - Eval Random 999 over 1000 - Number of game played 356000 - 10 hours, 43 minutes and 26 seconds
########## Evaluation Against Random - Episode 71000 ##########
Timestep: 5690984 Average reward against random is 2.212
EPISODE 71000 - Eval Opponent 999 over 1000 - Number of game played 357000 - 10 hours, 44 minutes and 50 seconds
########## Evaluation Against Last Agent - Episode 71000 ##########
Timestep: 5690984 Average reward against last agent is 2.587


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 71500 - Eval Random 999 over 1000 - Number of game played 358500 - 10 hours, 48 minutes and 11 seconds
########## Evaluation Against Random - Episode 71500 ##########
Timestep: 5731058 Average reward against random is 2.051
EPISODE 71500 - Eval Opponent 999 over 1000 - Number of game played 359500 - 10 hours, 49 minutes and 35 seconds
########## Evaluation Against Last Agent - Episode 71500 ##########
Timestep: 5731058 Average reward against last agent is 2.505


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 72000 - Eval Random 999 over 1000 - Number of game played 361000 - 10 hours, 53 minutes and 1 secondds
########## Evaluation Against Random - Episode 72000 ##########
Timestep: 5771078 Average reward against random is 1.479
EPISODE 72000 - Eval Opponent 999 over 1000 - Number of game played 362000 - 10 hours, 54 minutes and 25 seconds
########## Evaluation Against Last Agent - Episode 72000 ##########
Timestep: 5771078 Average reward against last agent is 2.252


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 72500 - Eval Random 999 over 1000 - Number of game played 363500 - 10 hours, 57 minutes and 47 seconds
########## Evaluation Against Random - Episode 72500 ##########
Timestep: 5811088 Average reward against random is 1.656
EPISODE 72500 - Eval Opponent 999 over 1000 - Number of game played 364500 - 10 hours, 59 minutes and 14 seconds
########## Evaluation Against Last Agent - Episode 72500 ##########
Timestep: 5811088 Average reward against last agent is 0.019
EPISODE 73000 - Eval Random 999 over 1000 - Number of game played 366000 - 11 hours, 2 minutes and 39 seconds
########## Evaluation Against Random - Episode 73000 ##########
Timestep: 5851042 Average reward against random is 0.99
EPISODE 73000 - Eval Opponent 999 over 1000 - Number of game played 367000 - 11 hours, 4 minutes and 5 secondss
########## Evaluation Against Last Agent - Episode 73000 ##########
Timestep: 5851042 Average reward against last agent is -0.946
EPISODE 73500 - Eval Random 999 over 1000 - Number of 

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 74000 - Eval Random 999 over 1000 - Number of game played 371000 - 11 hours, 12 minutes and 22 seconds
########## Evaluation Against Random - Episode 74000 ##########
Timestep: 5930984 Average reward against random is 0.734
EPISODE 74000 - Eval Opponent 999 over 1000 - Number of game played 372000 - 11 hours, 13 minutes and 48 seconds
########## Evaluation Against Last Agent - Episode 74000 ##########
Timestep: 5930984 Average reward against last agent is -0.687
EPISODE 74500 - Eval Random 999 over 1000 - Number of game played 373500 - 11 hours, 17 minutes and 13 seconds
########## Evaluation Against Random - Episode 74500 ##########
Timestep: 5970933 Average reward against random is 0.507
EPISODE 74500 - Eval Opponent 999 over 1000 - Number of game played 374500 - 11 hours, 18 minutes and 37 seconds
########## Evaluation Against Last Agent - Episode 74500 ##########
Timestep: 5970933 Average reward against last agent is 2.505


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 75000 - Eval Random 999 over 1000 - Number of game played 376000 - 11 hours, 22 minutes and 3 secondss
########## Evaluation Against Random - Episode 75000 ##########
Timestep: 6010862 Average reward against random is -0.309
EPISODE 75000 - Eval Opponent 999 over 1000 - Number of game played 377000 - 11 hours, 23 minutes and 29 seconds
########## Evaluation Against Last Agent - Episode 75000 ##########
Timestep: 6010862 Average reward against last agent is -3.175
EPISODE 75500 - Eval Random 999 over 1000 - Number of game played 378500 - 11 hours, 27 minutes and 1 secondds
########## Evaluation Against Random - Episode 75500 ##########
Timestep: 6050827 Average reward against random is -0.494
EPISODE 75500 - Eval Opponent 999 over 1000 - Number of game played 379500 - 11 hours, 28 minutes and 25 seconds
########## Evaluation Against Last Agent - Episode 75500 ##########
Timestep: 6050827 Average reward against last agent is 2.445


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 76000 - Eval Random 999 over 1000 - Number of game played 381000 - 11 hours, 31 minutes and 53 seconds
########## Evaluation Against Random - Episode 76000 ##########
Timestep: 6090843 Average reward against random is 0.219
EPISODE 76000 - Eval Opponent 999 over 1000 - Number of game played 382000 - 11 hours, 33 minutes and 19 seconds
########## Evaluation Against Last Agent - Episode 76000 ##########
Timestep: 6090843 Average reward against last agent is -2.568
EPISODE 76500 - Eval Random 999 over 1000 - Number of game played 383500 - 11 hours, 36 minutes and 48 seconds
########## Evaluation Against Random - Episode 76500 ##########
Timestep: 6130800 Average reward against random is 0.128
EPISODE 76500 - Eval Opponent 999 over 1000 - Number of game played 384500 - 11 hours, 38 minutes and 13 seconds
########## Evaluation Against Last Agent - Episode 76500 ##########
Timestep: 6130800 Average reward against last agent is 2.517


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 77000 - Eval Random 999 over 1000 - Number of game played 386000 - 11 hours, 41 minutes and 45 seconds
########## Evaluation Against Random - Episode 77000 ##########
Timestep: 6170744 Average reward against random is 0.18
EPISODE 77000 - Eval Opponent 999 over 1000 - Number of game played 387000 - 11 hours, 43 minutes and 9 secondss
########## Evaluation Against Last Agent - Episode 77000 ##########
Timestep: 6170744 Average reward against last agent is 2.499


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 77500 - Eval Random 999 over 1000 - Number of game played 388500 - 11 hours, 46 minutes and 46 seconds
########## Evaluation Against Random - Episode 77500 ##########
Timestep: 6210739 Average reward against random is 0.776
EPISODE 77500 - Eval Opponent 999 over 1000 - Number of game played 389500 - 11 hours, 48 minutes and 10 seconds
########## Evaluation Against Last Agent - Episode 77500 ##########
Timestep: 6210739 Average reward against last agent is 2.38


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 78000 - Eval Random 999 over 1000 - Number of game played 391000 - 11 hours, 51 minutes and 43 seconds
########## Evaluation Against Random - Episode 78000 ##########
Timestep: 6250702 Average reward against random is 1.039
EPISODE 78000 - Eval Opponent 999 over 1000 - Number of game played 392000 - 11 hours, 53 minutes and 7 secondss
########## Evaluation Against Last Agent - Episode 78000 ##########
Timestep: 6250702 Average reward against last agent is 2.349


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 78500 - Eval Random 999 over 1000 - Number of game played 393500 - 11 hours, 56 minutes and 39 seconds
########## Evaluation Against Random - Episode 78500 ##########
Timestep: 6290665 Average reward against random is 1.354
EPISODE 78500 - Eval Opponent 999 over 1000 - Number of game played 394500 - 11 hours, 58 minutes and 3 secondss
########## Evaluation Against Last Agent - Episode 78500 ##########
Timestep: 6290665 Average reward against last agent is 2.281


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 79000 - Eval Random 999 over 1000 - Number of game played 396000 - 12 hours, 1 minute and 37 seconds
########## Evaluation Against Random - Episode 79000 ##########
Timestep: 6330615 Average reward against random is 1.57
EPISODE 79000 - Eval Opponent 999 over 1000 - Number of game played 397000 - 12 hours, 3 minutes and 8 secondss
########## Evaluation Against Last Agent - Episode 79000 ##########
Timestep: 6330615 Average reward against last agent is -0.988
EPISODE 79500 - Eval Random 999 over 1000 - Number of game played 398500 - 12 hours, 6 minutes and 48 seconds
########## Evaluation Against Random - Episode 79500 ##########
Timestep: 6370638 Average reward against random is 1.692
EPISODE 79500 - Eval Opponent 999 over 1000 - Number of game played 399500 - 12 hours, 8 minutes and 13 seconds
########## Evaluation Against Last Agent - Episode 79500 ##########
Timestep: 6370638 Average reward against last agent is 2.596


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 80000 - Eval Random 999 over 1000 - Number of game played 401000 - 12 hours, 11 minutes and 47 seconds
########## Evaluation Against Random - Episode 80000 ##########
Timestep: 6410735 Average reward against random is 1.824
EPISODE 80000 - Eval Opponent 999 over 1000 - Number of game played 402000 - 12 hours, 13 minutes and 11 seconds
########## Evaluation Against Last Agent - Episode 80000 ##########
Timestep: 6410735 Average reward against last agent is 2.656


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 80500 - Eval Random 999 over 1000 - Number of game played 403500 - 12 hours, 16 minutes and 53 seconds
########## Evaluation Against Random - Episode 80500 ##########
Timestep: 6450812 Average reward against random is 1.553
EPISODE 80500 - Eval Opponent 999 over 1000 - Number of game played 404500 - 12 hours, 18 minutes and 17 seconds
########## Evaluation Against Last Agent - Episode 80500 ##########
Timestep: 6450812 Average reward against last agent is 2.645


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 81000 - Eval Random 999 over 1000 - Number of game played 406000 - 12 hours, 21 minutes and 54 seconds
########## Evaluation Against Random - Episode 81000 ##########
Timestep: 6490802 Average reward against random is 1.767
EPISODE 81000 - Eval Opponent 999 over 1000 - Number of game played 407000 - 12 hours, 23 minutes and 18 seconds
########## Evaluation Against Last Agent - Episode 81000 ##########
Timestep: 6490802 Average reward against last agent is 2.471


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 81500 - Eval Random 999 over 1000 - Number of game played 408500 - 12 hours, 26 minutes and 55 seconds
########## Evaluation Against Random - Episode 81500 ##########
Timestep: 6530833 Average reward against random is 1.394
EPISODE 81500 - Eval Opponent 999 over 1000 - Number of game played 409500 - 12 hours, 28 minutes and 22 seconds
########## Evaluation Against Last Agent - Episode 81500 ##########
Timestep: 6530833 Average reward against last agent is 0.055
EPISODE 82000 - Eval Random 999 over 1000 - Number of game played 411000 - 12 hours, 32 minutes and 8 secondss
########## Evaluation Against Random - Episode 82000 ##########
Timestep: 6570985 Average reward against random is 1.783
EPISODE 82000 - Eval Opponent 999 over 1000 - Number of game played 412000 - 12 hours, 33 minutes and 32 seconds
########## Evaluation Against Last Agent - Episode 82000 ##########
Timestep: 6570985 Average reward against last agent is 2.603


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 82500 - Eval Random 999 over 1000 - Number of game played 413500 - 12 hours, 37 minutes and 12 seconds
########## Evaluation Against Random - Episode 82500 ##########
Timestep: 6611013 Average reward against random is 1.35
EPISODE 82500 - Eval Opponent 999 over 1000 - Number of game played 414500 - 12 hours, 38 minutes and 39 seconds
########## Evaluation Against Last Agent - Episode 82500 ##########
Timestep: 6611013 Average reward against last agent is 0.818
EPISODE 83000 - Eval Random 999 over 1000 - Number of game played 416000 - 12 hours, 42 minutes and 28 seconds
########## Evaluation Against Random - Episode 83000 ##########
Timestep: 6651182 Average reward against random is 1.752
EPISODE 83000 - Eval Opponent 999 over 1000 - Number of game played 417000 - 12 hours, 43 minutes and 59 seconds
########## Evaluation Against Last Agent - Episode 83000 ##########
Timestep: 6651182 Average reward against last agent is 0.383
EPISODE 83500 - Eval Random 999 over 1000 - Number of

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 86000 - Eval Random 999 over 1000 - Number of game played 431000 - 13 hours, 14 minutes and 6 secondss
########## Evaluation Against Random - Episode 86000 ##########
Timestep: 6891546 Average reward against random is 0.104
EPISODE 86000 - Eval Opponent 999 over 1000 - Number of game played 432000 - 13 hours, 15 minutes and 30 seconds
########## Evaluation Against Last Agent - Episode 86000 ##########
Timestep: 6891546 Average reward against last agent is 2.502


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 86500 - Eval Random 999 over 1000 - Number of game played 433500 - 13 hours, 19 minutes and 19 seconds
########## Evaluation Against Random - Episode 86500 ##########
Timestep: 6931476 Average reward against random is -0.468
EPISODE 86500 - Eval Opponent 999 over 1000 - Number of game played 434500 - 13 hours, 20 minutes and 44 seconds
########## Evaluation Against Last Agent - Episode 86500 ##########
Timestep: 6931476 Average reward against last agent is 2.489


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 87000 - Eval Random 999 over 1000 - Number of game played 436000 - 13 hours, 24 minutes and 24 seconds
########## Evaluation Against Random - Episode 87000 ##########
Timestep: 6971393 Average reward against random is -0.113
EPISODE 87000 - Eval Opponent 999 over 1000 - Number of game played 437000 - 13 hours, 25 minutes and 50 seconds
########## Evaluation Against Last Agent - Episode 87000 ##########
Timestep: 6971393 Average reward against last agent is -2.773
EPISODE 87500 - Eval Random 999 over 1000 - Number of game played 438500 - 13 hours, 29 minutes and 29 seconds
########## Evaluation Against Random - Episode 87500 ##########
Timestep: 7011403 Average reward against random is -0.324
EPISODE 87500 - Eval Opponent 999 over 1000 - Number of game played 439500 - 13 hours, 30 minutes and 54 seconds
########## Evaluation Against Last Agent - Episode 87500 ##########
Timestep: 7011403 Average reward against last agent is -3.529
EPISODE 88000 - Eval Random 999 over 1000 - Numb

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 89000 - Eval Random 999 over 1000 - Number of game played 446000 - 13 hours, 44 minutes and 47 seconds
########## Evaluation Against Random - Episode 89000 ##########
Timestep: 7131468 Average reward against random is 0.498
EPISODE 89000 - Eval Opponent 999 over 1000 - Number of game played 447000 - 13 hours, 46 minutes and 11 seconds
########## Evaluation Against Last Agent - Episode 89000 ##########
Timestep: 7131468 Average reward against last agent is 2.556


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 89500 - Eval Random 999 over 1000 - Number of game played 448500 - 13 hours, 49 minutes and 50 seconds
########## Evaluation Against Random - Episode 89500 ##########
Timestep: 7171408 Average reward against random is 0.856
EPISODE 89500 - Eval Opponent 999 over 1000 - Number of game played 449500 - 13 hours, 51 minutes and 16 seconds
########## Evaluation Against Last Agent - Episode 89500 ##########
Timestep: 7171408 Average reward against last agent is -2.097
EPISODE 90000 - Eval Random 999 over 1000 - Number of game played 451000 - 13 hours, 54 minutes and 54 seconds
########## Evaluation Against Random - Episode 90000 ##########
Timestep: 7211413 Average reward against random is 1.865
EPISODE 90000 - Eval Opponent 999 over 1000 - Number of game played 452000 - 13 hours, 56 minutes and 19 seconds
########## Evaluation Against Last Agent - Episode 90000 ##########
Timestep: 7211413 Average reward against last agent is 2.414


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 90500 - Eval Random 999 over 1000 - Number of game played 453500 - 13 hours, 59 minutes and 58 seconds
########## Evaluation Against Random - Episode 90500 ##########
Timestep: 7251444 Average reward against random is 1.701
EPISODE 90500 - Eval Opponent 999 over 1000 - Number of game played 454500 - 14 hours, 1 minute and 22 secondss
########## Evaluation Against Last Agent - Episode 90500 ##########
Timestep: 7251444 Average reward against last agent is 2.515


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 91000 - Eval Random 999 over 1000 - Number of game played 456000 - 14 hours, 5 minutes and 9 secondss
########## Evaluation Against Random - Episode 91000 ##########
Timestep: 7291448 Average reward against random is 0.882
EPISODE 91000 - Eval Opponent 999 over 1000 - Number of game played 457000 - 14 hours, 6 minutes and 36 seconds
########## Evaluation Against Last Agent - Episode 91000 ##########
Timestep: 7291448 Average reward against last agent is -0.845
EPISODE 91500 - Eval Random 999 over 1000 - Number of game played 458500 - 14 hours, 10 minutes and 17 seconds
########## Evaluation Against Random - Episode 91500 ##########
Timestep: 7331471 Average reward against random is 1.344
EPISODE 91500 - Eval Opponent 999 over 1000 - Number of game played 459500 - 14 hours, 11 minutes and 43 seconds
########## Evaluation Against Last Agent - Episode 91500 ##########
Timestep: 7331471 Average reward against last agent is -0.901
EPISODE 92000 - Eval Random 999 over 1000 - Number o

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 93000 - Eval Random 999 over 1000 - Number of game played 466000 - 14 hours, 25 minutes and 47 seconds
########## Evaluation Against Random - Episode 93000 ##########
Timestep: 7451526 Average reward against random is 1.834
EPISODE 93000 - Eval Opponent 999 over 1000 - Number of game played 467000 - 14 hours, 27 minutes and 15 seconds
########## Evaluation Against Last Agent - Episode 93000 ##########
Timestep: 7451526 Average reward against last agent is 0.936
EPISODE 93500 - Eval Random 999 over 1000 - Number of game played 468500 - 14 hours, 31 minutes and 4 secondss
########## Evaluation Against Random - Episode 93500 ##########
Timestep: 7491550 Average reward against random is 1.165
EPISODE 93500 - Eval Opponent 999 over 1000 - Number of game played 469500 - 14 hours, 32 minutes and 28 seconds
########## Evaluation Against Last Agent - Episode 93500 ##########
Timestep: 7491550 Average reward against last agent is 2.835


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 94000 - Eval Random 999 over 1000 - Number of game played 471000 - 14 hours, 36 minutes and 21 seconds
########## Evaluation Against Random - Episode 94000 ##########
Timestep: 7531510 Average reward against random is 1.377
EPISODE 94000 - Eval Opponent 999 over 1000 - Number of game played 472000 - 14 hours, 37 minutes and 48 seconds
########## Evaluation Against Last Agent - Episode 94000 ##########
Timestep: 7531510 Average reward against last agent is -0.065
EPISODE 94500 - Eval Random 999 over 1000 - Number of game played 473500 - 14 hours, 41 minutes and 35 seconds
########## Evaluation Against Random - Episode 94500 ##########
Timestep: 7571499 Average reward against random is 1.273
EPISODE 94500 - Eval Opponent 999 over 1000 - Number of game played 474500 - 14 hours, 43 minutes and 1 secondds
########## Evaluation Against Last Agent - Episode 94500 ##########
Timestep: 7571499 Average reward against last agent is -0.765
EPISODE 95000 - Eval Random 999 over 1000 - Number

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 96000 - Eval Random 999 over 1000 - Number of game played 481000 - 14 hours, 57 minutes and 32 seconds
########## Evaluation Against Random - Episode 96000 ##########
Timestep: 7691432 Average reward against random is -0.401
EPISODE 96000 - Eval Opponent 999 over 1000 - Number of game played 482000 - 14 hours, 58 minutes and 56 seconds
########## Evaluation Against Last Agent - Episode 96000 ##########
Timestep: 7691432 Average reward against last agent is 2.839


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 96500 - Eval Random 999 over 1000 - Number of game played 483500 - 15 hours, 2 minutes and 50 seconds
########## Evaluation Against Random - Episode 96500 ##########
Timestep: 7731396 Average reward against random is -0.368
EPISODE 96500 - Eval Opponent 999 over 1000 - Number of game played 484500 - 15 hours, 4 minutes and 15 seconds
########## Evaluation Against Last Agent - Episode 96500 ##########
Timestep: 7731396 Average reward against last agent is 2.684


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 97000 - Eval Random 999 over 1000 - Number of game played 486000 - 15 hours, 8 minutes and 7 secondss
########## Evaluation Against Random - Episode 97000 ##########
Timestep: 7771322 Average reward against random is 0.475
EPISODE 97000 - Eval Opponent 999 over 1000 - Number of game played 487000 - 15 hours, 9 minutes and 34 seconds
########## Evaluation Against Last Agent - Episode 97000 ##########
Timestep: 7771322 Average reward against last agent is -1.973
EPISODE 97500 - Eval Random 999 over 1000 - Number of game played 488500 - 15 hours, 13 minutes and 30 seconds
########## Evaluation Against Random - Episode 97500 ##########
Timestep: 7811260 Average reward against random is -0.476
EPISODE 97500 - Eval Opponent 999 over 1000 - Number of game played 489500 - 15 hours, 14 minutes and 56 seconds
########## Evaluation Against Last Agent - Episode 97500 ##########
Timestep: 7811260 Average reward against last agent is -3.199
EPISODE 98000 - Eval Random 999 over 1000 - Number 

in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 98500 - Eval Random 999 over 1000 - Number of game played 493500 - 15 hours, 24 minutes and 24 seconds
########## Evaluation Against Random - Episode 98500 ##########
Timestep: 7891146 Average reward against random is 0.483
EPISODE 98500 - Eval Opponent 999 over 1000 - Number of game played 494500 - 15 hours, 25 minutes and 50 seconds
########## Evaluation Against Last Agent - Episode 98500 ##########
Timestep: 7891146 Average reward against last agent is -1.389
EPISODE 99000 - Eval Random 999 over 1000 - Number of game played 496000 - 15 hours, 29 minutes and 49 seconds
########## Evaluation Against Random - Episode 99000 ##########
Timestep: 7931125 Average reward against random is 0.794
EPISODE 99000 - Eval Opponent 999 over 1000 - Number of game played 497000 - 15 hours, 31 minutes and 15 seconds
########## Evaluation Against Last Agent - Episode 99000 ##########
Timestep: 7931125 Average reward against last agent is -0.703
EPISODE 99500 - Eval Random 999 over 1000 - Number