In [1]:
%cd ..

/home/jovyan/personal_project/RL-tarot


In [None]:
"""
An example of learning a Deep-Q Agent on French Tarot Game
"""
import os
import time

import tensorflow as tf

import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.agents.dqn_agent import DQNAgent
from rlcard.utils.logger import Logger
from rlcard.utils.utils import set_global_seed, time_difference_good_format

# Identifier of this run; it is embedded in the experiment and model paths
record_number = 12

# Make the training environment and a separate one used for evaluation
env, eval_env = rlcard.make('tarot'), rlcard.make('tarot')

# Schedule, counted in training episodes
episode_num = 100000        # length of the main training loop
evaluate_every = 500        # evaluate, save the model and plot every N episodes
evolve_model_every = 5000   # move on to the next self-play model every N episodes
evaluate_num = 1000         # games played per evaluation

# Index of the current self-play model, and the number of evaluation rounds
# that fit in one run (also used to pre-create the model folders)
self_play = 1
total_self_play_eval = episode_num // evaluate_every

# Number of steps for collecting normalization statistics,
# and initial replay memory size
norm_step = 1000
memory_init_size = 5000

# Where the logs, CSV files and learning-curve figures are written
root_path = './experiments/tarot_dqn_self_played_v{}/'.format(record_number)

# ... for the evaluation against random agents
log_path_random = os.path.join(root_path, 'log_random.txt')
csv_path_random = os.path.join(root_path, 'performance_random.csv')
figure_path_random = os.path.join(root_path, 'figures_random/')

# ... for the evaluation against the opponent agent
log_path_opponent = os.path.join(root_path, 'log_opponent.txt')
csv_path_opponent = os.path.join(root_path, 'performance_opponent.csv')
figure_path_opponent = os.path.join(root_path, 'figures_opponent/')

# Model save path
# ``os.makedirs(..., exist_ok=True)`` creates any missing parent folder and is
# a no-op when the folder already exists, so no ``os.path.exists`` pre-check
# (which is race-prone and needs one call per path level) is required.
os.makedirs('rlcard/models/pretrained', exist_ok=True)

# Pre-create one checkpoint folder per self-play index in
# 1..total_self_play_eval
for self_play_init in range(1, total_self_play_eval + 1):
    model_folder_path = 'rlcard/models/pretrained/self_played_{}/tarot_v{}'.format(
        str(record_number),
        str(record_number * 10000 + self_play_init))
    os.makedirs(model_folder_path, exist_ok=True)

# Checkpoint prefix for the current self-play index
model_path = 'rlcard/models/pretrained/self_played_{}/tarot_v{}/model'.format(
    str(record_number),
    str(record_number * 10000 + self_play))

# Set a global seed
set_global_seed(0)

# Agent used as opponent when evaluating against random play
random_agent = RandomAgent(action_num=eval_env.action_num)

def _run_evaluation(eval_env, agents, num_games, logger, figure_path, episode,
                    timestep, games_played, start_time,
                    progress_label, title_label, reward_label):
    """Play ``num_games`` evaluation games, then log and plot the outcome.

    Args:
        eval_env: evaluation environment; its agents are replaced by ``agents``.
        agents: list of agents seated in ``eval_env`` for this evaluation.
        num_games: number of evaluation games to play.
        logger: ``Logger`` receiving the text log, the curve point and the plots.
        figure_path: path prefix under which the figures are saved.
        episode: current training episode (used in messages and file names).
        timestep: x-coordinate of the point added to the learning curve.
        games_played: running count of games played so far.
        start_time: ``time.time()`` value taken when the run started.
        progress_label: opponent name shown in the console progress line.
        title_label: opponent name shown in the log header.
        reward_label: opponent name shown in the average-reward log line.

    Returns:
        tuple: ``(reward_list, taking_list, games_played)`` where
        ``reward_list`` holds ``payoffs[0]`` of every game, ``taking_list``
        holds the ``taking`` attribute of player 0 after every game, and
        ``games_played`` is the updated running count.
    """
    eval_env.set_agents(agents)

    reward_total = 0
    reward_list = []
    taking_list = []
    for eval_episode in range(num_games):
        # '\r' + end='' keeps the progress on a single console line
        print('\rEPISODE {} - Eval {} {} over {} - Number of game played {} - {}'.format(
            episode, progress_label, eval_episode, num_games, games_played,
            time_difference_good_format(start_time, time.time())),
            end='')
        _, payoffs = eval_env.run(is_training=False)
        games_played += 1
        reward_list.append(payoffs[0])
        reward_total += payoffs[0]
        taking_list.append(eval_env.game.players[0].taking)

    average_reward = float(reward_total) / num_games

    logger.log('\n########## Evaluation Against {} - Episode {} ##########'.format(title_label, episode))
    logger.log('Timestep: {} Average reward against {} is {}'.format(timestep, reward_label, average_reward))

    # Add point to logger
    logger.add_point(x=timestep, y=average_reward)

    # Make plot
    logger.make_plot(save_path=figure_path + str(episode) + '.png')
    logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                          save_path_2=figure_path + str(episode) + '_freq.png',
                          reward_list=reward_list, taking_list=taking_list)

    return reward_list, taking_list, games_played


with tf.compat.v1.Session() as sess:
    # Set agents
    # NOTE(review): ``global_step`` is not referenced again in this script; it
    # is kept because it is created in the graph before the Saver is built and
    # is therefore presumably part of the saved checkpoints - confirm before
    # removing.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=78,  # env.action_num,
                     replay_memory_size=20000,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=env.state_shape,
                     mlp_layers=[512, 1024, 512])

    # NOTE(review): ``opponent_agent`` is the very same object as ``agent``, so
    # both training and the "last agent" evaluation pit the learner against
    # itself rather than against a frozen earlier version.
    opponent_agent = agent

    sess.run(tf.compat.v1.global_variables_initializer())

    saver = tf.compat.v1.train.Saver()

    env.set_agents([agent] + [opponent_agent] * (env.player_num - 1))
    eval_env.set_agents([agent] + [random_agent] * (env.player_num - 1))

    # Count the number of transitions fed to the agent
    step_counter = 0

    # Init a Logger to plot the learning curve against random
    logger_random = Logger(xlabel='timestep', ylabel='reward', legend='DQN on TAROT against Random',
                           legend_hist='Histogram of last evaluations against Random', log_path=log_path_random,
                           csv_path=csv_path_random)
    # Init a Logger to plot the learning curve against last opponent
    logger_opponent = Logger(xlabel='timestep', ylabel='reward', legend='DQN on TAROT against last agent',
                             legend_hist='Histogram of last evaluations against last agent', log_path=log_path_opponent,
                             csv_path=csv_path_opponent)

    total_game_played = 0
    seconds = time.time()

    for episode in range(episode_num):
        print('\rEPISODE {} - Number of game played {} - {}'.format(episode, total_game_played,
                                                                    time_difference_good_format(seconds, time.time())),
              end='')

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)
        total_game_played += 1

        # Feed the transitions of player 0 into agent memory, and train the agent
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1

            # Train only once more than memory_init_size + norm_step
            # transitions have been fed
            if step_counter > memory_init_size + norm_step:
                agent.train()

        # Evaluate the performance.
        if episode % evaluate_every == 0:
            # Save Model (the same path is overwritten until self_play changes)
            model_path = 'rlcard/models/pretrained/self_played_{}/tarot_v{}/model'.format(
                str(record_number),
                str(record_number * 10000 + self_play))

            saver.save(sess, model_path)

            # Eval against random
            reward_random_list, taking_random_list, total_game_played = _run_evaluation(
                eval_env, [agent] + [random_agent] * (env.player_num - 1), evaluate_num,
                logger_random, figure_path_random, episode, env.timestep,
                total_game_played, seconds,
                progress_label='Random', title_label='Random', reward_label='random')

            # Eval against last agent
            reward_opponent_list, taking_opponent_list, total_game_played = _run_evaluation(
                eval_env, [agent] + [opponent_agent] * (env.player_num - 1), evaluate_num,
                logger_opponent, figure_path_opponent, episode, env.timestep,
                total_game_played, seconds,
                progress_label='Opponent', title_label='Last Agent', reward_label='last agent')

        if episode % evolve_model_every == 0 and episode > 0:
            # GO to next step: from now on checkpoints go to the next model folder
            self_play += 1

            # Both names already refer to the same object (see note above), so
            # this rebinding does not change who the learner plays against.
            opponent_agent = agent
            env.set_agents([agent] + [opponent_agent] * (env.player_num - 1))
            logger_random.log('\n########## Changing model - Using Model {} as opponent ##########'.format(self_play))
            logger_opponent.log('\n########## Changing model - Using Model {} as opponent ##########'.format(self_play))

    # Make the final plot against random. The histogram uses the taking list
    # recorded during the *random* evaluation; previously the list of the
    # opponent evaluation was passed here by mistake.
    logger_random.make_plot(save_path=figure_path_random + 'final_' + str(episode) + '.png')
    logger_random.make_plot_hist(save_path_1=figure_path_random + str(episode) + '_hist.png',
                                 save_path_2=figure_path_random + str(episode) + '_freq.png',
                                 reward_list=reward_random_list, taking_list=taking_random_list)
    # Make the final plot against the last agent
    logger_opponent.make_plot(save_path=figure_path_opponent + 'final_' + str(episode) + '.png')
    logger_opponent.make_plot_hist(save_path_1=figure_path_opponent + str(episode) + '_hist.png',
                                   save_path_2=figure_path_opponent + str(episode) + '_freq.png',
                                   reward_list=reward_opponent_list, taking_list=taking_opponent_list)


Instructions for updating:
Colocations handled automatically by placer.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use tf.cast instead.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


EPISODE 0 - Eval Random 999 over 1000 - Number of game played 1000 - 35 seconds
########## Evaluation Against Random - Episode 0 ##########
Timestep: 79 Average reward against random is 1.719
EPISODE 0 - Eval Opponent 999 over 1000 - Number of game played 2000 - 2 minutes and 2 seconds
########## Evaluation Against Last Agent - Episode 0 ##########
Timestep: 79 Average reward against last agent is 2.386
EPISODE 500 - Eval Random 999 over 1000 - Number of game played 3500 - 4 minutes and 15 seconds
########## Evaluation Against Random - Episode 500 ##########
Timestep: 40107 Average reward against random is 1.186
EPISODE 500 - Eval Opponent 999 over 1000 - Number of game played 4500 - 5 minutes and 40 seconds
########## Evaluation Against Last Agent - Episode 500 ##########
Timestep: 40107 Average reward against last agent is 2.491


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 1000 - Eval Random 999 over 1000 - Number of game played 6000 - 8 minutes and 58 seconds
########## Evaluation Against Random - Episode 1000 ##########
Timestep: 80109 Average reward against random is 1.428
EPISODE 1000 - Eval Opponent 999 over 1000 - Number of game played 7000 - 10 minutes and 23 seconds
########## Evaluation Against Last Agent - Episode 1000 ##########
Timestep: 80109 Average reward against last agent is 2.415


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 1500 - Eval Random 999 over 1000 - Number of game played 8500 - 13 minutes and 41 seconds
########## Evaluation Against Random - Episode 1500 ##########
Timestep: 120140 Average reward against random is 1.61
EPISODE 1500 - Eval Opponent 999 over 1000 - Number of game played 9500 - 15 minutes and 6 seconds
########## Evaluation Against Last Agent - Episode 1500 ##########
Timestep: 120140 Average reward against last agent is 2.496


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 2000 - Eval Random 999 over 1000 - Number of game played 11000 - 18 minutes and 20 seconds
########## Evaluation Against Random - Episode 2000 ##########
Timestep: 160172 Average reward against random is 1.259
EPISODE 2000 - Eval Opponent 999 over 1000 - Number of game played 12000 - 19 minutes and 45 seconds
########## Evaluation Against Last Agent - Episode 2000 ##########
Timestep: 160172 Average reward against last agent is 2.482


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 2500 - Eval Random 999 over 1000 - Number of game played 13500 - 22 minutes and 59 seconds
########## Evaluation Against Random - Episode 2500 ##########
Timestep: 200186 Average reward against random is 1.454
EPISODE 2500 - Eval Opponent 999 over 1000 - Number of game played 14500 - 24 minutes and 25 seconds
########## Evaluation Against Last Agent - Episode 2500 ##########
Timestep: 200186 Average reward against last agent is 2.713


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 3000 - Eval Random 999 over 1000 - Number of game played 16000 - 27 minutes and 39 seconds
########## Evaluation Against Random - Episode 3000 ##########
Timestep: 240143 Average reward against random is 1.31
EPISODE 3000 - Eval Opponent 999 over 1000 - Number of game played 17000 - 29 minutes and 5 seconds
########## Evaluation Against Last Agent - Episode 3000 ##########
Timestep: 240143 Average reward against last agent is 2.378


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 3500 - Eval Random 999 over 1000 - Number of game played 18500 - 32 minutes and 17 seconds
########## Evaluation Against Random - Episode 3500 ##########
Timestep: 280113 Average reward against random is 1.318
EPISODE 3500 - Eval Opponent 999 over 1000 - Number of game played 19500 - 33 minutes and 43 seconds
########## Evaluation Against Last Agent - Episode 3500 ##########
Timestep: 280113 Average reward against last agent is 2.398


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 4000 - Eval Random 999 over 1000 - Number of game played 21000 - 36 minutes and 54 seconds
########## Evaluation Against Random - Episode 4000 ##########
Timestep: 320118 Average reward against random is 1.188
EPISODE 4000 - Eval Opponent 999 over 1000 - Number of game played 22000 - 38 minutes and 19 seconds
########## Evaluation Against Last Agent - Episode 4000 ##########
Timestep: 320118 Average reward against last agent is 2.558


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 4500 - Eval Random 999 over 1000 - Number of game played 23500 - 41 minutes and 33 seconds
########## Evaluation Against Random - Episode 4500 ##########
Timestep: 360103 Average reward against random is 1.189
EPISODE 4500 - Eval Opponent 999 over 1000 - Number of game played 24500 - 42 minutes and 58 seconds
########## Evaluation Against Last Agent - Episode 4500 ##########
Timestep: 360103 Average reward against last agent is 2.437


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 5000 - Eval Random 999 over 1000 - Number of game played 26000 - 46 minutes and 11 seconds
########## Evaluation Against Random - Episode 5000 ##########
Timestep: 400119 Average reward against random is 1.25
EPISODE 5000 - Eval Opponent 999 over 1000 - Number of game played 27000 - 47 minutes and 38 seconds
########## Evaluation Against Last Agent - Episode 5000 ##########
Timestep: 400119 Average reward against last agent is 2.494


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 2 as opponent ##########

########## Changing model - Using Model 2 as opponent ##########
EPISODE 5500 - Eval Random 999 over 1000 - Number of game played 28500 - 50 minutes and 51 seconds
########## Evaluation Against Random - Episode 5500 ##########
Timestep: 440131 Average reward against random is 1.332
EPISODE 5500 - Eval Opponent 999 over 1000 - Number of game played 29500 - 52 minutes and 18 seconds
########## Evaluation Against Last Agent - Episode 5500 ##########
Timestep: 440131 Average reward against last agent is 2.329


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 6000 - Eval Random 999 over 1000 - Number of game played 31000 - 55 minutes and 29 seconds
########## Evaluation Against Random - Episode 6000 ##########
Timestep: 480194 Average reward against random is 1.36
EPISODE 6000 - Eval Opponent 999 over 1000 - Number of game played 32000 - 56 minutes and 55 seconds
########## Evaluation Against Last Agent - Episode 6000 ##########
Timestep: 480194 Average reward against last agent is 2.531


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 6500 - Eval Random 999 over 1000 - Number of game played 33500 - 1 hour, 0 minute and 7 seconds
########## Evaluation Against Random - Episode 6500 ##########
Timestep: 520183 Average reward against random is 1.074
EPISODE 6500 - Eval Opponent 999 over 1000 - Number of game played 34500 - 1 hour, 1 minute and 33 seconds
########## Evaluation Against Last Agent - Episode 6500 ##########
Timestep: 520183 Average reward against last agent is 2.653


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 7000 - Eval Random 999 over 1000 - Number of game played 36000 - 1 hour, 4 minutes and 45 seconds
########## Evaluation Against Random - Episode 7000 ##########
Timestep: 560173 Average reward against random is 2.455


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 7000 - Eval Opponent 999 over 1000 - Number of game played 37000 - 1 hour, 6 minutes and 11 seconds
########## Evaluation Against Last Agent - Episode 7000 ##########
Timestep: 560173 Average reward against last agent is 2.525


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 7500 - Eval Random 999 over 1000 - Number of game played 38500 - 1 hour, 9 minutes and 21 seconds
########## Evaluation Against Random - Episode 7500 ##########
Timestep: 600151 Average reward against random is 2.465


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 7500 - Eval Opponent 999 over 1000 - Number of game played 39500 - 1 hour, 10 minutes and 46 seconds
########## Evaluation Against Last Agent - Episode 7500 ##########
Timestep: 600151 Average reward against last agent is 2.44


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 8000 - Eval Random 999 over 1000 - Number of game played 41000 - 1 hour, 13 minutes and 59 seconds
########## Evaluation Against Random - Episode 8000 ##########
Timestep: 640122 Average reward against random is 2.616


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 8000 - Eval Opponent 999 over 1000 - Number of game played 42000 - 1 hour, 15 minutes and 24 seconds
########## Evaluation Against Last Agent - Episode 8000 ##########
Timestep: 640122 Average reward against last agent is 2.407


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 8500 - Eval Random 999 over 1000 - Number of game played 43500 - 1 hour, 18 minutes and 37 seconds
########## Evaluation Against Random - Episode 8500 ##########
Timestep: 680146 Average reward against random is 2.597


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 8500 - Eval Opponent 999 over 1000 - Number of game played 44500 - 1 hour, 20 minutes and 2 seconds
########## Evaluation Against Last Agent - Episode 8500 ##########
Timestep: 680146 Average reward against last agent is 2.41


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 9000 - Eval Random 999 over 1000 - Number of game played 46000 - 1 hour, 23 minutes and 16 seconds
########## Evaluation Against Random - Episode 9000 ##########
Timestep: 720152 Average reward against random is 1.37
EPISODE 9000 - Eval Opponent 999 over 1000 - Number of game played 47000 - 1 hour, 24 minutes and 41 seconds
########## Evaluation Against Last Agent - Episode 9000 ##########
Timestep: 720152 Average reward against last agent is 2.339


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 9500 - Eval Random 999 over 1000 - Number of game played 48500 - 1 hour, 27 minutes and 55 seconds
########## Evaluation Against Random - Episode 9500 ##########
Timestep: 760135 Average reward against random is 1.219
EPISODE 9500 - Eval Opponent 999 over 1000 - Number of game played 49500 - 1 hour, 29 minutes and 21 seconds
########## Evaluation Against Last Agent - Episode 9500 ##########
Timestep: 760135 Average reward against last agent is 2.777


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 10000 - Eval Random 999 over 1000 - Number of game played 51000 - 1 hour, 32 minutes and 33 seconds
########## Evaluation Against Random - Episode 10000 ##########
Timestep: 800126 Average reward against random is 1.568
EPISODE 10000 - Eval Opponent 999 over 1000 - Number of game played 52000 - 1 hour, 34 minutes and 0 second
########## Evaluation Against Last Agent - Episode 10000 ##########
Timestep: 800126 Average reward against last agent is 2.15


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 3 as opponent ##########

########## Changing model - Using Model 3 as opponent ##########
EPISODE 10500 - Eval Random 999 over 1000 - Number of game played 53500 - 1 hour, 37 minutes and 12 seconds
########## Evaluation Against Random - Episode 10500 ##########
Timestep: 840170 Average reward against random is 1.223
EPISODE 10500 - Eval Opponent 999 over 1000 - Number of game played 54500 - 1 hour, 38 minutes and 38 seconds
########## Evaluation Against Last Agent - Episode 10500 ##########
Timestep: 840170 Average reward against last agent is 2.307


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 11000 - Eval Random 999 over 1000 - Number of game played 56000 - 1 hour, 41 minutes and 52 seconds
########## Evaluation Against Random - Episode 11000 ##########
Timestep: 880139 Average reward against random is 1.296
EPISODE 11000 - Eval Opponent 999 over 1000 - Number of game played 57000 - 1 hour, 43 minutes and 19 seconds
########## Evaluation Against Last Agent - Episode 11000 ##########
Timestep: 880139 Average reward against last agent is 2.445


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 11500 - Eval Random 999 over 1000 - Number of game played 58500 - 1 hour, 46 minutes and 32 seconds
########## Evaluation Against Random - Episode 11500 ##########
Timestep: 920194 Average reward against random is 1.224
EPISODE 11500 - Eval Opponent 999 over 1000 - Number of game played 59500 - 1 hour, 47 minutes and 59 seconds
########## Evaluation Against Last Agent - Episode 11500 ##########
Timestep: 920194 Average reward against last agent is 2.471


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 12000 - Eval Random 999 over 1000 - Number of game played 61000 - 1 hour, 51 minutes and 12 seconds
########## Evaluation Against Random - Episode 12000 ##########
Timestep: 960201 Average reward against random is 1.542
EPISODE 12000 - Eval Opponent 999 over 1000 - Number of game played 62000 - 1 hour, 52 minutes and 40 seconds
########## Evaluation Against Last Agent - Episode 12000 ##########
Timestep: 960201 Average reward against last agent is 2.328


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 12500 - Eval Random 999 over 1000 - Number of game played 63500 - 1 hour, 55 minutes and 53 seconds
########## Evaluation Against Random - Episode 12500 ##########
Timestep: 1000251 Average reward against random is 1.087
EPISODE 12500 - Eval Opponent 999 over 1000 - Number of game played 64500 - 1 hour, 57 minutes and 20 seconds
########## Evaluation Against Last Agent - Episode 12500 ##########
Timestep: 1000251 Average reward against last agent is 2.604


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 13000 - Eval Random 999 over 1000 - Number of game played 66000 - 2 hours, 0 minute and 34 seconds
########## Evaluation Against Random - Episode 13000 ##########
Timestep: 1040249 Average reward against random is 1.363
EPISODE 13000 - Eval Opponent 999 over 1000 - Number of game played 67000 - 2 hours, 2 minutes and 3 seconds
########## Evaluation Against Last Agent - Episode 13000 ##########
Timestep: 1040249 Average reward against last agent is 2.638


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 13500 - Eval Random 999 over 1000 - Number of game played 68500 - 2 hours, 5 minutes and 18 seconds
########## Evaluation Against Random - Episode 13500 ##########
Timestep: 1080249 Average reward against random is 1.392
EPISODE 13500 - Eval Opponent 999 over 1000 - Number of game played 69500 - 2 hours, 6 minutes and 44 seconds
########## Evaluation Against Last Agent - Episode 13500 ##########
Timestep: 1080249 Average reward against last agent is 2.359


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 14000 - Eval Random 999 over 1000 - Number of game played 71000 - 2 hours, 10 minutes and 0 seconds
########## Evaluation Against Random - Episode 14000 ##########
Timestep: 1120249 Average reward against random is 1.092
EPISODE 14000 - Eval Opponent 999 over 1000 - Number of game played 72000 - 2 hours, 11 minutes and 27 seconds
########## Evaluation Against Last Agent - Episode 14000 ##########
Timestep: 1120249 Average reward against last agent is 2.672


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 14500 - Eval Random 999 over 1000 - Number of game played 73500 - 2 hours, 14 minutes and 41 seconds
########## Evaluation Against Random - Episode 14500 ##########
Timestep: 1160242 Average reward against random is 1.456
EPISODE 14500 - Eval Opponent 999 over 1000 - Number of game played 74500 - 2 hours, 16 minutes and 8 seconds
########## Evaluation Against Last Agent - Episode 14500 ##########
Timestep: 1160242 Average reward against last agent is 2.805


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 15000 - Eval Random 999 over 1000 - Number of game played 76000 - 2 hours, 19 minutes and 24 seconds
########## Evaluation Against Random - Episode 15000 ##########
Timestep: 1200271 Average reward against random is 2.453


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 15000 - Eval Opponent 999 over 1000 - Number of game played 77000 - 2 hours, 36 minutes and 59 seconds
########## Evaluation Against Last Agent - Episode 15000 ##########
Timestep: 1200271 Average reward against last agent is 0.179

########## Changing model - Using Model 4 as opponent ##########

########## Changing model - Using Model 4 as opponent ##########
EPISODE 15500 - Eval Random 999 over 1000 - Number of game played 78500 - 2 hours, 40 minutes and 14 seconds
########## Evaluation Against Random - Episode 15500 ##########
Timestep: 1240260 Average reward against random is 1.395
EPISODE 15500 - Eval Opponent 999 over 1000 - Number of game played 79500 - 2 hours, 41 minutes and 40 seconds
########## Evaluation Against Last Agent - Episode 15500 ##########
Timestep: 1240260 Average reward against last agent is 2.343


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 16000 - Eval Random 999 over 1000 - Number of game played 81000 - 2 hours, 44 minutes and 53 seconds
########## Evaluation Against Random - Episode 16000 ##########
Timestep: 1280226 Average reward against random is 1.065
EPISODE 16000 - Eval Opponent 999 over 1000 - Number of game played 82000 - 2 hours, 46 minutes and 19 seconds
########## Evaluation Against Last Agent - Episode 16000 ##########
Timestep: 1280226 Average reward against last agent is 2.625


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 16500 - Eval Random 999 over 1000 - Number of game played 83500 - 2 hours, 49 minutes and 32 seconds
########## Evaluation Against Random - Episode 16500 ##########
Timestep: 1320237 Average reward against random is 1.261
EPISODE 16500 - Eval Opponent 999 over 1000 - Number of game played 84500 - 2 hours, 50 minutes and 58 seconds
########## Evaluation Against Last Agent - Episode 16500 ##########
Timestep: 1320237 Average reward against last agent is 2.378


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 17000 - Eval Random 999 over 1000 - Number of game played 86000 - 2 hours, 54 minutes and 11 seconds
########## Evaluation Against Random - Episode 17000 ##########
Timestep: 1360248 Average reward against random is 1.151
EPISODE 17000 - Eval Opponent 999 over 1000 - Number of game played 87000 - 2 hours, 55 minutes and 38 seconds
########## Evaluation Against Last Agent - Episode 17000 ##########
Timestep: 1360248 Average reward against last agent is 2.486


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 17500 - Eval Random 999 over 1000 - Number of game played 88500 - 2 hours, 58 minutes and 50 seconds
########## Evaluation Against Random - Episode 17500 ##########
Timestep: 1400282 Average reward against random is 2.404


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 17500 - Eval Opponent 999 over 1000 - Number of game played 89500 - 3 hours, 15 minutes and 31 seconds
########## Evaluation Against Last Agent - Episode 17500 ##########
Timestep: 1400282 Average reward against last agent is -0.07
EPISODE 18000 - Eval Random 999 over 1000 - Number of game played 91000 - 3 hours, 18 minutes and 44 seconds
########## Evaluation Against Random - Episode 18000 ##########
Timestep: 1440336 Average reward against random is 2.512


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 18000 - Eval Opponent 999 over 1000 - Number of game played 92000 - 3 hours, 35 minutes and 48 seconds
########## Evaluation Against Last Agent - Episode 18000 ##########
Timestep: 1440336 Average reward against last agent is -0.061
EPISODE 18500 - Eval Random 999 over 1000 - Number of game played 93500 - 3 hours, 39 minutes and 1 secondds
########## Evaluation Against Random - Episode 18500 ##########
Timestep: 1480336 Average reward against random is 2.514


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 18500 - Eval Opponent 999 over 1000 - Number of game played 94500 - 3 hours, 56 minutes and 49 seconds
########## Evaluation Against Last Agent - Episode 18500 ##########
Timestep: 1480336 Average reward against last agent is -0.09
EPISODE 19000 - Eval Random 999 over 1000 - Number of game played 96000 - 4 hours, 0 minute and 4 secondsnds
########## Evaluation Against Random - Episode 19000 ##########
Timestep: 1520306 Average reward against random is 2.567


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 19000 - Eval Opponent 999 over 1000 - Number of game played 97000 - 4 hours, 1 minute and 30 seconds
########## Evaluation Against Last Agent - Episode 19000 ##########
Timestep: 1520306 Average reward against last agent is 2.386


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 19500 - Eval Random 999 over 1000 - Number of game played 98500 - 4 hours, 4 minutes and 42 seconds
########## Evaluation Against Random - Episode 19500 ##########
Timestep: 1560366 Average reward against random is 2.606


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 19500 - Eval Opponent 999 over 1000 - Number of game played 99500 - 4 hours, 21 minutes and 19 seconds
########## Evaluation Against Last Agent - Episode 19500 ##########
Timestep: 1560366 Average reward against last agent is -0.022
EPISODE 20000 - Eval Random 999 over 1000 - Number of game played 101000 - 4 hours, 24 minutes and 31 seconds
########## Evaluation Against Random - Episode 20000 ##########
Timestep: 1600464 Average reward against random is 2.529


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 20000 - Eval Opponent 999 over 1000 - Number of game played 102000 - 4 hours, 41 minutes and 42 seconds
########## Evaluation Against Last Agent - Episode 20000 ##########
Timestep: 1600464 Average reward against last agent is -0.032

########## Changing model - Using Model 5 as opponent ##########

########## Changing model - Using Model 5 as opponent ##########
EPISODE 20500 - Eval Random 999 over 1000 - Number of game played 103500 - 4 hours, 44 minutes and 56 seconds
########## Evaluation Against Random - Episode 20500 ##########
Timestep: 1640513 Average reward against random is 2.456


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 20500 - Eval Opponent 999 over 1000 - Number of game played 104500 - 5 hours, 2 minutes and 15 secondss
########## Evaluation Against Last Agent - Episode 20500 ##########
Timestep: 1640513 Average reward against last agent is 0.111
EPISODE 21000 - Eval Random 999 over 1000 - Number of game played 106000 - 5 hours, 5 minutes and 31 seconds
########## Evaluation Against Random - Episode 21000 ##########
Timestep: 1680554 Average reward against random is 2.428


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 21000 - Eval Opponent 999 over 1000 - Number of game played 107000 - 5 hours, 22 minutes and 21 seconds
########## Evaluation Against Last Agent - Episode 21000 ##########
Timestep: 1680554 Average reward against last agent is -0.139
EPISODE 21500 - Eval Random 999 over 1000 - Number of game played 108500 - 5 hours, 25 minutes and 37 seconds
########## Evaluation Against Random - Episode 21500 ##########
Timestep: 1720579 Average reward against random is 2.425


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 21500 - Eval Opponent 999 over 1000 - Number of game played 109500 - 5 hours, 42 minutes and 4 secondss
########## Evaluation Against Last Agent - Episode 21500 ##########
Timestep: 1720579 Average reward against last agent is -0.016
EPISODE 22000 - Eval Random 999 over 1000 - Number of game played 111000 - 5 hours, 45 minutes and 14 seconds
########## Evaluation Against Random - Episode 22000 ##########
Timestep: 1760656 Average reward against random is 2.531


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 22000 - Eval Opponent 999 over 1000 - Number of game played 112000 - 6 hours, 3 minutes and 1 seconddss
########## Evaluation Against Last Agent - Episode 22000 ##########
Timestep: 1760656 Average reward against last agent is 0.107
EPISODE 22500 - Eval Random 999 over 1000 - Number of game played 113500 - 6 hours, 6 minutes and 13 seconds
########## Evaluation Against Random - Episode 22500 ##########
Timestep: 1800705 Average reward against random is 2.369


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 22500 - Eval Opponent 999 over 1000 - Number of game played 114500 - 6 hours, 7 minutes and 37 seconds
########## Evaluation Against Last Agent - Episode 22500 ##########
Timestep: 1800705 Average reward against last agent is 2.35


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 23000 - Eval Random 999 over 1000 - Number of game played 116000 - 6 hours, 10 minutes and 46 seconds
########## Evaluation Against Random - Episode 23000 ##########
Timestep: 1840812 Average reward against random is 2.348


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 23000 - Eval Opponent 999 over 1000 - Number of game played 117000 - 6 hours, 23 minutes and 0 secondds
########## Evaluation Against Last Agent - Episode 23000 ##########
Timestep: 1840812 Average reward against last agent is 0.879
EPISODE 23500 - Eval Random 999 over 1000 - Number of game played 118500 - 6 hours, 26 minutes and 10 seconds
########## Evaluation Against Random - Episode 23500 ##########
Timestep: 1880892 Average reward against random is 2.372


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 23500 - Eval Opponent 999 over 1000 - Number of game played 119500 - 6 hours, 43 minutes and 12 seconds
########## Evaluation Against Last Agent - Episode 23500 ##########
Timestep: 1880892 Average reward against last agent is 0.032
EPISODE 24000 - Eval Random 999 over 1000 - Number of game played 121000 - 6 hours, 46 minutes and 16 seconds
########## Evaluation Against Random - Episode 24000 ##########
Timestep: 1920886 Average reward against random is 2.511


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 24000 - Eval Opponent 999 over 1000 - Number of game played 122000 - 7 hours, 3 minutes and 0 seconddss
########## Evaluation Against Last Agent - Episode 24000 ##########
Timestep: 1920886 Average reward against last agent is 0.022
EPISODE 24500 - Eval Random 999 over 1000 - Number of game played 123500 - 7 hours, 6 minutes and 1 secondds
########## Evaluation Against Random - Episode 24500 ##########
Timestep: 1961000 Average reward against random is 2.595


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 24500 - Eval Opponent 999 over 1000 - Number of game played 124500 - 7 hours, 22 minutes and 26 seconds
########## Evaluation Against Last Agent - Episode 24500 ##########
Timestep: 1961000 Average reward against last agent is 0.103
EPISODE 25000 - Eval Random 999 over 1000 - Number of game played 126000 - 7 hours, 25 minutes and 28 seconds
########## Evaluation Against Random - Episode 25000 ##########
Timestep: 2001128 Average reward against random is 2.552


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 25000 - Eval Opponent 999 over 1000 - Number of game played 127000 - 7 hours, 40 minutes and 34 seconds
########## Evaluation Against Last Agent - Episode 25000 ##########
Timestep: 2001128 Average reward against last agent is -0.031

########## Changing model - Using Model 6 as opponent ##########

########## Changing model - Using Model 6 as opponent ##########
Instructions for updating:
Use standard file APIs to delete files with this prefix.
EPISODE 25500 - Eval Random 999 over 1000 - Number of game played 128500 - 7 hours, 43 minutes and 38 seconds
########## Evaluation Against Random - Episode 25500 ##########
Timestep: 2041361 Average reward against random is 2.514


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 25500 - Eval Opponent 999 over 1000 - Number of game played 129500 - 7 hours, 56 minutes and 30 seconds
########## Evaluation Against Last Agent - Episode 25500 ##########
Timestep: 2041361 Average reward against last agent is 0.415
EPISODE 26000 - Eval Random 999 over 1000 - Number of game played 131000 - 7 hours, 59 minutes and 36 seconds
########## Evaluation Against Random - Episode 26000 ##########
Timestep: 2081403 Average reward against random is 2.586


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 26000 - Eval Opponent 999 over 1000 - Number of game played 132000 - 8 hours, 15 minutes and 54 seconds
########## Evaluation Against Last Agent - Episode 26000 ##########
Timestep: 2081403 Average reward against last agent is -0.037
EPISODE 26500 - Eval Random 999 over 1000 - Number of game played 133500 - 8 hours, 18 minutes and 58 seconds
########## Evaluation Against Random - Episode 26500 ##########
Timestep: 2121506 Average reward against random is 2.554


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 26500 - Eval Opponent 999 over 1000 - Number of game played 134500 - 8 hours, 34 minutes and 36 seconds
########## Evaluation Against Last Agent - Episode 26500 ##########
Timestep: 2121506 Average reward against last agent is 0.009
EPISODE 27000 - Eval Random 999 over 1000 - Number of game played 136000 - 8 hours, 37 minutes and 39 seconds
########## Evaluation Against Random - Episode 27000 ##########
Timestep: 2161748 Average reward against random is 2.608


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 27000 - Eval Opponent 999 over 1000 - Number of game played 137000 - 8 hours, 41 minutes and 22 seconds
########## Evaluation Against Last Agent - Episode 27000 ##########
Timestep: 2161748 Average reward against last agent is 1.891
EPISODE 27500 - Eval Random 999 over 1000 - Number of game played 138500 - 8 hours, 44 minutes and 24 seconds
########## Evaluation Against Random - Episode 27500 ##########
Timestep: 2201977 Average reward against random is 2.475


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 27500 - Eval Opponent 999 over 1000 - Number of game played 139500 - 8 hours, 48 minutes and 24 seconds
########## Evaluation Against Last Agent - Episode 27500 ##########
Timestep: 2201977 Average reward against last agent is 1.981
EPISODE 28000 - Eval Random 999 over 1000 - Number of game played 141000 - 8 hours, 51 minutes and 27 seconds
########## Evaluation Against Random - Episode 28000 ##########
Timestep: 2242202 Average reward against random is 2.568


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 28000 - Eval Opponent 999 over 1000 - Number of game played 142000 - 8 hours, 53 minutes and 32 seconds
########## Evaluation Against Last Agent - Episode 28000 ##########
Timestep: 2242202 Average reward against last agent is 2.605
EPISODE 28500 - Eval Random 999 over 1000 - Number of game played 143500 - 8 hours, 56 minutes and 37 seconds
########## Evaluation Against Random - Episode 28500 ##########
Timestep: 2282486 Average reward against random is 2.407


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 28500 - Eval Opponent 999 over 1000 - Number of game played 144500 - 9 hours, 12 minutes and 49 seconds
########## Evaluation Against Last Agent - Episode 28500 ##########
Timestep: 2282486 Average reward against last agent is -0.099
EPISODE 29000 - Eval Random 999 over 1000 - Number of game played 146000 - 9 hours, 15 minutes and 54 seconds
########## Evaluation Against Random - Episode 29000 ##########
Timestep: 2322704 Average reward against random is 2.345


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 29000 - Eval Opponent 999 over 1000 - Number of game played 147000 - 9 hours, 33 minutes and 28 seconds
########## Evaluation Against Last Agent - Episode 29000 ##########
Timestep: 2322704 Average reward against last agent is -0.098
EPISODE 29500 - Eval Random 999 over 1000 - Number of game played 148500 - 9 hours, 36 minutes and 31 seconds
########## Evaluation Against Random - Episode 29500 ##########
Timestep: 2362968 Average reward against random is 2.538


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 29500 - Eval Opponent 999 over 1000 - Number of game played 149500 - 9 hours, 52 minutes and 13 seconds
########## Evaluation Against Last Agent - Episode 29500 ##########
Timestep: 2362968 Average reward against last agent is 0.107
EPISODE 30000 - Eval Random 999 over 1000 - Number of game played 151000 - 9 hours, 55 minutes and 21 seconds
########## Evaluation Against Random - Episode 30000 ##########
Timestep: 2403110 Average reward against random is 2.466


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 30000 - Eval Opponent 999 over 1000 - Number of game played 152000 - 10 hours, 12 minutes and 2 secondss
########## Evaluation Against Last Agent - Episode 30000 ##########
Timestep: 2403110 Average reward against last agent is 0.026

########## Changing model - Using Model 7 as opponent ##########

########## Changing model - Using Model 7 as opponent ##########
EPISODE 30500 - Eval Random 999 over 1000 - Number of game played 153500 - 10 hours, 15 minutes and 8 secondss
########## Evaluation Against Random - Episode 30500 ##########
Timestep: 2443294 Average reward against random is 2.46


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 30500 - Eval Opponent 999 over 1000 - Number of game played 154500 - 10 hours, 31 minutes and 0 secondds
########## Evaluation Against Last Agent - Episode 30500 ##########
Timestep: 2443294 Average reward against last agent is -0.08
EPISODE 31000 - Eval Random 999 over 1000 - Number of game played 156000 - 10 hours, 34 minutes and 9 secondss
########## Evaluation Against Random - Episode 31000 ##########
Timestep: 2483450 Average reward against random is 2.444


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 31000 - Eval Opponent 999 over 1000 - Number of game played 157000 - 10 hours, 51 minutes and 5 secondss
########## Evaluation Against Last Agent - Episode 31000 ##########
Timestep: 2483450 Average reward against last agent is -0.047
EPISODE 31500 - Eval Random 999 over 1000 - Number of game played 158500 - 10 hours, 54 minutes and 12 seconds
########## Evaluation Against Random - Episode 31500 ##########
Timestep: 2523581 Average reward against random is 2.49


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 31500 - Eval Opponent 999 over 1000 - Number of game played 159500 - 11 hours, 9 minutes and 51 secondss
########## Evaluation Against Last Agent - Episode 31500 ##########
Timestep: 2523581 Average reward against last agent is 0.117
EPISODE 32000 - Eval Random 999 over 1000 - Number of game played 161000 - 11 hours, 12 minutes and 55 seconds
########## Evaluation Against Random - Episode 32000 ##########
Timestep: 2563829 Average reward against random is 2.482


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 32000 - Eval Opponent 999 over 1000 - Number of game played 162000 - 11 hours, 15 minutes and 7 secondss
########## Evaluation Against Last Agent - Episode 32000 ##########
Timestep: 2563829 Average reward against last agent is 2.541
EPISODE 32500 - Eval Random 999 over 1000 - Number of game played 163500 - 11 hours, 18 minutes and 14 seconds
########## Evaluation Against Random - Episode 32500 ##########
Timestep: 2603921 Average reward against random is 2.373


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 32500 - Eval Opponent 999 over 1000 - Number of game played 164500 - 11 hours, 33 minutes and 13 seconds
########## Evaluation Against Last Agent - Episode 32500 ##########
Timestep: 2603921 Average reward against last agent is 0.196
EPISODE 33000 - Eval Random 999 over 1000 - Number of game played 166000 - 11 hours, 36 minutes and 22 seconds
########## Evaluation Against Random - Episode 33000 ##########
Timestep: 2643940 Average reward against random is 2.373


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 33000 - Eval Opponent 999 over 1000 - Number of game played 167000 - 11 hours, 50 minutes and 18 seconds
########## Evaluation Against Last Agent - Episode 33000 ##########
Timestep: 2643940 Average reward against last agent is 0.234
EPISODE 33500 - Eval Random 999 over 1000 - Number of game played 168500 - 11 hours, 53 minutes and 25 seconds
########## Evaluation Against Random - Episode 33500 ##########
Timestep: 2684015 Average reward against random is 2.463


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 33500 - Eval Opponent 999 over 1000 - Number of game played 169500 - 11 hours, 54 minutes and 45 seconds
########## Evaluation Against Last Agent - Episode 33500 ##########
Timestep: 2684015 Average reward against last agent is 2.466


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 34000 - Eval Random 999 over 1000 - Number of game played 171000 - 11 hours, 57 minutes and 56 seconds
########## Evaluation Against Random - Episode 34000 ##########
Timestep: 2723992 Average reward against random is 2.604


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 34000 - Eval Opponent 999 over 1000 - Number of game played 172000 - 12 hours, 0 minute and 47 secondsds
########## Evaluation Against Last Agent - Episode 34000 ##########
Timestep: 2723992 Average reward against last agent is 2.008
EPISODE 34500 - Eval Random 999 over 1000 - Number of game played 173500 - 12 hours, 3 minutes and 57 seconds
########## Evaluation Against Random - Episode 34500 ##########
Timestep: 2763963 Average reward against random is 2.409


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 34500 - Eval Opponent 999 over 1000 - Number of game played 174500 - 12 hours, 5 minutes and 25 seconds
########## Evaluation Against Last Agent - Episode 34500 ##########
Timestep: 2763963 Average reward against last agent is 2.248


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 35000 - Eval Random 999 over 1000 - Number of game played 176000 - 12 hours, 8 minutes and 35 seconds
########## Evaluation Against Random - Episode 35000 ##########
Timestep: 2803986 Average reward against random is 2.499


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 35000 - Eval Opponent 999 over 1000 - Number of game played 177000 - 12 hours, 10 minutes and 1 seconds
########## Evaluation Against Last Agent - Episode 35000 ##########
Timestep: 2803986 Average reward against last agent is 2.358

########## Changing model - Using Model 8 as opponent ##########

########## Changing model - Using Model 8 as opponent ##########
EPISODE 35500 - Eval Random 999 over 1000 - Number of game played 178500 - 12 hours, 13 minutes and 13 seconds
########## Evaluation Against Random - Episode 35500 ##########
Timestep: 2843971 Average reward against random is 2.545


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 35500 - Eval Opponent 999 over 1000 - Number of game played 179500 - 12 hours, 14 minutes and 39 seconds
########## Evaluation Against Last Agent - Episode 35500 ##########
Timestep: 2843971 Average reward against last agent is 2.29


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 36000 - Eval Random 999 over 1000 - Number of game played 181000 - 12 hours, 17 minutes and 51 seconds
########## Evaluation Against Random - Episode 36000 ##########
Timestep: 2883972 Average reward against random is 2.506


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 36000 - Eval Opponent 999 over 1000 - Number of game played 182000 - 12 hours, 19 minutes and 16 seconds
########## Evaluation Against Last Agent - Episode 36000 ##########
Timestep: 2883972 Average reward against last agent is 2.372


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 36500 - Eval Random 999 over 1000 - Number of game played 183500 - 12 hours, 22 minutes and 28 seconds
########## Evaluation Against Random - Episode 36500 ##########
Timestep: 2923916 Average reward against random is 2.351


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 36500 - Eval Opponent 999 over 1000 - Number of game played 184500 - 12 hours, 23 minutes and 53 seconds
########## Evaluation Against Last Agent - Episode 36500 ##########
Timestep: 2923916 Average reward against last agent is 2.243


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 37000 - Eval Random 999 over 1000 - Number of game played 186000 - 12 hours, 27 minutes and 7 secondss
########## Evaluation Against Random - Episode 37000 ##########
Timestep: 2963932 Average reward against random is 2.449


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 37000 - Eval Opponent 999 over 1000 - Number of game played 187000 - 12 hours, 28 minutes and 27 seconds
########## Evaluation Against Last Agent - Episode 37000 ##########
Timestep: 2963932 Average reward against last agent is 2.492


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 37500 - Eval Random 999 over 1000 - Number of game played 188500 - 12 hours, 31 minutes and 42 seconds
########## Evaluation Against Random - Episode 37500 ##########
Timestep: 3003904 Average reward against random is 2.327


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 37500 - Eval Opponent 999 over 1000 - Number of game played 189500 - 12 hours, 33 minutes and 8 secondss
########## Evaluation Against Last Agent - Episode 37500 ##########
Timestep: 3003904 Average reward against last agent is 2.176


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 38000 - Eval Random 999 over 1000 - Number of game played 191000 - 12 hours, 36 minutes and 28 seconds
########## Evaluation Against Random - Episode 38000 ##########
Timestep: 3043691 Average reward against random is 2.482


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 38000 - Eval Opponent 999 over 1000 - Number of game played 192000 - 12 hours, 37 minutes and 49 seconds
########## Evaluation Against Last Agent - Episode 38000 ##########
Timestep: 3043691 Average reward against last agent is 2.484


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 38500 - Eval Random 999 over 1000 - Number of game played 193500 - 12 hours, 41 minutes and 12 seconds
########## Evaluation Against Random - Episode 38500 ##########
Timestep: 3083389 Average reward against random is 2.601


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 38500 - Eval Opponent 999 over 1000 - Number of game played 194500 - 12 hours, 42 minutes and 33 seconds
########## Evaluation Against Last Agent - Episode 38500 ##########
Timestep: 3083389 Average reward against last agent is 2.173


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 39000 - Eval Random 999 over 1000 - Number of game played 196000 - 12 hours, 45 minutes and 53 seconds
########## Evaluation Against Random - Episode 39000 ##########
Timestep: 3123146 Average reward against random is 2.554


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 39000 - Eval Opponent 999 over 1000 - Number of game played 197000 - 12 hours, 47 minutes and 16 seconds
########## Evaluation Against Last Agent - Episode 39000 ##########
Timestep: 3123146 Average reward against last agent is 2.373


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 39500 - Eval Random 999 over 1000 - Number of game played 198500 - 12 hours, 50 minutes and 37 seconds
########## Evaluation Against Random - Episode 39500 ##########
Timestep: 3162941 Average reward against random is 2.458


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 39500 - Eval Opponent 999 over 1000 - Number of game played 199500 - 12 hours, 51 minutes and 59 seconds
########## Evaluation Against Last Agent - Episode 39500 ##########
Timestep: 3162941 Average reward against last agent is 2.414


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 40000 - Eval Random 999 over 1000 - Number of game played 201000 - 12 hours, 55 minutes and 23 seconds
########## Evaluation Against Random - Episode 40000 ##########
Timestep: 3202716 Average reward against random is 2.461


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 40000 - Eval Opponent 999 over 1000 - Number of game played 202000 - 12 hours, 56 minutes and 44 seconds
########## Evaluation Against Last Agent - Episode 40000 ##########
Timestep: 3202716 Average reward against last agent is 2.368


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 9 as opponent ##########

########## Changing model - Using Model 9 as opponent ##########
EPISODE 40500 - Eval Random 999 over 1000 - Number of game played 203500 - 13 hours, 0 minute and 7 secondsnds
########## Evaluation Against Random - Episode 40500 ##########
Timestep: 3242461 Average reward against random is 2.491


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 40500 - Eval Opponent 999 over 1000 - Number of game played 204500 - 13 hours, 1 minute and 28 seconds
########## Evaluation Against Last Agent - Episode 40500 ##########
Timestep: 3242461 Average reward against last agent is 2.394


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 41000 - Eval Random 999 over 1000 - Number of game played 206000 - 13 hours, 4 minutes and 51 seconds
########## Evaluation Against Random - Episode 41000 ##########
Timestep: 3282216 Average reward against random is 2.329


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 41000 - Eval Opponent 999 over 1000 - Number of game played 207000 - 13 hours, 6 minutes and 14 seconds
########## Evaluation Against Last Agent - Episode 41000 ##########
Timestep: 3282216 Average reward against last agent is 2.389


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 41500 - Eval Random 999 over 1000 - Number of game played 208500 - 13 hours, 9 minutes and 40 seconds
########## Evaluation Against Random - Episode 41500 ##########
Timestep: 3321982 Average reward against random is 2.415


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 41500 - Eval Opponent 999 over 1000 - Number of game played 209500 - 13 hours, 11 minutes and 3 secondss
########## Evaluation Against Last Agent - Episode 41500 ##########
Timestep: 3321982 Average reward against last agent is 2.451


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 42000 - Eval Random 999 over 1000 - Number of game played 211000 - 13 hours, 14 minutes and 30 seconds
########## Evaluation Against Random - Episode 42000 ##########
Timestep: 3361750 Average reward against random is 2.413


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 42000 - Eval Opponent 999 over 1000 - Number of game played 212000 - 13 hours, 15 minutes and 54 seconds
########## Evaluation Against Last Agent - Episode 42000 ##########
Timestep: 3361750 Average reward against last agent is 2.388


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 42500 - Eval Random 999 over 1000 - Number of game played 213500 - 13 hours, 19 minutes and 20 seconds
########## Evaluation Against Random - Episode 42500 ##########
Timestep: 3401511 Average reward against random is 2.314


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 42500 - Eval Opponent 999 over 1000 - Number of game played 214500 - 13 hours, 20 minutes and 43 seconds
########## Evaluation Against Last Agent - Episode 42500 ##########
Timestep: 3401511 Average reward against last agent is 2.369


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 43000 - Eval Random 999 over 1000 - Number of game played 216000 - 13 hours, 24 minutes and 11 seconds
########## Evaluation Against Random - Episode 43000 ##########
Timestep: 3441211 Average reward against random is 2.412


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 43000 - Eval Opponent 999 over 1000 - Number of game played 217000 - 13 hours, 25 minutes and 34 seconds
########## Evaluation Against Last Agent - Episode 43000 ##########
Timestep: 3441211 Average reward against last agent is 2.208


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 43500 - Eval Random 999 over 1000 - Number of game played 218500 - 13 hours, 28 minutes and 59 seconds
########## Evaluation Against Random - Episode 43500 ##########
Timestep: 3481011 Average reward against random is 2.472


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 43500 - Eval Opponent 999 over 1000 - Number of game played 219500 - 13 hours, 30 minutes and 26 seconds
########## Evaluation Against Last Agent - Episode 43500 ##########
Timestep: 3481011 Average reward against last agent is 2.284


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 44000 - Eval Random 999 over 1000 - Number of game played 221000 - 13 hours, 33 minutes and 53 seconds
########## Evaluation Against Random - Episode 44000 ##########
Timestep: 3520827 Average reward against random is 2.466


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 44000 - Eval Opponent 999 over 1000 - Number of game played 222000 - 13 hours, 35 minutes and 14 seconds
########## Evaluation Against Last Agent - Episode 44000 ##########
Timestep: 3520827 Average reward against last agent is 2.567


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 44500 - Eval Random 999 over 1000 - Number of game played 223500 - 13 hours, 38 minutes and 42 seconds
########## Evaluation Against Random - Episode 44500 ##########
Timestep: 3560663 Average reward against random is 2.352


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 44500 - Eval Opponent 999 over 1000 - Number of game played 224500 - 13 hours, 40 minutes and 3 secondss
########## Evaluation Against Last Agent - Episode 44500 ##########
Timestep: 3560663 Average reward against last agent is 2.224


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 45000 - Eval Random 999 over 1000 - Number of game played 226000 - 13 hours, 43 minutes and 28 seconds
########## Evaluation Against Random - Episode 45000 ##########
Timestep: 3600498 Average reward against random is 2.399


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 45000 - Eval Opponent 999 over 1000 - Number of game played 227000 - 13 hours, 44 minutes and 50 seconds
########## Evaluation Against Last Agent - Episode 45000 ##########
Timestep: 3600498 Average reward against last agent is 2.499


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 10 as opponent ##########

########## Changing model - Using Model 10 as opponent ##########
EPISODE 45500 - Eval Random 999 over 1000 - Number of game played 228500 - 13 hours, 48 minutes and 19 seconds
########## Evaluation Against Random - Episode 45500 ##########
Timestep: 3640345 Average reward against random is 2.589


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 45500 - Eval Opponent 999 over 1000 - Number of game played 229500 - 13 hours, 49 minutes and 41 seconds
########## Evaluation Against Last Agent - Episode 45500 ##########
Timestep: 3640345 Average reward against last agent is 2.268


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 46000 - Eval Random 999 over 1000 - Number of game played 231000 - 13 hours, 53 minutes and 8 secondss
########## Evaluation Against Random - Episode 46000 ##########
Timestep: 3680250 Average reward against random is 2.485


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 46000 - Eval Opponent 999 over 1000 - Number of game played 232000 - 13 hours, 54 minutes and 29 seconds
########## Evaluation Against Last Agent - Episode 46000 ##########
Timestep: 3680250 Average reward against last agent is 2.476


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 46500 - Eval Random 999 over 1000 - Number of game played 233500 - 13 hours, 58 minutes and 1 secondds
########## Evaluation Against Random - Episode 46500 ##########
Timestep: 3720054 Average reward against random is 2.38


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 46500 - Eval Opponent 999 over 1000 - Number of game played 234500 - 13 hours, 59 minutes and 25 seconds
########## Evaluation Against Last Agent - Episode 46500 ##########
Timestep: 3720054 Average reward against last agent is 2.342


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 47000 - Eval Random 999 over 1000 - Number of game played 236000 - 14 hours, 2 minutes and 53 seconds
########## Evaluation Against Random - Episode 47000 ##########
Timestep: 3759923 Average reward against random is 2.354


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 47000 - Eval Opponent 999 over 1000 - Number of game played 237000 - 14 hours, 4 minutes and 14 seconds
########## Evaluation Against Last Agent - Episode 47000 ##########
Timestep: 3759923 Average reward against last agent is 2.343


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 47500 - Eval Random 999 over 1000 - Number of game played 238500 - 14 hours, 7 minutes and 47 seconds
########## Evaluation Against Random - Episode 47500 ##########
Timestep: 3799694 Average reward against random is 2.408


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 47500 - Eval Opponent 999 over 1000 - Number of game played 239500 - 14 hours, 9 minutes and 10 seconds
########## Evaluation Against Last Agent - Episode 47500 ##########
Timestep: 3799694 Average reward against last agent is 2.385


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 48000 - Eval Random 999 over 1000 - Number of game played 241000 - 14 hours, 12 minutes and 44 seconds
########## Evaluation Against Random - Episode 48000 ##########
Timestep: 3839491 Average reward against random is 2.355


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 48000 - Eval Opponent 999 over 1000 - Number of game played 242000 - 14 hours, 14 minutes and 6 secondss
########## Evaluation Against Last Agent - Episode 48000 ##########
Timestep: 3839491 Average reward against last agent is 2.322


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 48500 - Eval Random 999 over 1000 - Number of game played 243500 - 14 hours, 17 minutes and 43 seconds
########## Evaluation Against Random - Episode 48500 ##########
Timestep: 3879190 Average reward against random is 2.453


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 48500 - Eval Opponent 999 over 1000 - Number of game played 244500 - 14 hours, 19 minutes and 4 secondss
########## Evaluation Against Last Agent - Episode 48500 ##########
Timestep: 3879190 Average reward against last agent is 2.263


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 49000 - Eval Random 999 over 1000 - Number of game played 246000 - 14 hours, 22 minutes and 44 seconds
########## Evaluation Against Random - Episode 49000 ##########
Timestep: 3918842 Average reward against random is 2.435


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 49000 - Eval Opponent 999 over 1000 - Number of game played 247000 - 14 hours, 24 minutes and 5 secondss
########## Evaluation Against Last Agent - Episode 49000 ##########
Timestep: 3918842 Average reward against last agent is 2.503


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 49500 - Eval Random 999 over 1000 - Number of game played 248500 - 14 hours, 27 minutes and 43 seconds
########## Evaluation Against Random - Episode 49500 ##########
Timestep: 3958571 Average reward against random is 2.524


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 49500 - Eval Opponent 999 over 1000 - Number of game played 249500 - 14 hours, 29 minutes and 7 secondss
########## Evaluation Against Last Agent - Episode 49500 ##########
Timestep: 3958571 Average reward against last agent is 2.492


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 50000 - Eval Random 999 over 1000 - Number of game played 251000 - 14 hours, 32 minutes and 33 seconds
########## Evaluation Against Random - Episode 50000 ##########
Timestep: 3998365 Average reward against random is 2.436


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 50000 - Eval Opponent 999 over 1000 - Number of game played 252000 - 14 hours, 33 minutes and 53 seconds
########## Evaluation Against Last Agent - Episode 50000 ##########
Timestep: 3998365 Average reward against last agent is 2.519


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 11 as opponent ##########

########## Changing model - Using Model 11 as opponent ##########
EPISODE 50500 - Eval Random 999 over 1000 - Number of game played 253500 - 14 hours, 37 minutes and 24 seconds
########## Evaluation Against Random - Episode 50500 ##########
Timestep: 4038109 Average reward against random is 2.45


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 50500 - Eval Opponent 999 over 1000 - Number of game played 254500 - 14 hours, 38 minutes and 48 seconds
########## Evaluation Against Last Agent - Episode 50500 ##########
Timestep: 4038109 Average reward against last agent is 2.22


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 51000 - Eval Random 999 over 1000 - Number of game played 256000 - 14 hours, 42 minutes and 21 seconds
########## Evaluation Against Random - Episode 51000 ##########
Timestep: 4077803 Average reward against random is 2.453


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 51000 - Eval Opponent 999 over 1000 - Number of game played 257000 - 14 hours, 43 minutes and 41 seconds
########## Evaluation Against Last Agent - Episode 51000 ##########
Timestep: 4077803 Average reward against last agent is 2.386


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 51500 - Eval Random 999 over 1000 - Number of game played 258500 - 14 hours, 47 minutes and 16 seconds
########## Evaluation Against Random - Episode 51500 ##########
Timestep: 4117476 Average reward against random is 2.474


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 51500 - Eval Opponent 999 over 1000 - Number of game played 259500 - 14 hours, 48 minutes and 36 seconds
########## Evaluation Against Last Agent - Episode 51500 ##########
Timestep: 4117476 Average reward against last agent is 2.383


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 52000 - Eval Random 999 over 1000 - Number of game played 261000 - 14 hours, 52 minutes and 10 seconds
########## Evaluation Against Random - Episode 52000 ##########
Timestep: 4157252 Average reward against random is 2.413


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 52000 - Eval Opponent 999 over 1000 - Number of game played 262000 - 14 hours, 53 minutes and 32 seconds
########## Evaluation Against Last Agent - Episode 52000 ##########
Timestep: 4157252 Average reward against last agent is 2.504


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 52500 - Eval Random 999 over 1000 - Number of game played 263500 - 14 hours, 57 minutes and 8 secondss
########## Evaluation Against Random - Episode 52500 ##########
Timestep: 4196988 Average reward against random is 2.457


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 52500 - Eval Opponent 999 over 1000 - Number of game played 264500 - 14 hours, 58 minutes and 29 seconds
########## Evaluation Against Last Agent - Episode 52500 ##########
Timestep: 4196988 Average reward against last agent is 2.23


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 53000 - Eval Random 999 over 1000 - Number of game played 266000 - 15 hours, 2 minutes and 3 seconds
########## Evaluation Against Random - Episode 53000 ##########
Timestep: 4236701 Average reward against random is 2.55


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 53000 - Eval Opponent 999 over 1000 - Number of game played 267000 - 15 hours, 3 minutes and 25 seconds
########## Evaluation Against Last Agent - Episode 53000 ##########
Timestep: 4236701 Average reward against last agent is 2.412
EPISODE 53500 - Eval Random 999 over 1000 - Number of game played 268500 - 15 hours, 7 minutes and 2 secondss
########## Evaluation Against Random - Episode 53500 ##########
Timestep: 4276460 Average reward against random is 2.446


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 53500 - Eval Opponent 999 over 1000 - Number of game played 269500 - 15 hours, 8 minutes and 25 seconds
########## Evaluation Against Last Agent - Episode 53500 ##########
Timestep: 4276460 Average reward against last agent is 2.381


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 54000 - Eval Random 999 over 1000 - Number of game played 271000 - 15 hours, 12 minutes and 2 secondss
########## Evaluation Against Random - Episode 54000 ##########
Timestep: 4316200 Average reward against random is 2.461


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 54000 - Eval Opponent 999 over 1000 - Number of game played 272000 - 15 hours, 13 minutes and 25 seconds
########## Evaluation Against Last Agent - Episode 54000 ##########
Timestep: 4316200 Average reward against last agent is 2.583


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 54500 - Eval Random 999 over 1000 - Number of game played 273500 - 15 hours, 17 minutes and 4 secondss
########## Evaluation Against Random - Episode 54500 ##########
Timestep: 4355929 Average reward against random is 2.564


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 54500 - Eval Opponent 999 over 1000 - Number of game played 274500 - 15 hours, 18 minutes and 25 seconds
########## Evaluation Against Last Agent - Episode 54500 ##########
Timestep: 4355929 Average reward against last agent is 2.461


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 55000 - Eval Random 999 over 1000 - Number of game played 276000 - 15 hours, 22 minutes and 5 secondss
########## Evaluation Against Random - Episode 55000 ##########
Timestep: 4395599 Average reward against random is 2.36


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 55000 - Eval Opponent 999 over 1000 - Number of game played 277000 - 15 hours, 23 minutes and 25 seconds
########## Evaluation Against Last Agent - Episode 55000 ##########
Timestep: 4395599 Average reward against last agent is 2.448


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 12 as opponent ##########

########## Changing model - Using Model 12 as opponent ##########
EPISODE 55500 - Eval Random 999 over 1000 - Number of game played 278500 - 15 hours, 27 minutes and 9 secondss
########## Evaluation Against Random - Episode 55500 ##########
Timestep: 4435193 Average reward against random is 2.583


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 55500 - Eval Opponent 999 over 1000 - Number of game played 279500 - 15 hours, 28 minutes and 30 seconds
########## Evaluation Against Last Agent - Episode 55500 ##########
Timestep: 4435193 Average reward against last agent is 2.213


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 56000 - Eval Random 999 over 1000 - Number of game played 281000 - 15 hours, 32 minutes and 18 seconds
########## Evaluation Against Random - Episode 56000 ##########
Timestep: 4474772 Average reward against random is 2.573


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 56000 - Eval Opponent 999 over 1000 - Number of game played 282000 - 15 hours, 33 minutes and 39 seconds
########## Evaluation Against Last Agent - Episode 56000 ##########
Timestep: 4474772 Average reward against last agent is 2.193


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 56500 - Eval Random 999 over 1000 - Number of game played 283500 - 15 hours, 37 minutes and 25 seconds
########## Evaluation Against Random - Episode 56500 ##########
Timestep: 4514363 Average reward against random is 2.523


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 56500 - Eval Opponent 999 over 1000 - Number of game played 284500 - 15 hours, 38 minutes and 46 seconds
########## Evaluation Against Last Agent - Episode 56500 ##########
Timestep: 4514363 Average reward against last agent is 2.499


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 57000 - Eval Random 999 over 1000 - Number of game played 286000 - 15 hours, 42 minutes and 32 seconds
########## Evaluation Against Random - Episode 57000 ##########
Timestep: 4553952 Average reward against random is 2.32


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 57000 - Eval Opponent 999 over 1000 - Number of game played 287000 - 15 hours, 43 minutes and 52 seconds
########## Evaluation Against Last Agent - Episode 57000 ##########
Timestep: 4553952 Average reward against last agent is 2.262


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 57500 - Eval Random 999 over 1000 - Number of game played 288500 - 15 hours, 47 minutes and 40 seconds
########## Evaluation Against Random - Episode 57500 ##########
Timestep: 4593595 Average reward against random is 2.387


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 57500 - Eval Opponent 999 over 1000 - Number of game played 289500 - 15 hours, 49 minutes and 1 secondds
########## Evaluation Against Last Agent - Episode 57500 ##########
Timestep: 4593595 Average reward against last agent is 2.21


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 58000 - Eval Random 999 over 1000 - Number of game played 291000 - 15 hours, 52 minutes and 48 seconds
########## Evaluation Against Random - Episode 58000 ##########
Timestep: 4633322 Average reward against random is 2.461


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 58000 - Eval Opponent 999 over 1000 - Number of game played 292000 - 15 hours, 54 minutes and 9 secondss
########## Evaluation Against Last Agent - Episode 58000 ##########
Timestep: 4633322 Average reward against last agent is 2.528


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 58500 - Eval Random 999 over 1000 - Number of game played 293500 - 15 hours, 57 minutes and 57 seconds
########## Evaluation Against Random - Episode 58500 ##########
Timestep: 4672982 Average reward against random is 2.383


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 58500 - Eval Opponent 999 over 1000 - Number of game played 294500 - 15 hours, 59 minutes and 18 seconds
########## Evaluation Against Last Agent - Episode 58500 ##########
Timestep: 4672982 Average reward against last agent is 2.358


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 59000 - Eval Random 999 over 1000 - Number of game played 296000 - 16 hours, 3 minutes and 6 secondss
########## Evaluation Against Random - Episode 59000 ##########
Timestep: 4712728 Average reward against random is 2.54


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 59000 - Eval Opponent 999 over 1000 - Number of game played 297000 - 16 hours, 4 minutes and 27 seconds
########## Evaluation Against Last Agent - Episode 59000 ##########
Timestep: 4712728 Average reward against last agent is 2.472


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 59500 - Eval Random 999 over 1000 - Number of game played 298500 - 16 hours, 8 minutes and 16 seconds
########## Evaluation Against Random - Episode 59500 ##########
Timestep: 4752460 Average reward against random is 2.547


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 59500 - Eval Opponent 999 over 1000 - Number of game played 299500 - 16 hours, 9 minutes and 37 seconds
########## Evaluation Against Last Agent - Episode 59500 ##########
Timestep: 4752460 Average reward against last agent is 2.265


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 60000 - Eval Random 999 over 1000 - Number of game played 301000 - 16 hours, 13 minutes and 29 seconds
########## Evaluation Against Random - Episode 60000 ##########
Timestep: 4792129 Average reward against random is 2.332


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 60000 - Eval Opponent 999 over 1000 - Number of game played 302000 - 16 hours, 14 minutes and 50 seconds
########## Evaluation Against Last Agent - Episode 60000 ##########
Timestep: 4792129 Average reward against last agent is 2.293


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 13 as opponent ##########

########## Changing model - Using Model 13 as opponent ##########
EPISODE 60500 - Eval Random 999 over 1000 - Number of game played 303500 - 16 hours, 18 minutes and 45 seconds
########## Evaluation Against Random - Episode 60500 ##########
Timestep: 4831751 Average reward against random is 1.108
EPISODE 60500 - Eval Opponent 999 over 1000 - Number of game played 304500 - 16 hours, 20 minutes and 5 secondss
########## Evaluation Against Last Agent - Episode 60500 ##########
Timestep: 4831751 Average reward against last agent is 2.176


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 61000 - Eval Random 999 over 1000 - Number of game played 306000 - 16 hours, 24 minutes and 2 secondss
########## Evaluation Against Random - Episode 61000 ##########
Timestep: 4871364 Average reward against random is 1.226
EPISODE 61000 - Eval Opponent 999 over 1000 - Number of game played 307000 - 16 hours, 25 minutes and 23 seconds
########## Evaluation Against Last Agent - Episode 61000 ##########
Timestep: 4871364 Average reward against last agent is 2.413


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 61500 - Eval Random 999 over 1000 - Number of game played 308500 - 16 hours, 29 minutes and 19 seconds
########## Evaluation Against Random - Episode 61500 ##########
Timestep: 4911080 Average reward against random is 1.385
EPISODE 61500 - Eval Opponent 999 over 1000 - Number of game played 309500 - 16 hours, 30 minutes and 40 seconds
########## Evaluation Against Last Agent - Episode 61500 ##########
Timestep: 4911080 Average reward against last agent is 2.271


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 62000 - Eval Random 999 over 1000 - Number of game played 311000 - 16 hours, 34 minutes and 36 seconds
########## Evaluation Against Random - Episode 62000 ##########
Timestep: 4950844 Average reward against random is 1.184
EPISODE 62000 - Eval Opponent 999 over 1000 - Number of game played 312000 - 16 hours, 35 minutes and 56 seconds
########## Evaluation Against Last Agent - Episode 62000 ##########
Timestep: 4950844 Average reward against last agent is 2.434


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 62500 - Eval Random 999 over 1000 - Number of game played 313500 - 16 hours, 39 minutes and 47 seconds
########## Evaluation Against Random - Episode 62500 ##########
Timestep: 4990662 Average reward against random is 1.031
EPISODE 62500 - Eval Opponent 999 over 1000 - Number of game played 314500 - 16 hours, 41 minutes and 8 secondss
########## Evaluation Against Last Agent - Episode 62500 ##########
Timestep: 4990662 Average reward against last agent is 2.613


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 63000 - Eval Random 999 over 1000 - Number of game played 316000 - 16 hours, 45 minutes and 8 secondss
########## Evaluation Against Random - Episode 63000 ##########
Timestep: 5030403 Average reward against random is 0.96
EPISODE 63000 - Eval Opponent 999 over 1000 - Number of game played 317000 - 16 hours, 46 minutes and 29 seconds
########## Evaluation Against Last Agent - Episode 63000 ##########
Timestep: 5030403 Average reward against last agent is 2.427


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 63500 - Eval Random 999 over 1000 - Number of game played 318500 - 16 hours, 50 minutes and 28 seconds
########## Evaluation Against Random - Episode 63500 ##########
Timestep: 5070208 Average reward against random is 1.199
EPISODE 63500 - Eval Opponent 999 over 1000 - Number of game played 319500 - 16 hours, 51 minutes and 49 seconds
########## Evaluation Against Last Agent - Episode 63500 ##########
Timestep: 5070208 Average reward against last agent is 2.58


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 64000 - Eval Random 999 over 1000 - Number of game played 321000 - 16 hours, 55 minutes and 49 seconds
########## Evaluation Against Random - Episode 64000 ##########
Timestep: 5109991 Average reward against random is 1.513
EPISODE 64000 - Eval Opponent 999 over 1000 - Number of game played 322000 - 16 hours, 57 minutes and 10 seconds
########## Evaluation Against Last Agent - Episode 64000 ##########
Timestep: 5109991 Average reward against last agent is 2.257


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 64500 - Eval Random 999 over 1000 - Number of game played 323500 - 17 hours, 1 minute and 7 secondss
########## Evaluation Against Random - Episode 64500 ##########
Timestep: 5149798 Average reward against random is 1.201
EPISODE 64500 - Eval Opponent 999 over 1000 - Number of game played 324500 - 17 hours, 2 minutes and 27 seconds
########## Evaluation Against Last Agent - Episode 64500 ##########
Timestep: 5149798 Average reward against last agent is 2.568


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 65000 - Eval Random 999 over 1000 - Number of game played 326000 - 17 hours, 6 minutes and 33 seconds
########## Evaluation Against Random - Episode 65000 ##########
Timestep: 5189472 Average reward against random is 1.332
EPISODE 65000 - Eval Opponent 999 over 1000 - Number of game played 327000 - 17 hours, 7 minutes and 53 seconds
########## Evaluation Against Last Agent - Episode 65000 ##########
Timestep: 5189472 Average reward against last agent is 2.5


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 14 as opponent ##########

########## Changing model - Using Model 14 as opponent ##########
EPISODE 65500 - Eval Random 999 over 1000 - Number of game played 328500 - 17 hours, 11 minutes and 59 seconds
########## Evaluation Against Random - Episode 65500 ##########
Timestep: 5229202 Average reward against random is 1.268
EPISODE 65500 - Eval Opponent 999 over 1000 - Number of game played 329500 - 17 hours, 13 minutes and 20 seconds
########## Evaluation Against Last Agent - Episode 65500 ##########
Timestep: 5229202 Average reward against last agent is 2.476


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 66000 - Eval Random 999 over 1000 - Number of game played 331000 - 17 hours, 17 minutes and 26 seconds
########## Evaluation Against Random - Episode 66000 ##########
Timestep: 5268981 Average reward against random is 1.345
EPISODE 66000 - Eval Opponent 999 over 1000 - Number of game played 332000 - 17 hours, 18 minutes and 47 seconds
########## Evaluation Against Last Agent - Episode 66000 ##########
Timestep: 5268981 Average reward against last agent is 2.636


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 66500 - Eval Random 999 over 1000 - Number of game played 333500 - 17 hours, 22 minutes and 48 seconds
########## Evaluation Against Random - Episode 66500 ##########
Timestep: 5308798 Average reward against random is 0.951
EPISODE 66500 - Eval Opponent 999 over 1000 - Number of game played 334500 - 17 hours, 24 minutes and 9 secondss
########## Evaluation Against Last Agent - Episode 66500 ##########
Timestep: 5308798 Average reward against last agent is 2.39


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 67000 - Eval Random 999 over 1000 - Number of game played 336000 - 17 hours, 28 minutes and 17 seconds
########## Evaluation Against Random - Episode 67000 ##########
Timestep: 5348575 Average reward against random is 1.178
EPISODE 67000 - Eval Opponent 999 over 1000 - Number of game played 337000 - 17 hours, 29 minutes and 38 seconds
########## Evaluation Against Last Agent - Episode 67000 ##########
Timestep: 5348575 Average reward against last agent is 2.385


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 67500 - Eval Random 999 over 1000 - Number of game played 338500 - 17 hours, 33 minutes and 48 seconds
########## Evaluation Against Random - Episode 67500 ##########
Timestep: 5388378 Average reward against random is 1.386
EPISODE 67500 - Eval Opponent 999 over 1000 - Number of game played 339500 - 17 hours, 35 minutes and 9 secondss
########## Evaluation Against Last Agent - Episode 67500 ##########
Timestep: 5388378 Average reward against last agent is 2.465


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 68000 - Eval Random 999 over 1000 - Number of game played 341000 - 17 hours, 39 minutes and 19 seconds
########## Evaluation Against Random - Episode 68000 ##########
Timestep: 5428129 Average reward against random is 2.522


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 68000 - Eval Opponent 999 over 1000 - Number of game played 342000 - 17 hours, 40 minutes and 42 seconds
########## Evaluation Against Last Agent - Episode 68000 ##########
Timestep: 5428129 Average reward against last agent is 2.38


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 68500 - Eval Random 999 over 1000 - Number of game played 343500 - 17 hours, 44 minutes and 38 seconds
########## Evaluation Against Random - Episode 68500 ##########
Timestep: 5467879 Average reward against random is 2.485


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 68500 - Eval Opponent 999 over 1000 - Number of game played 344500 - 17 hours, 45 minutes and 59 seconds
########## Evaluation Against Last Agent - Episode 68500 ##########
Timestep: 5467879 Average reward against last agent is 2.417


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 69000 - Eval Random 999 over 1000 - Number of game played 346000 - 17 hours, 50 minutes and 2 secondss
########## Evaluation Against Random - Episode 69000 ##########
Timestep: 5507628 Average reward against random is 2.383


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 69000 - Eval Opponent 999 over 1000 - Number of game played 347000 - 17 hours, 51 minutes and 25 seconds
########## Evaluation Against Last Agent - Episode 69000 ##########
Timestep: 5507628 Average reward against last agent is 2.192


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 69500 - Eval Random 999 over 1000 - Number of game played 348500 - 17 hours, 55 minutes and 29 seconds
########## Evaluation Against Random - Episode 69500 ##########
Timestep: 5547458 Average reward against random is 1.215
EPISODE 69500 - Eval Opponent 999 over 1000 - Number of game played 349500 - 17 hours, 56 minutes and 50 seconds
########## Evaluation Against Last Agent - Episode 69500 ##########
Timestep: 5547458 Average reward against last agent is 2.59


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 70000 - Eval Random 999 over 1000 - Number of game played 351000 - 18 hours, 0 minute and 55 seconds
########## Evaluation Against Random - Episode 70000 ##########
Timestep: 5587242 Average reward against random is 1.161
EPISODE 70000 - Eval Opponent 999 over 1000 - Number of game played 352000 - 18 hours, 2 minutes and 16 seconds
########## Evaluation Against Last Agent - Episode 70000 ##########
Timestep: 5587242 Average reward against last agent is 2.279


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 15 as opponent ##########

########## Changing model - Using Model 15 as opponent ##########
EPISODE 70500 - Eval Random 999 over 1000 - Number of game played 353500 - 18 hours, 6 minutes and 20 seconds
########## Evaluation Against Random - Episode 70500 ##########
Timestep: 5626949 Average reward against random is 1.166
EPISODE 70500 - Eval Opponent 999 over 1000 - Number of game played 354500 - 18 hours, 7 minutes and 41 seconds
########## Evaluation Against Last Agent - Episode 70500 ##########
Timestep: 5626949 Average reward against last agent is 2.282


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 71000 - Eval Random 999 over 1000 - Number of game played 356000 - 18 hours, 11 minutes and 58 seconds
########## Evaluation Against Random - Episode 71000 ##########
Timestep: 5666600 Average reward against random is 1.246
EPISODE 71000 - Eval Opponent 999 over 1000 - Number of game played 357000 - 18 hours, 13 minutes and 18 seconds
########## Evaluation Against Last Agent - Episode 71000 ##########
Timestep: 5666600 Average reward against last agent is 2.28


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 71500 - Eval Random 999 over 1000 - Number of game played 358500 - 18 hours, 17 minutes and 31 seconds
########## Evaluation Against Random - Episode 71500 ##########
Timestep: 5706249 Average reward against random is 1.17
EPISODE 71500 - Eval Opponent 999 over 1000 - Number of game played 359500 - 18 hours, 18 minutes and 52 seconds
########## Evaluation Against Last Agent - Episode 71500 ##########
Timestep: 5706249 Average reward against last agent is 2.326


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 72000 - Eval Random 999 over 1000 - Number of game played 361000 - 18 hours, 23 minutes and 6 secondss
########## Evaluation Against Random - Episode 72000 ##########
Timestep: 5745919 Average reward against random is 1.406
EPISODE 72000 - Eval Opponent 999 over 1000 - Number of game played 362000 - 18 hours, 24 minutes and 27 seconds
########## Evaluation Against Last Agent - Episode 72000 ##########
Timestep: 5745919 Average reward against last agent is 2.308


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 72500 - Eval Random 999 over 1000 - Number of game played 363500 - 18 hours, 28 minutes and 43 seconds
########## Evaluation Against Random - Episode 72500 ##########
Timestep: 5785624 Average reward against random is 1.27
EPISODE 72500 - Eval Opponent 999 over 1000 - Number of game played 364500 - 18 hours, 30 minutes and 4 secondss
########## Evaluation Against Last Agent - Episode 72500 ##########
Timestep: 5785624 Average reward against last agent is 2.277


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 73000 - Eval Random 999 over 1000 - Number of game played 366000 - 18 hours, 34 minutes and 23 seconds
########## Evaluation Against Random - Episode 73000 ##########
Timestep: 5825307 Average reward against random is 1.138
EPISODE 73000 - Eval Opponent 999 over 1000 - Number of game played 367000 - 18 hours, 35 minutes and 43 seconds
########## Evaluation Against Last Agent - Episode 73000 ##########
Timestep: 5825307 Average reward against last agent is 2.282


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 73500 - Eval Random 999 over 1000 - Number of game played 368500 - 18 hours, 40 minutes and 5 secondss
########## Evaluation Against Random - Episode 73500 ##########
Timestep: 5864926 Average reward against random is 1.171
EPISODE 73500 - Eval Opponent 999 over 1000 - Number of game played 369500 - 18 hours, 41 minutes and 26 seconds
########## Evaluation Against Last Agent - Episode 73500 ##########
Timestep: 5864926 Average reward against last agent is 2.369


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 74000 - Eval Random 999 over 1000 - Number of game played 371000 - 18 hours, 45 minutes and 49 seconds
########## Evaluation Against Random - Episode 74000 ##########
Timestep: 5904499 Average reward against random is 1.234
EPISODE 74000 - Eval Opponent 999 over 1000 - Number of game played 372000 - 18 hours, 47 minutes and 10 seconds
########## Evaluation Against Last Agent - Episode 74000 ##########
Timestep: 5904499 Average reward against last agent is 2.281


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 74500 - Eval Random 999 over 1000 - Number of game played 373500 - 18 hours, 51 minutes and 33 seconds
########## Evaluation Against Random - Episode 74500 ##########
Timestep: 5944174 Average reward against random is 1.268
EPISODE 74500 - Eval Opponent 999 over 1000 - Number of game played 374500 - 18 hours, 52 minutes and 54 seconds
########## Evaluation Against Last Agent - Episode 74500 ##########
Timestep: 5944174 Average reward against last agent is 2.283


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 75000 - Eval Random 999 over 1000 - Number of game played 376000 - 18 hours, 57 minutes and 15 seconds
########## Evaluation Against Random - Episode 75000 ##########
Timestep: 5984016 Average reward against random is 1.324
EPISODE 75000 - Eval Opponent 999 over 1000 - Number of game played 377000 - 18 hours, 58 minutes and 37 seconds
########## Evaluation Against Last Agent - Episode 75000 ##########
Timestep: 5984016 Average reward against last agent is 2.399


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 16 as opponent ##########

########## Changing model - Using Model 16 as opponent ##########
EPISODE 75500 - Eval Random 3 over 1000 - Number of game played 377504 - 19 hours, 2 minutes and 20 seconds

  probs = remove_illegal(np.exp(q_values), state['legal_actions'])


EPISODE 75500 - Eval Random 31 over 1000 - Number of game played 377532 - 19 hours, 2 minutes and 21 seconds

  probs /= sum(probs)


EPISODE 75500 - Eval Random 999 over 1000 - Number of game played 378500 - 19 hours, 2 minutes and 52 seconds
########## Evaluation Against Random - Episode 75500 ##########
Timestep: 6023999 Average reward against random is 1.036
EPISODE 75500 - Eval Opponent 999 over 1000 - Number of game played 379500 - 19 hours, 4 minutes and 14 seconds
########## Evaluation Against Last Agent - Episode 75500 ##########
Timestep: 6023999 Average reward against last agent is 2.3


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 76000 - Eval Random 999 over 1000 - Number of game played 381000 - 19 hours, 8 minutes and 34 seconds
########## Evaluation Against Random - Episode 76000 ##########
Timestep: 6064028 Average reward against random is 1.281
EPISODE 76000 - Eval Opponent 999 over 1000 - Number of game played 382000 - 19 hours, 9 minutes and 57 seconds
########## Evaluation Against Last Agent - Episode 76000 ##########
Timestep: 6064028 Average reward against last agent is 2.508


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 76500 - Eval Random 999 over 1000 - Number of game played 383500 - 19 hours, 14 minutes and 13 seconds
########## Evaluation Against Random - Episode 76500 ##########
Timestep: 6103982 Average reward against random is 1.169
EPISODE 76500 - Eval Opponent 999 over 1000 - Number of game played 384500 - 19 hours, 15 minutes and 35 seconds
########## Evaluation Against Last Agent - Episode 76500 ##########
Timestep: 6103982 Average reward against last agent is 2.493


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 77000 - Eval Random 999 over 1000 - Number of game played 386000 - 19 hours, 20 minutes and 0 secondds
########## Evaluation Against Random - Episode 77000 ##########
Timestep: 6143836 Average reward against random is 1.15
EPISODE 77000 - Eval Opponent 999 over 1000 - Number of game played 387000 - 19 hours, 21 minutes and 23 seconds
########## Evaluation Against Last Agent - Episode 77000 ##########
Timestep: 6143836 Average reward against last agent is 2.487


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 77500 - Eval Random 999 over 1000 - Number of game played 388500 - 19 hours, 25 minutes and 44 seconds
########## Evaluation Against Random - Episode 77500 ##########
Timestep: 6183706 Average reward against random is 1.102
EPISODE 77500 - Eval Opponent 999 over 1000 - Number of game played 389500 - 19 hours, 27 minutes and 6 secondss
########## Evaluation Against Last Agent - Episode 77500 ##########
Timestep: 6183706 Average reward against last agent is 2.53


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 78000 - Eval Random 999 over 1000 - Number of game played 391000 - 19 hours, 31 minutes and 34 seconds
########## Evaluation Against Random - Episode 78000 ##########
Timestep: 6223625 Average reward against random is 1.192
EPISODE 78000 - Eval Opponent 999 over 1000 - Number of game played 392000 - 19 hours, 32 minutes and 56 seconds
########## Evaluation Against Last Agent - Episode 78000 ##########
Timestep: 6223625 Average reward against last agent is 2.333


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 78500 - Eval Random 999 over 1000 - Number of game played 393500 - 19 hours, 37 minutes and 19 seconds
########## Evaluation Against Random - Episode 78500 ##########
Timestep: 6263486 Average reward against random is 0.904
EPISODE 78500 - Eval Opponent 999 over 1000 - Number of game played 394500 - 19 hours, 38 minutes and 41 seconds
########## Evaluation Against Last Agent - Episode 78500 ##########
Timestep: 6263486 Average reward against last agent is 2.391


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 79000 - Eval Random 999 over 1000 - Number of game played 396000 - 19 hours, 43 minutes and 14 seconds
########## Evaluation Against Random - Episode 79000 ##########
Timestep: 6303275 Average reward against random is 0.923
EPISODE 79000 - Eval Opponent 999 over 1000 - Number of game played 397000 - 19 hours, 44 minutes and 36 seconds
########## Evaluation Against Last Agent - Episode 79000 ##########
Timestep: 6303275 Average reward against last agent is 2.415


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 79500 - Eval Random 999 over 1000 - Number of game played 398500 - 19 hours, 49 minutes and 11 seconds
########## Evaluation Against Random - Episode 79500 ##########
Timestep: 6343033 Average reward against random is 1.472
EPISODE 79500 - Eval Opponent 999 over 1000 - Number of game played 399500 - 19 hours, 50 minutes and 33 seconds
########## Evaluation Against Last Agent - Episode 79500 ##########
Timestep: 6343033 Average reward against last agent is 2.424


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 80000 - Eval Random 999 over 1000 - Number of game played 401000 - 19 hours, 55 minutes and 2 secondss
########## Evaluation Against Random - Episode 80000 ##########
Timestep: 6382895 Average reward against random is 1.128
EPISODE 80000 - Eval Opponent 999 over 1000 - Number of game played 402000 - 19 hours, 56 minutes and 24 seconds
########## Evaluation Against Last Agent - Episode 80000 ##########
Timestep: 6382895 Average reward against last agent is 2.292


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 17 as opponent ##########

########## Changing model - Using Model 17 as opponent ##########
EPISODE 80500 - Eval Random 999 over 1000 - Number of game played 403500 - 20 hours, 0 minute and 51 seconds
########## Evaluation Against Random - Episode 80500 ##########
Timestep: 6422823 Average reward against random is 1.203
EPISODE 80500 - Eval Opponent 999 over 1000 - Number of game played 404500 - 20 hours, 2 minutes and 13 seconds
########## Evaluation Against Last Agent - Episode 80500 ##########
Timestep: 6422823 Average reward against last agent is 2.447


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 81000 - Eval Random 999 over 1000 - Number of game played 406000 - 20 hours, 6 minutes and 48 seconds
########## Evaluation Against Random - Episode 81000 ##########
Timestep: 6462763 Average reward against random is 1.181
EPISODE 81000 - Eval Opponent 999 over 1000 - Number of game played 407000 - 20 hours, 8 minutes and 10 seconds
########## Evaluation Against Last Agent - Episode 81000 ##########
Timestep: 6462763 Average reward against last agent is 2.408


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 81500 - Eval Random 999 over 1000 - Number of game played 408500 - 20 hours, 12 minutes and 42 seconds
########## Evaluation Against Random - Episode 81500 ##########
Timestep: 6502715 Average reward against random is 1.151
EPISODE 81500 - Eval Opponent 999 over 1000 - Number of game played 409500 - 20 hours, 14 minutes and 4 secondss
########## Evaluation Against Last Agent - Episode 81500 ##########
Timestep: 6502715 Average reward against last agent is 2.511


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 82000 - Eval Random 999 over 1000 - Number of game played 411000 - 20 hours, 18 minutes and 38 seconds
########## Evaluation Against Random - Episode 82000 ##########
Timestep: 6542600 Average reward against random is 1.078
EPISODE 82000 - Eval Opponent 999 over 1000 - Number of game played 412000 - 20 hours, 20 minutes and 1 secondds
########## Evaluation Against Last Agent - Episode 82000 ##########
Timestep: 6542600 Average reward against last agent is 2.326


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 82500 - Eval Random 999 over 1000 - Number of game played 413500 - 20 hours, 24 minutes and 45 seconds
########## Evaluation Against Random - Episode 82500 ##########
Timestep: 6582450 Average reward against random is 1.277
EPISODE 82500 - Eval Opponent 999 over 1000 - Number of game played 414500 - 20 hours, 26 minutes and 7 secondss
########## Evaluation Against Last Agent - Episode 82500 ##########
Timestep: 6582450 Average reward against last agent is 2.181


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 83000 - Eval Random 999 over 1000 - Number of game played 416000 - 20 hours, 30 minutes and 42 seconds
########## Evaluation Against Random - Episode 83000 ##########
Timestep: 6622393 Average reward against random is 0.931
EPISODE 83000 - Eval Opponent 999 over 1000 - Number of game played 417000 - 20 hours, 32 minutes and 3 secondss
########## Evaluation Against Last Agent - Episode 83000 ##########
Timestep: 6622393 Average reward against last agent is 2.21


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 83500 - Eval Random 999 over 1000 - Number of game played 418500 - 20 hours, 36 minutes and 51 seconds
########## Evaluation Against Random - Episode 83500 ##########
Timestep: 6662244 Average reward against random is 1.387
EPISODE 83500 - Eval Opponent 999 over 1000 - Number of game played 419500 - 20 hours, 38 minutes and 13 seconds
########## Evaluation Against Last Agent - Episode 83500 ##########
Timestep: 6662244 Average reward against last agent is 1.958


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 84000 - Eval Random 999 over 1000 - Number of game played 421000 - 20 hours, 42 minutes and 40 seconds
########## Evaluation Against Random - Episode 84000 ##########
Timestep: 6702174 Average reward against random is 2.365


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 84000 - Eval Opponent 999 over 1000 - Number of game played 422000 - 20 hours, 44 minutes and 5 secondss
########## Evaluation Against Last Agent - Episode 84000 ##########
Timestep: 6702174 Average reward against last agent is 1.716


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 84500 - Eval Random 999 over 1000 - Number of game played 423500 - 20 hours, 48 minutes and 17 seconds
########## Evaluation Against Random - Episode 84500 ##########
Timestep: 6742118 Average reward against random is 1.079
EPISODE 84500 - Eval Opponent 999 over 1000 - Number of game played 424500 - 20 hours, 49 minutes and 38 seconds
########## Evaluation Against Last Agent - Episode 84500 ##########
Timestep: 6742118 Average reward against last agent is 2.164


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 85000 - Eval Random 999 over 1000 - Number of game played 426000 - 20 hours, 53 minutes and 57 seconds
########## Evaluation Against Random - Episode 85000 ##########
Timestep: 6782049 Average reward against random is 1.368
EPISODE 85000 - Eval Opponent 999 over 1000 - Number of game played 427000 - 20 hours, 55 minutes and 17 seconds
########## Evaluation Against Last Agent - Episode 85000 ##########
Timestep: 6782049 Average reward against last agent is 2.04


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)



########## Changing model - Using Model 18 as opponent ##########

########## Changing model - Using Model 18 as opponent ##########
EPISODE 85500 - Eval Random 999 over 1000 - Number of game played 428500 - 20 hours, 59 minutes and 30 seconds
########## Evaluation Against Random - Episode 85500 ##########
Timestep: 6821950 Average reward against random is 1.123
EPISODE 85500 - Eval Opponent 999 over 1000 - Number of game played 429500 - 21 hours, 0 minute and 50 secondsds
########## Evaluation Against Last Agent - Episode 85500 ##########
Timestep: 6821950 Average reward against last agent is 2.059


in singular transformations; automatically expanding.
bottom=0, top=0
  ret = ax.set_ylim(*args, **kwargs)


EPISODE 85565 - Number of game played 429565 - 21 hours, 1 minute and 16 seconds