In [1]:
import gym
import gym_maze

# Gym id of the maze environment to train on.
# NOTE: 'MazeXYZ-v0' looks like a placeholder -- set it to the id of a registered
# environment before running.
# If the chosen environment returns its state as a list of strings, wrap the
# environment in an observation wrapper first.
MAZE = 'MazeXYZ-v0'

maze = gym.make(MAZE)

In [2]:
from ray.rllib.agents.dqn import DQNTrainer
import numpy as np
import os
from ray.tune.registry import register_env


# Output layout: RESULTS/RL_RESULTS/DQN/<MAZE>/ -- results of DQN runs are grouped
# per environment id.
RESULTS_PATH = os.path.join('RESULTS', 'RL_RESULTS')
DQN_RESULTS_PATH = os.path.join(RESULTS_PATH, 'DQN', MAZE)
# exist_ok=True keeps this cell safe to re-run when the directory already exists.
os.makedirs(DQN_RESULTS_PATH, exist_ok=True)
def maze_env_creator(env_config):
    """Build a fresh maze environment for RLlib.

    RLlib calls the registered creator once per environment it needs, so each
    call returns its own instance instead of one shared module-level object
    (stopping a trainer closes its environments, which must not affect the
    next run).

    Args:
        env_config: The "env_config" dict from the trainer config; currently
            unused.

    Returns:
        A new gym environment created from the ``MAZE`` id.
    """
    # If the environment needs an observation wrapper, apply it here so that
    # every instance handed to RLlib is wrapped.
    return gym.make(MAZE)


register_env("maze_env", maze_env_creator)

RUNS = 30
RUNS_START = 1
# Minimum number of training episodes per run; the last training iteration may
# overshoot this slightly.
EPISODES_PER_RUN = 5000

for run in range(RUNS_START, RUNS_START + RUNS):
    # One independent trainer per run, learning on the registered maze env.
    trainer = DQNTrainer(
        config={
            # Name under which the env creator was registered above.
            "env": "maze_env",
            # Config dict passed to the env creator (nothing to configure).
            "env_config": {
            },
            # 0 = no remote rollout workers; sampling happens in the local worker.
            "num_workers": 0,
            # N-step returns.
            "n_step": 10,
            # Noisy-network layers.
            "noisy": True,
            # num_atoms > 1 switches on distributional Q-learning; v_min / v_max
            # bound the support of the modelled return distribution.
            "num_atoms": 4,
            "v_min": 0.0,
            "v_max": 1000.0,
            # Sample batches always consist of whole episodes.
            "batch_mode": "complete_episodes",
            "exploration_config": {
                # The Exploration class to use.
                "type": "EpsilonGreedy",
                # Config for the Exploration class' constructor. Initial and
                # final epsilon are equal, so epsilon stays at 0.5 throughout.
                "initial_epsilon": 0.5,
                "final_epsilon": 0.5,
                "epsilon_timesteps": 50000000,  # Timesteps over which to anneal epsilon.
            },
            # Keep exploring during evaluation as well.
            "evaluation_config": {
                "explore": True,
            },
        })

    total_episodes = 0
    all_episodes_steps = []
    i = 0
    try:
        while total_episodes < EPISODES_PER_RUN:
            i += 1
            results = trainer.train()
            iteration_episodes = results['episodes_this_iter']
            total_episodes += iteration_episodes
            print(f'TOTAL EPISODES: {total_episodes}')
            print(f"Iter: {i}; avg. reward={results['episode_reward_mean']}; # episodes_steps={results['hist_stats']['episode_lengths']}")
            # hist_stats is a rolling window that also contains episodes from
            # earlier iterations (newest last). Appending the whole window would
            # record the same episodes again on every iteration, so keep only
            # the entries that belong to this iteration.
            if iteration_episodes > 0:
                all_episodes_steps += results['hist_stats']['episode_lengths'][-iteration_episodes:]
    finally:
        # Release the trainer's workers and buffers before the next run starts;
        # otherwise every run leaves a live trainer behind.
        trainer.stop()

    # One file per run holding the length (in steps) of every training episode.
    np.save(os.path.join(DQN_RESULTS_PATH, f'{run}.npy'), np.array(all_episodes_steps))




TOTAL EPISODES: 21
Iter: 1; avg. reward=95.23809523809524; # episodes_steps=[50, 50, 50, 50, 15, 50, 50, 37, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 43
Iter: 2; avg. reward=116.27906976744185; # episodes_steps=[50, 50, 50, 50, 15, 50, 50, 37, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 20, 50, 50, 29, 50, 50, 50, 50, 50, 50, 22, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 64
Iter: 3; avg. reward=93.75; # episodes_steps=[50, 50, 50, 50, 15, 50, 50, 37, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 20, 50, 50, 29, 50, 50, 50, 50, 50, 50, 22, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 86
Iter: 4; avg. reward=116.27906976744185; # episodes_steps=[50, 50, 50, 50, 15, 50, 50, 37, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 20, 50, 50, 29, 50, 50, 50, 50, 50, 50, 22, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 2, 50, 



TOTAL EPISODES: 5028
Iter: 147; avg. reward=780.0; # episodes_steps=[5, 14, 22, 11, 34, 50, 50, 19, 28, 38, 50, 4, 17, 16, 50, 50, 27, 31, 3, 43, 50, 6, 25, 27, 12, 29, 50, 35, 36, 50, 19, 2, 5, 16, 50, 50, 18, 39, 7, 50, 24, 15, 25, 50, 23, 50, 50, 23, 39, 23, 50, 11, 18, 15, 39, 50, 6, 18, 48, 21, 30, 23, 23, 11, 21, 32, 11, 44, 3, 6, 16, 36, 31, 18, 5, 23, 1, 33, 50, 50, 2, 1, 2, 11, 14, 22, 50, 28, 35, 50, 19, 26, 3, 50, 50, 22, 26, 28, 2, 16]




TOTAL EPISODES: 23
Iter: 1; avg. reward=173.91304347826087; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 2, 23, 50, 50, 50, 50, 50, 50, 50, 32, 50, 50, 11, 50]
TOTAL EPISODES: 45
Iter: 2; avg. reward=133.33333333333334; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 2, 23, 50, 50, 50, 50, 50, 50, 50, 32, 50, 50, 11, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 27, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 68
Iter: 3; avg. reward=147.05882352941177; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 2, 23, 50, 50, 50, 50, 50, 50, 50, 32, 50, 50, 11, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 27, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 14, 50, 50, 50, 50, 50, 50, 50, 50, 16, 50, 50, 47, 50, 50, 50, 50, 50, 50, 50, 17, 50, 50]
TOTAL EPISODES: 91
Iter: 4; avg. reward=164.83516483516485; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 2, 23, 50, 50, 50, 50, 50, 50, 50, 32, 50, 50, 11, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 27, 50, 50, 50, 50, 50,



TOTAL EPISODES: 5036
Iter: 142; avg. reward=870.0; # episodes_steps=[14, 19, 17, 50, 50, 13, 6, 4, 50, 32, 15, 16, 18, 30, 12, 5, 23, 15, 12, 49, 20, 9, 6, 37, 50, 50, 4, 2, 2, 22, 20, 27, 45, 33, 26, 39, 35, 20, 19, 38, 20, 24, 12, 22, 50, 29, 23, 4, 2, 50, 45, 35, 9, 23, 20, 12, 25, 50, 6, 8, 15, 18, 3, 10, 25, 20, 19, 3, 10, 12, 50, 19, 9, 50, 39, 14, 11, 11, 11, 26, 36, 38, 26, 11, 50, 4, 41, 2, 24, 7, 24, 15, 27, 12, 19, 7, 21, 50, 50, 18]




TOTAL EPISODES: 22
Iter: 1; avg. reward=136.36363636363637; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 39, 50, 1, 50, 50, 50]
TOTAL EPISODES: 43
Iter: 2; avg. reward=116.27906976744185; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 39, 50, 1, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 49, 50, 50, 50, 50, 50, 50, 20, 50, 50, 50]
TOTAL EPISODES: 66
Iter: 3; avg. reward=136.36363636363637; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 39, 50, 1, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 49, 50, 50, 50, 50, 50, 50, 20, 50, 50, 50, 50, 50, 50, 50, 50, 49, 1, 50, 50, 50, 38, 50, 50, 50, 50, 50, 50, 8, 50, 50, 50, 50, 50]
TOTAL EPISODES: 88
Iter: 4; avg. reward=125.0; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 39, 50, 1, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 49, 50, 50, 50, 50, 50, 50, 20, 50, 50, 50, 50, 50, 50,



TOTAL EPISODES: 21
Iter: 1; avg. reward=47.61904761904762; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 49, 50]
TOTAL EPISODES: 43
Iter: 2; avg. reward=69.76744186046511; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 49, 50, 18, 50, 50, 50, 50, 50, 50, 23, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 64
Iter: 3; avg. reward=62.5; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 49, 50, 18, 50, 50, 50, 50, 50, 50, 23, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 5, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 87
Iter: 4; avg. reward=91.95402298850574; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 49, 50, 18, 50, 50, 50, 50, 50, 50, 23, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50



TOTAL EPISODES: 22
Iter: 1; avg. reward=90.9090909090909; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 8, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 43
Iter: 2; avg. reward=69.76744186046511; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 8, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 50, 50]
TOTAL EPISODES: 64
Iter: 3; avg. reward=78.125; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 8, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 44, 50, 50, 6, 50]
TOTAL EPISODES: 87
Iter: 4; avg. reward=103.44827586206897; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 8, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,



TOTAL EPISODES: 5023
Iter: 137; avg. reward=830.0; # episodes_steps=[18, 26, 11, 30, 1, 18, 50, 24, 32, 15, 36, 19, 32, 27, 50, 8, 3, 50, 13, 42, 12, 3, 12, 11, 50, 20, 29, 47, 13, 36, 5, 28, 13, 2, 34, 17, 23, 50, 32, 22, 9, 27, 50, 2, 21, 13, 22, 50, 14, 13, 17, 7, 24, 50, 3, 16, 50, 36, 15, 50, 43, 50, 38, 6, 33, 43, 31, 14, 39, 29, 28, 11, 21, 10, 5, 12, 22, 25, 50, 50, 13, 17, 18, 19, 17, 36, 17, 17, 9, 36, 50, 50, 8, 1, 42, 50, 40, 19, 50, 38]




TOTAL EPISODES: 23
Iter: 1; avg. reward=217.3913043478261; # episodes_steps=[50, 17, 29, 50, 50, 22, 50, 50, 50, 50, 50, 50, 50, 50, 13, 50, 50, 50, 50, 50, 50, 50, 28]
TOTAL EPISODES: 45
Iter: 2; avg. reward=155.55555555555554; # episodes_steps=[50, 17, 29, 50, 50, 22, 50, 50, 50, 50, 50, 50, 50, 50, 13, 50, 50, 50, 50, 50, 50, 50, 28, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 21, 50, 50, 50, 50, 25, 50, 50, 50, 50]
TOTAL EPISODES: 68
Iter: 3; avg. reward=147.05882352941177; # episodes_steps=[50, 17, 29, 50, 50, 22, 50, 50, 50, 50, 50, 50, 50, 50, 13, 50, 50, 50, 50, 50, 50, 50, 28, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 21, 50, 50, 50, 50, 25, 50, 50, 50, 50, 50, 50, 19, 50, 50, 50, 50, 50, 50, 50, 50, 50, 8, 50, 50, 50, 50, 50, 50, 50, 50, 11, 50]
TOTAL EPISODES: 93
Iter: 4; avg. reward=182.79569892473117; # episodes_steps=[50, 17, 29, 50, 50, 22, 50, 50, 50, 50, 50, 50, 50, 50, 13, 50, 50, 50, 50, 50, 50, 50, 28, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 21, 50



TOTAL EPISODES: 22
Iter: 1; avg. reward=90.9090909090909; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 14, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 44
Iter: 2; avg. reward=159.0909090909091; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 14, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 24, 50, 45, 45, 50, 50, 10, 50, 50, 50]
TOTAL EPISODES: 66
Iter: 3; avg. reward=166.66666666666666; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 14, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 24, 50, 45, 45, 50, 50, 10, 50, 50, 50, 50, 16, 33, 50, 31, 50, 50, 50, 50, 50, 50, 43, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 86
Iter: 4; avg. reward=127.90697674418605; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 14, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 24, 50, 45, 45, 50, 50, 10,



TOTAL EPISODES: 21
Iter: 1; avg. reward=95.23809523809524; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 27, 50, 42, 50, 50, 50, 50, 50]
TOTAL EPISODES: 43
Iter: 2; avg. reward=93.02325581395348; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 27, 50, 42, 50, 50, 50, 50, 50, 50, 50, 50, 50, 14, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 5, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 65
Iter: 3; avg. reward=92.3076923076923; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 27, 50, 42, 50, 50, 50, 50, 50, 50, 50, 50, 50, 14, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 9, 50, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 86
Iter: 4; avg. reward=81.3953488372093; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 27, 50, 42, 50, 50, 50, 50, 50, 50, 50, 50, 50, 14, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50,



TOTAL EPISODES: 25
Iter: 1; avg. reward=240.0; # episodes_steps=[35, 50, 15, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 22, 50, 9, 50, 8, 50, 8, 50, 50, 50]
TOTAL EPISODES: 47
Iter: 2; avg. reward=170.2127659574468; # episodes_steps=[35, 50, 15, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 22, 50, 9, 50, 8, 50, 8, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50]
TOTAL EPISODES: 68
Iter: 3; avg. reward=132.35294117647058; # episodes_steps=[35, 50, 15, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 22, 50, 9, 50, 8, 50, 8, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 92
Iter: 4; avg. reward=163.04347826086956; # episodes_steps=[35, 50, 15, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 22, 50, 9, 50, 8, 50, 8, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 50, 



TOTAL EPISODES: 22
Iter: 1; avg. reward=90.9090909090909; # episodes_steps=[50, 50, 50, 50, 50, 50, 16, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 43
Iter: 2; avg. reward=69.76744186046511; # episodes_steps=[50, 50, 50, 50, 50, 50, 16, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 41, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 66
Iter: 3; avg. reward=106.06060606060606; # episodes_steps=[50, 50, 50, 50, 50, 50, 16, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 41, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 18, 50, 50, 50, 50, 50, 3, 8, 50, 50, 42, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 88
Iter: 4; avg. reward=102.27272727272727; # episodes_steps=[50, 50, 50, 50, 50, 50, 16, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 41, 50, 50, 50, 50, 50, 50, 50, 5



TOTAL EPISODES: 22
Iter: 1; avg. reward=90.9090909090909; # episodes_steps=[19, 50, 9, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 43
Iter: 2; avg. reward=69.76744186046511; # episodes_steps=[19, 50, 9, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 12, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 65
Iter: 3; avg. reward=92.3076923076923; # episodes_steps=[19, 50, 9, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 12, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 40, 50, 50, 50, 50, 50, 50, 50, 50, 2, 9]
TOTAL EPISODES: 87
Iter: 4; avg. reward=103.44827586206897; # episodes_steps=[19, 50, 9, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 12, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,



TOTAL EPISODES: 22
Iter: 1; avg. reward=90.9090909090909; # episodes_steps=[33, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 46
Iter: 2; avg. reward=152.17391304347825; # episodes_steps=[33, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 22, 50, 50, 50, 50, 6, 3, 50, 50, 50, 50, 50, 48, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 66
Iter: 3; avg. reward=106.06060606060606; # episodes_steps=[33, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 22, 50, 50, 50, 50, 6, 3, 50, 50, 50, 50, 50, 48, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 88
Iter: 4; avg. reward=102.27272727272727; # episodes_steps=[33, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 22, 50, 50, 50, 50, 6, 3, 50, 50, 50, 50, 50, 48, 5, 50, 50, 50, 50, 50,



TOTAL EPISODES: 22
Iter: 1; avg. reward=90.9090909090909; # episodes_steps=[50, 50, 50, 50, 50, 11, 50, 50, 50, 50, 50, 50, 50, 50, 50, 30, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 44
Iter: 2; avg. reward=90.9090909090909; # episodes_steps=[50, 50, 50, 50, 50, 11, 50, 50, 50, 50, 50, 50, 50, 50, 50, 30, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 3, 50, 26, 50, 50]
TOTAL EPISODES: 68
Iter: 3; avg. reward=117.6470588235294; # episodes_steps=[50, 50, 50, 50, 50, 11, 50, 50, 50, 50, 50, 50, 50, 50, 50, 30, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 3, 50, 26, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 45, 50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 8, 50]
TOTAL EPISODES: 90
Iter: 4; avg. reward=133.33333333333334; # episodes_steps=[50, 50, 50, 50, 50, 11, 50, 50, 50, 50, 50, 50, 50, 50, 50, 30, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 3, 50,



TOTAL EPISODES: 22
Iter: 1; avg. reward=136.36363636363637; # episodes_steps=[50, 33, 50, 50, 50, 50, 50, 15, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 22, 50, 50, 50]
TOTAL EPISODES: 47
Iter: 2; avg. reward=191.48936170212767; # episodes_steps=[50, 33, 50, 50, 50, 50, 50, 15, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 22, 50, 50, 50, 50, 50, 16, 50, 50, 29, 50, 1, 50, 50, 50, 2, 50, 50, 50, 50, 50, 7, 50, 50, 21, 50, 50, 50, 50]
TOTAL EPISODES: 70
Iter: 3; avg. reward=185.71428571428572; # episodes_steps=[50, 33, 50, 50, 50, 50, 50, 15, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 22, 50, 50, 50, 50, 50, 16, 50, 50, 29, 50, 1, 50, 50, 50, 2, 50, 50, 50, 50, 50, 7, 50, 50, 21, 50, 50, 50, 50, 50, 50, 8, 50, 50, 32, 50, 50, 50, 50, 50, 20, 50, 50, 50, 50, 50, 50, 50, 50, 50, 24, 50]
TOTAL EPISODES: 91
Iter: 4; avg. reward=153.84615384615384; # episodes_steps=[50, 33, 50, 50, 50, 50, 50, 15, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 22, 50, 50, 50, 50, 50, 16, 50, 50, 29, 50, 1, 50, 50, 50, 2, 50, 5



TOTAL EPISODES: 23
Iter: 1; avg. reward=260.8695652173913; # episodes_steps=[50, 50, 29, 50, 50, 16, 50, 50, 23, 50, 50, 50, 50, 50, 50, 50, 39, 50, 50, 50, 47, 43, 50]
TOTAL EPISODES: 44
Iter: 2; avg. reward=181.8181818181818; # episodes_steps=[50, 50, 29, 50, 50, 16, 50, 50, 23, 50, 50, 50, 50, 50, 50, 50, 39, 50, 50, 50, 47, 43, 50, 44, 50, 12, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 64
Iter: 3; avg. reward=125.0; # episodes_steps=[50, 50, 29, 50, 50, 16, 50, 50, 23, 50, 50, 50, 50, 50, 50, 50, 39, 50, 50, 50, 47, 43, 50, 44, 50, 12, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 88
Iter: 4; avg. reward=159.0909090909091; # episodes_steps=[50, 50, 29, 50, 50, 16, 50, 50, 23, 50, 50, 50, 50, 50, 50, 50, 39, 50, 50, 50, 47, 43, 50, 44, 50, 12, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 



TOTAL EPISODES: 21
Iter: 1; avg. reward=142.85714285714286; # episodes_steps=[50, 42, 50, 21, 37, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 43
Iter: 2; avg. reward=139.53488372093022; # episodes_steps=[50, 42, 50, 21, 37, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 6, 50, 50, 50, 22]
TOTAL EPISODES: 65
Iter: 3; avg. reward=123.07692307692308; # episodes_steps=[50, 42, 50, 21, 37, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 6, 50, 50, 50, 22, 50, 50, 50, 34, 50, 50, 50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 86
Iter: 4; avg. reward=104.65116279069767; # episodes_steps=[50, 42, 50, 21, 37, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 6, 50, 50, 50, 22, 50, 



TOTAL EPISODES: 22
Iter: 1; avg. reward=90.9090909090909; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 17, 50, 50, 50, 50, 50, 50, 50, 50, 19, 50, 50]
TOTAL EPISODES: 45
Iter: 2; avg. reward=111.11111111111111; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 17, 50, 50, 50, 50, 50, 50, 50, 50, 19, 50, 50, 50, 50, 50, 50, 50, 50, 50, 20, 50, 50, 50, 50, 4, 50, 50, 50, 4, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 67
Iter: 3; avg. reward=119.40298507462687; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 17, 50, 50, 50, 50, 50, 50, 50, 50, 19, 50, 50, 50, 50, 50, 50, 50, 50, 50, 20, 50, 50, 50, 50, 4, 50, 50, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 33, 50, 50, 50, 50, 12, 50, 50, 50, 50, 22, 50, 50, 50]
TOTAL EPISODES: 89
Iter: 4; avg. reward=112.35955056179775; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 17, 50, 50, 50, 50, 50, 50, 50, 50, 19, 50, 50, 50, 50, 50, 50, 50, 50, 50, 20, 50, 50, 50, 50, 4, 50, 50, 50, 4, 50, 



TOTAL EPISODES: 21
Iter: 1; avg. reward=47.61904761904762; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 10, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 43
Iter: 2; avg. reward=116.27906976744185; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 10, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 50, 50, 50, 17, 50, 18, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 65
Iter: 3; avg. reward=138.46153846153845; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 10, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 50, 50, 50, 17, 50, 18, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 19, 50, 50, 28, 31, 50, 50, 50, 50, 50, 43, 50, 50]
TOTAL EPISODES: 89
Iter: 4; avg. reward=146.06741573033707; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 10, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 50, 50, 50, 17, 50, 18, 50, 50, 50, 50, 50, 50, 50, 50, 5



TOTAL EPISODES: 22
Iter: 1; avg. reward=136.36363636363637; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 5, 27, 50, 50, 30]
TOTAL EPISODES: 45
Iter: 2; avg. reward=155.55555555555554; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 5, 27, 50, 50, 30, 50, 50, 50, 50, 50, 50, 50, 38, 50, 1, 50, 50, 22, 50, 50, 4, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 66
Iter: 3; avg. reward=121.21212121212122; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 5, 27, 50, 50, 30, 50, 50, 50, 50, 50, 50, 50, 38, 50, 1, 50, 50, 22, 50, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 21, 50]
TOTAL EPISODES: 89
Iter: 4; avg. reward=146.06741573033707; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 5, 27, 50, 50, 30, 50, 50, 50, 50, 50, 50, 50, 38, 50, 1, 50, 50, 22, 50, 50, 4, 50, 50, 50, 50



TOTAL EPISODES: 22
Iter: 1; avg. reward=90.9090909090909; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 4, 2, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 43
Iter: 2; avg. reward=69.76744186046511; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 4, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 26, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 66
Iter: 3; avg. reward=121.21212121212122; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 4, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 26, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 31, 50, 50, 50, 17, 50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 42, 50, 50, 50, 27]
TOTAL EPISODES: 87
Iter: 4; avg. reward=103.44827586206897; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 4, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 26, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 



TOTAL EPISODES: 21
Iter: 1; avg. reward=47.61904761904762; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 43, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 46
Iter: 2; avg. reward=195.65217391304347; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 43, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 48, 50, 8, 4, 32, 31, 50, 50, 15, 36, 50, 13, 50]
TOTAL EPISODES: 72
Iter: 3; avg. reward=236.11111111111111; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 43, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 48, 50, 8, 4, 32, 31, 50, 50, 15, 36, 50, 13, 50, 50, 50, 50, 50, 50, 39, 50, 5, 2, 50, 50, 50, 1, 2, 50, 43, 50, 50, 50, 50, 46, 50, 5, 50, 50, 50]
TOTAL EPISODES: 93
Iter: 4; avg. reward=204.30107526881721; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 43, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 48, 50, 8,



TOTAL EPISODES: 20
Iter: 1; avg. reward=50.0; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 41
Iter: 2; avg. reward=48.78048780487805; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 32, 50]
TOTAL EPISODES: 63
Iter: 3; avg. reward=63.492063492063494; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 32, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 50, 50, 17, 50, 50, 50, 50]
TOTAL EPISODES: 84
Iter: 4; avg. reward=59.523809523809526; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 32, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 



TOTAL EPISODES: 5030
Iter: 148; avg. reward=770.0; # episodes_steps=[1, 23, 2, 50, 15, 22, 50, 50, 35, 42, 50, 33, 37, 4, 9, 12, 30, 1, 34, 7, 33, 25, 30, 14, 3, 14, 50, 50, 50, 3, 5, 16, 5, 49, 33, 10, 50, 19, 22, 50, 6, 50, 21, 8, 50, 15, 14, 50, 40, 24, 37, 11, 50, 40, 17, 50, 24, 14, 1, 14, 9, 50, 10, 24, 7, 10, 7, 20, 46, 50, 50, 50, 12, 6, 35, 31, 50, 34, 2, 11, 18, 17, 1, 14, 10, 50, 50, 17, 31, 27, 26, 18, 50, 34, 4, 11, 20, 50, 7, 31]




TOTAL EPISODES: 22
Iter: 1; avg. reward=90.9090909090909; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 33, 50]
TOTAL EPISODES: 47
Iter: 2; avg. reward=170.2127659574468; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 33, 50, 50, 12, 50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 32, 13, 50, 5, 7, 50]
TOTAL EPISODES: 70
Iter: 3; avg. reward=185.71428571428572; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 33, 50, 50, 12, 50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 32, 13, 50, 5, 7, 50, 50, 50, 50, 50, 50, 6, 50, 50, 50, 50, 50, 10, 50, 50, 50, 50, 50, 28, 50, 41, 50, 15, 50]
TOTAL EPISODES: 92
Iter: 4; avg. reward=173.91304347826087; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 33, 50, 50, 12, 50, 50, 50, 50, 50, 50, 50, 4, 50, 50, 50, 50, 50,



TOTAL EPISODES: 23
Iter: 1; avg. reward=173.91304347826087; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 14, 50, 50, 1, 7, 50, 50, 29, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 45
Iter: 2; avg. reward=133.33333333333334; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 14, 50, 50, 1, 7, 50, 50, 29, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 17, 50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 68
Iter: 3; avg. reward=147.05882352941177; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 14, 50, 50, 1, 7, 50, 50, 29, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 17, 50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 27, 50, 50, 50, 50, 37, 50, 50, 50, 50, 50, 50, 50, 50, 9, 50, 1, 50]
TOTAL EPISODES: 89
Iter: 4; avg. reward=123.59550561797752; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 50, 14, 50, 50, 1, 7, 50, 50, 29, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 17, 50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 5



TOTAL EPISODES: 21
Iter: 1; avg. reward=47.61904761904762; # episodes_steps=[50, 50, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 43
Iter: 2; avg. reward=93.02325581395348; # episodes_steps=[50, 50, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 37, 13, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50]
TOTAL EPISODES: 65
Iter: 3; avg. reward=92.3076923076923; # episodes_steps=[50, 50, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 37, 13, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 50, 50, 20, 50, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 88
Iter: 4; avg. reward=113.63636363636364; # episodes_steps=[50, 50, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 37, 13, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 29, 50, 50, 50, 5



TOTAL EPISODES: 22
Iter: 1; avg. reward=90.9090909090909; # episodes_steps=[50, 50, 50, 27, 50, 50, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 47
Iter: 2; avg. reward=170.2127659574468; # episodes_steps=[50, 50, 50, 27, 50, 50, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 42, 50, 6, 50, 50, 18, 50, 50, 50, 50, 50, 3, 50, 50, 50, 50, 5, 50]
TOTAL EPISODES: 70
Iter: 3; avg. reward=171.42857142857142; # episodes_steps=[50, 50, 50, 27, 50, 50, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 42, 50, 6, 50, 50, 18, 50, 50, 50, 50, 50, 3, 50, 50, 50, 50, 5, 50, 50, 50, 50, 10, 50, 50, 50, 50, 50, 50, 3, 50, 50, 50, 50, 50, 50, 50, 50, 35, 44, 50, 50]
TOTAL EPISODES: 95
Iter: 4; avg. reward=189.47368421052633; # episodes_steps=[50, 50, 50, 27, 50, 50, 5, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 42, 50, 6, 50, 50, 18, 50, 50, 50



TOTAL EPISODES: 26
Iter: 1; avg. reward=269.2307692307692; # episodes_steps=[4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 18, 17, 50, 10, 11, 50, 50, 50, 50, 50, 14, 1, 50]
TOTAL EPISODES: 46
Iter: 2; avg. reward=152.17391304347825; # episodes_steps=[4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 18, 17, 50, 10, 11, 50, 50, 50, 50, 50, 14, 1, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 69
Iter: 3; avg. reward=144.92753623188406; # episodes_steps=[4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 18, 17, 50, 10, 11, 50, 50, 50, 50, 50, 14, 1, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 28, 50, 50, 50, 50, 50, 20, 50, 50, 1, 50, 50, 50]
TOTAL EPISODES: 90
Iter: 4; avg. reward=122.22222222222223; # episodes_steps=[4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 18, 17, 50, 10, 11, 50, 50, 50, 50, 50, 14, 1, 50, 50, 50, 50, 50, 50, 50, 50, 50



TOTAL EPISODES: 23
Iter: 1; avg. reward=173.91304347826087; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 2, 6, 43, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 46
Iter: 2; avg. reward=217.3913043478261; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 2, 6, 43, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 5, 45, 50, 50, 50, 50, 50, 50, 50, 50, 40, 31, 45, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 70
Iter: 3; avg. reward=228.57142857142858; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 2, 6, 43, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 5, 45, 50, 50, 50, 50, 50, 50, 50, 50, 40, 31, 45, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 50, 22, 50, 50, 50, 50, 50, 21, 50, 50, 50, 50, 47, 8, 50, 50, 50, 50, 6, 50, 36, 50, 50, 50]
TOTAL EPISODES: 91
Iter: 4; avg. reward=197.8021978021978; # episodes_steps=[50, 50, 50, 50, 50, 50, 50, 2, 6, 43, 50, 50, 50, 50, 50, 50, 1, 50, 50, 50, 50, 50, 50, 5, 45, 50, 50, 50, 50, 50, 50, 50, 50, 40, 31, 45, 50, 50, 5



TOTAL EPISODES: 22
Iter: 1; avg. reward=90.9090909090909; # episodes_steps=[50, 50, 30, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 42
Iter: 2; avg. reward=47.61904761904762; # episodes_steps=[50, 50, 30, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 63
Iter: 3; avg. reward=47.61904761904762; # episodes_steps=[50, 50, 30, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 11, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 85
Iter: 4; avg. reward=58.8235294117647; # episodes_steps=[50, 50, 30, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 11



TOTAL EPISODES: 25
Iter: 1; avg. reward=200.0; # episodes_steps=[50, 50, 28, 5, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 11, 50, 50, 50, 50, 2, 50]
TOTAL EPISODES: 47
Iter: 2; avg. reward=148.93617021276594; # episodes_steps=[50, 50, 28, 5, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 11, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 11, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50]
TOTAL EPISODES: 69
Iter: 3; avg. reward=144.92753623188406; # episodes_steps=[50, 50, 28, 5, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 11, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 11, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 15, 32, 50, 50, 50, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50]
TOTAL EPISODES: 90
Iter: 4; avg. reward=122.22222222222223; # episodes_steps=[50, 50, 28, 5, 50, 4, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 11, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 11, 50, 50, 