In [1]:
# pylint: disable=invalid-name

In [2]:
import logging
import os
import sys
from typing import cast

import gymnasium as gym

import cyberbattle.agents.baseline.learner as learner
import cyberbattle.agents.baseline.plotting as p
import cyberbattle.agents.baseline.agent_wrapper as w
import cyberbattle.agents.baseline.agent_randomcredlookup as rca
import cyberbattle.agents.baseline.agent_tabularqlearning as tqa
import cyberbattle.agents.baseline.agent_dql as dqla
import cyberbattle.agents.baseline.agent_drqn as drqn
from cyberbattle.agents.baseline.agent_wrapper import Verbosity
from cyberbattle._env.cyberbattle_env import CyberBattleEnv

# Route log records to stdout so they show up in the cell output, keep only
# ERROR and above, and format each record as "<LEVEL>: <message>".
logging.basicConfig(
    format="%(levelname)s: %(message)s",
    level=logging.ERROR,
    stream=sys.stdout,
)

In [3]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [4]:
# Papermill notebook parameters
# Default values for a standalone run.
# NOTE(review): the "# Parameters" cell right below reassigns every one of
# these names, so while that cell is present the values here are not the ones
# actually used by the rest of the notebook.
gymid = "CyberBattleChain-v0"  # environment id passed to gym.make()
env_size = 10  # forwarded to gym.make() as `size`; a falsy value omits the argument
iteration_count = 9000  # passed as iteration_count to learner.epsilon_greedy_search()
training_episode_count = 50  # passed as episode_count to learner.epsilon_greedy_search()
eval_episode_count = 5  # not referenced in the cells visible here; presumably used by later evaluation cells
maximum_node_count = 22  # passed to w.EnvironmentBounds.of_identifiers()
maximum_total_credentials = 22  # passed to w.EnvironmentBounds.of_identifiers()
plots_dir = "output/plots"  # directory created with os.makedirs() when the environment is loaded

In [5]:
# Parameters
# These assignments override the defaults from the "Papermill notebook
# parameters" cell above (presumably injected by papermill for this run;
# TODO confirm). They are the effective values: the training log below reports
# episode_count=20 and iteration_count=200, matching what is set here.
gymid = "CyberBattleChain-v0"
iteration_count = 200
training_episode_count = 20
eval_episode_count = 3
maximum_node_count = 20
maximum_total_credentials = 20
env_size = 10
plots_dir = "notebooks/output/baseline_dql/plots"


In [6]:
# Create the plot output directory (no error if it already exists).
os.makedirs(plots_dir, exist_ok=True)

# Load the Gym environment. `size` is only forwarded when env_size is truthy;
# presumably this lets the cell work with environment ids that take no `size`.
if env_size:
    _gym_env = gym.make(gymid, size=env_size)
else:
    _gym_env = gym.make(gymid)

# Work on the unwrapped environment object. `cast` (imported in the imports
# cell at the top of the notebook) only informs static type checkers; the
# assert below performs the actual runtime type check.
gym_env = cast(CyberBattleEnv, _gym_env.unwrapped)
assert isinstance(gym_env, CyberBattleEnv), f"Expected CyberBattleEnv, got {type(gym_env)}"

# Bounds object built from the configured maxima and the environment's
# identifiers; the later cells pass it to the feature encoders and the learner.
ep = w.EnvironmentBounds.of_identifiers(
    maximum_node_count=maximum_node_count,
    maximum_total_credentials=maximum_total_credentials,
    identifiers=gym_env.identifiers,
)

In [7]:
# Optional sanity checks of the environment and the feature encoders.
# Set `debugging` to True to run them; the branch is skipped by default.
debugging = False
if debugging:
    # Expression statements nested inside an `if` are never auto-displayed by
    # IPython (only the last top-level expression of a cell is), so every value
    # of interest is printed explicitly.
    print(f"port_count = {ep.port_count}, property_count = {ep.property_count}")

    print("environment:", gym_env.environment)
    # training_env.environment.plot_environment_graph()
    # NOTE(review): `training_env` is not defined in the cells visible here;
    # the line above is kept commented out as in the original.
    print("network nodes:", gym_env.environment.network.nodes)
    print("action_space:", gym_env.action_space)
    print("action_space.sample():", gym_env.action_space.sample())
    print("observation_space.sample():", gym_env.observation_space.sample())

    # Take a single valid step, then reset again before inspecting the observation.
    o0, _ = gym_env.reset()
    o_test, r, d, t, i = gym_env.step(gym_env.sample_valid_action())
    print(f"step result: reward={r}, terminated={d}, truncated={t}")
    o0, _ = gym_env.reset()

    print("observation keys:", o0.keys())

    # Exercise the feature encoders on the freshly reset observation.
    fe_example = w.RavelEncoding(ep, [w.Feature_active_node_properties(ep), w.Feature_discovered_node_count(ep)])
    a = w.StateAugmentation(o0)
    print("Feature_discovered_ports.get(a):", w.Feature_discovered_ports(ep).get(a))
    print("fe_example.encode_at(a, 0):", fe_example.encode_at(a, 0))

In [8]:
# Train the Deep Q-learning agent with an epsilon-greedy exploration schedule.
# The policy is built first and then handed to the generic search loop.
dql_policy = dqla.DeepQLearnerPolicy(
    ep=ep,
    gamma=0.015,
    replay_memory_size=10000,
    target_update=10,
    batch_size=512,
    # Relatively large learning rate: per the original author's note it matches
    # the torch default (1e-2; TODO confirm for the optimizer in use) and helps
    # converge in fewer episodes.
    learning_rate=0.01,
)

dql_run = learner.epsilon_greedy_search(
    # what to train, and where
    cyberbattle_gym_env=gym_env,
    environment_properties=ep,
    learner=dql_policy,
    title="DQL",
    # run length, taken from the notebook parameters
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    # exploration schedule: start at 0.90, decay exponentially, floor at 0.10
    epsilon=0.90,
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    # reporting options
    verbosity=Verbosity.Quiet,
    render=False,
    plot_episodes_length=False,
)

###### DQL
Learning with: episode_count=20,iteration_count=200,ϵ=0.9,ϵ_min=0.1, ϵ_expdecay=5000,γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10
  ## Episode: 1/20 'DQL' ϵ=0.9000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 1|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

  state_batch = torch.tensor(states_to_consider).to(device)
Episode 1|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 4|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 4|reward:   14.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 1|Iteration 5|reward:  114.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 1|Iteration 5|reward:  114.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 1|Iteration 11|reward:  116.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 1|Iteration 11|reward:  116.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 1|Iteration 24|reward:  118.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 1|Iteration 24|reward:  118.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 1|Iteration 43|reward:  132.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 1|Iteration 43|reward:  132.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 1|Iteration 45|reward:  138.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 1|Iteration 45|reward:  138.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 1|Iteration 49|reward:  144.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 1|Iteration 49|reward:  144.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 1|Iteration 57|reward:  152.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 1|Iteration 57|reward:  152.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 1|Iteration 89|reward:  152.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 1|Iteration 112|reward:  152.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 1|Iteration 157|reward:  152.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 1|Iteration 179|reward:  252.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 1|Iteration 179|reward:  252.0|last_reward_at:  179|Elapsed Time: 0:00:00||

Episode 1|Iteration 200|reward:  252.0|last_reward_at:  179|Elapsed Time: 0:00:00||




  Episode 1 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/31 (0.09)
    explore-remote: 4/61 (0.06)
    explore-connect: 1/79 (0.01)
    exploit-local: 0/0 (NaN)
    exploit-remote: 0/4 (0.00)
    exploit-connect: 1/16 (0.06)
  exploit deflected to exploration: 11
  ## Episode: 2/20 'DQL' ϵ=0.8688, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 2|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 3|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 3|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 2|Iteration 5|reward:   18.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 2|Iteration 5|reward:   18.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 2|Iteration 7|reward:   20.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 2|Iteration 7|reward:   20.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 2|Iteration 8|reward:  120.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 2|Iteration 8|reward:  120.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 2|Iteration 11|reward:  131.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 2|Iteration 11|reward:  131.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 2|Iteration 18|reward:  137.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 2|Iteration 18|reward:  137.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 2|Iteration 34|reward:  145.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 2|Iteration 34|reward:  145.0|last_reward_at:   34|Elapsed Time: 0:00:00||

Episode 2|Iteration 48|reward:  154.0|last_reward_at:   34|Elapsed Time: 0:00:00||

Episode 2|Iteration 48|reward:  154.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 2|Iteration 76|reward:  254.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 2|Iteration 76|reward:  254.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 2|Iteration 99|reward:  265.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 2|Iteration 99|reward:  265.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 2|Iteration 102|reward:  274.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 2|Iteration 102|reward:  274.0|last_reward_at:  102|Elapsed Time: 0:00:00||

Episode 2|Iteration 104|reward:  276.0|last_reward_at:  102|Elapsed Time: 0:00:00||

Episode 2|Iteration 104|reward:  276.0|last_reward_at:  104|Elapsed Time: 0:00:00||

Episode 2|Iteration 130|reward:  276.0|last_reward_at:  104|Elapsed Time: 0:00:00||

Episode 2|Iteration 155|reward:  276.0|last_reward_at:  104|Elapsed Time: 0:00:00||

Episode 2|Iteration 166|reward:  280.0|last_reward_at:  104|Elapsed Time: 0:00:00||

Episode 2|Iteration 166|reward:  280.0|last_reward_at:  166|Elapsed Time: 0:00:00||

Episode 2|Iteration 200|reward:  280.0|last_reward_at:  166|Elapsed Time: 0:00:00||




  Episode 2 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 5/41 (0.11)
    explore-remote: 6/61 (0.09)
    explore-connect: 1/71 (0.01)
    exploit-local: 0/0 (NaN)
    exploit-remote: 0/0 (NaN)
    exploit-connect: 1/14 (0.07)
  exploit deflected to exploration: 14
  ## Episode: 3/20 'DQL' ϵ=0.8386, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 3|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 15|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 15|reward:   14.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 3|Iteration 18|reward:   18.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 3|Iteration 18|reward:   18.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 3|Iteration 21|reward:   20.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 3|Iteration 21|reward:   20.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 3|Iteration 45|reward:   20.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 3|Iteration 46|reward:   20.0|last_reward_at:   21|Elapsed Time: 0:00:01||

Episode 3|Iteration 47|reward:   20.0|last_reward_at:   21|Elapsed Time: 0:00:02||

Episode 3|Iteration 48|reward:   20.0|last_reward_at:   21|Elapsed Time: 0:00:03||

Episode 3|Iteration 49|reward:   20.0|last_reward_at:   21|Elapsed Time: 0:00:04||

Episode 3|Iteration 50|reward:   20.0|last_reward_at:   21|Elapsed Time: 0:00:05||

Episode 3|Iteration 51|reward:   20.0|last_reward_at:   21|Elapsed Time: 0:00:06||

Episode 3|Iteration 52|reward:  120.0|last_reward_at:   21|Elapsed Time: 0:00:07||

Episode 3|Iteration 52|reward:  120.0|last_reward_at:   52|Elapsed Time: 0:00:07||

Episode 3|Iteration 53|reward:  120.0|last_reward_at:   52|Elapsed Time: 0:00:08||

Episode 3|Iteration 54|reward:  120.0|last_reward_at:   52|Elapsed Time: 0:00:09||

Episode 3|Iteration 55|reward:  120.0|last_reward_at:   52|Elapsed Time: 0:00:10||

Episode 3|Iteration 56|reward:  120.0|last_reward_at:   52|Elapsed Time: 0:00:11||

Episode 3|Iteration 57|reward:  120.0|last_reward_at:   52|Elapsed Time: 0:00:12||

Episode 3|Iteration 58|reward:  134.0|last_reward_at:   52|Elapsed Time: 0:00:13||

Episode 3|Iteration 58|reward:  134.0|last_reward_at:   58|Elapsed Time: 0:00:13||

Episode 3|Iteration 59|reward:  134.0|last_reward_at:   58|Elapsed Time: 0:00:14||

Episode 3|Iteration 60|reward:  140.0|last_reward_at:   58|Elapsed Time: 0:00:14||

Episode 3|Iteration 60|reward:  140.0|last_reward_at:   60|Elapsed Time: 0:00:14||

Episode 3|Iteration 61|reward:  140.0|last_reward_at:   60|Elapsed Time: 0:00:15||

Episode 3|Iteration 62|reward:  140.0|last_reward_at:   60|Elapsed Time: 0:00:16||

Episode 3|Iteration 63|reward:  140.0|last_reward_at:   60|Elapsed Time: 0:00:17||

Episode 3|Iteration 64|reward:  140.0|last_reward_at:   60|Elapsed Time: 0:00:18||

Episode 3|Iteration 65|reward:  140.0|last_reward_at:   60|Elapsed Time: 0:00:19||

Episode 3|Iteration 66|reward:  140.0|last_reward_at:   60|Elapsed Time: 0:00:20||

Episode 3|Iteration 67|reward:  140.0|last_reward_at:   60|Elapsed Time: 0:00:21||

Episode 3|Iteration 68|reward:  140.0|last_reward_at:   60|Elapsed Time: 0:00:22||

Episode 3|Iteration 69|reward:  140.0|last_reward_at:   60|Elapsed Time: 0:00:23||

Episode 3|Iteration 70|reward:  148.0|last_reward_at:   60|Elapsed Time: 0:00:24||

Episode 3|Iteration 70|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:24||

Episode 3|Iteration 71|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:24||

Episode 3|Iteration 72|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:25||

Episode 3|Iteration 73|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:26||

Episode 3|Iteration 74|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:27||

Episode 3|Iteration 75|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:28||

Episode 3|Iteration 76|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:29||

Episode 3|Iteration 77|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:30||

Episode 3|Iteration 78|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:31||

Episode 3|Iteration 79|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:32||

Episode 3|Iteration 80|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:33||

Episode 3|Iteration 81|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:34||

Episode 3|Iteration 82|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:34||

Episode 3|Iteration 83|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:35||

Episode 3|Iteration 84|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:36||

Episode 3|Iteration 85|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:37||

Episode 3|Iteration 86|reward:  154.0|last_reward_at:   70|Elapsed Time: 0:00:38||

Episode 3|Iteration 86|reward:  154.0|last_reward_at:   86|Elapsed Time: 0:00:38||

Episode 3|Iteration 87|reward:  154.0|last_reward_at:   86|Elapsed Time: 0:00:39||

Episode 3|Iteration 88|reward:  154.0|last_reward_at:   86|Elapsed Time: 0:00:40||

Episode 3|Iteration 89|reward:  154.0|last_reward_at:   86|Elapsed Time: 0:00:41||

Episode 3|Iteration 90|reward:  154.0|last_reward_at:   86|Elapsed Time: 0:00:41||

Episode 3|Iteration 91|reward:  154.0|last_reward_at:   86|Elapsed Time: 0:00:42||

Episode 3|Iteration 92|reward:  154.0|last_reward_at:   86|Elapsed Time: 0:00:45||

Episode 3|Iteration 93|reward:  154.0|last_reward_at:   86|Elapsed Time: 0:00:46||

Episode 3|Iteration 94|reward:  154.0|last_reward_at:   86|Elapsed Time: 0:00:48||

Episode 3|Iteration 95|reward:  154.0|last_reward_at:   86|Elapsed Time: 0:00:49||

Episode 3|Iteration 96|reward:  254.0|last_reward_at:   86|Elapsed Time: 0:00:50||

Episode 3|Iteration 96|reward:  254.0|last_reward_at:   96|Elapsed Time: 0:00:50||

Episode 3|Iteration 97|reward:  254.0|last_reward_at:   96|Elapsed Time: 0:00:51||

Episode 3|Iteration 98|reward:  268.0|last_reward_at:   96|Elapsed Time: 0:00:52||

Episode 3|Iteration 98|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:00:52||

Episode 3|Iteration 99|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:00:53||

Episode 3|Iteration 100|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:00:54||

Episode 3|Iteration 101|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:00:55||

Episode 3|Iteration 102|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:00:56||

Episode 3|Iteration 103|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:00:57||

Episode 3|Iteration 104|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:00:58||

Episode 3|Iteration 105|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:00:58||

Episode 3|Iteration 106|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:00:59||

Episode 3|Iteration 107|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:01:00||

Episode 3|Iteration 108|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:01:01||

Episode 3|Iteration 109|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:01:02||

Episode 3|Iteration 110|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:01:03||

Episode 3|Iteration 111|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:01:04||

Episode 3|Iteration 112|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:01:05||

Episode 3|Iteration 113|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:01:06||

Episode 3|Iteration 114|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:01:07||

Episode 3|Iteration 115|reward:  268.0|last_reward_at:   98|Elapsed Time: 0:01:07||

Episode 3|Iteration 116|reward:  272.0|last_reward_at:   98|Elapsed Time: 0:01:08||

Episode 3|Iteration 116|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:08||

Episode 3|Iteration 117|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:09||

Episode 3|Iteration 118|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:10||

Episode 3|Iteration 119|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:11||

Episode 3|Iteration 120|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:12||

Episode 3|Iteration 121|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:13||

Episode 3|Iteration 122|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:14||

Episode 3|Iteration 123|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:15||

Episode 3|Iteration 124|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:15||

Episode 3|Iteration 125|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:17||

Episode 3|Iteration 126|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:17||

Episode 3|Iteration 127|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:18||

Episode 3|Iteration 128|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:19||

Episode 3|Iteration 129|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:20||

Episode 3|Iteration 130|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:21||

Episode 3|Iteration 131|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:22||

Episode 3|Iteration 132|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:23||

Episode 3|Iteration 133|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:24||

Episode 3|Iteration 134|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:25||

Episode 3|Iteration 135|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:26||

Episode 3|Iteration 136|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:27||

Episode 3|Iteration 137|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:27||

Episode 3|Iteration 138|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:28||

Episode 3|Iteration 139|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:29||

Episode 3|Iteration 140|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:30||

Episode 3|Iteration 141|reward:  272.0|last_reward_at:  116|Elapsed Time: 0:01:31||

Episode 3|Iteration 142|reward:  278.0|last_reward_at:  116|Elapsed Time: 0:01:32||

Episode 3|Iteration 142|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:32||

Episode 3|Iteration 143|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:33||

Episode 3|Iteration 144|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:34||

Episode 3|Iteration 145|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:35||

Episode 3|Iteration 146|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:36||

Episode 3|Iteration 147|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:37||

Episode 3|Iteration 148|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:38||

Episode 3|Iteration 149|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:39||

Episode 3|Iteration 150|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:40||

Episode 3|Iteration 151|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:41||

Episode 3|Iteration 152|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:41||

Episode 3|Iteration 153|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:42||

Episode 3|Iteration 154|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:43||

Episode 3|Iteration 155|reward:  278.0|last_reward_at:  142|Elapsed Time: 0:01:44||

Episode 3|Iteration 156|reward:  378.0|last_reward_at:  142|Elapsed Time: 0:01:45||

Episode 3|Iteration 156|reward:  378.0|last_reward_at:  156|Elapsed Time: 0:01:45||

Episode 3|Iteration 157|reward:  392.0|last_reward_at:  156|Elapsed Time: 0:01:46||

Episode 3|Iteration 157|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:46||

Episode 3|Iteration 158|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:47||

Episode 3|Iteration 159|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:48||

Episode 3|Iteration 160|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:49||

Episode 3|Iteration 161|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:50||

Episode 3|Iteration 162|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:51||

Episode 3|Iteration 163|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:51||

Episode 3|Iteration 164|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:52||

Episode 3|Iteration 165|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:53||

Episode 3|Iteration 166|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:54||

Episode 3|Iteration 167|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:55||

Episode 3|Iteration 168|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:56||

Episode 3|Iteration 169|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:57||

Episode 3|Iteration 170|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:58||

Episode 3|Iteration 171|reward:  392.0|last_reward_at:  157|Elapsed Time: 0:01:59||

Episode 3|Iteration 172|reward:  400.0|last_reward_at:  157|Elapsed Time: 0:02:00||

Episode 3|Iteration 172|reward:  400.0|last_reward_at:  172|Elapsed Time: 0:02:00||

Episode 3|Iteration 173|reward:  400.0|last_reward_at:  172|Elapsed Time: 0:02:01||

Episode 3|Iteration 174|reward:  400.0|last_reward_at:  172|Elapsed Time: 0:02:02||

Episode 3|Iteration 175|reward:  400.0|last_reward_at:  172|Elapsed Time: 0:02:04||

Episode 3|Iteration 176|reward:  400.0|last_reward_at:  172|Elapsed Time: 0:02:05||

Episode 3|Iteration 177|reward:  400.0|last_reward_at:  172|Elapsed Time: 0:02:05||

Episode 3|Iteration 178|reward:  400.0|last_reward_at:  172|Elapsed Time: 0:02:06||

Episode 3|Iteration 179|reward:  400.0|last_reward_at:  172|Elapsed Time: 0:02:07||

Episode 3|Iteration 180|reward:  400.0|last_reward_at:  172|Elapsed Time: 0:02:08||

Episode 3|Iteration 181|reward:  406.0|last_reward_at:  172|Elapsed Time: 0:02:09||

Episode 3|Iteration 181|reward:  406.0|last_reward_at:  181|Elapsed Time: 0:02:09||

Episode 3|Iteration 182|reward:  406.0|last_reward_at:  181|Elapsed Time: 0:02:10||

Episode 3|Iteration 183|reward:  408.0|last_reward_at:  181|Elapsed Time: 0:02:11||

Episode 3|Iteration 183|reward:  408.0|last_reward_at:  183|Elapsed Time: 0:02:11||

Episode 3|Iteration 184|reward:  408.0|last_reward_at:  183|Elapsed Time: 0:02:12||

Episode 3|Iteration 185|reward:  408.0|last_reward_at:  183|Elapsed Time: 0:02:13||

Episode 3|Iteration 186|reward:  408.0|last_reward_at:  183|Elapsed Time: 0:02:14||

Episode 3|Iteration 187|reward:  408.0|last_reward_at:  183|Elapsed Time: 0:02:15||

Episode 3|Iteration 188|reward:  408.0|last_reward_at:  183|Elapsed Time: 0:02:15||

Episode 3|Iteration 189|reward:  408.0|last_reward_at:  183|Elapsed Time: 0:02:16||

Episode 3|Iteration 190|reward:  408.0|last_reward_at:  183|Elapsed Time: 0:02:17||

Episode 3|Iteration 191|reward:  414.0|last_reward_at:  183|Elapsed Time: 0:02:18||

Episode 3|Iteration 191|reward:  414.0|last_reward_at:  191|Elapsed Time: 0:02:18||

Episode 3|Iteration 192|reward:  414.0|last_reward_at:  191|Elapsed Time: 0:02:19||

Episode 3|Iteration 193|reward:  414.0|last_reward_at:  191|Elapsed Time: 0:02:20||

Episode 3|Iteration 194|reward:  414.0|last_reward_at:  191|Elapsed Time: 0:02:21||

Episode 3|Iteration 195|reward:  414.0|last_reward_at:  191|Elapsed Time: 0:02:22||

Episode 3|Iteration 196|reward:  414.0|last_reward_at:  191|Elapsed Time: 0:02:23||

Episode 3|Iteration 197|reward:  414.0|last_reward_at:  191|Elapsed Time: 0:02:24||

Episode 3|Iteration 198|reward:  414.0|last_reward_at:  191|Elapsed Time: 0:02:24||

Episode 3|Iteration 199|reward:  414.0|last_reward_at:  191|Elapsed Time: 0:02:25||

Episode 3|Iteration 200|reward:  414.0|last_reward_at:  191|Elapsed Time: 0:02:26||

Episode 3|Iteration 200|reward:  414.0|last_reward_at:  191|Elapsed Time: 0:02:26||




  Episode 3 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 5/21 (0.19)
    explore-remote: 7/73 (0.09)
    explore-connect: 1/62 (0.02)
    exploit-local: 2/7 (0.22)
    exploit-remote: 1/5 (0.17)
    exploit-connect: 2/14 (0.12)
  exploit deflected to exploration: 4
  ## Episode: 4/20 'DQL' ϵ=0.8097, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 4|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 2|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:01||

Episode 4|Iteration 3|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:02||

Episode 4|Iteration 4|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:03||

Episode 4|Iteration 4|reward:   14.0|last_reward_at:    4|Elapsed Time: 0:00:03||

Episode 4|Iteration 5|reward:  114.0|last_reward_at:    4|Elapsed Time: 0:00:04||

Episode 4|Iteration 5|reward:  114.0|last_reward_at:    5|Elapsed Time: 0:00:04||

Episode 4|Iteration 6|reward:  114.0|last_reward_at:    5|Elapsed Time: 0:00:05||

Episode 4|Iteration 7|reward:  114.0|last_reward_at:    5|Elapsed Time: 0:00:06||

Episode 4|Iteration 8|reward:  114.0|last_reward_at:    5|Elapsed Time: 0:00:07||

Episode 4|Iteration 9|reward:  116.0|last_reward_at:    5|Elapsed Time: 0:00:08||

Episode 4|Iteration 9|reward:  116.0|last_reward_at:    9|Elapsed Time: 0:00:08||

Episode 4|Iteration 10|reward:  130.0|last_reward_at:    9|Elapsed Time: 0:00:09||

Episode 4|Iteration 10|reward:  130.0|last_reward_at:   10|Elapsed Time: 0:00:09||

Episode 4|Iteration 11|reward:  130.0|last_reward_at:   10|Elapsed Time: 0:00:10||

Episode 4|Iteration 12|reward:  130.0|last_reward_at:   10|Elapsed Time: 0:00:11||

Episode 4|Iteration 13|reward:  130.0|last_reward_at:   10|Elapsed Time: 0:00:12||

Episode 4|Iteration 14|reward:  136.0|last_reward_at:   10|Elapsed Time: 0:00:12||

Episode 4|Iteration 14|reward:  136.0|last_reward_at:   14|Elapsed Time: 0:00:12||

Episode 4|Iteration 15|reward:  136.0|last_reward_at:   14|Elapsed Time: 0:00:13||

Episode 4|Iteration 16|reward:  142.0|last_reward_at:   14|Elapsed Time: 0:00:14||

Episode 4|Iteration 16|reward:  142.0|last_reward_at:   16|Elapsed Time: 0:00:14||

Episode 4|Iteration 17|reward:  142.0|last_reward_at:   16|Elapsed Time: 0:00:15||

Episode 4|Iteration 18|reward:  142.0|last_reward_at:   16|Elapsed Time: 0:00:16||

Episode 4|Iteration 19|reward:  142.0|last_reward_at:   16|Elapsed Time: 0:00:17||

Episode 4|Iteration 20|reward:  142.0|last_reward_at:   16|Elapsed Time: 0:00:18||

Episode 4|Iteration 21|reward:  142.0|last_reward_at:   16|Elapsed Time: 0:00:19||

Episode 4|Iteration 22|reward:  142.0|last_reward_at:   16|Elapsed Time: 0:00:20||

Episode 4|Iteration 23|reward:  142.0|last_reward_at:   16|Elapsed Time: 0:00:20||

Episode 4|Iteration 24|reward:  142.0|last_reward_at:   16|Elapsed Time: 0:00:21||

Episode 4|Iteration 25|reward:  144.0|last_reward_at:   16|Elapsed Time: 0:00:22||

Episode 4|Iteration 25|reward:  144.0|last_reward_at:   25|Elapsed Time: 0:00:22||

Episode 4|Iteration 26|reward:  152.0|last_reward_at:   25|Elapsed Time: 0:00:23||

Episode 4|Iteration 26|reward:  152.0|last_reward_at:   26|Elapsed Time: 0:00:23||

Episode 4|Iteration 27|reward:  152.0|last_reward_at:   26|Elapsed Time: 0:00:24||

Episode 4|Iteration 28|reward:  252.0|last_reward_at:   26|Elapsed Time: 0:00:25||

Episode 4|Iteration 28|reward:  252.0|last_reward_at:   28|Elapsed Time: 0:00:25||

Episode 4|Iteration 29|reward:  263.0|last_reward_at:   28|Elapsed Time: 0:00:26||

Episode 4|Iteration 29|reward:  263.0|last_reward_at:   29|Elapsed Time: 0:00:26||

Episode 4|Iteration 30|reward:  263.0|last_reward_at:   29|Elapsed Time: 0:00:27||

Episode 4|Iteration 31|reward:  263.0|last_reward_at:   29|Elapsed Time: 0:00:28||

Episode 4|Iteration 32|reward:  263.0|last_reward_at:   29|Elapsed Time: 0:00:30||

Episode 4|Iteration 33|reward:  263.0|last_reward_at:   29|Elapsed Time: 0:00:31||

Episode 4|Iteration 34|reward:  265.0|last_reward_at:   29|Elapsed Time: 0:00:32||

Episode 4|Iteration 34|reward:  265.0|last_reward_at:   34|Elapsed Time: 0:00:32||

Episode 4|Iteration 35|reward:  265.0|last_reward_at:   34|Elapsed Time: 0:00:33||

Episode 4|Iteration 36|reward:  265.0|last_reward_at:   34|Elapsed Time: 0:00:34||

Episode 4|Iteration 37|reward:  265.0|last_reward_at:   34|Elapsed Time: 0:00:35||

Episode 4|Iteration 38|reward:  265.0|last_reward_at:   34|Elapsed Time: 0:00:35||

Episode 4|Iteration 39|reward:  265.0|last_reward_at:   34|Elapsed Time: 0:00:36||

Episode 4|Iteration 40|reward:  274.0|last_reward_at:   34|Elapsed Time: 0:00:37||

Episode 4|Iteration 40|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:37||

Episode 4|Iteration 41|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:38||

Episode 4|Iteration 42|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:39||

Episode 4|Iteration 43|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:40||

Episode 4|Iteration 44|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:41||

Episode 4|Iteration 45|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:42||

Episode 4|Iteration 46|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:43||

Episode 4|Iteration 47|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:44||

Episode 4|Iteration 48|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:44||

Episode 4|Iteration 49|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:45||

Episode 4|Iteration 50|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:46||

Episode 4|Iteration 51|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:47||

Episode 4|Iteration 52|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:48||

Episode 4|Iteration 53|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:49||

Episode 4|Iteration 54|reward:  274.0|last_reward_at:   40|Elapsed Time: 0:00:50||

Episode 4|Iteration 55|reward:  278.0|last_reward_at:   40|Elapsed Time: 0:00:51||

Episode 4|Iteration 55|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:00:51||

Episode 4|Iteration 56|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:00:52||

Episode 4|Iteration 57|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:00:53||

Episode 4|Iteration 58|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:00:53||

Episode 4|Iteration 59|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:00:54||

Episode 4|Iteration 60|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:00:55||

Episode 4|Iteration 61|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:00:56||

Episode 4|Iteration 62|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:00:57||

Episode 4|Iteration 63|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:00:58||

Episode 4|Iteration 64|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:00:58||

Episode 4|Iteration 65|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:00:59||

Episode 4|Iteration 66|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:00||

Episode 4|Iteration 67|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:01||

Episode 4|Iteration 68|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:02||

Episode 4|Iteration 69|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:03||

Episode 4|Iteration 70|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:04||

Episode 4|Iteration 71|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:05||

Episode 4|Iteration 72|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:06||

Episode 4|Iteration 73|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:07||

Episode 4|Iteration 74|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:08||

Episode 4|Iteration 75|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:08||

Episode 4|Iteration 76|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:09||

Episode 4|Iteration 77|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:10||

Episode 4|Iteration 78|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:11||

Episode 4|Iteration 79|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:12||

Episode 4|Iteration 80|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:13||

Episode 4|Iteration 81|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:14||

Episode 4|Iteration 82|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:15||

Episode 4|Iteration 83|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:16||

Episode 4|Iteration 84|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:17||

Episode 4|Iteration 85|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:17||

Episode 4|Iteration 86|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:18||

Episode 4|Iteration 87|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:19||

Episode 4|Iteration 88|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:20||

Episode 4|Iteration 89|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:21||

Episode 4|Iteration 90|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:22||

Episode 4|Iteration 91|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:23||

Episode 4|Iteration 92|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:24||

Episode 4|Iteration 93|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:25||

Episode 4|Iteration 94|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:26||

Episode 4|Iteration 95|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:27||

Episode 4|Iteration 96|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:27||

Episode 4|Iteration 97|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:28||

Episode 4|Iteration 98|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:29||

Episode 4|Iteration 99|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:30||

Episode 4|Iteration 100|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:31||

Episode 4|Iteration 101|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:32||

Episode 4|Iteration 102|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:33||

Episode 4|Iteration 103|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:34||

Episode 4|Iteration 104|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:35||

Episode 4|Iteration 105|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:36||

Episode 4|Iteration 106|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:37||

Episode 4|Iteration 107|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:38||

Episode 4|Iteration 108|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:38||

Episode 4|Iteration 109|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:39||

Episode 4|Iteration 110|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:40||

Episode 4|Iteration 111|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:41||

Episode 4|Iteration 112|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:42||

Episode 4|Iteration 113|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:43||

Episode 4|Iteration 114|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:44||

Episode 4|Iteration 115|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:45||

Episode 4|Iteration 116|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:46||

Episode 4|Iteration 117|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:47||

Episode 4|Iteration 118|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:48||

Episode 4|Iteration 119|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:48||

Episode 4|Iteration 120|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:49||

Episode 4|Iteration 121|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:50||

Episode 4|Iteration 122|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:51||

Episode 4|Iteration 123|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:52||

Episode 4|Iteration 124|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:53||

Episode 4|Iteration 125|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:54||

Episode 4|Iteration 126|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:54||

Episode 4|Iteration 127|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:55||

Episode 4|Iteration 128|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:56||

Episode 4|Iteration 129|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:57||

Episode 4|Iteration 130|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:58||

Episode 4|Iteration 131|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:01:59||

Episode 4|Iteration 132|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:02:00||

Episode 4|Iteration 133|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:02:01||

Episode 4|Iteration 134|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:02:02||

Episode 4|Iteration 135|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:02:03||

Episode 4|Iteration 136|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:02:04||

Episode 4|Iteration 137|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:02:04||

Episode 4|Iteration 138|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:02:05||

Episode 4|Iteration 139|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:02:06||

Episode 4|Iteration 140|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:02:07||

Episode 4|Iteration 141|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:02:08||

Episode 4|Iteration 142|reward:  278.0|last_reward_at:   55|Elapsed Time: 0:02:09||

Episode 4|Iteration 143|reward:  378.0|last_reward_at:   55|Elapsed Time: 0:02:10||

Episode 4|Iteration 143|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:10||

Episode 4|Iteration 144|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:11||

Episode 4|Iteration 145|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:12||

Episode 4|Iteration 146|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:13||

Episode 4|Iteration 147|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:14||

Episode 4|Iteration 148|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:15||

Episode 4|Iteration 149|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:16||

Episode 4|Iteration 150|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:17||

Episode 4|Iteration 151|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:17||

Episode 4|Iteration 152|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:18||

Episode 4|Iteration 153|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:19||

Episode 4|Iteration 154|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:20||

Episode 4|Iteration 155|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:21||

Episode 4|Iteration 156|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:22||

Episode 4|Iteration 157|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:23||

Episode 4|Iteration 158|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:24||

Episode 4|Iteration 159|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:25||

Episode 4|Iteration 160|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:25||

Episode 4|Iteration 161|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:26||

Episode 4|Iteration 162|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:27||

Episode 4|Iteration 163|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:28||

Episode 4|Iteration 164|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:29||

Episode 4|Iteration 165|reward:  378.0|last_reward_at:  143|Elapsed Time: 0:02:30||

Episode 4|Iteration 166|reward:  392.0|last_reward_at:  143|Elapsed Time: 0:02:31||

Episode 4|Iteration 166|reward:  392.0|last_reward_at:  166|Elapsed Time: 0:02:31||

Episode 4|Iteration 167|reward:  398.0|last_reward_at:  166|Elapsed Time: 0:02:32||

Episode 4|Iteration 167|reward:  398.0|last_reward_at:  167|Elapsed Time: 0:02:32||

Episode 4|Iteration 168|reward:  498.0|last_reward_at:  167|Elapsed Time: 0:02:33||

Episode 4|Iteration 168|reward:  498.0|last_reward_at:  168|Elapsed Time: 0:02:33||

Episode 4|Iteration 169|reward:  498.0|last_reward_at:  168|Elapsed Time: 0:02:34||

Episode 4|Iteration 170|reward:  498.0|last_reward_at:  168|Elapsed Time: 0:02:35||

Episode 4|Iteration 171|reward:  498.0|last_reward_at:  168|Elapsed Time: 0:02:36||

Episode 4|Iteration 172|reward:  512.0|last_reward_at:  168|Elapsed Time: 0:02:36||

Episode 4|Iteration 172|reward:  512.0|last_reward_at:  172|Elapsed Time: 0:02:36||

Episode 4|Iteration 173|reward:  512.0|last_reward_at:  172|Elapsed Time: 0:02:37||

Episode 4|Iteration 174|reward:  512.0|last_reward_at:  172|Elapsed Time: 0:02:38||

Episode 4|Iteration 175|reward:  512.0|last_reward_at:  172|Elapsed Time: 0:02:39||

Episode 4|Iteration 176|reward:  512.0|last_reward_at:  172|Elapsed Time: 0:02:40||

Episode 4|Iteration 177|reward:  516.0|last_reward_at:  172|Elapsed Time: 0:02:41||

Episode 4|Iteration 177|reward:  516.0|last_reward_at:  177|Elapsed Time: 0:02:41||

Episode 4|Iteration 178|reward:  516.0|last_reward_at:  177|Elapsed Time: 0:02:42||

Episode 4|Iteration 179|reward:  516.0|last_reward_at:  177|Elapsed Time: 0:02:43||

Episode 4|Iteration 180|reward:  516.0|last_reward_at:  177|Elapsed Time: 0:02:44||

Episode 4|Iteration 181|reward:  516.0|last_reward_at:  177|Elapsed Time: 0:02:45||

Episode 4|Iteration 182|reward:  518.0|last_reward_at:  177|Elapsed Time: 0:02:46||

Episode 4|Iteration 182|reward:  518.0|last_reward_at:  182|Elapsed Time: 0:02:46||

Episode 4|Iteration 183|reward:  518.0|last_reward_at:  182|Elapsed Time: 0:02:47||

Episode 4|Iteration 184|reward:  518.0|last_reward_at:  182|Elapsed Time: 0:02:48||

Episode 4|Iteration 185|reward:  518.0|last_reward_at:  182|Elapsed Time: 0:02:49||

Episode 4|Iteration 186|reward:  518.0|last_reward_at:  182|Elapsed Time: 0:02:49||

Episode 4|Iteration 187|reward:  518.0|last_reward_at:  182|Elapsed Time: 0:02:50||

Episode 4|Iteration 188|reward:  518.0|last_reward_at:  182|Elapsed Time: 0:02:51||

Episode 4|Iteration 189|reward:  524.0|last_reward_at:  182|Elapsed Time: 0:02:52||

Episode 4|Iteration 189|reward:  524.0|last_reward_at:  189|Elapsed Time: 0:02:52||

Episode 4|Iteration 190|reward:  524.0|last_reward_at:  189|Elapsed Time: 0:02:53||

Episode 4|Iteration 191|reward:  524.0|last_reward_at:  189|Elapsed Time: 0:02:54||

Episode 4|Iteration 192|reward:  524.0|last_reward_at:  189|Elapsed Time: 0:02:55||

Episode 4|Iteration 193|reward:  524.0|last_reward_at:  189|Elapsed Time: 0:02:56||

Episode 4|Iteration 194|reward:  524.0|last_reward_at:  189|Elapsed Time: 0:02:58||

Episode 4|Iteration 195|reward:  524.0|last_reward_at:  189|Elapsed Time: 0:02:59||

Episode 4|Iteration 196|reward:  530.0|last_reward_at:  189|Elapsed Time: 0:03:00||

Episode 4|Iteration 196|reward:  530.0|last_reward_at:  196|Elapsed Time: 0:03:00||

Episode 4|Iteration 197|reward:  530.0|last_reward_at:  196|Elapsed Time: 0:03:01||

Episode 4|Iteration 198|reward:  530.0|last_reward_at:  196|Elapsed Time: 0:03:02||

Episode 4|Iteration 199|reward:  530.0|last_reward_at:  196|Elapsed Time: 0:03:03||

Episode 4|Iteration 200|reward:  530.0|last_reward_at:  196|Elapsed Time: 0:03:04||

Episode 4|Iteration 200|reward:  530.0|last_reward_at:  196|Elapsed Time: 0:03:04||




  Episode 4 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/46 (0.06)
    explore-remote: 9/54 (0.14)
    explore-connect: 1/47 (0.02)
    exploit-local: 5/5 (0.50)
    exploit-remote: 1/11 (0.08)
    exploit-connect: 3/15 (0.17)
  exploit deflected to exploration: 0
  ## Episode: 5/20 'DQL' ϵ=0.7819, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 5|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 2|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:03||

Episode 5|Iteration 3|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:04||

Episode 5|Iteration 3|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:04||

Episode 5|Iteration 4|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:05||

Episode 5|Iteration 5|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:06||

Episode 5|Iteration 6|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:07||

Episode 5|Iteration 7|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:08||

Episode 5|Iteration 8|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:09||

Episode 5|Iteration 9|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:10||

Episode 5|Iteration 10|reward:  114.0|last_reward_at:    3|Elapsed Time: 0:00:11||

Episode 5|Iteration 10|reward:  114.0|last_reward_at:   10|Elapsed Time: 0:00:11||

Episode 5|Iteration 11|reward:  114.0|last_reward_at:   10|Elapsed Time: 0:00:12||

Episode 5|Iteration 12|reward:  116.0|last_reward_at:   10|Elapsed Time: 0:00:13||

Episode 5|Iteration 12|reward:  116.0|last_reward_at:   12|Elapsed Time: 0:00:13||

Episode 5|Iteration 13|reward:  127.0|last_reward_at:   12|Elapsed Time: 0:00:16||

Episode 5|Iteration 13|reward:  127.0|last_reward_at:   13|Elapsed Time: 0:00:16||

Episode 5|Iteration 14|reward:  127.0|last_reward_at:   13|Elapsed Time: 0:00:18||

Episode 5|Iteration 15|reward:  127.0|last_reward_at:   13|Elapsed Time: 0:00:19||

Episode 5|Iteration 16|reward:  127.0|last_reward_at:   13|Elapsed Time: 0:00:20||

Episode 5|Iteration 17|reward:  127.0|last_reward_at:   13|Elapsed Time: 0:00:21||

Episode 5|Iteration 18|reward:  127.0|last_reward_at:   13|Elapsed Time: 0:00:24||

Episode 5|Iteration 19|reward:  127.0|last_reward_at:   13|Elapsed Time: 0:00:25||

Episode 5|Iteration 20|reward:  129.0|last_reward_at:   13|Elapsed Time: 0:00:26||

Episode 5|Iteration 20|reward:  129.0|last_reward_at:   20|Elapsed Time: 0:00:26||

Episode 5|Iteration 21|reward:  138.0|last_reward_at:   20|Elapsed Time: 0:00:27||

Episode 5|Iteration 21|reward:  138.0|last_reward_at:   21|Elapsed Time: 0:00:27||

Episode 5|Iteration 22|reward:  138.0|last_reward_at:   21|Elapsed Time: 0:00:28||

Episode 5|Iteration 23|reward:  138.0|last_reward_at:   21|Elapsed Time: 0:00:29||

Episode 5|Iteration 24|reward:  138.0|last_reward_at:   21|Elapsed Time: 0:00:29||

Episode 5|Iteration 25|reward:  138.0|last_reward_at:   21|Elapsed Time: 0:00:30||

Episode 5|Iteration 26|reward:  138.0|last_reward_at:   21|Elapsed Time: 0:00:31||

Episode 5|Iteration 27|reward:  238.0|last_reward_at:   21|Elapsed Time: 0:00:32||

Episode 5|Iteration 27|reward:  238.0|last_reward_at:   27|Elapsed Time: 0:00:32||

Episode 5|Iteration 28|reward:  252.0|last_reward_at:   27|Elapsed Time: 0:00:33||

Episode 5|Iteration 28|reward:  252.0|last_reward_at:   28|Elapsed Time: 0:00:33||

Episode 5|Iteration 29|reward:  252.0|last_reward_at:   28|Elapsed Time: 0:00:34||

Episode 5|Iteration 30|reward:  252.0|last_reward_at:   28|Elapsed Time: 0:00:35||

Episode 5|Iteration 31|reward:  252.0|last_reward_at:   28|Elapsed Time: 0:00:36||

Episode 5|Iteration 32|reward:  252.0|last_reward_at:   28|Elapsed Time: 0:00:37||

Episode 5|Iteration 33|reward:  254.0|last_reward_at:   28|Elapsed Time: 0:00:37||

Episode 5|Iteration 33|reward:  254.0|last_reward_at:   33|Elapsed Time: 0:00:37||

Episode 5|Iteration 34|reward:  254.0|last_reward_at:   33|Elapsed Time: 0:00:38||

Episode 5|Iteration 35|reward:  260.0|last_reward_at:   33|Elapsed Time: 0:00:39||

Episode 5|Iteration 35|reward:  260.0|last_reward_at:   35|Elapsed Time: 0:00:39||

Episode 5|Iteration 36|reward:  260.0|last_reward_at:   35|Elapsed Time: 0:00:40||

Episode 5|Iteration 37|reward:  260.0|last_reward_at:   35|Elapsed Time: 0:00:41||

Episode 5|Iteration 38|reward:  260.0|last_reward_at:   35|Elapsed Time: 0:00:42||

Episode 5|Iteration 39|reward:  260.0|last_reward_at:   35|Elapsed Time: 0:00:43||

Episode 5|Iteration 40|reward:  260.0|last_reward_at:   35|Elapsed Time: 0:00:44||

Episode 5|Iteration 41|reward:  260.0|last_reward_at:   35|Elapsed Time: 0:00:44||

Episode 5|Iteration 42|reward:  260.0|last_reward_at:   35|Elapsed Time: 0:00:45||

Episode 5|Iteration 43|reward:  264.0|last_reward_at:   35|Elapsed Time: 0:00:46||

Episode 5|Iteration 43|reward:  264.0|last_reward_at:   43|Elapsed Time: 0:00:46||

Episode 5|Iteration 44|reward:  264.0|last_reward_at:   43|Elapsed Time: 0:00:47||

Episode 5|Iteration 45|reward:  264.0|last_reward_at:   43|Elapsed Time: 0:00:48||

Episode 5|Iteration 46|reward:  264.0|last_reward_at:   43|Elapsed Time: 0:00:49||

Episode 5|Iteration 47|reward:  264.0|last_reward_at:   43|Elapsed Time: 0:00:50||

Episode 5|Iteration 48|reward:  270.0|last_reward_at:   43|Elapsed Time: 0:00:51||

Episode 5|Iteration 48|reward:  270.0|last_reward_at:   48|Elapsed Time: 0:00:51||

Episode 5|Iteration 49|reward:  370.0|last_reward_at:   48|Elapsed Time: 0:00:52||

Episode 5|Iteration 49|reward:  370.0|last_reward_at:   49|Elapsed Time: 0:00:52||

Episode 5|Iteration 50|reward:  384.0|last_reward_at:   49|Elapsed Time: 0:00:53||

Episode 5|Iteration 50|reward:  384.0|last_reward_at:   50|Elapsed Time: 0:00:53||

Episode 5|Iteration 51|reward:  384.0|last_reward_at:   50|Elapsed Time: 0:00:53||

Episode 5|Iteration 52|reward:  384.0|last_reward_at:   50|Elapsed Time: 0:00:54||

Episode 5|Iteration 53|reward:  384.0|last_reward_at:   50|Elapsed Time: 0:00:55||

Episode 5|Iteration 54|reward:  384.0|last_reward_at:   50|Elapsed Time: 0:00:56||

Episode 5|Iteration 55|reward:  390.0|last_reward_at:   50|Elapsed Time: 0:00:57||

Episode 5|Iteration 55|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:00:57||

Episode 5|Iteration 56|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:00:58||

Episode 5|Iteration 57|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:00:59||

Episode 5|Iteration 58|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:01:00||

Episode 5|Iteration 59|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:01:01||

Episode 5|Iteration 60|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:01:01||

Episode 5|Iteration 61|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:01:02||

Episode 5|Iteration 62|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:01:03||

Episode 5|Iteration 63|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:01:04||

Episode 5|Iteration 64|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:01:05||

Episode 5|Iteration 65|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:01:06||

Episode 5|Iteration 66|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:01:07||

Episode 5|Iteration 67|reward:  390.0|last_reward_at:   55|Elapsed Time: 0:01:08||

Episode 5|Iteration 68|reward:  490.0|last_reward_at:   55|Elapsed Time: 0:01:09||

Episode 5|Iteration 68|reward:  490.0|last_reward_at:   68|Elapsed Time: 0:01:09||

Episode 5|Iteration 69|reward:  490.0|last_reward_at:   68|Elapsed Time: 0:01:10||

Episode 5|Iteration 70|reward:  490.0|last_reward_at:   68|Elapsed Time: 0:01:10||

Episode 5|Iteration 71|reward:  490.0|last_reward_at:   68|Elapsed Time: 0:01:11||

Episode 5|Iteration 72|reward:  490.0|last_reward_at:   68|Elapsed Time: 0:01:12||

Episode 5|Iteration 73|reward:  490.0|last_reward_at:   68|Elapsed Time: 0:01:13||

Episode 5|Iteration 74|reward:  490.0|last_reward_at:   68|Elapsed Time: 0:01:14||

Episode 5|Iteration 75|reward:  490.0|last_reward_at:   68|Elapsed Time: 0:01:15||

Episode 5|Iteration 76|reward:  490.0|last_reward_at:   68|Elapsed Time: 0:01:16||

Episode 5|Iteration 77|reward:  504.0|last_reward_at:   68|Elapsed Time: 0:01:18||

Episode 5|Iteration 77|reward:  504.0|last_reward_at:   77|Elapsed Time: 0:01:18||

Episode 5|Iteration 78|reward:  504.0|last_reward_at:   77|Elapsed Time: 0:01:18||

Episode 5|Iteration 79|reward:  504.0|last_reward_at:   77|Elapsed Time: 0:01:19||

Episode 5|Iteration 80|reward:  504.0|last_reward_at:   77|Elapsed Time: 0:01:20||

Episode 5|Iteration 81|reward:  508.0|last_reward_at:   77|Elapsed Time: 0:01:21||

Episode 5|Iteration 81|reward:  508.0|last_reward_at:   81|Elapsed Time: 0:01:21||

Episode 5|Iteration 82|reward:  508.0|last_reward_at:   81|Elapsed Time: 0:01:22||

Episode 5|Iteration 83|reward:  508.0|last_reward_at:   81|Elapsed Time: 0:01:23||

Episode 5|Iteration 84|reward:  508.0|last_reward_at:   81|Elapsed Time: 0:01:24||

Episode 5|Iteration 85|reward:  508.0|last_reward_at:   81|Elapsed Time: 0:01:25||

Episode 5|Iteration 86|reward:  514.0|last_reward_at:   81|Elapsed Time: 0:01:27||

Episode 5|Iteration 86|reward:  514.0|last_reward_at:   86|Elapsed Time: 0:01:27||

Episode 5|Iteration 87|reward:  514.0|last_reward_at:   86|Elapsed Time: 0:01:28||

Episode 5|Iteration 88|reward:  614.0|last_reward_at:   86|Elapsed Time: 0:01:28||

Episode 5|Iteration 88|reward:  614.0|last_reward_at:   88|Elapsed Time: 0:01:28||

Episode 5|Iteration 89|reward:  614.0|last_reward_at:   88|Elapsed Time: 0:01:29||

Episode 5|Iteration 90|reward:  628.0|last_reward_at:   88|Elapsed Time: 0:01:30||

Episode 5|Iteration 90|reward:  628.0|last_reward_at:   90|Elapsed Time: 0:01:30||

Episode 5|Iteration 91|reward:  628.0|last_reward_at:   90|Elapsed Time: 0:01:31||

Episode 5|Iteration 92|reward:  728.0|last_reward_at:   90|Elapsed Time: 0:01:32||

Episode 5|Iteration 92|reward:  728.0|last_reward_at:   92|Elapsed Time: 0:01:32||

Episode 5|Iteration 93|reward:  728.0|last_reward_at:   92|Elapsed Time: 0:01:33||

Episode 5|Iteration 94|reward:  728.0|last_reward_at:   92|Elapsed Time: 0:01:34||

Episode 5|Iteration 95|reward:  742.0|last_reward_at:   92|Elapsed Time: 0:01:35||

Episode 5|Iteration 95|reward:  742.0|last_reward_at:   95|Elapsed Time: 0:01:35||

Episode 5|Iteration 96|reward:  842.0|last_reward_at:   95|Elapsed Time: 0:01:36||

Episode 5|Iteration 96|reward:  842.0|last_reward_at:   96|Elapsed Time: 0:01:36||

Episode 5|Iteration 97|reward:  856.0|last_reward_at:   96|Elapsed Time: 0:01:38||

Episode 5|Iteration 97|reward:  856.0|last_reward_at:   97|Elapsed Time: 0:01:38||

Episode 5|Iteration 98|reward:  956.0|last_reward_at:   97|Elapsed Time: 0:01:39||

Episode 5|Iteration 98|reward:  956.0|last_reward_at:   98|Elapsed Time: 0:01:39||

Episode 5|Iteration 99|reward:  970.0|last_reward_at:   98|Elapsed Time: 0:01:40||

Episode 5|Iteration 99|reward:  970.0|last_reward_at:   99|Elapsed Time: 0:01:40||

Episode 5|Iteration 100|reward:  970.0|last_reward_at:   99|Elapsed Time: 0:01:41||

Episode 5|Iteration 101|reward:  976.0|last_reward_at:   99|Elapsed Time: 0:01:42||

Episode 5|Iteration 101|reward:  976.0|last_reward_at:  101|Elapsed Time: 0:01:42||

Episode 5|Iteration 102|reward:  976.0|last_reward_at:  101|Elapsed Time: 0:01:43||

Episode 5|Iteration 103|reward:  976.0|last_reward_at:  101|Elapsed Time: 0:01:44||

Episode 5|Iteration 104|reward:  976.0|last_reward_at:  101|Elapsed Time: 0:01:45||

Episode 5|Iteration 105|reward:  976.0|last_reward_at:  101|Elapsed Time: 0:01:45||

Episode 5|Iteration 106|reward: 1076.0|last_reward_at:  101|Elapsed Time: 0:01:46||

Episode 5|Iteration 106|reward: 1076.0|last_reward_at:  106|Elapsed Time: 0:01:46||

Episode 5|Iteration 107|reward: 1090.0|last_reward_at:  106|Elapsed Time: 0:01:48||

Episode 5|Iteration 107|reward: 1090.0|last_reward_at:  107|Elapsed Time: 0:01:48||

Episode 5|Iteration 108|reward: 1090.0|last_reward_at:  107|Elapsed Time: 0:01:49||

Episode 5|Iteration 109|reward: 1090.0|last_reward_at:  107|Elapsed Time: 0:01:50||

Episode 5|Iteration 110|reward: 1090.0|last_reward_at:  107|Elapsed Time: 0:01:51||

Episode 5|Iteration 111|reward: 1090.0|last_reward_at:  107|Elapsed Time: 0:01:52||

Episode 5|Iteration 112|reward: 1090.0|last_reward_at:  107|Elapsed Time: 0:01:53||

Episode 5|Iteration 113|reward: 1190.0|last_reward_at:  107|Elapsed Time: 0:01:54||

Episode 5|Iteration 113|reward: 1190.0|last_reward_at:  113|Elapsed Time: 0:01:54||

Episode 5|Iteration 114|reward: 1190.0|last_reward_at:  113|Elapsed Time: 0:01:55||

Episode 5|Iteration 115|reward: 1204.0|last_reward_at:  113|Elapsed Time: 0:01:56||

Episode 5|Iteration 115|reward: 1204.0|last_reward_at:  115|Elapsed Time: 0:01:56||

Episode 5|Iteration 116|reward: 6204.0|last_reward_at:  115|Elapsed Time: 0:01:57||

Episode 5|Iteration 116|reward: 6204.0|last_reward_at:  116|Elapsed Time: 0:01:57||




  Episode 5 ended at t=116 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/17 (0.15)
    explore-remote: 5/26 (0.16)
    explore-connect: 0/32 (0.00)
    exploit-local: 11/2 (0.85)
    exploit-remote: 3/4 (0.43)
    exploit-connect: 11/2 (0.85)
  exploit deflected to exploration: 2
  ## Episode: 6/20 'DQL' ϵ=0.7662, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 6|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:01||

Episode 6|Iteration 2|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:02||

Episode 6|Iteration 3|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:03||

Episode 6|Iteration 3|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:03||

Episode 6|Iteration 4|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:04||

Episode 6|Iteration 5|reward:   16.0|last_reward_at:    3|Elapsed Time: 0:00:05||

Episode 6|Iteration 5|reward:   16.0|last_reward_at:    5|Elapsed Time: 0:00:05||

Episode 6|Iteration 6|reward:   16.0|last_reward_at:    5|Elapsed Time: 0:00:06||

Episode 6|Iteration 7|reward:  116.0|last_reward_at:    5|Elapsed Time: 0:00:07||

Episode 6|Iteration 7|reward:  116.0|last_reward_at:    7|Elapsed Time: 0:00:07||

Episode 6|Iteration 8|reward:  116.0|last_reward_at:    7|Elapsed Time: 0:00:08||

Episode 6|Iteration 9|reward:  127.0|last_reward_at:    7|Elapsed Time: 0:00:09||

Episode 6|Iteration 9|reward:  127.0|last_reward_at:    9|Elapsed Time: 0:00:09||

Episode 6|Iteration 10|reward:  127.0|last_reward_at:    9|Elapsed Time: 0:00:10||

Episode 6|Iteration 11|reward:  127.0|last_reward_at:    9|Elapsed Time: 0:00:10||

Episode 6|Iteration 12|reward:  127.0|last_reward_at:    9|Elapsed Time: 0:00:11||

Episode 6|Iteration 13|reward:  127.0|last_reward_at:    9|Elapsed Time: 0:00:14||

Episode 6|Iteration 14|reward:  127.0|last_reward_at:    9|Elapsed Time: 0:00:15||

Episode 6|Iteration 15|reward:  127.0|last_reward_at:    9|Elapsed Time: 0:00:16||

Episode 6|Iteration 16|reward:  127.0|last_reward_at:    9|Elapsed Time: 0:00:17||

Episode 6|Iteration 17|reward:  129.0|last_reward_at:    9|Elapsed Time: 0:00:18||

Episode 6|Iteration 17|reward:  129.0|last_reward_at:   17|Elapsed Time: 0:00:18||

Episode 6|Iteration 18|reward:  129.0|last_reward_at:   17|Elapsed Time: 0:00:19||

Episode 6|Iteration 19|reward:  129.0|last_reward_at:   17|Elapsed Time: 0:00:20||

Episode 6|Iteration 20|reward:  129.0|last_reward_at:   17|Elapsed Time: 0:00:20||

Episode 6|Iteration 21|reward:  135.0|last_reward_at:   17|Elapsed Time: 0:00:21||

Episode 6|Iteration 21|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:21||

Episode 6|Iteration 22|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:22||

Episode 6|Iteration 23|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:23||

Episode 6|Iteration 24|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:24||

Episode 6|Iteration 25|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:25||

Episode 6|Iteration 26|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:26||

Episode 6|Iteration 27|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:27||

Episode 6|Iteration 28|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:28||

Episode 6|Iteration 29|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:29||

Episode 6|Iteration 30|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:30||

Episode 6|Iteration 31|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:31||

Episode 6|Iteration 32|reward:  135.0|last_reward_at:   21|Elapsed Time: 0:00:32||

Episode 6|Iteration 33|reward:  144.0|last_reward_at:   21|Elapsed Time: 0:00:32||

Episode 6|Iteration 33|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:32||

Episode 6|Iteration 34|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:33||

Episode 6|Iteration 35|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:34||

Episode 6|Iteration 36|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:35||

Episode 6|Iteration 37|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:36||

Episode 6|Iteration 38|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:37||

Episode 6|Iteration 39|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:38||

Episode 6|Iteration 40|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:39||

Episode 6|Iteration 41|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:40||

Episode 6|Iteration 42|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:41||

Episode 6|Iteration 43|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:42||

Episode 6|Iteration 44|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:44||

Episode 6|Iteration 45|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:45||

Episode 6|Iteration 46|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:46||

Episode 6|Iteration 47|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:47||

Episode 6|Iteration 48|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:48||

Episode 6|Iteration 49|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:49||

Episode 6|Iteration 50|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:49||

Episode 6|Iteration 51|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:50||

Episode 6|Iteration 52|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:51||

Episode 6|Iteration 53|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:52||

Episode 6|Iteration 54|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:53||

Episode 6|Iteration 55|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:54||

Episode 6|Iteration 56|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:55||

Episode 6|Iteration 57|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:56||

Episode 6|Iteration 58|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:57||

Episode 6|Iteration 59|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:58||

Episode 6|Iteration 60|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:00:59||

Episode 6|Iteration 61|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:00||

Episode 6|Iteration 62|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:01||

Episode 6|Iteration 63|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:02||

Episode 6|Iteration 64|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:03||

Episode 6|Iteration 65|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:03||

Episode 6|Iteration 66|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:04||

Episode 6|Iteration 67|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:05||

Episode 6|Iteration 68|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:06||

Episode 6|Iteration 69|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:07||

Episode 6|Iteration 70|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:08||

Episode 6|Iteration 71|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:09||

Episode 6|Iteration 72|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:10||

Episode 6|Iteration 73|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:11||

Episode 6|Iteration 74|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:12||

Episode 6|Iteration 75|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:12||

Episode 6|Iteration 76|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:13||

Episode 6|Iteration 77|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:14||

Episode 6|Iteration 78|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:15||

Episode 6|Iteration 79|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:16||

Episode 6|Iteration 80|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:18||

Episode 6|Iteration 81|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:19||

Episode 6|Iteration 82|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:20||

Episode 6|Iteration 83|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:21||

Episode 6|Iteration 84|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:22||

Episode 6|Iteration 85|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:23||

Episode 6|Iteration 86|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:24||

Episode 6|Iteration 87|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:25||

Episode 6|Iteration 88|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:26||

Episode 6|Iteration 89|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:26||

Episode 6|Iteration 90|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:27||

Episode 6|Iteration 91|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:28||

Episode 6|Iteration 92|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:29||

Episode 6|Iteration 93|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:30||

Episode 6|Iteration 94|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:32||

Episode 6|Iteration 95|reward:  144.0|last_reward_at:   33|Elapsed Time: 0:01:33||

Episode 6|Iteration 96|reward:  152.0|last_reward_at:   33|Elapsed Time: 0:01:34||

Episode 6|Iteration 96|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:34||

Episode 6|Iteration 97|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:34||

Episode 6|Iteration 98|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:35||

Episode 6|Iteration 99|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:36||

Episode 6|Iteration 100|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:37||

Episode 6|Iteration 101|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:38||

Episode 6|Iteration 102|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:39||

Episode 6|Iteration 103|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:40||

Episode 6|Iteration 104|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:41||

Episode 6|Iteration 105|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:41||

Episode 6|Iteration 106|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:42||

Episode 6|Iteration 107|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:43||

Episode 6|Iteration 108|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:44||

Episode 6|Iteration 109|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:45||

Episode 6|Iteration 110|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:46||

Episode 6|Iteration 111|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:47||

Episode 6|Iteration 112|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:48||

Episode 6|Iteration 113|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:49||

Episode 6|Iteration 114|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:49||

Episode 6|Iteration 115|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:50||

Episode 6|Iteration 116|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:51||

Episode 6|Iteration 117|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:52||

Episode 6|Iteration 118|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:53||

Episode 6|Iteration 119|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:54||

Episode 6|Iteration 120|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:55||

Episode 6|Iteration 121|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:56||

Episode 6|Iteration 122|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:56||

Episode 6|Iteration 123|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:57||

Episode 6|Iteration 124|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:58||

Episode 6|Iteration 125|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:01:59||

Episode 6|Iteration 126|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:02:00||

Episode 6|Iteration 127|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:02:01||

Episode 6|Iteration 128|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:02:02||

Episode 6|Iteration 129|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:02:03||

Episode 6|Iteration 130|reward:  152.0|last_reward_at:   96|Elapsed Time: 0:02:04||

Episode 6|Iteration 131|reward:  252.0|last_reward_at:   96|Elapsed Time: 0:02:04||

Episode 6|Iteration 131|reward:  252.0|last_reward_at:  131|Elapsed Time: 0:02:04||

Episode 6|Iteration 132|reward:  252.0|last_reward_at:  131|Elapsed Time: 0:02:05||

Episode 6|Iteration 133|reward:  252.0|last_reward_at:  131|Elapsed Time: 0:02:06||

Episode 6|Iteration 134|reward:  252.0|last_reward_at:  131|Elapsed Time: 0:02:07||

Episode 6|Iteration 135|reward:  252.0|last_reward_at:  131|Elapsed Time: 0:02:08||

Episode 6|Iteration 136|reward:  252.0|last_reward_at:  131|Elapsed Time: 0:02:09||

Episode 6|Iteration 137|reward:  266.0|last_reward_at:  131|Elapsed Time: 0:02:10||

Episode 6|Iteration 137|reward:  266.0|last_reward_at:  137|Elapsed Time: 0:02:10||

Episode 6|Iteration 138|reward:  266.0|last_reward_at:  137|Elapsed Time: 0:02:11||

Episode 6|Iteration 139|reward:  266.0|last_reward_at:  137|Elapsed Time: 0:02:12||

Episode 6|Iteration 140|reward:  266.0|last_reward_at:  137|Elapsed Time: 0:02:13||

Episode 6|Iteration 141|reward:  266.0|last_reward_at:  137|Elapsed Time: 0:02:13||

Episode 6|Iteration 142|reward:  266.0|last_reward_at:  137|Elapsed Time: 0:02:14||

Episode 6|Iteration 143|reward:  266.0|last_reward_at:  137|Elapsed Time: 0:02:15||

Episode 6|Iteration 144|reward:  270.0|last_reward_at:  137|Elapsed Time: 0:02:16||

Episode 6|Iteration 144|reward:  270.0|last_reward_at:  144|Elapsed Time: 0:02:16||

Episode 6|Iteration 145|reward:  270.0|last_reward_at:  144|Elapsed Time: 0:02:17||

Episode 6|Iteration 146|reward:  270.0|last_reward_at:  144|Elapsed Time: 0:02:18||

Episode 6|Iteration 147|reward:  270.0|last_reward_at:  144|Elapsed Time: 0:02:19||

Episode 6|Iteration 148|reward:  270.0|last_reward_at:  144|Elapsed Time: 0:02:20||

Episode 6|Iteration 149|reward:  270.0|last_reward_at:  144|Elapsed Time: 0:02:20||

Episode 6|Iteration 150|reward:  270.0|last_reward_at:  144|Elapsed Time: 0:02:21||

Episode 6|Iteration 151|reward:  272.0|last_reward_at:  144|Elapsed Time: 0:02:22||

Episode 6|Iteration 151|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:22||

Episode 6|Iteration 152|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:23||

Episode 6|Iteration 153|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:24||

Episode 6|Iteration 154|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:25||

Episode 6|Iteration 155|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:26||

Episode 6|Iteration 156|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:26||

Episode 6|Iteration 157|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:27||

Episode 6|Iteration 158|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:28||

Episode 6|Iteration 159|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:29||

Episode 6|Iteration 160|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:30||

Episode 6|Iteration 161|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:31||

Episode 6|Iteration 162|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:32||

Episode 6|Iteration 163|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:33||

Episode 6|Iteration 164|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:34||

Episode 6|Iteration 165|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:35||

Episode 6|Iteration 166|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:35||

Episode 6|Iteration 167|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:36||

Episode 6|Iteration 168|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:37||

Episode 6|Iteration 169|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:38||

Episode 6|Iteration 170|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:39||

Episode 6|Iteration 171|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:40||

Episode 6|Iteration 172|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:41||

Episode 6|Iteration 173|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:42||

Episode 6|Iteration 174|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:43||

Episode 6|Iteration 175|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:44||

Episode 6|Iteration 176|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:44||

Episode 6|Iteration 177|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:45||

Episode 6|Iteration 178|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:46||

Episode 6|Iteration 179|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:47||

Episode 6|Iteration 180|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:48||

Episode 6|Iteration 181|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:49||

Episode 6|Iteration 182|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:50||

Episode 6|Iteration 183|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:51||

Episode 6|Iteration 184|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:51||

Episode 6|Iteration 185|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:52||

Episode 6|Iteration 186|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:53||

Episode 6|Iteration 187|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:54||

Episode 6|Iteration 188|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:55||

Episode 6|Iteration 189|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:56||

Episode 6|Iteration 190|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:57||

Episode 6|Iteration 191|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:58||

Episode 6|Iteration 192|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:02:59||

Episode 6|Iteration 193|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:03:00||

Episode 6|Iteration 194|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:03:00||

Episode 6|Iteration 195|reward:  272.0|last_reward_at:  151|Elapsed Time: 0:03:01||

Episode 6|Iteration 196|reward:  372.0|last_reward_at:  151|Elapsed Time: 0:03:02||

Episode 6|Iteration 196|reward:  372.0|last_reward_at:  196|Elapsed Time: 0:03:02||

Episode 6|Iteration 197|reward:  372.0|last_reward_at:  196|Elapsed Time: 0:03:03||

Episode 6|Iteration 198|reward:  372.0|last_reward_at:  196|Elapsed Time: 0:03:04||

Episode 6|Iteration 199|reward:  372.0|last_reward_at:  196|Elapsed Time: 0:03:05||

Episode 6|Iteration 200|reward:  372.0|last_reward_at:  196|Elapsed Time: 0:03:06||

Episode 6|Iteration 200|reward:  372.0|last_reward_at:  196|Elapsed Time: 0:03:06||




  Episode 6 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/28 (0.07)
    explore-remote: 6/61 (0.09)
    explore-connect: 0/62 (0.00)
    exploit-local: 2/11 (0.15)
    exploit-remote: 0/4 (0.00)
    exploit-connect: 3/21 (0.12)
  exploit deflected to exploration: 2
  ## Episode: 7/20 'DQL' ϵ=0.7401, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 7|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:01||

Episode 7|Iteration 2|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:02||

Episode 7|Iteration 3|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:04||

Episode 7|Iteration 4|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:05||

Episode 7|Iteration 5|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:06||

Episode 7|Iteration 6|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:07||

Episode 7|Iteration 7|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:08||

Episode 7|Iteration 7|reward:   14.0|last_reward_at:    7|Elapsed Time: 0:00:08||

Episode 7|Iteration 8|reward:  114.0|last_reward_at:    7|Elapsed Time: 0:00:09||

Episode 7|Iteration 8|reward:  114.0|last_reward_at:    8|Elapsed Time: 0:00:09||

Episode 7|Iteration 9|reward:  125.0|last_reward_at:    8|Elapsed Time: 0:00:10||

Episode 7|Iteration 9|reward:  125.0|last_reward_at:    9|Elapsed Time: 0:00:10||

Episode 7|Iteration 10|reward:  125.0|last_reward_at:    9|Elapsed Time: 0:00:10||

Episode 7|Iteration 11|reward:  127.0|last_reward_at:    9|Elapsed Time: 0:00:11||

Episode 7|Iteration 11|reward:  127.0|last_reward_at:   11|Elapsed Time: 0:00:11||

Episode 7|Iteration 12|reward:  127.0|last_reward_at:   11|Elapsed Time: 0:00:12||

Episode 7|Iteration 13|reward:  127.0|last_reward_at:   11|Elapsed Time: 0:00:13||

Episode 7|Iteration 14|reward:  127.0|last_reward_at:   11|Elapsed Time: 0:00:14||

Episode 7|Iteration 15|reward:  127.0|last_reward_at:   11|Elapsed Time: 0:00:17||

Episode 7|Iteration 16|reward:  127.0|last_reward_at:   11|Elapsed Time: 0:00:17||

Episode 7|Iteration 17|reward:  127.0|last_reward_at:   11|Elapsed Time: 0:00:18||

Episode 7|Iteration 18|reward:  129.0|last_reward_at:   11|Elapsed Time: 0:00:19||

Episode 7|Iteration 18|reward:  129.0|last_reward_at:   18|Elapsed Time: 0:00:19||

Episode 7|Iteration 19|reward:  129.0|last_reward_at:   18|Elapsed Time: 0:00:20||

Episode 7|Iteration 20|reward:  129.0|last_reward_at:   18|Elapsed Time: 0:00:21||

Episode 7|Iteration 21|reward:  129.0|last_reward_at:   18|Elapsed Time: 0:00:22||

Episode 7|Iteration 22|reward:  129.0|last_reward_at:   18|Elapsed Time: 0:00:23||

Episode 7|Iteration 23|reward:  129.0|last_reward_at:   18|Elapsed Time: 0:00:25||

Episode 7|Iteration 24|reward:  129.0|last_reward_at:   18|Elapsed Time: 0:00:26||

Episode 7|Iteration 25|reward:  129.0|last_reward_at:   18|Elapsed Time: 0:00:27||

Episode 7|Iteration 26|reward:  129.0|last_reward_at:   18|Elapsed Time: 0:00:28||

Episode 7|Iteration 27|reward:  129.0|last_reward_at:   18|Elapsed Time: 0:00:29||

Episode 7|Iteration 28|reward:  129.0|last_reward_at:   18|Elapsed Time: 0:00:30||

Episode 7|Iteration 29|reward:  137.0|last_reward_at:   18|Elapsed Time: 0:00:31||

Episode 7|Iteration 29|reward:  137.0|last_reward_at:   29|Elapsed Time: 0:00:31||

Episode 7|Iteration 30|reward:  137.0|last_reward_at:   29|Elapsed Time: 0:00:32||

Episode 7|Iteration 31|reward:  143.0|last_reward_at:   29|Elapsed Time: 0:00:33||

Episode 7|Iteration 31|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:33||

Episode 7|Iteration 32|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:34||

Episode 7|Iteration 33|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:35||

Episode 7|Iteration 34|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:36||

Episode 7|Iteration 35|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:37||

Episode 7|Iteration 36|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:37||

Episode 7|Iteration 37|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:40||

Episode 7|Iteration 38|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:41||

Episode 7|Iteration 39|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:42||

Episode 7|Iteration 40|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:43||

Episode 7|Iteration 41|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:43||

Episode 7|Iteration 42|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:44||

Episode 7|Iteration 43|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:45||

Episode 7|Iteration 44|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:46||

Episode 7|Iteration 45|reward:  143.0|last_reward_at:   31|Elapsed Time: 0:00:47||

Episode 7|Iteration 46|reward:  152.0|last_reward_at:   31|Elapsed Time: 0:00:48||

Episode 7|Iteration 46|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:48||

Episode 7|Iteration 47|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:49||

Episode 7|Iteration 48|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:50||

Episode 7|Iteration 49|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:51||

Episode 7|Iteration 50|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:52||

Episode 7|Iteration 51|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:53||

Episode 7|Iteration 52|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:54||

Episode 7|Iteration 53|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:55||

Episode 7|Iteration 54|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:56||

Episode 7|Iteration 55|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:57||

Episode 7|Iteration 56|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:57||

Episode 7|Iteration 57|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:58||

Episode 7|Iteration 58|reward:  152.0|last_reward_at:   46|Elapsed Time: 0:00:59||

In [None]:
# Evaluation pass: run the search loop again with the learner obtained from the
# DQL training run. Epsilon and its floor are both zero, so action selection is
# meant to rely on the learnt Q-function rather than on random exploration.
trained_dql_learner = dql_run["learner"]
exploit_plot_prefix = os.path.join(plots_dir, f"dql-{gymid}")

dql_exploit_run = learner.epsilon_greedy_search(
    gym_env,
    ep,
    learner=trained_dql_learner,
    title="Exploiting DQL",
    # Budget: the evaluation episode count plus the notebook-level iteration_count.
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    # Starting value and minimum of epsilon are both pinned to zero.
    epsilon=0.0,
    epsilon_minimum=0.0,
    # Reporting options: quiet logging, no rendering, no episode-length plot.
    verbosity=Verbosity.Quiet,
    render=False,
    plot_episodes_length=False,
    render_last_episode_rewards_to=exploit_plot_prefix,
)

In [None]:
# Runs to compare: the DQL training run and its exploit-only evaluation run
# (these are the only two entries in the list).
all_runs = [dql_run, dql_exploit_run]

# State/action model built from the environment bounds; in this cell it is only
# used to describe the state features and action-space size in the plot title.
themodel = dqla.CyberBattleStateActionModel(ep)

# NOTE(review): the title reports eval_episode_count, whereas dql_run may have
# been produced with a different episode count -- confirm the label is intended.
benchmark_title = (
    f"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count}\n"
    f"State: {[f.name() for f in themodel.state_space.feature_selection]} "
    f"({len(themodel.state_space.feature_selection)})\n"
    f"Action: abstract_action ({themodel.action_space.flat_size()})"
)
benchmark_figure_path = os.path.join(plots_dir, f"benchmark-{gymid}-cumrewards.png")

# Plot the averaged cumulative rewards of every run in all_runs, passing the
# PNG path under plots_dir as the save_at target.
p.plot_averaged_cummulative_rewards(
    title=benchmark_title,
    all_runs=all_runs,
    save_at=benchmark_figure_path,
)

In [None]:
# Contenders for the final comparison: the DQL training run and the
# exploit-only evaluation run.
contenders = [dql_run, dql_exploit_run]

# First the episode lengths of each contender ...
p.plot_episodes_length(contenders)

# ... then their averaged cumulative rewards, with the PNG path under plots_dir
# handed over as the save_at target.
contenders_title = f"Agent Benchmark top contenders\nmax_nodes:{ep.maximum_node_count}\n"
contenders_figure_path = os.path.join(plots_dir, f"benchmark-{gymid}-cumreward_contenders.png")
p.plot_averaged_cummulative_rewards(
    all_runs=contenders,
    title=contenders_title,
    save_at=contenders_figure_path,
)

In [None]:
# For each contender run, plot the cumulative rewards of all its episodes.
for contender in contenders:
    p.plot_all_episodes(contender)