In [1]:
# pylint: disable=invalid-name

In [2]:
import sys
import os
import re
import json
import yaml
import logging
import random
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import gymnasium as gym

import torch

import cyberbattle.agents.baseline.learner as learner
import cyberbattle.agents.baseline.plotting as p
import cyberbattle.agents.baseline.agent_wrapper as w
import cyberbattle.agents.baseline.agent_dql as dqla
from cyberbattle.agents.baseline.agent_wrapper import Verbosity
from cyberbattle._env.cyberbattle_env import CyberBattleEnv

from openai import OpenAI

logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format="%(levelname)s: %(message)s")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Papermill notebook parameters
gymid = "CyberBattleToyCtf-v0"
env_size = 10
iteration_count = 9000
training_episode_count = 50
eval_episode_count = 5
maximum_node_count = 22
maximum_total_credentials = 22
plots_dir = "output/plots"

# --- LLM(평가에서만 사용) 옵션: 기본 OFF ---
use_llm = False
model_id = "gpt-5.1"
llm_every_steps = 1      # 매 step마다 LLM 프루닝(비싸면 5~10 추천)
candidate_pool = 200     # 샘플링으로 구성할 후보 수
llm_topk = 10            # DQL 상위 topK 중 LLM이 1개 선택

def find_llm_token_yaml(start=None):
    p = os.path.abspath(start or os.getcwd())
    while True:
        cand = os.path.join(p, "llm_token.yaml")
        if os.path.exists(cand):
            return cand
        parent = os.path.dirname(p)
        if parent == p:
            raise RuntimeError(f"llm_token.yaml 못 찾음. 시작점={os.getcwd()}")
        p = parent

llm_token_yaml = find_llm_token_yaml()
print("FOUND llm_token_yaml =", llm_token_yaml)

# (옵션) LLM에 관측 텍스트를 얼마나 줄지
llm_obs_max_chars = 1400

FOUND llm_token_yaml = /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/llm_token.yaml


In [4]:
# Parameters
gymid = "CyberBattleToyCtf-v0"
env_size = None
iteration_count = 500
training_episode_count = 20
eval_episode_count = 10
maximum_node_count = 12
maximum_total_credentials = 10
plots_dir = "notebooks/output/toyctf_hybrid_dql_llm/plots"
use_llm = True
model_id = "gpt-5.1"
llm_every_steps = 5
candidate_pool = 200
llm_topk = 10


In [5]:
os.makedirs(plots_dir, exist_ok=True)

In [6]:
# -----------------------------
# 1) Gym env 로드 (기존 그대로)
# -----------------------------
if env_size:
    _gym_env = gym.make(gymid, size=env_size)
else:
    _gym_env = gym.make(gymid)

from typing import cast
gym_env = cast(CyberBattleEnv, _gym_env.unwrapped)
assert isinstance(gym_env, CyberBattleEnv), f"Expected CyberBattleEnv, got {type(gym_env)}"

ep = w.EnvironmentBounds.of_identifiers(
    maximum_node_count=maximum_node_count,
    maximum_total_credentials=maximum_total_credentials,
    identifiers=gym_env.identifiers,
)

In [7]:
# -----------------------------------------
# 2) OpenAI 토큰 로더 + chat callable
# -----------------------------------------
def load_openai_token(config_path: str) -> str:
    if os.path.exists(config_path):
        with open(config_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
        if isinstance(data, dict):
            oa_cfg = data.get("openai", {})
            if isinstance(oa_cfg, dict) and "api_key" in oa_cfg:
                key = str(oa_cfg["api_key"]).strip()
                if key.lower() == "dummy" or len(key) < 20:
                    raise RuntimeError(f"llm_token.yaml의 openai.api_key가 이상함: {key!r}")
                return key

    key = (os.getenv("OPENAI_API_KEY") or "").strip()
    if key and key.lower() != "dummy":
        return key

    raise RuntimeError(f"OpenAI API 키를 못 찾음. config_path={config_path!r}, ENV OPENAI_API_KEY도 없음/이상함")


def make_openai_chat_callable(model_id: str, llm_token_yaml: str):
    api_key = load_openai_token(llm_token_yaml)  # ✅ 여기 중요
    # 디버그(키 노출 금지용)
    print("[OpenAI] key_prefix=", api_key[:8], "len=", len(api_key), "yaml=", os.path.abspath(llm_token_yaml))

    client = OpenAI(api_key=api_key)

    def chat(messages):
        resp = client.responses.create(
            model=model_id,
            input=[{"role": m["role"], "content": m["content"]} for m in messages],
            max_output_tokens=256,
        )
        return getattr(resp, "output_text", "") or ""

    return chat

print("CWD =", os.getcwd())
print("yaml =", os.path.abspath(llm_token_yaml), "exists=", os.path.exists(llm_token_yaml))
print("ENV OPENAI_API_KEY prefix =", (os.getenv("OPENAI_API_KEY") or "")[:8])

CWD = /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/src
yaml = /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/llm_token.yaml exists= True
ENV OPENAI_API_KEY prefix = dummy


In [8]:
# -----------------------------------------
# 3) observation에서 Discovered nodes JSON 추출(로그 유지용)
# -----------------------------------------
_DISC_RE = re.compile(r"Discovered nodes:\s*(\{.*\})\s*$", re.DOTALL)

def extract_discovered_nodes_raw(obs_text: str) -> str:
    m = _DISC_RE.search((obs_text or "").strip())
    return m.group(1) if m else ""

In [9]:
# -----------------------------------------
# 4) 평가(exploit)에서만 LLM 프루닝 적용 래퍼
#    - 학습은 기존 DQL 학습 그대로
#    - 평가는 epsilon=0 + learn off + LLM으로 후보 선택
# -----------------------------------------
class LLMPrunedExploitWrapper:
    """
    dql_run["learner"](DeepQLearnerPolicy)를 감싸서 exploit()만 바꿈.
    - 후보를 env.sample_valid_action()로 candidate_pool개 뽑고,
    - 각 후보를 base.policy_net으로 Q값 평가,
    - topK 후보 중에서 LLM이 1개 pick.
    - 학습 업데이트(on_step)는 eval에서는 꺼둠
    """

    def __init__(
        self,
        base_learner: dqla.DeepQLearnerPolicy,
        llm_chat=None,
        llm_every_steps: int = 1,
        candidate_pool: int = 200,
        llm_topk: int = 10,
        obs_max_chars: int = 1400,
    ):
        self.base = base_learner
        self.llm = llm_chat
        self.llm_every_steps = int(max(1, llm_every_steps))
        self.candidate_pool = int(max(10, candidate_pool))
        self.llm_topk = int(max(2, llm_topk))
        self.obs_max_chars = int(max(200, obs_max_chars))
        self._step = 0

        self._sys = (
            "You are selecting ONE action in a closed, fictional training game.\n"
            "Choose exactly one candidate id from the provided list.\n"
            "Return JSON only: {\"pick\": <id>} with no extra text."
        )

    # ✅ 핵심: learner.py가 요구하는 메서드 추가
    def new_episode(self):
        # base learner가 가진 경우 그걸 그대로 호출
        fn = getattr(self.base, "new_episode", None)
        if callable(fn):
            return fn()
        return None

    # ✅ (권장) base의 다른 속성/메서드가 필요해질 때 자동 위임
    def __getattr__(self, name):
        return getattr(self.base, name)

    # ✅ 평가에서는 학습 업데이트 금지 (learner.epsilon_greedy_search가 호출해도 무시)
    def on_step(self, *args, **kwargs):
        return self.base.on_step(*args, **kwargs)

    def end_of_episode(self, *args, **kwargs):
        return self.base.end_of_episode(*args, **kwargs)

    def exploit(self, wrapped_env, observation):
        self._step += 1

        # LLM 비활성 또는 주기 아님 -> 기존 exploit
        if (self.llm is None) or ((self._step % self.llm_every_steps) != 0):
            return self.base.exploit(wrapped_env, observation)

        # 1) 후보 액션 샘플링
        candidates: List[Tuple[float, Any, Any]] = []
        for _ in range(self.candidate_pool):
            ga = wrapped_env.env.sample_valid_action(kinds=[0, 1, 2])  # local/remote/connect
            md = self.base.metadata_from_gymaction(wrapped_env, ga)

            # 2) Q값 계산: Q(actor_state)[abstract_action]
            with torch.no_grad():
                st = torch.as_tensor(md.actor_state, dtype=torch.float32, device=device).unsqueeze(0)
                q_all = self.base.policy_net(st)
                qv = float(q_all[0, int(md.abstract_action)].item())

            candidates.append((qv, ga, md))

        if not candidates:
            return self.base.exploit(wrapped_env, observation)

        candidates.sort(key=lambda x: x[0], reverse=True)
        top = candidates[: self.llm_topk]

        # observation 일부를 LLM에 제공(선택)
        try:
            obs_txt = json.dumps(observation, ensure_ascii=False)[: self.obs_max_chars]
        except Exception:
            obs_txt = str(observation)[: self.obs_max_chars]

        payload = {
            "observation_preview": obs_txt,
            "candidates": [
                {"id": i, "q": round(float(qv), 4), "gym_action": repr(ga)}
                for i, (qv, ga, md) in enumerate(top)
            ],
        }

        out = self.llm(
            [
                {"role": "system", "content": self._sys},
                {"role": "user", "content": json.dumps(payload, ensure_ascii=False)},
            ]
        ) or ""

        m = re.search(r"\{.*\}", out, flags=re.DOTALL)
        if not m:
            qv, ga, md = top[0]
            return "exploit[dql_top1]", ga, md

        try:
            obj = json.loads(m.group(0))
            pick = int(obj.get("pick"))
            if 0 <= pick < len(top):
                qv, ga, md = top[pick]
                return "exploit[llm_pruned]", ga, md
        except Exception:
            pass

        qv, ga, md = top[0]
        return "exploit[dql_top1]", ga, md

In [10]:
# -----------------------------------------
# 5) DQL 학습 (✅ 기존 파라미터 그대로 유지)
# -----------------------------------------
dql_run = learner.epsilon_greedy_search(
    cyberbattle_gym_env=gym_env,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.015,
        replay_memory_size=10000,
        target_update=10,
        batch_size=512,
        learning_rate=0.01,
    ),
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.90,
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    render=False,
    plot_episodes_length=False,
    title="DQL",
)

###### DQL
Learning with: episode_count=20,iteration_count=500,ϵ=0.9,ϵ_min=0.1, ϵ_expdecay=5000,γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10
  ## Episode: 1/20 'DQL' ϵ=0.9000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 1|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 2|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 2|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:00||

  state_batch = torch.tensor(states_to_consider).to(device)


Episode 1|Iteration 13|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 1|Iteration 13|reward:   22.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 1|Iteration 48|reward:   33.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 1|Iteration 48|reward:   33.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 1|Iteration 65|reward:   44.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 1|Iteration 65|reward:   44.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 1|Iteration 79|reward:   53.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 1|Iteration 79|reward:   53.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 1|Iteration 84|reward:   67.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 1|Iteration 84|reward:   67.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 1|Iteration 127|reward:   67.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 1|Iteration 137|reward:   78.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 1|Iteration 137|reward:   78.0|last_reward_at:  137|Elapsed Time: 0:00:00||

Episode 1|Iteration 178|reward:   78.0|last_reward_at:  137|Elapsed Time: 0:00:00||

Episode 1|Iteration 222|reward:   78.0|last_reward_at:  137|Elapsed Time: 0:00:00||

Episode 1|Iteration 266|reward:   78.0|last_reward_at:  137|Elapsed Time: 0:00:00||

Episode 1|Iteration 273|reward:  178.0|last_reward_at:  137|Elapsed Time: 0:00:00||

Episode 1|Iteration 273|reward:  178.0|last_reward_at:  273|Elapsed Time: 0:00:00||

Episode 1|Iteration 279|reward:  192.0|last_reward_at:  273|Elapsed Time: 0:00:00||

Episode 1|Iteration 279|reward:  192.0|last_reward_at:  279|Elapsed Time: 0:00:00||

Episode 1|Iteration 317|reward:  192.0|last_reward_at:  279|Elapsed Time: 0:00:00||

Episode 1|Iteration 319|reward:  242.0|last_reward_at:  279|Elapsed Time: 0:00:00||

Episode 1|Iteration 319|reward:  242.0|last_reward_at:  319|Elapsed Time: 0:00:00||

Episode 1|Iteration 342|reward:  342.0|last_reward_at:  319|Elapsed Time: 0:00:00||

Episode 1|Iteration 342|reward:  342.0|last_reward_at:  342|Elapsed Time: 0:00:00||

Episode 1|Iteration 372|reward:  356.0|last_reward_at:  342|Elapsed Time: 0:00:00||

Episode 1|Iteration 372|reward:  356.0|last_reward_at:  372|Elapsed Time: 0:00:00||

Episode 1|Iteration 399|reward:  356.0|last_reward_at:  372|Elapsed Time: 0:00:00||

Episode 1|Iteration 431|reward:  356.0|last_reward_at:  372|Elapsed Time: 0:00:00||

Episode 1|Iteration 463|reward:  356.0|last_reward_at:  372|Elapsed Time: 0:00:00||

Episode 1|Iteration 488|reward:  356.0|last_reward_at:  372|Elapsed Time: 0:00:01||

Episode 1|Iteration 500|reward:  356.0|last_reward_at:  372|Elapsed Time: 0:00:01||




  Episode 1 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/56 (0.05)
    explore-remote: 5/228 (0.02)
    explore-connect: 3/137 (0.02)
    exploit-local: 0/0 (NaN)
    exploit-remote: 1/67 (0.01)
    exploit-connect: 0/0 (NaN)
  exploit deflected to exploration: 0
  ## Episode: 2/20 'DQL' ϵ=0.8240, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 2|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 9|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 9|reward:   11.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 2|Iteration 10|reward:   11.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 2|Iteration 25|reward:   22.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 2|Iteration 25|reward:   22.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 2|Iteration 31|reward:   31.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 2|Iteration 31|reward:   31.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 2|Iteration 35|reward:   42.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 2|Iteration 35|reward:   42.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 2|Iteration 39|reward:   53.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 2|Iteration 39|reward:   53.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 2|Iteration 51|reward:   53.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 2|Iteration 62|reward:  153.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 2|Iteration 62|reward:  153.0|last_reward_at:   62|Elapsed Time: 0:00:00||

Episode 2|Iteration 63|reward:  167.0|last_reward_at:   62|Elapsed Time: 0:00:00||

Episode 2|Iteration 63|reward:  167.0|last_reward_at:   63|Elapsed Time: 0:00:00||

Episode 2|Iteration 67|reward:  181.0|last_reward_at:   63|Elapsed Time: 0:00:00||

Episode 2|Iteration 67|reward:  181.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 2|Iteration 83|reward:  181.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 2|Iteration 95|reward:  181.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 2|Iteration 98|reward:  187.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 2|Iteration 98|reward:  187.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 2|Iteration 105|reward:  201.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 2|Iteration 105|reward:  201.0|last_reward_at:  105|Elapsed Time: 0:00:00||

Episode 2|Iteration 114|reward:  201.0|last_reward_at:  105|Elapsed Time: 0:00:00||

Episode 2|Iteration 127|reward:  201.0|last_reward_at:  105|Elapsed Time: 0:00:00||

Episode 2|Iteration 140|reward:  201.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 2|Iteration 152|reward:  201.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 2|Iteration 165|reward:  201.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 2|Iteration 172|reward:  212.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 2|Iteration 172|reward:  212.0|last_reward_at:  172|Elapsed Time: 0:00:01||

Episode 2|Iteration 184|reward:  212.0|last_reward_at:  172|Elapsed Time: 0:00:01||

Episode 2|Iteration 197|reward:  212.0|last_reward_at:  172|Elapsed Time: 0:00:01||

Episode 2|Iteration 209|reward:  212.0|last_reward_at:  172|Elapsed Time: 0:00:01||

Episode 2|Iteration 222|reward:  212.0|last_reward_at:  172|Elapsed Time: 0:00:01||

Episode 2|Iteration 235|reward:  212.0|last_reward_at:  172|Elapsed Time: 0:00:01||

Episode 2|Iteration 247|reward:  212.0|last_reward_at:  172|Elapsed Time: 0:00:01||

Episode 2|Iteration 260|reward:  212.0|last_reward_at:  172|Elapsed Time: 0:00:01||

Episode 2|Iteration 265|reward:  262.0|last_reward_at:  172|Elapsed Time: 0:00:01||

Episode 2|Iteration 265|reward:  262.0|last_reward_at:  265|Elapsed Time: 0:00:01||

Episode 2|Iteration 279|reward:  262.0|last_reward_at:  265|Elapsed Time: 0:00:01||

Episode 2|Iteration 287|reward:  312.0|last_reward_at:  265|Elapsed Time: 0:00:01||

Episode 2|Iteration 287|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:01||

Episode 2|Iteration 298|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 311|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 323|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 336|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 349|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 361|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 374|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 387|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 399|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 406|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 418|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 431|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:02||

Episode 2|Iteration 437|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:03||

Episode 2|Iteration 444|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:03||

Episode 2|Iteration 456|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:03||

Episode 2|Iteration 469|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:03||

Episode 2|Iteration 475|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:03||

Episode 2|Iteration 482|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:03||

Episode 2|Iteration 494|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:03||

Episode 2|Iteration 500|reward:  312.0|last_reward_at:  287|Elapsed Time: 0:00:03||




  Episode 2 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/37 (0.03)
    explore-remote: 8/192 (0.04)
    explore-connect: 1/156 (0.01)
    exploit-local: 1/2 (0.33)
    exploit-remote: 0/41 (0.00)
    exploit-connect: 2/59 (0.03)
  exploit deflected to exploration: 3
  ## Episode: 3/20 'DQL' ϵ=0.7551, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 3|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 4|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 4|reward:   11.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 3|Iteration 13|reward:   11.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 3|Iteration 17|reward:   22.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 3|Iteration 17|reward:   22.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 3|Iteration 32|reward:   22.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 3|Iteration 43|reward:   33.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 3|Iteration 43|reward:   33.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 3|Iteration 54|reward:   47.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 3|Iteration 54|reward:   47.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 3|Iteration 56|reward:   56.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 3|Iteration 56|reward:   56.0|last_reward_at:   56|Elapsed Time: 0:00:00||

Episode 3|Iteration 59|reward:  106.0|last_reward_at:   56|Elapsed Time: 0:00:00||

Episode 3|Iteration 59|reward:  106.0|last_reward_at:   59|Elapsed Time: 0:00:00||

Episode 3|Iteration 70|reward:  106.0|last_reward_at:   59|Elapsed Time: 0:00:00||

Episode 3|Iteration 83|reward:  106.0|last_reward_at:   59|Elapsed Time: 0:00:00||

Episode 3|Iteration 93|reward:  206.0|last_reward_at:   59|Elapsed Time: 0:00:00||

Episode 3|Iteration 93|reward:  206.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 3|Iteration 102|reward:  206.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 3|Iteration 103|reward:  220.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 3|Iteration 103|reward:  220.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 3|Iteration 114|reward:  220.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 3|Iteration 127|reward:  220.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 3|Iteration 140|reward:  220.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 3|Iteration 140|reward:  231.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 3|Iteration 140|reward:  231.0|last_reward_at:  140|Elapsed Time: 0:00:00||

Episode 3|Iteration 152|reward:  231.0|last_reward_at:  140|Elapsed Time: 0:00:01||

Episode 3|Iteration 159|reward:  231.0|last_reward_at:  140|Elapsed Time: 0:00:01||

Episode 3|Iteration 171|reward:  231.0|last_reward_at:  140|Elapsed Time: 0:00:01||

Episode 3|Iteration 184|reward:  231.0|last_reward_at:  140|Elapsed Time: 0:00:01||

Episode 3|Iteration 197|reward:  231.0|last_reward_at:  140|Elapsed Time: 0:00:01||

Episode 3|Iteration 209|reward:  231.0|last_reward_at:  140|Elapsed Time: 0:00:01||

Episode 3|Iteration 222|reward:  231.0|last_reward_at:  140|Elapsed Time: 0:00:01||

Episode 3|Iteration 235|reward:  231.0|last_reward_at:  140|Elapsed Time: 0:00:01||

Episode 3|Iteration 247|reward:  231.0|last_reward_at:  140|Elapsed Time: 0:00:01||

Episode 3|Iteration 252|reward:  331.0|last_reward_at:  140|Elapsed Time: 0:00:01||

Episode 3|Iteration 252|reward:  331.0|last_reward_at:  252|Elapsed Time: 0:00:01||

Episode 3|Iteration 265|reward:  331.0|last_reward_at:  252|Elapsed Time: 0:00:01||

Episode 3|Iteration 270|reward:  345.0|last_reward_at:  252|Elapsed Time: 0:00:01||

Episode 3|Iteration 270|reward:  345.0|last_reward_at:  270|Elapsed Time: 0:00:01||

Episode 3|Iteration 279|reward:  345.0|last_reward_at:  270|Elapsed Time: 0:00:01||

Episode 3|Iteration 292|reward:  345.0|last_reward_at:  270|Elapsed Time: 0:00:01||

Episode 3|Iteration 304|reward:  345.0|last_reward_at:  270|Elapsed Time: 0:00:02||

Episode 3|Iteration 308|reward:  395.0|last_reward_at:  270|Elapsed Time: 0:00:02||

Episode 3|Iteration 308|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 317|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 323|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 330|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 336|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 345|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 355|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 359|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 368|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 374|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 380|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 393|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:02||

Episode 3|Iteration 399|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 406|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 416|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 425|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 437|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 450|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 456|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 463|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 469|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 475|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 482|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 488|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:03||

Episode 3|Iteration 494|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:04||

Episode 3|Iteration 499|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:04||

Episode 3|Iteration 500|reward:  395.0|last_reward_at:  308|Elapsed Time: 0:00:04||




  Episode 3 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/51 (0.04)
    explore-remote: 4/187 (0.02)
    explore-connect: 1/148 (0.01)
    exploit-local: 1/5 (0.17)
    exploit-remote: 1/74 (0.01)
    exploit-connect: 3/23 (0.12)
  exploit deflected to exploration: 27
  ## Episode: 4/20 'DQL' ϵ=0.6928, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 4|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 4|Iteration 2|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 4|Iteration 2|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 4|Iteration 13|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 4|Iteration 15|reward:   33.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 4|Iteration 15|reward:   33.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 4|Iteration 26|reward:   33.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 4|Iteration 38|reward:   33.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 4|Iteration 39|reward:   42.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 4|Iteration 39|reward:   42.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 4|Iteration 51|reward:   42.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 4|Iteration 64|reward:   42.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 4|Iteration 76|reward:   42.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 4|Iteration 80|reward:   53.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 4|Iteration 80|reward:   53.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 4|Iteration 85|reward:  153.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 4|Iteration 85|reward:  153.0|last_reward_at:   85|Elapsed Time: 0:00:00||

Episode 4|Iteration 91|reward:  167.0|last_reward_at:   85|Elapsed Time: 0:00:00||

Episode 4|Iteration 91|reward:  167.0|last_reward_at:   91|Elapsed Time: 0:00:00||

Episode 4|Iteration 102|reward:  167.0|last_reward_at:   91|Elapsed Time: 0:00:00||

Episode 4|Iteration 114|reward:  167.0|last_reward_at:   91|Elapsed Time: 0:00:00||

Episode 4|Iteration 114|reward:  181.0|last_reward_at:   91|Elapsed Time: 0:00:00||

Episode 4|Iteration 114|reward:  181.0|last_reward_at:  114|Elapsed Time: 0:00:00||

Episode 4|Iteration 126|reward:  192.0|last_reward_at:  114|Elapsed Time: 0:00:00||

Episode 4|Iteration 126|reward:  192.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 4|Iteration 140|reward:  192.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 4|Iteration 152|reward:  192.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 4|Iteration 160|reward:  242.0|last_reward_at:  126|Elapsed Time: 0:00:01||

Episode 4|Iteration 160|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 171|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 178|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 190|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 197|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 209|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 222|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 235|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 247|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 260|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 273|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 285|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 4|Iteration 298|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:02||

Episode 4|Iteration 311|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:02||

Episode 4|Iteration 323|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:02||

Episode 4|Iteration 336|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:02||

Episode 4|Iteration 349|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:02||

Episode 4|Iteration 361|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:02||

Episode 4|Iteration 368|reward:  242.0|last_reward_at:  160|Elapsed Time: 0:00:02||

Episode 4|Iteration 372|reward:  342.0|last_reward_at:  160|Elapsed Time: 0:00:02||

Episode 4|Iteration 372|reward:  342.0|last_reward_at:  372|Elapsed Time: 0:00:02||

Episode 4|Iteration 379|reward:  342.0|last_reward_at:  372|Elapsed Time: 0:00:02||

Episode 4|Iteration 387|reward:  342.0|last_reward_at:  372|Elapsed Time: 0:00:02||

Episode 4|Iteration 388|reward:  356.0|last_reward_at:  372|Elapsed Time: 0:00:02||

Episode 4|Iteration 388|reward:  356.0|last_reward_at:  388|Elapsed Time: 0:00:02||

Episode 4|Iteration 391|reward:  406.0|last_reward_at:  388|Elapsed Time: 0:00:02||

Episode 4|Iteration 391|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:02||

Episode 4|Iteration 396|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:02||

Episode 4|Iteration 405|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 408|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 412|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 418|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 422|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 431|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 444|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 456|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 463|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 469|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 475|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 480|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:03||

Episode 4|Iteration 485|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:04||

Episode 4|Iteration 494|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:04||

Episode 4|Iteration 500|reward:  406.0|last_reward_at:  391|Elapsed Time: 0:00:04||




  Episode 4 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/35 (0.03)
    explore-remote: 4/171 (0.02)
    explore-connect: 2/143 (0.01)
    exploit-local: 1/0 (1.00)
    exploit-remote: 3/111 (0.03)
    exploit-connect: 2/27 (0.07)
  exploit deflected to exploration: 26
  ## Episode: 5/20 'DQL' ϵ=0.6364, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 5|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 5|Iteration 3|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 5|Iteration 3|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 5|Iteration 13|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 5|Iteration 26|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 5|Iteration 29|reward:   36.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 5|Iteration 29|reward:   36.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 5|Iteration 45|reward:   36.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 5|Iteration 51|reward:   42.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 5|Iteration 51|reward:   42.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 5|Iteration 64|reward:   42.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 5|Iteration 65|reward:   92.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 5|Iteration 65|reward:   92.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 5|Iteration 76|reward:   92.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 5|Iteration 89|reward:   92.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 5|Iteration 99|reward:  103.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 5|Iteration 99|reward:  103.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 5|Iteration 103|reward:  112.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 5|Iteration 103|reward:  112.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 5|Iteration 114|reward:  112.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 5|Iteration 126|reward:  123.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 5|Iteration 126|reward:  123.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 5|Iteration 140|reward:  123.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 5|Iteration 152|reward:  123.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 5|Iteration 154|reward:  223.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 5|Iteration 154|reward:  223.0|last_reward_at:  154|Elapsed Time: 0:00:00||

Episode 5|Iteration 161|reward:  237.0|last_reward_at:  154|Elapsed Time: 0:00:01||

Episode 5|Iteration 161|reward:  237.0|last_reward_at:  161|Elapsed Time: 0:00:01||

Episode 5|Iteration 163|reward:  251.0|last_reward_at:  161|Elapsed Time: 0:00:01||

Episode 5|Iteration 163|reward:  251.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 5|Iteration 171|reward:  251.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 5|Iteration 184|reward:  251.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 5|Iteration 197|reward:  251.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 5|Iteration 209|reward:  251.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 5|Iteration 210|reward:  301.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 5|Iteration 210|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:01||

Episode 5|Iteration 222|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:01||

Episode 5|Iteration 235|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:01||

Episode 5|Iteration 247|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:01||

Episode 5|Iteration 260|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:01||

Episode 5|Iteration 273|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:01||

Episode 5|Iteration 285|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:01||

Episode 5|Iteration 292|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:01||

Episode 5|Iteration 304|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:01||

Episode 5|Iteration 311|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 5|Iteration 323|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 5|Iteration 336|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 5|Iteration 349|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 5|Iteration 361|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 5|Iteration 374|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 5|Iteration 387|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 5|Iteration 399|reward:  301.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 5|Iteration 403|reward:  401.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 5|Iteration 403|reward:  401.0|last_reward_at:  403|Elapsed Time: 0:00:02||

Episode 5|Iteration 412|reward:  401.0|last_reward_at:  403|Elapsed Time: 0:00:02||

Episode 5|Iteration 418|reward:  401.0|last_reward_at:  403|Elapsed Time: 0:00:02||

Episode 5|Iteration 420|reward:  415.0|last_reward_at:  403|Elapsed Time: 0:00:02||

Episode 5|Iteration 420|reward:  415.0|last_reward_at:  420|Elapsed Time: 0:00:02||

Episode 5|Iteration 431|reward:  415.0|last_reward_at:  420|Elapsed Time: 0:00:02||

Episode 5|Iteration 431|reward:  465.0|last_reward_at:  420|Elapsed Time: 0:00:02||

Episode 5|Iteration 431|reward:  465.0|last_reward_at:  431|Elapsed Time: 0:00:02||

Episode 5|Iteration 437|reward:  465.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 5|Iteration 444|reward:  465.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 5|Iteration 456|reward:  465.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 5|Iteration 463|reward:  465.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 5|Iteration 469|reward:  465.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 5|Iteration 475|reward:  465.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 5|Iteration 479|reward:  465.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 5|Iteration 488|reward:  465.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 5|Iteration 494|reward:  465.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 5|Iteration 500|reward:  465.0|last_reward_at:  431|Elapsed Time: 0:00:03||




  Episode 5 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/25 (0.04)
    explore-remote: 5/144 (0.03)
    explore-connect: 0/111 (0.00)
    exploit-local: 2/14 (0.12)
    exploit-remote: 2/171 (0.01)
    exploit-connect: 5/20 (0.20)
  exploit deflected to exploration: 10
  ## Episode: 6/20 'DQL' ϵ=0.5853, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 6|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 6|Iteration 13|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 6|Iteration 19|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 6|Iteration 19|reward:   22.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 6|Iteration 20|reward:   33.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 6|Iteration 20|reward:   33.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 6|Iteration 25|reward:   47.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 6|Iteration 25|reward:   47.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 6|Iteration 38|reward:   47.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 6|Iteration 42|reward:   97.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 6|Iteration 42|reward:   97.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 6|Iteration 48|reward:  103.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 6|Iteration 48|reward:  103.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 6|Iteration 51|reward:  114.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 6|Iteration 51|reward:  114.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 6|Iteration 64|reward:  114.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 6|Iteration 70|reward:  114.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 6|Iteration 83|reward:  114.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 6|Iteration 95|reward:  114.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 6|Iteration 100|reward:  123.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 6|Iteration 100|reward:  123.0|last_reward_at:  100|Elapsed Time: 0:00:00||

Episode 6|Iteration 114|reward:  123.0|last_reward_at:  100|Elapsed Time: 0:00:00||

Episode 6|Iteration 122|reward:  223.0|last_reward_at:  100|Elapsed Time: 0:00:00||

Episode 6|Iteration 122|reward:  223.0|last_reward_at:  122|Elapsed Time: 0:00:00||

Episode 6|Iteration 123|reward:  237.0|last_reward_at:  122|Elapsed Time: 0:00:00||

Episode 6|Iteration 123|reward:  237.0|last_reward_at:  123|Elapsed Time: 0:00:00||

Episode 6|Iteration 128|reward:  251.0|last_reward_at:  123|Elapsed Time: 0:00:00||

Episode 6|Iteration 128|reward:  251.0|last_reward_at:  128|Elapsed Time: 0:00:00||

Episode 6|Iteration 140|reward:  251.0|last_reward_at:  128|Elapsed Time: 0:00:00||

Episode 6|Iteration 145|reward:  262.0|last_reward_at:  128|Elapsed Time: 0:00:00||

Episode 6|Iteration 145|reward:  262.0|last_reward_at:  145|Elapsed Time: 0:00:00||

Episode 6|Iteration 146|reward:  312.0|last_reward_at:  145|Elapsed Time: 0:00:00||

Episode 6|Iteration 146|reward:  312.0|last_reward_at:  146|Elapsed Time: 0:00:00||

Episode 6|Iteration 159|reward:  312.0|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 6|Iteration 171|reward:  312.0|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 6|Iteration 180|reward:  412.0|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 6|Iteration 180|reward:  412.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 6|Iteration 190|reward:  412.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 6|Iteration 196|reward:  412.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 6|Iteration 203|reward:  412.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 6|Iteration 216|reward:  412.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 6|Iteration 228|reward:  412.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 6|Iteration 230|reward:  412.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 6|Iteration 235|reward:  412.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 6|Iteration 236|reward:  426.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 6|Iteration 236|reward:  426.0|last_reward_at:  236|Elapsed Time: 0:00:01||

Episode 6|Iteration 239|reward:  476.0|last_reward_at:  236|Elapsed Time: 0:00:01||

Episode 6|Iteration 239|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:01||

Episode 6|Iteration 246|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:01||

Episode 6|Iteration 254|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 260|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 271|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 276|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 285|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 292|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 301|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 311|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 317|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 327|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 330|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 336|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 6|Iteration 348|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:03||

Episode 6|Iteration 354|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:03||

Episode 6|Iteration 361|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:03||

Episode 6|Iteration 368|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:03||

Episode 6|Iteration 374|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:03||

Episode 6|Iteration 379|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:03||

Episode 6|Iteration 387|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:03||

Episode 6|Iteration 398|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:03||

Episode 6|Iteration 406|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:03||

Episode 6|Iteration 412|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:03||

Episode 6|Iteration 417|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 425|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 429|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 437|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 444|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 450|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 456|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 463|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 469|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 475|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 482|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 488|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 494|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:04||

Episode 6|Iteration 500|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:05||

Episode 6|Iteration 500|reward:  476.0|last_reward_at:  239|Elapsed Time: 0:00:05||




  Episode 6 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/35 (0.03)
    explore-remote: 7/137 (0.05)
    explore-connect: 0/140 (0.00)
    exploit-local: 2/15 (0.12)
    exploit-remote: 1/146 (0.01)
    exploit-connect: 5/11 (0.31)
  exploit deflected to exploration: 36
  ## Episode: 7/20 'DQL' ϵ=0.5391, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 7|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 3|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 3|reward:   11.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 7|Iteration 9|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 7|Iteration 9|reward:   22.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 7|Iteration 15|reward:   31.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 7|Iteration 15|reward:   31.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 7|Iteration 26|reward:   31.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 7|Iteration 38|reward:   31.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 7|Iteration 51|reward:   31.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 7|Iteration 55|reward:  131.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 7|Iteration 55|reward:  131.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 7|Iteration 64|reward:  131.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 7|Iteration 69|reward:  145.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 7|Iteration 69|reward:  145.0|last_reward_at:   69|Elapsed Time: 0:00:00||

Episode 7|Iteration 71|reward:  156.0|last_reward_at:   69|Elapsed Time: 0:00:00||

Episode 7|Iteration 71|reward:  156.0|last_reward_at:   71|Elapsed Time: 0:00:00||

Episode 7|Iteration 83|reward:  156.0|last_reward_at:   71|Elapsed Time: 0:00:00||

Episode 7|Iteration 94|reward:  156.0|last_reward_at:   71|Elapsed Time: 0:00:00||

Episode 7|Iteration 102|reward:  156.0|last_reward_at:   71|Elapsed Time: 0:00:00||

Episode 7|Iteration 114|reward:  156.0|last_reward_at:   71|Elapsed Time: 0:00:00||

Episode 7|Iteration 119|reward:  170.0|last_reward_at:   71|Elapsed Time: 0:00:00||

Episode 7|Iteration 119|reward:  170.0|last_reward_at:  119|Elapsed Time: 0:00:00||

Episode 7|Iteration 133|reward:  170.0|last_reward_at:  119|Elapsed Time: 0:00:00||

Episode 7|Iteration 140|reward:  170.0|last_reward_at:  119|Elapsed Time: 0:00:01||

Episode 7|Iteration 144|reward:  181.0|last_reward_at:  119|Elapsed Time: 0:00:01||

Episode 7|Iteration 144|reward:  181.0|last_reward_at:  144|Elapsed Time: 0:00:01||

Episode 7|Iteration 152|reward:  181.0|last_reward_at:  144|Elapsed Time: 0:00:01||

Episode 7|Iteration 159|reward:  181.0|last_reward_at:  144|Elapsed Time: 0:00:01||

Episode 7|Iteration 171|reward:  181.0|last_reward_at:  144|Elapsed Time: 0:00:01||

Episode 7|Iteration 182|reward:  281.0|last_reward_at:  144|Elapsed Time: 0:00:01||

Episode 7|Iteration 182|reward:  281.0|last_reward_at:  182|Elapsed Time: 0:00:01||

Episode 7|Iteration 190|reward:  281.0|last_reward_at:  182|Elapsed Time: 0:00:01||

Episode 7|Iteration 192|reward:  331.0|last_reward_at:  182|Elapsed Time: 0:00:01||

Episode 7|Iteration 192|reward:  331.0|last_reward_at:  192|Elapsed Time: 0:00:01||

Episode 7|Iteration 202|reward:  331.0|last_reward_at:  192|Elapsed Time: 0:00:01||

Episode 7|Iteration 209|reward:  331.0|last_reward_at:  192|Elapsed Time: 0:00:01||

Episode 7|Iteration 213|reward:  345.0|last_reward_at:  192|Elapsed Time: 0:00:01||

Episode 7|Iteration 213|reward:  345.0|last_reward_at:  213|Elapsed Time: 0:00:01||

Episode 7|Iteration 222|reward:  345.0|last_reward_at:  213|Elapsed Time: 0:00:01||

Episode 7|Iteration 222|reward:  356.0|last_reward_at:  213|Elapsed Time: 0:00:01||

Episode 7|Iteration 222|reward:  356.0|last_reward_at:  222|Elapsed Time: 0:00:01||

Episode 7|Iteration 235|reward:  356.0|last_reward_at:  222|Elapsed Time: 0:00:01||

Episode 7|Iteration 235|reward:  406.0|last_reward_at:  222|Elapsed Time: 0:00:01||

Episode 7|Iteration 235|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:01||

Episode 7|Iteration 241|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:01||

Episode 7|Iteration 247|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:02||

Episode 7|Iteration 253|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:02||

Episode 7|Iteration 260|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:02||

Episode 7|Iteration 264|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:02||

Episode 7|Iteration 273|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:02||

Episode 7|Iteration 279|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:02||

Episode 7|Iteration 285|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:02||

Episode 7|Iteration 292|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:02||

Episode 7|Iteration 298|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:02||

Episode 7|Iteration 309|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:02||

Episode 7|Iteration 313|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:02||

Episode 7|Iteration 322|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 328|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 335|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 348|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 355|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 361|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 368|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 374|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 380|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 385|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 393|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 399|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 406|reward:  406.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 409|reward:  420.0|last_reward_at:  235|Elapsed Time: 0:00:03||

Episode 7|Iteration 409|reward:  420.0|last_reward_at:  409|Elapsed Time: 0:00:03||

Episode 7|Iteration 418|reward:  420.0|last_reward_at:  409|Elapsed Time: 0:00:04||

Episode 7|Iteration 425|reward:  420.0|last_reward_at:  409|Elapsed Time: 0:00:04||

Episode 7|Iteration 437|reward:  420.0|last_reward_at:  409|Elapsed Time: 0:00:04||

Episode 7|Iteration 444|reward:  420.0|last_reward_at:  409|Elapsed Time: 0:00:04||

Episode 7|Iteration 446|reward:  470.0|last_reward_at:  409|Elapsed Time: 0:00:04||

Episode 7|Iteration 446|reward:  470.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 7|Iteration 456|reward:  470.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 7|Iteration 463|reward:  470.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 7|Iteration 469|reward:  470.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 7|Iteration 472|reward:  470.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 7|Iteration 475|reward:  470.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 7|Iteration 482|reward:  470.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 7|Iteration 487|reward:  470.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 7|Iteration 493|reward:  470.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 7|Iteration 498|reward:  470.0|last_reward_at:  446|Elapsed Time: 0:00:05||

Episode 7|Iteration 500|reward:  470.0|last_reward_at:  446|Elapsed Time: 0:00:05||




  Episode 7 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/27 (0.07)
    explore-remote: 2/142 (0.01)
    explore-connect: 1/127 (0.01)
    exploit-local: 1/14 (0.07)
    exploit-remote: 5/157 (0.03)
    exploit-connect: 4/18 (0.18)
  exploit deflected to exploration: 41
  ## Episode: 8/20 'DQL' ϵ=0.4973, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 8|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 8|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 8|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 8|Iteration 3|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 8|Iteration 3|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 8|Iteration 12|reward:   36.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 8|Iteration 12|reward:   36.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 8|Iteration 24|reward:   47.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 8|Iteration 24|reward:   47.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 8|Iteration 29|reward:   97.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 8|Iteration 29|reward:   97.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 8|Iteration 30|reward:  106.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 8|Iteration 30|reward:  106.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 8|Iteration 38|reward:  106.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 8|Iteration 46|reward:  117.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 8|Iteration 46|reward:  117.0|last_reward_at:   46|Elapsed Time: 0:00:00||

Episode 8|Iteration 52|reward:  217.0|last_reward_at:   46|Elapsed Time: 0:00:00||

Episode 8|Iteration 52|reward:  217.0|last_reward_at:   52|Elapsed Time: 0:00:00||

Episode 8|Iteration 53|reward:  231.0|last_reward_at:   52|Elapsed Time: 0:00:00||

Episode 8|Iteration 53|reward:  231.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 8|Iteration 64|reward:  231.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 8|Iteration 70|reward:  245.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 8|Iteration 70|reward:  245.0|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 8|Iteration 80|reward:  295.0|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 8|Iteration 80|reward:  295.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 8|Iteration 89|reward:  295.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 8|Iteration 100|reward:  295.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 8|Iteration 108|reward:  295.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 8|Iteration 121|reward:  295.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 8|Iteration 133|reward:  295.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 8|Iteration 140|reward:  295.0|last_reward_at:   80|Elapsed Time: 0:00:01||

Episode 8|Iteration 141|reward:  395.0|last_reward_at:   80|Elapsed Time: 0:00:01||

Episode 8|Iteration 141|reward:  395.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 8|Iteration 152|reward:  395.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 8|Iteration 157|reward:  395.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 8|Iteration 165|reward:  395.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 8|Iteration 176|reward:  409.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 8|Iteration 176|reward:  409.0|last_reward_at:  176|Elapsed Time: 0:00:01||

Episode 8|Iteration 184|reward:  409.0|last_reward_at:  176|Elapsed Time: 0:00:01||

Episode 8|Iteration 194|reward:  459.0|last_reward_at:  176|Elapsed Time: 0:00:01||

Episode 8|Iteration 194|reward:  459.0|last_reward_at:  194|Elapsed Time: 0:00:01||

Episode 8|Iteration 198|reward:  459.0|last_reward_at:  194|Elapsed Time: 0:00:01||

Episode 8|Iteration 209|reward:  459.0|last_reward_at:  194|Elapsed Time: 0:00:01||

Episode 8|Iteration 214|reward:  459.0|last_reward_at:  194|Elapsed Time: 0:00:01||

Episode 8|Iteration 222|reward:  459.0|last_reward_at:  194|Elapsed Time: 0:00:01||

Episode 8|Iteration 225|reward:  459.0|last_reward_at:  194|Elapsed Time: 0:00:02||

Episode 8|Iteration 235|reward:  459.0|last_reward_at:  194|Elapsed Time: 0:00:02||

Episode 8|Iteration 237|reward:  465.0|last_reward_at:  194|Elapsed Time: 0:00:02||

Episode 8|Iteration 237|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:02||

Episode 8|Iteration 247|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:02||

Episode 8|Iteration 252|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:02||

Episode 8|Iteration 266|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:02||

Episode 8|Iteration 273|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:02||

Episode 8|Iteration 279|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:02||

Episode 8|Iteration 291|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:02||

Episode 8|Iteration 302|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:02||

Episode 8|Iteration 311|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:02||

Episode 8|Iteration 317|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:02||

Episode 8|Iteration 323|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:02||

Episode 8|Iteration 330|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 336|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 342|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 349|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 355|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 361|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 368|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 374|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 382|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 393|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 405|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 418|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 8|Iteration 431|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:04||

Episode 8|Iteration 437|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:04||

Episode 8|Iteration 444|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:04||

Episode 8|Iteration 450|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:04||

Episode 8|Iteration 463|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:04||

Episode 8|Iteration 474|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:04||

Episode 8|Iteration 482|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:04||

Episode 8|Iteration 494|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:04||

Episode 8|Iteration 500|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:00:04||




  Episode 8 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/16 (0.06)
    explore-remote: 5/123 (0.04)
    explore-connect: 1/126 (0.01)
    exploit-local: 2/33 (0.06)
    exploit-remote: 2/168 (0.01)
    exploit-connect: 4/19 (0.17)
  exploit deflected to exploration: 26
  ## Episode: 9/20 'DQL' ϵ=0.4595, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 9|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 9|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 9|Iteration 2|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 9|Iteration 2|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 9|Iteration 4|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 9|Iteration 4|reward:   22.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 9|Iteration 8|reward:   33.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 9|Iteration 8|reward:   33.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 9|Iteration 19|reward:   33.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 9|Iteration 32|reward:   33.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 9|Iteration 36|reward:   42.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 9|Iteration 36|reward:   42.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 9|Iteration 37|reward:   53.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 9|Iteration 37|reward:   53.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 49|reward:   67.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 49|reward:   67.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 9|Iteration 54|reward:  167.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 9|Iteration 54|reward:  167.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 9|Iteration 55|reward:  181.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 9|Iteration 55|reward:  181.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 9|Iteration 70|reward:  181.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 9|Iteration 82|reward:  231.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 9|Iteration 82|reward:  231.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 9|Iteration 95|reward:  231.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 9|Iteration 108|reward:  231.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 9|Iteration 108|reward:  245.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 9|Iteration 108|reward:  245.0|last_reward_at:  108|Elapsed Time: 0:00:00||

Episode 9|Iteration 121|reward:  245.0|last_reward_at:  108|Elapsed Time: 0:00:00||

Episode 9|Iteration 123|reward:  295.0|last_reward_at:  108|Elapsed Time: 0:00:00||

Episode 9|Iteration 123|reward:  295.0|last_reward_at:  123|Elapsed Time: 0:00:00||

Episode 9|Iteration 128|reward:  301.0|last_reward_at:  123|Elapsed Time: 0:00:00||

Episode 9|Iteration 128|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:00||

Episode 9|Iteration 140|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:00||

Episode 9|Iteration 152|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:00||

Episode 9|Iteration 165|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:01||

Episode 9|Iteration 178|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:01||

Episode 9|Iteration 190|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:01||

Episode 9|Iteration 203|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:01||

Episode 9|Iteration 216|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:01||

Episode 9|Iteration 228|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:01||

Episode 9|Iteration 241|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:01||

Episode 9|Iteration 254|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:01||

Episode 9|Iteration 266|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:01||

Episode 9|Iteration 279|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:01||

Episode 9|Iteration 292|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:01||

Episode 9|Iteration 304|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:02||

Episode 9|Iteration 311|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:02||

Episode 9|Iteration 323|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:02||

Episode 9|Iteration 330|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:02||

Episode 9|Iteration 340|reward:  312.0|last_reward_at:  128|Elapsed Time: 0:00:02||

Episode 9|Iteration 340|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:02||

Episode 9|Iteration 349|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:02||

Episode 9|Iteration 358|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:02||

Episode 9|Iteration 368|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:02||

Episode 9|Iteration 380|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:02||

Episode 9|Iteration 387|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:02||

Episode 9|Iteration 397|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:02||

Episode 9|Iteration 406|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:02||

Episode 9|Iteration 418|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:02||

Episode 9|Iteration 425|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:03||

Episode 9|Iteration 437|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:03||

Episode 9|Iteration 444|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:03||

Episode 9|Iteration 454|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:03||

Episode 9|Iteration 463|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:03||

Episode 9|Iteration 475|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:03||

Episode 9|Iteration 482|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:03||

Episode 9|Iteration 494|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:03||

Episode 9|Iteration 500|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:03||

Episode 9|Iteration 500|reward:  312.0|last_reward_at:  340|Elapsed Time: 0:00:03||




  Episode 9 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/19 (0.05)
    explore-remote: 3/108 (0.03)
    explore-connect: 0/107 (0.00)
    exploit-local: 1/7 (0.12)
    exploit-remote: 5/198 (0.02)
    exploit-connect: 3/48 (0.06)
  exploit deflected to exploration: 9
  ## Episode: 10/20 'DQL' ϵ=0.4253, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 10|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 10|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 10|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 10|Iteration 3|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 10|Iteration 3|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 10|Iteration 13|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 10|Iteration 19|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 10|Iteration 26|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 10|Iteration 38|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 10|Iteration 41|reward:   36.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 10|Iteration 41|reward:   36.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 10|Iteration 43|reward:   47.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 10|Iteration 43|reward:   47.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 10|Iteration 51|reward:   47.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 10|Iteration 55|reward:   97.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 10|Iteration 55|reward:   97.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 10|Iteration 64|reward:   97.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 10|Iteration 76|reward:   97.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 10|Iteration 86|reward:  108.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 10|Iteration 86|reward:  108.0|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 10|Iteration 92|reward:  122.0|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 10|Iteration 92|reward:  122.0|last_reward_at:   92|Elapsed Time: 0:00:00||

Episode 10|Iteration 102|reward:  122.0|last_reward_at:   92|Elapsed Time: 0:00:00||

Episode 10|Iteration 113|reward:  172.0|last_reward_at:   92|Elapsed Time: 0:00:00||

Episode 10|Iteration 113|reward:  172.0|last_reward_at:  113|Elapsed Time: 0:00:00||

Episode 10|Iteration 121|reward:  172.0|last_reward_at:  113|Elapsed Time: 0:00:00||

Episode 10|Iteration 127|reward:  172.0|last_reward_at:  113|Elapsed Time: 0:00:00||

Episode 10|Iteration 140|reward:  172.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 10|Iteration 148|reward:  178.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 10|Iteration 148|reward:  178.0|last_reward_at:  148|Elapsed Time: 0:00:01||

Episode 10|Iteration 159|reward:  178.0|last_reward_at:  148|Elapsed Time: 0:00:01||

Episode 10|Iteration 163|reward:  189.0|last_reward_at:  148|Elapsed Time: 0:00:01||

Episode 10|Iteration 163|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 10|Iteration 171|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 10|Iteration 178|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 10|Iteration 190|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 10|Iteration 197|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 10|Iteration 209|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 10|Iteration 215|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 10|Iteration 222|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 10|Iteration 228|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 10|Iteration 235|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 10|Iteration 245|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 10|Iteration 254|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:02||

Episode 10|Iteration 260|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:02||

Episode 10|Iteration 266|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:02||

Episode 10|Iteration 273|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:02||

Episode 10|Iteration 284|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:02||

Episode 10|Iteration 292|reward:  189.0|last_reward_at:  163|Elapsed Time: 0:00:02||

Episode 10|Iteration 302|reward:  198.0|last_reward_at:  163|Elapsed Time: 0:00:02||

Episode 10|Iteration 302|reward:  198.0|last_reward_at:  302|Elapsed Time: 0:00:02||

Episode 10|Iteration 311|reward:  198.0|last_reward_at:  302|Elapsed Time: 0:00:02||

Episode 10|Iteration 316|reward:  298.0|last_reward_at:  302|Elapsed Time: 0:00:02||

Episode 10|Iteration 316|reward:  298.0|last_reward_at:  316|Elapsed Time: 0:00:02||

Episode 10|Iteration 317|reward:  312.0|last_reward_at:  316|Elapsed Time: 0:00:02||

Episode 10|Iteration 317|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:02||

Episode 10|Iteration 330|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:02||

Episode 10|Iteration 342|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:02||

Episode 10|Iteration 349|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:02||

Episode 10|Iteration 360|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:03||

Episode 10|Iteration 368|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:03||

Episode 10|Iteration 374|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:03||

Episode 10|Iteration 386|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:03||

Episode 10|Iteration 393|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:03||

Episode 10|Iteration 399|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:03||

Episode 10|Iteration 406|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:03||

Episode 10|Iteration 418|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:03||

Episode 10|Iteration 425|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:03||

Episode 10|Iteration 431|reward:  312.0|last_reward_at:  317|Elapsed Time: 0:00:03||

Episode 10|Iteration 431|reward:  412.0|last_reward_at:  317|Elapsed Time: 0:00:03||

Episode 10|Iteration 431|reward:  412.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 10|Iteration 436|reward:  412.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 10|Iteration 443|reward:  412.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 10|Iteration 450|reward:  412.0|last_reward_at:  431|Elapsed Time: 0:00:03||

Episode 10|Iteration 456|reward:  412.0|last_reward_at:  431|Elapsed Time: 0:00:04||

Episode 10|Iteration 457|reward:  426.0|last_reward_at:  431|Elapsed Time: 0:00:04||

Episode 10|Iteration 457|reward:  426.0|last_reward_at:  457|Elapsed Time: 0:00:04||

Episode 10|Iteration 465|reward:  426.0|last_reward_at:  457|Elapsed Time: 0:00:04||

Episode 10|Iteration 470|reward:  476.0|last_reward_at:  457|Elapsed Time: 0:00:04||

Episode 10|Iteration 470|reward:  476.0|last_reward_at:  470|Elapsed Time: 0:00:04||

Episode 10|Iteration 478|reward:  476.0|last_reward_at:  470|Elapsed Time: 0:00:04||

Episode 10|Iteration 482|reward:  476.0|last_reward_at:  470|Elapsed Time: 0:00:04||

Episode 10|Iteration 488|reward:  476.0|last_reward_at:  470|Elapsed Time: 0:00:04||

Episode 10|Iteration 494|reward:  476.0|last_reward_at:  470|Elapsed Time: 0:00:04||

Episode 10|Iteration 500|reward:  476.0|last_reward_at:  470|Elapsed Time: 0:00:04||




  Episode 10 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/18 (0.10)
    explore-remote: 4/105 (0.04)
    explore-connect: 0/106 (0.00)
    exploit-local: 1/24 (0.04)
    exploit-remote: 4/210 (0.02)
    exploit-connect: 5/21 (0.19)
  exploit deflected to exploration: 37
  ## Episode: 11/20 'DQL' ϵ=0.3944, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 11|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 11|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 11|Iteration 7|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 11|Iteration 7|reward:   11.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 11|Iteration 13|reward:   22.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 11|Iteration 13|reward:   22.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 11|Iteration 26|reward:   22.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 11|Iteration 38|reward:   22.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 11|Iteration 39|reward:   36.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 11|Iteration 39|reward:   36.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 11|Iteration 43|reward:   42.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 11|Iteration 43|reward:   42.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 11|Iteration 57|reward:   42.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 11|Iteration 68|reward:   92.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 11|Iteration 68|reward:   92.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 11|Iteration 76|reward:   92.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 11|Iteration 83|reward:   92.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 11|Iteration 89|reward:   92.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 11|Iteration 95|reward:   92.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 11|Iteration 106|reward:  103.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 11|Iteration 106|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:00||

Episode 11|Iteration 121|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:00||

Episode 11|Iteration 133|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:00||

Episode 11|Iteration 140|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 152|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 164|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 178|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 190|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 197|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 208|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 216|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 228|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 241|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 254|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 260|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:01||

Episode 11|Iteration 271|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:02||

Episode 11|Iteration 279|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:02||

Episode 11|Iteration 285|reward:  103.0|last_reward_at:  106|Elapsed Time: 0:00:02||

Episode 11|Iteration 290|reward:  112.0|last_reward_at:  106|Elapsed Time: 0:00:02||

Episode 11|Iteration 290|reward:  112.0|last_reward_at:  290|Elapsed Time: 0:00:02||

Episode 11|Iteration 298|reward:  112.0|last_reward_at:  290|Elapsed Time: 0:00:02||

Episode 11|Iteration 302|reward:  212.0|last_reward_at:  290|Elapsed Time: 0:00:02||

Episode 11|Iteration 302|reward:  212.0|last_reward_at:  302|Elapsed Time: 0:00:02||

Episode 11|Iteration 306|reward:  226.0|last_reward_at:  302|Elapsed Time: 0:00:02||

Episode 11|Iteration 306|reward:  226.0|last_reward_at:  306|Elapsed Time: 0:00:02||

Episode 11|Iteration 309|reward:  326.0|last_reward_at:  306|Elapsed Time: 0:00:02||

Episode 11|Iteration 309|reward:  326.0|last_reward_at:  309|Elapsed Time: 0:00:02||

Episode 11|Iteration 317|reward:  326.0|last_reward_at:  309|Elapsed Time: 0:00:02||

Episode 11|Iteration 323|reward:  326.0|last_reward_at:  309|Elapsed Time: 0:00:02||

Episode 11|Iteration 330|reward:  326.0|last_reward_at:  309|Elapsed Time: 0:00:02||

Episode 11|Iteration 336|reward:  326.0|last_reward_at:  309|Elapsed Time: 0:00:02||

Episode 11|Iteration 336|reward:  340.0|last_reward_at:  309|Elapsed Time: 0:00:02||

Episode 11|Iteration 336|reward:  340.0|last_reward_at:  336|Elapsed Time: 0:00:02||

Episode 11|Iteration 345|reward:  390.0|last_reward_at:  336|Elapsed Time: 0:00:02||

Episode 11|Iteration 345|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:02||

Episode 11|Iteration 352|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:02||

Episode 11|Iteration 361|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 367|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 374|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 380|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 387|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 398|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 406|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 412|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 420|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 430|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 437|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 444|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:03||

Episode 11|Iteration 453|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:04||

Episode 11|Iteration 459|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:04||

Episode 11|Iteration 469|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:04||

Episode 11|Iteration 475|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:04||

Episode 11|Iteration 480|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:04||

Episode 11|Iteration 488|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:04||

Episode 11|Iteration 494|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:04||

Episode 11|Iteration 500|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:04||

Episode 11|Iteration 500|reward:  390.0|last_reward_at:  345|Elapsed Time: 0:00:04||




  Episode 11 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/22 (0.08)
    explore-remote: 3/144 (0.02)
    explore-connect: 0/95 (0.00)
    exploit-local: 1/31 (0.03)
    exploit-remote: 2/193 (0.01)
    exploit-connect: 4/3 (0.57)
  exploit deflected to exploration: 61
  ## Episode: 12/20 'DQL' ϵ=0.3664, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 12|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 12|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 12|Iteration 2|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 12|Iteration 2|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 12|Iteration 6|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 12|Iteration 6|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 12|Iteration 12|reward:   33.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 12|Iteration 12|reward:   33.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 12|Iteration 17|reward:   44.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 12|Iteration 17|reward:   44.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 12|Iteration 26|reward:   44.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 12|Iteration 32|reward:   44.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 12|Iteration 38|reward:   44.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 12|Iteration 45|reward:   44.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 12|Iteration 47|reward:   58.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 12|Iteration 47|reward:   58.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 12|Iteration 54|reward:   67.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 12|Iteration 54|reward:   67.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 12|Iteration 64|reward:   67.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 12|Iteration 69|reward:  117.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 12|Iteration 69|reward:  117.0|last_reward_at:   69|Elapsed Time: 0:00:00||

Episode 12|Iteration 79|reward:  217.0|last_reward_at:   69|Elapsed Time: 0:00:00||

Episode 12|Iteration 79|reward:  217.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 12|Iteration 80|reward:  231.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 12|Iteration 80|reward:  231.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 12|Iteration 89|reward:  231.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 12|Iteration 99|reward:  231.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 12|Iteration 108|reward:  231.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 12|Iteration 120|reward:  231.0|last_reward_at:   80|Elapsed Time: 0:00:01||

Episode 12|Iteration 127|reward:  231.0|last_reward_at:   80|Elapsed Time: 0:00:01||

Episode 12|Iteration 133|reward:  231.0|last_reward_at:   80|Elapsed Time: 0:00:01||

Episode 12|Iteration 140|reward:  231.0|last_reward_at:   80|Elapsed Time: 0:00:01||

Episode 12|Iteration 144|reward:  245.0|last_reward_at:   80|Elapsed Time: 0:00:01||

Episode 12|Iteration 144|reward:  245.0|last_reward_at:  144|Elapsed Time: 0:00:01||

Episode 12|Iteration 150|reward:  295.0|last_reward_at:  144|Elapsed Time: 0:00:01||

Episode 12|Iteration 150|reward:  295.0|last_reward_at:  150|Elapsed Time: 0:00:01||

Episode 12|Iteration 157|reward:  295.0|last_reward_at:  150|Elapsed Time: 0:00:01||

Episode 12|Iteration 165|reward:  295.0|last_reward_at:  150|Elapsed Time: 0:00:01||

Episode 12|Iteration 174|reward:  295.0|last_reward_at:  150|Elapsed Time: 0:00:01||

Episode 12|Iteration 178|reward:  295.0|last_reward_at:  150|Elapsed Time: 0:00:01||

Episode 12|Iteration 184|reward:  295.0|last_reward_at:  150|Elapsed Time: 0:00:01||

Episode 12|Iteration 188|reward:  306.0|last_reward_at:  150|Elapsed Time: 0:00:01||

Episode 12|Iteration 188|reward:  306.0|last_reward_at:  188|Elapsed Time: 0:00:01||

Episode 12|Iteration 197|reward:  306.0|last_reward_at:  188|Elapsed Time: 0:00:01||

Episode 12|Iteration 203|reward:  306.0|last_reward_at:  188|Elapsed Time: 0:00:01||

Episode 12|Iteration 213|reward:  306.0|last_reward_at:  188|Elapsed Time: 0:00:01||

Episode 12|Iteration 222|reward:  306.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 12|Iteration 228|reward:  306.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 12|Iteration 235|reward:  306.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 12|Iteration 247|reward:  306.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 12|Iteration 248|reward:  312.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 12|Iteration 248|reward:  312.0|last_reward_at:  248|Elapsed Time: 0:00:02||

Episode 12|Iteration 254|reward:  312.0|last_reward_at:  248|Elapsed Time: 0:00:02||

Episode 12|Iteration 265|reward:  312.0|last_reward_at:  248|Elapsed Time: 0:00:02||

Episode 12|Iteration 273|reward:  312.0|last_reward_at:  248|Elapsed Time: 0:00:02||

Episode 12|Iteration 279|reward:  312.0|last_reward_at:  248|Elapsed Time: 0:00:02||

Episode 12|Iteration 285|reward:  312.0|last_reward_at:  248|Elapsed Time: 0:00:02||

Episode 12|Iteration 292|reward:  312.0|last_reward_at:  248|Elapsed Time: 0:00:02||

Episode 12|Iteration 298|reward:  312.0|last_reward_at:  248|Elapsed Time: 0:00:02||

Episode 12|Iteration 304|reward:  312.0|last_reward_at:  248|Elapsed Time: 0:00:02||

Episode 12|Iteration 309|reward:  412.0|last_reward_at:  248|Elapsed Time: 0:00:02||

Episode 12|Iteration 309|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:02||

Episode 12|Iteration 312|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 317|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 323|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 330|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 336|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 347|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 355|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 361|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 368|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 374|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 380|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 387|reward:  412.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 390|reward:  426.0|last_reward_at:  309|Elapsed Time: 0:00:03||

Episode 12|Iteration 390|reward:  426.0|last_reward_at:  390|Elapsed Time: 0:00:03||

Episode 12|Iteration 394|reward:  476.0|last_reward_at:  390|Elapsed Time: 0:00:03||

Episode 12|Iteration 394|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:03||

Episode 12|Iteration 405|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 412|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 418|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 425|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 430|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 437|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 443|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 450|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 454|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 463|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 469|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 474|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 482|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:04||

Episode 12|Iteration 487|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:05||

Episode 12|Iteration 493|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:05||

Episode 12|Iteration 500|reward:  476.0|last_reward_at:  394|Elapsed Time: 0:00:05||




  Episode 12 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/17 (0.06)
    explore-remote: 4/87 (0.04)
    explore-connect: 0/89 (0.00)
    exploit-local: 2/24 (0.08)
    exploit-remote: 4/247 (0.02)
    exploit-connect: 5/20 (0.20)
  exploit deflected to exploration: 31
  ## Episode: 13/20 'DQL' ϵ=0.3410, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 13|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 13|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 13|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 13|Iteration 6|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 13|Iteration 6|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 13|Iteration 19|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 13|Iteration 26|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 13|Iteration 27|reward:   33.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 13|Iteration 27|reward:   33.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 13|Iteration 38|reward:   33.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 13|Iteration 42|reward:   44.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 13|Iteration 42|reward:   44.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 13|Iteration 50|reward:   53.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 13|Iteration 50|reward:   53.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 13|Iteration 64|reward:   53.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 13|Iteration 68|reward:   67.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 13|Iteration 68|reward:   67.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 13|Iteration 70|reward:   73.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 13|Iteration 70|reward:   73.0|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 13|Iteration 83|reward:   73.0|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 13|Iteration 95|reward:   73.0|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 13|Iteration 96|reward:  123.0|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 13|Iteration 96|reward:  123.0|last_reward_at:   96|Elapsed Time: 0:00:00||

Episode 13|Iteration 98|reward:  137.0|last_reward_at:   96|Elapsed Time: 0:00:00||

Episode 13|Iteration 98|reward:  137.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 13|Iteration 99|reward:  187.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 13|Iteration 99|reward:  187.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 13|Iteration 108|reward:  187.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 13|Iteration 121|reward:  187.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 13|Iteration 132|reward:  187.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 13|Iteration 140|reward:  187.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 13|Iteration 144|reward:  287.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 13|Iteration 144|reward:  287.0|last_reward_at:  144|Elapsed Time: 0:00:01||

Episode 13|Iteration 145|reward:  301.0|last_reward_at:  144|Elapsed Time: 0:00:01||

Episode 13|Iteration 145|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 152|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 159|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 171|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 178|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 190|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 197|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 203|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 209|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 216|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 227|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 235|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 241|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 13|Iteration 247|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:02||

Episode 13|Iteration 254|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:02||

Episode 13|Iteration 266|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:02||

Episode 13|Iteration 273|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:02||

Episode 13|Iteration 285|reward:  301.0|last_reward_at:  145|Elapsed Time: 0:00:02||

Episode 13|Iteration 288|reward:  312.0|last_reward_at:  145|Elapsed Time: 0:00:02||

Episode 13|Iteration 288|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:02||

Episode 13|Iteration 298|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:02||

Episode 13|Iteration 311|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:02||

Episode 13|Iteration 319|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:02||

Episode 13|Iteration 330|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:02||

Episode 13|Iteration 336|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:02||

Episode 13|Iteration 342|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:02||

Episode 13|Iteration 349|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:02||

Episode 13|Iteration 355|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:02||

Episode 13|Iteration 361|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 368|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 374|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 380|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 387|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 393|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 405|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 412|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 421|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 430|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 437|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 444|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 450|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 456|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:03||

Episode 13|Iteration 463|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:04||

Episode 13|Iteration 475|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:04||

Episode 13|Iteration 482|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:04||

Episode 13|Iteration 488|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:04||

Episode 13|Iteration 494|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:04||

Episode 13|Iteration 500|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:04||

Episode 13|Iteration 500|reward:  312.0|last_reward_at:  288|Elapsed Time: 0:00:04||




  Episode 13 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/16 (0.06)
    explore-remote: 1/91 (0.01)
    explore-connect: 0/68 (0.00)
    exploit-local: 1/25 (0.04)
    exploit-remote: 7/212 (0.03)
    exploit-connect: 3/75 (0.04)
  exploit deflected to exploration: 25
  ## Episode: 14/20 'DQL' ϵ=0.3181, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 14|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 14|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 14|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 14|Iteration 2|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 14|Iteration 2|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 14|Iteration 13|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 14|Iteration 15|reward:   31.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 14|Iteration 15|reward:   31.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 14|Iteration 20|reward:  131.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 14|Iteration 20|reward:  131.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 14|Iteration 26|reward:  131.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 14|Iteration 30|reward:  142.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 14|Iteration 30|reward:  142.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 14|Iteration 38|reward:  142.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 14|Iteration 39|reward:  156.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 14|Iteration 39|reward:  156.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 14|Iteration 45|reward:  156.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 14|Iteration 51|reward:  156.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 14|Iteration 64|reward:  156.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 14|Iteration 71|reward:  256.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 14|Iteration 71|reward:  256.0|last_reward_at:   71|Elapsed Time: 0:00:00||

Episode 14|Iteration 80|reward:  267.0|last_reward_at:   71|Elapsed Time: 0:00:00||

Episode 14|Iteration 80|reward:  267.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 14|Iteration 87|reward:  281.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 14|Iteration 87|reward:  281.0|last_reward_at:   87|Elapsed Time: 0:00:00||

Episode 14|Iteration 95|reward:  281.0|last_reward_at:   87|Elapsed Time: 0:00:00||

Episode 14|Iteration 95|reward:  295.0|last_reward_at:   87|Elapsed Time: 0:00:00||

Episode 14|Iteration 95|reward:  295.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 14|Iteration 102|reward:  295.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 14|Iteration 104|reward:  345.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 14|Iteration 104|reward:  345.0|last_reward_at:  104|Elapsed Time: 0:00:00||

Episode 14|Iteration 112|reward:  345.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 14|Iteration 116|reward:  395.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 14|Iteration 116|reward:  395.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 14|Iteration 121|reward:  395.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 14|Iteration 127|reward:  395.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 14|Iteration 131|reward:  395.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 14|Iteration 135|reward:  406.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 14|Iteration 135|reward:  406.0|last_reward_at:  135|Elapsed Time: 0:00:01||

Episode 14|Iteration 145|reward:  406.0|last_reward_at:  135|Elapsed Time: 0:00:01||

Episode 14|Iteration 150|reward:  406.0|last_reward_at:  135|Elapsed Time: 0:00:01||

Episode 14|Iteration 158|reward:  406.0|last_reward_at:  135|Elapsed Time: 0:00:01||

Episode 14|Iteration 165|reward:  406.0|last_reward_at:  135|Elapsed Time: 0:00:01||

Episode 14|Iteration 171|reward:  406.0|last_reward_at:  135|Elapsed Time: 0:00:02||

Episode 14|Iteration 178|reward:  406.0|last_reward_at:  135|Elapsed Time: 0:00:02||

Episode 14|Iteration 181|reward:  406.0|last_reward_at:  135|Elapsed Time: 0:00:02||

Episode 14|Iteration 189|reward:  420.0|last_reward_at:  135|Elapsed Time: 0:00:02||

Episode 14|Iteration 189|reward:  420.0|last_reward_at:  189|Elapsed Time: 0:00:02||

Episode 14|Iteration 197|reward:  420.0|last_reward_at:  189|Elapsed Time: 0:00:02||

Episode 14|Iteration 201|reward:  470.0|last_reward_at:  189|Elapsed Time: 0:00:02||

Episode 14|Iteration 201|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:02||

Episode 14|Iteration 205|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:02||

Episode 14|Iteration 211|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:02||

Episode 14|Iteration 216|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:02||

Episode 14|Iteration 222|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:02||

Episode 14|Iteration 227|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:02||

Episode 14|Iteration 235|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:02||

Episode 14|Iteration 241|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:03||

Episode 14|Iteration 246|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:03||

Episode 14|Iteration 251|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:03||

Episode 14|Iteration 254|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:03||

Episode 14|Iteration 260|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:03||

Episode 14|Iteration 264|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:03||

Episode 14|Iteration 266|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:03||

Episode 14|Iteration 269|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:03||

Episode 14|Iteration 279|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:03||

Episode 14|Iteration 285|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:03||

Episode 14|Iteration 289|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:03||

Episode 14|Iteration 296|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:04||

Episode 14|Iteration 303|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:04||

Episode 14|Iteration 305|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:04||

Episode 14|Iteration 311|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:04||

Episode 14|Iteration 316|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:04||

Episode 14|Iteration 323|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:04||

Episode 14|Iteration 330|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:04||

Episode 14|Iteration 336|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:04||

Episode 14|Iteration 342|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:04||

Episode 14|Iteration 347|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:05||

Episode 14|Iteration 355|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:05||

Episode 14|Iteration 358|reward:  470.0|last_reward_at:  201|Elapsed Time: 0:00:05||

Episode 14|Iteration 359|reward:  476.0|last_reward_at:  201|Elapsed Time: 0:00:05||

Episode 14|Iteration 359|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:05||

Episode 14|Iteration 362|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:05||

Episode 14|Iteration 373|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:05||

Episode 14|Iteration 380|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:05||

Episode 14|Iteration 387|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:05||

Episode 14|Iteration 393|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:05||

Episode 14|Iteration 399|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:05||

Episode 14|Iteration 401|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:05||

Episode 14|Iteration 408|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:06||

Episode 14|Iteration 412|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:06||

Episode 14|Iteration 418|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:06||

Episode 14|Iteration 425|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:06||

Episode 14|Iteration 428|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:06||

Episode 14|Iteration 437|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:06||

Episode 14|Iteration 444|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:06||

Episode 14|Iteration 450|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:06||

Episode 14|Iteration 455|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:06||

Episode 14|Iteration 463|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:06||

Episode 14|Iteration 469|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:06||

Episode 14|Iteration 475|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:07||

Episode 14|Iteration 479|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:07||

Episode 14|Iteration 488|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:07||

Episode 14|Iteration 493|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:07||

Episode 14|Iteration 500|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:07||

Episode 14|Iteration 500|reward:  476.0|last_reward_at:  359|Elapsed Time: 0:00:07||




  Episode 14 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/20 (0.13)
    explore-remote: 5/94 (0.05)
    explore-connect: 0/74 (0.00)
    exploit-local: 0/38 (0.00)
    exploit-remote: 3/249 (0.01)
    exploit-connect: 5/9 (0.36)
  exploit deflected to exploration: 60
  ## Episode: 15/20 'DQL' ϵ=0.2973, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 15|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 15|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 15|Iteration 2|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 15|Iteration 2|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 15|Iteration 3|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 15|Iteration 3|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 15|Iteration 6|reward:   33.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 15|Iteration 6|reward:   33.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 15|Iteration 11|reward:   44.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 15|Iteration 11|reward:   44.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 15|Iteration 19|reward:   44.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 15|Iteration 26|reward:   44.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 15|Iteration 26|reward:   58.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 15|Iteration 26|reward:   58.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 15|Iteration 32|reward:   64.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 15|Iteration 32|reward:   64.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 15|Iteration 35|reward:  114.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 15|Iteration 35|reward:  114.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 15|Iteration 45|reward:  114.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 15|Iteration 54|reward:  114.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 15|Iteration 63|reward:  114.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 15|Iteration 75|reward:  114.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 15|Iteration 83|reward:  114.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 15|Iteration 89|reward:  114.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 15|Iteration 94|reward:  114.0|last_reward_at:   35|Elapsed Time: 0:00:01||

Episode 15|Iteration 102|reward:  114.0|last_reward_at:   35|Elapsed Time: 0:00:01||

Episode 15|Iteration 108|reward:  114.0|last_reward_at:   35|Elapsed Time: 0:00:01||

Episode 15|Iteration 114|reward:  114.0|last_reward_at:   35|Elapsed Time: 0:00:01||

Episode 15|Iteration 116|reward:  123.0|last_reward_at:   35|Elapsed Time: 0:00:01||

Episode 15|Iteration 116|reward:  123.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 15|Iteration 127|reward:  123.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 15|Iteration 138|reward:  123.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 15|Iteration 146|reward:  123.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 15|Iteration 152|reward:  123.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 15|Iteration 159|reward:  123.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 15|Iteration 163|reward:  223.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 15|Iteration 163|reward:  223.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 15|Iteration 164|reward:  237.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 15|Iteration 164|reward:  237.0|last_reward_at:  164|Elapsed Time: 0:00:01||

Episode 15|Iteration 171|reward:  237.0|last_reward_at:  164|Elapsed Time: 0:00:01||

Episode 15|Iteration 178|reward:  237.0|last_reward_at:  164|Elapsed Time: 0:00:01||

Episode 15|Iteration 184|reward:  237.0|last_reward_at:  164|Elapsed Time: 0:00:01||

Episode 15|Iteration 190|reward:  237.0|last_reward_at:  164|Elapsed Time: 0:00:02||

Episode 15|Iteration 197|reward:  237.0|last_reward_at:  164|Elapsed Time: 0:00:02||

Episode 15|Iteration 197|reward:  337.0|last_reward_at:  164|Elapsed Time: 0:00:02||

Episode 15|Iteration 197|reward:  337.0|last_reward_at:  197|Elapsed Time: 0:00:02||

Episode 15|Iteration 203|reward:  337.0|last_reward_at:  197|Elapsed Time: 0:00:02||

Episode 15|Iteration 203|reward:  351.0|last_reward_at:  197|Elapsed Time: 0:00:02||

Episode 15|Iteration 203|reward:  351.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 15|Iteration 213|reward:  351.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 15|Iteration 222|reward:  351.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 15|Iteration 228|reward:  351.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 15|Iteration 233|reward:  401.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 15|Iteration 233|reward:  401.0|last_reward_at:  233|Elapsed Time: 0:00:02||

Episode 15|Iteration 239|reward:  401.0|last_reward_at:  233|Elapsed Time: 0:00:02||

Episode 15|Iteration 247|reward:  401.0|last_reward_at:  233|Elapsed Time: 0:00:02||

Episode 15|Iteration 252|reward:  401.0|last_reward_at:  233|Elapsed Time: 0:00:02||

Episode 15|Iteration 260|reward:  401.0|last_reward_at:  233|Elapsed Time: 0:00:02||

Episode 15|Iteration 262|reward:  401.0|last_reward_at:  233|Elapsed Time: 0:00:03||

Episode 15|Iteration 265|reward:  401.0|last_reward_at:  233|Elapsed Time: 0:00:03||

Episode 15|Iteration 270|reward:  415.0|last_reward_at:  233|Elapsed Time: 0:00:03||

Episode 15|Iteration 270|reward:  415.0|last_reward_at:  270|Elapsed Time: 0:00:03||

Episode 15|Iteration 275|reward:  465.0|last_reward_at:  270|Elapsed Time: 0:00:03||

Episode 15|Iteration 275|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:03||

Episode 15|Iteration 278|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:03||

Episode 15|Iteration 285|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:03||

Episode 15|Iteration 291|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:03||

Episode 15|Iteration 298|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:03||

Episode 15|Iteration 304|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:03||

Episode 15|Iteration 308|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:03||

Episode 15|Iteration 317|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:03||

Episode 15|Iteration 322|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:04||

Episode 15|Iteration 330|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:04||

Episode 15|Iteration 336|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:04||

Episode 15|Iteration 342|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:04||

Episode 15|Iteration 347|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:04||

Episode 15|Iteration 351|reward:  465.0|last_reward_at:  275|Elapsed Time: 0:00:04||

Episode 15|Iteration 352|reward:  476.0|last_reward_at:  275|Elapsed Time: 0:00:04||

Episode 15|Iteration 352|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:04||

Episode 15|Iteration 358|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:04||

Episode 15|Iteration 367|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:04||

Episode 15|Iteration 374|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:04||

Episode 15|Iteration 378|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:04||

Episode 15|Iteration 386|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:04||

Episode 15|Iteration 390|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 15|Iteration 396|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 15|Iteration 403|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 15|Iteration 411|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 15|Iteration 418|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 15|Iteration 422|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 15|Iteration 425|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 15|Iteration 431|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 15|Iteration 437|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 15|Iteration 444|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 15|Iteration 449|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 15|Iteration 454|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 15|Iteration 463|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 15|Iteration 469|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 15|Iteration 475|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 15|Iteration 482|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 15|Iteration 487|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 15|Iteration 494|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 15|Iteration 499|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 15|Iteration 500|reward:  476.0|last_reward_at:  352|Elapsed Time: 0:00:06||




  Episode 15 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/17 (0.06)
    explore-remote: 1/80 (0.01)
    explore-connect: 0/89 (0.00)
    exploit-local: 2/23 (0.08)
    exploit-remote: 7/265 (0.03)
    exploit-connect: 5/10 (0.33)
  exploit deflected to exploration: 53
  ## Episode: 16/20 'DQL' ϵ=0.2785, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 16|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 16|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 16|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 16|Iteration 7|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 16|Iteration 8|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 16|Iteration 8|reward:   22.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 16|Iteration 9|reward:   33.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 16|Iteration 9|reward:   33.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 16|Iteration 18|reward:   33.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 16|Iteration 26|reward:   33.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 16|Iteration 32|reward:   33.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 16|Iteration 38|reward:   33.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 16|Iteration 45|reward:   33.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 16|Iteration 47|reward:   47.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 16|Iteration 47|reward:   47.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 16|Iteration 53|reward:   97.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 16|Iteration 53|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 16|Iteration 57|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 16|Iteration 64|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 16|Iteration 70|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 16|Iteration 76|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 16|Iteration 83|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 16|Iteration 89|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:01||

Episode 16|Iteration 95|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:01||

Episode 16|Iteration 102|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:01||

Episode 16|Iteration 108|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:01||

Episode 16|Iteration 114|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:01||

Episode 16|Iteration 121|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:01||

Episode 16|Iteration 127|reward:   97.0|last_reward_at:   53|Elapsed Time: 0:00:01||

Episode 16|Iteration 131|reward:  106.0|last_reward_at:   53|Elapsed Time: 0:00:01||

Episode 16|Iteration 131|reward:  106.0|last_reward_at:  131|Elapsed Time: 0:00:01||

Episode 16|Iteration 140|reward:  106.0|last_reward_at:  131|Elapsed Time: 0:00:01||

Episode 16|Iteration 146|reward:  106.0|last_reward_at:  131|Elapsed Time: 0:00:01||

Episode 16|Iteration 152|reward:  106.0|last_reward_at:  131|Elapsed Time: 0:00:01||

Episode 16|Iteration 156|reward:  206.0|last_reward_at:  131|Elapsed Time: 0:00:01||

Episode 16|Iteration 156|reward:  206.0|last_reward_at:  156|Elapsed Time: 0:00:01||

Episode 16|Iteration 158|reward:  220.0|last_reward_at:  156|Elapsed Time: 0:00:01||

Episode 16|Iteration 158|reward:  220.0|last_reward_at:  158|Elapsed Time: 0:00:01||

Episode 16|Iteration 165|reward:  220.0|last_reward_at:  158|Elapsed Time: 0:00:01||

Episode 16|Iteration 171|reward:  220.0|last_reward_at:  158|Elapsed Time: 0:00:02||

Episode 16|Iteration 178|reward:  220.0|last_reward_at:  158|Elapsed Time: 0:00:02||

Episode 16|Iteration 184|reward:  220.0|last_reward_at:  158|Elapsed Time: 0:00:02||

Episode 16|Iteration 190|reward:  220.0|last_reward_at:  158|Elapsed Time: 0:00:02||

Episode 16|Iteration 197|reward:  220.0|last_reward_at:  158|Elapsed Time: 0:00:02||

Episode 16|Iteration 203|reward:  220.0|last_reward_at:  158|Elapsed Time: 0:00:02||

Episode 16|Iteration 209|reward:  220.0|last_reward_at:  158|Elapsed Time: 0:00:02||

Episode 16|Iteration 210|reward:  226.0|last_reward_at:  158|Elapsed Time: 0:00:02||

Episode 16|Iteration 210|reward:  226.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 16|Iteration 216|reward:  226.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 16|Iteration 222|reward:  226.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 16|Iteration 228|reward:  226.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 16|Iteration 235|reward:  226.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 16|Iteration 241|reward:  226.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 16|Iteration 247|reward:  226.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 16|Iteration 254|reward:  226.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 16|Iteration 260|reward:  226.0|last_reward_at:  210|Elapsed Time: 0:00:03||

Episode 16|Iteration 262|reward:  237.0|last_reward_at:  210|Elapsed Time: 0:00:03||

Episode 16|Iteration 262|reward:  237.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 16|Iteration 272|reward:  237.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 16|Iteration 279|reward:  237.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 16|Iteration 285|reward:  237.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 16|Iteration 292|reward:  237.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 16|Iteration 298|reward:  237.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 16|Iteration 304|reward:  237.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 16|Iteration 311|reward:  237.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 16|Iteration 317|reward:  237.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 16|Iteration 323|reward:  237.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 16|Iteration 327|reward:  337.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 16|Iteration 327|reward:  337.0|last_reward_at:  327|Elapsed Time: 0:00:03||

Episode 16|Iteration 332|reward:  351.0|last_reward_at:  327|Elapsed Time: 0:00:03||

Episode 16|Iteration 332|reward:  351.0|last_reward_at:  332|Elapsed Time: 0:00:03||

Episode 16|Iteration 333|reward:  401.0|last_reward_at:  332|Elapsed Time: 0:00:03||

Episode 16|Iteration 333|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:03||

Episode 16|Iteration 336|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:03||

Episode 16|Iteration 342|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:03||

Episode 16|Iteration 348|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:04||

Episode 16|Iteration 354|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:04||

Episode 16|Iteration 359|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:04||

Episode 16|Iteration 366|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:04||

Episode 16|Iteration 374|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:04||

Episode 16|Iteration 377|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:04||

Episode 16|Iteration 383|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:04||

Episode 16|Iteration 393|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:04||

Episode 16|Iteration 399|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:04||

Episode 16|Iteration 404|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:05||

Episode 16|Iteration 410|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:05||

Episode 16|Iteration 416|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:05||

Episode 16|Iteration 418|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:05||

Episode 16|Iteration 422|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:05||

Episode 16|Iteration 427|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:05||

Episode 16|Iteration 431|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:05||

Episode 16|Iteration 435|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:05||

Episode 16|Iteration 444|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:05||

Episode 16|Iteration 448|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:06||

Episode 16|Iteration 455|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:06||

Episode 16|Iteration 461|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:06||

Episode 16|Iteration 466|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:06||

Episode 16|Iteration 475|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:06||

Episode 16|Iteration 479|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:06||

Episode 16|Iteration 482|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:06||

Episode 16|Iteration 487|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:06||

Episode 16|Iteration 493|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:06||

Episode 16|Iteration 500|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:07||

Episode 16|Iteration 500|reward:  401.0|last_reward_at:  333|Elapsed Time: 0:00:07||




  Episode 16 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/22 (0.04)
    explore-remote: 1/87 (0.01)
    explore-connect: 0/70 (0.00)
    exploit-local: 2/27 (0.07)
    exploit-remote: 5/252 (0.02)
    exploit-connect: 4/29 (0.12)
  exploit deflected to exploration: 47
  ## Episode: 17/20 'DQL' ϵ=0.2615, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 17|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 17|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 17|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 17|Iteration 7|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 17|Iteration 12|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 17|Iteration 14|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 17|Iteration 14|reward:   22.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 17|Iteration 19|reward:   22.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 17|Iteration 26|reward:   22.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 17|Iteration 32|reward:   22.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 17|Iteration 38|reward:   22.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 17|Iteration 45|reward:   22.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 17|Iteration 46|reward:   33.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 17|Iteration 46|reward:   33.0|last_reward_at:   46|Elapsed Time: 0:00:00||

Episode 17|Iteration 54|reward:   42.0|last_reward_at:   46|Elapsed Time: 0:00:00||

Episode 17|Iteration 54|reward:   42.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 17|Iteration 63|reward:   42.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 17|Iteration 66|reward:   53.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 17|Iteration 66|reward:   53.0|last_reward_at:   66|Elapsed Time: 0:00:00||

Episode 17|Iteration 75|reward:   53.0|last_reward_at:   66|Elapsed Time: 0:00:01||

Episode 17|Iteration 83|reward:   53.0|last_reward_at:   66|Elapsed Time: 0:00:01||

Episode 17|Iteration 88|reward:   67.0|last_reward_at:   66|Elapsed Time: 0:00:01||

Episode 17|Iteration 88|reward:   67.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 17|Iteration 91|reward:  117.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 17|Iteration 91|reward:  117.0|last_reward_at:   91|Elapsed Time: 0:00:01||

Episode 17|Iteration 100|reward:  117.0|last_reward_at:   91|Elapsed Time: 0:00:01||

Episode 17|Iteration 101|reward:  123.0|last_reward_at:   91|Elapsed Time: 0:00:01||

Episode 17|Iteration 101|reward:  123.0|last_reward_at:  101|Elapsed Time: 0:00:01||

Episode 17|Iteration 103|reward:  223.0|last_reward_at:  101|Elapsed Time: 0:00:01||

Episode 17|Iteration 103|reward:  223.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 17|Iteration 104|reward:  237.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 17|Iteration 104|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 17|Iteration 113|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 17|Iteration 121|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 17|Iteration 127|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 17|Iteration 133|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 17|Iteration 140|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 17|Iteration 146|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 17|Iteration 152|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 17|Iteration 158|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 17|Iteration 165|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 17|Iteration 171|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 17|Iteration 178|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 17|Iteration 184|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 17|Iteration 188|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 17|Iteration 197|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 17|Iteration 203|reward:  237.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 17|Iteration 206|reward:  337.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 17|Iteration 206|reward:  337.0|last_reward_at:  206|Elapsed Time: 0:00:02||

Episode 17|Iteration 213|reward:  337.0|last_reward_at:  206|Elapsed Time: 0:00:02||

Episode 17|Iteration 217|reward:  351.0|last_reward_at:  206|Elapsed Time: 0:00:02||

Episode 17|Iteration 217|reward:  351.0|last_reward_at:  217|Elapsed Time: 0:00:02||

Episode 17|Iteration 227|reward:  351.0|last_reward_at:  217|Elapsed Time: 0:00:02||

Episode 17|Iteration 234|reward:  351.0|last_reward_at:  217|Elapsed Time: 0:00:03||

Episode 17|Iteration 241|reward:  351.0|last_reward_at:  217|Elapsed Time: 0:00:03||

Episode 17|Iteration 247|reward:  351.0|last_reward_at:  217|Elapsed Time: 0:00:03||

Episode 17|Iteration 254|reward:  351.0|last_reward_at:  217|Elapsed Time: 0:00:03||

Episode 17|Iteration 259|reward:  401.0|last_reward_at:  217|Elapsed Time: 0:00:03||

Episode 17|Iteration 259|reward:  401.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 17|Iteration 261|reward:  401.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 17|Iteration 266|reward:  401.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 17|Iteration 269|reward:  401.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 17|Iteration 273|reward:  401.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 17|Iteration 278|reward:  415.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 17|Iteration 278|reward:  415.0|last_reward_at:  278|Elapsed Time: 0:00:03||

Episode 17|Iteration 282|reward:  415.0|last_reward_at:  278|Elapsed Time: 0:00:03||

Episode 17|Iteration 292|reward:  415.0|last_reward_at:  278|Elapsed Time: 0:00:03||

Episode 17|Iteration 293|reward:  465.0|last_reward_at:  278|Elapsed Time: 0:00:04||

Episode 17|Iteration 293|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:04||

Episode 17|Iteration 297|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:04||

Episode 17|Iteration 301|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:04||

Episode 17|Iteration 306|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:04||

Episode 17|Iteration 309|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:04||

Episode 17|Iteration 311|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:04||

Episode 17|Iteration 314|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:04||

Episode 17|Iteration 318|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:04||

Episode 17|Iteration 322|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:04||

Episode 17|Iteration 327|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:05||

Episode 17|Iteration 330|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:05||

Episode 17|Iteration 336|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:05||

Episode 17|Iteration 339|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:05||

Episode 17|Iteration 347|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:05||

Episode 17|Iteration 354|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:05||

Episode 17|Iteration 361|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:05||

Episode 17|Iteration 364|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:05||

Episode 17|Iteration 368|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:05||

Episode 17|Iteration 374|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:05||

Episode 17|Iteration 380|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:05||

Episode 17|Iteration 384|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:06||

Episode 17|Iteration 389|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:06||

Episode 17|Iteration 396|reward:  465.0|last_reward_at:  293|Elapsed Time: 0:00:06||

Episode 17|Iteration 399|reward:  476.0|last_reward_at:  293|Elapsed Time: 0:00:06||

Episode 17|Iteration 399|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:06||

Episode 17|Iteration 406|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:06||

Episode 17|Iteration 409|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:06||

Episode 17|Iteration 415|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:06||

Episode 17|Iteration 418|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:06||

Episode 17|Iteration 422|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:06||

Episode 17|Iteration 425|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:06||

Episode 17|Iteration 428|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:07||

Episode 17|Iteration 433|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:07||

Episode 17|Iteration 442|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:07||

Episode 17|Iteration 450|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:07||

Episode 17|Iteration 453|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:07||

Episode 17|Iteration 458|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:07||

Episode 17|Iteration 462|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:07||

Episode 17|Iteration 469|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:07||

Episode 17|Iteration 475|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:07||

Episode 17|Iteration 479|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:07||

Episode 17|Iteration 486|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:08||

Episode 17|Iteration 492|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:08||

Episode 17|Iteration 494|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:08||

Episode 17|Iteration 496|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:08||

Episode 17|Iteration 500|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:08||

Episode 17|Iteration 500|reward:  476.0|last_reward_at:  399|Elapsed Time: 0:00:08||




  Episode 17 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/18 (0.05)
    explore-remote: 1/86 (0.01)
    explore-connect: 0/63 (0.00)
    exploit-local: 2/42 (0.05)
    exploit-remote: 7/262 (0.03)
    exploit-connect: 5/13 (0.28)
  exploit deflected to exploration: 49
  ## Episode: 18/20 'DQL' ϵ=0.2462, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 18|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 18|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 18|Iteration 2|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 18|Iteration 2|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 18|Iteration 3|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 18|Iteration 3|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 18|Iteration 7|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 18|Iteration 10|reward:   33.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 18|Iteration 10|reward:   33.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 18|Iteration 17|reward:   33.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 18|Iteration 24|reward:   33.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 18|Iteration 27|reward:   47.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 18|Iteration 27|reward:   47.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 18|Iteration 32|reward:   47.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 18|Iteration 35|reward:   58.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 18|Iteration 35|reward:   58.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 18|Iteration 42|reward:   58.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 18|Iteration 49|reward:   58.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 18|Iteration 56|reward:   58.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 18|Iteration 63|reward:   58.0|last_reward_at:   35|Elapsed Time: 0:00:01||

Episode 18|Iteration 69|reward:   58.0|last_reward_at:   35|Elapsed Time: 0:00:01||

Episode 18|Iteration 76|reward:   58.0|last_reward_at:   35|Elapsed Time: 0:00:01||

Episode 18|Iteration 77|reward:  108.0|last_reward_at:   35|Elapsed Time: 0:00:01||

Episode 18|Iteration 77|reward:  108.0|last_reward_at:   77|Elapsed Time: 0:00:01||

Episode 18|Iteration 82|reward:  108.0|last_reward_at:   77|Elapsed Time: 0:00:01||

Episode 18|Iteration 89|reward:  108.0|last_reward_at:   77|Elapsed Time: 0:00:01||

Episode 18|Iteration 93|reward:  117.0|last_reward_at:   77|Elapsed Time: 0:00:01||

Episode 18|Iteration 93|reward:  117.0|last_reward_at:   93|Elapsed Time: 0:00:01||

Episode 18|Iteration 100|reward:  117.0|last_reward_at:   93|Elapsed Time: 0:00:01||

Episode 18|Iteration 107|reward:  117.0|last_reward_at:   93|Elapsed Time: 0:00:01||

Episode 18|Iteration 111|reward:  217.0|last_reward_at:   93|Elapsed Time: 0:00:01||

Episode 18|Iteration 111|reward:  217.0|last_reward_at:  111|Elapsed Time: 0:00:01||

Episode 18|Iteration 113|reward:  231.0|last_reward_at:  111|Elapsed Time: 0:00:01||

Episode 18|Iteration 113|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 18|Iteration 120|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 18|Iteration 127|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 133|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 140|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 146|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 152|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 159|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 164|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 171|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 178|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 184|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 190|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 197|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:02||

Episode 18|Iteration 203|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:03||

Episode 18|Iteration 209|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:03||

Episode 18|Iteration 213|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:03||

Episode 18|Iteration 219|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:03||

Episode 18|Iteration 226|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:03||

Episode 18|Iteration 232|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:03||

Episode 18|Iteration 239|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:03||

Episode 18|Iteration 246|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:03||

Episode 18|Iteration 252|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:03||

Episode 18|Iteration 259|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:03||

Episode 18|Iteration 265|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:04||

Episode 18|Iteration 271|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:04||

Episode 18|Iteration 278|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:04||

Episode 18|Iteration 285|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:04||

Episode 18|Iteration 289|reward:  231.0|last_reward_at:  113|Elapsed Time: 0:00:04||

Episode 18|Iteration 290|reward:  237.0|last_reward_at:  113|Elapsed Time: 0:00:04||

Episode 18|Iteration 290|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 18|Iteration 296|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 18|Iteration 303|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 18|Iteration 308|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 18|Iteration 311|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 18|Iteration 313|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 18|Iteration 319|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 18|Iteration 326|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 18|Iteration 330|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 18|Iteration 336|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 18|Iteration 342|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 18|Iteration 345|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 18|Iteration 349|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 18|Iteration 355|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 18|Iteration 361|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 18|Iteration 367|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 18|Iteration 374|reward:  237.0|last_reward_at:  290|Elapsed Time: 0:00:06||

Episode 18|Iteration 374|reward:  251.0|last_reward_at:  290|Elapsed Time: 0:00:06||

Episode 18|Iteration 374|reward:  251.0|last_reward_at:  374|Elapsed Time: 0:00:06||

Episode 18|Iteration 380|reward:  251.0|last_reward_at:  374|Elapsed Time: 0:00:06||

Episode 18|Iteration 385|reward:  251.0|last_reward_at:  374|Elapsed Time: 0:00:06||

Episode 18|Iteration 390|reward:  251.0|last_reward_at:  374|Elapsed Time: 0:00:06||

Episode 18|Iteration 393|reward:  301.0|last_reward_at:  374|Elapsed Time: 0:00:06||

Episode 18|Iteration 393|reward:  301.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 18|Iteration 397|reward:  301.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 18|Iteration 404|reward:  301.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 18|Iteration 411|reward:  301.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 18|Iteration 418|reward:  301.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 18|Iteration 424|reward:  301.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 18|Iteration 431|reward:  301.0|last_reward_at:  393|Elapsed Time: 0:00:07||

Episode 18|Iteration 437|reward:  301.0|last_reward_at:  393|Elapsed Time: 0:00:07||

Episode 18|Iteration 443|reward:  301.0|last_reward_at:  393|Elapsed Time: 0:00:07||

Episode 18|Iteration 449|reward:  301.0|last_reward_at:  393|Elapsed Time: 0:00:07||

Episode 18|Iteration 453|reward:  401.0|last_reward_at:  393|Elapsed Time: 0:00:07||

Episode 18|Iteration 453|reward:  401.0|last_reward_at:  453|Elapsed Time: 0:00:07||

Episode 18|Iteration 455|reward:  401.0|last_reward_at:  453|Elapsed Time: 0:00:07||

Episode 18|Iteration 457|reward:  401.0|last_reward_at:  453|Elapsed Time: 0:00:07||

Episode 18|Iteration 461|reward:  401.0|last_reward_at:  453|Elapsed Time: 0:00:07||

Episode 18|Iteration 463|reward:  401.0|last_reward_at:  453|Elapsed Time: 0:00:07||

Episode 18|Iteration 465|reward:  415.0|last_reward_at:  453|Elapsed Time: 0:00:08||

Episode 18|Iteration 465|reward:  415.0|last_reward_at:  465|Elapsed Time: 0:00:08||

Episode 18|Iteration 466|reward:  465.0|last_reward_at:  465|Elapsed Time: 0:00:08||

Episode 18|Iteration 466|reward:  465.0|last_reward_at:  466|Elapsed Time: 0:00:08||

Episode 18|Iteration 467|reward:  465.0|last_reward_at:  466|Elapsed Time: 0:00:08||

Episode 18|Iteration 474|reward:  465.0|last_reward_at:  466|Elapsed Time: 0:00:08||

Episode 18|Iteration 480|reward:  465.0|last_reward_at:  466|Elapsed Time: 0:00:08||

Episode 18|Iteration 487|reward:  465.0|last_reward_at:  466|Elapsed Time: 0:00:08||

Episode 18|Iteration 494|reward:  465.0|last_reward_at:  466|Elapsed Time: 0:00:08||

Episode 18|Iteration 500|reward:  465.0|last_reward_at:  466|Elapsed Time: 0:00:08||

Episode 18|Iteration 500|reward:  465.0|last_reward_at:  466|Elapsed Time: 0:00:08||




  Episode 18 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/15 (0.06)
    explore-remote: 5/67 (0.07)
    explore-connect: 0/55 (0.00)
    exploit-local: 2/30 (0.06)
    exploit-remote: 2/285 (0.01)
    exploit-connect: 5/33 (0.13)
  exploit deflected to exploration: 26
  ## Episode: 19/20 'DQL' ϵ=0.2323, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 19|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 19|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 19|Iteration 6|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 19|Iteration 9|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 19|Iteration 9|reward:   11.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 19|Iteration 15|reward:   22.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 19|Iteration 15|reward:   22.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 19|Iteration 17|reward:   33.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 19|Iteration 17|reward:   33.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 19|Iteration 22|reward:   42.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 19|Iteration 22|reward:   42.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 19|Iteration 26|reward:   42.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 19|Iteration 32|reward:   42.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 19|Iteration 35|reward:   56.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 19|Iteration 35|reward:   56.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 19|Iteration 40|reward:   62.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 19|Iteration 40|reward:   62.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 19|Iteration 45|reward:   62.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 19|Iteration 51|reward:   62.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 19|Iteration 56|reward:   62.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 19|Iteration 63|reward:   62.0|last_reward_at:   40|Elapsed Time: 0:00:01||

Episode 19|Iteration 70|reward:   62.0|last_reward_at:   40|Elapsed Time: 0:00:01||

Episode 19|Iteration 76|reward:   62.0|last_reward_at:   40|Elapsed Time: 0:00:01||

Episode 19|Iteration 77|reward:  112.0|last_reward_at:   40|Elapsed Time: 0:00:01||

Episode 19|Iteration 77|reward:  112.0|last_reward_at:   77|Elapsed Time: 0:00:01||

Episode 19|Iteration 78|reward:  123.0|last_reward_at:   77|Elapsed Time: 0:00:01||

Episode 19|Iteration 78|reward:  123.0|last_reward_at:   78|Elapsed Time: 0:00:01||

Episode 19|Iteration 83|reward:  123.0|last_reward_at:   78|Elapsed Time: 0:00:01||

Episode 19|Iteration 88|reward:  123.0|last_reward_at:   78|Elapsed Time: 0:00:01||

Episode 19|Iteration 94|reward:  123.0|last_reward_at:   78|Elapsed Time: 0:00:01||

Episode 19|Iteration 97|reward:  223.0|last_reward_at:   78|Elapsed Time: 0:00:01||

Episode 19|Iteration 97|reward:  223.0|last_reward_at:   97|Elapsed Time: 0:00:01||

Episode 19|Iteration 99|reward:  237.0|last_reward_at:   97|Elapsed Time: 0:00:01||

Episode 19|Iteration 99|reward:  237.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 19|Iteration 106|reward:  237.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 19|Iteration 111|reward:  237.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 19|Iteration 119|reward:  237.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 19|Iteration 126|reward:  237.0|last_reward_at:   99|Elapsed Time: 0:00:02||

Episode 19|Iteration 133|reward:  237.0|last_reward_at:   99|Elapsed Time: 0:00:02||

Episode 19|Iteration 137|reward:  237.0|last_reward_at:   99|Elapsed Time: 0:00:02||

Episode 19|Iteration 144|reward:  237.0|last_reward_at:   99|Elapsed Time: 0:00:02||

Episode 19|Iteration 147|reward:  337.0|last_reward_at:   99|Elapsed Time: 0:00:02||

Episode 19|Iteration 147|reward:  337.0|last_reward_at:  147|Elapsed Time: 0:00:02||

Episode 19|Iteration 150|reward:  351.0|last_reward_at:  147|Elapsed Time: 0:00:02||

Episode 19|Iteration 150|reward:  351.0|last_reward_at:  150|Elapsed Time: 0:00:02||

Episode 19|Iteration 157|reward:  351.0|last_reward_at:  150|Elapsed Time: 0:00:02||

Episode 19|Iteration 158|reward:  401.0|last_reward_at:  150|Elapsed Time: 0:00:02||

Episode 19|Iteration 158|reward:  401.0|last_reward_at:  158|Elapsed Time: 0:00:02||

Episode 19|Iteration 161|reward:  401.0|last_reward_at:  158|Elapsed Time: 0:00:02||

Episode 19|Iteration 162|reward:  415.0|last_reward_at:  158|Elapsed Time: 0:00:02||

Episode 19|Iteration 162|reward:  415.0|last_reward_at:  162|Elapsed Time: 0:00:02||

Episode 19|Iteration 167|reward:  465.0|last_reward_at:  162|Elapsed Time: 0:00:02||

Episode 19|Iteration 167|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:02||

Episode 19|Iteration 169|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:02||

Episode 19|Iteration 173|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:03||

Episode 19|Iteration 178|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:03||

Episode 19|Iteration 181|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:03||

Episode 19|Iteration 183|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:03||

Episode 19|Iteration 185|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:03||

Episode 19|Iteration 189|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:03||

Episode 19|Iteration 193|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:03||

Episode 19|Iteration 195|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:03||

Episode 19|Iteration 199|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:04||

Episode 19|Iteration 201|reward:  465.0|last_reward_at:  167|Elapsed Time: 0:00:04||

Episode 19|Iteration 206|reward:  476.0|last_reward_at:  167|Elapsed Time: 0:00:04||

Episode 19|Iteration 206|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:04||

Episode 19|Iteration 213|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:04||

Episode 19|Iteration 220|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:04||

Episode 19|Iteration 227|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:04||

Episode 19|Iteration 230|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:04||

Episode 19|Iteration 232|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:04||

Episode 19|Iteration 235|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:04||

Episode 19|Iteration 237|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:05||

Episode 19|Iteration 244|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:05||

Episode 19|Iteration 245|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:05||

Episode 19|Iteration 246|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:05||

Episode 19|Iteration 248|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:05||

Episode 19|Iteration 254|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:05||

Episode 19|Iteration 255|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:05||

Episode 19|Iteration 260|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:05||

Episode 19|Iteration 266|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:05||

Episode 19|Iteration 272|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:05||

Episode 19|Iteration 274|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:06||

Episode 19|Iteration 279|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:06||

Episode 19|Iteration 281|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:06||

Episode 19|Iteration 283|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:06||

Episode 19|Iteration 285|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:06||

Episode 19|Iteration 292|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:06||

Episode 19|Iteration 297|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:06||

Episode 19|Iteration 304|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:06||

Episode 19|Iteration 307|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:06||

Episode 19|Iteration 314|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:07||

Episode 19|Iteration 315|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:07||

Episode 19|Iteration 317|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:07||

Episode 19|Iteration 318|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:07||

Episode 19|Iteration 319|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:07||

Episode 19|Iteration 322|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:07||

Episode 19|Iteration 329|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:07||

Episode 19|Iteration 336|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:07||

Episode 19|Iteration 338|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:08||

Episode 19|Iteration 342|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:08||

Episode 19|Iteration 347|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:08||

Episode 19|Iteration 353|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:08||

Episode 19|Iteration 355|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:08||

Episode 19|Iteration 361|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:08||

Episode 19|Iteration 363|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:08||

Episode 19|Iteration 366|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:08||

Episode 19|Iteration 368|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:08||

Episode 19|Iteration 374|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:09||

Episode 19|Iteration 379|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:09||

Episode 19|Iteration 386|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:09||

Episode 19|Iteration 388|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:09||

Episode 19|Iteration 393|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:09||

Episode 19|Iteration 399|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:09||

Episode 19|Iteration 400|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:09||

Episode 19|Iteration 406|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:09||

Episode 19|Iteration 407|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:09||

Episode 19|Iteration 408|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:09||

Episode 19|Iteration 414|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:10||

Episode 19|Iteration 421|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:10||

Episode 19|Iteration 425|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:10||

Episode 19|Iteration 426|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:10||

Episode 19|Iteration 430|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:10||

Episode 19|Iteration 434|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:10||

Episode 19|Iteration 439|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:10||

Episode 19|Iteration 441|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:10||

Episode 19|Iteration 443|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:11||

Episode 19|Iteration 450|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:11||

Episode 19|Iteration 454|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:11||

Episode 19|Iteration 459|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:11||

Episode 19|Iteration 461|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:11||

Episode 19|Iteration 463|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:11||

Episode 19|Iteration 467|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:11||

Episode 19|Iteration 470|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:11||

Episode 19|Iteration 472|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:11||

Episode 19|Iteration 474|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:12||

Episode 19|Iteration 479|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:12||

Episode 19|Iteration 484|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:12||

Episode 19|Iteration 488|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:12||

Episode 19|Iteration 494|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:12||

Episode 19|Iteration 499|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:12||

Episode 19|Iteration 500|reward:  476.0|last_reward_at:  206|Elapsed Time: 0:00:12||




  Episode 19 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/13 (0.07)
    explore-remote: 1/88 (0.01)
    explore-connect: 0/65 (0.00)
    exploit-local: 2/32 (0.06)
    exploit-remote: 7/276 (0.02)
    exploit-connect: 5/10 (0.33)
  exploit deflected to exploration: 57
  ## Episode: 20/20 'DQL' ϵ=0.2197, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 20|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 20|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 20|Iteration 3|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 20|Iteration 3|reward:   11.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 20|Iteration 9|reward:   11.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 20|Iteration 15|reward:   11.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 20|Iteration 16|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 20|Iteration 16|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 20|Iteration 22|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 20|Iteration 28|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 20|Iteration 32|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 20|Iteration 38|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 20|Iteration 44|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 20|Iteration 50|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 20|Iteration 56|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 20|Iteration 63|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:01||

Episode 20|Iteration 70|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:01||

Episode 20|Iteration 75|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:01||

Episode 20|Iteration 81|reward:   22.0|last_reward_at:   16|Elapsed Time: 0:00:01||

Episode 20|Iteration 86|reward:   33.0|last_reward_at:   16|Elapsed Time: 0:00:01||

Episode 20|Iteration 86|reward:   33.0|last_reward_at:   86|Elapsed Time: 0:00:01||

Episode 20|Iteration 89|reward:   44.0|last_reward_at:   86|Elapsed Time: 0:00:01||

Episode 20|Iteration 89|reward:   44.0|last_reward_at:   89|Elapsed Time: 0:00:01||

Episode 20|Iteration 95|reward:   44.0|last_reward_at:   89|Elapsed Time: 0:00:01||

Episode 20|Iteration 100|reward:   44.0|last_reward_at:   89|Elapsed Time: 0:00:01||

Episode 20|Iteration 105|reward:   44.0|last_reward_at:   89|Elapsed Time: 0:00:01||

Episode 20|Iteration 109|reward:   53.0|last_reward_at:   89|Elapsed Time: 0:00:01||

Episode 20|Iteration 109|reward:   53.0|last_reward_at:  109|Elapsed Time: 0:00:01||

Episode 20|Iteration 114|reward:   53.0|last_reward_at:  109|Elapsed Time: 0:00:02||

Episode 20|Iteration 116|reward:   67.0|last_reward_at:  109|Elapsed Time: 0:00:02||

Episode 20|Iteration 116|reward:   67.0|last_reward_at:  116|Elapsed Time: 0:00:02||

Episode 20|Iteration 118|reward:  117.0|last_reward_at:  116|Elapsed Time: 0:00:02||

Episode 20|Iteration 118|reward:  117.0|last_reward_at:  118|Elapsed Time: 0:00:02||

Episode 20|Iteration 127|reward:  117.0|last_reward_at:  118|Elapsed Time: 0:00:02||

Episode 20|Iteration 133|reward:  117.0|last_reward_at:  118|Elapsed Time: 0:00:02||

Episode 20|Iteration 138|reward:  117.0|last_reward_at:  118|Elapsed Time: 0:00:02||

Episode 20|Iteration 139|reward:  131.0|last_reward_at:  118|Elapsed Time: 0:00:02||

Episode 20|Iteration 139|reward:  131.0|last_reward_at:  139|Elapsed Time: 0:00:02||

Episode 20|Iteration 141|reward:  181.0|last_reward_at:  139|Elapsed Time: 0:00:02||

Episode 20|Iteration 141|reward:  181.0|last_reward_at:  141|Elapsed Time: 0:00:02||

Episode 20|Iteration 146|reward:  181.0|last_reward_at:  141|Elapsed Time: 0:00:02||

Episode 20|Iteration 146|reward:  281.0|last_reward_at:  141|Elapsed Time: 0:00:02||

Episode 20|Iteration 146|reward:  281.0|last_reward_at:  146|Elapsed Time: 0:00:02||

Episode 20|Iteration 147|reward:  295.0|last_reward_at:  146|Elapsed Time: 0:00:02||

Episode 20|Iteration 147|reward:  295.0|last_reward_at:  147|Elapsed Time: 0:00:02||

Episode 20|Iteration 152|reward:  295.0|last_reward_at:  147|Elapsed Time: 0:00:02||

Episode 20|Iteration 158|reward:  295.0|last_reward_at:  147|Elapsed Time: 0:00:02||

Episode 20|Iteration 164|reward:  295.0|last_reward_at:  147|Elapsed Time: 0:00:02||

Episode 20|Iteration 171|reward:  295.0|last_reward_at:  147|Elapsed Time: 0:00:02||

Episode 20|Iteration 175|reward:  301.0|last_reward_at:  147|Elapsed Time: 0:00:03||

Episode 20|Iteration 175|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:03||

Episode 20|Iteration 181|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:03||

Episode 20|Iteration 184|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:03||

Episode 20|Iteration 190|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:03||

Episode 20|Iteration 194|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:03||

Episode 20|Iteration 201|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:03||

Episode 20|Iteration 208|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:03||

Episode 20|Iteration 215|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:03||

Episode 20|Iteration 222|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:03||

Episode 20|Iteration 228|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:03||

Episode 20|Iteration 231|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:04||

Episode 20|Iteration 239|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:04||

Episode 20|Iteration 247|reward:  301.0|last_reward_at:  175|Elapsed Time: 0:00:04||

Episode 20|Iteration 253|reward:  312.0|last_reward_at:  175|Elapsed Time: 0:00:04||

Episode 20|Iteration 253|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:04||

Episode 20|Iteration 257|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:04||

Episode 20|Iteration 264|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:04||

Episode 20|Iteration 270|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:04||

Episode 20|Iteration 277|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:04||

Episode 20|Iteration 284|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:04||

Episode 20|Iteration 288|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:04||

Episode 20|Iteration 295|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:05||

Episode 20|Iteration 298|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:05||

Episode 20|Iteration 302|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:05||

Episode 20|Iteration 309|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:05||

Episode 20|Iteration 315|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:05||

Episode 20|Iteration 321|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:05||

Episode 20|Iteration 328|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:05||

Episode 20|Iteration 334|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:05||

Episode 20|Iteration 341|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:05||

Episode 20|Iteration 348|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:06||

Episode 20|Iteration 351|reward:  312.0|last_reward_at:  253|Elapsed Time: 0:00:06||

Episode 20|Iteration 352|reward:  412.0|last_reward_at:  253|Elapsed Time: 0:00:06||

Episode 20|Iteration 352|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 20|Iteration 357|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 20|Iteration 359|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 20|Iteration 362|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 20|Iteration 366|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 20|Iteration 372|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 20|Iteration 379|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 20|Iteration 382|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:07||

Episode 20|Iteration 387|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:07||

Episode 20|Iteration 391|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:07||

Episode 20|Iteration 396|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:07||

Episode 20|Iteration 402|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:07||

Episode 20|Iteration 405|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:07||

Episode 20|Iteration 411|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:07||

Episode 20|Iteration 418|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:07||

Episode 20|Iteration 422|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:07||

Episode 20|Iteration 424|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:08||

Episode 20|Iteration 426|reward:  412.0|last_reward_at:  352|Elapsed Time: 0:00:08||

Episode 20|Iteration 428|reward:  426.0|last_reward_at:  352|Elapsed Time: 0:00:08||

Episode 20|Iteration 428|reward:  426.0|last_reward_at:  428|Elapsed Time: 0:00:08||

Episode 20|Iteration 436|reward:  426.0|last_reward_at:  428|Elapsed Time: 0:00:08||

Episode 20|Iteration 441|reward:  476.0|last_reward_at:  428|Elapsed Time: 0:00:08||

Episode 20|Iteration 441|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:08||

Episode 20|Iteration 448|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:08||

Episode 20|Iteration 449|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:08||

Episode 20|Iteration 452|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:08||

Episode 20|Iteration 459|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:08||

Episode 20|Iteration 463|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:08||

Episode 20|Iteration 464|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:09||

Episode 20|Iteration 466|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:09||

Episode 20|Iteration 469|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:09||

Episode 20|Iteration 473|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:09||

Episode 20|Iteration 475|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:09||

Episode 20|Iteration 476|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:09||

Episode 20|Iteration 482|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:09||

Episode 20|Iteration 483|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:09||

Episode 20|Iteration 485|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:10||

Episode 20|Iteration 489|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:10||

Episode 20|Iteration 494|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:10||

Episode 20|Iteration 499|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:10||

Episode 20|Iteration 500|reward:  476.0|last_reward_at:  441|Elapsed Time: 0:00:10||




  Episode 20 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/26 (0.07)
    explore-remote: 1/93 (0.01)
    explore-connect: 1/58 (0.02)
    exploit-local: 1/12 (0.08)
    exploit-remote: 7/256 (0.03)
    exploit-connect: 4/39 (0.09)
  exploit deflected to exploration: 52
simulation ended


In [11]:
# -----------------------------------------
# 6) DQL 평가(Exploit) (✅ 파라미터 유지 + 평가에서만 LLM 옵션)
# -----------------------------------------
llm_chat = make_openai_chat_callable(model_id, llm_token_yaml) if use_llm else None

eval_learner = (
    LLMPrunedExploitWrapper(
        base_learner=dql_run["learner"],
        llm_chat=llm_chat,
        llm_every_steps=llm_every_steps,
        candidate_pool=candidate_pool,
        llm_topk=llm_topk,
        obs_max_chars=llm_obs_max_chars,
    )
    if use_llm
    else dql_run["learner"]
)

dql_exploit_run = learner.epsilon_greedy_search(
    gym_env,
    ep,
    learner=eval_learner,
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    epsilon=0.0,
    epsilon_minimum=0.00,
    render=False,
    plot_episodes_length=False,
    verbosity=Verbosity.Quiet,
    render_last_episode_rewards_to=os.path.join(plots_dir, f"dql-{gymid}"),
    title=("Exploiting DQL (LLM-pruned)" if use_llm else "Exploiting DQL"),
)

[OpenAI] key_prefix= sk-proj- len= 164 yaml= /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/llm_token.yaml


###### Exploiting DQL (LLM-pruned)
Learning with: episode_count=10,iteration_count=500,ϵ=0.0,ϵ_min=0.0, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10
  ## Episode: 1/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 1|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 1|Iteration 5|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:03||

Episode 1|Iteration 8|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:03||

Episode 1|Iteration 8|reward:   22.0|last_reward_at:    8|Elapsed Time: 0:00:03||

Episode 1|Iteration 10|reward:   22.0|last_reward_at:    8|Elapsed Time: 0:00:05||

Episode 1|Iteration 15|reward:   22.0|last_reward_at:    8|Elapsed Time: 0:00:07||

Episode 1|Iteration 20|reward:   22.0|last_reward_at:    8|Elapsed Time: 0:00:10||

Episode 1|Iteration 25|reward:   33.0|last_reward_at:    8|Elapsed Time: 0:00:11||

Episode 1|Iteration 25|reward:   33.0|last_reward_at:   25|Elapsed Time: 0:00:11||

Episode 1|Iteration 27|reward:   47.0|last_reward_at:   25|Elapsed Time: 0:00:11||

Episode 1|Iteration 27|reward:   47.0|last_reward_at:   27|Elapsed Time: 0:00:11||

Episode 1|Iteration 30|reward:   47.0|last_reward_at:   27|Elapsed Time: 0:00:13||

Episode 1|Iteration 34|reward:   97.0|last_reward_at:   27|Elapsed Time: 0:00:13||

Episode 1|Iteration 34|reward:   97.0|last_reward_at:   34|Elapsed Time: 0:00:13||

Episode 1|Iteration 35|reward:   97.0|last_reward_at:   34|Elapsed Time: 0:00:15||

Episode 1|Iteration 38|reward:   97.0|last_reward_at:   34|Elapsed Time: 0:00:15||

Episode 1|Iteration 40|reward:   97.0|last_reward_at:   34|Elapsed Time: 0:00:19||

Episode 1|Iteration 45|reward:   97.0|last_reward_at:   34|Elapsed Time: 0:00:21||

Episode 1|Iteration 50|reward:   97.0|last_reward_at:   34|Elapsed Time: 0:00:23||

Episode 1|Iteration 55|reward:   97.0|last_reward_at:   34|Elapsed Time: 0:00:24||

Episode 1|Iteration 56|reward:  108.0|last_reward_at:   34|Elapsed Time: 0:00:24||

Episode 1|Iteration 56|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:24||

Episode 1|Iteration 60|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:26||

Episode 1|Iteration 65|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:27||

Episode 1|Iteration 69|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:27||

Episode 1|Iteration 70|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:29||

Episode 1|Iteration 75|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:31||

Episode 1|Iteration 80|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:34||

Episode 1|Iteration 85|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:36||

Episode 1|Iteration 90|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:37||

Episode 1|Iteration 95|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:38||

Episode 1|Iteration 95|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:39||

Episode 1|Iteration 100|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:41||

Episode 1|Iteration 105|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:42||

Episode 1|Iteration 108|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:42||

Episode 1|Iteration 110|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:44||

Episode 1|Iteration 113|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:44||

Episode 1|Iteration 115|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:46||

Episode 1|Iteration 120|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:48||

Episode 1|Iteration 125|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:49||

Episode 1|Iteration 130|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:51||

Episode 1|Iteration 135|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:52||

Episode 1|Iteration 140|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:52||

Episode 1|Iteration 140|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:54||

Episode 1|Iteration 145|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:56||

Episode 1|Iteration 150|reward:  108.0|last_reward_at:   56|Elapsed Time: 0:00:58||

Episode 1|Iteration 154|reward:  114.0|last_reward_at:   56|Elapsed Time: 0:00:58||

Episode 1|Iteration 154|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:00:58||

Episode 1|Iteration 155|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:00:59||

Episode 1|Iteration 159|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:00:59||

Episode 1|Iteration 160|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:01||

Episode 1|Iteration 165|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:01||

Episode 1|Iteration 165|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:03||

Episode 1|Iteration 170|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:04||

Episode 1|Iteration 175|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:06||

Episode 1|Iteration 178|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:06||

Episode 1|Iteration 180|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:07||

Episode 1|Iteration 185|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:09||

Episode 1|Iteration 189|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:09||

Episode 1|Iteration 190|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:10||

Episode 1|Iteration 193|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:11||

Episode 1|Iteration 195|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:12||

Episode 1|Iteration 200|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:13||

Episode 1|Iteration 205|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:15||

Episode 1|Iteration 210|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:16||

Episode 1|Iteration 215|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:18||

Episode 1|Iteration 220|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:20||

Episode 1|Iteration 225|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:21||

Episode 1|Iteration 230|reward:  114.0|last_reward_at:  154|Elapsed Time: 0:01:23||

Episode 1|Iteration 233|reward:  123.0|last_reward_at:  154|Elapsed Time: 0:01:23||

Episode 1|Iteration 233|reward:  123.0|last_reward_at:  233|Elapsed Time: 0:01:23||

Episode 1|Iteration 234|reward:  223.0|last_reward_at:  233|Elapsed Time: 0:01:23||

Episode 1|Iteration 234|reward:  223.0|last_reward_at:  234|Elapsed Time: 0:01:23||

Episode 1|Iteration 235|reward:  237.0|last_reward_at:  234|Elapsed Time: 0:01:25||

Episode 1|Iteration 235|reward:  237.0|last_reward_at:  235|Elapsed Time: 0:01:25||

Episode 1|Iteration 240|reward:  237.0|last_reward_at:  235|Elapsed Time: 0:01:26||

Episode 1|Iteration 245|reward:  237.0|last_reward_at:  235|Elapsed Time: 0:01:29||

Episode 1|Iteration 250|reward:  251.0|last_reward_at:  235|Elapsed Time: 0:01:30||

Episode 1|Iteration 250|reward:  251.0|last_reward_at:  250|Elapsed Time: 0:01:30||

Episode 1|Iteration 255|reward:  251.0|last_reward_at:  250|Elapsed Time: 0:01:32||

Episode 1|Iteration 260|reward:  251.0|last_reward_at:  250|Elapsed Time: 0:01:33||

Episode 1|Iteration 265|reward:  251.0|last_reward_at:  250|Elapsed Time: 0:01:35||

Episode 1|Iteration 267|reward:  301.0|last_reward_at:  250|Elapsed Time: 0:01:35||

Episode 1|Iteration 267|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:35||

Episode 1|Iteration 270|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:36||

Episode 1|Iteration 275|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:38||

Episode 1|Iteration 280|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:40||

Episode 1|Iteration 285|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:40||

Episode 1|Iteration 285|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:41||

Episode 1|Iteration 290|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:43||

Episode 1|Iteration 295|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:44||

Episode 1|Iteration 300|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:46||

Episode 1|Iteration 305|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:47||

Episode 1|Iteration 310|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:49||

Episode 1|Iteration 315|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:50||

Episode 1|Iteration 320|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:52||

Episode 1|Iteration 325|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:54||

Episode 1|Iteration 330|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:55||

Episode 1|Iteration 335|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:57||

Episode 1|Iteration 340|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:01:58||

Episode 1|Iteration 345|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:02:00||

Episode 1|Iteration 349|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:02:00||

Episode 1|Iteration 350|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:02:02||

Episode 1|Iteration 355|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:02:04||

Episode 1|Iteration 360|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:02:05||

Episode 1|Iteration 365|reward:  301.0|last_reward_at:  267|Elapsed Time: 0:02:07||

Episode 1|Iteration 369|reward:  401.0|last_reward_at:  267|Elapsed Time: 0:02:07||

Episode 1|Iteration 369|reward:  401.0|last_reward_at:  369|Elapsed Time: 0:02:07||

Episode 1|Iteration 370|reward:  401.0|last_reward_at:  369|Elapsed Time: 0:02:09||

Episode 1|Iteration 371|reward:  415.0|last_reward_at:  369|Elapsed Time: 0:02:09||

Episode 1|Iteration 371|reward:  415.0|last_reward_at:  371|Elapsed Time: 0:02:09||

Episode 1|Iteration 375|reward:  415.0|last_reward_at:  371|Elapsed Time: 0:02:10||

Episode 1|Iteration 378|reward:  465.0|last_reward_at:  371|Elapsed Time: 0:02:10||

Episode 1|Iteration 378|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:10||

Episode 1|Iteration 380|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:10||

Episode 1|Iteration 380|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:12||

Episode 1|Iteration 385|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:14||

Episode 1|Iteration 388|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:14||

Episode 1|Iteration 390|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:15||

Episode 1|Iteration 395|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:17||

Episode 1|Iteration 396|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:17||

Episode 1|Iteration 400|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:19||

Episode 1|Iteration 405|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:20||

Episode 1|Iteration 410|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:22||

Episode 1|Iteration 413|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:22||

Episode 1|Iteration 414|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:22||

Episode 1|Iteration 415|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:23||

Episode 1|Iteration 420|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:25||

Episode 1|Iteration 425|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:26||

Episode 1|Iteration 430|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:28||

Episode 1|Iteration 435|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:29||

Episode 1|Iteration 440|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:31||

Episode 1|Iteration 445|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:32||

Episode 1|Iteration 450|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:32||

Episode 1|Iteration 450|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:34||

Episode 1|Iteration 455|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:36||

Episode 1|Iteration 460|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:37||

Episode 1|Iteration 465|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:39||

Episode 1|Iteration 470|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:40||

Episode 1|Iteration 475|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:40||

Episode 1|Iteration 475|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:42||

Episode 1|Iteration 480|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:43||

Episode 1|Iteration 484|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:43||

Episode 1|Iteration 485|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:45||

Episode 1|Iteration 490|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:47||

Episode 1|Iteration 493|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:47||

Episode 1|Iteration 495|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:48||

Episode 1|Iteration 500|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:48||

Episode 1|Iteration 500|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:50||

Episode 1|Iteration 500|reward:  465.0|last_reward_at:  378|Elapsed Time: 0:02:50||




  Episode 1 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/12 (0.14)
    explore-remote: 3/102 (0.03)
    explore-connect: 0/39 (0.00)
    exploit-local: 1/22 (0.04)
    exploit-remote: 4/289 (0.01)
    exploit-connect: 5/21 (0.19)
  exploit deflected to exploration: 58
  ## Episode: 2/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 2|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 2|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 2|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 2|Iteration 5|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:01||

Episode 2|Iteration 9|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:01||

Episode 2|Iteration 9|reward:   22.0|last_reward_at:    9|Elapsed Time: 0:00:01||

Episode 2|Iteration 10|reward:   22.0|last_reward_at:    9|Elapsed Time: 0:00:03||

Episode 2|Iteration 15|reward:   22.0|last_reward_at:    9|Elapsed Time: 0:00:05||

Episode 2|Iteration 20|reward:   22.0|last_reward_at:    9|Elapsed Time: 0:00:07||

Episode 2|Iteration 25|reward:   22.0|last_reward_at:    9|Elapsed Time: 0:00:08||

Episode 2|Iteration 29|reward:   36.0|last_reward_at:    9|Elapsed Time: 0:00:08||

Episode 2|Iteration 29|reward:   36.0|last_reward_at:   29|Elapsed Time: 0:00:08||

Episode 2|Iteration 30|reward:   36.0|last_reward_at:   29|Elapsed Time: 0:00:09||

Episode 2|Iteration 35|reward:   36.0|last_reward_at:   29|Elapsed Time: 0:00:11||

Episode 2|Iteration 40|reward:   36.0|last_reward_at:   29|Elapsed Time: 0:00:12||

Episode 2|Iteration 45|reward:   86.0|last_reward_at:   29|Elapsed Time: 0:00:14||

Episode 2|Iteration 45|reward:   86.0|last_reward_at:   45|Elapsed Time: 0:00:14||

Episode 2|Iteration 50|reward:   86.0|last_reward_at:   45|Elapsed Time: 0:00:15||

Episode 2|Iteration 55|reward:   86.0|last_reward_at:   45|Elapsed Time: 0:00:17||

Episode 2|Iteration 60|reward:   86.0|last_reward_at:   45|Elapsed Time: 0:00:18||

Episode 2|Iteration 64|reward:   86.0|last_reward_at:   45|Elapsed Time: 0:00:18||

Episode 2|Iteration 65|reward:   86.0|last_reward_at:   45|Elapsed Time: 0:00:20||

Episode 2|Iteration 70|reward:   86.0|last_reward_at:   45|Elapsed Time: 0:00:21||

Episode 2|Iteration 75|reward:   86.0|last_reward_at:   45|Elapsed Time: 0:00:23||

Episode 2|Iteration 78|reward:   86.0|last_reward_at:   45|Elapsed Time: 0:00:23||

Episode 2|Iteration 80|reward:   86.0|last_reward_at:   45|Elapsed Time: 0:00:25||

Episode 2|Iteration 85|reward:   86.0|last_reward_at:   45|Elapsed Time: 0:00:26||

Episode 2|Iteration 88|reward:   92.0|last_reward_at:   45|Elapsed Time: 0:00:26||

Episode 2|Iteration 88|reward:   92.0|last_reward_at:   88|Elapsed Time: 0:00:26||

Episode 2|Iteration 90|reward:   92.0|last_reward_at:   88|Elapsed Time: 0:00:28||

Episode 2|Iteration 95|reward:   92.0|last_reward_at:   88|Elapsed Time: 0:00:29||

Episode 2|Iteration 97|reward:   92.0|last_reward_at:   88|Elapsed Time: 0:00:29||

Episode 2|Iteration 100|reward:   92.0|last_reward_at:   88|Elapsed Time: 0:00:31||

Episode 2|Iteration 105|reward:   92.0|last_reward_at:   88|Elapsed Time: 0:00:32||

Episode 2|Iteration 110|reward:   92.0|last_reward_at:   88|Elapsed Time: 0:00:34||

Episode 2|Iteration 113|reward:  103.0|last_reward_at:   88|Elapsed Time: 0:00:34||

Episode 2|Iteration 113|reward:  103.0|last_reward_at:  113|Elapsed Time: 0:00:34||

Episode 2|Iteration 115|reward:  103.0|last_reward_at:  113|Elapsed Time: 0:00:35||

Episode 2|Iteration 120|reward:  103.0|last_reward_at:  113|Elapsed Time: 0:00:36||

Episode 2|Iteration 125|reward:  103.0|last_reward_at:  113|Elapsed Time: 0:00:38||

Episode 2|Iteration 130|reward:  103.0|last_reward_at:  113|Elapsed Time: 0:00:40||

Episode 2|Iteration 135|reward:  103.0|last_reward_at:  113|Elapsed Time: 0:00:41||

Episode 2|Iteration 140|reward:  103.0|last_reward_at:  113|Elapsed Time: 0:00:43||

Episode 2|Iteration 144|reward:  103.0|last_reward_at:  113|Elapsed Time: 0:00:43||

Episode 2|Iteration 145|reward:  103.0|last_reward_at:  113|Elapsed Time: 0:00:44||

Episode 2|Iteration 150|reward:  103.0|last_reward_at:  113|Elapsed Time: 0:00:45||

Episode 2|Iteration 155|reward:  103.0|last_reward_at:  113|Elapsed Time: 0:00:47||

Episode 2|Iteration 157|reward:  112.0|last_reward_at:  113|Elapsed Time: 0:00:47||

Episode 2|Iteration 157|reward:  112.0|last_reward_at:  157|Elapsed Time: 0:00:47||

Episode 2|Iteration 158|reward:  123.0|last_reward_at:  157|Elapsed Time: 0:00:47||

Episode 2|Iteration 158|reward:  123.0|last_reward_at:  158|Elapsed Time: 0:00:47||

Episode 2|Iteration 160|reward:  123.0|last_reward_at:  158|Elapsed Time: 0:00:48||

Episode 2|Iteration 165|reward:  123.0|last_reward_at:  158|Elapsed Time: 0:00:50||

Episode 2|Iteration 170|reward:  123.0|last_reward_at:  158|Elapsed Time: 0:00:51||

Episode 2|Iteration 173|reward:  223.0|last_reward_at:  158|Elapsed Time: 0:00:51||

Episode 2|Iteration 173|reward:  223.0|last_reward_at:  173|Elapsed Time: 0:00:51||

Episode 2|Iteration 174|reward:  237.0|last_reward_at:  173|Elapsed Time: 0:00:51||

Episode 2|Iteration 174|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:00:51||

Episode 2|Iteration 175|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:00:53||

Episode 2|Iteration 180|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:00:54||

Episode 2|Iteration 184|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:00:54||

Episode 2|Iteration 185|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:00:55||

Episode 2|Iteration 190|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:00:57||

Episode 2|Iteration 195|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:00:58||

Episode 2|Iteration 200|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:00||

Episode 2|Iteration 205|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:01||

Episode 2|Iteration 210|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:02||

Episode 2|Iteration 215|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:04||

Episode 2|Iteration 220|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:05||

Episode 2|Iteration 225|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:07||

Episode 2|Iteration 229|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:07||

Episode 2|Iteration 230|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:09||

Episode 2|Iteration 235|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:10||

Episode 2|Iteration 240|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:12||

Episode 2|Iteration 245|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:13||

Episode 2|Iteration 250|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:15||

Episode 2|Iteration 255|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:16||

Episode 2|Iteration 260|reward:  237.0|last_reward_at:  174|Elapsed Time: 0:01:18||

Episode 2|Iteration 261|reward:  337.0|last_reward_at:  174|Elapsed Time: 0:01:18||

Episode 2|Iteration 261|reward:  337.0|last_reward_at:  261|Elapsed Time: 0:01:18||

Episode 2|Iteration 265|reward:  351.0|last_reward_at:  261|Elapsed Time: 0:01:19||

Episode 2|Iteration 265|reward:  351.0|last_reward_at:  265|Elapsed Time: 0:01:19||

Episode 2|Iteration 270|reward:  351.0|last_reward_at:  265|Elapsed Time: 0:01:21||

Episode 2|Iteration 275|reward:  351.0|last_reward_at:  265|Elapsed Time: 0:01:23||

Episode 2|Iteration 280|reward:  351.0|last_reward_at:  265|Elapsed Time: 0:01:24||

Episode 2|Iteration 281|reward:  401.0|last_reward_at:  265|Elapsed Time: 0:01:24||

Episode 2|Iteration 281|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:24||

Episode 2|Iteration 285|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:25||

Episode 2|Iteration 285|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:26||

Episode 2|Iteration 290|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:27||

Episode 2|Iteration 294|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:28||

Episode 2|Iteration 295|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:29||

Episode 2|Iteration 298|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:29||

Episode 2|Iteration 300|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:31||

Episode 2|Iteration 305|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:32||

Episode 2|Iteration 310|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:33||

Episode 2|Iteration 315|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:35||

Episode 2|Iteration 320|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:36||

Episode 2|Iteration 325|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:38||

Episode 2|Iteration 330|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:38||

Episode 2|Iteration 330|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:39||

Episode 2|Iteration 334|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:39||

Episode 2|Iteration 335|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:41||

Episode 2|Iteration 340|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:43||

Episode 2|Iteration 345|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:44||

Episode 2|Iteration 350|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:46||

Episode 2|Iteration 355|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:46||

Episode 2|Iteration 355|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:47||

Episode 2|Iteration 360|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:49||

Episode 2|Iteration 365|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:50||

Episode 2|Iteration 370|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:52||

Episode 2|Iteration 375|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:53||

Episode 2|Iteration 380|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:54||

Episode 2|Iteration 380|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:55||

Episode 2|Iteration 385|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:57||

Episode 2|Iteration 388|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:57||

Episode 2|Iteration 390|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:01:58||

Episode 2|Iteration 395|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:02:00||

Episode 2|Iteration 397|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:02:00||

Episode 2|Iteration 400|reward:  401.0|last_reward_at:  281|Elapsed Time: 0:02:01||

Episode 2|Iteration 404|reward:  415.0|last_reward_at:  281|Elapsed Time: 0:02:01||

Episode 2|Iteration 404|reward:  415.0|last_reward_at:  404|Elapsed Time: 0:02:01||

Episode 2|Iteration 405|reward:  415.0|last_reward_at:  404|Elapsed Time: 0:02:03||

Episode 2|Iteration 408|reward:  426.0|last_reward_at:  404|Elapsed Time: 0:02:03||

Episode 2|Iteration 408|reward:  426.0|last_reward_at:  408|Elapsed Time: 0:02:03||

Episode 2|Iteration 410|reward:  426.0|last_reward_at:  408|Elapsed Time: 0:02:05||

Episode 2|Iteration 414|reward:  476.0|last_reward_at:  408|Elapsed Time: 0:02:05||

Episode 2|Iteration 414|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:05||

Episode 2|Iteration 415|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:06||

Episode 2|Iteration 420|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:08||

Episode 2|Iteration 425|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:08||

Episode 2|Iteration 425|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:09||

Episode 2|Iteration 430|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:11||

Episode 2|Iteration 435|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:12||

Episode 2|Iteration 439|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:12||

Episode 2|Iteration 440|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:16||

Episode 2|Iteration 444|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:17||

Episode 2|Iteration 445|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:18||

Episode 2|Iteration 450|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:19||

Episode 2|Iteration 455|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:21||

Episode 2|Iteration 460|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:22||

Episode 2|Iteration 465|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:24||

Episode 2|Iteration 468|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:25||

Episode 2|Iteration 470|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:26||

Episode 2|Iteration 475|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:26||

Episode 2|Iteration 475|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:28||

Episode 2|Iteration 480|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:29||

Episode 2|Iteration 484|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:29||

Episode 2|Iteration 485|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:31||

Episode 2|Iteration 490|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:32||

Episode 2|Iteration 494|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:32||

Episode 2|Iteration 495|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:34||

Episode 2|Iteration 496|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:34||

Episode 2|Iteration 500|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:34||

Episode 2|Iteration 500|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:35||

Episode 2|Iteration 500|reward:  476.0|last_reward_at:  414|Elapsed Time: 0:02:35||




  Episode 2 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/11 (0.15)
    explore-remote: 1/96 (0.01)
    explore-connect: 1/49 (0.02)
    exploit-local: 1/3 (0.25)
    exploit-remote: 7/302 (0.02)
    exploit-connect: 4/23 (0.15)
  exploit deflected to exploration: 60
  ## Episode: 3/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 3|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 3|Iteration 4|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 3|Iteration 4|reward:   22.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 3|Iteration 5|reward:   22.0|last_reward_at:    4|Elapsed Time: 0:00:01||

Episode 3|Iteration 6|reward:   33.0|last_reward_at:    4|Elapsed Time: 0:00:01||

Episode 3|Iteration 6|reward:   33.0|last_reward_at:    6|Elapsed Time: 0:00:01||

Episode 3|Iteration 10|reward:   33.0|last_reward_at:    6|Elapsed Time: 0:00:02||

Episode 3|Iteration 15|reward:   44.0|last_reward_at:    6|Elapsed Time: 0:00:04||

Episode 3|Iteration 15|reward:   44.0|last_reward_at:   15|Elapsed Time: 0:00:04||

Episode 3|Iteration 20|reward:   44.0|last_reward_at:   15|Elapsed Time: 0:00:05||

Episode 3|Iteration 24|reward:   44.0|last_reward_at:   15|Elapsed Time: 0:00:05||

Episode 3|Iteration 25|reward:   44.0|last_reward_at:   15|Elapsed Time: 0:00:07||

Episode 3|Iteration 30|reward:   44.0|last_reward_at:   15|Elapsed Time: 0:00:08||

Episode 3|Iteration 32|reward:   58.0|last_reward_at:   15|Elapsed Time: 0:00:08||

Episode 3|Iteration 32|reward:   58.0|last_reward_at:   32|Elapsed Time: 0:00:08||

Episode 3|Iteration 35|reward:   58.0|last_reward_at:   32|Elapsed Time: 0:00:10||

Episode 3|Iteration 40|reward:   58.0|last_reward_at:   32|Elapsed Time: 0:00:12||

Episode 3|Iteration 41|reward:   69.0|last_reward_at:   32|Elapsed Time: 0:00:12||

Episode 3|Iteration 41|reward:   69.0|last_reward_at:   41|Elapsed Time: 0:00:12||

Episode 3|Iteration 44|reward:  119.0|last_reward_at:   41|Elapsed Time: 0:00:12||

Episode 3|Iteration 44|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:12||

Episode 3|Iteration 45|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:13||

Episode 3|Iteration 50|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:14||

Episode 3|Iteration 55|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:16||

Episode 3|Iteration 59|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:16||

Episode 3|Iteration 60|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:18||

Episode 3|Iteration 65|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:19||

Episode 3|Iteration 70|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:20||

Episode 3|Iteration 75|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:23||

Episode 3|Iteration 80|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:24||

Episode 3|Iteration 85|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:25||

Episode 3|Iteration 90|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:27||

Episode 3|Iteration 95|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:28||

Episode 3|Iteration 97|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:28||

Episode 3|Iteration 100|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:30||

Episode 3|Iteration 105|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:31||

Episode 3|Iteration 110|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:33||

Episode 3|Iteration 115|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:34||

Episode 3|Iteration 119|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:34||

Episode 3|Iteration 120|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:36||

Episode 3|Iteration 125|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:37||

Episode 3|Iteration 130|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:39||

Episode 3|Iteration 135|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:41||

Episode 3|Iteration 140|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:43||

Episode 3|Iteration 143|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:43||

Episode 3|Iteration 145|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:44||

Episode 3|Iteration 150|reward:  119.0|last_reward_at:   44|Elapsed Time: 0:00:53||

Episode 3|Iteration 151|reward:  133.0|last_reward_at:   44|Elapsed Time: 0:00:53||

Episode 3|Iteration 151|reward:  133.0|last_reward_at:  151|Elapsed Time: 0:00:53||

Episode 3|Iteration 155|reward:  133.0|last_reward_at:  151|Elapsed Time: 0:00:54||

Episode 3|Iteration 160|reward:  133.0|last_reward_at:  151|Elapsed Time: 0:00:56||

Episode 3|Iteration 162|reward:  139.0|last_reward_at:  151|Elapsed Time: 0:00:56||

Episode 3|Iteration 162|reward:  139.0|last_reward_at:  162|Elapsed Time: 0:00:56||

Episode 3|Iteration 164|reward:  189.0|last_reward_at:  162|Elapsed Time: 0:00:56||

Episode 3|Iteration 164|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:00:56||

Episode 3|Iteration 165|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:00:57||

Episode 3|Iteration 168|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:00:57||

Episode 3|Iteration 170|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:00:58||

Episode 3|Iteration 175|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:00||

Episode 3|Iteration 180|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:02||

Episode 3|Iteration 184|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:02||

Episode 3|Iteration 185|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:03||

Episode 3|Iteration 187|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:04||

Episode 3|Iteration 190|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:05||

Episode 3|Iteration 195|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:07||

Episode 3|Iteration 196|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:07||

Episode 3|Iteration 200|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:09||

Episode 3|Iteration 205|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:11||

Episode 3|Iteration 209|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:11||

Episode 3|Iteration 210|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:12||

Episode 3|Iteration 215|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:14||

Episode 3|Iteration 220|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:16||

Episode 3|Iteration 225|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:18||

Episode 3|Iteration 230|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:19||

Episode 3|Iteration 235|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:19||

Episode 3|Iteration 235|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:21||

Episode 3|Iteration 240|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:23||

Episode 3|Iteration 245|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:24||

Episode 3|Iteration 250|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:25||

Episode 3|Iteration 254|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:25||

Episode 3|Iteration 255|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:27||

Episode 3|Iteration 260|reward:  189.0|last_reward_at:  164|Elapsed Time: 0:01:29||

Episode 3|Iteration 262|reward:  198.0|last_reward_at:  164|Elapsed Time: 0:01:29||

Episode 3|Iteration 262|reward:  198.0|last_reward_at:  262|Elapsed Time: 0:01:29||

Episode 3|Iteration 265|reward:  198.0|last_reward_at:  262|Elapsed Time: 0:01:31||

Episode 3|Iteration 270|reward:  198.0|last_reward_at:  262|Elapsed Time: 0:01:33||

Episode 3|Iteration 275|reward:  198.0|last_reward_at:  262|Elapsed Time: 0:01:34||

Episode 3|Iteration 280|reward:  198.0|last_reward_at:  262|Elapsed Time: 0:01:35||

Episode 3|Iteration 285|reward:  198.0|last_reward_at:  262|Elapsed Time: 0:01:37||

Episode 3|Iteration 290|reward:  198.0|last_reward_at:  262|Elapsed Time: 0:01:38||

Episode 3|Iteration 291|reward:  298.0|last_reward_at:  262|Elapsed Time: 0:01:38||

Episode 3|Iteration 291|reward:  298.0|last_reward_at:  291|Elapsed Time: 0:01:38||

Episode 3|Iteration 292|reward:  312.0|last_reward_at:  291|Elapsed Time: 0:01:38||

Episode 3|Iteration 292|reward:  312.0|last_reward_at:  292|Elapsed Time: 0:01:38||

Episode 3|Iteration 295|reward:  312.0|last_reward_at:  292|Elapsed Time: 0:01:40||

Episode 3|Iteration 300|reward:  312.0|last_reward_at:  292|Elapsed Time: 0:01:42||

Episode 3|Iteration 302|reward:  412.0|last_reward_at:  292|Elapsed Time: 0:01:42||

Episode 3|Iteration 302|reward:  412.0|last_reward_at:  302|Elapsed Time: 0:01:42||

Episode 3|Iteration 305|reward:  412.0|last_reward_at:  302|Elapsed Time: 0:01:44||

Episode 3|Iteration 310|reward:  412.0|last_reward_at:  302|Elapsed Time: 0:01:45||

Episode 3|Iteration 315|reward:  412.0|last_reward_at:  302|Elapsed Time: 0:01:47||

Episode 3|Iteration 316|reward:  412.0|last_reward_at:  302|Elapsed Time: 0:01:47||

Episode 3|Iteration 317|reward:  412.0|last_reward_at:  302|Elapsed Time: 0:01:47||

Episode 3|Iteration 320|reward:  412.0|last_reward_at:  302|Elapsed Time: 0:01:49||

Episode 3|Iteration 323|reward:  412.0|last_reward_at:  302|Elapsed Time: 0:01:49||

Episode 3|Iteration 325|reward:  412.0|last_reward_at:  302|Elapsed Time: 0:01:50||

Episode 3|Iteration 330|reward:  412.0|last_reward_at:  302|Elapsed Time: 0:01:51||

Episode 3|Iteration 330|reward:  426.0|last_reward_at:  302|Elapsed Time: 0:01:52||

Episode 3|Iteration 330|reward:  426.0|last_reward_at:  330|Elapsed Time: 0:01:52||

Episode 3|Iteration 333|reward:  476.0|last_reward_at:  330|Elapsed Time: 0:01:52||

Episode 3|Iteration 333|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:01:52||

Episode 3|Iteration 335|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:01:54||

Episode 3|Iteration 340|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:01:56||

Episode 3|Iteration 342|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:01:56||

Episode 3|Iteration 345|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:01:58||

Episode 3|Iteration 349|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:01:58||

Episode 3|Iteration 350|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:01:59||

Episode 3|Iteration 355|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:00||

Episode 3|Iteration 358|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:00||

Episode 3|Iteration 360|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:02||

Episode 3|Iteration 365|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:03||

Episode 3|Iteration 370|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:05||

Episode 3|Iteration 372|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:05||

Episode 3|Iteration 374|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:05||

Episode 3|Iteration 375|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:06||

Episode 3|Iteration 378|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:07||

Episode 3|Iteration 380|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:08||

Episode 3|Iteration 385|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:10||

Episode 3|Iteration 390|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:11||

Episode 3|Iteration 393|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:11||

Episode 3|Iteration 395|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:13||

Episode 3|Iteration 400|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:15||

Episode 3|Iteration 405|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:17||

Episode 3|Iteration 410|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:18||

Episode 3|Iteration 415|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:20||

Episode 3|Iteration 418|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:20||

Episode 3|Iteration 420|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:21||

Episode 3|Iteration 421|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:21||

Episode 3|Iteration 423|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:21||

Episode 3|Iteration 425|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:23||

Episode 3|Iteration 430|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:24||

Episode 3|Iteration 434|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:24||

Episode 3|Iteration 435|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:25||

Episode 3|Iteration 436|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:25||

Episode 3|Iteration 439|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:26||

Episode 3|Iteration 440|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:27||

Episode 3|Iteration 444|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:27||

Episode 3|Iteration 445|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:28||

Episode 3|Iteration 450|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:28||

Episode 3|Iteration 450|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:30||

Episode 3|Iteration 453|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:30||

Episode 3|Iteration 455|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:31||

Episode 3|Iteration 460|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:33||

Episode 3|Iteration 463|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:33||

Episode 3|Iteration 465|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:35||

Episode 3|Iteration 469|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:35||

Episode 3|Iteration 470|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:36||

Episode 3|Iteration 475|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:36||

Episode 3|Iteration 475|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:37||

Episode 3|Iteration 476|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:37||

Episode 3|Iteration 480|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:39||

Episode 3|Iteration 485|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:40||

Episode 3|Iteration 490|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:41||

Episode 3|Iteration 495|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:43||

Episode 3|Iteration 497|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:43||

Episode 3|Iteration 500|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:44||

Episode 3|Iteration 500|reward:  476.0|last_reward_at:  333|Elapsed Time: 0:02:44||




  Episode 3 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/5 (0.17)
    explore-remote: 3/112 (0.03)
    explore-connect: 0/61 (0.00)
    exploit-local: 2/9 (0.18)
    exploit-remote: 5/284 (0.02)
    exploit-connect: 5/13 (0.28)
  exploit deflected to exploration: 82
  ## Episode: 4/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 4|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 4|Iteration 5|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 4|Iteration 5|reward:   22.0|last_reward_at:    5|Elapsed Time: 0:00:01||

Episode 4|Iteration 8|reward:   31.0|last_reward_at:    5|Elapsed Time: 0:00:01||

Episode 4|Iteration 8|reward:   31.0|last_reward_at:    8|Elapsed Time: 0:00:01||

Episode 4|Iteration 10|reward:   31.0|last_reward_at:    8|Elapsed Time: 0:00:02||

Episode 4|Iteration 11|reward:  131.0|last_reward_at:    8|Elapsed Time: 0:00:02||

Episode 4|Iteration 11|reward:  131.0|last_reward_at:   11|Elapsed Time: 0:00:02||

Episode 4|Iteration 15|reward:  131.0|last_reward_at:   11|Elapsed Time: 0:00:04||

Episode 4|Iteration 20|reward:  131.0|last_reward_at:   11|Elapsed Time: 0:00:05||

Episode 4|Iteration 25|reward:  131.0|last_reward_at:   11|Elapsed Time: 0:00:06||

Episode 4|Iteration 26|reward:  131.0|last_reward_at:   11|Elapsed Time: 0:00:06||

Episode 4|Iteration 30|reward:  131.0|last_reward_at:   11|Elapsed Time: 0:00:08||

Episode 4|Iteration 31|reward:  142.0|last_reward_at:   11|Elapsed Time: 0:00:08||

Episode 4|Iteration 31|reward:  142.0|last_reward_at:   31|Elapsed Time: 0:00:08||

Episode 4|Iteration 33|reward:  156.0|last_reward_at:   31|Elapsed Time: 0:00:08||

Episode 4|Iteration 33|reward:  156.0|last_reward_at:   33|Elapsed Time: 0:00:08||

Episode 4|Iteration 35|reward:  156.0|last_reward_at:   33|Elapsed Time: 0:00:09||

Episode 4|Iteration 40|reward:  156.0|last_reward_at:   33|Elapsed Time: 0:00:10||

Episode 4|Iteration 45|reward:  156.0|last_reward_at:   33|Elapsed Time: 0:00:12||

Episode 4|Iteration 50|reward:  156.0|last_reward_at:   33|Elapsed Time: 0:00:13||

Episode 4|Iteration 55|reward:  156.0|last_reward_at:   33|Elapsed Time: 0:00:14||

Episode 4|Iteration 60|reward:  156.0|last_reward_at:   33|Elapsed Time: 0:00:16||

Episode 4|Iteration 63|reward:  256.0|last_reward_at:   33|Elapsed Time: 0:00:16||

Episode 4|Iteration 63|reward:  256.0|last_reward_at:   63|Elapsed Time: 0:00:16||

Episode 4|Iteration 64|reward:  270.0|last_reward_at:   63|Elapsed Time: 0:00:16||

Episode 4|Iteration 64|reward:  270.0|last_reward_at:   64|Elapsed Time: 0:00:16||

Episode 4|Iteration 65|reward:  270.0|last_reward_at:   64|Elapsed Time: 0:00:18||

Episode 4|Iteration 70|reward:  270.0|last_reward_at:   64|Elapsed Time: 0:00:18||

Episode 4|Iteration 70|reward:  270.0|last_reward_at:   64|Elapsed Time: 0:00:19||

Episode 4|Iteration 75|reward:  270.0|last_reward_at:   64|Elapsed Time: 0:00:21||

Episode 4|Iteration 80|reward:  270.0|last_reward_at:   64|Elapsed Time: 0:00:22||

Episode 4|Iteration 85|reward:  270.0|last_reward_at:   64|Elapsed Time: 0:00:25||

Episode 4|Iteration 90|reward:  270.0|last_reward_at:   64|Elapsed Time: 0:00:26||

Episode 4|Iteration 95|reward:  270.0|last_reward_at:   64|Elapsed Time: 0:00:27||

Episode 4|Iteration 100|reward:  270.0|last_reward_at:   64|Elapsed Time: 0:00:29||

Episode 4|Iteration 104|reward:  284.0|last_reward_at:   64|Elapsed Time: 0:00:29||

Episode 4|Iteration 104|reward:  284.0|last_reward_at:  104|Elapsed Time: 0:00:29||

Episode 4|Iteration 105|reward:  284.0|last_reward_at:  104|Elapsed Time: 0:00:30||

Episode 4|Iteration 110|reward:  284.0|last_reward_at:  104|Elapsed Time: 0:00:31||

Episode 4|Iteration 111|reward:  295.0|last_reward_at:  104|Elapsed Time: 0:00:31||

Episode 4|Iteration 111|reward:  295.0|last_reward_at:  111|Elapsed Time: 0:00:31||

Episode 4|Iteration 115|reward:  295.0|last_reward_at:  111|Elapsed Time: 0:00:33||

Episode 4|Iteration 120|reward:  295.0|last_reward_at:  111|Elapsed Time: 0:00:34||

Episode 4|Iteration 125|reward:  295.0|last_reward_at:  111|Elapsed Time: 0:00:36||

Episode 4|Iteration 130|reward:  295.0|last_reward_at:  111|Elapsed Time: 0:00:37||

Episode 4|Iteration 135|reward:  295.0|last_reward_at:  111|Elapsed Time: 0:00:38||

Episode 4|Iteration 136|reward:  345.0|last_reward_at:  111|Elapsed Time: 0:00:38||

Episode 4|Iteration 136|reward:  345.0|last_reward_at:  136|Elapsed Time: 0:00:38||

Episode 4|Iteration 140|reward:  345.0|last_reward_at:  136|Elapsed Time: 0:00:40||

Episode 4|Iteration 145|reward:  345.0|last_reward_at:  136|Elapsed Time: 0:00:41||

Episode 4|Iteration 149|reward:  395.0|last_reward_at:  136|Elapsed Time: 0:00:41||

Episode 4|Iteration 149|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:41||

Episode 4|Iteration 150|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:42||

Episode 4|Iteration 155|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:44||

Episode 4|Iteration 156|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:44||

Episode 4|Iteration 157|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:44||

Episode 4|Iteration 160|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:45||

Episode 4|Iteration 165|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:46||

Episode 4|Iteration 170|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:47||

Episode 4|Iteration 175|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:49||

Episode 4|Iteration 178|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:49||

Episode 4|Iteration 180|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:51||

Episode 4|Iteration 184|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:51||

Episode 4|Iteration 185|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:52||

Episode 4|Iteration 190|reward:  395.0|last_reward_at:  149|Elapsed Time: 0:00:53||

Episode 4|Iteration 195|reward:  406.0|last_reward_at:  149|Elapsed Time: 0:00:55||

Episode 4|Iteration 195|reward:  406.0|last_reward_at:  195|Elapsed Time: 0:00:55||

Episode 4|Iteration 200|reward:  406.0|last_reward_at:  195|Elapsed Time: 0:00:56||

Episode 4|Iteration 203|reward:  420.0|last_reward_at:  195|Elapsed Time: 0:00:56||

Episode 4|Iteration 203|reward:  420.0|last_reward_at:  203|Elapsed Time: 0:00:56||

Episode 4|Iteration 205|reward:  420.0|last_reward_at:  203|Elapsed Time: 0:00:57||

Episode 4|Iteration 208|reward:  470.0|last_reward_at:  203|Elapsed Time: 0:00:58||

Episode 4|Iteration 208|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:00:58||

Episode 4|Iteration 209|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:00:58||

Episode 4|Iteration 210|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:00:59||

Episode 4|Iteration 215|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:02||

Episode 4|Iteration 219|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:02||

Episode 4|Iteration 220|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:03||

Episode 4|Iteration 225|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:04||

Episode 4|Iteration 230|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:06||

Episode 4|Iteration 235|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:07||

Episode 4|Iteration 240|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:08||

Episode 4|Iteration 245|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:10||

Episode 4|Iteration 250|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:11||

Episode 4|Iteration 254|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:11||

Episode 4|Iteration 255|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:12||

Episode 4|Iteration 258|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:13||

Episode 4|Iteration 260|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:14||

Episode 4|Iteration 265|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:16||

Episode 4|Iteration 270|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:17||

Episode 4|Iteration 275|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:18||

Episode 4|Iteration 277|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:18||

Episode 4|Iteration 280|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:20||

Episode 4|Iteration 285|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:20||

Episode 4|Iteration 285|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:21||

Episode 4|Iteration 290|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:23||

Episode 4|Iteration 292|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:23||

Episode 4|Iteration 295|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:25||

Episode 4|Iteration 296|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:25||

Episode 4|Iteration 300|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:26||

Episode 4|Iteration 305|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:28||

Episode 4|Iteration 306|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:28||

Episode 4|Iteration 310|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:29||

Episode 4|Iteration 315|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:30||

Episode 4|Iteration 317|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:30||

Episode 4|Iteration 320|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:32||

Episode 4|Iteration 323|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:32||

Episode 4|Iteration 325|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:34||

Episode 4|Iteration 330|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:34||

Episode 4|Iteration 330|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:35||

Episode 4|Iteration 334|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:35||

Episode 4|Iteration 335|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:37||

Episode 4|Iteration 340|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:38||

Episode 4|Iteration 345|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:40||

Episode 4|Iteration 350|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:41||

Episode 4|Iteration 355|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:43||

Episode 4|Iteration 360|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:44||

Episode 4|Iteration 365|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:45||

Episode 4|Iteration 367|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:46||

Episode 4|Iteration 370|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:47||

Episode 4|Iteration 374|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:47||

Episode 4|Iteration 375|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:48||

Episode 4|Iteration 380|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:48||

Episode 4|Iteration 380|reward:  470.0|last_reward_at:  208|Elapsed Time: 0:01:49||

Episode 4|Iteration 385|reward:  476.0|last_reward_at:  208|Elapsed Time: 0:01:51||

Episode 4|Iteration 385|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:01:51||

Episode 4|Iteration 390|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:01:52||

Episode 4|Iteration 395|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:01:54||

Episode 4|Iteration 400|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:01:55||

Episode 4|Iteration 405|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:01:57||

Episode 4|Iteration 410|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:01:58||

Episode 4|Iteration 413|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:01:58||

Episode 4|Iteration 415|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:01:59||

Episode 4|Iteration 419|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:00||

Episode 4|Iteration 420|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:01||

Episode 4|Iteration 425|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:01||

Episode 4|Iteration 425|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:03||

Episode 4|Iteration 428|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:03||

Episode 4|Iteration 430|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:04||

Episode 4|Iteration 435|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:06||

Episode 4|Iteration 438|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:07||

Episode 4|Iteration 440|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:08||

Episode 4|Iteration 444|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:08||

Episode 4|Iteration 445|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:10||

Episode 4|Iteration 450|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:10||

Episode 4|Iteration 450|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:12||

Episode 4|Iteration 455|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:13||

Episode 4|Iteration 460|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:14||

Episode 4|Iteration 465|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:15||

Episode 4|Iteration 467|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:15||

Episode 4|Iteration 470|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:17||

Episode 4|Iteration 475|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:18||

Episode 4|Iteration 478|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:18||

Episode 4|Iteration 480|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:20||

Episode 4|Iteration 485|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:21||

Episode 4|Iteration 488|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:22||

Episode 4|Iteration 490|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:23||

Episode 4|Iteration 494|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:23||

Episode 4|Iteration 495|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:24||

Episode 4|Iteration 499|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:25||

Episode 4|Iteration 500|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:26||

Episode 4|Iteration 500|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:26||




  Episode 4 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 0/11 (0.00)
    explore-remote: 3/101 (0.03)
    explore-connect: 0/52 (0.00)
    exploit-local: 3/21 (0.12)
    exploit-remote: 5/274 (0.02)
    exploit-connect: 5/25 (0.17)
  exploit deflected to exploration: 67
  ## Episode: 5/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 5|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 5|Iteration 2|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 5|Iteration 2|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 5|Iteration 5|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:01||

Episode 5|Iteration 10|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:02||

Episode 5|Iteration 15|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:03||

Episode 5|Iteration 20|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:04||

Episode 5|Iteration 25|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:05||

Episode 5|Iteration 30|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:06||

Episode 5|Iteration 34|reward:   33.0|last_reward_at:    2|Elapsed Time: 0:00:06||

Episode 5|Iteration 34|reward:   33.0|last_reward_at:   34|Elapsed Time: 0:00:06||

Episode 5|Iteration 35|reward:   33.0|last_reward_at:   34|Elapsed Time: 0:00:08||

Episode 5|Iteration 40|reward:   33.0|last_reward_at:   34|Elapsed Time: 0:00:09||

Episode 5|Iteration 42|reward:   47.0|last_reward_at:   34|Elapsed Time: 0:00:09||

Episode 5|Iteration 42|reward:   47.0|last_reward_at:   42|Elapsed Time: 0:00:09||

Episode 5|Iteration 45|reward:   47.0|last_reward_at:   42|Elapsed Time: 0:00:10||

Episode 5|Iteration 50|reward:   47.0|last_reward_at:   42|Elapsed Time: 0:00:11||

Episode 5|Iteration 55|reward:   47.0|last_reward_at:   42|Elapsed Time: 0:00:13||

Episode 5|Iteration 60|reward:   47.0|last_reward_at:   42|Elapsed Time: 0:00:14||

Episode 5|Iteration 64|reward:   58.0|last_reward_at:   42|Elapsed Time: 0:00:14||

Episode 5|Iteration 64|reward:   58.0|last_reward_at:   64|Elapsed Time: 0:00:14||

Episode 5|Iteration 65|reward:   58.0|last_reward_at:   64|Elapsed Time: 0:00:15||

Episode 5|Iteration 68|reward:   64.0|last_reward_at:   64|Elapsed Time: 0:00:15||

Episode 5|Iteration 68|reward:   64.0|last_reward_at:   68|Elapsed Time: 0:00:15||

Episode 5|Iteration 70|reward:   64.0|last_reward_at:   68|Elapsed Time: 0:00:17||

Episode 5|Iteration 74|reward:   73.0|last_reward_at:   68|Elapsed Time: 0:00:17||

Episode 5|Iteration 74|reward:   73.0|last_reward_at:   74|Elapsed Time: 0:00:17||

Episode 5|Iteration 75|reward:   73.0|last_reward_at:   74|Elapsed Time: 0:00:18||

Episode 5|Iteration 77|reward:  123.0|last_reward_at:   74|Elapsed Time: 0:00:18||

Episode 5|Iteration 77|reward:  123.0|last_reward_at:   77|Elapsed Time: 0:00:18||

Episode 5|Iteration 80|reward:  123.0|last_reward_at:   77|Elapsed Time: 0:00:20||

Episode 5|Iteration 83|reward:  137.0|last_reward_at:   77|Elapsed Time: 0:00:20||

Episode 5|Iteration 83|reward:  137.0|last_reward_at:   83|Elapsed Time: 0:00:20||

Episode 5|Iteration 85|reward:  137.0|last_reward_at:   83|Elapsed Time: 0:00:21||

Episode 5|Iteration 90|reward:  137.0|last_reward_at:   83|Elapsed Time: 0:00:23||

Episode 5|Iteration 91|reward:  187.0|last_reward_at:   83|Elapsed Time: 0:00:23||

Episode 5|Iteration 91|reward:  187.0|last_reward_at:   91|Elapsed Time: 0:00:23||

Episode 5|Iteration 95|reward:  187.0|last_reward_at:   91|Elapsed Time: 0:00:24||

Episode 5|Iteration 100|reward:  187.0|last_reward_at:   91|Elapsed Time: 0:00:26||

Episode 5|Iteration 103|reward:  187.0|last_reward_at:   91|Elapsed Time: 0:00:27||

Episode 5|Iteration 105|reward:  187.0|last_reward_at:   91|Elapsed Time: 0:00:29||

Episode 5|Iteration 110|reward:  187.0|last_reward_at:   91|Elapsed Time: 0:00:30||

Episode 5|Iteration 115|reward:  187.0|last_reward_at:   91|Elapsed Time: 0:00:32||

Episode 5|Iteration 120|reward:  187.0|last_reward_at:   91|Elapsed Time: 0:00:33||

Episode 5|Iteration 125|reward:  187.0|last_reward_at:   91|Elapsed Time: 0:00:35||

Episode 5|Iteration 127|reward:  287.0|last_reward_at:   91|Elapsed Time: 0:00:35||

Episode 5|Iteration 127|reward:  287.0|last_reward_at:  127|Elapsed Time: 0:00:35||

Episode 5|Iteration 128|reward:  301.0|last_reward_at:  127|Elapsed Time: 0:00:35||

Episode 5|Iteration 128|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:35||

Episode 5|Iteration 130|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:37||

Episode 5|Iteration 135|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:39||

Episode 5|Iteration 140|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:40||

Episode 5|Iteration 144|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:41||

Episode 5|Iteration 145|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:42||

Episode 5|Iteration 150|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:43||

Episode 5|Iteration 155|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:45||

Episode 5|Iteration 160|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:46||

Episode 5|Iteration 165|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:47||

Episode 5|Iteration 170|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:49||

Episode 5|Iteration 172|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:49||

Episode 5|Iteration 175|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:50||

Episode 5|Iteration 180|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:51||

Episode 5|Iteration 185|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:52||

Episode 5|Iteration 190|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:53||

Episode 5|Iteration 190|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:54||

Episode 5|Iteration 195|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:55||

Episode 5|Iteration 200|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:56||

Episode 5|Iteration 205|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:58||

Episode 5|Iteration 209|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:58||

Episode 5|Iteration 210|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:00:59||

Episode 5|Iteration 215|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:00||

Episode 5|Iteration 220|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:02||

Episode 5|Iteration 225|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:03||

Episode 5|Iteration 230|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:04||

Episode 5|Iteration 235|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:06||

Episode 5|Iteration 240|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:07||

Episode 5|Iteration 245|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:08||

Episode 5|Iteration 250|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:10||

Episode 5|Iteration 254|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:10||

Episode 5|Iteration 255|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:11||

Episode 5|Iteration 260|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:13||

Episode 5|Iteration 265|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:14||

Episode 5|Iteration 270|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:16||

Episode 5|Iteration 275|reward:  301.0|last_reward_at:  128|Elapsed Time: 0:01:17||

Episode 5|Iteration 278|reward:  312.0|last_reward_at:  128|Elapsed Time: 0:01:17||

Episode 5|Iteration 278|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:17||

Episode 5|Iteration 280|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:18||

Episode 5|Iteration 285|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:20||

Episode 5|Iteration 290|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:21||

Episode 5|Iteration 295|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:23||

Episode 5|Iteration 300|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:24||

Episode 5|Iteration 305|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:25||

Episode 5|Iteration 310|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:26||

Episode 5|Iteration 315|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:28||

Episode 5|Iteration 320|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:29||

Episode 5|Iteration 325|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:31||

Episode 5|Iteration 330|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:32||

Episode 5|Iteration 335|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:33||

Episode 5|Iteration 337|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:34||

Episode 5|Iteration 340|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:35||

Episode 5|Iteration 345|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:36||

Episode 5|Iteration 349|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:36||

Episode 5|Iteration 350|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:39||

Episode 5|Iteration 355|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:40||

Episode 5|Iteration 360|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:42||

Episode 5|Iteration 365|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:44||

Episode 5|Iteration 370|reward:  312.0|last_reward_at:  278|Elapsed Time: 0:01:45||

Episode 5|Iteration 374|reward:  412.0|last_reward_at:  278|Elapsed Time: 0:01:45||

Episode 5|Iteration 374|reward:  412.0|last_reward_at:  374|Elapsed Time: 0:01:45||

Episode 5|Iteration 375|reward:  412.0|last_reward_at:  374|Elapsed Time: 0:01:47||

Episode 5|Iteration 380|reward:  412.0|last_reward_at:  374|Elapsed Time: 0:01:48||

Episode 5|Iteration 385|reward:  426.0|last_reward_at:  374|Elapsed Time: 0:01:50||

Episode 5|Iteration 385|reward:  426.0|last_reward_at:  385|Elapsed Time: 0:01:50||

Episode 5|Iteration 390|reward:  426.0|last_reward_at:  385|Elapsed Time: 0:01:51||

Episode 5|Iteration 395|reward:  426.0|last_reward_at:  385|Elapsed Time: 0:01:52||

Episode 5|Iteration 400|reward:  426.0|last_reward_at:  385|Elapsed Time: 0:01:54||

Episode 5|Iteration 405|reward:  426.0|last_reward_at:  385|Elapsed Time: 0:01:55||

Episode 5|Iteration 410|reward:  426.0|last_reward_at:  385|Elapsed Time: 0:01:56||

Episode 5|Iteration 415|reward:  426.0|last_reward_at:  385|Elapsed Time: 0:01:58||

Episode 5|Iteration 420|reward:  426.0|last_reward_at:  385|Elapsed Time: 0:01:59||

Episode 5|Iteration 425|reward:  426.0|last_reward_at:  385|Elapsed Time: 0:02:00||

Episode 5|Iteration 430|reward:  426.0|last_reward_at:  385|Elapsed Time: 0:02:02||

Episode 5|Iteration 435|reward:  426.0|last_reward_at:  385|Elapsed Time: 0:02:03||

Episode 5|Iteration 438|reward:  476.0|last_reward_at:  385|Elapsed Time: 0:02:03||

Episode 5|Iteration 438|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:03||

Episode 5|Iteration 440|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:05||

Episode 5|Iteration 442|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:05||

Episode 5|Iteration 445|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:07||

Episode 5|Iteration 450|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:08||

Episode 5|Iteration 452|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:09||

Episode 5|Iteration 455|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:10||

Episode 5|Iteration 460|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:11||

Episode 5|Iteration 465|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:13||

Episode 5|Iteration 468|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:13||

Episode 5|Iteration 470|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:14||

Episode 5|Iteration 475|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:15||

Episode 5|Iteration 479|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:16||

Episode 5|Iteration 480|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:17||

Episode 5|Iteration 485|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:19||

Episode 5|Iteration 488|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:19||

Episode 5|Iteration 490|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:20||

Episode 5|Iteration 495|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:22||

Episode 5|Iteration 500|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:25||

Episode 5|Iteration 500|reward:  476.0|last_reward_at:  438|Elapsed Time: 0:02:25||




  Episode 5 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/5 (0.17)
    explore-remote: 2/90 (0.02)
    explore-connect: 0/30 (0.00)
    exploit-local: 2/11 (0.15)
    exploit-remote: 6/304 (0.02)
    exploit-connect: 5/44 (0.10)
  exploit deflected to exploration: 28
  ## Episode: 6/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 6|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 6|Iteration 5|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 6|Iteration 5|reward:   22.0|last_reward_at:    5|Elapsed Time: 0:00:01||

Episode 6|Iteration 6|reward:   36.0|last_reward_at:    5|Elapsed Time: 0:00:01||

Episode 6|Iteration 6|reward:   36.0|last_reward_at:    6|Elapsed Time: 0:00:01||

Episode 6|Iteration 10|reward:   47.0|last_reward_at:    6|Elapsed Time: 0:00:02||

Episode 6|Iteration 10|reward:   47.0|last_reward_at:   10|Elapsed Time: 0:00:02||

Episode 6|Iteration 15|reward:   47.0|last_reward_at:   10|Elapsed Time: 0:00:04||

Episode 6|Iteration 18|reward:   56.0|last_reward_at:   10|Elapsed Time: 0:00:04||

Episode 6|Iteration 18|reward:   56.0|last_reward_at:   18|Elapsed Time: 0:00:04||

Episode 6|Iteration 20|reward:   56.0|last_reward_at:   18|Elapsed Time: 0:00:05||

Episode 6|Iteration 21|reward:  106.0|last_reward_at:   18|Elapsed Time: 0:00:05||

Episode 6|Iteration 21|reward:  106.0|last_reward_at:   21|Elapsed Time: 0:00:05||

Episode 6|Iteration 25|reward:  106.0|last_reward_at:   21|Elapsed Time: 0:00:08||

Episode 6|Iteration 30|reward:  106.0|last_reward_at:   21|Elapsed Time: 0:00:09||

Episode 6|Iteration 31|reward:  206.0|last_reward_at:   21|Elapsed Time: 0:00:09||

Episode 6|Iteration 31|reward:  206.0|last_reward_at:   31|Elapsed Time: 0:00:09||

Episode 6|Iteration 32|reward:  220.0|last_reward_at:   31|Elapsed Time: 0:00:09||

Episode 6|Iteration 32|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:09||

Episode 6|Iteration 35|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:11||

Episode 6|Iteration 40|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:13||

Episode 6|Iteration 45|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:14||

Episode 6|Iteration 48|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:15||

Episode 6|Iteration 50|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:17||

Episode 6|Iteration 55|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:19||

Episode 6|Iteration 58|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:19||

Episode 6|Iteration 60|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:21||

Episode 6|Iteration 65|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:22||

Episode 6|Iteration 70|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:24||

Episode 6|Iteration 71|reward:  220.0|last_reward_at:   32|Elapsed Time: 0:00:24||

Episode 6|Iteration 74|reward:  231.0|last_reward_at:   32|Elapsed Time: 0:00:24||

Episode 6|Iteration 74|reward:  231.0|last_reward_at:   74|Elapsed Time: 0:00:24||

Episode 6|Iteration 75|reward:  231.0|last_reward_at:   74|Elapsed Time: 0:00:26||

Episode 6|Iteration 80|reward:  231.0|last_reward_at:   74|Elapsed Time: 0:00:27||

Episode 6|Iteration 82|reward:  331.0|last_reward_at:   74|Elapsed Time: 0:00:27||

Episode 6|Iteration 82|reward:  331.0|last_reward_at:   82|Elapsed Time: 0:00:27||

Episode 6|Iteration 85|reward:  331.0|last_reward_at:   82|Elapsed Time: 0:00:28||

Episode 6|Iteration 90|reward:  331.0|last_reward_at:   82|Elapsed Time: 0:00:30||

Episode 6|Iteration 93|reward:  345.0|last_reward_at:   82|Elapsed Time: 0:00:30||

Episode 6|Iteration 93|reward:  345.0|last_reward_at:   93|Elapsed Time: 0:00:30||

Episode 6|Iteration 95|reward:  345.0|last_reward_at:   93|Elapsed Time: 0:00:30||

Episode 6|Iteration 95|reward:  345.0|last_reward_at:   93|Elapsed Time: 0:00:32||

Episode 6|Iteration 100|reward:  345.0|last_reward_at:   93|Elapsed Time: 0:00:33||

Episode 6|Iteration 101|reward:  395.0|last_reward_at:   93|Elapsed Time: 0:00:33||

Episode 6|Iteration 101|reward:  395.0|last_reward_at:  101|Elapsed Time: 0:00:33||

Episode 6|Iteration 102|reward:  395.0|last_reward_at:  101|Elapsed Time: 0:00:33||

Episode 6|Iteration 103|reward:  395.0|last_reward_at:  101|Elapsed Time: 0:00:33||

Episode 6|Iteration 105|reward:  395.0|last_reward_at:  101|Elapsed Time: 0:00:35||

Episode 6|Iteration 110|reward:  395.0|last_reward_at:  101|Elapsed Time: 0:00:36||

Episode 6|Iteration 115|reward:  395.0|last_reward_at:  101|Elapsed Time: 0:00:38||

Episode 6|Iteration 116|reward:  409.0|last_reward_at:  101|Elapsed Time: 0:00:38||

Episode 6|Iteration 116|reward:  409.0|last_reward_at:  116|Elapsed Time: 0:00:38||

Episode 6|Iteration 120|reward:  409.0|last_reward_at:  116|Elapsed Time: 0:00:40||

Episode 6|Iteration 125|reward:  409.0|last_reward_at:  116|Elapsed Time: 0:00:41||

Episode 6|Iteration 130|reward:  409.0|last_reward_at:  116|Elapsed Time: 0:00:43||

Episode 6|Iteration 135|reward:  409.0|last_reward_at:  116|Elapsed Time: 0:00:44||

Episode 6|Iteration 139|reward:  459.0|last_reward_at:  116|Elapsed Time: 0:00:44||

Episode 6|Iteration 139|reward:  459.0|last_reward_at:  139|Elapsed Time: 0:00:44||

Episode 6|Iteration 140|reward:  459.0|last_reward_at:  139|Elapsed Time: 0:00:46||

Episode 6|Iteration 145|reward:  459.0|last_reward_at:  139|Elapsed Time: 0:00:47||

Episode 6|Iteration 150|reward:  459.0|last_reward_at:  139|Elapsed Time: 0:00:48||

Episode 6|Iteration 155|reward:  459.0|last_reward_at:  139|Elapsed Time: 0:00:50||

Episode 6|Iteration 159|reward:  459.0|last_reward_at:  139|Elapsed Time: 0:00:50||

Episode 6|Iteration 160|reward:  459.0|last_reward_at:  139|Elapsed Time: 0:00:51||

Episode 6|Iteration 164|reward:  459.0|last_reward_at:  139|Elapsed Time: 0:00:51||

Episode 6|Iteration 165|reward:  459.0|last_reward_at:  139|Elapsed Time: 0:00:53||

Episode 6|Iteration 170|reward:  459.0|last_reward_at:  139|Elapsed Time: 0:00:55||

Episode 6|Iteration 171|reward:  459.0|last_reward_at:  139|Elapsed Time: 0:00:55||

Episode 6|Iteration 175|reward:  465.0|last_reward_at:  139|Elapsed Time: 0:00:56||

Episode 6|Iteration 175|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:00:56||

Episode 6|Iteration 178|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:00:56||

Episode 6|Iteration 180|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:00:58||

Episode 6|Iteration 182|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:00:58||

Episode 6|Iteration 185|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:00:59||

Episode 6|Iteration 190|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:01||

Episode 6|Iteration 195|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:02||

Episode 6|Iteration 200|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:04||

Episode 6|Iteration 205|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:05||

Episode 6|Iteration 207|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:05||

Episode 6|Iteration 209|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:05||

Episode 6|Iteration 210|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:07||

Episode 6|Iteration 215|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:08||

Episode 6|Iteration 220|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:10||

Episode 6|Iteration 224|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:10||

Episode 6|Iteration 225|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:11||

Episode 6|Iteration 230|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:13||

Episode 6|Iteration 235|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:13||

Episode 6|Iteration 235|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:14||

Episode 6|Iteration 240|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:16||

Episode 6|Iteration 245|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:18||

Episode 6|Iteration 250|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:19||

Episode 6|Iteration 251|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:19||

Episode 6|Iteration 254|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:20||

Episode 6|Iteration 255|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:21||

Episode 6|Iteration 260|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:22||

Episode 6|Iteration 265|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:24||

Episode 6|Iteration 270|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:25||

Episode 6|Iteration 275|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:27||

Episode 6|Iteration 276|reward:  465.0|last_reward_at:  175|Elapsed Time: 0:01:27||

Episode 6|Iteration 278|reward:  476.0|last_reward_at:  175|Elapsed Time: 0:01:27||

Episode 6|Iteration 278|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:27||

Episode 6|Iteration 280|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:29||

Episode 6|Iteration 285|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:30||

Episode 6|Iteration 290|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:31||

Episode 6|Iteration 295|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:33||

Episode 6|Iteration 300|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:34||

Episode 6|Iteration 301|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:35||

Episode 6|Iteration 304|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:35||

Episode 6|Iteration 305|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:36||

Episode 6|Iteration 310|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:37||

Episode 6|Iteration 315|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:40||

Episode 6|Iteration 319|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:40||

Episode 6|Iteration 320|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:41||

Episode 6|Iteration 325|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:43||

Episode 6|Iteration 330|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:44||

Episode 6|Iteration 335|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:46||

Episode 6|Iteration 340|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:47||

Episode 6|Iteration 345|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:49||

Episode 6|Iteration 349|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:49||

Episode 6|Iteration 350|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:51||

Episode 6|Iteration 355|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:52||

Episode 6|Iteration 360|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:53||

Episode 6|Iteration 365|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:55||

Episode 6|Iteration 370|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:56||

Episode 6|Iteration 372|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:56||

Episode 6|Iteration 375|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:58||

Episode 6|Iteration 380|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:01:59||

Episode 6|Iteration 385|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:01||

Episode 6|Iteration 390|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:02||

Episode 6|Iteration 391|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:02||

Episode 6|Iteration 393|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:02||

Episode 6|Iteration 395|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:04||

Episode 6|Iteration 400|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:05||

Episode 6|Iteration 405|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:06||

Episode 6|Iteration 410|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:08||

Episode 6|Iteration 414|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:08||

Episode 6|Iteration 415|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:10||

Episode 6|Iteration 420|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:11||

Episode 6|Iteration 425|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:11||

Episode 6|Iteration 425|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:15||

Episode 6|Iteration 429|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:15||

Episode 6|Iteration 430|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:16||

Episode 6|Iteration 435|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:17||

Episode 6|Iteration 440|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:19||

Episode 6|Iteration 445|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:20||

Episode 6|Iteration 450|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:22||

Episode 6|Iteration 455|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:23||

Episode 6|Iteration 460|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:25||

Episode 6|Iteration 464|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:25||

Episode 6|Iteration 465|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:26||

Episode 6|Iteration 470|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:28||

Episode 6|Iteration 475|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:29||

Episode 6|Iteration 480|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:31||

Episode 6|Iteration 482|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:31||

Episode 6|Iteration 485|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:32||

Episode 6|Iteration 488|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:32||

Episode 6|Iteration 490|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:34||

Episode 6|Iteration 495|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:35||

Episode 6|Iteration 498|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:35||

Episode 6|Iteration 500|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:37||

Episode 6|Iteration 500|reward:  476.0|last_reward_at:  278|Elapsed Time: 0:02:37||




  Episode 6 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 0/5 (0.00)
    explore-remote: 3/98 (0.03)
    explore-connect: 0/48 (0.00)
    exploit-local: 3/17 (0.15)
    exploit-remote: 5/307 (0.02)
    exploit-connect: 5/9 (0.36)
  exploit deflected to exploration: 54
  ## Episode: 7/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 7|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 7|Iteration 5|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 7|Iteration 10|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 7|Iteration 15|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:04||

Episode 7|Iteration 17|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:04||

Episode 7|Iteration 17|reward:   22.0|last_reward_at:   17|Elapsed Time: 0:00:04||

Episode 7|Iteration 20|reward:   22.0|last_reward_at:   17|Elapsed Time: 0:00:05||

Episode 7|Iteration 25|reward:   22.0|last_reward_at:   17|Elapsed Time: 0:00:07||

Episode 7|Iteration 27|reward:   33.0|last_reward_at:   17|Elapsed Time: 0:00:07||

Episode 7|Iteration 27|reward:   33.0|last_reward_at:   27|Elapsed Time: 0:00:07||

Episode 7|Iteration 28|reward:   42.0|last_reward_at:   27|Elapsed Time: 0:00:07||

Episode 7|Iteration 28|reward:   42.0|last_reward_at:   28|Elapsed Time: 0:00:07||

Episode 7|Iteration 30|reward:   42.0|last_reward_at:   28|Elapsed Time: 0:00:08||

Episode 7|Iteration 35|reward:   42.0|last_reward_at:   28|Elapsed Time: 0:00:09||

Episode 7|Iteration 40|reward:   42.0|last_reward_at:   28|Elapsed Time: 0:00:11||

Episode 7|Iteration 41|reward:   42.0|last_reward_at:   28|Elapsed Time: 0:00:11||

Episode 7|Iteration 45|reward:   42.0|last_reward_at:   28|Elapsed Time: 0:00:13||

Episode 7|Iteration 48|reward:   53.0|last_reward_at:   28|Elapsed Time: 0:00:13||

Episode 7|Iteration 48|reward:   53.0|last_reward_at:   48|Elapsed Time: 0:00:13||

Episode 7|Iteration 50|reward:   53.0|last_reward_at:   48|Elapsed Time: 0:00:14||

Episode 7|Iteration 55|reward:   53.0|last_reward_at:   48|Elapsed Time: 0:00:15||

Episode 7|Iteration 60|reward:   53.0|last_reward_at:   48|Elapsed Time: 0:00:18||

Episode 7|Iteration 65|reward:   67.0|last_reward_at:   48|Elapsed Time: 0:00:20||

Episode 7|Iteration 65|reward:   67.0|last_reward_at:   65|Elapsed Time: 0:00:20||

Episode 7|Iteration 70|reward:   67.0|last_reward_at:   65|Elapsed Time: 0:00:22||

Episode 7|Iteration 73|reward:  117.0|last_reward_at:   65|Elapsed Time: 0:00:22||

Episode 7|Iteration 73|reward:  117.0|last_reward_at:   73|Elapsed Time: 0:00:22||

Episode 7|Iteration 75|reward:  117.0|last_reward_at:   73|Elapsed Time: 0:00:24||

Episode 7|Iteration 80|reward:  117.0|last_reward_at:   73|Elapsed Time: 0:00:25||

Episode 7|Iteration 85|reward:  117.0|last_reward_at:   73|Elapsed Time: 0:00:27||

Episode 7|Iteration 86|reward:  131.0|last_reward_at:   73|Elapsed Time: 0:00:27||

Episode 7|Iteration 86|reward:  131.0|last_reward_at:   86|Elapsed Time: 0:00:27||

Episode 7|Iteration 87|reward:  181.0|last_reward_at:   86|Elapsed Time: 0:00:27||

Episode 7|Iteration 87|reward:  181.0|last_reward_at:   87|Elapsed Time: 0:00:27||

Episode 7|Iteration 90|reward:  181.0|last_reward_at:   87|Elapsed Time: 0:00:28||

Episode 7|Iteration 92|reward:  187.0|last_reward_at:   87|Elapsed Time: 0:00:28||

Episode 7|Iteration 92|reward:  187.0|last_reward_at:   92|Elapsed Time: 0:00:28||

Episode 7|Iteration 95|reward:  187.0|last_reward_at:   92|Elapsed Time: 0:00:29||

Episode 7|Iteration 100|reward:  187.0|last_reward_at:   92|Elapsed Time: 0:00:31||

Episode 7|Iteration 105|reward:  198.0|last_reward_at:   92|Elapsed Time: 0:00:32||

Episode 7|Iteration 105|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:32||

Episode 7|Iteration 110|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:34||

Episode 7|Iteration 114|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:34||

Episode 7|Iteration 115|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:35||

Episode 7|Iteration 120|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:37||

Episode 7|Iteration 125|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:38||

Episode 7|Iteration 130|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:40||

Episode 7|Iteration 133|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:40||

Episode 7|Iteration 135|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:41||

Episode 7|Iteration 140|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:43||

Episode 7|Iteration 145|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:44||

Episode 7|Iteration 150|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:46||

Episode 7|Iteration 155|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:48||

Episode 7|Iteration 160|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:49||

Episode 7|Iteration 165|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:50||

Episode 7|Iteration 170|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:52||

Episode 7|Iteration 175|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:53||

Episode 7|Iteration 180|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:55||

Episode 7|Iteration 185|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:56||

Episode 7|Iteration 190|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:57||

Episode 7|Iteration 195|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:00:59||

Episode 7|Iteration 200|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:01:00||

Episode 7|Iteration 205|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:01:02||

Episode 7|Iteration 209|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:01:02||

Episode 7|Iteration 210|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:01:04||

Episode 7|Iteration 215|reward:  198.0|last_reward_at:  105|Elapsed Time: 0:01:05||

Episode 7|Iteration 216|reward:  298.0|last_reward_at:  105|Elapsed Time: 0:01:05||

Episode 7|Iteration 216|reward:  298.0|last_reward_at:  216|Elapsed Time: 0:01:05||

Episode 7|Iteration 217|reward:  312.0|last_reward_at:  216|Elapsed Time: 0:01:05||

Episode 7|Iteration 217|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:05||

Episode 7|Iteration 220|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:06||

Episode 7|Iteration 225|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:08||

Episode 7|Iteration 230|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:09||

Episode 7|Iteration 235|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:11||

Episode 7|Iteration 240|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:12||

Episode 7|Iteration 245|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:13||

Episode 7|Iteration 250|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:15||

Episode 7|Iteration 254|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:15||

Episode 7|Iteration 255|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:17||

Episode 7|Iteration 260|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:19||

Episode 7|Iteration 265|reward:  312.0|last_reward_at:  217|Elapsed Time: 0:01:20||

Episode 7|Iteration 268|reward:  412.0|last_reward_at:  217|Elapsed Time: 0:01:20||

Episode 7|Iteration 268|reward:  412.0|last_reward_at:  268|Elapsed Time: 0:01:20||

Episode 7|Iteration 270|reward:  412.0|last_reward_at:  268|Elapsed Time: 0:01:21||

Episode 7|Iteration 275|reward:  412.0|last_reward_at:  268|Elapsed Time: 0:01:23||

Episode 7|Iteration 280|reward:  412.0|last_reward_at:  268|Elapsed Time: 0:01:24||

Episode 7|Iteration 285|reward:  412.0|last_reward_at:  268|Elapsed Time: 0:01:24||

Episode 7|Iteration 285|reward:  412.0|last_reward_at:  268|Elapsed Time: 0:01:26||

Episode 7|Iteration 287|reward:  426.0|last_reward_at:  268|Elapsed Time: 0:01:26||

Episode 7|Iteration 287|reward:  426.0|last_reward_at:  287|Elapsed Time: 0:01:26||

Episode 7|Iteration 290|reward:  426.0|last_reward_at:  287|Elapsed Time: 0:01:28||

Episode 7|Iteration 295|reward:  426.0|last_reward_at:  287|Elapsed Time: 0:01:29||

Episode 7|Iteration 299|reward:  476.0|last_reward_at:  287|Elapsed Time: 0:01:29||

Episode 7|Iteration 299|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:29||

Episode 7|Iteration 300|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:31||

Episode 7|Iteration 301|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:31||

Episode 7|Iteration 303|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:32||

Episode 7|Iteration 305|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:33||

Episode 7|Iteration 310|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:35||

Episode 7|Iteration 312|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:35||

Episode 7|Iteration 315|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:36||

Episode 7|Iteration 320|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:37||

Episode 7|Iteration 321|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:38||

Episode 7|Iteration 325|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:39||

Episode 7|Iteration 330|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:41||

Episode 7|Iteration 335|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:42||

Episode 7|Iteration 338|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:42||

Episode 7|Iteration 340|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:44||

Episode 7|Iteration 342|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:44||

Episode 7|Iteration 345|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:45||

Episode 7|Iteration 350|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:47||

Episode 7|Iteration 354|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:47||

Episode 7|Iteration 355|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:49||

Episode 7|Iteration 360|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:50||

Episode 7|Iteration 364|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:50||

Episode 7|Iteration 365|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:52||

Episode 7|Iteration 370|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:53||

Episode 7|Iteration 375|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:55||

Episode 7|Iteration 380|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:56||

Episode 7|Iteration 385|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:58||

Episode 7|Iteration 390|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:59||

Episode 7|Iteration 393|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:01:59||

Episode 7|Iteration 395|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:01||

Episode 7|Iteration 396|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:01||

Episode 7|Iteration 400|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:02||

Episode 7|Iteration 405|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:03||

Episode 7|Iteration 410|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:05||

Episode 7|Iteration 415|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:07||

Episode 7|Iteration 420|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:08||

Episode 7|Iteration 425|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:08||

Episode 7|Iteration 425|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:10||

Episode 7|Iteration 428|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:10||

Episode 7|Iteration 430|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:11||

Episode 7|Iteration 435|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:13||

Episode 7|Iteration 440|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:14||

Episode 7|Iteration 445|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:15||

Episode 7|Iteration 448|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:16||

Episode 7|Iteration 450|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:17||

Episode 7|Iteration 455|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:19||

Episode 7|Iteration 460|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:20||

Episode 7|Iteration 465|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:21||

Episode 7|Iteration 470|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:23||

Episode 7|Iteration 475|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:23||

Episode 7|Iteration 475|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:24||

Episode 7|Iteration 479|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:24||

Episode 7|Iteration 480|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:25||

Episode 7|Iteration 485|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:27||

Episode 7|Iteration 490|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:28||

Episode 7|Iteration 494|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:28||

Episode 7|Iteration 495|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:30||

Episode 7|Iteration 500|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:30||

Episode 7|Iteration 500|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:32||

Episode 7|Iteration 500|reward:  476.0|last_reward_at:  299|Elapsed Time: 0:02:32||




  Episode 7 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 0/10 (0.00)
    explore-remote: 3/87 (0.03)
    explore-connect: 0/36 (0.00)
    exploit-local: 3/20 (0.13)
    exploit-remote: 5/303 (0.02)
    exploit-connect: 5/28 (0.15)
  exploit deflected to exploration: 36
  ## Episode: 8/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 8|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 8|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 8|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 8|Iteration 2|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 8|Iteration 3|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 8|Iteration 3|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 8|Iteration 5|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:01||

Episode 8|Iteration 10|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:02||

Episode 8|Iteration 15|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:03||

Episode 8|Iteration 20|reward:   36.0|last_reward_at:    3|Elapsed Time: 0:00:04||

Episode 8|Iteration 20|reward:   36.0|last_reward_at:   20|Elapsed Time: 0:00:04||

Episode 8|Iteration 25|reward:   36.0|last_reward_at:   20|Elapsed Time: 0:00:06||

Episode 8|Iteration 27|reward:   86.0|last_reward_at:   20|Elapsed Time: 0:00:06||

Episode 8|Iteration 27|reward:   86.0|last_reward_at:   27|Elapsed Time: 0:00:06||

Episode 8|Iteration 30|reward:   86.0|last_reward_at:   27|Elapsed Time: 0:00:08||

Episode 8|Iteration 35|reward:   86.0|last_reward_at:   27|Elapsed Time: 0:00:10||

Episode 8|Iteration 40|reward:   86.0|last_reward_at:   27|Elapsed Time: 0:00:11||

Episode 8|Iteration 45|reward:   86.0|last_reward_at:   27|Elapsed Time: 0:00:12||

Episode 8|Iteration 46|reward:   97.0|last_reward_at:   27|Elapsed Time: 0:00:12||

Episode 8|Iteration 46|reward:   97.0|last_reward_at:   46|Elapsed Time: 0:00:12||

Episode 8|Iteration 50|reward:   97.0|last_reward_at:   46|Elapsed Time: 0:00:14||

Episode 8|Iteration 55|reward:   97.0|last_reward_at:   46|Elapsed Time: 0:00:15||

Episode 8|Iteration 60|reward:   97.0|last_reward_at:   46|Elapsed Time: 0:00:16||

Episode 8|Iteration 62|reward:  106.0|last_reward_at:   46|Elapsed Time: 0:00:16||

Episode 8|Iteration 62|reward:  106.0|last_reward_at:   62|Elapsed Time: 0:00:16||

Episode 8|Iteration 65|reward:  106.0|last_reward_at:   62|Elapsed Time: 0:00:18||

Episode 8|Iteration 70|reward:  106.0|last_reward_at:   62|Elapsed Time: 0:00:18||

Episode 8|Iteration 70|reward:  106.0|last_reward_at:   62|Elapsed Time: 0:00:20||

Episode 8|Iteration 75|reward:  106.0|last_reward_at:   62|Elapsed Time: 0:00:21||

Episode 8|Iteration 80|reward:  106.0|last_reward_at:   62|Elapsed Time: 0:00:23||

Episode 8|Iteration 85|reward:  106.0|last_reward_at:   62|Elapsed Time: 0:00:24||

Episode 8|Iteration 90|reward:  106.0|last_reward_at:   62|Elapsed Time: 0:00:25||

Episode 8|Iteration 95|reward:  106.0|last_reward_at:   62|Elapsed Time: 0:00:27||

Episode 8|Iteration 100|reward:  106.0|last_reward_at:   62|Elapsed Time: 0:00:28||

Episode 8|Iteration 101|reward:  206.0|last_reward_at:   62|Elapsed Time: 0:00:28||

Episode 8|Iteration 101|reward:  206.0|last_reward_at:  101|Elapsed Time: 0:00:28||

Episode 8|Iteration 102|reward:  220.0|last_reward_at:  101|Elapsed Time: 0:00:28||

Episode 8|Iteration 102|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:28||

Episode 8|Iteration 105|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:29||

Episode 8|Iteration 110|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:30||

Episode 8|Iteration 114|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:30||

Episode 8|Iteration 115|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:32||

Episode 8|Iteration 119|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:32||

Episode 8|Iteration 120|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:33||

Episode 8|Iteration 125|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:34||

Episode 8|Iteration 130|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:36||

Episode 8|Iteration 135|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:37||

Episode 8|Iteration 140|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:38||

Episode 8|Iteration 145|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:40||

Episode 8|Iteration 150|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:42||

Episode 8|Iteration 155|reward:  220.0|last_reward_at:  102|Elapsed Time: 0:00:43||

Episode 8|Iteration 157|reward:  231.0|last_reward_at:  102|Elapsed Time: 0:00:43||

Episode 8|Iteration 157|reward:  231.0|last_reward_at:  157|Elapsed Time: 0:00:43||

Episode 8|Iteration 160|reward:  231.0|last_reward_at:  157|Elapsed Time: 0:00:45||

Episode 8|Iteration 165|reward:  231.0|last_reward_at:  157|Elapsed Time: 0:00:46||

Episode 8|Iteration 170|reward:  231.0|last_reward_at:  157|Elapsed Time: 0:00:47||

Episode 8|Iteration 172|reward:  331.0|last_reward_at:  157|Elapsed Time: 0:00:47||

Episode 8|Iteration 172|reward:  331.0|last_reward_at:  172|Elapsed Time: 0:00:47||

Episode 8|Iteration 175|reward:  331.0|last_reward_at:  172|Elapsed Time: 0:00:48||

Episode 8|Iteration 180|reward:  345.0|last_reward_at:  172|Elapsed Time: 0:00:49||

Episode 8|Iteration 180|reward:  345.0|last_reward_at:  180|Elapsed Time: 0:00:49||

Episode 8|Iteration 185|reward:  345.0|last_reward_at:  180|Elapsed Time: 0:00:51||

Episode 8|Iteration 190|reward:  345.0|last_reward_at:  180|Elapsed Time: 0:00:51||

Episode 8|Iteration 190|reward:  345.0|last_reward_at:  180|Elapsed Time: 0:00:53||

Episode 8|Iteration 194|reward:  395.0|last_reward_at:  180|Elapsed Time: 0:00:53||

Episode 8|Iteration 194|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:00:53||

Episode 8|Iteration 195|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:00:55||

Episode 8|Iteration 197|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:00:55||

Episode 8|Iteration 200|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:00:56||

Episode 8|Iteration 203|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:00:56||

Episode 8|Iteration 205|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:00:57||

Episode 8|Iteration 210|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:00:59||

Episode 8|Iteration 215|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:00||

Episode 8|Iteration 220|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:01||

Episode 8|Iteration 225|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:03||

Episode 8|Iteration 230|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:04||

Episode 8|Iteration 235|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:04||

Episode 8|Iteration 235|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:05||

Episode 8|Iteration 240|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:07||

Episode 8|Iteration 242|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:07||

Episode 8|Iteration 244|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:07||

Episode 8|Iteration 245|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:09||

Episode 8|Iteration 250|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:10||

Episode 8|Iteration 255|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:12||

Episode 8|Iteration 260|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:12||

Episode 8|Iteration 260|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:14||

Episode 8|Iteration 265|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:15||

Episode 8|Iteration 270|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:17||

Episode 8|Iteration 275|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:18||

Episode 8|Iteration 280|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:19||

Episode 8|Iteration 285|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:20||

Episode 8|Iteration 285|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:21||

Episode 8|Iteration 290|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:22||

Episode 8|Iteration 293|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:22||

Episode 8|Iteration 295|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:24||

Episode 8|Iteration 300|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:25||

Episode 8|Iteration 305|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:27||

Episode 8|Iteration 310|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:28||

Episode 8|Iteration 315|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:30||

Episode 8|Iteration 320|reward:  395.0|last_reward_at:  194|Elapsed Time: 0:01:31||

Episode 8|Iteration 321|reward:  409.0|last_reward_at:  194|Elapsed Time: 0:01:31||

Episode 8|Iteration 321|reward:  409.0|last_reward_at:  321|Elapsed Time: 0:01:31||

Episode 8|Iteration 325|reward:  409.0|last_reward_at:  321|Elapsed Time: 0:01:33||

Episode 8|Iteration 330|reward:  409.0|last_reward_at:  321|Elapsed Time: 0:01:34||

Episode 8|Iteration 335|reward:  409.0|last_reward_at:  321|Elapsed Time: 0:01:35||

Episode 8|Iteration 336|reward:  459.0|last_reward_at:  321|Elapsed Time: 0:01:35||

Episode 8|Iteration 336|reward:  459.0|last_reward_at:  336|Elapsed Time: 0:01:35||

Episode 8|Iteration 340|reward:  459.0|last_reward_at:  336|Elapsed Time: 0:01:37||

Episode 8|Iteration 345|reward:  459.0|last_reward_at:  336|Elapsed Time: 0:01:38||

Episode 8|Iteration 350|reward:  459.0|last_reward_at:  336|Elapsed Time: 0:01:39||

Episode 8|Iteration 355|reward:  459.0|last_reward_at:  336|Elapsed Time: 0:01:41||

Episode 8|Iteration 356|reward:  465.0|last_reward_at:  336|Elapsed Time: 0:01:41||

Episode 8|Iteration 356|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:41||

Episode 8|Iteration 360|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:43||

Episode 8|Iteration 365|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:45||

Episode 8|Iteration 370|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:46||

Episode 8|Iteration 375|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:48||

Episode 8|Iteration 380|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:49||

Episode 8|Iteration 385|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:51||

Episode 8|Iteration 387|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:51||

Episode 8|Iteration 390|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:53||

Episode 8|Iteration 395|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:55||

Episode 8|Iteration 398|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:55||

Episode 8|Iteration 400|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:57||

Episode 8|Iteration 405|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:58||

Episode 8|Iteration 410|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:01:59||

Episode 8|Iteration 414|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:00||

Episode 8|Iteration 415|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:01||

Episode 8|Iteration 420|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:02||

Episode 8|Iteration 425|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:04||

Episode 8|Iteration 427|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:04||

Episode 8|Iteration 430|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:05||

Episode 8|Iteration 432|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:06||

Episode 8|Iteration 435|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:07||

Episode 8|Iteration 439|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:07||

Episode 8|Iteration 440|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:08||

Episode 8|Iteration 445|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:10||

Episode 8|Iteration 450|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:10||

Episode 8|Iteration 450|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:11||

Episode 8|Iteration 454|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:11||

Episode 8|Iteration 455|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:13||

Episode 8|Iteration 458|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:13||

Episode 8|Iteration 460|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:14||

Episode 8|Iteration 463|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:15||

Episode 8|Iteration 465|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:16||

Episode 8|Iteration 468|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:16||

Episode 8|Iteration 470|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:18||

Episode 8|Iteration 475|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:18||

Episode 8|Iteration 475|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:19||

Episode 8|Iteration 478|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:19||

Episode 8|Iteration 480|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:21||

Episode 8|Iteration 483|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:21||

Episode 8|Iteration 485|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:22||

Episode 8|Iteration 488|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:22||

Episode 8|Iteration 490|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:24||

Episode 8|Iteration 491|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:24||

Episode 8|Iteration 494|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:24||

Episode 8|Iteration 495|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:26||

Episode 8|Iteration 500|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:26||

Episode 8|Iteration 500|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:27||

Episode 8|Iteration 500|reward:  465.0|last_reward_at:  356|Elapsed Time: 0:02:27||




  Episode 8 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/15 (0.06)
    explore-remote: 1/99 (0.01)
    explore-connect: 0/39 (0.00)
    exploit-local: 2/28 (0.07)
    exploit-remote: 6/285 (0.02)
    exploit-connect: 5/19 (0.21)
  exploit deflected to exploration: 55
  ## Episode: 9/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 9|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 9|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 9|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 9|Iteration 5|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 9|Iteration 9|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 9|Iteration 10|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:03||

Episode 9|Iteration 10|reward:   22.0|last_reward_at:   10|Elapsed Time: 0:00:03||

Episode 9|Iteration 15|reward:   33.0|last_reward_at:   10|Elapsed Time: 0:00:04||

Episode 9|Iteration 15|reward:   33.0|last_reward_at:   15|Elapsed Time: 0:00:04||

Episode 9|Iteration 20|reward:   33.0|last_reward_at:   15|Elapsed Time: 0:00:05||

Episode 9|Iteration 22|reward:   33.0|last_reward_at:   15|Elapsed Time: 0:00:05||

Episode 9|Iteration 25|reward:   47.0|last_reward_at:   15|Elapsed Time: 0:00:07||

Episode 9|Iteration 25|reward:   47.0|last_reward_at:   25|Elapsed Time: 0:00:07||

Episode 9|Iteration 27|reward:   97.0|last_reward_at:   25|Elapsed Time: 0:00:07||

Episode 9|Iteration 27|reward:   97.0|last_reward_at:   27|Elapsed Time: 0:00:07||

Episode 9|Iteration 30|reward:   97.0|last_reward_at:   27|Elapsed Time: 0:00:08||

Episode 9|Iteration 35|reward:   97.0|last_reward_at:   27|Elapsed Time: 0:00:10||

Episode 9|Iteration 40|reward:   97.0|last_reward_at:   27|Elapsed Time: 0:00:11||

Episode 9|Iteration 45|reward:   97.0|last_reward_at:   27|Elapsed Time: 0:00:11||

Episode 9|Iteration 45|reward:   97.0|last_reward_at:   27|Elapsed Time: 0:00:13||

Episode 9|Iteration 50|reward:   97.0|last_reward_at:   27|Elapsed Time: 0:00:14||

Episode 9|Iteration 53|reward:  108.0|last_reward_at:   27|Elapsed Time: 0:00:14||

Episode 9|Iteration 53|reward:  108.0|last_reward_at:   53|Elapsed Time: 0:00:14||

Episode 9|Iteration 55|reward:  108.0|last_reward_at:   53|Elapsed Time: 0:00:15||

Episode 9|Iteration 59|reward:  108.0|last_reward_at:   53|Elapsed Time: 0:00:16||

Episode 9|Iteration 60|reward:  108.0|last_reward_at:   53|Elapsed Time: 0:00:17||

Episode 9|Iteration 64|reward:  108.0|last_reward_at:   53|Elapsed Time: 0:00:17||

Episode 9|Iteration 65|reward:  108.0|last_reward_at:   53|Elapsed Time: 0:00:19||

Episode 9|Iteration 68|reward:  114.0|last_reward_at:   53|Elapsed Time: 0:00:19||

Episode 9|Iteration 68|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:19||

Episode 9|Iteration 70|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:20||

Episode 9|Iteration 75|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:21||

Episode 9|Iteration 80|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:23||

Episode 9|Iteration 85|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:24||

Episode 9|Iteration 89|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:24||

Episode 9|Iteration 90|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:26||

Episode 9|Iteration 92|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:26||

Episode 9|Iteration 95|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:26||

Episode 9|Iteration 95|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:28||

Episode 9|Iteration 100|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:29||

Episode 9|Iteration 105|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:30||

Episode 9|Iteration 110|reward:  114.0|last_reward_at:   68|Elapsed Time: 0:00:32||

Episode 9|Iteration 113|reward:  128.0|last_reward_at:   68|Elapsed Time: 0:00:32||

Episode 9|Iteration 113|reward:  128.0|last_reward_at:  113|Elapsed Time: 0:00:32||

Episode 9|Iteration 115|reward:  137.0|last_reward_at:  113|Elapsed Time: 0:00:33||

Episode 9|Iteration 115|reward:  137.0|last_reward_at:  115|Elapsed Time: 0:00:33||

Episode 9|Iteration 119|reward:  137.0|last_reward_at:  115|Elapsed Time: 0:00:33||

Episode 9|Iteration 120|reward:  137.0|last_reward_at:  115|Elapsed Time: 0:00:34||

Episode 9|Iteration 125|reward:  137.0|last_reward_at:  115|Elapsed Time: 0:00:36||

Episode 9|Iteration 128|reward:  187.0|last_reward_at:  115|Elapsed Time: 0:00:36||

Episode 9|Iteration 128|reward:  187.0|last_reward_at:  128|Elapsed Time: 0:00:36||

Episode 9|Iteration 130|reward:  187.0|last_reward_at:  128|Elapsed Time: 0:00:37||

Episode 9|Iteration 135|reward:  187.0|last_reward_at:  128|Elapsed Time: 0:00:39||

Episode 9|Iteration 140|reward:  187.0|last_reward_at:  128|Elapsed Time: 0:00:39||

Episode 9|Iteration 140|reward:  187.0|last_reward_at:  128|Elapsed Time: 0:00:40||

Episode 9|Iteration 145|reward:  187.0|last_reward_at:  128|Elapsed Time: 0:00:42||

Episode 9|Iteration 150|reward:  187.0|last_reward_at:  128|Elapsed Time: 0:00:43||

Episode 9|Iteration 155|reward:  187.0|last_reward_at:  128|Elapsed Time: 0:00:44||

Episode 9|Iteration 160|reward:  187.0|last_reward_at:  128|Elapsed Time: 0:00:46||

Episode 9|Iteration 165|reward:  187.0|last_reward_at:  128|Elapsed Time: 0:00:47||

Episode 9|Iteration 166|reward:  287.0|last_reward_at:  128|Elapsed Time: 0:00:47||

Episode 9|Iteration 166|reward:  287.0|last_reward_at:  166|Elapsed Time: 0:00:47||

Episode 9|Iteration 167|reward:  301.0|last_reward_at:  166|Elapsed Time: 0:00:47||

Episode 9|Iteration 167|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:00:47||

Episode 9|Iteration 170|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:00:48||

Episode 9|Iteration 175|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:00:50||

Episode 9|Iteration 180|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:00:51||

Episode 9|Iteration 185|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:00:53||

Episode 9|Iteration 190|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:00:55||

Episode 9|Iteration 194|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:00:55||

Episode 9|Iteration 195|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:00:56||

Episode 9|Iteration 200|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:00:58||

Episode 9|Iteration 205|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:00:59||

Episode 9|Iteration 209|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:00:59||

Episode 9|Iteration 210|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:01:00||

Episode 9|Iteration 215|reward:  301.0|last_reward_at:  167|Elapsed Time: 0:01:02||

Episode 9|Iteration 219|reward:  401.0|last_reward_at:  167|Elapsed Time: 0:01:02||

Episode 9|Iteration 219|reward:  401.0|last_reward_at:  219|Elapsed Time: 0:01:02||

Episode 9|Iteration 220|reward:  401.0|last_reward_at:  219|Elapsed Time: 0:01:03||

Episode 9|Iteration 225|reward:  401.0|last_reward_at:  219|Elapsed Time: 0:01:05||

Episode 9|Iteration 230|reward:  401.0|last_reward_at:  219|Elapsed Time: 0:01:07||

Episode 9|Iteration 235|reward:  401.0|last_reward_at:  219|Elapsed Time: 0:01:07||

Episode 9|Iteration 235|reward:  401.0|last_reward_at:  219|Elapsed Time: 0:01:08||

Episode 9|Iteration 237|reward:  415.0|last_reward_at:  219|Elapsed Time: 0:01:08||

Episode 9|Iteration 237|reward:  415.0|last_reward_at:  237|Elapsed Time: 0:01:08||

Episode 9|Iteration 240|reward:  415.0|last_reward_at:  237|Elapsed Time: 0:01:10||

Episode 9|Iteration 245|reward:  415.0|last_reward_at:  237|Elapsed Time: 0:01:11||

Episode 9|Iteration 250|reward:  415.0|last_reward_at:  237|Elapsed Time: 0:01:12||

Episode 9|Iteration 255|reward:  415.0|last_reward_at:  237|Elapsed Time: 0:01:14||

Episode 9|Iteration 259|reward:  465.0|last_reward_at:  237|Elapsed Time: 0:01:14||

Episode 9|Iteration 259|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:14||

Episode 9|Iteration 260|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:15||

Episode 9|Iteration 263|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:15||

Episode 9|Iteration 265|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:16||

Episode 9|Iteration 269|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:17||

Episode 9|Iteration 270|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:18||

Episode 9|Iteration 271|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:18||

Episode 9|Iteration 273|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:18||

Episode 9|Iteration 275|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:19||

Episode 9|Iteration 280|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:21||

Episode 9|Iteration 281|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:21||

Episode 9|Iteration 285|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:21||

Episode 9|Iteration 285|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:23||

Episode 9|Iteration 290|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:24||

Episode 9|Iteration 294|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:24||

Episode 9|Iteration 295|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:26||

Episode 9|Iteration 296|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:27||

Episode 9|Iteration 300|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:28||

Episode 9|Iteration 303|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:28||

Episode 9|Iteration 305|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:30||

Episode 9|Iteration 309|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:30||

Episode 9|Iteration 310|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:31||

Episode 9|Iteration 315|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:32||

Episode 9|Iteration 319|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:33||

Episode 9|Iteration 320|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:34||

Episode 9|Iteration 322|reward:  465.0|last_reward_at:  259|Elapsed Time: 0:01:34||

Episode 9|Iteration 324|reward:  476.0|last_reward_at:  259|Elapsed Time: 0:01:34||

Episode 9|Iteration 324|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:34||

Episode 9|Iteration 325|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:38||

Episode 9|Iteration 329|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:39||

Episode 9|Iteration 330|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:40||

Episode 9|Iteration 335|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:41||

Episode 9|Iteration 340|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:43||

Episode 9|Iteration 345|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:44||

Episode 9|Iteration 347|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:44||

Episode 9|Iteration 350|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:46||

Episode 9|Iteration 355|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:47||

Episode 9|Iteration 360|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:49||

Episode 9|Iteration 365|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:50||

Episode 9|Iteration 368|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:50||

Episode 9|Iteration 370|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:51||

Episode 9|Iteration 375|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:53||

Episode 9|Iteration 380|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:54||

Episode 9|Iteration 385|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:56||

Episode 9|Iteration 389|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:56||

Episode 9|Iteration 390|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:57||

Episode 9|Iteration 393|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:57||

Episode 9|Iteration 395|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:58||

Episode 9|Iteration 397|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:01:59||

Episode 9|Iteration 400|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:00||

Episode 9|Iteration 405|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:02||

Episode 9|Iteration 410|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:03||

Episode 9|Iteration 415|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:05||

Episode 9|Iteration 420|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:06||

Episode 9|Iteration 425|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:08||

Episode 9|Iteration 430|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:09||

Episode 9|Iteration 435|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:10||

Episode 9|Iteration 440|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:12||

Episode 9|Iteration 441|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:12||

Episode 9|Iteration 444|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:12||

Episode 9|Iteration 445|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:14||

Episode 9|Iteration 450|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:14||

Episode 9|Iteration 450|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:15||

Episode 9|Iteration 454|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:15||

Episode 9|Iteration 455|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:16||

Episode 9|Iteration 460|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:18||

Episode 9|Iteration 463|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:18||

Episode 9|Iteration 465|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:19||

Episode 9|Iteration 469|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:19||

Episode 9|Iteration 470|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:21||

Episode 9|Iteration 475|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:21||

Episode 9|Iteration 475|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:22||

Episode 9|Iteration 480|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:23||

Episode 9|Iteration 485|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:25||

Episode 9|Iteration 490|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:26||

Episode 9|Iteration 495|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:28||

Episode 9|Iteration 500|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:28||

Episode 9|Iteration 500|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:29||

Episode 9|Iteration 500|reward:  476.0|last_reward_at:  324|Elapsed Time: 0:02:29||




  Episode 9 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 0/10 (0.00)
    explore-remote: 4/104 (0.04)
    explore-connect: 0/53 (0.00)
    exploit-local: 3/18 (0.14)
    exploit-remote: 4/282 (0.01)
    exploit-connect: 5/17 (0.23)
  exploit deflected to exploration: 71
  ## Episode: 10/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 10|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 10|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 10|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,discovered,,,"[ScanPageSource, ScanPageContent]"


Episode 10|Iteration 2|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 10|Iteration 5|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 10|Iteration 6|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 10|Iteration 6|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:02||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,discovered,,,"[ScanPageSource, ScanPageContent]"
GitHubProject,discovered,,,[CredScanGitHistory]


Episode 10|Iteration 7|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:02||

Episode 10|Iteration 10|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:03||

Episode 10|Iteration 15|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:04||

Episode 10|Iteration 20|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:05||

Episode 10|Iteration 25|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:07||

Episode 10|Iteration 30|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:09||

Episode 10|Iteration 35|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:10||

Episode 10|Iteration 40|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:11||

Episode 10|Iteration 45|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:12||

Episode 10|Iteration 45|reward:   22.0|last_reward_at:    6|Elapsed Time: 0:00:13||

Episode 10|Iteration 47|reward:   33.0|last_reward_at:    6|Elapsed Time: 0:00:13||

Episode 10|Iteration 47|reward:   33.0|last_reward_at:   47|Elapsed Time: 0:00:13||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,discovered,,,"[ScanPageSource, ScanPageContent]"
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."


Episode 10|Iteration 48|reward:   33.0|last_reward_at:   47|Elapsed Time: 0:00:13||

Episode 10|Iteration 50|reward:   33.0|last_reward_at:   47|Elapsed Time: 0:00:15||

Episode 10|Iteration 51|reward:   42.0|last_reward_at:   47|Elapsed Time: 0:00:15||

Episode 10|Iteration 51|reward:   42.0|last_reward_at:   51|Elapsed Time: 0:00:15||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,discovered,,,"[ScanPageSource, ScanPageContent]"
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."


Episode 10|Iteration 52|reward:   42.0|last_reward_at:   51|Elapsed Time: 0:00:15||

Episode 10|Iteration 52|reward:  142.0|last_reward_at:   51|Elapsed Time: 0:00:15||

Episode 10|Iteration 52|reward:  142.0|last_reward_at:   52|Elapsed Time: 0:00:15||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."


Episode 10|Iteration 53|reward:  142.0|last_reward_at:   52|Elapsed Time: 0:00:15||

Episode 10|Iteration 53|reward:  156.0|last_reward_at:   52|Elapsed Time: 0:00:15||

Episode 10|Iteration 53|reward:  156.0|last_reward_at:   53|Elapsed Time: 0:00:15||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."
Website[user=monitor],discovered,,,[]


Episode 10|Iteration 54|reward:  156.0|last_reward_at:   53|Elapsed Time: 0:00:15||

Episode 10|Iteration 55|reward:  156.0|last_reward_at:   53|Elapsed Time: 0:00:16||

Episode 10|Iteration 59|reward:  256.0|last_reward_at:   53|Elapsed Time: 0:00:16||

Episode 10|Iteration 59|reward:  256.0|last_reward_at:   59|Elapsed Time: 0:00:16||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
Website[user=monitor],owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScan-HomeDirectory],[]
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."


Episode 10|Iteration 60|reward:  256.0|last_reward_at:   59|Elapsed Time: 0:00:18||

Episode 10|Iteration 61|reward:  270.0|last_reward_at:   59|Elapsed Time: 0:00:18||

Episode 10|Iteration 61|reward:  270.0|last_reward_at:   61|Elapsed Time: 0:00:18||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
Website[user=monitor],owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScan-HomeDirectory],[]
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."
AzureStorage,discovered,,,[AccessDataWithSASToken]


Episode 10|Iteration 62|reward:  270.0|last_reward_at:   61|Elapsed Time: 0:00:18||

Episode 10|Iteration 62|reward:  284.0|last_reward_at:   61|Elapsed Time: 0:00:18||

Episode 10|Iteration 62|reward:  284.0|last_reward_at:   62|Elapsed Time: 0:00:18||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
Website[user=monitor],owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScan-HomeDirectory],[]
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."
AzureStorage,discovered,,,[AccessDataWithSASToken]
AzureResourceManager[user=monitor],discovered,,,[]


Episode 10|Iteration 63|reward:  284.0|last_reward_at:   62|Elapsed Time: 0:00:18||

Episode 10|Iteration 65|reward:  284.0|last_reward_at:   62|Elapsed Time: 0:00:20||

Episode 10|Iteration 70|reward:  284.0|last_reward_at:   62|Elapsed Time: 0:00:20||

Episode 10|Iteration 70|reward:  284.0|last_reward_at:   62|Elapsed Time: 0:00:22||

Episode 10|Iteration 73|reward:  284.0|last_reward_at:   62|Elapsed Time: 0:00:22||

Episode 10|Iteration 75|reward:  284.0|last_reward_at:   62|Elapsed Time: 0:00:23||

Episode 10|Iteration 80|reward:  284.0|last_reward_at:   62|Elapsed Time: 0:00:25||

Episode 10|Iteration 85|reward:  334.0|last_reward_at:   62|Elapsed Time: 0:00:26||

Episode 10|Iteration 85|reward:  334.0|last_reward_at:   85|Elapsed Time: 0:00:26||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
Website[user=monitor],owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScan-HomeDirectory],[]
AzureStorage,owned,[CTFFLAG:LeakedCustomerData],[],[AccessDataWithSASToken]
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."
AzureResourceManager[user=monitor],discovered,,,[]


Episode 10|Iteration 86|reward:  334.0|last_reward_at:   85|Elapsed Time: 0:00:26||

Episode 10|Iteration 90|reward:  334.0|last_reward_at:   85|Elapsed Time: 0:00:28||

Episode 10|Iteration 95|reward:  334.0|last_reward_at:   85|Elapsed Time: 0:00:30||

Episode 10|Iteration 100|reward:  334.0|last_reward_at:   85|Elapsed Time: 0:00:31||

Episode 10|Iteration 105|reward:  334.0|last_reward_at:   85|Elapsed Time: 0:00:32||

Episode 10|Iteration 107|reward:  384.0|last_reward_at:   85|Elapsed Time: 0:00:32||

Episode 10|Iteration 107|reward:  384.0|last_reward_at:  107|Elapsed Time: 0:00:32||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
Website[user=monitor],owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScan-HomeDirectory],[]
AzureStorage,owned,[CTFFLAG:LeakedCustomerData],[],[AccessDataWithSASToken]
AzureResourceManager[user=monitor],owned,[],[],[]
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."


Episode 10|Iteration 108|reward:  384.0|last_reward_at:  107|Elapsed Time: 0:00:33||

Episode 10|Iteration 110|reward:  395.0|last_reward_at:  107|Elapsed Time: 0:00:34||

Episode 10|Iteration 110|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:34||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
Website[user=monitor],owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScan-HomeDirectory],[]
AzureStorage,owned,[CTFFLAG:LeakedCustomerData],[],[AccessDataWithSASToken]
AzureResourceManager[user=monitor],owned,[],[],[]
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."
Sharepoint,discovered,,,[ScanSharepointParentDirectory]


Episode 10|Iteration 111|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:34||

Episode 10|Iteration 114|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:34||

Episode 10|Iteration 115|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:36||

Episode 10|Iteration 120|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:37||

Episode 10|Iteration 124|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:37||

Episode 10|Iteration 125|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:38||

Episode 10|Iteration 127|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:38||

Episode 10|Iteration 129|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:39||

Episode 10|Iteration 130|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:40||

Episode 10|Iteration 135|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:42||

Episode 10|Iteration 136|reward:  395.0|last_reward_at:  110|Elapsed Time: 0:00:42||

Episode 10|Iteration 138|reward:  409.0|last_reward_at:  110|Elapsed Time: 0:00:42||

Episode 10|Iteration 138|reward:  409.0|last_reward_at:  138|Elapsed Time: 0:00:42||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
Website[user=monitor],owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScan-HomeDirectory],[]
AzureStorage,owned,[CTFFLAG:LeakedCustomerData],[],[AccessDataWithSASToken]
AzureResourceManager[user=monitor],owned,[],[],[]
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."
Sharepoint,discovered,,,[ScanSharepointParentDirectory]
AzureResourceManager,discovered,,,[ListAzureResources]


Episode 10|Iteration 139|reward:  409.0|last_reward_at:  138|Elapsed Time: 0:00:42||

Episode 10|Iteration 140|reward:  409.0|last_reward_at:  138|Elapsed Time: 0:00:43||

Episode 10|Iteration 143|reward:  459.0|last_reward_at:  138|Elapsed Time: 0:00:43||

Episode 10|Iteration 143|reward:  459.0|last_reward_at:  143|Elapsed Time: 0:00:43||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
Website[user=monitor],owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScan-HomeDirectory],[]
AzureStorage,owned,[CTFFLAG:LeakedCustomerData],[],[AccessDataWithSASToken]
AzureResourceManager[user=monitor],owned,[],[],[]
AzureResourceManager,owned,[CTFFLAG:LeakedCustomerData2],[],[ListAzureResources]
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."
Sharepoint,discovered,,,[ScanSharepointParentDirectory]


Episode 10|Iteration 144|reward:  459.0|last_reward_at:  143|Elapsed Time: 0:00:43||

Episode 10|Iteration 145|reward:  459.0|last_reward_at:  143|Elapsed Time: 0:00:45||

Episode 10|Iteration 150|reward:  459.0|last_reward_at:  143|Elapsed Time: 0:00:46||

Episode 10|Iteration 151|reward:  470.0|last_reward_at:  143|Elapsed Time: 0:00:46||

Episode 10|Iteration 151|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:46||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
Website[user=monitor],owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScan-HomeDirectory],[]
AzureStorage,owned,[CTFFLAG:LeakedCustomerData],[],[AccessDataWithSASToken]
AzureResourceManager[user=monitor],owned,[],[],[]
AzureResourceManager,owned,[CTFFLAG:LeakedCustomerData2],[],[ListAzureResources]
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."
Sharepoint,discovered,,,[ScanSharepointParentDirectory]
AzureVM,discovered,,,[]


Episode 10|Iteration 152|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:46||

Episode 10|Iteration 155|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:48||

Episode 10|Iteration 159|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:48||

Episode 10|Iteration 160|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:49||

Episode 10|Iteration 165|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:49||

Episode 10|Iteration 165|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:51||

Episode 10|Iteration 169|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:51||

Episode 10|Iteration 170|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:52||

Episode 10|Iteration 175|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:54||

Episode 10|Iteration 178|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:54||

Episode 10|Iteration 180|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:55||

Episode 10|Iteration 184|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:55||

Episode 10|Iteration 185|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:57||

Episode 10|Iteration 190|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:57||

Episode 10|Iteration 190|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:58||

Episode 10|Iteration 193|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:00:58||

Episode 10|Iteration 195|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:00||

Episode 10|Iteration 197|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:00||

Episode 10|Iteration 200|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:02||

Episode 10|Iteration 205|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:03||

Episode 10|Iteration 209|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:03||

Episode 10|Iteration 210|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:05||

Episode 10|Iteration 214|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:05||

Episode 10|Iteration 215|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:06||

Episode 10|Iteration 217|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:06||

Episode 10|Iteration 218|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:07||

Episode 10|Iteration 220|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:08||

Episode 10|Iteration 222|reward:  470.0|last_reward_at:  151|Elapsed Time: 0:01:08||

Episode 10|Iteration 222|reward:  476.0|last_reward_at:  151|Elapsed Time: 0:01:08||

Episode 10|Iteration 222|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:08||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
client,owned,[],[SearchEdgeHistory],[]
Website,owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScanBashHistory],"[ScanPageSource, ScanPageContent]"
Website[user=monitor],owned,"[MySql, Ubuntu, nginx/1.10.3]",[CredScan-HomeDirectory],[]
AzureStorage,owned,[CTFFLAG:LeakedCustomerData],[],[AccessDataWithSASToken]
AzureResourceManager[user=monitor],owned,[],[],[]
AzureResourceManager,owned,[CTFFLAG:LeakedCustomerData2],[],[ListAzureResources]
GitHubProject,discovered,,,[CredScanGitHistory]
Website.Directory,discovered,,,"[NavigateWebDirectory, NavigateWebDirectoryFur..."
Sharepoint,discovered,,,[ScanSharepointParentDirectory]
AzureVM,discovered,,,[]


Episode 10|Iteration 223|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:08||

Episode 10|Iteration 225|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:09||

Episode 10|Iteration 230|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:11||

Episode 10|Iteration 234|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:11||

Episode 10|Iteration 235|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:12||

Episode 10|Iteration 240|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:14||

Episode 10|Iteration 245|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:15||

Episode 10|Iteration 250|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:19||

Episode 10|Iteration 254|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:19||

Episode 10|Iteration 255|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:21||

Episode 10|Iteration 260|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:22||

Episode 10|Iteration 265|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:24||

Episode 10|Iteration 267|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:24||

Episode 10|Iteration 270|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:26||

Episode 10|Iteration 275|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:27||

Episode 10|Iteration 279|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:27||

Episode 10|Iteration 280|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:29||

Episode 10|Iteration 285|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:30||

Episode 10|Iteration 290|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:32||

Episode 10|Iteration 295|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:34||

Episode 10|Iteration 300|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:35||

Episode 10|Iteration 304|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:35||

Episode 10|Iteration 305|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:37||

Episode 10|Iteration 310|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:39||

Episode 10|Iteration 315|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:40||

Episode 10|Iteration 320|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:42||

Episode 10|Iteration 325|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:44||

Episode 10|Iteration 328|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:44||

Episode 10|Iteration 330|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:45||

Episode 10|Iteration 335|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:47||

Episode 10|Iteration 340|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:48||

Episode 10|Iteration 344|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:48||

Episode 10|Iteration 345|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:50||

Episode 10|Iteration 349|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:50||

Episode 10|Iteration 350|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:51||

Episode 10|Iteration 355|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:51||

Episode 10|Iteration 355|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:53||

Episode 10|Iteration 358|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:53||

Episode 10|Iteration 360|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:55||

Episode 10|Iteration 365|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:56||

Episode 10|Iteration 369|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:57||

Episode 10|Iteration 370|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:01:58||

Episode 10|Iteration 375|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:00||

Episode 10|Iteration 380|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:01||

Episode 10|Iteration 380|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:02||

Episode 10|Iteration 385|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:04||

Episode 10|Iteration 387|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:04||

Episode 10|Iteration 390|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:05||

Episode 10|Iteration 395|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:07||

Episode 10|Iteration 400|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:09||

Episode 10|Iteration 404|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:09||

Episode 10|Iteration 405|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:11||

Episode 10|Iteration 410|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:12||

Episode 10|Iteration 415|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:14||

Episode 10|Iteration 419|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:14||

Episode 10|Iteration 420|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:15||

Episode 10|Iteration 425|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:17||

Episode 10|Iteration 430|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:18||

Episode 10|Iteration 433|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:18||

Episode 10|Iteration 435|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:20||

Episode 10|Iteration 440|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:21||

Episode 10|Iteration 445|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:23||

Episode 10|Iteration 450|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:23||

Episode 10|Iteration 450|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:25||

Episode 10|Iteration 455|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:26||

Episode 10|Iteration 460|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:27||

Episode 10|Iteration 465|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:29||

Episode 10|Iteration 470|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:30||

Episode 10|Iteration 474|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:30||

Episode 10|Iteration 475|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:32||

Episode 10|Iteration 477|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:32||

Episode 10|Iteration 480|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:33||

Episode 10|Iteration 485|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:35||

Episode 10|Iteration 490|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:36||

Episode 10|Iteration 493|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:36||

Episode 10|Iteration 495|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:37||

Episode 10|Iteration 500|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:37||

Episode 10|Iteration 500|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:39||

Episode 10|Iteration 500|reward:  476.0|last_reward_at:  222|Elapsed Time: 0:02:39||




  Episode 10 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 0/10 (0.00)
    explore-remote: 1/112 (0.01)
    explore-connect: 1/24 (0.04)
    exploit-local: 3/28 (0.10)
    exploit-remote: 7/305 (0.02)
    exploit-connect: 4/5 (0.44)
  exploit deflected to exploration: 48
simulation ended


In [12]:
# -----------------------------------------
# 7) 플롯 (기존 그대로)
# -----------------------------------------
all_runs = [
    dql_run,
    dql_exploit_run,
]

themodel = dqla.CyberBattleStateActionModel(ep)
p.plot_averaged_cummulative_rewards(
    all_runs=all_runs,
    title=(
        f"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count}\n"
        f"State: {[f.name() for f in themodel.state_space.feature_selection]} "
        f"({len(themodel.state_space.feature_selection)})\n"
        f"Action: abstract_action ({themodel.action_space.flat_size()})"
    ),
    save_at=os.path.join(plots_dir, f"benchmark-{gymid}-cumrewards.png"),
)

contenders = [dql_run, dql_exploit_run]
p.plot_episodes_length(contenders)
p.plot_averaged_cummulative_rewards(
    title=f"Agent Benchmark top contenders\nmax_nodes:{ep.maximum_node_count}\n",
    all_runs=contenders,
    save_at=os.path.join(plots_dir, f"benchmark-{gymid}-cumreward_contenders.png"),
)

for r in contenders:
    p.plot_all_episodes(r)


FigureCanvasAgg is non-interactive, and thus cannot be shown


FigureCanvasAgg is non-interactive, and thus cannot be shown




FigureCanvasAgg is non-interactive, and thus cannot be shown

