In [1]:
# pylint: disable=invalid-name

In [2]:
import sys
import os
import re
import json
import yaml
import logging
import random
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import gymnasium as gym

import torch

import cyberbattle.agents.baseline.learner as learner
import cyberbattle.agents.baseline.plotting as p
import cyberbattle.agents.baseline.agent_wrapper as w
import cyberbattle.agents.baseline.agent_dql as dqla
from cyberbattle.agents.baseline.agent_wrapper import Verbosity
from cyberbattle._env.cyberbattle_env import CyberBattleEnv

from openai import OpenAI

logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format="%(levelname)s: %(message)s")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Papermill notebook parameters
gymid = "CyberBattleAutomotiveCTF-v0"
env_size = 10
iteration_count = 9000
training_episode_count = 50
eval_episode_count = 5
maximum_node_count = 22
maximum_total_credentials = 22
plots_dir = "output/plots"

# --- LLM(평가에서만 사용) 옵션: 기본 OFF ---
use_llm = False
model_id = "gpt-5.1"
llm_every_steps = 1      # 매 step마다 LLM 프루닝(비싸면 5~10 추천)
candidate_pool = 200     # 샘플링으로 구성할 후보 수
llm_topk = 10            # DQL 상위 topK 중 LLM이 1개 선택

def find_llm_token_yaml(start=None):
    p = os.path.abspath(start or os.getcwd())
    while True:
        cand = os.path.join(p, "llm_token.yaml")
        if os.path.exists(cand):
            return cand
        parent = os.path.dirname(p)
        if parent == p:
            raise RuntimeError(f"llm_token.yaml 못 찾음. 시작점={os.getcwd()}")
        p = parent

llm_token_yaml = find_llm_token_yaml()
print("FOUND llm_token_yaml =", llm_token_yaml)

# (옵션) LLM에 관측 텍스트를 얼마나 줄지
llm_obs_max_chars = 1400

FOUND llm_token_yaml = /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/llm_token.yaml


In [4]:
# Parameters
gymid = "CyberBattleAutomotiveCTF-v0"
env_size = None
iteration_count = 500
training_episode_count = 20
eval_episode_count = 10
maximum_node_count = 32
maximum_total_credentials = 10
plots_dir = "notebooks/output/automotive_ctf_hybrid_dql_llm/plots"
use_llm = True
model_id = "gpt-5.1"
llm_every_steps = 5
candidate_pool = 200
llm_topk = 10


In [5]:
os.makedirs(plots_dir, exist_ok=True)

In [6]:
# -----------------------------
# 1) Gym env 로드 (기존 그대로)
# -----------------------------
if env_size:
    _gym_env = gym.make(gymid, size=env_size)
else:
    _gym_env = gym.make(gymid)

from typing import cast
gym_env = cast(CyberBattleEnv, _gym_env.unwrapped)
assert isinstance(gym_env, CyberBattleEnv), f"Expected CyberBattleEnv, got {type(gym_env)}"

ep = w.EnvironmentBounds.of_identifiers(
    maximum_node_count=maximum_node_count,
    maximum_total_credentials=maximum_total_credentials,
    identifiers=gym_env.identifiers,
)

In [7]:
# -----------------------------------------
# 2) OpenAI 토큰 로더 + chat callable
# -----------------------------------------
def load_openai_token(config_path: str) -> str:
    if os.path.exists(config_path):
        with open(config_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
        if isinstance(data, dict):
            oa_cfg = data.get("openai", {})
            if isinstance(oa_cfg, dict) and "api_key" in oa_cfg:
                key = str(oa_cfg["api_key"]).strip()
                if key.lower() == "dummy" or len(key) < 20:
                    raise RuntimeError(f"llm_token.yaml의 openai.api_key가 이상함: {key!r}")
                return key

    key = (os.getenv("OPENAI_API_KEY") or "").strip()
    if key and key.lower() != "dummy":
        return key

    raise RuntimeError(f"OpenAI API 키를 못 찾음. config_path={config_path!r}, ENV OPENAI_API_KEY도 없음/이상함")


def make_openai_chat_callable(model_id: str, llm_token_yaml: str):
    api_key = load_openai_token(llm_token_yaml)  # ✅ 여기 중요
    # 디버그(키 노출 금지용)
    print("[OpenAI] key_prefix=", api_key[:8], "len=", len(api_key), "yaml=", os.path.abspath(llm_token_yaml))

    client = OpenAI(api_key=api_key)

    def chat(messages):
        resp = client.responses.create(
            model=model_id,
            input=[{"role": m["role"], "content": m["content"]} for m in messages],
            max_output_tokens=256,
        )
        return getattr(resp, "output_text", "") or ""

    return chat

print("CWD =", os.getcwd())
print("yaml =", os.path.abspath(llm_token_yaml), "exists=", os.path.exists(llm_token_yaml))
print("ENV OPENAI_API_KEY prefix =", (os.getenv("OPENAI_API_KEY") or "")[:8])

CWD = /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/src
yaml = /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/llm_token.yaml exists= True
ENV OPENAI_API_KEY prefix = dummy


In [8]:
# -----------------------------------------
# 3) observation에서 Discovered nodes JSON 추출(로그 유지용)
# -----------------------------------------
_DISC_RE = re.compile(r"Discovered nodes:\s*(\{.*\})\s*$", re.DOTALL)

def extract_discovered_nodes_raw(obs_text: str) -> str:
    m = _DISC_RE.search((obs_text or "").strip())
    return m.group(1) if m else ""

In [9]:
# -----------------------------------------
# 4) 평가(exploit)에서만 LLM 프루닝 적용 래퍼
#    - 학습은 기존 DQL 학습 그대로
#    - 평가는 epsilon=0 + learn off + LLM으로 후보 선택
# -----------------------------------------
class LLMPrunedExploitWrapper:
    """
    dql_run["learner"](DeepQLearnerPolicy)를 감싸서 exploit()만 바꿈.
    - 후보를 env.sample_valid_action()로 candidate_pool개 뽑고,
    - 각 후보를 base.policy_net으로 Q값 평가,
    - topK 후보 중에서 LLM이 1개 pick.
    - 학습 업데이트(on_step)는 eval에서는 꺼둠
    """

    def __init__(
        self,
        base_learner: dqla.DeepQLearnerPolicy,
        llm_chat=None,
        llm_every_steps: int = 1,
        candidate_pool: int = 200,
        llm_topk: int = 10,
        obs_max_chars: int = 1400,
    ):
        self.base = base_learner
        self.llm = llm_chat
        self.llm_every_steps = int(max(1, llm_every_steps))
        self.candidate_pool = int(max(10, candidate_pool))
        self.llm_topk = int(max(2, llm_topk))
        self.obs_max_chars = int(max(200, obs_max_chars))
        self._step = 0

        self._sys = (
            "You are selecting ONE action in a closed, fictional training game.\n"
            "Choose exactly one candidate id from the provided list.\n"
            "Return JSON only: {\"pick\": <id>} with no extra text."
        )

    # ✅ 핵심: learner.py가 요구하는 메서드 추가
    def new_episode(self):
        # base learner가 가진 경우 그걸 그대로 호출
        fn = getattr(self.base, "new_episode", None)
        if callable(fn):
            return fn()
        return None

    
    def __getattr__(self, name):
        return getattr(self.base, name)

    # ✅ 평가에서는 학습 업데이트 금지 (learner.epsilon_greedy_search가 호출해도 무시)
    def on_step(self, *args, **kwargs):
        return self.base.on_step(*args, **kwargs)

    def end_of_episode(self, *args, **kwargs):
        return self.base.end_of_episode(*args, **kwargs)

    def exploit(self, wrapped_env, observation):
        self._step += 1

        # LLM 비활성 또는 주기 아님 -> 기존 exploit
        if (self.llm is None) or ((self._step % self.llm_every_steps) != 0):
            return self.base.exploit(wrapped_env, observation)

        # 1) 후보 액션 샘플링
        candidates: List[Tuple[float, Any, Any]] = []
        for _ in range(self.candidate_pool):
            ga = wrapped_env.env.sample_valid_action(kinds=[0, 1, 2])  # local/remote/connect
            md = self.base.metadata_from_gymaction(wrapped_env, ga)

            # 2) Q값 계산: Q(actor_state)[abstract_action]
            with torch.no_grad():
                st = torch.as_tensor(md.actor_state, dtype=torch.float32, device=device).unsqueeze(0)
                q_all = self.base.policy_net(st)
                qv = float(q_all[0, int(md.abstract_action)].item())

            candidates.append((qv, ga, md))

        if not candidates:
            return self.base.exploit(wrapped_env, observation)

        candidates.sort(key=lambda x: x[0], reverse=True)
        top = candidates[: self.llm_topk]

        # observation 일부를 LLM에 제공(선택)
        try:
            obs_txt = json.dumps(observation, ensure_ascii=False)[: self.obs_max_chars]
        except Exception:
            obs_txt = str(observation)[: self.obs_max_chars]

        payload = {
            "observation_preview": obs_txt,
            "candidates": [
                {"id": i, "q": round(float(qv), 4), "gym_action": repr(ga)}
                for i, (qv, ga, md) in enumerate(top)
            ],
        }

        out = self.llm(
            [
                {"role": "system", "content": self._sys},
                {"role": "user", "content": json.dumps(payload, ensure_ascii=False)},
            ]
        ) or ""

        m = re.search(r"\{.*\}", out, flags=re.DOTALL)
        if not m:
            qv, ga, md = top[0]
            return "exploit[dql_top1]", ga, md

        try:
            obj = json.loads(m.group(0))
            pick = int(obj.get("pick"))
            if 0 <= pick < len(top):
                qv, ga, md = top[pick]
                return "exploit[llm_pruned]", ga, md
        except Exception:
            pass

        qv, ga, md = top[0]
        return "exploit[dql_top1]", ga, md

In [10]:
# -----------------------------------------
# 5) DQL 학습 (✅ 기존 파라미터 그대로 유지)
# -----------------------------------------
dql_run = learner.epsilon_greedy_search(
    cyberbattle_gym_env=gym_env,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.015,
        replay_memory_size=10000,
        target_update=10,
        batch_size=512,
        learning_rate=0.01,
    ),
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.90,
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    render=False,
    plot_episodes_length=False,
    title="DQL",
)

###### DQL
Learning with: episode_count=20,iteration_count=500,ϵ=0.9,ϵ_min=0.1, ϵ_expdecay=5000,γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10
  ## Episode: 1/20 'DQL' ϵ=0.9000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 1|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

  state_batch = torch.tensor(states_to_consider).to(device)
Episode 1|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 3|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 3|reward:   11.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 1|Iteration 5|reward:   24.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 1|Iteration 5|reward:   24.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 1|Iteration 14|reward:   37.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 1|Iteration 14|reward:   37.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 1|Iteration 15|reward:   48.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 1|Iteration 15|reward:   48.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 1|Iteration 21|reward:   88.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 1|Iteration 21|reward:   88.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 1|Iteration 38|reward:   88.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 1|Iteration 40|reward:   94.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 1|Iteration 40|reward:   94.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 1|Iteration 43|reward:  100.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 1|Iteration 43|reward:  100.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 1|Iteration 57|reward:  100.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 1|Iteration 57|reward:  106.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 1|Iteration 57|reward:  106.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 1|Iteration 70|reward:  106.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 1|Iteration 83|reward:  106.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 1|Iteration 102|reward:  106.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 1|Iteration 114|reward:  106.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 1|Iteration 116|reward:  117.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 1|Iteration 116|reward:  117.0|last_reward_at:  116|Elapsed Time: 0:00:00||

Episode 1|Iteration 117|reward:  131.0|last_reward_at:  116|Elapsed Time: 0:00:00||

Episode 1|Iteration 117|reward:  131.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 1|Iteration 133|reward:  131.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 1|Iteration 146|reward:  131.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 1|Iteration 165|reward:  131.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 1|Iteration 169|reward:  140.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 1|Iteration 169|reward:  140.0|last_reward_at:  169|Elapsed Time: 0:00:00||

Episode 1|Iteration 184|reward:  140.0|last_reward_at:  169|Elapsed Time: 0:00:00||

Episode 1|Iteration 203|reward:  140.0|last_reward_at:  169|Elapsed Time: 0:00:00||

Episode 1|Iteration 216|reward:  140.0|last_reward_at:  169|Elapsed Time: 0:00:01||

Episode 1|Iteration 235|reward:  140.0|last_reward_at:  169|Elapsed Time: 0:00:01||

Episode 1|Iteration 254|reward:  140.0|last_reward_at:  169|Elapsed Time: 0:00:01||

Episode 1|Iteration 273|reward:  140.0|last_reward_at:  169|Elapsed Time: 0:00:01||

Episode 1|Iteration 292|reward:  140.0|last_reward_at:  169|Elapsed Time: 0:00:01||

Episode 1|Iteration 311|reward:  140.0|last_reward_at:  169|Elapsed Time: 0:00:01||

Episode 1|Iteration 326|reward:  153.0|last_reward_at:  169|Elapsed Time: 0:00:01||

Episode 1|Iteration 326|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:01||

Episode 1|Iteration 349|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:01||

Episode 1|Iteration 368|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:01||

Episode 1|Iteration 387|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:01||

Episode 1|Iteration 406|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:01||

Episode 1|Iteration 411|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:01||

Episode 1|Iteration 418|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:01||

Episode 1|Iteration 425|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:02||

Episode 1|Iteration 437|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:02||

Episode 1|Iteration 444|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:02||

Episode 1|Iteration 450|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:02||

Episode 1|Iteration 462|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:02||

Episode 1|Iteration 469|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:02||

Episode 1|Iteration 479|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:02||

Episode 1|Iteration 488|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:02||

Episode 1|Iteration 494|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:02||

Episode 1|Iteration 500|reward:  153.0|last_reward_at:  326|Elapsed Time: 0:00:02||




  Episode 1 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 4/12 (0.25)
    explore-remote: 7/236 (0.03)
    explore-connect: 1/224 (0.00)
    exploit-local: 0/11 (0.00)
    exploit-remote: 0/0 (NaN)
    exploit-connect: 0/5 (0.00)
  exploit deflected to exploration: 49
  ## Episode: 2/20 'DQL' ϵ=0.8240, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 2|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 2|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 2|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 2|Iteration 3|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 2|Iteration 3|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 2|Iteration 5|reward:   33.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 2|Iteration 5|reward:   33.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 2|Iteration 6|reward:   47.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 2|Iteration 6|reward:   47.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 2|Iteration 13|reward:   47.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 2|Iteration 13|reward:   61.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 2|Iteration 13|reward:   61.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 2|Iteration 15|reward:   67.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 2|Iteration 15|reward:   67.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 2|Iteration 26|reward:   67.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 2|Iteration 37|reward:   73.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 2|Iteration 37|reward:   73.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 2|Iteration 45|reward:   73.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 2|Iteration 57|reward:   73.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 2|Iteration 64|reward:   73.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 2|Iteration 70|reward:   73.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 2|Iteration 81|reward:   73.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 2|Iteration 89|reward:   73.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 2|Iteration 101|reward:   73.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 2|Iteration 114|reward:   73.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 2|Iteration 118|reward:   86.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 2|Iteration 118|reward:   86.0|last_reward_at:  118|Elapsed Time: 0:00:00||

Episode 2|Iteration 127|reward:   86.0|last_reward_at:  118|Elapsed Time: 0:00:01||

Episode 2|Iteration 133|reward:   86.0|last_reward_at:  118|Elapsed Time: 0:00:01||

Episode 2|Iteration 140|reward:   86.0|last_reward_at:  118|Elapsed Time: 0:00:01||

Episode 2|Iteration 146|reward:   86.0|last_reward_at:  118|Elapsed Time: 0:00:01||

Episode 2|Iteration 158|reward:   86.0|last_reward_at:  118|Elapsed Time: 0:00:01||

Episode 2|Iteration 165|reward:   86.0|last_reward_at:  118|Elapsed Time: 0:00:01||

Episode 2|Iteration 174|reward:   86.0|last_reward_at:  118|Elapsed Time: 0:00:01||

Episode 2|Iteration 182|reward:   99.0|last_reward_at:  118|Elapsed Time: 0:00:01||

Episode 2|Iteration 182|reward:   99.0|last_reward_at:  182|Elapsed Time: 0:00:01||

Episode 2|Iteration 190|reward:   99.0|last_reward_at:  182|Elapsed Time: 0:00:01||

Episode 2|Iteration 195|reward:  159.0|last_reward_at:  182|Elapsed Time: 0:00:01||

Episode 2|Iteration 195|reward:  159.0|last_reward_at:  195|Elapsed Time: 0:00:01||

Episode 2|Iteration 196|reward:  167.0|last_reward_at:  195|Elapsed Time: 0:00:01||

Episode 2|Iteration 196|reward:  167.0|last_reward_at:  196|Elapsed Time: 0:00:01||

Episode 2|Iteration 200|reward:  176.0|last_reward_at:  196|Elapsed Time: 0:00:01||

Episode 2|Iteration 200|reward:  176.0|last_reward_at:  200|Elapsed Time: 0:00:01||

Episode 2|Iteration 209|reward:  176.0|last_reward_at:  200|Elapsed Time: 0:00:01||

Episode 2|Iteration 216|reward:  176.0|last_reward_at:  200|Elapsed Time: 0:00:01||

Episode 2|Iteration 218|reward:  182.0|last_reward_at:  200|Elapsed Time: 0:00:01||

Episode 2|Iteration 218|reward:  182.0|last_reward_at:  218|Elapsed Time: 0:00:01||

Episode 2|Iteration 227|reward:  182.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 2|Iteration 235|reward:  182.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 2|Iteration 241|reward:  182.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 2|Iteration 247|reward:  182.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 2|Iteration 252|reward:  242.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 2|Iteration 252|reward:  242.0|last_reward_at:  252|Elapsed Time: 0:00:02||

Episode 2|Iteration 260|reward:  242.0|last_reward_at:  252|Elapsed Time: 0:00:02||

Episode 2|Iteration 260|reward:  248.0|last_reward_at:  252|Elapsed Time: 0:00:02||

Episode 2|Iteration 260|reward:  248.0|last_reward_at:  260|Elapsed Time: 0:00:02||

Episode 2|Iteration 262|reward:  288.0|last_reward_at:  260|Elapsed Time: 0:00:02||

Episode 2|Iteration 262|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:02||

Episode 2|Iteration 272|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:02||

Episode 2|Iteration 279|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:02||

Episode 2|Iteration 285|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:02||

Episode 2|Iteration 292|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:02||

Episode 2|Iteration 298|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:02||

Episode 2|Iteration 304|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:02||

Episode 2|Iteration 311|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:02||

Episode 2|Iteration 317|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:02||

Episode 2|Iteration 323|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 330|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 336|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 346|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 355|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 361|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 368|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 374|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 380|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 387|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 397|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 406|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:03||

Episode 2|Iteration 412|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:04||

Episode 2|Iteration 418|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:04||

Episode 2|Iteration 425|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:04||

Episode 2|Iteration 431|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:04||

Episode 2|Iteration 437|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:04||

Episode 2|Iteration 444|reward:  288.0|last_reward_at:  262|Elapsed Time: 0:00:04||

Episode 2|Iteration 446|reward:  294.0|last_reward_at:  262|Elapsed Time: 0:00:04||

Episode 2|Iteration 446|reward:  294.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 2|Iteration 453|reward:  294.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 2|Iteration 462|reward:  294.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 2|Iteration 469|reward:  294.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 2|Iteration 475|reward:  294.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 2|Iteration 482|reward:  294.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 2|Iteration 488|reward:  294.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 2|Iteration 494|reward:  294.0|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 2|Iteration 500|reward:  294.0|last_reward_at:  446|Elapsed Time: 0:00:05||

Episode 2|Iteration 500|reward:  294.0|last_reward_at:  446|Elapsed Time: 0:00:05||




  Episode 2 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 5/14 (0.26)
    explore-remote: 7/176 (0.04)
    explore-connect: 1/200 (0.00)
    exploit-local: 2/54 (0.04)
    exploit-remote: 0/2 (0.00)
    exploit-connect: 2/37 (0.05)
  exploit deflected to exploration: 12
  ## Episode: 3/20 'DQL' ϵ=0.7551, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 3|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 4|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 4|reward:   11.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 3|Iteration 8|reward:   24.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 3|Iteration 8|reward:   24.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 3|Iteration 19|reward:   24.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 3|Iteration 19|reward:   35.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 3|Iteration 19|reward:   35.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 3|Iteration 26|reward:   35.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 3|Iteration 30|reward:   44.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 3|Iteration 30|reward:   44.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 3|Iteration 36|reward:   57.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 3|Iteration 36|reward:   57.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 3|Iteration 45|reward:   57.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 3|Iteration 48|reward:   97.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 3|Iteration 48|reward:   97.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 3|Iteration 57|reward:   97.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 3|Iteration 60|reward:  103.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 3|Iteration 60|reward:  103.0|last_reward_at:   60|Elapsed Time: 0:00:00||

Episode 3|Iteration 70|reward:  103.0|last_reward_at:   60|Elapsed Time: 0:00:00||

Episode 3|Iteration 76|reward:  103.0|last_reward_at:   60|Elapsed Time: 0:00:00||

Episode 3|Iteration 83|reward:  103.0|last_reward_at:   60|Elapsed Time: 0:00:00||

Episode 3|Iteration 86|reward:  109.0|last_reward_at:   60|Elapsed Time: 0:00:00||

Episode 3|Iteration 86|reward:  109.0|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 3|Iteration 95|reward:  109.0|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 3|Iteration 102|reward:  109.0|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 3|Iteration 107|reward:  123.0|last_reward_at:   86|Elapsed Time: 0:00:01||

Episode 3|Iteration 107|reward:  123.0|last_reward_at:  107|Elapsed Time: 0:00:01||

Episode 3|Iteration 114|reward:  123.0|last_reward_at:  107|Elapsed Time: 0:00:01||

Episode 3|Iteration 121|reward:  123.0|last_reward_at:  107|Elapsed Time: 0:00:01||

Episode 3|Iteration 127|reward:  123.0|last_reward_at:  107|Elapsed Time: 0:00:01||

Episode 3|Iteration 133|reward:  123.0|last_reward_at:  107|Elapsed Time: 0:00:01||

Episode 3|Iteration 140|reward:  123.0|last_reward_at:  107|Elapsed Time: 0:00:01||

Episode 3|Iteration 146|reward:  123.0|last_reward_at:  107|Elapsed Time: 0:00:01||

Episode 3|Iteration 146|reward:  134.0|last_reward_at:  107|Elapsed Time: 0:00:01||

Episode 3|Iteration 146|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 3|Iteration 152|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 3|Iteration 159|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 3|Iteration 165|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 3|Iteration 171|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 3|Iteration 178|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 3|Iteration 184|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 3|Iteration 190|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 3|Iteration 197|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:02||

Episode 3|Iteration 203|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:02||

Episode 3|Iteration 209|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:02||

Episode 3|Iteration 216|reward:  134.0|last_reward_at:  146|Elapsed Time: 0:00:02||

Episode 3|Iteration 218|reward:  140.0|last_reward_at:  146|Elapsed Time: 0:00:02||

Episode 3|Iteration 218|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 228|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 235|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 241|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 247|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 254|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 260|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 266|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 273|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 279|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 285|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 292|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:02||

Episode 3|Iteration 298|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:03||

Episode 3|Iteration 304|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:03||

Episode 3|Iteration 311|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:03||

Episode 3|Iteration 317|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:03||

Episode 3|Iteration 323|reward:  140.0|last_reward_at:  218|Elapsed Time: 0:00:03||

Episode 3|Iteration 329|reward:  200.0|last_reward_at:  218|Elapsed Time: 0:00:03||

Episode 3|Iteration 329|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:03||

Episode 3|Iteration 336|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:03||

Episode 3|Iteration 342|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:03||

Episode 3|Iteration 349|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:03||

Episode 3|Iteration 355|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:03||

Episode 3|Iteration 361|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:03||

Episode 3|Iteration 368|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:03||

Episode 3|Iteration 374|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:03||

Episode 3|Iteration 380|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:03||

Episode 3|Iteration 387|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:03||

Episode 3|Iteration 393|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 399|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 406|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 412|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 418|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 425|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 431|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 437|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 444|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 450|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 456|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 463|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 469|reward:  200.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 469|reward:  213.0|last_reward_at:  329|Elapsed Time: 0:00:04||

Episode 3|Iteration 469|reward:  213.0|last_reward_at:  469|Elapsed Time: 0:00:04||

Episode 3|Iteration 479|reward:  213.0|last_reward_at:  469|Elapsed Time: 0:00:04||

Episode 3|Iteration 488|reward:  213.0|last_reward_at:  469|Elapsed Time: 0:00:05||

Episode 3|Iteration 494|reward:  213.0|last_reward_at:  469|Elapsed Time: 0:00:05||

Episode 3|Iteration 500|reward:  213.0|last_reward_at:  469|Elapsed Time: 0:00:05||

Episode 3|Iteration 500|reward:  213.0|last_reward_at:  469|Elapsed Time: 0:00:05||




  Episode 3 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/4 (0.20)
    explore-remote: 5/186 (0.03)
    explore-connect: 1/163 (0.01)
    exploit-local: 3/64 (0.04)
    exploit-remote: 2/30 (0.06)
    exploit-connect: 1/40 (0.02)
  exploit deflected to exploration: 3
  ## Episode: 4/20 'DQL' ϵ=0.6928, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 4|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 2|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 2|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 4|Iteration 11|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 4|Iteration 13|reward:   24.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 4|Iteration 13|reward:   24.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 4|Iteration 21|reward:   84.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 4|Iteration 21|reward:   84.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 4|Iteration 26|reward:   84.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 4|Iteration 26|reward:   90.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 4|Iteration 26|reward:   90.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 4|Iteration 37|reward:   90.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 4|Iteration 45|reward:   90.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 4|Iteration 51|reward:   90.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 4|Iteration 57|reward:   90.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 4|Iteration 64|reward:   90.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 4|Iteration 70|reward:   90.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 4|Iteration 76|reward:   90.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 4|Iteration 83|reward:   90.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 4|Iteration 88|reward:  101.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 4|Iteration 88|reward:  101.0|last_reward_at:   88|Elapsed Time: 0:00:00||

Episode 4|Iteration 95|reward:  101.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 4|Iteration 101|reward:  101.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 4|Iteration 108|reward:  101.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 4|Iteration 111|reward:  112.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 4|Iteration 111|reward:  112.0|last_reward_at:  111|Elapsed Time: 0:00:01||

Episode 4|Iteration 119|reward:  126.0|last_reward_at:  111|Elapsed Time: 0:00:01||

Episode 4|Iteration 119|reward:  126.0|last_reward_at:  119|Elapsed Time: 0:00:01||

Episode 4|Iteration 122|reward:  135.0|last_reward_at:  119|Elapsed Time: 0:00:01||

Episode 4|Iteration 122|reward:  135.0|last_reward_at:  122|Elapsed Time: 0:00:01||

Episode 4|Iteration 133|reward:  135.0|last_reward_at:  122|Elapsed Time: 0:00:01||

Episode 4|Iteration 140|reward:  135.0|last_reward_at:  122|Elapsed Time: 0:00:01||

Episode 4|Iteration 146|reward:  135.0|last_reward_at:  122|Elapsed Time: 0:00:01||

Episode 4|Iteration 152|reward:  135.0|last_reward_at:  122|Elapsed Time: 0:00:01||

Episode 4|Iteration 159|reward:  135.0|last_reward_at:  122|Elapsed Time: 0:00:01||

Episode 4|Iteration 164|reward:  149.0|last_reward_at:  122|Elapsed Time: 0:00:01||

Episode 4|Iteration 164|reward:  149.0|last_reward_at:  164|Elapsed Time: 0:00:01||

Episode 4|Iteration 171|reward:  149.0|last_reward_at:  164|Elapsed Time: 0:00:01||

Episode 4|Iteration 176|reward:  149.0|last_reward_at:  164|Elapsed Time: 0:00:02||

Episode 4|Iteration 181|reward:  149.0|last_reward_at:  164|Elapsed Time: 0:00:02||

Episode 4|Iteration 187|reward:  149.0|last_reward_at:  164|Elapsed Time: 0:00:02||

Episode 4|Iteration 196|reward:  149.0|last_reward_at:  164|Elapsed Time: 0:00:02||

Episode 4|Iteration 203|reward:  149.0|last_reward_at:  164|Elapsed Time: 0:00:02||

Episode 4|Iteration 203|reward:  155.0|last_reward_at:  164|Elapsed Time: 0:00:02||

Episode 4|Iteration 203|reward:  155.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 4|Iteration 209|reward:  155.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 4|Iteration 216|reward:  155.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 4|Iteration 222|reward:  155.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 4|Iteration 228|reward:  155.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 4|Iteration 234|reward:  155.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 4|Iteration 238|reward:  160.0|last_reward_at:  203|Elapsed Time: 0:00:02||

Episode 4|Iteration 238|reward:  160.0|last_reward_at:  238|Elapsed Time: 0:00:02||

Episode 4|Iteration 246|reward:  160.0|last_reward_at:  238|Elapsed Time: 0:00:03||

Episode 4|Iteration 254|reward:  160.0|last_reward_at:  238|Elapsed Time: 0:00:03||

Episode 4|Iteration 260|reward:  160.0|last_reward_at:  238|Elapsed Time: 0:00:03||

Episode 4|Iteration 266|reward:  160.0|last_reward_at:  238|Elapsed Time: 0:00:03||

Episode 4|Iteration 273|reward:  160.0|last_reward_at:  238|Elapsed Time: 0:00:03||

Episode 4|Iteration 273|reward:  166.5|last_reward_at:  238|Elapsed Time: 0:00:03||

Episode 4|Iteration 273|reward:  166.5|last_reward_at:  273|Elapsed Time: 0:00:03||

Episode 4|Iteration 279|reward:  166.5|last_reward_at:  273|Elapsed Time: 0:00:03||

Episode 4|Iteration 285|reward:  166.5|last_reward_at:  273|Elapsed Time: 0:00:03||

Episode 4|Iteration 292|reward:  166.5|last_reward_at:  273|Elapsed Time: 0:00:03||

Episode 4|Iteration 298|reward:  166.5|last_reward_at:  273|Elapsed Time: 0:00:03||

Episode 4|Iteration 304|reward:  166.5|last_reward_at:  273|Elapsed Time: 0:00:03||

Episode 4|Iteration 305|reward:  175.5|last_reward_at:  273|Elapsed Time: 0:00:03||

Episode 4|Iteration 305|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:03||

Episode 4|Iteration 311|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:03||

Episode 4|Iteration 317|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:03||

Episode 4|Iteration 323|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:03||

Episode 4|Iteration 330|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 336|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 342|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 349|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 355|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 361|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 368|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 374|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 380|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 387|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 393|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 399|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 406|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 412|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 4|Iteration 418|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:05||

Episode 4|Iteration 425|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:05||

Episode 4|Iteration 431|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:05||

Episode 4|Iteration 437|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:05||

Episode 4|Iteration 444|reward:  175.5|last_reward_at:  305|Elapsed Time: 0:00:05||

Episode 4|Iteration 447|reward:  188.5|last_reward_at:  305|Elapsed Time: 0:00:05||

Episode 4|Iteration 447|reward:  188.5|last_reward_at:  447|Elapsed Time: 0:00:05||

Episode 4|Iteration 455|reward:  188.5|last_reward_at:  447|Elapsed Time: 0:00:05||

Episode 4|Iteration 463|reward:  188.5|last_reward_at:  447|Elapsed Time: 0:00:05||

Episode 4|Iteration 469|reward:  188.5|last_reward_at:  447|Elapsed Time: 0:00:05||

Episode 4|Iteration 475|reward:  188.5|last_reward_at:  447|Elapsed Time: 0:00:05||

Episode 4|Iteration 482|reward:  188.5|last_reward_at:  447|Elapsed Time: 0:00:05||

Episode 4|Iteration 488|reward:  188.5|last_reward_at:  447|Elapsed Time: 0:00:05||

Episode 4|Iteration 494|reward:  188.5|last_reward_at:  447|Elapsed Time: 0:00:05||

Episode 4|Iteration 500|reward:  188.5|last_reward_at:  447|Elapsed Time: 0:00:06||

Episode 4|Iteration 500|reward:  188.5|last_reward_at:  447|Elapsed Time: 0:00:06||




  Episode 4 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 5/11 (0.31)
    explore-remote: 6/165 (0.04)
    explore-connect: 0/141 (0.00)
    exploit-local: 2/105 (0.02)
    exploit-remote: 0/49 (0.00)
    exploit-connect: 1/15 (0.06)
  exploit deflected to exploration: 11
  ## Episode: 5/20 'DQL' ϵ=0.6364, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 5|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 5|Iteration 4|reward:   17.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 5|Iteration 4|reward:   17.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 5|Iteration 5|reward:   28.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 5|Iteration 5|reward:   28.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 5|Iteration 6|reward:   39.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 5|Iteration 6|reward:   39.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 5|Iteration 11|reward:   50.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 5|Iteration 11|reward:   50.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 5|Iteration 13|reward:   64.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 5|Iteration 13|reward:   64.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 5|Iteration 17|reward:   77.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 5|Iteration 17|reward:   77.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 5|Iteration 26|reward:   77.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 5|Iteration 32|reward:   77.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 5|Iteration 38|reward:   77.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 5|Iteration 40|reward:   86.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 5|Iteration 40|reward:   86.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 5|Iteration 49|reward:   86.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 5|Iteration 57|reward:   86.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 5|Iteration 64|reward:   86.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 5|Iteration 64|reward:   94.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 5|Iteration 64|reward:   94.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 5|Iteration 70|reward:   94.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 5|Iteration 76|reward:   94.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 5|Iteration 83|reward:   94.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 5|Iteration 89|reward:   94.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 5|Iteration 92|reward:  154.0|last_reward_at:   64|Elapsed Time: 0:00:01||

Episode 5|Iteration 92|reward:  154.0|last_reward_at:   92|Elapsed Time: 0:00:01||

Episode 5|Iteration 101|reward:  154.0|last_reward_at:   92|Elapsed Time: 0:00:01||

Episode 5|Iteration 108|reward:  154.0|last_reward_at:   92|Elapsed Time: 0:00:01||

Episode 5|Iteration 114|reward:  154.0|last_reward_at:   92|Elapsed Time: 0:00:01||

Episode 5|Iteration 120|reward:  160.0|last_reward_at:   92|Elapsed Time: 0:00:01||

Episode 5|Iteration 120|reward:  160.0|last_reward_at:  120|Elapsed Time: 0:00:01||

Episode 5|Iteration 127|reward:  160.0|last_reward_at:  120|Elapsed Time: 0:00:01||

Episode 5|Iteration 133|reward:  160.0|last_reward_at:  120|Elapsed Time: 0:00:01||

Episode 5|Iteration 140|reward:  160.0|last_reward_at:  120|Elapsed Time: 0:00:01||

Episode 5|Iteration 142|reward:  169.0|last_reward_at:  120|Elapsed Time: 0:00:01||

Episode 5|Iteration 142|reward:  169.0|last_reward_at:  142|Elapsed Time: 0:00:01||

Episode 5|Iteration 152|reward:  169.0|last_reward_at:  142|Elapsed Time: 0:00:01||

Episode 5|Iteration 159|reward:  169.0|last_reward_at:  142|Elapsed Time: 0:00:01||

Episode 5|Iteration 165|reward:  169.0|last_reward_at:  142|Elapsed Time: 0:00:01||

Episode 5|Iteration 171|reward:  169.0|last_reward_at:  142|Elapsed Time: 0:00:01||

Episode 5|Iteration 178|reward:  169.0|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 5|Iteration 184|reward:  169.0|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 5|Iteration 190|reward:  169.0|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 5|Iteration 196|reward:  169.0|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 5|Iteration 200|reward:  175.5|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 5|Iteration 200|reward:  175.5|last_reward_at:  200|Elapsed Time: 0:00:02||

Episode 5|Iteration 209|reward:  175.5|last_reward_at:  200|Elapsed Time: 0:00:02||

Episode 5|Iteration 213|reward:  181.5|last_reward_at:  200|Elapsed Time: 0:00:02||

Episode 5|Iteration 213|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:02||

Episode 5|Iteration 221|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:02||

Episode 5|Iteration 228|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:02||

Episode 5|Iteration 235|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:02||

Episode 5|Iteration 241|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:02||

Episode 5|Iteration 247|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:02||

Episode 5|Iteration 254|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:02||

Episode 5|Iteration 260|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:02||

Episode 5|Iteration 265|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:03||

Episode 5|Iteration 272|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:03||

Episode 5|Iteration 279|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:03||

Episode 5|Iteration 285|reward:  181.5|last_reward_at:  213|Elapsed Time: 0:00:03||

Episode 5|Iteration 290|reward:  221.5|last_reward_at:  213|Elapsed Time: 0:00:03||

Episode 5|Iteration 290|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:03||

Episode 5|Iteration 298|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:03||

Episode 5|Iteration 304|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:03||

Episode 5|Iteration 311|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:03||

Episode 5|Iteration 317|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:03||

Episode 5|Iteration 323|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:03||

Episode 5|Iteration 330|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:03||

Episode 5|Iteration 336|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:03||

Episode 5|Iteration 342|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 5|Iteration 347|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 5|Iteration 354|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 5|Iteration 361|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 5|Iteration 365|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 5|Iteration 371|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 5|Iteration 376|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 5|Iteration 380|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 5|Iteration 383|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 5|Iteration 390|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 5|Iteration 396|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 5|Iteration 402|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 5|Iteration 410|reward:  221.5|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 5|Iteration 415|reward:  235.5|last_reward_at:  290|Elapsed Time: 0:00:05||

Episode 5|Iteration 415|reward:  235.5|last_reward_at:  415|Elapsed Time: 0:00:05||

Episode 5|Iteration 422|reward:  235.5|last_reward_at:  415|Elapsed Time: 0:00:05||

Episode 5|Iteration 429|reward:  235.5|last_reward_at:  415|Elapsed Time: 0:00:05||

Episode 5|Iteration 433|reward:  235.5|last_reward_at:  415|Elapsed Time: 0:00:05||

Episode 5|Iteration 437|reward:  235.5|last_reward_at:  415|Elapsed Time: 0:00:05||

Episode 5|Iteration 442|reward:  235.5|last_reward_at:  415|Elapsed Time: 0:00:05||

Episode 5|Iteration 448|reward:  235.5|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 5|Iteration 453|reward:  235.5|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 5|Iteration 456|reward:  248.5|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 5|Iteration 456|reward:  248.5|last_reward_at:  456|Elapsed Time: 0:00:06||

Episode 5|Iteration 463|reward:  248.5|last_reward_at:  456|Elapsed Time: 0:00:06||

Episode 5|Iteration 469|reward:  248.5|last_reward_at:  456|Elapsed Time: 0:00:06||

Episode 5|Iteration 474|reward:  248.5|last_reward_at:  456|Elapsed Time: 0:00:06||

Episode 5|Iteration 482|reward:  248.5|last_reward_at:  456|Elapsed Time: 0:00:06||

Episode 5|Iteration 487|reward:  248.5|last_reward_at:  456|Elapsed Time: 0:00:06||

Episode 5|Iteration 488|reward:  308.5|last_reward_at:  456|Elapsed Time: 0:00:06||

Episode 5|Iteration 488|reward:  308.5|last_reward_at:  488|Elapsed Time: 0:00:06||

Episode 5|Iteration 494|reward:  308.5|last_reward_at:  488|Elapsed Time: 0:00:06||

Episode 5|Iteration 500|reward:  308.5|last_reward_at:  488|Elapsed Time: 0:00:06||

Episode 5|Iteration 500|reward:  308.5|last_reward_at:  488|Elapsed Time: 0:00:06||




  Episode 5 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 4/19 (0.17)
    explore-remote: 7/157 (0.04)
    explore-connect: 0/147 (0.00)
    exploit-local: 4/94 (0.04)
    exploit-remote: 0/36 (0.00)
    exploit-connect: 3/29 (0.09)
  exploit deflected to exploration: 6
  ## Episode: 6/20 'DQL' ϵ=0.5853, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 6|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 6|Iteration 6|reward:   24.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 6|Iteration 6|reward:   24.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 6|Iteration 13|reward:   24.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 6|Iteration 13|reward:   30.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 6|Iteration 13|reward:   30.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 6|Iteration 14|reward:   41.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 6|Iteration 14|reward:   41.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 6|Iteration 18|reward:   55.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 6|Iteration 18|reward:   55.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 6|Iteration 26|reward:   55.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 6|Iteration 31|reward:   68.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 6|Iteration 31|reward:   68.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 6|Iteration 32|reward:  128.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 6|Iteration 32|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 6|Iteration 38|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 6|Iteration 44|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 6|Iteration 50|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 6|Iteration 57|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 6|Iteration 63|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 6|Iteration 70|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 6|Iteration 76|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 6|Iteration 83|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:01||

Episode 6|Iteration 87|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:01||

Episode 6|Iteration 92|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:01||

Episode 6|Iteration 99|reward:  128.0|last_reward_at:   32|Elapsed Time: 0:00:01||

Episode 6|Iteration 105|reward:  134.0|last_reward_at:   32|Elapsed Time: 0:00:01||

Episode 6|Iteration 105|reward:  134.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 6|Iteration 112|reward:  134.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 6|Iteration 113|reward:  143.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 6|Iteration 113|reward:  143.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 6|Iteration 119|reward:  143.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 6|Iteration 125|reward:  143.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 6|Iteration 127|reward:  183.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 6|Iteration 127|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:01||

Episode 6|Iteration 133|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:01||

Episode 6|Iteration 137|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 6|Iteration 144|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 6|Iteration 151|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 6|Iteration 158|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 6|Iteration 165|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 6|Iteration 171|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 6|Iteration 177|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 6|Iteration 182|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 6|Iteration 189|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 6|Iteration 193|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 6|Iteration 201|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 6|Iteration 208|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 6|Iteration 213|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 6|Iteration 216|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 6|Iteration 220|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 6|Iteration 228|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 6|Iteration 235|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 6|Iteration 241|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 6|Iteration 247|reward:  183.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 6|Iteration 251|reward:  194.0|last_reward_at:  127|Elapsed Time: 0:00:04||

Episode 6|Iteration 251|reward:  194.0|last_reward_at:  251|Elapsed Time: 0:00:04||

Episode 6|Iteration 256|reward:  194.0|last_reward_at:  251|Elapsed Time: 0:00:04||

Episode 6|Iteration 262|reward:  194.0|last_reward_at:  251|Elapsed Time: 0:00:04||

Episode 6|Iteration 266|reward:  194.0|last_reward_at:  251|Elapsed Time: 0:00:04||

Episode 6|Iteration 273|reward:  194.0|last_reward_at:  251|Elapsed Time: 0:00:04||

Episode 6|Iteration 277|reward:  194.0|last_reward_at:  251|Elapsed Time: 0:00:04||

Episode 6|Iteration 285|reward:  194.0|last_reward_at:  251|Elapsed Time: 0:00:04||

Episode 6|Iteration 287|reward:  200.0|last_reward_at:  251|Elapsed Time: 0:00:04||

Episode 6|Iteration 287|reward:  200.0|last_reward_at:  287|Elapsed Time: 0:00:04||

Episode 6|Iteration 292|reward:  200.0|last_reward_at:  287|Elapsed Time: 0:00:04||

Episode 6|Iteration 298|reward:  200.0|last_reward_at:  287|Elapsed Time: 0:00:04||

Episode 6|Iteration 304|reward:  200.0|last_reward_at:  287|Elapsed Time: 0:00:04||

Episode 6|Iteration 309|reward:  214.0|last_reward_at:  287|Elapsed Time: 0:00:04||

Episode 6|Iteration 309|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:04||

Episode 6|Iteration 316|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:05||

Episode 6|Iteration 321|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:05||

Episode 6|Iteration 327|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:05||

Episode 6|Iteration 333|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:05||

Episode 6|Iteration 340|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:05||

Episode 6|Iteration 346|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:05||

Episode 6|Iteration 349|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:05||

Episode 6|Iteration 355|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:05||

Episode 6|Iteration 359|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:05||

Episode 6|Iteration 364|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:06||

Episode 6|Iteration 368|reward:  214.0|last_reward_at:  309|Elapsed Time: 0:00:06||

Episode 6|Iteration 370|reward:  220.0|last_reward_at:  309|Elapsed Time: 0:00:06||

Episode 6|Iteration 370|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 6|Iteration 374|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 6|Iteration 380|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 6|Iteration 385|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 6|Iteration 392|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 6|Iteration 398|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 6|Iteration 405|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 6|Iteration 412|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 6|Iteration 418|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 6|Iteration 424|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:07||

Episode 6|Iteration 429|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:07||

Episode 6|Iteration 433|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:07||

Episode 6|Iteration 440|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:07||

Episode 6|Iteration 444|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:07||

Episode 6|Iteration 447|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:07||

Episode 6|Iteration 454|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:07||

Episode 6|Iteration 459|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:07||

Episode 6|Iteration 463|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:07||

Episode 6|Iteration 467|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:08||

Episode 6|Iteration 474|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:08||

Episode 6|Iteration 481|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:08||

Episode 6|Iteration 487|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:08||

Episode 6|Iteration 492|reward:  220.0|last_reward_at:  370|Elapsed Time: 0:00:08||

Episode 6|Iteration 498|reward:  260.0|last_reward_at:  370|Elapsed Time: 0:00:08||

Episode 6|Iteration 498|reward:  260.0|last_reward_at:  498|Elapsed Time: 0:00:08||

Episode 6|Iteration 500|reward:  260.0|last_reward_at:  498|Elapsed Time: 0:00:08||




  Episode 6 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/5 (0.17)
    explore-remote: 4/136 (0.03)
    explore-connect: 1/155 (0.01)
    exploit-local: 5/75 (0.06)
    exploit-remote: 2/47 (0.04)
    exploit-connect: 2/67 (0.03)
  exploit deflected to exploration: 23
  ## Episode: 7/20 'DQL' ϵ=0.5391, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 7|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 7|Iteration 4|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 7|Iteration 4|reward:   22.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 7|Iteration 10|reward:   35.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 7|Iteration 10|reward:   35.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 7|Iteration 13|reward:   49.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 7|Iteration 13|reward:   49.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 7|Iteration 19|reward:   49.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 7|Iteration 24|reward:  109.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 7|Iteration 24|reward:  109.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 7|Iteration 27|reward:  149.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 7|Iteration 27|reward:  149.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 7|Iteration 31|reward:  149.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 7|Iteration 34|reward:  149.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 7|Iteration 38|reward:  149.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 7|Iteration 43|reward:  149.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 7|Iteration 44|reward:  162.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 7|Iteration 44|reward:  162.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 7|Iteration 48|reward:  168.0|last_reward_at:   44|Elapsed Time: 0:00:01||

Episode 7|Iteration 48|reward:  168.0|last_reward_at:   48|Elapsed Time: 0:00:01||

Episode 7|Iteration 52|reward:  228.0|last_reward_at:   48|Elapsed Time: 0:00:01||

Episode 7|Iteration 52|reward:  228.0|last_reward_at:   52|Elapsed Time: 0:00:01||

Episode 7|Iteration 57|reward:  228.0|last_reward_at:   52|Elapsed Time: 0:00:01||

Episode 7|Iteration 59|reward:  242.0|last_reward_at:   52|Elapsed Time: 0:00:01||

Episode 7|Iteration 59|reward:  242.0|last_reward_at:   59|Elapsed Time: 0:00:01||

Episode 7|Iteration 64|reward:  242.0|last_reward_at:   59|Elapsed Time: 0:00:01||

Episode 7|Iteration 66|reward:  242.0|last_reward_at:   59|Elapsed Time: 0:00:01||

Episode 7|Iteration 70|reward:  242.0|last_reward_at:   59|Elapsed Time: 0:00:01||

Episode 7|Iteration 75|reward:  242.0|last_reward_at:   59|Elapsed Time: 0:00:01||

Episode 7|Iteration 82|reward:  242.0|last_reward_at:   59|Elapsed Time: 0:00:01||

Episode 7|Iteration 88|reward:  242.0|last_reward_at:   59|Elapsed Time: 0:00:01||

Episode 7|Iteration 95|reward:  242.0|last_reward_at:   59|Elapsed Time: 0:00:01||

Episode 7|Iteration 97|reward:  242.0|last_reward_at:   59|Elapsed Time: 0:00:02||

Episode 7|Iteration 101|reward:  242.0|last_reward_at:   59|Elapsed Time: 0:00:02||

Episode 7|Iteration 104|reward:  255.0|last_reward_at:   59|Elapsed Time: 0:00:02||

Episode 7|Iteration 104|reward:  255.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 7|Iteration 112|reward:  255.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 7|Iteration 115|reward:  269.0|last_reward_at:  104|Elapsed Time: 0:00:02||

Episode 7|Iteration 115|reward:  269.0|last_reward_at:  115|Elapsed Time: 0:00:02||

Episode 7|Iteration 121|reward:  269.0|last_reward_at:  115|Elapsed Time: 0:00:02||

Episode 7|Iteration 125|reward:  269.0|last_reward_at:  115|Elapsed Time: 0:00:02||

Episode 7|Iteration 131|reward:  275.5|last_reward_at:  115|Elapsed Time: 0:00:02||

Episode 7|Iteration 131|reward:  275.5|last_reward_at:  131|Elapsed Time: 0:00:02||

Episode 7|Iteration 136|reward:  275.5|last_reward_at:  131|Elapsed Time: 0:00:02||

Episode 7|Iteration 140|reward:  275.5|last_reward_at:  131|Elapsed Time: 0:00:02||

Episode 7|Iteration 145|reward:  281.5|last_reward_at:  131|Elapsed Time: 0:00:02||

Episode 7|Iteration 145|reward:  281.5|last_reward_at:  145|Elapsed Time: 0:00:02||

Episode 7|Iteration 150|reward:  281.5|last_reward_at:  145|Elapsed Time: 0:00:03||

Episode 7|Iteration 157|reward:  281.5|last_reward_at:  145|Elapsed Time: 0:00:03||

Episode 7|Iteration 163|reward:  281.5|last_reward_at:  145|Elapsed Time: 0:00:03||

Episode 7|Iteration 170|reward:  281.5|last_reward_at:  145|Elapsed Time: 0:00:03||

Episode 7|Iteration 175|reward:  281.5|last_reward_at:  145|Elapsed Time: 0:00:03||

Episode 7|Iteration 181|reward:  281.5|last_reward_at:  145|Elapsed Time: 0:00:03||

Episode 7|Iteration 189|reward:  281.5|last_reward_at:  145|Elapsed Time: 0:00:03||

Episode 7|Iteration 194|reward:  281.5|last_reward_at:  145|Elapsed Time: 0:00:03||

Episode 7|Iteration 196|reward:  281.5|last_reward_at:  145|Elapsed Time: 0:00:03||

Episode 7|Iteration 201|reward:  281.5|last_reward_at:  145|Elapsed Time: 0:00:04||

Episode 7|Iteration 203|reward:  287.5|last_reward_at:  145|Elapsed Time: 0:00:04||

Episode 7|Iteration 203|reward:  287.5|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 7|Iteration 208|reward:  287.5|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 7|Iteration 211|reward:  287.5|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 7|Iteration 215|reward:  287.5|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 7|Iteration 222|reward:  287.5|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 7|Iteration 228|reward:  287.5|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 7|Iteration 230|reward:  287.5|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 7|Iteration 235|reward:  287.5|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 7|Iteration 240|reward:  293.5|last_reward_at:  203|Elapsed Time: 0:00:05||

Episode 7|Iteration 240|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:05||

Episode 7|Iteration 247|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:05||

Episode 7|Iteration 251|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:05||

Episode 7|Iteration 254|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:05||

Episode 7|Iteration 256|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:05||

Episode 7|Iteration 260|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:05||

Episode 7|Iteration 262|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:05||

Episode 7|Iteration 265|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:05||

Episode 7|Iteration 269|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:05||

Episode 7|Iteration 273|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:05||

Episode 7|Iteration 276|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:05||

Episode 7|Iteration 278|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:06||

Episode 7|Iteration 283|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:06||

Episode 7|Iteration 288|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:06||

Episode 7|Iteration 292|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:06||

Episode 7|Iteration 297|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:06||

Episode 7|Iteration 303|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:06||

Episode 7|Iteration 308|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:06||

Episode 7|Iteration 311|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:06||

Episode 7|Iteration 317|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:06||

Episode 7|Iteration 321|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:06||

Episode 7|Iteration 326|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:07||

Episode 7|Iteration 330|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:07||

Episode 7|Iteration 335|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:07||

Episode 7|Iteration 340|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:07||

Episode 7|Iteration 344|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:07||

Episode 7|Iteration 349|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:07||

Episode 7|Iteration 355|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:07||

Episode 7|Iteration 357|reward:  293.5|last_reward_at:  240|Elapsed Time: 0:00:07||

Episode 7|Iteration 358|reward:  303.5|last_reward_at:  240|Elapsed Time: 0:00:07||

Episode 7|Iteration 358|reward:  303.5|last_reward_at:  358|Elapsed Time: 0:00:07||

Episode 7|Iteration 363|reward:  303.5|last_reward_at:  358|Elapsed Time: 0:00:07||

Episode 7|Iteration 368|reward:  303.5|last_reward_at:  358|Elapsed Time: 0:00:07||

Episode 7|Iteration 372|reward:  309.5|last_reward_at:  358|Elapsed Time: 0:00:08||

Episode 7|Iteration 372|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:08||

Episode 7|Iteration 374|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:08||

Episode 7|Iteration 380|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:08||

Episode 7|Iteration 381|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:08||

Episode 7|Iteration 385|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:08||

Episode 7|Iteration 387|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:08||

Episode 7|Iteration 393|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:08||

Episode 7|Iteration 398|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:08||

Episode 7|Iteration 403|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:08||

Episode 7|Iteration 409|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:08||

Episode 7|Iteration 415|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:09||

Episode 7|Iteration 422|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:09||

Episode 7|Iteration 428|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:09||

Episode 7|Iteration 431|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:09||

Episode 7|Iteration 432|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:09||

Episode 7|Iteration 435|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:09||

Episode 7|Iteration 438|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:09||

Episode 7|Iteration 444|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:09||

Episode 7|Iteration 447|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:09||

Episode 7|Iteration 452|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:10||

Episode 7|Iteration 456|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:10||

Episode 7|Iteration 462|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:10||

Episode 7|Iteration 469|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:10||

Episode 7|Iteration 473|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:10||

Episode 7|Iteration 475|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:10||

Episode 7|Iteration 478|reward:  309.5|last_reward_at:  372|Elapsed Time: 0:00:10||

Episode 7|Iteration 482|reward:  349.5|last_reward_at:  372|Elapsed Time: 0:00:10||

Episode 7|Iteration 482|reward:  349.5|last_reward_at:  482|Elapsed Time: 0:00:10||

Episode 7|Iteration 485|reward:  349.5|last_reward_at:  482|Elapsed Time: 0:00:10||

Episode 7|Iteration 492|reward:  349.5|last_reward_at:  482|Elapsed Time: 0:00:11||

Episode 7|Iteration 499|reward:  349.5|last_reward_at:  482|Elapsed Time: 0:00:11||

Episode 7|Iteration 500|reward:  349.5|last_reward_at:  482|Elapsed Time: 0:00:11||




  Episode 7 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 0/9 (0.00)
    explore-remote: 5/109 (0.04)
    explore-connect: 1/122 (0.01)
    exploit-local: 9/111 (0.07)
    exploit-remote: 0/55 (0.00)
    exploit-connect: 4/75 (0.05)
  exploit deflected to exploration: 10
  ## Episode: 8/20 'DQL' ϵ=0.4973, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 8|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 8|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 8|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 8|Iteration 3|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 8|Iteration 3|reward:   22.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 8|Iteration 6|reward:   28.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 8|Iteration 6|reward:   28.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 8|Iteration 7|reward:   42.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 8|Iteration 7|reward:   42.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 8|Iteration 13|reward:   42.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 8|Iteration 17|reward:   53.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 8|Iteration 17|reward:   53.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 8|Iteration 23|reward:   53.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 8|Iteration 29|reward:   62.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 8|Iteration 29|reward:   62.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 8|Iteration 33|reward:   73.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 8|Iteration 33|reward:   73.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 8|Iteration 38|reward:   73.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 8|Iteration 43|reward:   73.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 8|Iteration 50|reward:   73.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 8|Iteration 55|reward:   73.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 8|Iteration 62|reward:   73.0|last_reward_at:   33|Elapsed Time: 0:00:01||

Episode 8|Iteration 69|reward:   73.0|last_reward_at:   33|Elapsed Time: 0:00:01||

Episode 8|Iteration 70|reward:  113.0|last_reward_at:   33|Elapsed Time: 0:00:01||

Episode 8|Iteration 70|reward:  113.0|last_reward_at:   70|Elapsed Time: 0:00:01||

Episode 8|Iteration 71|reward:  127.0|last_reward_at:   70|Elapsed Time: 0:00:01||

Episode 8|Iteration 71|reward:  127.0|last_reward_at:   71|Elapsed Time: 0:00:01||

Episode 8|Iteration 76|reward:  127.0|last_reward_at:   71|Elapsed Time: 0:00:01||

Episode 8|Iteration 79|reward:  127.0|last_reward_at:   71|Elapsed Time: 0:00:01||

Episode 8|Iteration 83|reward:  127.0|last_reward_at:   71|Elapsed Time: 0:00:01||

Episode 8|Iteration 86|reward:  127.0|last_reward_at:   71|Elapsed Time: 0:00:01||

Episode 8|Iteration 90|reward:  127.0|last_reward_at:   71|Elapsed Time: 0:00:01||

Episode 8|Iteration 95|reward:  127.0|last_reward_at:   71|Elapsed Time: 0:00:01||

Episode 8|Iteration 99|reward:  140.0|last_reward_at:   71|Elapsed Time: 0:00:01||

Episode 8|Iteration 99|reward:  140.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 8|Iteration 107|reward:  140.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 8|Iteration 113|reward:  140.0|last_reward_at:   99|Elapsed Time: 0:00:02||

Episode 8|Iteration 119|reward:  140.0|last_reward_at:   99|Elapsed Time: 0:00:02||

Episode 8|Iteration 126|reward:  140.0|last_reward_at:   99|Elapsed Time: 0:00:02||

Episode 8|Iteration 127|reward:  153.0|last_reward_at:   99|Elapsed Time: 0:00:02||

Episode 8|Iteration 127|reward:  153.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 8|Iteration 133|reward:  153.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 8|Iteration 138|reward:  153.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 8|Iteration 146|reward:  153.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 8|Iteration 152|reward:  153.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 8|Iteration 157|reward:  153.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 8|Iteration 163|reward:  153.0|last_reward_at:  127|Elapsed Time: 0:00:02||

Episode 8|Iteration 168|reward:  153.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 8|Iteration 171|reward:  159.0|last_reward_at:  127|Elapsed Time: 0:00:03||

Episode 8|Iteration 171|reward:  159.0|last_reward_at:  171|Elapsed Time: 0:00:03||

Episode 8|Iteration 176|reward:  159.0|last_reward_at:  171|Elapsed Time: 0:00:03||

Episode 8|Iteration 183|reward:  159.0|last_reward_at:  171|Elapsed Time: 0:00:03||

Episode 8|Iteration 185|reward:  219.0|last_reward_at:  171|Elapsed Time: 0:00:03||

Episode 8|Iteration 185|reward:  219.0|last_reward_at:  185|Elapsed Time: 0:00:03||

Episode 8|Iteration 190|reward:  219.0|last_reward_at:  185|Elapsed Time: 0:00:03||

Episode 8|Iteration 196|reward:  219.0|last_reward_at:  185|Elapsed Time: 0:00:03||

Episode 8|Iteration 200|reward:  229.0|last_reward_at:  185|Elapsed Time: 0:00:03||

Episode 8|Iteration 200|reward:  229.0|last_reward_at:  200|Elapsed Time: 0:00:03||

Episode 8|Iteration 203|reward:  238.0|last_reward_at:  200|Elapsed Time: 0:00:03||

Episode 8|Iteration 203|reward:  238.0|last_reward_at:  203|Elapsed Time: 0:00:03||

Episode 8|Iteration 209|reward:  238.0|last_reward_at:  203|Elapsed Time: 0:00:03||

Episode 8|Iteration 215|reward:  238.0|last_reward_at:  203|Elapsed Time: 0:00:03||

Episode 8|Iteration 222|reward:  238.0|last_reward_at:  203|Elapsed Time: 0:00:03||

Episode 8|Iteration 225|reward:  238.0|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 8|Iteration 228|reward:  238.0|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 8|Iteration 233|reward:  238.0|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 8|Iteration 240|reward:  238.0|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 8|Iteration 246|reward:  238.0|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 8|Iteration 247|reward:  247.0|last_reward_at:  203|Elapsed Time: 0:00:04||

Episode 8|Iteration 247|reward:  247.0|last_reward_at:  247|Elapsed Time: 0:00:04||

Episode 8|Iteration 250|reward:  253.5|last_reward_at:  247|Elapsed Time: 0:00:04||

Episode 8|Iteration 250|reward:  253.5|last_reward_at:  250|Elapsed Time: 0:00:04||

Episode 8|Iteration 254|reward:  253.5|last_reward_at:  250|Elapsed Time: 0:00:04||

Episode 8|Iteration 257|reward:  253.5|last_reward_at:  250|Elapsed Time: 0:00:04||

Episode 8|Iteration 262|reward:  253.5|last_reward_at:  250|Elapsed Time: 0:00:04||

Episode 8|Iteration 269|reward:  253.5|last_reward_at:  250|Elapsed Time: 0:00:05||

Episode 8|Iteration 273|reward:  253.5|last_reward_at:  250|Elapsed Time: 0:00:05||

Episode 8|Iteration 278|reward:  259.5|last_reward_at:  250|Elapsed Time: 0:00:05||

Episode 8|Iteration 278|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:05||

Episode 8|Iteration 281|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:05||

Episode 8|Iteration 288|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:05||

Episode 8|Iteration 292|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:05||

Episode 8|Iteration 293|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:05||

Episode 8|Iteration 297|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:05||

Episode 8|Iteration 301|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:05||

Episode 8|Iteration 304|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:05||

Episode 8|Iteration 307|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:05||

Episode 8|Iteration 312|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:06||

Episode 8|Iteration 317|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:06||

Episode 8|Iteration 321|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:06||

Episode 8|Iteration 326|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:06||

Episode 8|Iteration 329|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:06||

Episode 8|Iteration 332|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:06||

Episode 8|Iteration 336|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:06||

Episode 8|Iteration 341|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:06||

Episode 8|Iteration 346|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:06||

Episode 8|Iteration 349|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:07||

Episode 8|Iteration 353|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:07||

Episode 8|Iteration 355|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:07||

Episode 8|Iteration 358|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:07||

Episode 8|Iteration 360|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:07||

Episode 8|Iteration 364|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:07||

Episode 8|Iteration 367|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:07||

Episode 8|Iteration 373|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:07||

Episode 8|Iteration 379|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:07||

Episode 8|Iteration 384|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:07||

Episode 8|Iteration 389|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:08||

Episode 8|Iteration 391|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:08||

Episode 8|Iteration 397|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:08||

Episode 8|Iteration 401|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:08||

Episode 8|Iteration 404|reward:  259.5|last_reward_at:  278|Elapsed Time: 0:00:08||

Episode 8|Iteration 407|reward:  319.5|last_reward_at:  278|Elapsed Time: 0:00:08||

Episode 8|Iteration 407|reward:  319.5|last_reward_at:  407|Elapsed Time: 0:00:08||

Episode 8|Iteration 411|reward:  319.5|last_reward_at:  407|Elapsed Time: 0:00:08||

Episode 8|Iteration 417|reward:  319.5|last_reward_at:  407|Elapsed Time: 0:00:08||

Episode 8|Iteration 418|reward:  359.5|last_reward_at:  407|Elapsed Time: 0:00:08||

Episode 8|Iteration 418|reward:  359.5|last_reward_at:  418|Elapsed Time: 0:00:08||

Episode 8|Iteration 422|reward:  359.5|last_reward_at:  418|Elapsed Time: 0:00:08||

Episode 8|Iteration 424|reward:  359.5|last_reward_at:  418|Elapsed Time: 0:00:09||

Episode 8|Iteration 427|reward:  359.5|last_reward_at:  418|Elapsed Time: 0:00:09||

Episode 8|Iteration 434|reward:  365.5|last_reward_at:  418|Elapsed Time: 0:00:09||

Episode 8|Iteration 434|reward:  365.5|last_reward_at:  434|Elapsed Time: 0:00:09||

Episode 8|Iteration 440|reward:  371.5|last_reward_at:  434|Elapsed Time: 0:00:09||

Episode 8|Iteration 440|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:09||

Episode 8|Iteration 444|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:09||

Episode 8|Iteration 449|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:09||

Episode 8|Iteration 452|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:09||

Episode 8|Iteration 456|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:09||

Episode 8|Iteration 463|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:10||

Episode 8|Iteration 468|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:10||

Episode 8|Iteration 473|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:10||

Episode 8|Iteration 477|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:10||

Episode 8|Iteration 481|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:10||

Episode 8|Iteration 488|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:10||

Episode 8|Iteration 491|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:10||

Episode 8|Iteration 498|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:10||

Episode 8|Iteration 500|reward:  371.5|last_reward_at:  440|Elapsed Time: 0:00:10||




  Episode 8 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/5 (0.38)
    explore-remote: 6/130 (0.04)
    explore-connect: 0/109 (0.00)
    exploit-local: 8/109 (0.07)
    exploit-remote: 0/58 (0.00)
    exploit-connect: 5/67 (0.07)
  exploit deflected to exploration: 19
  ## Episode: 9/20 'DQL' ϵ=0.4595, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 9|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 9|Iteration 1|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 9|Iteration 1|reward:   11.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 9|Iteration 2|reward:   22.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 9|Iteration 2|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 9|Iteration 3|reward:   35.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 9|Iteration 3|reward:   35.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 9|Iteration 13|reward:   35.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 9|Iteration 13|reward:   49.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 9|Iteration 13|reward:   49.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 9|Iteration 15|reward:   55.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 9|Iteration 15|reward:   55.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 9|Iteration 20|reward:  115.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 9|Iteration 20|reward:  115.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 9|Iteration 26|reward:  115.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 9|Iteration 29|reward:  121.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 9|Iteration 29|reward:  121.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 9|Iteration 38|reward:  121.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 9|Iteration 45|reward:  121.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 9|Iteration 50|reward:  127.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 9|Iteration 50|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 9|Iteration 57|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 9|Iteration 64|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 9|Iteration 70|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 9|Iteration 76|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 9|Iteration 83|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 9|Iteration 94|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 9|Iteration 102|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:01||

Episode 9|Iteration 108|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:01||

Episode 9|Iteration 114|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:01||

Episode 9|Iteration 121|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:01||

Episode 9|Iteration 127|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:01||

Episode 9|Iteration 133|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:01||

Episode 9|Iteration 140|reward:  127.0|last_reward_at:   50|Elapsed Time: 0:00:01||

Episode 9|Iteration 143|reward:  141.0|last_reward_at:   50|Elapsed Time: 0:00:01||

Episode 9|Iteration 143|reward:  141.0|last_reward_at:  143|Elapsed Time: 0:00:01||

Episode 9|Iteration 152|reward:  141.0|last_reward_at:  143|Elapsed Time: 0:00:01||

Episode 9|Iteration 157|reward:  152.0|last_reward_at:  143|Elapsed Time: 0:00:01||

Episode 9|Iteration 157|reward:  152.0|last_reward_at:  157|Elapsed Time: 0:00:01||

Episode 9|Iteration 165|reward:  152.0|last_reward_at:  157|Elapsed Time: 0:00:01||

Episode 9|Iteration 175|reward:  158.0|last_reward_at:  157|Elapsed Time: 0:00:01||

Episode 9|Iteration 175|reward:  158.0|last_reward_at:  175|Elapsed Time: 0:00:01||

Episode 9|Iteration 178|reward:  166.0|last_reward_at:  175|Elapsed Time: 0:00:01||

Episode 9|Iteration 178|reward:  166.0|last_reward_at:  178|Elapsed Time: 0:00:01||

Episode 9|Iteration 184|reward:  166.0|last_reward_at:  178|Elapsed Time: 0:00:01||

Episode 9|Iteration 184|reward:  179.0|last_reward_at:  178|Elapsed Time: 0:00:01||

Episode 9|Iteration 184|reward:  179.0|last_reward_at:  184|Elapsed Time: 0:00:01||

Episode 9|Iteration 195|reward:  179.0|last_reward_at:  184|Elapsed Time: 0:00:01||

Episode 9|Iteration 203|reward:  179.0|last_reward_at:  184|Elapsed Time: 0:00:02||

Episode 9|Iteration 209|reward:  179.0|last_reward_at:  184|Elapsed Time: 0:00:02||

Episode 9|Iteration 216|reward:  179.0|last_reward_at:  184|Elapsed Time: 0:00:02||

Episode 9|Iteration 217|reward:  239.0|last_reward_at:  184|Elapsed Time: 0:00:02||

Episode 9|Iteration 217|reward:  239.0|last_reward_at:  217|Elapsed Time: 0:00:02||

Episode 9|Iteration 225|reward:  279.0|last_reward_at:  217|Elapsed Time: 0:00:02||

Episode 9|Iteration 225|reward:  279.0|last_reward_at:  225|Elapsed Time: 0:00:02||

Episode 9|Iteration 227|reward:  285.0|last_reward_at:  225|Elapsed Time: 0:00:02||

Episode 9|Iteration 227|reward:  285.0|last_reward_at:  227|Elapsed Time: 0:00:02||

Episode 9|Iteration 235|reward:  285.0|last_reward_at:  227|Elapsed Time: 0:00:02||

Episode 9|Iteration 239|reward:  325.0|last_reward_at:  227|Elapsed Time: 0:00:02||

Episode 9|Iteration 239|reward:  325.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 9|Iteration 246|reward:  325.0|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 9|Iteration 252|reward:  331.5|last_reward_at:  239|Elapsed Time: 0:00:02||

Episode 9|Iteration 252|reward:  331.5|last_reward_at:  252|Elapsed Time: 0:00:02||

Episode 9|Iteration 256|reward:  331.5|last_reward_at:  252|Elapsed Time: 0:00:02||

Episode 9|Iteration 265|reward:  331.5|last_reward_at:  252|Elapsed Time: 0:00:02||

Episode 9|Iteration 271|reward:  331.5|last_reward_at:  252|Elapsed Time: 0:00:02||

Episode 9|Iteration 278|reward:  331.5|last_reward_at:  252|Elapsed Time: 0:00:03||

Episode 9|Iteration 285|reward:  331.5|last_reward_at:  252|Elapsed Time: 0:00:03||

Episode 9|Iteration 291|reward:  331.5|last_reward_at:  252|Elapsed Time: 0:00:03||

Episode 9|Iteration 298|reward:  331.5|last_reward_at:  252|Elapsed Time: 0:00:03||

Episode 9|Iteration 302|reward:  345.5|last_reward_at:  252|Elapsed Time: 0:00:03||

Episode 9|Iteration 302|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:03||

Episode 9|Iteration 311|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:03||

Episode 9|Iteration 317|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:03||

Episode 9|Iteration 323|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:03||

Episode 9|Iteration 330|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:03||

Episode 9|Iteration 336|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:03||

Episode 9|Iteration 342|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:03||

Episode 9|Iteration 347|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:03||

Episode 9|Iteration 355|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 9|Iteration 361|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 9|Iteration 368|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 9|Iteration 374|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 9|Iteration 380|reward:  345.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 9|Iteration 384|reward:  354.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 9|Iteration 384|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:04||

Episode 9|Iteration 393|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:04||

Episode 9|Iteration 399|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:04||

Episode 9|Iteration 406|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:04||

Episode 9|Iteration 412|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:04||

Episode 9|Iteration 418|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:04||

Episode 9|Iteration 424|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:04||

Episode 9|Iteration 431|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:04||

Episode 9|Iteration 437|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||

Episode 9|Iteration 444|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||

Episode 9|Iteration 450|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||

Episode 9|Iteration 456|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||

Episode 9|Iteration 462|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||

Episode 9|Iteration 469|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||

Episode 9|Iteration 475|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||

Episode 9|Iteration 482|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||

Episode 9|Iteration 488|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||

Episode 9|Iteration 494|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||

Episode 9|Iteration 500|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||

Episode 9|Iteration 500|reward:  354.5|last_reward_at:  384|Elapsed Time: 0:00:05||




  Episode 9 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/9 (0.18)
    explore-remote: 6/124 (0.05)
    explore-connect: 1/127 (0.01)
    exploit-local: 7/92 (0.07)
    exploit-remote: 1/46 (0.02)
    exploit-connect: 3/82 (0.04)
  exploit deflected to exploration: 36
  ## Episode: 10/20 'DQL' ϵ=0.4253, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 10|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 10|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 10|Iteration 2|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 10|Iteration 2|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 10|Iteration 4|reward:   22.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 10|Iteration 4|reward:   22.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 10|Iteration 6|reward:   33.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 10|Iteration 6|reward:   33.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 10|Iteration 12|reward:   39.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 10|Iteration 12|reward:   39.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 10|Iteration 15|reward:   52.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 10|Iteration 15|reward:   52.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 10|Iteration 16|reward:  112.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 10|Iteration 16|reward:  112.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 10|Iteration 24|reward:  112.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 10|Iteration 31|reward:  112.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 10|Iteration 38|reward:  112.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 10|Iteration 45|reward:  112.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 10|Iteration 51|reward:  112.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 10|Iteration 57|reward:  112.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 10|Iteration 58|reward:  121.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 10|Iteration 58|reward:  121.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 10|Iteration 62|reward:  132.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 10|Iteration 62|reward:  132.0|last_reward_at:   62|Elapsed Time: 0:00:00||

Episode 10|Iteration 72|reward:  138.0|last_reward_at:   62|Elapsed Time: 0:00:00||

Episode 10|Iteration 72|reward:  138.0|last_reward_at:   72|Elapsed Time: 0:00:00||

Episode 10|Iteration 83|reward:  138.0|last_reward_at:   72|Elapsed Time: 0:00:00||

Episode 10|Iteration 93|reward:  178.0|last_reward_at:   72|Elapsed Time: 0:00:00||

Episode 10|Iteration 93|reward:  178.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 10|Iteration 102|reward:  178.0|last_reward_at:   93|Elapsed Time: 0:00:01||

Episode 10|Iteration 105|reward:  191.0|last_reward_at:   93|Elapsed Time: 0:00:01||

Episode 10|Iteration 105|reward:  191.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 10|Iteration 114|reward:  191.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 10|Iteration 121|reward:  191.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 10|Iteration 123|reward:  199.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 10|Iteration 123|reward:  199.0|last_reward_at:  123|Elapsed Time: 0:00:01||

Episode 10|Iteration 127|reward:  205.0|last_reward_at:  123|Elapsed Time: 0:00:01||

Episode 10|Iteration 127|reward:  205.0|last_reward_at:  127|Elapsed Time: 0:00:01||

Episode 10|Iteration 129|reward:  265.0|last_reward_at:  127|Elapsed Time: 0:00:01||

Episode 10|Iteration 129|reward:  265.0|last_reward_at:  129|Elapsed Time: 0:00:01||

Episode 10|Iteration 139|reward:  271.0|last_reward_at:  129|Elapsed Time: 0:00:01||

Episode 10|Iteration 139|reward:  271.0|last_reward_at:  139|Elapsed Time: 0:00:01||

Episode 10|Iteration 141|reward:  285.0|last_reward_at:  139|Elapsed Time: 0:00:01||

Episode 10|Iteration 141|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 10|Iteration 146|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 10|Iteration 152|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 10|Iteration 159|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 10|Iteration 165|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 10|Iteration 171|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 10|Iteration 178|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 10|Iteration 184|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 10|Iteration 190|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:01||

Episode 10|Iteration 197|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:02||

Episode 10|Iteration 203|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:02||

Episode 10|Iteration 209|reward:  285.0|last_reward_at:  141|Elapsed Time: 0:00:02||

Episode 10|Iteration 210|reward:  299.0|last_reward_at:  141|Elapsed Time: 0:00:02||

Episode 10|Iteration 210|reward:  299.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 10|Iteration 219|reward:  299.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 10|Iteration 228|reward:  299.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 10|Iteration 235|reward:  299.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 10|Iteration 241|reward:  299.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 10|Iteration 247|reward:  299.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 10|Iteration 254|reward:  299.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 10|Iteration 266|reward:  299.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 10|Iteration 272|reward:  308.0|last_reward_at:  210|Elapsed Time: 0:00:02||

Episode 10|Iteration 272|reward:  308.0|last_reward_at:  272|Elapsed Time: 0:00:02||

Episode 10|Iteration 279|reward:  308.0|last_reward_at:  272|Elapsed Time: 0:00:02||

Episode 10|Iteration 285|reward:  308.0|last_reward_at:  272|Elapsed Time: 0:00:02||

Episode 10|Iteration 292|reward:  308.0|last_reward_at:  272|Elapsed Time: 0:00:03||

Episode 10|Iteration 293|reward:  318.0|last_reward_at:  272|Elapsed Time: 0:00:03||

Episode 10|Iteration 293|reward:  318.0|last_reward_at:  293|Elapsed Time: 0:00:03||

Episode 10|Iteration 302|reward:  318.0|last_reward_at:  293|Elapsed Time: 0:00:03||

Episode 10|Iteration 310|reward:  318.0|last_reward_at:  293|Elapsed Time: 0:00:03||

Episode 10|Iteration 317|reward:  318.0|last_reward_at:  293|Elapsed Time: 0:00:03||

Episode 10|Iteration 323|reward:  318.0|last_reward_at:  293|Elapsed Time: 0:00:03||

Episode 10|Iteration 327|reward:  324.5|last_reward_at:  293|Elapsed Time: 0:00:03||

Episode 10|Iteration 327|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:03||

Episode 10|Iteration 336|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:03||

Episode 10|Iteration 342|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:03||

Episode 10|Iteration 349|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:03||

Episode 10|Iteration 355|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:03||

Episode 10|Iteration 361|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:03||

Episode 10|Iteration 368|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:03||

Episode 10|Iteration 374|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:03||

Episode 10|Iteration 380|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:04||

Episode 10|Iteration 383|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:04||

Episode 10|Iteration 389|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:04||

Episode 10|Iteration 393|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:04||

Episode 10|Iteration 399|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:04||

Episode 10|Iteration 406|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:04||

Episode 10|Iteration 412|reward:  324.5|last_reward_at:  327|Elapsed Time: 0:00:04||

Episode 10|Iteration 416|reward:  330.5|last_reward_at:  327|Elapsed Time: 0:00:04||

Episode 10|Iteration 416|reward:  330.5|last_reward_at:  416|Elapsed Time: 0:00:04||

Episode 10|Iteration 425|reward:  330.5|last_reward_at:  416|Elapsed Time: 0:00:04||

Episode 10|Iteration 430|reward:  330.5|last_reward_at:  416|Elapsed Time: 0:00:04||

Episode 10|Iteration 437|reward:  330.5|last_reward_at:  416|Elapsed Time: 0:00:04||

Episode 10|Iteration 444|reward:  330.5|last_reward_at:  416|Elapsed Time: 0:00:04||

Episode 10|Iteration 446|reward:  450.5|last_reward_at:  416|Elapsed Time: 0:00:04||

Episode 10|Iteration 446|reward:  450.5|last_reward_at:  446|Elapsed Time: 0:00:04||

Episode 10|Iteration 456|reward:  450.5|last_reward_at:  446|Elapsed Time: 0:00:05||

Episode 10|Iteration 462|reward:  450.5|last_reward_at:  446|Elapsed Time: 0:00:05||

Episode 10|Iteration 468|reward:  450.5|last_reward_at:  446|Elapsed Time: 0:00:05||

Episode 10|Iteration 473|reward:  456.5|last_reward_at:  446|Elapsed Time: 0:00:05||

Episode 10|Iteration 473|reward:  456.5|last_reward_at:  473|Elapsed Time: 0:00:05||

Episode 10|Iteration 482|reward:  456.5|last_reward_at:  473|Elapsed Time: 0:00:05||

Episode 10|Iteration 487|reward:  456.5|last_reward_at:  473|Elapsed Time: 0:00:05||

Episode 10|Iteration 494|reward:  456.5|last_reward_at:  473|Elapsed Time: 0:00:05||

Episode 10|Iteration 500|reward:  456.5|last_reward_at:  473|Elapsed Time: 0:00:05||

Episode 10|Iteration 500|reward:  456.5|last_reward_at:  473|Elapsed Time: 0:00:05||




  Episode 10 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 4/9 (0.31)
    explore-remote: 5/95 (0.05)
    explore-connect: 0/116 (0.00)
    exploit-local: 7/74 (0.09)
    exploit-remote: 2/79 (0.02)
    exploit-connect: 5/104 (0.05)
  exploit deflected to exploration: 23
  ## Episode: 11/20 'DQL' ϵ=0.3944, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 11|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 11|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 11|Iteration 2|reward:   11.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 11|Iteration 2|reward:   11.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 11|Iteration 3|reward:   17.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 11|Iteration 3|reward:   17.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 11|Iteration 13|reward:   17.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 11|Iteration 13|reward:   28.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 11|Iteration 13|reward:   28.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 11|Iteration 16|reward:   42.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 11|Iteration 16|reward:   42.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 11|Iteration 26|reward:   42.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 11|Iteration 28|reward:   55.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 11|Iteration 28|reward:   55.0|last_reward_at:   28|Elapsed Time: 0:00:00||

Episode 11|Iteration 35|reward:   95.0|last_reward_at:   28|Elapsed Time: 0:00:00||

Episode 11|Iteration 35|reward:   95.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 11|Iteration 36|reward:  109.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 11|Iteration 36|reward:  109.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 11|Iteration 42|reward:  120.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 11|Iteration 42|reward:  120.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 11|Iteration 50|reward:  120.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 11|Iteration 57|reward:  120.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 11|Iteration 64|reward:  120.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 11|Iteration 65|reward:  180.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 11|Iteration 65|reward:  180.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 11|Iteration 67|reward:  194.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 11|Iteration 67|reward:  194.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 11|Iteration 75|reward:  194.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 11|Iteration 83|reward:  194.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 11|Iteration 89|reward:  194.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 11|Iteration 95|reward:  194.0|last_reward_at:   67|Elapsed Time: 0:00:01||

Episode 11|Iteration 99|reward:  203.0|last_reward_at:   67|Elapsed Time: 0:00:01||

Episode 11|Iteration 99|reward:  203.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 11|Iteration 103|reward:  216.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 11|Iteration 103|reward:  216.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 11|Iteration 108|reward:  225.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 11|Iteration 108|reward:  225.0|last_reward_at:  108|Elapsed Time: 0:00:01||

Episode 11|Iteration 114|reward:  225.0|last_reward_at:  108|Elapsed Time: 0:00:01||

Episode 11|Iteration 121|reward:  225.0|last_reward_at:  108|Elapsed Time: 0:00:01||

Episode 11|Iteration 127|reward:  225.0|last_reward_at:  108|Elapsed Time: 0:00:01||

Episode 11|Iteration 133|reward:  225.0|last_reward_at:  108|Elapsed Time: 0:00:01||

Episode 11|Iteration 140|reward:  225.0|last_reward_at:  108|Elapsed Time: 0:00:01||

Episode 11|Iteration 146|reward:  225.0|last_reward_at:  108|Elapsed Time: 0:00:01||

Episode 11|Iteration 152|reward:  225.0|last_reward_at:  108|Elapsed Time: 0:00:01||

Episode 11|Iteration 158|reward:  230.0|last_reward_at:  108|Elapsed Time: 0:00:01||

Episode 11|Iteration 158|reward:  230.0|last_reward_at:  158|Elapsed Time: 0:00:01||

Episode 11|Iteration 160|reward:  236.5|last_reward_at:  158|Elapsed Time: 0:00:01||

Episode 11|Iteration 160|reward:  236.5|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 11|Iteration 171|reward:  236.5|last_reward_at:  160|Elapsed Time: 0:00:01||

Episode 11|Iteration 178|reward:  236.5|last_reward_at:  160|Elapsed Time: 0:00:02||

Episode 11|Iteration 184|reward:  236.5|last_reward_at:  160|Elapsed Time: 0:00:02||

Episode 11|Iteration 185|reward:  242.5|last_reward_at:  160|Elapsed Time: 0:00:02||

Episode 11|Iteration 185|reward:  242.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 194|reward:  242.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 203|reward:  242.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 209|reward:  242.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 215|reward:  242.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 222|reward:  242.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 228|reward:  242.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 235|reward:  242.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 241|reward:  242.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 247|reward:  242.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 254|reward:  242.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 256|reward:  302.5|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 11|Iteration 256|reward:  302.5|last_reward_at:  256|Elapsed Time: 0:00:02||

Episode 11|Iteration 264|reward:  302.5|last_reward_at:  256|Elapsed Time: 0:00:03||

Episode 11|Iteration 265|reward:  312.5|last_reward_at:  256|Elapsed Time: 0:00:03||

Episode 11|Iteration 265|reward:  312.5|last_reward_at:  265|Elapsed Time: 0:00:03||

Episode 11|Iteration 273|reward:  312.5|last_reward_at:  265|Elapsed Time: 0:00:03||

Episode 11|Iteration 275|reward:  318.5|last_reward_at:  265|Elapsed Time: 0:00:03||

Episode 11|Iteration 275|reward:  318.5|last_reward_at:  275|Elapsed Time: 0:00:03||

Episode 11|Iteration 285|reward:  318.5|last_reward_at:  275|Elapsed Time: 0:00:03||

Episode 11|Iteration 286|reward:  324.5|last_reward_at:  275|Elapsed Time: 0:00:03||

Episode 11|Iteration 286|reward:  324.5|last_reward_at:  286|Elapsed Time: 0:00:03||

Episode 11|Iteration 292|reward:  324.5|last_reward_at:  286|Elapsed Time: 0:00:03||

Episode 11|Iteration 298|reward:  324.5|last_reward_at:  286|Elapsed Time: 0:00:03||

Episode 11|Iteration 302|reward:  324.5|last_reward_at:  286|Elapsed Time: 0:00:03||

Episode 11|Iteration 310|reward:  324.5|last_reward_at:  286|Elapsed Time: 0:00:03||

Episode 11|Iteration 317|reward:  324.5|last_reward_at:  286|Elapsed Time: 0:00:03||

Episode 11|Iteration 322|reward:  324.5|last_reward_at:  286|Elapsed Time: 0:00:03||

Episode 11|Iteration 330|reward:  324.5|last_reward_at:  286|Elapsed Time: 0:00:03||

Episode 11|Iteration 336|reward:  324.5|last_reward_at:  286|Elapsed Time: 0:00:03||

Episode 11|Iteration 337|reward:  330.5|last_reward_at:  286|Elapsed Time: 0:00:04||

Episode 11|Iteration 337|reward:  330.5|last_reward_at:  337|Elapsed Time: 0:00:04||

Episode 11|Iteration 342|reward:  330.5|last_reward_at:  337|Elapsed Time: 0:00:04||

Episode 11|Iteration 349|reward:  330.5|last_reward_at:  337|Elapsed Time: 0:00:04||

Episode 11|Iteration 355|reward:  330.5|last_reward_at:  337|Elapsed Time: 0:00:04||

Episode 11|Iteration 361|reward:  330.5|last_reward_at:  337|Elapsed Time: 0:00:04||

Episode 11|Iteration 368|reward:  330.5|last_reward_at:  337|Elapsed Time: 0:00:04||

Episode 11|Iteration 368|reward:  450.5|last_reward_at:  337|Elapsed Time: 0:00:04||

Episode 11|Iteration 368|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:04||

Episode 11|Iteration 378|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:04||

Episode 11|Iteration 386|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:04||

Episode 11|Iteration 393|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:04||

Episode 11|Iteration 399|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:04||

Episode 11|Iteration 406|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:04||

Episode 11|Iteration 412|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:04||

Episode 11|Iteration 418|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:04||

Episode 11|Iteration 425|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:05||

Episode 11|Iteration 431|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:05||

Episode 11|Iteration 437|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:05||

Episode 11|Iteration 444|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:05||

Episode 11|Iteration 450|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:05||

Episode 11|Iteration 455|reward:  450.5|last_reward_at:  368|Elapsed Time: 0:00:05||

Episode 11|Iteration 458|reward:  480.5|last_reward_at:  368|Elapsed Time: 0:00:05||

Episode 11|Iteration 458|reward:  480.5|last_reward_at:  458|Elapsed Time: 0:00:05||

Episode 11|Iteration 466|reward:  480.5|last_reward_at:  458|Elapsed Time: 0:00:05||

Episode 11|Iteration 472|reward:  480.5|last_reward_at:  458|Elapsed Time: 0:00:05||

Episode 11|Iteration 478|reward:  480.5|last_reward_at:  458|Elapsed Time: 0:00:05||

Episode 11|Iteration 482|reward:  480.5|last_reward_at:  458|Elapsed Time: 0:00:05||

Episode 11|Iteration 485|reward:  520.5|last_reward_at:  458|Elapsed Time: 0:00:05||

Episode 11|Iteration 485|reward:  520.5|last_reward_at:  485|Elapsed Time: 0:00:05||

Episode 11|Iteration 492|reward:  520.5|last_reward_at:  485|Elapsed Time: 0:00:05||

Episode 11|Iteration 500|reward:  520.5|last_reward_at:  485|Elapsed Time: 0:00:06||

Episode 11|Iteration 500|reward:  520.5|last_reward_at:  485|Elapsed Time: 0:00:06||




  Episode 11 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/7 (0.22)
    explore-remote: 5/104 (0.05)
    explore-connect: 1/103 (0.01)
    exploit-local: 9/104 (0.08)
    exploit-remote: 2/63 (0.03)
    exploit-connect: 5/95 (0.05)
  exploit deflected to exploration: 21
  ## Episode: 12/20 'DQL' ϵ=0.3664, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 12|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 12|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 12|Iteration 2|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 12|Iteration 2|reward:   14.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 12|Iteration 3|reward:   25.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 12|Iteration 3|reward:   25.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 12|Iteration 4|reward:   31.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 12|Iteration 4|reward:   31.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 12|Iteration 8|reward:   37.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 12|Iteration 8|reward:   37.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 12|Iteration 14|reward:   77.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 12|Iteration 14|reward:   77.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 12|Iteration 18|reward:   88.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 12|Iteration 18|reward:   88.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 12|Iteration 24|reward:  101.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 12|Iteration 24|reward:  101.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 12|Iteration 32|reward:  101.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 12|Iteration 43|reward:  101.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 12|Iteration 50|reward:  141.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 12|Iteration 50|reward:  141.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 12|Iteration 53|reward:  155.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 12|Iteration 53|reward:  155.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 12|Iteration 63|reward:  155.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 12|Iteration 70|reward:  155.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 12|Iteration 76|reward:  155.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 12|Iteration 81|reward:  168.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 12|Iteration 81|reward:  168.0|last_reward_at:   81|Elapsed Time: 0:00:00||

Episode 12|Iteration 88|reward:  168.0|last_reward_at:   81|Elapsed Time: 0:00:00||

Episode 12|Iteration 90|reward:  178.0|last_reward_at:   81|Elapsed Time: 0:00:01||

Episode 12|Iteration 90|reward:  178.0|last_reward_at:   90|Elapsed Time: 0:00:01||

Episode 12|Iteration 95|reward:  178.0|last_reward_at:   90|Elapsed Time: 0:00:01||

Episode 12|Iteration 99|reward:  178.0|last_reward_at:   90|Elapsed Time: 0:00:01||

Episode 12|Iteration 104|reward:  192.0|last_reward_at:   90|Elapsed Time: 0:00:01||

Episode 12|Iteration 104|reward:  192.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 12|Iteration 108|reward:  192.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 12|Iteration 114|reward:  192.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 12|Iteration 114|reward:  198.0|last_reward_at:  104|Elapsed Time: 0:00:01||

Episode 12|Iteration 114|reward:  198.0|last_reward_at:  114|Elapsed Time: 0:00:01||

Episode 12|Iteration 121|reward:  198.0|last_reward_at:  114|Elapsed Time: 0:00:01||

Episode 12|Iteration 127|reward:  198.0|last_reward_at:  114|Elapsed Time: 0:00:01||

Episode 12|Iteration 133|reward:  198.0|last_reward_at:  114|Elapsed Time: 0:00:01||

Episode 12|Iteration 139|reward:  258.0|last_reward_at:  114|Elapsed Time: 0:00:01||

Episode 12|Iteration 139|reward:  258.0|last_reward_at:  139|Elapsed Time: 0:00:01||

Episode 12|Iteration 143|reward:  258.0|last_reward_at:  139|Elapsed Time: 0:00:01||

Episode 12|Iteration 149|reward:  264.0|last_reward_at:  139|Elapsed Time: 0:00:01||

Episode 12|Iteration 149|reward:  264.0|last_reward_at:  149|Elapsed Time: 0:00:01||

Episode 12|Iteration 158|reward:  264.0|last_reward_at:  149|Elapsed Time: 0:00:01||

Episode 12|Iteration 165|reward:  264.0|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 12|Iteration 170|reward:  264.0|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 12|Iteration 176|reward:  264.0|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 12|Iteration 184|reward:  264.0|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 12|Iteration 190|reward:  264.0|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 12|Iteration 197|reward:  264.0|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 12|Iteration 198|reward:  270.0|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 12|Iteration 198|reward:  270.0|last_reward_at:  198|Elapsed Time: 0:00:02||

Episode 12|Iteration 203|reward:  270.0|last_reward_at:  198|Elapsed Time: 0:00:02||

Episode 12|Iteration 208|reward:  270.0|last_reward_at:  198|Elapsed Time: 0:00:02||

Episode 12|Iteration 216|reward:  270.0|last_reward_at:  198|Elapsed Time: 0:00:02||

Episode 12|Iteration 222|reward:  270.0|last_reward_at:  198|Elapsed Time: 0:00:02||

Episode 12|Iteration 227|reward:  270.0|last_reward_at:  198|Elapsed Time: 0:00:02||

Episode 12|Iteration 233|reward:  276.0|last_reward_at:  198|Elapsed Time: 0:00:03||

Episode 12|Iteration 233|reward:  276.0|last_reward_at:  233|Elapsed Time: 0:00:03||

Episode 12|Iteration 240|reward:  276.0|last_reward_at:  233|Elapsed Time: 0:00:03||

Episode 12|Iteration 247|reward:  276.0|last_reward_at:  233|Elapsed Time: 0:00:03||

Episode 12|Iteration 254|reward:  276.0|last_reward_at:  233|Elapsed Time: 0:00:03||

Episode 12|Iteration 259|reward:  396.0|last_reward_at:  233|Elapsed Time: 0:00:03||

Episode 12|Iteration 259|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 12|Iteration 261|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 12|Iteration 266|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 12|Iteration 272|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 12|Iteration 277|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 12|Iteration 279|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:03||

Episode 12|Iteration 282|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:04||

Episode 12|Iteration 285|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:04||

Episode 12|Iteration 289|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:04||

Episode 12|Iteration 292|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:04||

Episode 12|Iteration 297|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:04||

Episode 12|Iteration 304|reward:  396.0|last_reward_at:  259|Elapsed Time: 0:00:04||

Episode 12|Iteration 309|reward:  409.0|last_reward_at:  259|Elapsed Time: 0:00:04||

Episode 12|Iteration 309|reward:  409.0|last_reward_at:  309|Elapsed Time: 0:00:04||

Episode 12|Iteration 316|reward:  409.0|last_reward_at:  309|Elapsed Time: 0:00:04||

Episode 12|Iteration 323|reward:  409.0|last_reward_at:  309|Elapsed Time: 0:00:04||

Episode 12|Iteration 330|reward:  409.0|last_reward_at:  309|Elapsed Time: 0:00:04||

Episode 12|Iteration 335|reward:  409.0|last_reward_at:  309|Elapsed Time: 0:00:05||

Episode 12|Iteration 337|reward:  415.0|last_reward_at:  309|Elapsed Time: 0:00:05||

Episode 12|Iteration 337|reward:  415.0|last_reward_at:  337|Elapsed Time: 0:00:05||

Episode 12|Iteration 341|reward:  415.0|last_reward_at:  337|Elapsed Time: 0:00:05||

Episode 12|Iteration 347|reward:  415.0|last_reward_at:  337|Elapsed Time: 0:00:05||

Episode 12|Iteration 352|reward:  445.0|last_reward_at:  337|Elapsed Time: 0:00:05||

Episode 12|Iteration 352|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 12|Iteration 358|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 12|Iteration 364|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 12|Iteration 370|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 12|Iteration 378|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:05||

Episode 12|Iteration 383|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 12|Iteration 392|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 12|Iteration 398|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 12|Iteration 406|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 12|Iteration 412|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 12|Iteration 416|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 12|Iteration 423|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 12|Iteration 428|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 12|Iteration 431|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 12|Iteration 436|reward:  445.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 12|Iteration 440|reward:  505.0|last_reward_at:  352|Elapsed Time: 0:00:06||

Episode 12|Iteration 440|reward:  505.0|last_reward_at:  440|Elapsed Time: 0:00:06||

Episode 12|Iteration 448|reward:  505.0|last_reward_at:  440|Elapsed Time: 0:00:07||

Episode 12|Iteration 456|reward:  505.0|last_reward_at:  440|Elapsed Time: 0:00:07||

Episode 12|Iteration 456|reward:  511.5|last_reward_at:  440|Elapsed Time: 0:00:07||

Episode 12|Iteration 456|reward:  511.5|last_reward_at:  456|Elapsed Time: 0:00:07||

Episode 12|Iteration 462|reward:  511.5|last_reward_at:  456|Elapsed Time: 0:00:07||

Episode 12|Iteration 469|reward:  511.5|last_reward_at:  456|Elapsed Time: 0:00:07||

Episode 12|Iteration 475|reward:  511.5|last_reward_at:  456|Elapsed Time: 0:00:07||

Episode 12|Iteration 482|reward:  511.5|last_reward_at:  456|Elapsed Time: 0:00:07||

Episode 12|Iteration 488|reward:  511.5|last_reward_at:  456|Elapsed Time: 0:00:07||

Episode 12|Iteration 494|reward:  511.5|last_reward_at:  456|Elapsed Time: 0:00:07||

Episode 12|Iteration 500|reward:  511.5|last_reward_at:  456|Elapsed Time: 0:00:07||

Episode 12|Iteration 500|reward:  511.5|last_reward_at:  456|Elapsed Time: 0:00:07||




  Episode 12 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/8 (0.20)
    explore-remote: 5/102 (0.05)
    explore-connect: 0/107 (0.00)
    exploit-local: 8/94 (0.08)
    exploit-remote: 2/88 (0.02)
    exploit-connect: 6/78 (0.07)
  exploit deflected to exploration: 53
  ## Episode: 13/20 'DQL' ϵ=0.3410, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 13|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 13|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 13|Iteration 2|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 13|Iteration 2|reward:   14.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 13|Iteration 13|reward:   14.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 13|Iteration 15|reward:   25.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 13|Iteration 15|reward:   25.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 13|Iteration 17|reward:   65.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 13|Iteration 17|reward:   65.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 13|Iteration 25|reward:   71.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 13|Iteration 25|reward:   71.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 13|Iteration 30|reward:   71.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 13|Iteration 38|reward:   71.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 13|Iteration 41|reward:   84.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 13|Iteration 41|reward:   84.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 13|Iteration 51|reward:   84.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 13|Iteration 57|reward:   84.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 13|Iteration 70|reward:   84.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 13|Iteration 76|reward:   84.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 13|Iteration 79|reward:   97.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 13|Iteration 79|reward:   97.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 13|Iteration 84|reward:  108.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 13|Iteration 84|reward:  108.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 13|Iteration 87|reward:  168.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 13|Iteration 87|reward:  168.0|last_reward_at:   87|Elapsed Time: 0:00:00||

Episode 13|Iteration 95|reward:  168.0|last_reward_at:   87|Elapsed Time: 0:00:01||

Episode 13|Iteration 95|reward:  174.0|last_reward_at:   87|Elapsed Time: 0:00:01||

Episode 13|Iteration 95|reward:  174.0|last_reward_at:   95|Elapsed Time: 0:00:01||

Episode 13|Iteration 102|reward:  174.0|last_reward_at:   95|Elapsed Time: 0:00:01||

Episode 13|Iteration 108|reward:  174.0|last_reward_at:   95|Elapsed Time: 0:00:01||

Episode 13|Iteration 110|reward:  174.0|last_reward_at:   95|Elapsed Time: 0:00:01||

Episode 13|Iteration 119|reward:  174.0|last_reward_at:   95|Elapsed Time: 0:00:01||

Episode 13|Iteration 127|reward:  174.0|last_reward_at:   95|Elapsed Time: 0:00:01||

Episode 13|Iteration 129|reward:  214.0|last_reward_at:   95|Elapsed Time: 0:00:01||

Episode 13|Iteration 129|reward:  214.0|last_reward_at:  129|Elapsed Time: 0:00:01||

Episode 13|Iteration 132|reward:  214.0|last_reward_at:  129|Elapsed Time: 0:00:01||

Episode 13|Iteration 137|reward:  214.0|last_reward_at:  129|Elapsed Time: 0:00:01||

Episode 13|Iteration 144|reward:  214.0|last_reward_at:  129|Elapsed Time: 0:00:01||

Episode 13|Iteration 150|reward:  214.0|last_reward_at:  129|Elapsed Time: 0:00:02||

Episode 13|Iteration 154|reward:  214.0|last_reward_at:  129|Elapsed Time: 0:00:02||

Episode 13|Iteration 161|reward:  220.0|last_reward_at:  129|Elapsed Time: 0:00:02||

Episode 13|Iteration 161|reward:  220.0|last_reward_at:  161|Elapsed Time: 0:00:02||

Episode 13|Iteration 162|reward:  233.0|last_reward_at:  161|Elapsed Time: 0:00:02||

Episode 13|Iteration 162|reward:  233.0|last_reward_at:  162|Elapsed Time: 0:00:02||

Episode 13|Iteration 170|reward:  233.0|last_reward_at:  162|Elapsed Time: 0:00:02||

Episode 13|Iteration 178|reward:  233.0|last_reward_at:  162|Elapsed Time: 0:00:02||

Episode 13|Iteration 184|reward:  233.0|last_reward_at:  162|Elapsed Time: 0:00:02||

Episode 13|Iteration 190|reward:  233.0|last_reward_at:  162|Elapsed Time: 0:00:02||

Episode 13|Iteration 197|reward:  233.0|last_reward_at:  162|Elapsed Time: 0:00:02||

Episode 13|Iteration 198|reward:  247.0|last_reward_at:  162|Elapsed Time: 0:00:02||

Episode 13|Iteration 198|reward:  247.0|last_reward_at:  198|Elapsed Time: 0:00:02||

Episode 13|Iteration 201|reward:  253.0|last_reward_at:  198|Elapsed Time: 0:00:02||

Episode 13|Iteration 201|reward:  253.0|last_reward_at:  201|Elapsed Time: 0:00:02||

Episode 13|Iteration 202|reward:  267.0|last_reward_at:  201|Elapsed Time: 0:00:02||

Episode 13|Iteration 202|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 13|Iteration 209|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 13|Iteration 215|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 13|Iteration 222|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 13|Iteration 227|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 13|Iteration 235|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 13|Iteration 237|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 13|Iteration 241|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 13|Iteration 247|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 13|Iteration 252|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 13|Iteration 258|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 13|Iteration 266|reward:  267.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 13|Iteration 266|reward:  273.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 13|Iteration 266|reward:  273.0|last_reward_at:  266|Elapsed Time: 0:00:03||

Episode 13|Iteration 273|reward:  273.0|last_reward_at:  266|Elapsed Time: 0:00:03||

Episode 13|Iteration 279|reward:  273.0|last_reward_at:  266|Elapsed Time: 0:00:03||

Episode 13|Iteration 282|reward:  333.0|last_reward_at:  266|Elapsed Time: 0:00:03||

Episode 13|Iteration 282|reward:  333.0|last_reward_at:  282|Elapsed Time: 0:00:03||

Episode 13|Iteration 283|reward:  339.0|last_reward_at:  282|Elapsed Time: 0:00:03||

Episode 13|Iteration 283|reward:  339.0|last_reward_at:  283|Elapsed Time: 0:00:03||

Episode 13|Iteration 292|reward:  339.0|last_reward_at:  283|Elapsed Time: 0:00:04||

Episode 13|Iteration 298|reward:  339.0|last_reward_at:  283|Elapsed Time: 0:00:04||

Episode 13|Iteration 304|reward:  339.0|last_reward_at:  283|Elapsed Time: 0:00:04||

Episode 13|Iteration 311|reward:  339.0|last_reward_at:  283|Elapsed Time: 0:00:04||

Episode 13|Iteration 317|reward:  339.0|last_reward_at:  283|Elapsed Time: 0:00:04||

Episode 13|Iteration 317|reward:  348.0|last_reward_at:  283|Elapsed Time: 0:00:04||

Episode 13|Iteration 317|reward:  348.0|last_reward_at:  317|Elapsed Time: 0:00:04||

Episode 13|Iteration 323|reward:  348.0|last_reward_at:  317|Elapsed Time: 0:00:04||

Episode 13|Iteration 330|reward:  348.0|last_reward_at:  317|Elapsed Time: 0:00:04||

Episode 13|Iteration 331|reward:  358.0|last_reward_at:  317|Elapsed Time: 0:00:04||

Episode 13|Iteration 331|reward:  358.0|last_reward_at:  331|Elapsed Time: 0:00:04||

Episode 13|Iteration 341|reward:  358.0|last_reward_at:  331|Elapsed Time: 0:00:04||

Episode 13|Iteration 348|reward:  358.0|last_reward_at:  331|Elapsed Time: 0:00:04||

Episode 13|Iteration 355|reward:  358.0|last_reward_at:  331|Elapsed Time: 0:00:04||

Episode 13|Iteration 361|reward:  358.0|last_reward_at:  331|Elapsed Time: 0:00:04||

Episode 13|Iteration 368|reward:  358.0|last_reward_at:  331|Elapsed Time: 0:00:05||

Episode 13|Iteration 374|reward:  358.0|last_reward_at:  331|Elapsed Time: 0:00:05||

Episode 13|Iteration 377|reward:  364.5|last_reward_at:  331|Elapsed Time: 0:00:05||

Episode 13|Iteration 377|reward:  364.5|last_reward_at:  377|Elapsed Time: 0:00:05||

Episode 13|Iteration 384|reward:  364.5|last_reward_at:  377|Elapsed Time: 0:00:05||

Episode 13|Iteration 387|reward:  370.5|last_reward_at:  377|Elapsed Time: 0:00:05||

Episode 13|Iteration 387|reward:  370.5|last_reward_at:  387|Elapsed Time: 0:00:05||

Episode 13|Iteration 392|reward:  370.5|last_reward_at:  387|Elapsed Time: 0:00:05||

Episode 13|Iteration 397|reward:  370.5|last_reward_at:  387|Elapsed Time: 0:00:05||

Episode 13|Iteration 406|reward:  370.5|last_reward_at:  387|Elapsed Time: 0:00:05||

Episode 13|Iteration 412|reward:  370.5|last_reward_at:  387|Elapsed Time: 0:00:05||

Episode 13|Iteration 415|reward:  490.5|last_reward_at:  387|Elapsed Time: 0:00:05||

Episode 13|Iteration 415|reward:  490.5|last_reward_at:  415|Elapsed Time: 0:00:05||

Episode 13|Iteration 418|reward:  490.5|last_reward_at:  415|Elapsed Time: 0:00:05||

Episode 13|Iteration 420|reward:  490.5|last_reward_at:  415|Elapsed Time: 0:00:05||

Episode 13|Iteration 422|reward:  490.5|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 13|Iteration 425|reward:  490.5|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 13|Iteration 426|reward:  490.5|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 13|Iteration 430|reward:  490.5|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 13|Iteration 433|reward:  490.5|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 13|Iteration 437|reward:  490.5|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 13|Iteration 444|reward:  490.5|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 13|Iteration 450|reward:  490.5|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 13|Iteration 452|reward:  512.0|last_reward_at:  415|Elapsed Time: 0:00:06||

Episode 13|Iteration 452|reward:  512.0|last_reward_at:  452|Elapsed Time: 0:00:06||

Episode 13|Iteration 456|reward:  512.0|last_reward_at:  452|Elapsed Time: 0:00:06||

Episode 13|Iteration 460|reward:  512.0|last_reward_at:  452|Elapsed Time: 0:00:06||

Episode 13|Iteration 462|reward:  518.0|last_reward_at:  452|Elapsed Time: 0:00:07||

Episode 13|Iteration 462|reward:  518.0|last_reward_at:  462|Elapsed Time: 0:00:07||

Episode 13|Iteration 468|reward:  518.0|last_reward_at:  462|Elapsed Time: 0:00:07||

Episode 13|Iteration 472|reward:  518.0|last_reward_at:  462|Elapsed Time: 0:00:07||

Episode 13|Iteration 479|reward:  518.0|last_reward_at:  462|Elapsed Time: 0:00:07||

Episode 13|Iteration 485|reward:  518.0|last_reward_at:  462|Elapsed Time: 0:00:07||

Episode 13|Iteration 488|reward:  518.0|last_reward_at:  462|Elapsed Time: 0:00:07||

Episode 13|Iteration 492|reward:  518.0|last_reward_at:  462|Elapsed Time: 0:00:07||

Episode 13|Iteration 498|reward:  518.0|last_reward_at:  462|Elapsed Time: 0:00:07||

Episode 13|Iteration 500|reward:  518.0|last_reward_at:  462|Elapsed Time: 0:00:07||




  Episode 13 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 6/6 (0.50)
    explore-remote: 3/122 (0.02)
    explore-connect: 0/106 (0.00)
    exploit-local: 6/70 (0.08)
    exploit-remote: 4/105 (0.04)
    exploit-connect: 6/66 (0.08)
  exploit deflected to exploration: 66
  ## Episode: 14/20 'DQL' ϵ=0.3181, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 14|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 14|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 14|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 14|Iteration 3|reward:   20.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 14|Iteration 3|reward:   20.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 14|Iteration 9|reward:   60.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 14|Iteration 9|reward:   60.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 14|Iteration 13|reward:   60.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 14|Iteration 18|reward:   71.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 14|Iteration 18|reward:   71.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 14|Iteration 22|reward:   77.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 14|Iteration 22|reward:   77.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 14|Iteration 30|reward:   77.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 14|Iteration 38|reward:   77.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 14|Iteration 44|reward:   77.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 14|Iteration 50|reward:   77.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 14|Iteration 57|reward:   77.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 14|Iteration 57|reward:   91.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 14|Iteration 57|reward:   91.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 14|Iteration 64|reward:   91.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 14|Iteration 70|reward:   91.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 14|Iteration 76|reward:   91.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 14|Iteration 83|reward:   91.0|last_reward_at:   57|Elapsed Time: 0:00:01||

Episode 14|Iteration 83|reward:  102.0|last_reward_at:   57|Elapsed Time: 0:00:01||

Episode 14|Iteration 83|reward:  102.0|last_reward_at:   83|Elapsed Time: 0:00:01||

Episode 14|Iteration 88|reward:  115.0|last_reward_at:   83|Elapsed Time: 0:00:01||

Episode 14|Iteration 88|reward:  115.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 14|Iteration 95|reward:  115.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 14|Iteration 102|reward:  115.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 14|Iteration 112|reward:  115.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 14|Iteration 120|reward:  115.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 14|Iteration 121|reward:  175.0|last_reward_at:   88|Elapsed Time: 0:00:01||

Episode 14|Iteration 121|reward:  175.0|last_reward_at:  121|Elapsed Time: 0:00:01||

Episode 14|Iteration 127|reward:  175.0|last_reward_at:  121|Elapsed Time: 0:00:01||

Episode 14|Iteration 133|reward:  175.0|last_reward_at:  121|Elapsed Time: 0:00:01||

Episode 14|Iteration 140|reward:  175.0|last_reward_at:  121|Elapsed Time: 0:00:01||

Episode 14|Iteration 143|reward:  188.0|last_reward_at:  121|Elapsed Time: 0:00:01||

Episode 14|Iteration 143|reward:  188.0|last_reward_at:  143|Elapsed Time: 0:00:01||

Episode 14|Iteration 152|reward:  188.0|last_reward_at:  143|Elapsed Time: 0:00:01||

Episode 14|Iteration 159|reward:  188.0|last_reward_at:  143|Elapsed Time: 0:00:01||

Episode 14|Iteration 165|reward:  188.0|last_reward_at:  143|Elapsed Time: 0:00:01||

Episode 14|Iteration 168|reward:  198.0|last_reward_at:  143|Elapsed Time: 0:00:01||

Episode 14|Iteration 168|reward:  198.0|last_reward_at:  168|Elapsed Time: 0:00:01||

Episode 14|Iteration 178|reward:  198.0|last_reward_at:  168|Elapsed Time: 0:00:02||

Episode 14|Iteration 184|reward:  198.0|last_reward_at:  168|Elapsed Time: 0:00:02||

Episode 14|Iteration 186|reward:  209.0|last_reward_at:  168|Elapsed Time: 0:00:02||

Episode 14|Iteration 186|reward:  209.0|last_reward_at:  186|Elapsed Time: 0:00:02||

Episode 14|Iteration 197|reward:  209.0|last_reward_at:  186|Elapsed Time: 0:00:02||

Episode 14|Iteration 198|reward:  215.0|last_reward_at:  186|Elapsed Time: 0:00:02||

Episode 14|Iteration 198|reward:  215.0|last_reward_at:  198|Elapsed Time: 0:00:02||

Episode 14|Iteration 202|reward:  221.0|last_reward_at:  198|Elapsed Time: 0:00:02||

Episode 14|Iteration 202|reward:  221.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 14|Iteration 209|reward:  221.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 14|Iteration 215|reward:  221.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 14|Iteration 222|reward:  221.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 14|Iteration 228|reward:  221.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 14|Iteration 235|reward:  221.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 14|Iteration 241|reward:  221.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 14|Iteration 247|reward:  221.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 14|Iteration 254|reward:  221.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 14|Iteration 260|reward:  221.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 14|Iteration 260|reward:  341.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 14|Iteration 260|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:03||

Episode 14|Iteration 266|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:03||

Episode 14|Iteration 273|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:03||

Episode 14|Iteration 279|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:03||

Episode 14|Iteration 285|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:03||

Episode 14|Iteration 291|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:03||

Episode 14|Iteration 298|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:03||

Episode 14|Iteration 304|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:03||

Episode 14|Iteration 308|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:03||

Episode 14|Iteration 317|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:03||

Episode 14|Iteration 323|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:03||

Episode 14|Iteration 329|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:04||

Episode 14|Iteration 335|reward:  341.0|last_reward_at:  260|Elapsed Time: 0:00:04||

Episode 14|Iteration 341|reward:  371.0|last_reward_at:  260|Elapsed Time: 0:00:04||

Episode 14|Iteration 341|reward:  371.0|last_reward_at:  341|Elapsed Time: 0:00:04||

Episode 14|Iteration 344|reward:  431.0|last_reward_at:  341|Elapsed Time: 0:00:04||

Episode 14|Iteration 344|reward:  431.0|last_reward_at:  344|Elapsed Time: 0:00:04||

Episode 14|Iteration 349|reward:  431.0|last_reward_at:  344|Elapsed Time: 0:00:04||

Episode 14|Iteration 355|reward:  431.0|last_reward_at:  344|Elapsed Time: 0:00:04||

Episode 14|Iteration 361|reward:  431.0|last_reward_at:  344|Elapsed Time: 0:00:04||

Episode 14|Iteration 364|reward:  437.5|last_reward_at:  344|Elapsed Time: 0:00:04||

Episode 14|Iteration 364|reward:  437.5|last_reward_at:  364|Elapsed Time: 0:00:04||

Episode 14|Iteration 374|reward:  437.5|last_reward_at:  364|Elapsed Time: 0:00:04||

Episode 14|Iteration 376|reward:  444.0|last_reward_at:  364|Elapsed Time: 0:00:04||

Episode 14|Iteration 376|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:04||

Episode 14|Iteration 380|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:04||

Episode 14|Iteration 387|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:04||

Episode 14|Iteration 393|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:04||

Episode 14|Iteration 399|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:05||

Episode 14|Iteration 406|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:05||

Episode 14|Iteration 412|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:05||

Episode 14|Iteration 418|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:05||

Episode 14|Iteration 424|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:05||

Episode 14|Iteration 431|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:05||

Episode 14|Iteration 435|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:05||

Episode 14|Iteration 442|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:05||

Episode 14|Iteration 449|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:05||

Episode 14|Iteration 454|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:05||

Episode 14|Iteration 461|reward:  444.0|last_reward_at:  376|Elapsed Time: 0:00:06||

Episode 14|Iteration 463|reward:  457.0|last_reward_at:  376|Elapsed Time: 0:00:06||

Episode 14|Iteration 463|reward:  457.0|last_reward_at:  463|Elapsed Time: 0:00:06||

Episode 14|Iteration 464|reward:  463.0|last_reward_at:  463|Elapsed Time: 0:00:06||

Episode 14|Iteration 464|reward:  463.0|last_reward_at:  464|Elapsed Time: 0:00:06||

Episode 14|Iteration 469|reward:  463.0|last_reward_at:  464|Elapsed Time: 0:00:06||

Episode 14|Iteration 475|reward:  463.0|last_reward_at:  464|Elapsed Time: 0:00:06||

Episode 14|Iteration 482|reward:  463.0|last_reward_at:  464|Elapsed Time: 0:00:06||

Episode 14|Iteration 488|reward:  463.0|last_reward_at:  464|Elapsed Time: 0:00:06||

Episode 14|Iteration 493|reward:  503.0|last_reward_at:  464|Elapsed Time: 0:00:06||

Episode 14|Iteration 493|reward:  503.0|last_reward_at:  493|Elapsed Time: 0:00:06||

Episode 14|Iteration 499|reward:  503.0|last_reward_at:  493|Elapsed Time: 0:00:06||

Episode 14|Iteration 500|reward:  503.0|last_reward_at:  493|Elapsed Time: 0:00:06||




  Episode 14 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/2 (0.60)
    explore-remote: 1/82 (0.01)
    explore-connect: 0/91 (0.00)
    exploit-local: 9/71 (0.11)
    exploit-remote: 3/121 (0.02)
    exploit-connect: 6/111 (0.05)
  exploit deflected to exploration: 46
  ## Episode: 15/20 'DQL' ϵ=0.2973, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 15|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 15|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 15|Iteration 3|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 15|Iteration 3|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 15|Iteration 4|reward:   25.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 15|Iteration 4|reward:   25.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 15|Iteration 7|reward:   65.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 15|Iteration 7|reward:   65.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 15|Iteration 13|reward:   65.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 15|Iteration 13|reward:   71.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 15|Iteration 13|reward:   71.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 15|Iteration 19|reward:   71.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 15|Iteration 26|reward:   71.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 15|Iteration 26|reward:   84.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 15|Iteration 26|reward:   84.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 15|Iteration 32|reward:   84.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 15|Iteration 34|reward:  144.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 15|Iteration 34|reward:  144.0|last_reward_at:   34|Elapsed Time: 0:00:00||

Episode 15|Iteration 38|reward:  144.0|last_reward_at:   34|Elapsed Time: 0:00:00||

Episode 15|Iteration 44|reward:  155.0|last_reward_at:   34|Elapsed Time: 0:00:00||

Episode 15|Iteration 44|reward:  155.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 15|Iteration 50|reward:  155.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 15|Iteration 54|reward:  155.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 15|Iteration 56|reward:  169.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 15|Iteration 56|reward:  169.0|last_reward_at:   56|Elapsed Time: 0:00:00||

Episode 15|Iteration 60|reward:  169.0|last_reward_at:   56|Elapsed Time: 0:00:00||

Episode 15|Iteration 61|reward:  183.0|last_reward_at:   56|Elapsed Time: 0:00:00||

Episode 15|Iteration 61|reward:  183.0|last_reward_at:   61|Elapsed Time: 0:00:01||

Episode 15|Iteration 63|reward:  197.0|last_reward_at:   61|Elapsed Time: 0:00:01||

Episode 15|Iteration 63|reward:  197.0|last_reward_at:   63|Elapsed Time: 0:00:01||

Episode 15|Iteration 70|reward:  197.0|last_reward_at:   63|Elapsed Time: 0:00:01||

Episode 15|Iteration 72|reward:  202.0|last_reward_at:   63|Elapsed Time: 0:00:01||

Episode 15|Iteration 72|reward:  202.0|last_reward_at:   72|Elapsed Time: 0:00:01||

Episode 15|Iteration 80|reward:  202.0|last_reward_at:   72|Elapsed Time: 0:00:01||

Episode 15|Iteration 83|reward:  208.0|last_reward_at:   72|Elapsed Time: 0:00:01||

Episode 15|Iteration 83|reward:  208.0|last_reward_at:   83|Elapsed Time: 0:00:01||

Episode 15|Iteration 84|reward:  214.5|last_reward_at:   83|Elapsed Time: 0:00:01||

Episode 15|Iteration 84|reward:  214.5|last_reward_at:   84|Elapsed Time: 0:00:01||

Episode 15|Iteration 89|reward:  214.5|last_reward_at:   84|Elapsed Time: 0:00:01||

Episode 15|Iteration 95|reward:  214.5|last_reward_at:   84|Elapsed Time: 0:00:01||

Episode 15|Iteration 102|reward:  214.5|last_reward_at:   84|Elapsed Time: 0:00:01||

Episode 15|Iteration 103|reward:  254.5|last_reward_at:   84|Elapsed Time: 0:00:01||

Episode 15|Iteration 103|reward:  254.5|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 15|Iteration 108|reward:  254.5|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 15|Iteration 114|reward:  254.5|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 15|Iteration 120|reward:  254.5|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 15|Iteration 125|reward:  264.5|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 15|Iteration 125|reward:  264.5|last_reward_at:  125|Elapsed Time: 0:00:01||

Episode 15|Iteration 132|reward:  264.5|last_reward_at:  125|Elapsed Time: 0:00:02||

Episode 15|Iteration 140|reward:  264.5|last_reward_at:  125|Elapsed Time: 0:00:02||

Episode 15|Iteration 146|reward:  264.5|last_reward_at:  125|Elapsed Time: 0:00:02||

Episode 15|Iteration 147|reward:  277.5|last_reward_at:  125|Elapsed Time: 0:00:02||

Episode 15|Iteration 147|reward:  277.5|last_reward_at:  147|Elapsed Time: 0:00:02||

Episode 15|Iteration 149|reward:  283.5|last_reward_at:  147|Elapsed Time: 0:00:02||

Episode 15|Iteration 149|reward:  283.5|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 15|Iteration 159|reward:  283.5|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 15|Iteration 165|reward:  283.5|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 15|Iteration 171|reward:  283.5|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 15|Iteration 178|reward:  283.5|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 15|Iteration 183|reward:  283.5|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 15|Iteration 186|reward:  289.5|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 15|Iteration 186|reward:  289.5|last_reward_at:  186|Elapsed Time: 0:00:02||

Episode 15|Iteration 188|reward:  349.5|last_reward_at:  186|Elapsed Time: 0:00:02||

Episode 15|Iteration 188|reward:  349.5|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 15|Iteration 191|reward:  355.5|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 15|Iteration 191|reward:  355.5|last_reward_at:  191|Elapsed Time: 0:00:02||

Episode 15|Iteration 197|reward:  355.5|last_reward_at:  191|Elapsed Time: 0:00:02||

Episode 15|Iteration 203|reward:  355.5|last_reward_at:  191|Elapsed Time: 0:00:03||

Episode 15|Iteration 209|reward:  355.5|last_reward_at:  191|Elapsed Time: 0:00:03||

Episode 15|Iteration 210|reward:  361.5|last_reward_at:  191|Elapsed Time: 0:00:03||

Episode 15|Iteration 210|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:03||

Episode 15|Iteration 216|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:03||

Episode 15|Iteration 222|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:03||

Episode 15|Iteration 227|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:03||

Episode 15|Iteration 232|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:03||

Episode 15|Iteration 241|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:03||

Episode 15|Iteration 245|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:03||

Episode 15|Iteration 249|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:03||

Episode 15|Iteration 254|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:03||

Episode 15|Iteration 258|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 266|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 271|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 274|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 279|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 284|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 289|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 292|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 296|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 304|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 311|reward:  361.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 311|reward:  370.5|last_reward_at:  210|Elapsed Time: 0:00:04||

Episode 15|Iteration 311|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:04||

Episode 15|Iteration 317|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:05||

Episode 15|Iteration 321|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:05||

Episode 15|Iteration 327|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:05||

Episode 15|Iteration 336|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:05||

Episode 15|Iteration 342|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:05||

Episode 15|Iteration 349|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:05||

Episode 15|Iteration 352|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:05||

Episode 15|Iteration 355|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:05||

Episode 15|Iteration 361|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:05||

Episode 15|Iteration 368|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:05||

Episode 15|Iteration 373|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:05||

Episode 15|Iteration 380|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:06||

Episode 15|Iteration 384|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:06||

Episode 15|Iteration 388|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:06||

Episode 15|Iteration 396|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:06||

Episode 15|Iteration 404|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:06||

Episode 15|Iteration 411|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:06||

Episode 15|Iteration 418|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:06||

Episode 15|Iteration 425|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:06||

Episode 15|Iteration 430|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:06||

Episode 15|Iteration 437|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:06||

Episode 15|Iteration 443|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:07||

Episode 15|Iteration 450|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:07||

Episode 15|Iteration 456|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:07||

Episode 15|Iteration 463|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:07||

Episode 15|Iteration 469|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:07||

Episode 15|Iteration 475|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:07||

Episode 15|Iteration 479|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:07||

Episode 15|Iteration 482|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:07||

Episode 15|Iteration 488|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:07||

Episode 15|Iteration 494|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:07||

Episode 15|Iteration 497|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:07||

Episode 15|Iteration 500|reward:  370.5|last_reward_at:  311|Elapsed Time: 0:00:08||




  Episode 15 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/7 (0.22)
    explore-remote: 3/86 (0.03)
    explore-connect: 0/86 (0.00)
    exploit-local: 8/99 (0.07)
    exploit-remote: 4/82 (0.05)
    exploit-connect: 5/118 (0.04)
  exploit deflected to exploration: 41
  ## Episode: 16/20 'DQL' ϵ=0.2785, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 16|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 16|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 16|Iteration 3|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 16|Iteration 3|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 16|Iteration 13|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 16|Iteration 13|reward:   25.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 16|Iteration 13|reward:   25.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 16|Iteration 16|reward:   36.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 16|Iteration 16|reward:   36.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 16|Iteration 26|reward:   36.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 16|Iteration 32|reward:   36.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 16|Iteration 38|reward:   36.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 16|Iteration 45|reward:   36.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 16|Iteration 45|reward:   49.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 16|Iteration 45|reward:   49.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 16|Iteration 51|reward:   49.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 16|Iteration 57|reward:   49.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 16|Iteration 64|reward:   49.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 16|Iteration 70|reward:  109.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 16|Iteration 70|reward:  109.0|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 16|Iteration 78|reward:  120.5|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 16|Iteration 78|reward:  120.5|last_reward_at:   78|Elapsed Time: 0:00:00||

Episode 16|Iteration 81|reward:  133.5|last_reward_at:   78|Elapsed Time: 0:00:00||

Episode 16|Iteration 81|reward:  133.5|last_reward_at:   81|Elapsed Time: 0:00:00||

Episode 16|Iteration 86|reward:  173.5|last_reward_at:   81|Elapsed Time: 0:00:00||

Episode 16|Iteration 86|reward:  173.5|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 16|Iteration 95|reward:  173.5|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 16|Iteration 98|reward:  179.5|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 16|Iteration 98|reward:  179.5|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 16|Iteration 108|reward:  179.5|last_reward_at:   98|Elapsed Time: 0:00:01||

Episode 16|Iteration 114|reward:  179.5|last_reward_at:   98|Elapsed Time: 0:00:01||

Episode 16|Iteration 116|reward:  219.5|last_reward_at:   98|Elapsed Time: 0:00:01||

Episode 16|Iteration 116|reward:  219.5|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 16|Iteration 121|reward:  219.5|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 16|Iteration 123|reward:  219.5|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 16|Iteration 127|reward:  219.5|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 16|Iteration 132|reward:  219.5|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 16|Iteration 137|reward:  219.5|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 16|Iteration 143|reward:  219.5|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 16|Iteration 146|reward:  228.5|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 16|Iteration 146|reward:  228.5|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 16|Iteration 152|reward:  228.5|last_reward_at:  146|Elapsed Time: 0:00:01||

Episode 16|Iteration 156|reward:  228.5|last_reward_at:  146|Elapsed Time: 0:00:02||

Episode 16|Iteration 165|reward:  228.5|last_reward_at:  146|Elapsed Time: 0:00:02||

Episode 16|Iteration 171|reward:  228.5|last_reward_at:  146|Elapsed Time: 0:00:02||

Episode 16|Iteration 173|reward:  237.5|last_reward_at:  146|Elapsed Time: 0:00:02||

Episode 16|Iteration 173|reward:  237.5|last_reward_at:  173|Elapsed Time: 0:00:02||

Episode 16|Iteration 182|reward:  243.5|last_reward_at:  173|Elapsed Time: 0:00:02||

Episode 16|Iteration 182|reward:  243.5|last_reward_at:  182|Elapsed Time: 0:00:02||

Episode 16|Iteration 190|reward:  243.5|last_reward_at:  182|Elapsed Time: 0:00:02||

Episode 16|Iteration 195|reward:  257.5|last_reward_at:  182|Elapsed Time: 0:00:02||

Episode 16|Iteration 195|reward:  257.5|last_reward_at:  195|Elapsed Time: 0:00:02||

Episode 16|Iteration 203|reward:  257.5|last_reward_at:  195|Elapsed Time: 0:00:02||

Episode 16|Iteration 207|reward:  257.5|last_reward_at:  195|Elapsed Time: 0:00:02||

Episode 16|Iteration 213|reward:  257.5|last_reward_at:  195|Elapsed Time: 0:00:02||

Episode 16|Iteration 220|reward:  257.5|last_reward_at:  195|Elapsed Time: 0:00:02||

Episode 16|Iteration 228|reward:  257.5|last_reward_at:  195|Elapsed Time: 0:00:02||

Episode 16|Iteration 233|reward:  257.5|last_reward_at:  195|Elapsed Time: 0:00:03||

Episode 16|Iteration 237|reward:  263.5|last_reward_at:  195|Elapsed Time: 0:00:03||

Episode 16|Iteration 237|reward:  263.5|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 16|Iteration 244|reward:  263.5|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 16|Iteration 246|reward:  273.5|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 16|Iteration 246|reward:  273.5|last_reward_at:  246|Elapsed Time: 0:00:03||

Episode 16|Iteration 254|reward:  273.5|last_reward_at:  246|Elapsed Time: 0:00:03||

Episode 16|Iteration 255|reward:  279.5|last_reward_at:  246|Elapsed Time: 0:00:03||

Episode 16|Iteration 255|reward:  279.5|last_reward_at:  255|Elapsed Time: 0:00:03||

Episode 16|Iteration 263|reward:  279.5|last_reward_at:  255|Elapsed Time: 0:00:03||

Episode 16|Iteration 273|reward:  279.5|last_reward_at:  255|Elapsed Time: 0:00:03||

Episode 16|Iteration 279|reward:  279.5|last_reward_at:  255|Elapsed Time: 0:00:03||

Episode 16|Iteration 285|reward:  279.5|last_reward_at:  255|Elapsed Time: 0:00:03||

Episode 16|Iteration 292|reward:  279.5|last_reward_at:  255|Elapsed Time: 0:00:03||

Episode 16|Iteration 298|reward:  279.5|last_reward_at:  255|Elapsed Time: 0:00:03||

Episode 16|Iteration 302|reward:  285.5|last_reward_at:  255|Elapsed Time: 0:00:03||

Episode 16|Iteration 302|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:03||

Episode 16|Iteration 310|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 317|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 321|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 330|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 335|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 342|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 349|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 355|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 361|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 368|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 374|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 379|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:04||

Episode 16|Iteration 386|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:05||

Episode 16|Iteration 391|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:05||

Episode 16|Iteration 397|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:05||

Episode 16|Iteration 406|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:05||

Episode 16|Iteration 411|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:05||

Episode 16|Iteration 418|reward:  285.5|last_reward_at:  302|Elapsed Time: 0:00:05||

Episode 16|Iteration 424|reward:  298.5|last_reward_at:  302|Elapsed Time: 0:00:05||

Episode 16|Iteration 424|reward:  298.5|last_reward_at:  424|Elapsed Time: 0:00:05||

Episode 16|Iteration 431|reward:  298.5|last_reward_at:  424|Elapsed Time: 0:00:05||

Episode 16|Iteration 437|reward:  298.5|last_reward_at:  424|Elapsed Time: 0:00:05||

Episode 16|Iteration 441|reward:  298.5|last_reward_at:  424|Elapsed Time: 0:00:05||

Episode 16|Iteration 447|reward:  298.5|last_reward_at:  424|Elapsed Time: 0:00:06||

Episode 16|Iteration 448|reward:  304.5|last_reward_at:  424|Elapsed Time: 0:00:06||

Episode 16|Iteration 448|reward:  304.5|last_reward_at:  448|Elapsed Time: 0:00:06||

Episode 16|Iteration 456|reward:  304.5|last_reward_at:  448|Elapsed Time: 0:00:06||

Episode 16|Iteration 460|reward:  304.5|last_reward_at:  448|Elapsed Time: 0:00:06||

Episode 16|Iteration 467|reward:  424.5|last_reward_at:  448|Elapsed Time: 0:00:06||

Episode 16|Iteration 467|reward:  424.5|last_reward_at:  467|Elapsed Time: 0:00:06||

Episode 16|Iteration 475|reward:  424.5|last_reward_at:  467|Elapsed Time: 0:00:06||

Episode 16|Iteration 479|reward:  424.5|last_reward_at:  467|Elapsed Time: 0:00:06||

Episode 16|Iteration 488|reward:  424.5|last_reward_at:  467|Elapsed Time: 0:00:06||

Episode 16|Iteration 493|reward:  424.5|last_reward_at:  467|Elapsed Time: 0:00:06||

Episode 16|Iteration 500|reward:  424.5|last_reward_at:  467|Elapsed Time: 0:00:06||

Episode 16|Iteration 500|reward:  424.5|last_reward_at:  467|Elapsed Time: 0:00:06||




  Episode 16 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 0/7 (0.00)
    explore-remote: 2/87 (0.02)
    explore-connect: 0/86 (0.00)
    exploit-local: 10/66 (0.13)
    exploit-remote: 4/92 (0.04)
    exploit-connect: 5/141 (0.03)
  exploit deflected to exploration: 37
  ## Episode: 17/20 'DQL' ϵ=0.2615, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 17|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 17|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 17|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 17|Iteration 5|reward:   20.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 17|Iteration 5|reward:   20.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 17|Iteration 13|reward:   20.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 17|Iteration 19|reward:   20.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 17|Iteration 25|reward:   60.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 17|Iteration 25|reward:   60.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 17|Iteration 32|reward:   60.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 17|Iteration 32|reward:   74.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 17|Iteration 32|reward:   74.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 17|Iteration 38|reward:   74.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 17|Iteration 45|reward:   74.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 17|Iteration 51|reward:   74.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 17|Iteration 57|reward:   74.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 17|Iteration 64|reward:   74.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 17|Iteration 74|reward:   74.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 17|Iteration 81|reward:   74.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 17|Iteration 87|reward:   74.0|last_reward_at:   32|Elapsed Time: 0:00:01||

Episode 17|Iteration 93|reward:   85.0|last_reward_at:   32|Elapsed Time: 0:00:01||

Episode 17|Iteration 93|reward:   85.0|last_reward_at:   93|Elapsed Time: 0:00:01||

Episode 17|Iteration 100|reward:   96.0|last_reward_at:   93|Elapsed Time: 0:00:01||

Episode 17|Iteration 100|reward:   96.0|last_reward_at:  100|Elapsed Time: 0:00:01||

Episode 17|Iteration 105|reward:  106.0|last_reward_at:  100|Elapsed Time: 0:00:01||

Episode 17|Iteration 105|reward:  106.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 17|Iteration 112|reward:  112.0|last_reward_at:  105|Elapsed Time: 0:00:01||

Episode 17|Iteration 112|reward:  112.0|last_reward_at:  112|Elapsed Time: 0:00:01||

Episode 17|Iteration 113|reward:  125.0|last_reward_at:  112|Elapsed Time: 0:00:01||

Episode 17|Iteration 113|reward:  125.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 17|Iteration 121|reward:  125.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 17|Iteration 127|reward:  125.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 17|Iteration 133|reward:  125.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 17|Iteration 140|reward:  125.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 17|Iteration 142|reward:  131.0|last_reward_at:  113|Elapsed Time: 0:00:01||

Episode 17|Iteration 142|reward:  131.0|last_reward_at:  142|Elapsed Time: 0:00:01||

Episode 17|Iteration 146|reward:  131.0|last_reward_at:  142|Elapsed Time: 0:00:01||

Episode 17|Iteration 152|reward:  131.0|last_reward_at:  142|Elapsed Time: 0:00:01||

Episode 17|Iteration 159|reward:  131.0|last_reward_at:  142|Elapsed Time: 0:00:01||

Episode 17|Iteration 165|reward:  131.0|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 17|Iteration 171|reward:  131.0|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 17|Iteration 178|reward:  131.0|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 17|Iteration 184|reward:  131.0|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 17|Iteration 190|reward:  131.0|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 17|Iteration 197|reward:  131.0|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 17|Iteration 202|reward:  171.0|last_reward_at:  142|Elapsed Time: 0:00:02||

Episode 17|Iteration 202|reward:  171.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 17|Iteration 207|reward:  171.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 17|Iteration 213|reward:  171.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 17|Iteration 219|reward:  171.0|last_reward_at:  202|Elapsed Time: 0:00:02||

Episode 17|Iteration 224|reward:  171.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 17|Iteration 228|reward:  171.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 17|Iteration 233|reward:  171.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 17|Iteration 239|reward:  171.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 17|Iteration 240|reward:  184.0|last_reward_at:  202|Elapsed Time: 0:00:03||

Episode 17|Iteration 240|reward:  184.0|last_reward_at:  240|Elapsed Time: 0:00:03||

Episode 17|Iteration 247|reward:  184.0|last_reward_at:  240|Elapsed Time: 0:00:03||

Episode 17|Iteration 253|reward:  184.0|last_reward_at:  240|Elapsed Time: 0:00:03||

Episode 17|Iteration 258|reward:  195.0|last_reward_at:  240|Elapsed Time: 0:00:03||

Episode 17|Iteration 258|reward:  195.0|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 17|Iteration 265|reward:  195.0|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 17|Iteration 272|reward:  195.0|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 17|Iteration 279|reward:  195.0|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 17|Iteration 285|reward:  195.0|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 17|Iteration 291|reward:  195.0|last_reward_at:  258|Elapsed Time: 0:00:04||

Episode 17|Iteration 296|reward:  255.0|last_reward_at:  258|Elapsed Time: 0:00:04||

Episode 17|Iteration 296|reward:  255.0|last_reward_at:  296|Elapsed Time: 0:00:04||

Episode 17|Iteration 300|reward:  255.0|last_reward_at:  296|Elapsed Time: 0:00:04||

Episode 17|Iteration 303|reward:  264.0|last_reward_at:  296|Elapsed Time: 0:00:04||

Episode 17|Iteration 303|reward:  264.0|last_reward_at:  303|Elapsed Time: 0:00:04||

Episode 17|Iteration 305|reward:  270.0|last_reward_at:  303|Elapsed Time: 0:00:04||

Episode 17|Iteration 305|reward:  270.0|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 17|Iteration 310|reward:  270.0|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 17|Iteration 316|reward:  270.0|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 17|Iteration 320|reward:  270.0|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 17|Iteration 329|reward:  270.0|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 17|Iteration 336|reward:  270.0|last_reward_at:  305|Elapsed Time: 0:00:04||

Episode 17|Iteration 342|reward:  270.0|last_reward_at:  305|Elapsed Time: 0:00:05||

Episode 17|Iteration 348|reward:  390.0|last_reward_at:  305|Elapsed Time: 0:00:05||

Episode 17|Iteration 348|reward:  390.0|last_reward_at:  348|Elapsed Time: 0:00:05||

Episode 17|Iteration 350|reward:  399.0|last_reward_at:  348|Elapsed Time: 0:00:05||

Episode 17|Iteration 350|reward:  399.0|last_reward_at:  350|Elapsed Time: 0:00:05||

Episode 17|Iteration 353|reward:  399.0|last_reward_at:  350|Elapsed Time: 0:00:05||

Episode 17|Iteration 360|reward:  399.0|last_reward_at:  350|Elapsed Time: 0:00:05||

Episode 17|Iteration 367|reward:  399.0|last_reward_at:  350|Elapsed Time: 0:00:05||

Episode 17|Iteration 370|reward:  420.5|last_reward_at:  350|Elapsed Time: 0:00:05||

Episode 17|Iteration 370|reward:  420.5|last_reward_at:  370|Elapsed Time: 0:00:05||

Episode 17|Iteration 374|reward:  420.5|last_reward_at:  370|Elapsed Time: 0:00:05||

Episode 17|Iteration 380|reward:  420.5|last_reward_at:  370|Elapsed Time: 0:00:05||

Episode 17|Iteration 386|reward:  420.5|last_reward_at:  370|Elapsed Time: 0:00:05||

Episode 17|Iteration 388|reward:  420.5|last_reward_at:  370|Elapsed Time: 0:00:05||

Episode 17|Iteration 391|reward:  420.5|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 17|Iteration 393|reward:  420.5|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 17|Iteration 393|reward:  427.0|last_reward_at:  370|Elapsed Time: 0:00:06||

Episode 17|Iteration 393|reward:  427.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 17|Iteration 399|reward:  427.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 17|Iteration 402|reward:  427.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 17|Iteration 406|reward:  427.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 17|Iteration 412|reward:  427.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 17|Iteration 413|reward:  440.0|last_reward_at:  393|Elapsed Time: 0:00:06||

Episode 17|Iteration 413|reward:  440.0|last_reward_at:  413|Elapsed Time: 0:00:06||

Episode 17|Iteration 417|reward:  440.0|last_reward_at:  413|Elapsed Time: 0:00:06||

Episode 17|Iteration 419|reward:  446.0|last_reward_at:  413|Elapsed Time: 0:00:06||

Episode 17|Iteration 419|reward:  446.0|last_reward_at:  419|Elapsed Time: 0:00:06||

Episode 17|Iteration 425|reward:  446.0|last_reward_at:  419|Elapsed Time: 0:00:06||

Episode 17|Iteration 430|reward:  446.0|last_reward_at:  419|Elapsed Time: 0:00:06||

Episode 17|Iteration 432|reward:  506.0|last_reward_at:  419|Elapsed Time: 0:00:06||

Episode 17|Iteration 432|reward:  506.0|last_reward_at:  432|Elapsed Time: 0:00:06||

Episode 17|Iteration 435|reward:  506.0|last_reward_at:  432|Elapsed Time: 0:00:06||

Episode 17|Iteration 437|reward:  506.0|last_reward_at:  432|Elapsed Time: 0:00:07||

Episode 17|Iteration 437|reward:  521.0|last_reward_at:  432|Elapsed Time: 0:00:07||

Episode 17|Iteration 437|reward:  521.0|last_reward_at:  437|Elapsed Time: 0:00:07||

Episode 17|Iteration 444|reward:  521.0|last_reward_at:  437|Elapsed Time: 0:00:07||

Episode 17|Iteration 450|reward:  521.0|last_reward_at:  437|Elapsed Time: 0:00:07||

Episode 17|Iteration 456|reward:  521.0|last_reward_at:  437|Elapsed Time: 0:00:07||

Episode 17|Iteration 462|reward:  521.0|last_reward_at:  437|Elapsed Time: 0:00:07||

Episode 17|Iteration 467|reward:  521.0|last_reward_at:  437|Elapsed Time: 0:00:07||

Episode 17|Iteration 468|reward:  527.0|last_reward_at:  437|Elapsed Time: 0:00:07||

Episode 17|Iteration 468|reward:  527.0|last_reward_at:  468|Elapsed Time: 0:00:07||

Episode 17|Iteration 475|reward:  527.0|last_reward_at:  468|Elapsed Time: 0:00:07||

Episode 17|Iteration 481|reward:  527.0|last_reward_at:  468|Elapsed Time: 0:00:07||

Episode 17|Iteration 488|reward:  527.0|last_reward_at:  468|Elapsed Time: 0:00:07||

Episode 17|Iteration 494|reward:  527.0|last_reward_at:  468|Elapsed Time: 0:00:07||

Episode 17|Iteration 495|reward:  527.0|last_reward_at:  468|Elapsed Time: 0:00:08||

Episode 17|Iteration 500|reward:  527.0|last_reward_at:  468|Elapsed Time: 0:00:08||

Episode 17|Iteration 500|reward:  527.0|last_reward_at:  468|Elapsed Time: 0:00:08||




  Episode 17 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/6 (0.14)
    explore-remote: 6/79 (0.07)
    explore-connect: 0/78 (0.00)
    exploit-local: 12/95 (0.11)
    exploit-remote: 0/125 (0.00)
    exploit-connect: 6/92 (0.06)
  exploit deflected to exploration: 59
  ## Episode: 18/20 'DQL' ϵ=0.2462, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 18|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 18|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 18|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 18|Iteration 4|reward:   54.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 18|Iteration 4|reward:   54.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 18|Iteration 7|reward:   54.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 18|Iteration 13|reward:   54.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 18|Iteration 19|reward:   54.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 18|Iteration 26|reward:   54.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 18|Iteration 26|reward:   60.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 18|Iteration 26|reward:   60.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 18|Iteration 31|reward:   60.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 18|Iteration 38|reward:   60.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 18|Iteration 39|reward:   71.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 18|Iteration 39|reward:   71.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 18|Iteration 45|reward:   71.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 18|Iteration 51|reward:   71.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 18|Iteration 57|reward:   71.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 18|Iteration 64|reward:   71.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 18|Iteration 70|reward:   71.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 18|Iteration 73|reward:   82.0|last_reward_at:   39|Elapsed Time: 0:00:01||

Episode 18|Iteration 73|reward:   82.0|last_reward_at:   73|Elapsed Time: 0:00:01||

Episode 18|Iteration 80|reward:   82.0|last_reward_at:   73|Elapsed Time: 0:00:01||

Episode 18|Iteration 88|reward:   82.0|last_reward_at:   73|Elapsed Time: 0:00:01||

Episode 18|Iteration 94|reward:   82.0|last_reward_at:   73|Elapsed Time: 0:00:01||

Episode 18|Iteration 99|reward:   82.0|last_reward_at:   73|Elapsed Time: 0:00:01||

Episode 18|Iteration 105|reward:   82.0|last_reward_at:   73|Elapsed Time: 0:00:01||

Episode 18|Iteration 109|reward:   95.0|last_reward_at:   73|Elapsed Time: 0:00:01||

Episode 18|Iteration 109|reward:   95.0|last_reward_at:  109|Elapsed Time: 0:00:01||

Episode 18|Iteration 116|reward:  108.0|last_reward_at:  109|Elapsed Time: 0:00:01||

Episode 18|Iteration 116|reward:  108.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 18|Iteration 123|reward:  114.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 18|Iteration 123|reward:  114.0|last_reward_at:  123|Elapsed Time: 0:00:01||

Episode 18|Iteration 133|reward:  114.0|last_reward_at:  123|Elapsed Time: 0:00:01||

Episode 18|Iteration 140|reward:  114.0|last_reward_at:  123|Elapsed Time: 0:00:02||

Episode 18|Iteration 141|reward:  128.0|last_reward_at:  123|Elapsed Time: 0:00:02||

Episode 18|Iteration 141|reward:  128.0|last_reward_at:  141|Elapsed Time: 0:00:02||

Episode 18|Iteration 143|reward:  188.0|last_reward_at:  141|Elapsed Time: 0:00:02||

Episode 18|Iteration 143|reward:  188.0|last_reward_at:  143|Elapsed Time: 0:00:02||

Episode 18|Iteration 147|reward:  201.0|last_reward_at:  143|Elapsed Time: 0:00:02||

Episode 18|Iteration 147|reward:  201.0|last_reward_at:  147|Elapsed Time: 0:00:02||

Episode 18|Iteration 149|reward:  211.0|last_reward_at:  147|Elapsed Time: 0:00:02||

Episode 18|Iteration 149|reward:  211.0|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 18|Iteration 153|reward:  251.0|last_reward_at:  149|Elapsed Time: 0:00:02||

Episode 18|Iteration 153|reward:  251.0|last_reward_at:  153|Elapsed Time: 0:00:02||

Episode 18|Iteration 159|reward:  251.0|last_reward_at:  153|Elapsed Time: 0:00:02||

Episode 18|Iteration 165|reward:  251.0|last_reward_at:  153|Elapsed Time: 0:00:02||

Episode 18|Iteration 171|reward:  251.0|last_reward_at:  153|Elapsed Time: 0:00:02||

Episode 18|Iteration 178|reward:  251.0|last_reward_at:  153|Elapsed Time: 0:00:02||

Episode 18|Iteration 184|reward:  251.0|last_reward_at:  153|Elapsed Time: 0:00:02||

Episode 18|Iteration 185|reward:  262.0|last_reward_at:  153|Elapsed Time: 0:00:02||

Episode 18|Iteration 185|reward:  262.0|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 18|Iteration 187|reward:  268.0|last_reward_at:  185|Elapsed Time: 0:00:02||

Episode 18|Iteration 187|reward:  268.0|last_reward_at:  187|Elapsed Time: 0:00:02||

Episode 18|Iteration 188|reward:  274.0|last_reward_at:  187|Elapsed Time: 0:00:02||

Episode 18|Iteration 188|reward:  274.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 18|Iteration 190|reward:  274.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 18|Iteration 197|reward:  274.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 18|Iteration 203|reward:  274.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 18|Iteration 209|reward:  274.0|last_reward_at:  188|Elapsed Time: 0:00:03||

Episode 18|Iteration 211|reward:  394.0|last_reward_at:  188|Elapsed Time: 0:00:03||

Episode 18|Iteration 211|reward:  394.0|last_reward_at:  211|Elapsed Time: 0:00:03||

Episode 18|Iteration 215|reward:  403.0|last_reward_at:  211|Elapsed Time: 0:00:03||

Episode 18|Iteration 215|reward:  403.0|last_reward_at:  215|Elapsed Time: 0:00:03||

Episode 18|Iteration 220|reward:  403.0|last_reward_at:  215|Elapsed Time: 0:00:03||

Episode 18|Iteration 228|reward:  403.0|last_reward_at:  215|Elapsed Time: 0:00:03||

Episode 18|Iteration 234|reward:  403.0|last_reward_at:  215|Elapsed Time: 0:00:03||

Episode 18|Iteration 237|reward:  424.5|last_reward_at:  215|Elapsed Time: 0:00:03||

Episode 18|Iteration 237|reward:  424.5|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 18|Iteration 241|reward:  424.5|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 18|Iteration 242|reward:  484.5|last_reward_at:  237|Elapsed Time: 0:00:03||

Episode 18|Iteration 242|reward:  484.5|last_reward_at:  242|Elapsed Time: 0:00:03||

Episode 18|Iteration 245|reward:  484.5|last_reward_at:  242|Elapsed Time: 0:00:03||

Episode 18|Iteration 246|reward:  491.0|last_reward_at:  242|Elapsed Time: 0:00:03||

Episode 18|Iteration 246|reward:  491.0|last_reward_at:  246|Elapsed Time: 0:00:03||

Episode 18|Iteration 253|reward:  491.0|last_reward_at:  246|Elapsed Time: 0:00:03||

Episode 18|Iteration 258|reward:  491.0|last_reward_at:  246|Elapsed Time: 0:00:04||

Episode 18|Iteration 266|reward:  491.0|last_reward_at:  246|Elapsed Time: 0:00:04||

Episode 18|Iteration 273|reward:  491.0|last_reward_at:  246|Elapsed Time: 0:00:04||

Episode 18|Iteration 277|reward:  491.0|last_reward_at:  246|Elapsed Time: 0:00:04||

Episode 18|Iteration 279|reward:  491.0|last_reward_at:  246|Elapsed Time: 0:00:04||

Episode 18|Iteration 284|reward:  491.0|last_reward_at:  246|Elapsed Time: 0:00:04||

Episode 18|Iteration 290|reward:  491.0|last_reward_at:  246|Elapsed Time: 0:00:04||

Episode 18|Iteration 292|reward:  497.0|last_reward_at:  246|Elapsed Time: 0:00:04||

Episode 18|Iteration 292|reward:  497.0|last_reward_at:  292|Elapsed Time: 0:00:04||

Episode 18|Iteration 297|reward:  497.0|last_reward_at:  292|Elapsed Time: 0:00:04||

Episode 18|Iteration 304|reward:  497.0|last_reward_at:  292|Elapsed Time: 0:00:04||

Episode 18|Iteration 308|reward:  497.0|last_reward_at:  292|Elapsed Time: 0:00:04||

Episode 18|Iteration 315|reward:  497.0|last_reward_at:  292|Elapsed Time: 0:00:05||

Episode 18|Iteration 322|reward:  497.0|last_reward_at:  292|Elapsed Time: 0:00:05||

Episode 18|Iteration 329|reward:  503.0|last_reward_at:  292|Elapsed Time: 0:00:05||

Episode 18|Iteration 329|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:05||

Episode 18|Iteration 333|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:05||

Episode 18|Iteration 340|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:05||

Episode 18|Iteration 345|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:05||

Episode 18|Iteration 347|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:05||

Episode 18|Iteration 349|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:05||

Episode 18|Iteration 354|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:05||

Episode 18|Iteration 356|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:06||

Episode 18|Iteration 359|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:06||

Episode 18|Iteration 366|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:06||

Episode 18|Iteration 370|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:06||

Episode 18|Iteration 374|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:06||

Episode 18|Iteration 375|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:06||

Episode 18|Iteration 380|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:06||

Episode 18|Iteration 383|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:06||

Episode 18|Iteration 387|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:07||

Episode 18|Iteration 389|reward:  503.0|last_reward_at:  329|Elapsed Time: 0:00:07||

Episode 18|Iteration 391|reward:  518.0|last_reward_at:  329|Elapsed Time: 0:00:07||

Episode 18|Iteration 391|reward:  518.0|last_reward_at:  391|Elapsed Time: 0:00:07||

Episode 18|Iteration 399|reward:  518.0|last_reward_at:  391|Elapsed Time: 0:00:07||

Episode 18|Iteration 405|reward:  524.0|last_reward_at:  391|Elapsed Time: 0:00:07||

Episode 18|Iteration 405|reward:  524.0|last_reward_at:  405|Elapsed Time: 0:00:07||

Episode 18|Iteration 409|reward:  524.0|last_reward_at:  405|Elapsed Time: 0:00:07||

Episode 18|Iteration 412|reward:  524.0|last_reward_at:  405|Elapsed Time: 0:00:07||

Episode 18|Iteration 416|reward:  524.0|last_reward_at:  405|Elapsed Time: 0:00:07||

Episode 18|Iteration 424|reward:  524.0|last_reward_at:  405|Elapsed Time: 0:00:07||

Episode 18|Iteration 430|reward:  524.0|last_reward_at:  405|Elapsed Time: 0:00:08||

Episode 18|Iteration 435|reward:  524.0|last_reward_at:  405|Elapsed Time: 0:00:08||

Episode 18|Iteration 442|reward:  524.0|last_reward_at:  405|Elapsed Time: 0:00:08||

Episode 18|Iteration 450|reward:  524.0|last_reward_at:  405|Elapsed Time: 0:00:08||

Episode 18|Iteration 456|reward:  524.0|last_reward_at:  405|Elapsed Time: 0:00:08||

Episode 18|Iteration 463|reward:  524.0|last_reward_at:  405|Elapsed Time: 0:00:08||

Episode 18|Iteration 467|reward:  533.0|last_reward_at:  405|Elapsed Time: 0:00:08||

Episode 18|Iteration 467|reward:  533.0|last_reward_at:  467|Elapsed Time: 0:00:08||

Episode 18|Iteration 471|reward:  533.0|last_reward_at:  467|Elapsed Time: 0:00:08||

Episode 18|Iteration 475|reward:  533.0|last_reward_at:  467|Elapsed Time: 0:00:08||

Episode 18|Iteration 481|reward:  533.0|last_reward_at:  467|Elapsed Time: 0:00:08||

Episode 18|Iteration 486|reward:  533.0|last_reward_at:  467|Elapsed Time: 0:00:08||

Episode 18|Iteration 492|reward:  533.0|last_reward_at:  467|Elapsed Time: 0:00:09||

Episode 18|Iteration 494|reward:  533.0|last_reward_at:  467|Elapsed Time: 0:00:09||

Episode 18|Iteration 500|reward:  533.0|last_reward_at:  467|Elapsed Time: 0:00:09||

Episode 18|Iteration 500|reward:  533.0|last_reward_at:  467|Elapsed Time: 0:00:09||




  Episode 18 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/2 (0.50)
    explore-remote: 3/82 (0.04)
    explore-connect: 0/92 (0.00)
    exploit-local: 11/104 (0.10)
    exploit-remote: 4/156 (0.03)
    exploit-connect: 6/38 (0.14)
  exploit deflected to exploration: 67
  ## Episode: 19/20 'DQL' ϵ=0.2323, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 19|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 19|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 19|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 19|Iteration 7|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 19|Iteration 11|reward:   25.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 19|Iteration 11|reward:   25.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 19|Iteration 15|reward:   65.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 19|Iteration 15|reward:   65.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 19|Iteration 19|reward:   65.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 19|Iteration 21|reward:   71.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 19|Iteration 21|reward:   71.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 19|Iteration 24|reward:   82.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 19|Iteration 24|reward:   82.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 19|Iteration 32|reward:   82.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 19|Iteration 38|reward:   82.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 19|Iteration 45|reward:   82.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 19|Iteration 47|reward:   88.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 19|Iteration 47|reward:   88.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 19|Iteration 54|reward:   88.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 19|Iteration 58|reward:   96.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 19|Iteration 58|reward:   96.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 19|Iteration 64|reward:   96.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 19|Iteration 68|reward:  136.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 19|Iteration 68|reward:  136.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 19|Iteration 70|reward:  150.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 19|Iteration 70|reward:  150.0|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 19|Iteration 76|reward:  150.0|last_reward_at:   70|Elapsed Time: 0:00:01||

Episode 19|Iteration 83|reward:  150.0|last_reward_at:   70|Elapsed Time: 0:00:01||

Episode 19|Iteration 88|reward:  150.0|last_reward_at:   70|Elapsed Time: 0:00:01||

Episode 19|Iteration 94|reward:  150.0|last_reward_at:   70|Elapsed Time: 0:00:01||

Episode 19|Iteration 99|reward:  161.0|last_reward_at:   70|Elapsed Time: 0:00:01||

Episode 19|Iteration 99|reward:  161.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 19|Iteration 101|reward:  175.0|last_reward_at:   99|Elapsed Time: 0:00:01||

Episode 19|Iteration 101|reward:  175.0|last_reward_at:  101|Elapsed Time: 0:00:01||

Episode 19|Iteration 108|reward:  175.0|last_reward_at:  101|Elapsed Time: 0:00:01||

Episode 19|Iteration 114|reward:  175.0|last_reward_at:  101|Elapsed Time: 0:00:01||

Episode 19|Iteration 116|reward:  188.0|last_reward_at:  101|Elapsed Time: 0:00:01||

Episode 19|Iteration 116|reward:  188.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 19|Iteration 121|reward:  188.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 19|Iteration 129|reward:  188.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 19|Iteration 137|reward:  198.0|last_reward_at:  116|Elapsed Time: 0:00:01||

Episode 19|Iteration 137|reward:  198.0|last_reward_at:  137|Elapsed Time: 0:00:01||

Episode 19|Iteration 146|reward:  198.0|last_reward_at:  137|Elapsed Time: 0:00:01||

Episode 19|Iteration 152|reward:  198.0|last_reward_at:  137|Elapsed Time: 0:00:02||

Episode 19|Iteration 159|reward:  198.0|last_reward_at:  137|Elapsed Time: 0:00:02||

Episode 19|Iteration 165|reward:  198.0|last_reward_at:  137|Elapsed Time: 0:00:02||

Episode 19|Iteration 169|reward:  204.0|last_reward_at:  137|Elapsed Time: 0:00:02||

Episode 19|Iteration 169|reward:  204.0|last_reward_at:  169|Elapsed Time: 0:00:02||

Episode 19|Iteration 173|reward:  210.0|last_reward_at:  169|Elapsed Time: 0:00:02||

Episode 19|Iteration 173|reward:  210.0|last_reward_at:  173|Elapsed Time: 0:00:02||

Episode 19|Iteration 182|reward:  210.0|last_reward_at:  173|Elapsed Time: 0:00:02||

Episode 19|Iteration 188|reward:  216.0|last_reward_at:  173|Elapsed Time: 0:00:02||

Episode 19|Iteration 188|reward:  216.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 19|Iteration 196|reward:  216.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 19|Iteration 202|reward:  216.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 19|Iteration 208|reward:  216.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 19|Iteration 213|reward:  216.0|last_reward_at:  188|Elapsed Time: 0:00:02||

Episode 19|Iteration 222|reward:  216.0|last_reward_at:  188|Elapsed Time: 0:00:03||

Episode 19|Iteration 223|reward:  229.0|last_reward_at:  188|Elapsed Time: 0:00:03||

Episode 19|Iteration 223|reward:  229.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 228|reward:  229.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 235|reward:  229.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 240|reward:  229.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 247|reward:  229.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 254|reward:  229.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 260|reward:  229.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 266|reward:  229.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 273|reward:  229.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 279|reward:  229.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 285|reward:  229.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 290|reward:  289.0|last_reward_at:  223|Elapsed Time: 0:00:03||

Episode 19|Iteration 290|reward:  289.0|last_reward_at:  290|Elapsed Time: 0:00:03||

Episode 19|Iteration 296|reward:  295.0|last_reward_at:  290|Elapsed Time: 0:00:04||

Episode 19|Iteration 296|reward:  295.0|last_reward_at:  296|Elapsed Time: 0:00:04||

Episode 19|Iteration 303|reward:  295.0|last_reward_at:  296|Elapsed Time: 0:00:04||

Episode 19|Iteration 311|reward:  295.0|last_reward_at:  296|Elapsed Time: 0:00:04||

Episode 19|Iteration 315|reward:  415.0|last_reward_at:  296|Elapsed Time: 0:00:04||

Episode 19|Iteration 315|reward:  415.0|last_reward_at:  315|Elapsed Time: 0:00:04||

Episode 19|Iteration 322|reward:  415.0|last_reward_at:  315|Elapsed Time: 0:00:04||

Episode 19|Iteration 323|reward:  475.0|last_reward_at:  315|Elapsed Time: 0:00:04||

Episode 19|Iteration 323|reward:  475.0|last_reward_at:  323|Elapsed Time: 0:00:04||

Episode 19|Iteration 328|reward:  475.0|last_reward_at:  323|Elapsed Time: 0:00:04||

Episode 19|Iteration 335|reward:  475.0|last_reward_at:  323|Elapsed Time: 0:00:04||

Episode 19|Iteration 340|reward:  475.0|last_reward_at:  323|Elapsed Time: 0:00:04||

Episode 19|Iteration 343|reward:  475.0|last_reward_at:  323|Elapsed Time: 0:00:04||

Episode 19|Iteration 345|reward:  481.5|last_reward_at:  323|Elapsed Time: 0:00:04||

Episode 19|Iteration 345|reward:  481.5|last_reward_at:  345|Elapsed Time: 0:00:04||

Episode 19|Iteration 352|reward:  481.5|last_reward_at:  345|Elapsed Time: 0:00:05||

Episode 19|Iteration 353|reward:  503.0|last_reward_at:  345|Elapsed Time: 0:00:05||

Episode 19|Iteration 353|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:05||

Episode 19|Iteration 356|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:05||

Episode 19|Iteration 361|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:05||

Episode 19|Iteration 363|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:05||

Episode 19|Iteration 367|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:05||

Episode 19|Iteration 370|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:05||

Episode 19|Iteration 379|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:05||

Episode 19|Iteration 386|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:05||

Episode 19|Iteration 391|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:05||

Episode 19|Iteration 398|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:06||

Episode 19|Iteration 402|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:06||

Episode 19|Iteration 406|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:06||

Episode 19|Iteration 410|reward:  503.0|last_reward_at:  353|Elapsed Time: 0:00:06||

Episode 19|Iteration 411|reward:  518.0|last_reward_at:  353|Elapsed Time: 0:00:06||

Episode 19|Iteration 411|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:06||

Episode 19|Iteration 417|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:06||

Episode 19|Iteration 425|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:06||

Episode 19|Iteration 431|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:06||

Episode 19|Iteration 437|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:06||

Episode 19|Iteration 442|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:06||

Episode 19|Iteration 450|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:06||

Episode 19|Iteration 456|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:07||

Episode 19|Iteration 463|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:07||

Episode 19|Iteration 469|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:07||

Episode 19|Iteration 474|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:07||

Episode 19|Iteration 482|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:07||

Episode 19|Iteration 487|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:07||

Episode 19|Iteration 494|reward:  518.0|last_reward_at:  411|Elapsed Time: 0:00:07||

Episode 19|Iteration 496|reward:  527.0|last_reward_at:  411|Elapsed Time: 0:00:07||

Episode 19|Iteration 496|reward:  527.0|last_reward_at:  496|Elapsed Time: 0:00:07||

Episode 19|Iteration 500|reward:  527.0|last_reward_at:  496|Elapsed Time: 0:00:07||

Episode 19|Iteration 500|reward:  527.0|last_reward_at:  496|Elapsed Time: 0:00:07||




  Episode 19 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/4 (0.20)
    explore-remote: 3/87 (0.03)
    explore-connect: 0/86 (0.00)
    exploit-local: 11/89 (0.11)
    exploit-remote: 4/152 (0.03)
    exploit-connect: 6/57 (0.10)
  exploit deflected to exploration: 62
  ## Episode: 20/20 'DQL' ϵ=0.2197, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 20|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 20|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 20|Iteration 2|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 20|Iteration 2|reward:   14.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 20|Iteration 8|reward:   54.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 20|Iteration 8|reward:   54.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 20|Iteration 13|reward:   54.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 20|Iteration 17|reward:   68.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 20|Iteration 17|reward:   68.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 20|Iteration 26|reward:   68.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 20|Iteration 32|reward:   68.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 20|Iteration 38|reward:   68.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 20|Iteration 44|reward:   79.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 20|Iteration 44|reward:   79.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 20|Iteration 50|reward:   90.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 20|Iteration 50|reward:   90.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 20|Iteration 51|reward:  103.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 20|Iteration 51|reward:  103.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 20|Iteration 55|reward:  109.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 20|Iteration 55|reward:  109.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 20|Iteration 64|reward:  109.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 20|Iteration 70|reward:  109.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 20|Iteration 74|reward:  169.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 20|Iteration 74|reward:  169.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 20|Iteration 83|reward:  169.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 20|Iteration 89|reward:  169.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 20|Iteration 95|reward:  169.0|last_reward_at:   74|Elapsed Time: 0:00:01||

Episode 20|Iteration 97|reward:  182.0|last_reward_at:   74|Elapsed Time: 0:00:01||

Episode 20|Iteration 97|reward:  182.0|last_reward_at:   97|Elapsed Time: 0:00:01||

Episode 20|Iteration 102|reward:  182.0|last_reward_at:   97|Elapsed Time: 0:00:01||

Episode 20|Iteration 103|reward:  188.0|last_reward_at:   97|Elapsed Time: 0:00:01||

Episode 20|Iteration 103|reward:  188.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 20|Iteration 111|reward:  188.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 20|Iteration 121|reward:  188.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 20|Iteration 127|reward:  188.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 20|Iteration 133|reward:  188.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 20|Iteration 140|reward:  188.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 20|Iteration 146|reward:  188.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 20|Iteration 152|reward:  188.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 20|Iteration 156|reward:  248.0|last_reward_at:  103|Elapsed Time: 0:00:01||

Episode 20|Iteration 156|reward:  248.0|last_reward_at:  156|Elapsed Time: 0:00:01||

Episode 20|Iteration 164|reward:  248.0|last_reward_at:  156|Elapsed Time: 0:00:01||

Episode 20|Iteration 171|reward:  248.0|last_reward_at:  156|Elapsed Time: 0:00:01||

Episode 20|Iteration 177|reward:  262.0|last_reward_at:  156|Elapsed Time: 0:00:01||

Episode 20|Iteration 177|reward:  262.0|last_reward_at:  177|Elapsed Time: 0:00:01||

Episode 20|Iteration 184|reward:  262.0|last_reward_at:  177|Elapsed Time: 0:00:02||

Episode 20|Iteration 190|reward:  262.0|last_reward_at:  177|Elapsed Time: 0:00:02||

Episode 20|Iteration 197|reward:  262.0|last_reward_at:  177|Elapsed Time: 0:00:02||

Episode 20|Iteration 203|reward:  262.0|last_reward_at:  177|Elapsed Time: 0:00:02||

Episode 20|Iteration 206|reward:  276.0|last_reward_at:  177|Elapsed Time: 0:00:02||

Episode 20|Iteration 206|reward:  276.0|last_reward_at:  206|Elapsed Time: 0:00:02||

Episode 20|Iteration 207|reward:  282.0|last_reward_at:  206|Elapsed Time: 0:00:02||

Episode 20|Iteration 207|reward:  282.0|last_reward_at:  207|Elapsed Time: 0:00:02||

Episode 20|Iteration 216|reward:  282.0|last_reward_at:  207|Elapsed Time: 0:00:02||

Episode 20|Iteration 221|reward:  291.0|last_reward_at:  207|Elapsed Time: 0:00:02||

Episode 20|Iteration 221|reward:  291.0|last_reward_at:  221|Elapsed Time: 0:00:02||

Episode 20|Iteration 225|reward:  301.0|last_reward_at:  221|Elapsed Time: 0:00:02||

Episode 20|Iteration 225|reward:  301.0|last_reward_at:  225|Elapsed Time: 0:00:02||

Episode 20|Iteration 233|reward:  341.0|last_reward_at:  225|Elapsed Time: 0:00:02||

Episode 20|Iteration 233|reward:  341.0|last_reward_at:  233|Elapsed Time: 0:00:02||

Episode 20|Iteration 238|reward:  341.0|last_reward_at:  233|Elapsed Time: 0:00:02||

Episode 20|Iteration 240|reward:  347.0|last_reward_at:  233|Elapsed Time: 0:00:02||

Episode 20|Iteration 240|reward:  347.0|last_reward_at:  240|Elapsed Time: 0:00:02||

Episode 20|Iteration 247|reward:  347.0|last_reward_at:  240|Elapsed Time: 0:00:02||

Episode 20|Iteration 247|reward:  353.5|last_reward_at:  240|Elapsed Time: 0:00:02||

Episode 20|Iteration 247|reward:  353.5|last_reward_at:  247|Elapsed Time: 0:00:02||

Episode 20|Iteration 253|reward:  353.5|last_reward_at:  247|Elapsed Time: 0:00:03||

Episode 20|Iteration 258|reward:  359.5|last_reward_at:  247|Elapsed Time: 0:00:03||

Episode 20|Iteration 258|reward:  359.5|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 20|Iteration 263|reward:  359.5|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 20|Iteration 266|reward:  359.5|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 20|Iteration 273|reward:  359.5|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 20|Iteration 279|reward:  359.5|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 20|Iteration 285|reward:  359.5|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 20|Iteration 287|reward:  365.5|last_reward_at:  258|Elapsed Time: 0:00:03||

Episode 20|Iteration 287|reward:  365.5|last_reward_at:  287|Elapsed Time: 0:00:03||

Episode 20|Iteration 296|reward:  365.5|last_reward_at:  287|Elapsed Time: 0:00:03||

Episode 20|Iteration 300|reward:  365.5|last_reward_at:  287|Elapsed Time: 0:00:03||

Episode 20|Iteration 305|reward:  365.5|last_reward_at:  287|Elapsed Time: 0:00:03||

Episode 20|Iteration 311|reward:  365.5|last_reward_at:  287|Elapsed Time: 0:00:04||

Episode 20|Iteration 317|reward:  365.5|last_reward_at:  287|Elapsed Time: 0:00:04||

Episode 20|Iteration 321|reward:  485.5|last_reward_at:  287|Elapsed Time: 0:00:04||

Episode 20|Iteration 321|reward:  485.5|last_reward_at:  321|Elapsed Time: 0:00:04||

Episode 20|Iteration 323|reward:  485.5|last_reward_at:  321|Elapsed Time: 0:00:04||

Episode 20|Iteration 330|reward:  485.5|last_reward_at:  321|Elapsed Time: 0:00:04||

Episode 20|Iteration 336|reward:  485.5|last_reward_at:  321|Elapsed Time: 0:00:04||

Episode 20|Iteration 338|reward:  485.5|last_reward_at:  321|Elapsed Time: 0:00:04||

Episode 20|Iteration 342|reward:  485.5|last_reward_at:  321|Elapsed Time: 0:00:04||

Episode 20|Iteration 343|reward:  485.5|last_reward_at:  321|Elapsed Time: 0:00:04||

Episode 20|Iteration 349|reward:  485.5|last_reward_at:  321|Elapsed Time: 0:00:04||

Episode 20|Iteration 351|reward:  507.0|last_reward_at:  321|Elapsed Time: 0:00:04||

Episode 20|Iteration 351|reward:  507.0|last_reward_at:  351|Elapsed Time: 0:00:04||

Episode 20|Iteration 355|reward:  507.0|last_reward_at:  351|Elapsed Time: 0:00:05||

Episode 20|Iteration 359|reward:  507.0|last_reward_at:  351|Elapsed Time: 0:00:05||

Episode 20|Iteration 360|reward:  522.0|last_reward_at:  351|Elapsed Time: 0:00:05||

Episode 20|Iteration 360|reward:  522.0|last_reward_at:  360|Elapsed Time: 0:00:05||

Episode 20|Iteration 367|reward:  522.0|last_reward_at:  360|Elapsed Time: 0:00:05||

Episode 20|Iteration 372|reward:  573.0|last_reward_at:  360|Elapsed Time: 0:00:05||

Episode 20|Iteration 372|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:05||

Episode 20|Iteration 377|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:05||

Episode 20|Iteration 379|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:05||

Episode 20|Iteration 384|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:05||

Episode 20|Iteration 387|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:05||

Episode 20|Iteration 392|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:05||

Episode 20|Iteration 399|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:05||

Episode 20|Iteration 402|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:06||

Episode 20|Iteration 404|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:06||

Episode 20|Iteration 411|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:06||

Episode 20|Iteration 416|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:06||

Episode 20|Iteration 418|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:06||

Episode 20|Iteration 424|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:06||

Episode 20|Iteration 431|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:06||

Episode 20|Iteration 437|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:06||

Episode 20|Iteration 444|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:06||

Episode 20|Iteration 449|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:06||

Episode 20|Iteration 456|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:07||

Episode 20|Iteration 458|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:07||

Episode 20|Iteration 460|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:07||

Episode 20|Iteration 464|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:07||

Episode 20|Iteration 469|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:07||

Episode 20|Iteration 474|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:07||

Episode 20|Iteration 479|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:07||

Episode 20|Iteration 483|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:07||

Episode 20|Iteration 491|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:07||

Episode 20|Iteration 496|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:08||

Episode 20|Iteration 500|reward:  573.0|last_reward_at:  372|Elapsed Time: 0:00:08||




  Episode 20 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/3 (0.40)
    explore-remote: 2/66 (0.03)
    explore-connect: 0/65 (0.00)
    exploit-local: 12/123 (0.09)
    exploit-remote: 3/135 (0.02)
    exploit-connect: 6/83 (0.07)
  exploit deflected to exploration: 20
simulation ended


In [11]:
# -----------------------------------------
# 6) DQL 평가(Exploit) (✅ 파라미터 유지 + 평가에서만 LLM 옵션)
# -----------------------------------------
llm_chat = make_openai_chat_callable(model_id, llm_token_yaml) if use_llm else None

eval_learner = (
    LLMPrunedExploitWrapper(
        base_learner=dql_run["learner"],
        llm_chat=llm_chat,
        llm_every_steps=llm_every_steps,
        candidate_pool=candidate_pool,
        llm_topk=llm_topk,
        obs_max_chars=llm_obs_max_chars,
    )
    if use_llm
    else dql_run["learner"]
)

dql_exploit_run = learner.epsilon_greedy_search(
    gym_env,
    ep,
    learner=eval_learner,
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    epsilon=0.0,
    epsilon_minimum=0.00,
    render=False,
    plot_episodes_length=False,
    verbosity=Verbosity.Quiet,
    render_last_episode_rewards_to=os.path.join(plots_dir, f"dql-{gymid}"),
    title=("Exploiting DQL (LLM-pruned)" if use_llm else "Exploiting DQL"),
)

[OpenAI] key_prefix= sk-proj- len= 164 yaml= /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/llm_token.yaml
###### Exploiting DQL (LLM-pruned)
Learning with: episode_count=10,iteration_count=500,ϵ=0.0,ϵ_min=0.0, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10
  ## Episode: 1/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 1|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 1|Iteration 5|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 1|Iteration 10|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:03||

Episode 1|Iteration 15|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:05||

Episode 1|Iteration 18|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:05||

Episode 1|Iteration 20|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:06||

Episode 1|Iteration 24|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:06||

Episode 1|Iteration 25|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:07||

Episode 1|Iteration 27|reward:   25.0|last_reward_at:    1|Elapsed Time: 0:00:08||

Episode 1|Iteration 27|reward:   25.0|last_reward_at:   27|Elapsed Time: 0:00:08||

Episode 1|Iteration 28|reward:   38.0|last_reward_at:   27|Elapsed Time: 0:00:08||

Episode 1|Iteration 28|reward:   38.0|last_reward_at:   28|Elapsed Time: 0:00:08||

Episode 1|Iteration 30|reward:   38.0|last_reward_at:   28|Elapsed Time: 0:00:09||

Episode 1|Iteration 32|reward:   78.0|last_reward_at:   28|Elapsed Time: 0:00:09||

Episode 1|Iteration 32|reward:   78.0|last_reward_at:   32|Elapsed Time: 0:00:09||

Episode 1|Iteration 35|reward:   78.0|last_reward_at:   32|Elapsed Time: 0:00:10||

Episode 1|Iteration 38|reward:   78.0|last_reward_at:   32|Elapsed Time: 0:00:11||

Episode 1|Iteration 40|reward:   78.0|last_reward_at:   32|Elapsed Time: 0:00:12||

Episode 1|Iteration 45|reward:   78.0|last_reward_at:   32|Elapsed Time: 0:00:12||

Episode 1|Iteration 45|reward:   78.0|last_reward_at:   32|Elapsed Time: 0:00:14||

Episode 1|Iteration 50|reward:   78.0|last_reward_at:   32|Elapsed Time: 0:00:15||

Episode 1|Iteration 54|reward:   84.0|last_reward_at:   32|Elapsed Time: 0:00:15||

Episode 1|Iteration 54|reward:   84.0|last_reward_at:   54|Elapsed Time: 0:00:15||

Episode 1|Iteration 55|reward:   84.0|last_reward_at:   54|Elapsed Time: 0:00:17||

Episode 1|Iteration 60|reward:   84.0|last_reward_at:   54|Elapsed Time: 0:00:18||

Episode 1|Iteration 62|reward:   98.0|last_reward_at:   54|Elapsed Time: 0:00:18||

Episode 1|Iteration 62|reward:   98.0|last_reward_at:   62|Elapsed Time: 0:00:18||

Episode 1|Iteration 65|reward:   98.0|last_reward_at:   62|Elapsed Time: 0:00:20||

Episode 1|Iteration 68|reward:  104.0|last_reward_at:   62|Elapsed Time: 0:00:20||

Episode 1|Iteration 68|reward:  104.0|last_reward_at:   68|Elapsed Time: 0:00:20||

Episode 1|Iteration 70|reward:  104.0|last_reward_at:   68|Elapsed Time: 0:00:22||

Episode 1|Iteration 71|reward:  164.0|last_reward_at:   68|Elapsed Time: 0:00:22||

Episode 1|Iteration 71|reward:  164.0|last_reward_at:   71|Elapsed Time: 0:00:22||

Episode 1|Iteration 75|reward:  164.0|last_reward_at:   71|Elapsed Time: 0:00:23||

Episode 1|Iteration 76|reward:  164.0|last_reward_at:   71|Elapsed Time: 0:00:23||

Episode 1|Iteration 80|reward:  164.0|last_reward_at:   71|Elapsed Time: 0:00:25||

Episode 1|Iteration 85|reward:  164.0|last_reward_at:   71|Elapsed Time: 0:00:26||

Episode 1|Iteration 90|reward:  164.0|last_reward_at:   71|Elapsed Time: 0:00:28||

Episode 1|Iteration 93|reward:  164.0|last_reward_at:   71|Elapsed Time: 0:00:28||

Episode 1|Iteration 95|reward:  164.0|last_reward_at:   71|Elapsed Time: 0:00:30||

Episode 1|Iteration 97|reward:  178.0|last_reward_at:   71|Elapsed Time: 0:00:30||

Episode 1|Iteration 97|reward:  178.0|last_reward_at:   97|Elapsed Time: 0:00:30||

Episode 1|Iteration 100|reward:  178.0|last_reward_at:   97|Elapsed Time: 0:00:31||

Episode 1|Iteration 102|reward:  184.5|last_reward_at:   97|Elapsed Time: 0:00:31||

Episode 1|Iteration 102|reward:  184.5|last_reward_at:  102|Elapsed Time: 0:00:31||

Episode 1|Iteration 105|reward:  184.5|last_reward_at:  102|Elapsed Time: 0:00:33||

Episode 1|Iteration 110|reward:  184.5|last_reward_at:  102|Elapsed Time: 0:00:35||

Episode 1|Iteration 112|reward:  194.5|last_reward_at:  102|Elapsed Time: 0:00:35||

Episode 1|Iteration 112|reward:  194.5|last_reward_at:  112|Elapsed Time: 0:00:35||

Episode 1|Iteration 115|reward:  194.5|last_reward_at:  112|Elapsed Time: 0:00:37||

Episode 1|Iteration 117|reward:  194.5|last_reward_at:  112|Elapsed Time: 0:00:37||

Episode 1|Iteration 119|reward:  200.5|last_reward_at:  112|Elapsed Time: 0:00:37||

Episode 1|Iteration 119|reward:  200.5|last_reward_at:  119|Elapsed Time: 0:00:37||

Episode 1|Iteration 120|reward:  200.5|last_reward_at:  119|Elapsed Time: 0:00:38||

Episode 1|Iteration 123|reward:  211.5|last_reward_at:  119|Elapsed Time: 0:00:38||

Episode 1|Iteration 123|reward:  211.5|last_reward_at:  123|Elapsed Time: 0:00:38||

Episode 1|Iteration 125|reward:  211.5|last_reward_at:  123|Elapsed Time: 0:00:40||

Episode 1|Iteration 130|reward:  211.5|last_reward_at:  123|Elapsed Time: 0:00:41||

Episode 1|Iteration 135|reward:  211.5|last_reward_at:  123|Elapsed Time: 0:00:43||

Episode 1|Iteration 140|reward:  211.5|last_reward_at:  123|Elapsed Time: 0:00:44||

Episode 1|Iteration 141|reward:  224.5|last_reward_at:  123|Elapsed Time: 0:00:44||

Episode 1|Iteration 141|reward:  224.5|last_reward_at:  141|Elapsed Time: 0:00:44||

Episode 1|Iteration 145|reward:  224.5|last_reward_at:  141|Elapsed Time: 0:00:46||

Episode 1|Iteration 150|reward:  224.5|last_reward_at:  141|Elapsed Time: 0:00:47||

Episode 1|Iteration 153|reward:  237.5|last_reward_at:  141|Elapsed Time: 0:00:47||

Episode 1|Iteration 153|reward:  237.5|last_reward_at:  153|Elapsed Time: 0:00:47||

Episode 1|Iteration 155|reward:  237.5|last_reward_at:  153|Elapsed Time: 0:00:49||

Episode 1|Iteration 160|reward:  237.5|last_reward_at:  153|Elapsed Time: 0:00:54||

Episode 1|Iteration 165|reward:  237.5|last_reward_at:  153|Elapsed Time: 0:00:55||

Episode 1|Iteration 167|reward:  243.5|last_reward_at:  153|Elapsed Time: 0:00:55||

Episode 1|Iteration 167|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:00:55||

Episode 1|Iteration 170|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:00:57||

Episode 1|Iteration 171|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:00:57||

Episode 1|Iteration 173|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:00:57||

Episode 1|Iteration 175|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:00:59||

Episode 1|Iteration 180|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:01:00||

Episode 1|Iteration 184|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:01:01||

Episode 1|Iteration 185|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:01:02||

Episode 1|Iteration 190|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:01:03||

Episode 1|Iteration 195|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:01:05||

Episode 1|Iteration 200|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:01:06||

Episode 1|Iteration 205|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:01:08||

Episode 1|Iteration 210|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:01:10||

Episode 1|Iteration 215|reward:  243.5|last_reward_at:  167|Elapsed Time: 0:01:11||

Episode 1|Iteration 217|reward:  252.5|last_reward_at:  167|Elapsed Time: 0:01:11||

Episode 1|Iteration 217|reward:  252.5|last_reward_at:  217|Elapsed Time: 0:01:11||

Episode 1|Iteration 220|reward:  252.5|last_reward_at:  217|Elapsed Time: 0:01:13||

Episode 1|Iteration 225|reward:  252.5|last_reward_at:  217|Elapsed Time: 0:01:14||

Episode 1|Iteration 230|reward:  252.5|last_reward_at:  217|Elapsed Time: 0:01:16||

Episode 1|Iteration 235|reward:  252.5|last_reward_at:  217|Elapsed Time: 0:01:16||

Episode 1|Iteration 235|reward:  252.5|last_reward_at:  217|Elapsed Time: 0:01:18||

Episode 1|Iteration 239|reward:  252.5|last_reward_at:  217|Elapsed Time: 0:01:18||

Episode 1|Iteration 240|reward:  252.5|last_reward_at:  217|Elapsed Time: 0:01:19||

Episode 1|Iteration 245|reward:  252.5|last_reward_at:  217|Elapsed Time: 0:01:21||

Episode 1|Iteration 250|reward:  252.5|last_reward_at:  217|Elapsed Time: 0:01:23||

Episode 1|Iteration 255|reward:  252.5|last_reward_at:  217|Elapsed Time: 0:01:24||

Episode 1|Iteration 258|reward:  258.5|last_reward_at:  217|Elapsed Time: 0:01:25||

Episode 1|Iteration 258|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:25||

Episode 1|Iteration 260|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:26||

Episode 1|Iteration 265|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:28||

Episode 1|Iteration 270|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:29||

Episode 1|Iteration 273|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:29||

Episode 1|Iteration 275|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:31||

Episode 1|Iteration 280|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:32||

Episode 1|Iteration 285|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:32||

Episode 1|Iteration 285|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:34||

Episode 1|Iteration 290|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:35||

Episode 1|Iteration 295|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:37||

Episode 1|Iteration 300|reward:  258.5|last_reward_at:  258|Elapsed Time: 0:01:38||

Episode 1|Iteration 303|reward:  318.5|last_reward_at:  258|Elapsed Time: 0:01:38||

Episode 1|Iteration 303|reward:  318.5|last_reward_at:  303|Elapsed Time: 0:01:38||

Episode 1|Iteration 305|reward:  318.5|last_reward_at:  303|Elapsed Time: 0:01:40||

Episode 1|Iteration 310|reward:  318.5|last_reward_at:  303|Elapsed Time: 0:01:42||

Episode 1|Iteration 313|reward:  318.5|last_reward_at:  303|Elapsed Time: 0:01:42||

Episode 1|Iteration 315|reward:  318.5|last_reward_at:  303|Elapsed Time: 0:01:44||

Episode 1|Iteration 317|reward:  324.5|last_reward_at:  303|Elapsed Time: 0:01:44||

Episode 1|Iteration 317|reward:  324.5|last_reward_at:  317|Elapsed Time: 0:01:44||

Episode 1|Iteration 319|reward:  324.5|last_reward_at:  317|Elapsed Time: 0:01:44||

Episode 1|Iteration 320|reward:  324.5|last_reward_at:  317|Elapsed Time: 0:01:46||

Episode 1|Iteration 325|reward:  324.5|last_reward_at:  317|Elapsed Time: 0:01:48||

Episode 1|Iteration 329|reward:  444.5|last_reward_at:  317|Elapsed Time: 0:01:48||

Episode 1|Iteration 329|reward:  444.5|last_reward_at:  329|Elapsed Time: 0:01:48||

Episode 1|Iteration 330|reward:  444.5|last_reward_at:  329|Elapsed Time: 0:01:49||

Episode 1|Iteration 335|reward:  444.5|last_reward_at:  329|Elapsed Time: 0:01:51||

Episode 1|Iteration 337|reward:  484.5|last_reward_at:  329|Elapsed Time: 0:01:51||

Episode 1|Iteration 337|reward:  484.5|last_reward_at:  337|Elapsed Time: 0:01:51||

Episode 1|Iteration 340|reward:  484.5|last_reward_at:  337|Elapsed Time: 0:01:52||

Episode 1|Iteration 345|reward:  484.5|last_reward_at:  337|Elapsed Time: 0:01:54||

Episode 1|Iteration 349|reward:  484.5|last_reward_at:  337|Elapsed Time: 0:01:54||

Episode 1|Iteration 350|reward:  484.5|last_reward_at:  337|Elapsed Time: 0:01:55||

Episode 1|Iteration 351|reward:  535.5|last_reward_at:  337|Elapsed Time: 0:01:55||

Episode 1|Iteration 351|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:01:55||

Episode 1|Iteration 355|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:01:56||

Episode 1|Iteration 355|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:01:57||

Episode 1|Iteration 360|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:02:00||

Episode 1|Iteration 365|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:02:01||

Episode 1|Iteration 368|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:02:01||

Episode 1|Iteration 370|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:02:03||

Episode 1|Iteration 375|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:02:04||

Episode 1|Iteration 377|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:02:05||

Episode 1|Iteration 380|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:02:05||

Episode 1|Iteration 380|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:02:07||

Episode 1|Iteration 381|reward:  535.5|last_reward_at:  351|Elapsed Time: 0:02:07||

Episode 1|Iteration 383|reward:  557.0|last_reward_at:  351|Elapsed Time: 0:02:07||

Episode 1|Iteration 383|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:07||

Episode 1|Iteration 385|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:08||

Episode 1|Iteration 390|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:10||

Episode 1|Iteration 393|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:10||

Episode 1|Iteration 395|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:12||

Episode 1|Iteration 398|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:12||

Episode 1|Iteration 400|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:13||

Episode 1|Iteration 403|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:13||

Episode 1|Iteration 405|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:17||

Episode 1|Iteration 407|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:17||

Episode 1|Iteration 408|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:17||

Episode 1|Iteration 410|reward:  557.0|last_reward_at:  383|Elapsed Time: 0:02:18||

Episode 1|Iteration 413|reward:  572.0|last_reward_at:  383|Elapsed Time: 0:02:18||

Episode 1|Iteration 413|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:18||

Episode 1|Iteration 415|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:20||

Episode 1|Iteration 418|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:20||

Episode 1|Iteration 420|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:21||

Episode 1|Iteration 425|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:23||

Episode 1|Iteration 427|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:23||

Episode 1|Iteration 430|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:25||

Episode 1|Iteration 433|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:25||

Episode 1|Iteration 435|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:27||

Episode 1|Iteration 440|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:28||

Episode 1|Iteration 444|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:28||

Episode 1|Iteration 445|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:30||

Episode 1|Iteration 450|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:30||

Episode 1|Iteration 450|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:32||

Episode 1|Iteration 455|reward:  572.0|last_reward_at:  413|Elapsed Time: 0:02:33||

Episode 1|Iteration 457|reward:  578.0|last_reward_at:  413|Elapsed Time: 0:02:33||

Episode 1|Iteration 457|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:33||

Episode 1|Iteration 460|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:35||

Episode 1|Iteration 463|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:35||

Episode 1|Iteration 465|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:36||

Episode 1|Iteration 469|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:36||

Episode 1|Iteration 470|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:38||

Episode 1|Iteration 475|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:38||

Episode 1|Iteration 475|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:40||

Episode 1|Iteration 478|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:40||

Episode 1|Iteration 480|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:41||

Episode 1|Iteration 485|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:43||

Episode 1|Iteration 490|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:44||

Episode 1|Iteration 494|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:44||

Episode 1|Iteration 495|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:46||

Episode 1|Iteration 500|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:46||

Episode 1|Iteration 500|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:47||

Episode 1|Iteration 500|reward:  578.0|last_reward_at:  457|Elapsed Time: 0:02:47||




  Episode 1 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 0/22 (0.00)
    explore-remote: 0/72 (0.00)
    explore-connect: 0/41 (0.00)
    exploit-local: 14/104 (0.12)
    exploit-remote: 6/166 (0.03)
    exploit-connect: 6/69 (0.08)
  exploit deflected to exploration: 35
  ## Episode: 2/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 2|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 2|Iteration 3|reward:   25.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 2|Iteration 3|reward:   25.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 2|Iteration 5|reward:   25.0|last_reward_at:    3|Elapsed Time: 0:00:01||

Episode 2|Iteration 6|reward:   65.0|last_reward_at:    3|Elapsed Time: 0:00:01||

Episode 2|Iteration 6|reward:   65.0|last_reward_at:    6|Elapsed Time: 0:00:01||

Episode 2|Iteration 8|reward:   78.0|last_reward_at:    6|Elapsed Time: 0:00:01||

Episode 2|Iteration 8|reward:   78.0|last_reward_at:    8|Elapsed Time: 0:00:01||

Episode 2|Iteration 10|reward:   78.0|last_reward_at:    8|Elapsed Time: 0:00:03||

Episode 2|Iteration 13|reward:   89.0|last_reward_at:    8|Elapsed Time: 0:00:03||

Episode 2|Iteration 13|reward:   89.0|last_reward_at:   13|Elapsed Time: 0:00:03||

Episode 2|Iteration 15|reward:   89.0|last_reward_at:   13|Elapsed Time: 0:00:04||

Episode 2|Iteration 20|reward:   89.0|last_reward_at:   13|Elapsed Time: 0:00:05||

Episode 2|Iteration 25|reward:  103.0|last_reward_at:   13|Elapsed Time: 0:00:07||

Episode 2|Iteration 25|reward:  103.0|last_reward_at:   25|Elapsed Time: 0:00:07||

Episode 2|Iteration 30|reward:  103.0|last_reward_at:   25|Elapsed Time: 0:00:08||

Episode 2|Iteration 34|reward:  114.0|last_reward_at:   25|Elapsed Time: 0:00:08||

Episode 2|Iteration 34|reward:  114.0|last_reward_at:   34|Elapsed Time: 0:00:08||

Episode 2|Iteration 35|reward:  114.0|last_reward_at:   34|Elapsed Time: 0:00:10||

Episode 2|Iteration 40|reward:  114.0|last_reward_at:   34|Elapsed Time: 0:00:11||

Episode 2|Iteration 44|reward:  114.0|last_reward_at:   34|Elapsed Time: 0:00:11||

Episode 2|Iteration 45|reward:  114.0|last_reward_at:   34|Elapsed Time: 0:00:13||

Episode 2|Iteration 46|reward:  124.0|last_reward_at:   34|Elapsed Time: 0:00:13||

Episode 2|Iteration 46|reward:  124.0|last_reward_at:   46|Elapsed Time: 0:00:13||

Episode 2|Iteration 48|reward:  184.0|last_reward_at:   46|Elapsed Time: 0:00:13||

Episode 2|Iteration 48|reward:  184.0|last_reward_at:   48|Elapsed Time: 0:00:13||

Episode 2|Iteration 50|reward:  184.0|last_reward_at:   48|Elapsed Time: 0:00:14||

Episode 2|Iteration 53|reward:  190.0|last_reward_at:   48|Elapsed Time: 0:00:15||

Episode 2|Iteration 53|reward:  190.0|last_reward_at:   53|Elapsed Time: 0:00:15||

Episode 2|Iteration 55|reward:  201.0|last_reward_at:   53|Elapsed Time: 0:00:16||

Episode 2|Iteration 55|reward:  201.0|last_reward_at:   55|Elapsed Time: 0:00:16||

Episode 2|Iteration 60|reward:  201.0|last_reward_at:   55|Elapsed Time: 0:00:18||

Episode 2|Iteration 64|reward:  201.0|last_reward_at:   55|Elapsed Time: 0:00:18||

Episode 2|Iteration 65|reward:  201.0|last_reward_at:   55|Elapsed Time: 0:00:19||

Episode 2|Iteration 70|reward:  201.0|last_reward_at:   55|Elapsed Time: 0:00:21||

Episode 2|Iteration 72|reward:  207.0|last_reward_at:   55|Elapsed Time: 0:00:21||

Episode 2|Iteration 72|reward:  207.0|last_reward_at:   72|Elapsed Time: 0:00:21||

Episode 2|Iteration 73|reward:  213.0|last_reward_at:   72|Elapsed Time: 0:00:21||

Episode 2|Iteration 73|reward:  213.0|last_reward_at:   73|Elapsed Time: 0:00:21||

Episode 2|Iteration 75|reward:  213.0|last_reward_at:   73|Elapsed Time: 0:00:22||

Episode 2|Iteration 76|reward:  219.0|last_reward_at:   73|Elapsed Time: 0:00:22||

Episode 2|Iteration 76|reward:  219.0|last_reward_at:   76|Elapsed Time: 0:00:22||

Episode 2|Iteration 79|reward:  232.0|last_reward_at:   76|Elapsed Time: 0:00:22||

Episode 2|Iteration 79|reward:  232.0|last_reward_at:   79|Elapsed Time: 0:00:22||

Episode 2|Iteration 80|reward:  232.0|last_reward_at:   79|Elapsed Time: 0:00:24||

Episode 2|Iteration 85|reward:  232.0|last_reward_at:   79|Elapsed Time: 0:00:26||

Episode 2|Iteration 88|reward:  292.0|last_reward_at:   79|Elapsed Time: 0:00:26||

Episode 2|Iteration 88|reward:  292.0|last_reward_at:   88|Elapsed Time: 0:00:26||

Episode 2|Iteration 90|reward:  292.0|last_reward_at:   88|Elapsed Time: 0:00:27||

Episode 2|Iteration 91|reward:  301.0|last_reward_at:   88|Elapsed Time: 0:00:27||

Episode 2|Iteration 91|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:27||

Episode 2|Iteration 95|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:27||

Episode 2|Iteration 95|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:29||

Episode 2|Iteration 100|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:30||

Episode 2|Iteration 105|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:32||

Episode 2|Iteration 110|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:34||

Episode 2|Iteration 114|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:34||

Episode 2|Iteration 115|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:35||

Episode 2|Iteration 120|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:37||

Episode 2|Iteration 124|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:37||

Episode 2|Iteration 125|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:39||

Episode 2|Iteration 127|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:39||

Episode 2|Iteration 128|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:39||

Episode 2|Iteration 130|reward:  301.0|last_reward_at:   91|Elapsed Time: 0:00:41||

Episode 2|Iteration 132|reward:  421.0|last_reward_at:   91|Elapsed Time: 0:00:41||

Episode 2|Iteration 132|reward:  421.0|last_reward_at:  132|Elapsed Time: 0:00:41||

Episode 2|Iteration 135|reward:  421.0|last_reward_at:  132|Elapsed Time: 0:00:43||

Episode 2|Iteration 140|reward:  421.0|last_reward_at:  132|Elapsed Time: 0:00:44||

Episode 2|Iteration 145|reward:  421.0|last_reward_at:  132|Elapsed Time: 0:00:46||

Episode 2|Iteration 150|reward:  472.0|last_reward_at:  132|Elapsed Time: 0:00:47||

Episode 2|Iteration 150|reward:  472.0|last_reward_at:  150|Elapsed Time: 0:00:47||

Episode 2|Iteration 152|reward:  512.0|last_reward_at:  150|Elapsed Time: 0:00:47||

Episode 2|Iteration 152|reward:  512.0|last_reward_at:  152|Elapsed Time: 0:00:47||

Episode 2|Iteration 155|reward:  518.0|last_reward_at:  152|Elapsed Time: 0:00:49||

Episode 2|Iteration 155|reward:  518.0|last_reward_at:  155|Elapsed Time: 0:00:49||

Episode 2|Iteration 156|reward:  518.0|last_reward_at:  155|Elapsed Time: 0:00:50||

Episode 2|Iteration 158|reward:  518.0|last_reward_at:  155|Elapsed Time: 0:00:50||

Episode 2|Iteration 159|reward:  539.5|last_reward_at:  155|Elapsed Time: 0:00:50||

Episode 2|Iteration 159|reward:  539.5|last_reward_at:  159|Elapsed Time: 0:00:50||

Episode 2|Iteration 160|reward:  539.5|last_reward_at:  159|Elapsed Time: 0:00:51||

Episode 2|Iteration 161|reward:  546.0|last_reward_at:  159|Elapsed Time: 0:00:51||

Episode 2|Iteration 161|reward:  546.0|last_reward_at:  161|Elapsed Time: 0:00:51||

Episode 2|Iteration 165|reward:  546.0|last_reward_at:  161|Elapsed Time: 0:00:51||

Episode 2|Iteration 165|reward:  546.0|last_reward_at:  161|Elapsed Time: 0:00:53||

Episode 2|Iteration 167|reward:  546.0|last_reward_at:  161|Elapsed Time: 0:00:53||

Episode 2|Iteration 169|reward:  546.0|last_reward_at:  161|Elapsed Time: 0:00:53||

Episode 2|Iteration 170|reward:  551.0|last_reward_at:  161|Elapsed Time: 0:00:56||

Episode 2|Iteration 170|reward:  551.0|last_reward_at:  170|Elapsed Time: 0:00:56||

Episode 2|Iteration 175|reward:  551.0|last_reward_at:  170|Elapsed Time: 0:00:58||

Episode 2|Iteration 178|reward:  551.0|last_reward_at:  170|Elapsed Time: 0:00:58||

Episode 2|Iteration 180|reward:  560.0|last_reward_at:  170|Elapsed Time: 0:01:01||

Episode 2|Iteration 180|reward:  560.0|last_reward_at:  180|Elapsed Time: 0:01:01||

Episode 2|Iteration 185|reward:  560.0|last_reward_at:  180|Elapsed Time: 0:01:02||

Episode 2|Iteration 189|reward:  560.0|last_reward_at:  180|Elapsed Time: 0:01:03||

Episode 2|Iteration 190|reward:  560.0|last_reward_at:  180|Elapsed Time: 0:01:05||

Episode 2|Iteration 195|reward:  560.0|last_reward_at:  180|Elapsed Time: 0:01:06||

Episode 2|Iteration 198|reward:  560.0|last_reward_at:  180|Elapsed Time: 0:01:06||

Episode 2|Iteration 200|reward:  560.0|last_reward_at:  180|Elapsed Time: 0:01:08||

Episode 2|Iteration 203|reward:  560.0|last_reward_at:  180|Elapsed Time: 0:01:08||

Episode 2|Iteration 205|reward:  560.0|last_reward_at:  180|Elapsed Time: 0:01:10||

Episode 2|Iteration 210|reward:  560.0|last_reward_at:  180|Elapsed Time: 0:01:12||

Episode 2|Iteration 212|reward:  569.0|last_reward_at:  180|Elapsed Time: 0:01:12||

Episode 2|Iteration 212|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:12||

Episode 2|Iteration 215|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:14||

Episode 2|Iteration 220|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:15||

Episode 2|Iteration 222|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:15||

Episode 2|Iteration 224|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:15||

Episode 2|Iteration 225|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:18||

Episode 2|Iteration 229|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:18||

Episode 2|Iteration 230|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:19||

Episode 2|Iteration 235|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:19||

Episode 2|Iteration 235|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:21||

Episode 2|Iteration 239|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:21||

Episode 2|Iteration 240|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:23||

Episode 2|Iteration 245|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:25||

Episode 2|Iteration 249|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:25||

Episode 2|Iteration 250|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:27||

Episode 2|Iteration 253|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:27||

Episode 2|Iteration 255|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:28||

Episode 2|Iteration 259|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:29||

Episode 2|Iteration 260|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:30||

Episode 2|Iteration 265|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:32||

Episode 2|Iteration 266|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:32||

Episode 2|Iteration 270|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:34||

Episode 2|Iteration 275|reward:  569.0|last_reward_at:  212|Elapsed Time: 0:01:36||

Episode 2|Iteration 279|reward:  584.0|last_reward_at:  212|Elapsed Time: 0:01:36||

Episode 2|Iteration 279|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:36||

Episode 2|Iteration 280|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:37||

Episode 2|Iteration 283|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:38||

Episode 2|Iteration 285|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:39||

Episode 2|Iteration 290|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:41||

Episode 2|Iteration 292|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:41||

Episode 2|Iteration 295|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:43||

Episode 2|Iteration 300|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:44||

Episode 2|Iteration 305|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:46||

Episode 2|Iteration 309|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:46||

Episode 2|Iteration 310|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:47||

Episode 2|Iteration 315|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:49||

Episode 2|Iteration 320|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:51||

Episode 2|Iteration 325|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:53||

Episode 2|Iteration 327|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:53||

Episode 2|Iteration 329|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:53||

Episode 2|Iteration 330|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:54||

Episode 2|Iteration 335|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:56||

Episode 2|Iteration 337|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:56||

Episode 2|Iteration 339|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:57||

Episode 2|Iteration 340|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:01:59||

Episode 2|Iteration 345|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:00||

Episode 2|Iteration 349|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:00||

Episode 2|Iteration 350|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:02||

Episode 2|Iteration 351|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:03||

Episode 2|Iteration 355|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:03||

Episode 2|Iteration 355|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:04||

Episode 2|Iteration 360|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:06||

Episode 2|Iteration 365|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:08||

Episode 2|Iteration 369|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:08||

Episode 2|Iteration 370|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:11||

Episode 2|Iteration 375|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:12||

Episode 2|Iteration 380|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:13||

Episode 2|Iteration 380|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:15||

Episode 2|Iteration 385|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:17||

Episode 2|Iteration 387|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:17||

Episode 2|Iteration 390|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:19||

Episode 2|Iteration 395|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:21||

Episode 2|Iteration 399|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:21||

Episode 2|Iteration 400|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:22||

Episode 2|Iteration 405|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:25||

Episode 2|Iteration 408|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:25||

Episode 2|Iteration 410|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:27||

Episode 2|Iteration 415|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:28||

Episode 2|Iteration 416|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:28||

Episode 2|Iteration 417|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:29||

Episode 2|Iteration 420|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:31||

Episode 2|Iteration 423|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:31||

Episode 2|Iteration 425|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:33||

Episode 2|Iteration 430|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:35||

Episode 2|Iteration 435|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:37||

Episode 2|Iteration 440|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:38||

Episode 2|Iteration 445|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:40||

Episode 2|Iteration 446|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:41||

Episode 2|Iteration 450|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:41||

Episode 2|Iteration 450|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:43||

Episode 2|Iteration 454|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:43||

Episode 2|Iteration 455|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:45||

Episode 2|Iteration 460|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:47||

Episode 2|Iteration 465|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:48||

Episode 2|Iteration 469|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:48||

Episode 2|Iteration 470|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:50||

Episode 2|Iteration 475|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:50||

Episode 2|Iteration 475|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:52||

Episode 2|Iteration 479|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:52||

Episode 2|Iteration 480|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:54||

Episode 2|Iteration 485|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:56||

Episode 2|Iteration 490|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:57||

Episode 2|Iteration 495|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:02:59||

Episode 2|Iteration 500|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:03:01||

Episode 2|Iteration 500|reward:  584.0|last_reward_at:  279|Elapsed Time: 0:03:01||




  Episode 2 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 5/20 (0.20)
    explore-remote: 3/77 (0.04)
    explore-connect: 0/32 (0.00)
    exploit-local: 9/116 (0.07)
    exploit-remote: 4/171 (0.02)
    exploit-connect: 6/57 (0.10)
  exploit deflected to exploration: 37
  ## Episode: 3/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 3|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 3|Iteration 5|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 3|Iteration 10|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:03||

Episode 3|Iteration 15|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:04||

Episode 3|Iteration 20|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:06||

Episode 3|Iteration 23|reward:   54.0|last_reward_at:    1|Elapsed Time: 0:00:06||

Episode 3|Iteration 23|reward:   54.0|last_reward_at:   23|Elapsed Time: 0:00:06||

Episode 3|Iteration 25|reward:   54.0|last_reward_at:   23|Elapsed Time: 0:00:07||

Episode 3|Iteration 30|reward:   54.0|last_reward_at:   23|Elapsed Time: 0:00:08||

Episode 3|Iteration 33|reward:   65.0|last_reward_at:   23|Elapsed Time: 0:00:09||

Episode 3|Iteration 33|reward:   65.0|last_reward_at:   33|Elapsed Time: 0:00:09||

Episode 3|Iteration 35|reward:   65.0|last_reward_at:   33|Elapsed Time: 0:00:10||

Episode 3|Iteration 40|reward:   76.0|last_reward_at:   33|Elapsed Time: 0:00:12||

Episode 3|Iteration 40|reward:   76.0|last_reward_at:   40|Elapsed Time: 0:00:12||

Episode 3|Iteration 42|reward:   82.0|last_reward_at:   40|Elapsed Time: 0:00:12||

Episode 3|Iteration 42|reward:   82.0|last_reward_at:   42|Elapsed Time: 0:00:12||

Episode 3|Iteration 45|reward:   82.0|last_reward_at:   42|Elapsed Time: 0:00:13||

Episode 3|Iteration 48|reward:   95.0|last_reward_at:   42|Elapsed Time: 0:00:13||

Episode 3|Iteration 48|reward:   95.0|last_reward_at:   48|Elapsed Time: 0:00:13||

Episode 3|Iteration 49|reward:  155.0|last_reward_at:   48|Elapsed Time: 0:00:13||

Episode 3|Iteration 49|reward:  155.0|last_reward_at:   49|Elapsed Time: 0:00:13||

Episode 3|Iteration 50|reward:  155.0|last_reward_at:   49|Elapsed Time: 0:00:15||

Episode 3|Iteration 55|reward:  155.0|last_reward_at:   49|Elapsed Time: 0:00:17||

Episode 3|Iteration 60|reward:  155.0|last_reward_at:   49|Elapsed Time: 0:00:18||

Episode 3|Iteration 62|reward:  161.0|last_reward_at:   49|Elapsed Time: 0:00:18||

Episode 3|Iteration 62|reward:  161.0|last_reward_at:   62|Elapsed Time: 0:00:18||

Episode 3|Iteration 64|reward:  172.0|last_reward_at:   62|Elapsed Time: 0:00:18||

Episode 3|Iteration 64|reward:  172.0|last_reward_at:   64|Elapsed Time: 0:00:18||

Episode 3|Iteration 65|reward:  180.0|last_reward_at:   64|Elapsed Time: 0:00:20||

Episode 3|Iteration 65|reward:  180.0|last_reward_at:   65|Elapsed Time: 0:00:20||

Episode 3|Iteration 70|reward:  180.0|last_reward_at:   65|Elapsed Time: 0:00:21||

Episode 3|Iteration 75|reward:  180.0|last_reward_at:   65|Elapsed Time: 0:00:23||

Episode 3|Iteration 76|reward:  194.0|last_reward_at:   65|Elapsed Time: 0:00:23||

Episode 3|Iteration 76|reward:  194.0|last_reward_at:   76|Elapsed Time: 0:00:23||

Episode 3|Iteration 80|reward:  194.0|last_reward_at:   76|Elapsed Time: 0:00:24||

Episode 3|Iteration 85|reward:  194.0|last_reward_at:   76|Elapsed Time: 0:00:26||

Episode 3|Iteration 90|reward:  194.0|last_reward_at:   76|Elapsed Time: 0:00:27||

Episode 3|Iteration 95|reward:  194.0|last_reward_at:   76|Elapsed Time: 0:00:27||

Episode 3|Iteration 95|reward:  194.0|last_reward_at:   76|Elapsed Time: 0:00:29||

Episode 3|Iteration 98|reward:  194.0|last_reward_at:   76|Elapsed Time: 0:00:29||

Episode 3|Iteration 99|reward:  200.0|last_reward_at:   76|Elapsed Time: 0:00:29||

Episode 3|Iteration 99|reward:  200.0|last_reward_at:   99|Elapsed Time: 0:00:29||

Episode 3|Iteration 100|reward:  200.0|last_reward_at:   99|Elapsed Time: 0:00:30||

Episode 3|Iteration 105|reward:  200.0|last_reward_at:   99|Elapsed Time: 0:00:32||

Episode 3|Iteration 110|reward:  200.0|last_reward_at:   99|Elapsed Time: 0:00:33||

Episode 3|Iteration 115|reward:  200.0|last_reward_at:   99|Elapsed Time: 0:00:34||

Episode 3|Iteration 118|reward:  206.0|last_reward_at:   99|Elapsed Time: 0:00:34||

Episode 3|Iteration 118|reward:  206.0|last_reward_at:  118|Elapsed Time: 0:00:34||

Episode 3|Iteration 120|reward:  206.0|last_reward_at:  118|Elapsed Time: 0:00:36||

Episode 3|Iteration 125|reward:  206.0|last_reward_at:  118|Elapsed Time: 0:00:37||

Episode 3|Iteration 130|reward:  206.0|last_reward_at:  118|Elapsed Time: 0:00:39||

Episode 3|Iteration 135|reward:  206.0|last_reward_at:  118|Elapsed Time: 0:00:40||

Episode 3|Iteration 140|reward:  206.0|last_reward_at:  118|Elapsed Time: 0:00:40||

Episode 3|Iteration 140|reward:  206.0|last_reward_at:  118|Elapsed Time: 0:00:41||

Episode 3|Iteration 145|reward:  206.0|last_reward_at:  118|Elapsed Time: 0:00:43||

Episode 3|Iteration 149|reward:  219.0|last_reward_at:  118|Elapsed Time: 0:00:43||

Episode 3|Iteration 149|reward:  219.0|last_reward_at:  149|Elapsed Time: 0:00:43||

Episode 3|Iteration 150|reward:  219.0|last_reward_at:  149|Elapsed Time: 0:00:44||

Episode 3|Iteration 155|reward:  219.0|last_reward_at:  149|Elapsed Time: 0:00:45||

Episode 3|Iteration 158|reward:  219.0|last_reward_at:  149|Elapsed Time: 0:00:45||

Episode 3|Iteration 160|reward:  219.0|last_reward_at:  149|Elapsed Time: 0:00:47||

Episode 3|Iteration 161|reward:  233.0|last_reward_at:  149|Elapsed Time: 0:00:47||

Episode 3|Iteration 161|reward:  233.0|last_reward_at:  161|Elapsed Time: 0:00:47||

Episode 3|Iteration 165|reward:  233.0|last_reward_at:  161|Elapsed Time: 0:00:48||

Episode 3|Iteration 166|reward:  243.0|last_reward_at:  161|Elapsed Time: 0:00:48||

Episode 3|Iteration 166|reward:  243.0|last_reward_at:  166|Elapsed Time: 0:00:48||

Episode 3|Iteration 168|reward:  249.0|last_reward_at:  166|Elapsed Time: 0:00:48||

Episode 3|Iteration 168|reward:  249.0|last_reward_at:  168|Elapsed Time: 0:00:48||

Episode 3|Iteration 170|reward:  249.0|last_reward_at:  168|Elapsed Time: 0:00:49||

Episode 3|Iteration 174|reward:  309.0|last_reward_at:  168|Elapsed Time: 0:00:50||

Episode 3|Iteration 174|reward:  309.0|last_reward_at:  174|Elapsed Time: 0:00:50||

Episode 3|Iteration 175|reward:  309.0|last_reward_at:  174|Elapsed Time: 0:00:52||

Episode 3|Iteration 177|reward:  315.0|last_reward_at:  174|Elapsed Time: 0:00:52||

Episode 3|Iteration 177|reward:  315.0|last_reward_at:  177|Elapsed Time: 0:00:52||

Episode 3|Iteration 180|reward:  315.0|last_reward_at:  177|Elapsed Time: 0:00:53||

Episode 3|Iteration 184|reward:  315.0|last_reward_at:  177|Elapsed Time: 0:00:53||

Episode 3|Iteration 185|reward:  315.0|last_reward_at:  177|Elapsed Time: 0:00:55||

Episode 3|Iteration 189|reward:  315.0|last_reward_at:  177|Elapsed Time: 0:00:55||

Episode 3|Iteration 190|reward:  315.0|last_reward_at:  177|Elapsed Time: 0:00:56||

Episode 3|Iteration 193|reward:  324.0|last_reward_at:  177|Elapsed Time: 0:00:56||

Episode 3|Iteration 193|reward:  324.0|last_reward_at:  193|Elapsed Time: 0:00:56||

Episode 3|Iteration 195|reward:  324.0|last_reward_at:  193|Elapsed Time: 0:00:58||

Episode 3|Iteration 198|reward:  324.0|last_reward_at:  193|Elapsed Time: 0:00:58||

Episode 3|Iteration 200|reward:  324.0|last_reward_at:  193|Elapsed Time: 0:01:00||

Episode 3|Iteration 204|reward:  330.5|last_reward_at:  193|Elapsed Time: 0:01:00||

Episode 3|Iteration 204|reward:  330.5|last_reward_at:  204|Elapsed Time: 0:01:00||

Episode 3|Iteration 205|reward:  330.5|last_reward_at:  204|Elapsed Time: 0:01:01||

Episode 3|Iteration 210|reward:  330.5|last_reward_at:  204|Elapsed Time: 0:01:02||

Episode 3|Iteration 212|reward:  336.5|last_reward_at:  204|Elapsed Time: 0:01:03||

Episode 3|Iteration 212|reward:  336.5|last_reward_at:  212|Elapsed Time: 0:01:03||

Episode 3|Iteration 215|reward:  336.5|last_reward_at:  212|Elapsed Time: 0:01:04||

Episode 3|Iteration 219|reward:  336.5|last_reward_at:  212|Elapsed Time: 0:01:05||

Episode 3|Iteration 220|reward:  336.5|last_reward_at:  212|Elapsed Time: 0:01:06||

Episode 3|Iteration 225|reward:  336.5|last_reward_at:  212|Elapsed Time: 0:01:08||

Episode 3|Iteration 228|reward:  336.5|last_reward_at:  212|Elapsed Time: 0:01:08||

Episode 3|Iteration 228|reward:  376.5|last_reward_at:  212|Elapsed Time: 0:01:08||

Episode 3|Iteration 228|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:08||

Episode 3|Iteration 230|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:09||

Episode 3|Iteration 233|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:10||

Episode 3|Iteration 235|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:11||

Episode 3|Iteration 240|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:12||

Episode 3|Iteration 245|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:14||

Episode 3|Iteration 247|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:14||

Episode 3|Iteration 250|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:15||

Episode 3|Iteration 255|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:17||

Episode 3|Iteration 257|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:17||

Episode 3|Iteration 260|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:17||

Episode 3|Iteration 260|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:18||

Episode 3|Iteration 265|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:20||

Episode 3|Iteration 270|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:21||

Episode 3|Iteration 274|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:21||

Episode 3|Iteration 275|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:23||

Episode 3|Iteration 277|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:23||

Episode 3|Iteration 279|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:23||

Episode 3|Iteration 280|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:25||

Episode 3|Iteration 285|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:25||

Episode 3|Iteration 285|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:26||

Episode 3|Iteration 290|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:28||

Episode 3|Iteration 293|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:28||

Episode 3|Iteration 295|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:30||

Episode 3|Iteration 300|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:31||

Episode 3|Iteration 301|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:32||

Episode 3|Iteration 305|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:33||

Episode 3|Iteration 310|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:35||

Episode 3|Iteration 315|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:36||

Episode 3|Iteration 320|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:38||

Episode 3|Iteration 325|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:39||

Episode 3|Iteration 329|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:39||

Episode 3|Iteration 330|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:40||

Episode 3|Iteration 333|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:40||

Episode 3|Iteration 335|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:42||

Episode 3|Iteration 340|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:44||

Episode 3|Iteration 344|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:44||

Episode 3|Iteration 345|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:45||

Episode 3|Iteration 350|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:47||

Episode 3|Iteration 355|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:47||

Episode 3|Iteration 355|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:48||

Episode 3|Iteration 359|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:48||

Episode 3|Iteration 360|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:50||

Episode 3|Iteration 365|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:51||

Episode 3|Iteration 367|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:51||

Episode 3|Iteration 370|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:53||

Episode 3|Iteration 375|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:55||

Episode 3|Iteration 379|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:55||

Episode 3|Iteration 380|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:56||

Episode 3|Iteration 385|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:57||

Episode 3|Iteration 390|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:01:59||

Episode 3|Iteration 395|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:00||

Episode 3|Iteration 399|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:00||

Episode 3|Iteration 400|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:02||

Episode 3|Iteration 404|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:02||

Episode 3|Iteration 405|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:03||

Episode 3|Iteration 410|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:05||

Episode 3|Iteration 415|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:06||

Episode 3|Iteration 418|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:06||

Episode 3|Iteration 420|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:08||

Episode 3|Iteration 425|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:08||

Episode 3|Iteration 425|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:09||

Episode 3|Iteration 430|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:11||

Episode 3|Iteration 435|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:12||

Episode 3|Iteration 440|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:14||

Episode 3|Iteration 445|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:15||

Episode 3|Iteration 450|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:17||

Episode 3|Iteration 455|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:18||

Episode 3|Iteration 458|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:19||

Episode 3|Iteration 460|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:20||

Episode 3|Iteration 465|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:22||

Episode 3|Iteration 469|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:22||

Episode 3|Iteration 470|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:24||

Episode 3|Iteration 475|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:24||

Episode 3|Iteration 475|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:25||

Episode 3|Iteration 480|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:27||

Episode 3|Iteration 485|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:29||

Episode 3|Iteration 490|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:30||

Episode 3|Iteration 493|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:30||

Episode 3|Iteration 495|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:32||

Episode 3|Iteration 500|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:33||

Episode 3|Iteration 500|reward:  376.5|last_reward_at:  228|Elapsed Time: 0:02:33||




  Episode 3 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/23 (0.08)
    explore-remote: 5/70 (0.07)
    explore-connect: 1/49 (0.02)
    exploit-local: 9/100 (0.08)
    exploit-remote: 2/173 (0.01)
    exploit-connect: 4/62 (0.06)
  exploit deflected to exploration: 50
  ## Episode: 4/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 4|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 4|Iteration 5|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 4|Iteration 9|reward:   54.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 4|Iteration 9|reward:   54.0|last_reward_at:    9|Elapsed Time: 0:00:01||

Episode 4|Iteration 10|reward:   54.0|last_reward_at:    9|Elapsed Time: 0:00:02||

Episode 4|Iteration 12|reward:   54.0|last_reward_at:    9|Elapsed Time: 0:00:03||

Episode 4|Iteration 13|reward:   65.0|last_reward_at:    9|Elapsed Time: 0:00:03||

Episode 4|Iteration 13|reward:   65.0|last_reward_at:   13|Elapsed Time: 0:00:03||

Episode 4|Iteration 14|reward:   79.0|last_reward_at:   13|Elapsed Time: 0:00:03||

Episode 4|Iteration 14|reward:   79.0|last_reward_at:   14|Elapsed Time: 0:00:03||

Episode 4|Iteration 15|reward:   79.0|last_reward_at:   14|Elapsed Time: 0:00:04||

Episode 4|Iteration 20|reward:   79.0|last_reward_at:   14|Elapsed Time: 0:00:05||

Episode 4|Iteration 25|reward:   79.0|last_reward_at:   14|Elapsed Time: 0:00:06||

Episode 4|Iteration 30|reward:   79.0|last_reward_at:   14|Elapsed Time: 0:00:08||

Episode 4|Iteration 33|reward:   89.0|last_reward_at:   14|Elapsed Time: 0:00:08||

Episode 4|Iteration 33|reward:   89.0|last_reward_at:   33|Elapsed Time: 0:00:08||

Episode 4|Iteration 35|reward:  102.0|last_reward_at:   33|Elapsed Time: 0:00:09||

Episode 4|Iteration 35|reward:  102.0|last_reward_at:   35|Elapsed Time: 0:00:09||

Episode 4|Iteration 37|reward:  108.0|last_reward_at:   35|Elapsed Time: 0:00:09||

Episode 4|Iteration 37|reward:  108.0|last_reward_at:   37|Elapsed Time: 0:00:09||

Episode 4|Iteration 40|reward:  108.0|last_reward_at:   37|Elapsed Time: 0:00:11||

Episode 4|Iteration 41|reward:  119.0|last_reward_at:   37|Elapsed Time: 0:00:11||

Episode 4|Iteration 41|reward:  119.0|last_reward_at:   41|Elapsed Time: 0:00:11||

Episode 4|Iteration 45|reward:  119.0|last_reward_at:   41|Elapsed Time: 0:00:12||

Episode 4|Iteration 49|reward:  239.0|last_reward_at:   41|Elapsed Time: 0:00:12||

Episode 4|Iteration 49|reward:  239.0|last_reward_at:   49|Elapsed Time: 0:00:12||

Episode 4|Iteration 50|reward:  239.0|last_reward_at:   49|Elapsed Time: 0:00:14||

Episode 4|Iteration 55|reward:  239.0|last_reward_at:   49|Elapsed Time: 0:00:18||

Episode 4|Iteration 60|reward:  239.0|last_reward_at:   49|Elapsed Time: 0:00:19||

Episode 4|Iteration 63|reward:  239.0|last_reward_at:   49|Elapsed Time: 0:00:19||

Episode 4|Iteration 65|reward:  239.0|last_reward_at:   49|Elapsed Time: 0:00:21||

Episode 4|Iteration 67|reward:  299.0|last_reward_at:   49|Elapsed Time: 0:00:21||

Episode 4|Iteration 67|reward:  299.0|last_reward_at:   67|Elapsed Time: 0:00:21||

Episode 4|Iteration 70|reward:  312.0|last_reward_at:   67|Elapsed Time: 0:00:23||

Episode 4|Iteration 70|reward:  312.0|last_reward_at:   70|Elapsed Time: 0:00:23||

Episode 4|Iteration 71|reward:  318.0|last_reward_at:   70|Elapsed Time: 0:00:23||

Episode 4|Iteration 71|reward:  318.0|last_reward_at:   71|Elapsed Time: 0:00:23||

Episode 4|Iteration 75|reward:  318.0|last_reward_at:   71|Elapsed Time: 0:00:24||

Episode 4|Iteration 80|reward:  318.0|last_reward_at:   71|Elapsed Time: 0:00:25||

Episode 4|Iteration 83|reward:  358.0|last_reward_at:   71|Elapsed Time: 0:00:25||

Episode 4|Iteration 83|reward:  358.0|last_reward_at:   83|Elapsed Time: 0:00:25||

Episode 4|Iteration 85|reward:  358.0|last_reward_at:   83|Elapsed Time: 0:00:27||

Episode 4|Iteration 88|reward:  358.0|last_reward_at:   83|Elapsed Time: 0:00:27||

Episode 4|Iteration 90|reward:  358.0|last_reward_at:   83|Elapsed Time: 0:00:28||

Episode 4|Iteration 93|reward:  388.0|last_reward_at:   83|Elapsed Time: 0:00:28||

Episode 4|Iteration 93|reward:  388.0|last_reward_at:   93|Elapsed Time: 0:00:28||

Episode 4|Iteration 95|reward:  388.0|last_reward_at:   93|Elapsed Time: 0:00:30||

Episode 4|Iteration 96|reward:  394.0|last_reward_at:   93|Elapsed Time: 0:00:30||

Episode 4|Iteration 96|reward:  394.0|last_reward_at:   96|Elapsed Time: 0:00:30||

Episode 4|Iteration 98|reward:  405.0|last_reward_at:   96|Elapsed Time: 0:00:30||

Episode 4|Iteration 98|reward:  405.0|last_reward_at:   98|Elapsed Time: 0:00:30||

Episode 4|Iteration 100|reward:  405.0|last_reward_at:   98|Elapsed Time: 0:00:32||

Episode 4|Iteration 104|reward:  411.5|last_reward_at:   98|Elapsed Time: 0:00:32||

Episode 4|Iteration 104|reward:  411.5|last_reward_at:  104|Elapsed Time: 0:00:32||

Episode 4|Iteration 105|reward:  411.5|last_reward_at:  104|Elapsed Time: 0:00:33||

Episode 4|Iteration 106|reward:  462.5|last_reward_at:  104|Elapsed Time: 0:00:33||

Episode 4|Iteration 106|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:33||

Episode 4|Iteration 110|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:35||

Episode 4|Iteration 115|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:36||

Episode 4|Iteration 120|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:38||

Episode 4|Iteration 124|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:38||

Episode 4|Iteration 125|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:40||

Episode 4|Iteration 129|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:40||

Episode 4|Iteration 130|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:41||

Episode 4|Iteration 135|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:43||

Episode 4|Iteration 140|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:43||

Episode 4|Iteration 140|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:45||

Episode 4|Iteration 143|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:45||

Episode 4|Iteration 145|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:46||

Episode 4|Iteration 149|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:46||

Episode 4|Iteration 150|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:48||

Episode 4|Iteration 153|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:48||

Episode 4|Iteration 154|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:48||

Episode 4|Iteration 155|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:49||

Episode 4|Iteration 159|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:49||

Episode 4|Iteration 160|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:51||

Episode 4|Iteration 164|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:51||

Episode 4|Iteration 165|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:52||

Episode 4|Iteration 170|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:54||

Episode 4|Iteration 175|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:55||

Episode 4|Iteration 180|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:57||

Episode 4|Iteration 185|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:00:58||

Episode 4|Iteration 190|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:00||

Episode 4|Iteration 193|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:00||

Episode 4|Iteration 195|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:01||

Episode 4|Iteration 200|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:03||

Episode 4|Iteration 205|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:04||

Episode 4|Iteration 207|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:04||

Episode 4|Iteration 209|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:05||

Episode 4|Iteration 210|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:06||

Episode 4|Iteration 215|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:07||

Episode 4|Iteration 220|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:09||

Episode 4|Iteration 224|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:09||

Episode 4|Iteration 225|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:11||

Episode 4|Iteration 228|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:11||

Episode 4|Iteration 230|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:12||

Episode 4|Iteration 232|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:12||

Episode 4|Iteration 235|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:14||

Episode 4|Iteration 240|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:15||

Episode 4|Iteration 244|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:15||

Episode 4|Iteration 245|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:16||

Episode 4|Iteration 250|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:18||

Episode 4|Iteration 254|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:18||

Episode 4|Iteration 255|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:20||

Episode 4|Iteration 260|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:20||

Episode 4|Iteration 260|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:22||

Episode 4|Iteration 265|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:23||

Episode 4|Iteration 270|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:25||

Episode 4|Iteration 275|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:27||

Episode 4|Iteration 279|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:27||

Episode 4|Iteration 280|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:28||

Episode 4|Iteration 285|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:30||

Episode 4|Iteration 288|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:30||

Episode 4|Iteration 290|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:31||

Episode 4|Iteration 295|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:33||

Episode 4|Iteration 300|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:34||

Episode 4|Iteration 304|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:34||

Episode 4|Iteration 304|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:34||

Episode 4|Iteration 305|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:36||

Episode 4|Iteration 310|reward:  462.5|last_reward_at:  106|Elapsed Time: 0:01:37||

Episode 4|Iteration 315|reward:  471.5|last_reward_at:  106|Elapsed Time: 0:01:38||

Episode 4|Iteration 315|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:38||

Episode 4|Iteration 317|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:38||

Episode 4|Iteration 319|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:39||

Episode 4|Iteration 320|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:40||

Episode 4|Iteration 325|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:42||

Episode 4|Iteration 330|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:43||

Episode 4|Iteration 335|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:44||

Episode 4|Iteration 337|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:44||

Episode 4|Iteration 340|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:46||

Episode 4|Iteration 345|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:47||

Episode 4|Iteration 348|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:47||

Episode 4|Iteration 350|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:49||

Episode 4|Iteration 355|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:49||

Episode 4|Iteration 355|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:50||

Episode 4|Iteration 356|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:50||

Episode 4|Iteration 358|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:50||

Episode 4|Iteration 360|reward:  471.5|last_reward_at:  315|Elapsed Time: 0:01:52||

Episode 4|Iteration 362|reward:  477.5|last_reward_at:  315|Elapsed Time: 0:01:52||

Episode 4|Iteration 362|reward:  477.5|last_reward_at:  362|Elapsed Time: 0:01:52||

Episode 4|Iteration 365|reward:  477.5|last_reward_at:  362|Elapsed Time: 0:01:53||

Episode 4|Iteration 370|reward:  477.5|last_reward_at:  362|Elapsed Time: 0:01:55||

Episode 4|Iteration 374|reward:  483.5|last_reward_at:  362|Elapsed Time: 0:01:55||

Episode 4|Iteration 374|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:01:55||

Episode 4|Iteration 375|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:01:56||

Episode 4|Iteration 380|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:01:56||

Episode 4|Iteration 380|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:01:57||

Episode 4|Iteration 384|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:01:57||

Episode 4|Iteration 385|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:01:59||

Episode 4|Iteration 390|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:02:01||

Episode 4|Iteration 395|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:02:02||

Episode 4|Iteration 399|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:02:03||

Episode 4|Iteration 400|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:02:04||

Episode 4|Iteration 405|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:02:06||

Episode 4|Iteration 410|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:02:07||

Episode 4|Iteration 412|reward:  483.5|last_reward_at:  374|Elapsed Time: 0:02:07||

Episode 4|Iteration 413|reward:  496.5|last_reward_at:  374|Elapsed Time: 0:02:08||

Episode 4|Iteration 413|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:08||

Episode 4|Iteration 415|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:09||

Episode 4|Iteration 418|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:09||

Episode 4|Iteration 420|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:10||

Episode 4|Iteration 425|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:11||

Episode 4|Iteration 425|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:12||

Episode 4|Iteration 430|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:14||

Episode 4|Iteration 433|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:14||

Episode 4|Iteration 435|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:16||

Episode 4|Iteration 440|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:17||

Episode 4|Iteration 445|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:19||

Episode 4|Iteration 450|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:20||

Episode 4|Iteration 455|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:22||

Episode 4|Iteration 459|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:22||

Episode 4|Iteration 460|reward:  496.5|last_reward_at:  413|Elapsed Time: 0:02:23||

Episode 4|Iteration 461|reward:  556.5|last_reward_at:  413|Elapsed Time: 0:02:23||

Episode 4|Iteration 461|reward:  556.5|last_reward_at:  461|Elapsed Time: 0:02:23||

Episode 4|Iteration 465|reward:  556.5|last_reward_at:  461|Elapsed Time: 0:02:25||

Episode 4|Iteration 470|reward:  556.5|last_reward_at:  461|Elapsed Time: 0:02:26||

Episode 4|Iteration 475|reward:  556.5|last_reward_at:  461|Elapsed Time: 0:02:28||

Episode 4|Iteration 476|reward:  562.5|last_reward_at:  461|Elapsed Time: 0:02:28||

Episode 4|Iteration 476|reward:  562.5|last_reward_at:  476|Elapsed Time: 0:02:28||

Episode 4|Iteration 480|reward:  562.5|last_reward_at:  476|Elapsed Time: 0:02:29||

Episode 4|Iteration 485|reward:  562.5|last_reward_at:  476|Elapsed Time: 0:02:31||

Episode 4|Iteration 486|reward:  562.5|last_reward_at:  476|Elapsed Time: 0:02:31||

Episode 4|Iteration 490|reward:  562.5|last_reward_at:  476|Elapsed Time: 0:02:33||

Episode 4|Iteration 494|reward:  562.5|last_reward_at:  476|Elapsed Time: 0:02:33||

Episode 4|Iteration 495|reward:  562.5|last_reward_at:  476|Elapsed Time: 0:02:35||

Episode 4|Iteration 499|reward:  569.0|last_reward_at:  476|Elapsed Time: 0:02:35||

Episode 4|Iteration 499|reward:  569.0|last_reward_at:  499|Elapsed Time: 0:02:35||

Episode 4|Iteration 500|reward:  569.0|last_reward_at:  499|Elapsed Time: 0:02:36||

Episode 4|Iteration 500|reward:  569.0|last_reward_at:  499|Elapsed Time: 0:02:36||




  Episode 4 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 0/13 (0.00)
    explore-remote: 3/86 (0.03)
    explore-connect: 0/48 (0.00)
    exploit-local: 13/83 (0.14)
    exploit-remote: 3/146 (0.02)
    exploit-connect: 6/99 (0.06)
  exploit deflected to exploration: 50
  ## Episode: 5/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 5|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 5|Iteration 5|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 5|Iteration 10|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 5|Iteration 13|reward:   25.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 5|Iteration 13|reward:   25.0|last_reward_at:   13|Elapsed Time: 0:00:02||

Episode 5|Iteration 14|reward:   36.0|last_reward_at:   13|Elapsed Time: 0:00:02||

Episode 5|Iteration 14|reward:   36.0|last_reward_at:   14|Elapsed Time: 0:00:02||

Episode 5|Iteration 15|reward:   36.0|last_reward_at:   14|Elapsed Time: 0:00:03||

Episode 5|Iteration 16|reward:   76.0|last_reward_at:   14|Elapsed Time: 0:00:03||

Episode 5|Iteration 16|reward:   76.0|last_reward_at:   16|Elapsed Time: 0:00:03||

Episode 5|Iteration 19|reward:   89.0|last_reward_at:   16|Elapsed Time: 0:00:03||

Episode 5|Iteration 19|reward:   89.0|last_reward_at:   19|Elapsed Time: 0:00:03||

Episode 5|Iteration 20|reward:   89.0|last_reward_at:   19|Elapsed Time: 0:00:05||

Episode 5|Iteration 21|reward:  103.0|last_reward_at:   19|Elapsed Time: 0:00:05||

Episode 5|Iteration 21|reward:  103.0|last_reward_at:   21|Elapsed Time: 0:00:05||

Episode 5|Iteration 25|reward:  103.0|last_reward_at:   21|Elapsed Time: 0:00:06||

Episode 5|Iteration 28|reward:  103.0|last_reward_at:   21|Elapsed Time: 0:00:06||

Episode 5|Iteration 30|reward:  103.0|last_reward_at:   21|Elapsed Time: 0:00:07||

Episode 5|Iteration 34|reward:  109.0|last_reward_at:   21|Elapsed Time: 0:00:08||

Episode 5|Iteration 34|reward:  109.0|last_reward_at:   34|Elapsed Time: 0:00:08||

Episode 5|Iteration 35|reward:  109.0|last_reward_at:   34|Elapsed Time: 0:00:09||

Episode 5|Iteration 40|reward:  109.0|last_reward_at:   34|Elapsed Time: 0:00:11||

Episode 5|Iteration 44|reward:  109.0|last_reward_at:   34|Elapsed Time: 0:00:11||

Episode 5|Iteration 45|reward:  109.0|last_reward_at:   34|Elapsed Time: 0:00:13||

Episode 5|Iteration 49|reward:  119.0|last_reward_at:   34|Elapsed Time: 0:00:13||

Episode 5|Iteration 49|reward:  119.0|last_reward_at:   49|Elapsed Time: 0:00:13||

Episode 5|Iteration 50|reward:  119.0|last_reward_at:   49|Elapsed Time: 0:00:14||

Episode 5|Iteration 55|reward:  119.0|last_reward_at:   49|Elapsed Time: 0:00:16||

Episode 5|Iteration 60|reward:  119.0|last_reward_at:   49|Elapsed Time: 0:00:17||

Episode 5|Iteration 63|reward:  130.0|last_reward_at:   49|Elapsed Time: 0:00:17||

Episode 5|Iteration 63|reward:  130.0|last_reward_at:   63|Elapsed Time: 0:00:17||

Episode 5|Iteration 65|reward:  130.0|last_reward_at:   63|Elapsed Time: 0:00:19||

Episode 5|Iteration 70|reward:  130.0|last_reward_at:   63|Elapsed Time: 0:00:19||

Episode 5|Iteration 70|reward:  130.0|last_reward_at:   63|Elapsed Time: 0:00:20||

Episode 5|Iteration 71|reward:  136.0|last_reward_at:   63|Elapsed Time: 0:00:20||

Episode 5|Iteration 71|reward:  136.0|last_reward_at:   71|Elapsed Time: 0:00:20||

Episode 5|Iteration 73|reward:  149.0|last_reward_at:   71|Elapsed Time: 0:00:20||

Episode 5|Iteration 73|reward:  149.0|last_reward_at:   73|Elapsed Time: 0:00:20||

Episode 5|Iteration 75|reward:  149.0|last_reward_at:   73|Elapsed Time: 0:00:22||

Episode 5|Iteration 77|reward:  149.0|last_reward_at:   73|Elapsed Time: 0:00:22||

Episode 5|Iteration 79|reward:  269.0|last_reward_at:   73|Elapsed Time: 0:00:22||

Episode 5|Iteration 79|reward:  269.0|last_reward_at:   79|Elapsed Time: 0:00:22||

Episode 5|Iteration 80|reward:  269.0|last_reward_at:   79|Elapsed Time: 0:00:24||

Episode 5|Iteration 84|reward:  275.0|last_reward_at:   79|Elapsed Time: 0:00:24||

Episode 5|Iteration 84|reward:  275.0|last_reward_at:   84|Elapsed Time: 0:00:24||

Episode 5|Iteration 85|reward:  275.0|last_reward_at:   84|Elapsed Time: 0:00:25||

Episode 5|Iteration 88|reward:  275.0|last_reward_at:   84|Elapsed Time: 0:00:25||

Episode 5|Iteration 90|reward:  326.0|last_reward_at:   84|Elapsed Time: 0:00:27||

Episode 5|Iteration 90|reward:  326.0|last_reward_at:   90|Elapsed Time: 0:00:27||

Episode 5|Iteration 95|reward:  326.0|last_reward_at:   90|Elapsed Time: 0:00:27||

Episode 5|Iteration 95|reward:  326.0|last_reward_at:   90|Elapsed Time: 0:00:28||

Episode 5|Iteration 100|reward:  326.0|last_reward_at:   90|Elapsed Time: 0:00:30||

Episode 5|Iteration 105|reward:  326.0|last_reward_at:   90|Elapsed Time: 0:00:31||

Episode 5|Iteration 108|reward:  326.0|last_reward_at:   90|Elapsed Time: 0:00:32||

Episode 5|Iteration 110|reward:  326.0|last_reward_at:   90|Elapsed Time: 0:00:33||

Episode 5|Iteration 115|reward:  326.0|last_reward_at:   90|Elapsed Time: 0:00:35||

Episode 5|Iteration 120|reward:  326.0|last_reward_at:   90|Elapsed Time: 0:00:36||

Episode 5|Iteration 121|reward:  339.0|last_reward_at:   90|Elapsed Time: 0:00:36||

Episode 5|Iteration 121|reward:  339.0|last_reward_at:  121|Elapsed Time: 0:00:36||

Episode 5|Iteration 125|reward:  339.0|last_reward_at:  121|Elapsed Time: 0:00:38||

Episode 5|Iteration 130|reward:  339.0|last_reward_at:  121|Elapsed Time: 0:00:39||

Episode 5|Iteration 135|reward:  339.0|last_reward_at:  121|Elapsed Time: 0:00:40||

Episode 5|Iteration 140|reward:  339.0|last_reward_at:  121|Elapsed Time: 0:00:40||

Episode 5|Iteration 140|reward:  339.0|last_reward_at:  121|Elapsed Time: 0:00:42||

Episode 5|Iteration 142|reward:  360.5|last_reward_at:  121|Elapsed Time: 0:00:42||

Episode 5|Iteration 142|reward:  360.5|last_reward_at:  142|Elapsed Time: 0:00:42||

Episode 5|Iteration 143|reward:  375.5|last_reward_at:  142|Elapsed Time: 0:00:42||

Episode 5|Iteration 143|reward:  375.5|last_reward_at:  143|Elapsed Time: 0:00:42||

Episode 5|Iteration 145|reward:  375.5|last_reward_at:  143|Elapsed Time: 0:00:44||

Episode 5|Iteration 150|reward:  375.5|last_reward_at:  143|Elapsed Time: 0:00:45||

Episode 5|Iteration 155|reward:  375.5|last_reward_at:  143|Elapsed Time: 0:00:46||

Episode 5|Iteration 159|reward:  375.5|last_reward_at:  143|Elapsed Time: 0:00:46||

Episode 5|Iteration 160|reward:  375.5|last_reward_at:  143|Elapsed Time: 0:00:48||

Episode 5|Iteration 165|reward:  375.5|last_reward_at:  143|Elapsed Time: 0:00:50||

Episode 5|Iteration 166|reward:  375.5|last_reward_at:  143|Elapsed Time: 0:00:50||

Episode 5|Iteration 167|reward:  375.5|last_reward_at:  143|Elapsed Time: 0:00:50||

Episode 5|Iteration 168|reward:  415.5|last_reward_at:  143|Elapsed Time: 0:00:50||

Episode 5|Iteration 168|reward:  415.5|last_reward_at:  168|Elapsed Time: 0:00:50||

Episode 5|Iteration 170|reward:  415.5|last_reward_at:  168|Elapsed Time: 0:00:51||

Episode 5|Iteration 175|reward:  415.5|last_reward_at:  168|Elapsed Time: 0:00:53||

Episode 5|Iteration 178|reward:  415.5|last_reward_at:  168|Elapsed Time: 0:00:53||

Episode 5|Iteration 180|reward:  415.5|last_reward_at:  168|Elapsed Time: 0:00:54||

Episode 5|Iteration 185|reward:  415.5|last_reward_at:  168|Elapsed Time: 0:00:56||

Episode 5|Iteration 190|reward:  415.5|last_reward_at:  168|Elapsed Time: 0:00:56||

Episode 5|Iteration 190|reward:  415.5|last_reward_at:  168|Elapsed Time: 0:00:58||

Episode 5|Iteration 195|reward:  415.5|last_reward_at:  168|Elapsed Time: 0:00:59||

Episode 5|Iteration 200|reward:  415.5|last_reward_at:  168|Elapsed Time: 0:01:01||

Episode 5|Iteration 203|reward:  475.5|last_reward_at:  168|Elapsed Time: 0:01:01||

Episode 5|Iteration 203|reward:  475.5|last_reward_at:  203|Elapsed Time: 0:01:01||

Episode 5|Iteration 204|reward:  481.5|last_reward_at:  203|Elapsed Time: 0:01:01||

Episode 5|Iteration 204|reward:  481.5|last_reward_at:  204|Elapsed Time: 0:01:01||

Episode 5|Iteration 205|reward:  481.5|last_reward_at:  204|Elapsed Time: 0:01:03||

Episode 5|Iteration 207|reward:  490.5|last_reward_at:  204|Elapsed Time: 0:01:03||

Episode 5|Iteration 207|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:03||

Episode 5|Iteration 210|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:04||

Episode 5|Iteration 215|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:06||

Episode 5|Iteration 217|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:06||

Episode 5|Iteration 220|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:08||

Episode 5|Iteration 225|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:09||

Episode 5|Iteration 230|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:12||

Episode 5|Iteration 234|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:12||

Episode 5|Iteration 235|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:14||

Episode 5|Iteration 239|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:14||

Episode 5|Iteration 240|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:15||

Episode 5|Iteration 245|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:17||

Episode 5|Iteration 250|reward:  490.5|last_reward_at:  207|Elapsed Time: 0:01:18||

Episode 5|Iteration 251|reward:  550.5|last_reward_at:  207|Elapsed Time: 0:01:18||

Episode 5|Iteration 251|reward:  550.5|last_reward_at:  251|Elapsed Time: 0:01:18||

Episode 5|Iteration 254|reward:  557.0|last_reward_at:  251|Elapsed Time: 0:01:18||

Episode 5|Iteration 254|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:18||

Episode 5|Iteration 255|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:20||

Episode 5|Iteration 258|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:20||

Episode 5|Iteration 260|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:21||

Episode 5|Iteration 265|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:23||

Episode 5|Iteration 270|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:25||

Episode 5|Iteration 275|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:27||

Episode 5|Iteration 280|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:28||

Episode 5|Iteration 285|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:28||

Episode 5|Iteration 285|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:30||

Episode 5|Iteration 290|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:32||

Episode 5|Iteration 295|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:33||

Episode 5|Iteration 299|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:33||

Episode 5|Iteration 300|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:35||

Episode 5|Iteration 304|reward:  557.0|last_reward_at:  254|Elapsed Time: 0:01:35||

Episode 5|Iteration 304|reward:  563.0|last_reward_at:  254|Elapsed Time: 0:01:35||

Episode 5|Iteration 304|reward:  563.0|last_reward_at:  304|Elapsed Time: 0:01:35||

Episode 5|Iteration 305|reward:  563.0|last_reward_at:  304|Elapsed Time: 0:01:37||

Episode 5|Iteration 310|reward:  563.0|last_reward_at:  304|Elapsed Time: 0:01:38||

Episode 5|Iteration 314|reward:  563.0|last_reward_at:  304|Elapsed Time: 0:01:38||

Episode 5|Iteration 315|reward:  563.0|last_reward_at:  304|Elapsed Time: 0:01:40||

Episode 5|Iteration 320|reward:  563.0|last_reward_at:  304|Elapsed Time: 0:01:42||

Episode 5|Iteration 325|reward:  563.0|last_reward_at:  304|Elapsed Time: 0:01:44||

Episode 5|Iteration 326|reward:  569.0|last_reward_at:  304|Elapsed Time: 0:01:44||

Episode 5|Iteration 326|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:44||

Episode 5|Iteration 330|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:45||

Episode 5|Iteration 335|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:47||

Episode 5|Iteration 340|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:48||

Episode 5|Iteration 345|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:50||

Episode 5|Iteration 349|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:50||

Episode 5|Iteration 350|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:51||

Episode 5|Iteration 354|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:52||

Episode 5|Iteration 355|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:53||

Episode 5|Iteration 360|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:55||

Episode 5|Iteration 364|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:55||

Episode 5|Iteration 365|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:56||

Episode 5|Iteration 370|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:58||

Episode 5|Iteration 373|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:58||

Episode 5|Iteration 375|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:59||

Episode 5|Iteration 379|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:01:59||

Episode 5|Iteration 380|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:01||

Episode 5|Iteration 385|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:02||

Episode 5|Iteration 386|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:03||

Episode 5|Iteration 390|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:04||

Episode 5|Iteration 395|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:05||

Episode 5|Iteration 400|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:07||

Episode 5|Iteration 405|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:08||

Episode 5|Iteration 410|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:09||

Episode 5|Iteration 414|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:10||

Episode 5|Iteration 415|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:11||

Episode 5|Iteration 420|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:13||

Episode 5|Iteration 425|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:14||

Episode 5|Iteration 430|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:16||

Episode 5|Iteration 431|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:16||

Episode 5|Iteration 435|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:18||

Episode 5|Iteration 440|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:20||

Episode 5|Iteration 445|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:21||

Episode 5|Iteration 446|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:21||

Episode 5|Iteration 450|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:21||

Episode 5|Iteration 450|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:23||

Episode 5|Iteration 453|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:23||

Episode 5|Iteration 455|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:24||

Episode 5|Iteration 460|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:26||

Episode 5|Iteration 463|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:26||

Episode 5|Iteration 465|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:28||

Episode 5|Iteration 469|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:28||

Episode 5|Iteration 470|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:29||

Episode 5|Iteration 474|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:29||

Episode 5|Iteration 475|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:31||

Episode 5|Iteration 480|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:33||

Episode 5|Iteration 485|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:34||

Episode 5|Iteration 490|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:36||

Episode 5|Iteration 495|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:38||

Episode 5|Iteration 500|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:39||

Episode 5|Iteration 500|reward:  569.0|last_reward_at:  326|Elapsed Time: 0:02:39||




  Episode 5 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/22 (0.04)
    explore-remote: 1/65 (0.02)
    explore-connect: 0/32 (0.00)
    exploit-local: 13/112 (0.10)
    exploit-remote: 4/154 (0.03)
    exploit-connect: 6/90 (0.06)
  exploit deflected to exploration: 21
  ## Episode: 6/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 6|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 6|Iteration 5|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 6|Iteration 10|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 6|Iteration 13|reward:   25.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 6|Iteration 13|reward:   25.0|last_reward_at:   13|Elapsed Time: 0:00:02||

Episode 6|Iteration 15|reward:   38.0|last_reward_at:   13|Elapsed Time: 0:00:04||

Episode 6|Iteration 15|reward:   38.0|last_reward_at:   15|Elapsed Time: 0:00:04||

Episode 6|Iteration 19|reward:   38.0|last_reward_at:   15|Elapsed Time: 0:00:04||

Episode 6|Iteration 20|reward:   38.0|last_reward_at:   15|Elapsed Time: 0:00:05||

Episode 6|Iteration 22|reward:   78.0|last_reward_at:   15|Elapsed Time: 0:00:05||

Episode 6|Iteration 22|reward:   78.0|last_reward_at:   22|Elapsed Time: 0:00:05||

Episode 6|Iteration 25|reward:   78.0|last_reward_at:   22|Elapsed Time: 0:00:07||

Episode 6|Iteration 30|reward:   78.0|last_reward_at:   22|Elapsed Time: 0:00:09||

Episode 6|Iteration 31|reward:  138.0|last_reward_at:   22|Elapsed Time: 0:00:09||

Episode 6|Iteration 31|reward:  138.0|last_reward_at:   31|Elapsed Time: 0:00:09||

Episode 6|Iteration 34|reward:  138.0|last_reward_at:   31|Elapsed Time: 0:00:09||

Episode 6|Iteration 35|reward:  138.0|last_reward_at:   31|Elapsed Time: 0:00:10||

Episode 6|Iteration 38|reward:  138.0|last_reward_at:   31|Elapsed Time: 0:00:10||

Episode 6|Iteration 40|reward:  138.0|last_reward_at:   31|Elapsed Time: 0:00:12||

Episode 6|Iteration 45|reward:  138.0|last_reward_at:   31|Elapsed Time: 0:00:13||

Episode 6|Iteration 47|reward:  144.0|last_reward_at:   31|Elapsed Time: 0:00:13||

Episode 6|Iteration 47|reward:  144.0|last_reward_at:   47|Elapsed Time: 0:00:13||

Episode 6|Iteration 50|reward:  155.0|last_reward_at:   47|Elapsed Time: 0:00:15||

Episode 6|Iteration 50|reward:  155.0|last_reward_at:   50|Elapsed Time: 0:00:15||

Episode 6|Iteration 52|reward:  155.0|last_reward_at:   50|Elapsed Time: 0:00:15||

Episode 6|Iteration 55|reward:  155.0|last_reward_at:   50|Elapsed Time: 0:00:16||

Episode 6|Iteration 57|reward:  161.0|last_reward_at:   50|Elapsed Time: 0:00:16||

Episode 6|Iteration 57|reward:  161.0|last_reward_at:   57|Elapsed Time: 0:00:16||

Episode 6|Iteration 60|reward:  161.0|last_reward_at:   57|Elapsed Time: 0:00:17||

Episode 6|Iteration 64|reward:  161.0|last_reward_at:   57|Elapsed Time: 0:00:17||

Episode 6|Iteration 65|reward:  161.0|last_reward_at:   57|Elapsed Time: 0:00:19||

Episode 6|Iteration 68|reward:  175.0|last_reward_at:   57|Elapsed Time: 0:00:19||

Episode 6|Iteration 68|reward:  175.0|last_reward_at:   68|Elapsed Time: 0:00:19||

Episode 6|Iteration 70|reward:  175.0|last_reward_at:   68|Elapsed Time: 0:00:20||

Episode 6|Iteration 75|reward:  175.0|last_reward_at:   68|Elapsed Time: 0:00:22||

Episode 6|Iteration 80|reward:  175.0|last_reward_at:   68|Elapsed Time: 0:00:23||

Episode 6|Iteration 81|reward:  189.0|last_reward_at:   68|Elapsed Time: 0:00:23||

Episode 6|Iteration 81|reward:  189.0|last_reward_at:   81|Elapsed Time: 0:00:23||

Episode 6|Iteration 85|reward:  189.0|last_reward_at:   81|Elapsed Time: 0:00:24||

Episode 6|Iteration 90|reward:  189.0|last_reward_at:   81|Elapsed Time: 0:00:27||

Episode 6|Iteration 91|reward:  199.0|last_reward_at:   81|Elapsed Time: 0:00:27||

Episode 6|Iteration 91|reward:  199.0|last_reward_at:   91|Elapsed Time: 0:00:27||

Episode 6|Iteration 95|reward:  199.0|last_reward_at:   91|Elapsed Time: 0:00:27||

Episode 6|Iteration 95|reward:  199.0|last_reward_at:   91|Elapsed Time: 0:00:28||

Episode 6|Iteration 100|reward:  199.0|last_reward_at:   91|Elapsed Time: 0:00:30||

Episode 6|Iteration 104|reward:  213.0|last_reward_at:   91|Elapsed Time: 0:00:30||

Episode 6|Iteration 104|reward:  213.0|last_reward_at:  104|Elapsed Time: 0:00:30||

Episode 6|Iteration 105|reward:  213.0|last_reward_at:  104|Elapsed Time: 0:00:31||

Episode 6|Iteration 107|reward:  219.5|last_reward_at:  104|Elapsed Time: 0:00:31||

Episode 6|Iteration 107|reward:  219.5|last_reward_at:  107|Elapsed Time: 0:00:31||

Episode 6|Iteration 110|reward:  219.5|last_reward_at:  107|Elapsed Time: 0:00:33||

Episode 6|Iteration 115|reward:  224.5|last_reward_at:  107|Elapsed Time: 0:00:34||

Episode 6|Iteration 115|reward:  224.5|last_reward_at:  115|Elapsed Time: 0:00:34||

Episode 6|Iteration 120|reward:  224.5|last_reward_at:  115|Elapsed Time: 0:00:35||

Episode 6|Iteration 122|reward:  224.5|last_reward_at:  115|Elapsed Time: 0:00:35||

Episode 6|Iteration 124|reward:  224.5|last_reward_at:  115|Elapsed Time: 0:00:35||

Episode 6|Iteration 125|reward:  224.5|last_reward_at:  115|Elapsed Time: 0:00:37||

Episode 6|Iteration 127|reward:  233.5|last_reward_at:  115|Elapsed Time: 0:00:37||

Episode 6|Iteration 127|reward:  233.5|last_reward_at:  127|Elapsed Time: 0:00:37||

Episode 6|Iteration 130|reward:  233.5|last_reward_at:  127|Elapsed Time: 0:00:38||

Episode 6|Iteration 135|reward:  233.5|last_reward_at:  127|Elapsed Time: 0:00:40||

Episode 6|Iteration 140|reward:  233.5|last_reward_at:  127|Elapsed Time: 0:00:40||

Episode 6|Iteration 140|reward:  239.5|last_reward_at:  127|Elapsed Time: 0:00:42||

Episode 6|Iteration 140|reward:  239.5|last_reward_at:  140|Elapsed Time: 0:00:42||

Episode 6|Iteration 145|reward:  245.5|last_reward_at:  140|Elapsed Time: 0:00:43||

Episode 6|Iteration 145|reward:  245.5|last_reward_at:  145|Elapsed Time: 0:00:43||

Episode 6|Iteration 150|reward:  245.5|last_reward_at:  145|Elapsed Time: 0:00:44||

Episode 6|Iteration 155|reward:  245.5|last_reward_at:  145|Elapsed Time: 0:00:46||

Episode 6|Iteration 159|reward:  245.5|last_reward_at:  145|Elapsed Time: 0:00:46||

Episode 6|Iteration 159|reward:  251.5|last_reward_at:  145|Elapsed Time: 0:00:46||

Episode 6|Iteration 159|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:46||

Episode 6|Iteration 160|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:48||

Episode 6|Iteration 165|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:49||

Episode 6|Iteration 168|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:49||

Episode 6|Iteration 170|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:51||

Episode 6|Iteration 174|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:51||

Episode 6|Iteration 175|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:52||

Episode 6|Iteration 180|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:54||

Episode 6|Iteration 182|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:54||

Episode 6|Iteration 185|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:55||

Episode 6|Iteration 190|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:55||

Episode 6|Iteration 190|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:57||

Episode 6|Iteration 195|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:58||

Episode 6|Iteration 199|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:00:58||

Episode 6|Iteration 200|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:01:00||

Episode 6|Iteration 205|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:01:01||

Episode 6|Iteration 210|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:01:03||

Episode 6|Iteration 215|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:01:04||

Episode 6|Iteration 220|reward:  251.5|last_reward_at:  159|Elapsed Time: 0:01:06||

Episode 6|Iteration 222|reward:  264.5|last_reward_at:  159|Elapsed Time: 0:01:06||

Episode 6|Iteration 222|reward:  264.5|last_reward_at:  222|Elapsed Time: 0:01:06||

Episode 6|Iteration 225|reward:  264.5|last_reward_at:  222|Elapsed Time: 0:01:07||

Episode 6|Iteration 228|reward:  264.5|last_reward_at:  222|Elapsed Time: 0:01:08||

Episode 6|Iteration 230|reward:  264.5|last_reward_at:  222|Elapsed Time: 0:01:09||

Episode 6|Iteration 233|reward:  264.5|last_reward_at:  222|Elapsed Time: 0:01:09||

Episode 6|Iteration 235|reward:  264.5|last_reward_at:  222|Elapsed Time: 0:01:10||

Episode 6|Iteration 240|reward:  264.5|last_reward_at:  222|Elapsed Time: 0:01:12||

Episode 6|Iteration 245|reward:  264.5|last_reward_at:  222|Elapsed Time: 0:01:13||

Episode 6|Iteration 250|reward:  264.5|last_reward_at:  222|Elapsed Time: 0:01:15||

Episode 6|Iteration 255|reward:  264.5|last_reward_at:  222|Elapsed Time: 0:01:16||

Episode 6|Iteration 257|reward:  324.5|last_reward_at:  222|Elapsed Time: 0:01:16||

Episode 6|Iteration 257|reward:  324.5|last_reward_at:  257|Elapsed Time: 0:01:16||

Episode 6|Iteration 260|reward:  324.5|last_reward_at:  257|Elapsed Time: 0:01:17||

Episode 6|Iteration 265|reward:  324.5|last_reward_at:  257|Elapsed Time: 0:01:19||

Episode 6|Iteration 270|reward:  324.5|last_reward_at:  257|Elapsed Time: 0:01:21||

Episode 6|Iteration 275|reward:  324.5|last_reward_at:  257|Elapsed Time: 0:01:22||

Episode 6|Iteration 280|reward:  324.5|last_reward_at:  257|Elapsed Time: 0:01:23||

Episode 6|Iteration 285|reward:  324.5|last_reward_at:  257|Elapsed Time: 0:01:25||

Episode 6|Iteration 290|reward:  324.5|last_reward_at:  257|Elapsed Time: 0:01:26||

Episode 6|Iteration 293|reward:  330.5|last_reward_at:  257|Elapsed Time: 0:01:26||

Episode 6|Iteration 293|reward:  330.5|last_reward_at:  293|Elapsed Time: 0:01:26||

Episode 6|Iteration 295|reward:  330.5|last_reward_at:  293|Elapsed Time: 0:01:27||

Episode 6|Iteration 300|reward:  330.5|last_reward_at:  293|Elapsed Time: 0:01:29||

Episode 6|Iteration 305|reward:  330.5|last_reward_at:  293|Elapsed Time: 0:01:30||

Episode 6|Iteration 309|reward:  330.5|last_reward_at:  293|Elapsed Time: 0:01:31||

Episode 6|Iteration 310|reward:  330.5|last_reward_at:  293|Elapsed Time: 0:01:32||

Episode 6|Iteration 315|reward:  330.5|last_reward_at:  293|Elapsed Time: 0:01:34||

Episode 6|Iteration 320|reward:  330.5|last_reward_at:  293|Elapsed Time: 0:01:35||

Episode 6|Iteration 321|reward:  450.5|last_reward_at:  293|Elapsed Time: 0:01:35||

Episode 6|Iteration 321|reward:  450.5|last_reward_at:  321|Elapsed Time: 0:01:35||

Episode 6|Iteration 325|reward:  450.5|last_reward_at:  321|Elapsed Time: 0:01:37||

Episode 6|Iteration 330|reward:  450.5|last_reward_at:  321|Elapsed Time: 0:01:38||

Episode 6|Iteration 333|reward:  490.5|last_reward_at:  321|Elapsed Time: 0:01:38||

Episode 6|Iteration 333|reward:  490.5|last_reward_at:  333|Elapsed Time: 0:01:38||

Episode 6|Iteration 335|reward:  490.5|last_reward_at:  333|Elapsed Time: 0:01:39||

Episode 6|Iteration 340|reward:  541.5|last_reward_at:  333|Elapsed Time: 0:01:41||

Episode 6|Iteration 340|reward:  541.5|last_reward_at:  340|Elapsed Time: 0:01:41||

Episode 6|Iteration 342|reward:  541.5|last_reward_at:  340|Elapsed Time: 0:01:41||

Episode 6|Iteration 345|reward:  541.5|last_reward_at:  340|Elapsed Time: 0:01:42||

Episode 6|Iteration 349|reward:  541.5|last_reward_at:  340|Elapsed Time: 0:01:42||

Episode 6|Iteration 350|reward:  571.5|last_reward_at:  340|Elapsed Time: 0:01:44||

Episode 6|Iteration 350|reward:  571.5|last_reward_at:  350|Elapsed Time: 0:01:44||

Episode 6|Iteration 355|reward:  571.5|last_reward_at:  350|Elapsed Time: 0:01:44||

Episode 6|Iteration 355|reward:  571.5|last_reward_at:  350|Elapsed Time: 0:01:46||

Episode 6|Iteration 360|reward:  571.5|last_reward_at:  350|Elapsed Time: 0:01:47||

Episode 6|Iteration 365|reward:  571.5|last_reward_at:  350|Elapsed Time: 0:01:49||

Episode 6|Iteration 367|reward:  571.5|last_reward_at:  350|Elapsed Time: 0:01:49||

Episode 6|Iteration 370|reward:  571.5|last_reward_at:  350|Elapsed Time: 0:01:50||

Episode 6|Iteration 375|reward:  571.5|last_reward_at:  350|Elapsed Time: 0:01:52||

Episode 6|Iteration 378|reward:  571.5|last_reward_at:  350|Elapsed Time: 0:01:52||

Episode 6|Iteration 380|reward:  571.5|last_reward_at:  350|Elapsed Time: 0:01:53||

Episode 6|Iteration 384|reward:  578.0|last_reward_at:  350|Elapsed Time: 0:01:53||

Episode 6|Iteration 384|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:01:53||

Episode 6|Iteration 385|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:01:55||

Episode 6|Iteration 386|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:01:55||

Episode 6|Iteration 390|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:01:57||

Episode 6|Iteration 395|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:01:59||

Episode 6|Iteration 400|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:00||

Episode 6|Iteration 401|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:01||

Episode 6|Iteration 405|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:03||

Episode 6|Iteration 410|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:04||

Episode 6|Iteration 412|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:04||

Episode 6|Iteration 415|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:06||

Episode 6|Iteration 420|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:07||

Episode 6|Iteration 424|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:08||

Episode 6|Iteration 425|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:09||

Episode 6|Iteration 430|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:11||

Episode 6|Iteration 435|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:13||

Episode 6|Iteration 440|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:15||

Episode 6|Iteration 444|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:15||

Episode 6|Iteration 445|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:16||

Episode 6|Iteration 450|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:16||

Episode 6|Iteration 450|reward:  578.0|last_reward_at:  384|Elapsed Time: 0:02:18||

Episode 6|Iteration 455|reward:  584.0|last_reward_at:  384|Elapsed Time: 0:02:20||

Episode 6|Iteration 455|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:20||

Episode 6|Iteration 457|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:20||

Episode 6|Iteration 460|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:22||

Episode 6|Iteration 465|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:24||

Episode 6|Iteration 469|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:24||

Episode 6|Iteration 470|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:25||

Episode 6|Iteration 474|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:25||

Episode 6|Iteration 475|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:27||

Episode 6|Iteration 480|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:28||

Episode 6|Iteration 483|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:28||

Episode 6|Iteration 485|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:30||

Episode 6|Iteration 490|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:31||

Episode 6|Iteration 495|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:38||

Episode 6|Iteration 500|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:38||

Episode 6|Iteration 500|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:40||

Episode 6|Iteration 500|reward:  584.0|last_reward_at:  455|Elapsed Time: 0:02:40||




  Episode 6 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 5/13 (0.28)
    explore-remote: 5/58 (0.08)
    explore-connect: 0/52 (0.00)
    exploit-local: 9/94 (0.09)
    exploit-remote: 2/141 (0.01)
    exploit-connect: 6/115 (0.05)
  exploit deflected to exploration: 33
  ## Episode: 7/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 7|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 7|Iteration 5|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 7|Iteration 10|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 7|Iteration 15|reward:   20.0|last_reward_at:    1|Elapsed Time: 0:00:03||

Episode 7|Iteration 15|reward:   20.0|last_reward_at:   15|Elapsed Time: 0:00:03||

Episode 7|Iteration 20|reward:   20.0|last_reward_at:   15|Elapsed Time: 0:00:04||

Episode 7|Iteration 25|reward:   20.0|last_reward_at:   15|Elapsed Time: 0:00:06||

Episode 7|Iteration 29|reward:   31.0|last_reward_at:   15|Elapsed Time: 0:00:06||

Episode 7|Iteration 29|reward:   31.0|last_reward_at:   29|Elapsed Time: 0:00:06||

Episode 7|Iteration 30|reward:   42.0|last_reward_at:   29|Elapsed Time: 0:00:07||

Episode 7|Iteration 30|reward:   42.0|last_reward_at:   30|Elapsed Time: 0:00:07||

Episode 7|Iteration 31|reward:   48.0|last_reward_at:   30|Elapsed Time: 0:00:07||

Episode 7|Iteration 31|reward:   48.0|last_reward_at:   31|Elapsed Time: 0:00:07||

Episode 7|Iteration 35|reward:   48.0|last_reward_at:   31|Elapsed Time: 0:00:08||

Episode 7|Iteration 37|reward:   48.0|last_reward_at:   31|Elapsed Time: 0:00:09||

Episode 7|Iteration 40|reward:   48.0|last_reward_at:   31|Elapsed Time: 0:00:10||

Episode 7|Iteration 41|reward:   61.0|last_reward_at:   31|Elapsed Time: 0:00:10||

Episode 7|Iteration 41|reward:   61.0|last_reward_at:   41|Elapsed Time: 0:00:10||

Episode 7|Iteration 45|reward:  101.0|last_reward_at:   41|Elapsed Time: 0:00:11||

Episode 7|Iteration 45|reward:  101.0|last_reward_at:   45|Elapsed Time: 0:00:11||

Episode 7|Iteration 48|reward:  115.0|last_reward_at:   45|Elapsed Time: 0:00:11||

Episode 7|Iteration 48|reward:  115.0|last_reward_at:   48|Elapsed Time: 0:00:11||

Episode 7|Iteration 50|reward:  115.0|last_reward_at:   48|Elapsed Time: 0:00:13||

Episode 7|Iteration 53|reward:  125.0|last_reward_at:   48|Elapsed Time: 0:00:13||

Episode 7|Iteration 53|reward:  125.0|last_reward_at:   53|Elapsed Time: 0:00:13||

Episode 7|Iteration 55|reward:  125.0|last_reward_at:   53|Elapsed Time: 0:00:14||

Episode 7|Iteration 57|reward:  125.0|last_reward_at:   53|Elapsed Time: 0:00:14||

Episode 7|Iteration 58|reward:  136.0|last_reward_at:   53|Elapsed Time: 0:00:14||

Episode 7|Iteration 58|reward:  136.0|last_reward_at:   58|Elapsed Time: 0:00:14||

Episode 7|Iteration 60|reward:  256.0|last_reward_at:   58|Elapsed Time: 0:00:15||

Episode 7|Iteration 60|reward:  256.0|last_reward_at:   60|Elapsed Time: 0:00:15||

Episode 7|Iteration 61|reward:  307.0|last_reward_at:   60|Elapsed Time: 0:00:15||

Episode 7|Iteration 61|reward:  307.0|last_reward_at:   61|Elapsed Time: 0:00:15||

Episode 7|Iteration 65|reward:  307.0|last_reward_at:   61|Elapsed Time: 0:00:16||

Episode 7|Iteration 70|reward:  307.0|last_reward_at:   61|Elapsed Time: 0:00:17||

Episode 7|Iteration 70|reward:  328.5|last_reward_at:   61|Elapsed Time: 0:00:18||

Episode 7|Iteration 70|reward:  328.5|last_reward_at:   70|Elapsed Time: 0:00:18||

Episode 7|Iteration 75|reward:  328.5|last_reward_at:   70|Elapsed Time: 0:00:19||

Episode 7|Iteration 77|reward:  334.5|last_reward_at:   70|Elapsed Time: 0:00:19||

Episode 7|Iteration 77|reward:  334.5|last_reward_at:   77|Elapsed Time: 0:00:19||

Episode 7|Iteration 79|reward:  374.5|last_reward_at:   77|Elapsed Time: 0:00:19||

Episode 7|Iteration 79|reward:  374.5|last_reward_at:   79|Elapsed Time: 0:00:19||

Episode 7|Iteration 80|reward:  374.5|last_reward_at:   79|Elapsed Time: 0:00:21||

Episode 7|Iteration 83|reward:  374.5|last_reward_at:   79|Elapsed Time: 0:00:21||

Episode 7|Iteration 85|reward:  374.5|last_reward_at:   79|Elapsed Time: 0:00:23||

Episode 7|Iteration 89|reward:  374.5|last_reward_at:   79|Elapsed Time: 0:00:23||

Episode 7|Iteration 90|reward:  374.5|last_reward_at:   79|Elapsed Time: 0:00:25||

Episode 7|Iteration 93|reward:  374.5|last_reward_at:   79|Elapsed Time: 0:00:25||

Episode 7|Iteration 94|reward:  389.5|last_reward_at:   79|Elapsed Time: 0:00:25||

Episode 7|Iteration 94|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:25||

Episode 7|Iteration 95|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:26||

Episode 7|Iteration 100|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:27||

Episode 7|Iteration 105|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:29||

Episode 7|Iteration 110|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:31||

Episode 7|Iteration 114|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:31||

Episode 7|Iteration 115|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:33||

Episode 7|Iteration 120|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:35||

Episode 7|Iteration 125|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:36||

Episode 7|Iteration 130|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:38||

Episode 7|Iteration 135|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:39||

Episode 7|Iteration 140|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:39||

Episode 7|Iteration 140|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:40||

Episode 7|Iteration 145|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:41||

Episode 7|Iteration 147|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:41||

Episode 7|Iteration 150|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:43||

Episode 7|Iteration 155|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:45||

Episode 7|Iteration 160|reward:  389.5|last_reward_at:   94|Elapsed Time: 0:00:46||

Episode 7|Iteration 161|reward:  395.5|last_reward_at:   94|Elapsed Time: 0:00:46||

Episode 7|Iteration 161|reward:  395.5|last_reward_at:  161|Elapsed Time: 0:00:46||

Episode 7|Iteration 165|reward:  395.5|last_reward_at:  161|Elapsed Time: 0:00:47||

Episode 7|Iteration 169|reward:  395.5|last_reward_at:  161|Elapsed Time: 0:00:48||

Episode 7|Iteration 170|reward:  395.5|last_reward_at:  161|Elapsed Time: 0:00:49||

Episode 7|Iteration 175|reward:  395.5|last_reward_at:  161|Elapsed Time: 0:00:51||

Episode 7|Iteration 180|reward:  395.5|last_reward_at:  161|Elapsed Time: 0:00:52||

Episode 7|Iteration 184|reward:  395.5|last_reward_at:  161|Elapsed Time: 0:00:52||

Episode 7|Iteration 185|reward:  408.5|last_reward_at:  161|Elapsed Time: 0:00:53||

Episode 7|Iteration 185|reward:  408.5|last_reward_at:  185|Elapsed Time: 0:00:53||

Episode 7|Iteration 186|reward:  408.5|last_reward_at:  185|Elapsed Time: 0:00:53||

Episode 7|Iteration 190|reward:  408.5|last_reward_at:  185|Elapsed Time: 0:00:54||

Episode 7|Iteration 190|reward:  408.5|last_reward_at:  185|Elapsed Time: 0:00:55||

Episode 7|Iteration 195|reward:  408.5|last_reward_at:  185|Elapsed Time: 0:00:56||

Episode 7|Iteration 198|reward:  468.5|last_reward_at:  185|Elapsed Time: 0:00:56||

Episode 7|Iteration 198|reward:  468.5|last_reward_at:  198|Elapsed Time: 0:00:56||

Episode 7|Iteration 200|reward:  474.5|last_reward_at:  198|Elapsed Time: 0:00:57||

Episode 7|Iteration 200|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:00:57||

Episode 7|Iteration 203|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:00:57||

Episode 7|Iteration 205|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:00:59||

Episode 7|Iteration 210|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:01||

Episode 7|Iteration 215|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:02||

Episode 7|Iteration 220|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:04||

Episode 7|Iteration 224|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:04||

Episode 7|Iteration 225|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:05||

Episode 7|Iteration 228|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:05||

Episode 7|Iteration 230|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:07||

Episode 7|Iteration 235|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:08||

Episode 7|Iteration 238|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:08||

Episode 7|Iteration 240|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:10||

Episode 7|Iteration 245|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:11||

Episode 7|Iteration 246|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:11||

Episode 7|Iteration 250|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:13||

Episode 7|Iteration 255|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:15||

Episode 7|Iteration 260|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:16||

Episode 7|Iteration 265|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:18||

Episode 7|Iteration 270|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:20||

Episode 7|Iteration 275|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:22||

Episode 7|Iteration 277|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:22||

Episode 7|Iteration 280|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:24||

Episode 7|Iteration 285|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:24||

Episode 7|Iteration 285|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:25||

Episode 7|Iteration 290|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:27||

Episode 7|Iteration 294|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:27||

Episode 7|Iteration 295|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:28||

Episode 7|Iteration 300|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:29||

Episode 7|Iteration 305|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:31||

Episode 7|Iteration 310|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:32||

Episode 7|Iteration 315|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:34||

Episode 7|Iteration 320|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:35||

Episode 7|Iteration 325|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:37||

Episode 7|Iteration 330|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:38||

Episode 7|Iteration 335|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:40||

Episode 7|Iteration 340|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:41||

Episode 7|Iteration 345|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:43||

Episode 7|Iteration 349|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:43||

Episode 7|Iteration 350|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:45||

Episode 7|Iteration 355|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:46||

Episode 7|Iteration 358|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:46||

Episode 7|Iteration 360|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:48||

Episode 7|Iteration 365|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:50||

Episode 7|Iteration 370|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:52||

Episode 7|Iteration 375|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:53||

Episode 7|Iteration 380|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:55||

Episode 7|Iteration 385|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:56||

Episode 7|Iteration 390|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:58||

Episode 7|Iteration 395|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:59||

Episode 7|Iteration 399|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:01:59||

Episode 7|Iteration 400|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:01||

Episode 7|Iteration 405|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:02||

Episode 7|Iteration 410|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:03||

Episode 7|Iteration 415|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:04||

Episode 7|Iteration 420|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:06||

Episode 7|Iteration 425|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:06||

Episode 7|Iteration 425|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:08||

Episode 7|Iteration 430|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:09||

Episode 7|Iteration 435|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:11||

Episode 7|Iteration 440|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:13||

Episode 7|Iteration 445|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:14||

Episode 7|Iteration 448|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:14||

Episode 7|Iteration 450|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:15||

Episode 7|Iteration 450|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:16||

Episode 7|Iteration 455|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:17||

Episode 7|Iteration 460|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:19||

Episode 7|Iteration 465|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:20||

Episode 7|Iteration 469|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:20||

Episode 7|Iteration 470|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:22||

Episode 7|Iteration 475|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:22||

Episode 7|Iteration 475|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:23||

Episode 7|Iteration 480|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:24||

Episode 7|Iteration 485|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:26||

Episode 7|Iteration 488|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:26||

Episode 7|Iteration 490|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:27||

Episode 7|Iteration 495|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:29||

Episode 7|Iteration 500|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:29||

Episode 7|Iteration 500|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:31||

Episode 7|Iteration 500|reward:  474.5|last_reward_at:  200|Elapsed Time: 0:02:31||




  Episode 7 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 4/15 (0.21)
    explore-remote: 2/64 (0.03)
    explore-connect: 2/47 (0.04)
    exploit-local: 7/111 (0.06)
    exploit-remote: 2/131 (0.02)
    exploit-connect: 3/112 (0.03)
  exploit deflected to exploration: 34
  ## Episode: 8/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 8|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 8|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 8|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 8|Iteration 5|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 8|Iteration 10|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 8|Iteration 15|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:04||

Episode 8|Iteration 20|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:05||

Episode 8|Iteration 25|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:06||

Episode 8|Iteration 30|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:08||

Episode 8|Iteration 35|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:09||

Episode 8|Iteration 37|reward:   25.0|last_reward_at:    1|Elapsed Time: 0:00:09||

Episode 8|Iteration 37|reward:   25.0|last_reward_at:   37|Elapsed Time: 0:00:09||

Episode 8|Iteration 40|reward:   25.0|last_reward_at:   37|Elapsed Time: 0:00:10||

Episode 8|Iteration 45|reward:   25.0|last_reward_at:   37|Elapsed Time: 0:00:10||

Episode 8|Iteration 45|reward:   25.0|last_reward_at:   37|Elapsed Time: 0:00:11||

Episode 8|Iteration 48|reward:   25.0|last_reward_at:   37|Elapsed Time: 0:00:11||

Episode 8|Iteration 50|reward:   25.0|last_reward_at:   37|Elapsed Time: 0:00:13||

Episode 8|Iteration 53|reward:   38.0|last_reward_at:   37|Elapsed Time: 0:00:13||

Episode 8|Iteration 53|reward:   38.0|last_reward_at:   53|Elapsed Time: 0:00:13||

Episode 8|Iteration 55|reward:   38.0|last_reward_at:   53|Elapsed Time: 0:00:14||

Episode 8|Iteration 58|reward:   98.0|last_reward_at:   53|Elapsed Time: 0:00:14||

Episode 8|Iteration 58|reward:   98.0|last_reward_at:   58|Elapsed Time: 0:00:14||

Episode 8|Iteration 60|reward:   98.0|last_reward_at:   58|Elapsed Time: 0:00:15||

Episode 8|Iteration 64|reward:   98.0|last_reward_at:   58|Elapsed Time: 0:00:16||

Episode 8|Iteration 65|reward:   98.0|last_reward_at:   58|Elapsed Time: 0:00:17||

Episode 8|Iteration 70|reward:   98.0|last_reward_at:   58|Elapsed Time: 0:00:18||

Episode 8|Iteration 73|reward:  104.0|last_reward_at:   58|Elapsed Time: 0:00:18||

Episode 8|Iteration 73|reward:  104.0|last_reward_at:   73|Elapsed Time: 0:00:18||

Episode 8|Iteration 75|reward:  104.0|last_reward_at:   73|Elapsed Time: 0:00:20||

Episode 8|Iteration 80|reward:  104.0|last_reward_at:   73|Elapsed Time: 0:00:22||

Episode 8|Iteration 85|reward:  104.0|last_reward_at:   73|Elapsed Time: 0:00:23||

Episode 8|Iteration 88|reward:  104.0|last_reward_at:   73|Elapsed Time: 0:00:23||

Episode 8|Iteration 90|reward:  104.0|last_reward_at:   73|Elapsed Time: 0:00:25||

Episode 8|Iteration 95|reward:  104.0|last_reward_at:   73|Elapsed Time: 0:00:27||

Episode 8|Iteration 100|reward:  104.0|last_reward_at:   73|Elapsed Time: 0:00:28||

Episode 8|Iteration 105|reward:  104.0|last_reward_at:   73|Elapsed Time: 0:00:30||

Episode 8|Iteration 108|reward:  118.0|last_reward_at:   73|Elapsed Time: 0:00:30||

Episode 8|Iteration 108|reward:  118.0|last_reward_at:  108|Elapsed Time: 0:00:30||

Episode 8|Iteration 110|reward:  118.0|last_reward_at:  108|Elapsed Time: 0:00:31||

Episode 8|Iteration 115|reward:  118.0|last_reward_at:  108|Elapsed Time: 0:00:33||

Episode 8|Iteration 120|reward:  118.0|last_reward_at:  108|Elapsed Time: 0:00:35||

Episode 8|Iteration 125|reward:  118.0|last_reward_at:  108|Elapsed Time: 0:00:36||

Episode 8|Iteration 130|reward:  124.5|last_reward_at:  108|Elapsed Time: 0:00:37||

Episode 8|Iteration 130|reward:  124.5|last_reward_at:  130|Elapsed Time: 0:00:37||

Episode 8|Iteration 132|reward:  138.5|last_reward_at:  130|Elapsed Time: 0:00:37||

Episode 8|Iteration 132|reward:  138.5|last_reward_at:  132|Elapsed Time: 0:00:37||

Episode 8|Iteration 135|reward:  138.5|last_reward_at:  132|Elapsed Time: 0:00:39||

Episode 8|Iteration 139|reward:  149.5|last_reward_at:  132|Elapsed Time: 0:00:39||

Episode 8|Iteration 139|reward:  149.5|last_reward_at:  139|Elapsed Time: 0:00:39||

Episode 8|Iteration 140|reward:  149.5|last_reward_at:  139|Elapsed Time: 0:00:41||

Episode 8|Iteration 145|reward:  149.5|last_reward_at:  139|Elapsed Time: 0:00:42||

Episode 8|Iteration 150|reward:  149.5|last_reward_at:  139|Elapsed Time: 0:00:44||

Episode 8|Iteration 154|reward:  149.5|last_reward_at:  139|Elapsed Time: 0:00:44||

Episode 8|Iteration 155|reward:  149.5|last_reward_at:  139|Elapsed Time: 0:00:45||

Episode 8|Iteration 159|reward:  149.5|last_reward_at:  139|Elapsed Time: 0:00:45||

Episode 8|Iteration 160|reward:  149.5|last_reward_at:  139|Elapsed Time: 0:00:47||

Episode 8|Iteration 163|reward:  189.5|last_reward_at:  139|Elapsed Time: 0:00:47||

Episode 8|Iteration 163|reward:  189.5|last_reward_at:  163|Elapsed Time: 0:00:47||

Episode 8|Iteration 165|reward:  189.5|last_reward_at:  163|Elapsed Time: 0:00:48||

Episode 8|Iteration 170|reward:  189.5|last_reward_at:  163|Elapsed Time: 0:00:50||

Episode 8|Iteration 175|reward:  189.5|last_reward_at:  163|Elapsed Time: 0:00:51||

Episode 8|Iteration 180|reward:  189.5|last_reward_at:  163|Elapsed Time: 0:00:52||

Episode 8|Iteration 183|reward:  189.5|last_reward_at:  163|Elapsed Time: 0:00:53||

Episode 8|Iteration 185|reward:  189.5|last_reward_at:  163|Elapsed Time: 0:00:54||

Episode 8|Iteration 186|reward:  195.5|last_reward_at:  163|Elapsed Time: 0:00:54||

Episode 8|Iteration 186|reward:  195.5|last_reward_at:  186|Elapsed Time: 0:00:54||

Episode 8|Iteration 190|reward:  195.5|last_reward_at:  186|Elapsed Time: 0:00:55||

Episode 8|Iteration 190|reward:  195.5|last_reward_at:  186|Elapsed Time: 0:00:56||

Episode 8|Iteration 195|reward:  195.5|last_reward_at:  186|Elapsed Time: 0:00:58||

Episode 8|Iteration 200|reward:  195.5|last_reward_at:  186|Elapsed Time: 0:01:00||

Episode 8|Iteration 203|reward:  195.5|last_reward_at:  186|Elapsed Time: 0:01:00||

Episode 8|Iteration 205|reward:  195.5|last_reward_at:  186|Elapsed Time: 0:01:01||

Episode 8|Iteration 206|reward:  204.5|last_reward_at:  186|Elapsed Time: 0:01:01||

Episode 8|Iteration 206|reward:  204.5|last_reward_at:  206|Elapsed Time: 0:01:01||

Episode 8|Iteration 208|reward:  244.5|last_reward_at:  206|Elapsed Time: 0:01:01||

Episode 8|Iteration 208|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:01||

Episode 8|Iteration 210|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:02||

Episode 8|Iteration 215|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:04||

Episode 8|Iteration 220|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:05||

Episode 8|Iteration 222|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:05||

Episode 8|Iteration 225|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:07||

Episode 8|Iteration 230|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:08||

Episode 8|Iteration 234|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:08||

Episode 8|Iteration 235|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:09||

Episode 8|Iteration 239|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:10||

Episode 8|Iteration 240|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:11||

Episode 8|Iteration 245|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:13||

Episode 8|Iteration 250|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:15||

Episode 8|Iteration 255|reward:  244.5|last_reward_at:  208|Elapsed Time: 0:01:16||

Episode 8|Iteration 259|reward:  258.5|last_reward_at:  208|Elapsed Time: 0:01:16||

Episode 8|Iteration 259|reward:  258.5|last_reward_at:  259|Elapsed Time: 0:01:16||

Episode 8|Iteration 260|reward:  258.5|last_reward_at:  259|Elapsed Time: 0:01:17||

Episode 8|Iteration 262|reward:  263.5|last_reward_at:  259|Elapsed Time: 0:01:17||

Episode 8|Iteration 262|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:17||

Episode 8|Iteration 265|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:19||

Episode 8|Iteration 270|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:20||

Episode 8|Iteration 275|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:21||

Episode 8|Iteration 277|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:22||

Episode 8|Iteration 279|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:22||

Episode 8|Iteration 280|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:23||

Episode 8|Iteration 284|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:23||

Episode 8|Iteration 285|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:24||

Episode 8|Iteration 290|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:26||

Episode 8|Iteration 295|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:27||

Episode 8|Iteration 300|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:29||

Episode 8|Iteration 305|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:30||

Episode 8|Iteration 309|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:30||

Episode 8|Iteration 310|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:31||

Episode 8|Iteration 315|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:33||

Episode 8|Iteration 320|reward:  263.5|last_reward_at:  262|Elapsed Time: 0:01:34||

Episode 8|Iteration 324|reward:  276.5|last_reward_at:  262|Elapsed Time: 0:01:34||

Episode 8|Iteration 324|reward:  276.5|last_reward_at:  324|Elapsed Time: 0:01:34||

Episode 8|Iteration 325|reward:  276.5|last_reward_at:  324|Elapsed Time: 0:01:35||

Episode 8|Iteration 330|reward:  276.5|last_reward_at:  324|Elapsed Time: 0:01:35||

Episode 8|Iteration 330|reward:  276.5|last_reward_at:  324|Elapsed Time: 0:01:37||

Episode 8|Iteration 334|reward:  286.5|last_reward_at:  324|Elapsed Time: 0:01:37||

Episode 8|Iteration 334|reward:  286.5|last_reward_at:  334|Elapsed Time: 0:01:37||

Episode 8|Iteration 335|reward:  286.5|last_reward_at:  334|Elapsed Time: 0:01:38||

Episode 8|Iteration 336|reward:  292.5|last_reward_at:  334|Elapsed Time: 0:01:38||

Episode 8|Iteration 336|reward:  292.5|last_reward_at:  336|Elapsed Time: 0:01:38||

Episode 8|Iteration 340|reward:  292.5|last_reward_at:  336|Elapsed Time: 0:01:40||

Episode 8|Iteration 341|reward:  298.5|last_reward_at:  336|Elapsed Time: 0:01:40||

Episode 8|Iteration 341|reward:  298.5|last_reward_at:  341|Elapsed Time: 0:01:40||

Episode 8|Iteration 345|reward:  298.5|last_reward_at:  341|Elapsed Time: 0:01:41||

Episode 8|Iteration 350|reward:  298.5|last_reward_at:  341|Elapsed Time: 0:01:43||

Episode 8|Iteration 355|reward:  298.5|last_reward_at:  341|Elapsed Time: 0:01:43||

Episode 8|Iteration 355|reward:  298.5|last_reward_at:  341|Elapsed Time: 0:01:45||

Episode 8|Iteration 358|reward:  298.5|last_reward_at:  341|Elapsed Time: 0:01:45||

Episode 8|Iteration 360|reward:  298.5|last_reward_at:  341|Elapsed Time: 0:01:46||

Episode 8|Iteration 365|reward:  298.5|last_reward_at:  341|Elapsed Time: 0:01:48||

Episode 8|Iteration 367|reward:  298.5|last_reward_at:  341|Elapsed Time: 0:01:48||

Episode 8|Iteration 370|reward:  298.5|last_reward_at:  341|Elapsed Time: 0:01:50||

Episode 8|Iteration 375|reward:  298.5|last_reward_at:  341|Elapsed Time: 0:01:51||

Episode 8|Iteration 376|reward:  358.5|last_reward_at:  341|Elapsed Time: 0:01:51||

Episode 8|Iteration 376|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:01:51||

Episode 8|Iteration 380|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:01:51||

Episode 8|Iteration 380|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:01:53||

Episode 8|Iteration 385|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:01:54||

Episode 8|Iteration 390|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:01:56||

Episode 8|Iteration 394|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:01:56||

Episode 8|Iteration 395|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:01:57||

Episode 8|Iteration 400|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:01:59||

Episode 8|Iteration 405|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:02:01||

Episode 8|Iteration 409|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:02:01||

Episode 8|Iteration 410|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:02:02||

Episode 8|Iteration 415|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:02:04||

Episode 8|Iteration 420|reward:  358.5|last_reward_at:  376|Elapsed Time: 0:02:06||

Episode 8|Iteration 424|reward:  478.5|last_reward_at:  376|Elapsed Time: 0:02:06||

Episode 8|Iteration 424|reward:  478.5|last_reward_at:  424|Elapsed Time: 0:02:06||

Episode 8|Iteration 425|reward:  478.5|last_reward_at:  424|Elapsed Time: 0:02:07||

Episode 8|Iteration 429|reward:  478.5|last_reward_at:  424|Elapsed Time: 0:02:07||

Episode 8|Iteration 430|reward:  478.5|last_reward_at:  424|Elapsed Time: 0:02:09||

Episode 8|Iteration 431|reward:  484.5|last_reward_at:  424|Elapsed Time: 0:02:09||

Episode 8|Iteration 431|reward:  484.5|last_reward_at:  431|Elapsed Time: 0:02:09||

Episode 8|Iteration 433|reward:  484.5|last_reward_at:  431|Elapsed Time: 0:02:09||

Episode 8|Iteration 435|reward:  484.5|last_reward_at:  431|Elapsed Time: 0:02:10||

Episode 8|Iteration 438|reward:  484.5|last_reward_at:  431|Elapsed Time: 0:02:10||

Episode 8|Iteration 440|reward:  484.5|last_reward_at:  431|Elapsed Time: 0:02:12||

Episode 8|Iteration 444|reward:  484.5|last_reward_at:  431|Elapsed Time: 0:02:12||

Episode 8|Iteration 445|reward:  484.5|last_reward_at:  431|Elapsed Time: 0:02:13||

Episode 8|Iteration 449|reward:  484.5|last_reward_at:  431|Elapsed Time: 0:02:13||

Episode 8|Iteration 450|reward:  484.5|last_reward_at:  431|Elapsed Time: 0:02:15||

Episode 8|Iteration 452|reward:  535.5|last_reward_at:  431|Elapsed Time: 0:02:15||

Episode 8|Iteration 452|reward:  535.5|last_reward_at:  452|Elapsed Time: 0:02:15||

Episode 8|Iteration 455|reward:  535.5|last_reward_at:  452|Elapsed Time: 0:02:16||

Episode 8|Iteration 457|reward:  535.5|last_reward_at:  452|Elapsed Time: 0:02:16||

Episode 8|Iteration 460|reward:  535.5|last_reward_at:  452|Elapsed Time: 0:02:18||

Episode 8|Iteration 465|reward:  535.5|last_reward_at:  452|Elapsed Time: 0:02:20||

Episode 8|Iteration 467|reward:  565.5|last_reward_at:  452|Elapsed Time: 0:02:20||

Episode 8|Iteration 467|reward:  565.5|last_reward_at:  467|Elapsed Time: 0:02:20||

Episode 8|Iteration 470|reward:  565.5|last_reward_at:  467|Elapsed Time: 0:02:21||

Episode 8|Iteration 475|reward:  565.5|last_reward_at:  467|Elapsed Time: 0:02:22||

Episode 8|Iteration 475|reward:  565.5|last_reward_at:  467|Elapsed Time: 0:02:23||

Episode 8|Iteration 477|reward:  565.5|last_reward_at:  467|Elapsed Time: 0:02:23||

Episode 8|Iteration 480|reward:  565.5|last_reward_at:  467|Elapsed Time: 0:02:25||

Episode 8|Iteration 483|reward:  572.0|last_reward_at:  467|Elapsed Time: 0:02:25||

Episode 8|Iteration 483|reward:  572.0|last_reward_at:  483|Elapsed Time: 0:02:25||

Episode 8|Iteration 485|reward:  572.0|last_reward_at:  483|Elapsed Time: 0:02:26||

Episode 8|Iteration 489|reward:  572.0|last_reward_at:  483|Elapsed Time: 0:02:26||

Episode 8|Iteration 490|reward:  572.0|last_reward_at:  483|Elapsed Time: 0:02:28||

Episode 8|Iteration 494|reward:  572.0|last_reward_at:  483|Elapsed Time: 0:02:28||

Episode 8|Iteration 495|reward:  572.0|last_reward_at:  483|Elapsed Time: 0:02:29||

Episode 8|Iteration 500|reward:  572.0|last_reward_at:  483|Elapsed Time: 0:02:29||

Episode 8|Iteration 500|reward:  572.0|last_reward_at:  483|Elapsed Time: 0:02:31||

Episode 8|Iteration 500|reward:  572.0|last_reward_at:  483|Elapsed Time: 0:02:31||




  Episode 8 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/13 (0.07)
    explore-remote: 1/79 (0.01)
    explore-connect: 0/71 (0.00)
    exploit-local: 13/71 (0.15)
    exploit-remote: 4/130 (0.03)
    exploit-connect: 6/111 (0.05)
  exploit deflected to exploration: 65
  ## Episode: 9/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 9|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 9|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 9|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 9|Iteration 5|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 9|Iteration 10|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 9|Iteration 15|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:04||

Episode 9|Iteration 20|reward:   20.0|last_reward_at:    1|Elapsed Time: 0:00:05||

Episode 9|Iteration 20|reward:   20.0|last_reward_at:   20|Elapsed Time: 0:00:05||

Episode 9|Iteration 24|reward:   31.0|last_reward_at:   20|Elapsed Time: 0:00:06||

Episode 9|Iteration 24|reward:   31.0|last_reward_at:   24|Elapsed Time: 0:00:06||

Episode 9|Iteration 25|reward:   44.0|last_reward_at:   24|Elapsed Time: 0:00:07||

Episode 9|Iteration 25|reward:   44.0|last_reward_at:   25|Elapsed Time: 0:00:07||

Episode 9|Iteration 30|reward:   44.0|last_reward_at:   25|Elapsed Time: 0:00:08||

Episode 9|Iteration 31|reward:   50.0|last_reward_at:   25|Elapsed Time: 0:00:08||

Episode 9|Iteration 31|reward:   50.0|last_reward_at:   31|Elapsed Time: 0:00:08||

Episode 9|Iteration 35|reward:   50.0|last_reward_at:   31|Elapsed Time: 0:00:10||

Episode 9|Iteration 40|reward:   50.0|last_reward_at:   31|Elapsed Time: 0:00:11||

Episode 9|Iteration 41|reward:   90.0|last_reward_at:   31|Elapsed Time: 0:00:11||

Episode 9|Iteration 41|reward:   90.0|last_reward_at:   41|Elapsed Time: 0:00:11||

Episode 9|Iteration 43|reward:   90.0|last_reward_at:   41|Elapsed Time: 0:00:11||

Episode 9|Iteration 45|reward:   90.0|last_reward_at:   41|Elapsed Time: 0:00:13||

Episode 9|Iteration 47|reward:  104.0|last_reward_at:   41|Elapsed Time: 0:00:13||

Episode 9|Iteration 47|reward:  104.0|last_reward_at:   47|Elapsed Time: 0:00:13||

Episode 9|Iteration 50|reward:  104.0|last_reward_at:   47|Elapsed Time: 0:00:15||

Episode 9|Iteration 54|reward:  104.0|last_reward_at:   47|Elapsed Time: 0:00:15||

Episode 9|Iteration 55|reward:  104.0|last_reward_at:   47|Elapsed Time: 0:00:17||

Episode 9|Iteration 58|reward:  114.0|last_reward_at:   47|Elapsed Time: 0:00:17||

Episode 9|Iteration 58|reward:  114.0|last_reward_at:   58|Elapsed Time: 0:00:17||

Episode 9|Iteration 60|reward:  114.0|last_reward_at:   58|Elapsed Time: 0:00:18||

Episode 9|Iteration 64|reward:  114.0|last_reward_at:   58|Elapsed Time: 0:00:18||

Episode 9|Iteration 65|reward:  114.0|last_reward_at:   58|Elapsed Time: 0:00:19||

Episode 9|Iteration 69|reward:  114.0|last_reward_at:   58|Elapsed Time: 0:00:19||

Episode 9|Iteration 70|reward:  114.0|last_reward_at:   58|Elapsed Time: 0:00:21||

Episode 9|Iteration 73|reward:  125.0|last_reward_at:   58|Elapsed Time: 0:00:21||

Episode 9|Iteration 73|reward:  125.0|last_reward_at:   73|Elapsed Time: 0:00:21||

Episode 9|Iteration 75|reward:  125.0|last_reward_at:   73|Elapsed Time: 0:00:22||

Episode 9|Iteration 77|reward:  131.0|last_reward_at:   73|Elapsed Time: 0:00:22||

Episode 9|Iteration 77|reward:  131.0|last_reward_at:   77|Elapsed Time: 0:00:22||

Episode 9|Iteration 80|reward:  131.0|last_reward_at:   77|Elapsed Time: 0:00:23||

Episode 9|Iteration 83|reward:  131.0|last_reward_at:   77|Elapsed Time: 0:00:23||

Episode 9|Iteration 84|reward:  131.0|last_reward_at:   77|Elapsed Time: 0:00:23||

Episode 9|Iteration 85|reward:  131.0|last_reward_at:   77|Elapsed Time: 0:00:25||

Episode 9|Iteration 90|reward:  140.0|last_reward_at:   77|Elapsed Time: 0:00:26||

Episode 9|Iteration 90|reward:  140.0|last_reward_at:   90|Elapsed Time: 0:00:26||

Episode 9|Iteration 94|reward:  140.0|last_reward_at:   90|Elapsed Time: 0:00:27||

Episode 9|Iteration 95|reward:  140.0|last_reward_at:   90|Elapsed Time: 0:00:28||

Episode 9|Iteration 99|reward:  260.0|last_reward_at:   90|Elapsed Time: 0:00:28||

Episode 9|Iteration 99|reward:  260.0|last_reward_at:   99|Elapsed Time: 0:00:28||

Episode 9|Iteration 100|reward:  260.0|last_reward_at:   99|Elapsed Time: 0:00:30||

Episode 9|Iteration 105|reward:  281.5|last_reward_at:   99|Elapsed Time: 0:00:32||

Episode 9|Iteration 105|reward:  281.5|last_reward_at:  105|Elapsed Time: 0:00:32||

Episode 9|Iteration 110|reward:  281.5|last_reward_at:  105|Elapsed Time: 0:00:34||

Episode 9|Iteration 113|reward:  281.5|last_reward_at:  105|Elapsed Time: 0:00:34||

Episode 9|Iteration 115|reward:  281.5|last_reward_at:  105|Elapsed Time: 0:00:36||

Episode 9|Iteration 117|reward:  281.5|last_reward_at:  105|Elapsed Time: 0:00:36||

Episode 9|Iteration 120|reward:  296.5|last_reward_at:  105|Elapsed Time: 0:00:37||

Episode 9|Iteration 120|reward:  296.5|last_reward_at:  120|Elapsed Time: 0:00:37||

Episode 9|Iteration 125|reward:  296.5|last_reward_at:  120|Elapsed Time: 0:00:38||

Episode 9|Iteration 130|reward:  296.5|last_reward_at:  120|Elapsed Time: 0:00:40||

Episode 9|Iteration 134|reward:  309.5|last_reward_at:  120|Elapsed Time: 0:00:40||

Episode 9|Iteration 134|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:40||

Episode 9|Iteration 135|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:42||

Episode 9|Iteration 140|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:43||

Episode 9|Iteration 145|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:44||

Episode 9|Iteration 150|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:46||

Episode 9|Iteration 155|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:47||

Episode 9|Iteration 160|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:49||

Episode 9|Iteration 165|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:49||

Episode 9|Iteration 165|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:51||

Episode 9|Iteration 170|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:52||

Episode 9|Iteration 175|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:53||

Episode 9|Iteration 180|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:55||

Episode 9|Iteration 185|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:56||

Episode 9|Iteration 190|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:56||

Episode 9|Iteration 190|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:00:58||

Episode 9|Iteration 195|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:01:00||

Episode 9|Iteration 200|reward:  309.5|last_reward_at:  134|Elapsed Time: 0:01:01||

Episode 9|Iteration 202|reward:  320.5|last_reward_at:  134|Elapsed Time: 0:01:01||

Episode 9|Iteration 202|reward:  320.5|last_reward_at:  202|Elapsed Time: 0:01:01||

Episode 9|Iteration 205|reward:  320.5|last_reward_at:  202|Elapsed Time: 0:01:03||

Episode 9|Iteration 210|reward:  320.5|last_reward_at:  202|Elapsed Time: 0:01:04||

Episode 9|Iteration 215|reward:  320.5|last_reward_at:  202|Elapsed Time: 0:01:05||

Episode 9|Iteration 220|reward:  320.5|last_reward_at:  202|Elapsed Time: 0:01:07||

Episode 9|Iteration 221|reward:  360.5|last_reward_at:  202|Elapsed Time: 0:01:07||

Episode 9|Iteration 221|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:07||

Episode 9|Iteration 224|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:07||

Episode 9|Iteration 225|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:09||

Episode 9|Iteration 230|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:10||

Episode 9|Iteration 235|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:10||

Episode 9|Iteration 235|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:12||

Episode 9|Iteration 240|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:13||

Episode 9|Iteration 245|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:15||

Episode 9|Iteration 250|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:16||

Episode 9|Iteration 255|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:18||

Episode 9|Iteration 257|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:18||

Episode 9|Iteration 260|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:20||

Episode 9|Iteration 265|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:21||

Episode 9|Iteration 270|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:23||

Episode 9|Iteration 275|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:25||

Episode 9|Iteration 280|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:26||

Episode 9|Iteration 285|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:26||

Episode 9|Iteration 285|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:27||

Episode 9|Iteration 289|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:28||

Episode 9|Iteration 290|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:29||

Episode 9|Iteration 295|reward:  360.5|last_reward_at:  221|Elapsed Time: 0:01:30||

Episode 9|Iteration 298|reward:  411.5|last_reward_at:  221|Elapsed Time: 0:01:30||

Episode 9|Iteration 298|reward:  411.5|last_reward_at:  298|Elapsed Time: 0:01:30||

Episode 9|Iteration 300|reward:  411.5|last_reward_at:  298|Elapsed Time: 0:01:32||

Episode 9|Iteration 304|reward:  411.5|last_reward_at:  298|Elapsed Time: 0:01:32||

Episode 9|Iteration 305|reward:  411.5|last_reward_at:  298|Elapsed Time: 0:01:33||

Episode 9|Iteration 309|reward:  471.5|last_reward_at:  298|Elapsed Time: 0:01:34||

Episode 9|Iteration 309|reward:  471.5|last_reward_at:  309|Elapsed Time: 0:01:34||

Episode 9|Iteration 310|reward:  471.5|last_reward_at:  309|Elapsed Time: 0:01:36||

Episode 9|Iteration 314|reward:  471.5|last_reward_at:  309|Elapsed Time: 0:01:36||

Episode 9|Iteration 315|reward:  471.5|last_reward_at:  309|Elapsed Time: 0:01:39||

Episode 9|Iteration 318|reward:  471.5|last_reward_at:  309|Elapsed Time: 0:01:39||

Episode 9|Iteration 320|reward:  471.5|last_reward_at:  309|Elapsed Time: 0:01:40||

Episode 9|Iteration 325|reward:  471.5|last_reward_at:  309|Elapsed Time: 0:01:42||

Episode 9|Iteration 329|reward:  471.5|last_reward_at:  309|Elapsed Time: 0:01:42||

Episode 9|Iteration 330|reward:  477.5|last_reward_at:  309|Elapsed Time: 0:01:44||

Episode 9|Iteration 330|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:44||

Episode 9|Iteration 332|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:44||

Episode 9|Iteration 335|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:46||

Episode 9|Iteration 340|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:47||

Episode 9|Iteration 345|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:49||

Episode 9|Iteration 350|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:50||

Episode 9|Iteration 351|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:50||

Episode 9|Iteration 355|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:50||

Episode 9|Iteration 355|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:52||

Episode 9|Iteration 360|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:54||

Episode 9|Iteration 365|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:55||

Episode 9|Iteration 370|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:57||

Episode 9|Iteration 375|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:01:59||

Episode 9|Iteration 380|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:00||

Episode 9|Iteration 385|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:02||

Episode 9|Iteration 390|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:03||

Episode 9|Iteration 393|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:03||

Episode 9|Iteration 395|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:05||

Episode 9|Iteration 399|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:05||

Episode 9|Iteration 400|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:07||

Episode 9|Iteration 405|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:08||

Episode 9|Iteration 406|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:08||

Episode 9|Iteration 408|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:09||

Episode 9|Iteration 410|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:10||

Episode 9|Iteration 414|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:10||

Episode 9|Iteration 415|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:12||

Episode 9|Iteration 418|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:12||

Episode 9|Iteration 420|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:14||

Episode 9|Iteration 425|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:14||

Episode 9|Iteration 425|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:16||

Episode 9|Iteration 430|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:18||

Episode 9|Iteration 435|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:19||

Episode 9|Iteration 438|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:19||

Episode 9|Iteration 440|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:21||

Episode 9|Iteration 444|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:21||

Episode 9|Iteration 445|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:23||

Episode 9|Iteration 447|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:23||

Episode 9|Iteration 450|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:23||

Episode 9|Iteration 450|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:25||

Episode 9|Iteration 455|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:26||

Episode 9|Iteration 460|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:28||

Episode 9|Iteration 462|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:28||

Episode 9|Iteration 465|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:30||

Episode 9|Iteration 470|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:31||

Episode 9|Iteration 475|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:31||

Episode 9|Iteration 475|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:34||

Episode 9|Iteration 480|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:35||

Episode 9|Iteration 484|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:35||

Episode 9|Iteration 485|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:38||

Episode 9|Iteration 486|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:38||

Episode 9|Iteration 490|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:39||

Episode 9|Iteration 495|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:41||

Episode 9|Iteration 500|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:41||

Episode 9|Iteration 500|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:45||

Episode 9|Iteration 500|reward:  477.5|last_reward_at:  330|Elapsed Time: 0:02:45||




  Episode 9 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 4/17 (0.19)
    explore-remote: 3/70 (0.04)
    explore-connect: 0/43 (0.00)
    exploit-local: 7/86 (0.08)
    exploit-remote: 1/142 (0.01)
    exploit-connect: 5/122 (0.04)
  exploit deflected to exploration: 37
  ## Episode: 10/10 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 10|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 10|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 10|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,discovered,,,[]


Episode 10|Iteration 2|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 10|Iteration 5|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 10|Iteration 10|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:03||

Episode 10|Iteration 15|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:04||

Episode 10|Iteration 20|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:06||

Episode 10|Iteration 23|reward:   54.0|last_reward_at:    1|Elapsed Time: 0:00:06||

Episode 10|Iteration 23|reward:   54.0|last_reward_at:   23|Elapsed Time: 0:00:06||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]


Episode 10|Iteration 24|reward:   54.0|last_reward_at:   23|Elapsed Time: 0:00:06||

Episode 10|Iteration 25|reward:   54.0|last_reward_at:   23|Elapsed Time: 0:00:07||

Episode 10|Iteration 30|reward:   54.0|last_reward_at:   23|Elapsed Time: 0:00:08||

Episode 10|Iteration 32|reward:   54.0|last_reward_at:   23|Elapsed Time: 0:00:08||

Episode 10|Iteration 35|reward:   54.0|last_reward_at:   23|Elapsed Time: 0:00:10||

Episode 10|Iteration 37|reward:   60.0|last_reward_at:   23|Elapsed Time: 0:00:10||

Episode 10|Iteration 37|reward:   60.0|last_reward_at:   37|Elapsed Time: 0:00:10||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]


Episode 10|Iteration 38|reward:   60.0|last_reward_at:   37|Elapsed Time: 0:00:10||

Episode 10|Iteration 40|reward:   60.0|last_reward_at:   37|Elapsed Time: 0:00:11||

Episode 10|Iteration 45|reward:   60.0|last_reward_at:   37|Elapsed Time: 0:00:11||

Episode 10|Iteration 45|reward:   60.0|last_reward_at:   37|Elapsed Time: 0:00:12||

Episode 10|Iteration 47|reward:   60.0|last_reward_at:   37|Elapsed Time: 0:00:12||

Episode 10|Iteration 50|reward:   60.0|last_reward_at:   37|Elapsed Time: 0:00:14||

Episode 10|Iteration 52|reward:   74.0|last_reward_at:   37|Elapsed Time: 0:00:14||

Episode 10|Iteration 52|reward:   74.0|last_reward_at:   52|Elapsed Time: 0:00:14||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,discovered,,,[]


Episode 10|Iteration 53|reward:   74.0|last_reward_at:   52|Elapsed Time: 0:00:14||

Episode 10|Iteration 55|reward:   85.0|last_reward_at:   52|Elapsed Time: 0:00:15||

Episode 10|Iteration 55|reward:   85.0|last_reward_at:   55|Elapsed Time: 0:00:15||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,discovered,,,[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."


Episode 10|Iteration 56|reward:   85.0|last_reward_at:   55|Elapsed Time: 0:00:15||

Episode 10|Iteration 58|reward:   96.0|last_reward_at:   55|Elapsed Time: 0:00:15||

Episode 10|Iteration 58|reward:   96.0|last_reward_at:   58|Elapsed Time: 0:00:15||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,discovered,,,[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"


Episode 10|Iteration 59|reward:   96.0|last_reward_at:   58|Elapsed Time: 0:00:16||

Episode 10|Iteration 60|reward:   96.0|last_reward_at:   58|Elapsed Time: 0:00:17||

Episode 10|Iteration 64|reward:   96.0|last_reward_at:   58|Elapsed Time: 0:00:18||

Episode 10|Iteration 65|reward:   96.0|last_reward_at:   58|Elapsed Time: 0:00:19||

Episode 10|Iteration 67|reward:  109.0|last_reward_at:   58|Elapsed Time: 0:00:19||

Episode 10|Iteration 67|reward:  109.0|last_reward_at:   67|Elapsed Time: 0:00:19||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,discovered,,,[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]


Episode 10|Iteration 68|reward:  109.0|last_reward_at:   67|Elapsed Time: 0:00:19||

Episode 10|Iteration 68|reward:  119.0|last_reward_at:   67|Elapsed Time: 0:00:19||

Episode 10|Iteration 68|reward:  119.0|last_reward_at:   68|Elapsed Time: 0:00:19||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]


Episode 10|Iteration 69|reward:  119.0|last_reward_at:   68|Elapsed Time: 0:00:19||

Episode 10|Iteration 70|reward:  119.0|last_reward_at:   68|Elapsed Time: 0:00:21||

Episode 10|Iteration 75|reward:  119.0|last_reward_at:   68|Elapsed Time: 0:00:22||

Episode 10|Iteration 80|reward:  130.0|last_reward_at:   68|Elapsed Time: 0:00:23||

Episode 10|Iteration 80|reward:  130.0|last_reward_at:   80|Elapsed Time: 0:00:23||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
GTW,discovered,,,[]


Episode 10|Iteration 81|reward:  130.0|last_reward_at:   80|Elapsed Time: 0:00:23||

Episode 10|Iteration 85|reward:  130.0|last_reward_at:   80|Elapsed Time: 0:00:25||

Episode 10|Iteration 88|reward:  143.0|last_reward_at:   80|Elapsed Time: 0:00:25||

Episode 10|Iteration 88|reward:  143.0|last_reward_at:   88|Elapsed Time: 0:00:25||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
GTW,discovered,,,[]
IVI[user=media],discovered,,,[]


Episode 10|Iteration 89|reward:  143.0|last_reward_at:   88|Elapsed Time: 0:00:25||

Episode 10|Iteration 90|reward:  143.0|last_reward_at:   88|Elapsed Time: 0:00:27||

Episode 10|Iteration 91|reward:  263.0|last_reward_at:   88|Elapsed Time: 0:00:27||

Episode 10|Iteration 91|reward:  263.0|last_reward_at:   91|Elapsed Time: 0:00:27||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
IVI[user=media],discovered,,,[]


Episode 10|Iteration 92|reward:  263.0|last_reward_at:   91|Elapsed Time: 0:00:27||

Episode 10|Iteration 94|reward:  263.0|last_reward_at:   91|Elapsed Time: 0:00:27||

Episode 10|Iteration 95|reward:  263.0|last_reward_at:   91|Elapsed Time: 0:00:28||

Episode 10|Iteration 96|reward:  314.0|last_reward_at:   91|Elapsed Time: 0:00:28||

Episode 10|Iteration 96|reward:  314.0|last_reward_at:   96|Elapsed Time: 0:00:28||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
IVI[user=media],discovered,,,[]
BCM,discovered,,,[]
DoorLockECU,discovered,,,[]


Episode 10|Iteration 97|reward:  314.0|last_reward_at:   96|Elapsed Time: 0:00:29||

Episode 10|Iteration 100|reward:  314.0|last_reward_at:   96|Elapsed Time: 0:00:30||

Episode 10|Iteration 103|reward:  320.0|last_reward_at:   96|Elapsed Time: 0:00:30||

Episode 10|Iteration 103|reward:  320.0|last_reward_at:  103|Elapsed Time: 0:00:30||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
IVI[user=media],discovered,,,[]
BCM,discovered,,,[]
DoorLockECU,discovered,,,[]


Episode 10|Iteration 104|reward:  320.0|last_reward_at:  103|Elapsed Time: 0:00:30||

Episode 10|Iteration 105|reward:  350.0|last_reward_at:  103|Elapsed Time: 0:00:31||

Episode 10|Iteration 105|reward:  350.0|last_reward_at:  105|Elapsed Time: 0:00:31||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
IVI[user=media],discovered,,,[]
BCM,discovered,,,[]
DoorLockECU,discovered,,,[]


Episode 10|Iteration 106|reward:  350.0|last_reward_at:  105|Elapsed Time: 0:00:31||

Episode 10|Iteration 110|reward:  350.0|last_reward_at:  105|Elapsed Time: 0:00:33||

Episode 10|Iteration 113|reward:  350.0|last_reward_at:  105|Elapsed Time: 0:00:33||

Episode 10|Iteration 114|reward:  350.0|last_reward_at:  105|Elapsed Time: 0:00:33||

Episode 10|Iteration 115|reward:  350.0|last_reward_at:  105|Elapsed Time: 0:00:35||

Episode 10|Iteration 120|reward:  350.0|last_reward_at:  105|Elapsed Time: 0:00:36||

Episode 10|Iteration 125|reward:  350.0|last_reward_at:  105|Elapsed Time: 0:00:38||

Episode 10|Iteration 127|reward:  350.0|last_reward_at:  105|Elapsed Time: 0:00:38||

Episode 10|Iteration 130|reward:  350.0|last_reward_at:  105|Elapsed Time: 0:00:39||

Episode 10|Iteration 131|reward:  356.0|last_reward_at:  105|Elapsed Time: 0:00:39||

Episode 10|Iteration 131|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:39||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
IVI[user=media],discovered,,,[]
BCM,discovered,,,[]
DoorLockECU,discovered,,,[]


Episode 10|Iteration 132|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:39||

Episode 10|Iteration 134|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:40||

Episode 10|Iteration 135|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:41||

Episode 10|Iteration 140|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:42||

Episode 10|Iteration 142|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:43||

Episode 10|Iteration 145|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:44||

Episode 10|Iteration 150|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:46||

Episode 10|Iteration 155|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:47||

Episode 10|Iteration 159|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:47||

Episode 10|Iteration 160|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:49||

Episode 10|Iteration 162|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:49||

Episode 10|Iteration 165|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:50||

Episode 10|Iteration 170|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:52||

Episode 10|Iteration 175|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:54||

Episode 10|Iteration 177|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:54||

Episode 10|Iteration 180|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:56||

Episode 10|Iteration 182|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:56||

Episode 10|Iteration 185|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:57||

Episode 10|Iteration 190|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:00:59||

Episode 10|Iteration 195|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:01:00||

Episode 10|Iteration 199|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:01:00||

Episode 10|Iteration 200|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:01:02||

Episode 10|Iteration 203|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:01:02||

Episode 10|Iteration 205|reward:  356.0|last_reward_at:  131|Elapsed Time: 0:01:03||

Episode 10|Iteration 206|reward:  362.5|last_reward_at:  131|Elapsed Time: 0:01:03||

Episode 10|Iteration 206|reward:  362.5|last_reward_at:  206|Elapsed Time: 0:01:03||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
IVI[user=media],discovered,,,[]
BCM,discovered,,,[]
DoorLockECU,discovered,,,[]


Episode 10|Iteration 207|reward:  362.5|last_reward_at:  206|Elapsed Time: 0:01:03||

Episode 10|Iteration 210|reward:  362.5|last_reward_at:  206|Elapsed Time: 0:01:05||

Episode 10|Iteration 213|reward:  362.5|last_reward_at:  206|Elapsed Time: 0:01:05||

Episode 10|Iteration 215|reward:  362.5|last_reward_at:  206|Elapsed Time: 0:01:06||

Episode 10|Iteration 220|reward:  362.5|last_reward_at:  206|Elapsed Time: 0:01:08||

Episode 10|Iteration 224|reward:  362.5|last_reward_at:  206|Elapsed Time: 0:01:08||

Episode 10|Iteration 225|reward:  362.5|last_reward_at:  206|Elapsed Time: 0:01:10||

Episode 10|Iteration 226|reward:  362.5|last_reward_at:  206|Elapsed Time: 0:01:10||

Episode 10|Iteration 229|reward:  362.5|last_reward_at:  206|Elapsed Time: 0:01:10||

Episode 10|Iteration 230|reward:  362.5|last_reward_at:  206|Elapsed Time: 0:01:12||

Episode 10|Iteration 234|reward:  422.5|last_reward_at:  206|Elapsed Time: 0:01:12||

Episode 10|Iteration 234|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:12||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
IVI[user=media],owned,"[IVI, PostExploitation]","[DiscoverGatewayNode, ExtractOTASessionToken, ...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
BCM,discovered,,,[]
DoorLockECU,discovered,,,[]


Episode 10|Iteration 235|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:12||

Episode 10|Iteration 235|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:13||

Episode 10|Iteration 240|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:15||

Episode 10|Iteration 243|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:15||

Episode 10|Iteration 245|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:17||

Episode 10|Iteration 250|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:18||

Episode 10|Iteration 253|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:18||

Episode 10|Iteration 255|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:20||

Episode 10|Iteration 260|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:20||

Episode 10|Iteration 260|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:22||

Episode 10|Iteration 263|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:22||

Episode 10|Iteration 265|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:23||

Episode 10|Iteration 270|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:25||

Episode 10|Iteration 272|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:25||

Episode 10|Iteration 274|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:25||

Episode 10|Iteration 275|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:26||

Episode 10|Iteration 278|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:26||

Episode 10|Iteration 280|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:28||

Episode 10|Iteration 285|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:28||

Episode 10|Iteration 285|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:29||

Episode 10|Iteration 289|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:30||

Episode 10|Iteration 290|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:31||

Episode 10|Iteration 295|reward:  422.5|last_reward_at:  234|Elapsed Time: 0:01:33||

Episode 10|Iteration 296|reward:  431.5|last_reward_at:  234|Elapsed Time: 0:01:33||

Episode 10|Iteration 296|reward:  431.5|last_reward_at:  296|Elapsed Time: 0:01:33||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
IVI[user=media],owned,"[IVI, PostExploitation]","[DiscoverGatewayNode, ExtractOTASessionToken, ...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
BCM,discovered,,,[]
DoorLockECU,discovered,,,[]


Episode 10|Iteration 297|reward:  431.5|last_reward_at:  296|Elapsed Time: 0:01:33||

Episode 10|Iteration 300|reward:  431.5|last_reward_at:  296|Elapsed Time: 0:01:35||

Episode 10|Iteration 303|reward:  431.5|last_reward_at:  296|Elapsed Time: 0:01:35||

Episode 10|Iteration 304|reward:  438.0|last_reward_at:  296|Elapsed Time: 0:01:35||

Episode 10|Iteration 304|reward:  438.0|last_reward_at:  304|Elapsed Time: 0:01:35||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
IVI[user=media],owned,"[IVI, PostExploitation]","[DiscoverGatewayNode, ExtractOTASessionToken, ...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
BCM,discovered,,,[]
DoorLockECU,discovered,,,[]


Episode 10|Iteration 305|reward:  438.0|last_reward_at:  304|Elapsed Time: 0:01:35||

Episode 10|Iteration 305|reward:  438.0|last_reward_at:  304|Elapsed Time: 0:01:37||

Episode 10|Iteration 306|reward:  438.0|last_reward_at:  304|Elapsed Time: 0:01:37||

Episode 10|Iteration 310|reward:  438.0|last_reward_at:  304|Elapsed Time: 0:01:39||

Episode 10|Iteration 313|reward:  438.0|last_reward_at:  304|Elapsed Time: 0:01:39||

Episode 10|Iteration 315|reward:  438.0|last_reward_at:  304|Elapsed Time: 0:01:41||

Episode 10|Iteration 320|reward:  438.0|last_reward_at:  304|Elapsed Time: 0:01:42||

Episode 10|Iteration 323|reward:  438.0|last_reward_at:  304|Elapsed Time: 0:01:42||

Episode 10|Iteration 325|reward:  438.0|last_reward_at:  304|Elapsed Time: 0:01:43||

Episode 10|Iteration 327|reward:  452.0|last_reward_at:  304|Elapsed Time: 0:01:43||

Episode 10|Iteration 327|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:43||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
IVI[user=media],owned,"[IVI, PostExploitation]","[DiscoverGatewayNode, ExtractOTASessionToken, ...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
Telematics[user=root],discovered,,,[]
BCM,discovered,,,[]
DoorLockECU,discovered,,,[]


Episode 10|Iteration 328|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:44||

Episode 10|Iteration 330|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:45||

Episode 10|Iteration 333|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:45||

Episode 10|Iteration 335|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:46||

Episode 10|Iteration 340|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:48||

Episode 10|Iteration 345|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:49||

Episode 10|Iteration 349|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:49||

Episode 10|Iteration 350|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:51||

Episode 10|Iteration 354|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:51||

Episode 10|Iteration 355|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:52||

Episode 10|Iteration 360|reward:  452.0|last_reward_at:  327|Elapsed Time: 0:01:54||

Episode 10|Iteration 361|reward:  512.0|last_reward_at:  327|Elapsed Time: 0:01:54||

Episode 10|Iteration 361|reward:  512.0|last_reward_at:  361|Elapsed Time: 0:01:54||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
Telematics[user=root],owned,"[Telematics, PostExploitation]",[DumpVehicleBackendConfig],[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
IVI[user=media],owned,"[IVI, PostExploitation]","[DiscoverGatewayNode, ExtractOTASessionToken, ...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
BCM,discovered,,,[]
DoorLockECU,discovered,,,[]


Episode 10|Iteration 362|reward:  512.0|last_reward_at:  361|Elapsed Time: 0:01:54||

Episode 10|Iteration 363|reward:  512.0|last_reward_at:  361|Elapsed Time: 0:01:54||

Episode 10|Iteration 364|reward:  518.0|last_reward_at:  361|Elapsed Time: 0:01:54||

Episode 10|Iteration 364|reward:  518.0|last_reward_at:  364|Elapsed Time: 0:01:54||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
Telematics[user=root],owned,"[Telematics, PostExploitation]",[DumpVehicleBackendConfig],[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
IVI[user=media],owned,"[IVI, PostExploitation]","[DiscoverGatewayNode, ExtractOTASessionToken, ...",[]
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
BCM,discovered,,,[]
DoorLockECU,discovered,,,[]


Episode 10|Iteration 365|reward:  518.0|last_reward_at:  364|Elapsed Time: 0:01:54||

Episode 10|Iteration 365|reward:  518.0|last_reward_at:  364|Elapsed Time: 0:01:56||

Episode 10|Iteration 370|reward:  518.0|last_reward_at:  364|Elapsed Time: 0:01:57||

Episode 10|Iteration 375|reward:  518.0|last_reward_at:  364|Elapsed Time: 0:01:59||

Episode 10|Iteration 376|reward:  518.0|last_reward_at:  364|Elapsed Time: 0:01:59||

Episode 10|Iteration 377|reward:  558.0|last_reward_at:  364|Elapsed Time: 0:01:59||

Episode 10|Iteration 377|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:01:59||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
Telematics[user=root],owned,"[Telematics, PostExploitation]",[DumpVehicleBackendConfig],[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
IVI[user=media],owned,"[IVI, PostExploitation]","[DiscoverGatewayNode, ExtractOTASessionToken, ...",[]
OTA.Server,owned,"[OTA, Backend]",[],"[StealFirmwareArtifacts, DownloadUpdateBundle]"
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
BCM,discovered,,,[]


Episode 10|Iteration 378|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:01:59||

Episode 10|Iteration 379|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:01:59||

Episode 10|Iteration 380|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:01||

Episode 10|Iteration 382|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:01||

Episode 10|Iteration 385|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:02||

Episode 10|Iteration 390|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:04||

Episode 10|Iteration 395|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:06||

Episode 10|Iteration 396|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:06||

Episode 10|Iteration 399|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:06||

Episode 10|Iteration 400|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:07||

Episode 10|Iteration 405|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:09||

Episode 10|Iteration 409|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:09||

Episode 10|Iteration 410|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:11||

Episode 10|Iteration 415|reward:  558.0|last_reward_at:  377|Elapsed Time: 0:02:13||

Episode 10|Iteration 416|reward:  567.0|last_reward_at:  377|Elapsed Time: 0:02:13||

Episode 10|Iteration 416|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:13||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
Telematics[user=root],owned,"[Telematics, PostExploitation]",[DumpVehicleBackendConfig],[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
IVI[user=media],owned,"[IVI, PostExploitation]","[DiscoverGatewayNode, ExtractOTASessionToken, ...",[]
OTA.Server,owned,"[OTA, Backend]",[],"[StealFirmwareArtifacts, DownloadUpdateBundle]"
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
BCM,discovered,,,[]


Episode 10|Iteration 417|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:13||

Episode 10|Iteration 417|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:13||

Episode 10|Iteration 420|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:14||

Episode 10|Iteration 423|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:14||

Episode 10|Iteration 425|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:16||

Episode 10|Iteration 430|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:18||

Episode 10|Iteration 434|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:18||

Episode 10|Iteration 435|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:19||

Episode 10|Iteration 437|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:19||

Episode 10|Iteration 440|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:21||

Episode 10|Iteration 444|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:21||

Episode 10|Iteration 445|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:22||

Episode 10|Iteration 447|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:23||

Episode 10|Iteration 450|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:24||

Episode 10|Iteration 453|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:24||

Episode 10|Iteration 455|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:26||

Episode 10|Iteration 460|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:27||

Episode 10|Iteration 461|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:28||

Episode 10|Iteration 465|reward:  567.0|last_reward_at:  416|Elapsed Time: 0:02:30||

Episode 10|Iteration 468|reward:  573.0|last_reward_at:  416|Elapsed Time: 0:02:30||

Episode 10|Iteration 468|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:30||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AttackerLaptop,owned,"[Attacker, Laptop, Tools]","[AcquireOBDDongle, ScanCellularEndpoints, Scan...",[]
OBD,owned,"[OBD, Physical]",[EnableDCANAccess],[]
DCAN,owned,"[Bus, DCAN]","[DiscoverGatewayFromDiagnostics, ProvisionGate...",[]
Telematics[user=root],owned,"[Telematics, PostExploitation]",[DumpVehicleBackendConfig],[]
GTW,owned,"[Gateway, Bridge]","[LeakBusTopology, EnumerateECUs, DumpBusAccess...",[]
IVI[user=media],owned,"[IVI, PostExploitation]","[DiscoverGatewayNode, ExtractOTASessionToken, ...",[]
OTA.Server,owned,"[OTA, Backend]",[],"[StealFirmwareArtifacts, DownloadUpdateBundle]"
Telematics,discovered,,,"[AbuseRemoteControlAPI, DiscoverOTAServer, Exp..."
IVI,discovered,,,"[ReadInfotainmentData, ExploitWebViewRCE]"
BCM,discovered,,,[]


Episode 10|Iteration 469|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:30||

Episode 10|Iteration 470|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:32||

Episode 10|Iteration 474|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:32||

Episode 10|Iteration 475|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:34||

Episode 10|Iteration 477|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:34||

Episode 10|Iteration 478|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:34||

Episode 10|Iteration 480|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:36||

Episode 10|Iteration 485|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:38||

Episode 10|Iteration 488|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:38||

Episode 10|Iteration 489|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:38||

Episode 10|Iteration 490|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:40||

Episode 10|Iteration 494|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:40||

Episode 10|Iteration 495|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:41||

Episode 10|Iteration 499|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:41||

Episode 10|Iteration 500|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:43||

Episode 10|Iteration 500|reward:  573.0|last_reward_at:  468|Elapsed Time: 0:02:43||




  Episode 10 stopped at t=500 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/18 (0.14)
    explore-remote: 1/44 (0.02)
    explore-connect: 0/67 (0.00)
    exploit-local: 11/121 (0.08)
    exploit-remote: 4/90 (0.04)
    exploit-connect: 6/135 (0.04)
  exploit deflected to exploration: 33
simulation ended


In [12]:
# -----------------------------------------
# 7) 플롯 (기존 그대로)
# -----------------------------------------
all_runs = [
    dql_run,
    dql_exploit_run,
]

themodel = dqla.CyberBattleStateActionModel(ep)
p.plot_averaged_cummulative_rewards(
    all_runs=all_runs,
    title=(
        f"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count}\n"
        f"State: {[f.name() for f in themodel.state_space.feature_selection]} "
        f"({len(themodel.state_space.feature_selection)})\n"
        f"Action: abstract_action ({themodel.action_space.flat_size()})"
    ),
    save_at=os.path.join(plots_dir, f"benchmark-{gymid}-cumrewards.png"),
)

contenders = [dql_run, dql_exploit_run]
p.plot_episodes_length(contenders)
p.plot_averaged_cummulative_rewards(
    title=f"Agent Benchmark top contenders\nmax_nodes:{ep.maximum_node_count}\n",
    all_runs=contenders,
    save_at=os.path.join(plots_dir, f"benchmark-{gymid}-cumreward_contenders.png"),
)

for r in contenders:
    p.plot_all_episodes(r)


FigureCanvasAgg is non-interactive, and thus cannot be shown


FigureCanvasAgg is non-interactive, and thus cannot be shown




FigureCanvasAgg is non-interactive, and thus cannot be shown

