In [1]:
# pylint: disable=invalid-name

In [2]:
import sys
import os
import re
import json
import yaml
import logging
import random
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import gymnasium as gym

import torch

import cyberbattle.agents.baseline.learner as learner
import cyberbattle.agents.baseline.plotting as p
import cyberbattle.agents.baseline.agent_wrapper as w
import cyberbattle.agents.baseline.agent_dql as dqla
from cyberbattle.agents.baseline.agent_wrapper import Verbosity
from cyberbattle._env.cyberbattle_env import CyberBattleEnv

from openai import OpenAI

logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format="%(levelname)s: %(message)s")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Papermill notebook parameters
gymid = "CyberBattleChain-v0"
env_size = 10
iteration_count = 9000
training_episode_count = 50
eval_episode_count = 5
maximum_node_count = 22
maximum_total_credentials = 22
plots_dir = "output/plots"

# --- LLM(평가에서만 사용) 옵션: 기본 OFF ---
use_llm = False
model_id = "gpt-5.1"
llm_every_steps = 1      # 매 step마다 LLM 프루닝(비싸면 5~10 추천)
candidate_pool = 200     # 샘플링으로 구성할 후보 수
llm_topk = 10            # DQL 상위 topK 중 LLM이 1개 선택

def find_llm_token_yaml(start=None):
    p = os.path.abspath(start or os.getcwd())
    while True:
        cand = os.path.join(p, "llm_token.yaml")
        if os.path.exists(cand):
            return cand
        parent = os.path.dirname(p)
        if parent == p:
            raise RuntimeError(f"llm_token.yaml 못 찾음. 시작점={os.getcwd()}")
        p = parent

llm_token_yaml = find_llm_token_yaml()
print("FOUND llm_token_yaml =", llm_token_yaml)

# (옵션) LLM에 관측 텍스트를 얼마나 줄지
llm_obs_max_chars = 1400

FOUND llm_token_yaml = /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/llm_token.yaml


In [4]:
# Parameters
gymid = "CyberBattleChain-v0"
iteration_count = 200
training_episode_count = 20
eval_episode_count = 3
maximum_node_count = 20
maximum_total_credentials = 20
env_size = 10
plots_dir = "notebooks/output/chain10_hybrid_dql_llm/plots"
use_llm = True
model_id = "gpt-5.1"
llm_every_steps = 5
candidate_pool = 200
llm_topk = 10


In [5]:
os.makedirs(plots_dir, exist_ok=True)

In [6]:
# -----------------------------
# 1) Gym env 로드 (기존 그대로)
# -----------------------------
if env_size:
    _gym_env = gym.make(gymid, size=env_size)
else:
    _gym_env = gym.make(gymid)

from typing import cast
gym_env = cast(CyberBattleEnv, _gym_env.unwrapped)
assert isinstance(gym_env, CyberBattleEnv), f"Expected CyberBattleEnv, got {type(gym_env)}"

ep = w.EnvironmentBounds.of_identifiers(
    maximum_node_count=maximum_node_count,
    maximum_total_credentials=maximum_total_credentials,
    identifiers=gym_env.identifiers,
)

In [7]:
# -----------------------------------------
# 2) OpenAI 토큰 로더 + chat callable
# -----------------------------------------
def load_openai_token(config_path: str) -> str:
    if os.path.exists(config_path):
        with open(config_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
        if isinstance(data, dict):
            oa_cfg = data.get("openai", {})
            if isinstance(oa_cfg, dict) and "api_key" in oa_cfg:
                key = str(oa_cfg["api_key"]).strip()
                if key.lower() == "dummy" or len(key) < 20:
                    raise RuntimeError(f"llm_token.yaml의 openai.api_key가 이상함: {key!r}")
                return key

    key = (os.getenv("OPENAI_API_KEY") or "").strip()
    if key and key.lower() != "dummy":
        return key

    raise RuntimeError(f"OpenAI API 키를 못 찾음. config_path={config_path!r}, ENV OPENAI_API_KEY도 없음/이상함")


def make_openai_chat_callable(model_id: str, llm_token_yaml: str):
    api_key = load_openai_token(llm_token_yaml)  # ✅ 여기 중요
    # 디버그(키 노출 금지용)
    print("[OpenAI] key_prefix=", api_key[:8], "len=", len(api_key), "yaml=", os.path.abspath(llm_token_yaml))

    client = OpenAI(api_key=api_key)

    def chat(messages):
        resp = client.responses.create(
            model=model_id,
            input=[{"role": m["role"], "content": m["content"]} for m in messages],
            max_output_tokens=256,
        )
        return getattr(resp, "output_text", "") or ""

    return chat

print("CWD =", os.getcwd())
print("yaml =", os.path.abspath(llm_token_yaml), "exists=", os.path.exists(llm_token_yaml))
print("ENV OPENAI_API_KEY prefix =", (os.getenv("OPENAI_API_KEY") or "")[:8])

CWD = /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/src
yaml = /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/llm_token.yaml exists= True
ENV OPENAI_API_KEY prefix = dummy


In [8]:
# -----------------------------------------
# 3) observation에서 Discovered nodes JSON 추출(로그 유지용)
# -----------------------------------------
_DISC_RE = re.compile(r"Discovered nodes:\s*(\{.*\})\s*$", re.DOTALL)

def extract_discovered_nodes_raw(obs_text: str) -> str:
    m = _DISC_RE.search((obs_text or "").strip())
    return m.group(1) if m else ""

In [9]:
# -----------------------------------------
# 4) 평가(exploit)에서만 LLM 프루닝 적용 래퍼
#    - 학습은 기존 DQL 학습 그대로
#    - 평가는 epsilon=0 + learn off + LLM으로 후보 선택
# -----------------------------------------
class LLMPrunedExploitWrapper:
    """
    dql_run["learner"](DeepQLearnerPolicy)를 감싸서 exploit()만 바꿈.
    - 후보를 env.sample_valid_action()로 candidate_pool개 뽑고,
    - 각 후보를 base.policy_net으로 Q값 평가,
    - topK 후보 중에서 LLM이 1개 pick.
    - 학습 업데이트(on_step)는 eval에서는 꺼둠
    """

    def __init__(
        self,
        base_learner: dqla.DeepQLearnerPolicy,
        llm_chat=None,
        llm_every_steps: int = 1,
        candidate_pool: int = 200,
        llm_topk: int = 10,
        obs_max_chars: int = 1400,
    ):
        self.base = base_learner
        self.llm = llm_chat
        self.llm_every_steps = int(max(1, llm_every_steps))
        self.candidate_pool = int(max(10, candidate_pool))
        self.llm_topk = int(max(2, llm_topk))
        self.obs_max_chars = int(max(200, obs_max_chars))
        self._step = 0

        self._sys = (
            "You are selecting ONE action in a closed, fictional training game.\n"
            "Choose exactly one candidate id from the provided list.\n"
            "Return JSON only: {\"pick\": <id>} with no extra text."
        )

    # ✅ 핵심: learner.py가 요구하는 메서드 추가
    def new_episode(self):
        # base learner가 가진 경우 그걸 그대로 호출
        fn = getattr(self.base, "new_episode", None)
        if callable(fn):
            return fn()
        return None

    # ✅ (권장) base의 다른 속성/메서드가 필요해질 때 자동 위임
    def __getattr__(self, name):
        return getattr(self.base, name)

    # ✅ 평가에서는 학습 업데이트 금지 (learner.epsilon_greedy_search가 호출해도 무시)
    def on_step(self, *args, **kwargs):
        return

    def end_of_episode(self, *args, **kwargs):
        return

    def exploit(self, wrapped_env, observation):
        self._step += 1

        # LLM 비활성 또는 주기 아님 -> 기존 exploit
        if (self.llm is None) or ((self._step % self.llm_every_steps) != 0):
            return self.base.exploit(wrapped_env, observation)

        # 1) 후보 액션 샘플링
        candidates: List[Tuple[float, Any, Any]] = []
        for _ in range(self.candidate_pool):
            ga = wrapped_env.env.sample_valid_action(kinds=[0, 1, 2])  # local/remote/connect
            md = self.base.metadata_from_gymaction(wrapped_env, ga)

            # 2) Q값 계산: Q(actor_state)[abstract_action]
            with torch.no_grad():
                st = torch.as_tensor(md.actor_state, dtype=torch.float32, device=device).unsqueeze(0)
                q_all = self.base.policy_net(st)
                qv = float(q_all[0, int(md.abstract_action)].item())

            candidates.append((qv, ga, md))

        if not candidates:
            return self.base.exploit(wrapped_env, observation)

        candidates.sort(key=lambda x: x[0], reverse=True)
        top = candidates[: self.llm_topk]

        # observation 일부를 LLM에 제공(선택)
        try:
            obs_txt = json.dumps(observation, ensure_ascii=False)[: self.obs_max_chars]
        except Exception:
            obs_txt = str(observation)[: self.obs_max_chars]

        payload = {
            "observation_preview": obs_txt,
            "candidates": [
                {"id": i, "q": round(float(qv), 4), "gym_action": repr(ga)}
                for i, (qv, ga, md) in enumerate(top)
            ],
        }

        out = self.llm(
            [
                {"role": "system", "content": self._sys},
                {"role": "user", "content": json.dumps(payload, ensure_ascii=False)},
            ]
        ) or ""

        m = re.search(r"\{.*\}", out, flags=re.DOTALL)
        if not m:
            qv, ga, md = top[0]
            return "exploit[dql_top1]", ga, md

        try:
            obj = json.loads(m.group(0))
            pick = int(obj.get("pick"))
            if 0 <= pick < len(top):
                qv, ga, md = top[pick]
                return "exploit[llm_pruned]", ga, md
        except Exception:
            pass

        qv, ga, md = top[0]
        return "exploit[dql_top1]", ga, md

In [10]:
# -----------------------------------------
# 5) DQL 학습 (✅ 기존 파라미터 그대로 유지)
# -----------------------------------------
dql_run = learner.epsilon_greedy_search(
    cyberbattle_gym_env=gym_env,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.015,
        replay_memory_size=10000,
        target_update=10,
        batch_size=512,
        learning_rate=0.01,
    ),
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.90,
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    render=False,
    plot_episodes_length=False,
    title="DQL",
)

###### DQL
Learning with: episode_count=20,iteration_count=200,ϵ=0.9,ϵ_min=0.1, ϵ_expdecay=5000,γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10
  ## Episode: 1/20 'DQL' ϵ=0.9000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 1|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

  state_batch = torch.tensor(states_to_consider).to(device)
Episode 1|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 1|Iteration 2|reward:  114.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 1|Iteration 2|reward:  114.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 1|Iteration 3|reward:  116.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 1|Iteration 3|reward:  116.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 1|Iteration 7|reward:  127.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 1|Iteration 7|reward:  127.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 1|Iteration 8|reward:  136.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 1|Iteration 8|reward:  136.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 1|Iteration 9|reward:  142.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 1|Iteration 9|reward:  142.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 1|Iteration 22|reward:  144.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 1|Iteration 22|reward:  144.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 1|Iteration 41|reward:  152.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 1|Iteration 41|reward:  152.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 1|Iteration 84|reward:  152.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 1|Iteration 119|reward:  152.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 1|Iteration 145|reward:  152.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 1|Iteration 165|reward:  152.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 1|Iteration 195|reward:  152.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 1|Iteration 200|reward:  152.0|last_reward_at:   41|Elapsed Time: 0:00:00||




  Episode 1 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/44 (0.06)
    explore-remote: 4/59 (0.06)
    explore-connect: 1/57 (0.02)
    exploit-local: 0/19 (0.00)
    exploit-remote: 0/0 (NaN)
    exploit-connect: 0/13 (0.00)
  exploit deflected to exploration: 2
  ## Episode: 2/20 'DQL' ϵ=0.8688, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 2|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 15|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 15|reward:   14.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 2|Iteration 18|reward:  114.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 2|Iteration 18|reward:  114.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 2|Iteration 20|reward:  116.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 2|Iteration 20|reward:  116.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 2|Iteration 25|reward:  118.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 2|Iteration 25|reward:  118.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 2|Iteration 30|reward:  132.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 2|Iteration 30|reward:  132.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 2|Iteration 43|reward:  138.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 2|Iteration 43|reward:  138.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 2|Iteration 47|reward:  144.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 2|Iteration 47|reward:  144.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 2|Iteration 51|reward:  152.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 2|Iteration 51|reward:  152.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 2|Iteration 94|reward:  152.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 2|Iteration 132|reward:  152.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 2|Iteration 173|reward:  152.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 2|Iteration 200|reward:  152.0|last_reward_at:   51|Elapsed Time: 0:00:00||




  Episode 2 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/31 (0.09)
    explore-remote: 4/66 (0.06)
    explore-connect: 1/68 (0.01)
    exploit-local: 0/15 (0.00)
    exploit-remote: 0/0 (NaN)
    exploit-connect: 0/12 (0.00)
  exploit deflected to exploration: 3
  ## Episode: 3/20 'DQL' ϵ=0.8386, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 3|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 16|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 16|reward:   14.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 3|Iteration 17|reward:   16.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 3|Iteration 17|reward:   16.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 3|Iteration 18|reward:   20.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 3|Iteration 18|reward:   20.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 3|Iteration 66|reward:   20.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 3|Iteration 72|reward:  120.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 3|Iteration 72|reward:  120.0|last_reward_at:   72|Elapsed Time: 0:00:00||

Episode 3|Iteration 74|reward:  134.0|last_reward_at:   72|Elapsed Time: 0:00:00||

Episode 3|Iteration 74|reward:  134.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 3|Iteration 84|reward:  134.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 3|Iteration 91|reward:  140.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 3|Iteration 91|reward:  140.0|last_reward_at:   91|Elapsed Time: 0:00:00||

Episode 3|Iteration 94|reward:  146.0|last_reward_at:   91|Elapsed Time: 0:00:00||

Episode 3|Iteration 94|reward:  146.0|last_reward_at:   94|Elapsed Time: 0:00:00||

Episode 3|Iteration 98|reward:  154.0|last_reward_at:   94|Elapsed Time: 0:00:00||

Episode 3|Iteration 98|reward:  154.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 3|Iteration 109|reward:  154.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 3|Iteration 122|reward:  154.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 3|Iteration 135|reward:  154.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 3|Iteration 147|reward:  154.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 3|Iteration 157|reward:  154.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 3|Iteration 168|reward:  154.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 3|Iteration 173|reward:  254.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 3|Iteration 173|reward:  254.0|last_reward_at:  173|Elapsed Time: 0:00:00||

Episode 3|Iteration 183|reward:  254.0|last_reward_at:  173|Elapsed Time: 0:00:00||

Episode 3|Iteration 190|reward:  254.0|last_reward_at:  173|Elapsed Time: 0:00:00||

Episode 3|Iteration 198|reward:  254.0|last_reward_at:  173|Elapsed Time: 0:00:00||

Episode 3|Iteration 200|reward:  254.0|last_reward_at:  173|Elapsed Time: 0:00:00||




  Episode 3 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/21 (0.09)
    explore-remote: 4/90 (0.04)
    explore-connect: 1/56 (0.02)
    exploit-local: 1/8 (0.11)
    exploit-remote: 0/2 (0.00)
    exploit-connect: 1/14 (0.07)
  exploit deflected to exploration: 11
  ## Episode: 4/20 'DQL' ϵ=0.8097, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 4|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 8|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 4|Iteration 8|reward:   14.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 4|Iteration 16|reward:   18.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 4|Iteration 16|reward:   18.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 4|Iteration 31|reward:   18.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 4|Iteration 39|reward:   20.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 4|Iteration 39|reward:   20.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 4|Iteration 51|reward:   20.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 4|Iteration 52|reward:  120.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 4|Iteration 52|reward:  120.0|last_reward_at:   52|Elapsed Time: 0:00:00||

Episode 4|Iteration 56|reward:  131.0|last_reward_at:   52|Elapsed Time: 0:00:00||

Episode 4|Iteration 56|reward:  131.0|last_reward_at:   56|Elapsed Time: 0:00:00||

Episode 4|Iteration 62|reward:  140.0|last_reward_at:   56|Elapsed Time: 0:00:00||

Episode 4|Iteration 62|reward:  140.0|last_reward_at:   62|Elapsed Time: 0:00:00||

Episode 4|Iteration 70|reward:  148.0|last_reward_at:   62|Elapsed Time: 0:00:00||

Episode 4|Iteration 70|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 4|Iteration 84|reward:  148.0|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 4|Iteration 84|reward:  154.0|last_reward_at:   70|Elapsed Time: 0:00:00||

Episode 4|Iteration 84|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 4|Iteration 97|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 4|Iteration 109|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 4|Iteration 122|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 4|Iteration 132|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 4|Iteration 142|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 4|Iteration 155|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 4|Iteration 165|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 4|Iteration 178|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 4|Iteration 188|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 4|Iteration 198|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 4|Iteration 200|reward:  154.0|last_reward_at:   84|Elapsed Time: 0:00:00||




  Episode 4 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/23 (0.12)
    explore-remote: 3/61 (0.05)
    explore-connect: 0/71 (0.00)
    exploit-local: 0/0 (NaN)
    exploit-remote: 1/5 (0.17)
    exploit-connect: 1/32 (0.03)
  exploit deflected to exploration: 1
  ## Episode: 5/20 'DQL' ϵ=0.7819, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 5|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 2|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 5|Iteration 2|reward:   14.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 5|Iteration 3|reward:   18.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 5|Iteration 3|reward:   18.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 5|Iteration 4|reward:  118.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 5|Iteration 4|reward:  118.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 5|Iteration 13|reward:  129.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 5|Iteration 13|reward:  129.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 5|Iteration 19|reward:  138.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 5|Iteration 19|reward:  138.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 5|Iteration 23|reward:  144.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 5|Iteration 23|reward:  144.0|last_reward_at:   23|Elapsed Time: 0:00:00||

Episode 5|Iteration 25|reward:  152.0|last_reward_at:   23|Elapsed Time: 0:00:00||

Episode 5|Iteration 25|reward:  152.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 5|Iteration 29|reward:  252.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 5|Iteration 29|reward:  252.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 5|Iteration 31|reward:  263.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 5|Iteration 31|reward:  263.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 5|Iteration 35|reward:  265.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 5|Iteration 35|reward:  265.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 5|Iteration 39|reward:  274.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 5|Iteration 39|reward:  274.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 5|Iteration 49|reward:  274.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 5|Iteration 50|reward:  276.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 5|Iteration 50|reward:  276.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 5|Iteration 61|reward:  276.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 5|Iteration 74|reward:  276.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 5|Iteration 87|reward:  276.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 5|Iteration 87|reward:  376.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 5|Iteration 87|reward:  376.0|last_reward_at:   87|Elapsed Time: 0:00:00||

Episode 5|Iteration 93|reward:  390.0|last_reward_at:   87|Elapsed Time: 0:00:00||

Episode 5|Iteration 93|reward:  390.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 5|Iteration 94|reward:  398.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 5|Iteration 94|reward:  398.0|last_reward_at:   94|Elapsed Time: 0:00:00||

Episode 5|Iteration 95|reward:  404.0|last_reward_at:   94|Elapsed Time: 0:00:00||

Episode 5|Iteration 95|reward:  404.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 5|Iteration 107|reward:  404.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 5|Iteration 117|reward:  404.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 5|Iteration 127|reward:  404.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 5|Iteration 137|reward:  404.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 5|Iteration 139|reward:  406.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 5|Iteration 139|reward:  406.0|last_reward_at:  139|Elapsed Time: 0:00:00||

Episode 5|Iteration 152|reward:  406.0|last_reward_at:  139|Elapsed Time: 0:00:00||

Episode 5|Iteration 163|reward:  406.0|last_reward_at:  139|Elapsed Time: 0:00:00||

Episode 5|Iteration 168|reward:  506.0|last_reward_at:  139|Elapsed Time: 0:00:00||

Episode 5|Iteration 168|reward:  506.0|last_reward_at:  168|Elapsed Time: 0:00:00||

Episode 5|Iteration 169|reward:  520.0|last_reward_at:  168|Elapsed Time: 0:00:00||

Episode 5|Iteration 169|reward:  520.0|last_reward_at:  169|Elapsed Time: 0:00:00||

Episode 5|Iteration 178|reward:  520.0|last_reward_at:  169|Elapsed Time: 0:00:00||

Episode 5|Iteration 186|reward:  522.0|last_reward_at:  169|Elapsed Time: 0:00:01||

Episode 5|Iteration 186|reward:  522.0|last_reward_at:  186|Elapsed Time: 0:00:01||

Episode 5|Iteration 195|reward:  528.0|last_reward_at:  186|Elapsed Time: 0:00:01||

Episode 5|Iteration 195|reward:  528.0|last_reward_at:  195|Elapsed Time: 0:00:01||

Episode 5|Iteration 199|reward:  532.0|last_reward_at:  195|Elapsed Time: 0:00:01||

Episode 5|Iteration 199|reward:  532.0|last_reward_at:  199|Elapsed Time: 0:00:01||

Episode 5|Iteration 200|reward:  532.0|last_reward_at:  199|Elapsed Time: 0:00:01||




  Episode 5 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 4/35 (0.10)
    explore-remote: 9/55 (0.14)
    explore-connect: 0/55 (0.00)
    exploit-local: 4/10 (0.29)
    exploit-remote: 1/8 (0.11)
    exploit-connect: 4/15 (0.21)
  exploit deflected to exploration: 3
  ## Episode: 6/20 'DQL' ϵ=0.7551, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 6|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 7|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 6|Iteration 7|reward:   14.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 6|Iteration 8|reward:   18.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 6|Iteration 8|reward:   18.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 6|Iteration 11|reward:   20.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 6|Iteration 11|reward:   20.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 6|Iteration 26|reward:   20.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 6|Iteration 29|reward:  120.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 6|Iteration 29|reward:  120.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 6|Iteration 37|reward:  134.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 6|Iteration 37|reward:  134.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 6|Iteration 42|reward:  140.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 6|Iteration 42|reward:  140.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 6|Iteration 46|reward:  146.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 6|Iteration 46|reward:  146.0|last_reward_at:   46|Elapsed Time: 0:00:00||

Episode 6|Iteration 59|reward:  146.0|last_reward_at:   46|Elapsed Time: 0:00:00||

Episode 6|Iteration 69|reward:  146.0|last_reward_at:   46|Elapsed Time: 0:00:00||

Episode 6|Iteration 79|reward:  146.0|last_reward_at:   46|Elapsed Time: 0:00:00||

Episode 6|Iteration 82|reward:  246.0|last_reward_at:   46|Elapsed Time: 0:00:00||

Episode 6|Iteration 82|reward:  246.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 6|Iteration 85|reward:  260.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 6|Iteration 85|reward:  260.0|last_reward_at:   85|Elapsed Time: 0:00:00||

Episode 6|Iteration 91|reward:  266.0|last_reward_at:   85|Elapsed Time: 0:00:00||

Episode 6|Iteration 91|reward:  266.0|last_reward_at:   91|Elapsed Time: 0:00:00||

Episode 6|Iteration 102|reward:  266.0|last_reward_at:   91|Elapsed Time: 0:00:00||

Episode 6|Iteration 109|reward:  272.0|last_reward_at:   91|Elapsed Time: 0:00:00||

Episode 6|Iteration 109|reward:  272.0|last_reward_at:  109|Elapsed Time: 0:00:00||

Episode 6|Iteration 118|reward:  276.0|last_reward_at:  109|Elapsed Time: 0:00:00||

Episode 6|Iteration 118|reward:  276.0|last_reward_at:  118|Elapsed Time: 0:00:00||

Episode 6|Iteration 121|reward:  376.0|last_reward_at:  118|Elapsed Time: 0:00:00||

Episode 6|Iteration 121|reward:  376.0|last_reward_at:  121|Elapsed Time: 0:00:00||

Episode 6|Iteration 122|reward:  378.0|last_reward_at:  121|Elapsed Time: 0:00:00||

Episode 6|Iteration 122|reward:  378.0|last_reward_at:  122|Elapsed Time: 0:00:00||

Episode 6|Iteration 123|reward:  392.0|last_reward_at:  122|Elapsed Time: 0:00:00||

Episode 6|Iteration 123|reward:  392.0|last_reward_at:  123|Elapsed Time: 0:00:00||

Episode 6|Iteration 126|reward:  398.0|last_reward_at:  123|Elapsed Time: 0:00:00||

Episode 6|Iteration 126|reward:  398.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 6|Iteration 137|reward:  398.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 6|Iteration 139|reward:  406.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 6|Iteration 139|reward:  406.0|last_reward_at:  139|Elapsed Time: 0:00:00||

Episode 6|Iteration 147|reward:  506.0|last_reward_at:  139|Elapsed Time: 0:00:00||

Episode 6|Iteration 147|reward:  506.0|last_reward_at:  147|Elapsed Time: 0:00:00||

Episode 6|Iteration 150|reward:  520.0|last_reward_at:  147|Elapsed Time: 0:00:00||

Episode 6|Iteration 150|reward:  520.0|last_reward_at:  150|Elapsed Time: 0:00:00||

Episode 6|Iteration 154|reward:  620.0|last_reward_at:  150|Elapsed Time: 0:00:00||

Episode 6|Iteration 154|reward:  620.0|last_reward_at:  154|Elapsed Time: 0:00:00||

Episode 6|Iteration 156|reward:  634.0|last_reward_at:  154|Elapsed Time: 0:00:00||

Episode 6|Iteration 156|reward:  634.0|last_reward_at:  156|Elapsed Time: 0:00:00||

Episode 6|Iteration 157|reward:  640.0|last_reward_at:  156|Elapsed Time: 0:00:00||

Episode 6|Iteration 157|reward:  640.0|last_reward_at:  157|Elapsed Time: 0:00:00||

Episode 6|Iteration 158|reward:  646.0|last_reward_at:  157|Elapsed Time: 0:00:00||

Episode 6|Iteration 158|reward:  646.0|last_reward_at:  158|Elapsed Time: 0:00:00||

Episode 6|Iteration 166|reward:  746.0|last_reward_at:  158|Elapsed Time: 0:00:00||

Episode 6|Iteration 166|reward:  746.0|last_reward_at:  166|Elapsed Time: 0:00:00||

Episode 6|Iteration 170|reward:  752.0|last_reward_at:  166|Elapsed Time: 0:00:00||

Episode 6|Iteration 170|reward:  752.0|last_reward_at:  170|Elapsed Time: 0:00:00||

Episode 6|Iteration 173|reward:  766.0|last_reward_at:  170|Elapsed Time: 0:00:00||

Episode 6|Iteration 173|reward:  766.0|last_reward_at:  173|Elapsed Time: 0:00:00||

Episode 6|Iteration 174|reward:  866.0|last_reward_at:  173|Elapsed Time: 0:00:00||

Episode 6|Iteration 174|reward:  866.0|last_reward_at:  174|Elapsed Time: 0:00:00||

Episode 6|Iteration 176|reward:  880.0|last_reward_at:  174|Elapsed Time: 0:00:01||

Episode 6|Iteration 176|reward:  880.0|last_reward_at:  176|Elapsed Time: 0:00:01||

Episode 6|Iteration 181|reward:  886.0|last_reward_at:  176|Elapsed Time: 0:00:01||

Episode 6|Iteration 181|reward:  886.0|last_reward_at:  181|Elapsed Time: 0:00:01||

Episode 6|Iteration 187|reward:  986.0|last_reward_at:  181|Elapsed Time: 0:00:01||

Episode 6|Iteration 187|reward:  986.0|last_reward_at:  187|Elapsed Time: 0:00:01||

Episode 6|Iteration 189|reward:  997.0|last_reward_at:  187|Elapsed Time: 0:00:01||

Episode 6|Iteration 189|reward:  997.0|last_reward_at:  189|Elapsed Time: 0:00:01||

Episode 6|Iteration 190|reward: 1006.0|last_reward_at:  189|Elapsed Time: 0:00:01||

Episode 6|Iteration 190|reward: 1006.0|last_reward_at:  190|Elapsed Time: 0:00:01||

Episode 6|Iteration 191|reward: 1106.0|last_reward_at:  190|Elapsed Time: 0:00:01||

Episode 6|Iteration 191|reward: 1106.0|last_reward_at:  191|Elapsed Time: 0:00:01||

Episode 6|Iteration 193|reward: 1120.0|last_reward_at:  191|Elapsed Time: 0:00:01||

Episode 6|Iteration 193|reward: 1120.0|last_reward_at:  193|Elapsed Time: 0:00:01||

Episode 6|Iteration 196|reward: 1126.0|last_reward_at:  193|Elapsed Time: 0:00:01||

Episode 6|Iteration 196|reward: 1126.0|last_reward_at:  196|Elapsed Time: 0:00:01||

Episode 6|Iteration 198|reward: 1132.0|last_reward_at:  196|Elapsed Time: 0:00:01||

Episode 6|Iteration 198|reward: 1132.0|last_reward_at:  198|Elapsed Time: 0:00:01||

Episode 6|Iteration 200|reward: 1132.0|last_reward_at:  198|Elapsed Time: 0:00:01||




  Episode 6 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 4/39 (0.09)
    explore-remote: 8/49 (0.14)
    explore-connect: 0/53 (0.00)
    exploit-local: 14/5 (0.74)
    exploit-remote: 1/8 (0.11)
    exploit-connect: 9/10 (0.47)
  exploit deflected to exploration: 4
  ## Episode: 7/20 'DQL' ϵ=0.7294, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 7|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 7|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 7|Iteration 2|reward:  114.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 7|Iteration 2|reward:  114.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 7|Iteration 8|reward:  116.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 7|Iteration 8|reward:  116.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 7|Iteration 21|reward:  116.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 7|Iteration 27|reward:  127.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 7|Iteration 27|reward:  127.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 7|Iteration 28|reward:  129.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 7|Iteration 28|reward:  129.0|last_reward_at:   28|Elapsed Time: 0:00:00||

Episode 7|Iteration 33|reward:  135.0|last_reward_at:   28|Elapsed Time: 0:00:00||

Episode 7|Iteration 33|reward:  135.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 7|Iteration 35|reward:  143.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 7|Iteration 35|reward:  143.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 7|Iteration 46|reward:  143.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 7|Iteration 54|reward:  143.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 7|Iteration 58|reward:  152.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 7|Iteration 58|reward:  152.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 7|Iteration 69|reward:  152.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 7|Iteration 82|reward:  152.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 7|Iteration 92|reward:  152.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 7|Iteration 102|reward:  152.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 7|Iteration 109|reward:  152.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 7|Iteration 113|reward:  252.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 7|Iteration 113|reward:  252.0|last_reward_at:  113|Elapsed Time: 0:00:00||

Episode 7|Iteration 118|reward:  266.0|last_reward_at:  113|Elapsed Time: 0:00:00||

Episode 7|Iteration 118|reward:  266.0|last_reward_at:  118|Elapsed Time: 0:00:00||

Episode 7|Iteration 119|reward:  366.0|last_reward_at:  118|Elapsed Time: 0:00:00||

Episode 7|Iteration 119|reward:  366.0|last_reward_at:  119|Elapsed Time: 0:00:00||

Episode 7|Iteration 120|reward:  380.0|last_reward_at:  119|Elapsed Time: 0:00:00||

Episode 7|Iteration 120|reward:  380.0|last_reward_at:  120|Elapsed Time: 0:00:00||

Episode 7|Iteration 121|reward:  386.0|last_reward_at:  120|Elapsed Time: 0:00:00||

Episode 7|Iteration 121|reward:  386.0|last_reward_at:  121|Elapsed Time: 0:00:00||

Episode 7|Iteration 122|reward:  392.0|last_reward_at:  121|Elapsed Time: 0:00:00||

Episode 7|Iteration 122|reward:  392.0|last_reward_at:  122|Elapsed Time: 0:00:00||

Episode 7|Iteration 129|reward:  394.0|last_reward_at:  122|Elapsed Time: 0:00:00||

Episode 7|Iteration 129|reward:  394.0|last_reward_at:  129|Elapsed Time: 0:00:00||

Episode 7|Iteration 132|reward:  400.0|last_reward_at:  129|Elapsed Time: 0:00:00||

Episode 7|Iteration 132|reward:  400.0|last_reward_at:  132|Elapsed Time: 0:00:00||

Episode 7|Iteration 137|reward:  500.0|last_reward_at:  132|Elapsed Time: 0:00:00||

Episode 7|Iteration 137|reward:  500.0|last_reward_at:  137|Elapsed Time: 0:00:00||

Episode 7|Iteration 138|reward:  514.0|last_reward_at:  137|Elapsed Time: 0:00:00||

Episode 7|Iteration 138|reward:  514.0|last_reward_at:  138|Elapsed Time: 0:00:00||

Episode 7|Iteration 140|reward:  518.0|last_reward_at:  138|Elapsed Time: 0:00:00||

Episode 7|Iteration 140|reward:  518.0|last_reward_at:  140|Elapsed Time: 0:00:00||

Episode 7|Iteration 147|reward:  520.0|last_reward_at:  140|Elapsed Time: 0:00:00||

Episode 7|Iteration 147|reward:  520.0|last_reward_at:  147|Elapsed Time: 0:00:00||

Episode 7|Iteration 155|reward:  520.0|last_reward_at:  147|Elapsed Time: 0:00:01||

Episode 7|Iteration 155|reward:  522.0|last_reward_at:  147|Elapsed Time: 0:00:01||

Episode 7|Iteration 155|reward:  522.0|last_reward_at:  155|Elapsed Time: 0:00:01||

Episode 7|Iteration 159|reward:  528.0|last_reward_at:  155|Elapsed Time: 0:00:01||

Episode 7|Iteration 159|reward:  528.0|last_reward_at:  159|Elapsed Time: 0:00:01||

Episode 7|Iteration 168|reward:  528.0|last_reward_at:  159|Elapsed Time: 0:00:01||

Episode 7|Iteration 178|reward:  528.0|last_reward_at:  159|Elapsed Time: 0:00:01||

Episode 7|Iteration 188|reward:  528.0|last_reward_at:  159|Elapsed Time: 0:00:01||

Episode 7|Iteration 193|reward:  628.0|last_reward_at:  159|Elapsed Time: 0:00:01||

Episode 7|Iteration 193|reward:  628.0|last_reward_at:  193|Elapsed Time: 0:00:01||

Episode 7|Iteration 194|reward:  642.0|last_reward_at:  193|Elapsed Time: 0:00:01||

Episode 7|Iteration 194|reward:  642.0|last_reward_at:  194|Elapsed Time: 0:00:01||

Episode 7|Iteration 200|reward:  642.0|last_reward_at:  194|Elapsed Time: 0:00:01||




  Episode 7 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/30 (0.09)
    explore-remote: 8/49 (0.14)
    explore-connect: 1/58 (0.02)
    exploit-local: 6/2 (0.75)
    exploit-remote: 2/23 (0.08)
    exploit-connect: 4/14 (0.22)
  exploit deflected to exploration: 6
  ## Episode: 8/20 'DQL' ϵ=0.7047, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 8|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 8|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 8|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 8|Iteration 5|reward:   16.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 8|Iteration 5|reward:   16.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 8|Iteration 9|reward:  116.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 8|Iteration 9|reward:  116.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 8|Iteration 10|reward:  130.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 8|Iteration 10|reward:  130.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 8|Iteration 13|reward:  136.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 8|Iteration 13|reward:  136.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 8|Iteration 14|reward:  142.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 8|Iteration 14|reward:  142.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 8|Iteration 21|reward:  150.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 8|Iteration 21|reward:  150.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 8|Iteration 24|reward:  250.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 8|Iteration 24|reward:  250.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 8|Iteration 25|reward:  264.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 8|Iteration 25|reward:  264.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 8|Iteration 36|reward:  264.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 8|Iteration 38|reward:  266.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 8|Iteration 38|reward:  266.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 8|Iteration 44|reward:  268.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 8|Iteration 44|reward:  268.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 8|Iteration 52|reward:  368.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 8|Iteration 52|reward:  368.0|last_reward_at:   52|Elapsed Time: 0:00:00||

Episode 8|Iteration 60|reward:  382.0|last_reward_at:   52|Elapsed Time: 0:00:00||

Episode 8|Iteration 60|reward:  382.0|last_reward_at:   60|Elapsed Time: 0:00:00||

Episode 8|Iteration 61|reward:  388.0|last_reward_at:   60|Elapsed Time: 0:00:00||

Episode 8|Iteration 61|reward:  388.0|last_reward_at:   61|Elapsed Time: 0:00:00||

Episode 8|Iteration 64|reward:  394.0|last_reward_at:   61|Elapsed Time: 0:00:00||

Episode 8|Iteration 64|reward:  394.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 8|Iteration 74|reward:  394.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 8|Iteration 79|reward:  494.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 8|Iteration 79|reward:  494.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 8|Iteration 86|reward:  508.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 8|Iteration 86|reward:  508.0|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 8|Iteration 88|reward:  510.0|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 8|Iteration 88|reward:  510.0|last_reward_at:   88|Elapsed Time: 0:00:00||

Episode 8|Iteration 93|reward:  516.0|last_reward_at:   88|Elapsed Time: 0:00:00||

Episode 8|Iteration 93|reward:  516.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 8|Iteration 98|reward:  616.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 8|Iteration 98|reward:  616.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 8|Iteration 100|reward:  630.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 8|Iteration 100|reward:  630.0|last_reward_at:  100|Elapsed Time: 0:00:00||

Episode 8|Iteration 101|reward:  636.0|last_reward_at:  100|Elapsed Time: 0:00:00||

Episode 8|Iteration 101|reward:  636.0|last_reward_at:  101|Elapsed Time: 0:00:00||

Episode 8|Iteration 106|reward:  736.0|last_reward_at:  101|Elapsed Time: 0:00:00||

Episode 8|Iteration 106|reward:  736.0|last_reward_at:  106|Elapsed Time: 0:00:00||

Episode 8|Iteration 109|reward:  750.0|last_reward_at:  106|Elapsed Time: 0:00:00||

Episode 8|Iteration 109|reward:  750.0|last_reward_at:  109|Elapsed Time: 0:00:00||

Episode 8|Iteration 114|reward:  754.0|last_reward_at:  109|Elapsed Time: 0:00:00||

Episode 8|Iteration 114|reward:  754.0|last_reward_at:  114|Elapsed Time: 0:00:00||

Episode 8|Iteration 124|reward:  756.0|last_reward_at:  114|Elapsed Time: 0:00:00||

Episode 8|Iteration 124|reward:  756.0|last_reward_at:  124|Elapsed Time: 0:00:00||

Episode 8|Iteration 128|reward:  762.0|last_reward_at:  124|Elapsed Time: 0:00:00||

Episode 8|Iteration 128|reward:  762.0|last_reward_at:  128|Elapsed Time: 0:00:00||

Episode 8|Iteration 130|reward:  764.0|last_reward_at:  128|Elapsed Time: 0:00:00||

Episode 8|Iteration 130|reward:  764.0|last_reward_at:  130|Elapsed Time: 0:00:00||

Episode 8|Iteration 140|reward:  764.0|last_reward_at:  130|Elapsed Time: 0:00:00||

Episode 8|Iteration 143|reward:  770.0|last_reward_at:  130|Elapsed Time: 0:00:00||

Episode 8|Iteration 143|reward:  770.0|last_reward_at:  143|Elapsed Time: 0:00:00||

Episode 8|Iteration 144|reward:  870.0|last_reward_at:  143|Elapsed Time: 0:00:00||

Episode 8|Iteration 144|reward:  870.0|last_reward_at:  144|Elapsed Time: 0:00:00||

Episode 8|Iteration 145|reward:  884.0|last_reward_at:  144|Elapsed Time: 0:00:00||

Episode 8|Iteration 145|reward:  884.0|last_reward_at:  145|Elapsed Time: 0:00:00||

Episode 8|Iteration 152|reward:  890.0|last_reward_at:  145|Elapsed Time: 0:00:01||

Episode 8|Iteration 152|reward:  890.0|last_reward_at:  152|Elapsed Time: 0:00:01||

Episode 8|Iteration 156|reward:  990.0|last_reward_at:  152|Elapsed Time: 0:00:01||

Episode 8|Iteration 156|reward:  990.0|last_reward_at:  156|Elapsed Time: 0:00:01||

Episode 8|Iteration 161|reward: 1004.0|last_reward_at:  156|Elapsed Time: 0:00:01||

Episode 8|Iteration 161|reward: 1004.0|last_reward_at:  161|Elapsed Time: 0:00:01||

Episode 8|Iteration 163|reward: 1010.0|last_reward_at:  161|Elapsed Time: 0:00:01||

Episode 8|Iteration 163|reward: 1010.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 8|Iteration 171|reward: 1012.0|last_reward_at:  163|Elapsed Time: 0:00:01||

Episode 8|Iteration 171|reward: 1012.0|last_reward_at:  171|Elapsed Time: 0:00:01||

Episode 8|Iteration 177|reward: 1018.0|last_reward_at:  171|Elapsed Time: 0:00:01||

Episode 8|Iteration 177|reward: 1018.0|last_reward_at:  177|Elapsed Time: 0:00:01||

Episode 8|Iteration 178|reward: 1118.0|last_reward_at:  177|Elapsed Time: 0:00:01||

Episode 8|Iteration 178|reward: 1118.0|last_reward_at:  178|Elapsed Time: 0:00:01||

Episode 8|Iteration 179|reward: 1132.0|last_reward_at:  178|Elapsed Time: 0:00:01||

Episode 8|Iteration 179|reward: 1132.0|last_reward_at:  179|Elapsed Time: 0:00:01||

Episode 8|Iteration 180|reward: 1138.0|last_reward_at:  179|Elapsed Time: 0:00:01||

Episode 8|Iteration 180|reward: 1138.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 8|Iteration 185|reward: 1146.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 8|Iteration 185|reward: 1146.0|last_reward_at:  185|Elapsed Time: 0:00:01||

Episode 8|Iteration 188|reward: 1152.0|last_reward_at:  185|Elapsed Time: 0:00:01||

Episode 8|Iteration 188|reward: 1152.0|last_reward_at:  188|Elapsed Time: 0:00:01||

Episode 8|Iteration 190|reward: 1252.0|last_reward_at:  188|Elapsed Time: 0:00:01||

Episode 8|Iteration 190|reward: 1252.0|last_reward_at:  190|Elapsed Time: 0:00:01||

Episode 8|Iteration 191|reward: 1266.0|last_reward_at:  190|Elapsed Time: 0:00:01||

Episode 8|Iteration 191|reward: 1266.0|last_reward_at:  191|Elapsed Time: 0:00:01||

Episode 8|Iteration 192|reward: 6266.0|last_reward_at:  191|Elapsed Time: 0:00:01||

Episode 8|Iteration 192|reward: 6266.0|last_reward_at:  192|Elapsed Time: 0:00:01||




  Episode 8 ended at t=192 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 5/41 (0.11)
    explore-remote: 12/35 (0.26)
    explore-connect: 0/42 (0.00)
    exploit-local: 12/4 (0.75)
    exploit-remote: 5/11 (0.31)
    exploit-connect: 11/14 (0.44)
  exploit deflected to exploration: 0
  ## Episode: 9/20 'DQL' ϵ=0.6820, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 9|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 9|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 9|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 9|Iteration 5|reward:   16.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 9|Iteration 5|reward:   16.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 9|Iteration 8|reward:   20.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 9|Iteration 8|reward:   20.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 9|Iteration 10|reward:  120.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 9|Iteration 10|reward:  120.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 9|Iteration 14|reward:  134.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 9|Iteration 14|reward:  134.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 9|Iteration 17|reward:  140.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 9|Iteration 17|reward:  140.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 9|Iteration 28|reward:  140.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 9|Iteration 29|reward:  148.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 9|Iteration 29|reward:  148.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 9|Iteration 37|reward:  154.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 9|Iteration 37|reward:  154.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 46|reward:  154.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 56|reward:  154.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 64|reward:  154.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 74|reward:  154.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 84|reward:  154.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 94|reward:  154.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 104|reward:  154.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 114|reward:  154.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 125|reward:  154.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 135|reward:  254.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 9|Iteration 135|reward:  254.0|last_reward_at:  135|Elapsed Time: 0:00:00||

Episode 9|Iteration 137|reward:  268.0|last_reward_at:  135|Elapsed Time: 0:00:00||

Episode 9|Iteration 137|reward:  268.0|last_reward_at:  137|Elapsed Time: 0:00:00||

Episode 9|Iteration 139|reward:  368.0|last_reward_at:  137|Elapsed Time: 0:00:00||

Episode 9|Iteration 139|reward:  368.0|last_reward_at:  139|Elapsed Time: 0:00:00||

Episode 9|Iteration 140|reward:  382.0|last_reward_at:  139|Elapsed Time: 0:00:00||

Episode 9|Iteration 140|reward:  382.0|last_reward_at:  140|Elapsed Time: 0:00:00||

Episode 9|Iteration 142|reward:  388.0|last_reward_at:  140|Elapsed Time: 0:00:00||

Episode 9|Iteration 142|reward:  388.0|last_reward_at:  142|Elapsed Time: 0:00:00||

Episode 9|Iteration 144|reward:  488.0|last_reward_at:  142|Elapsed Time: 0:00:00||

Episode 9|Iteration 144|reward:  488.0|last_reward_at:  144|Elapsed Time: 0:00:00||

Episode 9|Iteration 145|reward:  502.0|last_reward_at:  144|Elapsed Time: 0:00:00||

Episode 9|Iteration 145|reward:  502.0|last_reward_at:  145|Elapsed Time: 0:00:00||

Episode 9|Iteration 152|reward:  504.0|last_reward_at:  145|Elapsed Time: 0:00:00||

Episode 9|Iteration 152|reward:  504.0|last_reward_at:  152|Elapsed Time: 0:00:00||

Episode 9|Iteration 161|reward:  510.0|last_reward_at:  152|Elapsed Time: 0:00:00||

Episode 9|Iteration 161|reward:  510.0|last_reward_at:  161|Elapsed Time: 0:00:00||

Episode 9|Iteration 165|reward:  514.0|last_reward_at:  161|Elapsed Time: 0:00:00||

Episode 9|Iteration 165|reward:  514.0|last_reward_at:  165|Elapsed Time: 0:00:00||

Episode 9|Iteration 175|reward:  514.0|last_reward_at:  165|Elapsed Time: 0:00:00||

Episode 9|Iteration 177|reward:  520.0|last_reward_at:  165|Elapsed Time: 0:00:01||

Episode 9|Iteration 177|reward:  520.0|last_reward_at:  177|Elapsed Time: 0:00:01||

Episode 9|Iteration 180|reward:  620.0|last_reward_at:  177|Elapsed Time: 0:00:01||

Episode 9|Iteration 180|reward:  620.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 9|Iteration 186|reward:  634.0|last_reward_at:  180|Elapsed Time: 0:00:01||

Episode 9|Iteration 186|reward:  634.0|last_reward_at:  186|Elapsed Time: 0:00:01||

Episode 9|Iteration 187|reward:  640.0|last_reward_at:  186|Elapsed Time: 0:00:01||

Episode 9|Iteration 187|reward:  640.0|last_reward_at:  187|Elapsed Time: 0:00:01||

Episode 9|Iteration 188|reward:  646.0|last_reward_at:  187|Elapsed Time: 0:00:01||

Episode 9|Iteration 188|reward:  646.0|last_reward_at:  188|Elapsed Time: 0:00:01||

Episode 9|Iteration 189|reward:  654.0|last_reward_at:  188|Elapsed Time: 0:00:01||

Episode 9|Iteration 189|reward:  654.0|last_reward_at:  189|Elapsed Time: 0:00:01||

Episode 9|Iteration 191|reward:  754.0|last_reward_at:  189|Elapsed Time: 0:00:01||

Episode 9|Iteration 191|reward:  754.0|last_reward_at:  191|Elapsed Time: 0:00:01||

Episode 9|Iteration 192|reward:  768.0|last_reward_at:  191|Elapsed Time: 0:00:01||

Episode 9|Iteration 192|reward:  768.0|last_reward_at:  192|Elapsed Time: 0:00:01||

Episode 9|Iteration 198|reward:  774.0|last_reward_at:  192|Elapsed Time: 0:00:01||

Episode 9|Iteration 198|reward:  774.0|last_reward_at:  198|Elapsed Time: 0:00:01||

Episode 9|Iteration 200|reward:  776.0|last_reward_at:  198|Elapsed Time: 0:00:01||

Episode 9|Iteration 200|reward:  776.0|last_reward_at:  200|Elapsed Time: 0:00:01||

Episode 9|Iteration 200|reward:  776.0|last_reward_at:  200|Elapsed Time: 0:00:01||




  Episode 9 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/23 (0.08)
    explore-remote: 11/56 (0.16)
    explore-connect: 1/57 (0.02)
    exploit-local: 9/3 (0.75)
    exploit-remote: 0/1 (0.00)
    exploit-connect: 5/32 (0.14)
  exploit deflected to exploration: 0
  ## Episode: 10/20 'DQL' ϵ=0.6591, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 10|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 10|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 10|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 10|Iteration 8|reward:   16.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 10|Iteration 8|reward:   16.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 10|Iteration 10|reward:  116.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 10|Iteration 10|reward:  116.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 10|Iteration 11|reward:  130.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 10|Iteration 11|reward:  130.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 10|Iteration 12|reward:  132.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 10|Iteration 12|reward:  132.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 10|Iteration 14|reward:  138.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 10|Iteration 14|reward:  138.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 10|Iteration 15|reward:  238.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 10|Iteration 15|reward:  238.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 10|Iteration 17|reward:  252.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 10|Iteration 17|reward:  252.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 10|Iteration 20|reward:  258.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 10|Iteration 20|reward:  258.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 10|Iteration 21|reward:  358.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 10|Iteration 21|reward:  358.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 10|Iteration 28|reward:  364.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 10|Iteration 28|reward:  364.0|last_reward_at:   28|Elapsed Time: 0:00:00||

Episode 10|Iteration 31|reward:  378.0|last_reward_at:   28|Elapsed Time: 0:00:00||

Episode 10|Iteration 31|reward:  378.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 10|Iteration 32|reward:  384.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 10|Iteration 32|reward:  384.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 10|Iteration 36|reward:  484.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 10|Iteration 36|reward:  484.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 10|Iteration 38|reward:  498.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 10|Iteration 38|reward:  498.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 10|Iteration 39|reward:  598.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 10|Iteration 39|reward:  598.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 10|Iteration 42|reward:  612.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 10|Iteration 42|reward:  612.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 10|Iteration 44|reward:  618.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 10|Iteration 44|reward:  618.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 10|Iteration 49|reward:  718.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 10|Iteration 49|reward:  718.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 10|Iteration 50|reward:  732.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 10|Iteration 50|reward:  732.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 10|Iteration 53|reward:  832.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 10|Iteration 53|reward:  832.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 10|Iteration 55|reward:  834.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 10|Iteration 55|reward:  834.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 10|Iteration 57|reward:  848.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 10|Iteration 57|reward:  848.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 10|Iteration 58|reward:  948.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 10|Iteration 58|reward:  948.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 10|Iteration 65|reward:  962.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 10|Iteration 65|reward:  962.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 10|Iteration 66|reward:  968.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 10|Iteration 66|reward:  968.0|last_reward_at:   66|Elapsed Time: 0:00:00||

Episode 10|Iteration 67|reward:  974.0|last_reward_at:   66|Elapsed Time: 0:00:00||

Episode 10|Iteration 67|reward:  974.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 10|Iteration 73|reward: 1074.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 10|Iteration 73|reward: 1074.0|last_reward_at:   73|Elapsed Time: 0:00:00||

Episode 10|Iteration 75|reward: 1088.0|last_reward_at:   73|Elapsed Time: 0:00:00||

Episode 10|Iteration 75|reward: 1088.0|last_reward_at:   75|Elapsed Time: 0:00:00||

Episode 10|Iteration 76|reward: 1094.0|last_reward_at:   75|Elapsed Time: 0:00:00||

Episode 10|Iteration 76|reward: 1094.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 10|Iteration 77|reward: 1194.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 10|Iteration 77|reward: 1194.0|last_reward_at:   77|Elapsed Time: 0:00:00||

Episode 10|Iteration 79|reward: 1196.0|last_reward_at:   77|Elapsed Time: 0:00:00||

Episode 10|Iteration 79|reward: 1196.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 10|Iteration 81|reward: 1210.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 10|Iteration 81|reward: 1210.0|last_reward_at:   81|Elapsed Time: 0:00:00||

Episode 10|Iteration 83|reward: 1212.0|last_reward_at:   81|Elapsed Time: 0:00:00||

Episode 10|Iteration 83|reward: 1212.0|last_reward_at:   83|Elapsed Time: 0:00:00||

Episode 10|Iteration 85|reward: 6212.0|last_reward_at:   83|Elapsed Time: 0:00:00||

Episode 10|Iteration 85|reward: 6212.0|last_reward_at:   85|Elapsed Time: 0:00:00||




  Episode 10 ended at t=85 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/18 (0.10)
    explore-remote: 6/9 (0.40)
    explore-connect: 0/15 (0.00)
    exploit-local: 15/1 (0.94)
    exploit-remote: 1/1 (0.50)
    exploit-connect: 11/6 (0.65)
  exploit deflected to exploration: 0
  ## Episode: 11/20 'DQL' ϵ=0.6497, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 11|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 11|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 11|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 11|Iteration 3|reward:   18.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 11|Iteration 3|reward:   18.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 11|Iteration 5|reward:  118.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 11|Iteration 5|reward:  118.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 11|Iteration 7|reward:  132.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 11|Iteration 7|reward:  132.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 11|Iteration 10|reward:  138.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 11|Iteration 10|reward:  138.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 11|Iteration 12|reward:  140.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 11|Iteration 12|reward:  140.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 11|Iteration 13|reward:  146.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 11|Iteration 13|reward:  146.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 11|Iteration 16|reward:  246.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 11|Iteration 16|reward:  246.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 11|Iteration 17|reward:  260.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 11|Iteration 17|reward:  260.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 11|Iteration 18|reward:  360.0|last_reward_at:   17|Elapsed Time: 0:00:00||

Episode 11|Iteration 18|reward:  360.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 11|Iteration 20|reward:  374.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 11|Iteration 20|reward:  374.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 11|Iteration 25|reward:  380.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 11|Iteration 25|reward:  380.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 11|Iteration 26|reward:  386.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 11|Iteration 26|reward:  386.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 11|Iteration 27|reward:  392.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 11|Iteration 27|reward:  392.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 11|Iteration 31|reward:  492.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 11|Iteration 31|reward:  492.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 11|Iteration 32|reward:  506.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 11|Iteration 32|reward:  506.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 11|Iteration 38|reward:  606.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 11|Iteration 38|reward:  606.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 11|Iteration 39|reward:  608.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 11|Iteration 39|reward:  608.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 11|Iteration 40|reward:  622.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 11|Iteration 40|reward:  622.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 11|Iteration 44|reward:  628.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 11|Iteration 44|reward:  628.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 11|Iteration 45|reward:  728.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 11|Iteration 45|reward:  728.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 11|Iteration 46|reward:  742.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 11|Iteration 46|reward:  742.0|last_reward_at:   46|Elapsed Time: 0:00:00||

Episode 11|Iteration 49|reward:  748.0|last_reward_at:   46|Elapsed Time: 0:00:00||

Episode 11|Iteration 49|reward:  748.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 11|Iteration 50|reward:  752.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 11|Iteration 50|reward:  752.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 11|Iteration 52|reward:  852.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 11|Iteration 52|reward:  852.0|last_reward_at:   52|Elapsed Time: 0:00:00||

Episode 11|Iteration 54|reward:  858.0|last_reward_at:   52|Elapsed Time: 0:00:00||

Episode 11|Iteration 54|reward:  858.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 11|Iteration 57|reward:  864.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 11|Iteration 57|reward:  864.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 11|Iteration 59|reward:  878.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 11|Iteration 59|reward:  878.0|last_reward_at:   59|Elapsed Time: 0:00:00||

Episode 11|Iteration 68|reward:  884.0|last_reward_at:   59|Elapsed Time: 0:00:00||

Episode 11|Iteration 68|reward:  884.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 11|Iteration 74|reward:  892.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 11|Iteration 74|reward:  892.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 11|Iteration 76|reward:  992.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 11|Iteration 76|reward:  992.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 11|Iteration 81|reward: 1006.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 11|Iteration 81|reward: 1006.0|last_reward_at:   81|Elapsed Time: 0:00:00||

Episode 11|Iteration 82|reward: 1106.0|last_reward_at:   81|Elapsed Time: 0:00:00||

Episode 11|Iteration 82|reward: 1106.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 11|Iteration 83|reward: 1120.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 11|Iteration 83|reward: 1120.0|last_reward_at:   83|Elapsed Time: 0:00:00||

Episode 11|Iteration 84|reward: 1126.0|last_reward_at:   83|Elapsed Time: 0:00:00||

Episode 11|Iteration 84|reward: 1126.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 11|Iteration 87|reward: 1128.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 11|Iteration 87|reward: 1128.0|last_reward_at:   87|Elapsed Time: 0:00:00||

Episode 11|Iteration 88|reward: 1228.0|last_reward_at:   87|Elapsed Time: 0:00:00||

Episode 11|Iteration 88|reward: 1228.0|last_reward_at:   88|Elapsed Time: 0:00:00||

Episode 11|Iteration 89|reward: 1242.0|last_reward_at:   88|Elapsed Time: 0:00:00||

Episode 11|Iteration 89|reward: 1242.0|last_reward_at:   89|Elapsed Time: 0:00:00||

Episode 11|Iteration 91|reward: 6242.0|last_reward_at:   89|Elapsed Time: 0:00:00||

Episode 11|Iteration 91|reward: 6242.0|last_reward_at:   91|Elapsed Time: 0:00:00||




  Episode 11 ended at t=91 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 4/14 (0.22)
    explore-remote: 9/10 (0.47)
    explore-connect: 0/22 (0.00)
    exploit-local: 14/0 (1.00)
    exploit-remote: 1/0 (1.00)
    exploit-connect: 11/6 (0.65)
  exploit deflected to exploration: 0
  ## Episode: 12/20 'DQL' ϵ=0.6398, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 12|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 12|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 12|Iteration 4|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 12|Iteration 4|reward:   14.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 12|Iteration 6|reward:   16.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 12|Iteration 6|reward:   16.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 12|Iteration 7|reward:  116.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 12|Iteration 7|reward:  116.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 12|Iteration 8|reward:  130.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 12|Iteration 8|reward:  130.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 12|Iteration 11|reward:  136.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 12|Iteration 11|reward:  136.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 12|Iteration 13|reward:  138.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 12|Iteration 13|reward:  138.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 12|Iteration 19|reward:  144.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 12|Iteration 19|reward:  144.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 12|Iteration 23|reward:  244.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 12|Iteration 23|reward:  244.0|last_reward_at:   23|Elapsed Time: 0:00:00||

Episode 12|Iteration 24|reward:  258.0|last_reward_at:   23|Elapsed Time: 0:00:00||

Episode 12|Iteration 24|reward:  258.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 12|Iteration 31|reward:  262.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 12|Iteration 31|reward:  262.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 12|Iteration 34|reward:  362.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 12|Iteration 34|reward:  362.0|last_reward_at:   34|Elapsed Time: 0:00:00||

Episode 12|Iteration 35|reward:  376.0|last_reward_at:   34|Elapsed Time: 0:00:00||

Episode 12|Iteration 35|reward:  376.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 12|Iteration 36|reward:  382.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 12|Iteration 36|reward:  382.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 12|Iteration 38|reward:  388.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 12|Iteration 38|reward:  388.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 12|Iteration 40|reward:  394.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 12|Iteration 40|reward:  394.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 12|Iteration 41|reward:  494.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 12|Iteration 41|reward:  494.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 12|Iteration 44|reward:  505.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 12|Iteration 44|reward:  505.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 12|Iteration 45|reward:  514.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 12|Iteration 45|reward:  514.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 12|Iteration 47|reward:  516.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 12|Iteration 47|reward:  516.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 12|Iteration 49|reward:  616.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 12|Iteration 49|reward:  616.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 12|Iteration 50|reward:  630.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 12|Iteration 50|reward:  630.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 12|Iteration 54|reward:  636.0|last_reward_at:   50|Elapsed Time: 0:00:00||

Episode 12|Iteration 54|reward:  636.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 12|Iteration 55|reward:  736.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 12|Iteration 55|reward:  736.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 12|Iteration 58|reward:  738.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 12|Iteration 58|reward:  738.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 12|Iteration 62|reward:  752.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 12|Iteration 62|reward:  752.0|last_reward_at:   62|Elapsed Time: 0:00:00||

Episode 12|Iteration 63|reward:  852.0|last_reward_at:   62|Elapsed Time: 0:00:00||

Episode 12|Iteration 63|reward:  852.0|last_reward_at:   63|Elapsed Time: 0:00:00||

Episode 12|Iteration 64|reward:  858.0|last_reward_at:   63|Elapsed Time: 0:00:00||

Episode 12|Iteration 64|reward:  858.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 12|Iteration 65|reward:  872.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 12|Iteration 65|reward:  872.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 12|Iteration 69|reward:  874.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 12|Iteration 69|reward:  874.0|last_reward_at:   69|Elapsed Time: 0:00:00||

Episode 12|Iteration 72|reward:  880.0|last_reward_at:   69|Elapsed Time: 0:00:00||

Episode 12|Iteration 72|reward:  880.0|last_reward_at:   72|Elapsed Time: 0:00:00||

Episode 12|Iteration 74|reward:  980.0|last_reward_at:   72|Elapsed Time: 0:00:00||

Episode 12|Iteration 74|reward:  980.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 12|Iteration 76|reward:  994.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 12|Iteration 76|reward:  994.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 12|Iteration 77|reward: 1094.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 12|Iteration 77|reward: 1094.0|last_reward_at:   77|Elapsed Time: 0:00:00||

Episode 12|Iteration 80|reward: 1100.0|last_reward_at:   77|Elapsed Time: 0:00:00||

Episode 12|Iteration 80|reward: 1100.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 12|Iteration 82|reward: 1114.0|last_reward_at:   80|Elapsed Time: 0:00:00||

Episode 12|Iteration 82|reward: 1114.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 12|Iteration 90|reward: 1120.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 12|Iteration 90|reward: 1120.0|last_reward_at:   90|Elapsed Time: 0:00:00||

Episode 12|Iteration 92|reward: 1220.0|last_reward_at:   90|Elapsed Time: 0:00:00||

Episode 12|Iteration 92|reward: 1220.0|last_reward_at:   92|Elapsed Time: 0:00:00||

Episode 12|Iteration 95|reward: 1234.0|last_reward_at:   92|Elapsed Time: 0:00:00||

Episode 12|Iteration 95|reward: 1234.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 12|Iteration 97|reward: 6234.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 12|Iteration 97|reward: 6234.0|last_reward_at:   97|Elapsed Time: 0:00:00||




  Episode 12 ended at t=97 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/18 (0.10)
    explore-remote: 10/16 (0.38)
    explore-connect: 0/14 (0.00)
    exploit-local: 16/4 (0.80)
    exploit-remote: 0/5 (0.00)
    exploit-connect: 11/1 (0.92)
  exploit deflected to exploration: 0
  ## Episode: 13/20 'DQL' ϵ=0.6294, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 13|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 13|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 13|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 13|Iteration 3|reward:  114.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 13|Iteration 3|reward:  114.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 13|Iteration 4|reward:  125.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 13|Iteration 4|reward:  125.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 13|Iteration 10|reward:  125.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 13|Iteration 14|reward:  127.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 13|Iteration 14|reward:  127.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 13|Iteration 16|reward:  129.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 13|Iteration 16|reward:  129.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 13|Iteration 23|reward:  129.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 13|Iteration 25|reward:  138.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 13|Iteration 25|reward:  138.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 13|Iteration 36|reward:  138.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 13|Iteration 41|reward:  146.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 13|Iteration 41|reward:  146.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 13|Iteration 44|reward:  152.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 13|Iteration 44|reward:  152.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 13|Iteration 56|reward:  152.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 13|Iteration 66|reward:  152.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 13|Iteration 74|reward:  152.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 13|Iteration 82|reward:  152.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 13|Iteration 89|reward:  152.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 13|Iteration 89|reward:  252.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 13|Iteration 89|reward:  252.0|last_reward_at:   89|Elapsed Time: 0:00:00||

Episode 13|Iteration 90|reward:  266.0|last_reward_at:   89|Elapsed Time: 0:00:00||

Episode 13|Iteration 90|reward:  266.0|last_reward_at:   90|Elapsed Time: 0:00:00||

Episode 13|Iteration 93|reward:  366.0|last_reward_at:   90|Elapsed Time: 0:00:00||

Episode 13|Iteration 93|reward:  366.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 13|Iteration 95|reward:  380.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 13|Iteration 95|reward:  380.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 13|Iteration 96|reward:  386.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 13|Iteration 96|reward:  386.0|last_reward_at:   96|Elapsed Time: 0:00:00||

Episode 13|Iteration 98|reward:  486.0|last_reward_at:   96|Elapsed Time: 0:00:00||

Episode 13|Iteration 98|reward:  486.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 13|Iteration 99|reward:  500.0|last_reward_at:   98|Elapsed Time: 0:00:00||

Episode 13|Iteration 99|reward:  500.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 13|Iteration 101|reward:  502.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 13|Iteration 101|reward:  502.0|last_reward_at:  101|Elapsed Time: 0:00:00||

Episode 13|Iteration 103|reward:  602.0|last_reward_at:  101|Elapsed Time: 0:00:00||

Episode 13|Iteration 103|reward:  602.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 13|Iteration 104|reward:  616.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 13|Iteration 104|reward:  616.0|last_reward_at:  104|Elapsed Time: 0:00:00||

Episode 13|Iteration 105|reward:  622.0|last_reward_at:  104|Elapsed Time: 0:00:00||

Episode 13|Iteration 105|reward:  622.0|last_reward_at:  105|Elapsed Time: 0:00:00||

Episode 13|Iteration 106|reward:  722.0|last_reward_at:  105|Elapsed Time: 0:00:00||

Episode 13|Iteration 106|reward:  722.0|last_reward_at:  106|Elapsed Time: 0:00:00||

Episode 13|Iteration 107|reward:  728.0|last_reward_at:  106|Elapsed Time: 0:00:00||

Episode 13|Iteration 107|reward:  728.0|last_reward_at:  107|Elapsed Time: 0:00:00||

Episode 13|Iteration 109|reward:  742.0|last_reward_at:  107|Elapsed Time: 0:00:00||

Episode 13|Iteration 109|reward:  742.0|last_reward_at:  109|Elapsed Time: 0:00:00||

Episode 13|Iteration 110|reward:  842.0|last_reward_at:  109|Elapsed Time: 0:00:00||

Episode 13|Iteration 110|reward:  842.0|last_reward_at:  110|Elapsed Time: 0:00:00||

Episode 13|Iteration 112|reward:  844.0|last_reward_at:  110|Elapsed Time: 0:00:00||

Episode 13|Iteration 112|reward:  844.0|last_reward_at:  112|Elapsed Time: 0:00:00||

Episode 13|Iteration 114|reward:  850.0|last_reward_at:  112|Elapsed Time: 0:00:00||

Episode 13|Iteration 114|reward:  850.0|last_reward_at:  114|Elapsed Time: 0:00:00||

Episode 13|Iteration 117|reward:  864.0|last_reward_at:  114|Elapsed Time: 0:00:00||

Episode 13|Iteration 117|reward:  864.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 13|Iteration 120|reward:  870.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 13|Iteration 120|reward:  870.0|last_reward_at:  120|Elapsed Time: 0:00:00||

Episode 13|Iteration 122|reward:  876.0|last_reward_at:  120|Elapsed Time: 0:00:01||

Episode 13|Iteration 122|reward:  876.0|last_reward_at:  122|Elapsed Time: 0:00:01||

Episode 13|Iteration 124|reward:  878.0|last_reward_at:  122|Elapsed Time: 0:00:01||

Episode 13|Iteration 124|reward:  878.0|last_reward_at:  124|Elapsed Time: 0:00:01||

Episode 13|Iteration 127|reward:  978.0|last_reward_at:  124|Elapsed Time: 0:00:01||

Episode 13|Iteration 127|reward:  978.0|last_reward_at:  127|Elapsed Time: 0:00:01||

Episode 13|Iteration 130|reward:  980.0|last_reward_at:  127|Elapsed Time: 0:00:01||

Episode 13|Iteration 130|reward:  980.0|last_reward_at:  130|Elapsed Time: 0:00:01||

Episode 13|Iteration 132|reward:  994.0|last_reward_at:  130|Elapsed Time: 0:00:01||

Episode 13|Iteration 132|reward:  994.0|last_reward_at:  132|Elapsed Time: 0:00:01||

Episode 13|Iteration 133|reward: 1094.0|last_reward_at:  132|Elapsed Time: 0:00:01||

Episode 13|Iteration 133|reward: 1094.0|last_reward_at:  133|Elapsed Time: 0:00:01||

Episode 13|Iteration 134|reward: 1108.0|last_reward_at:  133|Elapsed Time: 0:00:01||

Episode 13|Iteration 134|reward: 1108.0|last_reward_at:  134|Elapsed Time: 0:00:01||

Episode 13|Iteration 136|reward: 1114.0|last_reward_at:  134|Elapsed Time: 0:00:01||

Episode 13|Iteration 136|reward: 1114.0|last_reward_at:  136|Elapsed Time: 0:00:01||

Episode 13|Iteration 137|reward: 1214.0|last_reward_at:  136|Elapsed Time: 0:00:01||

Episode 13|Iteration 137|reward: 1214.0|last_reward_at:  137|Elapsed Time: 0:00:01||

Episode 13|Iteration 138|reward: 1228.0|last_reward_at:  137|Elapsed Time: 0:00:01||

Episode 13|Iteration 138|reward: 1228.0|last_reward_at:  138|Elapsed Time: 0:00:01||

Episode 13|Iteration 139|reward: 1234.0|last_reward_at:  138|Elapsed Time: 0:00:01||

Episode 13|Iteration 139|reward: 1234.0|last_reward_at:  139|Elapsed Time: 0:00:01||

Episode 13|Iteration 140|reward: 6234.0|last_reward_at:  139|Elapsed Time: 0:00:01||

Episode 13|Iteration 140|reward: 6234.0|last_reward_at:  140|Elapsed Time: 0:00:01||




  Episode 13 ended at t=140 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 4/13 (0.24)
    explore-remote: 9/24 (0.27)
    explore-connect: 1/39 (0.03)
    exploit-local: 13/0 (1.00)
    exploit-remote: 2/1 (0.67)
    exploit-connect: 10/24 (0.29)
  exploit deflected to exploration: 11
  ## Episode: 14/20 'DQL' ϵ=0.6148, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 14|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 14|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 14|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 14|Iteration 4|reward:  114.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 14|Iteration 4|reward:  114.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 14|Iteration 8|reward:  116.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 14|Iteration 8|reward:  116.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 14|Iteration 11|reward:  130.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 14|Iteration 11|reward:  130.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 14|Iteration 13|reward:  138.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 14|Iteration 13|reward:  138.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 14|Iteration 14|reward:  144.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 14|Iteration 14|reward:  144.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 14|Iteration 23|reward:  144.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 14|Iteration 33|reward:  144.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 14|Iteration 33|reward:  146.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 14|Iteration 33|reward:  146.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 14|Iteration 44|reward:  146.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 14|Iteration 53|reward:  152.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 14|Iteration 53|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 66|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 76|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 87|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 99|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 109|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 119|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 130|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 140|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 150|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 157|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 168|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 14|Iteration 178|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:01||

Episode 14|Iteration 188|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:01||

Episode 14|Iteration 198|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:01||

Episode 14|Iteration 200|reward:  152.0|last_reward_at:   53|Elapsed Time: 0:00:01||




  Episode 14 stopped at t=200 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/25 (0.04)
    explore-remote: 3/43 (0.07)
    explore-connect: 0/57 (0.00)
    exploit-local: 2/0 (1.00)
    exploit-remote: 1/2 (0.33)
    exploit-connect: 1/65 (0.02)
  exploit deflected to exploration: 0
  ## Episode: 15/20 'DQL' ϵ=0.5946, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 15|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 15|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 15|Iteration 2|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 15|Iteration 2|reward:   14.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 15|Iteration 3|reward:   18.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 15|Iteration 3|reward:   18.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 15|Iteration 9|reward:  118.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 15|Iteration 9|reward:  118.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 15|Iteration 10|reward:  132.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 15|Iteration 10|reward:  132.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 15|Iteration 11|reward:  134.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 15|Iteration 11|reward:  134.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 15|Iteration 12|reward:  140.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 15|Iteration 12|reward:  140.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 15|Iteration 14|reward:  146.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 15|Iteration 14|reward:  146.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 15|Iteration 20|reward:  154.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 15|Iteration 20|reward:  154.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 15|Iteration 31|reward:  154.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 15|Iteration 41|reward:  154.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 15|Iteration 49|reward:  154.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 15|Iteration 59|reward:  154.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 15|Iteration 71|reward:  154.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 15|Iteration 76|reward:  254.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 15|Iteration 76|reward:  254.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 15|Iteration 77|reward:  268.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 15|Iteration 77|reward:  268.0|last_reward_at:   77|Elapsed Time: 0:00:00||

Episode 15|Iteration 78|reward:  270.0|last_reward_at:   77|Elapsed Time: 0:00:00||

Episode 15|Iteration 78|reward:  270.0|last_reward_at:   78|Elapsed Time: 0:00:00||

Episode 15|Iteration 84|reward:  370.0|last_reward_at:   78|Elapsed Time: 0:00:00||

Episode 15|Iteration 84|reward:  370.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 15|Iteration 86|reward:  384.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 15|Iteration 86|reward:  384.0|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 15|Iteration 87|reward:  392.0|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 15|Iteration 87|reward:  392.0|last_reward_at:   87|Elapsed Time: 0:00:00||

Episode 15|Iteration 89|reward:  398.0|last_reward_at:   87|Elapsed Time: 0:00:00||

Episode 15|Iteration 89|reward:  398.0|last_reward_at:   89|Elapsed Time: 0:00:00||

Episode 15|Iteration 90|reward:  498.0|last_reward_at:   89|Elapsed Time: 0:00:00||

Episode 15|Iteration 90|reward:  498.0|last_reward_at:   90|Elapsed Time: 0:00:00||

Episode 15|Iteration 94|reward:  512.0|last_reward_at:   90|Elapsed Time: 0:00:00||

Episode 15|Iteration 94|reward:  512.0|last_reward_at:   94|Elapsed Time: 0:00:00||

Episode 15|Iteration 95|reward:  518.0|last_reward_at:   94|Elapsed Time: 0:00:00||

Episode 15|Iteration 95|reward:  518.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 15|Iteration 101|reward:  524.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 15|Iteration 101|reward:  524.0|last_reward_at:  101|Elapsed Time: 0:00:00||

Episode 15|Iteration 102|reward:  624.0|last_reward_at:  101|Elapsed Time: 0:00:00||

Episode 15|Iteration 102|reward:  624.0|last_reward_at:  102|Elapsed Time: 0:00:00||

Episode 15|Iteration 103|reward:  638.0|last_reward_at:  102|Elapsed Time: 0:00:00||

Episode 15|Iteration 103|reward:  638.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 15|Iteration 106|reward:  644.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 15|Iteration 106|reward:  644.0|last_reward_at:  106|Elapsed Time: 0:00:00||

Episode 15|Iteration 107|reward:  744.0|last_reward_at:  106|Elapsed Time: 0:00:00||

Episode 15|Iteration 107|reward:  744.0|last_reward_at:  107|Elapsed Time: 0:00:00||

Episode 15|Iteration 108|reward:  758.0|last_reward_at:  107|Elapsed Time: 0:00:00||

Episode 15|Iteration 108|reward:  758.0|last_reward_at:  108|Elapsed Time: 0:00:00||

Episode 15|Iteration 111|reward:  858.0|last_reward_at:  108|Elapsed Time: 0:00:00||

Episode 15|Iteration 111|reward:  858.0|last_reward_at:  111|Elapsed Time: 0:00:00||

Episode 15|Iteration 112|reward:  872.0|last_reward_at:  111|Elapsed Time: 0:00:00||

Episode 15|Iteration 112|reward:  872.0|last_reward_at:  112|Elapsed Time: 0:00:00||

Episode 15|Iteration 113|reward:  878.0|last_reward_at:  112|Elapsed Time: 0:00:00||

Episode 15|Iteration 113|reward:  878.0|last_reward_at:  113|Elapsed Time: 0:00:00||

Episode 15|Iteration 117|reward:  978.0|last_reward_at:  113|Elapsed Time: 0:00:00||

Episode 15|Iteration 117|reward:  978.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 15|Iteration 118|reward:  980.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 15|Iteration 118|reward:  980.0|last_reward_at:  118|Elapsed Time: 0:00:00||

Episode 15|Iteration 120|reward:  994.0|last_reward_at:  118|Elapsed Time: 0:00:00||

Episode 15|Iteration 120|reward:  994.0|last_reward_at:  120|Elapsed Time: 0:00:00||

Episode 15|Iteration 121|reward: 1094.0|last_reward_at:  120|Elapsed Time: 0:00:00||

Episode 15|Iteration 121|reward: 1094.0|last_reward_at:  121|Elapsed Time: 0:00:00||

Episode 15|Iteration 124|reward: 1108.0|last_reward_at:  121|Elapsed Time: 0:00:00||

Episode 15|Iteration 124|reward: 1108.0|last_reward_at:  124|Elapsed Time: 0:00:00||

Episode 15|Iteration 127|reward: 1110.0|last_reward_at:  124|Elapsed Time: 0:00:00||

Episode 15|Iteration 127|reward: 1110.0|last_reward_at:  127|Elapsed Time: 0:00:00||

Episode 15|Iteration 129|reward: 1116.0|last_reward_at:  127|Elapsed Time: 0:00:00||

Episode 15|Iteration 129|reward: 1116.0|last_reward_at:  129|Elapsed Time: 0:00:00||

Episode 15|Iteration 131|reward: 1216.0|last_reward_at:  129|Elapsed Time: 0:00:00||

Episode 15|Iteration 131|reward: 1216.0|last_reward_at:  131|Elapsed Time: 0:00:00||

Episode 15|Iteration 133|reward: 1230.0|last_reward_at:  131|Elapsed Time: 0:00:00||

Episode 15|Iteration 133|reward: 1230.0|last_reward_at:  133|Elapsed Time: 0:00:00||

Episode 15|Iteration 136|reward: 6230.0|last_reward_at:  133|Elapsed Time: 0:00:00||

Episode 15|Iteration 136|reward: 6230.0|last_reward_at:  136|Elapsed Time: 0:00:00||




  Episode 15 ended at t=136 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/18 (0.10)
    explore-remote: 8/29 (0.22)
    explore-connect: 1/24 (0.04)
    exploit-local: 15/0 (1.00)
    exploit-remote: 1/7 (0.12)
    exploit-connect: 10/21 (0.32)
  exploit deflected to exploration: 0
  ## Episode: 16/20 'DQL' ϵ=0.5814, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 16|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 16|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 16|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 16|Iteration 2|reward:  114.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 16|Iteration 2|reward:  114.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 16|Iteration 3|reward:  128.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 16|Iteration 3|reward:  128.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 16|Iteration 4|reward:  134.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 16|Iteration 4|reward:  134.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 16|Iteration 5|reward:  234.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 16|Iteration 5|reward:  234.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 16|Iteration 7|reward:  248.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 16|Iteration 7|reward:  248.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 16|Iteration 8|reward:  250.0|last_reward_at:    7|Elapsed Time: 0:00:00||

Episode 16|Iteration 8|reward:  250.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 16|Iteration 9|reward:  252.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 16|Iteration 9|reward:  252.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 16|Iteration 13|reward:  256.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 16|Iteration 13|reward:  256.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 16|Iteration 14|reward:  356.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 16|Iteration 14|reward:  356.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 16|Iteration 15|reward:  370.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 16|Iteration 15|reward:  370.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 16|Iteration 16|reward:  376.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 16|Iteration 16|reward:  376.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 16|Iteration 19|reward:  476.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 16|Iteration 19|reward:  476.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 16|Iteration 21|reward:  490.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 16|Iteration 21|reward:  490.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 16|Iteration 22|reward:  590.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 16|Iteration 22|reward:  590.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 16|Iteration 24|reward:  604.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 16|Iteration 24|reward:  604.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 16|Iteration 26|reward:  610.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 16|Iteration 26|reward:  610.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 16|Iteration 27|reward:  616.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 16|Iteration 27|reward:  616.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 16|Iteration 29|reward:  716.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 16|Iteration 29|reward:  716.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 16|Iteration 30|reward:  730.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 16|Iteration 30|reward:  730.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 16|Iteration 35|reward:  732.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 16|Iteration 35|reward:  732.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 16|Iteration 36|reward:  738.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 16|Iteration 36|reward:  738.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 16|Iteration 38|reward:  744.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 16|Iteration 38|reward:  744.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 16|Iteration 39|reward:  844.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 16|Iteration 39|reward:  844.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 16|Iteration 40|reward:  858.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 16|Iteration 40|reward:  858.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 16|Iteration 45|reward:  958.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 16|Iteration 45|reward:  958.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 16|Iteration 47|reward:  972.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 16|Iteration 47|reward:  972.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 16|Iteration 54|reward:  972.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 16|Iteration 54|reward:  978.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 16|Iteration 54|reward:  978.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 16|Iteration 56|reward: 1078.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 16|Iteration 56|reward: 1078.0|last_reward_at:   56|Elapsed Time: 0:00:00||

Episode 16|Iteration 57|reward: 1092.0|last_reward_at:   56|Elapsed Time: 0:00:00||

Episode 16|Iteration 57|reward: 1092.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 16|Iteration 63|reward: 1098.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 16|Iteration 63|reward: 1098.0|last_reward_at:   63|Elapsed Time: 0:00:00||

Episode 16|Iteration 64|reward: 1100.0|last_reward_at:   63|Elapsed Time: 0:00:00||

Episode 16|Iteration 64|reward: 1100.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 16|Iteration 74|reward: 1100.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 16|Iteration 75|reward: 1106.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 16|Iteration 75|reward: 1106.0|last_reward_at:   75|Elapsed Time: 0:00:00||

Episode 16|Iteration 77|reward: 1206.0|last_reward_at:   75|Elapsed Time: 0:00:00||

Episode 16|Iteration 77|reward: 1206.0|last_reward_at:   77|Elapsed Time: 0:00:00||

Episode 16|Iteration 78|reward: 1208.0|last_reward_at:   77|Elapsed Time: 0:00:00||

Episode 16|Iteration 78|reward: 1208.0|last_reward_at:   78|Elapsed Time: 0:00:00||

Episode 16|Iteration 79|reward: 1222.0|last_reward_at:   78|Elapsed Time: 0:00:00||

Episode 16|Iteration 79|reward: 1222.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 16|Iteration 81|reward: 6222.0|last_reward_at:   79|Elapsed Time: 0:00:00||

Episode 16|Iteration 81|reward: 6222.0|last_reward_at:   81|Elapsed Time: 0:00:00||




  Episode 16 ended at t=81 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 0/8 (0.00)
    explore-remote: 7/6 (0.54)
    explore-connect: 0/18 (0.00)
    exploit-local: 16/0 (1.00)
    exploit-remote: 3/2 (0.60)
    exploit-connect: 11/10 (0.52)
  exploit deflected to exploration: 0
  ## Episode: 17/20 'DQL' ϵ=0.5736, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 17|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 17|Iteration 1|reward:    0.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 17|Iteration 3|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 17|Iteration 3|reward:   14.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 17|Iteration 4|reward:  114.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 17|Iteration 4|reward:  114.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 17|Iteration 9|reward:  128.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 17|Iteration 9|reward:  128.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 17|Iteration 12|reward:  130.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 17|Iteration 12|reward:  130.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 17|Iteration 13|reward:  136.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 17|Iteration 13|reward:  136.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 17|Iteration 15|reward:  236.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 17|Iteration 15|reward:  236.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 17|Iteration 18|reward:  250.0|last_reward_at:   15|Elapsed Time: 0:00:00||

Episode 17|Iteration 18|reward:  250.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 17|Iteration 19|reward:  252.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 17|Iteration 19|reward:  252.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 17|Iteration 20|reward:  352.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 17|Iteration 20|reward:  352.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 17|Iteration 22|reward:  366.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 17|Iteration 22|reward:  366.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 17|Iteration 24|reward:  372.0|last_reward_at:   22|Elapsed Time: 0:00:00||

Episode 17|Iteration 24|reward:  372.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 17|Iteration 27|reward:  378.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 17|Iteration 27|reward:  378.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 17|Iteration 30|reward:  384.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 17|Iteration 30|reward:  384.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 17|Iteration 41|reward:  384.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 17|Iteration 44|reward:  390.0|last_reward_at:   30|Elapsed Time: 0:00:00||

Episode 17|Iteration 44|reward:  390.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 17|Iteration 53|reward:  490.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 17|Iteration 53|reward:  490.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 17|Iteration 54|reward:  504.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 17|Iteration 54|reward:  504.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 17|Iteration 59|reward:  506.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 17|Iteration 59|reward:  506.0|last_reward_at:   59|Elapsed Time: 0:00:00||

Episode 17|Iteration 60|reward:  512.0|last_reward_at:   59|Elapsed Time: 0:00:00||

Episode 17|Iteration 60|reward:  512.0|last_reward_at:   60|Elapsed Time: 0:00:00||

Episode 17|Iteration 62|reward:  518.0|last_reward_at:   60|Elapsed Time: 0:00:00||

Episode 17|Iteration 62|reward:  518.0|last_reward_at:   62|Elapsed Time: 0:00:00||

Episode 17|Iteration 71|reward:  518.0|last_reward_at:   62|Elapsed Time: 0:00:00||

Episode 17|Iteration 72|reward:  618.0|last_reward_at:   62|Elapsed Time: 0:00:00||

Episode 17|Iteration 72|reward:  618.0|last_reward_at:   72|Elapsed Time: 0:00:00||

Episode 17|Iteration 73|reward:  632.0|last_reward_at:   72|Elapsed Time: 0:00:00||

Episode 17|Iteration 73|reward:  632.0|last_reward_at:   73|Elapsed Time: 0:00:00||

Episode 17|Iteration 74|reward:  638.0|last_reward_at:   73|Elapsed Time: 0:00:00||

Episode 17|Iteration 74|reward:  638.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 17|Iteration 75|reward:  738.0|last_reward_at:   74|Elapsed Time: 0:00:00||

Episode 17|Iteration 75|reward:  738.0|last_reward_at:   75|Elapsed Time: 0:00:00||

Episode 17|Iteration 76|reward:  752.0|last_reward_at:   75|Elapsed Time: 0:00:00||

Episode 17|Iteration 76|reward:  752.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 17|Iteration 78|reward:  758.0|last_reward_at:   76|Elapsed Time: 0:00:00||

Episode 17|Iteration 78|reward:  758.0|last_reward_at:   78|Elapsed Time: 0:00:00||

Episode 17|Iteration 81|reward:  760.0|last_reward_at:   78|Elapsed Time: 0:00:00||

Episode 17|Iteration 81|reward:  760.0|last_reward_at:   81|Elapsed Time: 0:00:00||

Episode 17|Iteration 88|reward:  860.0|last_reward_at:   81|Elapsed Time: 0:00:00||

Episode 17|Iteration 88|reward:  860.0|last_reward_at:   88|Elapsed Time: 0:00:00||

Episode 17|Iteration 90|reward:  874.0|last_reward_at:   88|Elapsed Time: 0:00:00||

Episode 17|Iteration 90|reward:  874.0|last_reward_at:   90|Elapsed Time: 0:00:00||

Episode 17|Iteration 93|reward:  880.0|last_reward_at:   90|Elapsed Time: 0:00:00||

Episode 17|Iteration 93|reward:  880.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 17|Iteration 94|reward:  882.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 17|Iteration 94|reward:  882.0|last_reward_at:   94|Elapsed Time: 0:00:00||

Episode 17|Iteration 95|reward:  982.0|last_reward_at:   94|Elapsed Time: 0:00:00||

Episode 17|Iteration 95|reward:  982.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 17|Iteration 96|reward:  996.0|last_reward_at:   95|Elapsed Time: 0:00:00||

Episode 17|Iteration 96|reward:  996.0|last_reward_at:   96|Elapsed Time: 0:00:00||

Episode 17|Iteration 97|reward: 1096.0|last_reward_at:   96|Elapsed Time: 0:00:00||

Episode 17|Iteration 97|reward: 1096.0|last_reward_at:   97|Elapsed Time: 0:00:00||

Episode 17|Iteration 99|reward: 1110.0|last_reward_at:   97|Elapsed Time: 0:00:00||

Episode 17|Iteration 99|reward: 1110.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 17|Iteration 102|reward: 1116.0|last_reward_at:   99|Elapsed Time: 0:00:00||

Episode 17|Iteration 102|reward: 1116.0|last_reward_at:  102|Elapsed Time: 0:00:00||

Episode 17|Iteration 108|reward: 1216.0|last_reward_at:  102|Elapsed Time: 0:00:00||

Episode 17|Iteration 108|reward: 1216.0|last_reward_at:  108|Elapsed Time: 0:00:00||

Episode 17|Iteration 109|reward: 1230.0|last_reward_at:  108|Elapsed Time: 0:00:00||

Episode 17|Iteration 109|reward: 1230.0|last_reward_at:  109|Elapsed Time: 0:00:00||

Episode 17|Iteration 115|reward: 1236.0|last_reward_at:  109|Elapsed Time: 0:00:00||

Episode 17|Iteration 115|reward: 1236.0|last_reward_at:  115|Elapsed Time: 0:00:00||

Episode 17|Iteration 117|reward: 1242.0|last_reward_at:  115|Elapsed Time: 0:00:00||

Episode 17|Iteration 117|reward: 1242.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 17|Iteration 120|reward: 6242.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 17|Iteration 120|reward: 6242.0|last_reward_at:  120|Elapsed Time: 0:00:00||




  Episode 17 ended at t=120 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 1/16 (0.06)
    explore-remote: 9/19 (0.32)
    explore-connect: 0/28 (0.00)
    exploit-local: 16/3 (0.84)
    exploit-remote: 3/9 (0.25)
    exploit-connect: 11/5 (0.69)
  exploit deflected to exploration: 0
  ## Episode: 18/20 'DQL' ϵ=0.5624, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 18|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 18|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 18|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 18|Iteration 2|reward:  114.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 18|Iteration 2|reward:  114.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 18|Iteration 3|reward:  128.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 18|Iteration 3|reward:  128.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 18|Iteration 4|reward:  134.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 18|Iteration 4|reward:  134.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 18|Iteration 5|reward:  136.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 18|Iteration 5|reward:  136.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 18|Iteration 6|reward:  236.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 18|Iteration 6|reward:  236.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 18|Iteration 8|reward:  250.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 18|Iteration 8|reward:  250.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 18|Iteration 10|reward:  254.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 18|Iteration 10|reward:  254.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 18|Iteration 12|reward:  260.0|last_reward_at:   10|Elapsed Time: 0:00:00||

Episode 18|Iteration 12|reward:  260.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 18|Iteration 13|reward:  262.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 18|Iteration 13|reward:  262.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 18|Iteration 14|reward:  362.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 18|Iteration 14|reward:  362.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 18|Iteration 18|reward:  376.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 18|Iteration 18|reward:  376.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 18|Iteration 31|reward:  376.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 18|Iteration 31|reward:  382.0|last_reward_at:   18|Elapsed Time: 0:00:00||

Episode 18|Iteration 31|reward:  382.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 18|Iteration 32|reward:  482.0|last_reward_at:   31|Elapsed Time: 0:00:00||

Episode 18|Iteration 32|reward:  482.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 18|Iteration 34|reward:  496.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 18|Iteration 34|reward:  496.0|last_reward_at:   34|Elapsed Time: 0:00:00||

Episode 18|Iteration 37|reward:  498.0|last_reward_at:   34|Elapsed Time: 0:00:00||

Episode 18|Iteration 37|reward:  498.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 18|Iteration 38|reward:  502.0|last_reward_at:   37|Elapsed Time: 0:00:00||

Episode 18|Iteration 38|reward:  502.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 18|Iteration 39|reward:  504.0|last_reward_at:   38|Elapsed Time: 0:00:00||

Episode 18|Iteration 39|reward:  504.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 18|Iteration 40|reward:  604.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 18|Iteration 40|reward:  604.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 18|Iteration 41|reward:  618.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 18|Iteration 41|reward:  618.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 18|Iteration 45|reward:  624.0|last_reward_at:   41|Elapsed Time: 0:00:00||

Episode 18|Iteration 45|reward:  624.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 18|Iteration 47|reward:  724.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 18|Iteration 47|reward:  724.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 18|Iteration 48|reward:  738.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 18|Iteration 48|reward:  738.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 18|Iteration 51|reward:  744.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 18|Iteration 51|reward:  744.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 18|Iteration 57|reward:  844.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 18|Iteration 57|reward:  844.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 18|Iteration 58|reward:  858.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 18|Iteration 58|reward:  858.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 18|Iteration 59|reward:  864.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 18|Iteration 59|reward:  864.0|last_reward_at:   59|Elapsed Time: 0:00:00||

Episode 18|Iteration 64|reward:  964.0|last_reward_at:   59|Elapsed Time: 0:00:00||

Episode 18|Iteration 64|reward:  964.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 18|Iteration 65|reward:  978.0|last_reward_at:   64|Elapsed Time: 0:00:00||

Episode 18|Iteration 65|reward:  978.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 18|Iteration 67|reward: 1078.0|last_reward_at:   65|Elapsed Time: 0:00:00||

Episode 18|Iteration 67|reward: 1078.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 18|Iteration 68|reward: 1092.0|last_reward_at:   67|Elapsed Time: 0:00:00||

Episode 18|Iteration 68|reward: 1092.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 18|Iteration 69|reward: 1098.0|last_reward_at:   68|Elapsed Time: 0:00:00||

Episode 18|Iteration 69|reward: 1098.0|last_reward_at:   69|Elapsed Time: 0:00:00||

Episode 18|Iteration 79|reward: 1098.0|last_reward_at:   69|Elapsed Time: 0:00:00||

Episode 18|Iteration 84|reward: 1100.0|last_reward_at:   69|Elapsed Time: 0:00:00||

Episode 18|Iteration 84|reward: 1100.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 18|Iteration 86|reward: 1106.0|last_reward_at:   84|Elapsed Time: 0:00:00||

Episode 18|Iteration 86|reward: 1106.0|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 18|Iteration 89|reward: 1112.0|last_reward_at:   86|Elapsed Time: 0:00:00||

Episode 18|Iteration 89|reward: 1112.0|last_reward_at:   89|Elapsed Time: 0:00:00||

Episode 18|Iteration 99|reward: 1112.0|last_reward_at:   89|Elapsed Time: 0:00:00||

Episode 18|Iteration 101|reward: 1118.0|last_reward_at:   89|Elapsed Time: 0:00:00||

Episode 18|Iteration 101|reward: 1118.0|last_reward_at:  101|Elapsed Time: 0:00:00||

Episode 18|Iteration 103|reward: 1124.0|last_reward_at:  101|Elapsed Time: 0:00:00||

Episode 18|Iteration 103|reward: 1124.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 18|Iteration 104|reward: 1130.0|last_reward_at:  103|Elapsed Time: 0:00:00||

Episode 18|Iteration 104|reward: 1130.0|last_reward_at:  104|Elapsed Time: 0:00:00||

Episode 18|Iteration 109|reward: 1136.0|last_reward_at:  104|Elapsed Time: 0:00:00||

Episode 18|Iteration 109|reward: 1136.0|last_reward_at:  109|Elapsed Time: 0:00:00||

Episode 18|Iteration 112|reward: 1236.0|last_reward_at:  109|Elapsed Time: 0:00:00||

Episode 18|Iteration 112|reward: 1236.0|last_reward_at:  112|Elapsed Time: 0:00:00||

Episode 18|Iteration 117|reward: 1250.0|last_reward_at:  112|Elapsed Time: 0:00:00||

Episode 18|Iteration 117|reward: 1250.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 18|Iteration 120|reward: 6250.0|last_reward_at:  117|Elapsed Time: 0:00:00||

Episode 18|Iteration 120|reward: 6250.0|last_reward_at:  120|Elapsed Time: 0:00:00||




  Episode 18 ended at t=120 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/15 (0.12)
    explore-remote: 9/9 (0.50)
    explore-connect: 1/34 (0.03)
    exploit-local: 16/3 (0.84)
    exploit-remote: 4/3 (0.57)
    exploit-connect: 10/14 (0.42)
  exploit deflected to exploration: 0
  ## Episode: 19/20 'DQL' ϵ=0.5514, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 19|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 19|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 19|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 19|Iteration 2|reward:  114.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 19|Iteration 2|reward:  114.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 19|Iteration 3|reward:  128.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 19|Iteration 3|reward:  128.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 19|Iteration 4|reward:  130.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 19|Iteration 4|reward:  130.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 19|Iteration 6|reward:  136.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 19|Iteration 6|reward:  136.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 19|Iteration 9|reward:  142.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 19|Iteration 9|reward:  142.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 19|Iteration 12|reward:  242.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 19|Iteration 12|reward:  242.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 19|Iteration 13|reward:  256.0|last_reward_at:   12|Elapsed Time: 0:00:00||

Episode 19|Iteration 13|reward:  256.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 19|Iteration 14|reward:  356.0|last_reward_at:   13|Elapsed Time: 0:00:00||

Episode 19|Iteration 14|reward:  356.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 19|Iteration 16|reward:  370.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 19|Iteration 16|reward:  370.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 19|Iteration 19|reward:  376.0|last_reward_at:   16|Elapsed Time: 0:00:00||

Episode 19|Iteration 19|reward:  376.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 19|Iteration 21|reward:  476.0|last_reward_at:   19|Elapsed Time: 0:00:00||

Episode 19|Iteration 21|reward:  476.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 19|Iteration 23|reward:  490.0|last_reward_at:   21|Elapsed Time: 0:00:00||

Episode 19|Iteration 23|reward:  490.0|last_reward_at:   23|Elapsed Time: 0:00:00||

Episode 19|Iteration 24|reward:  494.0|last_reward_at:   23|Elapsed Time: 0:00:00||

Episode 19|Iteration 24|reward:  494.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 19|Iteration 25|reward:  500.0|last_reward_at:   24|Elapsed Time: 0:00:00||

Episode 19|Iteration 25|reward:  500.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 19|Iteration 27|reward:  502.0|last_reward_at:   25|Elapsed Time: 0:00:00||

Episode 19|Iteration 27|reward:  502.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 19|Iteration 29|reward:  504.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 19|Iteration 29|reward:  504.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 19|Iteration 32|reward:  604.0|last_reward_at:   29|Elapsed Time: 0:00:00||

Episode 19|Iteration 32|reward:  604.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 19|Iteration 33|reward:  618.0|last_reward_at:   32|Elapsed Time: 0:00:00||

Episode 19|Iteration 33|reward:  618.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 19|Iteration 34|reward:  624.0|last_reward_at:   33|Elapsed Time: 0:00:00||

Episode 19|Iteration 34|reward:  624.0|last_reward_at:   34|Elapsed Time: 0:00:00||

Episode 19|Iteration 36|reward:  724.0|last_reward_at:   34|Elapsed Time: 0:00:00||

Episode 19|Iteration 36|reward:  724.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 19|Iteration 40|reward:  738.0|last_reward_at:   36|Elapsed Time: 0:00:00||

Episode 19|Iteration 40|reward:  738.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 19|Iteration 42|reward:  740.0|last_reward_at:   40|Elapsed Time: 0:00:00||

Episode 19|Iteration 42|reward:  740.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 19|Iteration 43|reward:  742.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 19|Iteration 43|reward:  742.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 19|Iteration 47|reward:  842.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 19|Iteration 47|reward:  842.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 19|Iteration 49|reward:  856.0|last_reward_at:   47|Elapsed Time: 0:00:00||

Episode 19|Iteration 49|reward:  856.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 19|Iteration 52|reward:  862.0|last_reward_at:   49|Elapsed Time: 0:00:00||

Episode 19|Iteration 52|reward:  862.0|last_reward_at:   52|Elapsed Time: 0:00:00||

Episode 19|Iteration 53|reward:  962.0|last_reward_at:   52|Elapsed Time: 0:00:00||

Episode 19|Iteration 53|reward:  962.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 19|Iteration 55|reward:  976.0|last_reward_at:   53|Elapsed Time: 0:00:00||

Episode 19|Iteration 55|reward:  976.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 19|Iteration 56|reward: 1076.0|last_reward_at:   55|Elapsed Time: 0:00:00||

Episode 19|Iteration 56|reward: 1076.0|last_reward_at:   56|Elapsed Time: 0:00:00||

Episode 19|Iteration 57|reward: 1082.0|last_reward_at:   56|Elapsed Time: 0:00:00||

Episode 19|Iteration 57|reward: 1082.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 19|Iteration 58|reward: 1096.0|last_reward_at:   57|Elapsed Time: 0:00:00||

Episode 19|Iteration 58|reward: 1096.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 19|Iteration 60|reward: 1196.0|last_reward_at:   58|Elapsed Time: 0:00:00||

Episode 19|Iteration 60|reward: 1196.0|last_reward_at:   60|Elapsed Time: 0:00:00||

Episode 19|Iteration 61|reward: 1210.0|last_reward_at:   60|Elapsed Time: 0:00:00||

Episode 19|Iteration 61|reward: 1210.0|last_reward_at:   61|Elapsed Time: 0:00:00||

Episode 19|Iteration 63|reward: 6210.0|last_reward_at:   61|Elapsed Time: 0:00:00||

Episode 19|Iteration 63|reward: 6210.0|last_reward_at:   63|Elapsed Time: 0:00:00||




  Episode 19 ended at t=63 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/7 (0.22)
    explore-remote: 5/8 (0.38)
    explore-connect: 0/10 (0.00)
    exploit-local: 14/0 (1.00)
    exploit-remote: 3/1 (0.75)
    exploit-connect: 11/2 (0.85)
  exploit deflected to exploration: 0
  ## Episode: 20/20 'DQL' ϵ=0.5458, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 20|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 20|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 20|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 20|Iteration 3|reward:  114.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 20|Iteration 3|reward:  114.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 20|Iteration 5|reward:  128.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 20|Iteration 5|reward:  128.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 20|Iteration 6|reward:  134.0|last_reward_at:    5|Elapsed Time: 0:00:00||

Episode 20|Iteration 6|reward:  134.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 20|Iteration 8|reward:  234.0|last_reward_at:    6|Elapsed Time: 0:00:00||

Episode 20|Iteration 8|reward:  234.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 20|Iteration 9|reward:  248.0|last_reward_at:    8|Elapsed Time: 0:00:00||

Episode 20|Iteration 9|reward:  248.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 20|Iteration 11|reward:  254.0|last_reward_at:    9|Elapsed Time: 0:00:00||

Episode 20|Iteration 11|reward:  254.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 20|Iteration 14|reward:  260.0|last_reward_at:   11|Elapsed Time: 0:00:00||

Episode 20|Iteration 14|reward:  260.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 20|Iteration 20|reward:  266.0|last_reward_at:   14|Elapsed Time: 0:00:00||

Episode 20|Iteration 20|reward:  266.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 20|Iteration 26|reward:  268.0|last_reward_at:   20|Elapsed Time: 0:00:00||

Episode 20|Iteration 26|reward:  268.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 20|Iteration 27|reward:  368.0|last_reward_at:   26|Elapsed Time: 0:00:00||

Episode 20|Iteration 27|reward:  368.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 20|Iteration 28|reward:  382.0|last_reward_at:   27|Elapsed Time: 0:00:00||

Episode 20|Iteration 28|reward:  382.0|last_reward_at:   28|Elapsed Time: 0:00:00||

Episode 20|Iteration 35|reward:  388.0|last_reward_at:   28|Elapsed Time: 0:00:00||

Episode 20|Iteration 35|reward:  388.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 20|Iteration 39|reward:  394.0|last_reward_at:   35|Elapsed Time: 0:00:00||

Episode 20|Iteration 39|reward:  394.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 20|Iteration 42|reward:  402.0|last_reward_at:   39|Elapsed Time: 0:00:00||

Episode 20|Iteration 42|reward:  402.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 20|Iteration 43|reward:  502.0|last_reward_at:   42|Elapsed Time: 0:00:00||

Episode 20|Iteration 43|reward:  502.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 20|Iteration 44|reward:  516.0|last_reward_at:   43|Elapsed Time: 0:00:00||

Episode 20|Iteration 44|reward:  516.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 20|Iteration 45|reward:  520.0|last_reward_at:   44|Elapsed Time: 0:00:00||

Episode 20|Iteration 45|reward:  520.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 20|Iteration 48|reward:  522.0|last_reward_at:   45|Elapsed Time: 0:00:00||

Episode 20|Iteration 48|reward:  522.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 20|Iteration 51|reward:  524.0|last_reward_at:   48|Elapsed Time: 0:00:00||

Episode 20|Iteration 51|reward:  524.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 20|Iteration 54|reward:  526.0|last_reward_at:   51|Elapsed Time: 0:00:00||

Episode 20|Iteration 54|reward:  526.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 20|Iteration 64|reward:  526.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 20|Iteration 74|reward:  526.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 20|Iteration 82|reward:  526.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 20|Iteration 82|reward:  528.0|last_reward_at:   54|Elapsed Time: 0:00:00||

Episode 20|Iteration 82|reward:  528.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 20|Iteration 90|reward:  628.0|last_reward_at:   82|Elapsed Time: 0:00:00||

Episode 20|Iteration 90|reward:  628.0|last_reward_at:   90|Elapsed Time: 0:00:00||

Episode 20|Iteration 91|reward:  642.0|last_reward_at:   90|Elapsed Time: 0:00:00||

Episode 20|Iteration 91|reward:  642.0|last_reward_at:   91|Elapsed Time: 0:00:00||

Episode 20|Iteration 93|reward:  648.0|last_reward_at:   91|Elapsed Time: 0:00:00||

Episode 20|Iteration 93|reward:  648.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 20|Iteration 94|reward:  748.0|last_reward_at:   93|Elapsed Time: 0:00:00||

Episode 20|Iteration 94|reward:  748.0|last_reward_at:   94|Elapsed Time: 0:00:00||

Episode 20|Iteration 100|reward:  762.0|last_reward_at:   94|Elapsed Time: 0:00:00||

Episode 20|Iteration 100|reward:  762.0|last_reward_at:  100|Elapsed Time: 0:00:00||

Episode 20|Iteration 101|reward:  768.0|last_reward_at:  100|Elapsed Time: 0:00:00||

Episode 20|Iteration 101|reward:  768.0|last_reward_at:  101|Elapsed Time: 0:00:00||

Episode 20|Iteration 102|reward:  774.0|last_reward_at:  101|Elapsed Time: 0:00:00||

Episode 20|Iteration 102|reward:  774.0|last_reward_at:  102|Elapsed Time: 0:00:00||

Episode 20|Iteration 112|reward:  774.0|last_reward_at:  102|Elapsed Time: 0:00:00||

Episode 20|Iteration 112|reward:  874.0|last_reward_at:  102|Elapsed Time: 0:00:00||

Episode 20|Iteration 112|reward:  874.0|last_reward_at:  112|Elapsed Time: 0:00:00||

Episode 20|Iteration 114|reward:  880.0|last_reward_at:  112|Elapsed Time: 0:00:00||

Episode 20|Iteration 114|reward:  880.0|last_reward_at:  114|Elapsed Time: 0:00:00||

Episode 20|Iteration 116|reward:  894.0|last_reward_at:  114|Elapsed Time: 0:00:00||

Episode 20|Iteration 116|reward:  894.0|last_reward_at:  116|Elapsed Time: 0:00:00||

Episode 20|Iteration 118|reward:  900.0|last_reward_at:  116|Elapsed Time: 0:00:00||

Episode 20|Iteration 118|reward:  900.0|last_reward_at:  118|Elapsed Time: 0:00:00||

Episode 20|Iteration 119|reward: 1000.0|last_reward_at:  118|Elapsed Time: 0:00:00||

Episode 20|Iteration 119|reward: 1000.0|last_reward_at:  119|Elapsed Time: 0:00:00||

Episode 20|Iteration 121|reward: 1014.0|last_reward_at:  119|Elapsed Time: 0:00:00||

Episode 20|Iteration 121|reward: 1014.0|last_reward_at:  121|Elapsed Time: 0:00:00||

Episode 20|Iteration 122|reward: 1114.0|last_reward_at:  121|Elapsed Time: 0:00:00||

Episode 20|Iteration 122|reward: 1114.0|last_reward_at:  122|Elapsed Time: 0:00:00||

Episode 20|Iteration 124|reward: 1128.0|last_reward_at:  122|Elapsed Time: 0:00:00||

Episode 20|Iteration 124|reward: 1128.0|last_reward_at:  124|Elapsed Time: 0:00:00||

Episode 20|Iteration 126|reward: 1134.0|last_reward_at:  124|Elapsed Time: 0:00:00||

Episode 20|Iteration 126|reward: 1134.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 20|Iteration 127|reward: 1234.0|last_reward_at:  126|Elapsed Time: 0:00:00||

Episode 20|Iteration 127|reward: 1234.0|last_reward_at:  127|Elapsed Time: 0:00:00||

Episode 20|Iteration 131|reward: 1248.0|last_reward_at:  127|Elapsed Time: 0:00:00||

Episode 20|Iteration 131|reward: 1248.0|last_reward_at:  131|Elapsed Time: 0:00:00||

Episode 20|Iteration 132|reward: 6248.0|last_reward_at:  131|Elapsed Time: 0:00:00||

Episode 20|Iteration 132|reward: 6248.0|last_reward_at:  132|Elapsed Time: 0:00:00||




  Episode 20 ended at t=132 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/15 (0.12)
    explore-remote: 7/20 (0.26)
    explore-connect: 1/29 (0.03)
    exploit-local: 16/2 (0.89)
    exploit-remote: 5/23 (0.18)
    exploit-connect: 10/2 (0.83)
  exploit deflected to exploration: 0
simulation ended


In [11]:
# -----------------------------------------
# 6) DQL 평가(Exploit) (✅ 파라미터 유지 + 평가에서만 LLM 옵션)
# -----------------------------------------
llm_chat = make_openai_chat_callable(model_id, llm_token_yaml) if use_llm else None

eval_learner = (
    LLMPrunedExploitWrapper(
        base_learner=dql_run["learner"],
        llm_chat=llm_chat,
        llm_every_steps=llm_every_steps,
        candidate_pool=candidate_pool,
        llm_topk=llm_topk,
        obs_max_chars=llm_obs_max_chars,
    )
    if use_llm
    else dql_run["learner"]
)

dql_exploit_run = learner.epsilon_greedy_search(
    gym_env,
    ep,
    learner=eval_learner,
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    epsilon=0.0,
    epsilon_minimum=0.00,
    render=False,
    plot_episodes_length=False,
    verbosity=Verbosity.Quiet,
    render_last_episode_rewards_to=os.path.join(plots_dir, f"dql-{gymid}"),
    title=("Exploiting DQL (LLM-pruned)" if use_llm else "Exploiting DQL"),
)

[OpenAI] key_prefix= sk-proj- len= 164 yaml= /mnt/c/Users/cabin/git_repo/CyberSecurity-LLM/llm_token.yaml
###### Exploiting DQL (LLM-pruned)
Learning with: episode_count=3,iteration_count=200,ϵ=0.0,ϵ_min=0.0, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10
  ## Episode: 1/3 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 1|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 1|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 1|Iteration 2|reward:  114.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 1|Iteration 2|reward:  114.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 1|Iteration 3|reward:  128.0|last_reward_at:    2|Elapsed Time: 0:00:00||

Episode 1|Iteration 3|reward:  128.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 1|Iteration 4|reward:  134.0|last_reward_at:    3|Elapsed Time: 0:00:00||

Episode 1|Iteration 4|reward:  134.0|last_reward_at:    4|Elapsed Time: 0:00:00||

Episode 1|Iteration 5|reward:  134.0|last_reward_at:    4|Elapsed Time: 0:00:01||

Episode 1|Iteration 6|reward:  234.0|last_reward_at:    4|Elapsed Time: 0:00:01||

Episode 1|Iteration 6|reward:  234.0|last_reward_at:    6|Elapsed Time: 0:00:01||

Episode 1|Iteration 7|reward:  248.0|last_reward_at:    6|Elapsed Time: 0:00:01||

Episode 1|Iteration 7|reward:  248.0|last_reward_at:    7|Elapsed Time: 0:00:01||

Episode 1|Iteration 8|reward:  250.0|last_reward_at:    7|Elapsed Time: 0:00:01||

Episode 1|Iteration 8|reward:  250.0|last_reward_at:    8|Elapsed Time: 0:00:01||

Episode 1|Iteration 10|reward:  252.0|last_reward_at:    8|Elapsed Time: 0:00:03||

Episode 1|Iteration 10|reward:  252.0|last_reward_at:   10|Elapsed Time: 0:00:03||

Episode 1|Iteration 12|reward:  352.0|last_reward_at:   10|Elapsed Time: 0:00:03||

Episode 1|Iteration 12|reward:  352.0|last_reward_at:   12|Elapsed Time: 0:00:03||

Episode 1|Iteration 13|reward:  366.0|last_reward_at:   12|Elapsed Time: 0:00:03||

Episode 1|Iteration 13|reward:  366.0|last_reward_at:   13|Elapsed Time: 0:00:03||

Episode 1|Iteration 15|reward:  366.0|last_reward_at:   13|Elapsed Time: 0:00:04||

Episode 1|Iteration 20|reward:  366.0|last_reward_at:   13|Elapsed Time: 0:00:06||

Episode 1|Iteration 21|reward:  372.0|last_reward_at:   13|Elapsed Time: 0:00:06||

Episode 1|Iteration 21|reward:  372.0|last_reward_at:   21|Elapsed Time: 0:00:06||

Episode 1|Iteration 22|reward:  472.0|last_reward_at:   21|Elapsed Time: 0:00:06||

Episode 1|Iteration 22|reward:  472.0|last_reward_at:   22|Elapsed Time: 0:00:06||

Episode 1|Iteration 23|reward:  486.0|last_reward_at:   22|Elapsed Time: 0:00:06||

Episode 1|Iteration 23|reward:  486.0|last_reward_at:   23|Elapsed Time: 0:00:06||

Episode 1|Iteration 24|reward:  586.0|last_reward_at:   23|Elapsed Time: 0:00:06||

Episode 1|Iteration 24|reward:  586.0|last_reward_at:   24|Elapsed Time: 0:00:06||

Episode 1|Iteration 25|reward:  586.0|last_reward_at:   24|Elapsed Time: 0:00:07||

Episode 1|Iteration 26|reward:  600.0|last_reward_at:   24|Elapsed Time: 0:00:07||

Episode 1|Iteration 26|reward:  600.0|last_reward_at:   26|Elapsed Time: 0:00:07||

Episode 1|Iteration 29|reward:  606.0|last_reward_at:   26|Elapsed Time: 0:00:07||

Episode 1|Iteration 29|reward:  606.0|last_reward_at:   29|Elapsed Time: 0:00:07||

Episode 1|Iteration 30|reward:  606.0|last_reward_at:   29|Elapsed Time: 0:00:09||

Episode 1|Iteration 31|reward:  706.0|last_reward_at:   29|Elapsed Time: 0:00:09||

Episode 1|Iteration 31|reward:  706.0|last_reward_at:   31|Elapsed Time: 0:00:09||

Episode 1|Iteration 32|reward:  720.0|last_reward_at:   31|Elapsed Time: 0:00:09||

Episode 1|Iteration 32|reward:  720.0|last_reward_at:   32|Elapsed Time: 0:00:09||

Episode 1|Iteration 33|reward:  820.0|last_reward_at:   32|Elapsed Time: 0:00:09||

Episode 1|Iteration 33|reward:  820.0|last_reward_at:   33|Elapsed Time: 0:00:09||

Episode 1|Iteration 34|reward:  834.0|last_reward_at:   33|Elapsed Time: 0:00:09||

Episode 1|Iteration 34|reward:  834.0|last_reward_at:   34|Elapsed Time: 0:00:09||

Episode 1|Iteration 35|reward:  840.0|last_reward_at:   34|Elapsed Time: 0:00:10||

Episode 1|Iteration 35|reward:  840.0|last_reward_at:   35|Elapsed Time: 0:00:10||

Episode 1|Iteration 36|reward:  940.0|last_reward_at:   35|Elapsed Time: 0:00:10||

Episode 1|Iteration 36|reward:  940.0|last_reward_at:   36|Elapsed Time: 0:00:10||

Episode 1|Iteration 37|reward:  954.0|last_reward_at:   36|Elapsed Time: 0:00:10||

Episode 1|Iteration 37|reward:  954.0|last_reward_at:   37|Elapsed Time: 0:00:10||

Episode 1|Iteration 38|reward: 1054.0|last_reward_at:   37|Elapsed Time: 0:00:10||

Episode 1|Iteration 38|reward: 1054.0|last_reward_at:   38|Elapsed Time: 0:00:10||

Episode 1|Iteration 39|reward: 1068.0|last_reward_at:   38|Elapsed Time: 0:00:10||

Episode 1|Iteration 39|reward: 1068.0|last_reward_at:   39|Elapsed Time: 0:00:10||

Episode 1|Iteration 40|reward: 1068.0|last_reward_at:   39|Elapsed Time: 0:00:12||

Episode 1|Iteration 45|reward: 1074.0|last_reward_at:   39|Elapsed Time: 0:00:13||

Episode 1|Iteration 45|reward: 1074.0|last_reward_at:   45|Elapsed Time: 0:00:13||

Episode 1|Iteration 46|reward: 1174.0|last_reward_at:   45|Elapsed Time: 0:00:13||

Episode 1|Iteration 46|reward: 1174.0|last_reward_at:   46|Elapsed Time: 0:00:13||

Episode 1|Iteration 47|reward: 1188.0|last_reward_at:   46|Elapsed Time: 0:00:13||

Episode 1|Iteration 47|reward: 1188.0|last_reward_at:   47|Elapsed Time: 0:00:13||

Episode 1|Iteration 48|reward: 6188.0|last_reward_at:   47|Elapsed Time: 0:00:13||

Episode 1|Iteration 48|reward: 6188.0|last_reward_at:   48|Elapsed Time: 0:00:13||




  Episode 1 ended at t=48 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/0 (1.00)
    explore-remote: 1/0 (1.00)
    explore-connect: 0/6 (0.00)
    exploit-local: 14/0 (1.00)
    exploit-remote: 1/2 (0.33)
    exploit-connect: 11/11 (0.50)
  exploit deflected to exploration: 0
  ## Episode: 2/3 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 2|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 2|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Episode 2|Iteration 2|reward:  114.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 2|Iteration 2|reward:  114.0|last_reward_at:    2|Elapsed Time: 0:00:01||

Episode 2|Iteration 3|reward:  128.0|last_reward_at:    2|Elapsed Time: 0:00:01||

Episode 2|Iteration 3|reward:  128.0|last_reward_at:    3|Elapsed Time: 0:00:01||

Episode 2|Iteration 4|reward:  134.0|last_reward_at:    3|Elapsed Time: 0:00:01||

Episode 2|Iteration 4|reward:  134.0|last_reward_at:    4|Elapsed Time: 0:00:01||

Episode 2|Iteration 5|reward:  234.0|last_reward_at:    4|Elapsed Time: 0:00:01||

Episode 2|Iteration 5|reward:  234.0|last_reward_at:    5|Elapsed Time: 0:00:01||

Episode 2|Iteration 6|reward:  248.0|last_reward_at:    5|Elapsed Time: 0:00:01||

Episode 2|Iteration 6|reward:  248.0|last_reward_at:    6|Elapsed Time: 0:00:01||

Episode 2|Iteration 7|reward:  254.0|last_reward_at:    6|Elapsed Time: 0:00:02||

Episode 2|Iteration 7|reward:  254.0|last_reward_at:    7|Elapsed Time: 0:00:02||

Episode 2|Iteration 8|reward:  256.0|last_reward_at:    7|Elapsed Time: 0:00:02||

Episode 2|Iteration 8|reward:  256.0|last_reward_at:    8|Elapsed Time: 0:00:02||

Episode 2|Iteration 10|reward:  258.0|last_reward_at:    8|Elapsed Time: 0:00:02||

Episode 2|Iteration 10|reward:  258.0|last_reward_at:   10|Elapsed Time: 0:00:02||

Episode 2|Iteration 12|reward:  258.0|last_reward_at:   10|Elapsed Time: 0:00:03||

Episode 2|Iteration 13|reward:  258.0|last_reward_at:   10|Elapsed Time: 0:00:03||

Episode 2|Iteration 17|reward:  258.0|last_reward_at:   10|Elapsed Time: 0:00:05||

Episode 2|Iteration 22|reward:  258.0|last_reward_at:   10|Elapsed Time: 0:00:06||

Episode 2|Iteration 27|reward:  258.0|last_reward_at:   10|Elapsed Time: 0:00:07||

Episode 2|Iteration 32|reward:  258.0|last_reward_at:   10|Elapsed Time: 0:00:08||

Episode 2|Iteration 35|reward:  358.0|last_reward_at:   10|Elapsed Time: 0:00:08||

Episode 2|Iteration 35|reward:  358.0|last_reward_at:   35|Elapsed Time: 0:00:08||

Episode 2|Iteration 36|reward:  372.0|last_reward_at:   35|Elapsed Time: 0:00:08||

Episode 2|Iteration 36|reward:  372.0|last_reward_at:   36|Elapsed Time: 0:00:08||

Episode 2|Iteration 37|reward:  378.0|last_reward_at:   36|Elapsed Time: 0:00:10||

Episode 2|Iteration 37|reward:  378.0|last_reward_at:   37|Elapsed Time: 0:00:10||

Episode 2|Iteration 38|reward:  478.0|last_reward_at:   37|Elapsed Time: 0:00:10||

Episode 2|Iteration 38|reward:  478.0|last_reward_at:   38|Elapsed Time: 0:00:10||

Episode 2|Iteration 39|reward:  492.0|last_reward_at:   38|Elapsed Time: 0:00:10||

Episode 2|Iteration 39|reward:  492.0|last_reward_at:   39|Elapsed Time: 0:00:10||

Episode 2|Iteration 40|reward:  498.0|last_reward_at:   39|Elapsed Time: 0:00:10||

Episode 2|Iteration 40|reward:  498.0|last_reward_at:   40|Elapsed Time: 0:00:10||

Episode 2|Iteration 42|reward:  498.0|last_reward_at:   40|Elapsed Time: 0:00:12||

Episode 2|Iteration 46|reward:  598.0|last_reward_at:   40|Elapsed Time: 0:00:12||

Episode 2|Iteration 46|reward:  598.0|last_reward_at:   46|Elapsed Time: 0:00:12||

Episode 2|Iteration 47|reward:  612.0|last_reward_at:   46|Elapsed Time: 0:00:13||

Episode 2|Iteration 47|reward:  612.0|last_reward_at:   47|Elapsed Time: 0:00:13||

Episode 2|Iteration 48|reward:  618.0|last_reward_at:   47|Elapsed Time: 0:00:13||

Episode 2|Iteration 48|reward:  618.0|last_reward_at:   48|Elapsed Time: 0:00:13||

Episode 2|Iteration 49|reward:  718.0|last_reward_at:   48|Elapsed Time: 0:00:13||

Episode 2|Iteration 49|reward:  718.0|last_reward_at:   49|Elapsed Time: 0:00:13||

Episode 2|Iteration 50|reward:  732.0|last_reward_at:   49|Elapsed Time: 0:00:13||

Episode 2|Iteration 50|reward:  732.0|last_reward_at:   50|Elapsed Time: 0:00:13||

Episode 2|Iteration 51|reward:  832.0|last_reward_at:   50|Elapsed Time: 0:00:13||

Episode 2|Iteration 51|reward:  832.0|last_reward_at:   51|Elapsed Time: 0:00:13||

Episode 2|Iteration 52|reward:  832.0|last_reward_at:   51|Elapsed Time: 0:00:14||

Episode 2|Iteration 53|reward:  846.0|last_reward_at:   51|Elapsed Time: 0:00:14||

Episode 2|Iteration 53|reward:  846.0|last_reward_at:   53|Elapsed Time: 0:00:14||

Episode 2|Iteration 57|reward:  846.0|last_reward_at:   53|Elapsed Time: 0:00:15||

Episode 2|Iteration 58|reward:  846.0|last_reward_at:   53|Elapsed Time: 0:00:16||

Episode 2|Iteration 59|reward:  852.0|last_reward_at:   53|Elapsed Time: 0:00:16||

Episode 2|Iteration 59|reward:  852.0|last_reward_at:   59|Elapsed Time: 0:00:16||

Episode 2|Iteration 60|reward:  952.0|last_reward_at:   59|Elapsed Time: 0:00:16||

Episode 2|Iteration 60|reward:  952.0|last_reward_at:   60|Elapsed Time: 0:00:16||

Episode 2|Iteration 61|reward:  966.0|last_reward_at:   60|Elapsed Time: 0:00:16||

Episode 2|Iteration 61|reward:  966.0|last_reward_at:   61|Elapsed Time: 0:00:16||

Episode 2|Iteration 62|reward:  966.0|last_reward_at:   61|Elapsed Time: 0:00:17||

Episode 2|Iteration 63|reward: 1066.0|last_reward_at:   61|Elapsed Time: 0:00:17||

Episode 2|Iteration 63|reward: 1066.0|last_reward_at:   63|Elapsed Time: 0:00:17||

Episode 2|Iteration 64|reward: 1080.0|last_reward_at:   63|Elapsed Time: 0:00:17||

Episode 2|Iteration 64|reward: 1080.0|last_reward_at:   64|Elapsed Time: 0:00:17||

Episode 2|Iteration 67|reward: 1086.0|last_reward_at:   64|Elapsed Time: 0:00:19||

Episode 2|Iteration 67|reward: 1086.0|last_reward_at:   67|Elapsed Time: 0:00:19||

Episode 2|Iteration 68|reward: 1186.0|last_reward_at:   67|Elapsed Time: 0:00:19||

Episode 2|Iteration 68|reward: 1186.0|last_reward_at:   68|Elapsed Time: 0:00:19||

Episode 2|Iteration 69|reward: 1200.0|last_reward_at:   68|Elapsed Time: 0:00:19||

Episode 2|Iteration 69|reward: 1200.0|last_reward_at:   69|Elapsed Time: 0:00:19||

Episode 2|Iteration 70|reward: 6200.0|last_reward_at:   69|Elapsed Time: 0:00:19||

Episode 2|Iteration 70|reward: 6200.0|last_reward_at:   70|Elapsed Time: 0:00:19||




  Episode 2 ended at t=70 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 3/0 (1.00)
    explore-remote: 1/6 (0.14)
    explore-connect: 1/3 (0.25)
    exploit-local: 13/0 (1.00)
    exploit-remote: 3/17 (0.15)
    exploit-connect: 10/13 (0.43)
  exploit deflected to exploration: 0
  ## Episode: 3/3 'Exploiting DQL (LLM-pruned)' ϵ=0.0000, γ=0.015, lr=0.01, replaymemory=10000,
batch=512, target_update=10


Episode 3|Iteration 0|reward: ------|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 1|reward:   14.0|last_reward_at: ----|Elapsed Time: 0:00:00||

Episode 3|Iteration 1|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:00||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 2|reward:   14.0|last_reward_at:    1|Elapsed Time: 0:00:01||

Episode 3|Iteration 2|reward:  114.0|last_reward_at:    1|Elapsed Time: 0:00:02||

Episode 3|Iteration 2|reward:  114.0|last_reward_at:    2|Elapsed Time: 0:00:02||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 3|reward:  114.0|last_reward_at:    2|Elapsed Time: 0:00:02||

Episode 3|Iteration 3|reward:  128.0|last_reward_at:    2|Elapsed Time: 0:00:02||

Episode 3|Iteration 3|reward:  128.0|last_reward_at:    3|Elapsed Time: 0:00:02||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 4|reward:  128.0|last_reward_at:    3|Elapsed Time: 0:00:02||

Episode 3|Iteration 4|reward:  134.0|last_reward_at:    3|Elapsed Time: 0:00:02||

Episode 3|Iteration 4|reward:  134.0|last_reward_at:    4|Elapsed Time: 0:00:02||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 5|reward:  134.0|last_reward_at:    4|Elapsed Time: 0:00:03||

Episode 3|Iteration 5|reward:  234.0|last_reward_at:    4|Elapsed Time: 0:00:03||

Episode 3|Iteration 5|reward:  234.0|last_reward_at:    5|Elapsed Time: 0:00:03||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 6|reward:  234.0|last_reward_at:    5|Elapsed Time: 0:00:03||

Episode 3|Iteration 6|reward:  248.0|last_reward_at:    5|Elapsed Time: 0:00:03||

Episode 3|Iteration 6|reward:  248.0|last_reward_at:    6|Elapsed Time: 0:00:03||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 7|reward:  248.0|last_reward_at:    6|Elapsed Time: 0:00:03||

Episode 3|Iteration 7|reward:  250.0|last_reward_at:    6|Elapsed Time: 0:00:04||

Episode 3|Iteration 7|reward:  250.0|last_reward_at:    7|Elapsed Time: 0:00:04||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 8|reward:  250.0|last_reward_at:    7|Elapsed Time: 0:00:05||

Episode 3|Iteration 8|reward:  256.0|last_reward_at:    7|Elapsed Time: 0:00:05||

Episode 3|Iteration 8|reward:  256.0|last_reward_at:    8|Elapsed Time: 0:00:05||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 9|reward:  256.0|last_reward_at:    8|Elapsed Time: 0:00:05||

Episode 3|Iteration 9|reward:  258.0|last_reward_at:    8|Elapsed Time: 0:00:05||

Episode 3|Iteration 9|reward:  258.0|last_reward_at:    9|Elapsed Time: 0:00:05||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 11|reward:  258.0|last_reward_at:    9|Elapsed Time: 0:00:05||

Episode 3|Iteration 12|reward:  258.0|last_reward_at:    9|Elapsed Time: 0:00:07||

Episode 3|Iteration 17|reward:  258.0|last_reward_at:    9|Elapsed Time: 0:00:08||

Episode 3|Iteration 22|reward:  258.0|last_reward_at:    9|Elapsed Time: 0:00:10||

Episode 3|Iteration 27|reward:  258.0|last_reward_at:    9|Elapsed Time: 0:00:11||

Episode 3|Iteration 29|reward:  358.0|last_reward_at:    9|Elapsed Time: 0:00:11||

Episode 3|Iteration 29|reward:  358.0|last_reward_at:   29|Elapsed Time: 0:00:11||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 30|reward:  358.0|last_reward_at:   29|Elapsed Time: 0:00:12||

Episode 3|Iteration 30|reward:  372.0|last_reward_at:   29|Elapsed Time: 0:00:12||

Episode 3|Iteration 30|reward:  372.0|last_reward_at:   30|Elapsed Time: 0:00:12||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 31|reward:  372.0|last_reward_at:   30|Elapsed Time: 0:00:12||

Episode 3|Iteration 31|reward:  378.0|last_reward_at:   30|Elapsed Time: 0:00:12||

Episode 3|Iteration 31|reward:  378.0|last_reward_at:   31|Elapsed Time: 0:00:12||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 32|reward:  378.0|last_reward_at:   31|Elapsed Time: 0:00:12||

Episode 3|Iteration 32|reward:  378.0|last_reward_at:   31|Elapsed Time: 0:00:13||

Episode 3|Iteration 33|reward:  478.0|last_reward_at:   31|Elapsed Time: 0:00:13||

Episode 3|Iteration 33|reward:  478.0|last_reward_at:   33|Elapsed Time: 0:00:13||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 34|reward:  478.0|last_reward_at:   33|Elapsed Time: 0:00:13||

Episode 3|Iteration 34|reward:  492.0|last_reward_at:   33|Elapsed Time: 0:00:13||

Episode 3|Iteration 34|reward:  492.0|last_reward_at:   34|Elapsed Time: 0:00:13||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 35|reward:  492.0|last_reward_at:   34|Elapsed Time: 0:00:13||

Episode 3|Iteration 35|reward:  494.0|last_reward_at:   34|Elapsed Time: 0:00:13||

Episode 3|Iteration 35|reward:  494.0|last_reward_at:   35|Elapsed Time: 0:00:13||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 36|reward:  494.0|last_reward_at:   35|Elapsed Time: 0:00:14||

Episode 3|Iteration 36|reward:  500.0|last_reward_at:   35|Elapsed Time: 0:00:14||

Episode 3|Iteration 36|reward:  500.0|last_reward_at:   36|Elapsed Time: 0:00:14||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 37|reward:  500.0|last_reward_at:   36|Elapsed Time: 0:00:14||

Episode 3|Iteration 37|reward:  500.0|last_reward_at:   36|Elapsed Time: 0:00:15||

Episode 3|Iteration 42|reward:  500.0|last_reward_at:   36|Elapsed Time: 0:00:17||

Episode 3|Iteration 45|reward:  600.0|last_reward_at:   36|Elapsed Time: 0:00:17||

Episode 3|Iteration 45|reward:  600.0|last_reward_at:   45|Elapsed Time: 0:00:17||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 46|reward:  600.0|last_reward_at:   45|Elapsed Time: 0:00:17||

Episode 3|Iteration 46|reward:  614.0|last_reward_at:   45|Elapsed Time: 0:00:17||

Episode 3|Iteration 46|reward:  614.0|last_reward_at:   46|Elapsed Time: 0:00:17||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 47|reward:  614.0|last_reward_at:   46|Elapsed Time: 0:00:17||

Episode 3|Iteration 47|reward:  620.0|last_reward_at:   46|Elapsed Time: 0:00:18||

Episode 3|Iteration 47|reward:  620.0|last_reward_at:   47|Elapsed Time: 0:00:18||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 48|reward:  620.0|last_reward_at:   47|Elapsed Time: 0:00:18||

Episode 3|Iteration 48|reward:  720.0|last_reward_at:   47|Elapsed Time: 0:00:18||

Episode 3|Iteration 48|reward:  720.0|last_reward_at:   48|Elapsed Time: 0:00:18||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 49|reward:  720.0|last_reward_at:   48|Elapsed Time: 0:00:18||

Episode 3|Iteration 49|reward:  734.0|last_reward_at:   48|Elapsed Time: 0:00:18||

Episode 3|Iteration 49|reward:  734.0|last_reward_at:   49|Elapsed Time: 0:00:18||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 50|reward:  734.0|last_reward_at:   49|Elapsed Time: 0:00:19||

Episode 3|Iteration 50|reward:  834.0|last_reward_at:   49|Elapsed Time: 0:00:19||

Episode 3|Iteration 50|reward:  834.0|last_reward_at:   50|Elapsed Time: 0:00:19||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 51|reward:  834.0|last_reward_at:   50|Elapsed Time: 0:00:19||

Episode 3|Iteration 51|reward:  848.0|last_reward_at:   50|Elapsed Time: 0:00:19||

Episode 3|Iteration 51|reward:  848.0|last_reward_at:   51|Elapsed Time: 0:00:19||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
8_WindowsNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 52|reward:  848.0|last_reward_at:   51|Elapsed Time: 0:00:19||

Episode 3|Iteration 52|reward:  848.0|last_reward_at:   51|Elapsed Time: 0:00:21||

Episode 3|Iteration 57|reward:  854.0|last_reward_at:   51|Elapsed Time: 0:00:22||

Episode 3|Iteration 57|reward:  854.0|last_reward_at:   57|Elapsed Time: 0:00:22||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
8_WindowsNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 58|reward:  854.0|last_reward_at:   57|Elapsed Time: 0:00:22||

Episode 3|Iteration 59|reward:  954.0|last_reward_at:   57|Elapsed Time: 0:00:22||

Episode 3|Iteration 59|reward:  954.0|last_reward_at:   59|Elapsed Time: 0:00:22||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
8_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 60|reward:  954.0|last_reward_at:   59|Elapsed Time: 0:00:22||

Episode 3|Iteration 60|reward:  968.0|last_reward_at:   59|Elapsed Time: 0:00:22||

Episode 3|Iteration 60|reward:  968.0|last_reward_at:   60|Elapsed Time: 0:00:22||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
8_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
9_LinuxNode,discovered,,,"[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 61|reward:  968.0|last_reward_at:   60|Elapsed Time: 0:00:23||

Episode 3|Iteration 61|reward: 1068.0|last_reward_at:   60|Elapsed Time: 0:00:23||

Episode 3|Iteration 61|reward: 1068.0|last_reward_at:   61|Elapsed Time: 0:00:23||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
8_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
9_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 62|reward: 1068.0|last_reward_at:   61|Elapsed Time: 0:00:23||

Episode 3|Iteration 62|reward: 1068.0|last_reward_at:   61|Elapsed Time: 0:00:24||

Episode 3|Iteration 63|reward: 1082.0|last_reward_at:   61|Elapsed Time: 0:00:24||

Episode 3|Iteration 63|reward: 1082.0|last_reward_at:   63|Elapsed Time: 0:00:24||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
8_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
9_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 64|reward: 1082.0|last_reward_at:   63|Elapsed Time: 0:00:25||

Episode 3|Iteration 67|reward: 1082.0|last_reward_at:   63|Elapsed Time: 0:00:27||

Episode 3|Iteration 72|reward: 1082.0|last_reward_at:   63|Elapsed Time: 0:00:28||

Episode 3|Iteration 73|reward: 1088.0|last_reward_at:   63|Elapsed Time: 0:00:28||

Episode 3|Iteration 73|reward: 1088.0|last_reward_at:   73|Elapsed Time: 0:00:28||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
8_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
9_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 74|reward: 1088.0|last_reward_at:   73|Elapsed Time: 0:00:29||

Episode 3|Iteration 74|reward: 1188.0|last_reward_at:   73|Elapsed Time: 0:00:29||

Episode 3|Iteration 74|reward: 1188.0|last_reward_at:   74|Elapsed Time: 0:00:29||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
8_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
9_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 75|reward: 1188.0|last_reward_at:   74|Elapsed Time: 0:00:29||

Episode 3|Iteration 75|reward: 1202.0|last_reward_at:   74|Elapsed Time: 0:00:29||

Episode 3|Iteration 75|reward: 1202.0|last_reward_at:   75|Elapsed Time: 0:00:29||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
8_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
9_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"


Episode 3|Iteration 76|reward: 1202.0|last_reward_at:   75|Elapsed Time: 0:00:29||

Episode 3|Iteration 76|reward: 6202.0|last_reward_at:   75|Elapsed Time: 0:00:29||

Episode 3|Iteration 76|reward: 6202.0|last_reward_at:   76|Elapsed Time: 0:00:29||

Unnamed: 0_level_0,status,properties,local_attacks,remote_attacks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
start,owned,[],[ScanExplorerRecentFiles],[]
1_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
2_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
3_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
4_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
5_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
6_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
7_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"
8_WindowsNode,owned,"[Windows, Win10, Win10Patched]","[CrackKeepPassX, CrackKeepPass, ScanExplorerRe...","[ProbeLinux, ProbeWindows]"
9_LinuxNode,owned,"[MySql, Ubuntu, nginx/1.10.3]","[CrackKeepPassX, ScanExplorerRecentFiles, Sudo...","[ProbeLinux, ProbeWindows]"





  Episode 3 ended at t=76 
  Breakdown [Reward/NoReward (Success rate)]
    explore-local: 2/0 (1.00)
    explore-remote: 1/5 (0.17)
    explore-connect: 1/6 (0.14)
    exploit-local: 14/0 (1.00)
    exploit-remote: 4/14 (0.22)
    exploit-connect: 10/19 (0.34)
  exploit deflected to exploration: 0
simulation ended


In [12]:
# -----------------------------------------
# 7) 플롯 (기존 그대로)
# -----------------------------------------
all_runs = [
    dql_run,
    dql_exploit_run,
]

themodel = dqla.CyberBattleStateActionModel(ep)
p.plot_averaged_cummulative_rewards(
    all_runs=all_runs,
    title=(
        f"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count}\n"
        f"State: {[f.name() for f in themodel.state_space.feature_selection]} "
        f"({len(themodel.state_space.feature_selection)})\n"
        f"Action: abstract_action ({themodel.action_space.flat_size()})"
    ),
    save_at=os.path.join(plots_dir, f"benchmark-{gymid}-cumrewards.png"),
)

contenders = [dql_run, dql_exploit_run]
p.plot_episodes_length(contenders)
p.plot_averaged_cummulative_rewards(
    title=f"Agent Benchmark top contenders\nmax_nodes:{ep.maximum_node_count}\n",
    all_runs=contenders,
    save_at=os.path.join(plots_dir, f"benchmark-{gymid}-cumreward_contenders.png"),
)

for r in contenders:
    p.plot_all_episodes(r)


FigureCanvasAgg is non-interactive, and thus cannot be shown


FigureCanvasAgg is non-interactive, and thus cannot be shown




FigureCanvasAgg is non-interactive, and thus cannot be shown

