# Set Up the Experiment Table from experiment_values.json


In [6]:
import json
from copy import deepcopy
import pandas as pd

In [7]:
# Build the experiment table: one row per (agent, grid, swept parameter, value),
# where each row holds the agent's default parameters with exactly one of them
# overridden by the swept value.
import os  # local import: the import cell above does not provide `os`

CONFIG_PATH = "experiment_values.json"
OUTPUT_PATH = "experiment_results/experiment_table.csv"

with open(CONFIG_PATH) as f:
    config = json.load(f)

defaults = config["defaults"]
grids = config["grids"]
experiments = config["experiments"]

# Union of every parameter name that appears in any agent's defaults; these
# become the parameter columns of the table.
all_param_names = set()
for agent_params in defaults.values():
    all_param_names.update(agent_params.keys())

# The column order is the same for every row, so sort once instead of per row.
param_columns = sorted(all_param_names)

rows = []

for agent, sweep in experiments.items():
    default_params = defaults[agent]

    for grid in grids:
        for param_name, values in sweep.items():
            for val in values:
                # Start from the defaults and override only the swept parameter.
                params = deepcopy(default_params)
                params[param_name] = val

                row = {
                    "agent": agent,
                    "grid": grid,
                    "param_changed": param_name,
                    "param_value": val,
                }

                # Parameters this agent does not define are recorded as NaN.
                for pname in param_columns:
                    row[pname] = params.get(pname, float('nan'))

                rows.append(row)

df = pd.DataFrame(rows)

# RandomAgent rows are excluded from the written table.
df = df.loc[df["agent"] != "RandomAgent"]

# Create the output directory first so a fresh checkout does not fail on write.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
df.to_csv(OUTPUT_PATH, index=False)

# Run Experiment

## This is a copy of train.py with minor updates


In [8]:
import json, io, sys, re, importlib, inspect
from copy import deepcopy
from pathlib import Path
from argparse import Namespace
import numpy as np
import pandas as pd
from tqdm import trange

from world.reward_functions import custom_reward_function
from world import Environment
from agents import BaseAgent



In [9]:
import os, json, datetime, random

def load_agent(agent_name: str, env: Environment, config: dict):
    """Instantiate the agent described by ``config[agent_name]``.

    The config entry must provide ``"module"``, ``"class"`` and
    ``"train_mode"``; ``"init_args"`` is optional and defaults to ``{}``.
    The environment is passed to the constructor only when the class's
    ``__init__`` declares a parameter named ``env``.

    Args:
        agent_name: Key of the agent entry in ``config``.
        env: Environment handed to the agent if its constructor accepts it.
        config: Mapping from agent name to its config entry.

    Returns:
        A tuple ``(agent, train_mode, init_args)``.

    Raises:
        KeyError: If the entry, or one of its required keys, is missing.
    """
    info = config[agent_name]
    module = importlib.import_module(info["module"])
    cls = getattr(module, info["class"])
    init_args = info.get("init_args", {})

    accepts_env = "env" in inspect.signature(cls.__init__).parameters
    agent = cls(env=env, **init_args) if accepts_env else cls(**init_args)

    # Return the resolved dict rather than info["init_args"], so an entry
    # without "init_args" no longer raises KeyError after the agent is built.
    return agent, info["train_mode"], init_args

def update_agent(agent: BaseAgent, args: Namespace, state, next_state, reward, action):
    """Call ``agent.update`` with the keyword set its signature matches.

    The parameter names of ``agent.update`` are inspected and the first
    matching rule decides which keywords are passed:

    1. ``state`` and ``next_state`` present -> all four keywords.
    2. ``next_state``, ``reward``, ``action`` present -> those three.
    3. ``state``, ``reward``, ``action`` present -> those three.
    4. otherwise -> no arguments.

    ``args`` is accepted for call-site compatibility and is not used.
    """
    accepted = set(inspect.signature(agent.update).parameters)

    if "state" in accepted and "next_state" in accepted:
        kwargs = {
            "state": state,
            "next_state": next_state,
            "reward": reward,
            "action": action,
        }
    elif accepted >= {"next_state", "reward", "action"}:
        kwargs = {"next_state": next_state, "reward": reward, "action": action}
    elif accepted >= {"state", "reward", "action"}:
        kwargs = {"state": state, "reward": reward, "action": action}
    else:
        kwargs = {}

    agent.update(**kwargs)

def _max_q_change(q_table, prev_q_table):
    """Return the largest absolute Q-value change over states in both tables.

    Returns 1 when the tables share no state, so an episode that starts from
    an empty snapshot is never counted as converged.
    """
    common_states = set(q_table.keys()) & set(prev_q_table.keys())
    if not common_states:
        return 1
    return max(
        np.max(np.abs(q_table[s] - prev_q_table[s]))
        for s in common_states
    )


def _decay_exploration(agent):
    """Apply one multiplicative decay step to epsilon and alpha, floored at their minimums."""
    agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)
    agent.alpha = max(agent.alpha_min, agent.alpha * agent.alpha_decay)


def _save_metrics(agent, agent_name, grid_fp, init_args):
    """Print the convergence summary and write ``agent.metrics`` to ``metrics/<name>.json``."""
    its = agent.metrics.get("iterations", None)
    print(f"[Metrics] {agent_name} converged in {its} iterations")
    metrics_dir = "metrics"
    os.makedirs(metrics_dir, exist_ok=True)
    # Use the grid actually trained on; the original read an undefined local
    # `grid` and only worked through a notebook-level global.
    grid_name = Path(grid_fp).stem
    param_str = "_".join(f"{k}-{v}" for k, v in init_args.items())
    fname = f"{agent_name}_grid-{grid_name}_{param_str}_{random.randrange(1,9999)}.json"
    path = os.path.join(metrics_dir, fname)

    # Best-effort save: a failed write is reported but must not abort the run.
    try:
        with open(path, "w") as mf:
            json.dump(agent.metrics, mf, indent=2)
        print(f"[Metrics] Saved convergence data to {path}")
    except Exception as e:
        print(f"[Metrics] ERROR saving metrics: {e}")


def train_and_eval(args: Namespace, config: dict):
    """Train ``args.agent`` on every grid in ``args.GRID``, then evaluate it.

    The training loop is chosen by the ``train_mode`` returned by
    ``load_agent``: ``"q_learning"``, ``"value_iteration"``,
    ``"monte_carlo"``, or a generic step loop for anything else. Agents that
    expose a ``metrics`` attribute after training get it written to a JSON
    file under ``metrics/``.

    Args:
        args: Namespace providing ``GRID``, ``agent``, ``agent_start_pos``,
            ``no_gui``, ``sigma``, ``fps``, ``random_seed``, ``episodes``
            and ``iter``.
        config: Agent configuration mapping passed to ``load_agent``.

    Returns:
        Dict of metrics parsed from the text printed by
        ``Environment.evaluate_agent``: every line shaped like
        ``name: number`` becomes an entry (int when the number is all
        digits, float otherwise).

    Raises:
        ValueError: If ``args.GRID`` is empty.
    """
    if not args.GRID:
        # Without a grid no agent is ever created and evaluation cannot run.
        raise ValueError("args.GRID must contain at least one grid path")

    start = tuple(args.agent_start_pos)
    delta = 1e-6  # max per-episode Q-value change below which training stops

    for grid_fp in args.GRID:
        env = Environment(
            Path(grid_fp),
            args.no_gui,
            sigma=args.sigma,
            agent_start_pos=start,
            reward_fn=custom_reward_function,
            target_fps=args.fps,
            random_seed=args.random_seed
        )
        env.reset()
        agent, mode, init_args = load_agent(args.agent, env, config)

        if mode == "q_learning":
            metrics = {"iterations": 0, "steps_taken": 0, "deltas": [], "rewards": []}

            for ep in trange(args.episodes, desc=f"Training {args.agent}"):
                # Snapshot the Q-table so the change over this episode can be measured.
                prev_q_table = {
                    s: np.copy(q_values) for s, q_values in agent.q_table.items()
                }
                state = env.reset()
                ep_reward = 0.0
                for _ in range(args.iter):
                    action = agent.take_action(state)
                    next_state, reward, terminated, info = env.step(action)
                    ep_reward += reward
                    if terminated:
                        # NOTE(review): the terminal transition is never passed to
                        # agent.update here; kept as in train.py - confirm intended.
                        break
                    agent.update(state, next_state, reward, info["actual_action"])
                    state = next_state

                # Decay only starts after the first quarter of the episodes.
                if ep >= args.episodes/4:
                    _decay_exploration(agent)

                max_diff = _max_q_change(agent.q_table, prev_q_table)
                metrics["deltas"].append(max_diff)
                metrics["rewards"].append(ep_reward)
                metrics["steps_taken"] = env.world_stats["total_steps"]

                if max_diff < delta:
                    metrics["iterations"] = ep
                    break
            else:
                # Never converged: report the full episode budget. Using for/else
                # (instead of testing for 0) keeps convergence at episode 0 intact.
                metrics["iterations"] = args.episodes

            agent.metrics = metrics

            agent.eval_mode()

        elif mode == "value_iteration":
            state = env.reset()
            for _ in trange(args.iter, desc=f"[Train] {args.agent}"):
                a = agent.take_action(state)
                ns, r, done, info = env.step(a)
                update_agent(agent, args, state, ns, r, info["actual_action"])
                state = ns
                if done:
                    break
            agent.metrics["steps_taken"] = env.world_stats["total_steps"]

        elif mode == "monte_carlo":
            metrics = {"iterations": 0, "steps_taken": 0, "deltas": [], "rewards": []}

            for episode in trange(args.episodes, desc=f"Training {args.agent}"):
                prev_q = {s: np.copy(agent.q_table[s]) for s in agent.q_table}

                state = env.reset()
                ep_reward = 0.0
                took_step = False
                for _ in range(args.iter):
                    action = agent.take_action(state)
                    next_state, reward, terminated, info = env.step(action)
                    took_step = True
                    ep_reward += reward
                    if terminated:
                        break
                    agent.update(state, action, reward, next_state, False)
                    state = next_state

                # Final update with the last flag set to True (vs. False for the
                # in-episode updates). Skipped when no step was taken
                # (args.iter == 0), where action/next_state would be unbound.
                if took_step:
                    agent.update(state, action, reward, next_state, True)

                # Decay only starts after the first quarter of the episodes.
                if episode >= args.episodes/4:
                    _decay_exploration(agent)

                # Convergence check
                max_diff = _max_q_change(agent.q_table, prev_q)

                metrics["deltas"].append(max_diff)
                metrics["rewards"].append(ep_reward)
                metrics["steps_taken"] = env.world_stats["total_steps"]

                if max_diff < delta:
                    metrics["iterations"] = episode
                    break
            else:
                metrics["iterations"] = args.episodes

            agent.metrics = metrics
            agent.epsilon = 0.0  # Switch to greedy

        else:  # iterative / random
            state = env.reset()
            for _ in trange(args.iter, desc=f"[Train] {args.agent}"):
                a = agent.take_action(state)
                ns, r, done, info = env.step(a)
                update_agent(agent, args, state, ns, r, info["actual_action"])
                state = ns
                if done:
                    break

        # Only agents that expose metrics have anything to save; the save used to
        # run unconditionally and hit an undefined/stale `path` otherwise.
        if hasattr(agent, "metrics"):
            _save_metrics(agent, args.agent, grid_fp, init_args)

    # Capture evaluation output so the printed metrics can be parsed.
    # NOTE(review): this evaluates the agent trained on the LAST grid against
    # args.GRID[0]; the two coincide only for single-grid runs - confirm intended.
    buf = io.StringIO()
    old_out, old_err = sys.stdout, sys.stderr
    sys.stdout, sys.stderr = buf, buf
    try:
        Environment.evaluate_agent(
            Path(args.GRID[0]),
            agent,
            args.iter,
            args.sigma,
            agent_start_pos=start,
            reward_fn=custom_reward_function,
            random_seed=args.random_seed,
            show_images=False
        )
    finally:
        # Always restore the real streams, even if evaluation raises.
        sys.stdout, sys.stderr = old_out, old_err

    text = buf.getvalue()
    metrics = {}
    for line in text.splitlines():
        m = re.match(r"\s*([a-z_]+)\s*:\s*([-+]?[0-9]*\.?[0-9]+)", line)
        if m:
            k, v = m.group(1), m.group(2)
            metrics[k] = int(v) if v.isdigit() else float(v)
    return metrics



In [10]:

# Run every row of the experiment table through train_and_eval and collect the
# evaluation metrics into result_multi_experiment.csv.

# Read the same file the table-setup cell writes (experiment_table.csv); the
# previous name "experimental_table.csv" did not match it.
df = pd.read_csv("experiment_results/experiment_table.csv")

# Use context managers so the config files are closed deterministically.
with open("agent_config.json") as f:
    base_cfg = json.load(f)
with open("experiment_values.json") as f:
    exp_defs = json.load(f)

rows = []
for idx, row in df.iterrows():
    agent = row["agent"]
    grid  = row["grid"]
    print(f"{idx+1}: {agent} on {grid} | {row['param_changed']}={row['param_value']}")

    # Split the row's parameter columns into run-level settings (episodes,
    # iter, sigma) and constructor arguments for the agent.
    init_args, cli_args = {}, {}
    for c, v in row.items():
        if pd.isna(v) or c in {"agent", "grid", "param_changed", "param_value"}:
            continue
        if c in {"episodes", "iter"}:
            cli_args[c] = int(v)
        elif c == "sigma":
            cli_args[c] = float(v)
        else:
            init_args[c] = float(v)

    # Overlay the row's values on the agent's configured constructor defaults.
    cfg = deepcopy(base_cfg)
    defaults_init = cfg[agent].get("init_args", {})
    cfg[agent]["init_args"] = {**defaults_init, **init_args}

    agent_defaults = exp_defs["defaults"][agent]
    sigma = cli_args.get("sigma", agent_defaults.get("sigma", 0.0))

    ns = Namespace(
        GRID=[f"grid_configs/{grid}.npy"],
        agent=agent,
        no_gui=True,
        sigma=sigma,
        fps=5,
        episodes=cli_args.get("episodes", agent_defaults.get("episodes", 2000)),
        iter=cli_args.get("iter", agent_defaults.get("iter", 2000)),
        random_seed=42,
        agent_start_pos=[1, 1]
    )

    metrics = train_and_eval(ns, cfg)

    # One output row = the input row plus the parsed evaluation metrics.
    result = row.to_dict()
    result.update(metrics)
    result["sigma"] = sigma
    rows.append(result)

out_df = pd.DataFrame(rows)
out_df.to_csv("result_multi_experiment.csv", index=False)

1: QLearningAgent on A1_grid | gamma=0.6


Training QLearningAgent:  20%|█▉        | 393/2000 [00:00<00:02, 695.01it/s]


[Metrics] QLearningAgent converged in 393 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-A1_grid_gamma-0.6_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_3215.json
2: QLearningAgent on A1_grid | gamma=0.99


Training QLearningAgent:  25%|██▍       | 498/2000 [00:00<00:01, 807.48it/s]


[Metrics] QLearningAgent converged in 498 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-A1_grid_gamma-0.99_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_1951.json
3: QLearningAgent on A1_grid | alpha=0.3


Training QLearningAgent:  17%|█▋        | 335/2000 [00:00<00:02, 784.62it/s]


[Metrics] QLearningAgent converged in 335 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-A1_grid_gamma-0.9_alpha-0.3_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_2403.json
4: QLearningAgent on A1_grid | alpha=0.5


Training QLearningAgent:  10%|▉         | 190/2000 [00:00<00:02, 782.49it/s]


[Metrics] QLearningAgent converged in 190 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-A1_grid_gamma-0.9_alpha-0.5_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_6892.json
5: QLearningAgent on A1_grid | epsilon=0.5


Training QLearningAgent:  19%|█▉        | 386/2000 [00:00<00:02, 590.33it/s]


[Metrics] QLearningAgent converged in 386 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-A1_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.5_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_7740.json
6: QLearningAgent on A1_grid | epsilon_decay=0.8


Training QLearningAgent:  24%|██▍       | 484/2000 [00:00<00:01, 794.00it/s]


[Metrics] QLearningAgent converged in 484 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-A1_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.8_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_6496.json
7: QLearningAgent on A1_grid | sigma=0.0


Training QLearningAgent:  25%|██▌       | 505/2000 [00:00<00:01, 838.87it/s]


[Metrics] QLearningAgent converged in 505 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-A1_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_7501.json
8: QLearningAgent on A1_grid | sigma=0.5


Training QLearningAgent:  20%|█▉        | 396/2000 [00:00<00:02, 647.63it/s]


[Metrics] QLearningAgent converged in 396 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-A1_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_8672.json
9: QLearningAgent on A1_grid | episodes=5000.0


Training QLearningAgent:   9%|▉         | 460/5000 [00:00<00:05, 819.90it/s]


[Metrics] QLearningAgent converged in 460 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-A1_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_6488.json
10: QLearningAgent on A1_grid | iter=5000.0


Training QLearningAgent:  24%|██▍       | 480/2000 [00:00<00:01, 804.44it/s]


[Metrics] QLearningAgent converged in 480 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-A1_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_3423.json
11: QLearningAgent on Maze | gamma=0.6


Training QLearningAgent:  25%|██▌       | 508/2000 [00:00<00:02, 523.44it/s]


[Metrics] QLearningAgent converged in 508 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-Maze_gamma-0.6_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_6113.json
12: QLearningAgent on Maze | gamma=0.99


Training QLearningAgent:  28%|██▊       | 570/2000 [00:01<00:02, 562.03it/s]


[Metrics] QLearningAgent converged in 570 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-Maze_gamma-0.99_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_1392.json
13: QLearningAgent on Maze | alpha=0.3


Training QLearningAgent:  23%|██▎       | 463/2000 [00:00<00:02, 587.63it/s]


[Metrics] QLearningAgent converged in 463 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-Maze_gamma-0.9_alpha-0.3_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_7424.json
14: QLearningAgent on Maze | alpha=0.5


Training QLearningAgent:  13%|█▎        | 255/2000 [00:00<00:03, 522.72it/s]


[Metrics] QLearningAgent converged in 255 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-Maze_gamma-0.9_alpha-0.5_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_6358.json
15: QLearningAgent on Maze | epsilon=0.5


Training QLearningAgent:  22%|██▏       | 448/2000 [00:01<00:04, 369.81it/s]


[Metrics] QLearningAgent converged in 448 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-Maze_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.5_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_4614.json
16: QLearningAgent on Maze | epsilon_decay=0.8


Training QLearningAgent:  26%|██▌       | 512/2000 [00:00<00:02, 559.53it/s]


[Metrics] QLearningAgent converged in 512 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-Maze_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.8_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_1284.json
17: QLearningAgent on Maze | sigma=0.0


Training QLearningAgent:  25%|██▌       | 509/2000 [00:00<00:02, 566.48it/s]


[Metrics] QLearningAgent converged in 509 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-Maze_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_9691.json
18: QLearningAgent on Maze | sigma=0.5


Training QLearningAgent:  19%|█▉        | 387/2000 [00:01<00:04, 379.16it/s]


[Metrics] QLearningAgent converged in 387 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-Maze_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_8798.json
19: QLearningAgent on Maze | episodes=5000.0


Training QLearningAgent:  16%|█▌        | 794/5000 [00:01<00:06, 616.97it/s]


[Metrics] QLearningAgent converged in 794 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-Maze_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_4291.json
20: QLearningAgent on Maze | iter=5000.0


Training QLearningAgent:  26%|██▌       | 523/2000 [00:00<00:02, 542.12it/s]


[Metrics] QLearningAgent converged in 523 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-Maze_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_4762.json
21: QLearningAgent on test_grid | gamma=0.6


Training QLearningAgent:   3%|▎         | 62/2000 [00:00<00:01, 1480.29it/s]


[Metrics] QLearningAgent converged in 62 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-test_grid_gamma-0.6_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_9450.json
22: QLearningAgent on test_grid | gamma=0.99


Training QLearningAgent:   4%|▎         | 74/2000 [00:00<00:01, 1590.00it/s]


[Metrics] QLearningAgent converged in 74 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-test_grid_gamma-0.99_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_7658.json
23: QLearningAgent on test_grid | alpha=0.3


Training QLearningAgent:   2%|▏         | 47/2000 [00:00<00:01, 1448.33it/s]


[Metrics] QLearningAgent converged in 47 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-test_grid_gamma-0.9_alpha-0.3_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_2590.json
24: QLearningAgent on test_grid | alpha=0.5


Training QLearningAgent:   1%|▏         | 25/2000 [00:00<00:01, 1560.06it/s]


[Metrics] QLearningAgent converged in 25 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-test_grid_gamma-0.9_alpha-0.5_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_1495.json
25: QLearningAgent on test_grid | epsilon=0.5


Training QLearningAgent:   4%|▎         | 70/2000 [00:00<00:01, 1236.20it/s]


[Metrics] QLearningAgent converged in 70 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-test_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.5_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_7735.json
26: QLearningAgent on test_grid | epsilon_decay=0.8


Training QLearningAgent:   3%|▎         | 69/2000 [00:00<00:01, 1438.86it/s]


[Metrics] QLearningAgent converged in 69 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-test_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.8_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_4337.json
27: QLearningAgent on test_grid | sigma=0.0


Training QLearningAgent:   4%|▎         | 74/2000 [00:00<00:01, 1632.44it/s]


[Metrics] QLearningAgent converged in 74 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-test_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_8585.json
28: QLearningAgent on test_grid | sigma=0.5


Training QLearningAgent:   3%|▎         | 69/2000 [00:00<00:01, 1275.73it/s]


[Metrics] QLearningAgent converged in 69 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-test_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_324.json
29: QLearningAgent on test_grid | episodes=5000.0


Training QLearningAgent:   2%|▏         | 75/5000 [00:00<00:03, 1431.31it/s]


[Metrics] QLearningAgent converged in 75 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-test_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_9631.json
30: QLearningAgent on test_grid | iter=5000.0


Training QLearningAgent:   4%|▎         | 71/2000 [00:00<00:01, 1576.47it/s]


[Metrics] QLearningAgent converged in 71 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-test_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_6522.json
31: QLearningAgent on large_grid | gamma=0.6


Training QLearningAgent:   8%|▊         | 150/2000 [00:00<00:03, 497.50it/s]


[Metrics] QLearningAgent converged in 150 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-large_grid_gamma-0.6_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_8311.json
32: QLearningAgent on large_grid | gamma=0.99


Training QLearningAgent:  11%|█         | 222/2000 [00:00<00:03, 554.71it/s]


[Metrics] QLearningAgent converged in 222 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-large_grid_gamma-0.99_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_1732.json
33: QLearningAgent on large_grid | alpha=0.3


Training QLearningAgent:   9%|▉         | 176/2000 [00:00<00:02, 617.37it/s]


[Metrics] QLearningAgent converged in 176 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-large_grid_gamma-0.9_alpha-0.3_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_2187.json
34: QLearningAgent on large_grid | alpha=0.5


Training QLearningAgent:   6%|▋         | 128/2000 [00:00<00:02, 654.56it/s]


[Metrics] QLearningAgent converged in 128 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-large_grid_gamma-0.9_alpha-0.5_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_9515.json
35: QLearningAgent on large_grid | epsilon=0.5


Training QLearningAgent:  14%|█▍        | 289/2000 [00:00<00:03, 500.54it/s]


[Metrics] QLearningAgent converged in 289 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-large_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.5_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_6796.json
36: QLearningAgent on large_grid | epsilon_decay=0.8


Training QLearningAgent:  12%|█▏        | 239/2000 [00:00<00:02, 603.40it/s]


[Metrics] QLearningAgent converged in 239 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-large_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.8_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_2638.json
37: QLearningAgent on large_grid | sigma=0.0


Training QLearningAgent:  13%|█▎        | 260/2000 [00:00<00:02, 644.16it/s]


[Metrics] QLearningAgent converged in 260 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-large_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_2737.json
38: QLearningAgent on large_grid | sigma=0.5


Training QLearningAgent:  13%|█▎        | 266/2000 [00:00<00:03, 516.27it/s]


[Metrics] QLearningAgent converged in 266 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-large_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_1742.json
39: QLearningAgent on large_grid | episodes=5000.0


Training QLearningAgent:   5%|▍         | 239/5000 [00:00<00:07, 618.81it/s]


[Metrics] QLearningAgent converged in 239 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-large_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_8973.json
40: QLearningAgent on large_grid | iter=5000.0


Training QLearningAgent:  15%|█▌        | 300/2000 [00:00<00:02, 646.16it/s]


[Metrics] QLearningAgent converged in 300 iterations
[Metrics] Saved convergence data to metrics\QLearningAgent_grid-large_grid_gamma-0.9_alpha-0.2_alpha_decay-0.99_alpha_min-0.01_epsilon-0.2_epsilon_decay-0.9_epsilon_min-0.01_num_actions-[0, 1, 2, 3]_216.json
41: ValueIterationAgent on A1_grid | gamma=0.6


[Train] ValueIterationAgent:   1%|          | 16/2000 [00:00<00:00, 30504.03it/s]


[Metrics] ValueIterationAgent converged in 25 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-A1_grid_theta-1e-06_gamma-0.6_107.json
42: ValueIterationAgent on A1_grid | gamma=0.95


[Train] ValueIterationAgent:   1%|          | 15/2000 [00:00<00:00, 27618.33it/s]


[Metrics] ValueIterationAgent converged in 185 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-A1_grid_theta-1e-06_gamma-0.95_4558.json
43: ValueIterationAgent on A1_grid | theta=1e-05


[Train] ValueIterationAgent:   1%|          | 15/2000 [00:00<00:00, 25795.23it/s]


[Metrics] ValueIterationAgent converged in 82 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-A1_grid_theta-1e-05_gamma-0.9_4558.json
44: ValueIterationAgent on A1_grid | sigma=0.0


[Train] ValueIterationAgent:   1%|          | 11/2000 [00:00<00:00, 27962.03it/s]


[Metrics] ValueIterationAgent converged in 91 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-A1_grid_theta-1e-06_gamma-0.9_435.json
45: ValueIterationAgent on A1_grid | sigma=0.5


[Train] ValueIterationAgent:   1%|          | 23/2000 [00:00<00:00, 30269.53it/s]


[Metrics] ValueIterationAgent converged in 99 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-A1_grid_theta-1e-06_gamma-0.9_8786.json
46: ValueIterationAgent on A1_grid | iter=5000.0


[Train] ValueIterationAgent:   0%|          | 15/5000 [00:00<00:00, 25742.45it/s]


[Metrics] ValueIterationAgent converged in 93 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-A1_grid_theta-1e-06_gamma-0.9_4558.json
47: ValueIterationAgent on Maze | gamma=0.6


[Train] ValueIterationAgent: 100%|██████████| 2000/2000 [00:00<00:00, 66531.90it/s]


[Metrics] ValueIterationAgent converged in 24 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-Maze_theta-1e-06_gamma-0.6_6228.json
48: ValueIterationAgent on Maze | gamma=0.95


[Train] ValueIterationAgent:   2%|▏         | 36/2000 [00:00<00:00, 45248.71it/s]


[Metrics] ValueIterationAgent converged in 192 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-Maze_theta-1e-06_gamma-0.95_9460.json
49: ValueIterationAgent on Maze | theta=1e-05


[Train] ValueIterationAgent:   2%|▏         | 36/2000 [00:00<00:00, 44189.33it/s]


[Metrics] ValueIterationAgent converged in 89 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-Maze_theta-1e-05_gamma-0.9_9460.json
50: ValueIterationAgent on Maze | sigma=0.0


[Train] ValueIterationAgent:   2%|▏         | 33/2000 [00:00<00:00, 35049.89it/s]


[Metrics] ValueIterationAgent converged in 97 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-Maze_theta-1e-06_gamma-0.9_6202.json
51: ValueIterationAgent on Maze | sigma=0.5


[Train] ValueIterationAgent:  52%|█████▏    | 1047/2000 [00:00<00:00, 63440.81it/s]


[Metrics] ValueIterationAgent converged in 105 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-Maze_theta-1e-06_gamma-0.9_1989.json
52: ValueIterationAgent on Maze | iter=5000.0


[Train] ValueIterationAgent:   1%|          | 36/5000 [00:00<00:00, 38053.16it/s]


[Metrics] ValueIterationAgent converged in 100 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-Maze_theta-1e-06_gamma-0.9_9460.json
53: ValueIterationAgent on test_grid | gamma=0.6


[Train] ValueIterationAgent:   0%|          | 3/2000 [00:00<00:00, 12336.19it/s]


[Metrics] ValueIterationAgent converged in 21 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-test_grid_theta-1e-06_gamma-0.6_1680.json
54: ValueIterationAgent on test_grid | gamma=0.95


[Train] ValueIterationAgent:   0%|          | 3/2000 [00:00<00:00, 14513.16it/s]


[Metrics] ValueIterationAgent converged in 182 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-test_grid_theta-1e-06_gamma-0.95_1680.json
55: ValueIterationAgent on test_grid | theta=1e-05


[Train] ValueIterationAgent:   0%|          | 3/2000 [00:00<00:00, 11155.06it/s]


[Metrics] ValueIterationAgent converged in 78 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-test_grid_theta-1e-05_gamma-0.9_1680.json
56: ValueIterationAgent on test_grid | sigma=0.0


[Train] ValueIterationAgent:   0%|          | 2/2000 [00:00<00:00, 6477.69it/s]


[Metrics] ValueIterationAgent converged in 87 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-test_grid_theta-1e-06_gamma-0.9_3658.json
57: ValueIterationAgent on test_grid | sigma=0.5


[Train] ValueIterationAgent:   0%|          | 5/2000 [00:00<00:00, 16617.69it/s]


[Metrics] ValueIterationAgent converged in 99 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-test_grid_theta-1e-06_gamma-0.9_1425.json
58: ValueIterationAgent on test_grid | iter=5000.0


[Train] ValueIterationAgent:   0%|          | 3/5000 [00:00<00:00, 9946.97it/s]


[Metrics] ValueIterationAgent converged in 90 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-test_grid_theta-1e-06_gamma-0.9_1680.json
59: ValueIterationAgent on large_grid | gamma=0.6


[Train] ValueIterationAgent:   1%|          | 20/2000 [00:00<00:00, 37718.56it/s]


[Metrics] ValueIterationAgent converged in 28 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-large_grid_theta-1e-06_gamma-0.6_4553.json
60: ValueIterationAgent on large_grid | gamma=0.95


[Train] ValueIterationAgent:   1%|          | 20/2000 [00:00<00:00, 37068.53it/s]


[Metrics] ValueIterationAgent converged in 187 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-large_grid_theta-1e-06_gamma-0.95_4553.json
61: ValueIterationAgent on large_grid | theta=1e-05


[Train] ValueIterationAgent:   1%|          | 20/2000 [00:00<00:00, 42560.16it/s]


[Metrics] ValueIterationAgent converged in 85 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-large_grid_theta-1e-05_gamma-0.9_4553.json
62: ValueIterationAgent on large_grid | sigma=0.0


[Train] ValueIterationAgent:   1%|          | 16/2000 [00:00<00:00, 30685.35it/s]


[Metrics] ValueIterationAgent converged in 94 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-large_grid_theta-1e-06_gamma-0.9_9655.json
63: ValueIterationAgent on large_grid | sigma=0.5


[Train] ValueIterationAgent:   1%|▏         | 29/2000 [00:00<00:00, 43224.88it/s]


[Metrics] ValueIterationAgent converged in 105 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-large_grid_theta-1e-06_gamma-0.9_9460.json
64: ValueIterationAgent on large_grid | iter=5000.0


[Train] ValueIterationAgent:   0%|          | 20/5000 [00:00<00:00, 40079.35it/s]


[Metrics] ValueIterationAgent converged in 96 iterations
[Metrics] Saved convergence data to metrics\ValueIterationAgent_grid-large_grid_theta-1e-06_gamma-0.9_4553.json
65: MCAgentOn on A1_grid | gamma=0.95


Training MCAgentOn:  94%|█████████▍| 1890/2000 [00:13<00:00, 143.65it/s]


[Metrics] MCAgentOn converged in 1890 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-A1_grid_gamma-0.95_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_2381.json
66: MCAgentOn on A1_grid | gamma=0.999


Training MCAgentOn:  95%|█████████▍| 1899/2000 [00:02<00:00, 912.66it/s] 


[Metrics] MCAgentOn converged in 1899 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-A1_grid_gamma-0.999_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_9148.json
67: MCAgentOn on A1_grid | epsilon=0.2


Training MCAgentOn:  91%|█████████ | 1811/2000 [00:01<00:00, 1074.39it/s]


[Metrics] MCAgentOn converged in 1811 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-A1_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.2_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_1758.json
68: MCAgentOn on A1_grid | epsilon=0.5


Training MCAgentOn:  86%|████████▋ | 1730/2000 [00:01<00:00, 1115.78it/s]


[Metrics] MCAgentOn converged in 1730 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-A1_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.5_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_41.json
69: MCAgentOn on A1_grid | sigma=0.0


Training MCAgentOn:  78%|███████▊  | 1560/2000 [00:01<00:00, 1091.45it/s]


[Metrics] MCAgentOn converged in 1560 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-A1_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_6953.json
70: MCAgentOn on A1_grid | sigma=0.5


Training MCAgentOn: 100%|██████████| 2000/2000 [00:02<00:00, 725.70it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-A1_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_3890.json
71: MCAgentOn on A1_grid | episodes=5000.0


Training MCAgentOn:  52%|█████▏    | 2610/5000 [00:02<00:02, 1054.13it/s]


[Metrics] MCAgentOn converged in 2610 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-A1_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_2310.json
72: MCAgentOn on A1_grid | iter=5000.0


Training MCAgentOn: 100%|██████████| 2000/2000 [00:02<00:00, 809.46it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-A1_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_5466.json
73: MCAgentOn on Maze | gamma=0.95


Training MCAgentOn:  95%|█████████▍| 1897/2000 [00:13<00:00, 142.03it/s]


[Metrics] MCAgentOn converged in 1897 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-Maze_gamma-0.95_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_5349.json
74: MCAgentOn on Maze | gamma=0.999


Training MCAgentOn: 100%|██████████| 2000/2000 [00:14<00:00, 142.49it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-Maze_gamma-0.999_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_680.json
75: MCAgentOn on Maze | epsilon=0.2


Training MCAgentOn: 100%|██████████| 2000/2000 [00:14<00:00, 142.37it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-Maze_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.2_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_508.json
76: MCAgentOn on Maze | epsilon=0.5


Training MCAgentOn: 100%|██████████| 2000/2000 [00:14<00:00, 138.81it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-Maze_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.5_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_1616.json
77: MCAgentOn on Maze | sigma=0.0


Training MCAgentOn: 100%|██████████| 2000/2000 [00:14<00:00, 142.00it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-Maze_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_5024.json
78: MCAgentOn on Maze | sigma=0.5


Training MCAgentOn: 100%|██████████| 2000/2000 [00:15<00:00, 129.40it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-Maze_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_6356.json
79: MCAgentOn on Maze | episodes=5000.0


Training MCAgentOn: 100%|██████████| 5000/5000 [00:37<00:00, 133.86it/s]


[Metrics] MCAgentOn converged in 5000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-Maze_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_8705.json
80: MCAgentOn on Maze | iter=5000.0


Training MCAgentOn: 100%|██████████| 2000/2000 [00:34<00:00, 58.10it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-Maze_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_4846.json
81: MCAgentOn on test_grid | gamma=0.95


Training MCAgentOn:  85%|████████▍ | 1693/2000 [00:00<00:00, 2602.13it/s]


[Metrics] MCAgentOn converged in 1693 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-test_grid_gamma-0.95_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_860.json
82: MCAgentOn on test_grid | gamma=0.999


Training MCAgentOn:  80%|████████  | 1607/2000 [00:00<00:00, 2677.37it/s]


[Metrics] MCAgentOn converged in 1607 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-test_grid_gamma-0.999_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_130.json
83: MCAgentOn on test_grid | epsilon=0.2


Training MCAgentOn:  81%|████████  | 1624/2000 [00:00<00:00, 2925.56it/s]


[Metrics] MCAgentOn converged in 1624 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-test_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.2_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_2986.json
84: MCAgentOn on test_grid | epsilon=0.5


Training MCAgentOn:  84%|████████▍ | 1676/2000 [00:00<00:00, 2504.23it/s]


[Metrics] MCAgentOn converged in 1676 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-test_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.5_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_8548.json
85: MCAgentOn on test_grid | sigma=0.0


Training MCAgentOn:  83%|████████▎ | 1666/2000 [00:00<00:00, 3582.11it/s]


[Metrics] MCAgentOn converged in 1666 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-test_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_5531.json
86: MCAgentOn on test_grid | sigma=0.5


Training MCAgentOn:  89%|████████▊ | 1772/2000 [00:00<00:00, 1895.10it/s]


[Metrics] MCAgentOn converged in 1772 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-test_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_6772.json
87: MCAgentOn on test_grid | episodes=5000.0


Training MCAgentOn:  48%|████▊     | 2380/5000 [00:00<00:01, 2557.34it/s]


[Metrics] MCAgentOn converged in 2380 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-test_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_451.json
88: MCAgentOn on test_grid | iter=5000.0


Training MCAgentOn:  81%|████████▏ | 1629/2000 [00:00<00:00, 2660.26it/s]


[Metrics] MCAgentOn converged in 1629 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-test_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_8797.json
89: MCAgentOn on large_grid | gamma=0.95


Training MCAgentOn: 100%|██████████| 2000/2000 [00:13<00:00, 143.31it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-large_grid_gamma-0.95_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_8682.json
90: MCAgentOn on large_grid | gamma=0.999


Training MCAgentOn: 100%|██████████| 2000/2000 [00:03<00:00, 534.40it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-large_grid_gamma-0.999_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_9512.json
91: MCAgentOn on large_grid | epsilon=0.2


Training MCAgentOn: 100%|██████████| 2000/2000 [00:09<00:00, 211.51it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-large_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.2_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_9148.json
92: MCAgentOn on large_grid | epsilon=0.5


Training MCAgentOn: 100%|██████████| 2000/2000 [00:03<00:00, 623.80it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-large_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.5_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_4548.json
93: MCAgentOn on large_grid | sigma=0.0


Training MCAgentOn: 100%|██████████| 2000/2000 [00:06<00:00, 297.85it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-large_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_4298.json
94: MCAgentOn on large_grid | sigma=0.5


Training MCAgentOn: 100%|██████████| 2000/2000 [00:07<00:00, 260.34it/s]


[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-large_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_5027.json
95: MCAgentOn on large_grid | episodes=5000.0


Training MCAgentOn:  55%|█████▌    | 2752/5000 [00:06<00:05, 442.29it/s]


[Metrics] MCAgentOn converged in 2752 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-large_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_1681.json
96: MCAgentOn on large_grid | iter=5000.0


Training MCAgentOn: 100%|██████████| 2000/2000 [00:21<00:00, 91.35it/s]

[Metrics] MCAgentOn converged in 2000 iterations
[Metrics] Saved convergence data to metrics\MCAgentOn_grid-large_grid_gamma-0.99_alpha-0.1_alpha_decay-0.99_alpha_min-1e-08_epsilon-0.1_epsilon_decay-0.99_epsilon_min-0.01_action_space-[0, 1, 2, 3]_4343.json





In [12]:
out_df

Unnamed: 0,agent,grid,param_changed,param_value,alpha,episodes,epsilon,epsilon_decay,gamma,iter,sigma,theta,cumulative_reward,total_steps,total_agent_moves,total_failed_moves,total_targets_reached,targets_remaining
0,QLearningAgent,A1_grid,gamma,0.60,0.2,2000.0,0.2,0.9,0.60,2000,0.1,,35.0,16,16,0,1,0
1,QLearningAgent,A1_grid,gamma,0.99,0.2,2000.0,0.2,0.9,0.99,2000,0.1,,28.0,17,16,1,1,0
2,QLearningAgent,A1_grid,alpha,0.30,0.3,2000.0,0.2,0.9,0.90,2000,0.1,,35.0,16,16,0,1,0
3,QLearningAgent,A1_grid,alpha,0.50,0.5,2000.0,0.2,0.9,0.90,2000,0.1,,28.0,17,16,1,1,0
4,QLearningAgent,A1_grid,epsilon,0.50,0.2,2000.0,0.5,0.9,0.90,2000,0.1,,35.0,16,16,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,MCAgentOn,large_grid,epsilon,0.50,,2000.0,0.5,,0.99,2000,0.1,,-528.0,99,19,80,1,0
92,MCAgentOn,large_grid,sigma,0.00,,2000.0,0.1,,0.99,2000,0.0,,-14000.0,2000,0,2000,0,1
93,MCAgentOn,large_grid,sigma,0.50,,2000.0,0.1,,0.99,2000,0.5,,-9578.0,2000,737,1263,0,1
94,MCAgentOn,large_grid,episodes,5000.00,,5000.0,0.1,,0.99,2000,0.1,,-122.0,41,19,22,1,0
