# Setup Experimental Table from experiment_values.json


In [1]:
import json
from copy import deepcopy
from pathlib import Path

import pandas as pd

In [2]:
# Build the experiment table: one row per (agent, grid, swept parameter, value).
# Every row carries the agent's default parameters with exactly one of them
# replaced by the swept value.
CONFIG_PATH = "experiment_values.json"
OUTPUT_PATH = Path("experiment_results") / "experiment_table.csv"

with open(CONFIG_PATH) as f:
    config = json.load(f)

defaults = config["defaults"]
grids = config["grids"]
experiments = config["experiments"]

# Union of the parameter names found in any agent's defaults. These become the
# table columns; an agent that lacks one of them gets NaN in that column.
all_param_names = set()
for agent_params in defaults.values():
    all_param_names.update(agent_params.keys())

# Sorted once up front so the column order is stable and not recomputed per row.
param_columns = sorted(all_param_names)

rows = []

for agent, sweep in experiments.items():
    default_params = defaults[agent]

    for grid in grids:
        for param_name, values in sweep.items():
            for val in values:
                # Start from a private copy of the defaults and override one value.
                params = deepcopy(default_params)
                params[param_name] = val

                row = {
                    "agent": agent,
                    "grid": grid,
                    "param_changed": param_name,
                    "param_value": val
                }

                # Only names that appear in some agent's defaults become columns;
                # a swept parameter outside that set is recorded solely through
                # "param_changed" / "param_value".
                for pname in param_columns:
                    row[pname] = params.get(pname, float('nan'))

                rows.append(row)

df = pd.DataFrame(rows)

# RandomAgent rows are dropped from the table before it is written.
df = df.loc[df["agent"] != "RandomAgent"]

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (no-op when it is already there).
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(OUTPUT_PATH, index=False)

# Run Experiment

## This is a copy of train.py with minor updates


In [3]:
import json, io, sys, re, importlib, inspect
from copy import deepcopy
from pathlib import Path
from argparse import Namespace
import numpy as np
import pandas as pd
from tqdm import trange

from world.reward_functions import custom_reward_function
from world import Environment
from agents import BaseAgent



pygame 2.6.1 (SDL 2.28.4, Python 3.12.8)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [4]:
def load_agent(agent_name: str, env: Environment, config: dict):
    """Instantiate the agent described by ``config[agent_name]``.

    The config entry must provide ``"module"`` (name passed to
    ``importlib.import_module``), ``"class"`` (attribute looked up on that
    module) and ``"train_mode"``. ``"init_args"`` is optional and defaults to
    no keyword arguments.

    ``env`` is forwarded to the constructor only when the class's ``__init__``
    declares a parameter named ``env``.

    Returns:
        tuple: ``(agent_instance, train_mode)``.
    """
    entry = config[agent_name]
    agent_cls = getattr(importlib.import_module(entry["module"]), entry["class"])
    ctor_kwargs = entry.get("init_args", {})

    # Decide from the constructor signature whether the environment is expected.
    wants_env = "env" in inspect.signature(agent_cls.__init__).parameters
    if wants_env:
        agent = agent_cls(env=env, **ctor_kwargs)
    else:
        agent = agent_cls(**ctor_kwargs)

    # "train_mode" is read only after construction, as a required key.
    return agent, entry["train_mode"]

def update_agent(agent: BaseAgent, args: Namespace, state, next_state, reward, action):
    """Call ``agent.update`` with the keyword arguments its signature declares.

    The parameter names of ``agent.update`` are inspected and the first
    matching form below is used:

    1. has ``state`` and ``next_state``  -> state, next_state, reward, action
    2. has ``next_state``, ``reward``, ``action`` -> those three
    3. has ``state``, ``reward``, ``action``      -> those three
    4. otherwise                                  -> no arguments

    ``args`` is accepted but not used by this function.
    """
    accepted = set(inspect.signature(agent.update).parameters)

    if accepted >= {"state", "next_state"}:
        agent.update(state=state, next_state=next_state, reward=reward, action=action)
        return

    if accepted >= {"next_state", "reward", "action"}:
        agent.update(next_state=next_state, reward=reward, action=action)
        return

    if accepted >= {"state", "reward", "action"}:
        agent.update(state=state, reward=reward, action=action)
        return

    agent.update()

def _snapshot_q_table(agent):
    """Return a copy of ``agent.q_table`` with every per-state array copied."""
    return {s: np.copy(q_values) for s, q_values in agent.q_table.items()}


def _max_q_change(q_table, prev_q_table):
    """Largest absolute Q-value difference over states present in both tables.

    Returns 1 when the two tables share no state, which is above the default
    convergence threshold used by the training helpers.
    """
    common_states = set(q_table.keys()) & set(prev_q_table.keys())
    if not common_states:
        return 1
    return max(
        np.max(np.abs(q_table[s] - prev_q_table[s]))
        for s in common_states
    )


def _decay_exploration(agent):
    """Multiply epsilon and alpha by their decay factors, floored at their minimums."""
    agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)
    agent.alpha = max(agent.alpha_min, agent.alpha * agent.alpha_decay)


def _train_q_learning(agent, env, args: Namespace, delta=1e-6):
    """Run the episodic Q-learning loop and return its training metrics.

    Training stops early once the largest per-episode Q-value change drops
    below ``delta``. The returned dict has ``"deltas"`` and ``"rewards"`` (one
    entry per episode run) and ``"iterations"``: the zero-based index of the
    episode at which convergence was detected, or ``args.episodes`` when the
    loop ran to completion.
    """
    metrics = {"iterations": 0, "deltas": [], "rewards": []}

    for ep in trange(args.episodes, desc=f"Training {args.agent}"):
        # Snapshot taken before the episode so the change can be measured after it.
        prev_q_table = _snapshot_q_table(agent)
        state = env.reset()
        ep_reward = 0.0
        for _ in range(args.iter):
            action = agent.take_action(state)
            next_state, reward, terminated, info = env.step(action)
            ep_reward += reward
            if terminated:
                # NOTE(review): the terminating transition is never passed to
                # agent.update here (the Monte Carlo loop does send a final
                # update) -- confirm this is intended.
                break
            agent.update(state, next_state, reward, info["actual_action"])
            state = next_state

        # Exploration/learning-rate decay only starts after the first quarter
        # of the episode budget.
        if ep >= args.episodes/4:
            _decay_exploration(agent)

        max_diff = _max_q_change(agent.q_table, prev_q_table)
        metrics["deltas"].append(max_diff)
        metrics["rewards"].append(ep_reward)
        if max_diff < delta:
            metrics["iterations"] = ep
            break
    else:
        # for/else: reached only when the loop was not broken out of, so a
        # genuine convergence at episode index 0 is no longer overwritten.
        metrics["iterations"] = args.episodes

    return metrics


def _train_monte_carlo(agent, env, args: Namespace, delta=1e-6):
    """Run the episodic Monte Carlo loop and return its training metrics.

    Same metrics layout and stopping rule as ``_train_q_learning``. Each
    non-terminating step is sent to ``agent.update(..., False)`` and every
    episode ends with one ``agent.update(..., True)`` call.
    """
    metrics = {"iterations": 0, "deltas": [], "rewards": []}

    for episode in trange(args.episodes, desc=f"Training {args.agent}"):
        prev_q = _snapshot_q_table(agent)

        state = env.reset()
        terminated = False
        ep_reward = 0.0
        for _ in range(args.iter):
            action = agent.take_action(state)
            next_state, reward, terminated, info = env.step(action)
            ep_reward += reward
            if terminated:
                break
            agent.update(state, action, reward, next_state, False)
            state = next_state

        # The closing update reuses the last step's values, so it is only
        # possible when at least one step was taken (args.iter > 0).
        if args.iter > 0:
            agent.update(state, action, reward, next_state, True)

        if episode >= args.episodes/4:
            _decay_exploration(agent)

        # Convergence check
        max_diff = _max_q_change(agent.q_table, prev_q)
        metrics["deltas"].append(max_diff)
        metrics["rewards"].append(ep_reward)

        if max_diff < delta:
            metrics["iterations"] = episode
            break
    else:
        metrics["iterations"] = args.episodes

    return metrics


def _train_stepwise(agent, env, args: Namespace):
    """Run a single pass of at most ``args.iter`` steps, updating via ``update_agent``."""
    state = env.reset()
    for _ in trange(args.iter, desc=f"[Train] {args.agent}"):
        a = agent.take_action(state)
        ns, r, done, info = env.step(a)
        update_agent(agent, args, state, ns, r, info["actual_action"])
        state = ns
        if done: break


def _parse_eval_metrics(text):
    """Extract ``name: number`` pairs from captured evaluation output.

    Names must consist of lowercase letters and underscores. A value made only
    of digits becomes ``int``; any other match (signed or with a decimal
    point) becomes ``float``.
    """
    metrics = {}
    for line in text.splitlines():
        m = re.match(r"\s*([a-z_]+)\s*:\s*([-+]?[0-9]*\.?[0-9]+)", line)
        if m:
            k, v = m.group(1), m.group(2)
            # NOTE(review): a signed integer such as "-5" fails isdigit() and
            # is therefore stored as a float -- confirm whether that matters.
            metrics[k] = int(v) if v.isdigit() else float(v)
    return metrics


def train_and_eval(args: Namespace, config: dict):
    """Train ``args.agent`` on every grid in ``args.GRID`` and evaluate it.

    For each grid a fresh ``Environment`` and agent are created and trained
    according to the agent's ``train_mode`` from ``config``:

    * ``"q_learning"``  -- episodic loop, then ``agent.eval_mode()``
    * ``"monte_carlo"`` -- episodic loop, then ``agent.epsilon = 0.0``
    * anything else (including ``"value_iteration"``) -- one step-wise pass

    For the two episodic modes the training metrics are stored on
    ``agent.metrics``; they are not part of the return value.

    After training, ``Environment.evaluate_agent`` is run with stdout/stderr
    captured. It is run for the last grid in ``args.GRID``, because only the
    agent trained on that grid is still alive at that point.

    Args:
        args: namespace providing ``GRID``, ``agent``, ``no_gui``, ``sigma``,
            ``fps``, ``random_seed``, ``agent_start_pos``, ``episodes`` and
            ``iter``.
        config: agent configuration passed through to ``load_agent``.

    Returns:
        dict: metric name -> number, parsed from the evaluation output.

    Raises:
        IndexError: if ``args.GRID`` is empty.
    """
    start = tuple(args.agent_start_pos)
    for grid_fp in args.GRID:
        env = Environment(
            Path(grid_fp),
            args.no_gui,
            sigma=args.sigma,
            agent_start_pos=start,
            reward_fn=custom_reward_function,
            target_fps=args.fps,
            random_seed=args.random_seed
        )
        env.reset()
        agent, mode = load_agent(args.agent, env, config)

        if mode == "q_learning":
            agent.metrics = _train_q_learning(agent, env, args)
            agent.eval_mode()

        elif mode == "monte_carlo":
            agent.metrics = _train_monte_carlo(agent, env, args)
            agent.epsilon = 0.0  # Switch to greedy

        else:  # value_iteration / iterative / random
            _train_stepwise(agent, env, args)

    # Capture everything the evaluation prints so it can be parsed below.
    buf = io.StringIO()
    old_out, old_err = sys.stdout, sys.stderr
    sys.stdout, sys.stderr = buf, buf
    try:
        Environment.evaluate_agent(
            # Evaluate on the grid the surviving agent was actually trained on.
            Path(args.GRID[-1]),
            agent,
            args.iter,
            args.sigma,
            agent_start_pos=start,
            reward_fn=custom_reward_function,
            random_seed=args.random_seed,
            show_images=False
        )
    finally:
        # Always restore the real streams, even if evaluation raises.
        sys.stdout, sys.stderr = old_out, old_err

    return _parse_eval_metrics(buf.getvalue())



In [5]:

# Run every row of the experiment table through train_and_eval and collect the
# evaluation metrics next to the row's parameters.

# Same file the setup cell writes ("experiment_table.csv"); the previous name
# "experimental_table.csv" pointed at a different file.
df = pd.read_csv("experiment_results/experiment_table.csv")

# Context managers so the JSON file handles are closed right after loading.
with open("agent_config.json") as f:
    base_cfg = json.load(f)
with open("experiment_values.json") as f:
    exp_defs = json.load(f)

rows = []
for idx, row in df.iterrows():
    agent = row["agent"]
    grid  = row["grid"]
    print(f"{idx+1}: {agent} on {grid} | {row['param_changed']}={row['param_value']}")

    # Split the row's parameter columns into run-level settings (cli_args) and
    # agent constructor keyword arguments (init_args). NaN cells mean "not
    # applicable to this agent" and are skipped.
    init_args, cli_args = {}, {}
    for c,v in row.items():
        if pd.isna(v) or c in {"agent","grid","param_changed","param_value"}:
            continue
        if c in {"episodes","iter"}:
            # Stored as floats in the CSV when the column contains NaN.
            cli_args[c] = int(v)
        elif c == "sigma":
            cli_args[c] = float(v)
        else:
            init_args[c] = float(v)

    # Per-row copy of the agent config so overrides never leak between runs.
    cfg = deepcopy(base_cfg)
    defaults_init = cfg[agent].get("init_args", {})
    cfg[agent]["init_args"] = {**defaults_init, **init_args}

    agent_defaults = exp_defs["defaults"][agent]
    default_sigma = agent_defaults.get("sigma", 0.0)
    sigma = cli_args.get("sigma", default_sigma)

    ns = Namespace(
        GRID=[f"grid_configs/{grid}.npy"],
        agent=agent,
        no_gui=True,
        sigma=sigma,
        fps=5,
        episodes=cli_args.get("episodes",
                              agent_defaults.get("episodes",2000)),
        iter=cli_args.get("iter",
                          agent_defaults.get("iter",2000)),
        random_seed=42,
        agent_start_pos=[1,1]
    )

    metrics = train_and_eval(ns, cfg)

    result = row.to_dict()
    result.update(metrics)
    # Record the sigma actually used, including when it came from the defaults.
    result["sigma"] = sigma
    rows.append(result)

out_df = pd.DataFrame(rows)
out_df.to_csv("result_multi_experiment.csv", index=False)

1: QLearningAgent on A1_grid | gamma=0.6


Training QLearningAgent:   0%|          | 0/2000 [00:00<?, ?it/s]

Training QLearningAgent:  19%|█▉        | 387/2000 [00:00<00:00, 1898.81it/s]


2: QLearningAgent on A1_grid | gamma=0.99


Training QLearningAgent:  24%|██▍       | 481/2000 [00:00<00:00, 2052.07it/s]


3: QLearningAgent on A1_grid | alpha=0.3


Training QLearningAgent:  17%|█▋        | 335/2000 [00:00<00:00, 2395.37it/s]


4: QLearningAgent on A1_grid | alpha=0.5


Training QLearningAgent:   9%|▉         | 186/2000 [00:00<00:00, 2260.20it/s]


5: QLearningAgent on A1_grid | epsilon=0.5


Training QLearningAgent:  20%|██        | 401/2000 [00:00<00:00, 1697.42it/s]


6: QLearningAgent on A1_grid | epsilon_decay=0.8


Training QLearningAgent:  24%|██▍       | 489/2000 [00:00<00:00, 2203.29it/s]


7: QLearningAgent on A1_grid | sigma=0.0


Training QLearningAgent:  26%|██▌       | 514/2000 [00:00<00:00, 2278.58it/s]


8: QLearningAgent on A1_grid | sigma=0.5


Training QLearningAgent:  17%|█▋        | 341/2000 [00:00<00:01, 1533.88it/s]


9: QLearningAgent on A1_grid | episodes=5000.0


Training QLearningAgent:  10%|▉         | 479/5000 [00:00<00:02, 2187.47it/s]


10: QLearningAgent on A1_grid | iter=5000.0


Training QLearningAgent:  25%|██▍       | 497/2000 [00:00<00:00, 2320.08it/s]


11: QLearningAgent on Maze | gamma=0.6


Training QLearningAgent:  24%|██▍       | 482/2000 [00:00<00:01, 1315.09it/s]


12: QLearningAgent on Maze | gamma=0.99


Training QLearningAgent:  26%|██▋       | 525/2000 [00:00<00:01, 1208.01it/s]


13: QLearningAgent on Maze | alpha=0.3


Training QLearningAgent:  25%|██▍       | 491/2000 [00:00<00:01, 1392.51it/s]


14: QLearningAgent on Maze | alpha=0.5


Training QLearningAgent:  13%|█▎        | 257/2000 [00:00<00:01, 1385.98it/s]


15: QLearningAgent on Maze | epsilon=0.5


Training QLearningAgent:  20%|██        | 407/2000 [00:00<00:01, 863.27it/s]


16: QLearningAgent on Maze | epsilon_decay=0.8


Training QLearningAgent:  22%|██▏       | 447/2000 [00:00<00:01, 1317.65it/s]


17: QLearningAgent on Maze | sigma=0.0


Training QLearningAgent:  25%|██▌       | 509/2000 [00:00<00:01, 1431.65it/s]


18: QLearningAgent on Maze | sigma=0.5


Training QLearningAgent:  20%|█▉        | 390/2000 [00:00<00:01, 834.02it/s]


19: QLearningAgent on Maze | episodes=5000.0


Training QLearningAgent:  15%|█▍        | 746/5000 [00:00<00:02, 1599.43it/s]


20: QLearningAgent on Maze | iter=5000.0


Training QLearningAgent:  26%|██▌       | 521/2000 [00:00<00:00, 1495.22it/s]


21: QLearningAgent on test_grid | gamma=0.6


Training QLearningAgent:   4%|▎         | 73/2000 [00:00<00:00, 4471.67it/s]


22: QLearningAgent on test_grid | gamma=0.99


Training QLearningAgent:   4%|▎         | 73/2000 [00:00<00:00, 4563.92it/s]


23: QLearningAgent on test_grid | alpha=0.3


Training QLearningAgent:   2%|▏         | 47/2000 [00:00<00:00, 4217.09it/s]


24: QLearningAgent on test_grid | alpha=0.5


Training QLearningAgent:   1%|▏         | 26/2000 [00:00<00:02, 920.68it/s]


25: QLearningAgent on test_grid | epsilon=0.5


Training QLearningAgent:   3%|▎         | 66/2000 [00:00<00:00, 3004.22it/s]


26: QLearningAgent on test_grid | epsilon_decay=0.8


Training QLearningAgent:   4%|▎         | 73/2000 [00:00<00:00, 4264.29it/s]


27: QLearningAgent on test_grid | sigma=0.0


Training QLearningAgent:   4%|▍         | 75/2000 [00:00<00:00, 4958.67it/s]


28: QLearningAgent on test_grid | sigma=0.5


Training QLearningAgent:   4%|▍         | 78/2000 [00:00<00:00, 3710.22it/s]


29: QLearningAgent on test_grid | episodes=5000.0


Training QLearningAgent:   2%|▏         | 76/5000 [00:00<00:01, 4143.06it/s]


30: QLearningAgent on test_grid | iter=5000.0


Training QLearningAgent:   4%|▎         | 72/2000 [00:00<00:00, 4407.97it/s]


31: QLearningAgent on large_grid | gamma=0.6


Training QLearningAgent:  11%|█         | 222/2000 [00:00<00:01, 1533.54it/s]


32: QLearningAgent on large_grid | gamma=0.99


Training QLearningAgent:  10%|█         | 206/2000 [00:00<00:01, 1564.46it/s]


33: QLearningAgent on large_grid | alpha=0.3


Training QLearningAgent:   9%|▉         | 181/2000 [00:00<00:01, 1670.08it/s]


34: QLearningAgent on large_grid | alpha=0.5


Training QLearningAgent:   4%|▍         | 86/2000 [00:00<00:01, 1487.81it/s]


35: QLearningAgent on large_grid | epsilon=0.5


Training QLearningAgent:  16%|█▌        | 316/2000 [00:00<00:01, 1353.10it/s]


36: QLearningAgent on large_grid | epsilon_decay=0.8


Training QLearningAgent:  11%|█         | 220/2000 [00:00<00:01, 1602.67it/s]


37: QLearningAgent on large_grid | sigma=0.0


Training QLearningAgent:  11%|█         | 222/2000 [00:00<00:01, 1655.33it/s]


38: QLearningAgent on large_grid | sigma=0.5


Training QLearningAgent:  13%|█▎        | 262/2000 [00:00<00:01, 1377.69it/s]


39: QLearningAgent on large_grid | episodes=5000.0


Training QLearningAgent:   4%|▍         | 195/5000 [00:00<00:03, 1553.11it/s]


40: QLearningAgent on large_grid | iter=5000.0


Training QLearningAgent:  11%|█▏        | 229/2000 [00:00<00:01, 1592.66it/s]


41: ValueIterationAgent on A1_grid | gamma=0.6


[Train] ValueIterationAgent:   1%|          | 16/2000 [00:00<00:00, 74071.59it/s]


42: ValueIterationAgent on A1_grid | gamma=0.95


[Train] ValueIterationAgent:   1%|          | 15/2000 [00:00<00:00, 72817.78it/s]


43: ValueIterationAgent on A1_grid | theta=1e-05


[Train] ValueIterationAgent:   1%|          | 15/2000 [00:00<00:00, 68759.08it/s]


44: ValueIterationAgent on A1_grid | sigma=0.0


[Train] ValueIterationAgent:   1%|          | 11/2000 [00:00<00:00, 58180.76it/s]


45: ValueIterationAgent on A1_grid | sigma=0.5


[Train] ValueIterationAgent:   1%|          | 23/2000 [00:00<00:00, 91962.81it/s]


46: ValueIterationAgent on A1_grid | iter=5000.0


[Train] ValueIterationAgent:   0%|          | 15/5000 [00:00<00:00, 64067.78it/s]


47: ValueIterationAgent on Maze | gamma=0.6


[Train] ValueIterationAgent: 100%|██████████| 2000/2000 [00:00<00:00, 138590.54it/s]


48: ValueIterationAgent on Maze | gamma=0.95


[Train] ValueIterationAgent:   2%|▏         | 36/2000 [00:00<00:00, 83932.71it/s]


49: ValueIterationAgent on Maze | theta=1e-05


[Train] ValueIterationAgent:   2%|▏         | 36/2000 [00:00<00:00, 101135.26it/s]


50: ValueIterationAgent on Maze | sigma=0.0


[Train] ValueIterationAgent:   2%|▏         | 33/2000 [00:00<00:00, 104461.91it/s]


51: ValueIterationAgent on Maze | sigma=0.5


[Train] ValueIterationAgent:  52%|█████▏    | 1047/2000 [00:00<00:00, 138086.80it/s]


52: ValueIterationAgent on Maze | iter=5000.0


[Train] ValueIterationAgent:   1%|          | 36/5000 [00:00<00:00, 112935.64it/s]


53: ValueIterationAgent on test_grid | gamma=0.6


[Train] ValueIterationAgent:   0%|          | 3/2000 [00:00<00:00, 23258.62it/s]


54: ValueIterationAgent on test_grid | gamma=0.95


[Train] ValueIterationAgent:   0%|          | 3/2000 [00:00<00:00, 25627.11it/s]


55: ValueIterationAgent on test_grid | theta=1e-05


[Train] ValueIterationAgent:   0%|          | 3/2000 [00:00<00:00, 19722.43it/s]


56: ValueIterationAgent on test_grid | sigma=0.0


[Train] ValueIterationAgent:   0%|          | 2/2000 [00:00<00:00, 16131.94it/s]

57: ValueIterationAgent on test_grid | sigma=0.5







[Train] ValueIterationAgent:   0%|          | 5/2000 [00:00<00:00, 33825.03it/s]


58: ValueIterationAgent on test_grid | iter=5000.0


[Train] ValueIterationAgent:   0%|          | 3/5000 [00:00<00:00, 12495.44it/s]


59: ValueIterationAgent on large_grid | gamma=0.6


[Train] ValueIterationAgent:   1%|          | 20/2000 [00:00<00:00, 80659.69it/s]


60: ValueIterationAgent on large_grid | gamma=0.95


[Train] ValueIterationAgent:   1%|          | 20/2000 [00:00<00:00, 77172.11it/s]


61: ValueIterationAgent on large_grid | theta=1e-05


[Train] ValueIterationAgent:   1%|          | 20/2000 [00:00<00:00, 59833.15it/s]


62: ValueIterationAgent on large_grid | sigma=0.0


[Train] ValueIterationAgent:   1%|          | 16/2000 [00:00<00:00, 78766.27it/s]


63: ValueIterationAgent on large_grid | sigma=0.5


[Train] ValueIterationAgent:   1%|▏         | 29/2000 [00:00<00:00, 104407.57it/s]


64: ValueIterationAgent on large_grid | iter=5000.0


[Train] ValueIterationAgent:   0%|          | 20/5000 [00:00<00:00, 87381.33it/s]


65: MCAgentOn on A1_grid | gamma=0.95


Training MCAgentOn:  94%|█████████▍| 1890/2000 [00:06<00:00, 273.95it/s]


66: MCAgentOn on A1_grid | gamma=0.999


Training MCAgentOn:  95%|█████████▍| 1899/2000 [00:00<00:00, 2097.37it/s]


67: MCAgentOn on A1_grid | epsilon=0.2


Training MCAgentOn:  91%|█████████ | 1811/2000 [00:00<00:00, 2748.15it/s]


68: MCAgentOn on A1_grid | epsilon=0.5


Training MCAgentOn:  86%|████████▋ | 1730/2000 [00:00<00:00, 2934.77it/s]


69: MCAgentOn on A1_grid | sigma=0.0


Training MCAgentOn:  78%|███████▊  | 1560/2000 [00:00<00:00, 2703.00it/s]


70: MCAgentOn on A1_grid | sigma=0.5


Training MCAgentOn: 100%|██████████| 2000/2000 [00:01<00:00, 1588.81it/s]


71: MCAgentOn on A1_grid | episodes=5000.0


Training MCAgentOn:  52%|█████▏    | 2610/5000 [00:01<00:00, 2554.45it/s]


72: MCAgentOn on A1_grid | iter=5000.0


Training MCAgentOn: 100%|██████████| 2000/2000 [00:01<00:00, 1742.43it/s]


73: MCAgentOn on Maze | gamma=0.95


Training MCAgentOn:  95%|█████████▍| 1897/2000 [00:06<00:00, 282.18it/s]


74: MCAgentOn on Maze | gamma=0.999


Training MCAgentOn: 100%|██████████| 2000/2000 [00:07<00:00, 269.03it/s]


75: MCAgentOn on Maze | epsilon=0.2


Training MCAgentOn: 100%|██████████| 2000/2000 [00:07<00:00, 267.00it/s]


76: MCAgentOn on Maze | epsilon=0.5


Training MCAgentOn: 100%|██████████| 2000/2000 [00:07<00:00, 283.01it/s]


77: MCAgentOn on Maze | sigma=0.0


Training MCAgentOn: 100%|██████████| 2000/2000 [00:07<00:00, 278.41it/s]


78: MCAgentOn on Maze | sigma=0.5


Training MCAgentOn: 100%|██████████| 2000/2000 [00:07<00:00, 250.16it/s]


79: MCAgentOn on Maze | episodes=5000.0


Training MCAgentOn: 100%|██████████| 5000/5000 [00:18<00:00, 266.57it/s]


80: MCAgentOn on Maze | iter=5000.0


Training MCAgentOn: 100%|██████████| 2000/2000 [00:18<00:00, 105.59it/s]


81: MCAgentOn on test_grid | gamma=0.95


Training MCAgentOn:  85%|████████▍ | 1693/2000 [00:00<00:00, 8355.45it/s]


82: MCAgentOn on test_grid | gamma=0.999


Training MCAgentOn:  80%|████████  | 1607/2000 [00:00<00:00, 8754.02it/s]


83: MCAgentOn on test_grid | epsilon=0.2


Training MCAgentOn:  81%|████████  | 1624/2000 [00:00<00:00, 10153.04it/s]


84: MCAgentOn on test_grid | epsilon=0.5


Training MCAgentOn:  84%|████████▍ | 1676/2000 [00:00<00:00, 7824.24it/s]


85: MCAgentOn on test_grid | sigma=0.0


Training MCAgentOn:  83%|████████▎ | 1666/2000 [00:00<00:00, 13461.98it/s]


86: MCAgentOn on test_grid | sigma=0.5


Training MCAgentOn:  89%|████████▊ | 1772/2000 [00:00<00:00, 5258.01it/s]


87: MCAgentOn on test_grid | episodes=5000.0


Training MCAgentOn:  48%|████▊     | 2380/5000 [00:00<00:00, 8246.36it/s]


88: MCAgentOn on test_grid | iter=5000.0


Training MCAgentOn:  81%|████████▏ | 1629/2000 [00:00<00:00, 8669.14it/s]


89: MCAgentOn on large_grid | gamma=0.95


Training MCAgentOn: 100%|██████████| 2000/2000 [00:07<00:00, 273.44it/s]


90: MCAgentOn on large_grid | gamma=0.999


Training MCAgentOn: 100%|██████████| 2000/2000 [00:01<00:00, 1116.39it/s]


91: MCAgentOn on large_grid | epsilon=0.2


Training MCAgentOn: 100%|██████████| 2000/2000 [00:04<00:00, 410.17it/s]


92: MCAgentOn on large_grid | epsilon=0.5


Training MCAgentOn: 100%|██████████| 2000/2000 [00:01<00:00, 1415.55it/s]


93: MCAgentOn on large_grid | sigma=0.0


Training MCAgentOn: 100%|██████████| 2000/2000 [00:03<00:00, 583.29it/s] 


94: MCAgentOn on large_grid | sigma=0.5


Training MCAgentOn: 100%|██████████| 2000/2000 [00:04<00:00, 496.80it/s]


95: MCAgentOn on large_grid | episodes=5000.0


Training MCAgentOn:  55%|█████▌    | 2752/5000 [00:03<00:02, 860.66it/s] 


96: MCAgentOn on large_grid | iter=5000.0


Training MCAgentOn: 100%|██████████| 2000/2000 [00:11<00:00, 168.30it/s]


In [6]:
STOP CODE

SyntaxError: invalid syntax (285669689.py, line 1)

In [None]:
out_df

Unnamed: 0,agent,grid,param_changed,param_value,alpha,episodes,epsilon,epsilon_decay,gamma,iter,sigma,theta,cumulative_reward,total_steps,total_agent_moves,total_failed_moves,total_targets_reached,targets_remaining
0,QLearningAgent,A1_grid,gamma,0.60,0.2,2000.0,0.2,0.9,0.60,2000,0.1,,28.0,17,16,1,1,0
1,QLearningAgent,A1_grid,gamma,0.99,0.2,2000.0,0.2,0.9,0.99,2000,0.1,,28.0,17,16,1,1,0
2,QLearningAgent,A1_grid,alpha,0.30,0.3,2000.0,0.2,0.9,0.90,2000,0.1,,28.0,17,16,1,1,0
3,QLearningAgent,A1_grid,alpha,0.50,0.5,2000.0,0.2,0.9,0.90,2000,0.1,,35.0,16,16,0,1,0
4,QLearningAgent,A1_grid,epsilon,0.50,0.2,2000.0,0.5,0.9,0.90,2000,0.1,,28.0,17,16,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,MCAgentOn,large_grid,epsilon,0.50,,2000.0,0.5,,0.99,2000,0.1,,-528.0,99,19,80,1,0
92,MCAgentOn,large_grid,sigma,0.00,,2000.0,0.1,,0.99,2000,0.0,,-14000.0,2000,0,2000,0,1
93,MCAgentOn,large_grid,sigma,0.50,,2000.0,0.1,,0.99,2000,0.5,,-9578.0,2000,737,1263,0,1
94,MCAgentOn,large_grid,episodes,5000.00,,5000.0,0.1,,0.99,2000,0.1,,-122.0,41,19,22,1,0
