# Double Q-Learning Demo

Use this notebook to train the tabular Q-learning and Double Q-learning agents defined in this repo and compare their behaviour side by side.

In [None]:
from dataclasses import asdict
from algorithms.q_learning import QLearningAgent
from algorithms.double_q_learning import DoubleQLearningAgent
from environment.env_wrapper import make_env
from utils import plotting

# --- Experiment configuration -------------------------------------------
ENV_ID = 'FrozenLake-v1'
EPISODES = 1000
MAX_STEPS = 200

# Label -> agent class. Dict order determines the order in which the
# agents are trained and plotted.
ALGOS = {
    'q': QLearningAgent,
    'double_q': DoubleQLearningAgent,
}

# --- Train every agent and plot its history -----------------------------
# `results` maps each label in ALGOS to that agent's training history,
# converted from a dataclass instance to a plain dict by `asdict`.
results = {}
for name in ALGOS:
    cls = ALGOS[name]

    # Each agent gets its own environment, created with the same seed (0).
    env = make_env(ENV_ID, seed=0)
    agent = cls(env.n_states, env.n_actions)

    history = agent.train(env, episodes=EPISODES, max_steps=MAX_STEPS)

    # Store the dict form under the agent's label and keep a handle for
    # the two plotting calls below.
    results[name] = hist_dict = asdict(history)

    plotting.plot_rewards(hist_dict, title=f"{name} on {ENV_ID}")
    plotting.plot_epsilon(hist_dict)
