In [1]:
%reload_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt

from agents import *
from environments import RaceTrack
from multiproc_utils import TrainPool

from IPython.display import Image

from itertools import product

In [2]:
# Environment under test: a RaceTrack built from the `track_impossible` layout,
# switched to eval mode right after construction.
track = RaceTrack(RaceTrack.track_impossible)
track.set_eval()

# Hyperparameter grid. Only n = 1 is swept for now; widen `ns` to
# [1, 2, 3] to include larger n as well.
ns = [1]
alphas = [0.01, 0.1]
epsilons = [0.01, 0.1]

# NOTE(review): presumably end-of-schedule values for alpha / epsilon; they are
# not referenced by any other cell visible here — confirm they are still needed.
final_alpha = final_epsilon = 0.0

# One epsilon-greedy selector per entry in `epsilons`.
selectors = [EpsilonGreedy(epsilon=eps) for eps in epsilons]

In [3]:
# One learner for every (n, alpha) pair of the grid.
learners = [
    NStepExpectedSarsa(n=n_steps, alpha=step_size)
    for n_steps, step_size in product(ns, alphas)
]

# Hand the selector and learner lists to Agent.combinations, with a single
# empty schedule list and no planner.
# NOTE(review): presumably yields one agent per (selector, learner, schedules,
# planner) combination — confirm against Agent.combinations.
agents = Agent.combinations(
    num_states=track.num_states,
    num_actions=track.num_actions,
    selectors=selectors,
    learners=learners,
    scheduless=[[]],
    planners=[None],
)

In [None]:
# Training budget handed to `train_average` below.
num_episodes = 10_000
num_runs = 100

# NOTE(review): quiet=True presumably silences per-agent output — confirm in TrainPool.
pool = TrainPool(agents, track, quiet=True)
# NOTE(review): `agents` is rebound to the result of train_average, so re-running
# this cell alone feeds the previous result back in as the input. Re-run the
# agent-construction cell first for a clean start.
agents = pool.train_average(num_episodes, num_runs)

Training Runs:   0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
# Smoothing argument passed to `smoothed_ep_lengths`, and the slice of the
# returned series that is drawn.
trail_length = 4
start_from = 0
end_at = num_episodes

fig, ax = plt.subplots(figsize=(6, 6))
fig.suptitle("Comparing Expected Sarsa variants")

# Legend entry per agent, built from its learner's alpha and selector's epsilon.
names = [f"$\\alpha$ = {agent.learner.alpha}, $\\varepsilon$ = {agent.selector.epsilon}" for agent in agents]
for agent, name in zip(agents, names):
    xs, ys = agent.smoothed_ep_lengths(trail_length)
    ax.plot(xs[start_from:end_at], ys[start_from:end_at], label=name)

# Axis labels so the figure can be read on its own.
# NOTE(review): the x values are assumed to be episode indices — confirm against
# `smoothed_ep_lengths`.
ax.set_xlabel("Episode")
ax.set_ylabel("Smoothed episode length")
ax.legend()
ax.set_yscale("log")
plt.show()

In [None]:
# Put the track into eval mode for the demonstration episodes below.
track.set_eval()
# Drive the first agent in `agents` on the track.
trainer = Trainer(agents[0], track)
# NOTE(review): the result of this first episode is discarded — confirm whether
# it is an intentional warm-up or a leftover.
trainer.play_episode()

# Keep only the first element of the returned 3-tuple.
# NOTE(review): the meaning of the argument 100 is not visible here — confirm.
episode, _, _ = trainer.play_episode(100)
print("Episode finished! Rendering animation...")
# Switch the track to train mode before rendering.
track.set_train()
ep_name = "Playground Testing"
filename = track.render_episode(episode, ep_name=ep_name)
# Last expression of the cell: shows the rendered file as rich output.
Image(filename)