In [1]:
# Reload edited project modules automatically before each cell executes,
# so changes to the local `agents` / `environments` code are picked up
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2

import copy
from multiprocessing import Pool

import matplotlib.pyplot as plt
import numpy as np

from agents import *
from environments import RaceTrack
from IPython.display import Image, display

In [2]:
track = RaceTrack(RaceTrack.track_impossible)
# NOTE(review): set_eval() is called before any training cell runs —
# confirm this is the intended mode for the training environment.
track.set_eval()

# Hyper-parameters used to build every agent in this notebook.
alpha = 0.5
epsilon = 0.1
final_alpha = 0.0
final_epsilon = 0.0
selector = EpsilonGreedy(epsilon=epsilon)

# Schedule arguments are presumably (attribute path on the agent, start value,
# end value) — TODO confirm against the schedule classes in `agents`.
# The alpha schedule now uses `final_alpha` instead of a hard-coded 0.0 so the
# setting above is actually honoured (mirrors how `final_epsilon` is used).
alpha_schedule = SigmoidSchedule(
    ["learner", "alpha"],
    alpha,
    final_alpha,
)
epsilon_schedule = LinearSchedule(
    ["selector", "epsilon"],
    epsilon,
    final_epsilon,
)

In [3]:
def make_tree_backup_agent(n):
    """Build an n-step Tree Backup agent on `track`.

    Each agent receives its own deep copy of the configured selector and
    schedules. Previously both agents were handed the very same objects, so
    any state one agent changed on them (e.g. a schedule rewriting the
    selector's epsilon) would silently leak into the other agent whenever
    both live in the same process.

    Parameters
    ----------
    n : int
        Number of steps passed to `NStepTreeBackup`.

    Returns
    -------
    Agent
        A freshly constructed agent using the notebook-level `alpha`,
        `selector`, `alpha_schedule` and `epsilon_schedule` settings.
    """
    return Agent(
        num_states=track.num_states,
        num_actions=track.num_actions,
        selector=copy.deepcopy(selector),
        learner=NStepTreeBackup(n=n, alpha=alpha),
        schedules=[
            copy.deepcopy(alpha_schedule),
            copy.deepcopy(epsilon_schedule),
        ],
    )


tb2_agent = make_tree_backup_agent(n=2)
tb3_agent = make_tree_backup_agent(n=3)

In [4]:
# One Trainer per agent; both are given the same `track` environment object.
tb2_trainer, tb3_trainer = (
    Trainer(agent=agent, env=track) for agent in (tb2_agent, tb3_agent)
)

In [None]:
num_episodes = 4000


def train(trainer, num_episodes, agent=None):
    """Train `trainer` for `num_episodes` episodes and hand the result back.

    Pool workers operate on pickled *copies* of their arguments, so whatever
    is learned in the worker is lost unless it is returned to the parent.
    The original version returned nothing, which left the notebook's agents
    untrained after the pool finished.

    Parameters
    ----------
    trainer : Trainer
        Trainer to run; its `train(num_episodes=...)` method is called.
    num_episodes : int
        Number of episodes to train for.
    agent : Agent, optional
        The agent driven by `trainer`. Passing it in the same argument tuple
        means it is pickled together with the trainer, so the returned agent
        is the same object the returned trainer refers to.
        Assumes the Trainer keeps a reference to the agent it was built with
        (the later cells read `tb2_agent.ep_returns` after training through
        the trainer, which relies on exactly that) — confirm.

    Returns
    -------
    tuple
        `(trainer, agent)` as they exist after training in the worker.
    """
    trainer.train(num_episodes=num_episodes)
    return trainer, agent


# NOTE(review): with the "spawn" start method (the default on macOS and
# Windows) worker processes cannot import a function defined in a notebook
# cell. If this cell fails there, move `train` into an importable module.
jobs = [
    (tb2_trainer, num_episodes, tb2_agent),
    (tb3_trainer, num_episodes, tb3_agent),
]
with Pool(2) as p:
    # Rebind the notebook names to the trained copies sent back by the workers.
    (tb2_trainer, tb2_agent), (tb3_trainer, tb3_agent) = p.starmap(train, jobs)

In [None]:
def trailing_average(returns, step_size):
    """Exponentially weighted running average of a sequence of returns.

    The first output equals the first return; every later output is
    ``step_size * returns[i] + (1 - step_size) * previous_output``.

    Parameters
    ----------
    returns : sequence of numbers
        Per-episode returns (assumed one-dimensional — TODO confirm the type
        of `Agent.ep_returns`).
    step_size : float
        Weight given to the newest return.

    Returns
    -------
    numpy.ndarray
        Float array with one smoothed value per input. Always float, so
        integer returns are not truncated when the average is written back;
        an empty input yields an empty array instead of raising IndexError.
    """
    returns = np.asarray(returns, dtype=float)
    smoothed = np.zeros_like(returns)
    if returns.size == 0:
        return smoothed
    smoothed[0] = returns[0]
    for i in range(1, len(returns)):
        smoothed[i] = step_size * returns[i] + (1 - step_size) * smoothed[i - 1]
    return smoothed


step_size = 1 / 70
# The original loops referenced `tb_agent` / `nses_agent`, which are not
# defined in this notebook; the agents trained above are tb2_agent / tb3_agent.
tb2_trailing_returns = trailing_average(tb2_agent.ep_returns, step_size)
tb3_trailing_returns = trailing_average(tb3_agent.ep_returns, step_size)

# Skip the first episodes so the early, noisy part does not dominate the plot.
start_from = 800
fig, ax = plt.subplots()
for label, series in [
    ("2-step Tree Backup", tb2_trailing_returns),
    ("3-step Tree Backup", tb3_trailing_returns),
]:
    # Plot against the real episode index rather than restarting at 0.
    episodes = np.arange(len(series))
    ax.plot(episodes[start_from:], series[start_from:], label=label)
ax.set_xlabel("Episode")
ax.set_ylabel("Trailing average return")
ax.legend()
plt.show()

In [None]:
# Play one episode with each trained agent and show the rendered animation.
# A bare `Image(...)` expression is only displayed when it is the last line of
# a cell, so the first animation was silently dropped; `display(...)` shows
# both regardless of position.
tb2_episode, _, _ = tb2_trainer.play_episode()
print("Episode finished! Rendering animation...")

tb2_ep_name = "2-step Tree Backup"
tb2_filename = track.render_episode(tb2_episode, ep_name=tb2_ep_name)

display(Image(tb2_filename))

tb3_episode, _, _ = tb3_trainer.play_episode()
print("Episode finished! Rendering animation...")

tb3_ep_name = "3-step Tree Backup"
tb3_filename = track.render_episode(tb3_episode, ep_name=tb3_ep_name)

display(Image(tb3_filename))