# Agent Evaluation Notebook

In [14]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
import torch
from IPython.display import Video
from PIL import ImageDraw

# Project-local modules (order kept as-is: later star imports may shadow earlier names)
from utils import Utils, Config
from agent import *
from training import *

plt.style.use('dark_background')

In [15]:
# Settings: paths to the experiment configuration and the checkpoint under evaluation
CFG = 'configurations/cfg.yml'
CKPT = (
    'experiments/test/version_5/checkpoints/'
    'step=52500-hp_metric=933.2999877929688.ckpt'
)

In [None]:
# Load modules
# Build the config, restore the trained module from CKPT, and wire its
# world model and actor into an agent acting in a recorded environment.
cfg = Config.from_yaml(CFG)
lm = LitModule.load_from_checkpoint(CKPT, cfg=cfg.lit_module)
# This notebook only evaluates: put the restored module in eval mode so that
# train-only layers (dropout, batch-norm statistics, ...) behave deterministically.
# NOTE(review): assumes LitModule is a LightningModule / torch.nn.Module — confirm.
lm.eval()
env = Environment(gym_name='CarRacing-v2', record=True)
agent = Agent(env, (lm.net.world_model, lm.net.actor), action_repeat=1)

## Real Environment

In [None]:
# Run one episode in the real environment with the agent in exploit mode,
# logging frames, actions and rewards, then print the return and show the video.
observations, actions, rewards = [], [], []
obs, _ = agent.reset()
done = False
while not done:
    next_obs, action, reward, term, trunc = agent.act(obs, exploit=True)
    # Keep the frame the agent just acted on, resized to 256x256
    frame = Utils.ten2img(obs['image'][0]).resize((256,256))
    observations.append(np.array(frame))
    actions.append(action)
    rewards.append(reward)
    # The episode stops on either termination or truncation
    done = term or trunc
    obs = next_obs

episode_return = sum(rewards).item()
print(f'RETURN: {episode_return:.2f}')
Video('./rl-video-episode-0.mp4', embed=True)

In [None]:
# View action plots
# Concatenate the per-step actions into a NEW tensor: re-binding `actions`
# itself made this cell fail when run a second time, because torch.cat
# expects a sequence of tensors rather than an already-concatenated tensor.
# detach()/cpu() make the data safe to hand to matplotlib.
real_actions = torch.cat(actions).detach().cpu()
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
fig = plt.figure(figsize=(12,12))
gs = fig.add_gridspec(3, hspace=0)
axs = gs.subplots(sharex=True, sharey=True)
# One stacked panel per action component (column index of the action tensor)
for idx, label in enumerate(['Steering', 'Throttle', 'Brake']):
    axs[idx].plot(real_actions[:,idx], colors[idx])
    axs[idx].legend([label])
axs[0].set_title('Real environment actions')
axs[-1].set_xlabel('Step');

## Dream Environment

In [None]:
# Let model dream & display video
# Encode one real observation, let the world model imagine the following
# steps with the actor in exploit mode, decode the imagined states to images
# and write them to an mp4 with the accumulated predicted reward drawn on top.
N_STEPS = 500
DREAM_VIDEO = './dream-test.mp4'
world_model = lm.net.world_model

obs, _ = agent.reset()
obs['image'] = obs['image'].float()/255.0  # scale image values by 1/255 before encoding

# Pure inference: disable autograd so no graph is built across the imagined
# rollout and the image decoding.
with torch.no_grad():
    _, initial_posterior = world_model.encode_state(obs, world_model.h0)
    dream = world_model.dream(
        initial_posterior.unflatten(-1, (32,32)),
        world_model._get_inital_recurrent_state(1),
        lm.net.actor,
        N_STEPS-1,
        exploit=True,
    )
    (states, actions, continues, action_distribution,
     reward_distribution, continue_distribution) = dream

    # Decode all states in one pass, then restore the (1, N_STEPS) leading
    # dimensions and keep the single sequence.
    seq = world_model.image_decoder(states.flatten(0,1)).mode.unflatten(0, (1,N_STEPS))[0]
    predicted_rewards = reward_distribution.mean[0]

score = 0.0
vid = []
# actions[0] is kept in the zip so the frame count is still bounded by the
# shortest of the three sequences, exactly as before.
for o, r, a in zip(seq, predicted_rewards, actions[0]):
    o = Utils.ten2img(o.clamp(0,1)).resize((256,256)).convert('RGB')
    draw = ImageDraw.Draw(o)
    score += r.item()
    draw.text((20,20), f'SCORE: {score:.2f}', fill=(255,0,255))
    vid.append(np.array(o))
Utils.create_mp4(vid, DREAM_VIDEO, 30)
Video(DREAM_VIDEO, embed=True)

In [None]:
# View dream action plots
# Select the single dreamed sequence into a NEW variable: re-binding `actions`
# made this cell fail when run a second time, because the tensor had already
# lost its leading dimension. detach()/cpu() make it safe for matplotlib.
dream_actions = actions[0].detach().cpu()
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
fig = plt.figure(figsize=(12,12))
gs = fig.add_gridspec(3, hspace=0)
axs = gs.subplots(sharex=True, sharey=True)
# One stacked panel per action component (column index of the action tensor)
for idx, label in enumerate(['Steering', 'Throttle', 'Brake']):
    axs[idx].plot(dream_actions[:,idx], colors[idx])
    axs[idx].legend([label])
axs[0].set_title('Dream environment actions')
axs[-1].set_xlabel('Step');