In [6]:
import hydra
import omegaconf
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt

from IPython import display
import moviepy.editor as mpy
from IPython.display import Video

from src.policies.epsilon_greedy import GreedyPolicy

In [7]:
def run_agent(env, policy, seed=1234):
    """Roll out a single episode of ``policy`` in ``env`` and record it.

    Args:
        env: Environment object. Must provide ``reset(seed=...)`` returning an
            ``(observation, info)`` pair, ``step(action=...)`` returning a
            5-tuple ``(observation, reward, terminated, truncated, info)``,
            and ``render()``.
        policy: Callable applied to the current observation; ``.item()`` is
            called on its result to obtain the action handed to ``env.step``.
        seed: Value forwarded to ``env.reset``.

    Returns:
        ``(rewards, frames)``: ``rewards`` holds one entry per step taken;
        ``frames`` holds the ``env.render()`` result taken right after the
        reset plus one per step, so it is one element longer than ``rewards``.
    """
    observation, _ = env.reset(seed=seed)
    # The first frame shows the freshly reset environment.
    frames = [env.render()]
    rewards = []

    episode_over = False
    while not episode_over:
        chosen_action = policy(observation).item()
        observation, reward, terminated, truncated, _ = env.step(action=chosen_action)
        rewards.append(reward)
        frames.append(env.render())
        # Either flag ends the episode.
        episode_over = terminated or truncated

    return rewards, frames

In [8]:
# Directory of the training run to inspect; the checkpoint and the rendered
# movie paths used further down are derived from it.
run = Path("logs") / "runs" / "train" / "2024-09-19_18-41-20"

# Load the configuration file stored under the run's .hydra directory.
cfg = omegaconf.OmegaConf.load(run / ".hydra" / "config.yaml")

In [9]:
# Build the environment: the object Hydra instantiates from cfg.environment is
# itself called to obtain the env. render_mode="rgb_array" is passed through
# (presumably so env.render() yields image arrays usable as video frames).
env_factory = hydra.utils.instantiate(cfg.environment, render_mode="rgb_array")
env = env_factory()

# Greedy policy wrapping the configured Q-network; load_checkpoint is pointed
# at the checkpoint file stored in the run directory.
policy = GreedyPolicy(
    action_space=env.action_space,
    observation_space=env.observation_space,
    Qnet=hydra.utils.instantiate(cfg.policy.Qnet),
)
policy.load_checkpoint(run / "checkpoints" / "checkpoint.pt")

# Roll out one episode, collecting per-step rewards and rendered frames.
seed = 0
rewards, frames = run_agent(env=env, policy=policy, seed=seed)

# Movies are written to <run>/movies/<environment id>__seed_<seed>.mp4.
movies_dir = run / "movies"
movies_dir.mkdir(parents=True, exist_ok=True)
filename = movies_dir / (cfg.environment.id + "__seed_%d.mp4" % seed)

# Encode the frames to a video file, then embed it as the cell's output.
clip = mpy.ImageSequenceClip(frames, fps=30)
clip.write_videofile(filename.as_posix(), codec="libx264")
Video(filename, embed=True)

Moviepy - Building video logs/runs/train/2024-09-19_18-41-20/movies/LunarLander-v2__seed_0.mp4.
Moviepy - Writing video logs/runs/train/2024-09-19_18-41-20/movies/LunarLander-v2__seed_0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready logs/runs/train/2024-09-19_18-41-20/movies/LunarLander-v2__seed_0.mp4
