In [57]:
import gymnasium as gym
import torch
from src.data.services.dimensions import get_dims
import json

In [58]:
# Gymnasium environment id; every gym.make(...) call in the cells below uses it.
env_name = "MountainCar-v0"

In [59]:
# Environment created without a render_mode; the next cells read its
# state/action dimensions from it via get_dims.
env = gym.make(env_name)

In [60]:
from src.nn.services.dqn import DQN

In [61]:
# Sizes fed to the DQN constructor below. get_dims is a project helper --
# presumably (observation size, number of actions); confirm against its definition.
state_dim, action_dim = get_dims(env)

In [62]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [63]:
# Build the Q-network from the environment dimensions, then move it to the
# selected device (the result of .to() is what the rest of the notebook uses).
dqn_golden = DQN(state_dim, action_dim)
dqn_golden = dqn_golden.to(device)

In [64]:
from src.training.services.train import full_training as train

In [65]:
from src.data.services.memory import ReplayMemory

In [66]:
from torch.optim import Adam

In [67]:
# Hyperparameters. `batch_size` is reused for pre-training further down; the
# others are presumably consumed by the training call -- confirm at the call site.
batch_size = 64
gamma = 0.99
num_episodes = 10_000
target_update = 50
epsilon = 0.2

In [68]:
import cv2

In [69]:
def create_video(frames, fps=10, output_name="output"):
    """Encode a sequence of image frames into ``<output_name>.mp4``.

    Args:
        frames: Non-empty sequence of equally sized image arrays
            (height x width [x channels]). OpenCV's ``VideoWriter`` treats
            colour frames as BGR, so RGB input is written with red and blue
            swapped; convert before calling if that matters.
        fps: Frame rate of the written video.
        output_name: Output path without the ``.mp4`` extension. It may
            contain a directory part, which is created when missing.

    Raises:
        ValueError: If ``frames`` is empty (there is no frame to take the
            video size from).
    """
    # Fail early with a clear message instead of an IndexError on frames[0].
    if len(frames) == 0:
        raise ValueError("create_video() needs at least one frame")

    import os
    import warnings

    target_path = f"{output_name}.mp4"

    # VideoWriter does not create directories; without this the writer just
    # fails to open and no file is produced.
    parent_dir = os.path.dirname(target_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # VideoWriter wants the frame size as (width, height).
    height, width = frames[0].shape[:2]
    out = cv2.VideoWriter(target_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    try:
        if not out.isOpened():
            # Previously this failure was silent; keep it non-fatal but visible.
            warnings.warn(f"could not open video writer for {target_path}; no video was written")
            return
        for frame in frames:
            out.write(frame)
    finally:
        # Always release the writer so the file is finalised even on errors.
        out.release()

In [70]:
from src.policy.services.epsilon_greedy import epsilon_greedy

In [71]:
def test_dqn(dqn, env, device, output_name="output", output_dir="./tmpvideo", num_episodes=10):
    """Run evaluation episodes with ``dqn`` and record the first one to video.

    Args:
        dqn: Network handed to ``epsilon_greedy`` to choose actions.
        env: Gymnasium-style environment: ``reset()`` returns ``(state, info)``
            and ``step()`` returns a 5-tuple. Create it with
            ``render_mode="rgb_array"`` so ``render()`` yields image frames.
        device: Torch device the state tensor is moved to before action
            selection.
        output_name: Video file name without extension.
        output_dir: Directory the video is written to; created when missing.
        num_episodes: Number of evaluation episodes to run (default 10, the
            previously hard-coded value).

    Returns:
        A list with the cumulative reward of each episode.
    """
    import os

    frames = []
    rewards = []
    # Evaluation only: no gradients are needed for action selection.
    with torch.no_grad():
        for episode in range(num_episodes):
            cum_reward = 0
            state, _ = env.reset()
            done = False
            while not done:
                # Only the first episode is recorded.
                if episode == 0:
                    frame = env.render()
                    # render() yields None when the env has no render mode.
                    if frame is not None:
                        # Gymnasium rgb_array frames are RGB, while OpenCV's
                        # VideoWriter expects BGR; convert so colours are right.
                        frames.append(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
                # Third argument is 0 -- presumably epsilon, i.e. purely greedy
                # actions; confirm against epsilon_greedy's definition.
                action = epsilon_greedy(
                    torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(device), dqn, 0)
                next_state, reward, terminated, truncated, _ = env.step(action)
                cum_reward += reward
                done = terminated or truncated
                state = next_state
            rewards.append(cum_reward)

    # Skip the video when nothing was rendered instead of crashing on frames[0].
    if frames:
        os.makedirs(output_dir, exist_ok=True)
        create_video(frames, output_name=f"{output_dir}/{output_name}")
    return rewards

In [16]:
# Baseline: evaluate the network before any training so later results have a
# reference point. The environment is held in a variable so it can be closed;
# the video name now matches the MountainCar environment being evaluated.
baseline_env = gym.make(env_name, render_mode="rgb_array")
try:
    bf_rewards = test_dqn(dqn_golden, baseline_env, device, output_name="before_training_mountain_car")
finally:
    # Release the renderer's resources even if evaluation fails.
    baseline_env.close()

In [17]:
# Per-episode cumulative rewards returned by test_dqn for the baseline run.
print(bf_rewards)

[-200.0, -200.0, -200.0, -200.0, -200.0, -200.0, -200.0, -200.0, -200.0, -200.0]


In [72]:
# Pre-training configuration: optimizer first, then schedule and data source.
pretrain_optm_lr = 1e-3
pretrain_optimizer = Adam(params=dqn_golden.parameters(), lr=pretrain_optm_lr)
pretrain_batch_size = batch_size  # same batch size as the main hyperparameters
pretrain_epochs = 500
pretrain_path = "./src/pretrain/data/MountainCar-v0_10_000.npy"

In [73]:
from tqdm.auto import tqdm

In [74]:
from src.pretrain.services.train import pre_train
# Pre-train dqn_golden with the optimizer, data path, epoch count and batch
# size configured above. The return value is discarded and the same dqn_golden
# object is evaluated afterwards, so pre_train presumably updates the network
# in place -- confirm against its definition. The recorded run shows 500
# progress-bar steps taking about 2m22s.
pre_train(
    dqn_golden,
    pretrain_optimizer,
    pretrain_path,
    pretrain_epochs,
    pretrain_batch_size,
    device=device,
)

100%|██████████| 500/500 [02:22<00:00,  3.52it/s]


In [75]:
# Evaluate the network after pre-training. The environment is held in a
# variable so it can be closed once the evaluation is done.
pretrain_env = gym.make(env_name, render_mode="rgb_array")
try:
    pretrain_rewards = test_dqn(dqn_golden, pretrain_env, device, output_name="pretrain_mountain_car")
finally:
    # Release the renderer's resources even if evaluation fails.
    pretrain_env.close()

In [76]:
# Per-episode cumulative rewards returned by test_dqn after pre-training.
print(pretrain_rewards)

[-200.0, -200.0, -118.0, -116.0, -200.0, -200.0, -200.0, -115.0, -122.0, -117.0]
