In [1]:
import gymnasium as gym
import torch
from src.data.services.dimensions import get_dims

In [2]:
# Gymnasium environment id; reused by every gym.make() call in this notebook.
env_name = "MountainCar-v0"

In [3]:
# Environment instance handed to get_dims() and train(). It is created without a
# render_mode; the evaluation runs build their own render-enabled instances.
env = gym.make(env_name)

In [4]:
from src.nn.services.dqn import DQN

In [5]:
# Sizes used to construct both DQN instances below.
# NOTE(review): get_dims is a project helper; presumably it returns the observation
# dimension and the number of discrete actions — confirm against its implementation.
state_dim, action_dim = get_dims(env)

In [6]:
# Online Q-network: this is the model whose parameters are given to the optimizer.
dqn = DQN(state_dim, action_dim)

In [7]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [8]:
# Move the online network onto the selected device.
dqn = dqn.to(device)

In [9]:
from src.training.services.train import train

In [10]:
from src.data.services.memory import ReplayMemory

In [11]:
# Replay buffer passed to train().
# NOTE(review): 10000 is presumably the maximum number of stored transitions —
# confirm against ReplayMemory's constructor.
rm = ReplayMemory(10000)

In [12]:
# Second network built with the same constructor arguments as `dqn`; it is passed
# to train() as the target network.
target_net = DQN(state_dim, action_dim)

In [13]:
# Start the target network from the online network's current weights.
target_net.load_state_dict(dqn.state_dict())

<All keys matched successfully>

In [14]:
# Keep the target network on the same device as the online network.
target_net = target_net.to(device)

In [15]:
from torch.optim import Adam

In [16]:
# Only the online network's parameters are optimized; target_net is not given to
# the optimizer.
optm = Adam(dqn.parameters(), lr=0.001)

In [17]:
# Hyperparameters forwarded to train() further down.
# NOTE(review): the meanings below are inferred from the names only — confirm
# against train()'s implementation.
gamma = 0.99            # presumably the discount factor
epsilon = 0.1           # presumably the exploration rate
batch_size = 32         # presumably transitions sampled per optimization step
num_episodes = 5_000    # presumably the number of training episodes
target_update = 10      # presumably the target-network sync interval

In [18]:
import cv2

In [19]:
def create_video(frames, fps=10, output_name="output", rgb_input=True):
    """Encode a sequence of image frames into ``{output_name}.mp4``.

    Args:
        frames: Non-empty, indexable sequence of image arrays. The first
            frame's ``shape[0]`` / ``shape[1]`` are used as the video height /
            width, so all frames are expected to share that size.
        fps: Frames per second written into the video container.
        output_name: Output path without the ``.mp4`` extension.
        rgb_input: When True (default) frames are treated as RGB and converted
            to BGR before writing, because OpenCV's writer expects BGR channel
            order. Pass False for frames that are already BGR.

    Raises:
        ValueError: If ``frames`` is empty.
        RuntimeError: If OpenCV could not open the output file for writing.
    """
    if len(frames) == 0:
        raise ValueError("create_video needs at least one frame")

    height, width = frames[0].shape[0], frames[0].shape[1]
    # VideoWriter takes the frame size as (width, height).
    writer = cv2.VideoWriter(
        f"{output_name}.mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)
    )
    try:
        if not writer.isOpened():
            # Without this check a missing codec or bad path silently yields no video.
            raise RuntimeError(f"could not open {output_name}.mp4 for writing")
        for frame in frames:
            if rgb_input:
                # Swap channel order so colours are not inverted in the output.
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            writer.write(frame)
    finally:
        # Always release so the file is finalized even if a write fails.
        writer.release()

In [20]:
from src.policy.services.epsilon_greedy import epsilon_greedy

In [21]:
def test_dqn(dqn, env, device, output_name="output"):
    frames = []
    rewards = []
    state, _ = env.reset()
    done = False
    while not done:
        frames.append(env.render())
        action = epsilon_greedy(
            torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(device), dqn, 0)
        next_state, reward, terminated, truncated, _ = env.step(action)
        rewards.append(reward)
        done = terminated or truncated
        state = next_state
    create_video(frames, output_name=output_name)
    return rewards

In [22]:
# Baseline: record one episode with the network before training() has run.
# A dedicated render-enabled env is used and closed afterwards so its rendering
# resources are not leaked.
bf_env = gym.make(env_name, render_mode="rgb_array")
try:
    bf_rewards = test_dqn(dqn, bf_env, device, output_name="before_training_mountain_car")
finally:
    bf_env.close()

In [23]:
# Run DQN training with the objects and hyperparameters defined above.
# NOTE(review): every argument is positional, so the order must match train()'s
# signature exactly. In particular `epsilon` is passed before `target_update`,
# the reverse of the order in which they are defined in the hyperparameter
# cell — confirm against src.training.services.train.
train(
    env,
    rm,
    dqn,
    target_net,
    optm,
    batch_size,
    gamma,
    device,
    num_episodes,
    epsilon,
    target_update
)

  0%|          | 0/5000 [00:00<?, ?it/s]

In [None]:
# Record one episode with the network after train() has run, for comparison
# with the pre-training baseline. The render-enabled env is closed afterwards
# so its rendering resources are not leaked.
af_env = gym.make(env_name, render_mode="rgb_array")
try:
    af_rewards = test_dqn(dqn, af_env, device, output_name="after_training_mountain_car")
finally:
    af_env.close()