In [1]:
import gymnasium as gym
import torch
from src.data.services.dimensions import get_dims

In [2]:
# Gymnasium environment id; reused for the training env and for both rendered evaluation envs.
env_name = "LunarLander-v2"

In [3]:
# Training environment, created without a render mode; this is the env handed to train().
env = gym.make(env_name)

In [4]:
from src.nn.services.dqn import DQN

In [5]:
# get_dims is a project helper not shown in this notebook; presumably it returns the
# observation size and the number of discrete actions of `env` — TODO confirm.
state_dim, action_dim = get_dims(env)

In [6]:
# Online Q-network: its parameters are given to the Adam optimizer and it is the model
# evaluated by test_dqn before and after training.
dqn = DQN(state_dim, action_dim)

In [7]:
# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [8]:
# Move the online network to the selected device; this happens before the optimizer
# is constructed from dqn.parameters().
dqn = dqn.to(device)

In [9]:
from src.training.services.train import train

In [10]:
from src.data.services.memory import ReplayMemory

In [11]:
# Replay buffer passed to train(). 10000 is presumably the maximum number of stored
# transitions — TODO confirm against ReplayMemory's constructor.
rm = ReplayMemory(10000)

In [12]:
# Second network built with the same constructor arguments as `dqn`; it is passed to
# train() as the target network.
target_net = DQN(state_dim, action_dim)

In [13]:
# Start the target network from a copy of the online network's current weights.
target_net.load_state_dict(dqn.state_dict())

<All keys matched successfully>

In [14]:
# Keep the target network on the same device as the online network.
target_net = target_net.to(device)

In [15]:
from torch.optim import Adam

In [16]:
# Only the online network's parameters are registered with the optimizer;
# target_net's parameters are not passed to it.
optm = Adam(dqn.parameters(), lr=0.001)

In [17]:
# Hyperparameters forwarded to train(); the exact semantics live in the project's
# train() implementation, so the notes below are assumptions to confirm.

# Episode budget and sampling size.
num_episodes = 10_000
batch_size = 32

# Presumably the reward discount factor and the exploration rate — TODO confirm.
gamma = 0.99
epsilon = 0.1

# Presumably how often (in episodes or steps) the target network is refreshed — TODO confirm.
target_update = 10

In [18]:
import cv2

In [19]:
def create_video(frames, fps=10, output_name="output"):
    """Encode a sequence of image frames into ``<output_name>.mp4``.

    Args:
        frames: Non-empty, indexable sequence of image arrays. The video size
            is taken from the first frame as ``(shape[1], shape[0])``, i.e.
            (width, height). Frames are written unchanged; OpenCV's
            ``VideoWriter`` treats 3-channel data as BGR, so RGB frames should
            be converted by the caller first.
        fps: Frame rate handed to the video writer.
        output_name: Output path without the ``.mp4`` suffix.

    Raises:
        ValueError: If ``frames`` is empty, since the frame size cannot be
            determined and no video can be produced.
    """
    # Fail with a clear message instead of an opaque IndexError on frames[0].
    if len(frames) == 0:
        raise ValueError("create_video() needs at least one frame")

    height, width = frames[0].shape[:2]
    out = cv2.VideoWriter(
        f"{output_name}.mp4",
        cv2.VideoWriter_fourcc(*'mp4v'),
        fps,
        (width, height),
    )
    try:
        for frame in frames:
            out.write(frame)
    finally:
        # Always finalize the file, even if writing a frame raised.
        out.release()

In [20]:
from src.policy.services.epsilon_greedy import epsilon_greedy

In [27]:
def test_dqn(dqn, env, device, output_name="output"):
    """Play one episode with epsilon set to 0, record it, and return its rewards.

    Args:
        dqn: Network handed to ``epsilon_greedy`` to choose actions.
        env: Gymnasium environment. It must have been created with
            ``render_mode="rgb_array"`` so that ``env.render()`` returns an
            RGB image array for every step.
        device: Torch device the state tensor is moved to before action
            selection.
        output_name: Video path without the ``.mp4`` suffix, forwarded to
            ``create_video``.

    Returns:
        list: The reward received at each environment step, in order.
    """
    frames = []
    rewards = []

    def _capture_frame():
        # rgb_array renders are RGB, but cv2.VideoWriter expects BGR; without
        # this conversion the red and blue channels are swapped in the video.
        frames.append(cv2.cvtColor(env.render(), cv2.COLOR_RGB2BGR))

    state, _ = env.reset()
    done = False
    # Pure evaluation: no gradients are needed for action selection.
    with torch.no_grad():
        while not done:
            _capture_frame()
            action = epsilon_greedy(
                torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(device), dqn, 0)
            next_state, reward, terminated, truncated, _ = env.step(action)
            rewards.append(reward)
            done = terminated or truncated
            state = next_state
    # Also record the state reached by the final step, which the loop never renders.
    _capture_frame()
    create_video(frames, output_name=output_name)
    return rewards

In [22]:
# Baseline recording: this cell precedes the train() call, so the network is evaluated
# before any training has been run in this notebook.
eval_env = gym.make(env_name, render_mode="rgb_array")
try:
    bfr = test_dqn(dqn, eval_env, device, output_name="before_training_lunar_lander")
finally:
    # The rendering environment holds display/simulation resources; close it
    # explicitly instead of leaving an anonymous env open for the kernel's lifetime.
    eval_env.close()

In [23]:
# Run training. All arguments are positional, so their order must match the
# signature of the project's train() function exactly.
train(
    env, rm,                               # environment and replay buffer
    dqn, target_net, optm,                 # online network, target network, optimizer
    batch_size, gamma, device,
    num_episodes, epsilon, target_update,
)

  0%|          | 0/10000 [00:00<?, ?it/s]

In [28]:
# Post-training recording: same evaluation as the baseline cell, run after train().
eval_env = gym.make(env_name, render_mode="rgb_array")
try:
    ar = test_dqn(dqn, eval_env, device, output_name="after_training_lunar_lander")
finally:
    # Close the rendering environment explicitly so its resources are released
    # rather than leaked for the lifetime of the kernel.
    eval_env.close()

In [29]:
# Per-step rewards of the post-training evaluation episode (the list returned by test_dqn).
ar

[-1.1690059378129547,
 -1.2707376472253884,
 -1.2428917626960754,
 -1.215026785475203,
 -1.1849563551311917,
 -1.152857711561495,
 -1.1187729245358184,
 -1.0829673446362733,
 -1.0455451991578286,
 -1.006726738783243,
 -0.9666645953565194,
 -0.925445113264459,
 -0.8831985198620771,
 -0.8401118869306003,
 -0.7962102488471601,
 -0.7516562172953627,
 -0.706520693577886,
 -0.6609014700523801,
 -0.6149086534049673,
 -0.5686314549139411,
 -0.5221802312233876,
 -0.47563569401657446,
 -0.4290721988277255,
 -0.3826732718635242,
 -0.41674297511730174,
 -0.2882865344874972,
 -0.24313687340250567,
 -0.19873317168256222,
 -0.15530474959922458,
 -0.11309527979466338,
 -0.07239112507434697,
 -0.03362402447999102,
 0.0027430133253290023,
 0.03609634123378669,
 0.0656470817426964,
 0.0904497350228155,
 0.10910494491727718,
 0.11991310932702959,
 0.12050084797027694,
 0.10771563650496319,
 0.07710185400938485,
 0.022546866418622358,
 -0.06446343677663435,
 -0.19581141101070898,
 -0.38769098715147265,
 -0