### 0. Import Dependencies

In [37]:
import gymnasium as gym

In [38]:
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy

In [39]:
import torch

### 1. Test Random Environment

In [40]:
environment_name = "LunarLander-v2"

In [41]:
env = gym.make(environment_name, render_mode="human")

In [27]:
observation, info = env.reset()
episodes = 10
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)
        score += reward

        if terminated or truncated:
            done = True
            observation, info = env.reset()

    print('Episode:{} Score:{}'.format(episode, score))
env.close()

Episode:1 Score:-321.9361291948418
Episode:2 Score:-54.46637076577062
Episode:3 Score:25.082779316715445
Episode:4 Score:-123.42909330492785
Episode:5 Score:-136.83723576124498
Episode:6 Score:-176.84204629765202
Episode:7 Score:-289.8219221742401
Episode:8 Score:-162.5477972048547
Episode:9 Score:-230.3834218475242
Episode:10 Score:-248.65455177510654


### 2. Build and Train the Model

In [50]:
env = gym.make(environment_name, render_mode="human")
env = Monitor(env)
env = DummyVecEnv([lambda: env])
model = DQN('MlpPolicy', env, verbose=1)

Using cpu device


In [51]:
model.learn(total_timesteps=100000, log_interval=4)

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 91       |
|    ep_rew_mean      | -129     |
|    exploration_rate | 0.965    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 41       |
|    time_elapsed     | 8        |
|    total_timesteps  | 364      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 80.4     |
|    ep_rew_mean      | -158     |
|    exploration_rate | 0.939    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 43       |
|    time_elapsed     | 14       |
|    total_timesteps  | 643      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 80.3     |
|    ep_rew_mean      | -141     |
|    exploration_rate | 0.908    |
| time/               |          |
|    episodes       

<stable_baselines3.dqn.dqn.DQN at 0x2c4c848d0>

### 3. Save and Test the Model

In [52]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)
env.close()

In [53]:
model.save("DQN_LunarLander")

In [63]:
del model

In [67]:
env = gym.make("LunarLander-v2", render_mode="human")
model = DQN.load("DQN_LunarLander", env=env)
obs, info = env.reset()
while True:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs, info = env.reset()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


KeyboardInterrupt: 

In [68]:
env.close()