In [4]:
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
from stable_baselines3.common.evaluation import evaluate_policy
import numpy as np
import os

In [5]:
class TimeLimitRewardWrapper(gym.Wrapper):
    def __init__(self, env, max_seconds=10, fps=50):
        super().__init__(env)
        self.max_steps = max_seconds * fps  # 10 seconds × 50 FPS = 500 steps
        self.current_step = 0

    def reset(self, **kwargs):
        self.current_step = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        observation, reward, terminated, truncated, info = self.env.step(action)
        self.current_step += 1

        if self.current_step >= self.max_steps:
            truncated = True
            reward -= 100  # Apply heavy penalty if timeout without landing

        return observation, reward, terminated, truncated, info


In [6]:
# ✅ Setup environment
env_id = "LunarLander-v3"

def make_env():
    env = gym.make(env_id, render_mode="rgb_array")
    env = TimeLimitRewardWrapper(env, max_seconds=10, fps=50)
    return env

# DummyVecEnv for SB3
env = DummyVecEnv([make_env])

# ✅ Folder for training videos
training_video_folder = "./training_videos/"
os.makedirs(training_video_folder, exist_ok=True)

# ✅ Wrap with video recorder
env = VecVideoRecorder(
    env,
    training_video_folder,
    record_video_trigger=lambda step: step < 5000,
    video_length=500,
    name_prefix="training"
)


In [7]:
# ✅ Model setup
model = DQN(
    policy="MlpPolicy",
    env=env,
    learning_rate=1e-3,
    buffer_size=1000000,
    learning_starts=1000,
    batch_size=64,
    gamma=0.99,
    train_freq=4,
    target_update_interval=500,
    verbose=1,
    tensorboard_log="./dqn_lunar_tensorboard/"
)

# ✅ Train the model
model.learn(total_timesteps=500000)

# ✅ Save the model
model.save("dqn_lunarlander_v3")

# ✅ Close training env
env.close()


Using cpu device
Logging to ./dqn_lunar_tensorboard/DQN_2
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.992    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 321      |
|    time_elapsed     | 1        |
|    total_timesteps  | 403      |
----------------------------------
Saving video to /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-0-to-step-500.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-0-to-step-500.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-0-to-step-500.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-0-to-step-500.mp4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.985    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 285      |
|    time_elapsed     | 2        |
|    total_timesteps  | 780      |
----------------------------------
Saving video to /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-501-to-step-1001.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-501-to-step-1001.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-501-to-step-1001.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-501-to-step-1001.mp4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.977    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 272      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1187     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.959    |
|    n_updates        | 46       |
----------------------------------
Saving video to /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-1002-to-step-1502.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-1002-to-step-1502.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-1002-to-step-1502.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-1002-to-step-1502.mp4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.97     |
| time/               |          |
|    episodes         | 16       |
|    fps              | 253      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1583     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 2.84     |
|    n_updates        | 145      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.962    |
| time/               |          |
|    episodes         | 20       |
|    fps              | 241      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1996     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.521    |
|    n_updates        | 2

                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-1503-to-step-2003.mp4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.955    |
| time/               |          |
|    episodes         | 24       |
|    fps              | 235      |
|    time_elapsed     | 10       |
|    total_timesteps  | 2363     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.548    |
|    n_updates        | 340      |
----------------------------------
Saving video to /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-2004-to-step-2504.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-2004-to-step-2504.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-2004-to-step-2504.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-2004-to-step-2504.mp4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.948    |
| time/               |          |
|    episodes         | 28       |
|    fps              | 230      |
|    time_elapsed     | 11       |
|    total_timesteps  | 2761     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.357    |
|    n_updates        | 440      |
----------------------------------
Saving video to /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-2505-to-step-3005.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-2505-to-step-3005.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-2505-to-step-3005.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-2505-to-step-3005.mp4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.94     |
| time/               |          |
|    episodes         | 32       |
|    fps              | 222      |
|    time_elapsed     | 14       |
|    total_timesteps  | 3181     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.93     |
|    n_updates        | 545      |
----------------------------------
Saving video to /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-3006-to-step-3506.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-3006-to-step-3506.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-3006-to-step-3506.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-3006-to-step-3506.mp4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.932    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 218      |
|    time_elapsed     | 16       |
|    total_timesteps  | 3562     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.511    |
|    n_updates        | 640      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.926    |
| time/               |          |
|    episodes         | 40       |
|    fps              | 217      |
|    time_elapsed     | 18       |
|    total_timesteps  | 3913     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.388    |
|    n_updates        | 7

                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-3507-to-step-4007.mp4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.918    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 214      |
|    time_elapsed     | 20       |
|    total_timesteps  | 4326     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 1.9      |
|    n_updates        | 831      |
----------------------------------
Saving video to /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-4008-to-step-4508.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-4008-to-step-4508.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-4008-to-step-4508.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-4008-to-step-4508.mp4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.911    |
| time/               |          |
|    episodes         | 48       |
|    fps              | 212      |
|    time_elapsed     | 22       |
|    total_timesteps  | 4710     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.914    |
|    n_updates        | 927      |
----------------------------------
Saving video to /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-4509-to-step-5009.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-4509-to-step-5009.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-4509-to-step-5009.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/training_videos/training-step-4509-to-step-5009.mp4
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.904    |
| time/               |          |
|    episodes         | 52       |
|    fps              | 213      |
|    time_elapsed     | 23       |
|    total_timesteps  | 5047     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 2.33     |
|    n_updates        | 1011     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.898    |
| time/               |          |
|    episodes         | 56       |
|    fps              | 224      |
|    time_elapsed     | 23       |
|    total_timesteps  | 5376     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.933    |
|    n_updates        | 1

In [8]:
# ✅ Reload environment for evaluation (also with time wrapper)
eval_env = gym.make(env_id, render_mode="rgb_array")
eval_env = TimeLimitRewardWrapper(eval_env, max_seconds=10, fps=50)
eval_env = DummyVecEnv([lambda: eval_env])

# ✅ Reload model
model = DQN.load("dqn_lunarlander_v3", env=eval_env)

# ✅ Evaluate
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f"✅ Mean Reward over 10 Evaluation Episodes: {mean_reward:.2f} ± {std_reward:.2f}")




✅ Mean Reward over 10 Evaluation Episodes: -23.81 ± 18.66


In [9]:
# ✅ New folder for final videos
final_video_folder = "./final_model_videos/"
os.makedirs(final_video_folder, exist_ok=True)

# ✅ Setup environment for final recording
record_env = gym.make(env_id, render_mode="rgb_array")
record_env = TimeLimitRewardWrapper(record_env, max_seconds=10, fps=50)
record_env = DummyVecEnv([lambda: record_env])

record_env = VecVideoRecorder(
    record_env,
    final_video_folder,
    record_video_trigger=lambda step: True,
    video_length=1000,
    name_prefix="final_model"
)

# ✅ Play 5 episodes
obs = record_env.reset()
episode = 0
while episode < 5:
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, infos = record_env.step(action)
    if dones[0]:
        obs = record_env.reset()
        episode += 1

record_env.close()
print("✅ Final 5 Episodes Recorded Successfully!")


MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-500-to-step-1500.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-500-to-step-1500.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-500-to-step-1500.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-1000-to-step-2000.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-1000-to-step-2000.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-1000-to-step-2000.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-1500-to-step-2500.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-1500-to-step-2500.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-1500-to-step-2500.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-2000-to-step-3000.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-2000-to-step-3000.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-2000-to-step-3000.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-2500-to-step-3500.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-2500-to-step-3500.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-2500-to-step-3500.mp4
MoviePy - Building video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-2500-to-step-3500.mp4.
MoviePy - Writing video /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-2500-to-step-3500.mp4



                                                            

MoviePy - Done !
MoviePy - video ready /home/amd/prajwal/RL_Playground/LunarLander/final_model_videos/final_model-step-2500-to-step-3500.mp4
✅ Final 5 Episodes Recorded Successfully!


