In [2]:
import sys
print("Python executable:", sys.executable)
import gymnasium as gym
import stable_baselines3
import torch
import pygame

print("All imports successful!")

Python executable: C:\Users\omar\anaconda3\envs\lunarlander\python.exe
All imports successful!


In [4]:
# 📌 Step 1: Imports
import gymnasium as gym  # Use gymnasium instead of gym
import numpy as np
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.logger import configure
import torch
import os

env = gym.make("LunarLanderContinuous-v3", render_mode="human")  # Use v3
env = Monitor(env)  # For logging
env = DummyVecEnv([lambda: env])  # Vectorized wrapper for compatibility


n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))


log_dir = "./ddpg_lunar_tensorboard/"
os.makedirs(log_dir, exist_ok=True)

new_logger = configure(log_dir, ["stdout", "tensorboard"])


model = DDPG(
    "MlpPolicy",
    env,
    learning_rate=1e-3,
    batch_size=64,
    gamma=0.99,
    action_noise=action_noise,
    learning_starts=50000,
    verbose=1,
    tensorboard_log=log_dir,
)


model.set_logger(new_logger)


model.learn(total_timesteps=250000)


model.save("ddpg_lunarlander_model")


test_env = gym.make("LunarLanderContinuous-v3", render_mode="human")
obs, _ = test_env.reset()
done = False
while not done:
    test_env.render()
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = test_env.step(action)
    done = terminated or truncated
test_env.close()

Logging to ./ddpg_lunar_tensorboard/
Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 126      |
|    ep_rew_mean     | -206     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 41       |
|    time_elapsed    | 12       |
|    total_timesteps | 502      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 128      |
|    ep_rew_mean     | -254     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 44       |
|    time_elapsed    | 22       |
|    total_timesteps | 1025     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 117      |
|    ep_rew_mean     | -257     |
| time/              |          |
|    episodes        | 12       |
|    fps             | 45       |
|    time_elapsed    | 30   