In [1]:
import gymnasium as gym
import pygame
import os
import numpy as np
from stable_baselines3 import DQN, DDPG
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback, CallbackList
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.noise import NormalActionNoise
import tensorboard



In [None]:
# Hyperparameters for the DQN run (consumed by the DQN constructor and learn() below).
learning_rate = 1e-3       # passed to DQN as learning_rate
batch_size = 64            # passed to DQN as batch_size
discount_factor = 0.99     # passed to DQN as gamma
total_timesteps = 250_000  # training budget passed to learn()

In [None]:
# Output locations for the DQN run: model checkpoints / best model, and logs.
models_dir = "models/DQN"
logs_dir = "logs"

# exist_ok=True makes the cell safe to re-run and removes the
# check-then-create race of `if not os.path.exists(...)`.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logs_dir, exist_ok=True)

In [None]:
# Dedicated evaluation environment, kept separate from the training env.
# It is wrapped in Monitor so that evaluation reads the true episode
# returns/lengths; without it Stable-Baselines3 warns that the eval env is
# not wrapped with a Monitor and that reported stats may be altered by wrappers.
eval_env = Monitor(gym.make("LunarLander-v2"))

# Periodically save a snapshot of the model into models_dir.
checkpoint_callback = CheckpointCallback(
    save_freq=10000,
    save_path=models_dir,
    name_prefix='DQN_model',
)

# Periodically evaluate the agent and keep the best-scoring model in models_dir.
# NOTE(review): eval_freq=100 triggers an evaluation every 100 training steps,
# which likely dominates wall-clock time over a 250k-step run - consider raising it.
# NOTE(review): log_path is the same "logs" directory used by the DDPG section,
# so evaluation logs of the two runs presumably overwrite each other - confirm.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=models_dir,
    log_path=logs_dir,
    eval_freq=100,
)

# Run both callbacks during training.
callback = CallbackList([checkpoint_callback, eval_callback])

In [None]:
# Training environment for DQN: LunarLander with its default (discrete) actions,
# created without a render_mode so no window is opened during training.
env = gym.make(id="LunarLander-v2")

# Last expression of the cell: resets the env and displays (observation, info).
env.reset()

In [None]:
# Build the DQN agent on the training env using the hyperparameters defined above;
# TensorBoard event files are written under logs_dir.
model_DQN = DQN(
    policy='MlpPolicy',
    env=env,
    learning_rate=learning_rate,
    batch_size=batch_size,
    gamma=discount_factor,
    tensorboard_log=logs_dir,
    verbose=1,
)

In [None]:
# Train DQN for the configured number of steps; the callback list handles
# checkpointing and periodic evaluation, and the run is logged under the "DQN" tag.
model_DQN.learn(
    total_timesteps=total_timesteps,
    callback=callback,
    tb_log_name="DQN",
)

In [None]:
# Release both environments created for the DQN run: the training env and the
# evaluation env (the latter was previously never closed).
env.close()
eval_env.close()

In [None]:
# #Initialize the environment
# env = gym.make('LunarLander-v2')
# # env = gym.make("CartPole-v1")
# # Define parameters
# learning_rate = 0.001
# batch_size = 64
# discount_factor = 0.99
# total_timesteps = 250000

# # # Create a monitor to log results
# env = Monitor(env, filename='./Discrete/discrete_lander/')

# # Create the DQN model
# model = DQN('MlpPolicy', env, learning_rate=learning_rate, batch_size=batch_size, gamma=discount_factor, verbose=1, tensorboard_log="./Discrete/dqn_lunar_lander_tensorboard/")

# # Define a callback for saving the model
# checkpoint_callback = CheckpointCallback(save_freq=10000, save_path='./Discrete/dqn_checkpoints/', name_prefix='dqn_model')

# # Train the agent
# model.learn(total_timesteps=total_timesteps, callback=checkpoint_callback, tb_log_name="first_run" )

# # Save the final model
# model.save("./Discrete/dqn_lunar_lander")

# # To view the training curves, run from a shell:
# # tensorboard --logdir='./Discrete/dqn_lunar_lander_tensorboard/'

In [None]:
# # Train the agent
# model.learn(total_timesteps=total_timesteps, callback=checkpoint_callback, tb_log_name="first_run")

# # Save the final model
# model.save("./Discrete/dqn_lunar_lander")

In [None]:
# Shut down Pygame, closing any render window that is still open.
# NOTE(review): no environment above is created with a render_mode, so this
# looks like defensive cleanup (e.g. after re-running a playback cell) - confirm.
pygame.quit()

In [None]:
# Watch the best DQN agent (saved by the evaluation callback) play in a window.
episodes = 1
env = gym.make('LunarLander-v2', render_mode = 'human')

num_actions = env.action_space.n

# Relative to the notebook's working directory, i.e. the same "models/DQN"
# folder the training cells write to - no machine-specific absolute path.
model_DQN_path = os.path.join("models", "DQN", "best_model")

try:
    model_DQN = DQN.load(model_DQN_path)

    for episode in range(episodes):
        state, info = env.reset()
        terminated = False
        truncated = False
        score = 0

        # An episode is over when it terminates OR when it is truncated by the
        # time limit; checking only `terminated` keeps stepping a finished episode.
        while not (terminated or truncated):
            action, _states = model_DQN.predict(state, deterministic=True)
            state, reward, terminated, truncated, info = env.step(action)
            score += reward

        # Total undiscounted reward collected in this episode.
        print(score)
finally:
    # Close the Pygame window, even if loading or stepping raised.
    env.close()

# Continuous

In [None]:
# DDPG
# Hyperparameters for the DDPG run; these rebind the names used by the DQN section.
learning_rate = 1e-3       # passed to DDPG as learning_rate
batch_size = 64            # passed to DDPG as batch_size
discount_factor = 0.99     # passed to DDPG as gamma
total_timesteps = 250_000  # training budget passed to learn()

In [None]:
# Output locations for the DDPG run: model checkpoints / best model, and logs.
models_dir = "models/DDPG"
logs_dir = "logs"

# exist_ok=True makes the cell safe to re-run and removes the
# check-then-create race of `if not os.path.exists(...)`.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logs_dir, exist_ok=True)

In [None]:
# Dedicated continuous-action evaluation environment, separate from the training env.
# It is wrapped in Monitor so that evaluation reads the true episode
# returns/lengths; without it Stable-Baselines3 warns that the eval env is
# not wrapped with a Monitor and that reported stats may be altered by wrappers.
eval_env = Monitor(
    gym.make(
        'LunarLander-v2',
        continuous = True,
    )
)

# Periodically save a snapshot of the model into models_dir.
checkpoint_callback = CheckpointCallback(
    save_freq=10000,
    save_path=models_dir,
    name_prefix='DDPG_model',
)

# Periodically evaluate the agent and keep the best-scoring model in models_dir.
# NOTE(review): eval_freq=100 triggers an evaluation every 100 training steps,
# which likely dominates wall-clock time over a 250k-step run - consider raising it.
# NOTE(review): log_path is the same "logs" directory used by the DQN section,
# so evaluation logs of the two runs presumably overwrite each other - confirm.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=models_dir,
    log_path=logs_dir,
    eval_freq=100,
)

# Run both callbacks during training.
callback = CallbackList([checkpoint_callback, eval_callback])

In [13]:
# Continuous-action LunarLander, created without a render_mode (no window).
env_continuous = gym.make('LunarLander-v2', continuous=True)

# Last expression of the cell: resets the env and displays (observation, info).
env_continuous.reset()

(array([ 4.4326781e-04,  1.3996991e+00,  4.4878118e-02, -4.9871606e-01,
        -5.0680025e-04, -1.0165600e-02,  0.0000000e+00,  0.0000000e+00],
       dtype=float32),
 {})

In [14]:
# Gaussian exploration noise for DDPG: zero mean and a standard deviation of
# 0.1 on every action dimension of the continuous environment.
n_actions = env_continuous.action_space.shape[-1]
action_noise = NormalActionNoise(
    mean=np.zeros(n_actions),
    sigma=np.full(n_actions, 0.1),
)

In [None]:
# Build the DDPG agent.
# - It must be given the continuous environment: DDPG only supports continuous
#   (Box) action spaces, and `env` is the discrete DQN environment, which has
#   already been closed by this point.
# - action_noise is the Gaussian exploration noise defined in the previous cell;
#   it was previously created but never handed to the model.
# - learning_starts=50000: the first 50k steps only collect experience before
#   any gradient updates are made.
model_DDPG = DDPG('MlpPolicy', env_continuous,
                learning_rate=learning_rate,
                batch_size=batch_size,
                gamma=discount_factor,
                learning_starts=50000,
                action_noise=action_noise,
                verbose=1,
                tensorboard_log=logs_dir)

In [None]:
# Train DDPG for the configured number of steps; the callback list handles
# checkpointing and periodic evaluation, and the run is logged under the "DDPG" tag.
model_DDPG.learn(
    total_timesteps=total_timesteps,
    callback=callback,
    tb_log_name="DDPG",
)

In [None]:
# Release both environments created for the DDPG run: the training env and the
# evaluation env (the latter was previously never closed).
env_continuous.close()
eval_env.close()

In [24]:
# Watch the best DDPG agent (saved by the evaluation callback) play in a window.
episodes = 2
env = gym.make(
    'LunarLander-v2',
    continuous = True,
    render_mode = 'human',
)

num_actions = env.action_space.shape[-1]

# Define the path to the model (relative to the notebook's working directory)
model_DDPG_path = os.path.join("models", "DDPG", "best_model")

try:
    # Load the model. If the file is missing, report it and stop: continuing
    # would either fail with a NameError or silently replay a stale model
    # left over in the kernel from an earlier cell.
    try:
        model_DDPG = DDPG.load(model_DDPG_path)
    except FileNotFoundError:
        print(f"Model file not found at path: {model_DDPG_path}. Please check the path.")
        raise
    print("Model loaded successfully.")

    for episode in range(episodes):
        state, info = env.reset()
        terminated = False
        truncated = False
        score = 0

        # An episode is over when it terminates OR when it is truncated by the
        # time limit; checking only `terminated` keeps stepping a finished episode.
        while not (terminated or truncated):
            action, _states = model_DDPG.predict(state, deterministic=True)
            state, reward, terminated, truncated, info = env.step(action)
            score += reward

        # Total undiscounted reward collected in this episode.
        print(score)
finally:
    # Close the Pygame window, even if loading or stepping raised.
    env.close()

Model loaded successfully.
Current working directory: c:\Users\msaee\Desktop\Semester 10\RL\Quizzes\Quiz 3\Material\DQN


In [21]:
# Shut down Pygame, closing any render window left open by a playback cell.
pygame.quit()

In [None]:
###---------------- TESTS ----------------###
# Sanity check: display the shape of one action sampled from the
# continuous environment's action space.
env_continuous.action_space.sample().shape