In [2]:
import gymnasium as gym
import pygame
import os
import numpy as np
from stable_baselines3 import DQN, DDPG
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback, CallbackList
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.noise import NormalActionNoise
import tensorboard



# Discrete action space: DQN on LunarLander-v2

In [None]:
# Training hyperparameters for the DQN agent; read by the cells that build
# and train the model.
learning_rate = 1e-3        # passed to DQN as learning_rate
batch_size = 64             # passed to DQN as batch_size
discount_factor = 0.99      # passed to DQN as gamma
total_timesteps = 250_000   # passed to learn() as total_timesteps

In [None]:
# Output locations: model checkpoints / best model, and evaluation + TensorBoard logs.
models_dir = "models/DQN"
logs_dir = "logs"

# exist_ok=True keeps this cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling os.makedirs().
for output_dir in (models_dir, logs_dir):
    os.makedirs(output_dir, exist_ok=True)

In [None]:
# Separate environment used only for periodic evaluation. It is wrapped in
# Monitor so EvalCallback reads the unmodified episode rewards and lengths
# (Stable-Baselines3 emits a warning when the evaluation env is not monitored).
eval_env = Monitor(gym.make("LunarLander-v2"))

# Periodically snapshot the model under models_dir with the 'DQN_model' prefix.
checkpoint_callback = CheckpointCallback(
    save_freq=10000,
    save_path=models_dir,
    name_prefix='DQN_model',
)

# Periodically evaluate the agent on eval_env; the best-scoring model is saved
# under models_dir and evaluation results are logged under logs_dir.
# NOTE(review): eval_freq=100 evaluates far more often than the 10000-step
# checkpoint interval and each evaluation runs full episodes, so it may dominate
# training time -- confirm this frequency is intended.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=models_dir,
    log_path=logs_dir,
    eval_freq=100,
)

# Bundle both callbacks so they can be passed to learn() as a single object.
callback = CallbackList([checkpoint_callback, eval_callback])

In [None]:
# Training environment. The trailing reset() call leaves the initial
# (observation, info) pair as this cell's displayed output.
env = gym.make(id="LunarLander-v2")
env.reset()

In [None]:
# Build the DQN agent with an MLP policy on the training environment, using the
# hyperparameters defined earlier in the notebook.
model_DQN = DQN(
    'MlpPolicy',
    env,
    learning_rate=learning_rate,
    batch_size=batch_size,
    gamma=discount_factor,
    learning_starts=50000,  # environment steps collected before learning begins
    verbose=1,
    # 'auto' selects the GPU when one is available and falls back to the CPU
    # otherwise, so the notebook also runs on machines without CUDA.
    device='auto',
    tensorboard_log=logs_dir,  # TensorBoard event files are written here
)

In [None]:
# Train the agent for the configured number of timesteps. The checkpoint and
# evaluation callbacks run during training, and TensorBoard output is logged
# under the "DQN" run name.
model_DQN.learn(
    total_timesteps=total_timesteps,
    callback=callback,
    tb_log_name="DQN",
)

In [None]:
# Training is finished: release the training environment and the separate
# evaluation environment that was handed to EvalCallback.
env.close()
eval_env.close()

In [None]:
# Roll out the saved best model in a rendered environment and print the
# undiscounted return of each episode.
episodes = 1
env = gym.make('LunarLander-v2', render_mode='human')

num_actions = env.action_space.n

# Define the path to the model
model_DQN_path = os.path.join("models", "DQN", "best_model")

try:
    # Load the model
    try:
        model_DQN = DQN.load(model_DQN_path)
        print("Model loaded successfully.")
    except FileNotFoundError:
        print(f"Model file not found at path: {model_DQN_path}. Please check the path.")
        # Re-raise: continuing would either reuse a stale in-memory model_DQN
        # or fail later with a NameError, hiding the real problem.
        raise

    for episode in range(episodes):
        state, info = env.reset()
        terminated = False
        truncated = False
        score = 0

        # An episode ends when the environment terminates OR when it is
        # truncated (e.g. by a time limit); stepping past either flag without
        # a reset is not valid.
        while not (terminated or truncated):
            action, _states = model_DQN.predict(state, deterministic=True)
            state, reward, terminated, truncated, info = env.step(action)
            score += reward

        print(score)
finally:
    # Always close the environment, even if loading or the rollout fails;
    # closing it also shuts down the render window it opened.
    env.close()