In [1]:
%load_ext autoreload
%autoreload 2

from DQN import *
from lunar import LunarLanderEnv

ZLUDA device successfully loaded!


In [2]:
def test_lunar_lander(steps_to_run_before_pause, agent, episodes=1, render_mode="human"):
    """
    Test the Lunar Lander environment with a given agent.
    
    Parameters:
    steps_to_run_before_pause (int): Number of steps to run before pausing for user input.
    agent: The agent to be tested in the environment.
    
    Returns:
    None
    """
    # Initialize the environment
    lunar = LunarLanderEnv(render_mode=render_mode)
    
    if(agent is not None):
        # Set the agent's environment
        agent.lunar = lunar
        
    for _ in range(episodes):
        counter, score = 0, 0

        while True:
            if steps_to_run_before_pause != 0 and counter % steps_to_run_before_pause == 0:
                input("Press Enter to continue...")

            if(agent is not None):
                _, reward, done, action = agent.act()
                
            else:
                # Sample a random action from the action space
                action = lunar.env.action_space.sample()
            
                # Take a step in the environment
                _, reward, done = lunar.take_action(action, verbose=True)
                
            score += reward
            
            counter += 1
            
            if done:
                print(f"Episode finished, score: {score}")
                break
        if(agent is not None):
            # Reset the agent's environment for the next episode
            agent.lunar.reset()
        else:
            # Reset the environment for the next episode
            lunar.reset()
        
    # Close the environment
    lunar.close()

In [3]:
# Baseline run: build an environment with render_mode=None and train a DQNAgent
# constructed with no overrides; the recorded log below lists the hyperparameters
# that were in effect (e.g. episodes: 1500, learning_rate: 0.001).
# NOTE: `lunar` is a notebook-level name that later cells rebind and read.
lunar = LunarLanderEnv(render_mode=None)
training_agent = DQNAgent(lunar)
training_agent.train()

QNetwork:
 DQN(
  (input_layer1): Linear(in_features=8, out_features=64, bias=True)
  (layer1_layer2): Linear(in_features=64, out_features=64, bias=True)
  (layer2_output): Linear(in_features=64, out_features=4, bias=True)
)
Starting training (code 353080)...
[2025-06-04T18:10:19] Training DQN agent with parameters:
  - gamma: 0.99
  - epsilon: 1.0
  - epsilon_decay: 0.995
  - epsilon_min: 0.01
  - learning_rate: 0.001
  - batch_size: 64
  - episodes: 1500
  - target_network_update_freq: 10
  - replays_per_episode: 1000

[2025-06-04T18:10:20] Episode 1 (0.07%) had score: -373.35
[2025-06-04T18:10:20]     Saving model to training/training_353080/episode_1_(-373.35).h5
[2025-06-04T18:10:22] Episode 51 (3.40%) had score: -230.36
[2025-06-04T18:10:22]     Saving model to training/training_353080/episode_51_(-230.36).h5
[2025-06-04T18:10:24] Episode 101 (6.73%) had score: -196.75
[2025-06-04T18:10:24]     Saving model to training/training_353080/episode_101_(-196.75).h5
[2025-06-04T18:10:28

In [4]:
# Second run: same environment setup, but training a DQNAgentDoubleDPrioritizedReplay
# (presumably a double-DQN / prioritized-replay variant, judging by the class
# name - TODO confirm in DQN.py).
# learning_rate and episodes are passed explicitly here; the values match the ones
# logged for the DQNAgent run in the previous cell.
# NOTE: this rebinds the notebook-level `lunar` to a new environment.
lunar = LunarLanderEnv(render_mode=None)
training_agent_2 = DQNAgentDoubleDPrioritizedReplay(lunar=lunar, learning_rate=0.001, episodes=1500)
training_agent_2.train()

QNetwork:
 DQN(
  (input_layer1): Linear(in_features=8, out_features=64, bias=True)
  (layer1_layer2): Linear(in_features=64, out_features=64, bias=True)
  (layer2_output): Linear(in_features=64, out_features=4, bias=True)
)
Starting training (code 324800)...
[2025-06-04T18:27:42] Training DQN agent with parameters:
  - gamma: 0.99
  - epsilon: 1.0
  - epsilon_decay: 0.995
  - epsilon_min: 0.01
  - learning_rate: 0.001
  - batch_size: 64
  - episodes: 1500
  - target_network_update_freq: 10
  - replays_per_episode: 1000

[2025-06-04T18:27:42] Episode 1 (0.07%) had score: -120.39
[2025-06-04T18:27:42]     Saving model to training/training_324800/episode_1_(-120.39).h5
[2025-06-04T18:27:45] Episode 51 (3.40%) had score: -203.36
[2025-06-04T18:27:45]     Saving model to training/training_324800/episode_51_(-203.36).h5
[2025-06-04T18:27:49] Episode 101 (6.73%) had score: -162.30
[2025-06-04T18:27:49]     Saving model to training/training_324800/episode_101_(-162.30).h5
[2025-06-04T18:27:56

In [5]:
# Short run of the same agent class with only 14 episodes.
# NOTE(review): the recorded output for this cell is dated 2025-06-03, earlier than
# the runs above (2025-06-04), and uses a different log format ("Episode #N starting..." /
# "Replay N"); it presumably comes from an older version of the training code -
# re-run the cell to refresh it.
# NOTE: this rebinds the notebook-level `lunar` again.
lunar = LunarLanderEnv(render_mode=None)
training_agent_3 = DQNAgentDoubleDPrioritizedReplay(lunar=lunar, episodes=14)
training_agent_3.train()

QNetwork:
 DQN(
  (input_layer1): Linear(in_features=8, out_features=64, bias=True)
  (layer1_layer2): Linear(in_features=64, out_features=64, bias=True)
  (layer2_output): Linear(in_features=64, out_features=4, bias=True)
)
Starting training (code 14676)...
[2025-06-03T04:33:08] Training DQN agent with parameters:
  - gamma: 0.99
  - epsilon: 1.0
  - epsilon_decay: 0.995
  - epsilon_min: 0.01
  - learning_rate: 0.001
  - batch_size: 64
  - episodes: 14
  - target_network_update_freq: 10
  - replays_per_episode: 1000

[2025-06-03T04:33:08] Episode #1 (0.01%) starting...
[2025-06-03T04:33:08]     Replay 1 (0.01%) had score: -212.46
[2025-06-03T04:33:08]     Saving model to training/training_14676/episode_1_replay_1_(-212.46).h5
[2025-06-03T04:33:23]     Replay 51 (0.36%) had score: -132.28
[2025-06-03T04:33:23]     Saving model to training/training_14676/episode_1_replay_51_(-132.28).h5
[2025-06-03T04:33:42]     Replay 101 (0.72%) had score: -114.08
[2025-06-03T04:33:42]     Saving mode

In [9]:
# Evaluate a saved checkpoint over 10 episodes with rendering disabled and no
# pausing. epsilon=0.0 presumably turns exploration off so the agent acts on
# its learned Q-values only - confirm in DQN.py.
#
# The agent is given its own placeholder environment so this cell no longer
# depends on a `lunar` variable left behind by one of the training cells; it
# can be run on a fresh kernel without re-training first.
execution_env = LunarLanderEnv(render_mode=None)
execution_agent = DQNAgent(execution_env, epsilon=0.0)
execution_agent.load_model("training/second pytorch/episode_1451_(215.66).h5")
test_lunar_lander(render_mode=None, steps_to_run_before_pause=0, agent=execution_agent, episodes=10)

# test_lunar_lander rebinds the agent to its own environment, so the
# placeholder is no longer referenced by the agent and can be released.
execution_env.close()

QNetwork:
 DQN(
  (input_layer1): Linear(in_features=8, out_features=64, bias=True)
  (layer1_layer2): Linear(in_features=64, out_features=64, bias=True)
  (layer2_output): Linear(in_features=64, out_features=4, bias=True)
)
Episode finished, score: 254.13362118753213
Episode finished, score: 211.35514468504394
Episode finished, score: 247.58263987328513
Episode finished, score: 46.923646897123206
Episode finished, score: 197.61600722066544
Episode finished, score: 262.789847310524
Episode finished, score: 250.93729577281
Episode finished, score: 281.29887584115716
Episode finished, score: 119.10988813388727
Episode finished, score: -20.57736781237884
Environment closed.
