In [1]:
import gymnasium as gym
import numpy as np
from src.dqn import QNetwork
from src.dqn import run_dqn
import torch
import matplotlib.pyplot as plt

## Train the Agent

In [2]:
# Build the LunarLander environment with the "human" render mode.
env = gym.make(
    "LunarLander-v2",
    render_mode="human",
)

In [3]:
# Train the agent. The returned `scores` is plotted below as one total reward
# per episode (see the axis labels).
scores = run_dqn()

# Plot the learning curve using the explicit figure/axes interface.
fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Total Reward')
ax.set_xlabel('Episode')
plt.show()

# NOTE: `env` is intentionally NOT closed here. It is not passed to run_dqn(),
# and the later cells still read its spaces and step it for evaluation; it is
# closed once the evaluation loop has finished.

Episode 100	Average Score: -164.89	Epsilon: 0.61
Episode 200	Average Score: -87.28	Epsilon: 0.370
Episode 292	Average Score: -32.43	Epsilon: 0.23

## Load Model

In [None]:
# Size the network from the environment's observation and action spaces.
state_size = env.observation_space.shape[0]
action_size = env.action_space.n

model = QNetwork(state_size, action_size)
# map_location="cpu" remaps every stored tensor onto the CPU, so a checkpoint
# saved from a CUDA device still loads on a CPU-only machine. Inference below
# converts the network output with .numpy(), which requires CPU tensors.
model.load_state_dict(
    torch.load('data/checkpoints/checkpoint_final.pth', map_location='cpu')
)
# Switch to evaluation mode before running the policy.
model.eval()

In [None]:
def select_action(state, q_network=None):
    """Return the greedy action for a single observation.

    Args:
        state: One observation (array-like or tensor) convertible to a
            float32 tensor.
        q_network: Optional callable mapping a batch of states to a batch of
            action values. Defaults to the notebook-level ``model`` so existing
            ``select_action(state)`` calls keep working.

    Returns:
        The index of the largest action value, as returned by ``np.argmax``.
    """
    net = model if q_network is None else q_network
    # as_tensor accepts arrays, lists and tensors alike; unsqueeze adds the
    # leading batch dimension of size 1.
    state_batch = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
    with torch.no_grad():  # inference only, no gradient tracking needed
        action_values = net(state_batch)
    # With a batch of one, the flat argmax is the action index.
    return np.argmax(action_values.numpy())

# Number of evaluation episodes to play with the loaded policy.
num_episodes = 10

for episode in range(num_episodes):
    state, _ = env.reset()  # reset() returns (observation, info)
    done = False
    total_reward = 0
    while not done:
        env.render()  # Render the environment
        action = select_action(state)  # Choose action
        # step() returns (obs, reward, terminated, truncated, info). An episode
        # is over when EITHER flag is set; ignoring `truncated` would keep
        # stepping an environment that has already hit its time limit.
        next_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        total_reward += reward
        state = next_state
    print(f"Episode {episode + 1}: Total Reward: {total_reward}")

# Release the environment once all evaluation episodes are finished.
env.close()