In [31]:
import torch
import matplotlib.pyplot as plt
import gymnasium as gym
import numpy as np
from time import sleep

from DDPG import DDPGAgent

In [32]:
# set seed
seed = 31
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

In [33]:
# Initialize environment
env = gym.make('LunarLanderContinuous-v2')

action_size = env.action_space.shape[0]
state_size = env.observation_space.shape[0]
min_action = env.action_space.low[0]
max_action = env.action_space.high[0]

episodes = 1000    
discount = 0.99
batch_size = 32
exploration_noise = 0.1
hidden_size = 64
actor_lr = 0.0005
critic_lr = 0.0005
reward_scale = 0.01

# create DDPG Agent
agent = DDPGAgent(state_size=state_size, 
                  action_size=action_size, 
                  hidden_size=hidden_size, 
                  actor_lr=actor_lr, 
                  critic_lr=critic_lr, 
                  discount=discount, 
                  min_action=min_action,
                  max_action=max_action, 
                  exploration_noise=exploration_noise)

In [34]:
# Load trained agent
agent.actor.load_state_dict(torch.load('Trained_Agents/DDPG_actor.pth'))
agent.critic.load_state_dict(torch.load('Trained_Agents/DDPG_critic.pth'))

<All keys matched successfully>

In [48]:
# Set the agent to evaluation mode
agent.actor.eval()
agent.critic.eval()

episodes = 10
rewards = []

for ep in range(episodes):
    state, _ = env.reset()
    total_reward = 0
    while True:
        # Select an action from the agent's policy
        action = agent.act(state, add_noise=False)  # No noise during testing

        # Enter action into the env
        next_state, reward, done, _, _ = env.step(action)

        total_reward += reward

        if done:
            rewards.append(total_reward)
            print(f'Episode: {ep+1}, Total Reward: {total_reward}')
            break

        state = next_state

# Print average reward
print(f'Average Reward: {np.mean(rewards)}')


Episode: 1, Total Reward: 224.95680188093806
Episode: 2, Total Reward: 53.16196304233853
Episode: 3, Total Reward: 247.8586926057429
Episode: 4, Total Reward: 233.5692406986544
Episode: 5, Total Reward: 173.59438052951504
Episode: 6, Total Reward: 8.142750890254803
Episode: 7, Total Reward: 176.49631806541117
Episode: 8, Total Reward: 2.026573319603756
Episode: 9, Total Reward: 201.50171099011067
Episode: 10, Total Reward: -17.277588836938335
Average Reward: 130.40308431856312
