In [33]:
import torch
import matplotlib.pyplot as plt
import gymnasium as gym
import numpy as np
from time import sleep

from REINFORCE import MCPGAgent

In [34]:
# initialize environment
env = gym.make('LunarLander-v2')
action_size = env.action_space.n
state_size = env.observation_space.shape[0]

# set seed
seed = 31
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


# hyperparameters
episodes = 10 # run agent for this many episodes
hidden_size = 256 # number of units in NN hidden layers
actor_lr = 0.002 # learning rate for actor
value_function_lr = 0.002 # learning rate for value function
discount = 0.99 # discount factor gamma value
reward_scale = 0.01 #scale reward by this amount

# create agent
agent = MCPGAgent(state_size, 
                  action_size, 
                  actor_lr, 
                  value_function_lr, 
                  discount,
                  hidden_size)

In [35]:
# load the saved actor and value function networks
agent.actor_net.load_state_dict(torch.load('Trained_Agents/MC_actor.pth'))
agent.vf_net.load_state_dict(torch.load('Trained_Agents/MC_valueF.pth'))

<All keys matched successfully>

In [38]:
# set the agent to evaluation mode
agent.actor_net.eval()
agent.vf_net.eval()

# run the agent for a few episodes and print the total reward for each episode
cumulative_reward = 0
for i in range(episodes):
    state, _ = env.reset()
    # state = env.reset()
    done = False
    total_reward = 0
    while not done:
        action = agent.act(state)
        next_state, reward, done, _, _ = env.step(action)
        # next_state, reward, done, _ = env.step(action)
        total_reward += reward
        state = next_state
    print(f'Episode {i+1} \ttotal reward: {total_reward:.2f}')


Episode 1 	total reward: 163.42
Episode 2 	total reward: 231.25
Episode 3 	total reward: 284.67
Episode 4 	total reward: 248.77
Episode 5 	total reward: 230.76
Episode 6 	total reward: 36.93
Episode 7 	total reward: 167.30
Episode 8 	total reward: 14.35
Episode 9 	total reward: 199.36
Episode 10 	total reward: 218.56
