In [1]:
import torch
import matplotlib.pyplot as plt
import gym
import numpy as np
from scipy.signal import savgol_filter

from REINFORCE import MCPGAgent

In [8]:
# initialize environment
env = gym.make('LunarLander-v2', render_mode='rgb_array')
action_size = env.action_space.n
state_size = env.observation_space.shape[0]

# set seed
seed = 31
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


# hyperparameters
episodes = 100 # run agent for this many episodes
hidden_size = 256 # number of units in NN hidden layers
actor_lr = 0.002 # learning rate for actor
value_function_lr = 0.002 # learning rate for value function
discount = 0.99 # discount factor gamma value
reward_scale = 0.01 #scale reward by this amount

# create agent
agent = MCPGAgent(state_size, 
                action_size, 
                actor_lr, 
                value_function_lr, 
                discount,
                hidden_size)

In [9]:
# load the saved actor and value function networks
agent.actor_net.load_state_dict(torch.load('..\\Trained_Agents\\MC_actor.pth'))
agent.vf_net.load_state_dict(torch.load('..\\Trained_Agents\\MC_valueF.pth'))

<All keys matched successfully>

In [10]:
# set the agent to evaluation mode
agent.actor_net.eval()
agent.vf_net.eval()

# run the agent for a few episodes and print the total reward for each episode
cumulative_reward = 0
for i in range(10):
    state, _ = env.reset()
    done = False
    total_reward = 0
    while not done:
        action = agent.act(state)
        next_state, reward, done, _, _ = env.step(action)
        total_reward += reward
        state = next_state
    print(f'Episode {i+1}/10, total reward: {total_reward:.2f}')


Episode 1/10, total reward: -56.10
Episode 2/10, total reward: -22.19
Episode 3/10, total reward: -8.95
Episode 4/10, total reward: -75.41
Episode 5/10, total reward: -38.77
Episode 6/10, total reward: -33.90
Episode 7/10, total reward: -77.68
Episode 8/10, total reward: -73.06
Episode 9/10, total reward: -47.71
Episode 10/10, total reward: -43.02


add show video