In [1]:
import torch
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
from collections import deque
from tqdm import tqdm

from walker import PPO, Normalize

In [50]:
# Evaluate a saved PPO policy on Walker2d-v4: load the checkpoint and the
# observation-normalisation parameters, roll out `test_episodes` episodes,
# print the return of each one and the average return over all of them.

# NOTE(review): `device` is not used anywhere in this cell — confirm whether
# PPO is expected to be moved to it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize environment
env = gym.make('Walker2d-v4', render_mode='rgb_array')

# Sizes of the observation and action vectors
N_S = env.observation_space.shape[0]
N_A = env.action_space.shape[0]

# Initialize PPO model and the observation normalizer
ppo = PPO(N_S, N_A)
normalize = Normalize(N_S)

# Load the saved model
log_dir = "../runs/20240709_14-44-31/15000/ppo/"
ppo.load(log_dir)
normalize.load_params(log_dir + "../../normalize_params.npy")
ppo.actor_net.eval()
# NOTE(review): if Normalize updates running statistics on every call, the
# evaluation below keeps shifting them — confirm it is frozen after load_params.

# Test the model
test_total_reward = 0
test_episodes = 10  # Number of episodes to test
max_steps = 1000    # Upper bound on steps per episode
for episode_id in range(test_episodes):
    # Every episode starts from a fresh reset, so no reset is needed before
    # the loop or after an episode ends.
    now_state, _ = env.reset()
    now_state = normalize(now_state)
    score = 0
    for _ in range(max_steps):
        state_batch = torch.from_numpy(np.array(now_state, dtype=np.float32)).unsqueeze(0)
        # Pure inference: no autograd graph is needed for action selection.
        with torch.no_grad():
            a = ppo.actor_net.choose_action(state_batch)[0]

        # Gymnasium's step() returns (obs, reward, terminated, truncated, info);
        # the episode is over when either flag is set.
        now_state, r, terminated, truncated, _ = env.step(a)
        now_state = normalize(now_state)
        score += r

        done = terminated or truncated
        if done:
            break
    print("episode: ", episode_id, "\tscore: ", score)
    test_total_reward += score

average_test_reward = test_total_reward / test_episodes
print('Average test reward: {:.2f}'.format(average_test_reward))

episode:  0 	score:  -28.49085352701228
episode:  1 	score:  918.9236616052549
episode:  2 	score:  244.0886463266289
episode:  3 	score:  391.16404756512503
episode:  4 	score:  403.39598925514485
episode:  5 	score:  385.83547040053736
episode:  6 	score:  43.305298353778305
episode:  7 	score:  314.4721261001506
episode:  8 	score:  305.97972889533946
episode:  9 	score:  330.19372103715784
