In [1]:
import torch
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
from collections import deque
from tqdm import tqdm

from walker import PPO, Normalize

# ANSI escape sequences for colouring text printed to the terminal;
# RESET switches back to the default style.
RED, GREEN, YELLOW = "\033[31m", "\033[32m", "\033[33m"
BLUE, MAGENTA, CYAN = "\033[34m", "\033[35m", "\033[36m"
RESET = "\033[0m"

# Pick the torch device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [1]:
# Run directory handed to the PPO constructor below.
log_dir = "../runs/20240711_15-21-10/"

# Create the Walker2d environment with on-screen ("human") rendering.
env = gym.make('Walker2d-v4', render_mode='human')

# Sizes of the observation and action vectors (first dimension of each space).
N_S, N_A = (
    space.shape[0] for space in (env.observation_space, env.action_space)
)

# Build the agent and the observation normalizer.
# NOTE(review): Normalize is constructed fresh here; whether it needs
# statistics saved during training is not visible from this notebook - confirm.
ppo = PPO(N_S, N_A, log_dir)
normalize = Normalize(N_S)

# Load the saved actor weights (presumably from log_dir - verify in walker.py)
# and switch the actor network to evaluation mode.
ppo.actor_net.load_model()
ppo.actor_net.eval()

In [50]:
# Test the model: roll out the loaded policy for a fixed number of episodes,
# print each episode's score, then print the average over all episodes.
test_total_reward = 0
test_episodes = 10  # Number of episodes to test
max_episode_steps = 1000  # Upper bound on steps taken within one episode

try:
    for episode_id in range(test_episodes):
        # reset() returns (observation, info); only the observation is used.
        # Each episode resets here, so no separate reset is needed before the
        # loop or after an episode ends.
        state, _ = env.reset()
        state = normalize(state)
        score = 0
        for _ in range(max_episode_steps):
            action = ppo.actor_net.choose_action(state)
            # Gymnasium's step() returns
            # (obs, reward, terminated, truncated, info). The episode is over
            # when either flag is set, so both must be checked.
            state, reward, terminated, truncated, _ = env.step(action)
            state = normalize(state)
            score += reward

            if terminated or truncated:
                break

        test_total_reward += score
        print("episode: ", episode_id, "\tscore: ", score)

    # Guard against division by zero when no episodes were requested.
    if test_episodes > 0:
        print("average score: ", test_total_reward / test_episodes)
finally:
    # Always release the environment (and its render window), even if a
    # rollout raised part-way through.
    env.close()

episode:  0 	score:  -28.49085352701228
episode:  1 	score:  918.9236616052549
episode:  2 	score:  244.0886463266289
episode:  3 	score:  391.16404756512503
episode:  4 	score:  403.39598925514485
episode:  5 	score:  385.83547040053736
episode:  6 	score:  43.305298353778305
episode:  7 	score:  314.4721261001506
episode:  8 	score:  305.97972889533946
episode:  9 	score:  330.19372103715784
