# In [None]:
import torch
from Env.environment import make_env
from Dqn.dqn_agent import DQNAgent
from Policy.ppo_agent import PPOAgent
import time

# Use CUDA when torch reports it as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
env_name = "BipedalWalker-v3"

# Select the algorithm ("DQN" or "PPO")
algo = "PPO"

# Saved-model path for each supported algorithm
_MODEL_PATHS = {
    "DQN": r"D:\code_etc\Python\_File_chay_code\DRL\Bidepal_Gym\Model\actor_dqn.pth",
    "PPO": r"D:\code_etc\Python\_File_chay_code\DRL\Bidepal_Gym\Model\actor_ppo.pth",
}
if algo not in _MODEL_PATHS:
    # Any other algorithm name is rejected up front.
    raise ValueError("Chỉ hỗ trợ thuật toán DQN hoặc PPO")
model_path = _MODEL_PATHS[algo]

# Create the environment with render_mode="human" and a fixed seed of 42.
env = make_env(env_name, render_mode="human", seed=42)
# The state size is the first dimension of the observation space's shape.
state_dim = env.observation_space.shape[0]

# Build the agent that matches `algo` and load its saved model from model_path.
if algo == "PPO":
    # PPO takes its action size from the first dimension of the action space's shape.
    action_dim = env.action_space.shape[0]
    agent = PPOAgent(state_dim, action_dim, device=device)
    agent.load(model_path)
elif algo == "DQN":
    # DQN takes its action size from the action space's `n` attribute.
    action_dim = env.action_space.n
    agent = DQNAgent(state_dim, action_dim)
    agent.load(model_path)

print(f"Loaded {algo} model from {model_path}")

# Play a single episode with the loaded agent and report the summed reward.
# The rollout is wrapped in try/finally so the environment is always closed,
# even when the episode is interrupted (e.g. Ctrl-C while rendering) or the
# agent / environment raises part-way through.
try:
    state, _ = env.reset()
    done = False
    total_reward = 0

    while not done:
        env.render()
        if algo == "DQN":
            action = agent.act(state, deterministic=True)
        else:  # PPO
            # Only the first element of select_action's result is sent to the env.
            action = agent.select_action(state)[0]

        state, reward, terminated, truncated, _ = env.step(action)
        # The episode is over as soon as it is either terminated or truncated.
        done = terminated or truncated
        total_reward += reward
        time.sleep(0.01)  # Increase or decrease to change the render speed

    print(f"Total reward: {total_reward:.2f}")
finally:
    env.close()
