In [None]:
from pistonball_CleanRL import *
import torch
from supersuit import color_reduction_v0, frame_stack_v1, resize_v1
from pettingzoo.butterfly import pistonball_v6
import pandas as pd
import cv2
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Checkpoint location: directory and file name of the saved weights.
save_dir, model_name = "saved_models", "model_episode_500.pt"
# Directory holding the training CSV logs (the rendered video is written here too).
log_dir = "training_logs"
# Which agent class to build: "ADG" selects Agent_ADG, anything else selects Agent.
agent_module = "ADG"
# Number of pistons passed to the environment constructor.
n_pistons = 10
# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Load the training log.
log_path = os.path.join(log_dir, "training_log.csv")
logs = pd.read_csv(log_path)

alpha = 0.1  # smoothing factor of the exponential moving average; tune as needed


def _ewm_smooth(series):
    """Return the exponentially weighted mean of ``series`` using the cell's ``alpha``."""
    return series.ewm(alpha=alpha, adjust=False).mean()


# ``assign`` returns a new frame, so the raw log is left untouched; one smoothed
# column is added per logged metric.
logs_smoothed = logs.assign(
    episodic_return_smoothed=_ewm_smooth(logs['episodic_return']),
    value_loss_smoothed=_ewm_smooth(logs['value_loss']),
    policy_loss_smoothed=_ewm_smooth(logs['policy_loss']),
)

# Configure the seaborn style.
sns.set_theme(style="whitegrid")

# Smoothed episodic return curve.
fig_return, ax_return = plt.subplots(figsize=(12, 6))
sns.lineplot(
    data=logs_smoothed,
    x='episode',
    y='episodic_return_smoothed',
    label='Episodic Return',
    color='blue',
    ax=ax_return,
)
ax_return.set_xlabel('Episode')
ax_return.set_ylabel('Episodic Return')
ax_return.set_title('Episodic Return per Episode')
ax_return.legend()
plt.show()

# Smoothed loss curves.
# NOTE(review): the value-loss curve is disabled below, yet the title still
# mentions it -- either re-enable the line or adjust the title.
fig_loss, ax_loss = plt.subplots(figsize=(12, 6))
# sns.lineplot(data=logs_smoothed, x='episode', y='value_loss_smoothed', label='Value Loss', color='orange', ax=ax_loss)
sns.lineplot(
    data=logs_smoothed,
    x='episode',
    y='policy_loss_smoothed',
    label='Policy Loss',
    color='green',
    ax=ax_loss,
)
ax_loss.set_xlabel('Episode')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Value Loss and Policy Loss per Episode')
ax_loss.legend()
plt.show()

In [None]:
# Load the two training logs that are compared against each other.
log_path_PPO = os.path.join(log_dir, "training_log_PPO.csv")
log_path_ADG = os.path.join(log_dir, "training_log_ADG4.csv")
log_PPO, log_ADG = pd.read_csv(log_path_PPO), pd.read_csv(log_path_ADG)

alpha = 0.1  # smoothing factor of the exponential moving average; tune as needed

# Each smoothed frame is a fresh copy of its raw log plus one extra column that
# holds the exponentially weighted mean of the episodic return.
log_PPO_smoothed, log_ADG_smoothed = (
    raw_log.assign(
        episodic_return_smoothed=raw_log['episodic_return'].ewm(alpha=alpha, adjust=False).mean()
    )
    for raw_log in (log_PPO, log_ADG)
)

# Configure the seaborn style.
sns.set_theme(style="whitegrid")

# Draw both smoothed episodic-return curves on one set of axes.
fig_cmp, ax_cmp = plt.subplots(figsize=(12, 6))
for curve_frame, curve_label, curve_color in (
    (log_PPO_smoothed, 'PPO', 'blue'),
    (log_ADG_smoothed, 'ADG', 'red'),
):
    sns.lineplot(
        data=curve_frame,
        x='episode',
        y='episodic_return_smoothed',
        label=curve_label,
        color=curve_color,
        ax=ax_cmp,
    )
ax_cmp.set_xlabel('Episode')
ax_cmp.set_ylabel('Episodic Return')
ax_cmp.set_title('Episodic Return per Episode')
ax_cmp.legend()
plt.show()


In [None]:
""" RENDER THE POLICY """
# Build the evaluation environment: rgb_array rendering, discrete actions, then
# the wrapper stack color reduction -> resize to 64x64 -> stack 4 frames.
env = pistonball_v6.parallel_env(render_mode="rgb_array", continuous=False, n_pistons=n_pistons)
env = color_reduction_v0(env)
env = resize_v1(env, 64, 64)
env = frame_stack_v1(env, stack_size=4)
num_actions = env.action_space(env.possible_agents[0]).n
render_episodes = 3

# Video output: an mp4 written next to the training logs.
vedio_filename = "pistonball_v6.mp4"
vedio_path = os.path.join(log_dir, vedio_filename)
vedio_fps = 30
# OpenCV takes the frame size as (width, height); both come from the unwrapped env.
vedio_framesize = (env.unwrapped.screen_width, env.unwrapped.screen_height)
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
# Make sure the target directory exists, otherwise the writer cannot open the file.
os.makedirs(os.path.dirname(vedio_path) or ".", exist_ok=True)
vedio_out = cv2.VideoWriter(vedio_path, fourcc, vedio_fps, vedio_framesize)
if not vedio_out.isOpened():
    # An unopened writer accepts write() calls but records nothing, so fail loudly
    # here instead of producing an empty video later.
    raise RuntimeError(f"Could not open video writer for {vedio_path!r}")

# Instantiate the agent class selected by `agent_module`.
if agent_module == "ADG":
    agent = Agent_ADG(num_actions=num_actions).to(device)
else:
    agent = Agent(num_actions=num_actions).to(device)

# Only build the checkpoint path when a model name is configured, so that an
# unset (None / empty) `model_name` skips loading instead of failing in os.path.join.
model_path = os.path.join(save_dir, model_name) if model_name else None

if model_path is not None:
    # NOTE(review): torch.load unpickles the file -- only load checkpoints you trust;
    # consider passing weights_only=True if the installed torch version supports it.
    agent.load_state_dict(torch.load(model_path, map_location=device))
    print("Model loaded successfully.")

# Switch the agent to inference mode and cache the number of agents for the render loops.
agent.eval()
num_agents = len(env.possible_agents)

In [None]:
# Roll out `render_episodes` episodes, choosing all agents' actions with a single
# agent(obs) call per step, and record every rendered frame to the video file.

# A previous run of a render cell may already have released the writer; writing to
# a closed writer silently drops frames, so reopen it (this restarts the video file).
if not vedio_out.isOpened() and not vedio_out.open(vedio_path, fourcc, vedio_fps, vedio_framesize):
    raise RuntimeError(f"Could not open video writer for {vedio_path!r}")

try:
    with torch.no_grad():
        for episode in range(render_episodes):
            obs, infos = env.reset(seed=None)
            obs = batchify_obs(obs, device)
            # Seed the flags so the loop body runs at least once per episode.
            terms = [False]
            truncs = [False]
            # Stop as soon as any agent is terminated or truncated.
            while not any(terms) and not any(truncs):
                vedio_frame = env.render()
                # The env renders in "rgb_array" mode, whereas OpenCV writers expect
                # BGR channel order; convert so red and blue are not swapped.
                vedio_out.write(cv2.cvtColor(vedio_frame, cv2.COLOR_RGB2BGR))
                actions, logprobs, _, values = agent(obs)
                obs, rewards, terms, truncs, infos = env.step(unbatchify(actions, env))
                obs = batchify_obs(obs, device)
                # env.step returns per-agent dicts; keep only the flag values.
                terms = list(terms.values())
                truncs = list(truncs.values())
finally:
    # Always finalize the file, even if the rollout raises part-way through.
    vedio_out.release()

In [None]:
# Roll out `render_episodes` episodes with sequential action selection: agents are
# processed from the last index to the first, and each agent's call receives the
# actions stored at the two following indices.

# The writer may already have been released (e.g. by the other render cell); writing
# to a closed writer silently drops frames, so reopen it (this restarts the video file).
if not vedio_out.isOpened() and not vedio_out.open(vedio_path, fourcc, vedio_fps, vedio_framesize):
    raise RuntimeError(f"Could not open video writer for {vedio_path!r}")

try:
    with torch.no_grad():
        for episode in range(render_episodes):
            obs, infos = env.reset(seed=None)
            # Extra dim at position 1 so that obs[ind] keeps a leading axis of size 1.
            obs = batchify_obs(obs, device).unsqueeze(1)
            # Seed the flags so the loop body runs at least once per episode.
            terms = [False]
            truncs = [False]
            # Stop as soon as any agent is terminated or truncated.
            while not any(terms) and not any(truncs):
                vedio_frame = env.render()
                # The env renders in "rgb_array" mode, whereas OpenCV writers expect
                # BGR channel order; convert so red and blue are not swapped.
                vedio_out.write(cv2.cvtColor(vedio_frame, cv2.COLOR_RGB2BGR))

                # Two trailing padding slots (value 1) give the last two agents
                # something to read at indices ind+1 and ind+2.
                actions = torch.ones(num_agents + 2, dtype=torch.int).to(device)
                # Placeholder rows; each row is overwritten below before it is used.
                depend_actions = torch.full((num_agents, 2), 3, dtype=torch.int).to(device)
                for ind in range(num_agents - 1, -1, -1):
                    depend_actions[ind] = actions[ind+1:ind+3]
                    actions[ind], _, _, _ = agent(obs[ind], depend_actions[ind].unsqueeze(0))
                # Drop the two padding slots before stepping the environment.
                actions = actions[:-2]

                obs, rewards, terms, truncs, infos = env.step(unbatchify(actions, env))
                obs = batchify_obs(obs, device).unsqueeze(1)

                # env.step returns per-agent dicts; keep only the flag values.
                terms = list(terms.values())
                truncs = list(truncs.values())
finally:
    # Always finalize the file, even if the rollout raises part-way through.
    vedio_out.release()