# This file generates the graphics used in the paper

In [None]:
import numpy as np
import pennylane as qml
import pandas as pd
import matplotlib.pyplot as plt

from pathlib import Path

# Training log to analyse. Switch runs by changing which line is active.
metrics_path = "results/vqc76.csv"
# metrics_path = "results/pqc41.csv"
# metrics_path = "results/ddpg19.csv"

# Analysis settings, named so the printed labels stay in sync with the values.
ROLLING_WINDOW = 100             # episodes per rolling mean / std window
TAIL_EPISODES = 500              # trailing episodes used for the convergence check
CONVERGENCE_STD_THRESHOLD = 1.0  # tail std below this is reported as converged

# Label for the figure title, taken from the file name with its trailing run
# number removed ("vqc76" -> "VQC"), so the title always matches the data that
# is actually loaded instead of a fixed algorithm name.
run_name = Path(metrics_path).stem
algo_label = run_name.rstrip("0123456789").upper() or run_name

df = pd.read_csv(metrics_path)

# Lower-case the headers so the column lookups below do not depend on the
# capitalisation used in the CSV.
df.columns = [col.lower() for col in df.columns]

# Force numeric dtypes; values that cannot be parsed become NaN.
df["episode"] = pd.to_numeric(df["episode"], errors="coerce")
df["total reward"] = pd.to_numeric(df["total reward"], errors="coerce")

# min_periods=1 yields a value from the first episode on instead of waiting
# for a full window.
reward_window = df["total reward"].rolling(window=ROLLING_WINDOW, min_periods=1)
df["reward_moving_avg"] = reward_window.mean()
df["reward_volatility"] = reward_window.std()

# Figure 1: rolling mean of the episode reward.
plt.figure(figsize=(10, 5))
plt.plot(df["episode"], df["reward_moving_avg"], label="Moving Avg Reward", color="blue")
plt.xlabel("Episodes")
plt.ylabel("Total Reward")
plt.title(f"Reward Convergence - {algo_label}")
plt.legend()
plt.grid(True)
plt.show()

# Figure 2: rolling standard deviation of the episode reward.
plt.figure(figsize=(10, 5))
plt.plot(df["episode"], df["reward_volatility"], label="Reward Volatility", color="red")
plt.xlabel("Episodes")
plt.ylabel("Volatility (Rolling Std Dev)")
plt.title("Reward Volatility Over Episodes")
plt.legend()
plt.grid(True)
plt.show()

# Summary statistics over the whole run and over its tail.
reward_mean = df["total reward"].mean()
reward_std = df["total reward"].std()
reward_last_500_std = df["total reward"].tail(TAIL_EPISODES).std()

print(f"Mean Reward: {reward_mean:.2f}")
print(f"Overall Reward Std Dev: {reward_std:.2f}")
print(f"Reward Std Dev (Last {TAIL_EPISODES} Episodes): {reward_last_500_std:.2f}")

# Heuristic convergence check based only on the spread of the tail rewards.
if reward_last_500_std < CONVERGENCE_STD_THRESHOLD:
    print(f"The agent has likely converged (low variance in last {TAIL_EPISODES} episodes).")
else:
    print("The agent has NOT fully converged (variance is still high).")


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Per-algorithm training logs compared in the reward-distribution figure.
_results_dir = "results"
ddpg_csv = f"{_results_dir}/ddpg19.csv"
pqc_csv = f"{_results_dir}/pqc34.csv"
vqc_csv = f"{_results_dir}/vqc74.csv"

def load_rewards(csv_path):
    """Return the reward series stored in a training-log CSV as a NumPy array.

    The first column found from a fixed priority list of known reward column
    names is used. Names are matched exactly first; if none match, a second
    pass ignores case and surrounding whitespace, so headers such as
    ``total reward`` or ``Reward`` are accepted as well.

    Args:
        csv_path: Path (or any object accepted by ``pandas.read_csv``) of the
            CSV file to read.

    Returns:
        A one-dimensional ``numpy.ndarray`` with the selected column's values,
        missing entries removed.

    Raises:
        ValueError: If the file contains none of the known reward columns.
    """
    # Ordered by preference: moving-average columns win over raw rewards.
    candidates = ["ma_reward", "reward_ma", "moving_avg_reward", "reward", "Total Reward"]

    df = pd.read_csv(csv_path)

    # Pass 1: exact names, so files that already matched keep selecting the
    # same column as before.
    for col in candidates:
        if col in df.columns:
            return df[col].dropna().to_numpy()

    # Pass 2: case- and whitespace-insensitive fallback. setdefault keeps the
    # left-most column when several headers normalise to the same key.
    normalized = {}
    for col in df.columns:
        normalized.setdefault(str(col).strip().lower(), col)
    for col in candidates:
        match = normalized.get(col.lower())
        if match is not None:
            return df[match].dropna().to_numpy()

    raise ValueError(f"No known reward columns found in {csv_path}. Columns: {df.columns.tolist()}")

# Read one reward series per algorithm.
ddpg_rewards = load_rewards(ddpg_csv)
pqc_rewards = load_rewards(pqc_csv)
vqc_rewards = load_rewards(vqc_csv)

# Overlay the three kernel density estimates on a single set of axes,
# drawn in the order DDPG, PQC, VQC.
plt.figure(figsize=(8, 6))
for _series, _label in (
    (ddpg_rewards, "DDPG"),
    (pqc_rewards, "PQC"),
    (vqc_rewards, "VQC"),
):
    sns.kdeplot(_series, label=_label)

# Axis decoration and rendering.
plt.title("Reward Distributions")
plt.xlabel("Moving Average Reward")
plt.ylabel("Density")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Bound applied both to the stored actions and to the support of the KDE.
ACTION_CLIP = 1.2


def _load_clipped_actions(npz_path, limit=ACTION_CLIP):
    """Return the ``actions`` array of an ``.npz`` archive clipped to ±limit.

    Args:
        npz_path: Path of the ``.npz`` archive to read.
        limit: Symmetric bound; values are clipped to ``[-limit, limit]``.

    Returns:
        The clipped ``actions`` array.
    """
    # Load inside a context manager so the archive's file handle is closed as
    # soon as the array has been read, instead of staying open until garbage
    # collection.
    with np.load(npz_path) as archive:
        actions = archive["actions"]
    return np.clip(actions, -limit, limit)


ddpg_data = _load_clipped_actions("ddpg_policy_policy_outputs.npz")
pqc_data = _load_clipped_actions("pqc_policy_policy_outputs.npz")
vqc_data = _load_clipped_actions("vqc_policy_policy_outputs.npz")

labels = ["DDPG", "PQC", "VQC"]
data = [ddpg_data, pqc_data, vqc_data]
colors = ["#1f77b4", "#ff7f0e", "#2ca02c"]

# One figure per action dimension (column); the column count is taken from
# the DDPG array and the same column index is read from the other two arrays.
for dim in range(ddpg_data.shape[1]):
    plt.figure(figsize=(10, 5))
    for d, label, color in zip(data, labels, colors):
        sns.kdeplot(
            d[:, dim],
            label=label,
            fill=True,
            alpha=0.4,
            linewidth=2,
            color=color,
            clip=(-ACTION_CLIP, ACTION_CLIP),
        )
    plt.title(f"Action Dimension {dim+1} Distribution (Sanitized)")
    plt.xlabel("Action Value")
    plt.ylabel("Density")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def _load_finite_actions(npz_path):
    """Return the ``actions`` array of an ``.npz`` archive without non-finite rows.

    Args:
        npz_path: Path of the ``.npz`` archive to read.

    Returns:
        The ``actions`` array restricted to the rows in which every entry is
        finite (no NaN or infinity).
    """
    # Load inside a context manager so the archive's file handle is closed as
    # soon as the array has been read, instead of staying open until garbage
    # collection.
    with np.load(npz_path) as archive:
        actions = archive["actions"]
    # Keep a row only if all of its entries are finite.
    return actions[np.isfinite(actions).all(axis=1)]


ddpg_outputs = _load_finite_actions("ddpg_policy_policy_outputs.npz")
pqc_outputs = _load_finite_actions("pqc_policy_policy_outputs.npz")
vqc_outputs = _load_finite_actions("vqc_policy_policy_outputs.npz")

# One figure per action dimension (column); the column count is taken from
# the DDPG array and the same column index is read from the other two arrays.
for i in range(ddpg_outputs.shape[1]):
    plt.figure(figsize=(10, 5))
    sns.kdeplot(ddpg_outputs[:, i], label="DDPG", fill=True)
    sns.kdeplot(pqc_outputs[:, i], label="PQC", fill=True)
    sns.kdeplot(vqc_outputs[:, i], label="VQC", fill=True)
    plt.title(f"Action Dimension {i+1} Distribution (Sanitized)")
    plt.xlabel("Action Value")
    plt.ylabel("Density")
    plt.legend()
    plt.show()
