In [None]:
import os, sys, pickle
import pandas as pd
import numpy as np

from tqdm import tqdm

from matplotlib import pyplot as plt
# Global matplotlib defaults for this notebook: all figure text is typeset
# through LaTeX with amsmath loaded, at a base font size of 14.
# Set "text.usetex" to False to render the figures without LaTeX.
plt.rcParams["text.usetex"] = True
plt.rcParams["text.latex.preamble"] = r"\usepackage{amsmath}"
plt.rcParams["font.size"] = 14
print(plt.get_backend())

# Put the sibling "training" directory on the module search path.
sys.path.append(os.path.join(os.pardir, "training"))

In [None]:
# ---------------------------------------------------------------------------
# Notebook configuration: every tunable value lives in this cell.
# ---------------------------------------------------------------------------

# Figure output options.
also_pdf = True  # save a PDF copy next to the PNG
close_figs = False  # close the figure once it has been saved

activate_mini_plot = False  # overlay a zoomed-in inset on the learning-curve figure

# Experiment selection. gamma and n_IC_per_replay are encoded in the data and
# figure directory names; n_IC_per_replay is also the episode spacing / bin
# size used when building the learning curves.
gamma = 0.99
n_IC_per_replay = 200

lr = 1e-2  # encoded in the method directory names below
parameterization = "low"  # selects the "<parameterization>_parameterization" result folder
agent_update = 50  # NOTE(review): not referenced by the cells visible here - confirm it is still needed

# One entry per curve; the four lists below are zipped together when plotting,
# so they must stay the same length and in the same order.
methods_list = [
	f"Adam_lr_{lr:.1e}",
	f"approx_newton_rad_{10 * lr:.1e}",
	f"gauss_newton_rad_{10 * lr:.1e}",
	os.path.join("sb3_td3", "lr_1.00e-03_bs_100000_batch_1024_tau_1.0e-01_pd_10_msv_1"),
]
labels = ["Adam (MPC)", "Approx. Newton (MPC)", "Gauss-Newton (MPC)", "SB3 TD3 (NN)"]
linestyles = ["-", "--", "-.", ":"]
colors = ["C0", "C1", "C2", "C3"]

# Where the training results are read from and where the figures are written to.
training_results_basepath = os.path.join("..", "data", "dimensionality_investigation", f"gamma{gamma:.3f}, n_IC_per_replay{n_IC_per_replay}")
fig_basepath = os.path.join("..", "data", "sb3_comparison", f"gamma{gamma:.3f}, n_IC_per_replay{n_IC_per_replay}", "figs")

# exist_ok=True makes this safe to re-run and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(fig_basepath, exist_ok=True)

In [None]:
plt.close("all")

# Raw training logs keyed by method name. Methods whose file is missing are
# reported and left out of the dictionary.
learning_curve_data = {}
for method in methods_list:
	print(f"Loading performance data for method {method}...")

	# Methods with "sb3" in their name are read from a monitor CSV under
	# ../data/<method>; every other method is read from a pickled results list
	# under training_results_basepath.
	is_sb3_run = "sb3" in method
	if is_sb3_run:
		performance_path = os.path.join("..", "data", method, "monitor.csv")
	else:
		performance_path = os.path.join(training_results_basepath, method, f"{parameterization}_parameterization", "processed_results_list.pkl")

	if os.path.exists(performance_path):
		if is_sb3_run:
			# The first line of the CSV is skipped; the header row follows it.
			episode_data = pd.read_csv(performance_path, skiprows=1)
		else:
			# NOTE(review): pickle.load can execute arbitrary code - only load
			# result files produced by trusted training runs.
			with open(performance_path, "rb") as results_file:
				episode_data = pickle.load(results_file)

		learning_curve_data[method] = episode_data
	else:
		print(f"Performance data not found for method {method}, skipping...")
	

In [None]:
# Turn each method's raw log into a learning curve, stored back under the same
# key as {"episode_list": [...], "clc_list": [...]}.
for method, item in learning_curve_data.items():
	# Idempotence guard: this cell replaces the raw data in place, so on a
	# re-run the entry is already a processed curve. Without this check the
	# second execution would fail while indexing the processed dict.
	if isinstance(item, dict) and "clc_list" in item:
		continue

	if "sb3" not in method:
		# Pickled results: one record per evaluation, each holding the mean
		# cumulative reward under ["cum_reward"]["mean"].
		rewards = [ep["cum_reward"]["mean"] for ep in item]
	else:
		# SB3 monitor log: column "r" holds one reward per episode. Average it
		# over consecutive bins of n_IC_per_replay episodes. The last bin also
		# absorbs any remainder, and a log shorter than one bin is kept as a
		# single bin.
		sb3_reward_array = item["r"].to_numpy()
		n_full_bins = len(sb3_reward_array) // n_IC_per_replay
		split_points = [(idx + 1) * n_IC_per_replay for idx in range(n_full_bins - 1)]
		splitted_reward_array = np.split(sb3_reward_array, split_points)
		rewards = [np.mean(arr) for arr in splitted_reward_array]

	# Consecutive points are spaced n_IC_per_replay episodes apart, starting at 0.
	# NOTE(review): for the pickled runs this assumes one record per
	# n_IC_per_replay episodes - confirm against the training script.
	episode_list = [idx * n_IC_per_replay for idx in range(len(rewards))]

	learning_curve_data[method] = {
		"episode_list": episode_list,
		"clc_list": rewards
	}

In [None]:
plt.close("all")

scaling_factor = 1.0
fig, ax = plt.subplots(figsize=(scaling_factor * 5, scaling_factor * 3.5), constrained_layout=True)

# The loading cell skips methods whose data file is missing, so only plot the
# methods that actually made it into learning_curve_data instead of failing
# with a KeyError on the first absent one.
available_curves = [
	(method, linestyle, color, label)
	for method, linestyle, color, label in zip(methods_list, linestyles, colors, labels)
	if method in learning_curve_data
]

# Common x-range over all plotted curves (always includes episode 0).
min_episode = 0
max_episode = 0
for method, linestyle, color, label in available_curves:
	episode_list = learning_curve_data[method]["episode_list"]
	clc_list = learning_curve_data[method]["clc_list"]

	# default= keeps the current bounds when a curve has no points.
	min_episode = min(min_episode, min(episode_list, default=min_episode))
	max_episode = max(max_episode, max(episode_list, default=max_episode))

	ax.plot(episode_list, clc_list, label=label, linestyle=linestyle, color=color)

ax.set_xlabel("Episodes")
ax.set_ylabel(r"$J(\boldsymbol{\theta})$")

ax.set_ylim([-1000, -50])
ax.set_xlim([min_episode, max_episode])

# The "outside ..." legend location relies on the constrained layout requested above.
fig.legend(loc = "outside lower center", fontsize = 12, ncols = 2)

if activate_mini_plot:
	# Inset axes (figure-fraction coordinates) zooming in on the y-range [-250, -50].
	sub_plot = fig.add_axes([0.54, 0.42, 0.35, 0.325])
	for method, linestyle, color, label in available_curves:
		episode_list = learning_curve_data[method]["episode_list"]
		clc_list = learning_curve_data[method]["clc_list"]

		sub_plot.plot(episode_list, clc_list, label=label, linestyle=linestyle, color=color)
	# min_episode / max_episode already span every curve from the main loop.
	sub_plot.set_xlim([min_episode, max_episode])
	sub_plot.set_xticks([0, 5000, 10000])
	sub_plot.set_ylim([-250, -50])

# Save through the figure object rather than pyplot's implicit current figure.
fig.savefig(os.path.join(fig_basepath, "sb3_learning_curve_comparison.png"))
if also_pdf:
	fig.savefig(os.path.join(fig_basepath, "sb3_learning_curve_comparison.pdf"))
if close_figs:
	plt.close(fig)

# Performance of the TD algorithm at different training stages

In [None]:
# Print the first, middle and last point of every method's learning curve.
for method in methods_list:
	print("Method:", method)

	# Methods skipped by the loading cell have no entry, and an empty curve has
	# no points to report; say so instead of raising KeyError / IndexError.
	curve = learning_curve_data.get(method)
	if curve is None or len(curve["clc_list"]) == 0:
		print("No learning-curve data available, skipping...")
		print()
		continue

	episode_list = curve["episode_list"]
	clc_list = curve["clc_list"]
	middle_idx = len(clc_list) // 2

	# episode_list starts at 0 by construction, so this still reports 0 episodes.
	initial_episodes = episode_list[0]
	initial_performance = clc_list[0]

	intermediate_episodes = episode_list[middle_idx]
	intermediate_performance = clc_list[middle_idx]

	final_episodes = episode_list[-1]
	final_performance = clc_list[-1]

	print(f"Initial performance ({initial_episodes} Episodes): {initial_performance:.2f}")
	print(f"Intermediate performance ({intermediate_episodes} Episodes): {intermediate_performance:.2f}")
	print(f"Final performance ({final_episodes} Episodes): {final_performance:.2f}")

	print()