In [None]:
import os, pickle
import numpy as np

from matplotlib import pyplot as plt
# Global Matplotlib text settings for every figure in this notebook:
# render text through LaTeX with amsmath loaded, at a base font size of 14.
# Set "text.usetex" to False to use Matplotlib's built-in mathtext instead.
plt.rcParams["text.usetex"] = True
plt.rcParams["text.latex.preamble"] = r"\usepackage{amsmath}"
plt.rcParams["font.size"] = 14

In [None]:
# Settings shared by the cells below. `gamma` and `n_IC_per_replay` are
# formatted into the name of the results directory that the loaders read from.
n_IC_per_replay = 200
gamma = 0.99

# Parameterization names (prefix of each "<name>_parameterization" folder)
# and their capitalized display titles.
parameterizations = ['low', 'medium', 'high']
titles = [name.capitalize() for name in parameterizations]

## Load Adam with learning rate $\alpha = 10^{-2}$

In [None]:

# Load the Adam runs (learning rate 1e-2) for every parameterization.
#
# Adam_dict[parameterization] always holds "data_path", "label" and "title".
# If the run directory exists it additionally holds the stacked "parameters"
# and "policy_gradients", and, when the corresponding pickle files exist,
# "clc_list" (mean cumulative reward per entry of processed_results_list.pkl)
# and "training_time".
#
# NOTE(review): pickle.load can execute arbitrary code; only point this cell
# at result folders you trust.
learning_rate = 1e-2
Adam_dict = {}

for parameterization in parameterizations:
	data_path = os.path.join("..", "data", "dimensionality_investigation", f"gamma{gamma:.3f}, n_IC_per_replay{n_IC_per_replay}", f"Adam_lr_{learning_rate:.1e}", f"{parameterization}_parameterization")
	Adam_dict[parameterization] = {
		"data_path": data_path,
		"label": r"Adam $\alpha=10^{-2}$",
		"title": parameterization
	}

	if not os.path.exists(data_path):
		print(f"Data path {data_path} does not exist.")
		continue

	parameters = []
	policy_gradients = []

	# Every sub-directory except the excluded names counts as one agent update;
	# the updates are then read in order as agent_update_0, agent_update_1, ...
	folders = [f for f in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, f)) and not f == "agent_update" and not f == "memories" and not f == "figs"]

	for idx in range(len(folders)):
		update_dir = os.path.join(data_path, f"agent_update_{idx}")
		rl_params_path = os.path.join(update_dir, "rl_params.pkl")

		# Stop at the first gap so list positions stay aligned with the update index.
		if not os.path.exists(rl_params_path):
			print(f"Missing rl_params.pkl in {rl_params_path}")
			break

		with open(rl_params_path, "rb") as f:
			rl_params = pickle.load(f)
		parameters.append(rl_params.master.full())

		policy_gradients_path = os.path.join(update_dir, "policy_gradients.pkl")
		if os.path.exists(policy_gradients_path):
			with open(policy_gradients_path, "rb") as f:
				policy_gradients.append(pickle.load(f))

	if len(parameters) == 0:
		print(f"No parameters found in {data_path}")
	elif len(policy_gradients) != len(parameters):
		print(f"Found {len(policy_gradients)} policy gradients for {len(parameters)} parameter sets in {data_path}")

	# np.stack raises ValueError on an empty list, so each list falls back to an
	# empty placeholder on its own (placeholder shape kept from the original code).
	parameters = np.stack(parameters, axis = 0) if len(parameters) > 0 else np.empty((0, 2, 1))
	policy_gradients = np.stack(policy_gradients, axis = 0) if len(policy_gradients) > 0 else np.empty((0, 2, 1))

	Adam_dict[parameterization]["parameters"] = parameters
	Adam_dict[parameterization]["policy_gradients"] = policy_gradients

	processed_results_path = os.path.join(data_path, "processed_results_list.pkl")
	if os.path.exists(processed_results_path):
		with open(processed_results_path, "rb") as f:
			processed_results_list = pickle.load(f)
		clc_list = [item["cum_reward"]["mean"] for item in processed_results_list]
		Adam_dict[parameterization]['clc_list'] = clc_list

	training_time_path = os.path.join(data_path, "training_time.pkl")
	if os.path.exists(training_time_path):
		with open(training_time_path, "rb") as f:
			training_time = pickle.load(f)
		Adam_dict[parameterization]['training_time'] = training_time

# Load approximate Newton with trust-region radius $\alpha = 10^{-1}$

In [None]:

# Load the approximate-Newton runs (trust-region radius 1e-1) for every parameterization.
#
# approx_newton_tr_dict[parameterization] always holds "data_path" and "label".
# If the run directory exists it additionally holds the stacked "parameters",
# "policy_gradients" and "policy_hessians", and, when the corresponding pickle
# files exist, "clc_list" / "clc_final" (the same list under two keys) and
# "training_time".
#
# NOTE(review): pickle.load can execute arbitrary code; only point this cell
# at result folders you trust.
trust_region_radius = 1e-1
approx_newton_tr_dict = {}

for parameterization in parameterizations:
	data_path = os.path.join("..", "data", "dimensionality_investigation", f"gamma{gamma:.3f}, n_IC_per_replay{n_IC_per_replay}", f"approx_newton_rad_{trust_region_radius:.1e}", f"{parameterization}_parameterization")
	approx_newton_tr_dict[parameterization] = {
		"data_path": data_path,
		"label": r"Approx. Newton $\alpha=10^{-1}$"
	}

	if not os.path.exists(data_path):
		print(f"Data path {data_path} does not exist.")
		continue

	parameters = []
	policy_gradients = []
	policy_hessians = []

	# Every sub-directory except the excluded names counts as one agent update;
	# the updates are then read in order as agent_update_0, agent_update_1, ...
	folders = [f for f in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, f)) and not f == "agent_update" and not f == "memories" and not f == "figs"]

	for idx in range(len(folders)):
		update_dir = os.path.join(data_path, f"agent_update_{idx}")
		rl_params_path = os.path.join(update_dir, "rl_params.pkl")

		# Only rl_params.pkl is checked here, so the message names only that file.
		# Stop at the first gap so list positions stay aligned with the update index.
		if not os.path.exists(rl_params_path):
			print(f"Missing rl_params.pkl in {update_dir}")
			break

		with open(rl_params_path, "rb") as f:
			rl_params = pickle.load(f)
		parameters.append(rl_params.master.full())

		policy_gradients_path = os.path.join(update_dir, "policy_gradients.pkl")
		if os.path.exists(policy_gradients_path):
			with open(policy_gradients_path, "rb") as f:
				policy_gradients.append(pickle.load(f))

		policy_hessian_path = os.path.join(update_dir, "policy_hessian.pkl")
		if os.path.exists(policy_hessian_path):
			with open(policy_hessian_path, "rb") as f:
				policy_hessians.append(pickle.load(f))

	# The original test was `len(parameters) - 1 == 0`: it discarded a single
	# loaded parameter set and let the truly empty case crash inside np.stack.
	if len(parameters) == 0:
		print(f"No parameters found in {data_path}")
	elif len(policy_gradients) != len(parameters) or len(policy_hessians) != len(parameters):
		print(f"Found {len(policy_gradients)} policy gradients and {len(policy_hessians)} policy Hessians for {len(parameters)} parameter sets in {data_path}")

	# np.stack raises ValueError on an empty list, so each list falls back to an
	# empty placeholder on its own (placeholder shapes kept from the original code).
	parameters = np.stack(parameters, axis = 0) if len(parameters) > 0 else np.empty((0, 2, 1))
	policy_gradients = np.stack(policy_gradients, axis = 0) if len(policy_gradients) > 0 else np.empty((0, 2, 1))
	policy_hessians = np.stack(policy_hessians, axis = 0) if len(policy_hessians) > 0 else np.empty((0, 2, 2))

	approx_newton_tr_dict[parameterization]["parameters"] = parameters
	approx_newton_tr_dict[parameterization]["policy_gradients"] = policy_gradients
	approx_newton_tr_dict[parameterization]["policy_hessians"] = policy_hessians

	processed_results_path = os.path.join(data_path, "processed_results_list.pkl")
	if os.path.exists(processed_results_path):
		with open(processed_results_path, "rb") as f:
			processed_results_list = pickle.load(f)
		clc_list = [item["cum_reward"]["mean"] for item in processed_results_list]
		approx_newton_tr_dict[parameterization]['clc_list'] = clc_list
		approx_newton_tr_dict[parameterization]['clc_final'] = clc_list

	training_time_path = os.path.join(data_path, "training_time.pkl")
	if os.path.exists(training_time_path):
		with open(training_time_path, "rb") as f:
			training_time = pickle.load(f)
		approx_newton_tr_dict[parameterization]['training_time'] = training_time

# Load Gauss-Newton with trust-region radius $\alpha = 10^{-1}$

In [None]:

# Load the Gauss-Newton runs (trust-region radius 1e-1) for every parameterization.
#
# gauss_newton_tr_dict[parameterization] always holds "data_path" and "label".
# If the run directory exists it additionally holds the stacked "parameters",
# "policy_gradients" and "policy_hessians", and, when the corresponding pickle
# files exist, "clc_list" / "clc_final" (the same list under two keys) and
# "training_time".
#
# NOTE(review): pickle.load can execute arbitrary code; only point this cell
# at result folders you trust.
trust_region_radius = 1e-1
gauss_newton_tr_dict = {}

for parameterization in parameterizations:
	data_path = os.path.join("..", "data", "dimensionality_investigation", f"gamma{gamma:.3f}, n_IC_per_replay{n_IC_per_replay}", f"gauss_newton_rad_{trust_region_radius:.1e}", f"{parameterization}_parameterization")
	gauss_newton_tr_dict[parameterization] = {
		"data_path": data_path,
		"label": r"Gauss-Newton $\alpha=10^{-1}$ (proposed)"
	}

	if not os.path.exists(data_path):
		print(f"Data path {data_path} does not exist.")
		continue

	parameters = []
	policy_gradients = []
	policy_hessians = []

	# Every sub-directory except the excluded names counts as one agent update;
	# the updates are then read in order as agent_update_0, agent_update_1, ...
	folders = [f for f in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, f)) and not f == "agent_update" and not f == "memories" and not f == "figs"]

	for idx in range(len(folders)):
		update_dir = os.path.join(data_path, f"agent_update_{idx}")
		rl_params_path = os.path.join(update_dir, "rl_params.pkl")

		# Only rl_params.pkl is checked here, so the message names only that file.
		# Stop at the first gap (as the Adam and approximate-Newton loaders do) so
		# list positions stay aligned with the update index.
		if not os.path.exists(rl_params_path):
			print(f"Missing rl_params.pkl in {update_dir}")
			break

		with open(rl_params_path, "rb") as f:
			rl_params = pickle.load(f)
		parameters.append(rl_params.master.full())

		policy_gradients_path = os.path.join(update_dir, "policy_gradients.pkl")
		if os.path.exists(policy_gradients_path):
			with open(policy_gradients_path, "rb") as f:
				policy_gradients.append(pickle.load(f))

		policy_hessian_path = os.path.join(update_dir, "policy_hessian.pkl")
		if os.path.exists(policy_hessian_path):
			with open(policy_hessian_path, "rb") as f:
				policy_hessians.append(pickle.load(f))

	if len(parameters) == 0:
		print(f"No parameters found in {data_path}")
	elif len(policy_gradients) != len(parameters) or len(policy_hessians) != len(parameters):
		print(f"Found {len(policy_gradients)} policy gradients and {len(policy_hessians)} policy Hessians for {len(parameters)} parameter sets in {data_path}")

	# np.stack raises ValueError on an empty list, so each list falls back to an
	# empty placeholder on its own (same placeholder shapes as the
	# approximate-Newton loader; the trailing dimensions are arbitrary).
	parameters = np.stack(parameters, axis = 0) if len(parameters) > 0 else np.empty((0, 2, 1))
	policy_gradients = np.stack(policy_gradients, axis = 0) if len(policy_gradients) > 0 else np.empty((0, 2, 1))
	policy_hessians = np.stack(policy_hessians, axis = 0) if len(policy_hessians) > 0 else np.empty((0, 2, 2))

	gauss_newton_tr_dict[parameterization]["parameters"] = parameters
	gauss_newton_tr_dict[parameterization]["policy_gradients"] = policy_gradients
	gauss_newton_tr_dict[parameterization]["policy_hessians"] = policy_hessians

	processed_results_path = os.path.join(data_path, "processed_results_list.pkl")
	if os.path.exists(processed_results_path):
		with open(processed_results_path, "rb") as f:
			processed_results_list = pickle.load(f)
		clc_list = [item["cum_reward"]["mean"] for item in processed_results_list]
		gauss_newton_tr_dict[parameterization]['clc_list'] = clc_list
		gauss_newton_tr_dict[parameterization]['clc_final'] = clc_list

	training_time_path = os.path.join(data_path, "training_time.pkl")
	if os.path.exists(training_time_path):
		with open(training_time_path, "rb") as f:
			training_time = pickle.load(f)
		gauss_newton_tr_dict[parameterization]['training_time'] = training_time

# Learning curves

In [None]:
# Learning curves: one subplot per parameterization, one line per method,
# plotting the stored mean cumulative reward against the iteration index.
# The figure is written next to the data as PNG and PDF.
figpath = os.path.join("..", "data", "dimensionality_investigation", f"gamma{gamma:.3f}, n_IC_per_replay{n_IC_per_replay}", "learning_curves_scalability.png")

plt.close("all")

ncols = 1
nrows = len(parameterizations)

titles = [
	"Low (2)",
	"Medium (13)",
	"High (33)",
]

k = 1.0
figsize = (k * 5 * ncols, k * 2 * nrows)

fig, ax = plt.subplots(ncols=ncols, nrows=nrows, figsize=figsize, constrained_layout=True, sharex=True,)

for axis, title in zip(ax, titles):
	axis.set_title(title)

# (results dict, key holding the curve, line style, colour) for each method.
curve_specs = [
	(Adam_dict, 'clc_list', '-', 'tab:blue'),
	(approx_newton_tr_dict, 'clc_final', '--', 'tab:orange'),
	(gauss_newton_tr_dict, 'clc_final', ':', 'tab:green'),
]

idx_init_step = 0
idx_final_step = 0
for idx, parameterization in enumerate(parameterizations):
	for results, curve_key, linestyle, color in curve_specs:
		curve = results[parameterization].get(curve_key)
		# Skip runs without data; an empty curve would make indices[-1] fail.
		if curve is None or len(curve) == 0:
			continue
		indices = np.arange(len(curve))
		# Track the longest curve so the shared x-axis covers every method.
		idx_final_step = max(idx_final_step, indices[-1])
		ax[idx].plot(
			indices,
			curve,
			label=results[parameterization]['label'],
			linestyle=linestyle,
			color=color,
		)

for axis in ax:
	axis.set_ylabel(r"$J(\boldsymbol{\theta})$")
	axis.grid(True, which = "both")

ax[-1].set_xlabel(r"Iteration $k$")
# Identical lower and upper limits are degenerate; only set them once at least
# one curve with more than one point has been plotted.
if idx_final_step > idx_init_step:
	ax[-1].set_xlim([idx_init_step, idx_final_step])

ax[-1].set_xticks(np.arange(idx_init_step, idx_final_step + 1, 5.0), minor=True)

ax[-1].legend(loc = "lower right", fontsize = 12)

fig.savefig(figpath, bbox_inches='tight', dpi = 300.0)

# Swap only the file extension; str.replace would also touch a ".png" elsewhere in the path.
figpath_pdf = os.path.splitext(figpath)[0] + ".pdf"
fig.savefig(figpath_pdf, bbox_inches='tight', dpi = 1200.0)

# Tabular evaluation of the performance

In [None]:
# Print the first and last entry of each method's "clc_list" (mean cumulative
# reward) for every parameterization.
methods_list = ["Adam", "Approx. Newton", "Gauss-Newton"]

# Results dictionary belonging to each method name.
method_dicts = {
	"Adam": Adam_dict,
	"Approx. Newton": approx_newton_tr_dict,
	"Gauss-Newton": gauss_newton_tr_dict,
}

for method_type in methods_list:
	print(f"Method: {method_type}")

	current_dict = method_dicts.get(method_type)
	if current_dict is None:
		print(f"Unknown method: {method_type}")
		continue

	for parameterization_type in parameterizations:
		print(f"Parameterization type: {parameterization_type}")

		# "clc_list" is only stored when processed_results_list.pkl was found,
		# so a missing or empty list is reported instead of raising.
		performance = current_dict[parameterization_type].get("clc_list")
		if performance is None or len(performance) == 0:
			print("No performance data available.")
		else:
			initial_performance = performance[0]
			final_performance = performance[-1]
			print(f"Initial performance: {initial_performance:.4f}")
			print(f"Final performance: {final_performance:.4f}")
		print()

	print()



# Training times

In [None]:
# Summarize the per-iteration training times of each method and store the
# statistics back into the method's results dictionary under the keys
# 'mean_training_time', 'std_training_time', 'median_training_time',
# '10th_percentile_training_time' and '90th_percentile_training_time'.
exclude_initial_sample = True # This makes sense because the first one also includes the initialization of all subprocesses (kernel launches)

# NOTE: the percentile key names above are fixed strings; they do not follow
# these two values if they are changed.
lower_percentile = 10
upper_percentile = 90

# (name used in the printed messages, results dict) for each method.
timed_methods = [
	("Adam", Adam_dict),
	("Approx. Newton TR", approx_newton_tr_dict),
	("Gauss-Newton TR", gauss_newton_tr_dict),
]

for parameterization in parameterizations:
	print(f"Parameterization: {parameterization}")

	for method_name, method_dict in timed_methods:
		entry = method_dict[parameterization]
		if "training_time" not in entry:
			print(f"No training time data for {method_name}.")
			continue

		samples = entry['training_time'][1:] if exclude_initial_sample else entry['training_time']
		# Statistics of an empty selection are NaN (with NumPy warnings), so
		# skip the method and store no statistics keys in that case.
		if len(samples) == 0:
			print(f"Not enough training time samples for {method_name}.")
			continue

		mean_time = np.mean(samples)
		std_time = np.std(samples)
		median_time = np.median(samples)
		lower_percentile_time = np.percentile(samples, lower_percentile)
		upper_percentile_time = np.percentile(samples, upper_percentile)

		entry['mean_training_time'] = mean_time
		entry['std_training_time'] = std_time
		entry['median_training_time'] = median_time
		entry['10th_percentile_training_time'] = lower_percentile_time
		entry['90th_percentile_training_time'] = upper_percentile_time

		print(f"{method_name} Mean Training Time: {mean_time:.2f} s, Std: {std_time:.2f} s")

	print()

In [None]:
# Grouped bar chart of the mean time per RL iteration: one group per
# parameterization, one bar per method, error bars of k_sigma standard deviations.
# The figure is written next to the data as PNG and PDF.
figpath = os.path.join("..", "data", "dimensionality_investigation", f"gamma{gamma:.3f}, n_IC_per_replay{n_IC_per_replay}", "training_times_mean_scalability.png")

plt.close("all")

ncols, nrows = 1, 1

k = 0.75
figsize = (k * 7.5 * ncols, k * 4 * nrows)

fig, ax = plt.subplots(ncols=ncols, nrows=nrows, figsize=figsize, constrained_layout=True, sharex=True, sharey=True)

x_labels = ['Low (2)', 'Medium (13)', 'High (33)']
k_sigma = 3.0
labels = ["Adam", "Approx. Newton", "Gauss-Newton (proposed)"]

# Results dictionary behind each bar series, in plotting order.
method_dicts = {
	"Adam": Adam_dict,
	"Approx. Newton": approx_newton_tr_dict,
	"Gauss-Newton": gauss_newton_tr_dict,
}

# Missing statistics are recorded as 0 so every series has one value per group.
print("Collecting mean training times:")
results_mean = {method: [] for method in method_dicts}
for parameterization in parameterizations:
	print(f"Parameterization: {parameterization}")
	for method, method_dict in method_dicts.items():
		results_mean[method].append(method_dict[parameterization].get('mean_training_time', 0))

print()

print("Collecting std training times:")
results_std = {method: [] for method in method_dicts}
for parameterization in parameterizations:
	print(f"Parameterization: {parameterization}")
	for method, method_dict in method_dicts.items():
		results_std[method].append(method_dict[parameterization].get('std_training_time', 0))

# One slot per method plus one empty slot as the gap between groups.
width = 1 / (len(results_mean) + 1)
x = np.arange(len(x_labels))
hatches = ["", "/", "*"]

for position, (hatch, label, method) in enumerate(zip(hatches, labels, results_mean)):
	times = results_mean[method]
	bar_positions = x + position * width
	rect = ax.bar(bar_positions, times, width = width, hatch = hatch, label = label)
	ax.errorbar(bar_positions, times, yerr=k_sigma * np.array(results_std[method]), fmt='none', ecolor='black', capsize=5)
	ax.bar_label(rect, padding=10, fmt='%.1f', rotation=90)

ax.set_ylim([0, 650])

ax.legend(loc = "upper left", fontsize = 12)

ax.set_ylabel(r"Time per RL iteration $\left[\mathrm{s}\,\mathrm{iter}^{-1}\right]$")
# Centre the tick labels under the middle bar of each group.
ax.set_xticks(x + width, x_labels)

fig.savefig(figpath, bbox_inches='tight', dpi = 300.0)

figpath_pdf = figpath.replace(".png", ".pdf")
fig.savefig(figpath_pdf, bbox_inches='tight', dpi = 1200.0)