In [None]:
import os, pickle
import numpy as np

from matplotlib import pyplot as plt
# Global matplotlib settings for every figure in this notebook.
# All text is typeset by LaTeX; set "text.usetex" to False to fall back to
# matplotlib's built-in text rendering. amsmath is added to the LaTeX preamble
# (the learning-curve axis label further down uses \boldsymbol).
latex_rc_settings = {
    "text.usetex": True,
    "text.latex.preamble": r"\usepackage{amsmath}",
    "font.size": 14,
}
plt.rcParams.update(latex_rc_settings)

In [None]:
# --- Experiment selection -------------------------------------------------
# gamma and n_IC_per_replay only enter this notebook through the names of the
# result folders that are read (presumably discount factor and number of
# initial conditions per replay -- confirm against the training scripts).
gamma = 0.99
n_IC_per_replay = 200

# Used for the Adam folder name ("Adam_lr_...") and, further down, also for
# the "..._rad_..." folder names of the two Newton-type methods.
lr_adam = 1e-2

methods_list = ["Adam", "Approx. Newton", "Gauss-Newton"]
scale_list = ["scaled", "unscaled", "malscaled"]

# Root folder of all results that belong to the selected gamma / n_IC_per_replay.
data_basepath = os.path.join("..", "data", "ic_investigation", f"gamma{gamma:.3f}, n_IC_per_replay{n_IC_per_replay}")

# Number of contour levels handed to contourf in the contour figure.
levels = 20

# Figures are stored next to the data they were created from. The path is
# derived from data_basepath so that both can never point to different runs.
figpath = os.path.join(data_basepath, "figs", "param_traj_2d_full.png")

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(os.path.dirname(figpath), exist_ok=True)


# Load the data of the different algorithms

In [None]:
# One result container per optimizer, keyed by scale type. Every scale-type
# entry starts with the step-size setting under "lr"; one entry per initial
# condition folder ({"params": ..., "clc": ...}) is added by the loop below.
Adam_dict = {key: {"lr": lr_adam} for key in scale_list}
AN_dict = {key: {"lr": lr_adam} for key in scale_list}
GN_dict = {key: {"lr": lr_adam} for key in scale_list}

# Method name -> (result container, prefix of the result folder name).
# Note that all three folder names are built from lr_adam.
method_registry = {
    "Adam": (Adam_dict, "Adam_lr_"),
    "Approx. Newton": (AN_dict, "approx_newton_rad_"),
    "Gauss-Newton": (GN_dict, "gauss_newton_rad_"),
}

# NOTE: pickle.load can execute arbitrary code contained in the file. Only
# point this notebook at result files that were produced by trusted runs.
for method in methods_list:
    print(f"Loading the results for method: {method}")
    for scale_type in scale_list:
        if method not in method_registry:
            print(f"Unknown method: {method}")
            continue
        method_results, folder_prefix = method_registry[method]

        print(f"Attempting to load {method} results for scale type: {scale_type}")
        method_path = os.path.join(
            data_basepath, f"scale_type_{scale_type}", f"{folder_prefix}{lr_adam:.1e}"
        )

        if not os.path.exists(method_path):
            print(f"Method data path not found: {method_path}")
            continue

        # os.listdir returns entries in arbitrary order. Sorting stores the
        # initial conditions in a reproducible order, so that equally named
        # folders end up at the same position for every method.
        initial_condition_folders = sorted(os.listdir(method_path))
        print(f"Walking through the folders for each different initial condition replay. There are: {initial_condition_folders}")

        for ic_folder in initial_condition_folders:
            initial_condition_path = os.path.join(method_path, ic_folder)
            if not os.path.isdir(initial_condition_path):
                # Stray files next to the replay folders would make os.listdir fail.
                print(f"Skipping non-directory entry: {initial_condition_path}")
                continue

            # --- Parameter trajectory: one rl_params.pkl per agent update ---
            n_agent_updates = sum(
                entry.startswith("agent_update_") for entry in os.listdir(initial_condition_path)
            )
            param_trajectory = []
            for idx in range(n_agent_updates):
                parameter_path = os.path.join(initial_condition_path, f"agent_update_{idx}", "rl_params.pkl")

                if not os.path.exists(parameter_path):
                    print(f"Parameter path not found: {parameter_path}. Ending the parameter loading for this initial condition.")
                    # Zero placeholder for the missing update. It has to be a
                    # (2, 1) column: the trajectory is built with hstack(...).T
                    # and later indexed as [:, 0] / [:, 1], which only works if
                    # every entry is a two-row column (assumes the stored
                    # `.master` vectors have shape (2, 1) -- TODO confirm).
                    param_trajectory.append(np.zeros((2, 1)))
                    break

                with open(parameter_path, "rb") as f:
                    params = pickle.load(f).master
                param_trajectory.append(params)

            if param_trajectory:
                param_trajectory = np.hstack(param_trajectory).T
            else:
                # np.hstack raises on an empty list; keep an empty (0, 2)
                # trajectory so that downstream indexing still works.
                print(f"No agent updates found in: {initial_condition_path}")
                param_trajectory = np.zeros((0, 2))
            print(f"{ic_folder} Number of loaded params: {param_trajectory.shape[0]}")

            # --- Closed-loop cost trajectory ---
            clc_path = os.path.join(initial_condition_path, "processed_results_list.pkl")
            if not os.path.exists(clc_path):
                print(f"CLC path not found: {clc_path}. Skipping CLC loading for this initial condition.")
                clc_trajectory = [0]
            else:
                try:
                    with open(clc_path, "rb") as f:
                        processed_results_list = pickle.load(f)
                except Exception as err:
                    # Catch Exception instead of a bare except so that
                    # KeyboardInterrupt / SystemExit are not swallowed.
                    print(f"Could not load {clc_path} ({err!r}). Falling back to the unprocessed results.")
                    clc_path = os.path.join(initial_condition_path, "unprocessed_results_list.pkl")
                    with open(clc_path, "rb") as f:
                        processed_results_list = pickle.load(f)
                clc_trajectory = np.array([result["cum_reward"]["mean"] for result in processed_results_list])

            method_results[scale_type][ic_folder] = {
                "params": param_trajectory,
                "clc": clc_trajectory,
            }

    print("Finished loading all method results.\n")

In [None]:
# Adds the per-iteration median / min / max of the closed-loop cost over all
# initial conditions ("IC_*" entries) to every scale-type entry of each method.
# The containers are modified in place, so nothing has to be assigned back.
method_results_lookup = {
    "Gauss-Newton": GN_dict,
    "Adam": Adam_dict,
    "Approx. Newton": AN_dict,
}

for method in methods_list:
    if method not in method_results_lookup:
        # Skip instead of continuing with the container of the previous method.
        print(f"Unknown method: {method}")
        continue
    current_dict = method_results_lookup[method]

    for scale_type in scale_list:
        # Only "IC_*" keys hold trajectories; "lr" and the statistics written
        # below live in the same dictionary and must be ignored.
        clc_trajectory_list = []
        for initial_condition, results_dict in current_dict[scale_type].items():
            if not initial_condition.startswith("IC_"):
                continue
            clc_tr = np.asarray(results_dict["clc"])
            if len(clc_tr) == 0:
                # An empty trajectory has no last value to pad with.
                print(f"Empty CLC trajectory for {initial_condition} ({method}, {scale_type}). Skipping it.")
                continue
            clc_trajectory_list.append(clc_tr)

        max_length = max((len(clc_tr) for clc_tr in clc_trajectory_list), default=0)

        # Shorter trajectories are extended with their last value so that all
        # of them can be stacked into one (n_IC, max_length) array.
        clc_trajectory_list_padded = [
            np.concatenate([clc_tr, clc_tr[-1] * np.ones((max_length - len(clc_tr)))])
            for clc_tr in clc_trajectory_list
        ]

        if len(clc_trajectory_list_padded) == 0:
            print(f"No CLC trajectories found to stack for method: {method} and scaling type: {scale_type}.")
            # Placeholder so that the plotting / table cells still find the keys.
            stacked_clc_trajectories = np.zeros((2, 2))
        else:
            stacked_clc_trajectories = np.vstack(clc_trajectory_list_padded)

        current_dict[scale_type]["median_clc"] = np.median(stacked_clc_trajectories, axis=0)
        current_dict[scale_type]["min_clc"] = np.min(stacked_clc_trajectories, axis=0)
        current_dict[scale_type]["max_clc"] = np.max(stacked_clc_trajectories, axis=0)
            

# Load the contour plot data

In [None]:

# Scale type -> folder (below ../data) that holds the parametric sweep results.
parametric_results_folders = {
    "scaled": "parametric_results_scaled_params",
    "unscaled": "parametric_results",
    "malscaled": "parametric_results_malscaled_params",
}


def _load_pickle_or_none(path):
    """Return the unpickled content of `path`, or None (with a message) if the file is missing.

    NOTE: pickle.load can execute arbitrary code; only load trusted result files.
    """
    if not os.path.exists(path):
        print(f"File not found: {path}")
        return None
    with open(path, "rb") as f:
        return pickle.load(f)


# Per scale type: the two parameter meshes plus mean / std of the return on that mesh.
contour_plot_data = {key: {} for key in scale_list}

for scale_type in scale_list:
    if scale_type not in parametric_results_folders:
        # Fail loudly instead of silently reusing the path of the previous scale type.
        raise ValueError(f"No parametric results folder is known for scale type: {scale_type}")

    # Loading the parametric results
    parametric_results_path = os.path.join(
        "..",
        "data",
        parametric_results_folders[scale_type],
        f"n_initial_conditions_{n_IC_per_replay}_gamma_{gamma:.3f}",
    )
    if not os.path.exists(parametric_results_path):
        print(f"Parametric results path not found: {parametric_results_path}")

    alpha_mesh = _load_pickle_or_none(os.path.join(parametric_results_path, "param_range_alpha_mesh.pkl"))
    beta_mesh = _load_pickle_or_none(os.path.join(parametric_results_path, "param_range_beta_mesh.pkl"))
    clc_mesh = _load_pickle_or_none(os.path.join(parametric_results_path, "processed_results_mesh.pkl"))

    if alpha_mesh is None or beta_mesh is None or clc_mesh is None:
        print("At least one of the contour meshes were not found. Initialize with default results (they are wrong)")
        # Placeholder grid with an all-zero surface so that the figure cell
        # still runs; the resulting contours carry no information.
        alpha_min = 0.0
        alpha_max = 0.2
        beta_min = -0.75
        beta_max = 0.25

        alpha_mesh, beta_mesh = np.meshgrid(np.linspace(alpha_min, alpha_max, 100), np.linspace(beta_min, beta_max, 100))
        clc_mesh = np.zeros(alpha_mesh.shape)
        clc_mean_mesh = np.zeros(clc_mesh.shape)
        clc_std_mesh = np.zeros(clc_mesh.shape)

    else:
        print(f"All results for the contours of {scale_type} were found. Finalizing the mean CLC mesh.")
        # Every mesh entry is a result record; pull mean and standard
        # deviation of the return out into plain float arrays.
        clc_mean_mesh = np.zeros(clc_mesh.shape)
        clc_std_mesh = np.zeros(clc_mesh.shape)
        for idx in range(clc_mesh.shape[0]):
            for jdx in range(clc_mesh.shape[1]):
                clc_mean_mesh[idx, jdx] = clc_mesh[idx, jdx]["return"]["mean"]
                clc_std_mesh[idx, jdx] = clc_mesh[idx, jdx]["return"]["std"]

    contour_plot_data[scale_type] = {
        "alpha": alpha_mesh,
        "beta": beta_mesh,
        "clc_mean": clc_mean_mesh,
        "clc_std": clc_std_mesh
    }

# Plot the parameter trajectories on top of the contour plots

In [None]:
# 3 x 3 figure: one row per scale type, one column per method. Every panel
# shows the filled contours of the mean return for that scale type with the
# parameter trajectories of all initial conditions drawn on top.
plt.close("all")

alpha = 1.0  # opacity of the trajectory lines

ncols = 3
nrows = 3

k = 0.75  # size factor applied to the 5 x 4 inch base size of a single panel
figsize = (k * 5 * ncols, k * 4 * nrows)

fig, ax = plt.subplots(ncols=ncols, nrows=nrows, figsize=figsize, constrained_layout=True)

trajectory_results_by_method = {
    "Adam": Adam_dict,
    "Approx. Newton": AN_dict,
    "Gauss-Newton": GN_dict,
}

# Text drawn to the right of each row, describing the parameter scaling.
row_label_texts = {
    "scaled": r"$\tilde{\mu}_\alpha = (1 + 0.1 \theta_\alpha) \cdot 0.95$",
    "unscaled": r"$\tilde{\mu}_\alpha = (1 + 1 \theta_\alpha) \cdot 0.95$",
    "malscaled": r"$\tilde{\mu}_\alpha = (1 + 10 \theta_\alpha) \cdot 0.95$",
}
# Per-row factor used for the vertical placement of the row label.
row_label_height_factors = (0.18, 0.2, 0.225)

# Plot the contour background (identical for all panels of one row)
for row_idx, scale_contour_data in enumerate(contour_plot_data.values()):
    alpha_mesh = scale_contour_data["alpha"]
    beta_mesh = scale_contour_data["beta"]
    clc_mesh = scale_contour_data["clc_mean"]

    for col_idx in range(ax.shape[1]):
        ax[row_idx, col_idx].contourf(alpha_mesh, beta_mesh, clc_mesh, levels = levels)
        ax[row_idx, col_idx].set_xlim([alpha_mesh.min(), alpha_mesh.max()])

# Column titles: the method names on the top row
for col_idx, method in enumerate(methods_list):
    ax[0, col_idx].set_title(method)

# Parameter trajectories
for col_idx, method in enumerate(methods_list):
    if method not in trajectory_results_by_method:
        # Skip instead of re-plotting the trajectories of the previous method.
        print(f"Unknown method: {method}")
        continue

    for row_idx, scale_type in enumerate(scale_list):
        param_trajectories_dict = trajectory_results_by_method[method][scale_type]

        # The colour index counts every key of the dictionary, including the
        # non-trajectory entries that are filtered out by the "IC_" check.
        for ic_ic, (ic_key, ic_results) in enumerate(param_trajectories_dict.items()):
            if ic_key.startswith("IC_"):
                param_trajectory = ic_results["params"]
                ax[row_idx, col_idx].plot(param_trajectory[:, 0], param_trajectory[:, 1], alpha = alpha, color = f"C{ic_ic}", lw = 1)

for axis in ax.flatten():
    axis.set_xlabel(r"$\theta_\alpha$")
    axis.set_ylabel(r"$\theta_\beta$")

# Row labels, rotated and placed right of the last column.
# NOTE(review): the axes positions are read before the figure has been drawn;
# the constant offsets look hand-tuned for this layout -- confirm visually
# whenever the figure size or the layout changes.
for idx, scale in enumerate(scale_list):
    if scale not in row_label_texts:
        # Skip instead of repeating the label of the previous row.
        print(f"Unknown scale type: {scale}")
        continue

    pos = ax[idx, -1].get_position()
    x_pos = pos.x0 + pos.width + 0.095
    y_pos = pos.y0 + pos.height * (1 - (1 + idx) * row_label_height_factors[idx])

    fig.text(x_pos, y_pos, row_label_texts[scale], va='center', ha='center', fontsize = 16, rotation=-90)

fig.savefig(figpath, bbox_inches='tight', dpi = 300.0)

# Swap only the file extension; a plain str.replace would also rewrite a
# ".png" that happens to occur in a directory name.
figpath_pdf = os.path.splitext(figpath)[0] + ".pdf"
fig.savefig(figpath_pdf, bbox_inches='tight', dpi = 1200.0)

# Plot the learning curves

In [None]:
# Learning curves: one panel per scale type, one curve per method. The line is
# the median closed-loop cost over all initial conditions, the shaded band
# spans the minimum and maximum.
# The figure is stored in the same folder as the previous one; re-running the
# cell is harmless because only the file name part of figpath is replaced.
figpath = os.path.join(os.path.dirname(figpath), "learning_curve_full.png")

plt.close("all")

ncols = 1
nrows = 3

alpha = 0.25  # opacity of the min-max band

scaling_factor = 1.0
figsize = (scaling_factor * 5 * ncols, scaling_factor * 2.5 * nrows)

fig, ax = plt.subplots(ncols=ncols, nrows=nrows, figsize=figsize, constrained_layout=True, sharex=True)

panel_titles = {
    "scaled": r"Properly scaled $\xi = 0.1$",
    "unscaled": r"Not scaled $\xi = 1$",
    "malscaled": r"Not properly scaled $\xi = 10$",
}
learning_curves_by_method = {
    "Adam": Adam_dict,
    "Approx. Newton": AN_dict,
    "Gauss-Newton": GN_dict,
}

# Longest curve over all panels; defines the common x-range below.
max_steps = 0
for idx, scale_type in enumerate(scale_list):
    if scale_type not in panel_titles:
        # Skip instead of reusing the title of the previous panel.
        print(f"Unknown scale type: {scale_type}")
        continue
    ax[idx].set_title(panel_titles[scale_type])

    for method, linestyle in zip(methods_list, ['-', '--', '-.']):
        if method not in learning_curves_by_method:
            # Skip instead of re-plotting the curves of the previous method.
            print(f"Unknown method: {method}")
            continue
        param_trajectories_dict = learning_curves_by_method[method][scale_type]

        median_clc = param_trajectories_dict["median_clc"]
        min_clc = param_trajectories_dict["min_clc"]
        max_clc = param_trajectories_dict["max_clc"]

        max_steps = max(max_steps, median_clc.shape[0])
        ax[idx].plot(median_clc, label = method, linestyle = linestyle)
        ax[idx].fill_between(
            x = np.arange(min_clc.shape[0]),
            y1 = min_clc,
            y2 = max_clc,
            alpha = alpha,
        )

for axis in ax.flatten():
    axis.set_ylabel(r"$J(\boldsymbol{\theta})$")
    axis.set_xlabel(r"Iteration $k$")
    axis.grid()
    # Fixed y-range so that the three panels are directly comparable.
    axis.set_ylim([-102, -63])
    axis.set_xlim([0, max_steps - 1])

# One shared legend below the panels; the entries are the same in every panel.
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(handles=handles, labels = labels, loc='outside lower center', ncols = 3, fontsize = 12)

fig.savefig(figpath, bbox_inches='tight', dpi = 300.0)

# Swap only the file extension; a plain str.replace would also rewrite a
# ".png" that happens to occur in a directory name.
figpath_pdf = os.path.splitext(figpath)[0] + ".pdf"
fig.savefig(figpath_pdf, bbox_inches='tight', dpi = 1200.0)

# Tabular results

In [None]:
# Prints, for every method and scale type, the best / median / worst
# closed-loop cost over the initial conditions at the last iteration
# (last entry of the "max_clc" / "median_clc" / "min_clc" curves).
results_by_method = {
    "Gauss-Newton": GN_dict,
    "Adam": Adam_dict,
    "Approx. Newton": AN_dict,
}

for method_type in methods_list:
    print(f"Method: {method_type}")

    if method_type not in results_by_method:
        print(f"Unknown method: {method_type}")
        continue
    current_dict = results_by_method[method_type]

    for scale_type in scale_list:
        scale_results = current_dict[scale_type]
        # Final value of each statistic, read before anything is printed.
        best_performance, median_performance, worst_performance = (
            scale_results[statistic][-1] for statistic in ("max_clc", "median_clc", "min_clc")
        )

        print(f"Scale type: {scale_type}")
        print(f"Best performance: {best_performance:.4f}")
        print(f"Median performance: {median_performance:.4f}")
        print(f"Worst performance: {worst_performance:.4f}")
        print()

    print()

