In [None]:
import dill
import gpytorch
from helpers import util_functions, plotting_functions
import itertools
from lodegp import LODEGP
from matplotlib import pyplot as plt
import numpy as np
import os
from pathlib import Path
import torch

#### Notes on file structure

- results are stored in ../results/results
- figures are stored in ../results/figures
- folder structure is based on data (i.e. start/end point, number of datapoints and noise level)
- file names are based on the actual model i.e. system name, l1 and l2 values 
- file names are separated by "_"
- relevant results files are ...
    - f"{filename_addendum}_MLL.pkl"
    - f"{filename_addendum}_MLL_logs.pkl"
    - f"{filename_addendum}_MAP.pkl"
    - f"{filename_addendum}_MAP_logs.pkl"
    - f"{filename_addendum}_mean_ode_satisfaction_error_MLL.pkl"
    - f"{filename_addendum}_mean_ode_satisfaction_error_MAP.pkl"
    - f"{filename_addendum}_sample_ode_satisfaction_error_MLL.pkl"
    - f"{filename_addendum}_sample_ode_satisfaction_error_MAP.pkl"
    - f"{filename_addendum}_MLL_model_train_MSEs.pkl"
    - f"{filename_addendum}_MAP_model_train_MSEs.pkl"
    - f"{filename_addendum}_MLL_model_test_MSEs.pkl"
    - f"{filename_addendum}_MAP_model_test_MSEs.pkl"
- relevant figure files are ...
    - f"MLL_model_posterior_{system_name}_l1-{l1_param_val}_l2-{l2_param_val}_{START}-{END}-{COUNT}_{noise_level}.png"
    - f"MAP_model_posterior_{system_name}_l1-{l1_param_val}_l2-{l2_param_val}_{START}-{END}-{COUNT}_{noise_level}.png"

#### Notes on relevant metrics / summaries / statistics
- Visualize the trends for all metrics over increasing training data and noise
    - Maybe plot number of datapoints on X and noise level on Y and color the points for MSE/MLL/MAP/... ? (This only works if working with a single model, as we have overlap otherwise)
- Display some instances of good/bad training runs, both for many and for few datapoints
- Show the difference between having few datapoints only at the start of the domain and having them spread across the whole domain
- Lineplot with different colors for different models, X is number of datapoints, Y is MSE/MLL/MAP/...
    - When comparing the ODE satisfaction use a log-scale
- Look at some interesting individual cases of the data
    - Highlight the slow speed of the moon system in changing its values
    - 

In [None]:
def load_model_result(model_name, result_name, results_path=None):
    """
    Loads a specific stored result of a model.

    The result is read from ``{results_path}/{model_name}_{result_name}.pkl``.

    Args:
        model_name (str): The file name prefix of the model, e.g. "Bipendulum_l1-1.0_l2-2.0".
        result_name (str): The name of the result, e.g. "MLL".
        results_path (str or Path, optional): The directory that contains the result files.
            Defaults to ``<current working directory>/results/results``.

    Returns:
        The object that dill deserializes from the result file.

    Raises:
        FileNotFoundError: If the result file does not exist.
    """
    if results_path is None:
        results_path = Path.cwd().joinpath("results", "results")
    else:
        # Accept plain strings as well as Path objects
        results_path = Path(results_path)

    # Construct the file path based on the model and result names
    file_path = results_path.joinpath(f'{model_name}_{result_name}.pkl')

    if not file_path.exists():
        raise FileNotFoundError(f"Result file '{file_path}' does not exist.")

    # NOTE: dill (like pickle) can execute arbitrary code while loading; only open trusted files
    with open(file_path, 'rb') as file:
        return dill.load(file)

# Example: load one stored MLL result from the backup results folder
load_model_result(
    "Bipendulum_l1-1.0_l2-2.0",
    "MLL",
    results_path=Path.cwd().parent / "results_bak" / "results" / "2-3-1_0.0",
)

In [None]:
def construct_model_name(model_name, l1, l2):
    """
    Builds the file name prefix of a model from its system name and its l1/l2 values.

    Args:
        model_name (str): The base name of the model.
        l1 (float): The first parameter for the model.
        l2 (float): The second parameter for the model.

    Returns:
        str: The name in the form "{model_name}_l1-{l1}_l2-{l2}".
    """
    return "{}_l1-{}_l2-{}".format(model_name, l1, l2)


def construct_experiment_name(start, end, count, noise):
    """
    Builds the name of an experiment from its data settings.

    Args:
        start (float): The start value for the experiment.
        end (float): The end value for the experiment.
        count (int): The number of samples in the experiment.
        noise (float): The noise level in the experiment.

    Returns:
        str: The name in the form "{start}-{end}-{count}_{noise}".
    """
    return "{}-{}-{}_{}".format(start, end, count, noise)


def construct_experiment_path(start, end, count, noise, results_path=None):
    """
    Constructs the path for the experiment results based on the provided parameters.

    Args:
        start (float): The start value for the experiment.
        end (float): The end value for the experiment.
        count (int): The number of samples in the experiment.
        noise (float): The noise level in the experiment.
        results_path (str or Path, optional): The base path for results.
            Defaults to ``<current working directory>/results/results``.

    Returns:
        Path: ``results_path`` extended by the folder "{start}-{end}-{count}_{noise}".
    """
    if results_path is None:
        results_path = Path.cwd().joinpath("results", "results")
    else:
        # Accept plain strings as well as Path objects
        results_path = Path(results_path)
    return results_path.joinpath(f"{start}-{end}-{count}_{noise}")


# Example: print the path prefix of one model's results and whether the experiment folder exists
print(
    construct_experiment_path(
        2, 3, 100, 0.1,
        results_path=Path.cwd().parent / "results" / "results",
    ) / construct_model_name("Bipendulum", 1.0, 2.0)
)
print(
    construct_experiment_path(
        2, 3, 100, 0.1,
        results_path=Path.cwd().parent / "results" / "results",
    ).exists()
)

In [None]:
# Easy Metric Call Dict: maps a readable metric name to the suffix used in the result file names
emcd = {
    "avg neg MLL": "MLL",
    "MLL logs": "MLL_logs",
    "avg neg MAP": "MAP",
    "MAP logs": "MAP_logs",
    "mean MLL ODE error": "mean_ode_satisfaction_error_MLL",
    "mean MAP ODE error": "mean_ode_satisfaction_error_MAP",
    "sample MLL ODE error": "sample_ode_satisfaction_error_MLL",
    "sample MAP ODE error": "sample_ode_satisfaction_error_MAP",
    "MLL train MSEs": "MLL_model_train_MSEs",
    "MAP train MSEs": "MAP_model_train_MSEs",
    "MLL test MSEs": "MLL_model_test_MSEs",
    "MAP test MSEs": "MAP_model_test_MSEs",
    "MLL state dict": "MLL_state_dict",
    "MAP state dict": "MAP_state_dict",
}

# More used as a reference than for actual use
all_model_names = [
    "Bipendulum",
    "Bipendulum first equation",
    "Bipendulum second equation",
    "Bipendulum Sum",
    "Bipendulum Sum eq2 diffed",
    "Bipendulum moon gravitation",
    "Bipendulum Parameterized",
    "No system",
]

# LaTeX labels for the models (used in plot legends).
# All backslashes are escaped explicitly; "\p" and "\c" are invalid escape sequences.
all_latex_model_names = {
    "Bipendulum": "$\\text{Bip}$",
    "Bipendulum first equation": "$\\text{Bip}^1$",
    "Bipendulum second equation": "$\\text{Bip}^2$",
    "Bipendulum Sum": "$\\text{Bip}^{1 + 2}$",
    "Bipendulum Sum eq2 diffed": "$\\text{Bip}^{1 + \\partial \\cdot 2}$",
    "Bipendulum moon gravitation": "$\\text{Bip}^{\\text{Moon}}$",
    "Bipendulum Parameterized": "$\\text{Bip}^{\\text{Param}}$",
    "No system": "$0$",
}

all_l1_l2_combinations = [[1.0, 2.0], [1.0, 3.0], [2.0, 3.0], [3.0, 6.0]]

# Every (model name, [l1, l2]) pair: the first six systems are combined with every l1/l2
# combination, the last two systems are only listed with l1=1.0 and l2=2.0.
all_model_settings = list(itertools.chain(
    itertools.product(
        [
            "Bipendulum",
            "Bipendulum first equation",
            "Bipendulum second equation",
            "Bipendulum Sum",
            "Bipendulum Sum eq2 diffed",
            "Bipendulum moon gravitation",
        ],
        all_l1_l2_combinations,
    ),
    [("Bipendulum Parameterized", [1.0, 2.0]), ("No system", [1.0, 2.0])],
))

all_ranges = [(2, 12), (2, 3)]
all_dataset_sizes = [2, 5, 10, 15, 20, 30, 40, 50, 70, 100]
all_noises = [0.0, 0.1, 0.2, 0.3]

# Every ((start, end), dataset size, noise) combination
all_experiment_settings = list(itertools.product(all_ranges, all_dataset_sizes, all_noises))

In [None]:
def add_rope_lengths_to_model_name(model_name, l1, l2):
    """
    Appends the rope lengths as a LaTeX subscript to a model label.

    The last character of ``model_name`` (the closing "$" of a LaTeX label) is replaced by
    "_{{l1},{l2}}$", with both rope lengths converted to int. The labels of the "No system"
    and "Bipendulum Parameterized" models are returned unchanged.

    Args:
        model_name (str): The base name of the model.
        l1 (float): The first rope length.
        l2 (float): The second rope length.

    Returns:
        str: The model name with rope lengths included.
    """
    names_without_rope_lengths = (
        "$0$",
        "No system",
        "Bipendulum Parameterized",
        "$\\text{Bip}^{\\text{Param}}$",
    )
    if model_name in names_without_rope_lengths:
        return model_name
    subscript = "{{%d},{%d}}" % (int(l1), int(l2))
    return model_name[:-1] + "_" + subscript + "$"

In [None]:
# Define line style settings for the various model settings
# Each model name gets their own color
# Each l1-l2 combination gets its own line style
# Colors are sampled from the "rainbow" colormap
import matplotlib.cm as cm
import matplotlib.colors as mcolors
# Define a color map with one discrete color per model name.
# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# pyplot.get_cmap accepts the same (name, lut) arguments.
cmap = plt.get_cmap('rainbow', len(all_model_names))
# Create a dictionary to map model names to colors.
# The order below determines which colormap index each model receives.
model_colors = {
    name: mcolors.to_hex(cmap(color_index))
    for color_index, name in enumerate((
        "Bipendulum",
        "Bipendulum Parameterized",
        "Bipendulum Sum",
        "Bipendulum Sum eq2 diffed",
        "Bipendulum first equation",
        "Bipendulum second equation",
        "Bipendulum moon gravitation",
        "No system",
    ))
}
# Define line styles for the l1-l2 combinations
line_styles = {
    (1.0, 2.0):  "-",
    (1.0, 3.0):  "-.",
    (2.0, 3.0):  "--",
    (3.0, 6.0):  "dotted"
}
def get_line_plot_settings(model_name, l1, l2):
    """
    Looks up the color and line style used to plot a model setting.

    Args:
        model_name (str): The name of the model (a key of ``model_colors``).
        l1 (float): The first parameter for the model.
        l2 (float): The second parameter for the model.

    Returns:
        tuple: ``(color, linestyle)`` where the color comes from ``model_colors`` and the
        line style from ``line_styles[(l1, l2)]``.

    Raises:
        KeyError: If the model name or the l1-l2 combination is unknown.
    """
    color = model_colors[model_name]
    linestyle = line_styles[(l1, l2)]
    return color, linestyle
    

In [None]:

def unpack_experiment_setting(experiment_setting):
    """
    Flattens an experiment setting ``((start, end), count, noise)`` into one tuple.

    Returns:
        tuple: The elements of the first entry followed by all remaining entries.
    """
    data_range = tuple(experiment_setting[0])
    remaining = tuple(experiment_setting[1:])
    return data_range + remaining

def unpack_model_setting(model_setting):
    """
    Flattens a model setting ``(model_name, [l1, l2])`` into one tuple.

    Returns:
        tuple: The first entry followed by the elements of the second entry.
    """
    return (model_setting[0],) + tuple(model_setting[1])


In [None]:
# Base folder of all stored results, relative to the parent of the current working directory
base_results_path = Path.cwd().parent / "results" / "results"

In [None]:
# Load one result, unpacking the first model and experiment setting by hand
load_model_result(
    construct_model_name(all_model_settings[0][0], *all_model_settings[0][1]),
    emcd['avg neg MLL'],
    construct_experiment_path(
        *all_experiment_settings[0][0],
        *all_experiment_settings[0][1:],
        results_path=base_results_path,
    ),
)


In [None]:
# Load the same result, this time using the unpack helpers
load_model_result(
    construct_model_name(*unpack_model_setting(all_model_settings[0])),
    emcd['avg neg MLL'],
    construct_experiment_path(
        *unpack_experiment_setting(all_experiment_settings[0]),
        results_path=base_results_path,
    ),
)

In [None]:
# Try to load the MLL result of every (experiment, model) combination and report the outcome
for experiment_setting, model_setting in itertools.product(all_experiment_settings, all_model_settings):
    try:
        model_name = construct_model_name(*unpack_model_setting(model_setting))
        experiment_path = construct_experiment_path(
            *unpack_experiment_setting(experiment_setting),
            results_path=base_results_path,
        )
        result = load_model_result(model_name, emcd['avg neg MLL'], experiment_path)
        print(f"Successfully loaded result for {model_setting} in experiment {experiment_setting}")
    except FileNotFoundError as missing_file:
        # The error message already names the missing file
        print(missing_file)
    except Exception as other_error:
        print(f"An error occurred while loading result for {model_setting} in experiment {experiment_setting}: {other_error}")

In [None]:
def verify_data_generating_fkt_ode_error(data):
    """
    Prints the summed numeric ODE error of the data generating functions on ``data``.

    A "Bipendulum" LODEGP model (l1=1.0, l2=2.0) is built only to obtain ``model.A`` and
    ``model.sage_locals``. For every row of ``model.A`` the error returned by
    ``util_functions.calculate_differential_equation_error_numeric`` is summed and printed.

    Args:
        data (torch.Tensor): The points that are passed to the error calculation.
            NOTE(review): presumably the x-values at which the ODE is checked; confirm
            against ``calculate_differential_equation_error_numeric``.
    """
    # Closed-form data generating functions, one per output channel
    def y0_func(x):
        return 781/8000*torch.sin(x)/x - 1/20*torch.cos(x)/x**2 + 1/20*torch.sin(x)/x**3

    def y1_func(x):
        return 881/8000*torch.sin(x)/x - 1/40*torch.cos(x)/x**2 + 1/40*torch.sin(x)/x**3

    def y2_func(x):
        return (688061/800000*torch.sin(x)/x - 2543/4000*torch.cos(x)/x**2
                + 1743/4000*torch.sin(x)/x**3 - 3/5*torch.cos(x)/x**4 + 3/5*torch.sin(x)/x**5)

    def y_func(x):
        return torch.stack([y0_func(x), y1_func(x), y2_func(x)], dim=-1)

    # Single-point dummy training data; it is only passed to the model constructor
    dummy_x = torch.linspace(0, 1, 1)
    dummy_y = torch.linspace(0, 1, 1)
    likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks = 3)
    model = LODEGP.LODEGP(dummy_x, dummy_y, likelihood, num_tasks=3, ODE_name="Bipendulum", l1=1.0, l2=2.0)

    for row_index, row in enumerate(model.A):
        row_error = util_functions.calculate_differential_equation_error_numeric(row, model.sage_locals, y_func, data)
        print(f"Error in row {row_index}: {torch.sum(row_error)}")

In [None]:
# Check the data generating functions on five evenly spaced points in [2, 3]
verify_data_generating_fkt_ode_error(torch.linspace(2, 3, steps=5))

In [None]:
def get_all_results_for_metric(target_metric, experiment_setting_filter=None, model_setting_filter=None):
    """
    Loads one metric for every combination of experiment setting and model setting.

    Args:
        target_metric (str): The result name to load (a value of ``emcd``).
        experiment_setting_filter (callable, optional): Called with each experiment setting;
            settings for which it returns a falsy value are skipped.
        model_setting_filter (callable, optional): Called with each model setting;
            settings for which it returns a falsy value are skipped.

    Returns:
        dict: Maps ``str(experiment_setting) + str(model_setting)`` to the loaded result.

    Raises:
        FileNotFoundError: If the result file of a selected combination does not exist.
    """
    collected = {}
    for experiment_setting, model_setting in itertools.product(all_experiment_settings, all_model_settings):
        if experiment_setting_filter and not experiment_setting_filter(experiment_setting):
            continue
        if model_setting_filter and not model_setting_filter(model_setting):
            continue
        model_name = construct_model_name(*unpack_model_setting(model_setting))
        experiment_path = construct_experiment_path(
            *unpack_experiment_setting(experiment_setting),
            results_path=base_results_path,
        )
        key = f"{experiment_setting}{model_setting}"
        collected[key] = load_model_result(model_name, target_metric, experiment_path)
    return collected

# How does MAP behave over increasing dataset size

In [None]:

# Plot the average negative MAP of every model setting over the dataset size,
# one subplot per noise level.
#fig, axs = plt.subplots(8, 1, figsize=(10, 20))
fig, axs = plt.subplots(1, 1, figsize=(10, 5))
# plt.subplots returns a bare Axes for a 1x1 grid and an ndarray otherwise; normalize to a flat array
axs = np.atleast_1d(axs).ravel()
target_metric = emcd['avg neg MAP']
for i, noise in enumerate([0.3]):#all_noises
    ax = axs[i]
    # Best/worst value over all models per dataset size (kept for optional diagnostics)
    max_map = [-np.inf] * len(all_dataset_sizes)
    min_map = [np.inf] * len(all_dataset_sizes)
    for model_setting in all_model_settings:
        #if model_setting[1][0] != 1.0 or model_setting[1][1] != 2.0:
        #    continue
        all_neg_maps = []
        for size_index, dataset_size in enumerate(all_dataset_sizes):
            # Select exactly one experiment setting and one model setting
            experiment_setting_filter = lambda x: x[0] == (2, 12) and x[1] == dataset_size and x[2] == noise
            model_setting_filter = lambda x: x[0] == model_setting[0] and x[1] == model_setting[1]
            cur_neg_map = list(get_all_results_for_metric(target_metric, experiment_setting_filter, model_setting_filter).values())[0].item()
            all_neg_maps.append(cur_neg_map)
            min_map[size_index] = min(min_map[size_index], cur_neg_map)
            max_map[size_index] = max(max_map[size_index], cur_neg_map)

        color, linestyle = get_line_plot_settings(model_setting[0], model_setting[1][0], model_setting[1][1])
        label = add_rope_lengths_to_model_name(all_latex_model_names[model_setting[0]], model_setting[1][0], model_setting[1][1])
        ax.plot(all_dataset_sizes, all_neg_maps, label=label, color=color, linestyle=linestyle)

    # Decorate each axes once. ax.grid() without arguments toggles the grid, so calling it
    # once per model would switch it off again for an even number of models.
    ax.set_xlabel("Dataset Size")
    ax.set_ylabel("Average Negative MAP")
    ax.set_title(f"Average Negative MAP for {noise}. Smaller=Better")
    ax.grid(True)
    if i == 0:
        ax.legend(ncols=7, bbox_to_anchor=(1.00, -0.15))
#fig.savefig(Path.cwd().joinpath("results/figures/avg_loss_0.3_2-12.png"), bbox_inches='tight')

# Train/Test MSE over dataset size

In [None]:
# Plot the average train/test MSE of the MAP trained models over the dataset size,
# one subplot per noise level, and save the figure.
train_test = "train" # "train" or "test"
data_range = (2, 12)
target_noise = [0.0]
log_scale = True

fig, axs = plt.subplots(len(target_noise), 1, figsize=(10, 5))
#fig, axs = plt.subplots(len(target_noise), 1, figsize=(10, 28))
# plt.subplots returns a bare Axes for a 1x1 grid and an ndarray otherwise; normalize to a flat array
axs = np.atleast_1d(axs).ravel()
target_metric = emcd[f'MAP {train_test} MSEs']
# Only announce a log scale in the labels when it is actually applied
scale_prefix = "log " if log_scale else ""
metric_label = f"{scale_prefix}avg. {train_test} MSE on MAP trained model"
for i, noise in enumerate(target_noise):
    ax = axs[i]
    # Best/worst value over all models per dataset size (kept for optional diagnostics)
    max_mse = [-np.inf] * len(all_dataset_sizes)
    min_mse = [np.inf] * len(all_dataset_sizes)
    for model_setting in all_model_settings:
        #if model_setting[1][0] != 1.0 or model_setting[1][1] != 2.0:
        #    continue
        all_avg_MSEs = []
        for size_index, dataset_size in enumerate(all_dataset_sizes):
            # Select exactly one experiment setting and one model setting
            experiment_setting_filter = lambda x: x[0] == data_range and x[1] == dataset_size and x[2] == noise
            model_setting_filter = lambda x: x[0] == model_setting[0] and x[1] == model_setting[1]
            cur_MSE = list(get_all_results_for_metric(target_metric, experiment_setting_filter, model_setting_filter).values())[0]
            cur_avg_MSE = np.mean(cur_MSE)
            if log_scale:
                cur_avg_MSE = np.log(cur_avg_MSE)
            all_avg_MSEs.append(cur_avg_MSE)
            min_mse[size_index] = min(min_mse[size_index], cur_avg_MSE)
            max_mse[size_index] = max(max_mse[size_index], cur_avg_MSE)
        color, linestyle = get_line_plot_settings(model_setting[0], model_setting[1][0], model_setting[1][1])
        label = add_rope_lengths_to_model_name(all_latex_model_names[model_setting[0]], model_setting[1][0], model_setting[1][1])
        ax.plot(all_dataset_sizes, all_avg_MSEs, label=label, color=color, linestyle=linestyle)

    # Decorate each axes once instead of once per model
    ax.set_xlabel("Dataset Size")
    ax.set_ylabel(metric_label)
    ax.set_title(f"{metric_label} for noise {noise}. Smaller=Better")
    if i == 0:
        ax.legend(ncols=7, bbox_to_anchor=(1.00, -0.15))

# NOTE(review): results are read from Path.cwd().parent/"results", but the figure is written
# below Path.cwd(); confirm which base folder is intended.
fig.savefig(Path.cwd().joinpath(f"results/figures/avg_{train_test}_mse_MAP_{target_noise[0]}_{data_range[0]}-{data_range[1]}.png"), bbox_inches='tight')

In [None]:
# Make a test for a single dataset size and noise where we plot the MSEs over the domain, i.e. the x-axis is the domain and the y-axis is the MSE


## Tabular format

In [None]:


# Print one LaTeX table per dataset size with the log average train MSE of every model
# (rows) at every noise level (columns). The smallest value per column is printed in bold.
train_test = "train" # "train" or "test"
target_metric = emcd[f'MAP {train_test} MSEs']
tabular_columns = ["Model name"] + [f"log MSE {noise}" for noise in all_noises]

for dataset_size in all_dataset_sizes:
    print(f"Dataset size {dataset_size}:\\\\")
    tabular_rows = []
    # Smallest (rounded) value per noise column
    min_error = [np.inf]*len(all_noises)
    for model_setting in all_model_settings:
        tabular_row = [construct_model_name(*unpack_model_setting(model_setting)).replace("_", " ")]
        for i, noise in enumerate(all_noises):
            # Select exactly one experiment setting and one model setting
            experiment_setting_filter = lambda x: x[0] == (2, 12) and x[1] == dataset_size and x[2] == noise
            model_setting_filter = lambda x: x[0] == model_setting[0] and x[1] == model_setting[1]
            cur_MSE = list(get_all_results_for_metric(target_metric, experiment_setting_filter, model_setting_filter).values())[0]
            cur_avg_MSE = np.round(np.log(np.mean(cur_MSE)), 3)
            tabular_row.append(cur_avg_MSE)
            # Store the value itself (not a list) so later comparisons are scalar
            if cur_avg_MSE < min_error[i]:
                min_error[i] = cur_avg_MSE

        tabular_rows.append(tabular_row)

    # Print as latex table
    print("\\begin{tabular}{|c|" + "|".join(["c"] * (len(tabular_columns) - 1)) + "|}")
    print("\\hline")
    print(" & ".join(tabular_columns) + " \\\\")
    print("\\hline")
    for row in tabular_rows:
        # The first cell is the model name; only the metric cells are candidates for bold
        cells = [str(row[0])]
        for value, column_min in zip(row[1:], min_error):
            cells.append(f"\\textbf{{{value}}}" if value == column_min else str(value))
        print(" & ".join(cells) + " \\\\")
    print("\\hline")
    print("\\end{tabular}")

# The trained noise levels?

In [None]:


# Plot the log of the trained noise standard deviation of every MAP trained model over the
# dataset size. Rows are noise levels, columns are output channels; the dashed red line marks
# the ground truth noise level.
START = 2
END = 12

# Closed-form data generating functions, one per output channel (independent of the noise level)
def y0_func(x):
    return 781/8000*torch.sin(x)/x - 1/20*torch.cos(x)/x**2 + 1/20*torch.sin(x)/x**3

def y1_func(x):
    return 881/8000*torch.sin(x)/x - 1/40*torch.cos(x)/x**2 + 1/40*torch.sin(x)/x**3

def y2_func(x):
    return (688061/800000*torch.sin(x)/x - 2543/4000*torch.cos(x)/x**2
            + 1743/4000*torch.sin(x)/x**3 - 3/5*torch.cos(x)/x**4 + 3/5*torch.sin(x)/x**5)

target_metric = emcd['MAP state dict']
fig, axs = plt.subplots(len(all_noises), 3, figsize=(18, 20), squeeze=False)
for i, noise in enumerate(all_noises):
    # Ground truth noise level per channel: maximum of the channel on [START, END] times the noise
    y0_noise_level = (torch.max(y0_func(torch.linspace(START, END, 100)))*noise)
    y1_noise_level = (torch.max(y1_func(torch.linspace(START, END, 100)))*noise)
    y2_noise_level = (torch.max(y2_func(torch.linspace(START, END, 100)))*noise)

    gt_noise_levels = torch.stack([y0_noise_level, y1_noise_level, y2_noise_level])
    print(f"{noise} noise levels: {gt_noise_levels.tolist()}")
    log_gt_noise_levels = []
    for channel in range(3):
        gt_noise = gt_noise_levels[channel].item()
        # log(0) is -inf (noise 0.0): skip the reference line instead of drawing it at -inf
        log_gt_noise = np.log(gt_noise) if gt_noise > 0 else -np.inf
        log_gt_noise_levels.append(log_gt_noise)
        if np.isfinite(log_gt_noise):
            # Make a thick dashed red line for the ground truth noise level
            axs[i, channel].axhline(y=log_gt_noise, color='red', linestyle='--', linewidth=2)
    # Never updated; kept so the names exist as before
    max_mll = [-np.inf] * len(all_dataset_sizes)
    min_mll = [np.inf] * len(all_dataset_sizes)
    for model_setting in all_model_settings:
        #if model_setting[1][0] != 1.0 or model_setting[1][1] != 2.0:
        #    continue
        all_noise_levels = []
        all_sqrt_noise_levels = []
        all_diffs_to_gt = []
        for dataset_size in all_dataset_sizes:
            # Select exactly one experiment setting and one model setting
            experiment_setting_filter = lambda x: x[0] == (START, END) and x[1] == dataset_size and x[2] == noise
            model_setting_filter = lambda x: x[0] == model_setting[0] and x[1] == model_setting[1]
            cur_state_dict = list(get_all_results_for_metric(target_metric, experiment_setting_filter, model_setting_filter).values())[0]

            cur_task_noises = cur_state_dict['likelihood.raw_task_noises']
            cur_global_noise = cur_state_dict['likelihood.raw_noise']
            # The sum of softplussed noises is the total noise for each channel
            # NOTE(review): if the likelihood's noise constraints have a non-zero lower bound,
            # that bound is not included here - confirm against the likelihood's constraints.
            cur_total_noises = torch.nn.functional.softplus(cur_task_noises) + torch.nn.functional.softplus(cur_global_noise)
            all_noise_levels.append(cur_total_noises)
            cur_total_noises_sqrt = torch.sqrt(cur_total_noises)
            all_sqrt_noise_levels.append(cur_total_noises_sqrt)

            # Absolute difference to the ground truth (only used by the alternative plot below)
            diff_to_gt = torch.abs(cur_total_noises_sqrt - gt_noise_levels)
            all_diffs_to_gt.append(diff_to_gt)

        color, linestyle = get_line_plot_settings(model_setting[0], model_setting[1][0], model_setting[1][1])
        label = add_rope_lengths_to_model_name(all_latex_model_names[model_setting[0]], model_setting[1][0], model_setting[1][1])
        for channel in range(3):
            axs[i, channel].plot(all_dataset_sizes, [np.log(sqrt_noise[channel].item()) for sqrt_noise in all_sqrt_noise_levels], label=label, color=color, linestyle=linestyle)
            # Alternative: plot the log difference to the ground truth instead
            #axs[i, channel].plot(all_dataset_sizes, [np.log(all_diffs_to_gt[j][channel].item()) for j in range(len(all_dataset_sizes))], label=f"{model_setting[0]} l1-{model_setting[1][0]} l2-{model_setting[1][1]} channel {channel}")

    # Decorate each axes once. ax.grid() without arguments toggles the grid, so calling it
    # once per model would switch it off again for an even number of models.
    for channel in range(3):
        axs[i, channel].set_title(rf"$\log\sigma_n$ for channel {channel}. Closer to $\log\sigma_n$ = {log_gt_noise_levels[channel]:.2f} is better")
        axs[i, channel].set_xlabel("Dataset Size")
        axs[i, channel].set_ylabel(r"$\log\sigma_n$")
        axs[i, channel].grid(True)

    # Put the shared legend below the bottom row
    if i == len(all_noises) - 1:
        axs[i, 1].legend(ncols=7, bbox_to_anchor=(1.50, -0.15))

# ODE satisfaction
Note: ODE satisfaction is calculated on test_x i.e. with `torch.linspace(1e-3, 15, 100)`

In [None]:
# Plot the log mean absolute ODE error of every MAP trained model over the dataset size,
# one subplot per noise level.
data_range = (2, 12)
target_noise = [0.0, 0.1, 0.2, 0.3]
log_scale = True  # not evaluated in this cell; the log is always applied below

fig, axs = plt.subplots(len(target_noise), 1, figsize=(10, 20))
#fig, axs = plt.subplots(len(target_noise), 1, figsize=(10, 28))
# plt.subplots returns a bare Axes for a 1x1 grid and an ndarray otherwise; normalize to a flat array
axs = np.atleast_1d(axs).ravel()
target_metric = emcd['mean MAP ODE error']
for i, noise in enumerate(target_noise):
    ax = axs[i]
    for model_setting in all_model_settings:
        #if (model_setting[1][0] != 1.0 or model_setting[1][1] != 2.0):
        #    continue
        all_avg_ODE_satisfaction = []
        for dataset_size in all_dataset_sizes:
            # Select exactly one experiment setting and one model setting
            experiment_setting_filter = lambda x: x[0] == data_range and x[1] == dataset_size and x[2] == noise
            model_setting_filter = lambda x: x[0] == model_setting[0] and x[1] == model_setting[1]
            cur_ODE_sat = list(get_all_results_for_metric(target_metric, experiment_setting_filter, model_setting_filter).values())[0]
            cur_avg_ODE_satisfaction = torch.log(torch.mean(torch.abs(cur_ODE_sat)))
            # .item() yields a plain float regardless of device or autograd state
            all_avg_ODE_satisfaction.append(cur_avg_ODE_satisfaction.item())

        color, linestyle = get_line_plot_settings(model_setting[0], model_setting[1][0], model_setting[1][1])
        label = add_rope_lengths_to_model_name(all_latex_model_names[model_setting[0]], model_setting[1][0], model_setting[1][1])
        ax.plot(all_dataset_sizes, all_avg_ODE_satisfaction, label=label, color=color, linestyle=linestyle)

    # Decorate each axes once, so the reference line is not drawn once per model
    ax.set_xlabel("Dataset Size")
    ax.set_ylabel("log mean abs ODE error on MAP trained model")
    ax.set_title(f"log mean abs ODE error on MAP trained model for noise {noise}. Smaller=Better")
    # Make a horizontal line at -5
    ax.axhline(y=-5, color='red', linestyle='--', linewidth=2)
    # Put the shared legend below the bottom subplot
    if i == len(target_noise) - 1:
        ax.legend(ncols=7, bbox_to_anchor=(1.00, -0.15))

#fig.savefig(Path.cwd().joinpath(f"results/figures/avg_ODE_error_MAP_{target_noise[0]}_{data_range[0]}-{data_range[1]}.png"), bbox_inches='tight')
