# Metrics

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import pyperclip
import math
import re
from datetime import datetime
import numpy as np
import sys
from typing import Optional

sys.path.append("../..")
from src.logging_metrics import MetricsLogger

# Folders (relative to this notebook) with the final result CSVs and with the
# per-model metric logs that are read back through MetricsLogger.
RESULT_FOLDER = "../../metrics/final_results/"
METRICS_LOGGER_FOLDER = "../../metrics/logged_metrics/"

# Set types and experiments that the loaders below accept.
SET_TYPES = ["test", "train"]
EXPERIMENTS = ["interpolation", "interpolation_coords", "extrapolation"]
# "OLD" and "DTdepth" are valid arguments but are not part of the default
# experiment list used by load_all_metrics.
VALID_EXPERIMENTS = EXPERIMENTS + ["OLD", "DTdepth"]

# Raw CSV columns dropped by default when loading metrics.
USELESS_METRICS = [
    "explained_variance_score",
    "mean_absolute_percentage_error",
    "median_absolute_error",
    "timestamp",
]
# Column names (after renaming) that plot_metrics_per_parameter accepts.
USEFUL_METRICS = ["r2", "MSE", "MAE", "Prediction Time"]  # "PSNR",
assert set(USELESS_METRICS).isdisjoint(USEFUL_METRICS)
# Records whose description contains one of these substrings are discarded.
USELESS_MODELS = ["autoencoder"]

# Raw metric column name -> abbreviation used as column name after loading.
METRIC_TO_ABBREVIATION = {
    "r2_score": "r2",
    "explained_variance_score": "EVS",
    "mean_squared_error": "MSE",
    "mean_absolute_error": "MAE",
    "median_absolute_error": "MedAE",
    "mean_absolute_percentage_error": "MAPE",
    "peak_signal_noise_ratio": "PSNR",
}

# Metrics for which a higher value is better (raw names first).
COLUMNS_TO_BE_MAXIMIZED = [
    "r2_score",
    "explained_variance_score",
    "peak_signal_noise_ratio",
]
# ...extended with the abbreviations of those same metrics, so that both the
# raw and the abbreviated column names are recognised.
COLUMNS_TO_BE_MAXIMIZED += [
    abb for m, abb in METRIC_TO_ABBREVIATION.items() if m in COLUMNS_TO_BE_MAXIMIZED
]
# Metrics for which a lower value is better (raw names plus the already
# title-cased "Prediction Time").
COLUMNS_TO_BE_MINIMIZED = [
    "prediction_time",
    "mean_squared_error",
    "mean_absolute_error",
    "median_absolute_error",
    "mean_absolute_percentage_error",
    "Prediction Time",
]
# ...extended with the matching abbreviations, as above.
COLUMNS_TO_BE_MINIMIZED += [
    abb for m, abb in METRIC_TO_ABBREVIATION.items() if m in COLUMNS_TO_BE_MINIMIZED
]

# Never truncate long cell contents (e.g. model descriptions) when displaying.
pd.set_option("display.max_colwidth", None)

### Training time

In [None]:
def __find_most_recent_metrics_log(model_name: str) -> Optional[str]:
    """Return the path of the newest metrics log whose name starts with the model name.

    The model name is first normalised (characters filtered by the regex,
    spaces turned into dashes, everything from "_discr_factors" on removed).
    Files in METRICS_LOGGER_FOLDER starting with that prefix are candidates;
    the newest one is chosen by parsing the timestamp found in the 19
    characters that precede the 4-character extension.

    Returns None when no file matches.
    """
    # code from MetricsLogger conventions
    # NOTE(review): " -_" inside the character class is a range (space to
    # underscore), not three literal characters - confirm that this matches
    # what MetricsLogger does when it names the files.
    cleaned = re.sub(r"[^a-zA-Z0-9 -_]", "", model_name)
    cleaned = cleaned.replace(" ", "-")
    prefix = cleaned.split("_discr_factors")[0]

    candidates = []
    for file in os.listdir(METRICS_LOGGER_FOLDER):
        if file.startswith(prefix):
            candidates.append(file)

    if not candidates:
        # warnings.warn(f"No files found in {METRICS_LOGGER_FOLDER} with the prefix {model_name}")
        return None

    def timestamp_of(file: str) -> datetime:
        # the timestamp is assumed to sit at the end of the filename
        return datetime.strptime(file[-23:-4], "%d-%m-%Y_%H-%M-%S")

    newest = max(candidates, key=timestamp_of)
    return os.path.join(METRICS_LOGGER_FOLDER, newest)


def compute_training_time(model_name: str) -> float:
    """Return the training time read from the newest metrics log of a model.

    NaN is returned when no log file is found, or when the training time
    truncates to zero.
    """
    log_path = __find_most_recent_metrics_log(model_name)
    if log_path is None:
        return np.nan

    logger = MetricsLogger.from_csv(log_path)
    training_time = logger.get_training_time("Validation loss")
    if int(training_time) == 0:
        return np.nan
    return training_time


# Example call: look up the training time of one specific model description
# (the value is displayed as the cell output).
compute_training_time(
    "conv_autoencoders_channels2-4-8-8_kernels3-4-5-5_strides2-2-2-2_paddings1-1-1-1_lin-layers4096-2048-1024-512-256-128-64-32-16-8_consider_ws"
)

### Loading metrics

In [None]:
def load_metrics(
    set_type: str,
    experiment: str,
    only_model_name: bool = True,
    columns_to_remove: list = USELESS_METRICS,
    infer_training_time_attempt: bool = False,
) -> pd.DataFrame:
    """Load the result CSV of one experiment and set type as a cleaned DataFrame.

    Args:
        set_type: one of SET_TYPES.
        experiment: one of VALID_EXPERIMENTS.
        only_model_name: if True, replace the full model description by a
            short "model_name" column and sort by it; otherwise keep the
            description and sort by it.
        columns_to_remove: raw column names dropped before returning
            (nothing is dropped when empty or None).
        infer_training_time_attempt: if True, add a "Training Time (s)"
            column computed from the logged metrics of each model.

    Returns:
        The metrics with columns renamed through __rename_column.

    Raises:
        ValueError: if set_type or experiment is not valid.
    """
    __check_arguments(set_type, experiment)
    filepath = __get_filepath(set_type, experiment)
    metrics = pd.read_csv(filepath)
    # remove uninteresting records
    for model_name in USELESS_MODELS:
        metrics = metrics[~metrics["model_description"].str.contains(model_name)]
    # metrics = metrics[~metrics['model_description'].str.contains('consider_ws')] #TODO ?

    metrics = __remove_duplicates(metrics)

    if infer_training_time_attempt:
        metrics["Training Time (s)"] = metrics["model_description"].apply(
            compute_training_time
        )

    # sort_values returns a new frame: the result has to be kept, otherwise
    # the sort has no effect at all.
    if only_model_name:
        metrics = extract_model_name(metrics)
        metrics = metrics.drop("model_description", axis=1)
        metrics = metrics.sort_values(by="model_name", kind="stable")
    else:
        metrics = metrics.sort_values(by="model_description", kind="stable")

    if columns_to_remove:
        metrics = metrics.drop(columns_to_remove, axis=1)

    return metrics.rename(columns=__rename_column)


def extract_model_name(
    metrics_df: pd.DataFrame, remove_name_from_description: bool = False
) -> pd.DataFrame:
    """Insert, as first column, the model name taken from the description.

    The name is made of the first two "_"-separated tokens of the
    description, joined by a space and with the first letter capitalised.
    Both naming styles are handled ("model_description"/"model_name" and
    "Model Description"/"Model Name"). The frame is modified in place and
    also returned.

    If remove_name_from_description is True, those first two tokens are
    stripped from the description column.
    """
    raw_names = "model_description" in metrics_df.columns
    description_col = "model_description" if raw_names else "Model Description"
    name_col = "model_name" if raw_names else "Model Name"

    tokens = metrics_df[description_col].str.split("_")
    names = tokens.str[:2].str.join(" ").apply(__first_capital)
    metrics_df.insert(0, name_col, names)

    if remove_name_from_description:
        metrics_df[description_col] = tokens.str[2:].str.join("_")
    return metrics_df


def __check_arguments(
    set_type: Optional[str] = None,
    experiment: Optional[str] = None,
    metric: Optional[str] = None,
) -> None:
    """Validate the given arguments; arguments left empty are not checked.

    Raises:
        ValueError: if set_type is not in SET_TYPES, experiment is not in
            VALID_EXPERIMENTS, or metric is not a known abbreviation.
    """
    set_type_ok = not set_type or set_type in SET_TYPES
    if not set_type_ok:
        raise ValueError("set_type should be 'train' or 'test'")

    experiment_ok = not experiment or experiment in VALID_EXPERIMENTS
    if not experiment_ok:
        raise ValueError("experiment argument not valid")

    if metric:
        if metric not in METRIC_TO_ABBREVIATION.values():
            raise ValueError(
                f"Not a valid (abbreviated) metric, choose among: {METRIC_TO_ABBREVIATION.values()}"
            )


def __get_filepath(set_type: str, experiment: str) -> str:
    """Build the path of the result CSV for a set type and an experiment.

    The file is named "<set_type>set_results_<experiment>.csv" and lives in
    RESULT_FOLDER. Arguments are validated first.
    """
    __check_arguments(set_type, experiment)
    return os.path.join(RESULT_FOLDER, f"{set_type}set_results_{experiment}.csv")


def __remove_duplicates(
    df: pd.DataFrame,
) -> pd.DataFrame:  # choosing the best according to MSE
    """Keep, for every model description, only the record with the lowest MSE.

    Args:
        df: metrics with at least the columns "model_description" and
            "mean_squared_error".

    Returns:
        A new frame with one row per description, ordered by description
        and with a fresh 0..n-1 index. All original columns are kept.
    """
    # Label of the row with minimum MSE inside each description group.
    # Selecting rows through idxmin (instead of groupby.apply on whole
    # groups) does not depend on how apply treats the grouping column.
    best_labels = df.groupby("model_description")["mean_squared_error"].idxmin()

    # Keep only the best records in the DataFrame
    return df.loc[best_labels].reset_index(drop=True)


def __rename_column(column_name: str) -> str:
    """Map a raw column name to its display name.

    Known metrics are replaced by their abbreviation; any other name gets
    its underscores turned into spaces and is title-cased.
    """
    try:
        return METRIC_TO_ABBREVIATION[column_name]
    except KeyError:
        return column_name.replace("_", " ").title()


def __first_capital(value: str) -> str:
    """Return value with its first character upper-cased and the rest untouched.

    Unlike str.capitalize, the remaining characters keep their case
    (e.g. "univariate DT" -> "Univariate DT"). An empty string is returned
    unchanged.
    """
    # slicing (instead of value[0]) keeps the empty string from raising
    return value[:1].upper() + value[1:]


def compare_metrics(
    set_type1: str, experiment1: str, set_type2: str, experiment2: str, by_name: bool
) -> pd.DataFrame:
    """Load two result sets and merge them side by side.

    The column suffixes only mention what differs between the two sides:
    the set type, the experiment, or both.

    Raises:
        ValueError: if both set type and experiment are identical.
    """
    same_set_type = set_type1 == set_type2
    same_experiment = experiment1 == experiment2
    if same_set_type and same_experiment:
        raise ValueError("types and experiments are the same -> impossible to merge")

    metrics1 = load_metrics(set_type1, experiment1, only_model_name=by_name)
    metrics2 = load_metrics(set_type2, experiment2, only_model_name=by_name)

    if same_experiment:
        # only the set type differs
        suffix1, suffix2 = f" ({set_type1})", f" ({set_type2})"
    elif same_set_type:
        # only the experiment differs
        suffix1, suffix2 = f" ({experiment1})", f" ({experiment2})"
    else:
        suffix1 = f" ({set_type1}, {experiment1})"
        suffix2 = f" ({set_type2}, {experiment2})"

    return merge_metrics(metrics1, metrics2, suffix1, suffix2, by_name)


def merge_metrics(
    metrics1: pd.DataFrame,
    metrics2: pd.DataFrame,
    suffix1: str,
    suffix2: str,
    by_name: bool,
) -> pd.DataFrame:
    """Merge two metric tables on the model column.

    The key is "Model Name" when by_name is True, "Model Description"
    otherwise. Columns present in both tables get suffix1 / suffix2. In the
    result the key comes first, the other columns are sorted by name and the
    rows are sorted by the key.
    """
    if by_name:
        key = "Model Name"
    else:
        key = "Model Description"

    merged = pd.merge(metrics1, metrics2, on=key, suffixes=(suffix1, suffix2))
    # sort the non-key columns by name; the key is parked in the index meanwhile
    keyed = merged.set_index(key)
    ordered = keyed.sort_index(axis=1).reset_index()
    return ordered.sort_values(by=key)


def load_all_metrics(
    set_type: str,
    experiments: list[str] = EXPERIMENTS,
    columns_to_remove: list = USELESS_METRICS,
    infer_training_time_attempt: bool = False,
) -> pd.DataFrame:
    """Load and stack the metrics of several experiments for one set type.

    Each experiment is loaded with its full model description. Optionally a
    "Training Time (s)" column is computed on the stacked table. The result
    is sorted by "Model Description".
    """
    # TODO update method including with more set types?
    # the problem is that the information about the type is not contained
    # anywhere but in the name of the file
    per_experiment = [
        load_metrics(
            set_type,
            experiment,
            only_model_name=False,
            columns_to_remove=columns_to_remove,
        )
        for experiment in experiments
    ]
    stacked = pd.concat(per_experiment).reset_index(drop=True)

    if infer_training_time_attempt:
        descriptions = stacked["Model Description"]
        stacked["Training Time (s)"] = descriptions.apply(compute_training_time)

    return stacked.sort_values(by="Model Description")


def load_metrics_by_model(
    model_name: str,
    set_type: str,
    experiments: list[str] = EXPERIMENTS,
    columns_to_remove: list = USELESS_METRICS,
    infer_training_time_attempt: bool = False,
):
    """Load all metrics and keep the records of a single model.

    The model name is lower-cased and its spaces are replaced by "_" before
    being searched (case-insensitively) in "Model Description". The result
    is sorted by "Model Description".
    """
    needle = model_name.lower().replace(" ", "_")

    candidates = load_all_metrics(
        set_type,
        experiments=experiments,
        columns_to_remove=columns_to_remove,
        infer_training_time_attempt=infer_training_time_attempt,
    )
    lowered_descriptions = candidates["Model Description"].str.lower()
    selected = candidates[lowered_descriptions.str.contains(needle)]
    return selected.sort_values(by="Model Description")


def best_model_per_algo(by_metric: str, set_type: str):
    """Return, for every algorithm, its best record on the given set type.

    All default experiments are loaded; "best" is decided by by_metric,
    which must be an abbreviated metric name.
    """
    __check_arguments(set_type=set_type, metric=by_metric)
    return best_model_per_algo_in_df(load_all_metrics(set_type), by_metric=by_metric)


def best_model_per_algo_in_df(
    df: pd.DataFrame, by_metric: str, metric_name: Optional[str] = None
):
    """Return the best record of every algorithm found in df.

    Args:
        df: metrics with a "Model Description" column. It is not modified.
        by_metric: abbreviated metric that decides whether higher or lower
            values are better.
        metric_name: column holding the values to compare; defaults to
            by_metric (useful when the column carries a suffix such as
            "MAE (test)").

    Returns:
        One row per "Model Name" (first column), sorted by model name.

    Raises:
        ValueError: if by_metric is not a valid abbreviation, or is neither
            to be maximised nor to be minimised.
    """
    __check_arguments(metric=by_metric)
    if metric_name is None:
        metric_name = by_metric
    # work on a copy so that the caller's frame does not silently gain a column
    df = df.copy()
    # add a column to group the models on
    df["Model Name"] = (
        df["Model Description"]
        .str.split("_")
        .str[:2]
        .str.join(" ")
        .apply(__first_capital)
    )
    # pick, within each group, the row with the best value
    values_per_model = df.groupby("Model Name")[metric_name]
    if by_metric in COLUMNS_TO_BE_MAXIMIZED:
        group = df.loc[values_per_model.idxmax()]
    elif by_metric in COLUMNS_TO_BE_MINIMIZED:
        group = df.loc[values_per_model.idxmin()]
    else:
        raise ValueError("metric not found")

    # set_index + reset_index moves "Model Name" to the first position
    return group.set_index("Model Name").reset_index().sort_values(by="Model Name")

### Other comparisons

In [None]:
def extract_parameter(
    metrics_df: pd.DataFrame,
    parameter_names: list[str],
    parameter_type: type,
    regex: str,
) -> pd.DataFrame:
    """Add columns with parameters parsed out of "Model Description".

    Args:
        metrics_df: metrics with a "Model Description" column; it is
            modified in place (new columns, rows re-sorted) and returned.
        parameter_names: one column name per capture group of regex.
        parameter_type: type the captured text is converted to. When it is
            int and some description does not match, float is used instead
            so that the missing values can be stored as NaN.
        regex: pattern with capture groups, e.g. r"approxGP_(\d+)fourier".

    Returns:
        metrics_df, sorted by the new columns (missing values last).
    """
    extracted = metrics_df["Model Description"].str.extract(regex)
    # an integer column cannot hold NaN: fall back to float rather than fail
    if parameter_type is int and extracted.isna().any().any():
        parameter_type = float
    metrics_df[parameter_names] = extracted.astype(parameter_type)
    metrics_df.sort_values(by=parameter_names, inplace=True)
    return metrics_df


def extract_reduction_factors(metrics_df: pd.DataFrame, also_coords: bool = False):
    """Add one "reduction factor <var>" column per input variable.

    The factors are read from the "training_factors=" part of the model
    description, where they appear as "<var><int>" items separated by "-",
    in the order ti, ct (and xD, yD when also_coords is True).
    """
    variables = ["ti", "ct"] + (["xD", "yD"] if also_coords else [])

    # text that must precede the variable being extracted; it grows with the
    # (group-less) pattern of every variable already processed
    prefix = r"training_factors="
    for var in variables:
        metrics_df = extract_parameter(
            metrics_df,
            parameter_names=[f"reduction factor {var}"],
            parameter_type=int,
            regex=prefix + var + r"(\d+)",
        )
        prefix = prefix + var + r"\d+" + "-"
    return metrics_df


def extract_reduction_ranges(metrics_df: pd.DataFrame, also_coords: bool = False):
    """Add the columns "a_<var>" and "b_<var>" with the training range bounds.

    The ranges are read from the "training_ranges=" part of the model
    description, e.g. "training_ranges=ti0.15-0.4-ct0.3-0.7", in the order
    ti, ct (and xD, yD when also_coords is True).
    """
    # training_ranges=ti0.15-0.4-ct0.3-0.7
    # r"ti([\d.]+)-([\d.]+)-ct([\d.]+)-([\d.]+)"
    variables = ["ti", "ct"] + (["xD", "yD"] if also_coords else [])

    # text that must precede the variable being extracted; it grows with the
    # (group-less) pattern of every variable already processed
    prefix = r"training_ranges="
    for var in variables:
        metrics_df = extract_parameter(
            metrics_df,
            parameter_names=[f"a_{var}", f"b_{var}"],
            parameter_type=float,
            regex=prefix + var + r"([\d.]+)-([\d.]+)",
        )
        prefix = prefix + var + r"[\d.]+-[\d.]+" + "-"
    return metrics_df


# TODO move this method to plotting.py
def plot_metrics_per_parameter(
    metrics_df: pd.DataFrame,
    parameter_name: str,
    metrics_to_plot: Optional[list[str]] = None,
):
    """Plot one line per metric against the values of a parameter column.

    Args:
        metrics_df: metrics containing parameter_name, the metric columns
            and "Model Description" (used for the title).
        parameter_name: column shown on the x axis.
        metrics_to_plot: metric columns to draw; all USEFUL_METRICS when
            omitted or empty.

    Raises:
        ValueError: if a requested metric is not in USEFUL_METRICS.
    """
    if metrics_to_plot:
        for metric in metrics_to_plot:
            if metric not in USEFUL_METRICS:
                raise ValueError("Metric to plot not valid")
    else:
        metrics_to_plot = USEFUL_METRICS
    ax = plt.gca()
    colors = plt.cm.tab10.colors

    for i, column in enumerate(metrics_to_plot):
        ax.plot(
            metrics_df[parameter_name],
            metrics_df[column],
            label=column,
            color=colors[i % len(colors)],
        )

    # the title uses the first record (index 0), so that a table with a
    # single row can be plotted as well
    model_name = "".join(metrics_df["Model Description"].iloc[0].split("_")[:2])

    plt.xlabel(parameter_name)
    plt.ylabel("Metric value")
    plt.title(f"Results on {model_name} as {parameter_name} increases")
    plt.legend()
    plt.show()

### To Latex

In [None]:
# Disabled earlier variant of the LaTeX export, kept as a plain string
# literal so that it is never defined or executed.
"""
def print_latex_table_code__(df: pd.DataFrame) -> None:
    latex_table = df.style.hide(axis="index").to_latex(escape=False)
    #.set_table_styles(
    #    [{'selector': 'thead', 'props': [('background-color', '#606060'), ('color', 'white')]}]
    #)
    
    print(latex_table)
"""


def print_copy_latex_table_code(
    metrics: pd.DataFrame,
    digits_after_dec: int = 4,
    bolden_best_value: bool = False,
    column_format: Optional[str] = None,
) -> None:
    """Print the LaTeX code of a metrics table and copy it to the clipboard.

    Float columns are rendered with digits_after_dec decimals (or in
    scientific notation, see __format_float). The column headers are bold.
    The given frame is left untouched: formatting happens on a copy.

    Args:
        metrics: table to export.
        digits_after_dec: number of decimals for float values.
        bolden_best_value: if True, the best value of every column known to
            be maximised or minimised is wrapped in \\textbf{}. A column is
            recognised by its full name or by the part before its first space.
        column_format: passed to DataFrame.to_latex.
    """
    # format a copy: the caller usually keeps using (and displaying) the
    # numeric table after exporting it
    table = metrics.copy()

    for column in table.columns:
        if bolden_best_value:
            base_name = column.split(" ")[0]
            if column in COLUMNS_TO_BE_MAXIMIZED or base_name in COLUMNS_TO_BE_MAXIMIZED:
                table[column] = __format_highest_value_bold(
                    table[column], digits_after_dec=digits_after_dec
                )
                continue
            if column in COLUMNS_TO_BE_MINIMIZED or base_name in COLUMNS_TO_BE_MINIMIZED:
                table[column] = __format_lowest_value_bold(
                    table[column], digits_after_dec=digits_after_dec
                )
                continue
        if table[column].dtype.kind == "f":
            table[column] = [
                __format_float(val, digits_after_dec=digits_after_dec)
                for val in table[column]
            ]

    formatted_columns = [rf"\textbf{{{col}}}" for col in table.columns]
    latex_table = table.to_latex(
        index=False, escape=False, column_format=column_format, header=formatted_columns
    )  # , float_format=f"%.{digits_after_dec}f")

    # copy to clipboard
    pyperclip.copy(latex_table)

    print(latex_table)


def __format_highest_value_bold(values, digits_after_dec: int):
    """Format every value as text, marking the maximum of values as bold."""
    best = values.max()
    formatted = []
    for entry in values:
        formatted.append(
            __format_float(entry, bold_value=best, digits_after_dec=digits_after_dec)
        )
    return formatted


def __format_lowest_value_bold(values, digits_after_dec: int):
    """Format every value as text, marking the minimum of values as bold."""
    best = values.min()
    formatted = []
    for entry in values:
        formatted.append(
            __format_float(entry, bold_value=best, digits_after_dec=digits_after_dec)
        )
    return formatted


def __format_float(
    value: float, bold_value: Optional[float] = None, digits_after_dec: int = 4
) -> str:
    """Render a float for a LaTeX table.

    Args:
        value: value to format. Anything that is not a float is returned
            unchanged.
        bold_value: if given and equal to value, the text is wrapped in
            \\textbf{} (this includes a best value of 0).
        digits_after_dec: decimals in fixed notation. When the decimal
            exponent of value reaches this number, scientific notation with
            digits_after_dec - 3 decimals (at least 0) is used instead.

    Returns:
        "N/A" for NaN, "0" for zero, otherwise the formatted number.
    """
    if not isinstance(value, float):

        return value
    if pd.isna(value):
        return "N/A"

    if value == 0.0:
        formatted_str = "0"
    elif math.isinf(value):
        # log10 of an infinite value cannot be turned into an exponent
        formatted_str = "{:f}".format(value)
    else:
        exponent = abs(int(math.log10(abs(value))))

        if exponent >= digits_after_dec:
            digits_after_dec = max(digits_after_dec - 3, 0)
            formatted_str = "{:.{digits}e}".format(value, digits=digits_after_dec)
            # Remove unnecessary 0 in the exponent
            formatted_str = formatted_str.replace("e+0", "e+").replace("e-0", "e-")
        else:
            formatted_str = "{:.{digits}f}".format(value, digits=digits_after_dec)

    # "is not None" (rather than truthiness) lets a best value of 0 be bold
    if bold_value is not None and value == bold_value:
        formatted_str = rf"\textbf{{{formatted_str}}}"

    return formatted_str

# Examples

In [None]:
# metrics of different models on the same experiment (including different reduction factors)
metrics = load_metrics(
    set_type="test",
    experiment="extrapolation",
    only_model_name=False,
    infer_training_time_attempt=False,
)
# print_copy_latex_table_code(metrics, digits_after_dec=4, bolden_best_value=True)
# flag the records whose description contains "consider_ws", then order the
# table by that flag first and by r2 second
metrics["consider_ws"] = metrics["Model Description"].str.contains("consider_ws")
metrics.sort_values(by=["consider_ws", "r2"], inplace=True)
metrics

In [None]:
# same model with different parameters in different experiments
# (the name is matched, case-insensitively, as "univariate_dt" in the description)
df = load_metrics_by_model(
    model_name="univariate DT",
    set_type="test",
    experiments=EXPERIMENTS,
    columns_to_remove=USELESS_METRICS,
    infer_training_time_attempt=False,
)
df

In [None]:
# comparison between two experiments, merging the records on the model name
# (in case of many attempts with the same model for each experiment, all the combinations are considered)
comparison = compare_metrics(
    "test", "interpolation", "test", "extrapolation", by_name=True
)
comparison

In [None]:
# for each algorithm, the best model according to r2 on the test set
best_model_per_algo("r2", "test")

In [None]:
# all test-set records of the default experiments, stacked in one table
load_all_metrics(
    set_type="test",
    experiments=EXPERIMENTS,
    columns_to_remove=USELESS_METRICS,
    infer_training_time_attempt=False,
)

# Results

## Interpolation

In [None]:
# INTERPOLATION - RESULTS
# Builds one table per set type (train first, then test) and merges them
# side by side on the model description.
two = list()
for set_type in ["train", "test"]:
    metrics = load_metrics(
        set_type=set_type,
        experiment="interpolation",
        only_model_name=False,
        infer_training_time_attempt=False,
    )
    # metrics = compare_metrics("train", "interpolation", "test", "interpolation", by_name=False)
    # keep only the models trained with reduction factor 12 on ti
    metrics = extract_reduction_factors(metrics, also_coords=False)
    metrics = metrics[metrics["reduction factor ti"] == 12]
    # drop the helper columns and the metrics that are not reported
    toremove_columns = [
        col
        for col in metrics.columns
        if col.startswith("reduction factor")
        or col.startswith("Prediction Time")
        or col.startswith("MSE")
        or col.startswith("PSNR")
    ]
    # NOTE(review): metrics is a filtered selection at this point, so the
    # in-place drop may emit pandas' SettingWithCopyWarning
    metrics.drop(toremove_columns, axis=1, inplace=True)

    two.append(metrics)

# two[0] holds the train metrics, two[1] the test metrics
metrics = merge_metrics(
    two[0], two[1], suffix1=" (train)", suffix2=" (test)", by_name=False
)
# metrics = best_model_per_algo_in_df(metrics, "MAE", "MAE (test)")
# metrics = metrics[['Model Name', 'r2 (train)', 'MAE (train)', 'r2 (test)', 'MAE (test)', 'Model Description']]
# print_copy_latex_table_code(metrics, bolden_best_value=True, column_format="lccccl")
metrics

### Interpolation - study of DT depth

In [None]:
# INTERPOLATION - DT DEPTH - RESULTS
# Builds one table per set type (train first, then test) and merges them
# side by side on the model description.
two = list()
for set_type in ["train", "test"]:
    metrics = load_metrics(
        set_type=set_type,
        experiment="DTdepth",
        only_model_name=False,
        infer_training_time_attempt=False,
    )
    # drop the columns that are not reported
    toremove_columns = [
        col
        for col in metrics.columns
        if col.startswith("reduction factor")
        or col.startswith("Prediction Time")
        or col.startswith("MSE")
        or col.startswith("PSNR")
    ]
    metrics.drop(toremove_columns, axis=1, inplace=True)

    two.append(metrics)

# two[0] holds the train metrics, two[1] the test metrics
metrics = merge_metrics(
    two[0], two[1], suffix1=" (train)", suffix2=" (test)", by_name=False
)
# sort_values returns a new frame, so its result has to be assigned
metrics = metrics.sort_values(by="Model Description")
# metrics = best_model_per_algo_in_df(metrics, "MAE", "MAE (test)")
# explicit copy: the table is handed to a helper that may rewrite its columns
metrics = metrics[
    ["Model Description", "r2 (train)", "MAE (train)", "r2 (test)", "MAE (test)"]
].copy()
print_copy_latex_table_code(metrics, bolden_best_value=False, column_format="lcccc")
metrics

## Interpolation on coords

In [None]:
# INTERPOLATION ON COORDS - RESULTS
# Builds one table per set type (train first, then test), merges them side
# by side and keeps the best model of every algorithm.
two = []
for set_type in ["train", "test"]:
    metrics = load_metrics(
        set_type=set_type,
        experiment="interpolation_coords",
        only_model_name=False,
        infer_training_time_attempt=False,
    )
    # keep only the models trained with reduction factor 16 on ti
    metrics = extract_reduction_factors(metrics, also_coords=False)
    metrics = metrics[metrics["reduction factor ti"] == 16]
    # drop the metrics that are not reported (the "reduction factor" columns
    # are kept here and end up suffixed in the merged table)
    toremove_columns = [
        col
        for col in metrics.columns
        if col.startswith("Prediction Time")
        or col.startswith("MSE")
        or col.startswith("PSNR")
    ]
    # NOTE(review): metrics is a filtered selection at this point, so the
    # in-place drop may emit pandas' SettingWithCopyWarning
    metrics.drop(toremove_columns, axis=1, inplace=True)

    two.append(metrics)

# two[0] holds the train metrics, two[1] the test metrics
metrics = merge_metrics(
    two[0], two[1], suffix1=" (train)", suffix2=" (test)", by_name=False
)
# best model of every algorithm = lowest MAE on the test set
metrics = best_model_per_algo_in_df(metrics, "MAE", "MAE (test)")
metrics = metrics[
    [
        "Model Name",
        "r2 (train)",
        "MAE (train)",
        "r2 (test)",
        "MAE (test)",
        "Model Description",
    ]
]
print_copy_latex_table_code(metrics, bolden_best_value=True, column_format="lccccl")
metrics

## Extrapolation

In [None]:
# EXTRAPOLATIONS - RESULTS
# Builds one table per set type (train first, then test) and merges them
# side by side on the model description.
two = list()
for set_type in ["train", "test"]:
    metrics = load_metrics(
        set_type=set_type,
        experiment="extrapolation",
        only_model_name=False,
        infer_training_time_attempt=False,
    )
    # the range columns are only needed for the sorting done while extracting
    metrics = extract_reduction_ranges(metrics, also_coords=False)
    # metrics = metrics[metrics["reduction factor ti"]==4]
    # drop the range columns (a_*, b_*) and the metrics that are not reported
    toremove_columns = [
        col
        for col in metrics.columns
        if col.startswith("a_")
        or col.startswith("b_")
        or col.startswith("Prediction Time")
        or col.startswith("MSE")
        or col.startswith("PSNR")
    ]
    metrics.drop(toremove_columns, axis=1, inplace=True)

    two.append(metrics)

# two[0] holds the train metrics, two[1] the test metrics
metrics = merge_metrics(
    two[0], two[1], suffix1=" (train)", suffix2=" (test)", by_name=False
)
# metrics = best_model_per_algo_in_df(metrics, "MAE", "MAE (test)")
# metrics = metrics[['Model Name', 'r2 (train)', 'MAE (train)', 'r2 (test)', 'MAE (test)', 'Model Description']]
# print_copy_latex_table_code(metrics, bolden_best_value=True, column_format="lccccl")
metrics

## Model Complexity

In [None]:
# Table of training and prediction times per model (test set, all default
# experiments), including the training time inferred from the metric logs.
metrics = load_all_metrics(
    set_type="test",
    experiments=EXPERIMENTS,
    columns_to_remove=USELESS_METRICS,
    infer_training_time_attempt=True,
)
metrics = extract_model_name(metrics, False)
# keep only the models with a positive r2, then drop the quality metrics
metrics = metrics[metrics["r2"] > 0]
metrics.drop(["r2", "MSE", "MAE", "PSNR"], axis=1, inplace=True)
# among the NeRF models keep only the ones with 256 Fourier features;
# every non-NeRF model is kept
metrics = metrics[
    metrics["Model Description"].str.contains("NeRF_256fourier-features")
    | ~metrics["Model Description"].str.contains("NeRF")
]
# the loop below addresses the rows by position through .loc, which needs a
# fresh 0..n-1 index
metrics.reset_index(drop=True, inplace=True)

# fixing missing values for non-logged models (through experience and console logs)
training_time_label = "Training Time new"
# "min-max" strings of the logged training times of the two neural models
nerf_range = metrics[metrics["Model Name"] == "Univariate NeRF"][
    "Training Time (s)"
].agg(lambda x: f"{int(x.min())}-{int(x.max())}")
uninn_range = metrics[metrics["Model Name"] == "Univariate NN"][
    "Training Time (s)"
].agg(lambda x: f"{int(x.min())}-{int(x.max())}")

# hand-written training time per model family; rows of any other model are
# left without a value in the new column
for i in range(len(metrics)):
    if metrics.loc[i, "Model Name"] == "Univariate DT":
        metrics.loc[i, training_time_label] = "<20"
    elif metrics.loc[i, "Model Name"] == "Multivariate NN":
        metrics.loc[i, training_time_label] = "around 5/6 mins"
    elif metrics.loc[i, "Model Name"] in ["Multivariate LR", "Univariate LR"]:
        metrics.loc[i, training_time_label] = "<1"
    elif metrics.loc[i, "Model Name"] == "Univariate NeRF":
        metrics.loc[i, training_time_label] = nerf_range
    elif metrics.loc[i, "Model Name"] == "Univariate NN":
        metrics.loc[i, training_time_label] = uninn_range

# NOTE(review): the factors are extracted as int, while the filter further
# down expects missing xD factors (NaN) - confirm that extract_parameter
# copes with descriptions that have no xD/yD factor
metrics = extract_reduction_factors(metrics, also_coords=True)
metrics.drop(["reduction factor ct", "reduction factor yD"], axis=1, inplace=True)
metrics.sort_values(
    by=["Model Name", "reduction factor ti", "reduction factor xD"], inplace=True
)

# prediction time shown as its order of magnitude, e.g. "$10^{-3}$"
metrics["Prediction Time (s)"] = metrics["Prediction Time"].apply(
    lambda x: "$10^{" + f"{math.floor(math.log10(x))}" + "}$"
)
# the measured values become the "(precise)" columns; the hand-written
# training time takes the "Training Time (s)" name
metrics.rename(
    columns={
        "Prediction Time": "Prediction Time (precise)",
        "Training Time (s)": "Training Time (precise)",
        "Training Time new": "Training Time (s)",
    },
    inplace=True,
)

# only classic interpolation
# (reduction factor 4 on ti and no reduction factor on xD)
metrics = metrics[
    metrics["reduction factor ti"].isin([4.0])
    & metrics["reduction factor xD"].isin([np.nan])
]

metrics = metrics[
    [
        "Model Name",
        "Training Time (s)",
        "Prediction Time (s)",
        "Model Description",
        "Training Time (precise)",
        "Prediction Time (precise)",
    ]
]
print_copy_latex_table_code(metrics, digits_after_dec=4, bolden_best_value=False)
metrics

# Other studies

## Number of components for approximate GP

In [None]:
"""Study on the number of components approximate GP (needs to consider the records with "consider_ws")"""

# records of the approximate GP in the "OLD" experiment (test set)
metrics = load_metrics_by_model("approxGP", set_type="test", experiments=["OLD"])
# the number of components is the integer between "approxGP_" and "fourier"
metrics = extract_parameter(
    metrics,
    parameter_names=["components_number"],
    parameter_type=int,
    regex=r"approxGP_(\d+)fourier",
)
# one line per useful metric, with the number of components on the x axis
plot_metrics_per_parameter(metrics, "components_number")
metrics

## Study of reduction factors

### Decision Tree - interpolation

In [None]:
"""DECISION TREE REDUCTION FACTORS - Study on the training reduction factors for Decision Tree"""

metrics_DT_rf = load_metrics_by_model(
    "DT", set_type="test", experiments=["interpolation"]
)
metrics_DT_rf = extract_reduction_factors(metrics_DT_rf, also_coords=False)
# leaving only deepest trees
# (i.e. the records without any "depth<N>_" in the description; float is
# used so that the missing depths can be stored as NaN)
metrics_DT_rf = extract_parameter(metrics_DT_rf, ["depth"], float, regex=r"depth(\d+)_")
metrics_DT_rf = metrics_DT_rf[metrics_DT_rf["depth"].isna()]
metrics_DT_rf.drop("depth", axis=1, inplace=True)
# cleaning the table
metrics_DT_rf["Model Description"] = "Univariate DT"
metrics_DT_rf.drop(
    ["Prediction Time", "MSE", "PSNR", "reduction factor ct"], axis=1, inplace=True
)
metrics_DT_rf = metrics_DT_rf[metrics_DT_rf["reduction factor ti"] < 64]
# NOTE(review): 100 and 86 presumably are the number of distinct values of
# the two reduced input variables - confirm against the dataset
metrics_DT_rf["Training Perc"] = metrics_DT_rf["reduction factor ti"].apply(
    lambda x: math.ceil(100 / x) * math.ceil(86 / x) / 86
)
metrics_DT_rf["Simulations"] = metrics_DT_rf["Training Perc"].apply(
    lambda x: int(x * 86)
)

plot_metrics_per_parameter(
    metrics_DT_rf, "reduction factor ti", metrics_to_plot=["MAE"]
)
# ready-to-paste LaTeX cell (leading "&", trailing "\\") with the value of
# "Training Perc" rounded to two decimals
metrics_DT_rf["Training Percentage"] = metrics_DT_rf["Training Perc"].apply(
    lambda x: f"& $ \\approx {x:.2f}\\%$ \\\\"
)
# print_copy_latex_table_code(metrics_DT_rf, column_format="cccc")
metrics_DT_rf

### NeRF - interpolation

In [None]:
"""NERF REDUCTION FACTORS - Study on the training reduction factors for NeRF"""

metrics_NeRF_rf = load_metrics_by_model(
    "NeRF", set_type="test", experiments=["interpolation"]
)
metrics_NeRF_rf = extract_reduction_factors(metrics_NeRF_rf, also_coords=False)
# leaving only best structure
# (i.e. the records with 256 Fourier features)
metrics_NeRF_rf = extract_parameter(
    metrics_NeRF_rf, ["# ff"], int, regex=r"_(\d+)fourier-features"
)
metrics_NeRF_rf = metrics_NeRF_rf[metrics_NeRF_rf["# ff"] == 256]
metrics_NeRF_rf.drop("# ff", axis=1, inplace=True)
# cleaning the table
metrics_NeRF_rf["Model Description"] = "NeRF"
metrics_NeRF_rf.drop(
    ["Prediction Time", "MSE", "PSNR", "reduction factor ct"], axis=1, inplace=True
)
# NOTE(review): 100 and 86 presumably are the number of distinct values of
# the two reduced input variables - confirm against the dataset
metrics_NeRF_rf["Training Perc"] = metrics_NeRF_rf["reduction factor ti"].apply(
    lambda x: math.ceil(100 / x) * math.ceil(86 / x) / 86
)
metrics_NeRF_rf["Simulations"] = metrics_NeRF_rf["Training Perc"].apply(
    lambda x: int(x * 86)
)

plot_metrics_per_parameter(
    metrics_NeRF_rf, "reduction factor ti", metrics_to_plot=["MAE"]
)
# ready-to-paste LaTeX cell (leading "&", trailing "\\") with the value of
# "Training Perc" rounded to two decimals
metrics_NeRF_rf["Training Percentage"] = metrics_NeRF_rf["Training Perc"].apply(
    lambda x: f"& $ \\approx {x:.2f}\\%$ \\\\"
)
# print_copy_latex_table_code(metrics_NeRF_rf, column_format="cccc")
metrics_NeRF_rf

In [None]:
# Side-by-side table of the remaining columns of the DT and NeRF studies,
# joined on the number of simulations and copied to the clipboard.
# Needs metrics_DT_rf and metrics_NeRF_rf from the two interpolation cells above.
a = metrics_DT_rf.drop(
    [
        "reduction factor ti",
        "Training Perc",
        "Training Percentage",
        "MAE",
        "Model Description",
    ],
    axis=1,
)
b = metrics_NeRF_rf.drop(
    [
        "reduction factor ti",
        "Training Perc",
        "Training Percentage",
        "MAE",
        "Model Description",
    ],
    axis=1,
)
# columns present in both tables get the suffix "_DT" or "_NeRF"
pd.merge(a, b, on="Simulations", suffixes=["_DT", "_NeRF"]).reset_index(
    drop=True
).to_clipboard()

### Visual Comparison - interpolation

In [None]:
# VISUAL COMPARISON
# Two separate figures (MAE first, then r2) comparing NeRF and DT as the
# number of training simulations changes.
# NOTE: metric_name stays defined as "r2" after this cell has run.
metric_name = "MAE"
ax = plt.gca()
ax.plot(metrics_NeRF_rf["Simulations"], metrics_NeRF_rf[metric_name], label="NeRF")
ax.plot(metrics_DT_rf["Simulations"], metrics_DT_rf[metric_name], label="DT")
plt.xlabel("# of simulations in Training Set")  # or "Reduction factor"
plt.ylabel(f"{metric_name} (test set)")
plt.legend()
plt.show()

metric_name = "r2"
ax = plt.gca()
ax.plot(metrics_NeRF_rf["Simulations"], metrics_NeRF_rf[metric_name], label="NeRF")
ax.plot(metrics_DT_rf["Simulations"], metrics_DT_rf[metric_name], label="DT")
plt.xlabel("# of simulations in Training Set")  # or "Reduction factor"
plt.ylabel(f"R2 (test set)")
plt.legend()
plt.show()

In [None]:
# Single figure with two y axes: MAE (left, solid lines) and R2 (right,
# dashed lines) of both models against the number of training simulations.
# The figure is saved as "results_size-dependence.pdf".
plt.rcParams.update({"font.size": 18})
fig, ax1 = plt.subplots()
fig.set_size_inches(8, 5)

ax1.set_xlabel("# of simulations in Training Set")  # , fontsize=18)
ax1.set_ylabel("MAE (test set)")  # , fontsize=18)
ax1.plot(metrics_NeRF_rf["Simulations"], metrics_NeRF_rf["MAE"], label="FEMLP MAE")
ax1.plot(metrics_DT_rf["Simulations"], metrics_DT_rf["MAE"], label="RDT MAE")
ax1.tick_params(axis="y")

ax2 = ax1.twinx()  # second axes that shares the same x-axis

color = "tab:blue"
ax2.set_ylabel("R2 (test set)", color=color)  # we already handled the x-label with ax1
ax2.plot(
    metrics_NeRF_rf["Simulations"],
    metrics_NeRF_rf["r2"],
    label="FEMLP R2",
    linestyle="dashed",
)
# the column is named explicitly: relying on a metric_name variable left
# over from another cell makes the curve depend on the execution order
ax2.plot(
    metrics_DT_rf["Simulations"],
    metrics_DT_rf["r2"],
    label="RDT R2",
    linestyle="dashed",
)
ax2.tick_params(axis="y", labelcolor=color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
# one legend for both axes, built from explicit (handle, label) pairs
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
fig.legend(
    handles1 + handles2,
    labels1 + labels2,
    loc="center right",
    bbox_to_anchor=(0.85, 0.6),
)
plt.xscale("log", base=2)
plt.savefig("results_size-dependence.pdf", bbox_inches="tight")

### Decision Tree - interpolation on coords

In [None]:
"""DECISION TREE REDUCTION FACTORS ON INT.COORDS - Study on the training reduction factors for Decision Tree"""

# NOTE: this overwrites the metrics_DT_rf built in the interpolation study
metrics_DT_rf = load_metrics_by_model(
    "DT", set_type="test", experiments=["interpolation_coords"]
)
metrics_DT_rf = extract_reduction_factors(metrics_DT_rf, also_coords=False)
# leaving only deepest trees
# (i.e. the records without any "depth<N>_" in the description; float is
# used so that the missing depths can be stored as NaN)
metrics_DT_rf = extract_parameter(metrics_DT_rf, ["depth"], float, regex=r"depth(\d+)_")
metrics_DT_rf = metrics_DT_rf[metrics_DT_rf["depth"].isna()]
metrics_DT_rf.drop("depth", axis=1, inplace=True)

# cleaning the table
metrics_DT_rf["Model Description"] = "Univariate DT"
metrics_DT_rf.drop(
    ["Prediction Time", "MSE", "PSNR", "reduction factor ct"], axis=1, inplace=True
)
metrics_DT_rf = metrics_DT_rf[metrics_DT_rf["reduction factor ti"] < 16]

plot_metrics_per_parameter(
    metrics_DT_rf, "reduction factor ti", metrics_to_plot=["MAE"]
)
# ready-to-paste LaTeX cell (leading "&", trailing "\\")
# NOTE(review): 100, 86, 224 and 32 presumably are the sizes of the four
# reduced dimensions (100 is missing from the denominator) - confirm
metrics_DT_rf["Training Percentage"] = metrics_DT_rf["reduction factor ti"].apply(
    lambda x: f"& $ \\approx {math.ceil(100/x) * math.ceil(86/x) * math.ceil(224/x) * math.ceil(32/x)/(86*224*32):.2f}\\%$ \\\\"
)
# print_copy_latex_table_code(metrics_DT_rf, column_format="cccc")

metrics_DT_rf

### NeRF - interpolation on coords

In [None]:
"""NERF REDUCTION FACTORS - Study on the training reduction factors for NeRF"""

# NOTE: this overwrites the metrics_NeRF_rf built in the interpolation study
metrics_NeRF_rf = load_metrics_by_model(
    "NeRF", set_type="test", experiments=["interpolation_coords"]
)
metrics_NeRF_rf = extract_reduction_factors(metrics_NeRF_rf, also_coords=True)

# leaving only best structure
# (i.e. the records with 256 Fourier features)
metrics_NeRF_rf = extract_parameter(
    metrics_NeRF_rf, ["# ff"], int, regex=r"_(\d+)fourier-features"
)
metrics_NeRF_rf = metrics_NeRF_rf[metrics_NeRF_rf["# ff"] == 256]
metrics_NeRF_rf.drop("# ff", axis=1, inplace=True)

# cleaning the table
metrics_NeRF_rf["Model Description"] = "NeRF"
metrics_NeRF_rf.drop(
    ["Prediction Time", "MSE", "PSNR", "reduction factor ct"], axis=1, inplace=True
)
metrics_NeRF_rf = metrics_NeRF_rf[metrics_NeRF_rf["reduction factor ti"] < 16]

plot_metrics_per_parameter(
    metrics_NeRF_rf, "reduction factor ti", metrics_to_plot=["MAE"]
)
# ready-to-paste LaTeX cell (leading "&", trailing "\\")
# NOTE(review): 100, 86, 224 and 32 presumably are the sizes of the four
# reduced dimensions (100 is missing from the denominator) - confirm
metrics_NeRF_rf["Training Percentage"] = metrics_NeRF_rf["reduction factor ti"].apply(
    lambda x: f"& $ \\approx {math.ceil(100/x) * math.ceil(86/x) * math.ceil(224/x) * math.ceil(32/x)/(86*224*32):.2f}\\%$ \\\\"
)
# print_copy_latex_table_code(metrics_NeRF_rf, column_format="cccc")

metrics_NeRF_rf

### Visual Comparison - interpolation on coords

In [None]:
# Two separate figures (MAE first, then r2) comparing NeRF and DT as the
# reduction factor changes (interpolation on coords).
# percs is only printed (in reversed order); it is not used by the plots.
percs = [0.4, 0.03, 0.01][::-1]
print(percs)
metric_name = "MAE"
ax = plt.gca()
ax.plot(
    metrics_NeRF_rf["reduction factor ti"], metrics_NeRF_rf[metric_name], label="NeRF"
)
ax.plot(metrics_DT_rf["reduction factor ti"], metrics_DT_rf[metric_name], label="DT")
plt.xlabel("Reduction factor")
plt.ylabel(f"{metric_name} (test set)")
plt.legend()
plt.show()

metric_name = "r2"
ax = plt.gca()
ax.plot(
    metrics_NeRF_rf["reduction factor ti"], metrics_NeRF_rf[metric_name], label="NeRF"
)
ax.plot(metrics_DT_rf["reduction factor ti"], metrics_DT_rf[metric_name], label="DT")
plt.xlabel("Reduction factor")
plt.ylabel(f"R2 (test set)")
plt.legend()
plt.show()

# Qualitative results

## Visual comparison of centreline predictions

In [None]:
from src.data_loaders import get_wake_datasets
import torch
import torch.nn as nn
import random
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.kernel_approximation import RBFSampler
import src.plotting as plotting
import os
import matplotlib.pyplot as plt

# Folders holding the saved model checkpoints. The paths are relative, so they
# resolve against the notebook's working directory.
# MODEL_FOLDER_OLD is not referenced in the cells visible below.
MODEL_FOLDER_OLD = "saved_models/discr_factors_x2_30_y-2_2_step0.125_reducedTI-CT/"
MODEL_FOLDER = "saved_models/discr_factors_x2_30_y-2_2_step0.125_TIstep0.01_CTstep0.01/"

# Folder of the dataset handed to get_wake_datasets().
DATA_FOLDER = "data/discr_factors_x2_30_y-2_2_step0.125_TIstep0.01_CTstep0.01"

# Training reduction factor per input variable, passed to get_wake_datasets()
# as `input_var_to_train_reduction_factor`.
INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR = {
    "ti": 4,
    "ct": 4,
}  # 'x/D': 4 and 'y/D': 4 are left out on purpose: interpolation on coords is not possible when plotting
# Training ranges per input variable; only referenced by the commented-out
# `input_var_to_train_ranges` argument in the cells below.
INPUT_VAR_TO_TRAIN_RANGES = {"ti": [(0.15, 0.4)], "ct": [(0.3, 0.7)]}

# Line colours, one per model, indexed by the model's position.
MODEL_COLORS = [
    "crimson",
    "deepskyblue",
    "green",
    "mediumorchid",
    "magenta",
    "navy",
    "mediumblue",
]

In [None]:
""" FOR UNIVARIATE """

# DATA
# Univariate setting: the coordinates are part of the model input
# (coords_as_input=True).
(
    train_dataset_uni,
    valid_dataset,
    test_dataset_uni,
) = get_wake_datasets(
    DATA_FOLDER,
    consider_ws=False,
    coords_as_input=True,
    input_var_to_train_reduction_factor=INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR,
    # input_var_to_train_ranges=INPUT_VAR_TO_TRAIN_RANGES,
)
del valid_dataset  # the validation split is dropped right away

# Sizes derived from the test set
input_space_uni = test_dataset_uni.inputs.shape[1]
output_space_uni = test_dataset_uni.outputs.shape[1]
num_cells_uni = test_dataset_uni.num_cells
num_fields_uni = len(test_dataset_uni) // num_cells_uni
field_indices_uni = list(range(num_fields_uni))

# Parallel lists: models_uni[k] is described by model_descs_uni[k]
models_uni, model_descs_uni = [], []

# UNIVARIATE NN
# Module-level hyperparameters read by UnivariateNN when it is instantiated.
hidden_layers_units = [50, 250]
activation_function = nn.ReLU()


class UnivariateNN(nn.Module):
    """Fully connected network: ``Linear -> activation`` per hidden layer,
    followed by a final linear layer that is not activated.

    Args:
        input_space: number of input features.
        output_space: number of output values.
        hidden_units: widths of the hidden layers. When omitted, the
            module-level ``hidden_layers_units`` is read at instantiation
            time (the original behaviour). Passing it explicitly makes the
            architecture independent of later rebinding of that global.
        activation: activation module placed after every hidden layer. When
            omitted, the module-level ``activation_function`` is used.
    """

    def __init__(
        self,
        input_space,
        output_space,
        hidden_units: Optional[list] = None,
        activation: Optional[nn.Module] = None,
    ) -> None:
        super().__init__()
        if hidden_units is None:
            hidden_units = hidden_layers_units
        if activation is None:
            activation = activation_function

        layer_units = [input_space, *hidden_units]
        layers = []
        for n_in, n_out in zip(layer_units, layer_units[1:]):
            layers += [nn.Linear(n_in, n_out), activation]
        # last layer not activated
        layers.append(nn.Linear(layer_units[-1], output_space))
        # The attribute name and layer order fix the state-dict keys
        # ("layers.0.weight", ...), so saved checkpoints keep loading.
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        return self.layers(x)


model_path = os.path.join(
    MODEL_FOLDER, "univariate_NN_layers50-250_training_factors=ti4-ct4.pt"
)
model_desc = "Univariate MLP"

model = UnivariateNN(input_space_uni, output_space_uni)
# map_location="cpu": the model is built on the CPU, and without the remap a
# checkpoint saved from a GPU fails to load on a machine without CUDA.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()  # inference mode

models_uni.append(model)
model_descs_uni.append(model_desc)

# NeRF
# Module-level hyperparameters read by NeRF2D when it is instantiated.
# NOTE: this rebinds `hidden_layers_units`, which UnivariateNN also reads, so
# it must stay below the UnivariateNN instantiation above.
n_mapping = 256
hidden_layers_units = [256, 256, 256]


class FourierLayer(nn.Module):
    """Map inputs to sine/cosine features of a linear projection.

    ``forward`` multiplies the input by ``2 * pi``, projects it with the
    transposed ``coefficients`` matrix and returns the sines followed by the
    cosines of the projection, concatenated along the last dimension (so the
    output has ``2 * n_mapping`` features).
    """

    def __init__(self, n_features: int, n_mapping: int):
        super().__init__()
        # sampled from normal distribution with mean 0 and variance 1;
        # wrapped in nn.Parameter, so it is part of the module's state dict
        initial_coefficients = torch.randn(n_mapping, n_features)
        self.coefficients = nn.Parameter(initial_coefficients)

    def forward(self, x):
        projected = (2 * torch.pi * x) @ self.coefficients.t()
        return torch.cat((torch.sin(projected), torch.cos(projected)), dim=-1)


class NeRF2D(nn.Module):
    """MLP whose coordinate inputs first pass through a ``FourierLayer``.

    The last ``n_coordinates`` columns of the input are treated as
    coordinates and the remaining leading columns as plain features. The
    Fourier features of the coordinates are concatenated with the plain
    features and fed to a ``Linear -> activation`` stack that ends with a
    linear layer that is not activated.

    Args:
        n_coordinates: number of trailing input columns holding coordinates.
        n_features: number of leading input columns holding other features.
        output_space: number of output values.
        fourier_mapping_size: number of Fourier projections. When omitted,
            the module-level ``n_mapping`` is read at instantiation time
            (the original behaviour).
        hidden_units: widths of the hidden layers. When omitted, the
            module-level ``hidden_layers_units`` is used.
        activation: activation module placed after every hidden layer. When
            omitted, the module-level ``activation_function`` is used.
    """

    def __init__(
        self,
        n_coordinates: int,
        n_features: int,
        output_space: int,
        fourier_mapping_size: Optional[int] = None,
        hidden_units: Optional[list] = None,
        activation: Optional[nn.Module] = None,
    ):
        super().__init__()
        if fourier_mapping_size is None:
            fourier_mapping_size = n_mapping
        if hidden_units is None:
            hidden_units = hidden_layers_units
        if activation is None:
            activation = activation_function

        # Kept so forward() splits the input consistently with the Fourier
        # layer (a plain attribute, so it is not part of the state dict).
        self.n_coordinates = n_coordinates
        self.fourier_layer = FourierLayer(n_coordinates, fourier_mapping_size)

        # The Fourier layer emits a sine and a cosine per projection.
        layer_units = [fourier_mapping_size * 2 + n_features, *hidden_units]
        mlp_layers = []
        for n_in, n_out in zip(layer_units, layer_units[1:]):
            mlp_layers += [nn.Linear(n_in, n_out), activation]
        # last layer not activated
        mlp_layers.append(nn.Linear(layer_units[-1], output_space))
        self.mlp = nn.Sequential(*mlp_layers)

    def forward(self, inputs):
        """Return the prediction for a batch whose columns are
        ``[features..., coordinates...]``."""
        # Split on an explicit column index instead of a hard-coded "-2", so
        # that the split follows n_coordinates.
        split = inputs.shape[1] - self.n_coordinates
        input_features = inputs[:, :split]
        coords = inputs[:, split:]

        # Pass coordinates through the Fourier layer
        fourier_out = self.fourier_layer(coords)

        # Concatenate Fourier layer output with other input features
        mlp_input = torch.cat((fourier_out, input_features), dim=-1)

        # Feed-forward through the rest of the neural network
        return self.mlp(mlp_input)


model_path = os.path.join(
    MODEL_FOLDER,
    "univariate_NeRF_256fourier-features_layers256-256-256_training_factors=ti4-ct4.pt",
)
model_desc = "Univariate NeRF"

model = NeRF2D(n_coordinates=2, n_features=2, output_space=1)
# map_location="cpu": the model is built on the CPU, and without the remap a
# checkpoint saved from a GPU fails to load on a machine without CUDA.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()  # inference mode

models_uni.append(model)
model_descs_uni.append(model_desc)

# DECISION TREE
# Fitted here on the training split rather than loaded from disk.
model = DecisionTreeRegressor()
model.fit(train_dataset_uni.inputs, train_dataset_uni.outputs)
model_desc = "Univariate DT"
models_uni.append(model)
model_descs_uni.append(model_desc)

# UNIVARIATE LINEAR REGRESSION
model = LinearRegression()
model.fit(train_dataset_uni.inputs, train_dataset_uni.outputs)
model_desc = "Univariate LR"
models_uni.append(model)
model_descs_uni.append(model_desc)

# UNIVARIATE APPROXIMATE GAUSSIAN PROCESS
# Linear regression on random RBF features. `rbf_sampler` stays in scope
# because the plotting cells transform the test inputs with it before calling
# this model.
rbf_sampler = RBFSampler(n_components=32, gamma="scale", random_state=0)
train_x_fourier = rbf_sampler.fit_transform(train_dataset_uni.inputs)
model = LinearRegression()
model.fit(train_x_fourier, train_dataset_uni.outputs)
model_desc = "Univariate AGP"
models_uni.append(model)
model_descs_uni.append(model_desc)

print(model_descs_uni)

In [None]:
""" FOR MULTIVARIATE """

# DATA
# Multivariate setting: the coordinates are NOT model inputs
# (coords_as_input=False).
(
    train_dataset_multi,
    valid_dataset,
    test_dataset_multi,
) = get_wake_datasets(
    DATA_FOLDER,
    consider_ws=False,
    coords_as_input=False,
    input_var_to_train_reduction_factor=INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR,
    # input_var_to_train_ranges=INPUT_VAR_TO_TRAIN_RANGES
)
del valid_dataset  # the validation split is dropped right away

# Sizes derived from the training set
input_space_multi = train_dataset_multi.inputs.shape[1]
output_space_multi = train_dataset_multi.outputs.shape[1]
indices_multi = list(range(len(test_dataset_multi)))

# Parallel lists: models_multi[k] is described by model_descs_multi[k]
models_multi, model_descs_multi = [], []

# MULTIVARIATE LINEAR REGRESSION
# Fitted here on the training split rather than loaded from disk.
model_desc = "Multivariate LR"
model = LinearRegression()
model.fit(train_dataset_multi.inputs, train_dataset_multi.outputs)
models_multi.append(model)
model_descs_multi.append(model_desc)

# MULTIVARIATE MLP
# Module-level hyperparameters read by MultivariateNN when it is instantiated.
HIDDEN_LAYERS_UNITS = [50, 500]
ACTIVATION_FUNCTION = nn.ReLU()


class MultivariateNN(nn.Module):
    """Fully connected network: ``Linear -> activation`` per hidden layer,
    followed by a final linear layer that is not activated.

    Args:
        input_space: number of input features.
        output_space: number of output values.
        hidden_units: widths of the hidden layers. When omitted, the
            module-level ``HIDDEN_LAYERS_UNITS`` is read at instantiation
            time (the original behaviour).
        activation: activation module placed after every hidden layer. When
            omitted, the module-level ``ACTIVATION_FUNCTION`` is used.
    """

    def __init__(
        self,
        input_space,
        output_space,
        hidden_units: Optional[list] = None,
        activation: Optional[nn.Module] = None,
    ) -> None:
        super().__init__()
        if hidden_units is None:
            hidden_units = HIDDEN_LAYERS_UNITS
        if activation is None:
            activation = ACTIVATION_FUNCTION

        layer_units = [input_space, *hidden_units]
        layers = []
        for n_in, n_out in zip(layer_units, layer_units[1:]):
            layers += [nn.Linear(n_in, n_out), activation]
        # last layer not activated
        layers.append(nn.Linear(layer_units[-1], output_space))
        # The attribute name and layer order fix the state-dict keys
        # ("layers.0.weight", ...), so saved checkpoints keep loading.
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        return self.layers(x)


model_path = os.path.join(
    MODEL_FOLDER, "multivariate_NN_layers50-500_training_factors=ti4-ct4.pt"
)
model_desc = "Multivariate MLP"

model = MultivariateNN(input_space_multi, output_space_multi)
# map_location="cpu": the model is built on the CPU, and without the remap a
# checkpoint saved from a GPU fails to load on a machine without CUDA.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()  # inference mode

models_multi.append(model)
model_descs_multi.append(model_desc)

print(model_descs_multi)

In [None]:
# Fields to visualise, given as indices into the univariate test set.
UNI_INDEXES = [3190]  # 3190 for interpolation, 2897 for extrapolation
# Matching indices into the multivariate test set.
# NOTE(review): the +4 offset is taken as-is; confirm against the data loaders.
MULTI_INDEXES = [uni_index + 4 for uni_index in UNI_INDEXES]
# Models left out of the centreline comparison.
EXCLUDED_MODELS = ["AGP", "LR"]
# x/D values of the centreline samples: 2 <= x/D < 30 in steps of 0.125.
X_RANGE = np.arange(2, 30, 0.125)

In [None]:
# Short display names that replace the "Univariate ..." descriptions built
# while the models were created. The list is positional: entry k labels
# models_uni[k], so the order must match the order the models were appended in
# (MLP, NeRF, DT, LR, AGP). These names are matched against EXCLUDED_MODELS
# and used as DataFrame column names in the centreline cells below.
model_descs_uni = ["MLP", "FEMLP", "RDT", "LR", "AGP"]

In [None]:
# random.shuffle(field_indices_uni)

# One table per selected field: the x/D axis, the reference ("Ainslie")
# centreline and one column per model prediction. `tis` and `cts` collect the
# parameters of each field, one entry per table.
dfs = [pd.DataFrame({"x/D": X_RANGE}) for _ in UNI_INDEXES]
tis = []
cts = []

# Column of the wake field used as the centreline in this notebook.
centreline_column = 16

# No figure is opened here: the plots are drawn later from `dfs`.
for df, field_idx in zip(dfs, UNI_INDEXES):
    reference_stored = False
    # UNIVARIATE
    for model, model_desc in zip(models_uni, model_descs_uni):
        if model_desc in EXCLUDED_MODELS:
            continue
        print(model_desc)
        try:
            ti, ct, _, wake_field, predicted_wake_field = (
                test_dataset_uni.get_parameters_for_plotting_univariate(
                    model, field_idx
                )
            )
        except Exception:
            # Fallback for a model that cannot consume the raw inputs: retry
            # with the RBF features the "AGP" regressor was fitted on.
            # NOTE(review): this broad except also masks unrelated failures of
            # the first call.
            inputs, _ = test_dataset_uni[test_dataset_uni.slice_for_field(field_idx)]
            inputs_fourier = rbf_sampler.transform(inputs)
            ti, ct, _, wake_field, predicted_wake_field = (
                test_dataset_uni.get_parameters_for_plotting_univariate(
                    model, field_idx, transformed_inputs=inputs_fourier
                )
            )

        # TODO move this stuff to plotting.py
        if not reference_stored:
            # The reference field and its parameters are stored once per
            # field, taken from the first model that is not excluded.
            df["Ainslie"] = wake_field[:, centreline_column].detach().numpy()
            tis.append(ti)
            cts.append(ct)
            reference_stored = True

        df[model_desc] = predicted_wake_field[:, centreline_column].detach().numpy()

# MULTIVARIATE
# Adds one centreline column per multivariate model to the table of the
# corresponding field (dfs[k] belongs to MULTI_INDEXES[k]).
for df, idx in zip(dfs, MULTI_INDEXES):
    # TODO move this stuff to plotting.py
    for model, model_desc in zip(models_multi, model_descs_multi):
        # Descriptions look like "Multivariate <NAME>"; the second word is
        # compared with the exclusion list.
        if model_desc.split(" ")[1] in EXCLUDED_MODELS:
            continue
        print(model_desc)
        _, _, _, _, predicted_wake_field = (
            test_dataset_multi.get_parameters_for_plotting_multivariate(model, idx)
        )

        # The prediction is reshaped to (32, 224) and transposed, giving one
        # row per x/D sample (224, the length of X_RANGE); column 16 is then
        # taken as the centreline.
        predicted_wake_field = predicted_wake_field.reshape(32, 224).T
        df[model_desc] = predicted_wake_field[:, 16].detach().numpy()

# Sanity check: every table must have exactly one (ti, ct) pair.
assert len(dfs) == len(tis) == len(cts)

# Font size for the paper figures (a global matplotlib setting).
plt.rcParams.update({"font.size": 18})

for df, ti, ct in zip(dfs, tis, cts):
    field_tag = f"{ti:.2f}-{ct:.2f}"

    # Raw centreline values, one spreadsheet per field.
    df.to_excel(f"{field_tag}.xlsx", index=False)

    # Plot for the paper: signed error of each model w.r.t. the reference
    # ("Ainslie") centreline.
    fig, ax = plt.subplots()
    fig.set_size_inches(8, 5)

    ax.set_xlabel("x [D]")
    ax.set_ylabel("error in wind speed deficit")
    ax.tick_params(axis="y")
    for column, color in zip(("MLP", "FEMLP", "RDT"), MODEL_COLORS):
        ax.plot(df["x/D"], df[column] - df["Ainslie"], label=column, color=color)
    ax.axhline(0, color="black", linestyle="--")  # 0 line
    ax.legend()

    # A single field keeps the historical file name. With several fields the
    # (ti, ct) tag is added so the figures do not overwrite each other.
    if len(dfs) == 1:
        pdf_name = "centreline_errors.pdf"
    else:
        pdf_name = f"centreline_errors_{field_tag}.pdf"
    # Saving through `fig` does not depend on which figure is "current".
    fig.savefig(pdf_name, bbox_inches="tight")

In [None]:
# NOTE(review): `fig`, `ax1` and `ax2` are not created in the cells visible
# here; this cell presumably belongs to a twin-axis figure built elsewhere in
# the notebook - confirm before running.
fig.tight_layout()  # otherwise the right y-label is slightly clipped
fig.legend(
    ax1.get_legend_handles_labels()[1] + ax2.get_legend_handles_labels()[1],
    loc="center right",
    bbox_to_anchor=(0.85, 0.5),
)
plt.xscale("log", base=2)
# Save BEFORE showing: once plt.show() has displayed the figure, a later
# plt.savefig() writes a blank figure.
fig.savefig("results_size-dependence.pdf", format="pdf")
plt.show()

## Visual predictions

In [None]:
def _show_and_save_maps(
    dataset, model_desc, ti, ct, ws, wake_field, predicted_wake_field
):
    """Display the maps of one prediction and save the cut maps to disk.

    The image is written to ``<ti>-<ct>/<name>.png``, where ``<name>`` is the
    part of ``model_desc`` before the first " - ". The folder is created when
    missing.
    """
    plotting.plot_maps(
        dataset.X_grid,
        dataset.Y_grid,
        wake_field,
        predicted_wake_field,
        ti,
        ct,
        ws,
        error_to_plot="absolute",
    )
    folder = f"{ti:.2f}-{ct:.2f}/"
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(folder, exist_ok=True)
    filepath = os.path.join(folder, model_desc.split(" - ")[0] + ".png")
    plotting.save_cut_maps(
        dataset.X_grid,
        dataset.Y_grid,
        wake_field,
        predicted_wake_field,
        ti,
        ct,
        ws,
        error_to_plot="absolute",
        filepath=filepath,
    )


with torch.no_grad():
    # univariate
    for model, model_desc in zip(models_uni, model_descs_uni):
        print(model_desc)
        for field_idx in UNI_INDEXES:
            try:
                ti, ct, ws, wake_field, predicted_wake_field = (
                    test_dataset_uni.get_parameters_for_plotting_univariate(
                        model, field_idx
                    )
                )
            except Exception:
                # Fallback for a model that cannot consume the raw inputs:
                # retry with the RBF features the "AGP" regressor was fitted
                # on. `except Exception` (rather than a bare `except`) lets
                # KeyboardInterrupt and SystemExit propagate.
                inputs, _ = test_dataset_uni[
                    test_dataset_uni.slice_for_field(field_idx)
                ]
                inputs_fourier = rbf_sampler.transform(inputs)
                ti, ct, ws, wake_field, predicted_wake_field = (
                    test_dataset_uni.get_parameters_for_plotting_univariate(
                        model, field_idx, transformed_inputs=inputs_fourier
                    )
                )
            _show_and_save_maps(
                test_dataset_uni,
                model_desc,
                ti,
                ct,
                ws,
                wake_field,
                predicted_wake_field,
            )

    # multivariate
    for model, model_desc in zip(models_multi, model_descs_multi):
        print(model_desc)
        for idx in MULTI_INDEXES:
            ti, ct, ws, wake_field, predicted_wake_field = (
                test_dataset_multi.get_parameters_for_plotting_multivariate(model, idx)
            )
            _show_and_save_maps(
                test_dataset_multi,
                model_desc,
                ti,
                ct,
                ws,
                wake_field,
                predicted_wake_field,
            )