 # Highlight Loss of Performance in Higher Dimensions

In [31]:
import rareeventestimation as ree
import numpy as np
import pandas as pd
import plotly.express as px
from rareeventestimation.evaluation.constants import INDICATOR_APPROX_LATEX_NAME, BM_SOLVER_SCATTER_STYLE, MY_LAYOUT, DF_COLUMNS_TO_LATEX, LATEX_TO_HTML, WRITE_SCALE
from IPython.display import display, Markdown
# recommended: use autoreload for development: https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load Data

In [32]:
# Load the aggregated benchmark results (df_bm_agg) and the resampled
# toy-problem results (df_agg) from the same Internet Archive item.
# Both files go through the stable `archive.org/download/<item>/<file>`
# endpoint instead of a hard-coded storage node, which may change over time.
DATA_URL = "https://archive.org/download/konstantinalthaus-rareeventestimation-data"
df_bm_agg = pd.read_json(f"{DATA_URL}/benchmark_toy_problems_aggregated.json")
df_agg = pd.read_json(f"{DATA_URL}/resampled_toy_problems.json")

## Make Figures

In [33]:
# For each problem: plot cost against relative root MSE (one column per
# method, one row per observation window, one color per stopping criterion),
# overlay the benchmark solvers, then save the figure and a LaTeX caption.
for prob in ["Linear Problem (d=2)", "Linear Problem (d=50)"]:
    # column names as they appear in df_agg (the doubled braces are literal)
    col_n_obs = "$N_{{ \\text{{obs}} }}$"
    col_delta = "$\\Delta_{{\\text{{Target}}}}$"
    # filter
    this_df = df_agg.query("Problem == @prob & `Averaging Method`=='Average Estimate'")
    this_df = this_df[this_df[col_n_obs].isin([0, 2, 5])]
    this_df = this_df[this_df[col_delta].isin([2, 5, 10])]
    cmap = ree.sr_to_color_dict(this_df[col_delta].astype(float))
    # `assign` returns a new frame instead of writing a column into a filtered
    # slice of df_agg, which would trigger pandas' SettingWithCopyWarning.
    this_df = this_df.assign(cvar_tgt_str=this_df[col_delta].astype(float).apply(str))
    this_df = this_df.sort_values([col_delta, col_n_obs, "Sample Size"])
    # plot
    fig = px.line(
        this_df,
        x="Relative Root MSE",
        y="Cost Mean",
        facet_col="Method",
        facet_row=col_n_obs,
        color_discrete_map=cmap,
        color="cvar_tgt_str",
        log_x=True,
        log_y=True,
        markers=True,
        hover_name="Sample Size",
        labels=LATEX_TO_HTML | {"cvar_tgt_str": LATEX_TO_HTML[DF_COLUMNS_TO_LATEX["cvar_tgt"]]})
    # add benchmark
    this_df_bm = df_bm_agg.query("Problem == @prob & cvar_tgt == 1")
    # These do not depend on the benchmark solver, so compute them once.
    num_rows = len(this_df[col_n_obs].unique())
    methods = this_df["Method"].unique()
    for bm_solver in this_df_bm.Solver.unique():
        dat = this_df_bm.query("Solver == @bm_solver")
        trace_dict = {
            "x": dat["Relative Root MSE"],
            "y": dat["Cost Mean"],
            "legendgrouptitle_text": "Benchmark Methods",
            "name": bm_solver,
            "legendgroup": "group",
            "mode": "markers+lines",
            "opacity": 0.8
        }
        # Pair each benchmark solver with the matching method column:
        # "GM" solvers go to the "CBREE" column, "MFN" solvers to "MFN" columns.
        cols_idx = []
        for i, method in enumerate(methods):
            if method == "CBREE" and "GM" in bm_solver:
                cols_idx.append(i)
            if "MFN" in method and ("MFN" in bm_solver):
                cols_idx.append(i)

        trace_dict = trace_dict | BM_SOLVER_SCATTER_STYLE[bm_solver]
        fig = ree.add_scatter_to_subplots(fig, num_rows, cols_idx, **trace_dict)
    # style
    fig.update_layout(**MY_LAYOUT)
    # Both keys are read below, so require both (the old check on the y key
    # alone raised a KeyError when only that one was configured).
    if all(key in MY_LAYOUT for key in ("xaxis_exponentformat", "yaxis_exponentformat")):
        fig = ree.update_axes_format(fig, MY_LAYOUT["xaxis_exponentformat"], MY_LAYOUT["yaxis_exponentformat"])
    # overwrite N_obs = 0
    old_a = LATEX_TO_HTML[DF_COLUMNS_TO_LATEX["observation_window"]] + "=0"
    new_a = "No Divergence Check"
    fig.for_each_annotation(
        lambda a: a.update(text=new_a if a.text.startswith(old_a) else a.text))

    # adjust column name position
    fig.for_each_annotation(
        lambda a: a.update(yshift=-10 if a.text.startswith("Method") else 0))
    # show and save
    fig.show()
    fig_title = "convergence plot" + prob + "gm vs nongm"
    fig.write_image(f"{fig_title}.png".replace(" ", "_").lower(), scale=WRITE_SCALE)
    # make and save caption
    fig_description = f"Solving the {prob} with the CBREE methods using  \
different parameters. \
We vary the stopping criterion $\\Delta_{{\\text{{Target}}}}$ (color), \
the divergence criterion $N_\\text{{obs}}$ (row) and \
the method  (column). \
The parameter $\\epsilon_{{\\text{{Target}}}} = {0.5}$ is fixed. \
Furthermore we plot also the performance of the benchmark methods EnKF \
and SiS. \
Each marker represents the empirical estimates based on the successful portion of ${int(2*this_df.Seed.unique()[0]+1)}$ simulations."
    with open(f"{fig_title} desc.tex".replace(" ", "_").lower(), "w") as file:
        file.write(fig_description)
    display(Markdown(fig_description))

Solving the Linear Problem (d=2) with the CBREE methods using  different parameters. We vary the stopping criterion $\Delta_{\text{Target}}$ (color), the divergence criterion $N_\text{obs}$ (row) and the method  (column). The parameter $\epsilon_{\text{Target}} = 0.5$ is fixed. Furthermore we plot also the performance of the benchmark methods EnKF and SiS. Each marker represents the empirical estimates based the successful portion of $200$ simulations.

Solving the Linear Problem (d=50) with the CBREE methods using  different parameters. We vary the stopping criterion $\Delta_{\text{Target}}$ (color), the divergence criterion $N_\text{obs}$ (row) and the method  (column). The parameter $\epsilon_{\text{Target}} = 0.5$ is fixed. Furthermore we plot also the performance of the benchmark methods EnKF and SiS. Each marker represents the empirical estimates based the successful portion of $200$ simulations.

## Alternative Figure


In [34]:
# Alternative figure: both linear problems in a single plot, with one row per
# problem and one column per method, overlaid with the benchmark solvers.
problems = ["Linear Problem (d=2)", "Linear Problem (d=50)"]
# column names as they appear in df_agg (the doubled braces are literal)
col_n_obs = "$N_{{ \\text{{obs}} }}$"
col_delta = "$\\Delta_{{\\text{{Target}}}}$"
# filter
this_df = df_agg.query(" `Averaging Method`=='Average Estimate'")
this_df = this_df[this_df['Problem'].isin(problems)]
this_df = this_df[this_df[col_n_obs].isin([2])]
this_df = this_df[this_df[col_delta].isin([2, 5, 10])]
cmap = ree.sr_to_color_dict(this_df[col_delta].astype(float))
# `assign` returns a new frame instead of writing a column into a filtered
# slice of df_agg, which would trigger pandas' SettingWithCopyWarning.
this_df = this_df.assign(cvar_tgt_str=this_df[col_delta].astype(float).apply(str))
this_df = this_df.sort_values([col_delta, col_n_obs, "Sample Size"])
# plot
fig = px.line(
    this_df,
    x="Relative Root MSE",
    y="Cost Mean",
    facet_col="Method",
    facet_row="Problem",
    color_discrete_map=cmap,
    color="cvar_tgt_str",
    log_x=True,
    log_y=True,
    markers=True,
    hover_name="Sample Size",
    labels=LATEX_TO_HTML | {"cvar_tgt_str": LATEX_TO_HTML[DF_COLUMNS_TO_LATEX["cvar_tgt"]]})
# add benchmark
this_df_bm = df_bm_agg.query("cvar_tgt == 1")
methods = this_df.Method.unique()
all_bm_solvers = this_df_bm.Solver.unique()
# NOTE(review): the row indices below assume the facet rows follow the order
# of `problems`; confirm against the order in which problems appear in this_df.
for prob_idx, prob in enumerate(problems):
    # caveat: px flips the row order, so the first problem gets the last row index
    num_rows = [len(problems) - 1 - prob_idx]
    for col_idx, method in enumerate(methods):
        # Pick the benchmark solvers that belong in this method's column.
        # Methods matching neither rule get no benchmark trace (previously
        # `bm_solvers` was left undefined or stale from the prior iteration).
        if method == "CBREE":
            bm_solvers = [s for s in all_bm_solvers if "GM" in s]
        elif "MFN" in method:
            bm_solvers = [s for s in all_bm_solvers if "MFN" in s]
        else:
            bm_solvers = []
        cols_idx = [col_idx]
        for bm_solver in bm_solvers:
            dat = this_df_bm.query("Problem == @prob & Solver == @bm_solver")
            trace_dict = {
                "x": dat["Relative Root MSE"],
                "y": dat["Cost Mean"],
                "legendgrouptitle_text": "Benchmark Methods",
                "showlegend": prob == problems[-1],  # avoids duplicated entries
                "name": bm_solver,
                "legendgroup": "group",
                "mode": "markers+lines",
                "opacity": 0.8
            }
            trace_dict = trace_dict | BM_SOLVER_SCATTER_STYLE[bm_solver]
            fig = ree.add_scatter_to_subplots(fig, num_rows, cols_idx, **trace_dict)

# style
# Both keys are read below, so require both (the old check on the y key
# alone raised a KeyError when only that one was configured).
if all(key in MY_LAYOUT for key in ("xaxis_exponentformat", "yaxis_exponentformat")):
    fig = ree.update_axes_format(fig, MY_LAYOUT["xaxis_exponentformat"], MY_LAYOUT["yaxis_exponentformat"])
# overwrite N_obs = 0 etc
new_labels = {LATEX_TO_HTML[DF_COLUMNS_TO_LATEX["observation_window"]] + "=0": "No Divergence Check",
              "Method=": "",
              "Problem=": ""}
for old, new in new_labels.items():
    # The lambda is invoked immediately by for_each_annotation, so capturing
    # the loop variables `old` and `new` is safe here.
    fig.for_each_annotation(
        lambda a: a.update(text=a.text.replace(old, new) if a.text.startswith(old) else a.text))

fig.update_layout(**MY_LAYOUT)
# show and save
fig.show()
fig_title = "Linear problems lower and higher dimensions"
fig.write_image(f"{fig_title}.pdf".replace(" ", "_").lower(),
                engine="kaleido")
# make and save caption
fig_description = f"Solving the Linear Problem with the CBREE methods using  \
different parameters. \
We vary the stopping criterion $\\Delta_{{\\text{{Target}}}}$ (color), \
the problem's dimension $d$ (row) and \
the method (column). \
The parameters $\\epsilon_{{\\text{{Target}}}} = {0.5}$ \
and $N_\\text{{obs}}=2$ are fixed. \
Furthermore we plot also the performance of the benchmark methods EnKF \
and SiS. \
Each marker represents the empirical estimates based on the successful portion of ${int(2*this_df.Seed.unique()[0]+1)}$ simulations."
with open(f"{fig_title} desc.tex".replace(" ", "_").lower(), "w") as file:
    file.write(fig_description)
display(Markdown(fig_description))

Solving the Linear Problem with the CBREE methods using  different parameters. We vary the stopping criterion $\Delta_{\text{Target}}$ (color), the problem's dimension $d$ (row) and the method (column). The parameters $\epsilon_{\text{Target}} = 0.5$ and $N_\text{obs}=2$ are fixed. Furthermore we plot also the performance of the benchmark methods EnKF and SiS. Each marker represents the empirical estimates based the successful portion of $200$ simulations.