# Visualize effect of resampling final ensemble

In [7]:
from os import path
import rareeventestimation as ree
import numpy as np
import pandas as pd
import plotly.express as px
from rareeventestimation.evaluation.constants import INDICATOR_APPROX_LATEX_NAME, BM_SOLVER_SCATTER_STYLE, MY_LAYOUT, DF_COLUMNS_TO_LATEX, LATEX_TO_HTML, WRITE_SCALE, CMAP
import plotly.graph_objects as go
from IPython.display import display, Markdown
# recommended: use autoreload for development: https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load Data
### Option 1: Get precomputed data online

In [4]:
# The precomputed data is hosted here:
# https://archive.org/details/konstantinalthaus-rareeventestimation-data
# You can go to this link and inspect the files before loading them.
resampling_data_url = "https://archive.org/download/konstantinalthaus-rareeventestimation-data/resampling_in_final_step_data.json"
ess_data_url = "https://archive.org/download/konstantinalthaus-rareeventestimation-data/effective_sample_size_data.json"

# Aggregated results (used for the error-cost plot) and the per-run
# effective-sample-size data (used for the box plot).
df_agg = pd.read_json(resampling_data_url)
df_ess = pd.read_json(ess_data_url)

### Option 2: Aggregate locally precomputed data

In [5]:
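## Uncomment to load existing local data,
## or to compile the data after computing it yourself.
## NOTE: the code below uses `out_dir` and `pattern`, which are not set in the
## cells above -- define them before running this cell.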
# if not path.exists(path.join(out_dir, "processed_data.json")):
#     df = ree.load_data(out_dir, pattern)
#     # Nice solver names
#     df.loc[df["callback"].isna(),"Solver"] = "CBREE"
#     df.loc[df["callback"].isna(),"callback"] = "None"
#     df.loc[df["callback"].str.contains("gm"), "Solver"] = "CBREE (G)"
#     df = df.loc[~df["callback"].str.contains("vmfnm"),:].reset_index()
#     df = ree.add_evaluations(df)
#     df_agg = ree.aggregate_df(df)
#     df_agg.to_json(path.join(out_dir, "processed_data.json"))
# else:
#     df_agg = pd.read_json(path.join(out_dir, "processed_data.json"))
    
# if not  path.exists(path.join(out_dir, "ess_data.json")):
#     df = ree.load_data(out_dir, pattern)
#     # Nice solver names
#     df.loc[df["callback"].isna(),"Solver"] = "CBREE"
#     df.loc[df["callback"].isna(),"callback"] = "None"
#     df.loc[df["callback"].str.contains("gm"), "Solver"] = "CBREE (G)"
#     df = df.loc[~df["callback"].str.contains("vmfnm"),:].reset_index()
#     df = ree.add_evaluations(df)
#     df["VAR IS Weights"] = (df["Estimate"] * df["cvar_is_weights"] )**2
#     df["J_ESS"] = df["VAR IS Weights"] / df["Estimate Variance"]
#     df["J_ESS"] = df.apply(lambda x: x["J_ESS"][-1], axis=1)
#     df_ess = df[["Problem", "Solver", "Sample Size", "J_ESS"]]
#     df_ess.to_json(path.join(out_dir, "ess_data.json"))
# else:
#     df_ess = pd.read_json(path.join(out_dir, "ess_data.json"))

## Make figures

### Error-Cost plot

In [11]:
# A default-constructed solver; its attributes supply the parameter values
# quoted in the figure caption below.
solver = ree.CBREE()

# Build the accuracy plots, keep the first figure, then style, save and show it.
fig_name = "resampling_in_final_step"
accuracy_figs = ree.make_accuracy_plots(df_agg, layout=MY_LAYOUT, CMAP=CMAP)
fig = accuracy_figs[0]
fig.update_yaxes(title_text="LSF Evaluations")
fig.update_layout(title_text="", height=800)
fig.write_image(f"{fig_name}.png", scale=WRITE_SCALE)
fig.show()

# Assemble the LaTeX caption from the data and the solver settings.
# Doubled braces are literal braces in the f-strings; doubled backslashes are
# literal backslashes for the LaTeX commands.
problem_name = df_agg.Problem.unique()[0]
sample_sizes = ", ".join(str(size) for size in df_agg["Sample Size"].unique())
fig_description = (
    f"Solving the {problem_name} with two CBREE methods using  "
    f"$J \\in \\{{{sample_sizes}\\}}$ particles, "
    f"the stopping criterion $\\Delta_{{\\text{{Target}}}} = {solver.cvar_tgt}$, "
    f"the stepsize tolerance $\\epsilon_{{\\text{{Target}}}} = {solver.stepsize_tolerance}$, "
    f"controlling the increase of $\\sigma$ with $\\text{{Lip}}(\\sigma) = {solver.lip_sigma}$ "
    f"and approximating the indicator function with {INDICATOR_APPROX_LATEX_NAME[solver.tgt_fun]}. "
    "No divergence check has been performed. "
    "Each simulation was repeated 200 times. "
    "While the markers present the empirical means of the visualized quantities, the error bars are drawn from first to the third quartile."
)

# Save the caption next to the image and render it in the notebook.
with open(fig_name + "_desc.tex", "w") as caption_file:
    caption_file.write(fig_description)
display(Markdown(fig_description))

Solving the Convex Problem with two CBREE methods using  $J \in \{250, 500, 1000, 2000, 3000, 4000, 5000, 6000\}$ particles, the stopping criterion $\Delta_{\text{Target}} = 2$, the stepsize tolerance $\epsilon_{\text{Target}} = 0.5$, controlling the increase of $\sigma$ with $\text{Lip}(\sigma) = 1$ and approximating the indicator function with $I_\text{alg}$. No divergence check has been performed. Each simulation was repeated 200 times. While the markers present the empirical means of the visualized quantities, the error bars are drawn from first to the third quartile.

### Study correlation of importance function evaluations

In [12]:
# Relative error of the ESS estimate: (J - J_ESS) / J, with J the sample size.
# The result is written to a NEW frame instead of overwriting `df_ess["J_ESS"]`
# in place. Overwriting made this cell non-idempotent: running it a second time
# applied the formula to the already transformed values and silently changed
# the plot. `df_ess` now keeps the raw J_ESS values.
df_ess_rel = df_ess.assign(
    J_ESS=(df_ess["Sample Size"] - df_ess["J_ESS"]) / df_ess["Sample Size"]
).sort_values(by="Solver")  # sort so the solvers get a stable color/legend order

# plot: one box per sample size and solver, without the individual points
fig_hist = px.box(
    df_ess_rel,
    x="Sample Size",
    y="J_ESS",
    color="Solver",
    points=False,
    color_discrete_sequence=CMAP,
    labels={"Solver": "Method"},
)

# style and save (`fig_name` comes from the error-cost plot cell above)
fig_hist.update_layout(**MY_LAYOUT)
fig_hist.update_layout(height=800)
fig_hist.update_xaxes(title_text="Sample Size <i>J</i>")
# Plain string (no placeholders, so no f-prefix) with properly nested tags.
fig_hist.update_yaxes(title_text="Relative Error of ESS(<b><i>r</i></b>) Estimate")
fig_hist.write_image(fig_name + "_boxplot.png", scale=WRITE_SCALE)
fig_hist.show()