```
Copyright 2023 ServiceNow
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
```

This notebook takes the results stored in `./data/ror_results.csv` and creates the colour plots for the main experiments, with fixed $\alpha$ and $\beta$ parameters.

The figures will be saved in the `./figures` folder, which must be created manually before running this notebook.

In [None]:
import sys
sys.path.append("..")

In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import scipy
import seaborn as sns
import glob
import pickle

In [None]:
%matplotlib inline

In [None]:
import matplotlib.pylab as pylab

# Apply the same font size (18) to axis labels, axes titles and tick labels
# through matplotlib's global rcParams.
params = dict.fromkeys(
    ["axes.labelsize", "axes.titlesize", "xtick.labelsize", "ytick.labelsize"],
    18,
)
pylab.rcParams.update(params)

In [None]:
from ror.experiments import h0_vs_h1 as exp_def

In [None]:
# Get rid of metrics we do not include in the paper.
# This mutates the imported module in place, so `pop(..., None)` is used
# instead of `del`: re-running this cell must not raise KeyError for metrics
# that were already removed.
for _excluded_metric in ("energy_0", "energy_fast_0", "variogram_0"):
    exp_def.METRIC_FUNCTIONS.pop(_excluded_metric, None)

In [None]:
# Show the list of experiments that the rest of the notebook iterates over.
exp_def.EXP_LIST

In [None]:
# Translation from internal names to paper names.
# Keys are the metric identifiers passed around as `metric_name`; values are
# the labels shown in figure titles and heatmap tick labels (matplotlib
# mathtext is allowed).
METRIC_NAMES = {
    "crps_quantile": "CRPS-Q",
    "crps_slow": "CRPS-E",
    "energy_0.5": r"ES-Full$_{p=0.5}$",
    "energy_1": r"ES-Full$_{p=1}$",
    "energy_1.5": r"ES-Full$_{p=1.5}$",
    "energy_fast_0.5": r"ES-Partial$_{p=0.5}$",
    "energy_fast_1": r"ES-Partial$_{p=1}$",
    "energy_fast_1.5": r"ES-Partial$_{p=1.5}$",
    "variogram_0.5": "VG$_{p=0.5}$",
    "variogram_1": "VG$_{p=1}$",
    "variogram_2": "VG$_{p=2}$",
    "dawid_sebastiani": "DS",
}

# Same translation for experiment identifiers (`exp_name`).
# The `\!\!` negative spaces tighten the arrows in matplotlib titles; they are
# stripped again when these labels are reused for the LaTeX calibration table.
EXP_NAMES = {
    "wrong_mean_single": r"Normal (Single,$\mu \!\!\uparrow\!\!$)",
    "wrong_mean_all": r"Normal (All,$\mu \!\!\uparrow\!\!$)",
    "wrong_std_single_lower": r"Normal (Single,$\sigma \!\!\downarrow\!\!$)",
    "wrong_std_single_higher": r"Normal (Single,$\sigma \!\!\uparrow\!\!$)",
    "wrong_std_all_lower": r"Normal (All,$\sigma \!\!\downarrow\!\!$)",
    "wrong_std_all_higher": r"Normal (All,$\sigma \!\!\uparrow\!\!$)",
    "wrong_exponential_single_lower": r"Exponential (Single,$\mu \!\!\downarrow\!\!$)",
    "wrong_exponential_single_higher": r"Exponential (Single,$\mu \!\!\uparrow\!\!$)",
    "wrong_exponential_all_lower": r"Exponential (All,$\mu \!\!\downarrow\!\!$)",
    "wrong_exponential_all_higher": r"Exponential (All,$\mu \!\!\uparrow\!\!$)",
    "missing_skew_all": r"Skew Normal (All,$\alpha \!\!\downarrow\!\!$)",
    "missing_covariance_full": "Full Cov (Missing)",
    "extra_covariance_full": "Full Cov (Extra)",
    "missing_covariance_checker": "Checker Cov (Missing)",
    "extra_covariance_checker": "Checker Cov (Extra)",
    "missing_covariance_block": "Block Cov (Missing)",
    "extra_covariance_block": "Block Cov (Extra)",
    "missing_mixture": "Mixture (Missing)",
    "extra_mixture": "Mixture (Extra)",
}

In [None]:
# Load every result row, then index the rows by their
# (experiment, metric, dim, num_samples) combination for direct lookup.
full_df = pd.read_csv("./data/ror_results.csv")
groupby_dfs = dict(iter(full_df.groupby(["experiment", "metric", "dim", "num_samples"])))

In [None]:
def get_beta(exp_name, metric_name, dim, num_samples):
    """Estimate beta for one (experiment, metric, dim, num_samples) setting.

    The per-row differences ``GT - FCST`` of the matching results are
    summarised by their mean and standard deviation. The sum of
    ``exp_def.NUM_DRAWS`` such differences is then treated as normally
    distributed, and the returned value is the probability that this sum lies
    above the ``exp_def.ALPHA`` quantile of a zero-mean normal with the same
    standard deviation. Callers use ``1 - get_beta(...)`` as statistical power.

    NOTE(review): GT / FCST presumably hold the metric value of the
    ground-truth and of the alternative forecast -- confirm against the
    experiment code.

    Returns:
        The estimated beta as a float, or ``np.nan`` when the results file has
        no rows for the requested setting.
    """
    key = (exp_name, metric_name, dim, num_samples)
    if key not in groupby_dfs:
        return np.nan
    df = groupby_dfs[key]

    diffs = df.GT - df.FCST
    d_mean = diffs.mean()
    d_std = diffs.std()

    # Standard deviation of the sum of NUM_DRAWS independent differences.
    sum_std = exp_def.NUM_DRAWS**0.5 * d_std
    threshold = sum_std * scipy.stats.norm.ppf(exp_def.ALPHA)
    return 1 - scipy.stats.norm.cdf((threshold - exp_def.NUM_DRAWS * d_mean) / sum_std)

In [None]:
def beta_from_num_draws(n):
    """Map ground-truth sample sizes to the matching beta at ``NUM_DRAWS``.

    For every entry ``n_val`` of the 1-D sequence ``n`` this evaluates
    ``1 - cdf(z_a - sqrt(NUM_DRAWS / n_val) * (z_a - z_b))`` with
    ``z_a = ppf(exp_def.ALPHA)`` and ``z_b = ppf(1 - exp_def.BETA_NLL)``.
    The notebook uses the result to place sample-size ticks on the power
    colorbar.

    Returns:
        A float numpy array with one value per entry of ``n``.
    """
    z_alpha = scipy.stats.norm.ppf(exp_def.ALPHA)
    z_gap = z_alpha - scipy.stats.norm.ppf(1 - exp_def.BETA_NLL)

    def _beta_for(num_draws):
        # A zero sample size would divide by zero; use a tiny positive value.
        if num_draws == 0:
            num_draws = 1e-6
        shift = (exp_def.NUM_DRAWS / num_draws) ** 0.5 * z_gap
        return 1 - scipy.stats.norm.cdf(z_alpha - shift)

    return np.array([_beta_for(value) for value in n], dtype=float)

def stat_power_from_num_draws(n):
    """Return ``1 - beta_from_num_draws(n)``, element-wise."""
    beta = beta_from_num_draws(n)
    return 1 - beta

In [None]:
# Line style used to draw each statistical-power contour level.
CONTOUR_STYLE = dict(zip((0.8, 0.5, 0.2), ("solid", "dashed", "dotted")))
# Each level is coloured by sampling the "Reds" colormap at the level itself.
CONTOUR_COLOR = {level: matplotlib.colormaps["Reds"](level) for level in CONTOUR_STYLE}
# Line width shared by the contours and their markers on the colorbars.
CONTOUR_WIDTH = 4

In [None]:
from scipy.interpolate import RBFInterpolator

def plot_contours(ax, log_dim, log_samples, stat_power, contour):
    """Overlay smoothed iso-power contour lines on ``ax``.

    The power grid is interpolated with a radial basis function fitted on the
    non-NaN cells, evaluated on a dense 100 x 100 grid, and one contour line is
    drawn per requested level.

    Args:
        ax: Matplotlib axes to draw on.
        log_dim: 1-D array of x coordinates (one per column of ``stat_power``).
        log_samples: 1-D array of y coordinates (one per row of ``stat_power``).
        stat_power: 2-D array of shape ``(len(log_samples), len(log_dim))``;
            NaN cells are ignored by the interpolation.
        contour: Iterable of levels; each must be a key of ``CONTOUR_COLOR``
            and ``CONTOUR_STYLE``.
    """
    # Coordinates of every grid cell, flattened in the same row-major order as
    # ``stat_power.flatten()``.
    dim_coords, sample_coords = np.meshgrid(log_dim, log_samples)
    X = np.column_stack([dim_coords.ravel(), sample_coords.ravel()])
    y = stat_power.flatten()

    valid = ~np.isnan(y)
    f = RBFInterpolator(X[valid], y[valid], epsilon=1)

    # NOTE(review): hard-coded evaluation window; presumably the log2 range of
    # exp_def.DIM_LIST / exp_def.NUM_SAMPLES_LIST padded by 0.5 -- confirm.
    xgrid = np.mgrid[3.5:12.5:100j, 5.5:14.5:100j]
    ygrid = f(xgrid.reshape(2, -1).T).reshape(100, 100)

    for c in contour:
        ax.contour(
            *xgrid,
            ygrid,
            levels=[c],
            colors=[CONTOUR_COLOR[c]],
            linestyles=CONTOUR_STYLE[c],
            linewidths=CONTOUR_WIDTH,
        )

In [None]:
def plot_exp_metric(exp_name, metric_name, ax, cmp, contour=[]):
    """Draw the statistical-power heatmap of one experiment/metric pair on ``ax``.

    The power ``1 - get_beta(...)`` is evaluated for every combination of
    ``exp_def.DIM_LIST`` (x axis) and ``exp_def.NUM_SAMPLES_LIST`` (y axis);
    both axes are plotted in log2 units and labelled as powers of two.

    Args:
        exp_name: Experiment identifier (key of ``EXP_NAMES``).
        metric_name: Metric identifier (key of ``METRIC_NAMES``).
        ax: Matplotlib axes to draw on.
        cmp: Colormap used for the heatmap.
        contour: Power levels for which contour lines are overlaid; nothing is
            overlaid when empty.

    Returns:
        The object returned by ``ax.pcolormesh``, usable to build a colorbar.
    """
    dims = exp_def.DIM_LIST
    sample_sizes = exp_def.NUM_SAMPLES_LIST

    # Rows follow the forecast sample sizes, columns the dimensions.
    stat_power = np.zeros((len(sample_sizes), len(dims)))
    for row, num_samples in enumerate(sample_sizes):
        for col, dim in enumerate(dims):
            stat_power[row][col] = 1 - get_beta(exp_name, metric_name, dim, num_samples)

    log_dims = np.log2(dims)
    log_samples = np.log2(sample_sizes)

    pcolor_output = ax.pcolormesh(
        log_dims,
        log_samples,
        stat_power,
        shading="nearest",
        cmap=cmp,
        norm=colors.BoundaryNorm(boundaries=np.linspace(0, 1, 256), ncolors=256),
        edgecolors="white",
        linewidth=1,
    )

    if contour:
        plot_contours(ax, log_dims, log_samples, stat_power, contour)

    ax.set_title(f"{METRIC_NAMES[metric_name]}   {EXP_NAMES[exp_name]}")
    ax.set_xlabel("Number of variables $d$")
    ax.set_ylabel("Forecast sample size $m$")

    # Coordinates are log2 values, so show each tick as a power of two.
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_formatter("$2^{{ {x:.0f} }}$")

    ax.set_aspect('equal')

    return pcolor_output

In [None]:
def plot_exp_metric_new_fig(exp_name, metric_name, figsize, cmp, contour=[]):
    """Plot one experiment/metric heatmap in a new figure with its colorbar.

    The colorbar shows the statistical power, marks every level of ``contour``
    with the matching contour colour and style, and carries a secondary axis
    on its left side: each listed sample size ``n`` is placed at
    ``stat_power_from_num_draws(n)`` and ``exp_def.ALPHA`` is labelled as
    infinity.

    Args:
        exp_name: Experiment identifier (key of ``EXP_NAMES``).
        metric_name: Metric identifier (key of ``METRIC_NAMES``).
        figsize: Figure size forwarded to ``plt.figure``.
        cmp: Colormap used for the heatmap.
        contour: Power levels to highlight (keys of ``CONTOUR_COLOR``).

    Returns:
        The created figure.
    """
    fig = plt.figure(figsize=figsize)

    p_out = plot_exp_metric(exp_name, metric_name, fig.gca(), cmp, contour)

    cb = fig.colorbar(
        p_out,
        ax=fig.get_axes(),
        # The plotted power is computed for exp_def.NUM_DRAWS ground-truth
        # draws, so the label reads that value instead of hard-coding it.
        label=f"Statistical power ($1 - \\beta$) at $n = {exp_def.NUM_DRAWS}$",
        ticks=[0, 0.2, 0.4, 0.6, 0.8, 1.0],
        pad=0.25,
        shrink=0.82,
        aspect=15*0.82,
    )
    for c in contour:
        cb.ax.plot([0, 1], [c]*2, color=CONTOUR_COLOR[c], linestyle=CONTOUR_STYLE[c], linewidth=CONTOUR_WIDTH)
    cb.ax.set_yticklabels(["0.0", "0.2", "0.4", "0.6", "0.8", "1.0"])

    extra_ax = cb.ax.secondary_yaxis("left")
    extra_ax.set_ylabel('Ground-truth sample size ($n$)\nfor $1 - \\beta = 0.8$')
    num_draws_list = [10, 20, 30, 50, 100, 200, 500]
    extra_ax.set_ticks(stat_power_from_num_draws(num_draws_list).tolist() + [exp_def.ALPHA])
    # Raw string: "\i" is not a valid escape sequence in a normal literal.
    extra_ax.set_yticklabels([f"{n}" for n in num_draws_list] + [r"$\infty$"])

    fig.show()

    return fig

In [None]:
def plot_exp_metric_single_line(exp_metric_list, figsize, cmp, contour=[]):
    """Plot several experiment/metric heatmaps side by side with one colorbar.

    Args:
        exp_metric_list: Sequence of ``(exp_name, metric_name)`` pairs, one per
            panel, drawn left to right.
        figsize: Figure size forwarded to ``plt.subplots``.
        cmp: Colormap used for the heatmaps.
        contour: Power levels to highlight (keys of ``CONTOUR_COLOR``).

    Returns:
        The created figure. Its shared colorbar marks the ``contour`` levels
        and has a secondary left axis placing each listed sample size ``n`` at
        ``stat_power_from_num_draws(n)``, with ``exp_def.ALPHA`` labelled as
        infinity.
    """
    fig, axes = plt.subplots(
        figsize=figsize,
        ncols=len(exp_metric_list),
        nrows=1,
        sharex=False,
        sharey=True,
        squeeze=False,
        constrained_layout=True
    )
    for i, (exp_name, metric_name) in enumerate(exp_metric_list):
        p_out = plot_exp_metric(exp_name, metric_name, axes[0][i], cmp, contour)
        # Only the left-most panel keeps its y axis label
        if i > 0:
            axes[0][i].set_ylabel(None)

    cb = fig.colorbar(
        p_out,
        ax=axes,
        # The plotted power is computed for exp_def.NUM_DRAWS ground-truth
        # draws, so the label reads that value instead of hard-coding it.
        label=f"Statistical power ($1 - \\beta$) at $n = {exp_def.NUM_DRAWS}$",
        ticks=[0, 0.2, 0.4, 0.6, 0.8, 1.0],
        pad=0.04,
        shrink=0.72,
        aspect=15*0.72,
    )
    for c in contour:
        cb.ax.plot([0, 1], [c]*2, color=CONTOUR_COLOR[c], linestyle=CONTOUR_STYLE[c], linewidth=CONTOUR_WIDTH)
    cb.ax.set_yticklabels(["0.0", "0.2", "0.4", "0.6", "0.8", "1.0"])

    extra_ax = cb.ax.secondary_yaxis("left")
    extra_ax.set_ylabel('Ground-truth sample size ($n$)\nfor $1 - \\beta = 0.8$')
    num_draws_list = [10, 20, 30, 50, 100, 200, 500]
    extra_ax.set_ticks(stat_power_from_num_draws(num_draws_list).tolist() + [exp_def.ALPHA])
    # Raw string: "\i" is not a valid escape sequence in a normal literal.
    extra_ax.set_yticklabels([f"{n}" for n in num_draws_list] + [r"$\infty$"])

    fig.show()

    return fig

In [None]:
# Layout of the full-page figures: one inner list per row of panels.
PAGE_METRIC_ORDER = [
    ["crps_quantile", "crps_slow", "dawid_sebastiani"],
    ["energy_0.5", "energy_1", "energy_1.5"],
    ["energy_fast_0.5", "energy_fast_1", "energy_fast_1.5"],
    ["variogram_0.5", "variogram_1", "variogram_2"],
]

def plot_exp_metric_page(exp_name, figsize, cmp, contour=[]):
    """Plot every metric of ``PAGE_METRIC_ORDER`` for one experiment.

    The panel grid takes its shape from ``PAGE_METRIC_ORDER`` (all rows are
    expected to have the same length), and each row of panels gets its own
    colorbar.

    Args:
        exp_name: Experiment identifier (key of ``EXP_NAMES``).
        figsize: Figure size forwarded to ``plt.subplots``.
        cmp: Colormap used for the heatmaps.
        contour: Power levels to highlight (keys of ``CONTOUR_COLOR``).

    Returns:
        The created figure. Every colorbar marks the ``contour`` levels and has
        a secondary left axis placing each listed sample size ``n`` at
        ``stat_power_from_num_draws(n)``, with ``exp_def.ALPHA`` labelled as
        infinity.
    """
    num_rows = len(PAGE_METRIC_ORDER)
    num_cols = len(PAGE_METRIC_ORDER[0])
    fig, axes = plt.subplots(
        figsize=figsize,
        ncols=num_cols,
        nrows=num_rows,
        sharex=False,
        sharey=True,
        squeeze=False,
        constrained_layout=True
    )
    for i, row_metrics in enumerate(PAGE_METRIC_ORDER):
        for j, metric_name in enumerate(row_metrics):
            p_out = plot_exp_metric(exp_name, metric_name, axes[i][j], cmp, contour)
            # Only the left-most panel of each row keeps its y axis label
            if j > 0:
                axes[i][j].set_ylabel(None)

    # All panels share the same colormap and normalisation, so the last
    # heatmap can back the colorbar of every row.
    for i in range(num_rows):
        cb = fig.colorbar(
            p_out,
            ax=axes[i],
            # The plotted power is computed for exp_def.NUM_DRAWS ground-truth
            # draws, so the label reads that value instead of hard-coding it.
            label=f"Statistical power ($1 - \\beta$) at $n = {exp_def.NUM_DRAWS}$",
            ticks=[0, 0.2, 0.4, 0.6, 0.8, 1.0],
            pad=0.04,
            shrink=0.72,
            aspect=15*0.72,
        )
        for c in contour:
            cb.ax.plot([0, 1], [c]*2, color=CONTOUR_COLOR[c], linestyle=CONTOUR_STYLE[c], linewidth=CONTOUR_WIDTH)
        cb.ax.set_yticklabels(["0.0", "0.2", "0.4", "0.6", "0.8", "1.0"])

        extra_ax = cb.ax.secondary_yaxis("left")
        extra_ax.set_ylabel('Ground-truth sample size ($n$)\nfor $1 - \\beta = 0.8$')
        num_draws_list = [10, 20, 30, 50, 100, 200, 500]
        extra_ax.set_ticks(stat_power_from_num_draws(num_draws_list).tolist() + [exp_def.ALPHA])
        # Raw string: "\i" is not a valid escape sequence in a normal literal.
        extra_ax.set_yticklabels([f"{n}" for n in num_draws_list] + [r"$\infty$"])

    fig.show()

    return fig

In [None]:
def plot_exp_metric_all(cmp, contour=[]):
    """Plot the power heatmap of every experiment (rows) and metric (columns).

    Args:
        cmp: Colormap used for the heatmaps.
        contour: Power levels to highlight (keys of ``CONTOUR_COLOR``).

    Returns:
        The created figure, with a single colorbar shared by all panels. The
        colorbar marks the ``contour`` levels and has a secondary left axis
        placing each listed sample size ``n`` at
        ``stat_power_from_num_draws(n)``, with ``exp_def.ALPHA`` labelled as
        infinity.
    """
    num_metrics = len(exp_def.METRIC_FUNCTIONS)
    num_exp = len(exp_def.EXP_LIST)
    fig, axes = plt.subplots(
        figsize=(9 * num_metrics, 8 * num_exp),
        ncols=num_metrics,
        nrows=num_exp,
        sharex=False,
        sharey=False,
        # Always keep a 2-D array of axes so that axes[row][col] also works
        # when there is a single experiment or a single metric.
        squeeze=False,
    )

    for col, metric_name in enumerate(exp_def.METRIC_FUNCTIONS.keys()):
        for row, exp_name in enumerate(exp_def.EXP_LIST):
            p_out = plot_exp_metric(exp_name, metric_name, axes[row][col], cmp, contour)

    cb = fig.colorbar(
        p_out,
        ax=axes,
        label=f"Probabilities of true positive with {exp_def.NUM_DRAWS} Ground-Truth samples",
        ticks=[0, 0.2, 0.4, 0.6, 0.8, 1.0],
        pad=0.02,
    )
    # Mark the contour levels with the same colour / style / width as the
    # contour lines drawn on the heatmaps.
    for c in contour:
        cb.ax.plot([0, 1], [c]*2, color=CONTOUR_COLOR[c], linestyle=CONTOUR_STYLE[c], linewidth=CONTOUR_WIDTH)
    cb.ax.set_yticklabels(["0.0", "0.2", "0.4", "0.6", "0.8 = NLL", "1.0"])

    extra_ax = cb.ax.secondary_yaxis("left")
    extra_ax.set_ylabel('Required Ground-Truth samples to reach 80% true positive')
    num_draws_list = [10, 20, 30, 50, 100, 200, 500]
    extra_ax.set_ticks(stat_power_from_num_draws(num_draws_list).tolist() + [exp_def.ALPHA])
    # Raw string: "\i" is not a valid escape sequence in a normal literal.
    extra_ax.set_yticklabels([f"{n}" for n in num_draws_list] + [r"$\infty$"])

    fig.show()

    return fig

In [None]:
# Custom colormap defined by five anchors (position -> RGB):
#   0.0 -> (0.8, 0.8, 0.0), 0.2 -> (1.0, 1.0, 1.0), 0.5 -> (0.0, 0.5, 1.0),
#   0.9 -> (0.5, 0.0, 0.5), 1.0 -> (0.0, 0.0, 0.0)
# Each segment entry is (position, value just below, value just above).
my_cmp = colors.LinearSegmentedColormap(
    name="my_colormap",
    segmentdata=dict(
        red=[
            (0.0, 0.8, 0.8),
            (0.2, 1.0, 1.0),
            (0.5, 0.0, 0.0),
            (0.9, 0.5, 0.5),
            (1.0, 0.0, 0.0),
        ],
        green=[
            (0.0, 0.8, 0.8),
            (0.2, 1.0, 1.0),
            (0.5, 0.5, 0.5),
            (0.9, 0.0, 0.0),
            (1.0, 0.0, 0.0),
        ],
        blue=[
            (0.0, 0.0, 0.0),
            (0.2, 1.0, 1.0),
            (0.5, 1.0, 1.0),
            (0.9, 0.5, 0.5),
            (1.0, 0.0, 0.0),
        ],
    ),
    N=256,
)
# Invalid (NaN) values are drawn in grey.
my_cmp.set_bad(color="grey")
# The figures below use the reversed colormap.
my_cmp_r = my_cmp.reversed()

In [None]:
# Overview figure: every experiment against every metric.
all_fig = plot_exp_metric_all(cmp=my_cmp_r, contour=[0.8, 0.5, 0.2])

all_fig.savefig(
    "figures/all_metrics.pdf",
    bbox_inches="tight",
    pad_inches=0,
    transparent=False,
)

In [None]:
# Same experiment (wrong_mean_all) seen through three different metrics.
fig = plot_exp_metric_single_line(
    exp_metric_list=[
        ("wrong_mean_all", "crps_quantile"),
        ("wrong_mean_all", "energy_fast_1"),
        ("wrong_mean_all", "variogram_1"),
    ],
    figsize=(17, 6),
    cmp=my_cmp_r,
    contour=[0.8, 0.5, 0.2],
)
fig.savefig(
    "figures/wrong_mean_all_combined.pdf",
    bbox_inches="tight",
    pad_inches=0,
    transparent=False,
)

In [None]:
# Skewness and mixture experiments.
fig = plot_exp_metric_single_line(
    exp_metric_list=[
        ("missing_skew_all", "crps_quantile"),
        ("missing_skew_all", "energy_fast_1"),
        ("extra_mixture", "energy_fast_1"),
    ],
    figsize=(17, 6),
    cmp=my_cmp_r,
    contour=[0.8, 0.5, 0.2],
)
fig.savefig(
    "figures/moments_combined.pdf",
    bbox_inches="tight",
    pad_inches=0,
    transparent=False,
)

In [None]:
# Stand-alone figure for the variogram metric on wrong_std_all_higher.
fig = plot_exp_metric_new_fig(
    exp_name="wrong_std_all_higher",
    metric_name="variogram_1",
    figsize=(8, 6),
    cmp=my_cmp_r,
    contour=[0.8, 0.5, 0.2],
)
fig.savefig(
    "figures/variogram_monotonicity.pdf",
    bbox_inches="tight",
    pad_inches=0,
    transparent=False,
)

In [None]:
# Variogram metric on three standard-deviation experiments.
fig = plot_exp_metric_single_line(
    exp_metric_list=[
        ("wrong_std_single_lower", "variogram_1"),
        ("wrong_std_single_higher", "variogram_1"),
        ("wrong_std_all_higher", "variogram_1"),
    ],
    figsize=(17, 6),
    cmp=my_cmp_r,
    contour=[0.8, 0.5, 0.2],
)
fig.savefig(
    "figures/variogram_combined.pdf",
    bbox_inches="tight",
    pad_inches=0,
    transparent=False,
)

In [None]:
# Same experiment (missing_covariance_full) seen through three metrics.
fig = plot_exp_metric_single_line(
    exp_metric_list=[
        ("missing_covariance_full", "energy_fast_1"),
        ("missing_covariance_full", "variogram_1"),
        ("missing_covariance_full", "dawid_sebastiani"),
    ],
    figsize=(17, 6),
    cmp=my_cmp_r,
    contour=[0.8, 0.5, 0.2],
)
fig.savefig(
    "figures/missing_covariance_combined.pdf",
    bbox_inches="tight",
    pad_inches=0,
    transparent=False,
)

In [None]:
# One full-page figure (all metrics) per experiment.
for exp in exp_def.EXP_LIST:
    fig = plot_exp_metric_page(
        exp_name=exp,
        figsize=(17, 23),
        cmp=my_cmp_r,
        contour=[0.8, 0.5, 0.2],
    )
    fig.savefig(
        f"figures/full_page_{exp}.pdf",
        bbox_inches="tight",
        pad_inches=0,
        transparent=False,
    )

# Summary table

In [None]:
def get_single_summary_df(exp_name, metric_name):
    """Tabulate the statistical power of one experiment/metric pair.

    Returns:
        A DataFrame with columns ``dim``, ``num_samples`` and ``stat_power``
        (``1 - get_beta(...)``), one row per combination of
        ``exp_def.DIM_LIST`` and ``exp_def.NUM_SAMPLES_LIST``.
    """
    rows = [
        {
            "dim": dim,
            "num_samples": num_samples,
            "stat_power": 1 - get_beta(exp_name, metric_name, dim, num_samples),
        }
        for dim in exp_def.DIM_LIST
        for num_samples in exp_def.NUM_SAMPLES_LIST
    ]
    return pd.DataFrame(rows)

def calc_ror50(df):
    """Fraction of settings whose statistical power exceeds 0.5.

    Only rows with ``num_samples > dim`` and a non-NaN ``stat_power`` are
    taken into account.
    """
    usable = (df.num_samples > df.dim) & df.stat_power.notna()
    return (df.loc[usable, "stat_power"] > 0.5).mean()

def calc_best_mean_power(df):
    """Average, over the dimensions, of the best power reached for each dim."""
    best_per_dim = df.groupby("dim")["stat_power"].max()
    return best_per_dim.mean()
    
def calc_max_power(df):
    """Highest statistical power found in any row of ``df``."""
    return df["stat_power"].max()
    
def calc_minmax_power(df):
    """Smallest, over the dimensions, of the best power reached for each dim."""
    best_per_dim = df.groupby("dim")["stat_power"].max()
    return best_per_dim.min()
    
def all_summary_df(func):
    """Apply a summary function to every metric/experiment pair.

    Args:
        func: Callable taking the DataFrame returned by
            ``get_single_summary_df`` and returning one summary value.

    Returns:
        A DataFrame with columns ``metric``, ``exp`` and ``summary``.
    """
    rows = [
        {
            "metric": metric_name,
            "exp": exp_name,
            "summary": func(get_single_summary_df(exp_name, metric_name)),
        }
        for metric_name in exp_def.METRIC_FUNCTIONS
        for exp_name in exp_def.EXP_LIST
    ]
    return pd.DataFrame(rows)

In [None]:
def plot_ror_df(df, figsize, single_of_each=False):
    """Draw a summary table as an annotated heatmap (experiments x metrics).

    Args:
        df: DataFrame with columns ``metric``, ``exp`` and ``summary``.
        figsize: Figure size forwarded to ``plt.figure``.
        single_of_each: When true, drop the metric columns whose name ends
            with ``_0.5``, ``_1.5`` or ``_2`` and remove the ``p=1`` subscript
            from the remaining column labels.

    Returns:
        The created figure.
    """
    table = pd.pivot_table(df, values="summary", index="exp", columns="metric")

    # Columns and rows follow the order of the paper-name dictionaries.
    table = table[list(METRIC_NAMES)]
    if single_of_each:
        kept = [name for name in table.columns if not name.endswith(("_0.5", "_1.5", "_2"))]
        table = table[kept]
    table = table.loc[list(EXP_NAMES)]

    xticklabels = [METRIC_NAMES[name] for name in table.columns]
    if single_of_each:
        xticklabels = [label.replace(r"$_{p=1}$", "") for label in xticklabels]

    yticklabels = [EXP_NAMES[name] for name in table.index]

    fig = plt.figure(figsize=figsize)
    ax = fig.gca()
    sns.heatmap(
        table,
        annot=True,
        fmt=".2f",
        annot_kws={"fontsize": 18},
        ax=ax,
        vmin=0,
        vmax=1,
        cbar=False,
        cmap="viridis",
        linewidths=1,
        linecolor="white",
        xticklabels=xticklabels,
        yticklabels=yticklabels,
    )
    # Metric labels go on top, slanted so that they do not overlap.
    ax.xaxis.tick_top()
    ax.set_xticklabels(ax.get_xticklabels(), rotation = 45, ha="left", rotation_mode="anchor")
    ax.set_xlabel(None)
    ax.set_ylabel(None)
    fig.show()

    return fig

In [None]:
%%time
# Share of settings (with num_samples > dim) whose power exceeds 0.5.
sdf = all_summary_df(calc_ror50)

fig = plot_ror_df(sdf, figsize=(7, 10), single_of_each=True)
fig.savefig(
    "figures/ror_50_ratio_subset.pdf",
    bbox_inches="tight",
    pad_inches=0,
    transparent=False,
)

In [None]:
%%time
# Mean over the dimensions of the best power reached for each dimension.
sdf_best_mean_power = all_summary_df(calc_best_mean_power)

fig = plot_ror_df(sdf_best_mean_power, figsize=(7, 10), single_of_each=True)
fig.savefig(
    "figures/best_mean_power_subset.pdf",
    bbox_inches="tight",
    pad_inches=0,
    transparent=False,
)

In [None]:
%%time
# Highest power reached over all settings.
sdf_max_power = all_summary_df(calc_max_power)

fig = plot_ror_df(sdf_max_power, figsize=(7, 10), single_of_each=True)
fig.savefig(
    "figures/best_max_subset.pdf",
    bbox_inches="tight",
    pad_inches=0,
    transparent=False,
)

In [None]:
%%time
# Minimum over the dimensions of the best power reached for each dimension.
sdf_minmax_power = all_summary_df(calc_minmax_power)

fig = plot_ror_df(sdf_minmax_power, figsize=(7, 10), single_of_each=True)
fig.savefig(
    "figures/minmax_subset.pdf",
    bbox_inches="tight",
    pad_inches=0,
    transparent=False,
)

# Calibration results tables

In [None]:
# The negative spaces (\!\!) have to be removed from the labels because the
# spacing differs from the one matplotlib uses.
calib_df = pd.DataFrame(
    {
        EXP_NAMES[k].replace(r"\!\!", ""): exp_def.EXP_CALIBRATION[k]
        for k in exp_def.EXP_LIST
    }
)
calib_df.index.name = "d"
calib_df.T

In [None]:
# Print the LaTeX source of the calibration table (one row per experiment).
# escape=False keeps the math markup of the experiment labels intact.
print(
    calib_df.T.to_latex(
        index=True,
        index_names=True,
        bold_rows=True,
        escape=False,
        float_format="%.4f",
        position="h",
    )
)