```
Copyright 2023 ServiceNow
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
```

Measure the statistical power of multiple metrics for a single experiment, for varying values of the $\varepsilon$ parameter, which controls the difference between the ground-truth and the forecast distributions.

In [None]:
import sys
sys.path.append("..")

In [None]:
import numpy as np
import pandas as pd
import scipy
from pprint import pprint
import matplotlib.pyplot as plt

In [None]:
from ror.experiments import h0_vs_h1 as exp_def

We take 3 multivariate gaussian distributions:
* Ground-Truth: $y \sim N(\eta, \Lambda)$
* First forecast: $x \sim N(\mu, \Sigma)$
* Second forecast: $x \sim N(\mu', \Sigma')$

The negative log-likelihood for the first forecast is:
$$
NLL(y) =
\frac{d}{2} \log{2\pi}
+ \frac{1}{2} \log \det \Sigma
+ \frac{1}{2} (y-\mu)^T \Sigma^{-1} (y-\mu)
$$
And the difference between the negative log-likelihood of the first and second forecasts is:
$$
\Delta(y) =
\frac{1}{2} (\log \det \Sigma - \log \det \Sigma')
+ \frac{1}{2} (y-\mu)^T \Sigma^{-1} (y-\mu)
- \frac{1}{2} (y-\mu')^T \Sigma'^{-1} (y-\mu')
$$

The expectation of $\Delta$ over the Ground-Truth is:
$$
\mathbb{E}[\Delta] =
\frac{1}{2} (\log \det \Sigma - \log \det \Sigma')
+ \frac{1}{2} (\eta-\mu)^T \Sigma^{-1} (\eta-\mu)
- \frac{1}{2} (\eta-\mu')^T \Sigma'^{-1} (\eta-\mu')
+ \frac{1}{2} \mathrm{tr} \Lambda^T \Sigma^{-1}
- \frac{1}{2} \mathrm{tr} \Lambda^T \Sigma'^{-1}
$$

The expectation of $\Delta^2$ over the Ground-Truth is:
$$
\mathbb{E}[\Delta^2] =
\frac{1}{4} K(\Sigma, \Sigma')^2
+ \frac{1}{2} K(\Sigma, \Sigma') A(\mu, \Sigma) - \frac{1}{2} K(\Sigma, \Sigma') A(\mu', \Sigma')
+ \frac{1}{4} B(\mu, \Sigma, \mu, \Sigma) + \frac{1}{4} B(\mu', \Sigma', \mu', \Sigma') - \frac{1}{2} B(\mu, \Sigma, \mu', \Sigma')
$$

Where (note that $\Lambda^T = \Lambda$, and same for $\Sigma$ and $\Sigma'$):
\begin{eqnarray}
K(\Sigma, \Sigma') & = & \log \det \Sigma - \log \det \Sigma' \\
A(\mu, \Sigma) & = & (\eta-\mu)^T \Sigma^{-1} (\eta-\mu) + \mathrm{tr} \Lambda^T \Sigma^{-1} \\
B(\mu, \Sigma, \mu', \Sigma') & = & 
  \mathrm{tr} \Lambda^T \Sigma^{-1} \cdot \mathrm{tr} \Lambda^T \Sigma'^{-1} \\
& & + 2 \cdot \mathrm{tr} \Lambda^T (\Sigma^{-1})^T \Lambda \Sigma'^{-1} \\
& & + \mathrm{tr} \Lambda^T \Sigma^{-1} \cdot (\eta-\mu')^T \Sigma'^{-1} (\eta-\mu') \\
& & + \mathrm{tr} \Lambda^T \Sigma'^{-1} \cdot (\eta-\mu)^T \Sigma^{-1} (\eta-\mu) \\
& & + 4 \cdot (\eta-\mu)^T (\Sigma^{-1})^T \Lambda \Sigma'^{-1} (\eta-\mu') \\
& & + (\eta-\mu)^T \Sigma^{-1} (\eta-\mu) \cdot (\eta-\mu')^T \Sigma'^{-1} (\eta-\mu')
\end{eqnarray}

And rewriting $\mathbb{E}[\Delta]$ using these quantities:
$$
\mathbb{E}[\Delta] = \frac{1}{2} K(\Sigma, \Sigma') + \frac{1}{2} A(\mu, \Sigma) - \frac{1}{2}A(\mu', \Sigma')
$$

In [None]:
def k_eq(sigma1, sigma2):
    """Return K(sigma1, sigma2): the difference of the two log-determinants.

    Only the log-magnitude component returned by ``np.linalg.slogdet`` is
    used; the sign component of each determinant is discarded.
    """
    _, logabsdet1 = np.linalg.slogdet(sigma1)
    _, logabsdet2 = np.linalg.slogdet(sigma2)
    return logabsdet1 - logabsdet2

def a_eq(mean, cov, mu, sigma):
    """Return A(mu, sigma) for a ground truth with the given mean and covariance.

    The value is the quadratic form of ``mean - mu`` under the inverse of
    ``sigma``, plus the trace of ``cov`` times that inverse.
    """
    precision = np.linalg.inv(sigma)
    offset = mean - mu
    quadratic_term = offset @ precision @ offset
    trace_term = np.trace(cov @ precision)
    return quadratic_term + trace_term

def b_eq(mean, cov, mu1, sigma1, mu2, sigma2):
    """Return B(mu1, sigma1, mu2, sigma2) for a ground truth (mean, cov).

    The result is the sum of six terms built from the inverses of ``sigma1``
    and ``sigma2``: products of the two traces and the two quadratic forms,
    plus the two cross terms that couple both inverses through ``cov``.
    """
    precision1 = np.linalg.inv(sigma1)
    precision2 = np.linalg.inv(sigma2)
    offset1 = mean - mu1
    offset2 = mean - mu2

    # Building blocks shared by several terms of the sum.
    trace1 = np.trace(cov @ precision1)
    trace2 = np.trace(cov @ precision2)
    quad1 = offset1 @ precision1 @ offset1
    quad2 = offset2 @ precision2 @ offset2
    cross_trace = np.trace(cov @ precision1 @ cov @ precision2)
    cross_quad = offset1 @ precision1 @ cov @ precision2 @ offset2

    # Terms are added left to right in the same order as the written formula.
    return (
        trace1 * trace2
        + 2 * cross_trace
        + trace1 * quad2
        + trace2 * quad1
        + 4 * cross_quad
        + quad1 * quad2
    )

In [None]:
# Delta = Ground-Truth metric - Forecast metric, so its mean is negative
def exp_delta(mean, cov, mu1, sigma1, mu2, sigma2):
    """Return E[Delta] for a ground truth (mean, cov) and two Gaussian forecasts.

    Computed as 0.5 * K + 0.5 * A(first forecast) - 0.5 * A(second forecast),
    where K and A are evaluated by ``k_eq`` and ``a_eq``.
    """
    log_det_gap = k_eq(sigma1, sigma2)
    a_first = a_eq(mean, cov, mu1, sigma1)
    a_second = a_eq(mean, cov, mu2, sigma2)
    return 0.5 * log_det_gap + 0.5 * a_first - 0.5 * a_second

def exp_delta2(mean, cov, mu1, sigma1, mu2, sigma2):
    """Return E[Delta^2] for a ground truth (mean, cov) and two Gaussian forecasts.

    Combines K (``k_eq``), A (``a_eq``) and B (``b_eq``) with the weights
    1/4 K^2 + 1/2 K A_1 - 1/2 K A_2 + 1/4 B_11 + 1/4 B_22 - 1/2 B_12.
    """
    log_det_gap = k_eq(sigma1, sigma2)
    a_first = a_eq(mean, cov, mu1, sigma1)
    a_second = a_eq(mean, cov, mu2, sigma2)
    b_first = b_eq(mean, cov, mu1, sigma1, mu1, sigma1)
    b_second = b_eq(mean, cov, mu2, sigma2, mu2, sigma2)
    b_cross = b_eq(mean, cov, mu1, sigma1, mu2, sigma2)

    # Summed left to right, one term per line of the formula.
    return (
        0.25 * log_det_gap * log_det_gap
        + 0.5 * log_det_gap * a_first
        - 0.5 * log_det_gap * a_second
        + 0.25 * b_first
        + 0.25 * b_second
        - 0.5 * b_cross
    )

def std_delta(mean, cov, mu1, sigma1, mu2, sigma2):
    """Return the standard deviation of Delta as sqrt(E[Delta^2] - E[Delta]^2).

    NOTE(review): if round-off makes the difference slightly negative, the
    square root is not a real number -- confirm callers avoid that regime.
    """
    second_moment = exp_delta2(mean, cov, mu1, sigma1, mu2, sigma2)
    first_moment = exp_delta(mean, cov, mu1, sigma1, mu2, sigma2)
    variance = second_moment - first_moment**2
    return variance ** 0.5

In [None]:
def get_threshold_for_alpha(d_mean, d_std, num_gt, alpha):
    """Return the alpha-quantile of a zero-mean normal with std sqrt(num_gt) * d_std.

    Parameters:
        d_mean: not used by this function.
        d_std: standard deviation of a single Delta.
        num_gt: number of Delta values being summed.
        alpha: probability level passed to the standard normal quantile function.
    """
    total_std = num_gt**0.5 * d_std
    z_alpha = scipy.stats.norm.ppf(alpha)
    return total_std * z_alpha

def get_beta_from_threshold(d_mean, d_std, num_gt, threshold):
    """Return the probability that a sum of ``num_gt`` Deltas exceeds ``threshold``.

    The sum is modelled as normal with mean ``num_gt * d_mean`` and standard
    deviation ``sqrt(num_gt) * d_std``; the result is one minus its CDF at
    ``threshold``.
    """
    total_mean = num_gt * d_mean
    total_std = num_gt**0.5 * d_std
    z_score = (threshold - total_mean) / total_std
    return 1 - scipy.stats.norm.cdf(z_score)

def beta_nll(exp_name, epsilon, dim, num_gt, alpha):
    """Return beta for the NLL metric using the closed-form moments of Delta.

    Parameters:
        exp_name: key into ``exp_def.EXP_GEN`` selecting the experiment.
        epsilon: experiment parameter passed to the generator.
        dim: dimension passed to the generator.
        num_gt: number of ground-truth draws summed in the test statistic.
        alpha: probability level used to set the rejection threshold.

    Returns:
        The value of ``get_beta_from_threshold`` at the threshold chosen for
        ``alpha``.
    """
    pair_diff = exp_def.EXP_GEN[exp_name](dim, epsilon)
    mu_gt, sigma_gt, mu_fcst, sigma_fcst = pair_diff.get_gaussian_parameters()

    # The ground truth doubles as the first forecast, so Delta compares the
    # NLL of the true distribution with the NLL of the alternative forecast.
    d_mean = exp_delta(mu_gt, sigma_gt, mu_gt, sigma_gt, mu_fcst, sigma_fcst)
    d_std = std_delta(mu_gt, sigma_gt, mu_gt, sigma_gt, mu_fcst, sigma_fcst)

    t = get_threshold_for_alpha(d_mean, d_std, num_gt, alpha)
    return get_beta_from_threshold(d_mean, d_std, num_gt, t)

In [None]:
# Single seeded generator shared by every sampling call below.
rng = np.random.default_rng(12345)

def metric_beta(metric_name, exp_name, epsilon, dim, num_gt, alpha, num_samples, num_forecast):
    """Return beta for a sampled metric, using empirical moments of Delta.

    Each trial draws one target from the ground truth, scores it against a
    ground-truth forecast sample and an alternative forecast sample, and
    records the difference of the two metric values.

    Parameters:
        metric_name: key into ``exp_def.METRIC_FUNCTIONS``.
        exp_name: key into ``exp_def.EXP_GEN`` selecting the experiment.
        epsilon: experiment parameter passed to the generator.
        dim: dimension passed to the generator.
        num_gt: number of ground-truth draws summed in the test statistic.
        alpha: probability level used to set the rejection threshold.
        num_samples: number of trials used to estimate the mean and std of Delta.
        num_forecast: size requested for each forecast sample scored in a trial
            (assumes ``sample(n, rng)`` returns ``n`` draws -- TODO confirm).

    Returns:
        The value of ``get_beta_from_threshold`` at the threshold chosen for
        ``alpha``.
    """
    pair_diff = exp_def.EXP_GEN[exp_name](dim, epsilon)
    dist_gt, dist_fcst = pair_diff.get_distributions()
    metric_func = exp_def.METRIC_FUNCTIONS[metric_name]

    deltas = []
    for _ in range(num_samples):
        # Draw order (target, ground-truth forecast, alternative forecast) is
        # fixed because all three consume the shared generator.
        target = dist_gt.sample(1, rng)[0]
        # Forecast samples are sized by num_forecast; num_samples only sets
        # the number of trials.
        gt_forecasts = dist_gt.sample(num_forecast, rng)
        forecasts = dist_fcst.sample(num_forecast, rng)

        metric_gt = metric_func(target, gt_forecasts)
        metric_fcst = metric_func(target, forecasts)
        deltas.append(metric_gt - metric_fcst)

    deltas = np.array(deltas)
    d_mean = deltas.mean()
    d_std = deltas.std()

    t = get_threshold_for_alpha(d_mean, d_std, num_gt, alpha)
    return get_beta_from_threshold(d_mean, d_std, num_gt, t)

In [None]:
%%time
EXP_NAME = "missing_covariance_full"
D = 2**6   # dimension handed to the experiment generator
M = 2**12  # forwarded to metric_beta as num_forecast

results = []
# 39 interior points of [0, 1] in steps of 0.025; both endpoints are dropped.
for epsilon in np.linspace(0, 1, 41)[1:-1]:
    # NLL comes first so the columns stay ordered NLL, then the sampled metrics.
    datum = {
        "epsilon": epsilon,
        "NLL": 1 - beta_nll(EXP_NAME, epsilon, D, exp_def.NUM_DRAWS, exp_def.ALPHA),
    }
    # The sampled metrics consume the shared rng, so their order is kept fixed.
    for metr in ("crps_quantile", "energy_fast_1", "variogram_1", "dawid_sebastiani"):
        datum[metr] = 1 - metric_beta(
            metr,
            EXP_NAME,
            epsilon,
            D,
            exp_def.NUM_DRAWS,
            exp_def.ALPHA,
            exp_def.DRAWS_PER_TRIAL,
            M,
        )
    results.append(datum)

# One row per epsilon, one column of statistical power (1 - beta) per metric.
df = pd.DataFrame(results)
df = df.set_index("epsilon")

In [None]:
import matplotlib.pylab as pylab
# Use one large font size for axis labels, titles and tick labels, and draw
# thicker axes lines.
params = {
    **{
        key: 18
        for key in (
            "axes.labelsize",
            "axes.titlesize",
            "xtick.labelsize",
            "ytick.labelsize",
        )
    },
    "axes.linewidth": 2,
}
pylab.rcParams.update(params)

In [None]:
# Statistical power of each metric as a function of epsilon.
fig, ax = plt.subplots(figsize=(8, 6))

# Vertical marker at the value stored in EXP_CALIBRATION for this experiment
# and dimension (presumably the calibrated epsilon -- confirm in exp_def).
ax.axvline(exp_def.EXP_CALIBRATION[EXP_NAME][D], linewidth=3)
ax.plot(df["NLL"], label="NLL", color="k", linestyle="solid", linewidth=3)
ax.plot(df["crps_quantile"], label="CRPS-Q", color="b", linestyle="dotted", linewidth=3)
ax.plot(df["energy_fast_1"], label="ES-Partial$_{p=1}$", color="r", linestyle="dotted", linewidth=3)
ax.plot(df["variogram_1"], label="VG$_{p=1}$", color="g", linestyle="dashed", linewidth=3)
ax.plot(df["dawid_sebastiani"], label="DS", color="y", linestyle="dashdot", linewidth=3)

ax.set_xlim([df.index.min(), df.index.max()])
ax.set_ylim([0, 1])
ax.set_xlabel("Covariance between variables $\\rho = \\varepsilon$")
ax.set_ylabel("Statistical power $1 - \\beta$")

ax.legend(fontsize=18, handlelength=4)

# Plain string: the path has no placeholders, so no f-string is needed.
fig.savefig("figures/calibration_corr.pdf", bbox_inches="tight", pad_inches=0, transparent=False)
# plt.show() works with the notebook's inline backend, whereas Figure.show()
# warns there because that backend is not a GUI backend.
plt.show()