In [20]:
import numpy as np
import pandas as pd
import sys
sys.path.append(".")
sys.path.append("..")
from quantile_binary_search.method import quantile_binary_search
from kaplan_et_al.single_quantile_algo import kaplan_quantile
from bisect import bisect_right

# Seed NumPy's legacy global RNG, which np.random.normal (used to generate the
# inputs) draws from. Generators created via np.random.default_rng() are
# independent of this seed.
np.random.seed(42)

# Compare standard deviations

The cells below produce Table 2 in Appendix D: the mean relative error of each standard-deviation estimator.

In [28]:
# --- Experiment parameters ---------------------------------------------------
ds = [1]               # values of d; recorded per row and used to scale rho in the loop
n = 10000              # number of samples drawn per input
mean = 10              # location of the generated data and centre of the search range
rhos = [0.001, 0.01]   # rho values swept by the experiment
trials = 100           # repetitions per (sigma, rho, input, d, algo) combination

# Result accumulator filled by the experiment loop.
df = pd.DataFrame()


def _direct(q):
    """Adapt direct_estimation with quantile q to the (data, n, lower, upper, rho) signature."""
    return lambda data, n, lower, upper, rho: direct_estimation(data, rho, lower, upper, q)


def _robust(k_groups):
    """Adapt robust_estimation with k_groups to the (data, n, lower, upper, rho) signature."""
    return lambda data, n, lower, upper, rho: robust_estimation(data, rho, lower, upper, k_groups=k_groups)


# Estimators under comparison, keyed by the label that appears in the results.
# Insertion order is the order in which the loop runs them.
algos = {
    "direct-075": _direct(0.75),
    "direct-0841": _direct(0.841),
    "direct-09": _direct(0.9),
    "robust-kgroups1": _robust(1),  # 1 is also robust_estimation's default
    "robust-kgroups4": _robust(4),
    # "robust-kgroups16": _robust(16),  # disabled
}

# Input generators, keyed by label. NOTE: `sigma` is not defined in this cell;
# the lambda looks it up at call time, so it relies on the global `sigma` that
# the experiment loop assigns before calling it.
inputs = {
    "normal": lambda: np.sort(np.random.normal(mean, sigma, size=n)),
}

def direct_estimation(data, rho, lower, upper, q):
    """Estimate the spread of `data` as the gap between its q-quantile and its median.

    Two quantiles are requested from `kaplan_quantile` over the search range
    `(lower, upper)`: the 0.5 quantile and the `q` quantile. Each query is
    given half of `rho`.

    Args:
        data: Samples, passed through unchanged to `kaplan_quantile`.
        rho: Total budget for this call; split evenly between the two queries.
        lower: Lower end of the quantile search range.
        upper: Upper end of the quantile search range.
        q: The upper quantile to compare against the median. For normally
            distributed data the 0.841 quantile lies about one standard
            deviation above the median, so that choice targets sigma directly.

    Returns:
        The `q` quantile estimate minus the median estimate.
    """
    search_range = (lower, upper)
    # The median is queried first, then the q-quantile (same order as before).
    median = kaplan_quantile(data, search_range, .5, rho / 2, False)
    upper_quantile = kaplan_quantile(data, search_range, q, rho / 2, False)
    return upper_quantile - median

def robust_estimation(data, rho, lower, upper, k_groups=1, rng=None):
    """Estimate the standard deviation from halved squared differences of random pairs.

    A private copy of `data` is shuffled `2 * k_groups` times. After each
    shuffle, consecutive elements are paired, `0.5 * (x - y) ** 2` is computed
    for every pair, and the pair values are averaged in consecutive groups of
    `k_groups` (an incomplete trailing group is dropped). The 0.5 quantile of
    all group averages is obtained from `kaplan_quantile` on the range
    `(0, upper ** 2)`, divided by `(1 - 2 / (9 * k_groups)) ** 3`, and
    square-rooted.

    Args:
        data: 1-D samples. The input is copied and never modified.
        rho: Budget for this call. It is halved (the other half is reserved
            for mean estimation) and then divided by `2 * k_groups` before
            being passed to `kaplan_quantile`.
        lower: Unused; kept so the signature matches `direct_estimation`.
        upper: Upper bound on the data; its square is used as the upper end of
            the quantile search range.
        k_groups: Number of pair values averaged into each estimate.
        rng: Optional `numpy.random.Generator` used for shuffling. When
            omitted, a fresh unseeded generator is created.

    Returns:
        The square root of the corrected median of the group averages.

    Raises:
        ValueError: If `k_groups` is smaller than 1.
    """
    if k_groups < 1:
        raise ValueError("k_groups must be a positive integer")
    if rng is None:
        rng = np.random.default_rng()

    upper_sq = upper * upper
    rounds = 2 * k_groups
    # Half of rho is kept for mean estimation; the rest is divided by the
    # number of shuffling rounds.
    rho_quantile = rho / 2 / rounds

    # Shuffle a copy: Generator.shuffle works in place and the caller reuses
    # `data` for other estimators.
    shuffled = np.array(data, copy=True)
    n_pairs = len(shuffled) // 2
    n_grouped = (n_pairs // k_groups) * k_groups  # drop the incomplete last group

    per_round = []
    for _ in range(rounds):
        rng.shuffle(shuffled)
        diffs = shuffled[0:2 * n_pairs:2] - shuffled[1:2 * n_pairs:2]
        pairwise = 0.5 * diffs ** 2
        per_round.append(pairwise[:n_grouped].reshape(-1, k_groups).sum(axis=1) / k_groups)
    robust_std_estimates = np.concatenate(per_round)

    median = kaplan_quantile(robust_std_estimates, (0, upper_sq), .5, rho_quantile, False)
    # NOTE(review): (1 - 2/(9k))**3 has the form of the Wilson-Hilferty
    # approximation to the median of a chi-square(k)/k variable; presumably it
    # converts the median of the group averages into a variance estimate.
    return np.sqrt(median / (1 - 2 / (9 * k_groups)) ** 3)



# Run every estimator `trials` times for each (sigma, rho, input, d) setting and
# record the relative error of the estimated standard deviation.
# NOTE: `sigma` must stay a module-level loop variable because the input
# generators in `inputs` read it when they are called.
records = []
for sigma in [0.001, 1]:
    for rho in rhos:
        for input_name, make_input in inputs.items():
            data = make_input()
            for d in ds:
                for algo, f in algos.items():
                    for _ in range(trials):
                        # Pass a copy so an estimator that reorders its input
                        # cannot affect later trials or other estimators.
                        estimate = f(np.copy(data), n, mean - 5 * sigma, mean + 5 * sigma, rho / (8 * d))
                        records.append({
                            "algo": algo,
                            "input": input_name,
                            "d": d,
                            "n": n,
                            "rho": rho,
                            "mean": mean,
                            "sigma": sigma,
                            "rel-err": float(np.abs(sigma - estimate) / sigma),
                        })

# Build the frame once instead of concatenating one row at a time.
df = pd.DataFrame(records)

In [30]:
# Mean relative error per (input, rho, sigma, algo), rounded to three decimals.
(
    df[["algo", "input", "rho", "rel-err", "sigma"]]
    .groupby(["input", "rho", "sigma", "algo"])
    .mean()
    .round(3)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,rel-err
input,rho,sigma,algo,Unnamed: 4_level_1
normal,0.001,0.001,direct-075,0.332
normal,0.001,0.001,direct-0841,0.033
normal,0.001,0.001,direct-09,0.279
normal,0.001,0.001,robust-kgroups1,0.027
normal,0.001,0.001,robust-kgroups4,0.017
normal,0.001,1.0,direct-075,0.308
normal,0.001,1.0,direct-0841,0.042
normal,0.001,1.0,direct-09,0.314
normal,0.001,1.0,robust-kgroups1,0.025
normal,0.001,1.0,robust-kgroups4,0.012
