In [2]:
import pickle as pkl
import numpy as np

def bootstrap_estimate(data, n_samples=500000, ci=0.95, rng=None):
    """Estimate the mean of ``data`` with a percentile-bootstrap confidence interval.

    The data are resampled with replacement ``n_samples`` times (each
    resample has the same length as ``data``); the interval bounds are the
    ``(1 - ci) / 2`` and ``1 - (1 - ci) / 2`` quantiles of the resample means.

    Args:
        data: One-dimensional array-like of observations.
        n_samples: Number of bootstrap resamples to draw.
        ci: Confidence level of the interval, in ``[0, 1]``.
        rng: Optional object exposing ``choice(a, size=...)`` (e.g. a
            ``numpy.random.Generator``) for reproducible resampling. When
            ``None`` the global ``numpy.random`` state is used.

    Returns:
        Tuple ``(mean, ci_low, ci_high, samples)`` where ``mean`` is the plain
        mean of ``data`` and ``samples`` is the array of ``n_samples``
        bootstrap means.

    Raises:
        ValueError: If ``data`` is not one-dimensional or is empty, if
            ``n_samples`` is not positive, or if ``ci`` is outside ``[0, 1]``.
    """
    data = np.asarray(data)
    if data.ndim != 1:
        raise ValueError(f"data must be one-dimensional, got shape {data.shape}")
    if data.size == 0:
        raise ValueError("data must contain at least one observation")
    if n_samples < 1:
        raise ValueError(f"n_samples must be positive, got {n_samples}")
    if not 0. <= ci <= 1.:
        raise ValueError(f"ci must lie in [0, 1], got {ci}")

    sampler = np.random if rng is None else rng
    # One column per bootstrap resample; each column holds len(data) indices
    # drawn with replacement.
    indices = sampler.choice(len(data), size=(len(data), n_samples))
    samples = np.mean(data[indices], axis=0)
    mean = np.mean(data)

    # Split the excluded probability mass equally between the two tails.
    delta = (1. - ci) * 0.5
    ci_low = np.quantile(samples, delta)
    ci_high = np.quantile(samples, 1. - delta)
    return mean, ci_low, ci_high, samples

# NOTE: unpickling can execute arbitrary code; only load this file from a
# trusted source.
with open("dqc-exp-data.pkl", "rb") as f:
    data = pkl.load(f)

for (task, name), samples in data.items():
    # evaluated at 250K, 500K, 750K, 1M training steps
    #  SHARSA, HIQL, IQL, FBC, HFBC are evaluated with 15 trials per task. Others are done with 50.
    # NOTE(review): the trailing dimension of 10 is presumably the number of
    # runs per checkpoint - confirm against how the pickle was produced.
    # Validate the layout before using the data, not after.
    assert samples.shape == (4, 10), samples.shape

    mean, ci_low, ci_high, _ = bootstrap_estimate(samples[-1])  # use last step only

    # Overwriting the value of an existing key does not resize the dict, so
    # this is safe while iterating over data.items().
    data[(task, name)] = {"mean": mean, "ci_low": ci_low, "ci_high": ci_high}
    print(f"{task:40s} {name:25s} {mean:.2f} [{ci_low:.2f}, {ci_high:.2f}]")


cube-triple-play-oraclerep-v0            OS                        0.47 [0.41, 0.53]
cube-triple-play-oraclerep-v0            DQC h=5 h_a=1             0.95 [0.94, 0.97]
cube-triple-play-oraclerep-v0            DQC-naïve h=5 h_a=1       0.16 [0.07, 0.30]
cube-triple-play-oraclerep-v0            QC h=5                    0.20 [0.07, 0.36]
cube-triple-play-oraclerep-v0            NS n=5                    0.93 [0.91, 0.94]
cube-triple-play-oraclerep-v0            DQC h=25 h_a=1            0.76 [0.73, 0.80]
cube-triple-play-oraclerep-v0            DQC-naïve h=25 h_a=1      0.14 [0.08, 0.22]
cube-triple-play-oraclerep-v0            NS n=25                   0.30 [0.26, 0.35]
cube-triple-play-oraclerep-v0            DQC h=25 h_a=5            0.98 [0.98, 0.99]
cube-triple-play-oraclerep-v0            DQC-naïve h=25 h_a=5      0.27 [0.18, 0.38]
cube-triple-play-oraclerep-v0            QC-NS n=25 h_a=5          0.51 [0.22, 0.80]
cube-triple-play-oraclerep-v0            QC h=25                 