In [1]:
from sklearn.metrics.pairwise import rbf_kernel
import numpy as np
import matplotlib.pyplot as plt
from mmdew.fast_rbf_kernel import est_gamma
from mmdew.detectors import FastOKCUSUM 
from tqdm import tqdm
import pickle
import pandas as pd

In [2]:
from notebooks.data import MixedNormal, Uniform, Laplace

In [3]:
# Experiment configuration.
d = 20           # number of columns of the Gaussian reference sample drawn in the simulation cell
ref_size = 1000  # NOTE(review): not referenced in the visible cells (the simulation hard-codes 10000 rows) - confirm
SEED = 0         # fixed seed so that Restart & Run All draws the same reference samples
# NOTE(review): this only seeds the draws made through `rng`; whether the
# post-change samplers use their own random state cannot be seen here.
rng = np.random.default_rng(SEED)

In [4]:
# Load the stored OKCUSUM statistics from disk; the following cell takes
# quantiles of these values to derive the detection thresholds.
# NOTE(review): presumably these were recorded on data without a change - confirm
# against the notebook that wrote the pickle.
# Security: pickle.load can execute arbitrary code, so only load files you trust.
with open('okcusum-statistics.pickle', 'rb') as handle:
    statistics = pickle.load(handle)

In [5]:
# Grid of target average run lengths on a log10 scale: 3, 3.25, ..., 5.
target_arls_log = np.arange(3, 5.1, .25)

# For a target ARL of 10**a the threshold is the (1 - 10**-a) quantile of the
# loaded statistics.
arl2thresh = dict(
    (log_arl, np.quantile(statistics, 1 - (1 / 10**log_arl)))
    for log_arl in target_arls_log
)

In [6]:
def edd(arl2thresh, statistics):
    """Estimate the expected detection delay (EDD) for every threshold.

    Parameters
    ----------
    arl2thresh : dict
        Maps a target-ARL label to the detection threshold belonging to it.
    statistics : sequence of sequences of float
        One sequence of detector statistics per simulated run.

    Returns
    -------
    dict
        Maps every key of ``arl2thresh`` to the mean 1-based position of the
        first statistic that is strictly greater than the threshold. A run
        that never exceeds the threshold contributes ``len(run) + 1``.
    """
    arl2edd = {}
    for arl, thresh in arl2thresh.items():
        delays = [
            # The trailing inf is a sentinel: argmax then lands one past the
            # end of a run in which the threshold is never exceeded.
            np.argmax(np.asarray(list(run) + [np.inf], dtype=float) > thresh)
            for run in statistics
        ]
        arl2edd[arl] = np.mean(delays) + 1  # account for counting from 0
    return arl2edd

In [7]:
# Post-change distributions to evaluate, keyed by the label that later becomes
# the row name in the results table.
d = 20
n_q = 500  # first constructor argument of every sampler - presumably the sample count, confirm
qs = dict([
    ("MixedNormal0.3", MixedNormal(n_q, d, 0.3)),
    ("MixedNormal0.7", MixedNormal(n_q, d, 0.7)),
    ("Laplace", Laplace(n_q, d)),
    ("Uniform", Uniform(n_q, d)),
])

In [8]:
# Results table: one row per post-change distribution, one column per target
# log-ARL (populated by the simulation cell that follows).
df = pd.DataFrame()

In [9]:
# Simulate OKCUSUM on every post-change distribution and estimate its expected
# detection delay (EDD) for each threshold in `arl2thresh`.
edd_rows = {}
for name, q in qs.items():
    h1_stats = []
    for _ in tqdm(range(100)):
        # Fresh standard-normal reference sample for every repetition.
        # NOTE(review): the row count is hard-coded to 10000 although `ref_size`
        # is set to 1000 in the configuration cell - confirm which is intended.
        ref = rng.normal(size=(10000, d))

        detector = FastOKCUSUM(ref, B_max=50, N=15)

        # Feed the post-change sample one observation (as a 1 x d row) at a time.
        for elem in q.draw():
            detector.add_element(elem.reshape(1, -1))

        h1_stats.append(detector.stats)
    edd_rows[name] = edd(arl2thresh=arl2thresh, statistics=h1_stats)

# Build the table once instead of growing it with pd.concat inside the loop;
# this also keeps the cell idempotent when it is re-run.
df = pd.DataFrame(list(edd_rows.values()), index=list(edd_rows.keys()))

100%|█████████████████████████████████████████| 100/100 [15:57<00:00,  9.58s/it]
100%|█████████████████████████████████████████| 100/100 [14:51<00:00,  8.92s/it]
100%|█████████████████████████████████████████| 100/100 [15:08<00:00,  9.09s/it]
100%|█████████████████████████████████████████| 100/100 [15:36<00:00,  9.36s/it]


In [10]:
# Move the distribution labels from the index into a regular column "data".
df = df.rename_axis("data").reset_index()

In [11]:
# Display the wide table: one row per distribution, one EDD column per target log-ARL.
df

Unnamed: 0,data,3.0,3.25,3.5,3.75,4.0,4.25,4.5,4.75,5.0
0,MixedNormal0.3,7.79,8.16,8.47,8.84,9.14,9.35,9.67,10.2,10.71
1,MixedNormal0.7,32.15,34.22,36.48,40.13,42.03,44.15,49.9,56.41,61.92
2,Laplace,3.82,3.91,3.99,4.01,4.04,4.1,4.19,4.31,4.45
3,Uniform,3.85,3.9,3.97,4.0,4.0,4.05,4.14,4.35,4.55


In [12]:
# Reshape from wide to long format: one (data, logARL, EDD) row per combination.
df = pd.melt(
    df,
    id_vars="data",
    var_name="logARL",
    value_name="EDD",
)

In [13]:
# Tag every row with the name of the algorithm that produced it.
df = df.assign(algorithm="OKCUSUM")

In [14]:
# Persist the long-format results (the index is written as the first, unnamed column).
df.to_csv(path_or_buf="../results_rebuttal/arl-vs-edd/okcusum.csv")