In [1]:
from sklearn.metrics.pairwise import rbf_kernel
import numpy as np
import matplotlib.pyplot as plt
from mmdew.fast_rbf_kernel import est_gamma
from mmdew.detectors import ScanBStatistic 
from tqdm import tqdm
import pickle
import pandas as pd
import onlinecp.algos as algos
import onlinecp.utils.feature_functions as feat

In [2]:
from notebooks.data import MixedNormal, Uniform, Laplace

In [3]:
# B is handed to algos.select_optimal_parameters below; the value is kept
# in line with our ScanB/OKCUSUM setting.
B = 50
# Number of columns of every generated sample matrix.
d = 20
# NOTE(review): ref_size is not referenced in any visible cell - confirm it is still needed.
ref_size = 1000
# Unseeded generator: every run of the notebook draws fresh reference data.
rng = np.random.default_rng()

In [4]:
# Forget factors chosen with the heuristic in the paper.
big_Lambda, small_lambda = algos.select_optimal_parameters(B)
# Passed as `adapt_forget_factor` when the NEWMA detector is constructed.
thres_ff = small_lambda
# The number of random features is set automatically from the two forget factors.
m = int(0.25 / (small_lambda + big_Lambda) ** 2)
def feat_func(x):
    """Apply ``feat.fourier_feat`` to ``x`` with the module-level frequencies ``W``.

    ``W`` is looked up at call time, so it has to be assigned at module level
    before this function is first called.
    """
    features = feat.fourier_feat(x, W)
    return features

In [5]:
# Load the stored statistics from which the detection thresholds are computed.
# NOTE(review): pickle.load can execute arbitrary code - only open a trusted file here.
with open('newma-statistics.pickle', 'rb') as pickle_file:
    statistics = pickle.load(pickle_file)

In [6]:
# Exponents of the target ARLs (base 10): 3.0, 3.25, ..., 5.0.
target_arls_log = np.arange(3, 5.1, .25)
# For every exponent, the threshold is the (1 - 1/10**exponent) quantile of the
# loaded statistics.
arl2thresh = {
    log_arl: np.quantile(statistics, 1 - (1 / 10**log_arl))
    for log_arl in target_arls_log
}

In [7]:
def edd(arl2thresh, statistics):
    """Compute the mean detection delay (EDD) for every threshold.

    Parameters
    ----------
    arl2thresh : dict
        Maps a label (in this notebook: the base-10 exponent of the target
        ARL) to a detection threshold.
    statistics : iterable of sequences
        One sequence of detector statistics per run. Lists and NumPy arrays
        are both accepted.

    Returns
    -------
    dict
        Maps every key of ``arl2thresh`` to the mean, over all runs, of the
        1-based position of the first statistic that exceeds the threshold.
        A run that never exceeds it contributes ``len(run) + 1``.
    """
    arl2edd = {}
    for arl, thresh in arl2thresh.items():
        delays = []
        # Use the `statistics` argument, not a notebook global.
        for run in statistics:
            # A trailing +inf sentinel guarantees that argmax finds a crossing,
            # so a missed detection maps to index len(run) instead of 0.
            padded = np.append(np.asarray(run, dtype=float), np.inf)
            delays.append(np.argmax(padded > thresh))
        arl2edd[arl] = np.mean(delays) + 1  # account for counting from 0
    return arl2edd

In [8]:
# Same dimension as in the configuration cell, restated for this cell.
d = 20
# First argument handed to every distribution below.
# NOTE(review): presumably the number of samples returned by draw() - confirm in notebooks.data.
n_q = 500
# Distributions whose draws are fed to the detector after the reference data,
# keyed by the label that ends up in the results table.
qs = {
    "MixedNormal0.3": MixedNormal(n_q, d, 0.3),
    "MixedNormal0.7": MixedNormal(n_q, d, 0.7),
    "Laplace": Laplace(n_q, d),
    "Uniform": Uniform(n_q, d),
}

In [9]:
# Results table; starts out empty and holds one row of EDD values per
# distribution once the simulation cell has run.
df = pd.DataFrame()

In [10]:
# Simulation settings, named once so the warm-up length used for feeding the
# detector and for slicing its stored statistics cannot drift apart.
N_RUNS = 100       # repetitions per distribution
N_REF = 10000      # reference samples drawn per repetition (only the leading rows are used)
N_FREQ_FIT = 500   # leading reference samples passed to generate_frequencies
N_WARMUP = 400     # leading reference samples fed to the detector first

# One dict of EDD values per distribution; the frame is built once at the end
# instead of being grown with pd.concat, so re-running the cell is idempotent.
edd_rows = {}
for name, q in qs.items():
    h1_stats = []
    for _ in tqdm(range(N_RUNS)):
        ref = rng.normal(size=(N_REF, d))

        # W must stay a module-level name: feat_func reads it at call time.
        W, sigmasq = feat.generate_frequencies(m, d, data=ref[:N_FREQ_FIT], choice_sigma="median")

        detector = algos.NEWMA(ref[0], forget_factor=big_Lambda, forget_factor2=small_lambda,
                               feat_func=feat_func, adapt_forget_factor=thres_ff)

        # Reference data first, then a draw from the distribution under test.
        detector.apply_to_data(ref[:N_WARMUP])
        detector.apply_to_data(q.draw())

        # Keep the first entry of every stored record, dropping the warm-up part.
        h1_stats.append([s[0] for s in detector.stat_stored][N_WARMUP:])
    edd_rows[name] = edd(arl2thresh=arl2thresh, statistics=h1_stats)

# Rows are indexed by distribution name, columns by the keys of arl2thresh.
df = pd.DataFrame(list(edd_rows.values()), index=list(edd_rows.keys()))

100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.51it/s]
100%|█████████████████████████████████████████| 100/100 [00:10<00:00,  9.15it/s]
100%|█████████████████████████████████████████| 100/100 [00:07<00:00, 12.67it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.87it/s]


In [11]:
# Move the distribution names from the index into a regular column called "data".
df = df.reset_index(names="data")

In [12]:
# Show the wide table: one row per distribution, one column per target-ARL exponent.
df

Unnamed: 0,data,3.0,3.25,3.5,3.75,4.0,4.25,4.5,4.75,5.0
0,MixedNormal0.3,7.52,7.99,8.35,8.67,9.03,9.48,9.85,10.37,10.64
1,MixedNormal0.7,26.55,28.67,32.91,35.51,37.95,39.86,46.55,49.89,52.3
2,Laplace,3.64,3.84,3.96,4.18,4.27,4.41,4.55,4.71,4.86
3,Uniform,4.14,4.26,4.46,4.64,4.77,4.99,5.19,5.31,5.43


In [13]:
# Reshape from wide to long: one row per (distribution, logARL) pair with its EDD.
df = pd.melt(df, id_vars="data", var_name="logARL", value_name="EDD")

In [14]:
# Tag every row with the name of the algorithm that produced it.
df = df.assign(algorithm="NewMA")

In [15]:
# Write the long-format results to disk; the row index is written as the first column.
df.to_csv(path_or_buf="../results_rebuttal/arl-vs-edd/newma.csv")