In [1]:
import numpy as np

In [2]:
import sys
import tqdm
from functools import partial

In [3]:
# Add the local ./adwin directory to the module search path.
# NOTE(review): presumably this is what makes `adwin2` importable in a later
# cell -- confirm. The path is relative, so it resolves against the kernel's
# current working directory.
sys.path.append('./adwin')

In [4]:
from mdl.smdl import SMDL
from mdl.model import Norm1D

In [5]:
from cf.mycf import ChangeFinder
from bocpd.mybocpd import BOCD, StudentT, constant_hazard
from adwin2 import ADWIN2
from scaw.SCAW2 import MDLCPStat_adwin2, lnml_gaussian

In [6]:
from generate_data import generate_multiple_changing_variance_gradual
from evaluation import calc_auc_average, calc_falarms_benefit, InvRunLen

In [7]:
# ---------------------------------------------------------------------------
# Shared experiment configuration (read by every experiment cell below).
# ---------------------------------------------------------------------------
# N is forwarded as the first positional argument of the data generator;
# N_trial is the number of repetitions, each one seeded with its trial index.
N = 10
N_trial = 10

# Passed to the data generator as `mu=` and `coef=` respectively.
MU = 0.0
COEF = 0.1

# True change points: every multiple of 1000 from 1000 to 9000, shifted by -1
# (presumably to turn 1-based positions into 0-based indices -- TODO confirm).
cps_true = np.arange(1000, 10000, 1000) - 1
# ---------------------------------------------------------------------------

# ChangeFinder

In [8]:
# Grid search over ChangeFinder's discounting rate `r_cf` and model `order`.
# For every (r_cf, order) pair the detector is run on N_trial independently
# seeded series and the mean / standard deviation of the AUC values returned
# by calc_auc_average are printed.
#
# Smoothing parameters are held fixed for the whole sweep. `order` is NOT
# fixed: it is swept by the inner loop below (a previous stand-alone
# `order = 3` assignment was dead code and has been removed).
smooth1 = 5
smooth2 = 5

for r_cf in [0.003, 0.005, 0.01, 0.03, 0.1]:
    for order in [2, 3, 4, 5]:
        scores_cf = []
        for i in range(N_trial):
            # Same seed -> same series for every hyper-parameter setting.
            X = generate_multiple_changing_variance_gradual(N, mu=MU, coef=COEF, seed=i)

            # The same `order` is used for both order1 and order2.
            cf = ChangeFinder(r=r_cf, order1=order, order2=order, smooth1=smooth1, smooth2=smooth2)

            # cf.update returns a pair; only its first element (the score) is kept.
            scores = np.array([score for score, _ in map(cf.update, X)])
            scores_cf.append(scores)

        # One row of scores per trial.
        scores_cf = np.array(scores_cf)
        auc_list = calc_auc_average(scores_cf)
        print('r_cf =', r_cf, 'order =', order, ':', np.mean(auc_list), '+/-', np.std(auc_list))

r_cf = 0.003 order = 2 : 0.3588160335539584 +/- 0.021862169409808282
r_cf = 0.003 order = 3 : 0.36907323208944115 +/- 0.03716784309142366
r_cf = 0.003 order = 4 : 0.39738354258875763 +/- 0.05534339319434691
r_cf = 0.003 order = 5 : 0.38389628185866465 +/- 0.017641298011712485
r_cf = 0.005 order = 2 : 0.3725131799786142 +/- 0.028439584661411676
r_cf = 0.005 order = 3 : 0.37724411804117436 +/- 0.022172217403366144
r_cf = 0.005 order = 4 : 0.3931310202893843 +/- 0.0415232843301209
r_cf = 0.005 order = 5 : 0.3780620944491818 +/- 0.021947881068372532
r_cf = 0.01 order = 2 : 0.44212402952044505 +/- 0.027345231453297758
r_cf = 0.01 order = 3 : 0.43118068738918 +/- 0.026780457663995474
r_cf = 0.01 order = 4 : 0.42303561734410744 +/- 0.02434513759163914
r_cf = 0.01 order = 5 : 0.4214076880388734 +/- 0.031062272894974402
r_cf = 0.03 order = 2 : 0.49452625153963814 +/- 0.025011122851040752
r_cf = 0.03 order = 3 : 0.4871535309484171 +/- 0.02378250350822708
r_cf = 0.03 order = 4 : 0.485189607273859

# BOCPD

In [9]:
# Grid search for BOCPD over the hazard parameter LAMBDA.
#
# Hyper-parameters handed to StudentT(ALPHA, BETA, KAPPA, PRIOR_MU).
# The prior mean gets its own name so that this cell no longer rebinds the
# notebook-wide data-generation constant MU defined in the configuration cell.
ALPHA = 0.1
BETA = 1.0
KAPPA = 1.0
PRIOR_MU = 0.0
# Number of initial observations that are fed to the detector without
# producing a score (their scores are recorded as NaN).
DELAY = 15

for LAMBDA in [100, 600]:
    scores_bocpd = []
    for i in range(N_trial):
        X = generate_multiple_changing_variance_gradual(N, mu=MU, coef=COEF, seed=i)

        # BOCPD
        bocd = BOCD(partial(constant_hazard, LAMBDA),
                    StudentT(ALPHA, BETA, KAPPA, PRIOR_MU), X)

        # Warm-up: update the detector, record NaN placeholders as scores.
        scores = [np.nan] * DELAY
        for x in X[:DELAY]:
            bocd.update(x)

        for x in X[DELAY:]:
            bocd.update(x)
            # Weighted sum of the first (t - DELAY) growth probabilities,
            # the k-th one (k = 1, 2, ...) being weighted by 1 / (1 + k).
            n_terms = bocd.t - DELAY
            score = np.sum(bocd.growth_probs[:n_terms] * 1.0 / (1.0 + np.arange(1, n_terms + 1)))
            scores.append(score)

        scores_bocpd.append(scores)

    scores_bocpd = np.array(scores_bocpd)
    auc_list = calc_auc_average(scores_bocpd)

    # THRESHOLD was only ever used to build a list of detected change points
    # that nothing read; the scores above (and therefore the AUC) do not depend
    # on it. The detector is thus run once per LAMBDA instead of once per
    # (LAMBDA, THRESHOLD) pair, and the same AUC is reported for each threshold
    # so that the printed table keeps its original layout.
    for THRESHOLD in [0.1, 0.3]:
        print('LAMBDA =', LAMBDA, 'THRESHOLD =', THRESHOLD, ':', np.mean(auc_list), '+/-', np.std(auc_list))

LAMBDA = 100 THRESHOLD = 0.1 : 0.35356437047414085 +/- 0.02854168209338144
LAMBDA = 100 THRESHOLD = 0.3 : 0.35356437047414085 +/- 0.02854168209338144
LAMBDA = 600 THRESHOLD = 0.1 : 0.26926449438962663 +/- 0.03230963872835837
LAMBDA = 600 THRESHOLD = 0.3 : 0.26926449438962663 +/- 0.03230963872835837


# ADWIN2

In [10]:
# Sweep ADWIN2's `delta` parameter; `M` is held fixed and passed through to
# ADWIN2.transform unchanged.
M = 5

for delta in (0.1, 0.3, 0.5, 0.7, 0.9):
    per_trial_scores = []
    for seed in range(N_trial):
        series = generate_multiple_changing_variance_gradual(N, mu=MU, coef=COEF, seed=seed)

        # A fresh detector per trial; its raw output is mapped through
        # InvRunLen before being stored.
        detector_output = ADWIN2().transform(series, delta=delta, M=M)
        per_trial_scores.append(InvRunLen(detector_output))

    # One row per trial.
    scores_ad = np.array(per_trial_scores)
    auc_list = calc_auc_average(scores_ad)
    print('delta =', delta, ':', np.mean(auc_list), '+/-', np.std(auc_list))

delta = 0.1 : 0.4575565472854996 +/- 0.024199813167372623
delta = 0.3 : 0.42130511660643466 +/- 0.030227476504685453
delta = 0.5 : 0.41841422706785236 +/- 0.05946546497336983
delta = 0.7 : 0.41716880821862184 +/- 0.05391695071609595
delta = 0.9 : 0.4309244859977531 +/- 0.04081560124500086


# D-MDL

In [11]:
# Sliding-window change scores from SMDL (with a Norm1D model), computed with
# three scoring methods: calc_change_score, calc_change_score_1st and
# calc_change_score_2nd. Each trial yields one score series per method; the
# per-method series are stacked and handed to calc_auc_average.
h = 100   # half-width of the sliding window (window is X[t-h : t+h])
T = 100   # forwarded to calc_auc_average as `T=`

mu_max = 20.0
sigma_min = 0.005


def _windowed_change_scores(X, method_name, h, mu_max, sigma_min):
    """Score every full window of width 2*h in `X` with one SMDL method.

    Args:
        X: the series to score; sliced as X[t-h : t+h].
        method_name: name of the SMDL method to call for each window, e.g.
            'calc_change_score'.
        h: half-width of the window; also passed to the scoring method as
            its second positional argument.
        mu_max, sigma_min: forwarded to the scoring method as keyword
            arguments.

    Returns:
        A numpy array holding `h` NaNs, then one score for each
        t in range(h, len(X) - h), then `h` NaNs.
    """
    # A fresh model / scorer pair is built for every series and method,
    # exactly as the original copy-pasted code did.
    smdl = SMDL(Norm1D())
    score_window = getattr(smdl, method_name)

    # Positions without a full window on both sides get no score.
    edge_pad = [np.nan] * h
    interior = [score_window(X[(t - h):(t + h)], h, mu_max=mu_max, sigma_min=sigma_min)
                for t in range(h, len(X) - h)]
    return np.array(edge_pad + interior + edge_pad)


scores_list_0th = []
scores_list_1st = []
scores_list_2nd = []

for i in range(N_trial):
    X = generate_multiple_changing_variance_gradual(N, mu=MU, coef=COEF, seed=i)

    scores_list_0th.append(_windowed_change_scores(X, 'calc_change_score', h, mu_max, sigma_min))
    scores_list_1st.append(_windowed_change_scores(X, 'calc_change_score_1st', h, mu_max, sigma_min))
    scores_list_2nd.append(_windowed_change_scores(X, 'calc_change_score_2nd', h, mu_max, sigma_min))

# One row per trial for each scoring method.
scores_list_0th = np.array(scores_list_0th)
scores_list_1st = np.array(scores_list_1st)
scores_list_2nd = np.array(scores_list_2nd)

auc_list_0th = calc_auc_average(scores_list_0th, T=T)
auc_list_1st = calc_auc_average(scores_list_1st, T=T)
auc_list_2nd = calc_auc_average(scores_list_2nd, T=T)

In [12]:
# Mean and standard deviation of the AUC values for calc_change_score.
print(np.mean(auc_list_0th), np.std(auc_list_0th), sep=' +/- ')

0.52135782095532 +/- 0.05020986529447889


In [13]:
# Mean and standard deviation of the AUC values for calc_change_score_1st.
print(np.mean(auc_list_1st), np.std(auc_list_1st), sep=' +/- ')

0.532604917976205 +/- 0.022715836855180884


In [14]:
# Mean and standard deviation of the AUC values for calc_change_score_2nd.
print(np.mean(auc_list_2nd), np.std(auc_list_2nd), sep=' +/- ')

0.5261295957011952 +/- 0.003088247642948327
