In [1]:
import numpy as np

In [2]:
import sys
import tqdm
from functools import partial

In [3]:
# Add the local ./adwin directory to the module search path.
# NOTE(review): presumably this is what lets `from adwin2 import ADWIN2` in the
# import cell below resolve — confirm. The path is relative, so it depends on
# the directory the kernel was started from.
sys.path.append('./adwin')

In [4]:
from mdl.smdl import SMDL
from mdl.model import Norm1D
from cf.mycf import ChangeFinder
from bocpd.mybocpd import BOCD, StudentT, constant_hazard
from adwin2 import ADWIN2
from scaw.SCAW2 import MDLCPStat_adwin2, lnml_gaussian



In [5]:
from generate_data import generate_multiple_jumping_mean
from evaluation import calc_auc_average, calc_falarms_benefit, InvRunLen

In [6]:
# ---- shared experiment settings -------------------------------------------
# N is the first positional argument handed to generate_multiple_jumping_mean.
# NOTE(review): the D-MDL cell further down rebinds N to 10000, and cps_true
# below reaches index 8999 — confirm that 10 is the intended value here.
N = 10
# Number of seeds (0 .. N_trial-1) each configuration is evaluated on.
N_trial = 10

# Forwarded to the data generator as sigma= and coef= respectively.
SIGMA = 1.0
COEF = 0.3

# True change points: zero-based indices 999, 1999, ..., 8999.
cps_true = np.arange(1000, 10000, 1000) - 1
# ---------------------------------------------------------------------------

# ChangeFinder

In [7]:
# Smoothing arguments passed unchanged to every ChangeFinder instance.
smooth1 = 5
smooth2 = 5

# Grid search over r and the order (the same value is used for order1 and
# order2); each grid point is evaluated on N_trial seeded datasets.
for r_cf in (0.003, 0.005, 0.01, 0.03, 0.1):
    for order in (2, 3, 4, 5):
        scores_cf = []
        for seed in range(N_trial):
            X = generate_multiple_jumping_mean(N, sigma=SIGMA, coef=COEF, seed=seed)
            cf = ChangeFinder(
                r=r_cf,
                order1=order,
                order2=order,
                smooth1=smooth1,
                smooth2=smooth2,
            )

            # update() yields a pair per sample; only its first element is kept.
            scores = np.array([first for first, _ in map(cf.update, X)])
            scores_cf.append(scores)

        # Stack the per-seed score sequences and summarise their AUCs.
        scores_cf = np.array(scores_cf)
        auc_list = calc_auc_average(scores_cf)
        auc_mean, auc_std = np.mean(auc_list), np.std(auc_list)
        print('r_cf =', r_cf, 'order =', order, ':', auc_mean, '+/-', auc_std)

r_cf = 0.003 order = 2 : 0.5558215762961354 +/- 0.02538578200807747
r_cf = 0.003 order = 3 : 0.5442470916736044 +/- 0.025292378536008027
r_cf = 0.003 order = 4 : 0.5341158158685344 +/- 0.016515392799971388
r_cf = 0.003 order = 5 : 0.5257798191986099 +/- 0.01880280276161836
r_cf = 0.005 order = 2 : 0.5844636948154582 +/- 0.02733408821125516
r_cf = 0.005 order = 3 : 0.5744368591112559 +/- 0.028266369976654184
r_cf = 0.005 order = 4 : 0.5546243330196305 +/- 0.024249462289011384
r_cf = 0.005 order = 5 : 0.5516896789975989 +/- 0.028717226343280524
r_cf = 0.01 order = 2 : 0.5910699974059194 +/- 0.030588508391432397
r_cf = 0.01 order = 3 : 0.5799941338183662 +/- 0.031207960399719677
r_cf = 0.01 order = 4 : 0.5625601073225468 +/- 0.03277674080676222
r_cf = 0.01 order = 5 : 0.5628022134747409 +/- 0.03187343809542562
r_cf = 0.03 order = 2 : 0.5504029900701004 +/- 0.02307688774853607
r_cf = 0.03 order = 3 : 0.54285791984894 +/- 0.019631864408267363
r_cf = 0.03 order = 4 : 0.5386739986848614 +/- 0

# BOCPD

In [8]:
# Arguments handed positionally to StudentT(ALPHA, BETA, KAPPA, MU) below.
ALPHA = 0.1
BETA = 1.0
KAPPA = 1.0
MU = 0.0
# The first DELAY samples receive a NaN score; DELAY is also the lag that is
# subtracted from bocd.t when slicing growth_probs.
DELAY = 15

for LAMBDA in [100, 600]:
    # The score sequence depends only on LAMBDA and the seed. THRESHOLD used to
    # gate a `change_points` list that was never read, so the whole (expensive)
    # BOCPD pass was repeated per threshold for identical results. It is now
    # computed once per LAMBDA and the unused list is gone.
    scores_bocpd = []
    for seed in range(N_trial):
        X = generate_multiple_jumping_mean(N, sigma=SIGMA, coef=COEF, seed=seed)

        # BOCPD with a constant hazard parameterised by LAMBDA.
        bocd = BOCD(partial(constant_hazard, LAMBDA),
                    StudentT(ALPHA, BETA, KAPPA, MU), X)

        # Warm-up: feed the first DELAY samples without scoring them.
        scores = [np.nan] * DELAY
        for x in X[:DELAY]:
            bocd.update(x)

        for x in X[DELAY:]:
            bocd.update(x)
            # Weighted sum of the first (t - DELAY) growth probabilities with
            # weights 1 / (1 + i), i = 1 .. t - DELAY.
            # NOTE(review): looks like an expected inverse run length — confirm.
            n_terms = bocd.t - DELAY
            score = np.sum(bocd.growth_probs[:n_terms] * 1.0 / (1.0 + np.arange(1, n_terms + 1)))
            scores.append(score)

        scores_bocpd.append(scores)

    scores_bocpd = np.array(scores_bocpd)
    auc_list = calc_auc_average(scores_bocpd)

    # Keep the original report layout: one line per (LAMBDA, THRESHOLD) pair.
    # The AUC does not depend on THRESHOLD, so both lines carry the same value.
    for THRESHOLD in [0.1, 0.3]:
        print('LAMBDA =', LAMBDA, 'THRESHOLD =', THRESHOLD, ':', np.mean(auc_list), '+/-', np.std(auc_list))

LAMBDA = 100 THRESHOLD = 0.1 : 0.5457161372932278 +/- 0.05930877791777235
LAMBDA = 100 THRESHOLD = 0.3 : 0.5457161372932278 +/- 0.05930877791777235
LAMBDA = 600 THRESHOLD = 0.1 : 0.5147292057890229 +/- 0.0684109465949315
LAMBDA = 600 THRESHOLD = 0.3 : 0.5147292057890229 +/- 0.0684109465949315


# Adwin2

In [9]:
# Passed to ADWIN2.transform as M=.
M = 5

# Sweep delta; each value is evaluated on N_trial seeded datasets.
for delta in [0.1, 0.3, 0.5, 0.7, 0.9]:
    scores_ad = []
    # Use the shared N_trial constant (previously a hard-coded 10) so this
    # sweep stays in step with the other methods when the trial count changes.
    for seed in tqdm.tqdm(range(N_trial)):
        X = generate_multiple_jumping_mean(N, sigma=SIGMA, coef=COEF, seed=seed)

        ad = ADWIN2()
        scores = ad.transform(X, delta=delta, M=M)
        # InvRunLen post-processes the raw transform output before the AUC
        # computation (its exact semantics live in evaluation.py).
        scores_ad.append(InvRunLen(scores))

    scores_ad = np.array(scores_ad)
    auc_list = calc_auc_average(scores_ad)
    print('delta =', delta, ':', np.mean(auc_list), '+/-', np.std(auc_list))

delta = 0.1 : 0.5 +/- 0.0


delta = 0.3 : 0.5 +/- 0.0


delta = 0.5 : 0.5 +/- 0.0


delta = 0.7 : 0.5 +/- 0.0


delta = 0.9 : 0.5 +/- 0.0


# D-MDL

In [10]:
# NOTE(review): this cell rebinds the notebook-wide N and N_trial; the config
# cell at the top sets N = 10. Confirm which value is intended and keep the
# definition in one place.
N = 10000
N_trial = 10
# Half-width of the sliding window: each score is computed on X[t-h : t+h].
h = 100

# Keyword arguments forwarded to every SMDL scoring call.
mu_max = 50.0
sigma_min = 0.005


def _windowed_scores(score_fn, X, n_samples, h, **kwargs):
    """Apply ``score_fn`` to every window ``X[t-h:t+h]`` for t in [h, n_samples-h).

    The result is padded with ``h`` NaNs on each side so that it has one entry
    per position of ``X``. ``score_fn`` is called as
    ``score_fn(window, h, **kwargs)``.
    """
    pad = [np.nan] * h
    core = [score_fn(X[(t - h):(t + h)], h, **kwargs) for t in range(h, n_samples - h)]
    return np.array(pad + core + pad)


scores_list_0th, scores_list_1st, scores_list_2nd = [], [], []

for i in tqdm.tqdm(range(N_trial)):
    X = generate_multiple_jumping_mean(N, sigma=SIGMA, coef=COEF, seed=i)
    len_X = len(X)

    norm1d = Norm1D()
    smdl = SMDL(norm1d)

    # Same call order as before: all 0th scores, then 1st, then 2nd.
    scores_0th = _windowed_scores(smdl.calc_change_score, X, len_X, h,
                                  mu_max=mu_max, sigma_min=sigma_min)
    scores_list_0th.append(scores_0th)

    scores_1st = _windowed_scores(smdl.calc_change_score_1st, X, len_X, h,
                                  mu_max=mu_max, sigma_min=sigma_min)
    scores_list_1st.append(scores_1st)

    scores_2nd = _windowed_scores(smdl.calc_change_score_2nd, X, len_X, h,
                                  mu_max=mu_max, sigma_min=sigma_min)
    scores_list_2nd.append(scores_2nd)

# Stack the per-trial score sequences before handing them to the AUC helper.
scores_list_0th = np.array(scores_list_0th)
scores_list_1st = np.array(scores_list_1st)
scores_list_2nd = np.array(scores_list_2nd)

auc_list_0th = calc_auc_average(scores_list_0th)
auc_list_1st = calc_auc_average(scores_list_1st)
auc_list_2nd = calc_auc_average(scores_list_2nd)

In [11]:
# Mean and standard deviation of the AUCs obtained from calc_change_score.
auc_mean_0th, auc_std_0th = np.mean(auc_list_0th), np.std(auc_list_0th)
print(auc_mean_0th, '+/-', auc_std_0th)

0.9176773107429514 +/- 0.015557969842061403


In [12]:
# Mean and standard deviation of the AUCs obtained from calc_change_score_1st.
auc_mean_1st, auc_std_1st = np.mean(auc_list_1st), np.std(auc_list_1st)
print(auc_mean_1st, '+/-', auc_std_1st)

0.48030241306966615 +/- 0.005961253114240665


In [13]:
# Mean and standard deviation of the AUCs obtained from calc_change_score_2nd.
auc_mean_2nd, auc_std_2nd = np.mean(auc_list_2nd), np.std(auc_list_2nd)
print(auc_mean_2nd, '+/-', auc_std_2nd)

0.4941661155777533 +/- 0.006009435037222007
