In [1]:
import numpy as np

In [2]:
import sys
from functools import partial

In [3]:
# Extend the import search path with the local ./adwin directory
# (presumably where the `adwin2` module imported below lives — confirm).
# The entry is relative, so it is resolved against the kernel's working directory.
sys.path.append('./adwin')

In [4]:
from mdl.smdl import SMDL
from mdl.model import Norm1D
from cf.mycf import ChangeFinder
from bocpd.mybocpd import BOCD, StudentT, constant_hazard
from adwin2 import ADWIN2
from scaw.SCAW2 import MDLCPStat_adwin2, lnml_gaussian

In [5]:
from generate_data import generate_multiple_jumping_variance
from evaluation import calc_auc_average, calc_falarms_benefit, InvRunLen

In [6]:
# ---- shared experiment settings ----
# N is passed as the first positional argument of
# generate_multiple_jumping_variance; N_trial is the number of seeds
# (0 .. N_trial - 1) each method is evaluated on.
N = 10
N_trial = 10

# Forwarded to the data generator as `mu=` and `coef=`.
MU = 0.0
COEF = 0.1

# True change points: positions 1000, 2000, ..., 9000 shifted down by one
# (presumably to 0-based indices — confirm against the generator).
cps_true = np.arange(1000, 10000, 1000) - 1
# ---- end of shared settings ----

# ChangeFinder

In [7]:
# Grid search over ChangeFinder's `r` parameter and its order
# (`order1` and `order2` are always set to the same value).
# Both smoothing parameters stay fixed for the whole sweep.
smooth1 = 5
smooth2 = 5

for r_cf in [0.003, 0.005, 0.01, 0.03, 0.1]:
    for order in [2, 3, 4, 5]:
        # One score sequence per seed for this (r_cf, order) combination.
        scores_cf = []
        for seed in range(N_trial):
            X = generate_multiple_jumping_variance(N, mu=MU, coef=COEF, seed=seed)
            # A fresh detector per trial, so nothing learned on one seed
            # carries over to the next.
            cf = ChangeFinder(r=r_cf, order1=order, order2=order, smooth1=smooth1, smooth2=smooth2)

            # Feed the series one sample at a time; update() returns a pair
            # and only its first element (the score) is kept.
            scores = []
            for x in X:
                score, _ = cf.update(x)
                scores.append(score)

            scores = np.array(scores)
            scores_cf.append(scores)

        # Stack the per-seed sequences and summarise the resulting AUC values.
        scores_cf = np.array(scores_cf)
        auc_list = calc_auc_average(scores_cf)
        print('r_cf =', r_cf, 'order =', order, ':', np.mean(auc_list), '+/-', np.std(auc_list))

r_cf = 0.003 order = 2 : 0.5259301953140862 +/- 0.019861647786869118
r_cf = 0.003 order = 3 : 0.5208561176757217 +/- 0.015253502992080661
r_cf = 0.003 order = 4 : 0.521349281682706 +/- 0.02280614936837708
r_cf = 0.003 order = 5 : 0.5231662907919492 +/- 0.017788128253011282
r_cf = 0.005 order = 2 : 0.5485026183321828 +/- 0.026398700251548026
r_cf = 0.005 order = 3 : 0.543063286568942 +/- 0.021748270372102856
r_cf = 0.005 order = 4 : 0.5380034484508873 +/- 0.023362005412228364
r_cf = 0.005 order = 5 : 0.5338636020086356 +/- 0.024469867948782212
r_cf = 0.01 order = 2 : 0.5924400906186975 +/- 0.02860451047004226
r_cf = 0.01 order = 3 : 0.5855121321449358 +/- 0.025664145722621556
r_cf = 0.01 order = 4 : 0.5752152395067744 +/- 0.01861777012025563
r_cf = 0.01 order = 5 : 0.5754842491980346 +/- 0.026074727311747804
r_cf = 0.03 order = 2 : 0.6079530068623868 +/- 0.022809931605297568
r_cf = 0.03 order = 3 : 0.6012264534183348 +/- 0.02072681444166332
r_cf = 0.03 order = 4 : 0.6004907696836804 +/-

# BOCPD

In [8]:
# Hyper-parameters passed positionally to StudentT(ALPHA, BETA, KAPPA, MU).
# MU is also forwarded to the data generator as `mu=`.
ALPHA = 0.1
BETA = 1.0
KAPPA = 1.0
MU = 0.0
# The first DELAY samples only warm up the detector and receive NaN scores;
# DELAY is also subtracted from bocd.t when slicing bocd.growth_probs.
DELAY = 15

# NOTE: THRESHOLD never enters the score that calc_auc_average consumes, so
# the reported AUC is identical for every threshold. The detector is
# therefore run once per LAMBDA, and the result is printed once per
# threshold purely to keep the cell's output format unchanged.
THRESHOLDS = [0.1, 0.3]

for LAMBDA in [100, 600]:
    scores_bocpd = []
    for seed in range(N_trial):
        X = generate_multiple_jumping_variance(N, mu=MU, coef=COEF, seed=seed)

        # BOCPD with a constant hazard parameterised by LAMBDA.
        bocd = BOCD(partial(constant_hazard, LAMBDA),
                    StudentT(ALPHA, BETA, KAPPA, MU), X)

        # Warm-up phase: update the detector, but emit NaN as the score.
        scores = [np.nan] * DELAY
        for x in X[:DELAY]:
            bocd.update(x)

        # Scoring phase: weight growth_probs[k] by 1 / (k + 2) and sum over
        # the first (bocd.t - DELAY) entries.
        for x in X[DELAY:]:
            bocd.update(x)
            score = np.sum(bocd.growth_probs[:bocd.t - DELAY] * 1.0 / (1.0 + np.arange(1, bocd.t - DELAY + 1)))
            scores.append(score)

        scores_bocpd.append(scores)

    scores_bocpd = np.array(scores_bocpd)
    auc_list = calc_auc_average(scores_bocpd)
    for THRESHOLD in THRESHOLDS:
        print('LAMBDA =', LAMBDA, 'THRESHOLD =', THRESHOLD, ':', np.mean(auc_list), '+/-', np.std(auc_list))

LAMBDA = 100 THRESHOLD = 0.1 : 0.5742465416006823 +/- 0.022188508154366172
LAMBDA = 100 THRESHOLD = 0.3 : 0.5742465416006823 +/- 0.022188508154366172
LAMBDA = 600 THRESHOLD = 0.1 : 0.5281963189453311 +/- 0.035787437447743084
LAMBDA = 600 THRESHOLD = 0.3 : 0.5281963189453311 +/- 0.035787437447743084


# ADWIN2

In [9]:
# Sweep ADWIN2's `delta` while `M` stays fixed, and report the mean and
# standard deviation of the AUC values for each setting.
M = 5
delta_grid = (0.1, 0.3, 0.5, 0.7, 0.9)

for delta in delta_grid:
    scores_ad = []
    for seed in range(N_trial):
        X = generate_multiple_jumping_variance(N, mu=MU, coef=COEF, seed=seed)

        # New detector for every seed; its raw output is passed through
        # InvRunLen before being collected.
        ad = ADWIN2()
        scores = ad.transform(X, delta=delta, M=M)
        scores_ad += [InvRunLen(scores)]

    scores_ad = np.array(scores_ad)
    auc_list = calc_auc_average(scores_ad)
    auc_mean = np.mean(auc_list)
    auc_std = np.std(auc_list)
    print('delta =', delta, ':', auc_mean, '+/-', auc_std)

delta = 0.1 : 0.44336488197236096 +/- 0.03646385471425184
delta = 0.3 : 0.4454841466006146 +/- 0.06746461457119995
delta = 0.5 : 0.49803839390371 +/- 0.06684629117401505
delta = 0.7 : 0.49202223440397386 +/- 0.05820238089213481
delta = 0.9 : 0.5224774520512716 +/- 0.03845013268912315


# D-MDL

In [10]:
# Half-width of the sliding window: each score is computed on X[t-h:t+h].
h = 100

# Forwarded unchanged to every SMDL scoring call.
mu_max = 5.0
sigma_min = 0.005


def _windowed_change_scores(X, method_name, h, mu_max, sigma_min):
    """Score every position of ``X`` with one SMDL change-score method.

    A fresh ``SMDL(Norm1D())`` is built for each call. For every ``t`` in
    ``range(h, len(X) - h)`` the window ``X[t-h:t+h]`` is passed to the
    method named ``method_name``. The first and last ``h`` positions, where
    no full window fits, are filled with NaN.

    Parameters
    ----------
    X : sequence
        The data series; must support ``len`` and slicing.
    method_name : str
        Name of the SMDL method to call, e.g. ``'calc_change_score'``.
    h : int
        Half-width of the window; also passed to the scoring method.
    mu_max, sigma_min : float
        Passed through as keyword arguments to the scoring method.

    Returns
    -------
    numpy.ndarray
        ``h`` NaNs, then one score per window, then ``h`` NaNs.
    """
    smdl = SMDL(Norm1D())
    score_fn = getattr(smdl, method_name)
    pad = [np.nan] * h
    core = [score_fn(X[(t - h):(t + h)], h, mu_max=mu_max, sigma_min=sigma_min)
            for t in range(h, len(X) - h)]
    return np.array(pad + core + pad)


scores_list_0th = []
scores_list_1st = []
scores_list_2nd = []

for i in range(N_trial):
    X = generate_multiple_jumping_variance(N, mu=MU, coef=COEF, seed=i)

    # Same windows and parameters each time; only the SMDL method differs.
    scores_0th = _windowed_change_scores(X, 'calc_change_score', h, mu_max, sigma_min)
    scores_list_0th.append(scores_0th)

    scores_1st = _windowed_change_scores(X, 'calc_change_score_1st', h, mu_max, sigma_min)
    scores_list_1st.append(scores_1st)

    scores_2nd = _windowed_change_scores(X, 'calc_change_score_2nd', h, mu_max, sigma_min)
    scores_list_2nd.append(scores_2nd)

scores_list_0th = np.array(scores_list_0th)
scores_list_1st = np.array(scores_list_1st)
scores_list_2nd = np.array(scores_list_2nd)

auc_list_0th = calc_auc_average(scores_list_0th)
auc_list_1st = calc_auc_average(scores_list_1st)
auc_list_2nd = calc_auc_average(scores_list_2nd)

In [11]:
# Mean and standard deviation of the AUC values in auc_list_0th
# (scores produced by calc_change_score).
mean_0th, std_0th = np.mean(auc_list_0th), np.std(auc_list_0th)
print(mean_0th, '+/-', std_0th)

0.8249345674124605 +/- 0.031398155753491745


In [12]:
# Mean and standard deviation of the AUC values in auc_list_1st
# (scores produced by calc_change_score_1st).
mean_1st, std_1st = np.mean(auc_list_1st), np.std(auc_list_1st)
print(mean_1st, '+/-', std_1st)

0.27228625911939464 +/- 0.015672714873395825


In [13]:
# Mean and standard deviation of the AUC values in auc_list_2nd
# (scores produced by calc_change_score_2nd).
mean_2nd, std_2nd = np.mean(auc_list_2nd), np.std(auc_list_2nd)
print(mean_2nd, '+/-', std_2nd)

0.48599491831458697 +/- 0.0037169810067321034
