In [1]:
import numpy as np

In [2]:
import sys
import tqdm
from functools import partial

In [3]:
# Add the local ./adwin directory to the import search path.
# NOTE(review): presumably this is what makes `adwin2` importable below - confirm.
sys.path.append('./adwin')

In [4]:
from mdl.smdl import SMDL
from mdl.model import Norm1D
from cf.mycf import ChangeFinder
from bocpd.mybocpd import BOCD, StudentT, constant_hazard
from adwin2 import ADWIN2
from scaw.SCAW2 import MDLCPStat_adwin2, lnml_gaussian

In [5]:
from generate_data import generate_multiple_changing_mean_gradual
from evaluation import calc_auc_average, calc_falarms_benefit, InvRunLen

In [6]:
# --- Shared experiment settings (used by every detector section) ---
# N is forwarded as the first positional argument of the data generator;
# its exact meaning is not visible in this notebook - TODO confirm.
# N_trial is the number of independently seeded series (seed = 0 .. N_trial-1).
N, N_trial = 10, 10

# Forwarded to the data generator as sigma= and coef= respectively.
SIGMA, COEF = 1.0, 0.3

# True change points: 1000, 2000, ..., 9000 shifted by one (999, 1999, ..., 8999).
cps_true = np.arange(1000, 10000, 1000) - 1
# --------------------------------------------------------------------

# ChangeFinder

In [7]:
# Values handed to ChangeFinder as smooth1= / smooth2=
# (presumably smoothing window sizes - confirm against cf.mycf).
smooth1, smooth2 = 5, 5

# Grid search over the ChangeFinder parameter r and the order (the same value
# is used for order1 and order2). For every combination, N_trial seeded series
# are scored and the mean / standard deviation of the AUC values is printed.
for r_cf in [0.003, 0.005, 0.01, 0.03, 0.1]:
    for order in [2, 3, 4, 5]:
        per_trial_scores = []

        for trial in range(N_trial):
            X = generate_multiple_changing_mean_gradual(N, sigma=SIGMA, coef=COEF, seed=trial)

            # A fresh detector per series so no state carries over between trials.
            cf = ChangeFinder(r=r_cf, order1=order, order2=order,
                              smooth1=smooth1, smooth2=smooth2)

            # update() yields a pair per observation; only its first item is kept.
            per_trial_scores.append(np.array([cf.update(x)[0] for x in X]))

        scores_cf = np.array(per_trial_scores)
        auc_list = calc_auc_average(scores_cf)
        print('r_cf =', r_cf, 'order =', order, ':', np.mean(auc_list), '+/-', np.std(auc_list))

r_cf = 0.003 order = 2 : 0.4492062336361773 +/- 0.027416174408318592
r_cf = 0.003 order = 3 : 0.45604155481956055 +/- 0.021666809262538358
r_cf = 0.003 order = 4 : 0.4829613377010413 +/- 0.017168242331332118
r_cf = 0.003 order = 5 : 0.4768801961245641 +/- 0.018109670983715766
r_cf = 0.005 order = 2 : 0.46080019039345566 +/- 0.02630977626974498
r_cf = 0.005 order = 3 : 0.4630936965046272 +/- 0.026587154878157642
r_cf = 0.005 order = 4 : 0.4697871881372088 +/- 0.019321911517344463
r_cf = 0.005 order = 5 : 0.47232447364293384 +/- 0.018064782861951158
r_cf = 0.01 order = 2 : 0.4985402249610888 +/- 0.035127721135277644
r_cf = 0.01 order = 3 : 0.49385323880020754 +/- 0.03289042261446822
r_cf = 0.01 order = 4 : 0.48719251686152437 +/- 0.03259074193369835
r_cf = 0.01 order = 5 : 0.48679292877740377 +/- 0.0327376185133799
r_cf = 0.03 order = 2 : 0.5047395822323573 +/- 0.029162340358474533
r_cf = 0.03 order = 3 : 0.49791978288148064 +/- 0.02686503248839184
r_cf = 0.03 order = 4 : 0.4938002201348

# BOCPD

In [8]:
# Hyper-parameters passed positionally to StudentT(ALPHA, BETA, KAPPA, MU).
ALPHA = 0.1
BETA = 1.0
KAPPA = 1.0
MU = 0.0
# The first DELAY observations only update the detector; their score slots are NaN.
DELAY = 15

# Sweep over LAMBDA, the value bound as the first argument of constant_hazard.
#
# THRESHOLD used to drive a list of hard change-point decisions that was never
# read afterwards. The score below (and therefore the AUC) does not depend on
# it, so the detectors are run once per LAMBDA instead of once per
# (LAMBDA, THRESHOLD) pair. The result is still reported for every THRESHOLD so
# the printed table keeps its original rows.
for LAMBDA in [100, 600]:
    scores_bocpd = []
    for i in range(N_trial):
        X = generate_multiple_changing_mean_gradual(N, sigma=SIGMA, coef=COEF, seed=i)

        # BOCPD
        bocd = BOCD(partial(constant_hazard, LAMBDA),
                    StudentT(ALPHA, BETA, KAPPA, MU), X)

        # NaN-pad the warm-up period so the score series has the same length as X.
        scores = [np.nan] * DELAY
        for x in X[:DELAY]:
            bocd.update(x)
        for x in X[DELAY:]:
            bocd.update(x)
            # Weighted sum of the leading growth probabilities, with weights
            # 1/2, 1/3, ..., 1/(n_terms + 1).
            n_terms = bocd.t - DELAY
            score = np.sum(bocd.growth_probs[:n_terms] * 1.0 / (1.0 + np.arange(1, n_terms + 1)))
            scores.append(score)

        scores_bocpd.append(scores)

    scores_bocpd = np.array(scores_bocpd)
    auc_list = calc_auc_average(scores_bocpd)

    # Same numbers for every THRESHOLD by construction (see note above).
    for THRESHOLD in [0.1, 0.3]:
        print('LAMBDA =', LAMBDA, 'THRESHOLD =', THRESHOLD, ':', np.mean(auc_list), '+/-', np.std(auc_list))

LAMBDA = 100 THRESHOLD = 0.1 : 0.4063147273198924 +/- 0.04844171574690272
LAMBDA = 100 THRESHOLD = 0.3 : 0.4063147273198924 +/- 0.04844171574690272
LAMBDA = 600 THRESHOLD = 0.1 : 0.4159691242866278 +/- 0.03845106903470217
LAMBDA = 600 THRESHOLD = 0.3 : 0.4159691242866278 +/- 0.03845106903470217


# Adwin2

In [9]:
# Forwarded to ADWIN2.transform as the keyword argument M.
M = 5

# Sweep over delta. For each value, every seeded series is transformed by a
# fresh ADWIN2 instance, the result is passed through InvRunLen, and the AUC
# mean / standard deviation over the trials is printed.
# NOTE(review): the recorded output shows 0.5 +/- 0.0 for every delta, which
# looks like a constant score series - worth confirming this is expected.
for delta in [0.1, 0.3, 0.5, 0.7, 0.9]:
    per_trial_scores = []
    for trial in range(N_trial):
        X = generate_multiple_changing_mean_gradual(N, sigma=SIGMA, coef=COEF, seed=trial)

        # ADWIN2
        detector = ADWIN2()
        raw_scores = detector.transform(X, delta=delta, M=M)
        per_trial_scores.append(InvRunLen(raw_scores))

    scores_ad = np.array(per_trial_scores)
    auc_list = calc_auc_average(scores_ad)
    print('delta =', delta, ':', np.mean(auc_list), '+/-', np.std(auc_list))

delta = 0.1 : 0.5 +/- 0.0
delta = 0.3 : 0.5 +/- 0.0
delta = 0.5 : 0.5 +/- 0.0
delta = 0.7 : 0.5 +/- 0.0
delta = 0.9 : 0.5 +/- 0.0


# D-MDL

In [10]:
# Half-width of the sliding window: each score is computed on X[t-h : t+h].
h = 100

# Passed to calc_auc_average as T=.
T = 100
# Forwarded unchanged to every SMDL change-score call.
mu_max = 50.0
sigma_min = 0.005


def _padded_window_scores(score_fn, series, half_width, **score_kwargs):
    """Apply ``score_fn`` to every full window of ``series`` and NaN-pad the edges.

    For each t in [half_width, len(series) - half_width) the window
    ``series[t - half_width : t + half_width]`` is scored with
    ``score_fn(window, half_width, **score_kwargs)``. The first and last
    ``half_width`` positions, where no full window exists, are filled with NaN
    so the returned array lines up with ``series``.
    """
    edge = [np.nan] * half_width
    core = [score_fn(series[(t - half_width):(t + half_width)], half_width, **score_kwargs)
            for t in range(half_width, len(series) - half_width)]
    return np.array(edge + core + edge)


scores_list_0th = []
scores_list_1st = []
scores_list_2nd = []
for trial in range(N_trial):
    X = generate_multiple_changing_mean_gradual(N, sigma=SIGMA, coef=COEF, seed=trial)

    norm1d = Norm1D()
    smdl = SMDL(norm1d)

    # The three score variants are evaluated one after another on the same
    # SMDL instance, in this order: calc_change_score, then _1st, then _2nd.
    for score_fn, collected in (
        (smdl.calc_change_score, scores_list_0th),
        (smdl.calc_change_score_1st, scores_list_1st),
        (smdl.calc_change_score_2nd, scores_list_2nd),
    ):
        collected.append(
            _padded_window_scores(score_fn, X, h, mu_max=mu_max, sigma_min=sigma_min)
        )

# One row per trial.
scores_list_0th = np.array(scores_list_0th)
scores_list_1st = np.array(scores_list_1st)
scores_list_2nd = np.array(scores_list_2nd)

auc_list_0th = calc_auc_average(scores_list_0th, T=T)
auc_list_1st = calc_auc_average(scores_list_1st, T=T)
auc_list_2nd = calc_auc_average(scores_list_2nd, T=T)

In [11]:
# AUC mean and standard deviation over trials for the calc_change_score variant.
auc_mean_0th = np.mean(auc_list_0th)
auc_std_0th = np.std(auc_list_0th)
print(auc_mean_0th, '+/-', auc_std_0th)

0.6141580402022175 +/- 0.041135724719713884


In [12]:
# AUC mean and standard deviation over trials for the calc_change_score_1st variant.
auc_mean_1st = np.mean(auc_list_1st)
auc_std_1st = np.std(auc_list_1st)
print(auc_mean_1st, '+/-', auc_std_1st)

0.62954090129221417 +/- 0.02024658806676363


In [13]:
# AUC mean and standard deviation over trials for the calc_change_score_2nd variant.
auc_mean_2nd = np.mean(auc_list_2nd)
auc_std_2nd = np.std(auc_list_2nd)
print(auc_mean_2nd, '+/-', auc_std_2nd)

0.6201857067964913 +/- 0.0033140487836213293
