In [1]:
# Mount Google Drive at /content/drive so the project files stored there can be
# read and written from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os, sys
# Make the project folder importable (needed for `from bt_cpd import *`).
# Guarded so that re-running this cell does not pile up duplicate entries
# on sys.path.
if '/content/drive/MyDrive/CPD_BT' not in sys.path:
    sys.path.append('/content/drive/MyDrive/CPD_BT')

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import random

import itertools

from bt_cpd import *

import time
import bisect

import pandas as pd

import statsmodels.api as sm
from sklearn import linear_model

# Install an "ignore" filter for scikit-learn's ConvergenceWarning so these
# warnings are not printed by later cells.
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

In [4]:
# ---- Simulation design -------------------------------------------------------
# T segments, each of length Delta; the true change points are the interior
# segment boundaries.
T = 4
Delta = 2000
m = np.array([Delta] * T)
cp_truth = np.cumsum(m)[:T-1]

# One length-n parameter vector per segment.
n = 100
beta = np.zeros((T, n))

# kappa is the log-odds of t; beta_ref is rescaled below so that its range
# (max - min) equals kappa.
t = 0.9
kappa = np.log(t / (1 - t))
delta = 1
# get_beta_with_gap / change_type come from bt_cpd; their exact semantics are
# not visible in this notebook.
beta_ref = get_beta_with_gap(n, delta)
beta_ref *= kappa / (np.max(beta_ref) - np.min(beta_ref))
beta[0] = beta_ref[:]
beta[1] = change_type(beta_ref, 1)
beta[2] = change_type(beta_ref, 2)
beta[3] = change_type(beta_ref, 3)

# Sanity check: the range of the first segment's parameters (should equal kappa).
print(max(beta[0]) - min(beta[0]))

# Euclidean norm of the parameter jump at each change point.
# The loop index is deliberately NOT called `t`, so the value t = 0.9 set above
# is still intact after this cell has run.
diff = np.zeros(T - 1)
for seg in range(1, T):
    diff[seg - 1] = np.sum(np.abs(beta[seg] - beta[seg - 1])**2)**0.5
print(diff)

2.1972245773362196
[12.81317951 11.09709382 12.81317951]


In [5]:
# Display the ground-truth change points (np.cumsum(m)[:T-1] from the design cell).
cp_truth

array([2000, 4000, 6000])

In [6]:
# Project folder on the mounted Drive. The trailing slash matters because file
# names are appended by plain string concatenation.
path = '/content/drive/MyDrive/CPD_BT/'

# The data file name encodes n, Delta and the number of change points (T - 1).
# Four arrays are read back one after another from the same open handle, in
# the order train X, train Y, test X, test Y.
data_file = path + f'data_n{n}_Delta{Delta}_K{T - 1}.npy'
with open(data_file, 'rb') as f:
    X_train_list, Y_train_list, X_test_list, Y_test_list = (
        np.load(f) for _ in range(4)
    )

In [7]:
# Inspect the dimensions of the loaded training array; the first axis is the
# one indexed by the replication counter in the simulation loop.
X_train_list.shape

(100, 8000, 100)

In [8]:
# Seed the random number generators so the replications are reproducible.
# Python's `random` is seeded in addition to NumPy because bt_cpd is
# star-imported and it is not visible from here which generator it draws from.
np.random.seed(0)
random.seed(0)

# Arguments forwarded to wbs_cv_sst below (see bt_cpd for their meaning).
m_intervals = 50
grid_n = 250
gamma_list = [5, 80, 100]

nt = Delta * T  # total series length: T segments of length Delta
B = 100         # number of replications to run

# Per-replication results.
run_time_wbs = np.zeros(B)   # elapsed seconds for building the tensors and fitting
loc_error_wbs = np.zeros(B)  # cp_distance between estimated and true change points
K_wbs = np.zeros(B)          # number of estimated change points

cp_best_list = []
param_best_list = []

for b in range(B):
    X_train = X_train_list[b]
    Y_train = Y_train_list[b]
    X_test = X_test_list[b]
    Y_test = Y_test_list[b]

    # perf_counter() is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    wbs_fit = wbs_cv_sst(m_intervals, gamma_list, grid_n, smooth = 5, buffer = 5)
    tensor_train = wbs_fit.create_game_tensor(X_train, Y_train, step = 1)
    tensor_test = wbs_fit.create_game_tensor(X_test, Y_test, step = 1)
    cp_best, cp_val, cusum_val, threshold_best, grid = wbs_fit.fit(tensor_train, tensor_test)

    run_time_wbs[b] = time.perf_counter() - start_time
    loc_error_wbs[b] = cp_distance(cp_best, cp_truth)
    K_wbs[b] = len(cp_best)

    cp_best_list.append(cp_best)
    param_best_list.append(threshold_best)

    print(b)

# Summary over all replications. K* = T - 1 is the true number of change points.
print('---------- wbs -----------')
print("avg loc error: {0}, avg time: {1}".format(loc_error_wbs.mean(), run_time_wbs.mean()))
print("std loc error: {0}, std time: {1}".format(loc_error_wbs.std(), run_time_wbs.std()))
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(
    np.count_nonzero(K_wbs < T - 1),
    np.count_nonzero(K_wbs == T - 1),
    np.count_nonzero(K_wbs > T - 1)))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- wbs -----------
avg loc error: 1116.32, avg time: 19.345198669433593
std loc error: 694.8306682926424, std time: 1.9221049460782096
K < K*: 57, K = K*: 42, K > K*: 1


In [9]:
# Display the per-replication localisation error (cp_distance between the
# estimated and the true change points).
loc_error_wbs

array([1616.,  768., 1808., 1728.,  480., 1328., 1520.,  944.,  336.,
        416., 2048., 1200.,  320., 1488.,  384., 1120., 1584.,  192.,
       1504.,  400., 1568., 1008.,  400., 1024.,  576.,  512.,  112.,
        352., 1840.,  624., 1312., 1232., 1584., 1040., 1136., 1200.,
       1968., 1552.,  496., 1632., 1280., 2384.,  656., 2672., 1104.,
        432., 1856.,  224.,  288.,   96., 3152., 1968., 1808., 1392.,
        240.,  352.,  416.,  368., 1616., 1424., 1328.,  240., 1024.,
       1824.,  416., 1440.,  464., 1008., 1808.,  208.,  672., 1168.,
       1856.,  464.,   80., 1680.,  576., 2464.,  368., 1248., 1664.,
       2048., 1200., 1984., 2112.,  256., 1696., 2128.,  176.,  880.,
        880., 1072., 2272., 1552.,  400., 1712., 2032.,  512.,  432.,
        208.])

In [10]:
# Display the threshold_best value returned by wbs_fit.fit in each replication.
param_best_list

[5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5]

In [11]:
# Display the estimated change points (cp_best) of each replication.
cp_best_list

[[3616, 6656],
 [1376, 3232, 6176],
 [2112, 4192],
 [2272, 6336],
 [1824, 3520, 5984],
 [3328, 6400],
 [1984, 4480],
 [2272, 3776, 5056],
 [2272, 4224, 6336],
 [2112, 4416, 5696],
 [1952, 6976],
 [1760, 4800],
 [1696, 4320, 5824],
 [3488, 6112],
 [1888, 4384, 5984],
 [2112, 5120],
 [3584, 5952],
 [1952, 4192, 6112],
 [2496, 5856],
 [1600, 3808, 5664],
 [2432, 5952],
 [1920, 4992],
 [1600, 4160, 6208],
 [2976, 5760],
 [2016, 3424, 6304],
 [2080, 4512, 5856],
 [1888, 3968, 5888],
 [2016, 4352, 6016],
 [3840, 5984],
 [2016, 3936, 5376],
 [2688, 5888],
 [3232, 5984],
 [1792, 4416],
 [1984, 4960],
 [1824, 4864],
 [3200, 5344],
 [3968, 5792],
 [3552, 5824],
 [1504, 4320, 5856],
 [2368, 6080],
 [1920, 5280],
 [4384],
 [1344, 4000, 6176],
 [3328],
 [1760, 4896],
 [1568, 4192, 5696],
 [2144, 6976],
 [1856, 3776, 5984],
 [2208, 4288, 5952],
 [1920, 3904, 6080],
 [5152],
 [3968, 5760],
 [3808, 4928],
 [3392, 6112],
 [2080, 3936, 5760],
 [2208, 4352, 5920],
 [1696, 3584, 5984],
 [1824, 3904, 6368]

In [12]:
import pickle

# Persist the simulation results next to the input data. The file name encodes
# n, Delta, the number of change points (T - 1) and grid_n.
results_file = path + f'sst_wbs_n{n}_Delta{Delta}_K{T - 1}_grid{grid_n}.pickle'
results = [cp_best_list, param_best_list, loc_error_wbs, run_time_wbs, K_wbs]
with open(results_file, 'wb') as f:
    pickle.dump(results, f)