In [1]:
# Mount Google Drive at /content/drive so the project folder stored on Drive
# can be read from and written to by the cells below (Colab-only step).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os, sys
# Put the Drive project folder on the import path so that the later
# `from bt_cpd import *` can resolve (presumably bt_cpd.py lives in this folder).
sys.path.append('/content/drive/MyDrive/CPD_BT')

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import random

import itertools

from bt_cpd import *

import time
import bisect

import pandas as pd

import statsmodels.api as sm
from sklearn import linear_model

# Silence scikit-learn ConvergenceWarning messages for the rest of the session.
# NOTE(review): presumably these are raised by sklearn fits performed inside
# bt_cpd and would otherwise flood the experiment output — confirm.
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

  import pandas.util.testing as tm


In [4]:
# Experiment layout: T consecutive segments, each Delta observations long.
T = 4
Delta = 2000

# Per-segment lengths, and the true change points obtained as the cumulative
# segment ends with the final end point dropped.
m = np.full(T, Delta)
cp_truth = m.cumsum()[:T - 1]
print(cp_truth)

# Appears in the data/result file names built in later cells.
# NOTE(review): presumably the number of compared items — confirm.
n = 100

[2000 4000 6000]


In [5]:
# Folder on the mounted Drive that holds the pre-generated experiment data.
path = '/content/drive/MyDrive/CPD_BT/experiment_random/'
data_file = 'data_n{}_Delta{}_K{}.npy'.format(n, Delta, T - 1)

# Five arrays are read back-to-back from the same file handle; each np.load
# call consumes the next stored array, so the unpacking order below must
# match the order in which they were saved.
with open(path + data_file, 'rb') as f:
    beta_list, X_train_list, Y_train_list, X_test_list, Y_test_list = (
        np.load(f) for _ in range(5)
    )

In [6]:
# Sanity check on the loaded training covariates; the first axis is the one
# indexed by the replicate number b in the experiment loop below.
X_train_list.shape

(100, 8000, 100)

In [7]:
# WBS change-point experiment: run wbs_cv_bt on each of the B replicates and
# record the localization error, the run time and the number of detected
# change points, then print summary statistics.
#
# NOTE(review): only NumPy's global RNG is seeded here. If bt_cpd draws
# randomness from the stdlib `random` module instead, results will differ
# between runs — confirm and seed that generator too if needed.
np.random.seed(0)

# Settings forwarded to wbs_cv_bt (their exact meaning is defined in bt_cpd).
m_intervals = 50
grid_n = 250
gamma_list = [80, 100]
lam_list = [0.1]

# Not read in this cell; kept because other cells may rely on it.
# Presumably the total number of observations per replicate.
nt = Delta * T
# Number of replicates to process.
B = 100

# Per-replicate metrics, filled in by the loop below.
run_time_wbs = np.zeros(B)
loc_error_wbs = np.zeros(B)
K_wbs = np.zeros(B)

# Per-replicate detected change points and the selected threshold.
cp_best_list = []
param_best_list = []

for b in range(B):
    X_train = X_train_list[b]
    Y_train = Y_train_list[b]
    X_test = X_test_list[b]
    Y_test = Y_test_list[b]

    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments during these multi-minute fits.
    start_time = time.perf_counter()
    wbs_fit = wbs_cv_bt(m_intervals, grid_n, lam_list, gamma_list, smooth = 5, buffer = 5)
    res = wbs_fit.fit((X_train, Y_train), (X_test, Y_test))
    cp_best, cp_val, cusum_val, threshold_best, grid = res
    run_time_wbs[b] = time.perf_counter() - start_time

    loc_error_wbs[b] = cp_distance(cp_best, cp_truth)
    K_wbs[b] = len(cp_best)

    cp_best_list.append(cp_best)
    param_best_list.append(threshold_best)

    # Progress indicator: index of the replicate that just finished.
    print(b)

# Summary over all replicates; K* = T - 1 is the true number of change points.
K_true = T - 1
print('---------- wbs -----------')
print("avg loc error: {0}, avg time: {1}".format(loc_error_wbs.mean(), run_time_wbs.mean()))
print("std loc error: {0}, std time: {1}".format(loc_error_wbs.std(), run_time_wbs.std()))
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(
    np.count_nonzero(K_wbs < K_true),
    np.count_nonzero(K_wbs == K_true),
    np.count_nonzero(K_wbs > K_true)))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- wbs -----------
avg loc error: 459.36, avg time: 410.5220726919174
std loc error: 512.7858718802615, std time: 48.67291383007389
K < K*: 0, K = K*: 53, K > K*: 47


In [8]:
# Per-replicate localization errors, i.e. cp_distance(cp_best, cp_truth) for
# each replicate in loop order.
loc_error_wbs

array([ 896.,   16.,   16.,   16.,   16., 1456.,  864.,  576.,   16.,
         16.,   16.,  848.,   16.,   16.,  384., 1328.,   48.,  928.,
       1360.,  848.,   16.,   48.,  688.,  688.,   32.,   48.,   16.,
       1040.,   32., 1616., 1008.,   16.,   48.,   16.,   16.,   32.,
         16.,  464.,  624.,  992.,  784.,   16.,   96., 1104.,   16.,
        912.,  768., 1552., 1264.,   16.,   16.,   16.,   16.,   16.,
         16.,  624.,   48.,   16.,  768.,   48.,   48.,   16., 1104.,
         16.,   32., 1712., 1104.,   48.,  112.,  656.,  592., 1136.,
         16.,   64., 1648.,   16.,  720.,   16.,  960.,   48.,  864.,
         32.,  944.,   48.,   16.,  752.,  464.,   16.,  528.,  704.,
         16.,  608.,   48.,   16.,  944., 1520.,  912.,  752.,   16.,
       1424.])

In [9]:
# Detected change points for each replicate, in loop order; compare against
# cp_truth to see where spurious change points were reported.
cp_best_list

[[1280, 1984, 3136, 4000, 4896, 5312, 5984],
 [2016, 4000, 6016],
 [1984, 4000, 5984],
 [2016, 4000, 6016],
 [1984, 4000, 6016],
 [544, 1984, 4000, 6016],
 [1920, 4000, 4864, 5984],
 [2016, 3424, 4032, 5984],
 [1984, 4000, 5984],
 [1984, 4000, 6016],
 [1984, 4000, 5984],
 [1984, 4000, 5504, 6080, 6848],
 [1984, 4000, 5984],
 [2016, 4000, 6016],
 [1984, 3616, 4000, 6048],
 [1984, 4000, 6016, 7328],
 [1952, 4000, 5984],
 [2016, 3488, 3968, 4928, 6016],
 [2016, 4000, 6016, 7360],
 [2176, 3392, 3904, 4384, 5152, 5952],
 [1984, 4000, 6016],
 [1952, 4000, 6016],
 [2016, 4000, 5312, 6016, 6560],
 [2016, 4000, 5984, 6688],
 [2016, 3968, 5984],
 [2016, 3968, 6048],
 [1984, 4000, 6016],
 [2016, 4000, 5984, 6528, 7040],
 [1984, 4032, 5984],
 [1248, 1984, 4000, 6016, 7616],
 [1984, 3168, 4000, 6048, 7008],
 [2016, 4000, 6016],
 [1984, 4000, 5952],
 [1984, 4000, 6016],
 [1984, 4000, 5984],
 [1984, 3968, 5984],
 [2016, 4000, 5984],
 [2080, 2464, 4000, 6016],
 [1952, 4000, 6048, 6624],
 [2016, 4000, 

In [10]:
import pickle

# Store every per-replicate result in one pickle next to the input data; the
# file name encodes n, Delta, the number of change points and the grid size.
result_file = 'wbs_n{}_Delta{}_K{}_grid{}.pickle'.format(n, Delta, T - 1, grid_n)
with open(path + result_file, 'wb') as f:
    pickle.dump(
        [cp_best_list, param_best_list, loc_error_wbs, run_time_wbs, K_wbs],
        f,
    )