In [1]:
# Colab-only setup: mount Google Drive so paths under /content/drive resolve.
from google.colab import drive
drive.mount('/content/drive')

import os, sys
# Put the project folder on the import path.
# NOTE(review): presumably this is where `cpd_utils` (imported below) lives -- confirm.
sys.path.append('/content/drive/MyDrive/DCDP/covariance')

Mounted at /content/drive


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import random

import itertools

from cpd_utils import *

import time
import bisect

import pandas as pd

In [3]:
def get_covariance(p, delta1, delta2):
  """Return a p x p symmetric tridiagonal matrix.

  The main diagonal is filled with ``delta1`` and the first sub- and
  super-diagonals with ``delta2``; every other entry is zero.

  Parameters
  ----------
  p : int
      Matrix dimension.
  delta1 : float
      Value placed on the main diagonal.
  delta2 : float
      Value placed on the two adjacent off-diagonals.

  Returns
  -------
  numpy.ndarray
      Array of shape (p, p).
  """
  cov = delta1 * np.eye(p)
  # Row indices 1..p-1; pairing each with its left neighbour addresses the
  # sub-diagonal, and swapping the pair addresses the super-diagonal.
  rows = np.arange(1, p)
  cov[rows, rows - 1] = delta2
  cov[rows - 1, rows] = delta2
  return cov

# DCDP

In [4]:
# Setting for this experiment; these values are also embedded in the data
# file name below, so they select which saved data set is loaded.
T = 4
Delta = 100
p = 10
delta1 = 5
delta2 = 0.3

# Reference matrices: identity in slots 0 and 2, banded matrix in slots 1 and 3.
# They are only used here to derive `diff`; `theta` is replaced by the stored
# array once the file is read.
theta = np.zeros((T, p, p))
theta[0::2] = np.eye(p)
theta[1::2] = get_covariance(p, delta1, delta2)

# Root of the summed squared entry-wise differences between consecutive
# matrices (one value per transition).
diff = np.array([
    np.sum(np.abs(theta[t] - theta[t - 1])**2)**0.5
    for t in range(1, T)
])

nt = T * Delta

path = '/content/drive/MyDrive/DCDP/covariance/'
with open(
    path + 'data_n' + str(nt) + '_p' + str(p) + '_Delta' + str(Delta)
    + '_K' + str(T - 1) + '_kappa' + str(int(np.mean(diff) * 100)) + '.npz',
    'rb',
) as f:
    data = np.load(f)
    # Pull every array out while the underlying file is still open.
    Y_train_list = data['Y_train_list']
    Y_test_list = data['Y_test_list']
    cp_truth_list = data['cp_truth_list']
    theta = data['theta']

In [5]:
# Tuning values handed to dcdp_cv_grid_covariance.
grid_n = 50
lam_list = [0]
gamma_list = [100,200,300]

# Number of data sets to process (index b below).
B = 100

# One slot per data set: wall-clock time, value returned by cp_distance,
# and number of returned change points.
run_time_dc, loc_error_dc, K_dc = (np.zeros(B) for _ in range(3))

# Raw outputs of every fit, kept for saving later.
cp_best_list, param_best_list, cp_best_cand_list = [], [], []

np.random.seed(0)

for b in range(B):
    Y_train, Y_test, cp_truth = Y_train_list[b], Y_test_list[b], cp_truth_list[b]

    # The timer covers both construction of the estimator and the fit.
    start_time = time.time()
    dcdp = dcdp_cv_grid_covariance(
        grid_n, lam_list, gamma_list,
        smooth = 2, buffer = 4, step_refine = 1,
        buffer_refine = 4, lam_refine = 0.1,
    )
    cp_best, param_best, cp_best_cand = dcdp.fit(Y_train, Y_test)
    run_time_dc[b] = time.time() - start_time

    loc_error_dc[b] = cp_distance(cp_best, cp_truth)
    K_dc[b] = len(cp_best)

    cp_best_list.append(cp_best)
    param_best_list.append(param_best)
    cp_best_cand_list.append(cp_best_cand)
    print(b)


# Summary over all B runs; T - 1 is the reference count K* in the last line.
print('---------- dcdp -----------')
print("avg loc error: {0}, avg time: {1}".format(np.mean(loc_error_dc), np.mean(run_time_dc)))
print("std loc error: {0}, std time: {1}".format(np.std(loc_error_dc), np.std(run_time_dc)))
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(
    np.sum(K_dc < T - 1), np.sum(K_dc == T - 1), np.sum(K_dc > T - 1)))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- dcdp -----------
avg loc error: 0.42, avg time: 0.6400042867660523
std loc error: 0.6352952069707437, std time: 0.0689087567539764
K < K*: 0, K = K*: 100, K > K*: 0


In [6]:
import pickle

# Save the DCDP outputs for this setting; the file name repeats the data-file
# tags and appends the grid size.
with open(
    path + 'dcdp_n' + str(nt) + '_p' + str(p) + '_Delta' + str(Delta)
    + '_K' + str(T - 1) + '_kappa' + str(int(np.mean(diff) * 100))
    + '_grid' + str(grid_n) + '.pickle',
    'wb',
) as f:
    # Stored as one list, in this fixed order.
    pickle.dump(
        [cp_best_list, param_best_list, cp_best_cand_list,
         loc_error_dc, run_time_dc, K_dc],
        f,
    )

In [7]:
# Setting for this experiment; these values are also embedded in the data
# file name below, so they select which saved data set is loaded.
T = 4
Delta = 100
p = 20
delta1 = 5
delta2 = 0.3

# Reference matrices: identity in slots 0 and 2, banded matrix in slots 1 and 3.
# They are only used here to derive `diff`; `theta` is replaced by the stored
# array once the file is read.
theta = np.zeros((T, p, p))
theta[0::2] = np.eye(p)
theta[1::2] = get_covariance(p, delta1, delta2)

# Root of the summed squared entry-wise differences between consecutive
# matrices (one value per transition).
diff = np.array([
    np.sum(np.abs(theta[t] - theta[t - 1])**2)**0.5
    for t in range(1, T)
])

nt = T * Delta

path = '/content/drive/MyDrive/DCDP/covariance/'
with open(
    path + 'data_n' + str(nt) + '_p' + str(p) + '_Delta' + str(Delta)
    + '_K' + str(T - 1) + '_kappa' + str(int(np.mean(diff) * 100)) + '.npz',
    'rb',
) as f:
    data = np.load(f)
    # Pull every array out while the underlying file is still open.
    Y_train_list = data['Y_train_list']
    Y_test_list = data['Y_test_list']
    cp_truth_list = data['cp_truth_list']
    theta = data['theta']

In [8]:
# Display the dimensions of the loaded training array (read-only check).
Y_train_list.shape

(100, 400, 20)

In [9]:
# Tuning values handed to dcdp_cv_grid_covariance.
grid_n = 50
lam_list = [0]
gamma_list = [500,600,700]

# Number of data sets to process (index b below).
B = 100

# One slot per data set: wall-clock time, value returned by cp_distance,
# and number of returned change points.
run_time_dc, loc_error_dc, K_dc = (np.zeros(B) for _ in range(3))

# Raw outputs of every fit, kept for saving later.
cp_best_list, param_best_list, cp_best_cand_list = [], [], []

np.random.seed(0)

for b in range(B):
    Y_train, Y_test, cp_truth = Y_train_list[b], Y_test_list[b], cp_truth_list[b]

    # The timer covers both construction of the estimator and the fit.
    start_time = time.time()
    dcdp = dcdp_cv_grid_covariance(
        grid_n, lam_list, gamma_list,
        smooth = 2, buffer = 2, step_refine = 1,
        buffer_refine = 2, lam_refine = 0.1,
    )
    cp_best, param_best, cp_best_cand = dcdp.fit(Y_train, Y_test)
    run_time_dc[b] = time.time() - start_time

    loc_error_dc[b] = cp_distance(cp_best, cp_truth)
    K_dc[b] = len(cp_best)

    cp_best_list.append(cp_best)
    param_best_list.append(param_best)
    cp_best_cand_list.append(cp_best_cand)
    print(b)


# Summary over all B runs; T - 1 is the reference count K* in the last line.
print('---------- dcdp -----------')
print("avg loc error: {0}, avg time: {1}".format(np.mean(loc_error_dc), np.mean(run_time_dc)))
print("std loc error: {0}, std time: {1}".format(np.std(loc_error_dc), np.std(run_time_dc)))
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(
    np.sum(K_dc < T - 1), np.sum(K_dc == T - 1), np.sum(K_dc > T - 1)))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- dcdp -----------
avg loc error: 0.66, avg time: 0.9612508654594422
std loc error: 4.377716299624726, std time: 0.05641314979971083
K < K*: 0, K = K*: 100, K > K*: 0


In [10]:
import pickle

# Save the DCDP outputs for this setting; the file name repeats the data-file
# tags and appends the grid size.
with open(
    path + 'dcdp_n' + str(nt) + '_p' + str(p) + '_Delta' + str(Delta)
    + '_K' + str(T - 1) + '_kappa' + str(int(np.mean(diff) * 100))
    + '_grid' + str(grid_n) + '.pickle',
    'wb',
) as f:
    # Stored as one list, in this fixed order.
    pickle.dump(
        [cp_best_list, param_best_list, cp_best_cand_list,
         loc_error_dc, run_time_dc, K_dc],
        f,
    )

In [11]:
# Setting for this experiment; these values are also embedded in the data
# file name below, so they select which saved data set is loaded.
T = 4
Delta = 500
p = 5
delta1 = 2
delta2 = 0.3

# Reference matrices: identity in slots 0 and 2, banded matrix in slots 1 and 3.
# They are only used here to derive `diff`; `theta` is replaced by the stored
# array once the file is read.
theta = np.zeros((T, p, p))
theta[0::2] = np.eye(p)
theta[1::2] = get_covariance(p, delta1, delta2)

# Root of the summed squared entry-wise differences between consecutive
# matrices (one value per transition).
diff = np.array([
    np.sum(np.abs(theta[t] - theta[t - 1])**2)**0.5
    for t in range(1, T)
])

nt = T * Delta

path = '/content/drive/MyDrive/DCDP/covariance/'
with open(
    path + 'data_n' + str(nt) + '_p' + str(p) + '_Delta' + str(Delta)
    + '_K' + str(T - 1) + '_kappa' + str(int(np.mean(diff) * 100)) + '.npz',
    'rb',
) as f:
    data = np.load(f)
    # Pull every array out while the underlying file is still open.
    Y_train_list = data['Y_train_list']
    Y_test_list = data['Y_test_list']
    cp_truth_list = data['cp_truth_list']
    theta = data['theta']

In [12]:
# Tuning values handed to dcdp_cv_grid_covariance.
grid_n = 100
lam_list = [0]
gamma_list = [100]

# Number of data sets to process (index b below).
B = 100

# One slot per data set: wall-clock time, value returned by cp_distance,
# and number of returned change points.
run_time_dc, loc_error_dc, K_dc = (np.zeros(B) for _ in range(3))

# Raw outputs of every fit, kept for saving later.
cp_best_list, param_best_list, cp_best_cand_list = [], [], []

np.random.seed(0)

for b in range(B):
    Y_train, Y_test, cp_truth = Y_train_list[b], Y_test_list[b], cp_truth_list[b]

    # The timer covers both construction of the estimator and the fit.
    start_time = time.time()
    dcdp = dcdp_cv_grid_covariance(
        grid_n, lam_list, gamma_list,
        smooth = 10, buffer = 10, step_refine = 1,
        buffer_refine = 10, lam_refine = 0.1,
    )
    cp_best, param_best, cp_best_cand = dcdp.fit(Y_train, Y_test)
    run_time_dc[b] = time.time() - start_time

    loc_error_dc[b] = cp_distance(cp_best, cp_truth)
    K_dc[b] = len(cp_best)

    cp_best_list.append(cp_best)
    param_best_list.append(param_best)
    cp_best_cand_list.append(cp_best_cand)
    print(b)


# Summary over all B runs; T - 1 is the reference count K* in the last line.
print('---------- dcdp -----------')
print("avg loc error: {0}, avg time: {1}".format(np.mean(loc_error_dc), np.mean(run_time_dc)))
print("std loc error: {0}, std time: {1}".format(np.std(loc_error_dc), np.std(run_time_dc)))
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(
    np.sum(K_dc < T - 1), np.sum(K_dc == T - 1), np.sum(K_dc > T - 1)))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- dcdp -----------
avg loc error: 4.95, avg time: 1.0082166194915771
std loc error: 6.057020719792858, std time: 0.21840795861314566
K < K*: 0, K = K*: 100, K > K*: 0


In [13]:
import pickle

# Save the DCDP outputs for this setting; the file name repeats the data-file
# tags and appends the grid size.
with open(
    path + 'dcdp_n' + str(nt) + '_p' + str(p) + '_Delta' + str(Delta)
    + '_K' + str(T - 1) + '_kappa' + str(int(np.mean(diff) * 100))
    + '_grid' + str(grid_n) + '.pickle',
    'wb',
) as f:
    # Stored as one list, in this fixed order.
    pickle.dump(
        [cp_best_list, param_best_list, cp_best_cand_list,
         loc_error_dc, run_time_dc, K_dc],
        f,
    )

In [14]:
# Setting for this experiment; these values are also embedded in the data
# file name below, so they select which saved data set is loaded.
T = 4
Delta = 500
p = 10
delta1 = 5
delta2 = 0.3

# Reference matrices: identity in slots 0 and 2, banded matrix in slots 1 and 3.
# They are only used here to derive `diff`; `theta` is replaced by the stored
# array once the file is read.
theta = np.zeros((T, p, p))
theta[0::2] = np.eye(p)
theta[1::2] = get_covariance(p, delta1, delta2)

# Root of the summed squared entry-wise differences between consecutive
# matrices (one value per transition).
diff = np.array([
    np.sum(np.abs(theta[t] - theta[t - 1])**2)**0.5
    for t in range(1, T)
])

nt = T * Delta

path = '/content/drive/MyDrive/DCDP/covariance/'
with open(
    path + 'data_n' + str(nt) + '_p' + str(p) + '_Delta' + str(Delta)
    + '_K' + str(T - 1) + '_kappa' + str(int(np.mean(diff) * 100)) + '.npz',
    'rb',
) as f:
    data = np.load(f)
    # Pull every array out while the underlying file is still open.
    Y_train_list = data['Y_train_list']
    Y_test_list = data['Y_test_list']
    cp_truth_list = data['cp_truth_list']
    theta = data['theta']

In [15]:
# Tuning values handed to dcdp_cv_grid_covariance.
grid_n = 100
gamma_list = [500]
lam_list = [0]

# Number of data sets to process (index b below).
B = 100

# One slot per data set: wall-clock time, value returned by cp_distance,
# and number of returned change points.
run_time_dc = np.zeros(B)
loc_error_dc = np.zeros(B)
K_dc = np.zeros(B)

# Raw outputs of every fit, kept for saving later.
cp_best_list = []
param_best_list = []
cp_best_cand_list = []

# Reset NumPy's global RNG before the loop, as the other experiment cells in
# this notebook do, so this cell does not depend on which cells ran before it.
# NOTE(review): whether dcdp_cv_grid_covariance draws from this RNG is not
# visible here -- confirm in cpd_utils.
np.random.seed(0)

for b in range(B):
    Y_train = Y_train_list[b]
    Y_test = Y_test_list[b]
    cp_truth = cp_truth_list[b]

    # The timer covers both construction of the estimator and the fit.
    start_time = time.time()
    dcdp = dcdp_cv_grid_covariance(grid_n, lam_list, gamma_list, smooth = 10, 
                    buffer = 20, step_refine = 1, buffer_refine = 10, lam_refine = 0.1)
    cp_best, param_best, cp_best_cand = dcdp.fit(Y_train, Y_test)
    run_time_dc[b] = time.time() - start_time
    loc_error_dc[b] = cp_distance(cp_best, cp_truth)
    K_dc[b] = len(cp_best)

    cp_best_list.append(cp_best)
    param_best_list.append(param_best)
    cp_best_cand_list.append(cp_best_cand)
    print(b)


# Summary over all B runs; T - 1 is the reference count K* in the last line.
print('---------- dcdp -----------')
print("avg loc error: {0}, avg time: {1}".format(loc_error_dc.mean(), run_time_dc.mean()))
print("std loc error: {0}, std time: {1}".format(loc_error_dc.std(), run_time_dc.std()))
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(sum(K_dc < T - 1), sum(K_dc == T - 1), sum(K_dc > T - 1)))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- dcdp -----------
avg loc error: 0.27, avg time: 1.2930213832855224
std loc error: 0.48692915295759404, std time: 0.36571865303287193
K < K*: 0, K = K*: 100, K > K*: 0


In [16]:
import pickle

# Save the DCDP outputs for this setting; the file name repeats the data-file
# tags and appends the grid size.
with open(
    path + 'dcdp_n' + str(nt) + '_p' + str(p) + '_Delta' + str(Delta)
    + '_K' + str(T - 1) + '_kappa' + str(int(np.mean(diff) * 100))
    + '_grid' + str(grid_n) + '.pickle',
    'wb',
) as f:
    # Stored as one list, in this fixed order.
    pickle.dump(
        [cp_best_list, param_best_list, cp_best_cand_list,
         loc_error_dc, run_time_dc, K_dc],
        f,
    )

In [17]:
# Setting for this experiment; these values are also embedded in the data
# file name below, so they select which saved data set is loaded.
T = 4
Delta = 500
p = 20
delta1 = 5
delta2 = 0.3

# Reference matrices: identity in slots 0 and 2, banded matrix in slots 1 and 3.
# They are only used here to derive `diff`; `theta` is replaced by the stored
# array once the file is read.
theta = np.zeros((T, p, p))
theta[0::2] = np.eye(p)
theta[1::2] = get_covariance(p, delta1, delta2)

# Root of the summed squared entry-wise differences between consecutive
# matrices (one value per transition).
diff = np.array([
    np.sum(np.abs(theta[t] - theta[t - 1])**2)**0.5
    for t in range(1, T)
])

nt = T * Delta

path = '/content/drive/MyDrive/DCDP/covariance/'
with open(
    path + 'data_n' + str(nt) + '_p' + str(p) + '_Delta' + str(Delta)
    + '_K' + str(T - 1) + '_kappa' + str(int(np.mean(diff) * 100)) + '.npz',
    'rb',
) as f:
    data = np.load(f)
    # Pull every array out while the underlying file is still open.
    Y_train_list = data['Y_train_list']
    Y_test_list = data['Y_test_list']
    cp_truth_list = data['cp_truth_list']
    theta = data['theta']

In [18]:
# Tuning values handed to dcdp_cv_grid_covariance.
grid_n = 100
gamma_list = [900]
lam_list = [0]

# Number of data sets to process (index b below).
B = 100

# One slot per data set: wall-clock time, value returned by cp_distance,
# and number of returned change points.
run_time_dc = np.zeros(B)
loc_error_dc = np.zeros(B)
K_dc = np.zeros(B)

# Raw outputs of every fit, kept for saving later.
cp_best_list = []
param_best_list = []
cp_best_cand_list = []

# Reset NumPy's global RNG before the loop, as the other experiment cells in
# this notebook do, so this cell does not depend on which cells ran before it.
# NOTE(review): whether dcdp_cv_grid_covariance draws from this RNG is not
# visible here -- confirm in cpd_utils.
np.random.seed(0)

for b in range(B):
    Y_train = Y_train_list[b]
    Y_test = Y_test_list[b]
    cp_truth = cp_truth_list[b]

    # The timer covers both construction of the estimator and the fit.
    start_time = time.time()
    dcdp = dcdp_cv_grid_covariance(grid_n, lam_list, gamma_list, smooth = 10, 
                    buffer = 20, step_refine = 1, buffer_refine = 10, lam_refine = 0.1)
    cp_best, param_best, cp_best_cand = dcdp.fit(Y_train, Y_test)
    run_time_dc[b] = time.time() - start_time
    loc_error_dc[b] = cp_distance(cp_best, cp_truth)
    K_dc[b] = len(cp_best)

    cp_best_list.append(cp_best)
    param_best_list.append(param_best)
    cp_best_cand_list.append(cp_best_cand)
    print(b)


# Summary over all B runs; T - 1 is the reference count K* in the last line.
# The banner names the method actually run in this cell (DCDP); it previously
# read "dplr", unlike every other experiment cell in this notebook.
print('---------- dcdp -----------')
print("avg loc error: {0}, avg time: {1}".format(loc_error_dc.mean(), run_time_dc.mean()))
print("std loc error: {0}, std time: {1}".format(loc_error_dc.std(), run_time_dc.std()))
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(sum(K_dc < T - 1), sum(K_dc == T - 1), sum(K_dc > T - 1)))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- dplr -----------
avg loc error: 0.03, avg time: 2.081645007133484
std loc error: 0.17058722109231983, std time: 0.2820691900285055
K < K*: 0, K = K*: 100, K > K*: 0


In [19]:
import pickle

# Save the DCDP outputs for this setting; the file name repeats the data-file
# tags and appends the grid size.
with open(
    path + 'dcdp_n' + str(nt) + '_p' + str(p) + '_Delta' + str(Delta)
    + '_K' + str(T - 1) + '_kappa' + str(int(np.mean(diff) * 100))
    + '_grid' + str(grid_n) + '.pickle',
    'wb',
) as f:
    # Stored as one list, in this fixed order.
    pickle.dump(
        [cp_best_list, param_best_list, cp_best_cand_list,
         loc_error_dc, run_time_dc, K_dc],
        f,
    )