In [1]:
pip install changeforest

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Add the project folder on Drive to the module search path.
# NOTE(review): presumably this is where cpd_utils (imported below) lives -- confirm.
import os, sys
sys.path.append('/content/drive/MyDrive/DCDP/covariance')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import random

import itertools

from cpd_utils import *
import changeforest

import time
import bisect

import pandas as pd

In [4]:
def get_covariance(p, delta1, delta2):
    """Build a p x p symmetric tridiagonal matrix.

    The main diagonal holds ``delta1``; the first sub- and super-diagonals
    hold ``delta2``; every other entry is zero.

    Args:
        p: Number of rows/columns.
        delta1: Value placed on the main diagonal.
        delta2: Value placed on the two adjacent off-diagonals.

    Returns:
        A ``(p, p)`` NumPy array.
    """
    cov = delta1 * np.eye(p)
    # Row indices 1..p-1; paired with (rows - 1) they address the sub-diagonal,
    # and swapping the pair addresses the super-diagonal.
    rows = np.arange(1, p)
    cov[rows, rows - 1] = delta2
    cov[rows - 1, rows] = delta2
    return cov

# ChangeForest

In [5]:
# Scenario parameters: T segments, spacing Delta, dimension p.
T, Delta, p = 4, 100, 10
delta1, delta2 = 5, 0.3

# Segment matrices alternate between the identity and the tridiagonal matrix.
theta = np.zeros((T, p, p))
theta[0] = theta[2] = np.eye(p)
theta[1] = theta[3] = get_covariance(p, delta1, delta2)

# Size of the change between consecutive segments (root of summed squared entries).
diff = np.zeros(T - 1)
for t in range(1, T):
    diff[t - 1] = np.sum(np.abs(theta[t] - theta[t - 1])**2)**0.5

nt = Delta * T

# Load the pre-generated replicates; the file name encodes the scenario.
path = '/content/drive/MyDrive/DCDP/covariance/'
with open(
    f'{path}data_n{nt}_p{p}_Delta{Delta}_K{T - 1}'
    f'_kappa{int(np.mean(diff) * 100)}.npz',
    'rb',
) as f:
    data = np.load(f)
    Y_train_list = data['Y_train_list']
    Y_test_list = data['Y_test_list']
    cp_truth_list = data['cp_truth_list']
    # The stored theta replaces the one constructed above.
    theta = data['theta']

In [6]:
# Sanity check: dimensions of the loaded training array.
Y_train_list.shape

(100, 400, 10)

In [7]:
# Benchmark changeforest ("random_forest" method, "bs" segmentation) on B replicates.
# NOTE(review): np.random.seed only seeds NumPy's global RNG; whether changeforest
# draws from it is not visible here -- confirm if exact reproducibility matters.
np.random.seed(0)

nt = Delta * T
B = 100

run_time_rf = np.zeros(B)   # elapsed seconds per replicate
loc_error_rf = np.zeros(B)  # cp_distance(estimate, truth) per replicate
K_rf = np.zeros(B)          # number of estimated change points per replicate
cp_best_list = []

for b in range(B):
    Y_train = Y_train_list[b]
    Y_test = Y_test_list[b]
    # Interleave the two samples into one series of length 2 * nt:
    # training rows at even positions, test rows at odd positions.
    Y_all = np.zeros((2 * nt, p))
    Y_all[0::2, :] = Y_train
    Y_all[1::2, :] = Y_test

    cp_truth = cp_truth_list[b]

    # perf_counter() is monotonic and high resolution, so the measured duration
    # cannot be distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    result = changeforest.changeforest(Y_all, "random_forest", "bs")
    cp_best = result.split_points()
    # Y_all is twice as long as each input series, so halve the split indices.
    cp_best = [x // 2 for x in cp_best]

    run_time_rf[b] = time.perf_counter() - start_time
    loc_error_rf[b] = cp_distance(cp_best, cp_truth)
    K_rf[b] = len(cp_best)

    cp_best_list.append(cp_best)
    print(b)

print('---------- change forest - random forest -----------')
print("avg loc error: {0}, avg time: {1}".format(loc_error_rf.mean(), run_time_rf.mean()))
print("std loc error: {0}, std time: {1}".format(loc_error_rf.std(), run_time_rf.std()))
# Replicates that under-, correctly, and over-estimate the T - 1 change points.
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(
    np.count_nonzero(K_rf < T - 1),
    np.count_nonzero(K_rf == T - 1),
    np.count_nonzero(K_rf > T - 1),
))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- change forest - random forest -----------
avg loc error: 5.54, avg time: 0.7274704241752624
std loc error: 14.708786489714234, std time: 0.25128766221991156
K < K*: 0, K = K*: 88, K > K*: 12


In [8]:
import pickle

# Persist the estimates and per-replicate metrics; the file name encodes the scenario.
with open(
    f'{path}rf_bs_n{nt}_Delta{Delta}_K{T - 1}_p{p}'
    f'_kappa{int(np.mean(diff) * 100)}.pickle',
    'wb',
) as f:
    pickle.dump([cp_best_list, loc_error_rf, run_time_rf, K_rf], f)

In [9]:
# Scenario parameters: T segments, spacing Delta, dimension p.
T, Delta, p = 4, 100, 20
delta1, delta2 = 5, 0.3

# Segment matrices alternate between the identity and the tridiagonal matrix.
theta = np.zeros((T, p, p))
theta[0] = theta[2] = np.eye(p)
theta[1] = theta[3] = get_covariance(p, delta1, delta2)

# Size of the change between consecutive segments (root of summed squared entries).
diff = np.zeros(T - 1)
for t in range(1, T):
    diff[t - 1] = np.sum(np.abs(theta[t] - theta[t - 1])**2)**0.5

nt = Delta * T

# Load the pre-generated replicates; the file name encodes the scenario.
path = '/content/drive/MyDrive/DCDP/covariance/'
with open(
    f'{path}data_n{nt}_p{p}_Delta{Delta}_K{T - 1}'
    f'_kappa{int(np.mean(diff) * 100)}.npz',
    'rb',
) as f:
    data = np.load(f)
    Y_train_list = data['Y_train_list']
    Y_test_list = data['Y_test_list']
    cp_truth_list = data['cp_truth_list']
    # The stored theta replaces the one constructed above.
    theta = data['theta']

In [10]:
# Benchmark changeforest ("random_forest" method, "bs" segmentation) on B replicates.
# NOTE(review): np.random.seed only seeds NumPy's global RNG; whether changeforest
# draws from it is not visible here -- confirm if exact reproducibility matters.
np.random.seed(0)

nt = Delta * T
B = 100

run_time_rf = np.zeros(B)   # elapsed seconds per replicate
loc_error_rf = np.zeros(B)  # cp_distance(estimate, truth) per replicate
K_rf = np.zeros(B)          # number of estimated change points per replicate
cp_best_list = []

for b in range(B):
    Y_train = Y_train_list[b]
    Y_test = Y_test_list[b]
    # Interleave the two samples into one series of length 2 * nt:
    # training rows at even positions, test rows at odd positions.
    Y_all = np.zeros((2 * nt, p))
    Y_all[0::2, :] = Y_train
    Y_all[1::2, :] = Y_test

    cp_truth = cp_truth_list[b]

    # perf_counter() is monotonic and high resolution, so the measured duration
    # cannot be distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    result = changeforest.changeforest(Y_all, "random_forest", "bs")
    cp_best = result.split_points()
    # Y_all is twice as long as each input series, so halve the split indices.
    cp_best = [x // 2 for x in cp_best]

    run_time_rf[b] = time.perf_counter() - start_time
    loc_error_rf[b] = cp_distance(cp_best, cp_truth)
    K_rf[b] = len(cp_best)

    cp_best_list.append(cp_best)
    print(b)

print('---------- change forest - random forest -----------')
print("avg loc error: {0}, avg time: {1}".format(loc_error_rf.mean(), run_time_rf.mean()))
print("std loc error: {0}, std time: {1}".format(loc_error_rf.std(), run_time_rf.std()))
# Replicates that under-, correctly, and over-estimate the T - 1 change points.
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(
    np.count_nonzero(K_rf < T - 1),
    np.count_nonzero(K_rf == T - 1),
    np.count_nonzero(K_rf > T - 1),
))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- change forest - random forest -----------
avg loc error: 7.37, avg time: 1.0473732542991638
std loc error: 18.75774773260372, std time: 0.13995087827995042
K < K*: 0, K = K*: 85, K > K*: 15


In [11]:
import pickle

# Persist the estimates and per-replicate metrics; the file name encodes the scenario.
with open(
    f'{path}rf_bs_n{nt}_Delta{Delta}_K{T - 1}_p{p}'
    f'_kappa{int(np.mean(diff) * 100)}.pickle',
    'wb',
) as f:
    pickle.dump([cp_best_list, loc_error_rf, run_time_rf, K_rf], f)

In [12]:
# Scenario parameters: T segments, spacing Delta, dimension p.
T, Delta, p = 4, 500, 5
delta1, delta2 = 2, 0.3

# Segment matrices alternate between the identity and the tridiagonal matrix.
theta = np.zeros((T, p, p))
theta[0] = theta[2] = np.eye(p)
theta[1] = theta[3] = get_covariance(p, delta1, delta2)

# Size of the change between consecutive segments (root of summed squared entries).
diff = np.zeros(T - 1)
for t in range(1, T):
    diff[t - 1] = np.sum(np.abs(theta[t] - theta[t - 1])**2)**0.5

nt = Delta * T

# Load the pre-generated replicates; the file name encodes the scenario.
path = '/content/drive/MyDrive/DCDP/covariance/'
with open(
    f'{path}data_n{nt}_p{p}_Delta{Delta}_K{T - 1}'
    f'_kappa{int(np.mean(diff) * 100)}.npz',
    'rb',
) as f:
    data = np.load(f)
    Y_train_list = data['Y_train_list']
    Y_test_list = data['Y_test_list']
    cp_truth_list = data['cp_truth_list']
    # The stored theta replaces the one constructed above.
    theta = data['theta']

In [13]:
# Rebuild the p = 5 tridiagonal matrix on its own so it can be inspected.
p, delta1, delta2 = 5, 2, 0.3
c = get_covariance(p, delta1, delta2)

In [14]:
# Display the inverse of the tridiagonal matrix c built in the previous cell.
np.linalg.inv(c)

array([[ 5.11786647e-01, -7.85776486e-02,  1.20643432e-02,
        -1.85130586e-03,  2.77695878e-04],
       [-7.85776486e-02,  5.23850990e-01, -8.04289544e-02,
         1.23420390e-02, -1.85130586e-03],
       [ 1.20643432e-02, -8.04289544e-02,  5.24128686e-01,
        -8.04289544e-02,  1.20643432e-02],
       [-1.85130586e-03,  1.23420390e-02, -8.04289544e-02,
         5.23850990e-01, -7.85776486e-02],
       [ 2.77695878e-04, -1.85130586e-03,  1.20643432e-02,
        -7.85776486e-02,  5.11786647e-01]])

In [15]:
# Sanity check: dimensions of the loaded training array.
Y_train_list.shape

(100, 2000, 5)

In [16]:
# Benchmark changeforest ("random_forest" method, "bs" segmentation) on B replicates.
# NOTE(review): np.random.seed only seeds NumPy's global RNG; whether changeforest
# draws from it is not visible here -- confirm if exact reproducibility matters.
np.random.seed(0)

nt = Delta * T
B = 100

run_time_rf = np.zeros(B)   # elapsed seconds per replicate
loc_error_rf = np.zeros(B)  # cp_distance(estimate, truth) per replicate
K_rf = np.zeros(B)          # number of estimated change points per replicate
cp_best_list = []

for b in range(B):
    Y_train = Y_train_list[b]
    Y_test = Y_test_list[b]
    # Interleave the two samples into one series of length 2 * nt:
    # training rows at even positions, test rows at odd positions.
    Y_all = np.zeros((2 * nt, p))
    Y_all[0::2, :] = Y_train
    Y_all[1::2, :] = Y_test

    cp_truth = cp_truth_list[b]

    # perf_counter() is monotonic and high resolution, so the measured duration
    # cannot be distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    result = changeforest.changeforest(Y_all, "random_forest", "bs")
    cp_best = result.split_points()
    # Y_all is twice as long as each input series, so halve the split indices.
    cp_best = [x // 2 for x in cp_best]

    run_time_rf[b] = time.perf_counter() - start_time
    loc_error_rf[b] = cp_distance(cp_best, cp_truth)
    K_rf[b] = len(cp_best)

    cp_best_list.append(cp_best)
    print(b)

print('---------- change forest - random forest -----------')
print("avg loc error: {0}, avg time: {1}".format(loc_error_rf.mean(), run_time_rf.mean()))
print("std loc error: {0}, std time: {1}".format(loc_error_rf.std(), run_time_rf.std()))
# Replicates that under-, correctly, and over-estimate the T - 1 change points.
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(
    np.count_nonzero(K_rf < T - 1),
    np.count_nonzero(K_rf == T - 1),
    np.count_nonzero(K_rf > T - 1),
))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- change forest - random forest -----------
avg loc error: 58.25, avg time: 1.733952486515045
std loc error: 151.73604548689147, std time: 0.21014890424308993
K < K*: 2, K = K*: 69, K > K*: 29


In [17]:
import pickle

# Persist the estimates and per-replicate metrics; the file name encodes the scenario.
with open(
    f'{path}rf_bs_n{nt}_Delta{Delta}_K{T - 1}_p{p}'
    f'_kappa{int(np.mean(diff) * 100)}.pickle',
    'wb',
) as f:
    pickle.dump([cp_best_list, loc_error_rf, run_time_rf, K_rf], f)

In [18]:
# Scenario parameters: T segments, spacing Delta, dimension p.
T, Delta, p = 4, 500, 10
delta1, delta2 = 5, 0.3

# Segment matrices alternate between the identity and the tridiagonal matrix.
theta = np.zeros((T, p, p))
theta[0] = theta[2] = np.eye(p)
theta[1] = theta[3] = get_covariance(p, delta1, delta2)

# Size of the change between consecutive segments (root of summed squared entries).
diff = np.zeros(T - 1)
for t in range(1, T):
    diff[t - 1] = np.sum(np.abs(theta[t] - theta[t - 1])**2)**0.5

nt = Delta * T

# Load the pre-generated replicates; the file name encodes the scenario.
path = '/content/drive/MyDrive/DCDP/covariance/'
with open(
    f'{path}data_n{nt}_p{p}_Delta{Delta}_K{T - 1}'
    f'_kappa{int(np.mean(diff) * 100)}.npz',
    'rb',
) as f:
    data = np.load(f)
    Y_train_list = data['Y_train_list']
    Y_test_list = data['Y_test_list']
    cp_truth_list = data['cp_truth_list']
    # The stored theta replaces the one constructed above.
    theta = data['theta']

In [19]:
# Benchmark changeforest ("random_forest" method, "bs" segmentation) on B replicates.
# NOTE(review): np.random.seed only seeds NumPy's global RNG; whether changeforest
# draws from it is not visible here -- confirm if exact reproducibility matters.
np.random.seed(0)

nt = Delta * T
B = 100

run_time_rf = np.zeros(B)   # elapsed seconds per replicate
loc_error_rf = np.zeros(B)  # cp_distance(estimate, truth) per replicate
K_rf = np.zeros(B)          # number of estimated change points per replicate
cp_best_list = []

for b in range(B):
    Y_train = Y_train_list[b]
    Y_test = Y_test_list[b]
    # Interleave the two samples into one series of length 2 * nt:
    # training rows at even positions, test rows at odd positions.
    Y_all = np.zeros((2 * nt, p))
    Y_all[0::2, :] = Y_train
    Y_all[1::2, :] = Y_test

    cp_truth = cp_truth_list[b]

    # perf_counter() is monotonic and high resolution, so the measured duration
    # cannot be distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    result = changeforest.changeforest(Y_all, "random_forest", "bs")
    cp_best = result.split_points()
    # Y_all is twice as long as each input series, so halve the split indices.
    cp_best = [x // 2 for x in cp_best]

    run_time_rf[b] = time.perf_counter() - start_time
    loc_error_rf[b] = cp_distance(cp_best, cp_truth)
    K_rf[b] = len(cp_best)

    cp_best_list.append(cp_best)
    print(b)

print('---------- change forest - random forest -----------')
print("avg loc error: {0}, avg time: {1}".format(loc_error_rf.mean(), run_time_rf.mean()))
print("std loc error: {0}, std time: {1}".format(loc_error_rf.std(), run_time_rf.std()))
# Replicates that under-, correctly, and over-estimate the T - 1 change points.
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(
    np.count_nonzero(K_rf < T - 1),
    np.count_nonzero(K_rf == T - 1),
    np.count_nonzero(K_rf > T - 1),
))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- change forest - random forest -----------
avg loc error: 42.5, avg time: 2.890978066921234
std loc error: 137.92262323491386, std time: 0.30479714096130867
K < K*: 0, K = K*: 84, K > K*: 16


In [20]:
import pickle

# Persist the estimates and per-replicate metrics; the file name encodes the scenario.
with open(
    f'{path}rf_bs_n{nt}_Delta{Delta}_K{T - 1}_p{p}'
    f'_kappa{int(np.mean(diff) * 100)}.pickle',
    'wb',
) as f:
    pickle.dump([cp_best_list, loc_error_rf, run_time_rf, K_rf], f)

In [21]:
# Scenario parameters: T segments, spacing Delta, dimension p.
T, Delta, p = 4, 500, 20
delta1, delta2 = 5, 0.3

# Segment matrices alternate between the identity and the tridiagonal matrix.
theta = np.zeros((T, p, p))
theta[0] = theta[2] = np.eye(p)
theta[1] = theta[3] = get_covariance(p, delta1, delta2)

# Size of the change between consecutive segments (root of summed squared entries).
diff = np.zeros(T - 1)
for t in range(1, T):
    diff[t - 1] = np.sum(np.abs(theta[t] - theta[t - 1])**2)**0.5

nt = Delta * T

# Load the pre-generated replicates; the file name encodes the scenario.
path = '/content/drive/MyDrive/DCDP/covariance/'
with open(
    f'{path}data_n{nt}_p{p}_Delta{Delta}_K{T - 1}'
    f'_kappa{int(np.mean(diff) * 100)}.npz',
    'rb',
) as f:
    data = np.load(f)
    Y_train_list = data['Y_train_list']
    Y_test_list = data['Y_test_list']
    cp_truth_list = data['cp_truth_list']
    # The stored theta replaces the one constructed above.
    theta = data['theta']

In [22]:
# Benchmark changeforest ("random_forest" method, "bs" segmentation) on B replicates.
# NOTE(review): np.random.seed only seeds NumPy's global RNG; whether changeforest
# draws from it is not visible here -- confirm if exact reproducibility matters.
np.random.seed(0)

nt = Delta * T
B = 100

run_time_rf = np.zeros(B)   # elapsed seconds per replicate
loc_error_rf = np.zeros(B)  # cp_distance(estimate, truth) per replicate
K_rf = np.zeros(B)          # number of estimated change points per replicate
cp_best_list = []

for b in range(B):
    Y_train = Y_train_list[b]
    Y_test = Y_test_list[b]
    # Interleave the two samples into one series of length 2 * nt:
    # training rows at even positions, test rows at odd positions.
    Y_all = np.zeros((2 * nt, p))
    Y_all[0::2, :] = Y_train
    Y_all[1::2, :] = Y_test

    cp_truth = cp_truth_list[b]

    # perf_counter() is monotonic and high resolution, so the measured duration
    # cannot be distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    result = changeforest.changeforest(Y_all, "random_forest", "bs")
    cp_best = result.split_points()
    # Y_all is twice as long as each input series, so halve the split indices.
    cp_best = [x // 2 for x in cp_best]

    run_time_rf[b] = time.perf_counter() - start_time
    loc_error_rf[b] = cp_distance(cp_best, cp_truth)
    K_rf[b] = len(cp_best)

    cp_best_list.append(cp_best)
    print(b)

print('---------- change forest - random forest -----------')
print("avg loc error: {0}, avg time: {1}".format(loc_error_rf.mean(), run_time_rf.mean()))
print("std loc error: {0}, std time: {1}".format(loc_error_rf.std(), run_time_rf.std()))
# Replicates that under-, correctly, and over-estimate the T - 1 change points.
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(
    np.count_nonzero(K_rf < T - 1),
    np.count_nonzero(K_rf == T - 1),
    np.count_nonzero(K_rf > T - 1),
))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- change forest - random forest -----------
avg loc error: 27.68, avg time: 4.784125862121582
std loc error: 97.20019341544543, std time: 0.40912079561753983
K < K*: 0, K = K*: 86, K > K*: 14


In [23]:
import pickle

# Persist the estimates and per-replicate metrics; the file name encodes the scenario.
with open(
    f'{path}rf_bs_n{nt}_Delta{Delta}_K{T - 1}_p{p}'
    f'_kappa{int(np.mean(diff) * 100)}.pickle',
    'wb',
) as f:
    pickle.dump([cp_best_list, loc_error_rf, run_time_rf, K_rf], f)