In [1]:
from google.colab import drive
# Mount Google Drive inside the Colab runtime so that the project folder
# under /content/drive is reachable from the cells that follow.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os, sys
# Put the project folder on the import path (presumably where the `bt_cpd`
# module imported further down lives -- confirm).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path on every execution.
project_dir = '/content/drive/MyDrive/CPD_BT'
if project_dir not in sys.path:
    sys.path.append(project_dir)

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import random

import itertools

from bt_cpd import *

import time
import bisect

import pandas as pd

import statsmodels.api as sm
from sklearn import linear_model

from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
# Suppress scikit-learn ConvergenceWarning messages so they do not flood
# the cell output during the repeated model fits.
simplefilter(action="ignore", category=ConvergenceWarning)

  import pandas.util.testing as tm


In [4]:
# Experiment design constants.
#   T     -- number of segments (so there are T - 1 true change points)
#   Delta -- length of every segment
#   n     -- used in the data/result file names; presumably the problem
#            dimension (matches the last axis of X_train_list) -- confirm
T, Delta = 4, 500
n = 10

# Segment lengths, and the true change points taken as the running total of
# the segment lengths with the final total dropped.
m = np.full(T, Delta)
cp_truth = m.cumsum()[:-1]
print(cp_truth)

[ 500 1000 1500]


In [5]:
# Folder holding the simulated data sets and, later, the saved results.
path = '/content/drive/MyDrive/CPD_BT/experiment_random/'

# The file name encodes n, Delta and the number of change points (T - 1).
data_file = path + 'data_n{}_Delta{}_K{}.npy'.format(n, Delta, T - 1)

# Five arrays are read back from the same file handle, one np.load call per
# array, in this fixed order.
with open(data_file, 'rb') as f:
    (beta_list,
     X_train_list,
     Y_train_list,
     X_test_list,
     Y_test_list) = (np.load(f) for _ in range(5))

In [6]:
# Quick look at the dimensions of the training covariates; the first axis is
# the one indexed per replication in the experiment loop (X_train_list[b]).
np.shape(X_train_list)

(100, 2000, 10)

In [7]:
# ---------------------------------------------------------------------------
# Repeated experiment: fit the `dplr_cv_bt` change-point estimator on each of
# the B stored data sets and record, per replication,
#   * the elapsed fitting time            -> run_time_d
#   * cp_distance(cp_best, cp_truth)      -> loc_error_d
#   * the number of estimated change pts  -> K_d
# together with the raw objects returned by `fit`.
# ---------------------------------------------------------------------------
np.random.seed(0)

# Arguments handed to dplr_cv_bt (grid size and candidate tuning values;
# their exact meaning is defined in bt_cpd -- confirm there).
grid_n = 90
gamma_list = [20, 40]
lam_list = [0.1]

nt = Delta * T  # total series length; not used below, kept for later cells
B = 100         # number of replications; must not exceed len(X_train_list)

run_time_d = np.zeros(B)
loc_error_d = np.zeros(B)
K_d = np.zeros(B)

cp_best_list = []
param_best_list = []
cp_best_cand_list = []

for b in range(B):
    X_train = X_train_list[b]
    Y_train = Y_train_list[b]
    X_test = X_test_list[b]
    Y_test = Y_test_list[b]

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed intervals; time.time() is wall-clock time and can
    # jump if the system clock is adjusted during a fit.
    start_time = time.perf_counter()
    dp_fit = dplr_cv_bt(grid_n, lam_list, gamma_list)
    cp_best, param_best, cp_best_cand = dp_fit.fit((Y_train, X_train), (Y_test, X_test))
    run_time_d[b] = time.perf_counter() - start_time
    loc_error_d[b] = cp_distance(cp_best, cp_truth)
    K_d[b] = len(cp_best)

    cp_best_list.append(cp_best)
    param_best_list.append(param_best)
    cp_best_cand_list.append(cp_best_cand)
    print(b)  # progress indicator


# Summary over the B replications.
K_true = T - 1  # true number of change points
print('---------- dplr -----------')
print("avg loc error: {0}, avg time: {1}".format(loc_error_d.mean(), run_time_d.mean()))
print("std loc error: {0}, std time: {1}".format(loc_error_d.std(), run_time_d.std()))
# np.count_nonzero counts the True entries directly and, unlike the builtin
# `sum`, cannot be shadowed by a name pulled in through `from bt_cpd import *`.
print('K < K*: {0}, K = K*: {1}, K > K*: {2}'.format(
    np.count_nonzero(K_d < K_true),
    np.count_nonzero(K_d == K_true),
    np.count_nonzero(K_d > K_true)))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---------- dplr -----------
avg loc error: 12.09, avg time: 62.43706190109253
std loc error: 13.318479642962254, std time: 2.139522797504321
K < K*: 0, K = K*: 100, K > K*: 0


In [12]:
# Show the per-replication values of cp_distance(cp_best, cp_truth) that were
# stored by the experiment loop.
loc_error_d

array([ 5.,  1., 10.,  6.,  8.,  6., 11.,  4.,  5.,  8.,  6.,  3.,  3.,
        7.,  8., 25., 32., 11., 21., 11.,  7., 12.,  5., 10.,  3.,  4.,
       22.,  9., 14.,  8., 19.,  6.,  6., 18.,  3.,  1., 10.,  5., 18.,
        8.,  4., 19.,  6.,  3., 64.,  2.,  5.,  5.,  2.,  9.,  3., 18.,
        4.,  5.,  7., 16.,  4., 14., 16., 49., 17., 48., 17.,  6., 34.,
        6., 23., 63.,  2.,  4.,  2., 35.,  8., 24.,  2.,  6., 74.,  9.,
        3.,  3.,  3., 15.,  8.,  7.,  2., 10., 16., 17., 10., 26.,  1.,
        6.,  7., 13.,  7.,  6.,  3., 16.,  1., 15.])

In [9]:
# Show the `cp_best` value returned by dp_fit.fit for every replication.
cp_best_list

[[500, 1005, 1497],
 [500, 1000, 1499],
 [490, 1000, 1503],
 [494, 1000, 1500],
 [496, 1008, 1496],
 [494, 999, 1501],
 [502, 989, 1500],
 [504, 1000, 1501],
 [501, 995, 1500],
 [500, 1000, 1492],
 [499, 994, 1499],
 [503, 1000, 1500],
 [500, 1003, 1498],
 [500, 993, 1504],
 [498, 1005, 1492],
 [475, 1000, 1502],
 [516, 968, 1477],
 [489, 999, 1500],
 [500, 979, 1498],
 [497, 1011, 1510],
 [504, 999, 1507],
 [500, 988, 1507],
 [502, 997, 1495],
 [499, 995, 1490],
 [499, 1003, 1501],
 [502, 1002, 1496],
 [478, 995, 1500],
 [496, 1002, 1491],
 [514, 999, 1509],
 [508, 1003, 1499],
 [495, 1000, 1519],
 [499, 994, 1504],
 [499, 1000, 1494],
 [501, 1018, 1498],
 [498, 997, 1501],
 [499, 1001, 1500],
 [494, 1010, 1496],
 [505, 1001, 1500],
 [518, 997, 1499],
 [495, 992, 1501],
 [500, 1004, 1504],
 [515, 1019, 1502],
 [496, 994, 1505],
 [502, 1000, 1497],
 [501, 1064, 1499],
 [501, 1002, 1500],
 [500, 1001, 1505],
 [505, 998, 1502],
 [498, 999, 1500],
 [509, 1004, 1498],
 [500, 997, 1500],
 [

In [10]:
# Show the `cp_best_cand` value (third item returned by dp_fit.fit) for every
# replication -- presumably the selected candidate change points; confirm in bt_cpd.
cp_best_cand_list

[array([ 506, 1012, 1496]),
 array([ 506, 1012, 1496]),
 array([ 506,  990, 1496]),
 array([ 506,  990, 1496]),
 array([ 484, 1012, 1496]),
 array([ 484, 1012, 1496]),
 array([ 506,  990, 1496]),
 array([ 506, 1012, 1496]),
 array([ 484, 1012, 1496]),
 array([ 506, 1012, 1496]),
 array([ 506,  990, 1496]),
 array([ 506,  990, 1496]),
 array([ 506, 1012, 1496]),
 array([ 506,  990, 1518]),
 array([ 506, 1012, 1496]),
 array([ 484,  990, 1496]),
 array([ 506,  968, 1518]),
 array([ 506,  990, 1496]),
 array([ 484,  968, 1496]),
 array([ 506, 1012, 1518]),
 array([ 506,  990, 1518]),
 array([ 506,  990, 1496]),
 array([ 506,  990, 1496]),
 array([ 506,  990, 1496]),
 array([ 506, 1012, 1496]),
 array([ 506, 1012, 1496]),
 array([ 484, 1012, 1496]),
 array([ 484, 1012, 1496]),
 array([ 506,  990, 1518]),
 array([ 506, 1012, 1496]),
 array([ 484, 1012, 1518]),
 array([ 506,  990, 1496]),
 array([ 506, 1012, 1496]),
 array([ 506, 1012, 1496]),
 array([ 506, 1012, 1518]),
 array([ 484, 1012, 

In [11]:
import pickle
# Persist everything collected by the experiment loop in a single pickle.
# The file name encodes n, Delta, the number of change points (T - 1) and
# the grid size, and is written next to the input data under `path`.
result_file = path + 'dplr_n{}_Delta{}_K{}_grid{}.pickle'.format(n, Delta, T - 1, grid_n)
results = [
    cp_best_list,
    param_best_list,
    cp_best_cand_list,
    loc_error_d,
    run_time_d,
    K_d,
]
with open(result_file, 'wb') as f:
    pickle.dump(results, f)