In [1]:
import os
import sys

from google.colab import drive

# Mount Google Drive inside the Colab runtime.
drive.mount('/content/drive')

# Add the project's covariance folder to the module search path.
sys.path.append('/content/drive/MyDrive/DCDP/covariance')

Mounted at /content/drive


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import random

import itertools

import time
import bisect

import pandas as pd

import statsmodels.api as sm
from sklearn import linear_model

# Suppress scikit-learn ConvergenceWarning messages from here on.
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter(action="ignore", category=ConvergenceWarning)

In [3]:
def generate_data_covariance(n, T, theta):
    """Draw zero-mean Gaussian samples segment by segment and stack them.

    Parameters
    ----------
    n : indexable of int
        ``n[i]`` is the number of rows drawn for segment ``i``.
    T : int
        Number of segments; segments ``0 .. T - 1`` are generated in order.
    theta : ndarray
        ``theta[i]`` is the covariance matrix used for segment ``i``;
        ``theta.shape[1]`` is taken as the dimension of each sample.

    Returns
    -------
    nt : int
        Total number of rows in the stacked sample.
    y_train_joint : ndarray
        All segments concatenated along axis 0, in segment order.

    Notes
    -----
    Sampling goes through NumPy's global random state
    (``np.random.multivariate_normal``), so results are controlled by
    ``np.random.seed``.
    """
    p = theta.shape[1]
    zero_mean = np.zeros(p)

    segments = []
    for seg in range(T):
        draws = np.random.multivariate_normal(
            mean=zero_mean, cov=theta[seg], size=n[seg]
        )
        segments.append(draws)

    y_train_joint = np.concatenate(segments, axis=0)
    return len(y_train_joint), y_train_joint


def get_covariance(p, delta1, delta2):
    """Build a p x p symmetric tridiagonal matrix.

    The main diagonal is filled with ``delta1``, the first sub- and
    super-diagonals with ``delta2``, and every other entry is zero.
    """
    cov = delta1 * np.eye(p)
    # Row indices 1 .. p-1; paired with (index - 1) they address both
    # first off-diagonals.
    off = np.arange(1, p)
    cov[off, off - 1] = delta2
    cov[off - 1, off] = delta2
    return cov

In [4]:
# Scenario: T segments of nominal length Delta, dimension p.
T, Delta, p = 4, 100, 10
delta1, delta2 = 5, 0.3

# Segment covariances alternate between the identity and a tridiagonal matrix.
theta = np.zeros((T, p, p))
theta[0] = theta[2] = np.eye(p)
theta[1] = theta[3] = get_covariance(p, delta1, delta2)

# Frobenius norm of the covariance change at each of the T - 1 change points.
diff = np.zeros(T - 1)
for t in range(1, T):
    diff[t - 1] = np.sum(np.abs(theta[t] - theta[t - 1]) ** 2) ** 0.5

np.random.seed(0)

B = 100
nt = Delta * T
Y_train_list = np.zeros((B, nt, p))
Y_test_list = np.zeros_like(Y_train_list)
cp_truth_list = np.zeros((B, T - 1))
for b in range(B):
    # Shift each nominal length by a uniform draw truncated to an integer,
    # then set the last length so that the total is exactly nt.
    n = np.full(T, Delta) + np.random.uniform(-5, 5, T).astype(int)
    n[-1] = nt - n[:-1].sum()
    cp_truth = n.cumsum()[:T - 1]
    # The train sample is drawn before the test sample; the order matters
    # for reproducing the same random stream.
    nt, Y_train = generate_data_covariance(n, T, theta)
    nt, Y_test = generate_data_covariance(n, T, theta)
    Y_train_list[b], Y_test_list[b], cp_truth_list[b] = Y_train, Y_test, cp_truth

# Save all replicates; the file name encodes nt, p, Delta, K = T - 1 and
# the mean jump size times 100 (truncated to an integer).
path = '/content/drive/MyDrive/DCDP/covariance/'
with open(''.join(map(str, (path, 'data_n', nt, '_p', p, '_Delta', Delta, '_K', T - 1,
                            '_kappa', int(np.mean(diff) * 100), '.npz'))), 'wb') as f:
    np.savez(
        f,
        Y_train_list=Y_train_list,
        Y_test_list=Y_test_list,
        cp_truth_list=cp_truth_list,
        theta=theta,
    )

In [5]:
# Mean Frobenius-norm jump across the change points.
print(diff.mean())

12.712985487288185


In [6]:
# Scenario: T segments of nominal length Delta, dimension p.
T, Delta, p = 4, 100, 20
delta1, delta2 = 5, 0.3

# Segment covariances alternate between the identity and a tridiagonal matrix.
theta = np.zeros((T, p, p))
theta[0] = theta[2] = np.eye(p)
theta[1] = theta[3] = get_covariance(p, delta1, delta2)

# Frobenius norm of the covariance change at each of the T - 1 change points.
diff = np.zeros(T - 1)
for t in range(1, T):
    diff[t - 1] = np.sum(np.abs(theta[t] - theta[t - 1]) ** 2) ** 0.5

np.random.seed(0)

B = 100
nt = Delta * T
Y_train_list = np.zeros((B, nt, p))
Y_test_list = np.zeros_like(Y_train_list)
cp_truth_list = np.zeros((B, T - 1))
for b in range(B):
    # Shift each nominal length by a uniform draw truncated to an integer,
    # then set the last length so that the total is exactly nt.
    n = np.full(T, Delta) + np.random.uniform(-5, 5, T).astype(int)
    n[-1] = nt - n[:-1].sum()
    cp_truth = n.cumsum()[:T - 1]
    # The train sample is drawn before the test sample; the order matters
    # for reproducing the same random stream.
    nt, Y_train = generate_data_covariance(n, T, theta)
    nt, Y_test = generate_data_covariance(n, T, theta)
    Y_train_list[b], Y_test_list[b], cp_truth_list[b] = Y_train, Y_test, cp_truth

# Save all replicates; the file name encodes nt, p, Delta, K = T - 1 and
# the mean jump size times 100 (truncated to an integer).
path = '/content/drive/MyDrive/DCDP/covariance/'
with open(''.join(map(str, (path, 'data_n', nt, '_p', p, '_Delta', Delta, '_K', T - 1,
                            '_kappa', int(np.mean(diff) * 100), '.npz'))), 'wb') as f:
    np.savez(
        f,
        Y_train_list=Y_train_list,
        Y_test_list=Y_test_list,
        cp_truth_list=cp_truth_list,
        theta=theta,
    )

In [7]:
# Mean Frobenius-norm jump across the change points.
print(diff.mean())

17.98388167220859


In [None]:
# Scenario: T segments of nominal length Delta, dimension p.
T, Delta, p = 4, 500, 5
delta1, delta2 = 2, 0.3

# Segment covariances alternate between the identity and a tridiagonal matrix.
theta = np.zeros((T, p, p))
theta[0] = theta[2] = np.eye(p)
theta[1] = theta[3] = get_covariance(p, delta1, delta2)

# Frobenius norm of the covariance change at each of the T - 1 change points.
diff = np.zeros(T - 1)
for t in range(1, T):
    diff[t - 1] = np.sum(np.abs(theta[t] - theta[t - 1]) ** 2) ** 0.5

np.random.seed(0)

B = 100
nt = Delta * T
Y_train_list = np.zeros((B, nt, p))
Y_test_list = np.zeros_like(Y_train_list)
cp_truth_list = np.zeros((B, T - 1))
for b in range(B):
    # Shift each nominal length by a uniform draw truncated to an integer,
    # then set the last length so that the total is exactly nt.
    n = np.full(T, Delta) + np.random.uniform(-150, 150, T).astype(int)
    n[-1] = nt - n[:-1].sum()
    cp_truth = n.cumsum()[:T - 1]
    # The train sample is drawn before the test sample; the order matters
    # for reproducing the same random stream.
    nt, Y_train = generate_data_covariance(n, T, theta)
    nt, Y_test = generate_data_covariance(n, T, theta)
    Y_train_list[b], Y_test_list[b], cp_truth_list[b] = Y_train, Y_test, cp_truth

# Save all replicates; the file name encodes nt, p, Delta, K = T - 1 and
# the mean jump size times 100 (truncated to an integer).
path = '/content/drive/MyDrive/DCDP/covariance/'
with open(''.join(map(str, (path, 'data_n', nt, '_p', p, '_Delta', Delta, '_K', T - 1,
                            '_kappa', int(np.mean(diff) * 100), '.npz'))), 'wb') as f:
    np.savez(
        f,
        Y_train_list=Y_train_list,
        Y_test_list=Y_test_list,
        cp_truth_list=cp_truth_list,
        theta=theta,
    )

In [None]:
# Mean Frobenius-norm jump across the change points.
print(diff.mean())

2.3916521486202797


In [None]:
# Scenario: T segments of nominal length Delta, dimension p.
T, Delta, p = 4, 500, 10
delta1, delta2 = 5, 0.3

# Segment covariances alternate between the identity and a tridiagonal matrix.
theta = np.zeros((T, p, p))
theta[0] = theta[2] = np.eye(p)
theta[1] = theta[3] = get_covariance(p, delta1, delta2)

# Frobenius norm of the covariance change at each of the T - 1 change points.
diff = np.zeros(T - 1)
for t in range(1, T):
    diff[t - 1] = np.sum(np.abs(theta[t] - theta[t - 1]) ** 2) ** 0.5

np.random.seed(0)

B = 100
nt = Delta * T
Y_train_list = np.zeros((B, nt, p))
Y_test_list = np.zeros_like(Y_train_list)
cp_truth_list = np.zeros((B, T - 1))
for b in range(B):
    # Shift each nominal length by a uniform draw truncated to an integer,
    # then set the last length so that the total is exactly nt.
    n = np.full(T, Delta) + np.random.uniform(-150, 150, T).astype(int)
    n[-1] = nt - n[:-1].sum()
    cp_truth = n.cumsum()[:T - 1]
    # The train sample is drawn before the test sample; the order matters
    # for reproducing the same random stream.
    nt, Y_train = generate_data_covariance(n, T, theta)
    nt, Y_test = generate_data_covariance(n, T, theta)
    Y_train_list[b], Y_test_list[b], cp_truth_list[b] = Y_train, Y_test, cp_truth

# Save all replicates; the file name encodes nt, p, Delta, K = T - 1 and
# the mean jump size times 100 (truncated to an integer).
path = '/content/drive/MyDrive/DCDP/covariance/'
with open(''.join(map(str, (path, 'data_n', nt, '_p', p, '_Delta', Delta, '_K', T - 1,
                            '_kappa', int(np.mean(diff) * 100), '.npz'))), 'wb') as f:
    np.savez(
        f,
        Y_train_list=Y_train_list,
        Y_test_list=Y_test_list,
        cp_truth_list=cp_truth_list,
        theta=theta,
    )

In [None]:
# Mean Frobenius-norm jump across the change points.
# NOTE(review): diff depends only on theta (p, delta1, delta2). With p = 10,
# delta1 = 5, delta2 = 0.3 the earlier cell printed 12.7129..., so the
# 12.6916... recorded below looks like output from different parameters --
# re-run to confirm.
print(diff.mean())

12.691693871749962


In [None]:
# Sanity check: the array was allocated as (B, nt, p).
np.shape(Y_train_list)

(100, 2000, 10)

In [None]:
# Scenario: T segments of nominal length Delta, dimension p.
T, Delta, p = 4, 500, 20
delta1, delta2 = 5, 0.3

# Segment covariances alternate between the identity and a tridiagonal matrix.
theta = np.zeros((T, p, p))
theta[0] = theta[2] = np.eye(p)
theta[1] = theta[3] = get_covariance(p, delta1, delta2)

# Frobenius norm of the covariance change at each of the T - 1 change points.
diff = np.zeros(T - 1)
for t in range(1, T):
    diff[t - 1] = np.sum(np.abs(theta[t] - theta[t - 1]) ** 2) ** 0.5

np.random.seed(0)

B = 100
nt = Delta * T
Y_train_list = np.zeros((B, nt, p))
Y_test_list = np.zeros_like(Y_train_list)
cp_truth_list = np.zeros((B, T - 1))
for b in range(B):
    # Shift each nominal length by a uniform draw truncated to an integer,
    # then set the last length so that the total is exactly nt.
    n = np.full(T, Delta) + np.random.uniform(-150, 150, T).astype(int)
    n[-1] = nt - n[:-1].sum()
    cp_truth = n.cumsum()[:T - 1]
    # The train sample is drawn before the test sample; the order matters
    # for reproducing the same random stream.
    nt, Y_train = generate_data_covariance(n, T, theta)
    nt, Y_test = generate_data_covariance(n, T, theta)
    Y_train_list[b], Y_test_list[b], cp_truth_list[b] = Y_train, Y_test, cp_truth

# Save all replicates; the file name encodes nt, p, Delta, K = T - 1 and
# the mean jump size times 100 (truncated to an integer).
path = '/content/drive/MyDrive/DCDP/covariance/'
with open(''.join(map(str, (path, 'data_n', nt, '_p', p, '_Delta', Delta, '_K', T - 1,
                            '_kappa', int(np.mean(diff) * 100), '.npz'))), 'wb') as f:
    np.savez(
        f,
        Y_train_list=Y_train_list,
        Y_test_list=Y_test_list,
        cp_truth_list=cp_truth_list,
        theta=theta,
    )

In [None]:
# Mean Frobenius-norm jump across the change points.
print(diff.mean())

17.98388167220859


In [None]:
# Sanity check: the array was allocated as (B, nt, p).
# NOTE(review): the preceding cell sets p = 20, so this should display
# (100, 2000, 20); the recorded (100, 2000, 10) looks stale -- re-run.
np.shape(Y_train_list)

(100, 2000, 10)