In [1]:
# Colab-specific helper used to attach Google Drive to this runtime.
from google.colab import drive
# Mount Drive under /content/drive; the DCDP paths used in later cells live below this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os, sys
# Add the Drive project folder to the module search path.
# NOTE(review): no module from this folder is imported in the visible cells — confirm it is still needed.
sys.path.append('/content/drive/MyDrive/DCDP/linear')

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import random 

import itertools

import time
import bisect

import pandas as pd

import statsmodels.api as sm
from sklearn import linear_model

# Ignore scikit-learn ConvergenceWarning messages so they do not flood cell output.
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

In [4]:
def generate_data_linear(n, T, beta):
    """Simulate a piecewise linear-regression sample made of T consecutive segments.

    For segment ``i`` a design matrix with ``n[i]`` rows of independent
    standard-normal entries is drawn, and the response is that design
    multiplied by ``beta[i]``. No noise term is added to the response.

    Parameters
    ----------
    n : sequence of int
        Number of rows in each segment; entries ``0 .. T-1`` are read.
    T : int
        Number of segments.
    beta : array indexable as ``beta[i]``
        Coefficient vector of each segment; ``len(beta[0])`` sets the
        number of covariates.

    Returns
    -------
    nt : int
        Total number of rows across all segments.
    y_train_joint : numpy.ndarray, shape (nt, 1)
        Stacked responses.
    X_train_joint : numpy.ndarray, shape (nt, p)
        Stacked design matrices.
    """
    dim = len(beta[0])

    designs = []
    responses = []
    for seg in range(T):
        # One normal draw per segment, taken in segment order, so the
        # global NumPy random stream is consumed segment by segment.
        design = np.random.normal(0, 1, (n[seg], dim))
        designs.append(design)
        responses.append(design @ beta[seg].reshape((-1, 1)))

    X_train_joint = np.concatenate(designs).reshape((-1, dim))
    y_train_joint = np.concatenate(responses, axis=0).reshape((-1, 1))

    return len(y_train_joint), y_train_joint, X_train_joint

In [5]:
# Settings: T segments, nominal segment length Delta, p covariates.
T = 4
Delta = 50
p = 20

# Segment t has coefficient 5 on coordinates 5t .. 5t+4 and 0 elsewhere.
beta = np.zeros((T, p))
for t in range(T):
    beta[t, 5 * t: 5 * t + 5] = 5

# Euclidean size of the coefficient jump at each of the T - 1 change points.
diff = np.array([np.sum(np.abs(beta[t] - beta[t - 1]) ** 2) ** 0.5 for t in range(1, T)])
print(diff)

# Fixed seed so the B replicates below are reproducible.
np.random.seed(0)

nt = Delta * T
B = 100
Y_train_list, X_train_list = np.zeros((B, nt, 1)), np.zeros((B, nt, p))
Y_test_list, X_test_list = np.zeros((B, nt, 1)), np.zeros((B, nt, p))
cp_truth_list = np.zeros((B, T - 1))
for b in range(B):
    # Perturb every segment length by a truncated uniform draw in (-15, 15),
    # then reset the last length so the total stays equal to nt.
    n = np.random.uniform(-15, 15, T).astype(int) + Delta
    n[-1] = nt - n[:-1].sum()
    # True change points are the cumulative segment ends, excluding the final one.
    cp_truth = n.cumsum()[:T - 1]
    # Train and test sets share the segment lengths but use separate random draws.
    nt, Y_train, X_train = generate_data_linear(n, T, beta)
    nt, Y_test, X_test = generate_data_linear(n, T, beta)
    Y_train_list[b], X_train_list[b] = Y_train, X_train
    Y_test_list[b], X_test_list[b] = Y_test, X_test
    cp_truth_list[b] = cp_truth

# The file name encodes n, p, Delta, K = T - 1 and kappa (mean jump size times 100, truncated).
path = '/content/drive/MyDrive/DCDP/linear/'
with open(f'{path}data_n{nt}_p{p}_Delta{Delta}_K{T - 1}_kappa{int(np.mean(diff) * 100)}.npz', 'wb') as f:
    np.savez(
        f,
        Y_train_list=Y_train_list,
        X_train_list=X_train_list,
        Y_test_list=Y_test_list,
        X_test_list=X_test_list,
        cp_truth_list=cp_truth_list,
        beta=beta,
    )

[15.8113883 15.8113883 15.8113883]


In [6]:
# Settings: T segments, nominal segment length Delta, p covariates.
T = 4
Delta = 50
p = 20

# Segment t has coefficient 1 on coordinates 5t .. 5t+4 and 0 elsewhere.
beta = np.zeros((T, p))
for t in range(T):
    beta[t, 5 * t: 5 * t + 5] = 1

# Euclidean size of the coefficient jump at each of the T - 1 change points.
diff = np.array([np.sum(np.abs(beta[t] - beta[t - 1]) ** 2) ** 0.5 for t in range(1, T)])
print(diff)

# Fixed seed so the B replicates below are reproducible.
np.random.seed(0)

nt = Delta * T
B = 100
Y_train_list, X_train_list = np.zeros((B, nt, 1)), np.zeros((B, nt, p))
Y_test_list, X_test_list = np.zeros((B, nt, 1)), np.zeros((B, nt, p))
cp_truth_list = np.zeros((B, T - 1))
for b in range(B):
    # Perturb every segment length by a truncated uniform draw in (-15, 15),
    # then reset the last length so the total stays equal to nt.
    n = np.random.uniform(-15, 15, T).astype(int) + Delta
    n[-1] = nt - n[:-1].sum()
    # True change points are the cumulative segment ends, excluding the final one.
    cp_truth = n.cumsum()[:T - 1]
    # Train and test sets share the segment lengths but use separate random draws.
    nt, Y_train, X_train = generate_data_linear(n, T, beta)
    nt, Y_test, X_test = generate_data_linear(n, T, beta)
    Y_train_list[b], X_train_list[b] = Y_train, X_train
    Y_test_list[b], X_test_list[b] = Y_test, X_test
    cp_truth_list[b] = cp_truth

# The file name encodes n, p, Delta, K = T - 1 and kappa (mean jump size times 100, truncated).
path = '/content/drive/MyDrive/DCDP/linear/'
with open(f'{path}data_n{nt}_p{p}_Delta{Delta}_K{T - 1}_kappa{int(np.mean(diff) * 100)}.npz', 'wb') as f:
    np.savez(
        f,
        Y_train_list=Y_train_list,
        X_train_list=X_train_list,
        Y_test_list=Y_test_list,
        X_test_list=X_test_list,
        cp_truth_list=cp_truth_list,
        beta=beta,
    )

[3.16227766 3.16227766 3.16227766]


In [7]:
# Settings: T segments, nominal segment length Delta, p covariates.
T = 4
Delta = 50
p = 20

# Segment t has coefficient 0.5 on coordinates 5t .. 5t+4 and 0 elsewhere.
beta = np.zeros((T, p))
for t in range(T):
    beta[t, 5 * t: 5 * t + 5] = 0.5

# Euclidean size of the coefficient jump at each of the T - 1 change points.
diff = np.array([np.sum(np.abs(beta[t] - beta[t - 1]) ** 2) ** 0.5 for t in range(1, T)])
print(diff)

# Fixed seed so the B replicates below are reproducible.
np.random.seed(0)

nt = Delta * T
B = 100
Y_train_list, X_train_list = np.zeros((B, nt, 1)), np.zeros((B, nt, p))
Y_test_list, X_test_list = np.zeros((B, nt, 1)), np.zeros((B, nt, p))
cp_truth_list = np.zeros((B, T - 1))
for b in range(B):
    # Perturb every segment length by a truncated uniform draw in (-15, 15),
    # then reset the last length so the total stays equal to nt.
    n = np.random.uniform(-15, 15, T).astype(int) + Delta
    n[-1] = nt - n[:-1].sum()
    # True change points are the cumulative segment ends, excluding the final one.
    cp_truth = n.cumsum()[:T - 1]
    # Train and test sets share the segment lengths but use separate random draws.
    nt, Y_train, X_train = generate_data_linear(n, T, beta)
    nt, Y_test, X_test = generate_data_linear(n, T, beta)
    Y_train_list[b], X_train_list[b] = Y_train, X_train
    Y_test_list[b], X_test_list[b] = Y_test, X_test
    cp_truth_list[b] = cp_truth

# The file name encodes n, p, Delta, K = T - 1 and kappa (mean jump size times 100, truncated).
path = '/content/drive/MyDrive/DCDP/linear/'
with open(f'{path}data_n{nt}_p{p}_Delta{Delta}_K{T - 1}_kappa{int(np.mean(diff) * 100)}.npz', 'wb') as f:
    np.savez(
        f,
        Y_train_list=Y_train_list,
        X_train_list=X_train_list,
        Y_test_list=Y_test_list,
        X_test_list=X_test_list,
        cp_truth_list=cp_truth_list,
        beta=beta,
    )

[1.58113883 1.58113883 1.58113883]


In [9]:
# Settings: T segments, nominal segment length Delta, p covariates.
T = 4
Delta = 50
p = 100

# Segment t has coefficient 5 on coordinates 5t .. 5t+4 and 0 elsewhere.
beta = np.zeros((T, p))
for t in range(T):
    beta[t, 5 * t: 5 * t + 5] = 5

# Euclidean size of the coefficient jump at each of the T - 1 change points.
diff = np.array([np.sum(np.abs(beta[t] - beta[t - 1]) ** 2) ** 0.5 for t in range(1, T)])
print(diff)

# Fixed seed so the B replicates below are reproducible.
np.random.seed(0)

nt = Delta * T
B = 100
Y_train_list, X_train_list = np.zeros((B, nt, 1)), np.zeros((B, nt, p))
Y_test_list, X_test_list = np.zeros((B, nt, 1)), np.zeros((B, nt, p))
cp_truth_list = np.zeros((B, T - 1))
for b in range(B):
    # Perturb every segment length by a truncated uniform draw in (-15, 15),
    # then reset the last length so the total stays equal to nt.
    n = np.random.uniform(-15, 15, T).astype(int) + Delta
    n[-1] = nt - n[:-1].sum()
    # True change points are the cumulative segment ends, excluding the final one.
    cp_truth = n.cumsum()[:T - 1]
    # Train and test sets share the segment lengths but use separate random draws.
    nt, Y_train, X_train = generate_data_linear(n, T, beta)
    nt, Y_test, X_test = generate_data_linear(n, T, beta)
    Y_train_list[b], X_train_list[b] = Y_train, X_train
    Y_test_list[b], X_test_list[b] = Y_test, X_test
    cp_truth_list[b] = cp_truth

# The file name encodes n, p, Delta, K = T - 1 and kappa (mean jump size times 100, truncated).
path = '/content/drive/MyDrive/DCDP/linear/'
with open(f'{path}data_n{nt}_p{p}_Delta{Delta}_K{T - 1}_kappa{int(np.mean(diff) * 100)}.npz', 'wb') as f:
    np.savez(
        f,
        Y_train_list=Y_train_list,
        X_train_list=X_train_list,
        Y_test_list=Y_test_list,
        X_test_list=X_test_list,
        cp_truth_list=cp_truth_list,
        beta=beta,
    )

[15.8113883 15.8113883 15.8113883]


In [None]:
# Scratch check of the segment-length recipe with a wider jitter of +/-150.
# NOTE(review): with Delta = 50 this jitter can produce negative lengths; the saved output
# suggests the cell was last run under different settings — confirm or delete.
a = np.random.uniform(-150, 150, T).astype(int) + Delta
a[-1] = nt - a[:-1].sum()  # reset the last length so the lengths add up to nt
a.cumsum()

array([ 355,  955, 1430, 2000])

In [None]:
# Display cp_truth as left by the most recently run generation loop (its final replicate).
# NOTE(review): the saved output does not look consistent with the Delta = 50 settings
# visible in this notebook — presumably stale; re-run to confirm.
cp_truth

array([ 500, 1000, 1500])

In [11]:
# Settings: T segments, nominal segment length Delta, p covariates.
T = 4
Delta = 50
p = 100

# Segment t has coefficient 1 on coordinates 5t .. 5t+4 and 0 elsewhere.
beta = np.zeros((T, p))
for t in range(T):
    beta[t, 5 * t: 5 * t + 5] = 1

# Euclidean size of the coefficient jump at each of the T - 1 change points.
diff = np.array([np.sum(np.abs(beta[t] - beta[t - 1]) ** 2) ** 0.5 for t in range(1, T)])
print(diff)

# Fixed seed so the B replicates below are reproducible.
np.random.seed(0)

nt = Delta * T
B = 100
Y_train_list, X_train_list = np.zeros((B, nt, 1)), np.zeros((B, nt, p))
Y_test_list, X_test_list = np.zeros((B, nt, 1)), np.zeros((B, nt, p))
cp_truth_list = np.zeros((B, T - 1))
for b in range(B):
    # Perturb every segment length by a truncated uniform draw in (-15, 15),
    # then reset the last length so the total stays equal to nt.
    n = np.random.uniform(-15, 15, T).astype(int) + Delta
    n[-1] = nt - n[:-1].sum()
    # True change points are the cumulative segment ends, excluding the final one.
    cp_truth = n.cumsum()[:T - 1]
    # Train and test sets share the segment lengths but use separate random draws.
    nt, Y_train, X_train = generate_data_linear(n, T, beta)
    nt, Y_test, X_test = generate_data_linear(n, T, beta)
    Y_train_list[b], X_train_list[b] = Y_train, X_train
    Y_test_list[b], X_test_list[b] = Y_test, X_test
    cp_truth_list[b] = cp_truth

# The file name encodes n, p, Delta, K = T - 1 and kappa (mean jump size times 100, truncated).
path = '/content/drive/MyDrive/DCDP/linear/'
with open(f'{path}data_n{nt}_p{p}_Delta{Delta}_K{T - 1}_kappa{int(np.mean(diff) * 100)}.npz', 'wb') as f:
    np.savez(
        f,
        Y_train_list=Y_train_list,
        X_train_list=X_train_list,
        Y_test_list=Y_test_list,
        X_test_list=X_test_list,
        cp_truth_list=cp_truth_list,
        beta=beta,
    )

[3.16227766 3.16227766 3.16227766]


In [None]:
# Check the shape of the stacked training responses.
# NOTE(review): the linear cells allocate Y_train_list as (B, nt, 1); the saved output here
# is 3-D with last dimension 100, so it presumably came from a different run — confirm.
Y_train_list.shape

(100, 2000, 100)

In [None]:
# Settings for the mean-change data set: T segments, nominal length Delta, dimension p.
T = 4
Delta = 200
p = 100

# Segment t has value 0.5 on coordinates 5t .. 5t+4 and 0 elsewhere.
theta = np.zeros((T, p))
for t in range(T):
    theta[t, 5 * t: 5 * t + 5] = 0.5

# Euclidean size of the jump in theta at each of the T - 1 change points.
diff = np.array([np.sum(np.abs(theta[t] - theta[t - 1]) ** 2) ** 0.5 for t in range(1, T)])
print(diff)

# Fixed seed so the B replicates below are reproducible.
np.random.seed(0)

nt = Delta * T
B = 100
Y_train_list, Y_test_list = np.zeros((B, nt, p)), np.zeros((B, nt, p))
cp_truth_list = np.zeros((B, T - 1))
for b in range(B):
    # Perturb every segment length by a truncated uniform draw in (-60, 60),
    # then reset the last length so the total stays equal to nt.
    n = np.random.uniform(-60, 60, T).astype(int) + Delta
    n[-1] = nt - n[:-1].sum()
    # True change points are the cumulative segment ends, excluding the final one.
    cp_truth = n.cumsum()[:T - 1]
    # NOTE(review): generate_data_mean is neither defined nor imported in the visible
    # notebook cells — confirm it is available before running this cell on a fresh kernel.
    nt, Y_train = generate_data_mean(n, T, theta)
    nt, Y_test = generate_data_mean(n, T, theta)
    Y_train_list[b], Y_test_list[b] = Y_train, Y_test
    cp_truth_list[b] = cp_truth

# The file name encodes n, p, Delta, K = T - 1 and kappa (mean jump size times 100, truncated).
path = '/content/drive/MyDrive/DCDP/mean/'
with open(f'{path}data_n{nt}_p{p}_Delta{Delta}_K{T - 1}_kappa{int(np.mean(diff) * 100)}.npz', 'wb') as f:
    np.savez(
        f,
        Y_train_list=Y_train_list,
        Y_test_list=Y_test_list,
        cp_truth_list=cp_truth_list,
        theta=theta,
    )

[1.58113883 1.58113883 1.58113883]
