<a href="https://colab.research.google.com/github/TomoyaOzawa-DA/research-project/blob/ToeplitzMatrix/SimulationStudy/ToeplitzMatrix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from scipy.linalg import toeplitz
import time

In [2]:
# Simulate a dataset for a lag model with no intercept.
## Column order of X: X_1, X_2, X_3, X_1_lag, X_2_lag (beta1 / beta2 are applied to the columns in this order)

n = 1000 # sample used in estimation, t = 1, 2, ..., 1000. One extra sample at t = 0 is drawn so the first lag exists
SNR = 0.71
rho = 0.3
SEED = 0 # fixed seed so that Restart & Run All reproduces the same draw
rng = np.random.default_rng(SEED)

beta1 = np.array([1,1,1,0,1])      # coefficients generating the first half of Y
beta2 = np.array([-1,-1,-1,0,-1])  # coefficients generating the second half of Y
p = len(beta1) - 2 # number of base variables; the two remaining columns are the lags of X_1 and X_2
half = n // 2      # row at which the coefficients switch from beta1 to beta2

# Covariance of the base variables: Sigma[i, j] = rho**|i - j|
idx = np.arange(p)
Sigma = rho ** np.abs(idx[:, None] - idx[None, :])

# n + 1 draws (t = 0, ..., n); the row at t = 0 only feeds the lag columns and is dropped below.
X_123 = rng.multivariate_normal(mean=np.zeros(p), cov=Sigma, size=n+1)
X_1_lag = np.r_[[0], X_123[0:n, 0]].reshape(n+1,1) # lag variable for X_1 (zero-padded at t = 0)
X_2_lag = np.r_[[0], X_123[0:n, 1]].reshape(n+1,1) # lag variable for X_2 (zero-padded at t = 0)

# Drop t = 0, whose lag entries are only the zero padding.
X = np.concatenate([X_123, X_1_lag, X_2_lag], axis=1)[1:]

# Each half gets its own noise scale, sqrt(var(signal) / SNR), computed from the
# signal of that same half (the second half must use X[half:], not X[:half]).
signal1 = X[:half] @ beta1
signal2 = X[half:] @ beta2
noise1 = rng.normal(0, np.sqrt(np.var(signal1)/SNR), size=half)
noise2 = rng.normal(0, np.sqrt(np.var(signal2)/SNR), size=n-half)
Y1 = signal1 + noise1
Y2 = signal2 + noise2
Y = np.r_[Y1,Y2]

# Disabled experiment, kept for reference; `sm` is not imported in the cells shown here.
#lam = 0.1*n
#ols = sm.OLS(Y, X)
#ols_result = ols.fit()
#bigM = 10*float(1.96*(sum(ols_result.bse)))#1000

In [3]:
# Inspect the design matrix (column order: X_1, X_2, X_3, X_1_lag, X_2_lag)
X

array([[ 0.47884903,  0.58830302, -0.17397167,  0.26414108,  0.70221908],
       [ 0.22133465,  0.05059659,  0.11760868,  0.47884903,  0.58830302],
       [-0.43110849, -0.8633376 , -0.7602266 ,  0.22133465,  0.05059659],
       ...,
       [-1.19899803, -0.25049085, -1.19279493,  1.07986473, -1.49423526],
       [-0.01823158,  0.76316846, -0.46083418, -1.19899803, -0.25049085],
       [ 1.13278136,  1.58763258,  2.2201848 , -0.01823158,  0.76316846]])

In [4]:
# estimated value of beta, one row per observation
# (fixed constants here: 1.0 for rows 0-499 and 0.5 for rows 500-999, 5 columns each)
top_half = np.full((500, 5), 1.0)
bottom_half = np.full((500, 5), 0.5)
beta_estimated = np.vstack((top_half, bottom_half))

In [5]:
# compute SSR by for-loop: sum over t of (Y[t] - sum_j X[t, j] * beta_estimated[t, j])**2
# Kept as plain Python loops on purpose; this is the baseline for the Toeplitz-based versions below.
n_coef = X.shape[1]  # number of regressors, taken from X instead of a hard-coded 5
sum_of_sq = 0
for t in range(len(Y)):
    fitted_t = sum(X[t, j] * beta_estimated[t, j] for j in range(n_coef))
    resid_t = Y[t] - fitted_t  # evaluated once per row, then squared
    sum_of_sq += resid_t * resid_t
sum_of_sq

13797.774291866459

In [6]:
# compute SSR using Toeplitz matrix
# Each variable with a lag gets a zero-padded Toeplitz matrix whose row t is
# [x[t], x[t-1]]; multiplying row t by that observation's two coefficients
# gives the contribution of the variable and its lag.
n_lag = 1

# scipy's toeplitz ignores the first element of the row argument (it uses
# first_col[0]); it is still set to the first observation of the same variable
# so that column and row agree.
first_col_X1 = np.r_[X_123[:, 0], np.zeros(n_lag)]
first_row_X1 = np.r_[X_123[0, 0], np.zeros(n_lag)]
Toep_mat_X1 = toeplitz(first_col_X1, first_row_X1)

first_col_X2 = np.r_[X_123[:, 1], np.zeros(n_lag)]
first_row_X2 = np.r_[X_123[0, 1], np.zeros(n_lag)]  # first observation of X_2 (row 0, column 1)
Toep_mat_X2 = toeplitz(first_col_X2, first_row_X2)

# Drop the first and last n_lag rows: they hold the zero padding, and the
# remaining rows line up with t = 1, ..., n (the rows of X and Y).
lagged_X1 = Toep_mat_X1[n_lag:-n_lag]
lagged_X2 = Toep_mat_X2[n_lag:-n_lag]

# Row-wise dot products. This equals diag(A @ B.T) without forming the n x n product.
# The column picks [0, 3] and [1, 4] (variable, its lag) assume n_lag = 1.
contrib_X1 = np.sum(lagged_X1 * beta_estimated[:, [0, 3]], axis=1)
contrib_X2 = np.sum(lagged_X2 * beta_estimated[:, [1, 4]], axis=1)
contrib_X3 = X[:, 2] * beta_estimated[:, 2]  # X_3 has no lag

sum_of_sq_toeplitz = np.sum(np.square(Y - contrib_X1 - contrib_X2 - contrib_X3))
# Must agree with the for-loop value up to floating-point rounding.
np.testing.assert_allclose(sum_of_sq_toeplitz, sum_of_sq)
sum_of_sq_toeplitz

13797.77429186646

In [7]:
# compute SSR using Toeplitz matrix in another way
# The matrix is built directly for t = 1, ..., n: the first column starts at
# t = 1 and the first row supplies the t = 0 value as the initial lag, so row t
# is [x[t], x[t-1]] and no zero padding or row deletion is needed.
first_col_X1 = X_123[1:n+1, 0]
first_row_X1 = np.r_[X_123[1, 0], X_123[0, 0]]
Toep_mat_X1 = toeplitz(first_col_X1, first_row_X1)

first_col_X2 = X_123[1:n+1, 1]
first_row_X2 = np.r_[X_123[1, 1], X_123[0, 1]]
Toep_mat_X2 = toeplitz(first_col_X2, first_row_X2)

# Row-wise dot products. This equals diag(A @ B.T) without forming the n x n product.
contrib_X1 = np.sum(Toep_mat_X1 * beta_estimated[:, [0, 3]], axis=1)  # X_1 and its lag
contrib_X2 = np.sum(Toep_mat_X2 * beta_estimated[:, [1, 4]], axis=1)  # X_2 and its lag
contrib_X3 = X[:, 2] * beta_estimated[:, 2]                           # X_3 has no lag

sum_of_sq_toeplitz = np.sum(np.square(Y - contrib_X1 - contrib_X2 - contrib_X3))
# Must agree with the for-loop value up to floating-point rounding.
np.testing.assert_allclose(sum_of_sq_toeplitz, sum_of_sq)
sum_of_sq_toeplitz

13797.77429186646