In [9]:
import numpy as np
import pandas as pd
import libpysal.weights as lpw

# SAR
$$ 
y = \rho W y + v  \\
y = (I-\rho W)^{-1}v   
$$








* N = # of data points
* D = # of unobservable factors, assume D = N
* n = # of simulation

## 1. Weight matrix $W$
* N x N matrix


In [10]:
# Create a set of coordinates for the data points 

# Define the range of coordinate values
xmin, ymin = 0, 0
xmax, ymax = 10, 10

# Set the number of data points
N_units = 3

# Create a set of random coordinates for N_units
x_coords = np.random.uniform(xmin, xmax, size=N_units)
y_coords = np.random.uniform(ymin, ymax, size=N_units)
coords = np.column_stack((x_coords, y_coords))

# Print the resulting coordinates
print(coords)

[[6.84213116 8.97103056]
 [2.49977204 0.02462594]
 [6.72288125 0.18309641]]


In [11]:
# Calculate distances between coordinates

# Initialize an empty distances matrix
N = N_units
distances = np.zeros((N, N))

# Compute the euclidean distance between pair of coords
for i in range(N):
    for j in range(i+1, N):
        distances[i,j] = distances[j,i] = np.linalg.norm(coords[i] - coords[j])       

print(distances)

[[0.         9.94455823 8.78874321]
 [9.94455823 0.         4.22608144]
 [8.78874321 4.22608144 0.        ]]


In [12]:
# Create weight matrix using PySAL

# Create a distance-based weight matrix using inverse distance
w = lpw.DistanceBand.from_array(coords, threshold=9, binary=True)

# Convert the weight matrix to row-stochastic form
w.transform = 'R'

# Print the weight matrix
print(w.full())

(array([[0. , 0. , 1. ],
       [0. , 0. , 1. ],
       [0.5, 0.5, 0. ]]), [0, 1, 2])


## 2. Invertible Matrix $A$
* N x N matrix

In [14]:
# function that checks if the matrix is invertible

def check_invertible(matrix):
    try:
        inverse = np.linalg.inv(matrix)
        return inverse
    except np.linalg.LinAlgError:
        return False

In [15]:
# Calculate the invertible matrix A

W = w.full()[0]
I = np.identity(W.shape[0])
rho = 0.1

A = check_invertible(I - rho * W)

print(A)

[[1.00505051 0.00505051 0.1010101 ]
 [0.00505051 1.00505051 0.1010101 ]
 [0.05050505 0.05050505 1.01010101]]


## 3. Unobservable variable $v$
* N x 1 vector $\bold{v} = {v_1, v_2, ..., v_N}$, each element corresponds to each data point. 
* Randomly generated from standard normal distribution. $\bold{v} \sim N(0, 1^2)$

In [17]:
v = np.random.randn(N_units, 1)
print(v)

[[0.56986569]
 [0.6812501 ]
 [0.57879858]]


## 4. Generated variable $\bar{y}$
* Generate by $\bar{y} = (I-\rho W)^{-1}v$
* N x 1 vector (N x N matrix $\cdot$ N x 1 vector)
* Assume $ \rho = 0.1$

In [19]:
y_bar = np.dot(A, v)

print(y_bar)

[[0.63464896]
 [0.74603337]
 [0.6478327 ]]


## 5. Simulation
* Make n $\bar{y}$, where n = # of simulation.
* 

In [23]:
# Set up n = # of simulation
n = 1000

results = [] 

# Run simulations
for i in range(n):
    # Generate random v from standard normal distribution
    v = np.random.randn(N, 1) 
    # Compute the matrix product
    y = np.dot(A, v) # y = N x 1 vector
    results.append(y)

print(np.shape(results)) # (N, n), cf. 3D array

(1000, 3, 1)


In [25]:
# Convert results (N, n) to dataframe (n, N)

# Make a column names list; y1, y2, ..
col_names = []

for i in range(N):
    col_names.append('y{}'.format(i+1))
    

# make dataframe (n, N)
simul_df = pd.DataFrame(np.reshape(results, (n, N)), columns=col_names)

print(simul_df)

           y1        y2        y3
0    0.278013 -0.867641  0.707711
1    0.965819  0.400019 -0.009519
2    0.114268  0.195350  0.171075
3   -0.324922  0.503384  0.510573
4    0.476363 -0.552513 -1.794967
..        ...       ...       ...
995 -1.488787  0.486092  2.125663
996  0.475087 -1.045220  0.348049
997 -1.293460 -0.665734 -1.376727
998  1.081145  0.567232  0.376100
999  0.410963  1.065847  0.870926

[1000 rows x 3 columns]


# Conditional Covariance
1. Covariance of generated $\bar{y}$ (simul_df)
2. Calculated conditional covariance by using the model $$Cov(y_{i}, y_{j}|X) = \sum_{j}A_{ij}A_{kj}$$

## 1. Covariance of generated $\bar{y}$

In [26]:
cov_simul = simul_df.cov()
cov_simul

Unnamed: 0,y1,y2,y3
y1,0.997146,0.035908,0.156148
y2,0.035908,1.017454,0.13453
y3,0.156148,0.13453,1.05697


## 2. Calculated conditional covariance by using the model

In [None]:
# Initialize an empty covariance matrix
cov_model = np.zeros((N, N))


for i in range(N):
  for k in range(i, N):
    for j in range(N):
      cov_model[i,k] = cov_model[k,i] = np.sum(A[i,j]*A[k,j]) # cf. sigma = 1

# Convert results to dataframe
cov_model = pd.DataFrame(cov_model, columns=col_names, index=col_names)

cov_model

Unnamed: 0,y1,y2,y3
y1,0.002799,0.002799,0.05319
y2,0.002799,0.002799,0.05319
y3,0.05319,0.05319,1.01061


In [None]:
# Calculate the Frobenius norm of the difference between the matrices
diff_norm = np.linalg.norm(cov_model - cov_simul, ord='fro')
diff_norm

1.5181436713221714