In [185]:
import numpy as np
import pandas as pd
import libpysal.weights as lpw

# SAR
$$ 
y = \rho W y + v  \\
y = (I-\rho W)^{-1}v   
$$








* N = # of data points
* D = # of unobservable factors, assume D = N
* n = # of simulation

## 1. Weight matrix $W$
* N x N matrix


In [186]:
# Create a set of coordinates for the data points 

# Define the range of coordinate values
xmin, ymin = 0, 0
xmax, ymax = 10, 10

# Set the number of data points
N = 4

# Create a set of random coordinates for N
x_coords = np.random.uniform(xmin, xmax, size=N)
y_coords = np.random.uniform(ymin, ymax, size=N)
coords = np.column_stack((x_coords, y_coords))

# Print the resulting coordinates
print(coords)

[[6.39885301 5.91925252]
 [8.18239945 8.59447995]
 [3.94081586 9.53345527]
 [3.80261652 5.29920322]]


In [187]:
# Calculate distances between coordinates

# Initialize an empty distances matrix
distances = np.zeros((N, N))

# Compute the euclidean distance between pair of coords
for i in range(N):
    for j in range(i+1, N):
        distances[i,j] = distances[j,i] = np.linalg.norm(coords[i] - coords[j])       

print(distances)

[[0.         3.21525733 4.37085897 2.66925177]
 [3.21525733 0.         4.34427279 5.48099874]
 [4.37085897 4.34427279 0.         4.23650675]
 [2.66925177 5.48099874 4.23650675 0.        ]]


In [188]:
# Create weight matrix using PySAL

# Create a distance-based weight matrix using inverse distance
w = lpw.DistanceBand.from_array(coords, threshold=9, binary=True)

# Convert the weight matrix to row-stochastic form
w.transform = 'R'

# Print the weight matrix
print(w.full())

(array([[0.        , 0.33333333, 0.33333333, 0.33333333],
       [0.33333333, 0.        , 0.33333333, 0.33333333],
       [0.33333333, 0.33333333, 0.        , 0.33333333],
       [0.33333333, 0.33333333, 0.33333333, 0.        ]]), [0, 1, 2, 3])


## 2. Invertible Matrix $A$
* N x N matrix

In [189]:
# function that checks if the matrix is invertible

def check_invertible(matrix):
    try:
        inverse = np.linalg.inv(matrix)
        return inverse
    except np.linalg.LinAlgError:
        return False

In [190]:
# Calculate the invertible matrix A

W = w.full()[0]
I = np.identity(W.shape[0])
rho = 0.5

A = check_invertible(I - rho * W)

print(A)

[[1.14285714 0.28571429 0.28571429 0.28571429]
 [0.28571429 1.14285714 0.28571429 0.28571429]
 [0.28571429 0.28571429 1.14285714 0.28571429]
 [0.28571429 0.28571429 0.28571429 1.14285714]]


## 3. Unobservable variable $v$
* N x 1 vector $\bold{v} = {v_1, v_2, ..., v_N}$, each element corresponds to each data point. 
* Randomly generated from standard normal distribution. $\bold{v} \sim N(0, 1^2)$

In [191]:
v = np.random.randn(N, 1)
print(v)

[[ 1.83524896]
 [ 0.58338203]
 [-0.37399595]
 [-0.8260774 ]]


## 4. Generated variable $y$
* Generate by $y = (I-\rho W)^{-1}v$
* N x 1 vector (N x N matrix $\cdot$ N x 1 vector)
* Assume $ \rho = 0.1$

In [192]:
y = np.dot(A, v)

print(y)

[[ 1.92122986]
 [ 0.84820107]
 [ 0.02759137]
 [-0.35990702]]


## 5. Simulation
* Make n * $y$, where n = # of simulation.

In [193]:
# Set up n = # of simulation
n = 1000

results = [] # generated n * y = [y1, y2, ..., yN]
V = [] # randomly generated v = [v1, v2, ..., vD], (D = N)

# Run simulations
for i in range(n):
    # Generate random v from standard normal distribution
    v = np.random.randn(N, 1) 
    V.append(v)
    # Compute the matrix product
    y = np.dot(A, v) # y = N x 1 vector
    results.append(y)

print("The shape of the simulation result is: ", np.shape(results)) # (n, N, 1), cf. 3D array

The shape of the simulation result is:  (1000, 4, 1)


In [194]:
# Convert results into dataframe (n, N) 

# Make a column names list; y1, y2, ..
col_names = []

for i in range(N):
    col_names.append('y{}'.format(i+1))

simul_df = pd.DataFrame(np.reshape(results, (n, N)), columns=col_names)

print(simul_df)

           y1        y2        y3        y4
0    0.216801 -0.247460 -0.527423  1.079930
1   -1.930600 -1.602773 -2.437187 -3.936927
2    2.316896  0.345529  2.946292  0.486094
3    1.435187  1.089415  0.461165  1.758962
4    4.008987  0.255102  0.816394  2.716573
..        ...       ...       ...       ...
995 -0.418197  0.224860  0.883906  0.871814
996 -0.618622  0.324946  0.073692 -0.734254
997  1.380170  3.782200  0.211252  2.994085
998  1.760421 -1.109666  1.673991 -0.693321
999 -3.354855 -1.815324  0.384143 -1.515081

[1000 rows x 4 columns]


# Conditional Covariance

## 1. Covariance of generated $y$

In [195]:
# Covariance Matrix
cov_simul = simul_df.cov()
cov_simul

Unnamed: 0,y1,y2,y3,y4
y1,1.618113,0.813212,0.82589,0.827651
y2,0.813212,1.546474,0.801657,0.823831
y3,0.82589,0.801657,1.543333,0.829709
y4,0.827651,0.823831,0.829709,1.543493


## 2. Calculated conditional covariance by using the model
* $Cov(y_{i}, y_{j}|X) = \sigma^2 \sum_{j}A_{ij}A_{kj}$ 
* where $\sigma^2$ is variance of randomly generated vector V

In [196]:
# Get variance of vector V 
var = np.var(V)
print(var)

1.0098188856153407


In [197]:
A

array([[1.14285714, 0.28571429, 0.28571429, 0.28571429],
       [0.28571429, 1.14285714, 0.28571429, 0.28571429],
       [0.28571429, 0.28571429, 1.14285714, 0.28571429],
       [0.28571429, 0.28571429, 0.28571429, 1.14285714]])

In [198]:
a_12 = 0 

for j in range(N):
    a_12 += A[0, j] * A[1, j]

cov_12 = var * a_12
print(cov_12)

0.8243419474410945


In [199]:
# Initialize an empty covariance matrix
cov_model = np.zeros((N, N))


for i in range(N):
  for k in range(N):
    for j in range(N):
      cov_model[i,k] += A[i,j] * A[k,j]
      cov_model[k,i] += A[i,j] * A[k,j]

cov_model = var * cov_model / 2


# Convert results to dataframe
cov_model = pd.DataFrame(cov_model, columns=col_names, index=col_names)

cov_model

Unnamed: 0,y1,y2,y3,y4
y1,1.56625,0.824342,0.824342,0.824342
y2,0.824342,1.56625,0.824342,0.824342
y3,0.824342,0.824342,1.56625,0.824342
y4,0.824342,0.824342,0.824342,1.56625
