In [56]:
import numpy as np
import pandas as pd
import libpysal.weights as lpw

# SAR
$$
\begin{aligned}
y &= \rho W y + v \\
y &= (I-\rho W)^{-1}v
\end{aligned}
$$








* N = number of data points
* D = number of unobservable factors (assume D = N)
* n = number of simulations

In [57]:
# Seed NumPy's global random generator so that the random coordinates and
# the simulated draws below are reproducible from a fresh kernel
seed_value = 42
np.random.seed(seed_value)

## 1. Weight matrix $W$
* N x N matrix


In [58]:
# Sample N random point locations inside a rectangular bounding box

# Number of data points
N = 4

# Bounding box of the coordinate values
xmin, xmax = 0, 10
ymin, ymax = 0, 10

# Draw all x values first, then all y values, so the seeded random stream
# is consumed in a fixed order
x_coords = np.random.uniform(low=xmin, high=xmax, size=N)
y_coords = np.random.uniform(low=ymin, high=ymax, size=N)

# One row per point: column 0 holds x, column 1 holds y
coords = np.stack([x_coords, y_coords], axis=1)

print(coords)

[[3.74540119 1.5601864 ]
 [9.50714306 1.5599452 ]
 [7.31993942 0.58083612]
 [5.98658484 8.66176146]]


In [59]:
# # function that makes coordinates of data points

# def make_coords(N_points, xm, xM, ym, yM):
#     # Define the range of coordinate values
#     xmin, ymin = xm, ym
#     xmax, ymax = xM, yM
    
#     x_coords = np.random.uniform(xmin, xmax, size=N_points)
#     y_coords = np.random.uniform(ymin, ymax, size=N_points)
#     coords = np.column_stack((x_coords, y_coords))
#     return coords

In [60]:
# Pairwise Euclidean distances between all points

# Start from an all-zero N x N matrix; the diagonal (self-distance) stays 0
distances = np.zeros((N, N))

# Visit each pair (i, j) with i < j once and mirror the value across the
# diagonal, since the distance is symmetric
for i in range(N):
    for j in range(i + 1, N):
        gap = np.linalg.norm(coords[i] - coords[j])
        distances[i, j] = gap
        distances[j, i] = gap

print(distances)

[[0.         5.76174188 3.70627178 7.44682969]
 [5.76174188 0.         2.39635439 7.92654555]
 [3.70627178 2.39635439 0.         8.19018856]
 [7.44682969 7.92654555 8.19018856 0.        ]]


In [61]:
# Create weight matrix using PySAL

# Build a distance-band weight matrix from the point coordinates.
# binary=True requests 0/1 weights (not inverse-distance weights); the
# threshold of 9 exceeds every pairwise distance printed above, so each
# point ends up linked to all the others.
w = lpw.DistanceBand.from_array(coords, threshold=9, binary=True)

# Convert the weight matrix to row-stochastic form (each row sums to 1)
w.transform = 'R'

# Print the result of w.full(): a tuple of (dense weight array, list of ids)
print(w.full())

(array([[0.        , 0.33333333, 0.33333333, 0.33333333],
       [0.33333333, 0.        , 0.33333333, 0.33333333],
       [0.33333333, 0.33333333, 0.        , 0.33333333],
       [0.33333333, 0.33333333, 0.33333333, 0.        ]]), [0, 1, 2, 3])


In [62]:
# function that makes weight matrix

def make_weight(coords, thres):
    """Return the dense, row-standardized distance-band weight matrix.

    Parameters
    ----------
    coords : array-like
        Point coordinates, one row per data point.
    thres : float
        Distance threshold passed to ``DistanceBand.from_array``.

    Returns
    -------
    numpy.ndarray
        The first element of ``full()``, i.e. the dense weight array
        without the accompanying id list.
    """
    band = lpw.DistanceBand.from_array(coords, threshold=thres, binary=True)
    # The 'R' transform must be set before densifying so that the returned
    # array holds the row-standardized values.
    band.transform = 'R'
    dense, _ids = band.full()
    return dense

## 2. Invertible Matrix $A$
* N x N matrix

In [63]:
# function that checks if the matrix is invertible

def check_invertible(matrix):
    """Return the inverse of ``matrix``, or ``False`` if it cannot be inverted.

    Parameters
    ----------
    matrix : array-like
        Matrix to invert with ``np.linalg.inv``.

    Returns
    -------
    numpy.ndarray or bool
        The inverse when inversion succeeds; ``False`` when
        ``np.linalg.inv`` raises ``LinAlgError``.
    """
    try:
        result = np.linalg.inv(matrix)
    except np.linalg.LinAlgError:
        # Sentinel value: callers must check for False before using the result
        result = False
    return result

In [64]:
# Build A = (I - rho * W)^{-1} for the example weight matrix

# Spatial autoregressive coefficient
rho = 0.5

# N x N identity and the dense row-standardized weights (first item of full())
I = np.identity(N)
W = w.full()[0]

# check_invertible returns the inverse, or False if inversion fails
A = check_invertible(I - rho * W)

print(A)

[[1.14285714 0.28571429 0.28571429 0.28571429]
 [0.28571429 1.14285714 0.28571429 0.28571429]
 [0.28571429 0.28571429 1.14285714 0.28571429]
 [0.28571429 0.28571429 0.28571429 1.14285714]]


* Function that calculates A

In [65]:
def get_A(coords, N, thres, rho):
    """Compute A = (I - rho * W)^{-1} for the given points.

    Parameters
    ----------
    coords : array-like
        Point coordinates handed to ``make_weight``.
    N : int
        Size of the identity matrix; it has to match the number of points
        for the subtraction below to be well-formed.
    thres : float
        Distance threshold handed to ``make_weight``.
    rho : float
        Coefficient multiplying the weight matrix.

    Returns
    -------
    numpy.ndarray or bool
        Whatever ``check_invertible`` returns: the inverse, or ``False``
        when the inversion fails.
    """
    spatial_weights = make_weight(coords, thres)
    return check_invertible(np.identity(N) - rho * spatial_weights)

## 3. Unobservable variable $v$
* N x 1 vector $\mathbf{v} = (v_1, v_2, \dots, v_N)^\top$; each element corresponds to one data point.
* Each element is randomly generated from the standard normal distribution: $v_i \sim N(0, 1^2)$

In [66]:
# One standard-normal draw per data point, kept as an N x 1 column vector
v = np.random.standard_normal(size=(N, 1))
print(v)

[[ 1.57921282]
 [ 0.76743473]
 [-0.46947439]
 [ 0.54256004]]


## 4. Generated variable $y$
* Generate by $y = (I-\rho W)^{-1}v$
* N x 1 vector (N x N matrix $\cdot$ N x 1 vector)
* Assume $\rho = 0.5$

In [67]:
# y = A v: multiply the N x N matrix A by the N x 1 vector v
y = np.dot(A, v)

print(y)

[[2.04496333]
 [1.34915354]
 [0.28894573]
 [1.15640381]]


# Simulation
* Generate n draws of $y$, where n = number of simulations.

In [72]:
# Simulation settings
n = 10000       # number of simulated draws
N = 4           # number of data points
threshold = 9   # distance threshold for the weight matrix
rho = 0.5       # spatial autoregressive coefficient

# A = (I - rho * W)^{-1}; it does not change across draws, so build it once
A = get_A(coords, N, threshold, rho)

V = []        # every random v = [v1, v2, ..., vD] that was drawn (D = N)
results = []  # every generated y = [y1, y2, ..., yN], one per draw

# Run simulations
for i in range(n):
    # Draw v from the standard normal distribution and map it to y = A v
    v = np.random.randn(N, 1)
    y = np.dot(A, v)  # N x 1 vector

    V.append(v)
    results.append(y)


# Stacking n vectors of shape (N, 1) gives a 3D shape (n, N, 1)
print("The shape of the simulation result is: ", np.shape(results))

The shape of the simulation result is:  (10000, 4, 1)


In [73]:
# Convert results into dataframe (n, N)

# Column labels y1, y2, ..., yN
col_names = ['y{}'.format(idx) for idx in range(1, N + 1)]

# Flatten the (n, N, 1) stack of draws to (n, N): one row per simulation
simul_matrix = np.reshape(results, (n, N))
simul_df = pd.DataFrame(simul_matrix, columns=col_names)

print(simul_df)

            y1        y2        y3        y4
0    -0.546771  2.082232  0.546365  0.350522
1    -0.558923 -1.076911 -0.360095  0.641768
2     0.495086  1.031625  2.021988  0.010972
3     0.417441  1.018742 -0.641798  0.310450
4     2.307502  0.411009  1.618378  0.581678
...        ...       ...       ...       ...
9995 -0.176189 -1.457308 -1.705543  0.006778
9996  0.989701  1.063007 -0.111436 -0.145412
9997 -0.182564 -1.031260  0.389445 -0.433513
9998  0.102416  0.009162  0.600108  0.404745
9999  2.063498  0.100244  1.966240 -1.123413

[10000 rows x 4 columns]


# Conditional Covariance

## 1. Covariance of generated $y$

In [74]:
# Sample covariance matrix of the simulated y columns (N x N);
# the bare name on the last line displays it as the cell output
cov_simul = simul_df.cov()
cov_simul

Unnamed: 0,y1,y2,y3,y4
y1,1.579018,0.807993,0.819883,0.857117
y2,0.807993,1.554051,0.831439,0.841701
y3,0.819883,0.831439,1.552349,0.842961
y4,0.857117,0.841701,0.842961,1.589478


## 2. Calculated conditional covariance by using the model
* $Cov(y_{i}, y_{k}|X) = \sigma^2 \sum_{j}A_{ij}A_{kj}$ 
* where $\sigma^2$ is the variance of the randomly generated values collected in V, and $A = (I-\rho W)^{-1}$

In [75]:
# Variance pooled over every value stored in V (all draws, all elements)
var = np.asarray(V).var()
print(var)

1.004788261512675


In [76]:
# Model-implied covariance: Cov(y) = var * A @ A.T, i.e. entry (i, k) equals
# var * sum_j A[i, j] * A[k, j].
# A single matrix product replaces the triple Python loop, which added every
# entry twice (once as (i, k), once as (k, i)) and then halved the total.
cov_model_values = var * (A @ A.T)


# Convert results to dataframe
cov_model = pd.DataFrame(cov_model_values, columns=col_names, index=col_names)
cov_model

Unnamed: 0,y1,y2,y3,y4
y1,1.558447,0.820235,0.820235,0.820235
y2,0.820235,1.558447,0.820235,0.820235
y3,0.820235,0.820235,1.558447,0.820235
y4,0.820235,0.820235,0.820235,1.558447
