In [18]:
import numpy as np
import pandas as pd
import libpysal.weights as lpw

# SAR
$$
\begin{aligned}
y &= \rho W y + v \\
y &= (I-\rho W)^{-1}v
\end{aligned}
$$








* N = number of data points
* D = number of unobservable factors (assumed D = N)
* n = number of simulations

In [19]:
# Seed NumPy's global random generator so that every random draw in this
# notebook is reproducible from a fresh kernel.
seed_value = 42
np.random.seed(seed=seed_value)

## 1. Weight matrix $W$
* N x N matrix


In [20]:
# Generate a small random point pattern: one (x, y) location per data point.

# Bounding box of the area the points are drawn from
xmin, xmax = 0, 10
ymin, ymax = 0, 10

# Number of data points
N = 4

# Draw all x values first, then all y values, and pair them up as columns
x_coords = np.random.uniform(low=xmin, high=xmax, size=N)
y_coords = np.random.uniform(low=ymin, high=ymax, size=N)
coords = np.column_stack([x_coords, y_coords])

# Show the (N, 2) array of coordinates
print(coords)

[[3.74540119 1.5601864 ]
 [9.50714306 1.5599452 ]
 [7.31993942 0.58083612]
 [5.98658484 8.66176146]]


In [1]:
def make_coords(N_points, xm=0, xM=10, ym=0, yM=10):
    """Draw random 2-D coordinates for the data points.

    The bounds default to the 0-10 square, so callers may pass only the
    number of points.

    Parameters
    ----------
    N_points : int
        Number of points to generate.
    xm, xM : float, optional
        Lower and upper bound of the x coordinates (defaults 0 and 10).
    ym, yM : float, optional
        Lower and upper bound of the y coordinates (defaults 0 and 10).

    Returns
    -------
    numpy.ndarray
        Array of shape (N_points, 2); column 0 holds x, column 1 holds y.
    """
    # All x values are drawn before any y value, so a given seed keeps
    # producing the same coordinates regardless of how the bounds are supplied.
    x_coords = np.random.uniform(xm, xM, size=N_points)
    y_coords = np.random.uniform(ym, yM, size=N_points)
    return np.column_stack((x_coords, y_coords))

In [22]:
# Pairwise Euclidean distances between the data points.

# Start from an all-zero N x N matrix; the diagonal (distance of a point to
# itself) is never written and therefore stays 0.
distances = np.zeros((N, N))

# Visit each unordered pair once (j > i) and mirror the value across the
# diagonal so the matrix is symmetric.
for i in range(N):
    for j in range(i + 1, N):
        d_ij = np.linalg.norm(coords[i] - coords[j])
        distances[i, j] = d_ij
        distances[j, i] = d_ij

print(distances)

[[0.         5.76174188 3.70627178 7.44682969]
 [5.76174188 0.         2.39635439 7.92654555]
 [3.70627178 2.39635439 0.         8.19018856]
 [7.44682969 7.92654555 8.19018856 0.        ]]


In [23]:
# Build the spatial weight matrix with PySAL.

# Binary distance-band weights (binary=True): points within the threshold
# distance of each other are neighbours with weight 1 -- these are NOT
# inverse-distance weights.
w = lpw.DistanceBand.from_array(coords, binary=True, threshold=9)

# Row-standardise the weights so that each row sums to one
w.transform = 'R'

# full() returns a (dense matrix, list of ids) pair; print both
print(w.full())

(array([[0.        , 0.33333333, 0.33333333, 0.33333333],
       [0.33333333, 0.        , 0.33333333, 0.33333333],
       [0.33333333, 0.33333333, 0.        , 0.33333333],
       [0.33333333, 0.33333333, 0.33333333, 0.        ]]), [0, 1, 2, 3])


In [24]:
def make_weight(coords, thres):
    """Return the dense, row-standardised distance-band weight matrix.

    Parameters
    ----------
    coords : array-like
        Point coordinates, one row per data point.
    thres : float
        Distance threshold passed to ``DistanceBand``.

    Returns
    -------
    The dense weight matrix, i.e. the first element of ``W.full()``.
    """
    band = lpw.DistanceBand.from_array(coords, threshold=thres, binary=True)
    band.transform = 'R'
    # full() returns (dense matrix, ids); only the matrix is needed here
    dense, _ids = band.full()
    return dense

## 2. Invertible Matrix $A$
* N x N matrix

In [25]:
def check_invertible(matrix):
    """Invert ``matrix`` if possible.

    Returns
    -------
    numpy.ndarray or bool
        The inverse of ``matrix``, or ``False`` when ``np.linalg.inv`` raises
        ``LinAlgError``.
    """
    try:
        result = np.linalg.inv(matrix)
    except np.linalg.LinAlgError:
        # Signal failure with False instead of propagating the error
        result = False
    return result

In [26]:
# Compute A = (I - rho * W)^{-1}

# Spatial autoregressive coefficient
rho = 0.5

# Dense weight matrix (first element of the (matrix, ids) pair) and identity
W = w.full()[0]
I = np.identity(N)

# check_invertible returns the inverse, or False if inversion fails
A = check_invertible(I - W * rho)

print(A)

[[1.14285714 0.28571429 0.28571429 0.28571429]
 [0.28571429 1.14285714 0.28571429 0.28571429]
 [0.28571429 0.28571429 1.14285714 0.28571429]
 [0.28571429 0.28571429 0.28571429 1.14285714]]


* Function that calculates A

In [35]:
def get_A(N, thres, rho, xmin=0, xmax=10, ymin=0, ymax=10):
    """Build A = (I - rho * W)^{-1} for a freshly drawn set of N points.

    Parameters
    ----------
    N : int
        Number of data points.
    thres : float
        Distance threshold used for the weight matrix.
    rho : float
        Spatial autoregressive coefficient.
    xmin, xmax, ymin, ymax : float, optional
        Bounding box the random coordinates are drawn from
        (defaults: the 0-10 square).

    Returns
    -------
    numpy.ndarray or bool
        The N x N matrix A, or ``False`` if ``check_invertible`` could not
        invert (I - rho * W).
    """
    # make_coords needs the coordinate bounds as well as the point count;
    # calling it with N alone raises a TypeError.
    coords = make_coords(N, xmin, xmax, ymin, ymax)

    # Row-standardised distance-band weight matrix
    W = make_weight(coords, thres)

    # Invert (I - rho * W)
    return check_invertible(np.identity(N) - rho * W)

## 3. Unobservable variable $v$
* N x 1 vector $\mathbf{v} = (v_1, v_2, \dots, v_N)^\top$; each element corresponds to one data point.
* Each element is drawn independently from the standard normal distribution: $v_i \sim \mathcal{N}(0, 1^2)$.

In [28]:
# One standard-normal draw per data point, stored as an N x 1 column vector
v = np.random.standard_normal(size=(N, 1))
print(v)

[[ 1.57921282]
 [ 0.76743473]
 [-0.46947439]
 [ 0.54256004]]


## 4. Generated variable $y$
* Generate by $y = (I-\rho W)^{-1}v$
* N x 1 vector (N x N matrix $\cdot$ N x 1 vector)
* Assume $\rho = 0.5$ (the value used to compute $A$)

In [29]:
# y = A v, with A = (I - rho * W)^{-1}; the result is an N x 1 column vector
y = np.dot(A, v)

print(y)

[[2.04496333]
 [1.34915354]
 [0.28894573]
 [1.15640381]]


# Simulation
* Generate $n$ independent draws of $y$, where $n$ is the number of simulations.

In [36]:
# Set up parameters
n = 10000       # number of simulations
N = 3           # number of data points
threshold = 9   # distance threshold for the weight matrix
rho = 0.5       # spatial autoregressive coefficient

# Calculate A = (I - rho * W)^{-1}.
# get_A returns the full N x N matrix, so it must not be indexed: taking [1]
# would keep a single row and make every simulated y a length-1 array.
A = get_A(N, threshold, rho)
if A is False:
    # check_invertible signals a failed inversion with False
    raise ValueError("(I - rho * W) is not invertible; cannot simulate y")

results = [] # generated n * y = [y1, y2, ..., yN]
V = [] # randomly generated v = [v1, v2, ..., vD], (D = N)

# Run simulations
for i in range(n):
    # Generate random v from standard normal distribution
    v = np.random.randn(N, 1)
    V.append(v)
    # Compute the matrix product
    y = np.dot(A, v) # y = N x 1 vector
    results.append(y)


print("The shape of the simulation result is: ", np.shape(results)) # (n, N, 1), cf. 3D array

The shape of the simulation result is:  (10000, 3, 1)


In [31]:
# Turn the simulated draws into an (n, N) table: one row per simulation,
# one column per data point.

# Column labels y1, y2, ..., yN
col_names = [f'y{idx}' for idx in range(1, N + 1)]

# Drop the trailing length-1 axis of the (n, N, 1) results before tabulating
simul_df = pd.DataFrame(np.reshape(results, (n, N)), columns=col_names)

print(simul_df)

            y1        y2        y3
0    -1.452908 -0.391245 -1.369062
1    -1.198815  1.103547 -0.249594
2    -0.706619 -1.900440 -1.196147
3    -0.177011 -1.186544  0.034809
4    -1.078127 -0.830971 -1.078981
...        ...       ...       ...
9995 -1.440927 -1.066561 -0.477203
9996  1.034919  0.770898 -0.727870
9997 -0.520387 -0.161917 -0.432783
9998 -1.473345 -2.636104 -2.180096
9999 -1.464560 -3.616789 -0.629693

[10000 rows x 3 columns]


# Conditional Covariance

## 1. Covariance of generated $y$

In [32]:
# Sample covariance matrix of the simulated y columns (N x N)
cov_simul = simul_df.cov()
cov_simul

Unnamed: 0,y1,y2,y3
y1,1.752605,1.109424,1.110506
y2,1.109424,1.734054,1.101289
y3,1.110506,1.101289,1.756998


## 2. Calculated conditional covariance by using the model
* $Cov(y_{i}, y_{k}|X) = \sigma^2 \sum_{j}A_{ij}A_{kj}$ 
* where $\sigma^2$ is the variance of the randomly generated $v$ draws (collected in `V`) and $A = (I-\rho W)^{-1}$

In [33]:
# Empirical variance of v, pooled over every element of every simulated draw
var = np.var(V)
print(var)

0.9976000157827155


In [34]:
# Model-implied covariance of y.
# Entry (i, k) is var * sum_j A[i, j] * A[k, j], i.e. var * (A A^T)[i, k].
# A single matrix product replaces the element-wise triple loop, which
# accumulated every entry twice and then halved the result.
cov_model = var * np.dot(A, A.T)


# Convert results to dataframe
cov_model = pd.DataFrame(cov_model, columns=col_names, index=col_names)
cov_model

Unnamed: 0,y1,y2,y3
y1,1.755776,1.117312,1.117312
y2,1.117312,1.755776,1.117312
y3,1.117312,1.117312,1.755776
