In [86]:
import numpy as np
import pandas as pd
import libpysal.weights as lpw

# SAR
$$
\begin{aligned}
y &= \rho W y + v \\
y &= (I-\rho W)^{-1}v
\end{aligned}
$$








* N = # of data points
* D = # of unobservable factors, assume D = N
* n = # of simulations

## 1. Weight matrix $W$
* N x N matrix


In [87]:
# Draw N random points inside a rectangular study area

# Bounds of the study area along each axis
xmin, xmax = 0, 10
ymin, ymax = 0, 10

# Number of data points
N = 4

# Sample the x values first, then the y values, each uniformly within its bounds
x_coords = np.random.uniform(low=xmin, high=xmax, size=N)
y_coords = np.random.uniform(low=ymin, high=ymax, size=N)

# Pair the samples up so that each row is one (x, y) point
coords = np.stack([x_coords, y_coords], axis=1)

# Show the sampled coordinates
print(coords)

[[4.15372531 0.39757015]
 [8.09108568 2.55660619]
 [5.1767679  3.26829109]
 [5.14685004 8.3869186 ]]


In [88]:
# function that makes coordinates of data points

def make_coords(N_points, x_range=None, y_range=None):
    """Sample random 2-D point coordinates uniformly inside a rectangle.

    Parameters
    ----------
    N_points : int
        Number of points to generate.
    x_range : tuple of (low, high), optional
        Bounds for the x coordinates. When omitted, the notebook-level
        ``xmin`` / ``xmax`` values are used, as before.
    y_range : tuple of (low, high), optional
        Bounds for the y coordinates. When omitted, the notebook-level
        ``ymin`` / ``ymax`` values are used, as before.

    Returns
    -------
    numpy.ndarray
        Array with one row per point and two columns (x, y).
    """
    # Fall back to the notebook globals only when no explicit bounds are given,
    # so the function can also be used without relying on earlier cells.
    x_low, x_high = (xmin, xmax) if x_range is None else x_range
    y_low, y_high = (ymin, ymax) if y_range is None else y_range

    # x is drawn before y, which keeps the order of random draws unchanged.
    x_coords = np.random.uniform(x_low, x_high, size=N_points)
    y_coords = np.random.uniform(y_low, y_high, size=N_points)
    return np.column_stack((x_coords, y_coords))

In [89]:
# Calculate distances between coordinates

# Broadcasting coords against itself yields the coordinate difference for every
# ordered pair of points; taking the norm over the last axis turns each
# difference into a Euclidean distance. The size comes from coords itself, so
# the result cannot go out of sync with a separately stored N.
distances = np.linalg.norm(coords[:, np.newaxis, :] - coords[np.newaxis, :, :], axis=-1)

print(distances)

[[0.         4.49046137 3.0475654  8.05083755]
 [4.49046137 0.         2.99995725 6.53154395]
 [3.0475654  2.99995725 0.         5.11871494]
 [8.05083755 6.53154395 5.11871494 0.        ]]


In [90]:
# Build the spatial weight matrix with PySAL

# Distance-band weights with binary=True and a distance threshold of 9
w = lpw.DistanceBand.from_array(
    coords,
    threshold=9,
    binary=True,
)

# Switch to the row-standardised ('R') transform, i.e. row-stochastic weights
w.transform = 'R'

# w.full() gives back a pair (dense weight array, list of ids); print both
print(w.full())

(array([[0.        , 0.33333333, 0.33333333, 0.33333333],
       [0.33333333, 0.        , 0.33333333, 0.33333333],
       [0.33333333, 0.33333333, 0.        , 0.33333333],
       [0.33333333, 0.33333333, 0.33333333, 0.        ]]), [0, 1, 2, 3])


In [91]:
# function that makes weight matrix

def make_weight(coords, thres):
    """Return the dense row-standardised distance-band weight matrix.

    Parameters
    ----------
    coords : array-like
        Point coordinates handed to ``DistanceBand.from_array``.
    thres : float
        Distance threshold handed to ``DistanceBand.from_array``.

    Returns
    -------
    The first element of ``w.full()``, i.e. the dense weight array.
    """
    band = lpw.DistanceBand.from_array(coords, threshold=thres, binary=True)
    band.transform = 'R'
    # full() returns (dense array, ids); only the array is needed
    dense_weights, _ids = band.full()
    return dense_weights

## 2. Invertible Matrix $A$
* N x N matrix

In [92]:
# function that checks if the matrix is invertible

def check_invertible(matrix):
    """Try to invert ``matrix``.

    Returns the inverse computed by ``np.linalg.inv`` when that succeeds, and
    ``False`` when NumPy raises ``LinAlgError``.
    """
    try:
        inverted = np.linalg.inv(matrix)
    except np.linalg.LinAlgError:
        # Inversion failed: signal it with False instead of an exception
        return False
    else:
        return inverted

In [93]:
# Compute A = (I - rho * W)^{-1}

# Spatial autoregressive coefficient
rho = 0.5

# Dense weight matrix: the first item of the tuple returned by w.full()
W = w.full()[0]
I = np.eye(N)

# check_invertible hands back the inverse, or False when inversion fails
A = check_invertible(I - rho * W)

print(A)

[[1.14285714 0.28571429 0.28571429 0.28571429]
 [0.28571429 1.14285714 0.28571429 0.28571429]
 [0.28571429 0.28571429 1.14285714 0.28571429]
 [0.28571429 0.28571429 0.28571429 1.14285714]]


* Function that calculates A

In [94]:
def get_A(N, thres, rho):
    """Generate random points and return the inverse of (I - rho * W).

    Parameters
    ----------
    N : int
        Number of data points to generate.
    thres : float
        Distance threshold forwarded to ``make_weight``.
    rho : float
        Coefficient multiplying the weight matrix W.

    Returns
    -------
    Whatever ``check_invertible`` returns for (I - rho * W): the inverse, or
    ``False`` if the inversion fails.
    """
    # Random coordinates -> weight matrix built from them
    points = make_coords(N)
    weights = make_weight(points, thres)

    # Invert (I - rho * W)
    return check_invertible(np.identity(N) - rho * weights)

## 3. Unobservable variable $v$
* N x 1 vector $\mathbf{v} = (v_1, v_2, \dots, v_N)^\top$; each element corresponds to one data point.
* Each element is drawn independently from the standard normal distribution: $v_i \sim N(0, 1^2)$.

In [95]:
# One standard-normal draw per data point, arranged as an (N, 1) column
v = np.random.standard_normal(size=(N, 1))
print(v)

[[-0.54747413]
 [-0.46390852]
 [-0.58889855]
 [-1.01570976]]


## 4. Generated variable $y$
* Generate by $y = (I-\rho W)^{-1}v$
* N x 1 vector (N x N matrix $\cdot$ N x 1 vector)
* Assume $ \rho = 0.5$

In [96]:
# Generate y by applying A = (I - rho * W)^{-1} to the disturbance vector v
y = np.dot(A, v)

print(y)

[[-1.21668953]
 [-1.14506187]
 [-1.25219618]
 [-1.61803436]]


# Simulation
* Generate $y$ n times, where n = # of simulations.

In [97]:
# set seed
# Seeds NumPy's global random state, so np.random draws made after this point
# are repeatable from run to run.
# NOTE(review): the np.random calls in the cells above run before this seed is
# set, so their outputs are not covered by it.
seed_value = 42
np.random.seed(seed_value)

In [108]:
# Set up parameters
n = 10000       # number of simulations
N = 3           # number of data points (replaces the N used in the cells above)
threshold = 9   # distance threshold used to build W
rho = 0.5       # coefficient on W

# Calculate invertible matrix A
A = get_A(N, threshold, rho)

# get_A returns False when the inversion fails; stop here with a clear message
# rather than letting the failure surface later in the matrix product.
if A is False:
    raise ValueError("(I - rho * W) could not be inverted; cannot simulate y")

# Run simulations
# Draw every disturbance vector in one call: V[s] is the (N, 1) vector v of
# simulation s, so V has shape (n, N, 1).
V = np.random.randn(n, N, 1)

# Batched matrix product: results[s] = A @ V[s], giving shape (n, N, 1).
results = np.matmul(A, V)

print("The shape of the simulation result is: ", np.shape(results)) # (n, N, 1), cf. 3D array

The shape of the simulation result is:  (10000, 3, 1)


In [99]:
# Arrange the simulated draws as an (n, N) table: one row per simulation

# Column labels y1, y2, ..., yN
col_names = ['y{}'.format(idx) for idx in range(1, N + 1)]

# Reshape the (n, N, 1) results to (n, N) and label the columns
simul_df = pd.DataFrame(data=np.reshape(results, (n, N)), columns=col_names)

print(simul_df)

            y1        y2        y3
0     2.014240  1.364817  0.375290
1     0.279413 -0.525369 -0.527219
2    -1.164925 -2.889119 -2.738429
3    -0.954179 -1.314613 -0.252951
4    -1.068291 -1.471715  0.830647
...        ...       ...       ...
9995  2.047746  1.289200 -0.397772
9996 -1.166523 -0.619387  0.998126
9997 -1.450137 -2.562929 -1.232151
9998 -3.033492 -2.293023 -1.918657
9999  0.866922  1.566517  1.302496

[10000 rows x 3 columns]


# Conditional Covariance

## 1. Covariance of generated $y$

In [100]:
# Covariance Matrix
# Covariance between the simulated y columns, as computed by DataFrame.cov();
# the bare expression on the last line displays the resulting table.
cov_simul = simul_df.cov()
cov_simul

Unnamed: 0,y1,y2,y3
y1,1.742953,1.095971,1.082992
y2,1.095971,1.74496,1.100075
y3,1.082992,1.100075,1.72249


## 2. Calculated conditional covariance by using the model
* $Cov(y_{i}, y_{k}|X) = \sigma^2 \sum_{j}A_{ij}A_{kj}$ 
* where $\sigma^2$ is the variance of the randomly generated disturbances collected in V

In [101]:
# Get variance of vector V
# np.var is called without an axis, so a single pooled variance over all
# simulated elements of V is produced (the printed value is one number).
var = np.var(V)
print(var)

0.997431097603694


In [102]:
# Model-implied covariance matrix.
# Entry (i, k) is var * sum_j A[i, j] * A[k, j], which is the (i, k) entry of
# var * A A^T, so the whole matrix comes from a single matrix product.
# Built and labelled in one assignment so cov_model is always a DataFrame.
cov_model = pd.DataFrame(var * (A @ A.T), columns=col_names, index=col_names)
cov_model

Unnamed: 0,y1,y2,y3
y1,1.755479,1.117123,1.117123
y2,1.117123,1.755479,1.117123
y3,1.117123,1.117123,1.755479
