In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist # calculate distance matrix
from scipy.linalg import lu # calculate LU decomposition
from scipy.optimize import minimize # minimization of scalar function

# 1. Generate location data $X$

In [2]:
# function that makes coordinates of data points
def make_coords(N_points, xmu, xsd, ymu, ysd):
    """Sample 2-D point locations with independent normal coordinates.

    Parameters
    ----------
    N_points : number of points to draw
    xmu, xsd : mean and standard deviation of the x coordinates
    ymu, ysd : mean and standard deviation of the y coordinates

    Returns
    -------
    Array with one row per point: column 0 holds x, column 1 holds y.
    """
    # x is drawn before y so the global NumPy random stream is consumed
    # in a fixed order
    xs = np.random.normal(loc=xmu, scale=xsd, size=N_points)
    ys = np.random.normal(loc=ymu, scale=ysd, size=N_points)
    return np.column_stack((xs, ys))

In [3]:
# function that calculates distances between coordinates
def distances(coords):
    """Return the matrix of pairwise Euclidean distances between rows of coords."""
    # the same point set is passed on both sides, so entry (i, j) is the
    # distance between row i and row j
    return cdist(coords, coords, metric='Euclidean')

In [4]:
# seed NumPy's global random generator so the np.random draws in the cells below are repeatable
np.random.seed(24)

In [5]:
# set parameters
p = 1000 # dimensionality (number of simulated locations)
paramTrue = [1, 0.5] # true covariance parameters [theta1, theta2]

In [6]:
# get coordinates
coord = [0, 1, 0, 1] # xmu, xsd, ymu, ysd
X = make_coords(p, *coord) # p points; coord is unpacked into the four remaining positional arguments

In [7]:
# get distance matrix: d[i, j] is the Euclidean distance between rows i and j of X
d = distances(X)

# 2. Generate $\vec{Y}$
## 2-1. Covariance Matrix $\Sigma$
$ \Sigma_{ij} = \mathrm{Cov}(Y_i, Y_j \mid X) = \theta_1 \exp\left(-\frac{1}{\theta_2} d_{ij}\right), \qquad d_{ij} = \lVert X_i - X_j \rVert $

In [8]:
def Sigma(param, dis):
    """Exponential covariance: theta1 * exp(-dis / theta2), applied elementwise.

    Parameters
    ----------
    param : sequence whose first two entries are theta1 and theta2
    dis : distances to evaluate the covariance at (the notebook passes the
        square distance matrix)

    Returns
    -------
    Covariance values with the same shape as dis.
    """
    theta1, theta2 = param[0], param[1]
    decay = -1 / theta2
    return theta1 * np.exp(decay * dis)

In [9]:
# get SigmaTrue: the covariance matrix evaluated at the true parameters
SigmaTrue = Sigma(paramTrue, d)

## 2-2. Generate $\vec{Y}$
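$ \vec{Y} = \Sigma^{1/2} \cdot \vec{v} $

where $ v_i \sim N(0,1) $ independently and $ \Sigma^{1/2} $ is the lower-triangular Cholesky factor of $ \Sigma $, so that $ \mathrm{Cov}(\vec{Y}) = \Sigma $. (In the code below, $ \vec{v} $ is stored in the variable `U`.)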

In [10]:
# lower-triangular Cholesky factor of the true covariance matrix
SigmaHalf = np.linalg.cholesky(SigmaTrue)
# p independent standard-normal draws, stored as a column vector
U = np.random.normal(loc=0, scale=1, size=(p, 1))
Y = SigmaHalf.dot(U)
Y.shape # dimensionality of Y


(1000, 1)

# 3. MLE

$ -\log L(\vec{\theta};\vec{Y}) = \frac{1}{2} \log |\Sigma(\theta)| + \frac{1}{2} \vec{Y}^T \Sigma(\theta)^{-1} \vec{Y} + \text{const} $

* Constraints regarding $ \Sigma(\theta) $
    1. When calculating the determinant of $\Sigma(\theta)$, we need to consider the following properties
    * Symmetric: as the distance matrix is symmetric, $\Sigma(\theta)$ is also symmetric
    * Positive definite
        * If it is positive definite, we can use the Cholesky factorization to compute the log determinant.
        * If it is not positive definite, we can use the LU factorization instead.
        * (Working with the factors helps prevent overflow/underflow when calculating the determinant of a large matrix.)
    2. When calculating the inverse of $\Sigma(\theta)$, we need to consider whether it is invertible or not.

In [11]:
# check pd
def check_pd(Sigma):
    """Return True if the symmetric matrix Sigma is positive definite.

    Uses the symmetric eigenvalue routine instead of the general one: it
    returns real eigenvalues only (no eigenvectors are computed, and no
    complex values can appear from round-off asymmetry).

    Parameters
    ----------
    Sigma : square array, assumed symmetric (only one triangle is read by
        np.linalg.eigvalsh)

    Returns
    -------
    bool : True when the smallest eigenvalue is strictly positive.
    """
    smallest_eigenvalue = np.linalg.eigvalsh(Sigma).min()
    return bool(smallest_eigenvalue > 0)

In [12]:
# check invertible
def check_invertible(matrix):
    """Return True if np.linalg.inv can invert matrix, False otherwise.

    The inverse itself is discarded; only whether np.linalg.inv raises
    LinAlgError is reported.
    """
    try:
        np.linalg.inv(matrix)
    except np.linalg.LinAlgError:
        return False
    return True

## 3-1. Log Likelihood function

In [13]:
# define the negative log likelihood function
def logLikelihood(param, Y, dis=None):
    """Negative Gaussian log-likelihood of Y, up to an additive constant.

    Evaluates 0.5 * log|Sigma(param)| + 0.5 * Y^T Sigma(param)^{-1} Y, which
    is the objective handed to the optimizer.

    Parameters
    ----------
    param : sequence
        Covariance parameters, forwarded unchanged to `Sigma`.
    Y : ndarray
        Observation vector (the notebook passes a (p, 1) column vector).
    dis : ndarray, optional
        Distance matrix forwarded to `Sigma`. When omitted, the
        notebook-level distance matrix `d` is used, exactly as the
        two-argument call always did.

    Returns
    -------
    The objective value, with the shape of `Y.T @ Y` (a (1, 1) array when Y
    is a column vector).
    """
    if dis is None:
        dis = d  # fall back to the notebook-level distance matrix
    sig = Sigma(param, dis)

    try:
        # A successful Cholesky factorization doubles as the
        # positive-definiteness check, and its diagonal gives log|Sigma|
        # without ever forming the (over/underflow-prone) determinant.
        chol_factor = np.linalg.cholesky(sig)
        log_det_Sigma = 2 * np.sum(np.log(np.diag(chol_factor)))
    except np.linalg.LinAlgError:
        # Not positive definite: fall back to the LU-based signed log
        # determinant. The log of a negative determinant is undefined (nan);
        # a singular matrix yields -inf.
        sign, log_abs_det = np.linalg.slogdet(sig)
        log_det_Sigma = log_abs_det if sign >= 0 else np.nan

    try:
        # Solve Sigma x = Y directly instead of forming the explicit inverse.
        sigma_inv_Y = np.linalg.solve(sig, Y)
    except np.linalg.LinAlgError:
        # Singular matrix: use the pseudo inverse as a best-effort substitute.
        sigma_inv_Y = np.linalg.pinv(sig) @ Y

    return 0.5 * Y.T @ sigma_inv_Y + 0.5 * log_det_Sigma

## 3-2. MLE
* Takes about 8 minutes for p = 1,000 dimensions

In [14]:
# initial guess: the true parameters plus a single N(0, 1) draw
# NOTE(review): the one draw is broadcast, so both parameters are shifted by
# the same amount and the start value may be non-positive — confirm intended
param0 = np.asarray(paramTrue) + np.random.normal(0, 1, 1)

# define the optimization problem
problem = dict(
    fun=logLikelihood,  # objective function: negative log likelihood
    x0=param0,          # initial guess for the parameters
    args=(Y,),          # additional arguments for the objective function
)

# run the optimizer and keep the parameter vector it settles on
result = minimize(**problem)
estimated_params = result.x

In [15]:
# show the true parameters next to the optimizer's estimate for comparison
print('True paramter: ', paramTrue)
print('Esimated parameter: ', estimated_params)

True paramter:  [1, 0.5]
Esimated parameter:  [1.02283479 0.5535643 ]
