In [50]:
import numpy as np
from scipy.optimize import minimize

In [51]:
np.random.seed(42)

# MLE of Normal distribution
* Let $X \sim N(\mu, \sigma^2)$. 
    * Estimator vector $\bold{\theta} = (\mu, \sigma)$, Random samples $X_1, X_2, ...,X_n$

## 1. Likelihood Function
* Likelihood function = Joint distribution of random samples $X_1, X_2, ... , X_n$
$$ L(\mu, \sigma) = f(x_1, x_2, ...,x_n) = \Pi^{n}_{i=1}f(x_i;\theta)  $$

* Log Likelihood function
$$ logL(\mu, \sigma) = l(\mu, \sigma)= -\frac{n}{2}log2\pi-nlog\sigma-\frac{1}{2}\sum^n_{i=1}(\frac{x_i-\mu}{\sigma})^2  \\ = 
-\frac{n}{2}(log(2\pi\sigma^2)) -\frac{1}{2\sigma^2}\sum^n_{i=1}(x_i-\mu)^2$$

In [52]:
# define the negative log-likelihood function for a normal distribution
def neg_log_likelihood(params, data):
    mu, sigma = params
    n = len(data)
    logL = -(n/2)*(np.log(2*np.pi*sigma**2)) - 1/(2*sigma**2) * np.sum((data-mu)**2)
    return -logL

## 2. Random Sampling
* Generate a sample dataset from normal distribution (``np.random.normal(mu, sigma, size)``)
* Define initial parameter values (``initial_params``) for optimization. These values serve as a starting point for the optimization algorithm.

In [53]:
# generate a sample dataset
data = np.random.normal(5, 2, 100) # X ~ N(5, 4)

# define the initial parameter values for optimization
initial_params = [0, 1]

## 3. Find MLE 
* Using the ``minimize()`` function from the ``scipy.optimize`` module, we minimize the negative log-likelihood function to find the maximum likelihood estimates (MLE) for the parameters.
* ``scipy.optimize.minimize(fun, x0, args=(), method=None, ...)``
    * fun: The objective function to be minimized. It should take the variables to be optimized as input and return the value of the function to be minimized.
    * x0: The initial guess or starting point for the optimization. It can be a scalar or an array-like object.
    * args: Additional arguments to be passed to the objective function fun.
    * method: The optimization algorithm to be used. It can be specified as a string or an OptimizeResult object. If not specified, the default algorithm is used.

In [54]:
# minimize the negative log-likelihood function to find MLE
result = minimize(neg_log_likelihood, initial_params, args=(data,))
mu_mle, sigma_mle = result.x

In [55]:
# print the estimated MLE
print("Estimated MLE - mu:", mu_mle) # True mu: 5
print("Estimated MLE - sigma:", sigma_mle) # True sigma: 2

Estimated MLE - mu: 4.792306853229411
Estimated MLE - sigma: 1.8072323306279252


# MLE of Linear Regression Model 
* Let $Y_i = \alpha + \beta(x_i -\bar{x}) + e_i$ where $\bar{x} = \frac{1}{n} \sum^{n}_{i=1}x_i$ and $e_i \sim^{iid} N(0, \sigma^2)$
    * Estimator vector $\bold{\theta} = (\alpha, \beta, \sigma)$, Random samples $X_1, X_2, ...,X_n$

## 1. Generate data using Linear regression model

In [56]:
# define linear regression model function
def reg(params, data):
    alpha, beta, sigma = params
    n = len(data)
    
    x_bar = np.mean(data)
    epsilon = np.random.normal(0, sigma, n)
    
    return alpha + beta*(data-x_bar) + epsilon 

In [57]:
# generate a sample dataset (x, y) using linear regression
x = np.random.randn(100) 
true_params = [2, 3, 1]
y = reg(true_params, x) # yi

## 2. Likelihood Function
* Negative Log Likelihood function
$$ -logL(\alpha, \beta, \sigma) = \frac{n}{2} log(2\pi\sigma^2) + \frac{\sum^n_{i=1}[y_i-\alpha-\beta(x_i-\bar{x})]^2}{2\sigma^2} $$

* cf. Line-point vertical distance function (Residual)
$$H(\alpha, \beta) = \sum^n_{i=1}[y_i-\alpha-\beta(x_i-\bar{x})]^2 = \sum^n_{i=1} (y_i-\hat{y_i})^2$$


In [58]:
# define the negative log-likelihood function for linear regression
def neg_log_likelihood(params, x, y):
    alpha, beta, sigma = params 
    x_bar = np.mean(x)
    n = len(x)
    
    y_pred = alpha + beta * (x - x_bar) # yhat; no epsilon term 
    residuals = y - y_pred
    
    return (n/2 * np.log(2*np.pi*sigma**2)) + (np.sum(residuals**2) / 2*sigma**2)

## 3. Find MLE 

In [59]:
# set the initial parameter values for optimization
initial_params = [0, 0, 1]  # initial values for alpha, beta, and sigma

In [60]:
# minimize the negative log-likelihood function
result = minimize(neg_log_likelihood, initial_params, args=(x, y))
alpha_mle, beta_mle, sigma_mle = result.x

In [61]:
# print the estimated MLEs
print("Estimated MLE - alpha:", alpha_mle) # True alpha: 2
print("Estimated MLE - beta:", beta_mle) # True beta: 3
print("Estimated MLE - sigma:", sigma_mle) # True sigma: 1

Estimated MLE - alpha: 1.8740702012081738
Estimated MLE - beta: 2.632050426560142
Estimated MLE - sigma: -0.949647150130543


# MLE of Multivariate Normal Distribution

## 1. Likelihood function

* Log Likelihood function
$$ logL(\boldsymbol {\mu}, \boldsymbol {\Sigma})= l(\boldsymbol {\mu}, \boldsymbol {\Sigma}) = \sum^n_{i=1} -\frac{1}{2}(\bold{x}_i-\boldsymbol{\mu})^T\boldsymbol{\Sigma}^{-1}(\bold{x}_i-\boldsymbol{\mu})-{\frac {1}{2}}log|{\boldsymbol {\Sigma }}|$$

cf. $\bold{x}_i-\boldsymbol{\mu}$ is a column vector

In [67]:
# define the log-likelihood function for the bivariate normal distribution
def log_likelihood(params, data):
    k = data.shape[1] # dimension of random vector, 2
    
    mu = np.array(params[:k]) 
    Sigma = np.array([[params[k]**1, params[-1]], [params[-1], params[k+1]**1]])
    n = len(data)
    
    inv_Sigma = np.linalg.inv(Sigma)
    log_det_Sigma = np.log(np.linalg.det(Sigma))
    
    logL = 0.0
    for i in range(n):
        x = data[i]
        logL += -0.5 * (x - mu) @ inv_Sigma @ (x - mu).T - 0.5 * log_det_Sigma
    return -logL

## 2. Random sampling

In [68]:
n = 10000 # number of data points
k = 2

In [69]:
mu_true = np.array([0, 0])
sigma_true = np.array([[1, 0], [0, 1]])

data = np.random.multivariate_normal(mu_true, sigma_true, size=n) # 2-dimensional array
data.shape # [x1, x2]

(10000, 2)

## 3. Find MLE



In [74]:
true_params = [0,0,1,1,0]
initial_params = true_params

In [75]:
# minimize the negative log-likelihood to estimate the parameters
result = minimize(log_likelihood, initial_params, args=(data,))
estimated_params = result.x

In [76]:
# extract the estimated mean and covariance matrix
estimated_mean = estimated_params[:k]
estimated_cov = np.array([[estimated_params[k+1] , estimated_params[-1]], [estimated_params[-1] , estimated_params[k+2]]])

print("Estimated mean:", estimated_mean)
print("Estimated covariance matrix:")
print(estimated_cov)

Estimated mean: [-0.00535267 -0.01380805]
Estimated covariance matrix:
[[0.98280636 0.00593146]
 [0.00593146 0.00593146]]
