In [1]:
import numpy as np
from scipy.optimize import minimize

In [2]:
np.random.seed(42)

# MLE of Normal distribution
* Let $X \sim N(\mu, \sigma^2)$. 
    * Estimator vector $\bold{\theta} = (\mu, \sigma)$, Random samples $X_1, X_2, ...,X_n$

## 1. Likelihood Function
* Likelihood function = Joint distribution of random samples $X_1, X_2, ... , X_n$
$$ L(\mu, \sigma) = f(x_1, x_2, ...,x_n) = \Pi^{n}_{i=1}f(x_i;\theta)  $$

* Log Likelihood function
$$ logL(\mu, \sigma) = l(\mu, \sigma)= -\frac{n}{2}log2\pi-nlog\sigma-\frac{1}{2}\sum^n_{i=1}(\frac{x_i-\mu}{\sigma})^2 $$

In [3]:
# define the negative log-likelihood function for a normal distribution
def neg_log_likelihood(params, data):
    mu, sigma = params
    n = len(data)
    return (n/2) * np.log(2*np.pi*sigma**2) + (1/(2*sigma**2)) * np.sum((data - mu)**2)

## 2. Random Sampling
* Generate a sample dataset from normal distribution (``np.random.normal(mu, sigma, size)``)
* Define initial parameter values (``initial_params``) for optimization. These values serve as a starting point for the optimization algorithm.

In [4]:
# generate a sample dataset
data = np.random.normal(5, 2, 100) # X ~ N(5, 4)

# define the initial parameter values for optimization
initial_params = [0, 1]

## 3. Find MLE 
* Using the ``minimize()`` function from the ``scipy.optimize`` module, we minimize the negative log-likelihood function to find the maximum likelihood estimates (MLE) for the parameters.
* ``scipy.optimize.minimize(fun, x0, args=(), method=None, ...)``
    * fun: The objective function to be minimized. It should take the variables to be optimized as input and return the value of the function to be minimized.
    * x0: The initial guess or starting point for the optimization. It can be a scalar or an array-like object.
    * args: Additional arguments to be passed to the objective function fun.
    * method: The optimization algorithm to be used. It can be specified as a string or an OptimizeResult object. If not specified, the default algorithm is used.

In [5]:
# minimize the negative log-likelihood function to find MLE
result = minimize(neg_log_likelihood, initial_params, args=(data,))
mu_mle, sigma_mle = result.x

In [6]:
# print the estimated MLE
print("Estimated MLE - mu:", mu_mle) # True mu: 5
print("Estimated MLE - sigma:", sigma_mle) # True sigma: 2

Estimated MLE - mu: 4.792306853229411
Estimated MLE - sigma: 1.8072323306279252


# MLE for Linear Regression Model 
* Let $Y_i = \alpha + \beta(x_i -\bar{x}) + e_i$ where $\bar{x} = \frac{1}{n} \sum^{n}_{i=1}x_i$ and $e_i \sim^{iid} N(0, \sigma^2)$
    * Estimator vector $\bold{\theta} = (\alpha, \beta, \sigma)$, Random samples $X_1, X_2, ...,X_n$

## 1. Generate data using Linear regression model

In [7]:
# define linear regression model function
def reg(params, data):
    alpha, beta, sigma = params
    n = len(data)
    
    x_bar = np.mean(data)
    epsilon = np.random.normal(0, sigma, n)
    
    return alpha + beta*(data-x_bar) + epsilon 

In [8]:
# generate a sample dataset (x, y) using linear regression
x = np.random.randn(100) 
true_params = [2, 3, 1]
y = reg(true_params, x) # yi

## 2. Likelihood Function
* Negative Log Likelihood function
$$ -logL(\alpha, \beta, \sigma) = \frac{n}{2} log(2\pi\sigma^2) + \frac{\sum^n_{i=1}[y_i-\alpha-\beta(x_i-\bar{x})]^2}{2\sigma^2} $$

* cf. Line-point vertical distance function (Residual)
$$H(\alpha, \beta) = \sum^n_{i=1}[y_i-\alpha-\beta(x_i-\bar{x})]^2 = \sum^n_{i=1} (y_i-\hat{y_i})^2$$


In [9]:
# define the negative log-likelihood function for linear regression
def neg_log_likelihood(params, x, y):
    alpha, beta, sigma = params 
    x_bar = np.mean(x)
    n = len(x)
    
    y_pred = alpha + beta * (x - x_bar) # yhat; no epsilon term 
    residuals = y - y_pred
    
    return (n/2 * np.log(2*np.pi*sigma**2)) + (np.sum(residuals**2) / 2*sigma**2)

## 3. Find MLE 

In [10]:
# set the initial parameter values for optimization
initial_params = [0, 0, 1]  # initial values for alpha, beta, and sigma

In [11]:
# minimize the negative log-likelihood function
result = minimize(neg_log_likelihood, initial_params, args=(x, y))
alpha_mle, beta_mle, sigma_mle = result.x

In [12]:
# print the estimated MLEs
print("Estimated MLE - alpha:", alpha_mle) # True alpha: 2
print("Estimated MLE - beta:", beta_mle) # True beta: 3
print("Estimated MLE - sigma:", sigma_mle) # True sigma: 1

Estimated MLE - alpha: 1.8740702012081738
Estimated MLE - beta: 2.632050426560142
Estimated MLE - sigma: -0.949647150130543


# MLE for Multivariate Normal Distribution

* Let $\boldsymbol{X} \sim N(\boldsymbol{\mu}, \boldsymbol{\Sigma})$
* N: size of dimension, n: number of data points
    * $\boldsymbol{\mu}$: n 1 $\times$ N vector = (n, 1, N) array
    * $\boldsymbol{\sigma}$: n N $\times$ N matrix = (n, N, N) array
    * Random vector $\bold{X} = \{X_1, X_2, ...,X_n\}'$ where $X_i \sim^{iid} N(\mu, \Sigma)$:  n 1 $\times$ N vector = (n, 1, N) array


## 1. Generate data from Multivariate normal distribution

In [53]:
N = 2 # size of dimension
n = 50  # number of data points

In [54]:
mu_true = np.array([0., 1.])
Sigma_true = np.array([[ 1. , 0.8], [0.8,  1]])

data = np.random.multivariate_normal(mu_true, Sigma_true, size=n)


In [55]:
data.shape # (n, N)

(50, 2)

In [56]:
len(data)

50

## 2. Likelihood Function
* Log Likelihood function
$$ logL(\boldsymbol {\mu}, \boldsymbol {\Sigma})= l(\boldsymbol {\mu}, \boldsymbol {\Sigma}) = \sum^n_{i=1} -\frac{1}{2}(\bold{x}_i-\boldsymbol{\mu})^T\boldsymbol{\Sigma}^{-1}(\bold{x}_i-\boldsymbol{\mu})-{\frac {1}{2}}log|{\boldsymbol {\Sigma }}|$$

In [57]:
# define the log-likelihood function for the multivariate normal distribution
def log_likelihood(params, data):
    mu = params[:N]
    Sigma = params[N:].reshape((N, N))
    n = len(data)
    
    inv_Sigma = np.linalg.inv(Sigma)
    log_det_Sigma = np.log(np.linalg.det(Sigma))
    
    logL = 0.0
    for i in range(n):
        x = data[i]
        logL += -0.5 * (x - mu).T @ inv_Sigma @ (x - mu) - 0.5 * log_det_Sigma
    return -logL

## 3. Find MLE

In [58]:
# initialize the parameters
mean_params = np.zeros(N)
cov_params = np.eye(N).flatten()

initial_params = np.concatenate((mean_params, cov_params))

In [59]:
# minimize the negative log-likelihood to estimate the parameters
result = minimize(log_likelihood, initial_params, args=(data,))
estimated_params = result.x

  log_det_Sigma = np.log(np.linalg.det(Sigma))
  log_det_Sigma = np.log(np.linalg.det(Sigma))
  log_det_Sigma = np.log(np.linalg.det(Sigma))


In [60]:
# extract the estimated mean and covariance matrix
estimated_mean = estimated_params[:N]
estimated_cov = estimated_params[N:].reshape((N, N))

print("Estimated mean:", estimated_mean)
print("Estimated covariance matrix:")
print(estimated_cov)

Estimated mean: [-342.51051995  999.35238045]
Estimated covariance matrix:
[[ -852.4990295   1491.93387499]
 [ 1491.93387499 -1559.49539623]]
