In [73]:
import numpy as np
import yfinance as yf

# Numerical Gradient

$$f: \mathbb{R}^{d} \rightarrow \mathbb{R}$$

$$ x= (x_{1},x_{2},\ldots,x_{d})^{\top}$$

$$\nabla f(x)= \begin{pmatrix}\frac{\partial f}{\partial x_{1}} (x)\\\frac{\partial f}{\partial x_{2}} (x)\\ \ldots \\ \frac{\partial f}{\partial x_{d}} (x) \end{pmatrix}$$

$$ e_{2}= (0,1,\ldots,0)^{\top}$$

$$g_{i} \approx \frac{f(x+\epsilon e_{i})-f(x-\epsilon e_{i})}{2 \epsilon}$$

$$g=(g_{1},\ldots,g_{d})^{\top} \approx \nabla f(x)$$

In [74]:
def numerical_grad(f, x, eps=1e-5):
    x = np.asarray(x, dtype=float)
    g = np.zeros_like(x)
    for i in range(len(x)):
        x_step = x.copy()
        x_step[i] += eps
        f_plus = f(x_step)
        x_step[i] -= 2 * eps
        f_minus = f(x_step)
        g[i] = (f_plus - f_minus) / (2 * eps)
    return g

## Gradient Descent

$$g_{k} \approx \nabla f(x_{k})$$

$$x_{k+1} = x_{k} - \eta_{k} g_{k}$$

In [75]:
def gradient_descent(f, x0, lr=1e-3, max_iter=10000, tol=1e-6):
    x = np.asarray(x0, dtype=float)
    fx = f(x)
    for it in range(max_iter):
        g = numerical_grad(f, x)
        if np.linalg.norm(g) < tol:
            break
        x_new = x - lr * g
        fx_new = f(x_new)
        if fx_new > fx:
            lr *= 0.5
        else:
            x = x_new
            fx = fx_new
    return x, fx

# Autoregressive conditional heteroskedasticity: ARCH(1)

$$y_{t} = \sigma_{t} \epsilon_{t}, \quad \epsilon_{t} \overset{i.i.d.}\sim N(0,1)$$

$$ \sigma_{t}^2 = \omega + \alpha y_{t-1}^2$$

## Method of Moments Estimator

$$y_{t} = \sigma_{t} \epsilon_{t}, \quad \epsilon_{t} \overset{i.i.d.}\sim N(0,1)$$

$$ \sigma_{t}^2 = \omega + \alpha y_{t-1}^2$$

$$E[y_{t}^2]= E[\sigma_{t}^2 \epsilon_{t}^2] = E[\sigma_{t}^2] E[\epsilon_{t}^2] = E[\omega + \alpha y_{t-1}^2] E[\epsilon_{t}^2] = \omega + \alpha E[y_{t-1}^2]$$

$$m_2 = \omega + \alpha m_2 = \frac{\omega}{1-\alpha}$$

$$E[y_{t}^4]= E[\sigma_{t}^4 \epsilon_{t}^4] = E[\sigma_{t}^4] E[\epsilon_{t}^4] = 3 * E[(\omega + \alpha y_{t-1}^2)^2] = 3(\omega^2+2\omega\alpha E[y_{t}^2] + \alpha^2 E[y_{t}^4])$$

$$m_{4} = 3(\omega^2+2\omega\alpha m_{2} + \alpha^2 m_{4})= \frac{3(\omega^2+2\omega \alpha m_{2})}{1-3\alpha^2} = \frac{3(\omega^2+2\omega \alpha \frac{\omega}{1-\alpha})}{1-3\alpha^2}= \frac{3\omega^2(1+\frac{2\alpha}{1-\alpha})}{1-3\alpha^2}= \frac{3\omega^2(1+\frac{2\alpha}{1-\alpha})}{1-3\alpha^2}= \frac{3\omega^2(1+\alpha)}{(1-3\alpha^2)(1-\alpha)}$$

$$\kappa = \frac{m_{4}}{m_{2}^2}=\frac{3\omega^2(1+\alpha)}{(1-3\alpha^2)(1-\alpha)} \frac{(1-\alpha)^2}{\omega^2}=\frac{3(1-\alpha)(1+\alpha)}{(1-3\alpha^2)}= \frac{3(1-\alpha^2)}{(1-3\alpha^2)}$$

$$\kappa-3\alpha^2\kappa = 3-3\alpha^2 \Rightarrow 3\alpha^2(1-\kappa)=3-\kappa \Rightarrow \hat{\alpha} = \sqrt{\frac{3-\kappa}{3(1-\kappa)}}$$

$$\hat{\omega}= m_2(1-\hat{\alpha})$$

## Maximum Likelihood Calculation

$$y_{t}|\mathcal{F}_{t-1} \sim N(0,\sigma_{t}^2)$$

$$\omega = \exp(a)>0, \quad \alpha = \text{sigmoid}(b)= \frac{1}{1+e^{-b}}\in (0,1)$$

$$y_{t}|\mathcal{F}_{t-1};\omega,\alpha \sim N(0,\sigma_{t}^2); \quad \sigma_{t}^2=\omega+\alpha y_{t-1}^2$$

$$f(y_{t}|\mathcal{F}_{t-1};\omega,\alpha)=\frac{1}{\sqrt{2\pi \sigma_{t}^2}}\exp(-\frac{y_{t}^2}{2\sigma_{t}^2})$$

$$L(\omega,\alpha;y_{1},\ldots,y_{T})=\prod_{t=1}^{T} \frac{1}{\sqrt{2\pi \sigma_{t}^2}}\exp(-\frac{y_{t}^2}{2\sigma_{t}^2}), \quad \sigma_{t}^2 = \omega+\alpha y_{t-1}^2$$

$$l(\omega,\alpha)= L(\omega,\alpha;y_{1},\ldots,y_{T})=-\frac{T}{2} \log(2\pi)-\frac{1}{2}\sum \log \sigma_{t}^2-\frac{1}{2}\sum \frac{y_{t}^2}{ \sigma_{t}^2 }\propto-\frac{1}{2}\sum (\log \sigma_{t}^2+\frac{y_{t}^2}{ \sigma_{t}^2 }) $$

# Implementation

In [102]:
import numpy as np

def fitarch1(y, lr=1e-3, max_iter=5000):
    y = np.asarray(y, dtype=float)
    T = len(y)
    y = y - y.mean()

    # Negative log-likelihood WITHOUT the constant term T/2 * log(2π)
    def nll(theta):
        a, b = theta
        omega = np.exp(a)
        alpha = 1.0 / (1.0 + np.exp(-b))

        sigma2 = np.empty(T)
        # use unconditional variance for the first value
        sigma2[0] = omega / (1.0 - alpha + 1e-6)
        for t in range(1, T):
            sigma2[t] = omega + alpha * y[t-1]**2

        # constant-dropped Gaussian NLL
        return 0.5 * np.sum(np.log(sigma2) + (y**2) / sigma2)

    # --- Method-of-moments init (Gaussian eps) ---
    m2_hat = np.mean(y**2)
    m4_hat = np.mean(y**4)
    K_hat  = m4_hat / (m2_hat**2)
    kappa_eps = 3.0  # Gaussian kurtosis

    alpha_sq_hat = (kappa_eps - K_hat) / (kappa_eps * (1.0 - K_hat))
    alpha_sq_hat = max(min(alpha_sq_hat, 0.99), 1e-6)

    print("m2_hat:", m2_hat)
    print("m4_hat:", m4_hat)
    print("K_hat :", K_hat)

    alpha0 = np.sqrt(alpha_sq_hat)
    omega0 = m2_hat * (1.0 - alpha0)
    omega0 = max(omega0, 1e-8)

    a0 = np.log(omega0)
    b0 = np.log(alpha0 / (1.0 - alpha0))
    theta0 = np.array([a0, b0], dtype=float)

    # Optimize partial NLL
    theta_hat, nll_partial_min = gradient_descent(nll, theta0, lr=lr, max_iter=max_iter)

    a_hat, b_hat = theta_hat
    omega_hat = np.exp(a_hat)
    alpha_hat = 1.0 / (1.0 + np.exp(-b_hat))

    # --- FULL negative log-likelihood (with constant) ---
    # full NLL = partial NLL + T/2 * log(2π)
    nll_full_min = nll_partial_min + 0.5 * T * np.log(2.0 * np.pi)

    return {
        "omega": omega_hat,
        "alpha": alpha_hat,
        "nll_partial": nll_partial_min,  # constant-dropped
        "nll_full": nll_full_min        # full NLL, comparable across models
    }

In [112]:
import yfinance as yf
data = yf.download("SPY",start = "2021-01-01", end = "2025-12-31")
spy_close = data["Close"]["SPY"]
prices = spy_close.to_numpy()
log_prices = np.log(prices)
returns = np.diff(log_prices)
res = fitarch1(returns)
print("Estimated omega:", res["omega"])
print("Estimated alpha:", res["alpha"])

  data = yf.download("SPY",start = "2021-01-01", end = "2025-12-31")
[*********************100%***********************]  1 of 1 completed


m2_hat: 0.00011683406364867377
m4_hat: 1.5006694108120768e-07
K_hat : 10.993755282421446
Estimated omega: 8.117000716509054e-05
Estimated alpha: 0.34145108214501435


In [113]:
# say you want nu = 8 degrees of freedom
res_t = fitarch1(returns)
print("omega_hat (t):", res_t["omega"])
print("alpha_hat (t):", res_t["alpha"])
print("nll_t:", res_t["nll_full"])

m2_hat: 0.00011683406364867377
m4_hat: 1.5006694108120768e-07
K_hat : 10.993755282421446
omega_hat (t): 8.117000716509054e-05
alpha_hat (t): 0.34145108214501435
nll_t: -3897.2740525519757


# ARCH(1) with Student-t Errors

$$ t \sim t_{v}: f(t)= \frac{\Gamma(\frac{v+1}{2})}{\sqrt{\pi v}\Gamma(\frac{v}{2})}(1+\frac{t^2}{v})^{-(v+1)/2}, \quad E[t]=0, \quad V[t]=\frac{v}{v-2}$$

$$\epsilon = g(t)= \sqrt{\frac{v-2}{v}}t \sim T(0,1)$$

$$f_{\epsilon}(\epsilon)= f_{t}(g^{-1}(\epsilon))|\frac{d}{d\epsilon}g^{-1}(\epsilon)|=f_{t}(\epsilon\sqrt{\frac{v}{v-2}})\sqrt{\frac{v}{v-2}}= \frac{\Gamma(\frac{v+1}{2})}{\sqrt{\pi v}\Gamma(\frac{v}{2})}(1+\frac{(\epsilon\sqrt{\frac{v}{v-2}})^2}{v})^{-(v+1)/2}\sqrt{\frac{v}{v-2}}= \frac{\Gamma(\frac{v+1}{2})}{\sqrt{\pi (v-2)}\Gamma(\frac{v}{2})}(1+\frac{\epsilon^2}{v-2})^{-(v+1)/2}$$

$$y_{t} = \sigma_{t} \epsilon_{t}, \quad \epsilon_{t} \overset{i.i.d.}\sim T(0,1)$$

$$ \sigma_{t}^2 = \omega + \alpha y_{t-1}^2$$

## Method of Moments

## Maximum Likelihood Calculation (t-distributed errors)

$$
y_t \mid \mathcal{F}_{t-1} = \sigma_t \epsilon_t, 
\qquad \epsilon_t \overset{i.i.d.}\sim T(0,1;\nu)
$$

$$
\sigma_t^2 = \omega + \alpha y_{t-1}^2.
$$


$$
\omega = \exp(a) > 0, 
\qquad 
\alpha = \text{sigmoid}(b) = \frac{1}{1+e^{-b}} \in (0,1).
$$

$$
f_\epsilon(e;\nu)
= \frac{\Gamma\!\left(\frac{\nu+1}{2}\right)}
       {\sqrt{\pi(\nu-2)}\,\Gamma\!\left(\frac{\nu}{2}\right)}
  \left(1 + \frac{e^2}{\nu-2}\right)^{-(\nu+1)/2}.
$$

$$
y_t \mid \mathcal{F}_{t-1};\omega,\alpha,\nu 
= \sigma_t \epsilon_t
\quad\Rightarrow\quad 
\epsilon_t = \frac{y_t}{\sigma_t},
$$

$$
f(y_t \mid \mathcal{F}_{t-1};\omega,\alpha,\nu)
= \frac{1}{\sigma_t} f_\epsilon\!\left(\frac{y_t}{\sigma_t};\nu\right)
= \frac{\Gamma\!\left(\frac{\nu+1}{2}\right)}
       {\sqrt{\pi(\nu-2)}\,\Gamma\!\left(\frac{\nu}{2}\right)}
  \frac{1}{\sigma_t}
  \left(1 + \frac{y_t^2}{(\nu-2)\sigma_t^2}\right)^{-(\nu+1)/2},
$$


$$
L(\omega,\alpha;\,y_1,\ldots,y_T)
= \prod_{t=1}^T 
   f(y_t \mid \mathcal{F}_{t-1};\omega,\alpha,\nu).
$$


\begin{align*}
\ell(\omega,\alpha)
&= \sum_{t=1}^T \log f(y_t \mid \mathcal{F}_{t-1};\omega,\alpha,\nu) \\
&= T \log \frac{\Gamma\!\left(\frac{\nu+1}{2}\right)}
              {\sqrt{\pi(\nu-2)}\,\Gamma\!\left(\frac{\nu}{2}\right)}
   - \sum_{t=1}^T \log \sigma_t
   - \frac{\nu+1}{2} \sum_{t=1}^T 
      \log\left(1 + \frac{y_t^2}{(\nu-2)\sigma_t^2}\right),
\end{align*}

$$
\ell(\omega,\alpha) \propto
- \sum_{t=1}^T \log \sigma_t
- \frac{\nu+1}{2} \sum_{t=1}^T 
   \log\left(1 + \frac{y_t^2}{(\nu-2)\sigma_t^2}\right).
$$

$$
\ell(\omega,\alpha) \propto
-\frac{1}{2} \sum_{t=1}^T \log \sigma_t^2
- \frac{\nu+1}{2} \sum_{t=1}^T 
   \log\left(1 + \frac{y_t^2}{(\nu-2)\sigma_t^2}\right).
$$

$$
-\,\ell(\omega,\alpha) \propto
\frac{1}{2} \sum_{t=1}^T \log \sigma_t^2
+ \frac{\nu+1}{2} \sum_{t=1}^T 
   \log\left(1 + \frac{y_t^2}{(\nu-2)\sigma_t^2}\right),
\qquad \sigma_t^2 = \omega + \alpha y_{t-1}^2.
$$


In [114]:
import numpy as np
import math

def fitarch1_t(y, nu, lr=1e-3, max_iter=5000):
    """
    ARCH(1) with unit-variance t_nu innovations.

    Returns both:
      - nll_partial: constant-dropped NLL (used for optimization)
      - nll_full: full negative log-likelihood including the ν-dependent constant
    """
    y = np.asarray(y, dtype=float)
    T = len(y)
    y = y - y.mean()

    def nll_partial(theta):
        """
        Negative log-likelihood *without* the constant T * log C_nu.
        """
        a, b = theta
        omega = np.exp(a)
        alpha = 1.0 / (1.0 + np.exp(-b))

        sigma2 = np.empty(T)
        sigma2[0] = omega / (1.0 - alpha + 1e-6)
        for t in range(1, T):
            sigma2[t] = omega + alpha * y[t-1]**2

        # 0.5 * sum(log sigma2) + 0.5*(nu+1)*sum( log(1 + y^2 / ((nu-2)*sigma2)) )
        return (
            0.5 * np.sum(np.log(sigma2)) +
            0.5 * (nu + 1.0) * np.sum(
                np.log(1.0 + y**2 / ((nu - 2.0) * sigma2))
            )
        )

    # --- Method-of-moments style init (t-innovations) ---

    m2_hat = np.mean(y**2)
    m4_hat = np.mean(y**4)
    K_hat  = m4_hat / (m2_hat**2)

    # Kurtosis of eps_t (unit-variance t_nu), only defined if nu > 4
    if nu > 4:
        kappa_eps = 3.0 * (nu - 2.0) / (nu - 4.0)
        alpha_sq_hat = (kappa_eps - K_hat) / (kappa_eps * (1.0 - K_hat))
        alpha_sq_hat = max(min(alpha_sq_hat, 0.99), 1e-6)
        alpha0 = np.sqrt(alpha_sq_hat)
    else:
        # if nu <= 4, 4th moment doesn't exist; simple heuristic
        alpha0 = 0.1

    print("m2_hat:", m2_hat)
    print("m4_hat:", m4_hat)
    print("K_hat :", K_hat)

    omega0 = m2_hat * (1.0 - alpha0)
    omega0 = max(omega0, 1e-8)

    a0 = np.log(omega0)
    b0 = np.log(alpha0 / (1.0 - alpha0))
    theta0 = np.array([a0, b0], dtype=float)

    # Optimize partial NLL
    theta_hat, nll_partial_min = gradient_descent(
        nll_partial, theta0, lr=lr, max_iter=max_iter
    )

    a_hat, b_hat = theta_hat
    omega_hat = np.exp(a_hat)
    alpha_hat = 1.0 / (1.0 + np.exp(-b_hat))

    # --- FULL negative log-likelihood (with ν-dependent constant) ---

    # log C_nu = log Gamma((nu+1)/2) - log Gamma(nu/2) - 0.5 * log(pi * (nu-2))
    def log_C_nu(nu_val):
        return (
            math.lgamma((nu_val + 1.0) / 2.0)
            - math.lgamma(nu_val / 2.0)
            - 0.5 * np.log(np.pi * (nu_val - 2.0))
        )

    # full NLL = partial NLL - T * log C_nu
    nll_full_min = nll_partial_min - T * log_C_nu(nu)

    return {
        "omega": omega_hat,
        "alpha": alpha_hat,
        "nll_partial": nll_partial_min,  # constant-dropped
        "nll_full": nll_full_min        # full NLL, comparable to Gaussian and across ν
    }



In [115]:
# say you want nu = 8 degrees of freedom
res_t = fitarch1_t(returns, nu=10)
print("omega_hat (t):", res_t["omega"])
print("alpha_hat (t):", res_t["alpha"])
print("nll_t:", res_t["nll_full"])

m2_hat: 0.00011683406364867377
m4_hat: 1.5006694108120768e-07
K_hat : 10.993755282421446
omega_hat (t): 7.980642920350023e-05
alpha_hat (t): 0.2189768247349752
nll_t: -3944.3852098743655


In [116]:
import numpy as np

def select_nu_arch1_t(y, nu_grid, lr=1e-3, max_iter=5000):
    y = np.asarray(y, dtype=float)
    T = len(y)

    best = None
    results = []

    for nu in nu_grid:
        if nu <= 2:
            continue  # just in case, but your grid is all > 2

        res = fitarch1_t(y, nu, lr=lr, max_iter=max_iter)
        nll_full = res["nll_full"]

        print(f"nu={nu:3d} | nll_full={nll_full:.3f} | "
              f"omega={res['omega']:.3e}, alpha={res['alpha']:.3f}")

        results.append({
            "nu": nu,
            "omega": res["omega"],
            "alpha": res["alpha"],
            "nll_full": nll_full
        })

        if (best is None) or (nll_full < best["nll_full"]):
            best = results[-1]

    return best, results

In [117]:
nu_grid = [1,2,3,4,5, 10, 25, 50, 100]
best_t, all_t = select_nu_arch1_t(returns, nu_grid)

print("\nBest t-ARCH(1) model:")
print("  nu       :", best_t["nu"])
print("  omega_hat:", best_t["omega"])
print("  alpha_hat:", best_t["alpha"])
print("  nll_full :", best_t["nll_full"])

m2_hat: 0.00011683406364867377
m4_hat: 1.5006694108120768e-07
K_hat : 10.993755282421446
nu=  3 | nll_full=-3949.339 | omega=1.296e-04, alpha=0.297
m2_hat: 0.00011683406364867377
m4_hat: 1.5006694108120768e-07
K_hat : 10.993755282421446
nu=  4 | nll_full=-3954.104 | omega=9.752e-05, alpha=0.235
m2_hat: 0.00011683406364867377
m4_hat: 1.5006694108120768e-07
K_hat : 10.993755282421446
nu=  5 | nll_full=-3954.201 | omega=8.813e-05, alpha=0.219
m2_hat: 0.00011683406364867377
m4_hat: 1.5006694108120768e-07
K_hat : 10.993755282421446
nu= 10 | nll_full=-3944.385 | omega=7.981e-05, alpha=0.219
m2_hat: 0.00011683406364867377
m4_hat: 1.5006694108120768e-07
K_hat : 10.993755282421446
nu= 25 | nll_full=-3926.204 | omega=7.971e-05, alpha=0.246
m2_hat: 0.00011683406364867377
m4_hat: 1.5006694108120768e-07
K_hat : 10.993755282421446
nu= 50 | nll_full=-3915.159 | omega=8.045e-05, alpha=0.271
m2_hat: 0.00011683406364867377
m4_hat: 1.5006694108120768e-07
K_hat : 10.993755282421446
nu=100 | nll_full=-3907

## Generalized Autoregressive conditional heteroskedasticity: GARCH(1,1)

In [54]:
def fitgarch1(y, lr= 1e-4, max_iter=50000):
    y = np.asarray(y, dtype=float)
    T = len(y)
    y = y - y.mean()
    def nll(theta):
        a,b,c = theta
        omega = np.exp(a)
        u = 1.0 /(1.0 + np.exp(-b))
        v = 1.0 /(1.0 + np.exp(-c))
        alpha = u * (1-v)
        beta = u * v
        sigma2 = np.empty(T)
        sigma2[0] = np.var(y)
        for t in range(1,T):
            sigma2[t] = omega + alpha * y[t-1]**2+beta* sigma2[t-1]
        return 0.5 * np.sum(np.log(sigma2)+(y**2)/sigma2)
    y_var = np.var(y)
    alpha0 = 0.1
    beta0  = 0.7
    omega0 = y_var * (1.0 - alpha0 - beta0)
    a0 = np.log(omega0)
    b0 = np.log(alpha0 /(1-alpha0))
    c0 = np.log(beta0 /(1-beta0))
    theta0 = np.array([a0,b0,c0], dtype = float)
    theta_hat, nll_min = gradient_descent(nll, theta0, lr= lr, max_iter = max_iter)
    a_hat, b_hat, c_hat = theta_hat
    omega_hat = np.exp(a_hat)
    alpha_hat = 1.0/(1.0+ np.exp(-b_hat))
    beta_hat = 1.0/(1.0+ np.exp(-c_hat))
    return {
        "omega": omega_hat,
        "alpha": alpha_hat,
        "beta": beta_hat,
        "nll": nll_min
    }

In [55]:
import yfinance as yf
data = yf.download("SPY")
spy_close = data["Close"]["SPY"]
prices = spy_close.to_numpy()
log_prices = np.log(prices)
returns = np.diff(log_prices)
y = returns -returns.mean()
res = fitgarch1(y)
print("Estimated omega:", res["omega"])
print("Estimated alpha:", res["alpha"])
print("Estimated alpha:", res["beta"])

  data = yf.download("SPY")
[*********************100%***********************]  1 of 1 completed


Estimated omega: 6.952965832024371e-05
Estimated alpha: 0.08289674970156292
Estimated alpha: 0.7698824945763842


In [37]:
m2 = np.var(y)
y2 = y**2
rho1 = np.corrcoef(y2[1:], y2[:-1])[0, 1]

alpha0 = 0.05
persistence0 = min(max(rho1, 0.7), 0.99)  # clamp into [0.7, 0.99]
beta0 = max(persistence0 - alpha0, 0.5)

omega0 = m2 * (1.0 - alpha0 - beta0)
omega0 = max(omega0, 1e-8) 

In [29]:
rho1

-0.23453975899929336

In [30]:
beta0

0.6499999999999999

In [31]:
persistence0

0.7