GLOBAL FUNCTIONS

In [15]:
#Loading dependencies and libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.stats.diagnostic as smd 

from scipy.stats import norm
from scipy.stats import t as student_t
from scipy.special import beta
from arch import arch_model
from arch.univariate import ConstantMean, ARX, GARCH, EGARCH, ARCHInMean, StudentsT, Normal

In [2]:
#Distance function

def dist(fit, x, type_):
    """Evaluate the density or quantile function of a fitted, standardized distribution.

    The distribution is always evaluated with zero mean and unit variance.
    The previous body called ``dist`` itself with keyword arguments this
    function does not accept, so every call raised ``TypeError``; the
    evaluation is now done with SciPy.

    Supported distribution names (the same two that ``Eabsz`` handles):
    - 'norm': standard normal.
    - 'std' : Student's t rescaled to unit variance; degrees of freedom are
              read from ``fit.fit.coef['shape']``.

    Parameters:
    - fit: fitted model object exposing ``fit.model.modeldesc['distribution']``
           and ``fit.fit.coef`` (anything with a dict-like ``.get``).
    - x: value(s) at which to evaluate; points for the density ('d'),
         probabilities for the quantile ('q').
    - type_: 'd' for density, 'q' for quantile.

    Returns:
    - Density or quantile value(s) with the shape of ``x``.

    Raises:
    - ValueError: if ``type_`` is not 'd' or 'q', or if the 'std' shape
      parameter is not greater than 2 (the variance would not exist).
    - NotImplementedError: for any other distribution name.
    """
    if type_ not in ('d', 'q'):
        raise ValueError("Argument 'type_' must be 'd' or 'q'")

    dist_name = fit.model.modeldesc['distribution']
    values = np.asarray(x, dtype=float)

    if dist_name == 'norm':
        return norm.pdf(values) if type_ == 'd' else norm.ppf(values)

    if dist_name == 'std':
        # Same fallback of 1 as before; it is rejected below because a
        # unit-variance t distribution needs more than 2 degrees of freedom.
        shape = float(fit.fit.coef.get('shape', 1))
        if shape <= 2:
            raise ValueError("Shape parameter of the 'std' distribution must be greater than 2")
        # A t variable has variance shape / (shape - 2); this scale makes it 1.
        scale = np.sqrt((shape - 2) / shape)
        if type_ == 'd':
            return student_t.pdf(values, shape, scale=scale)
        return student_t.ppf(values, shape, scale=scale)

    raise NotImplementedError(f"Not implemented for distribution '{dist_name}'")

Additional comments to the function:

    Compute density or quantile of a fitted distribution.

    Parameters:
    - fit: Fitted model object (must contain fit.coef and model.modeldesc['distribution'])
    - x: Value(s) at which to evaluate the function
    - type_: 'd' for density, 'q' for quantile

    Returns:
    - Density or quantile value(s)

In [3]:
#Global histogram options

def plot_hist(x, xlim, n=200, bins=100, title=""):
    """Show a density histogram of ``x`` with a normal PDF drawn on top.

    The normal curve uses ``np.mean(x)`` and ``np.std(x)`` as location and
    scale and is evaluated on ``n`` equally spaced points spanning ``xlim``.
    The figure is displayed with ``plt.show()``; nothing is returned.

    Parameters:
    - x: array-like data
    - xlim: (xmin, xmax), used both as histogram range and x-axis limits
    - n: number of grid points for the PDF line
    - bins: number of histogram bins
    - title: plot title
    """
    # Normal curve fitted to the sample moments
    grid = np.linspace(xlim[0], xlim[1], n)
    normal_curve = norm.pdf(grid, loc=np.mean(x), scale=np.std(x))

    # Histogram first, then the curve on top of it
    plt.hist(x, bins=bins, density=True, range=xlim, alpha=0.7, edgecolor='black')
    plt.plot(grid, normal_curve, color='red', linewidth=2)

    # Labels and axes
    plt.title(title)
    plt.xlim(xlim)
    plt.xlabel('x')
    plt.ylabel('Density')
    plt.grid(True)
    plt.show()

Additional comments to the function:

    Plot histogram of data with overlaid normal PDF.

    Parameters:
    - x: array-like object, data
    - xlim: tuple of (xmin, xmax), x-axis limits
    - n: number of points for PDF line
    - bins: number of histogram bins
    - title: plot title

    norm.pdf() is a built-in SciPy function

In [4]:
#Global histogram options specifically for model residuals in GARCH

def plot_hist_fit(fit, xlim, ylim=None, n=200, bins=100, plot_norm=False, title=""):
    """Show a histogram of model residuals with the fitted PDF overlaid.

    Residuals are read from ``fit.fit.z``. The fitted density is obtained
    from ``dist(fit=fit, x=..., type_='d')`` on ``n`` equally spaced points
    across ``xlim``. The distribution name is read from
    ``fit.model.modeldesc['distribution']`` (lower-case key, the same key
    ``dist`` and ``Eabsz`` use; the block previously used 'Distribution').

    Parameters:
    - fit: fitted model object with ``fit.fit.z`` and
           ``fit.model.modeldesc['distribution']``
    - xlim: (xmin, xmax), used as histogram range and x-axis limits
    - ylim: optional (ymin, ymax)
    - n: number of points at which the PDFs are evaluated
    - bins: number of histogram bins
    - plot_norm: if True, also draw the standard normal PDF (skipped when
                 the fitted distribution is already 'norm')
    - title: plot title
    """
    colors = ['red', 'blue'] #change for visual preferences

    # Residuals and the name of the fitted distribution
    z = fit.fit.z
    dist_name = fit.model.modeldesc['distribution']

    # Histogram
    plt.hist(z, bins=bins, density=True, range=xlim, edgecolor='black', alpha=0.7, label='Histogram')

    # Fitted PDF
    x1 = np.linspace(xlim[0], xlim[1], n)
    pdf1 = dist(fit=fit, x=x1, type_='d')
    plt.plot(x1, pdf1, color=colors[0], linewidth=2, label=dist_name)

    # Optional: standard normal PDF for comparison
    if plot_norm and dist_name != 'norm':
        pdf2 = norm.pdf(x1)
        plt.plot(x1, pdf2, color=colors[1], linewidth=2, label='norm')

    # Final touches
    plt.title(title)
    plt.xlabel('z')
    plt.ylabel('Density')
    plt.xlim(xlim)
    if ylim:
        plt.ylim(ylim)
    plt.legend(loc='upper right')
    plt.grid(True)
    plt.show()

Additional comments to the function:

    Plot histogram of model residuals with overlaid fitted and (optional) normal PDFs (Probability Density Functions).

    Parameters:
    - fit: Fitted model object with residuals in fit.fit.z and distribution in fit.model.modeldesc['distribution']
    - xlim: tuple of (xmin, xmax)
    - ylim: optional tuple of (ymin, ymax)
    - n: number of points to evaluate PDFs
    - bins: histogram bins
    - plot_norm: boolean, whether to overlay standard normal PDF
    - title: plot title

In [5]:
#QQ plot of standardized residuals against the fitted distribution

def qqplot_fit(fit):
    """Show a QQ plot of residuals against the model's fitted distribution.

    Empirical quantiles are the sorted values of ``fit.fit.z``. Theoretical
    quantiles come from ``dist(fit=fit, x=p, type_='q')`` evaluated at
    equally spaced probabilities from 1/(n+1) to n/(n+1). A red reference
    line y = x is drawn over the range of the theoretical quantiles.

    Parameters:
    - fit: fitted model object with residuals in ``fit.fit.z`` and whatever
           attributes ``dist`` needs
    """
    # Empirical side: ordered residuals
    empirical = np.sort(fit.fit.z)
    count = len(empirical)

    # Theoretical side: quantiles at probabilities strictly inside (0, 1)
    probs = np.linspace(1 / (count + 1), count / (count + 1), count)
    theoretical = dist(fit=fit, x=probs, type_='q')

    # Scatter plus the 45-degree reference line
    lo, hi = min(theoretical), max(theoretical)
    plt.scatter(theoretical, empirical, edgecolor='black', facecolor='none')
    plt.plot([lo, hi], [lo, hi], color='red', linewidth=2)

    plt.xlabel("Theoretical quantiles")
    plt.ylabel("Empirical quantiles")
    plt.title("QQ Plot: Residuals vs. Fitted Distribution")
    plt.grid(True)
    plt.show()

Additional comments to the function:

    QQ plot comparing empirical residuals with theoretical quantiles
    from the model's fitted distribution.
    
    Parameters:
    - fit: Fitted model object with residuals in fit.fit.z
           and distribution in fit.model.modeldesc['distribution']
           
    We use the dist() function defined at the beginning of the notebook.

In [6]:
#Calculating the expected absolute value of standardized residuals

def Eabsz(fit):
    """Return E|z| for the model's assumed standardized distribution.

    The distribution name is read from ``fit.model.modeldesc['distribution']``.

    - 'norm': sqrt(2 / pi).
    - 'std' : 2 * sqrt(df - 2) / ((df - 1) * B(df / 2, 1 / 2)) with
              df = ``fit.fit.coef['shape']``; NaN when df <= 2.
    - anything else: an informative string (not an exception).

    Parameters:
    - fit: fitted model object with the attributes named above

    Returns:
    - float, or a string if the distribution is not supported
    """
    dist_name = fit.model.modeldesc['distribution']

    if dist_name == "norm":
        return np.sqrt(2 / np.pi)

    if dist_name != "std":
        return f"Not implemented for distribution '{dist_name}'"

    nu = float(fit.fit.coef["shape"])
    if nu <= 2:
        # Undefined for 2 or fewer degrees of freedom
        return float('nan')

    numerator = 2 * np.sqrt(nu - 2)
    denominator = (nu - 1) * beta(0.5 * nu, 0.5)
    return numerator / denominator

Additional comments to the function:
    
    Compute the expected absolute value of standardized residuals
    based on the model's assumed distribution.
    
    Parameters:
    - fit: Fitted model object with distribution in fit.model.modeldesc['distribution']
           and shape parameter in fit.fit.coef['shape'] for t-distribution.
           
    Returns:
    - Expected absolute value (float), or an informative string if unsupported.

GARCH FAMILY FUNCTIONS

In [7]:
#Transformation of parameters for the GJR-GARCH and TGARCH models

def transform_gjr_garch(fit, model_type="GJRGARCH"):
    """Re-parameterize alpha/gamma and propagate the change with the delta method.

    Transformations applied to ``alpha[1]`` and ``gamma[1]``:
    - 'GJRGARCH': alpha* = alpha * (1 - gamma)**2,  gamma* = 4 * alpha * gamma
    - 'TGARCH'  : alpha* = alpha * (1 - gamma),     gamma* = 2 * alpha * gamma

    The covariance matrix is updated as ``D @ V @ D.T`` where ``D`` is the
    identity except for the 2x2 block of partial derivatives of
    (alpha*, gamma*) with respect to (alpha, gamma).

    Parameters:
    - fit: fitted model result with ``params`` (pandas Series indexed by
           parameter name) and a parameter covariance matrix. The matrix is
           read from ``fit.covariance`` when present (kept for backward
           compatibility) and otherwise from ``fit.param_cov``, which is the
           attribute name used by ``arch`` result objects.
    - model_type: 'GJRGARCH' (default) or 'TGARCH'

    Returns:
    - dict with keys 'coef', 'se.coef', 'tval', 'pval', 'summary', 'cov'.
      'cov' is a DataFrame labelled by parameter name when the input
      covariance was a DataFrame, otherwise a NumPy array.

    Raises:
    - ValueError: if 'alpha[1]' or 'gamma[1]' is missing, or ``model_type``
      is not supported.

    NOTE(review): whether the input estimates are in the parameterization
    this transformation expects depends on how ``fit`` was estimated;
    confirm against the model specification.
    """
    est = fit.params

    vcov = getattr(fit, "covariance", None)
    if vcov is None:
        vcov = fit.param_cov
    # Plain array for the matrix algebra; a DataFrame operand would lose its
    # labels in `ndarray @ DataFrame` anyway.
    vcov_arr = np.asarray(vcov, dtype=float)

    names = list(est.index)
    if 'alpha[1]' not in names or 'gamma[1]' not in names:
        raise ValueError("Required coefficients 'alpha[1]' or 'gamma[1]' not found in model parameters.")

    alpha = est['alpha[1]']
    gamma = est['gamma[1]']
    alpha_idx = names.index('alpha[1]')
    gamma_idx = names.index('gamma[1]')

    # Transformed values and d(alpha*, gamma*) / d(alpha, gamma)
    if model_type == "GJRGARCH":
        alpha_s = alpha * (1 - gamma) ** 2
        gamma_s = 4 * alpha * gamma
        partials = [[(1 - gamma) ** 2, -2 * alpha * (1 - gamma)],
                    [4 * gamma, 4 * alpha]]
    elif model_type == "TGARCH":
        alpha_s = alpha * (1 - gamma)
        gamma_s = 2 * alpha * gamma
        partials = [[(1 - gamma), -alpha],
                    [2 * gamma, 2 * alpha]]
    else:
        raise ValueError("Unsupported model_type! Use 'GJRGARCH' or 'TGARCH'.")

    # Jacobian: identity for all untouched parameters
    D = np.eye(len(est))
    block = [alpha_idx, gamma_idx]
    D[np.ix_(block, block)] = partials

    # Update estimates vector
    est_updated = est.copy()
    est_updated.iloc[alpha_idx] = alpha_s
    est_updated.iloc[gamma_idx] = gamma_s

    # Delta-method covariance
    cov_arr = D @ vcov_arr @ D.T

    # Standard errors, t-values and two-sided normal p-values.
    # norm.sf avoids the cancellation of 1 - cdf for large |t|.
    se = np.sqrt(np.abs(np.diag(cov_arr)))
    tval = est_updated / se
    pval = 2 * norm.sf(np.abs(tval))

    summary_df = pd.DataFrame({
        "Estimate": est_updated,
        "Std. Error": se,
        "t value": tval,
        "Pr(>|t|)": pval
    }, index=est.index)

    if isinstance(vcov, pd.DataFrame):
        vcov_updated = pd.DataFrame(cov_arr, index=est.index, columns=est.index)
    else:
        vcov_updated = cov_arr

    return {
        "coef": est_updated,
        "se.coef": se,
        "tval": tval,
        "pval": pval,
        "summary": summary_df,
        "cov": vcov_updated
    }

Additional comments to the function:
    
    The `arch` library has no dedicated GJR-GARCH function; as a substitute, we use `arch_model` with one asymmetric term (o=1):
    
    model = arch_model(
        returns,
        vol='GARCH',
        p=1,
        o=1,
        q=1,
        dist='t',            # Student's t for fat tails
        mean='Constant'      # Simple constant mean model
    )
    
    Purpose of function: Applies the alpha/gamma to alpha_s/gamma_s transformation for GJR-GARCH or TGARCH,
    updating coefficient estimates and covariance matrix using the delta method.
    
    Parameters:
    - fit: A fitted GARCH model from `arch` library
    - model_type: 'GJRGARCH' or 'TGARCH' (default is 'GJRGARCH')
    
    Returns:
    - Dictionary with transformed coefficients, standard errors, t-stats, p-values, and covariance matrix

In [8]:
#Garman-Klass volatility estimator (used for intraday data)

def garman_klass(data: pd.DataFrame, sd: bool = True, currency: bool = False) -> pd.Series:
    """Garman-Klass volatility estimator computed row by row from OHLC prices.

    Parameters:
    - data: DataFrame with columns 'Open', 'High', 'Low', 'Close', 'Adjusted'.
    - sd: if True (default) return 1.034 * sqrt(estimate); otherwise return
          the estimate itself.
    - currency: if True, skip the overnight (close-to-open) adjustment.

    Returns:
    - pandas Series indexed like ``data``. With ``currency=False`` the first
      element is NaN because no previous close exists for it. An empty
      ``data`` yields an empty Series.
    """
    # Adjustment factor applied to every price of the same row.
    # NOTE(review): u1, d1 and c1 are same-row log differences, so this
    # factor cancels there (unless it is NaN/inf), while the overnight return
    # below uses unadjusted prices -- confirm that this is intended.
    coef = data['Adjusted'] / data['Close']

    H1 = np.log(data['High'] * coef)
    L1 = np.log(data['Low'] * coef)
    O1 = np.log(data['Open'] * coef)
    C1 = np.log(data['Close'] * coef)

    u1 = H1 - O1  # high relative to open
    d1 = L1 - O1  # low relative to open
    c1 = C1 - O1  # close relative to open

    # Garman-Klass formula components
    x = 0.511 * (u1 - d1) ** 2 + \
        (-0.019) * (c1 * (u1 + d1) - 2 * u1 * d1) + \
        (-0.383) * c1 ** 2

    if not currency:
        open_px = data['Open'].to_numpy(dtype=float)
        close_px = data['Close'].to_numpy(dtype=float)

        # Overnight return: today's open over yesterday's close, taken by
        # position; the first row has no predecessor and stays NaN.
        retco = np.full(len(data), np.nan)
        retco[1:] = np.log(open_px[1:] / close_px[:-1])
        # Open-to-close return of the same row
        retoc = np.log(data['Close'] / data['Open'])

        x1 = np.nansum(retco ** 2)
        x2 = np.nansum(retoc ** 2)

        # Share of total squared return attributed to the overnight period;
        # undefined (NaN) when there is no variation at all.
        total = x1 + x2
        f = x1 / total if total > 0 else np.nan
        f = np.clip(f, 0.01, 0.99)  # clip between 0.01 and 0.99

        a = 0.12
        x = a * (retco ** 2) / f + ((1 - a) / (1 - f)) * x

    if sd:
        return 1.034 * np.sqrt(x)
    return x

Additional comments to the function:

    Calculate the Garman-Klass volatility estimator.
    
    Parameters:
    - data: DataFrame with columns ['Open', 'High', 'Low', 'Close', 'Adjusted'] (input in yfinance form)
    - sd: If True (default), return volatility (std dev), else return variance estimate.
    - currency: If True, skip overnight adjustment (default False).
    
    Returns:
    - pandas Series of Garman-Klass estimates, indexed like `data` (volatility if sd=True, variance estimate otherwise)

In [9]:
#Modified predict function (instead of .forecast())

def garch_predict(fit, n_ahead, t, data=None, fixed_n_ahead=True, alpha=None):
    """Forecast mean and volatility from a fitted model, optionally with intervals.

    Parameters:
    - fit: fitted model result (intended: an ``arch`` result object)
    - n_ahead: forecast horizon; rounded to the nearest integer, which must
               be positive
    - t: position at which forecasting starts, 1 <= t <= len(data)
    - data: optional new data; when omitted, ``fit._y`` is used
    - fixed_n_ahead: if True, one forecast of ``n_ahead`` steps starting at
                     ``t``; otherwise one-step forecasts for every position
                     from ``t`` to the end of the series
    - alpha: optional significance level, 0 < alpha < 0.5; when given,
             'left'/'right' bounds are added using normal quantiles

    Returns:
    - dict with keys 'n_ahead', 'fixed_n_ahead', 'alpha' and 'pred'
      (DataFrame with columns 't', 'pred', 'se' and, if ``alpha`` is set,
      'left' and 'right')

    Raises:
    - ValueError: for a non-positive horizon, ``t`` outside [1, len(data)],
      or ``alpha`` outside (0, 0.5)
    """
    # Use original data if no new data provided
    if data is None:
        # NOTE(review): verify that the result object exposes `_y`; it may
        # live on `fit.model` instead.
        y = fit._y
    else:
        y = np.asarray(data)

    nobs = len(y)

    # Round first so that e.g. 0.3 is rejected instead of becoming horizon 0
    n_ahead = int(round(n_ahead))
    if n_ahead <= 0:
        raise ValueError("Argument 'n_ahead' must be a positive integer")
    if t < 1:
        # y[:t] would be empty (t == 0) or slice from the end (t < 0)
        raise ValueError("Argument 't' must be >= 1")
    if t > nobs:
        raise ValueError("Argument 't' must be <= length of data")
    if alpha is not None:
        if alpha >= 0.5:
            raise ValueError("Argument 'alpha' must be lower than 0.5")
        if alpha <= 0:
            # norm.ppf(0) is -inf, which would give infinite bounds
            raise ValueError("Argument 'alpha' must be greater than 0")

    # Refit on the new data when it is longer than the original sample.
    # NOTE(review): confirm these attribute paths and the accepted string
    # values for `vol`, `dist` and `mean` against the installed arch version
    # (p/o/q may live on `fit.model.volatility`).
    if data is not None and len(data) > len(fit._y):
        model = arch_model(y, vol=fit.model.volatility.__class__.__name__,
                           p=fit.model.p, o=fit.model.o, q=fit.model.q,
                           dist=fit.model.distribution.name, mean=fit.model.mean.__class__.__name__)
        fit = model.fit(disp="off")

    if fixed_n_ahead:
        # Single forecast of n_ahead steps starting at t
        fit_rolling = fit
        if t < nobs:
            # NOTE(review): the first positional parameter of arch's
            # `fit` does not appear to be the data; restricting the sample
            # is probably meant to use `last_obs=t` -- confirm and adjust.
            fit_rolling = fit.model.fit(y[:t], disp='off')
        forecast = fit_rolling.forecast(horizon=n_ahead, reindex=False)
        pred_index = np.arange(t, t + n_ahead)
        # Last row of the forecast tables, all columns
        mu = forecast.mean.values[-1, :]
        sigma = np.sqrt(forecast.variance.values[-1, :])
    else:
        # One-step forecasts for every position from t to the end
        pred_index = np.arange(t, nobs)
        mu = []
        sigma = []
        for i in range(t, nobs):
            # NOTE(review): same concern about the positional argument as above
            fit_rolling = fit.model.fit(y[:i], disp='off')
            forecast = fit_rolling.forecast(horizon=1, reindex=False)
            mu.append(forecast.mean.values[-1, 0])
            sigma.append(np.sqrt(forecast.variance.values[-1, 0]))
        mu = np.array(mu)
        sigma = np.array(sigma)

    # Prepare output dataframe
    pred_dict = {
        't': pred_index,
        'pred': mu,
        'se': sigma
    }
    if alpha is not None:
        # Symmetric interval from the normal approximation
        z = abs(norm.ppf(alpha / 2))
        pred_dict['left'] = mu - z * sigma
        pred_dict['right'] = mu + z * sigma

    pred_df = pd.DataFrame(pred_dict)

    return {
        'n_ahead': n_ahead,
        'fixed_n_ahead': fixed_n_ahead,
        'alpha': alpha,
        'pred': pred_df
    }

Additional comments to the function:
    
    Forecast from a fitted arch_model object.
    
    Parameters:
    - fit: Fitted arch_model result (arch.univariate.base.ARCHModelResult)
    - n_ahead: Number of steps ahead to forecast (positive int)
    - t: Time index to start forecasting (int, <= len(data))
    - data: Optional new dataset (pd.Series or np.array), must be longer than fit.data
    - fixed_n_ahead: Boolean, if True forecast horizon is fixed, else rolling forecast
    - alpha: Significance level for confidence intervals (e.g., 0.05 for 95% CI)
    
    Returns:
    - dict with keys: 'n_ahead', 'fixed_n_ahead', 'alpha', 'pred' (pd.DataFrame)
      pred dataframe columns: ['t', 'pred', 'se', ('left', 'right') if alpha is set]

In [16]:
#Customized ADF function 

def adf_test(series, max_aug=10, version='n'):
    """Run the ADF regression for 0 .. max_aug - 1 augmentations.

    For each number of augmentations the function regresses the first
    difference of ``series`` on its lagged level (plus optional constant /
    trend and the lagged differences) with OLS, and records the t-statistic
    of the lagged level, fixed critical values and Breusch-Godfrey p-values
    (5, 10 and 15 lags) for residual autocorrelation.

    Parameters:
    - series: pandas Series to test
    - max_aug: number of specifications to estimate; augmentations run from
               0 to ``max_aug - 1``
    - version: 'n' (no deterministic terms), 'c' (constant) or
               't' (constant and linear trend)

    Returns:
    - DataFrame with one row per number of augmentations. Critical values
      are NaN when the regression uses 500 observations or fewer; they have
      to be looked up manually in that case. With ``max_aug <= 0`` the
      DataFrame is empty but still has all columns.

    Raises:
    - ValueError: if ``version`` is not 'n', 'c' or 't'.
    """
    # (1%, 5%, 10%) critical values used for more than 500 observations
    critical_values = {
        'n': (-2.567, -1.941, -1.616),
        'c': (-3.434, -2.863, -2.568),
        't': (-3.963, -3.413, -3.128),
    }
    if version not in critical_values:
        # Previously an unknown version only failed later, on unbound names
        raise ValueError("Argument 'version' must be 'n', 'c' or 't'")

    columns = ['number of augmentations', 'ADF test statistic', 'ADF critival value (1%)', 'ADF critival value (5%)', 'ADF critival value (10%)', 'BG test (5 lags) (p-value)', 'BG test (10 lags) (p-value)', 'BG test (15 lags) (p-value)']

    y = series.diff()
    X = pd.DataFrame({'y_lag': series.shift()})

    if version == 'c' or version == 't': # constant to be added optionally
        X = sm.add_constant(X)
    if version == 't': # (deterministic) trend component to be added optionally
        X['trend'] = range(len(X))

    results = []
    for i in range(0, max_aug): # iterating through different numbers of augmentations

        # X keeps the columns added in earlier iterations, so only the
        # newest lagged difference has to be appended.
        if i > 0:
            X['aug_' + str(i)] = y.shift(i)

        model = sm.OLS(y, X, missing='drop').fit() # fitting a linear regression with OLS

        ts = model.tvalues['y_lag'] # test statistic

        if model.nobs > 500:
            cv1, cv5, cv10 = critical_values[version]
        else:
            cv1 = cv5 = cv10 = np.nan

        # Index 1 of the Breusch-Godfrey result is used as the p-value
        bg_pvalues = [round(smd.acorr_breusch_godfrey(model, nlags=lags)[1], 4)
                      for lags in (5, 10, 15)]

        results.append([i, ts, cv1, cv5, cv10, *bg_pvalues])

    # Passing columns here also covers the case of zero result rows
    return pd.DataFrame(results, columns=columns)

Additional comments to the function:

    This function was developed for the Time-Series Analysis labs. It is a customized function that performs the Augmented Dickey-Fuller (ADF) unit root test for a time series, iterating over a range of lag values (augmentations). It optionally includes a constant or trend component and returns the test statistics alongside critical values and Breusch-Godfrey test p-values for autocorrelation in the residuals.