In [94]:
#import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.graphics.tsaplots import plot_acf
import os

In [95]:
def generate_data(T, alpha, beta, sigma, alpha_y, beta_y, gamma, theta, sigma_y, mu):
    df = pd.DataFrame(index=range(T), columns=['x', 'y', 'epsilon_x', 'epsilon_y'])
    x1 = alpha/(1-beta) # define initial value for time series x
    y1 = 0
    epsilon_x = sigma*np.random.randn(T) + mu # generate a vector of T random normal
    epsilon_y = sigma_y*np.random.randn(T) # generate a vector of T random normal
    
    df.iloc[0,:] = [x1, y1, np.nan, np.nan] # initialize x and y
    
    x = np.zeros(T)
    y = np.zeros(T)
    x[0] = x1
    y[0] = y1
    
    for i in range(0, T-1):
        x[i+1] = alpha + beta * x[i] + epsilon_x[i+1] # generate X(t) recursively
            # Xt = α + βXt-1 + εt
        y[i+1] = alpha_y + beta_y * y[i] + gamma * x[i+1] + epsilon_y[i+1] + theta * epsilon_y[i] # generate Y(t) recursively
            # Yt = α + βYt-1 + γXt + εt + θεt-1
        df.iloc[i+1,0] = x[i+1]
        df.iloc[i+1,1] = y[i+1]
        df.iloc[i+1,2] = epsilon_x[i+1] # associate error with X(t)
        df.iloc[i+1,3] = epsilon_y[i+1] # associate error with Y(t)
    
    return df, x, y, epsilon_x, epsilon_y

In [82]:
df, x, y, epsilon_x, epsilon_y = generate_data(1000, 1, 0.9, 0.1, 1, 0.9, 0.9, 0.5, 0.1, 0)
new_df = df.drop(0)

In [89]:
######################## quantile GLS normal errors
import numpy as np
from scipy.stats import norm
from scipy.optimize import minimize

# Define the quantile of interest
tau = 0.5

# Define the weight function
#def weight_function(residuals):
    #return np.diag(np.exp(-0.5 * (residuals / norm.ppf(tau))**2))
#Huber weight function, which down-weights the residuals that are larger than a certain threshold
#if the weight matrix is singular. if the quantile of interest is too extreme (e.g., close to 0 or 1), as the weight matrix can become degenerate and hence singular.
#solution: increase sample size or
def huber_weight_function(residuals, c=1.345):
    weights = np.zeros_like(residuals)
    mask = np.abs(residuals) <= c
    weights[mask] = 1
    weights[~mask] = c / np.abs(residuals[~mask])
    return np.diag(weights)


# Define the GLS function
def gls_quantile(x, y, tau, weight_function):
    # Define the negative log-likelihood function
    def neg_log_likelihood(params):
    
        sigma_sq, rho = np.exp(params)
        residuals = y - np.dot(x, beta)  # Compute the residuals
        weights = weight_function(residuals)  # Compute the weight matrix
        det = np.linalg.det(weights)  # Compute the determinant of the weight matrix
        inv_weights = np.linalg.inv(weights)  # Compute the inverse of the weight matrix
        log_likelihood = 0.5*np.log(det) - 0.5*np.dot(residuals.T, np.dot(inv_weights, residuals)) / sigma_sq
        #log_likelihood += 0.5*np.log(1 - rho**2) - 0.5*(rho**2) / (1 - rho**2) / sigma_sq
        eps = 1e-10  # Add small positive value to denominator
        #log_likelihood += 0.5*np.log(np.abs(1 - rho**2) + eps) - 0.5*(rho**2) / (np.abs(1 - rho**2) + eps) / sigma_sq

        denominator = np.abs(1 - rho**2) + eps
        log_likelihood += 0.5*np.log(denominator) - 0.5*(rho**2) / denominator / sigma_sq

        log_likelihood += 0.5*np.log(sigma_sq)  # Add the log of the variance term
        return -log_likelihood  # Return the negative log-likelihood

    # Initialize the coefficients with OLS
    beta = np.linalg.lstsq(x, y, rcond=None)[0]

    # Optimize the negative log-likelihood function
    res = minimize(neg_log_likelihood, np.array([1, 0]), method='L-BFGS-B')
    sigma_sq, rho = np.exp(res.x)

    # Compute the residuals and the weight matrix
    residuals = y - np.dot(x, beta)
    weights = weight_function(residuals)

    # Compute the GLS estimator
    det = np.linalg.det(weights)
    inv_weights = np.linalg.inv(weights)
    beta_gls = np.linalg.lstsq(np.dot(x.T, inv_weights).dot(x), np.dot(x.T, inv_weights).dot(y), rcond=None)[0]

    # Compute the GLS variance
    weighted_resid = np.dot(residuals.T, inv_weights)
    var_gls = np.dot(weighted_resid, weighted_resid.T) / (len(y) - x.shape[1])
    var_gls *= (1 - rho**2) / sigma_sq
    var_gls += np.diag(np.ones(x.shape[1]))*rho**2 / sigma_sq

    print(beta_gls)
    print(var_gls)
    return beta_gls, var_gls

#x = np.random.normal(size=100)
x = x.reshape(-1, 1)  # Reshape to a 2D array
#y = 1 + 2*x[:, 0] + np.random.normal(size=100)

# Fit a quantile regression model with GLS assuming normal errors
#beta, var = gls_quantile(np.column_stack((np.ones_like(x), x)), y, tau, weight_function)

beta, var = gls_quantile(x, y, tau, huber_weight_function)


  sigma_sq, rho = np.exp(params)
  log_likelihood += 0.5*np.log(denominator) - 0.5*(rho**2) / denominator / sigma_sq


[8.0862573]
[[-685687.94293896]]


In [93]:
############ Quantile GLS Laplace error Weight
# Define the quantile of interest
tau = 0.5


def laplace_weight_function(residuals, b=1):
    #weights = np.exp(-np.abs(residuals) / b)
    weights = np.random.laplace(-np.abs(residuals) / b)
    return np.diag(weights)

# Define the GLS function
def gls_quantile(x, y, tau, weight_function):
    # Define the negative log-likelihood function
    def neg_log_likelihood(params):
    
        sigma_sq, rho = np.exp(params)
        residuals = y - np.dot(x, beta)  # Compute the residuals
        weights = weight_function(residuals)  # Compute the weight matrix
        det = np.linalg.det(weights)  # Compute the determinant of the weight matrix
        inv_weights = np.linalg.inv(weights)  # Compute the inverse of the weight matrix
        
        #log_likelihood = 0.5*np.log(det) - 0.5*np.dot(residuals.T, np.dot(inv_weights, residuals)) / sigma_sq
        log_likelihood = np.sum(np.log(np.diag(weights))) - np.sum(np.abs(residuals) / sigma_sq)
        #log_likelihood += 0.5*np.log(1 - rho**2) - 0.5*(rho**2) / (1 - rho**2) / sigma_sq
        eps = 1e-10  # Add small positive value to denominator
        #log_likelihood += 0.5*np.log(np.abs(1 - rho**2) + eps) - 0.5*(rho**2) / (np.abs(1 - rho**2) + eps) / sigma_sq

        denominator = np.abs(1 - rho**2) + eps
        log_likelihood += 0.5*np.log(denominator) - 0.5*(rho**2) / denominator / sigma_sq

        log_likelihood += 0.5*np.log(sigma_sq)  # Add the log of the variance term
        return -log_likelihood  # Return the negative log-likelihood

    # Initialize the coefficients with OLS
    beta = np.linalg.lstsq(x, y, rcond=None)[0]

    # Optimize the negative log-likelihood function
    res = minimize(neg_log_likelihood, np.array([1, 0]), method='L-BFGS-B')
    sigma_sq, rho = np.exp(res.x)

    # Compute the residuals and the weight matrix
    residuals = y - np.dot(x, beta)
    weights = weight_function(residuals)

    # Compute the GLS estimator
    det = np.linalg.det(weights)
    inv_weights = np.linalg.inv(weights)
    beta_gls = np.linalg.lstsq(np.dot(x.T, inv_weights).dot(x), np.dot(x.T, inv_weights).dot(y), rcond=None)[0]

    # Compute the GLS variance
    weighted_resid = np.dot(residuals.T, inv_weights)
    var_gls = np.dot(weighted_resid, weighted_resid.T) / (len(y) - x.shape[1])
    var_gls *= (1 - rho**2) / sigma_sq
    var_gls += np.diag(np.ones(x.shape[1]))*rho**2 / sigma_sq

    print(beta_gls)
    print(var_gls)
    return beta_gls, var_gls

#x = np.random.normal(size=100)
x = x.reshape(-1, 1)  # Reshape to a 2D array
#y = 1 + 2*x[:, 0] + np.random.normal(size=100)

# Fit a quantile regression model with GLS assuming normal errors
#beta, var = gls_quantile(np.column_stack((np.ones_like(x), x)), y, tau, weight_function)

beta, var = gls_quantile(x, y, tau, laplace_weight_function)


  log_likelihood = np.sum(np.log(np.diag(weights))) - np.sum(np.abs(residuals) / sigma_sq)


[2.00430583]
[[0.36787944]]


In [101]:
l = 1
T = 100
z = np.random.exponential(l, T)
V = np.random.normal(0, 1, T)

#G = #simmetric covariance matrix
mu = 0.2
errors = mu + np.sqrt(z) * V #* np.sqrt(G)
print(errors)

[ 0.53041155  0.32132771  1.23810895  1.54101256  0.14971553  0.48723179
  0.46741677  0.60508731  0.05261598 -0.55356941 -1.28252434 -0.30621721
  0.46985889  0.68882266 -0.83220722  0.16499098  0.50864633  1.92775326
  0.41388462 -0.01737432  0.20905304 -0.22761676 -0.24441633  1.02625057
 -2.23452663 -0.29726811  1.06953428 -0.61516498  0.43304716 -0.00645704
 -0.37167    -0.10189762 -0.56883341  0.61580813 -1.20836582  0.16341879
 -0.52498936  0.27987305  1.10901069 -0.02712289 -0.37619653  1.10208722
  0.63020956  1.51020602  1.22627796  1.38883976 -0.5539065  -0.25087808
 -0.10508656  0.49437655  0.02701274  1.55491191 -0.82448652 -0.607733
  0.09896388 -1.1933123   0.38074981 -0.18780691 -1.05352566 -0.85471157
  0.19270253 -0.31400145  1.09804135 -0.20997025 -0.10221741 -1.07137673
  0.60208051 -0.28117304  0.70475358 -0.50626457  1.43569403  0.08478007
  0.68041092  1.27466326 -0.3270033  -0.0318273   0.80086974 -0.0519348
  2.5098065  -0.21809258 -0.87709725 -1.03808695  0.75

In [None]:
########### Quantile GLS Laplace error WITH NON-ZERO CORRELATION
# Define the quantile of interest
tau = 0.5

def laplace_weight_function(residuals, b=1):
    #weights = np.exp(-np.abs(residuals) / b)
    weights = np.random.laplace(-np.abs(residuals) / b)
    return np.diag(weights)


# Define the GLS function
def gls_quantile(x, y, tau, weight_function):
    # Define the negative log-likelihood function
    def neg_log_likelihood(params):
    
        sigma_sq, rho = np.exp(params)
        residuals = y - np.dot(x, beta)  # Compute the residuals
        weights = weight_function(residuals)  # Compute the weight matrix
        det = np.linalg.det(weights)  # Compute the determinant of the weight matrix
        inv_weights = np.linalg.inv(weights)  # Compute the inverse of the weight matrix
        
        #log_likelihood = 0.5*np.log(det) - 0.5*np.dot(residuals.T, np.dot(inv_weights, residuals)) / sigma_sq
        log_likelihood = np.sum(np.log(np.diag(weights))) - np.sum(np.abs(residuals) / sigma_sq)
        #log_likelihood += 0.5*np.log(1 - rho**2) - 0.5*(rho**2) / (1 - rho**2) / sigma_sq
        eps = 1e-10  # Add small positive value to denominator
        #log_likelihood += 0.5*np.log(np.abs(1 - rho**2) + eps) - 0.5*(rho**2) / (np.abs(1 - rho**2) + eps) / sigma_sq

        denominator = np.abs(1 - rho**2) + eps
        log_likelihood += 0.5*np.log(denominator) - 0.5*(rho**2) / denominator / sigma_sq

        log_likelihood += 0.5*np.log(sigma_sq)  # Add the log of the variance term
        return -log_likelihood  # Return the negative log-likelihood

    # Initialize the coefficients with OLS
    beta = np.linalg.lstsq(x, y, rcond=None)[0]

    # Optimize the negative log-likelihood function
    res = minimize(neg_log_likelihood, np.array([1, 0]), method='L-BFGS-B')
    sigma_sq, rho = np.exp(res.x)

    # Compute the residuals and the weight matrix
    residuals = y - np.dot(x, beta)
    weights = weight_function(residuals)

    # Compute the GLS estimator
    det = np.linalg.det(weights)
    inv_weights = np.linalg.inv(weights)
    beta_gls = np.linalg.lstsq(np.dot(x.T, inv_weights).dot(x), np.dot(x.T, inv_weights).dot(y), rcond=None)[0]

    # Compute the GLS variance
    weighted_resid = np.dot(residuals.T, inv_weights)
    var_gls = np.dot(weighted_resid, weighted_resid.T) / (len(y) - x.shape[1])
    var_gls *= (1 - rho**2) / sigma_sq
    var_gls += np.diag(np.ones(x.shape[1]))*rho**2 / sigma_sq

    print(beta_gls)
    print(var_gls)
    return beta_gls, var_gls

#x = np.random.normal(size=100)
x = x.reshape(-1, 1)  # Reshape to a 2D array
#y = 1 + 2*x[:, 0] + np.random.normal(size=100)

# Fit a quantile regression model with GLS assuming normal errors
#beta, var = gls_quantile(np.column_stack((np.ones_like(x), x)), y, tau, weight_function)

beta, var = gls_quantile(x, y, tau, laplace_weight_function)


In [92]:
######################## ############ Quantile GLS Laplace error WRONG?
import numpy as np
from scipy.stats import norm
from scipy.optimize import minimize

# Define the quantile of interest
tau = 0.5

# Define the weight function
#def weight_function(residuals):
    #return np.diag(np.exp(-0.5 * (residuals / norm.ppf(tau))**2))
def laplace_errors(n, loc=0, scale=1):
    return np.random.laplace(loc=loc, scale=scale, size=n)


def huber_weight_function(residuals, c=1.345):
    weights = np.zeros_like(residuals)
    mask = np.abs(residuals) <= c
    weights[mask] = 1
    weights[~mask] = c / np.abs(residuals[~mask])
    return np.diag(weights)


# Define the GLS function
def gls_quantile(x, y, tau, weight_function):
    # Define the negative log-likelihood function
    
    def neg_log_likelihood(params):
        sigma_sq, rho = np.exp(params)
        residuals = y - np.dot(x, beta)  # Compute the residuals
        weights = weight_function(residuals)  # Compute the weight matrix
        det = np.linalg.det(weights)  # Compute the determinant of the weight matrix
        inv_weights = np.linalg.inv(weights)  # Compute the inverse of the weight matrix
        log_likelihood = np.sum(np.log(0.5/sigma_sq) - np.abs(residuals)/sigma_sq) - np.sum(np.log(np.diag(weights)))
        log_likelihood += 0.5*np.log(np.abs(1 - rho**2) + eps) - 0.5*(rho**2) / (np.abs(1 - rho**2) + eps) / sigma_sq
        log_likelihood += 0.5*np.log(sigma_sq)  # Add the log of the variance term
        return -log_likelihood  # Return the negative log-likelihood


    # Initialize the coefficients with OLS
    beta = np.linalg.lstsq(x, y, rcond=None)[0]

    # Optimize the negative log-likelihood function
    res = minimize(neg_log_likelihood, np.array([1, 0]), method='L-BFGS-B')
    sigma_sq, rho = np.exp(res.x)

    # Compute the residuals and the weight matrix
    residuals = y - np.dot(x, beta)
    weights = weight_function(residuals)

    # Compute the GLS estimator
    det = np.linalg.det(weights)
    inv_weights = np.linalg.inv(weights)
    beta_gls = np.linalg.lstsq(np.dot(x.T, inv_weights).dot(x), np.dot(x.T, inv_weights).dot(y), rcond=None)[0]

    # Compute the GLS variance
    weighted_resid = np.dot(residuals.T, inv_weights)
    var_gls = np.dot(weighted_resid, weighted_resid.T) / (len(y) - x.shape[1])
    var_gls *= (1 - rho**2) / sigma_sq
    var_gls += np.diag(np.ones(x.shape[1]))*rho**2 / sigma_sq

    print(beta_gls)
    print(var_gls)

    return beta_gls, var_gls

#x = np.random.normal(size=100)
x = x.reshape(-1, 1)  # Reshape to a 2D array
#y = 1 + 2*x[:, 0] + np.random.normal(size=100)

# Fit a quantile regression model with GLS assuming normal errors
#beta, var = gls_quantile(np.column_stack((np.ones_like(x), x)), y, tau, weight_function)

y = 1 + 2*x[:, 0] + laplace_errors(1000, scale=1)
beta, var = gls_quantile(np.column_stack((np.ones_like(x), x)), y, tau, weight_function)

  return np.diag(np.exp(-0.5 * (residuals / norm.ppf(tau))**2))


LinAlgError: Singular matrix

In [85]:
import numpy as np
from scipy.optimize import minimize

def neg_log_likelihood(params, X, y, tau):
    n = X.shape[0]
    sigma_sq, rho = np.exp(params[:2])
    omega = np.identity(n) * (1 - rho**2)
    beta = params[2:]
    residuals = y - np.dot(X, beta)
    Q = np.dot(X.T, np.dot(omega, X))
    Q_inv = np.linalg.inv(Q)
    beta_hat = np.dot(Q_inv, np.dot(X.T, np.dot(omega, y)))
    e = y - np.dot(X, beta_hat)
    u = e / sigma_sq
    ll = tau * np.sum(np.maximum(u, 0)) + (1 - tau) * np.sum(np.maximum(-u, 0))
    log_likelihood = -ll + np.log(sigma_sq) + np.log(np.abs(1 - rho**2) + 1e-10)
    return log_likelihood

def fit_quantile_regression_gls(X, y, tau, initial_params=None):
    n, k = X.shape
    if initial_params is None:
        initial_params = np.zeros(k + 2)
    res = minimize(neg_log_likelihood, initial_params, args=(X, y, tau))
    params = res.x
    sigma_sq, rho = np.exp(params[:2])
    beta = params[2:]
    return beta, sigma_sq, rho


In [42]:
import numpy as np
from scipy.stats import norm

def quantile_regression_gls(y, X, tau):
    n, p = X.shape
    nq = len(tau)
    residuals = np.zeros((n, nq))
    beta = np.zeros((nq, p))
    weights = np.zeros((n, n))
    Xw = np.zeros((n, nq*p+1))

    for i in range(nq):
        q = tau[i]
        for j in range(n):
            residuals[j, i] = y[j] - np.dot(X[j,:], beta[i,:])
            weights[j,j] = norm.pdf(residuals[j,i]/weights[j,j])
        Xw[:, i*p+1:(i+1)*p+1] = np.multiply(X, np.tile(weights[:,i],(p,1)).T)
        #Xw[:, i*p] = np.tile(weights[:,i],(1,1)).T
        Xw[:, i*p+1:(i+1)*p+1] = np.multiply(X, np.tile(weights[:,i, np.newaxis],(1,p)))

        #debugging
        rank = np.linalg.matrix_rank(Xw.T @ Xw)
        #beta[i,:] = np.linalg.inv(Xw.T @ Xw) @ Xw.T @ np.multiply(y, np.tile(weights[:,i],(1,1)).T)

    #return beta
    return rank


In [43]:
print(X.shape)
print(beta.shape)
print(y.shape)

(1000, 4)
(3, 3)
(1000,)


In [44]:
y_lag = np.roll(y, 1)
y_lag[0] = 0
epsilon_lag = np.roll(epsilon_y, 1)
epsilon_lag[0] = 0
X = np.column_stack((np.ones_like(x), y_lag, x, epsilon_lag)) #question
tau = [0.1, 0.5, 0.9]
quantile_regression_gls(y, X, tau)

  weights[j,j] = norm.pdf(residuals[j,i]/weights[j,j])
  weights[j,j] = norm.pdf(residuals[j,i]/weights[j,j])


LinAlgError: SVD did not converge

In [36]:
rank = np.linalg.matrix_rank(Xw.T @ Xw)


NameError: name 'Xw' is not defined