In [1]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# Load the dataset
df = pd.read_csv('3.csv')

# Convert the DataFrame to a NumPy array
data = df.values

# Set T and K
T, K = data.shape
K = 3  # Override K as per your requirement

def create_parameters(K, p=1):
    """
    Initializes the parameters for the VAR(p) model.
    
    Parameters:
    - K: The number of time series.
    - p: The order of the VAR model.
    
    Returns:
    - A: The vector of constants for each time series (K-dimensional).
    - B: The matrices of autoregressive parameters (KxK for each lag).
    """
    A = np.zeros(K)  # Vector of constants
    B = np.zeros((K, K * p))  # Autoregressive parameters for p lags
    
    return A, B

# Example usage
A, B = create_parameters(K)
print("Vector of constants (A):", A)
print("Matrix of autoregressive parameters (B):", B)


Vector of constants (A): [0. 0. 0.]
Matrix of autoregressive parameters (B): [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [2]:

K = 3  # Number of time series (set to 3 as per your requirement)
p = 1  # Order of the VAR model

def calculate_log_likelihood(params, data, K, p):
    """
    Calculates the log likelihood of a VAR(p) model.
    
    Parameters:
    - params: A flattened array containing all parameters (A and B matrices flattened).
    - data: The observed data as a NumPy array (T x K).
    - K: The number of time series.
    - p: The order of the VAR model.
    
    Returns:
    - The log likelihood of the model.
    """
    T, _ = data.shape
    
    # Reshape params into A and B matrices
    A = params[:K]
    B = params[K:].reshape((K, K*p))
    
    # Construct the lagged Y matrix
    Y_lagged = np.hstack([data[p-i:-i] for i in range(1, p+1)])
    Y = data[p:]  # Match dimensions
    
    # Calculate predicted values
    Y_pred = np.dot(Y_lagged, B.T) + A
    
    # Calculate residuals
    residuals = Y - Y_pred
    
    # Estimate covariance matrix of residuals
    Sigma = np.cov(residuals.T)
    
    # Calculate log likelihood
    term1 = -T * K / 2 * np.log(2 * np.pi)
    term2 = -T / 2 * np.log(np.linalg.det(Sigma))
    term3 = -1/2 * np.sum(np.dot(residuals, np.linalg.inv(Sigma)) * residuals)
    log_likelihood = term1 + term2 + term3
    
    return log_likelihood

# Flattening A and B into a single array of parameters
# Initial A and B for example purposes
A_initial = np.zeros(K)  # Vector of constants
B_initial = np.zeros((K, K*p))  # Autoregressive parameters for p lags
params_initial = np.concatenate([A_initial, B_initial.flatten()])
print(params_initial)
# Use your actual data and initial parameters
log_likelihood = calculate_log_likelihood(params_initial, data, K, p)
print("Log likelihood:", log_likelihood)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Log likelihood: 10348.117397080463


In [3]:

def negative_log_likelihood(params, data, K, p):
    """
    Returns the negative log likelihood of the VAR model.
    
    This is the function to be minimized.

    Parameters:
    - params: The parameters of the VAR model (flattened A and B matrices).
    - data: The observed data as a NumPy array (T x K).
    - K: The number of time series.
    - p: The order of the VAR model.
    """
    # The negative of the log likelihood is returned because we are minimizing
    return -calculate_log_likelihood(params, data, K, p)

def estimate_var_parameters(data, K, p, initial_params):
    """
    Estimates the VAR model parameters by minimizing the negative log likelihood.

    Parameters:
    - data: The observed data as a NumPy array (T x K).
    - K: The number of time series.
    - p: The order of the VAR model.
    - initial_params: Initial guesses for the parameters (flattened A and B).

    Returns:
    - The result of the optimization procedure (OptimizeResult object).
    """
    result = minimize(negative_log_likelihood, initial_params, args=(data, K, p), method='L-BFGS-B')
    return result

In [4]:

# Initialize A and B with zeros or some other initial guess
initial_A = np.zeros(K)  # Vector of constants
initial_B = np.zeros((K, K*p))  # Autoregressive parameters for p lags
initial_params = np.concatenate([initial_A, initial_B.flatten()])  # Flatten the parameters

# Call the estimation function
result = estimate_var_parameters(data, K, p, initial_params)

# Results
if result.success:
    print("Optimization was successful.")
    optimized_params = result.x
    print("Optimized Parameters:", optimized_params)
else:
    print("Optimization failed.")
    print("Reason:", result.message)

# Reshape the optimized parameters back into A and B if needed
optimized_A = optimized_params[:K]
optimized_B = optimized_params[K:].reshape(K, K*p)
print("Optimized A:", optimized_A)
print("Optimized B:", optimized_B)

Optimization was successful.
Optimized Parameters: [ 0.00032334  0.00046835 -0.00037058 -0.10738508 -0.03853695  0.1132595
 -0.03784116 -0.11854587  0.08826391  0.04238161  0.04306515 -0.01172047]
Optimized A: [ 0.00032334  0.00046835 -0.00037058]
Optimized B: [[-0.10738508 -0.03853695  0.1132595 ]
 [-0.03784116 -0.11854587  0.08826391]
 [ 0.04238161  0.04306515 -0.01172047]]


In [5]:
import statsmodels.api as sm
from statsmodels.tsa.api import VAR

# Assuming your DataFrame 'df' is already loaded from '3.csv'
# df = pd.read_csv('3.csv')

# Preparing the data (ensure no missing values and it's stationary)
# For VAR models, it's crucial to ensure the data is stationary. You may need to difference or transform it.

data2 = df

# Fit VAR model using statsmodels
model = VAR(data2)
results = model.fit(maxlags=1, ic='aic')  # Here, we use AIC to choose the best lag order up to 1. Adjust as necessary.

# Print summary of the fitted model
print(results.summary())

# Extracting estimated parameters for comparison
A_statsmodels = results.params.iloc[0].values  # Intercept terms (constants)
B_statsmodels = results.params.iloc[1:].values  # Autoregressive coefficients

print("\nEstimated A (intercept/terms) from statsmodels:\n", A_statsmodels)
print("\nEstimated B (autoregressive parameters) from statsmodels:\n", B_statsmodels)

# Assuming `optimized_A` and `optimized_B` are your optimized parameters from the custom estimation
# print("Optimized A from custom estimation:", optimized_A)
# print("Optimized B from custom estimation:", optimized_B)

# Note: Before comparing, make sure the formats of A and B are compatible between the custom estimation and statsmodels results.

# Here, you would directly compare the values of A_statsmodels and B_statsmodels with your optimized_A and optimized_B.
# This could be done by looking at the differences, ratios, or by evaluating the models' predictive performance.


  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Fri, 15, Mar, 2024
Time:                     06:06:27
--------------------------------------------------------------------
No. of Equations:         3.00000    BIC:                   -29.0844
Nobs:                     1004.00    HQIC:                  -29.1208
Log likelihood:           10368.0    FPE:                2.20448e-13
AIC:                     -29.1431    Det(Omega_mle):     2.17834e-13
--------------------------------------------------------------------
Results for equation ACWI
             coefficient       std. error           t-stat            prob
--------------------------------------------------------------------------
const           0.000328         0.000427            0.768           0.443
L1.ACWI        -0.075152         0.148917           -0.505           0.614
L1.SPY         -0.069341         0.142528           -0.487           0.627
L1.TLT    


Optimization was successful.
Optimized Parameters: [ 0.3967176   0.46135682  0.48737813  0.15284922  0.17526633 -0.08598358
 -0.09927688  0.01802577  0.15329777 -0.08297423 -0.05485256  0.09327762]
 
Optimized A: [0.3967176  0.46135682 0.48737813]

Optimized B: 
[[ 0.15284922  0.17526633 -0.08598358]
 [-0.09927688  0.01802577  0.15329777]
 [-0.08297423 -0.05485256  0.09327762]]


Estimated A (intercept/terms) from statsmodels:
 [ 0.00032775  0.00047306 -0.00037058]

Estimated B (autoregressive parameters) from statsmodels:
 [[-0.07515212 -0.00342125  0.03974991]
 [-0.06934101 -0.1514413   0.0454299 ]
 [ 0.11303057  0.08801938 -0.01138539]]

In [6]:
Estimated A (intercept/terms) from statsmodels:
 [ 0.00040347  0.00055942 -0.00041654]

Estimated B (autoregressive parameters) from statsmodels:
 [[-0.05866018  0.00894351  0.09763493]
 [-0.0796686  -0.16043477  0.00179141]
 [ 0.11113023  0.08568708 -0.02940672]
 [ 0.435087    0.47028072  0.21270671]
 [-0.30868078 -0.3442554  -0.17788739]
 [ 0.05054378  0.06193812 -0.13328895]
 [ 0.18438885  0.21882371  0.10355544]
 [-0.15850544 -0.18704238 -0.09058968]
 [ 0.0896783   0.09023644 -0.10620292]]

SyntaxError: invalid syntax (4078822011.py, line 1)

In [None]:
Optimized A: [0.29406983 0.4598727  0.33642975]
Optimized B: [[ 0.09019945  0.18850645 -0.06585952  0.02080435  0.15970833 -0.05398002
   0.02586116  0.14150846 -0.0566283 ]
 [-0.11949213  0.06700082  0.15567987  0.03886389  0.00660415 -0.24294738
   0.00165449  0.06371084  0.10683534]
 [-0.08872433 -0.05476073  0.11257378 -0.05901685  0.09345693 -0.00376962
   0.10836608 -0.00655298  0.17453902]]



In [None]:
result.hess_inv.todense()