In [1]:
import sys
import os

# Get the parent directory of the current folder
project_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_dir not in sys.path:
    sys.path.insert(0, project_dir)


In [2]:
import numpy as np
import plotly.graph_objects as go
from scipy.optimize import minimize
from tqdm import tqdm
from scipy.stats import t, multivariate_normal, multivariate_t
from scipy.special import logsumexp, gamma, gammaln
from numpy.linalg import inv, det, slogdet, LinAlgError
import plotly.express as px
from plotly.subplots import make_subplots
from numpy import tril_indices, triu_indices
from typing import Literal, Optional, Dict, Union

# from simulation import * 

from Functions.simulation_1 import * 
# from Functions.gas_filter_2 import *
# from Functions.kf_filter_2 import *
from Functions.gas_filter_5 import *
from Functions.kf_filter_5 import *

## $\Phi$ Full, $Q, R$ Full

In [None]:
T = 1000
N = 3
K = 2

c_true_3x3 = np.array([0.2, -0.1, 0.4])

Phi_true_3x3_phi_full_yes_noise_correl = np.array([[0.8, 0.2, 0.1],
                                                    [0.1, 0.7, 0.2],
                                                    [0.1, 0.1, 0.6]])

beta_true_3x3 = np.array([[1.0, -0.5, 0.2],
                          [-1.2, 0.8, 0.4],])

Q_true_3x3_yes_correl = np.array([[0.5, 0.2, 0.3],
                                 [0.2, 0.6, 0.1],
                                 [0.3, 0.1, 0.5]])

R_true_3x3_yes_correl = np.array([[0.4, 0.1, 0.2],
                                 [0.1, 0.5, 0.3],
                                 [0.2, 0.3, 0.5]])


y_sim_3x3_phi_full_yes_noise_correl, mu_sim_3x3_phi_full_yes_noise_correl, X_sim_3x3_phi_full_yes_noise_correl = simulate_multivariate_state_space(
    T=T,
    N=N,
    c=c_true_3x3,
    Phi=Phi_true_3x3_phi_full_yes_noise_correl,
    beta=beta_true_3x3, 
    Q=Q_true_3x3_yes_correl,
    R=R_true_3x3_yes_correl,
    # use_intercept=None,
    seed=8888,
    t_noise=True,
    nu= 10
)

In [None]:
initial_params_3x3_phi_full_yes_noise_correl = np.concatenate([
    c_true_3x3, 
    Phi_true_3x3_phi_full_yes_noise_correl.flatten(), 
    beta_true_3x3.flatten(), 
    np.ones(N * (N + 1) // 2),  # L_Q_elements
    np.ones(N * (N + 1) // 2)   # L_R_elements
])

kf_results_3x3_phi_full_yes_correl  =  multivariate_KF_with_estimation(
    y=y_sim_3x3_phi_full_yes_noise_correl,
    X=X_sim_3x3_phi_full_yes_noise_correl,
    verbose=True,
    ftol=1e-8,
    gtol=1e-5,
)

mu_kf_3x3_phi_full_yes_correl = kf_results_3x3_phi_full_yes_correl['mu_filtered']
P_kf_3x3_phi_full_yes_correl = kf_results_3x3_phi_full_yes_correl['P_filtered']
kalman_gain_kf_3x3_phi_full_yes_correl = kf_results_3x3_phi_full_yes_correl['kalman_gain']

✅ Kalman estimation completed successfully.
Estimated parameters:
omega (unconditional mean): [-0.21962191  0.07801968  0.5600662 ]
Phi (persistence matrix): 
[[1.21269271 0.19196091 0.32162265]
 [0.21131488 0.94964043 0.33629516]
 [0.17783124 0.09805771 0.94324712]]
beta: 
[[ 1.80068477 -0.52184687  0.51198736]
 [-1.15895837  0.85023166  0.44013485]]
Q (state noise covariance): 
[[0.41997243 0.27249573 0.14052342]
 [0.27249573 0.76158504 0.09194241]
 [0.14052342 0.09194241 0.42163175]]
R (observation noise covariance): 
[[0.60445351 0.1123162  0.40957749]
 [0.1123162  0.51185886 0.3889131 ]
 [0.40957749 0.3889131  0.68847697]]
Log-likelihood: -4395.720300721743
AIC: 8851.440601443486
BIC: 9031.631628472993


In [6]:
gas_results_3x3_phi_full_yes_correl_1 = estimate_and_filter_gas(
                                        y=y_sim_3x3_phi_full_yes_noise_correl, 
                                        X=X_sim_3x3_phi_full_yes_noise_correl, 
                                        phi_type= "full", 
                                        kappa_type= "full", 
                                        fix_nu=10,
                                        ftol = 1e-8,  # Tolerance for convergence
                                        gtol = 1e-5,   # Gradient tolerance for convergence)
)


mu_gas_3x3_phi_full_yes_correl_1 = gas_results_3x3_phi_full_yes_correl_1["mu_filtered"]
u_gas_3x3_phi_full_yes_correl_1 = gas_results_3x3_phi_full_yes_correl_1["u"]
resid_gas_phi_full_yes_correl_1 = gas_results_3x3_phi_full_yes_correl_1["resid"]
Kappa_est_3x3_phi_full_yes_correl_1 = gas_results_3x3_phi_full_yes_correl_1["Kappa"]

Estimated omega:
 [0.3073665 0.2718786 0.1347773]
Estimated Phi:
 [[ 0.76372905  0.13162454  0.27391297]
 [-0.01088989  0.73996238  0.3643341 ]
 [ 0.047021    0.14309001  0.63029218]]
Estimated beta:
 [[ 9.79696355  5.02515892  4.76972602]
 [-1.1481457   0.83701968  0.44222644]]
Estimated Omega (from lambda):
 [[1.1503056  0.         0.        ]
 [0.         1.23033118 0.        ]
 [0.         0.         1.07092265]]
Estimated Kappa:
 [[ 0.80351259  0.40128497 -0.0714741 ]
 [ 0.35565399  0.77303523 -0.1377822 ]
 [ 0.21016085  0.06097447  0.39845867]]
Estimated nu:
 10
Log-likelihood: -4707.426607230362
AIC: 9474.853214460723
BIC: 9655.04424149023


In [None]:
phi_type = "full"
kappa_type = "full"
X_shared = True
fix_nu = 10
R = N
P = X_sim_3x3_phi_full_yes_noise_correl.shape[1]


lambda_true = 0.5 * np.log(np.diag(R_true_3x3_yes_correl))  # diagonal Lambda

# For Kappa set to identity or try a scaled value
Kappa_true = np.eye(N) 
kappa_true_flat = Kappa_true.flatten()

# Compose params vector
params_true = np.concatenate([
    Phi_true_3x3_phi_full_yes_noise_correl.flatten(),
    # omega, use the true unconditional mean
    np.linalg.solve(np.eye(N) - Phi_true_3x3_phi_full_yes_noise_correl, c_true_3x3),
    beta_true_3x3.flatten(),
    lambda_true,
    kappa_true_flat,
    [fix_nu]
])

# Get param sizes for slicing
cL = N
cK = N * N  # "full"
# Evaluate loglikelihood at true parameters
neg_loglik_true = gas_general_filter(
    params_true, y_sim_3x3_phi_full_yes_noise_correl, X_sim_3x3_phi_full_yes_noise_correl,
    cL, cK, fix_nu=fix_nu, kappa_type=kappa_type, phi_type=phi_type, X_shared=X_shared
)
print("GAS loglik at true params:", -neg_loglik_true)



GAS loglik at true params: -6091.212836146152


In [8]:
import numpy as np
from scipy.stats import t, norm

from scipy.stats import multivariate_t, multivariate_normal

from scipy.stats import multivariate_normal

loglik_t = 0
loglik_norm = 0

for t in range(T):
    obs = y_sim_3x3_phi_full_yes_noise_correl[t]
    mu = mu_sim_3x3_phi_full_yes_noise_correl[t] + X_sim_3x3_phi_full_yes_noise_correl[t] @ beta_true_3x3
    Omega = R_true_3x3_yes_correl
    nu = 10

    loglik_t += multivariate_t.logpdf(obs, loc=mu, shape=Omega, df=nu)
    loglik_norm += multivariate_normal.logpdf(obs, mean=mu, cov=Omega)

print("Sum log-likelihood t(10):", loglik_t)
print("Sum log-likelihood Gaussian:", loglik_norm)



Sum log-likelihood t(10): -3099.0737055493723
Sum log-likelihood Gaussian: -3133.036220116899


In [10]:
gas_specs = [
    {"phi_type": "diagonal", "kappa_type": "diagonal", "fix_nu": None, "label": "GAS_diag_diag_nufree"},
    {"phi_type": "diagonal", "kappa_type": "diagonal", "fix_nu": 5, "label": "GAS_diag_diag_nu10"},
    {"phi_type": "diagonal", "kappa_type": "diagonal", "fix_nu": 10, "label": "GAS_diag_diag_nu10"},
    {"phi_type": "diagonal", "kappa_type": "diagonal", "fix_nu": 20, "label": "GAS_diag_diag_nu20"},
    {"phi_type": "diagonal", "kappa_type": "diagonal", "fix_nu": 50, "label": "GAS_diag_diag_nu50"},
    {"phi_type": "diagonal", "kappa_type": "diagonal", "fix_nu": 100, "label": "GAS_diag_diag_nu100"},
    {"phi_type": "diagonal", "kappa_type": "diagonal", "fix_nu": 500, "label": "GAS_diag_diag_nu500"},
    {"phi_type": "diagonal", "kappa_type": "full", "fix_nu": None, "label": "GAS_diag_full_nufree"},
    {"phi_type": "diagonal", "kappa_type": "full", "fix_nu": 5, "label": "GAS_diag_full_nu5"},
    {"phi_type": "diagonal", "kappa_type": "full", "fix_nu": 10, "label": "GAS_diag_full_nu10"},
    {"phi_type": "diagonal", "kappa_type": "full", "fix_nu": 20, "label": "GAS_diag_full_nu20"},
    {"phi_type": "diagonal", "kappa_type": "full", "fix_nu": 50, "label": "GAS_diag_full_nu50"},
    {"phi_type": "diagonal", "kappa_type": "full", "fix_nu": 100, "label": "GAS_diag_full_nu100"},
    {"phi_type": "diagonal", "kappa_type": "full", "fix_nu": 500, "label": "GAS_diag_full_nu500"},
    {"phi_type": "full", "kappa_type": "diagonal", "fix_nu": None, "label": "GAS_full_diag_nufree"},
    {"phi_type": "full", "kappa_type": "diagonal", "fix_nu": 5, "label": "GAS_full_diag_nu5"},
    {"phi_type": "full", "kappa_type": "diagonal", "fix_nu": 10, "label": "GAS_full_diag_nu10"},
    {"phi_type": "full", "kappa_type": "diagonal", "fix_nu": 20, "label": "GAS_full_diag_nu20"},
    {"phi_type": "full", "kappa_type": "diagonal", "fix_nu": 50, "label": "GAS_full_diag_nu50"},
    {"phi_type": "full", "kappa_type": "diagonal", "fix_nu": 100, "label": "GAS_full_diag_nu100"},
    {"phi_type": "full", "kappa_type": "diagonal", "fix_nu": 500, "label": "GAS_full_diag_nu500"},
    {"phi_type": "full", "kappa_type": "full", "fix_nu": None, "label": "GAS_full_full_nufree"},
    {"phi_type": "full", "kappa_type": "full", "fix_nu": 5, "label": "GAS_full_full_nu5"},
    {"phi_type": "full", "kappa_type": "full", "fix_nu": 10, "label": "GAS_full_full_nu10"},
    {"phi_type": "full", "kappa_type": "full", "fix_nu": 20, "label": "GAS_full_full_nu20"},
    {"phi_type": "full", "kappa_type": "full", "fix_nu": 50, "label": "GAS_full_full_nu50"},
    {"phi_type": "full", "kappa_type": "full", "fix_nu": 100, "label": "GAS_full_full_nu100"},
    {"phi_type": "full", "kappa_type": "full", "fix_nu": 500, "label": "GAS_full_full_nu500"},
    # Add more specs as needed
]

In [None]:
from collections import defaultdict

spec_results = defaultdict(list)
target_success = 30
sim_counter = 0

while any(len(v) < target_success for v in spec_results.values()) or len(spec_results) == 0:
    sim_seed = 8888 + sim_counter
    sim_counter += 1
    # Simulate data
    y_sim, mu_sim, X_sim = simulate_multivariate_state_space(
            T=T,
            N=N,
            c=c_true_3x3,
            Phi=Phi_true_3x3_phi_full_yes_noise_correl,
            beta=beta_true_3x3, 
            Q=Q_true_3x3_yes_correl,
            R=R_true_3x3_yes_correl,
            seed=sim_seed,
        )
    # KF
    try:
        if len(spec_results["KF"]) < target_success:
            kf_results = multivariate_KF_with_estimation(
            y=y_sim, 
            X=X_sim, 
            initial_params=None, 
            verbose=False,
            ftol=1e-8,  # Tolerance for convergence
            gtol=1e-5,  # Gradient tolerance for convergence,
        )
            # Calculate MSE for mu
            mu_kf = kf_results['mu_filtered']
            mse_kf = np.mean((mu_sim[1:] - mu_kf[1:]) ** 2)
            mae_kf = np.mean(np.abs(mu_sim[1:] - mu_kf[1:]))

            spec_results["KF"].append({
                "seed": sim_seed, "loglik": kf_results["loglik"],
                "aic": kf_results["aic"], "bic": kf_results["bic"],
                "mse": mse_kf, "mae": mae_kf
                })

    except Exception as e:
        print(f"KF failed: {e}")
    # GAS
    for spec in gas_specs:
        try:
            label = spec["label"]
            if len(spec_results[label]) < target_success:
                res_gas = estimate_and_filter_gas(
                    y=y_sim, 
                    X=X_sim,
                    phi_type=spec["phi_type"], 
                    kappa_type=spec["kappa_type"],
                    fix_nu=spec["fix_nu"], 
                    verbose=False,
                    ftol=1e-8,  # Tolerance for convergence
                    gtol=1e-5   # Gradient tolerance for convergence
                )

                # Calculate MSE for mu
                mu_gas = res_gas["mu_filtered"]
                mse_gas = np.mean((mu_sim[1:] - mu_gas[1:]) ** 2)
                mae_gas = np.mean(np.abs(mu_sim[1:] - mu_gas[1:]))

                spec_results[label].append({
                    "seed": sim_seed, "loglik": res_gas["loglik"],
                    "aic": res_gas["aic"], "bic": res_gas["bic"],
                    "mse": mse_gas, "mae": mae_gas
                })
        except Exception as e:
            print(f"GAS {label} failed: {e}")

    # Break if all filled
    if all(len(v) >= target_success for v in spec_results.values()):
        break



  mu[t + 1] = omega + Phi @ (mu[t] - omega) + Kappa @ u_t
  eps = inv_sqrt_Omega @ diff
  u_t = (Lambda_inv @ resid) / alpha_t
  u_t = (Lambda_inv @ resid) / alpha_t




In [None]:
test1 = mu_sim[1:]
test2 = kf_results['mu_filtered'][1:]
test3 = res_gas["mu_filtered"][1:]

In [None]:
np.mean((test1 - test2)**2)  # Should be close to zero if KF is working correctly

np.float64(1.6254717377848062)

In [None]:
np.mean((test1 - test3)**2)  # Should be close to zero if GAS is working correctly

np.float64(1.5388307135189712)

In [None]:
import pandas as pd

# Flatten to long-form list of dicts
rows = []
for spec_label, run_list in spec_results.items():
    for run in run_list:
        row = {'spec': spec_label}
        row.update(run)  # adds 'seed', 'loglik', etc.
        rows.append(row)

df_results = pd.DataFrame(rows)


agg = df_results.groupby('spec').agg({
    'loglik': ['mean', 'std'],
    'aic': ['mean', 'std'],
    'bic': ['mean', 'std'],
    'mse': ['mean', 'std'],  # Mean Squared Error
    'mae': ['mean', 'std'],  # Mean Absolute Error
    # add more metrics here if you wish!
}).reset_index()
# Flatten MultiIndex columns
agg.columns = ['spec', 'loglik_mean', 'loglik_std', 'aic_mean', 'aic_std', 'bic_mean', 'bic_std', 'mse_mean', 'mse_std', 'mae_mean', 'mae_std']  # Added mse and mae


agg

Unnamed: 0,spec,loglik_mean,loglik_std,aic_mean,aic_std,bic_mean,bic_std,mse_mean,mse_std,mae_mean,mae_std
0,GAS_diag_diag_nu10,-4707.901817,48.41557,9457.803633,96.831141,9583.937352,96.831141,2.97462,0.667126,1.580174,0.206886
1,GAS_diag_diag_nu100,-4617.332526,38.373631,9276.665053,76.747263,9402.798772,76.747263,2.93152,0.574299,1.587475,0.177175
2,GAS_diag_diag_nu20,-4626.805535,39.305689,9295.61107,78.611377,9421.744788,78.611377,2.994767,0.637997,1.603761,0.196187
3,GAS_diag_diag_nu50,-4617.820603,38.629638,9277.641206,77.259277,9403.774925,77.259277,3.039385,0.616888,1.619082,0.191238
4,GAS_diag_diag_nu500,-4618.393999,38.143195,9278.787997,76.286389,9404.921716,76.286389,2.950629,0.604243,1.592704,0.186018
5,GAS_diag_diag_nufree,-4672.565258,258.111023,9389.130516,516.222046,9521.270602,516.222046,14.758167,5.768076,3.514552,0.828665
6,GAS_diag_full_nu10,-4600.1334,40.097544,9254.266801,80.195089,9416.438725,80.195089,3.318673,2.117491,1.612423,0.346314
7,GAS_diag_full_nu100,-5007.922384,1915.256245,10069.844768,3830.512491,10232.016692,3830.512491,7.359005,19.304271,1.975778,1.714842
8,GAS_diag_full_nu20,-4580.262302,38.223591,9214.524604,76.447183,9376.696528,76.447183,3.077168,0.811647,1.611318,0.238341
9,GAS_diag_full_nu5,-4656.360684,43.41225,9366.721369,86.824499,9528.893293,86.824499,2.94865,0.612117,1.558586,0.190687


In [11]:
from collections import defaultdict

spec_results = defaultdict(list)
target_success = 30
sim_counter = 0

while any(len(v) < target_success for v in spec_results.values()) or len(spec_results) == 0:
    sim_seed = 8888 + sim_counter
    sim_counter += 1
    # Simulate data
    # Run explicit simulation with regressors
    y_sim, mu_sim, X_sim = simulate_multivariate_state_space(
            T=T,
            N=N,
            c=c_true_3x3,
            Phi=Phi_true_3x3_phi_full_yes_noise_correl,
            beta=beta_true_3x3,  # Explicitly include regressors
            Q=Q_true_3x3_yes_correl,
            R=R_true_3x3_yes_correl,
            # use_intercept=None,
            seed=sim_seed,
        )
    # KF
    try:
        if len(spec_results["KF"]) < target_success:
            kf_results = multivariate_KF_with_estimation(
            y=y_sim, 
            X=X_sim, 
            initial_params=None, 
            verbose=False,
            ftol=1e-8,  # Tolerance for convergence
            gtol=1e-5,  # Gradient tolerance for convergence,
        )
            # Calculate MSE for mu
            mu_kf = kf_results['mu_filtered']
            mse_kf = np.mean((mu_sim[1:] - mu_kf[1:]) ** 2)
            mae_kf = np.mean(np.abs(mu_sim[1:] - mu_kf[1:]))

            spec_results["KF"].append({
                "seed": sim_seed, "loglik": kf_results["loglik"],
                "aic": kf_results["aic"], "bic": kf_results["bic"],
                "mse": mse_kf, "mae": mae_kf
                })

    except Exception as e:
        print(f"KF failed: {e}")
    # GAS
    for spec in gas_specs:
        try:
            label = spec["label"]
            if len(spec_results[label]) < target_success:
                res_gas = estimate_and_filter_gas(
                    y=y_sim, 
                    X=X_sim,
                    phi_type=spec["phi_type"], 
                    kappa_type=spec["kappa_type"],
                    fix_nu=spec["fix_nu"], 
                    verbose=False,
                    ftol=1e-8,  # Tolerance for convergence
                    gtol=1e-5   # Gradient tolerance for convergence
                )

                # Calculate MSE for mu
                mu_gas = res_gas["mu_filtered"]
                mse_gas = np.mean((mu_sim[1:] - mu_gas[1:]) ** 2)
                mae_gas = np.mean(np.abs(mu_sim[1:] - mu_gas[1:]))

                spec_results[label].append({
                    "seed": sim_seed, "loglik": res_gas["loglik"],
                    "aic": res_gas["aic"], "bic": res_gas["bic"],
                    "mse": mse_gas, "mae": mae_gas
                })
        except Exception as e:
            print(f"GAS {label} failed: {e}")

    # Break if all filled
    if all(len(v) >= target_success for v in spec_results.values()):
        break

# Now you can flatten/aggregate spec_results as needed


GAS GAS_full_full_nu5 failed: Estimation failed: ABNORMAL: 
GAS GAS_full_full_nufree failed: Estimation failed: ABNORMAL: 


In [None]:
import pandas as pd

rows = []
for spec_label, run_list in spec_results.items():
    for run in run_list:
        row = {'spec': spec_label}
        row.update(run)  
        rows.append(row)

df_results = pd.DataFrame(rows)


agg = df_results.groupby('spec').agg({
    'loglik': ['mean', 'std'],
    'aic': ['mean', 'std'],
    'bic': ['mean', 'std'],
    'mse': ['mean', 'std'], 
    'mae': ['mean', 'std'],  

}).reset_index()

agg.columns = ['spec', 'loglik_mean', 'loglik_std', 'aic_mean', 'aic_std', 'bic_mean', 'bic_std', 'mse_mean', 'mse_std', 'mae_mean', 'mae_std']  


agg

Unnamed: 0,spec,loglik_mean,loglik_std,aic_mean,aic_std,bic_mean,bic_std,mse_mean,mse_std,mae_mean,mae_std
0,GAS_diag_diag_nu10,-4669.367862,67.106145,9374.735723,134.212289,9482.850339,134.212289,50.562349,9.530761,6.770236,0.678906
1,GAS_diag_diag_nu100,-4586.852037,46.319291,9209.704074,92.638583,9317.81869,92.638583,38.151358,10.286445,5.872994,0.813192
2,GAS_diag_diag_nu20,-4595.887268,48.580894,9227.774536,97.161787,9335.889152,97.161787,48.85685,9.356338,6.663829,0.65942
3,GAS_diag_diag_nu50,-4587.328071,47.014866,9210.656142,94.029731,9318.770758,94.029731,46.795674,9.782892,6.521667,0.710311
4,GAS_diag_diag_nu500,-4587.826413,45.673036,9211.652826,91.346072,9319.767442,91.346072,39.452248,7.465816,5.998869,0.59516
5,GAS_diag_diag_nufree,-4586.433202,46.72803,9210.866403,93.456059,9324.987387,93.456059,85.72095,71.50241,7.660249,4.372393
6,GAS_diag_full_nu10,-4552.163564,51.550694,9152.327128,103.101388,9296.479949,103.101388,48.179711,9.284312,6.61049,0.643236
7,GAS_diag_full_nu100,-4622.08083,291.20342,9292.16166,582.40684,9436.314482,582.40684,42.82119,11.388704,6.202694,0.914983
8,GAS_diag_full_nu20,-4534.154464,49.484881,9116.308928,98.969762,9260.46175,98.969762,48.081932,9.301864,6.608451,0.651606
9,GAS_diag_full_nu5,-4608.285976,53.878506,9264.571952,107.757011,9408.724773,107.757011,47.251733,9.141852,6.538405,0.64515
