In [10]:
import pickle

import arviz as az
import numpy as np
import pandas as pd
import pytensor.tensor as pt  # Import PyTensor (Theano backend)
import xarray as xr
from scipy.stats import norm

from utils import *

In [11]:
# Load the dataset together with the scaling factors returned alongside it.
# NOTE(review): read_dd_data is not defined in this notebook — presumably it comes from
# the wildcard `from utils import *`; confirm there what `standardize` and `reduced_data` do.
df, scaling_factors = read_dd_data('../data/PD_data',standardize=True, reduced_data=True)

In [12]:
# Load the previously saved inference results for both models from NetCDF files.
# Later cells read `log_likelihood.log_RT` from each trace and, for the quadratic
# model, the posterior variables `group_beta2_mu` and `beta2`.
linear_trace = az.from_netcdf("models/linear_model.nc")
quadratic_trace = az.from_netcdf("models/quadratic_model.nc")

In [13]:
 # 5. Bayes Factor approximation using BIC
def calculate_bic(trace, n_params):
    """
    Approximate the Bayesian Information Criterion (BIC) from posterior samples.

    The log-likelihood of every observation is averaged over all chains and
    draws, and the sum of those averages stands in for the maximised
    log-likelihood of the textbook definition. The result is therefore an
    approximation, not an exact BIC.

    Parameters
    ----------
    trace : InferenceData-like
        Must expose ``trace.log_likelihood.log_RT`` with ``chain`` and ``draw``
        dimensions.
    n_params : int
        Number of free parameters charged by the complexity penalty.

    Returns
    -------
    float
        ``-2 * sum(mean log-likelihood) + n_params * ln(n_observations)``.

    Raises
    ------
    ValueError
        If the log-likelihood holds no observations.
    """
    pointwise_ll = np.asarray(
        trace.log_likelihood.log_RT.mean(dim=["chain", "draw"]).values
    )
    # Count every observation: len() only reports the length of the first axis,
    # which under-penalises a log-likelihood with several observation dimensions
    # and fails outright on a 0-d result.
    n_obs = pointwise_ll.size
    if n_obs == 0:
        # ln(0) would silently turn the criterion into -inf.
        raise ValueError("log_likelihood.log_RT contains no observations")
    return -2 * np.sum(pointwise_ll) + n_params * np.log(n_obs)

# Report both BIC values; the model with the lower value is preferred.
# NOTE(review): the parameter counts (2 and 3) are hard-coded — presumably they count
# only the regression coefficients; confirm against the model definitions, which are
# not shown in this notebook.
print("Linear model BIC:",calculate_bic(linear_trace, 2))
print("Quadratic model BIC:",calculate_bic(quadratic_trace, 3))

Linear model BIC: 1751.8782103649726
Quadratic model BIC: 1724.1631427429559


In [14]:
# 6. Akaike Information Criterion (AIC) calculation
def calculate_aic(trace, n_params):
    """
    Approximate the Akaike Information Criterion (AIC) from posterior samples.

    The per-observation log-likelihood is averaged over chains and draws and
    the averages are summed; that sum replaces the maximised log-likelihood of
    the textbook AIC, so the value is an approximation.

    Parameters
    ----------
    trace : InferenceData-like
        Must expose ``trace.log_likelihood.log_RT`` with ``chain`` and ``draw``
        dimensions.
    n_params : int
        Number of free parameters charged by the complexity penalty.

    Returns
    -------
    float
        ``-2 * sum(mean log-likelihood) + 2 * n_params``.
    """
    posterior_mean_ll = trace.log_likelihood.log_RT.mean(dim=["chain", "draw"])
    deviance = -2 * np.sum(posterior_mean_ll.values)
    penalty = 2 * n_params
    return deviance + penalty

# Report both AIC values; the model with the lower value is preferred.
# NOTE(review): the parameter counts (2 and 3) are hard-coded — presumably they count
# only the regression coefficients; confirm against the model definitions, which are
# not shown in this notebook.
print("Linear model AIC:", calculate_aic(linear_trace, 2))
print("Quadratic model AIC:", calculate_aic(quadratic_trace, 3))


Linear model AIC: 1742.0626998070084
Quadratic model AIC: 1709.4398769060094


Hypothesis Formulation


Null Hypothesis (H₀): The quadratic term (β₂) does not significantly improve reaction time predictions, i.e. β₂ is effectively zero across participants.


Alternative Hypothesis (H₁): The quadratic term (β₂) significantly improves reaction time predictions, i.e. β₂ is meaningfully different from zero.


In [15]:
def _paired_ic_comparison(linear_ic, quadratic_ic, kind):
    """Return (linear, quadratic, diff, se, p) for one criterion (`kind` is "waic" or "loo")."""
    linear_value = getattr(linear_ic, f"elpd_{kind}")
    quadratic_value = getattr(quadratic_ic, f"elpd_{kind}")

    linear_i = np.asarray(getattr(linear_ic, f"{kind}_i"), dtype=float)
    quadratic_i = np.asarray(getattr(quadratic_ic, f"{kind}_i"), dtype=float)
    if linear_i.shape != quadratic_i.shape:
        raise ValueError(
            f"pointwise {kind} shapes differ ({linear_i.shape} vs {quadratic_i.shape}); "
            "both models must be evaluated on the same observations"
        )

    diff = linear_value - quadratic_value
    # Both models are scored on the same observations, so their pointwise values
    # are correlated. The standard error of the difference must therefore come
    # from the paired pointwise differences, not from adding the two per-model
    # variances as if the models were independent.
    pointwise_diff = (linear_i - quadratic_i).ravel()
    se = np.sqrt(pointwise_diff.size * np.var(pointwise_diff))

    # norm.sf keeps precision for large |z|, where 1 - cdf cancels to exactly 0.
    p_value = 2 * norm.sf(abs(diff / se)) if se > 0 else np.nan
    return linear_value, quadratic_value, diff, se, p_value


def compare_models_with_information_criteria(linear_trace, quadratic_trace, var_name="log_RT"):
    """
    Compare linear and quadratic models using WAIC and LOO-CV.

    Both criteria are requested on the deviance scale (lower is better), so a
    positive ``linear - quadratic`` difference favours the quadratic model.
    The difference is tested with a normal approximation (two-tailed z-test)
    using the standard error of the paired pointwise differences; treat the
    p-value as a rough guide only.

    NOTE(review): relies on the ArviZ result fields ``elpd_waic`` / ``elpd_loo``
    and the pointwise ``waic_i`` / ``loo_i`` — confirm against the installed
    ArviZ version.

    Parameters
    ----------
    linear_trace, quadratic_trace : InferenceData
        Fitted models; each must carry a ``log_likelihood`` group.
    var_name : str, optional
        Name of the variable inside the ``log_likelihood`` group to score.
        Defaults to ``"log_RT"``.

    Returns
    -------
    dict
        ``waic_diff``, ``waic_p``, ``loo_diff``, ``loo_p`` and the boolean flags
        ``quadratic_better_waic`` / ``quadratic_better_loo`` (difference > 0
        and p < 0.05).

    Raises
    ------
    ValueError
        If the two models' pointwise values do not have the same shape.
    """
    # var_name must name the variable inside the log_likelihood group (not the
    # group itself); pointwise values are needed for the paired standard error.
    ic_kwargs = dict(pointwise=True, var_name=var_name, scale="deviance")

    linear_waic = az.waic(linear_trace, **ic_kwargs)
    quadratic_waic = az.waic(quadratic_trace, **ic_kwargs)
    linear_loo = az.loo(linear_trace, **ic_kwargs)
    quadratic_loo = az.loo(quadratic_trace, **ic_kwargs)

    lin_waic, quad_waic, waic_diff, waic_se, waic_p = _paired_ic_comparison(
        linear_waic, quadratic_waic, "waic"
    )
    lin_loo, quad_loo, loo_diff, loo_se, loo_p = _paired_ic_comparison(
        linear_loo, quadratic_loo, "loo"
    )

    print(f"WAIC: Linear = {lin_waic:.2f}, Quadratic = {quad_waic:.2f}")
    print(f"WAIC difference: {waic_diff:.2f} ± {waic_se:.2f}, p = {waic_p:.4f}")
    print(f"LOO: Linear = {lin_loo:.2f}, Quadratic = {quad_loo:.2f}")
    print(f"LOO difference: {loo_diff:.2f} ± {loo_se:.2f}, p = {loo_p:.4f}")

    return {
        "waic_diff": waic_diff,
        "waic_p": waic_p,
        "loo_diff": loo_diff,
        "loo_p": loo_p,
        "quadratic_better_waic": bool(waic_diff > 0 and waic_p < 0.05),
        "quadratic_better_loo": bool(loo_diff > 0 and loo_p < 0.05),
    }

In [16]:
# hypothesis testing
def test_beta2_significance(trace):
    """
    Test if the quadratic parameter (beta2) is significantly different from zero
    by examining posterior distributions.
    """
    # Extract β₂ posterior samples (both group-level and individual-level)
    mu_beta2 = trace.posterior["group_beta2_mu"].values.flatten()
    individual_beta2 = trace.posterior["beta2"].values
    
    # Group-level analysis
    beta2_mean = np.mean(mu_beta2)
    beta2_hdi = az.hdi(mu_beta2, hdi_prob=0.95)
    zero_in_hdi = (beta2_hdi[0] <= 0 <= beta2_hdi[1])
    
    # Individual-level analysis
    n_participants = individual_beta2.shape[2]
    significant_participants = 0
    
    for p in range(n_participants):
        participant_beta2 = individual_beta2[:, :, p].flatten()
        p_hdi = az.hdi(participant_beta2, hdi_prob=0.95)
        if not (p_hdi[0] <= 0 <= p_hdi[1]):
            significant_participants += 1
    
    proportion_significant = significant_participants / n_participants
    
    # Print results
    print(f"Group-level β₂ mean: {beta2_mean:.4f}")
    print(f"Group-level β₂ 95% HDI: [{beta2_hdi[0]:.4f}, {beta2_hdi[1]:.4f}]")
    print(f"Zero within HDI: {zero_in_hdi}")
    print(f"Participants with significant β₂: {significant_participants}/{n_participants} ({proportion_significant*100:.1f}%)")
    
    return {
        "group_level_significant": not zero_in_hdi,
        "individual_significant_proportion": proportion_significant,
        "beta2_mean": beta2_mean,
        "beta2_hdi": beta2_hdi
    }

In [17]:
# Run the β₂ check on the quadratic model and keep the returned summary dict,
# which the interpretation cell below consumes.
results_quadratic = test_beta2_significance(quadratic_trace)

Group-level β₂ mean: -0.0031
Group-level β₂ 95% HDI: [-0.0096, 0.0015]
Zero within HDI: True
Participants with significant β₂: 1/10 (10.0%)


In [18]:
# print_beta2_interpretation writes the report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the cell output.
print_beta2_interpretation(results_quadratic)



SUMMARY:
The analysis shows no significant evidence that the quadratic term (β₂) improves your reaction time model. There is a minimal quadratic effect that is practically indistinguishable from zero.

DETAILED ANALYSIS:
• Group-level β₂ mean: -0.0031
• Group-level β₂ 95% HDI: [-0.0096, 0.0015]
• The 95% HDI interval contains zero, meaning we cannot reject the null hypothesis that β₂ = 0 at the group level.
• Only 10.0% of participants showed a significant individual effect, suggesting the quadratic pattern is rare in your sample.

RECOMMENDATIONS:
1. The linear model is likely sufficient; added complexity of the quadratic model is not justified
2. Reaction times appear to change approximately linearly with decision difficulty
3. Consider exploring alternative predictors or model formulations

MODEL SELECTION:
✓ The linear model is sufficient; quadratic term not justified


None
