In [None]:
def train_ar_model(train_data, d=1, step=1):
    """
    Build and sample a hierarchical PyMC car-following model with per-vehicle
    AR coefficients.

    Each vehicle gets its own five behavioural parameters (four log-normal
    multipliers plus a fifth derived as ``ratio * fourth``), its own noise
    scales ``s_a_{i}`` / ``s_v_{i}`` and its own AR coefficients ``rho[i, :]``.
    The likelihood for vehicle ``i`` is a Normal on ``label_v`` whose mean is
    ``vt + a_idm * dt`` plus an AR correction built from one-step residuals.

    Args:
        train_data: Mapping that must provide the keys 'vt', 's', 'dv',
            'label_v', 'id_idx' and 'n_vehicles'. The first five are indexed
            with a boolean mask built from ``id_idx == i``, so they are
            presumably equal-length 1-D NumPy arrays -- TODO confirm with the
            caller.
        d: Number of AR lags; sets the size of the 'ar_lag' coordinate and the
            last axis of ``rho``.
        step: Only used in the minimum-sample threshold ``d * step + 5``.

    Returns:
        Tuple ``(trace, constant_ar_model)``: the sampling result returned by
        ``pm.sample(..., return_inferencedata=True)`` and the model object.
    """
    print("Starting AR model training with individual AR coefficients...")
    
    vt = train_data['vt']
    s = train_data['s']
    dv = train_data['dv']
    label_v = train_data['label_v']
    id_idx = train_data['id_idx']
    N_veh = train_data['n_vehicles']
    
    dt = 0.5   # time step used to turn acceleration into a speed increment
    D = 5      # number of per-vehicle parameters (size of the 'parameter' coord)
    DELTA = 4  # exponent applied to vt / vmax in the acceleration formula
    
    coords = {
        "veh_id": np.arange(N_veh),
        "ar_lag": np.arange(d),
        "parameter": np.arange(D)
    }
    
    with pm.Model(coords=coords) as constant_ar_model:
        # A D x D Cholesky factor is drawn, but only its top-left 4 x 4 block is
        # used below (chol[:4, :4]); the fifth row/column is never referenced.
        chol, _, _ = pm.LKJCholeskyCov('chol', n=D, eta=2.0, 
                                      sd_dist=pm.Exponential.dist(2, shape=D))
        
        # Population-level log means of the first four parameters.
        log_mu_vmax = pm.Normal('log_mu_vmax', mu=0, sigma=0.1)
        log_mu_dsafe = pm.Normal('log_mu_dsafe', mu=0, sigma=0.1)
        log_mu_tsafe = pm.Normal('log_mu_tsafe', mu=0, sigma=0.1)
        log_mu_amax = pm.Normal('log_mu_amax', mu=0, sigma=0.1)
        
        # The fifth parameter is modelled as a ratio of the fourth, with a
        # population-level log mean centred on log(0.5).
        log_ratio_mu = pm.Normal('log_ratio_mu', mu=np.log(0.5), sigma=0.1)
        ratio_mu = pm.Deterministic('ratio_mu', pt.exp(log_ratio_mu))
        
        # Per-vehicle multiplicative deviation of the ratio.
        log_ratio_raw = pm.Normal('log_ratio_raw', mu=0, sigma=0.1, shape=N_veh)
        ratio_individual = pm.Deterministic('ratio_individual', ratio_mu * pt.exp(log_ratio_raw))
        
        # Per-vehicle raw offsets for the first four parameters.
        vals_raw_vmax = pm.Normal('vals_raw_vmax', mu=0, sigma=0.1, shape=N_veh)
        vals_raw_dsafe = pm.Normal('vals_raw_dsafe', mu=0, sigma=0.1, shape=N_veh)
        vals_raw_tsafe = pm.Normal('vals_raw_tsafe', mu=0, sigma=0.1, shape=N_veh)
        vals_raw_amax = pm.Normal('vals_raw_amax', mu=0, sigma=0.1, shape=N_veh)
        
        # Stack to (N_veh, 4) so one matrix product applies the correlation.
        vals_raw_first4 = pm.Deterministic('vals_raw_first4', pt.stack([
            vals_raw_vmax, vals_raw_dsafe, vals_raw_tsafe, vals_raw_amax
        ], axis=1))
        
        # Log-parameters = population mean + raw offsets mixed through the
        # 4 x 4 Cholesky block; exponentiating keeps every parameter positive.
        log_mu_first4 = pt.stack([log_mu_vmax, log_mu_dsafe, log_mu_tsafe, log_mu_amax])
        log_parameters_first4 = pm.Deterministic('log_parameters_first4', 
                                               log_mu_first4 + pt.dot(vals_raw_first4, chol[:4, :4].T))
        parameters_first4 = pm.Deterministic('parameters_first4', pt.exp(log_parameters_first4))
        
        # Fifth parameter: per-vehicle ratio times the fourth parameter.
        amin_individual = ratio_individual * parameters_first4[:, 3]
        
        parameters = pm.Deterministic('parameters', pt.stack([
            parameters_first4[:, 0],
            parameters_first4[:, 1],
            parameters_first4[:, 2],
            parameters_first4[:, 3],
            amin_individual
        ], axis=1), dims=('veh_id', 'parameter'))
        
        s_a_list = []
        s_v_list = []
        
        # One pair of named noise scales per vehicle. They are created for every
        # vehicle, including those skipped by the data-size check further down.
        for i in range(N_veh):
            s_a_i = pm.Exponential(f's_a_{i}', lam=2000)
            s_v_i = pm.Exponential(f's_v_{i}', lam=4000)
            s_a_list.append(s_a_i)
            s_v_list.append(s_v_i)
        
        # AR coefficients: shared mean per lag plus a per-vehicle offset.
        rho_mu = pm.Normal('rho_mu', mu=0., sigma=0.4, shape=d)
        rho_raw = pm.Normal('rho_raw', mu=0, sigma=0.1, dims=("veh_id", "ar_lag"))
        rho = pm.Deterministic('rho', rho_mu + rho_raw, dims=("veh_id", "ar_lag"))
        
        for i in range(N_veh):
            mask = (id_idx == i)
            # Vehicles with too few samples get no likelihood term.
            if np.sum(mask) > (d * step) + 5:
                s_veh = s[mask]
                vt_veh = vt[mask]
                dv_veh = dv[mask]
                label_veh = label_v[mask]
                
                # The sampled parameters act as multipliers on fixed nominal
                # values (25, 2, 1.6, 1.5, 1.5).
                vmax = 25 * parameters[i, 0]
                dsafe = 2 * parameters[i, 1]
                tsafe = 1.6 * parameters[i, 2]
                amax = 1.5 * parameters[i, 3]
                amin = 1.5 * parameters[i, 4]
                
                # Desired gap, then the acceleration term named a_idm.
                sn = dsafe + vt_veh * tsafe + \
                     vt_veh * dv_veh / (2 * pm.math.sqrt(amax * amin))
                a_idm = amax * (1 - (vt_veh / vmax) ** DELTA - (sn / s_veh) ** 2)
                
                # Baseline one-step speed prediction before any AR correction.
                mean_speed = vt_veh + a_idm * dt
                
                n = len(vt_veh)
                if n > d:
                    for lag in range(d):
                        if n > lag + 1:
                            # Residual between the observed speed change and the
                            # change predicted by a_idm over one step.
                            vt_diff = vt_veh[lag+1:n] - vt_veh[lag:n-1]
                            a_lag = a_idm[lag:n-1]
                            
                            ar_correction = rho[i, lag] * (vt_diff - a_lag * dt)
                            
                            # Corrections are only applied from index d onward.
                            # NOTE(review): with start_idx == d the slice below
                            # resolves to the residual at t-1 for every lag, so
                            # for d > 1 all lags appear to reuse the same
                            # residual -- confirm this is intended.
                            start_idx = max(d, lag+1)
                            if start_idx < n:
                                correction_length = n - start_idx
                                ar_slice = ar_correction[start_idx-lag-1:start_idx-lag-1+correction_length]
                                mean_speed = pt.set_subtensor(
                                    mean_speed[start_idx:n],
                                    mean_speed[start_idx:n] + ar_slice
                                )
                
                # Combine the two per-vehicle noise scales into one std-dev.
                total_sigma = pm.math.sqrt((s_a_list[i] * dt) ** 2 + s_v_list[i] ** 2)
                pm.Normal(f'obs_{i}', mu=mean_speed, 
                         sigma=total_sigma,
                         observed=label_veh)

        try:
            trace = pm.sample(
                draws=600, 
                tune=600, 
                random_seed=42, 
                chains=2,
                target_accept=0.9, 
                return_inferencedata=True
            )
            print("AR model training with individual AR coefficients completed!")
        except Exception as e:
            # NOTE(review): the retry is announced as "more conservative" but it
            # lowers target_accept from 0.9 to 0.8 while increasing draws/tune;
            # confirm these are the intended fallback settings.
            print(f"Sampling error: {e}")
            print("Trying more conservative sampling settings...")
            trace = pm.sample(
                draws=1500, 
                tune=1000, 
                random_seed=42,
                chains=2, 
                target_accept=0.8,
                return_inferencedata=True
            )
            print("AR model training with individual AR coefficients completed (using conservative settings)!")
    
    return trace, constant_ar_model

In [None]:
import numpy as np
import pymc as pm
import pytensor.tensor as pt
import arviz as az
import matplotlib.pyplot as plt
from scipy import signal
from collections import deque

def calculate_metrics(y_true, y_pred):
    """Compute MSE, RMSE, MAE and range-normalised RMSE between two series.

    Args:
        y_true: Array-like of reference values (lists are accepted).
        y_pred: Array-like of predictions, broadcastable against ``y_true``.

    Returns:
        dict with keys 'mse', 'rmse', 'mae' and 'nrmse'. 'nrmse' is the RMSE
        divided by ``max(y_true) - min(y_true)``; it is NaN when that range is
        zero because the normalisation is undefined for a constant reference.

    Raises:
        ValueError: If ``y_true`` is empty (raised by ``np.max``).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    residual = y_true - y_pred
    mse = np.mean(residual ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(residual))

    # Guard the normalisation: a constant reference series has zero range and
    # would otherwise trigger a divide-by-zero warning and yield inf/NaN.
    value_range = np.max(y_true) - np.min(y_true)
    nrmse = rmse / value_range if value_range > 0 else np.nan

    return {
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'nrmse': nrmse
    }

def _finite_difference_acceleration(velocity, dt):
    """Central-difference derivative followed by a short moving average."""
    acceleration = np.zeros_like(velocity)
    acceleration[1:-1] = (velocity[2:] - velocity[:-2]) / (2 * dt)
    # One-sided differences at both ends.
    acceleration[0] = (velocity[1] - velocity[0]) / dt
    acceleration[-1] = (velocity[-1] - velocity[-2]) / dt

    window = min(5, len(acceleration))
    return np.convolve(acceleration, np.ones(window) / window, mode='same')


def robust_smooth_acceleration(velocity, dt=0.5, window_size=7, poly_order=2):
    """Estimate acceleration from a speed series with Savitzky-Golay smoothing.

    Args:
        velocity: 1-D array-like of speeds sampled every ``dt``.
        dt: Sampling interval.
        window_size: Savitzky-Golay window length.
        poly_order: Savitzky-Golay polynomial order.

    Returns:
        NumPy array of the same length as ``velocity``. Series shorter than
        two samples yield zeros; series shorter than ``window_size`` use
        ``np.gradient``; otherwise the first derivative of a Savitzky-Golay
        fit is returned, with the five samples at each end replaced by a
        linear trend fitted to the neighbouring five samples. If the filter
        or the edge fit raises, a smoothed finite-difference estimate is
        returned instead.
    """
    velocity = np.asarray(velocity, dtype=float)
    n = len(velocity)

    # np.gradient needs at least two samples; a shorter series has no slope.
    if n < 2:
        return np.zeros_like(velocity)
    if n < window_size:
        return np.gradient(velocity, dt)

    try:
        # scipy exposes the filter as `savgol_filter`; the previously used
        # `signal.savitzky_golay` does not exist, so this path never ran.
        acceleration = signal.savgol_filter(velocity, window_length=window_size,
                                            polyorder=poly_order, deriv=1, delta=dt)

        if len(acceleration) > 10:
            x_fit = np.arange(5) * dt

            # NOTE(review): both edges are overwritten with the fitted line
            # evaluated at the fit's own abscissae (reversed at the tail)
            # rather than extrapolated outward; kept as in the original --
            # confirm this is the intended edge treatment.
            slope, intercept = np.polyfit(x_fit, acceleration[5:10], 1)
            acceleration[:5] = intercept + slope * x_fit

            # The tail fit is done after the head has been overwritten, which
            # matters for series of 11-14 samples where the two ranges overlap.
            slope, intercept = np.polyfit(x_fit, acceleration[-10:-5], 1)
            acceleration[-5:] = (intercept + slope * x_fit)[::-1]

        return acceleration

    except Exception:
        # Best-effort fallback (e.g. invalid window/order combination or a
        # failed least-squares fit) without swallowing KeyboardInterrupt.
        return _finite_difference_acceleration(velocity, dt)

def calculate_initial_acceleration(velocity, dt=0.5, method='savitzky_golay'):
    """Estimate the acceleration at the start of a speed series.

    Args:
        velocity: 1-D sequence of speeds sampled every ``dt``.
        dt: Sampling interval.
        method: One of
            'savitzky_golay' -- first element of ``robust_smooth_acceleration``;
            'robust_fit' -- slope of a straight line through the first (up to)
            five samples;
            'physical_constrained' -- slope of a weighted straight-line fit
            through the first four samples (weights 0.1, 0.2, 0.3, 0.4), or a
            plain forward difference when only three samples exist;
            anything else -- forward difference clipped to [-3, 3].

    Returns:
        The acceleration estimate as a scalar; ``0.0`` when fewer than three
        samples are supplied.
    """
    if len(velocity) < 3:
        return 0.0

    if method == 'savitzky_golay':
        return robust_smooth_acceleration(velocity, dt)[0]

    if method == 'robust_fit':
        n_points = min(5, len(velocity))
        t_points = np.arange(n_points) * dt
        slope, _ = np.polyfit(t_points, velocity[:n_points], 1)
        return slope

    if method == 'physical_constrained':
        if len(velocity) < 4:
            return (velocity[1] - velocity[0]) / dt

        weights = np.array([0.1, 0.2, 0.3, 0.4])
        weights = weights / np.sum(weights)

        # Only four weights are defined, so the fit must use exactly four
        # samples; using the whole series made the matrix shapes disagree
        # and raised for any input longer than four samples.
        v_points = np.asarray(velocity[:len(weights)], dtype=float)
        t_points = np.arange(len(weights)) * dt

        # Weighted least squares via the normal equations (A^T W A) x = A^T W v.
        A = np.vstack([t_points, np.ones(len(t_points))]).T
        W = np.diag(weights)
        slope, _ = np.linalg.lstsq(A.T @ W @ A, A.T @ W @ v_points, rcond=None)[0]
        return slope

    initial_acc = (velocity[1] - velocity[0]) / dt
    return np.clip(initial_acc, -3.0, 3.0)

def improved_robust_acceleration(velocity, dt=0.5, window_size=7, poly_order=2):
    """Smooth acceleration estimate bounded to [-3, 3] with flattened ends.

    Delegates to ``robust_smooth_acceleration``, clips the result to
    [-3.0, 3.0] and, for series longer than ten samples, overwrites the first
    and last three values with the mean of the first and last five values
    respectively.
    """
    smoothed = robust_smooth_acceleration(velocity, dt, window_size, poly_order)
    bounded = np.clip(smoothed, -3.0, 3.0)

    # Short series are returned as-is; there is too little data to flatten.
    if len(bounded) <= 10:
        return bounded

    head_level = np.mean(bounded[:5])
    bounded[:3] = head_level

    tail_level = np.mean(bounded[-5:])
    bounded[-3:] = tail_level

    return bounded

def split_data_for_ar_idm(ar_idm_data, train_ratio=0.7):
    """Split every vehicle's samples chronologically into train/validation.

    For each vehicle id found in ``id_idx`` with at least 20 samples, the
    first ``int(n_points * train_ratio)`` samples go to the training set and
    the remainder to the validation set. Vehicles with fewer samples are
    dropped from both sets.

    Args:
        ar_idm_data: Mapping with equal-length 1-D array-likes under 'vt',
            's', 'dv', 'label_v' and 'id_idx', plus 'n_vehicles'.
        train_ratio: Fraction of each vehicle's samples used for training.

    Returns:
        Tuple ``(train_data, val_data)`` of dicts with the keys 'vt', 's',
        'dv', 'label_v', 'id_idx', 'n_vehicles' and 'tracks'.
        ``train_data['tracks'][veh_id]`` stores the last training 'vt', 's'
        and 'dv' of that vehicle; ``val_data['tracks']`` is left empty.
    """
    series = {
        name: np.asarray(ar_idm_data[name])
        for name in ('vt', 's', 'dv', 'label_v')
    }
    id_idx = np.asarray(ar_idm_data['id_idx'])

    # Collect per-vehicle pieces and concatenate once at the end; growing the
    # arrays with np.concatenate inside the loop copies everything each time.
    # The empty seed arrays keep the original result dtypes.
    train_parts = {name: [np.array([])] for name in series}
    val_parts = {name: [np.array([])] for name in series}
    train_ids = [np.array([], dtype=int)]
    val_ids = [np.array([], dtype=int)]
    train_tracks = {}

    for veh_id in np.unique(id_idx):
        veh_indices = np.flatnonzero(id_idx == veh_id)
        n_points = veh_indices.size

        if n_points < 20:
            continue

        split_point = int(n_points * train_ratio)
        train_indices = veh_indices[:split_point]
        val_indices = veh_indices[split_point:]

        for name, values in series.items():
            train_parts[name].append(values[train_indices])
            val_parts[name].append(values[val_indices])
        train_ids.append(np.full(train_indices.size, veh_id))
        val_ids.append(np.full(val_indices.size, veh_id))

        if train_indices.size > 0:
            last = train_indices[-1]
            train_tracks[veh_id] = {
                'last_vt': series['vt'][last],
                'last_s': series['s'][last],
                'last_dv': series['dv'][last]
            }

    train_data = {name: np.concatenate(parts) for name, parts in train_parts.items()}
    train_data['id_idx'] = np.concatenate(train_ids)
    train_data['n_vehicles'] = ar_idm_data['n_vehicles']
    train_data['tracks'] = train_tracks

    val_data = {name: np.concatenate(parts) for name, parts in val_parts.items()}
    val_data['id_idx'] = np.concatenate(val_ids)
    val_data['n_vehicles'] = ar_idm_data['n_vehicles']
    val_data['tracks'] = {}

    print(f"Training set: {len(train_data['vt'])} data points")
    print(f"Validation set: {len(val_data['vt'])} data points")

    return train_data, val_data

In [None]:
def _idm_acceleration(v, s, dv, vmax, dsafe, tsafe, amax, amin, delta):
    """Return the IDM-style acceleration for one state, clipped to [-amin, amax]."""
    sn = dsafe + v * tsafe + v * dv / (2 * np.sqrt(amax * amin))
    acc = amax * (1 - (v / vmax) ** delta - (sn / s) ** 2)
    return np.clip(acc, -amin, amax)


def _average_metrics(metrics_list):
    """Average each metric ('mse', 'rmse', 'mae', 'nrmse') over a list of metric dicts."""
    return {
        key: np.mean([m[key] for m in metrics_list])
        for key in ('mse', 'rmse', 'mae', 'nrmse')
    }


def _simulate_vehicle(vt_veh, s_veh, dv_veh, real_accel_veh, idm_params, rho_veh,
                      initial_acc, dt, delta, d, warmup_steps):
    """Roll one vehicle's trajectory forward and return (speed, spacing, acceleration) arrays."""
    vmax, dsafe, tsafe, amax, amin = idm_params

    # Keep the starting acceleration inside the vehicle's own limits.
    initial_acc = np.clip(initial_acc, -amin, amax)

    # The simulation starts from the first validation sample.
    v_sim = [vt_veh[0]]
    s_sim = [s_veh[0]]
    a_sim = [initial_acc]

    # Rolling histories feeding the two AR corrections.
    v_history_real = deque([vt_veh[0]], maxlen=d + 1)
    idm_acc_history = deque([initial_acc], maxlen=d)
    acceleration_errors = deque([], maxlen=d)

    n = len(vt_veh)
    for i in range(n - 1):
        # Error of the model acceleration at the previous step against the
        # reference acceleration; recorded in warm-up and afterwards alike.
        if i > 0:
            a_idm_prev = _idm_acceleration(v_sim[i - 1], s_sim[i - 1], dv_veh[i - 1],
                                           vmax, dsafe, tsafe, amax, amin, delta)
            acceleration_errors.append(real_accel_veh[i - 1] - a_idm_prev)

        current_dv = dv_veh[i]
        current_acc_idm = _idm_acceleration(v_sim[i], s_sim[i], current_dv,
                                            vmax, dsafe, tsafe, amax, amin, delta)

        if i < warmup_steps:
            # Warm-up: copy the observed next state instead of predicting it.
            v_next = vt_veh[i + 1]
            s_next = max(1.0, min(200.0, s_veh[i + 1]))
            a_next = np.clip(real_accel_veh[i], -amin, amax)
        else:
            # Speed correction from the last observed speed changes versus
            # what the model acceleration explained.
            v_ar_correction = 0.0
            if len(v_history_real) > d:
                for lag in range(d):
                    vt_diff = v_history_real[-1] - v_history_real[-(lag + 2)]
                    a_lag = idm_acc_history[-(lag + 1)] if len(idm_acc_history) > lag else 0.0
                    v_ar_correction += rho_veh[lag] * (vt_diff - a_lag * dt)

            # Acceleration correction from the recent acceleration errors.
            a_ar_correction = 0.0
            if len(acceleration_errors) >= d:
                for lag in range(d):
                    a_ar_correction += rho_veh[lag] * acceleration_errors[-(lag + 1)]

            v_next = v_sim[i] + current_acc_idm * dt + v_ar_correction
            a_next = np.clip(current_acc_idm + a_ar_correction, -amin, amax)

            s_next = s_sim[i] + (current_dv + 0.5 * v_ar_correction) * dt + 0.5 * current_acc_idm * dt * dt
            s_next = max(1.0, min(200.0, s_next))

        v_sim.append(v_next)
        s_sim.append(s_next)
        a_sim.append(a_next)

        # The histories always advance with the observed speed and the
        # uncorrected model acceleration of this step.
        v_history_real.append(vt_veh[i + 1])
        idm_acc_history.append(current_acc_idm)

    return np.array(v_sim), np.array(s_sim), np.array(a_sim)


def validate_ar_model_comprehensive_improved(trace, model, train_data, val_data, n_samples=50):
    """
    Validate the AR model on held-out data using random posterior draws.

    For each of ``n_samples`` randomly chosen (chain, draw) pairs, every
    validation vehicle is simulated with that draw's per-vehicle parameters
    and AR coefficients. The first ``warmup_steps`` (5) steps of each vehicle
    copy the observed speed, spacing and acceleration; later steps apply the
    model acceleration plus AR corrections to speed and acceleration.
    Vehicles with too few validation samples get a constant prediction.
    Metrics are printed per vehicle (on the sample-averaged prediction) and
    overall (averaged over samples, excluding 5% at each end).

    Args:
        trace: Sampling result exposing ``trace.posterior`` with the variables
            'parameters' and 'rho' and the dimensions 'chain' and 'draw'.
        model: Unused; kept for interface compatibility.
        train_data: Training split providing 'tracks', 'vt' and 'id_idx'.
        val_data: Validation split providing 'vt', 's', 'dv', 'label_v' and
            'id_idx'. Vehicle ids are used directly as row indices into the
            posterior arrays. The overall metrics assume the samples are
            grouped by vehicle in ascending id order -- TODO confirm.
        n_samples: Number of posterior draws to evaluate (at least 1).

    Returns:
        dict with averaged speed/spacing/acceleration metrics, per-vehicle
        metrics, all per-sample predictions, the reference acceleration, the
        posterior-mean parameters and AR coefficients, and bookkeeping values
        ('valid_indices', 'n_samples', 'unique_vehicles', 'warmup_steps').

    Raises:
        ValueError: If ``n_samples`` is smaller than 1.
    """
    if n_samples < 1:
        raise ValueError("n_samples must be at least 1")

    print("\nStarting improved AR model validation with posterior sampling...")

    n_chains = len(trace.posterior.chain)
    n_draws = len(trace.posterior.draw)

    # Draw (chain, draw) pairs with replacement from the posterior.
    sample_indices = [
        (np.random.randint(0, n_chains), np.random.randint(0, n_draws))
        for _ in range(n_samples)
    ]

    vt_val = val_data['vt']
    s_val = val_data['s']
    dv_val = val_data['dv']
    label_val = val_data['label_v']
    id_idx_val = val_data['id_idx']

    dt = 0.5
    DELTA = 4
    d = 1
    warmup_steps = 5

    unique_vehicles = np.unique(id_idx_val)

    print("Calculating real acceleration using improved robust method...")
    # Differentiate each vehicle's speed on its own: smoothing the
    # concatenated series would mix neighbouring vehicles at every track
    # boundary, which is exactly where the warm-up samples are taken.
    real_acceleration_all = np.zeros(len(vt_val), dtype=float)

    # Everything that does not depend on the posterior draw is prepared once.
    vehicle_inputs = {}
    for veh_id in unique_vehicles:
        mask = (id_idx_val == veh_id)
        vt_veh = vt_val[mask]
        if len(vt_veh) >= 2:
            real_acceleration_all[mask] = improved_robust_acceleration(vt_veh, dt)

        # Starting acceleration from the tail of the training data, when the
        # vehicle is long enough to be simulated at all.
        initial_acc = 0.0
        if len(vt_veh) > d + 5 and veh_id in train_data['tracks']:
            train_vt = train_data['vt'][train_data['id_idx'] == veh_id]
            if len(train_vt) >= 5:
                initial_acc = calculate_initial_acceleration(train_vt[-5:], dt, method='savitzky_golay')

        vehicle_inputs[veh_id] = {
            'vt': vt_veh,
            's': s_val[mask],
            'dv': dv_val[mask],
            'real_accel': real_acceleration_all[mask],
            'initial_acc': initial_acc
        }

    all_samples_speed_predictions = []
    all_samples_spacing_predictions = []
    all_samples_acceleration_predictions = []
    all_samples_ar_coefficients = []
    all_samples_vehicle_predictions = {}

    for sample_idx, (chain_idx, draw_idx) in enumerate(sample_indices):
        print(f"Processing posterior sample {sample_idx + 1}/{n_samples}...")

        individual_params = trace.posterior['parameters'].sel(chain=chain_idx, draw=draw_idx).values
        ar_coeffs = trace.posterior['rho'].sel(chain=chain_idx, draw=draw_idx).values

        all_speed_predictions = []
        all_spacing_predictions = []
        all_acceleration_predictions = []
        all_ar_coefficients_veh = []
        sample_vehicle_predictions = {}

        # Process each vehicle separately
        for veh_id in unique_vehicles:
            inputs = vehicle_inputs[veh_id]
            vt_veh = inputs['vt']
            s_veh = inputs['s']
            dv_veh = inputs['dv']
            real_accel_veh = inputs['real_accel']
            n = len(vt_veh)

            if n > d + 5:  # Ensure enough data points
                # Sampled multipliers scaled by the same nominal values used
                # during training.
                idm_params = (
                    25 * individual_params[veh_id, 0],
                    2 * individual_params[veh_id, 1],
                    1.6 * individual_params[veh_id, 2],
                    1.5 * individual_params[veh_id, 3],
                    1.5 * individual_params[veh_id, 4],
                )

                speed_pred, spacing_pred, accel_pred = _simulate_vehicle(
                    vt_veh, s_veh, dv_veh, real_accel_veh, idm_params,
                    ar_coeffs[veh_id], inputs['initial_acc'],
                    dt, DELTA, d, warmup_steps
                )

                # The coefficients are constant over time; repeat per step.
                veh_ar_coeffs = np.zeros((n, d))
                veh_ar_coeffs[:] = ar_coeffs[veh_id, :d]
                veh_warmup = warmup_steps
            else:
                # Too few data points, use simple predictions
                if veh_id in train_data['tracks']:
                    initial_v = train_data['tracks'][veh_id]['last_vt']
                    initial_s = train_data['tracks'][veh_id]['last_s']
                else:
                    initial_v = vt_veh[0] if n > 0 else 0.0
                    initial_s = s_veh[0] if len(s_veh) > 0 else 10.0

                speed_pred = np.full(n, initial_v)
                spacing_pred = np.full(len(s_veh), initial_s)
                accel_pred = np.zeros(n)
                veh_ar_coeffs = np.zeros((n, d))
                veh_warmup = 0

            all_speed_predictions.extend(speed_pred)
            all_spacing_predictions.extend(spacing_pred)
            all_acceleration_predictions.extend(accel_pred)
            all_ar_coefficients_veh.append(veh_ar_coeffs)

            sample_vehicle_predictions[veh_id] = {
                'speed_pred': speed_pred,
                'spacing_pred': spacing_pred,
                'acceleration_pred': accel_pred,
                'speed_true': vt_veh,
                'spacing_true': s_veh,
                'acceleration_true': real_accel_veh,
                'ar_coefficients': veh_ar_coeffs,
                'warmup_steps': veh_warmup
            }

        # Convert to arrays for this sample
        all_samples_speed_predictions.append(np.array(all_speed_predictions))
        all_samples_spacing_predictions.append(np.array(all_spacing_predictions))
        all_samples_acceleration_predictions.append(np.array(all_acceleration_predictions))
        all_samples_ar_coefficients.append(all_ar_coefficients_veh)
        all_samples_vehicle_predictions[sample_idx] = sample_vehicle_predictions

    min_len = min(
        len(all_samples_speed_predictions[0]),
        len(all_samples_spacing_predictions[0]),
        len(all_samples_acceleration_predictions[0]),
        len(real_acceleration_all),
        len(label_val),
        len(s_val)
    )

    # Drop 5% at each end of the concatenated series from the overall metrics.
    boundary_cut = int(0.05 * min_len)
    valid_indices = slice(boundary_cut, min_len - boundary_cut)

    print(f"Excluding boundary segments: using indices {boundary_cut} to {min_len - boundary_cut}")

    print("\n" + "="*80)
    print("PER-VEHICLE VALIDATION RESULTS (Using All Samples Average)")
    print("="*80)

    vehicle_metrics = {}

    for veh_id in unique_vehicles:
        veh_runs = [all_samples_vehicle_predictions[k][veh_id] for k in range(n_samples)]
        veh_speed_true = veh_runs[0]['speed_true']
        veh_spacing_true = veh_runs[0]['spacing_true']
        veh_accel_true = veh_runs[0]['acceleration_true']

        if len(veh_speed_true) <= 10:
            continue

        # Score the prediction averaged over all posterior samples.
        avg_speed_pred = np.mean([run['speed_pred'] for run in veh_runs], axis=0)
        avg_spacing_pred = np.mean([run['spacing_pred'] for run in veh_runs], axis=0)
        avg_accel_pred = np.mean([run['acceleration_pred'] for run in veh_runs], axis=0)

        veh_boundary_cut = int(0.05 * len(veh_speed_true))
        veh_valid_indices = slice(veh_boundary_cut, len(veh_speed_true) - veh_boundary_cut)

        speed_metrics = calculate_metrics(
            veh_speed_true[veh_valid_indices],
            avg_speed_pred[veh_valid_indices]
        )
        spacing_metrics = calculate_metrics(
            veh_spacing_true[veh_valid_indices],
            avg_spacing_pred[veh_valid_indices]
        )
        acceleration_metrics = calculate_metrics(
            veh_accel_true[veh_valid_indices],
            avg_accel_pred[veh_valid_indices]
        )

        vehicle_metrics[veh_id] = {
            'speed': speed_metrics,
            'spacing': spacing_metrics,
            'acceleration': acceleration_metrics,
            'n_points': len(veh_speed_true[veh_valid_indices])
        }

        print(f"\nVehicle {veh_id} (n={vehicle_metrics[veh_id]['n_points']} points):")
        print(f"  Speed - RMSE: {speed_metrics['rmse']:.4f} m/s, MAE: {speed_metrics['mae']:.4f} m/s, NRMSE: {speed_metrics['nrmse']:.4f}")
        print(f"  Spacing - RMSE: {spacing_metrics['rmse']:.4f} m, MAE: {spacing_metrics['mae']:.4f} m, NRMSE: {spacing_metrics['nrmse']:.4f}")
        print(f"  Acceleration - RMSE: {acceleration_metrics['rmse']:.4f} m/s², MAE: {acceleration_metrics['mae']:.4f} m/s², NRMSE: {acceleration_metrics['nrmse']:.4f}")

    # Overall metrics: score every posterior sample, then average the scores.
    speed_metrics_all = []
    spacing_metrics_all = []
    acceleration_metrics_all = []

    for i in range(n_samples):
        speed_metrics_all.append(calculate_metrics(
            label_val[valid_indices],
            all_samples_speed_predictions[i][valid_indices]
        ))
        spacing_metrics_all.append(calculate_metrics(
            s_val[valid_indices],
            all_samples_spacing_predictions[i][valid_indices]
        ))
        acceleration_metrics_all.append(calculate_metrics(
            real_acceleration_all[valid_indices],
            all_samples_acceleration_predictions[i][valid_indices]
        ))

    avg_speed_metrics = _average_metrics(speed_metrics_all)
    avg_spacing_metrics = _average_metrics(spacing_metrics_all)
    avg_acceleration_metrics = _average_metrics(acceleration_metrics_all)

    print("\n" + "="*80)
    print("VEHICLE PERFORMANCE SUMMARY")
    print("="*80)

    if vehicle_metrics:
        speed_rmse_values = [vm['speed']['rmse'] for vm in vehicle_metrics.values()]
        spacing_rmse_values = [vm['spacing']['rmse'] for vm in vehicle_metrics.values()]
        accel_rmse_values = [vm['acceleration']['rmse'] for vm in vehicle_metrics.values()]

        print(f"Number of vehicles analyzed: {len(vehicle_metrics)}")
        print(f"\nSpeed RMSE - Mean: {np.mean(speed_rmse_values):.4f}, Std: {np.std(speed_rmse_values):.4f}, "
              f"Min: {np.min(speed_rmse_values):.4f}, Max: {np.max(speed_rmse_values):.4f}")
        print(f"Spacing RMSE - Mean: {np.mean(spacing_rmse_values):.4f}, Std: {np.std(spacing_rmse_values):.4f}, "
              f"Min: {np.min(spacing_rmse_values):.4f}, Max: {np.max(spacing_rmse_values):.4f}")
        print(f"Acceleration RMSE - Mean: {np.mean(accel_rmse_values):.4f}, Std: {np.std(accel_rmse_values):.4f}, "
              f"Min: {np.min(accel_rmse_values):.4f}, Max: {np.max(accel_rmse_values):.4f}")

    print("\n" + "="*80)
    print("OVERALL VALIDATION RESULTS")
    print("="*80)
    print(f"Improved AR Validation Results (using {n_samples} posterior samples, excluding boundaries):")
    print(f"\nSpeed Metrics:")
    print(f"  RMSE: {avg_speed_metrics['rmse']:.4f} m/s")
    print(f"  NRMSE: {avg_speed_metrics['nrmse']:.4f}")
    print(f"  MAE: {avg_speed_metrics['mae']:.4f} m/s")

    print(f"\nSpacing Metrics:")
    print(f"  RMSE: {avg_spacing_metrics['rmse']:.4f} m")
    print(f"  NRMSE: {avg_spacing_metrics['nrmse']:.4f}")
    print(f"  MAE: {avg_spacing_metrics['mae']:.4f} m")

    print(f"\nAcceleration Metrics:")
    print(f"  RMSE: {avg_acceleration_metrics['rmse']:.4f} m/s²")
    print(f"  NRMSE: {avg_acceleration_metrics['nrmse']:.4f}")
    print(f"  MAE: {avg_acceleration_metrics['mae']:.4f} m/s²")

    return {
        'speed_metrics': avg_speed_metrics,
        'spacing_metrics': avg_spacing_metrics,
        'acceleration_metrics': avg_acceleration_metrics,
        'vehicle_metrics': vehicle_metrics,
        'all_samples_speed_predictions': all_samples_speed_predictions,
        'all_samples_spacing_predictions': all_samples_spacing_predictions,
        'all_samples_acceleration_predictions': all_samples_acceleration_predictions,
        'all_samples_ar_coefficients': all_samples_ar_coefficients,
        'all_samples_vehicle_predictions': all_samples_vehicle_predictions,
        'real_acceleration': real_acceleration_all,
        'individual_params': trace.posterior['parameters'].mean(dim=("chain", "draw")).values,
        'ar_coeffs': trace.posterior['rho'].mean(dim=("chain", "draw")).values,
        'valid_indices': valid_indices,
        'n_samples': n_samples,
        'unique_vehicles': unique_vehicles,
        'warmup_steps': warmup_steps
    }


def _draw_posterior_panel(ax, time_points, truth, samples, highlighted, title,
                          truth_label, ylabel, band_color, mean_style,
                          median_style, sample_style):
    """
    Draw one truth-vs-posterior panel on `ax`: the observed series, the
    5-95 and 25-75 percentile bands, the mean and median predictions and a
    few individual sample traces.

    `samples` is indexed as [sample, time]; `highlighted` holds the sample
    (row) indices whose individual traces are drawn faintly on top.
    """
    ax.plot(time_points, truth, 'k-', label=truth_label, linewidth=3, alpha=0.9)

    # A single percentile call returns all four band edges, one row each.
    lower_5, lower_25, upper_75, upper_95 = np.percentile(samples, [5, 25, 75, 95], axis=0)
    ax.fill_between(time_points, lower_5, upper_95,
                    alpha=0.3, color=band_color, label='90% CI')
    ax.fill_between(time_points, lower_25, upper_75,
                    alpha=0.5, color=band_color, label='50% CI')

    ax.plot(time_points, np.mean(samples, axis=0), mean_style,
            label='Mean Prediction', linewidth=2, alpha=0.8)
    ax.plot(time_points, np.median(samples, axis=0), median_style,
            label='Median Prediction', linewidth=2, alpha=0.8)

    for trace_idx in highlighted:
        ax.plot(time_points, samples[trace_idx], sample_style, alpha=0.2, linewidth=0.8)

    ax.set_title(title, fontsize=12)
    ax.set_xlabel('Time Index')
    ax.set_ylabel(ylabel)
    ax.legend(loc='upper right', fontsize=8)
    ax.grid(True, alpha=0.3)


def plot_comprehensive_validation_results_improved(val_data, validation_results):
    """
    Plot per-vehicle validation results from posterior samples.

    One figure row is drawn per unique entry of val_data['id_idx'], with
    three panels: speed, spacing and acceleration. Each panel shows the
    observed series, 90% and 50% percentile bands, the mean and median
    prediction and up to 10 randomly chosen sample traces. Afterwards the
    posterior-parameter and uncertainty-summary figures are shown as well.

    Args:
        val_data: mapping providing 'label_v' (observed speed), 's'
            (observed spacing) and 'id_idx' (vehicle id per data point).
        validation_results: mapping providing 'n_samples',
            'real_acceleration' and the three 'all_samples_*_predictions'
            sequences (one array per posterior sample), plus whatever the
            two follow-up plotting functions read.

    Raises:
        ValueError: if val_data['id_idx'] contains no vehicles.
    """
    id_idx_val = val_data['id_idx']
    n_samples = validation_results['n_samples']
    all_samples_speed = validation_results['all_samples_speed_predictions']

    # (quantity, unit, observed series, posterior samples,
    #  band colour, mean style, median style, sample-trace style)
    panels = [
        ('Speed', 'm/s', val_data['label_v'], all_samples_speed,
         'red', 'b-', 'g--', 'r-'),
        ('Spacing', 'm', val_data['s'],
         validation_results['all_samples_spacing_predictions'],
         'magenta', 'g-', 'c--', 'm-'),
        ('Acceleration', 'm/s²', validation_results['real_acceleration'],
         validation_results['all_samples_acceleration_predictions'],
         'orange', 'c-', 'y--', 'y-'),
    ]

    unique_vehicles = np.unique(id_idx_val)
    n_vehicles = len(unique_vehicles)
    if n_vehicles == 0:
        # A zero-row subplot grid cannot be built; fail with a clear message.
        raise ValueError("val_data['id_idx'] is empty; there are no vehicles to plot")

    # squeeze=False keeps `axes` two-dimensional even for a single vehicle.
    _, axes = plt.subplots(n_vehicles, 3, figsize=(20, 5*n_vehicles), squeeze=False)

    for row, veh_id in enumerate(unique_vehicles):
        mask = (id_idx_val == veh_id)
        n_obs = np.sum(mask)
        if n_obs == 0:
            continue

        veh_data_points = min(n_obs, len(all_samples_speed[0]))
        time_points = np.arange(veh_data_points)

        # The same randomly chosen sample traces are highlighted in all
        # three panels of a vehicle.
        highlighted = np.random.choice(len(all_samples_speed),
                                       min(10, len(all_samples_speed)), replace=False)

        for ax, panel in zip(axes[row], panels):
            (quantity, unit, truth, sample_list,
             band_color, mean_style, median_style, sample_style) = panel
            veh_samples = np.array([sample[mask][:veh_data_points] for sample in sample_list])
            _draw_posterior_panel(
                ax, time_points, truth[mask][:veh_data_points], veh_samples, highlighted,
                title=f'Vehicle {veh_id} - {quantity} Prediction\n({n_samples} Posterior Samples)',
                truth_label=f'True {quantity}',
                ylabel=f'{quantity} ({unit})',
                band_color=band_color,
                mean_style=mean_style,
                median_style=median_style,
                sample_style=sample_style,
            )

    plt.tight_layout()
    plt.show()

    plot_posterior_parameter_distributions(validation_results)
    plot_uncertainty_summary(val_data, validation_results)

def _hist_with_mean(ax, values, color, title, xlabel):
    """Draw a density histogram of `values` on `ax` with a dashed line at the mean."""
    mean_value = np.mean(values)
    ax.hist(values, bins=20, alpha=0.7, color=color, edgecolor='black', density=True)
    ax.axvline(mean_value, color='red', linestyle='--', label=f'Mean: {mean_value:.3f}')
    ax.set_title(title, fontsize=12)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Density')
    ax.legend()
    ax.grid(True, alpha=0.3)


def plot_posterior_parameter_distributions(validation_results):
    """
    Plot histograms of the five model parameters and the first AR coefficient.

    validation_results['individual_params'] and ['ar_coeffs'] are built by
    the validation routine as posterior means over chain and draw, so each
    histogram shows how those point estimates vary along the first axis
    (presumably one row per vehicle - confirm), not the posterior
    uncertainty of a single parameter.

    Args:
        validation_results: mapping providing 'individual_params' (2-D,
            at least five columns) and 'ar_coeffs'.
    """
    individual_params = validation_results['individual_params']
    ar_coeffs = validation_results['ar_coeffs']

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    param_names = ['vmax', 'dsafe', 'tsafe', 'amax', 'amin']
    # NOTE(review): presumably the scale factors that map the normalised model
    # parameters back to physical units - confirm against the training code.
    param_scales = [25, 2, 1.6, 1.5, 1.5]

    # axes.flat walks the grid row by row, so the five parameters occupy the
    # first five cells and the last cell is left for the AR coefficient.
    for i, (ax, name, scale) in enumerate(zip(axes.flat, param_names, param_scales)):
        _hist_with_mean(ax, individual_params[:, i] * scale, 'skyblue',
                        f'Posterior: {name}', 'Parameter Value')

    ar_ax = axes[1, 2]
    if len(ar_coeffs.shape) > 1 and ar_coeffs.shape[1] > 0:
        _hist_with_mean(ar_ax, ar_coeffs[:, 0], 'lightcoral',
                        'Posterior: AR Coefficient 1', 'AR Coefficient Value')
    else:
        # Nothing to draw: hide the cell instead of showing an empty frame.
        ar_ax.set_visible(False)

    plt.tight_layout()
    plt.suptitle('Posterior Parameter Distributions', fontsize=16, y=1.02)
    plt.show()

def plot_uncertainty_summary(val_data, validation_results):
    """
    Summarise predictive uncertainty in a 2x2 figure.

    Three panels are histograms of the 90% interval width (95th minus 5th
    percentile across posterior samples, per data point) for speed, spacing
    and acceleration, pooled over all vehicles. The fourth panel shows the
    speed band, mean prediction and observed speed for the first vehicle
    over at most its first 100 data points.

    Args:
        val_data: mapping providing 'id_idx' (vehicle id per data point)
            and 'label_v' (observed speed).
        validation_results: mapping providing the three
            'all_samples_*_predictions' sequences (one array per sample).
    """
    id_idx = val_data['id_idx']
    all_samples_speed = validation_results['all_samples_speed_predictions']
    all_samples_spacing = validation_results['all_samples_spacing_predictions']
    all_samples_acceleration = validation_results['all_samples_acceleration_predictions']

    def interval_widths(samples, mask):
        """Return the 5-95 percentile width per data point selected by `mask`."""
        veh_samples = np.array([sample[mask] for sample in samples])
        lower_5, upper_95 = np.percentile(veh_samples, [5, 95], axis=0)
        return upper_95 - lower_5

    _, axes = plt.subplots(2, 2, figsize=(15, 12))

    all_uncertainty_speed = []
    all_uncertainty_spacing = []
    all_uncertainty_acceleration = []

    unique_vehicles = np.unique(id_idx)

    for veh_id in unique_vehicles:
        mask = (id_idx == veh_id)
        if not np.any(mask):
            continue
        all_uncertainty_speed.extend(interval_widths(all_samples_speed, mask))
        all_uncertainty_spacing.extend(interval_widths(all_samples_spacing, mask))
        all_uncertainty_acceleration.extend(interval_widths(all_samples_acceleration, mask))

    uncertainty_data = [
        (all_uncertainty_speed, 'Speed Uncertainty (90% CI Width)', 'lightblue', 'm/s'),
        (all_uncertainty_spacing, 'Spacing Uncertainty (90% CI Width)', 'lightgreen', 'm'),
        (all_uncertainty_acceleration, 'Acceleration Uncertainty (90% CI Width)', 'lightyellow', 'm/s²')
    ]

    # axes.flat walks the grid row by row: the three histograms fill the first
    # three cells and the bottom-right cell is left for the time-series panel.
    for ax, (data, title, color, unit) in zip(axes.flat, uncertainty_data):
        ax.hist(data, bins=30, alpha=0.7, color=color, edgecolor='black')
        ax.axvline(np.mean(data), color='red', linestyle='--',
                   label=f'Mean: {np.mean(data):.3f} {unit}')
        ax.set_title(title, fontsize=12)
        ax.set_xlabel(f'Uncertainty ({unit})')
        ax.set_ylabel('Frequency')
        ax.legend()
        ax.grid(True, alpha=0.3)

    if len(unique_vehicles) > 0:
        first_vehicle_mask = (id_idx == unique_vehicles[0])
        veh_speed_samples = np.array([sample[first_vehicle_mask] for sample in all_samples_speed])

        # Only the first 100 data points (or fewer) of the vehicle are shown.
        time_points = np.arange(min(100, np.sum(first_vehicle_mask)))
        window = veh_speed_samples[:, :len(time_points)]
        lower_5, upper_95 = np.percentile(window, [5, 95], axis=0)
        mean_speed = np.mean(window, axis=0)

        ax = axes[1, 1]
        ax.fill_between(time_points, lower_5, upper_95, alpha=0.3, color='red', label='90% CI')
        ax.plot(time_points, mean_speed, 'b-', label='Mean Prediction', linewidth=2)
        ax.plot(time_points, val_data['label_v'][first_vehicle_mask][:len(time_points)],
                'k-', label='True Speed', linewidth=2, alpha=0.8)
        ax.set_title(f'Vehicle {unique_vehicles[0]} - Speed Uncertainty Over Time', fontsize=12)
        ax.set_xlabel('Time Index')
        ax.set_ylabel('Speed (m/s)')
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.suptitle('Uncertainty Analysis Summary', fontsize=16, y=1.02)
    plt.show()

def run_calibration_only(ar_idm_data, ar_order=1):
    """
    Run the calibration stage only: split the data, then fit the AR model.

    The data is split with a 0.7 training ratio and the model is trained on
    the training part with `ar_order` passed as the AR order `d`.

    Returns a dict with the keys 'trace', 'model', 'train_data' and
    'val_data'.
    """
    rule = "=" * 60

    print(rule, "AR MODEL CALIBRATION ONLY", rule, sep="\n")

    print("\nStep 1: Data Splitting")
    train_split, val_split = split_data_for_ar_idm(ar_idm_data, train_ratio=0.7)

    print("\nStep 2: Model Training")
    fitted_trace, fitted_model = train_ar_model(train_split, d=ar_order, step=1)

    print(f"\n{rule}", "CALIBRATION COMPLETED!", rule, sep="\n")

    return dict(
        trace=fitted_trace,
        model=fitted_model,
        train_data=train_split,
        val_data=val_split,
    )

def run_validation_only(calibration_results, n_posterior_samples=100):
    """
    Run the validation stage only, on the output of a previous calibration.

    `calibration_results` must provide 'trace', 'model', 'train_data' and
    'val_data'. The model is validated with `n_posterior_samples` posterior
    samples, the result plots are shown, and the validation results are
    returned.
    """
    rule = "=" * 60

    print(rule, "AR MODEL VALIDATION ONLY", rule, sep="\n")

    trace, model, train_data, val_data = (
        calibration_results[key]
        for key in ('trace', 'model', 'train_data', 'val_data')
    )

    print("\nStep 1: Model Validation with Posterior Sampling")
    results = validate_ar_model_comprehensive_improved(
        trace, model, train_data, val_data, n_samples=n_posterior_samples
    )

    print("\nStep 2: Enhanced Results Visualization")
    plot_comprehensive_validation_results_improved(val_data, results)

    print(f"\n{rule}", "VALIDATION COMPLETED!", rule, sep="\n")

    return results

In [None]:
# Calibration stage: split `ar_idm_data` (defined in an earlier cell), fit the
# AR model with order 1, and keep the trace, model and both data splits.
calibration_result = run_calibration_only(ar_idm_data, ar_order=1)

In [None]:
# Validation stage: evaluate the calibrated model with 200 posterior samples
# and show the diagnostic figures.
validation_result = run_validation_only(calibration_result, n_posterior_samples=200)