In [None]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter



def idm_model(s, v, dv, v0=30.0, T=1.5, a=1.0, b=2.0, s0=2.0):
    """
    Intelligent Driver Model (IDM) acceleration, limited to ``[-b, a]``.

    The computation is element-wise, so ``s``, ``v`` and ``dv`` may be
    scalars or equally-shaped NumPy arrays.

    Parameters
    ----------
    s : float or ndarray
        Gap to the leading vehicle. Used as a divisor, so it must be non-zero.
    v : float or ndarray
        Speed of the following vehicle.
    dv : float or ndarray
        Relative speed; enters the desired gap through the ``v * dv`` term.
        NOTE(review): the sign convention is defined by the caller's data.
    v0 : float
        Desired speed.
    T : float
        Time headway.
    a : float
        Maximum acceleration (also the upper clip limit).
    b : float
        Comfortable deceleration (``-b`` is the lower clip limit).
    s0 : float
        Minimum gap.

    Returns
    -------
    float or ndarray
        Clipped acceleration with the broadcast shape of the inputs.
    """
    # np.maximum (not the built-in max) keeps the dynamic part of the desired
    # gap non-negative for array inputs as well as for scalars.
    dynamic_gap = np.maximum(0, v * T + (v * dv) / (2 * np.sqrt(a * b)))
    s_star = s0 + dynamic_gap

    acceleration = a * (1 - (v / v0)**4 - (s_star / s)**2)

    return np.clip(acceleration, -b, a)


def calculate_smoothed_acceleration(v_data, dt, window_length=0.5, polyorder=2):
    """
    Differentiate a speed series, Savitzky-Golay smoothing it first if possible.

    Parameters
    ----------
    v_data : array-like
        Speed samples.
    dt : float
        Sample spacing passed to ``np.gradient``.
    window_length : int
        Savitzky-Golay window in samples.
        NOTE(review): the default of 0.5 is not a usable window
        (``savgol_filter`` requires ``polyorder < window_length``), so with
        the defaults the filter is rejected and the raw gradient is returned.
        Pass an integer window (e.g. 5) to actually smooth.
    polyorder : int
        Polynomial order of the filter.

    Returns
    -------
    ndarray
        Gradient of the smoothed series, or of the raw series when the series
        is shorter than the window or the filter cannot be applied.
    """
    if len(v_data) < window_length:
        return np.gradient(v_data, dt)

    try:
        v_smooth = savgol_filter(v_data, window_length, polyorder)
    except Exception:
        # Best-effort smoothing: an unusable window/order falls back to the
        # raw gradient. Unlike a bare ``except``, this lets KeyboardInterrupt
        # and SystemExit propagate.
        return np.gradient(v_data, dt)

    return np.gradient(v_smooth, dt)


def calculate_nrmse(real, pred):
    """
    RMSE of ``pred`` against ``real``, normalised by the range of ``real``.

    Returns 1.0 when either input is empty. When the range of ``real`` is
    below 1e-6 the RMSE is divided by ``std(real) + 1e-6`` instead.
    """
    if not (len(real) and len(pred)):
        return 1.0

    residual = real - pred
    root_mean_square = np.sqrt(np.mean(residual ** 2))
    spread = np.max(real) - np.min(real)

    # A (near-)constant reference has no usable range to normalise by.
    denominator = np.std(real) + 1e-6 if spread < 1e-6 else spread
    return root_mean_square / denominator


def calculate_rmse(real, pred):
    """
    Root-mean-square error between ``real`` and ``pred``.

    Returns 1.0 when either input is empty.
    """
    if 0 in (len(real), len(pred)):
        return 1.0

    squared_residual = (real - pred) ** 2
    return np.sqrt(np.mean(squared_residual))


def idm_objective_enhanced(params, data):
    """
    Calibration objective: summed NRMSE of an IDM roll-out against one track.

    Parameters
    ----------
    params : sequence of 5 floats
        ``(v0, T, a, b, s0)``.
    data : 6-tuple
        ``(s_data, v_data, dv_data, dt, v_next_data, acc_real)``; only the
        first four entries are used here.

    Returns
    -------
    float
        Equal-weight sum of the speed, spacing and acceleration NRMSE, or the
        flat penalty ``1e10`` when a parameter lies outside the hard-coded
        admissible box or fewer than 10 samples are available.
    """
    v0, T, a, b, s0 = params

    # Any parameter outside its admissible interval gets the flat penalty.
    violations = (
        v0 < 12, v0 > 45,
        T < 0.2, T > 5.0,
        a < 0.2, a > 6.0,
        b < 0.2, b > 6.0,
        s0 < 0.5, s0 > 10.0,
    )
    if any(violations):
        return 1e10

    s_data, v_data, dv_data, dt, _unused_v_next, _unused_acc = data
    acc_real = np.gradient(v_data, dt)

    # Seed the roll-out with the first observation and a forward-difference
    # acceleration estimate.
    # NOTE(review): np.gradient above already needs at least two samples, so
    # the 0.0 alternative looks unreachable.
    speeds = [v_data[0]]
    gaps = [s_data[0]]
    accels = [(v_data[1] - v_data[0]) / dt if len(v_data) > 1 else 0.0]

    for step in range(len(s_data) - 1):
        rel_speed = dv_data[step]
        acc_now = idm_model(gaps[-1], speeds[-1], rel_speed, v0, T, a, b, s0)

        speed_next = speeds[-1] + acc_now * dt
        # NOTE(review): this subtracts an acceleration from a relative speed
        # before multiplying by dt; the units do not match - confirm intent.
        gap_next = gaps[-1] + (rel_speed - acc_now) * dt

        # Clamp the state to fixed speed and gap limits.
        speeds.append(max(0.1, min(45.0, speed_next)))
        gaps.append(max(0.5, min(250.0, gap_next)))
        accels.append(acc_now)

    v_sim = np.array(speeds)
    s_sim = np.array(gaps)
    acc_sim = np.array(accels)

    common = min(len(series) for series in (v_data, v_sim, s_data, s_sim, acc_real, acc_sim))
    if common < 10:
        return 1e10

    # (reference, simulation) pairs: speed, spacing, acceleration.
    pairs = (
        (v_data[:common], v_sim[:common]),
        (s_data[:common], s_sim[:common]),
        (acc_real[:common], acc_sim[:common]),
    )
    weights = (1, 1, 1)

    return sum(w * calculate_nrmse(ref, sim) for w, (ref, sim) in zip(weights, pairs))


def calibrate_idm_enhanced(track_data, dt=0.5):
    """
    Calibrate the IDM parameters of one track with multi-start L-BFGS-B.

    Each start in a fixed list of driver archetypes is optimised against
    ``idm_objective_enhanced``; the lowest objective value wins. If no start
    yields a result, one single-start run is attempted.

    Parameters
    ----------
    track_data : mapping
        Must provide ``'sReal'``, ``'vFollReal'``, ``'dvReal'`` and
        ``'vFollReal_next'``.
    dt : float
        Time step handed to the objective.

    Returns
    -------
    ndarray or None
        ``[v0, T, a, b, s0]`` of the best run, or ``None`` when every run
        failed or the best objective value is not below the acceptance
        threshold (15.0; 10 for the single-start fallback).
    """
    data = (
        track_data['sReal'],
        track_data['vFollReal'],
        track_data['dvReal'],
        dt,
        track_data['vFollReal_next'],
        None,  # acc_real slot; the objective recomputes it from the speeds
    )

    initial_guesses = [
        # Aggressive driver - high desired speed, short time headway, large acceleration
        [35.0, 0.8, 2.5, 3.0, 1.5],
        # Conservative driver - low desired speed, long time headway, small acceleration
        [20.0, 2.5, 0.8, 1.5, 3.0],
        # Moderate driver - balanced parameters
        [28.0, 1.5, 1.2, 2.5, 2.5],
        # Fast reactive type - short headway but medium speed
        [30.0, 0.6, 1.8, 3.5, 2.0],
        # Safety priority type - long headway, gentle acceleration/deceleration
        [25.0, 3.0, 0.6, 1.2, 3.0],
        # High-speed aggressive type
        [35.0, 1.0, 3.0, 3.0, 1.0],
        # Low-speed conservative type
        [18.0, 3.5, 0.5, 1.0, 3.0]
    ]

    # NOTE(review): idm_objective_enhanced returns a flat 1e10 penalty outside
    # a narrower box than these bounds (e.g. v0 > 45), so the optimiser can
    # stall in a zero-gradient region; consider aligning the two.
    bounds = [
        (15.0, 65.0),   # v0
        (0.1, 9.0),     # T
        (0.1, 9.0),     # a
        (0.1, 9.0),     # b
        (0.1, 9.0)      # s0
    ]

    def run_lbfgsb(start, options):
        """Run one bounded L-BFGS-B optimisation from ``start``."""
        return minimize(
            idm_objective_enhanced,
            start,
            args=(data,),
            bounds=bounds,
            method='L-BFGS-B',
            options=options
        )

    best_score = float('inf')
    best_params = None

    print(f"  Trying {len(initial_guesses)} different starting points...")

    for i, initial_guess in enumerate(initial_guesses):
        try:
            result = run_lbfgsb(initial_guess, {'maxiter': 800, 'ftol': 1e-6, 'gtol': 1e-6})
        except Exception as exc:
            # Keep the best-effort multi-start, but report the failure rather
            # than hiding it.
            print(f"    Starting point {i+1}: optimization failed ({exc})")
            continue

        if result.fun < best_score:
            best_score = result.fun
            best_params = result.x
            print(f"    Starting point {i+1}: NRMSE = {result.fun:.4f} - found better solution")

    if best_params is None:
        print("  Multi-start optimization failed, using single-start optimization...")
        try:
            result = run_lbfgsb([25.0, 1.5, 1.0, 2.0, 2.0], {'maxiter': 500, 'ftol': 1e-5})
        except Exception as exc:
            print(f"  Single-start optimization failed ({exc})")
            return None

        if not result.fun < 10:
            return None
        best_params = result.x
        best_score = result.fun

    if best_score < 15.0:
        print(f"  Final result: NRMSE = {best_score:.4f}")
        return best_params

    print(f"  Poor result: NRMSE = {best_score:.4f}")
    return None


def calculate_errors(track_data, params, dt=0.5):
    """
    Roll the IDM forward along one track and compute error metrics.

    Parameters
    ----------
    track_data : mapping
        Must provide ``'sReal'`` (gap), ``'vFollReal'`` (follower speed) and
        ``'dvReal'`` (relative speed), indexable from 0.
    params : sequence of 5 floats
        ``(v0, T, a, b, s0)``.
    dt : float
        Time step of the roll-out and of the reference acceleration gradient.

    Returns
    -------
    dict
        NRMSE and RMSE per quantity (speed, spacing, acceleration), their
        totals, the simulated series, the signed error series
        (simulated - real) and ``'time_index'``.
    """
    s_data = track_data['sReal']
    v_data = track_data['vFollReal']
    dv_data = track_data['dvReal']

    v0, T, a, b, s0 = params

    # Reference acceleration (np.gradient requires at least two samples).
    acc_real = np.gradient(v_data, dt)

    # Seed the roll-out with the first observation and a forward-difference
    # acceleration estimate.
    v_sim = [v_data[0]]
    s_sim = [s_data[0]]
    acc_sim = [(v_data[1] - v_data[0]) / dt]

    for i in range(len(s_data) - 1):
        current_acc = idm_model(s_sim[-1], v_sim[-1], dv_data[i], v0, T, a, b, s0)
        v_next = v_sim[-1] + current_acc * dt
        # NOTE(review): this subtracts an acceleration from a relative speed
        # before multiplying by dt; the units do not match - confirm intent.
        s_next = s_sim[-1] + (dv_data[i] - current_acc) * dt

        # Clamp the state to fixed speed and gap limits.
        v_sim.append(max(0.1, min(45.0, v_next)))
        s_sim.append(max(0.5, min(250.0, s_next)))
        acc_sim.append(current_acc)

    v_sim = np.array(v_sim)
    s_sim = np.array(s_sim)
    acc_sim = np.array(acc_sim)

    min_len = min(len(v_data), len(v_sim), len(s_data), len(s_sim), len(acc_real), len(acc_sim))

    v_data_trim = v_data[:min_len]
    v_sim_trim = v_sim[:min_len]
    s_data_trim = s_data[:min_len]
    s_sim_trim = s_sim[:min_len]
    acc_real_trim = acc_real[:min_len]
    acc_sim_trim = acc_sim[:min_len]

    nrmse_v = calculate_nrmse(v_data_trim, v_sim_trim)
    nrmse_s = calculate_nrmse(s_data_trim, s_sim_trim)
    nrmse_acc = calculate_nrmse(acc_real_trim, acc_sim_trim)

    rmse_speed = calculate_rmse(v_data_trim, v_sim_trim)
    rmse_spacing = calculate_rmse(s_data_trim, s_sim_trim)
    rmse_acceleration = calculate_rmse(acc_real_trim, acc_sim_trim)

    return {
        'NRMSE_speed': nrmse_v,
        'NRMSE_spacing': nrmse_s,
        'NRMSE_acceleration': nrmse_acc,
        'RMSE_speed': rmse_speed,
        'RMSE_spacing': rmse_spacing,
        'RMSE_acceleration': rmse_acceleration,
        'Total_NRMSE': nrmse_v + nrmse_s + nrmse_acc,
        'Total_RMSE': rmse_speed + rmse_spacing + rmse_acceleration,
        'v_sim': v_sim_trim,
        's_sim': s_sim_trim,
        'acc_sim': acc_sim_trim,
        'speed_error': v_sim_trim - v_data_trim,
        'spacing_error': s_sim_trim - s_data_trim,
        'acceleration_error': acc_sim_trim - acc_real_trim,
        'time_index': np.arange(min_len)
    }


def plot_prediction_errors_single(driver_name, vehicle_pair, track_data, errors):
    """
    Draw speed, spacing and acceleration prediction errors of one driver.

    Three stacked panels share the ``errors['time_index']`` axis; each title
    reports the mean and standard deviation of its error series.
    ``track_data`` is accepted but not used.
    """
    fig, axes = plt.subplots(3, 1, figsize=(12, 10))
    fig.suptitle(f'{driver_name} ({vehicle_pair}) - Prediction Errors Over Time', fontsize=14)

    steps = errors['time_index']

    # (error key, quantity name, line format, fill colour, y-axis label)
    panels = [
        ('speed_error', 'Speed', 'b-', 'blue', 'Speed Error (m/s)'),
        ('spacing_error', 'Spacing', 'g-', 'green', 'Spacing Error (m)'),
        ('acceleration_error', 'Acceleration', 'purple', 'purple', 'Acceleration Error (m/s²)'),
    ]

    for panel, (key, quantity, line_fmt, fill_color, y_label) in zip(axes, panels):
        series = errors[key]
        panel.plot(steps, series, line_fmt, linewidth=1.5, alpha=0.8)
        panel.axhline(y=0, color='r', linestyle='--', alpha=0.5)
        panel.set_ylabel(y_label)
        panel.set_title(f'{quantity} Prediction Error (Mean: {np.mean(series):.3f}, Std: {np.std(series):.3f})')
        panel.grid(True, alpha=0.3)
        panel.fill_between(steps, series, 0, alpha=0.3, color=fill_color)

    # Only the bottom panel carries the x-axis label.
    axes[-1].set_xlabel('Time Index')

    plt.tight_layout()
    plt.show()


def plot_all_drivers_prediction_errors(individual_results):
    """
    Overlay the prediction-error series of all drivers, one panel per quantity.

    ``individual_results`` maps a driver id to a result dict providing
    ``'driver_name'`` and ``'errors'``.
    """
    driver_count = len(individual_results)

    fig, axes = plt.subplots(3, 1, figsize=(15, 12))
    fig.suptitle('Prediction Errors Over Time - All Drivers Comparison', fontsize=16)

    palette = plt.cm.Set3(np.linspace(0, 1, driver_count))

    # (error key, y-axis label, panel title)
    panels = [
        ('speed_error', 'Speed Error (m/s)', 'Speed Prediction Errors - All Drivers'),
        ('spacing_error', 'Spacing Error (m)', 'Spacing Prediction Errors - All Drivers'),
        ('acceleration_error', 'Acceleration Error (m/s²)', 'Acceleration Prediction Errors - All Drivers'),
    ]

    # One line per driver in every panel, coloured consistently across panels.
    for row, result in enumerate(individual_results.values()):
        label = result['driver_name']
        errors = result['errors']
        steps = errors['time_index']
        for ax, (key, _, _) in zip(axes, panels):
            ax.plot(steps, errors[key],
                    color=palette[row], linewidth=1.5, alpha=0.7, label=label)

    # Shared decoration: zero line, labels, grid and an outside legend.
    for ax, (_, y_label, title) in zip(axes, panels):
        ax.axhline(y=0, color='black', linestyle='--', alpha=0.5)
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.grid(True, alpha=0.3)
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    axes[-1].set_xlabel('Time Index')

    plt.tight_layout()
    plt.show()

# Plot real vs. simulated speed, spacing and acceleration trajectories per driver
def plot_error_trajectories(individual_results, dt=0.5):
    """
    Plot real vs. simulated speed, spacing and acceleration for each driver.

    Parameters
    ----------
    individual_results : mapping
        Driver id -> result dict providing ``'driver_name'``,
        ``'track_data'`` (with ``'vFollReal'`` and ``'sReal'``) and
        ``'errors'`` (with ``'v_sim'``, ``'s_sim'`` and ``'acc_sim'``).
    dt : float
        Sample spacing used to differentiate the real speed. The default
        matches the ``dt=0.5`` default of the calibration and error
        functions, so the real and simulated acceleration share one time base.
    """
    n_drivers = len(individual_results)
    # squeeze=False always yields a 2-D axes array, including for one driver.
    fig, axes = plt.subplots(n_drivers, 3, figsize=(18, 4*n_drivers), squeeze=False)

    for idx, result in enumerate(individual_results.values()):
        driver_name = result['driver_name']
        track_data = result['track_data']
        errors = result['errors']

        time_idx = np.arange(len(track_data['vFollReal']))

        # Speed trajectory
        axes[idx, 0].plot(time_idx, track_data['vFollReal'], 'b-', label='Real Speed', linewidth=2, alpha=0.8)
        axes[idx, 0].plot(time_idx[:len(errors['v_sim'])], errors['v_sim'], 'r--', label='Simulated Speed', linewidth=2)
        axes[idx, 0].set_ylabel('Speed (m/s)')
        axes[idx, 0].set_title(f'{driver_name} - Speed')
        axes[idx, 0].legend()
        axes[idx, 0].grid(True, alpha=0.3)

        # Spacing trajectory
        axes[idx, 1].plot(time_idx, track_data['sReal'], 'g-', label='Real Spacing', linewidth=2, alpha=0.8)
        axes[idx, 1].plot(time_idx[:len(errors['s_sim'])], errors['s_sim'], 'r--', label='Simulated Spacing', linewidth=2)
        axes[idx, 1].set_ylabel('Spacing (m)')
        axes[idx, 1].set_title(f'{driver_name} - Spacing')
        axes[idx, 1].legend()
        axes[idx, 1].grid(True, alpha=0.3)

        # Acceleration trajectory; both series are cut to their common length.
        acc_real = calculate_smoothed_acceleration(track_data['vFollReal'], dt)
        min_len = min(len(acc_real), len(errors['acc_sim']))
        axes[idx, 2].plot(time_idx[:min_len], acc_real[:min_len], 'b-', label='Real Acceleration', linewidth=2, alpha=0.8)
        axes[idx, 2].plot(time_idx[:min_len], errors['acc_sim'][:min_len], 'r--', label='Simulated Acceleration', linewidth=2)
        axes[idx, 2].set_ylabel('Acceleration (m/s²)')
        axes[idx, 2].set_title(f'{driver_name} - Acceleration')
        axes[idx, 2].legend()
        axes[idx, 2].grid(True, alpha=0.3)

        # Only the bottom row carries x-axis labels.
        if idx == n_drivers - 1:
            for ax in axes[idx, :]:
                ax.set_xlabel('Time Index')

    plt.tight_layout()
    plt.show()

# Plot parameter distribution
def plot_parameter_distribution(individual_results):
    """
    Visualise how the five calibrated parameters vary across drivers.

    Draws two figures: one line chart per parameter (annotated values, mean
    and +/-1 standard-deviation guides), then a box plot of all parameters
    with jittered per-driver points and the mean printed above each box.
    ``individual_results`` maps a driver id to a result dict providing
    ``'params'`` and ``'driver_name'``.
    """
    results = list(individual_results.values())
    param_matrix = np.array([result['params'] for result in results])
    driver_names = [result['driver_name'] for result in results]

    param_names = ['v0 (m/s)', 'T (s)', 'a (m/s²)', 'b (m/s²)', 's0 (m)']
    param_short_names = ['v0', 'T', 'a', 'b', 's0']
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

    # Figure 1: one line chart per parameter, drivers along the x-axis.
    fig, axes = plt.subplots(5, 1, figsize=(12, 15))
    x_pos = np.arange(len(driver_names))
    label_box = dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.7)

    for column, (ax, param_name, color) in enumerate(zip(axes, param_names, colors)):
        values = param_matrix[:, column]

        ax.plot(x_pos, values, 'o-', linewidth=2, markersize=8, color=color, alpha=0.8)

        # Print each value just above its marker.
        for x, value in zip(x_pos, values):
            ax.annotate(f'{value:.2f}',
                        (x, value),
                        textcoords="offset points",
                        xytext=(0, 10),
                        ha='center',
                        fontsize=9,
                        bbox=label_box)

        ax.set_ylabel(param_name)
        ax.set_title(f'{param_name} Distribution Across Drivers')
        ax.set_xticks(x_pos)
        ax.set_xticklabels(driver_names, rotation=45)
        ax.grid(True, alpha=0.3)

        center = np.mean(values)
        spread = np.std(values)
        ax.axhline(y=center, color='red', linestyle='--', alpha=0.7, label=f'Mean: {center:.2f}')
        ax.axhline(y=center + spread, color='orange', linestyle=':', alpha=0.5, label='±1 STD')
        ax.axhline(y=center - spread, color='orange', linestyle=':', alpha=0.5)
        ax.legend()

    plt.tight_layout()
    plt.show()

    # Figure 2: box plot comparing the parameters side by side.
    fig_box, ax_box = plt.subplots(figsize=(10, 6))
    columns = [param_matrix[:, k] for k in range(len(param_short_names))]
    box_plot = ax_box.boxplot(columns, labels=param_short_names, patch_artist=True)

    for patch, color in zip(box_plot['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.7)

    ax_box.set_ylabel('Parameter Values')
    ax_box.set_title('IDM Parameter Distributions Comparison')
    ax_box.grid(True, alpha=0.3)

    # Overlay the per-driver values with a small random horizontal jitter.
    for position, (values, color) in enumerate(zip(columns, colors), start=1):
        jittered_x = np.random.normal(position, 0.04, size=len(values))
        ax_box.scatter(jittered_x, values, alpha=0.6, color=color, s=30)

    # Print the mean slightly above the largest value of each box.
    for position, values in enumerate(columns, start=1):
        ax_box.text(position, np.max(values) + 0.1, f'mean: {np.mean(values):.2f}',
                    ha='center', va='bottom', fontsize=8)

    plt.tight_layout()
    plt.show()

def analyze_inter_driver_heterogeneity_enhanced(ar_idm_data):
    """
    Calibrate the IDM separately for every track and analyse the spread.

    For each entry of ``ar_idm_data['tracks']`` the parameters are calibrated,
    the errors computed and a per-driver error plot drawn. Tracks whose
    calibration fails are reported and skipped. Returns the result of
    ``analyze_individual_results_enhanced`` over the successful tracks, or
    ``None`` when no track could be calibrated.
    """
    param_labels = ['v0', 'T', 'a', 'b', 's0']
    calibrated = {}

    for driver_id, track_data in ar_idm_data['tracks'].items():
        driver_name = track_data.get('driver_id', f'Driver_{driver_id}')
        vehicle_pair = track_data.get('vehicle_pair', 'Unknown')

        print(f"\nCalibrating {driver_name} ({vehicle_pair})...")
        print(f"Data points: {len(track_data['vFollReal'])}")

        fitted = calibrate_idm_enhanced(track_data)
        if fitted is None:
            print(f"  {driver_name} calibration failed")
            continue

        errors = calculate_errors(track_data, fitted)
        calibrated[driver_id] = {
            'params': fitted,
            'errors': errors,
            'driver_name': driver_name,
            'vehicle_pair': vehicle_pair,
            'data_points': len(track_data['vFollReal']),
            'track_data': track_data
        }

        print(f"  {driver_name} calibration successful")
        params_str = " | ".join(f"{label}: {value:.3f}" for label, value in zip(param_labels, fitted))
        print(f"  Parameters: {params_str}")
        print(f"  Total NRMSE: {errors['Total_NRMSE']:.4f}")

        plot_prediction_errors_single(driver_name, vehicle_pair, track_data, errors)

    if not calibrated:
        print("No successful individual calibration results")
        return None

    banner = "=" * 60
    print("\n" + banner)
    print("Inter-Driver Heterogeneity Analysis Results")
    print(banner)

    return analyze_individual_results_enhanced(calibrated)

def analyze_individual_results_enhanced(individual_results):
    """
    Print and plot inter-driver statistics of the calibrated IDM parameters.

    Parameters
    ----------
    individual_results : mapping
        Driver id -> result dict providing ``'params'``, ``'errors'``,
        ``'driver_name'`` and ``'data_points'`` (plus ``'track_data'`` for
        the trajectory plots).

    Prints per-parameter statistics (mean, std, coefficient of variation,
    min, max, range), a per-driver table and RMSE summaries, then draws the
    trajectory, parameter-distribution and error-comparison plots followed
    by bar charts of the total NRMSE and RMSE per driver. Returns ``None``.
    """
    results = list(individual_results.values())

    all_params = np.array([result['params'] for result in results])
    driver_names = [result['driver_name'] for result in results]
    all_nrmse_errors = [result['errors']['Total_NRMSE'] for result in results]
    all_rmse_errors = [result['errors']['Total_RMSE'] for result in results]
    all_rmse_speed = [result['errors']['RMSE_speed'] for result in results]
    all_rmse_spacing = [result['errors']['RMSE_spacing'] for result in results]
    all_rmse_acceleration = [result['errors']['RMSE_acceleration'] for result in results]

    param_means = np.mean(all_params, axis=0)
    param_stds = np.std(all_params, axis=0)
    # Coefficient of variation per parameter (std relative to the mean).
    param_cvs = param_stds / param_means
    param_ranges = np.ptp(all_params, axis=0)

    param_names = ['v0', 'T', 'a', 'b', 's0']

    print("\nIndividual Parameter Statistics (Inter-Driver Heterogeneity):")
    print(f"{'Parameter':<8} {'Mean':<8} {'Std':<8} {'CV':<10} {'Min':<8} {'Max':<8} {'Range':<8}")
    for i, name in enumerate(param_names):
        min_val = np.min(all_params[:, i])
        max_val = np.max(all_params[:, i])
        range_val = param_ranges[i]
        print(f"{name:<8} {param_means[i]:<8.3f} {param_stds[i]:<8.3f} {param_cvs[i]:<10.3f} {min_val:<8.3f} {max_val:<8.3f} {range_val:<8.3f}")

    print("\nDetailed Parameters by Driver:")
    print(f"{'Driver':<15} {'v0':<8} {'T':<8} {'a':<8} {'b':<8} {'s0':<8} {'Total_NRMSE':<12} {'Total_RMSE':<12} {'Data Points':<8}")
    for result in results:
        params = result['params']
        errors = result['errors']
        points = result['data_points']
        print(f"{result['driver_name']:<15} {params[0]:<8.3f} {params[1]:<8.3f} {params[2]:<8.3f} {params[3]:<8.3f} {params[4]:<8.3f} {errors['Total_NRMSE']:<12.4f} {errors['Total_RMSE']:<12.4f} {points:<8}")

    print("\nRMSE Detailed Statistics:")
    print(f"{'Metric':<25} {'Mean':<10} {'Std':<10} {'Min':<10} {'Max':<10}")
    rmse_rows = [
        ('RMSE_speed (m/s)', all_rmse_speed),
        ('RMSE_spacing (m)', all_rmse_spacing),
        ('RMSE_acceleration (m/s²)', all_rmse_acceleration),
        ('Total_RMSE', all_rmse_errors),
    ]
    for label, values in rmse_rows:
        print(f"{label:<25} {np.mean(values):<10.4f} {np.std(values):<10.4f} {np.min(values):<10.4f} {np.max(values):<10.4f}")

    avg_cv = np.mean(param_cvs)
    max_cv_param = param_names[np.argmax(param_cvs)]
    max_cv_value = np.max(param_cvs)

    print("\nOverall Statistics:")
    print(f"Parameter average coefficient of variation: {avg_cv:.3f}")
    print(f"Parameter with highest variation: {max_cv_param} (CV = {max_cv_value:.3f})")
    print(f"Average NRMSE fitting error: {np.mean(all_nrmse_errors):.4f} ± {np.std(all_nrmse_errors):.4f}")
    print(f"Average RMSE fitting error: {np.mean(all_rmse_errors):.4f} ± {np.std(all_rmse_errors):.4f}")
    print(f"Number of drivers analyzed: {len(driver_names)}")

    plot_error_trajectories(individual_results)
    plot_parameter_distribution(individual_results)
    plot_all_drivers_prediction_errors(individual_results)

    # Bar charts of the total fitting error per driver.
    fig_errors, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    charts = [
        (ax1, all_nrmse_errors, 'Total NRMSE', 'green'),
        (ax2, all_rmse_errors, 'Total RMSE', 'red'),
    ]
    for ax, values, y_label, color in charts:
        bars = ax.bar(driver_names, values, alpha=0.7, color=color)
        ax.set_xlabel('Drivers')
        ax.set_ylabel(y_label)
        ax.set_title(f'{y_label} by Driver')
        # Rotate the existing tick labels; set_xticklabels should only be
        # used after the tick positions have been fixed.
        ax.tick_params(axis='x', labelrotation=45)
        ax.grid(True, alpha=0.3)

        # Print the value on top of each bar.
        for bar, error in zip(bars, values):
            ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.01,
                    f'{error:.3f}', ha='center', va='bottom')

    # Finish the figure the same way the other plotting helpers in this file do.
    plt.tight_layout()
    plt.show()

In [None]:
import numpy as np
import pymc as pm
import pytensor.tensor as pt
import arviz as az
import matplotlib.pyplot as plt
from scipy import signal
from collections import deque
def calculate_metrics(y_true, y_pred):
    """
    Compute MSE, RMSE, MAE and range-normalised RMSE of a prediction.

    Parameters
    ----------
    y_true, y_pred : ndarray
        Reference and predicted values of equal shape.

    Returns
    -------
    dict
        Keys ``'mse'``, ``'rmse'``, ``'mae'`` and ``'nrmse'``. ``'nrmse'`` is
        the RMSE divided by the range of ``y_true``; when that range is below
        1e-6 the RMSE is divided by ``std(y_true) + 1e-6`` instead, so a
        constant reference does not cause a division by zero.
    """
    residual = y_true - y_pred
    mse = np.mean(residual ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(residual))

    value_range = np.max(y_true) - np.min(y_true)
    if value_range < 1e-6:
        # Same fallback as calculate_nrmse: normalise by the spread instead.
        nrmse = rmse / (np.std(y_true) + 1e-6)
    else:
        nrmse = rmse / value_range

    return {
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'nrmse': nrmse
    }

def _central_difference_acceleration(velocity, dt):
    """Fallback estimate: finite differences smoothed by a moving average of up to 5 samples."""
    acceleration = np.zeros_like(velocity, dtype=float)
    acceleration[1:-1] = (velocity[2:] - velocity[:-2]) / (2 * dt)

    if len(velocity) > 1:
        # One-sided differences at the two ends.
        acceleration[0] = (velocity[1] - velocity[0]) / dt
        acceleration[-1] = (velocity[-1] - velocity[-2]) / dt

    window = min(5, len(acceleration))
    return np.convolve(acceleration, np.ones(window) / window, mode='same')


def robust_smooth_acceleration(velocity, dt=0.5, window_size=7, poly_order=2):
    """
    Estimate acceleration from a speed series with a Savitzky-Golay derivative.

    Parameters
    ----------
    velocity : array-like
        Speed samples at a constant spacing ``dt``.
    dt : float
        Sample spacing.
    window_size : int
        Savitzky-Golay window length in samples.
    poly_order : int
        Polynomial order of the filter (must be below ``window_size``).

    Returns
    -------
    ndarray
        Acceleration with the same length as ``velocity``.

    Notes
    -----
    * Series shorter than ``window_size`` are differentiated with
      ``np.gradient`` and returned unsmoothed.
    * For more than 10 samples, the first and last 5 values are replaced by a
      linear extrapolation of the neighbouring 5 interior values.
    * If the filter rejects its arguments, a central-difference estimate
      smoothed by a moving average is returned instead.
    """
    velocity = np.asarray(velocity, dtype=float)

    if len(velocity) < window_size:
        return np.gradient(velocity, dt)

    try:
        # scipy exposes the filter as ``savgol_filter``; deriv=1 with delta=dt
        # returns the first derivative in units of velocity per unit time.
        acceleration = signal.savgol_filter(
            velocity,
            window_length=window_size,
            polyorder=poly_order,
            deriv=1,
            delta=dt,
        )

        n = len(acceleration)
        if n > 10:
            t = np.arange(n) * dt
            # Fit both lines before overwriting anything so the tail fit never
            # sees extrapolated head values on short series.
            head_line = np.polyfit(t[5:10], acceleration[5:10], 1)
            tail_line = np.polyfit(t[n - 10:n - 5], acceleration[n - 10:n - 5], 1)
            # Evaluate each line at the actual sample times of the edge points.
            acceleration[:5] = np.polyval(head_line, t[:5])
            acceleration[n - 5:] = np.polyval(tail_line, t[n - 5:])

        return acceleration

    except (ValueError, TypeError, np.linalg.LinAlgError):
        # Unusable filter arguments or a failed line fit: keep the best-effort
        # behaviour with the finite-difference fallback.
        return _central_difference_acceleration(velocity, dt)

def calculate_initial_acceleration(velocity, dt=0.5, method='savitzky_golay'):
    """
    Estimate the acceleration at the first sample of a speed series.

    Parameters
    ----------
    velocity : array-like
        Speed samples at a constant spacing ``dt``.
    dt : float
        Sample spacing.
    method : str
        * ``'savitzky_golay'`` - first value of ``robust_smooth_acceleration``.
        * ``'robust_fit'`` - slope of a straight line fitted to the first
          (up to) 5 samples.
        * ``'physical_constrained'`` - slope of a weighted straight-line fit
          to the first 4 samples (weights 0.1, 0.2, 0.3, 0.4); with fewer than
          4 samples, the forward difference of the first two.
        * anything else - forward difference of the first two samples,
          clipped to ``[-3, 3]``.

    Returns
    -------
    float
        The estimate, or 0.0 when fewer than 3 samples are given.
    """
    if len(velocity) < 3:
        return 0.0

    if method == 'savitzky_golay':
        return robust_smooth_acceleration(velocity, dt)[0]

    if method == 'robust_fit':
        n_points = min(5, len(velocity))
        t_points = np.arange(n_points) * dt
        slope, _ = np.polyfit(t_points, velocity[:n_points], 1)
        return slope

    if method == 'physical_constrained':
        if len(velocity) < 4:
            return (velocity[1] - velocity[0]) / dt

        weights = np.array([0.1, 0.2, 0.3, 0.4])
        weights = weights / np.sum(weights)

        # Fit only as many leading samples as there are weights, so the
        # design matrix and the weight matrix always have matching sizes.
        v_points = np.asarray(velocity[:len(weights)], dtype=float)
        t_points = np.arange(len(weights)) * dt

        # Weighted least squares via the normal equations (A^T W A) x = A^T W v.
        A = np.vstack([t_points, np.ones(len(t_points))]).T
        W = np.diag(weights)
        slope, _ = np.linalg.lstsq(A.T @ W @ A, A.T @ W @ v_points, rcond=None)[0]
        return slope

    initial_acc = (velocity[1] - velocity[0]) / dt
    return np.clip(initial_acc, -3.0, 3.0)

def improved_robust_acceleration(velocity, dt=0.5, window_size=7, poly_order=2):
    """
    Smoothed acceleration clipped to ``[-3, 3]`` with flattened ends.

    Takes the estimate of ``robust_smooth_acceleration``, clips it, and - for
    series longer than 10 samples - overwrites the first and last 3 values
    with the mean of the first and last 5 values respectively.
    """
    smoothed = robust_smooth_acceleration(velocity, dt, window_size, poly_order)
    bounded = np.clip(smoothed, -3.0, 3.0)

    if len(bounded) <= 10:
        return bounded

    # Flatten each end to the average level of its five outermost samples.
    bounded[:3] = np.mean(bounded[:5])
    bounded[-3:] = np.mean(bounded[-5:])
    return bounded

def split_data_for_ar_idm(ar_idm_data, train_ratio=0.7):
    """
    Split every vehicle's samples chronologically into train and validation.

    Parameters
    ----------
    ar_idm_data : mapping
        Must provide equally long arrays ``'vt'``, ``'s'``, ``'dv'``,
        ``'label_v'`` and ``'id_idx'`` (vehicle id per sample) plus
        ``'n_vehicles'``.
    train_ratio : float
        Fraction of each vehicle's samples (in stored order) assigned to the
        training set; the remainder goes to validation.

    Returns
    -------
    (dict, dict)
        Training and validation dictionaries with the keys ``'vt'``, ``'s'``,
        ``'dv'``, ``'label_v'``, ``'id_idx'``, ``'n_vehicles'`` and
        ``'tracks'``. The training ``'tracks'`` maps each kept vehicle id to
        its last training sample (``'last_vt'``, ``'last_s'``, ``'last_dv'``);
        the validation ``'tracks'`` is empty. Vehicles with fewer than 20
        samples are dropped from both sets.
    """
    vt = ar_idm_data['vt']
    s = ar_idm_data['s']
    dv = ar_idm_data['dv']
    label_v = ar_idm_data['label_v']
    id_idx = ar_idm_data['id_idx']

    value_fields = {'vt': vt, 's': s, 'dv': dv, 'label_v': label_v}

    def empty_chunks():
        """Per-field chunk lists, seeded with empty arrays to fix the dtypes."""
        chunks = {name: [np.array([])] for name in value_fields}
        chunks['id_idx'] = [np.array([], dtype=int)]
        return chunks

    def assemble(chunks, tracks):
        """Concatenate the collected chunks once and attach the metadata."""
        merged = {name: np.concatenate(parts) for name, parts in chunks.items()}
        merged['n_vehicles'] = ar_idm_data['n_vehicles']
        merged['tracks'] = tracks
        return merged

    train_chunks = empty_chunks()
    val_chunks = empty_chunks()
    train_tracks = {}

    for veh_id in np.unique(id_idx):
        veh_rows = np.where(id_idx == veh_id)[0]
        n_points = len(veh_rows)

        # Too few samples for a meaningful split.
        if n_points < 20:
            continue

        split_point = int(n_points * train_ratio)
        train_rows = veh_rows[:split_point]
        val_rows = veh_rows[split_point:]

        for name, values in value_fields.items():
            train_chunks[name].append(values[train_rows])
            val_chunks[name].append(values[val_rows])
        train_chunks['id_idx'].append(np.full(len(train_rows), veh_id))
        val_chunks['id_idx'].append(np.full(len(val_rows), veh_id))

        if len(train_rows) > 0:
            last_row = train_rows[-1]
            train_tracks[veh_id] = {
                'last_vt': vt[last_row],
                'last_s': s[last_row],
                'last_dv': dv[last_row]
            }

    train_data = assemble(train_chunks, train_tracks)
    val_data = assemble(val_chunks, {})

    print(f"Training set: {len(train_data['vt'])} data points")
    print(f"Validation set: {len(val_data['vt'])} data points")

    return train_data, val_data
def validate_basic_idm_model_comprehensive_improved(trace, model, train_data, val_data, n_samples=100):
    """Evaluate posterior draws of the IDM parameters on the validation split.

    For each of ``n_samples`` draws of (v0, T, a, b, s0), ``idm_model`` is
    evaluated vehicle by vehicle on the validation records and a one-step
    speed prediction ``vt + a_idm * dt`` is formed. The spacing "prediction"
    is the observed spacing copied through unchanged, so the spacing metrics
    only measure that pass-through. The reference acceleration is obtained
    from ``improved_robust_acceleration`` for vehicles with more than two
    validation records; other vehicles keep a reference of zero and are
    excluded from the overall acceleration metrics.

    Args:
        trace: Posterior trace handed to ``pm.sample_posterior_predictive``.
        model: Model object used as the sampling context (``with model``).
        train_data: Not used here; kept so existing callers keep working.
        val_data: Dict providing the arrays 'vt', 's', 'dv', 'label_v' and
            'id_idx' (vehicle id of every record).
        n_samples: Number of posterior draws to evaluate.

    Returns:
        Dict with the per-draw predictions ('all_samples_*_predictions',
        one row per draw), 'real_acceleration', 'valid_indices',
        'n_samples', the overall '*_metrics', per-vehicle
        'vehicle_metrics', 'individual_params' (draws stacked column-wise
        as v0, T, a, b, s0), 'mean_predictions' and 'parameter_samples'.
    """
    vt_val = val_data['vt']
    s_val = val_data['s']
    dv_val = val_data['dv']
    label_val = val_data['label_v']
    id_idx_val = val_data['id_idx']

    # Step used for the one-step speed update and the reference acceleration.
    # NOTE(review): presumably the sampling interval in seconds - confirm.
    dt = 0.5

    # NOTE(review): the `samples=` keyword matches the PyMC3 signature; confirm
    # against the installed PyMC version before upgrading.
    with model:
        posterior_samples = pm.sample_posterior_predictive(
            trace,
            var_names=['v0', 'T', 'a', 'b', 's0'],
            samples=n_samples,
            random_seed=42
        )

    v0_samples = posterior_samples['v0']
    T_samples = posterior_samples['T']
    a_samples = posterior_samples['a']
    b_samples = posterior_samples['b']
    s0_samples = posterior_samples['s0']

    # Row indices of every vehicle, computed once instead of once per draw.
    # Iterating the ids actually present (rather than range(n_vehicles))
    # matches the per-vehicle metrics below and the plotting functions.
    vehicle_rows = [
        (veh_id, np.where(id_idx_val == veh_id)[0])
        for veh_id in np.unique(id_idx_val)
    ]

    all_samples_speed_predictions = []
    all_samples_spacing_predictions = []
    all_samples_acceleration_predictions = []

    print("Generating predictions for each posterior sample...")
    for sample_idx in range(n_samples):
        speed_predictions = np.zeros_like(vt_val)
        spacing_predictions = np.zeros_like(s_val)
        acceleration_predictions = np.zeros_like(vt_val)

        v0 = v0_samples[sample_idx]
        T = T_samples[sample_idx]
        a = a_samples[sample_idx]
        b = b_samples[sample_idx]
        s0 = s0_samples[sample_idx]

        for _, rows in vehicle_rows:
            vt_veh = vt_val[rows]
            s_veh = s_val[rows]

            a_idm = idm_model(s_veh, vt_veh, dv_val[rows], v0, T, a, b, s0)

            speed_predictions[rows] = vt_veh + a_idm * dt
            # Spacing is not simulated: the observed value is passed through.
            spacing_predictions[rows] = s_veh
            acceleration_predictions[rows] = a_idm

        all_samples_speed_predictions.append(speed_predictions)
        all_samples_spacing_predictions.append(spacing_predictions)
        all_samples_acceleration_predictions.append(acceleration_predictions)

    # Reference acceleration; only vehicles with more than two records are
    # estimated and counted as valid.
    real_acceleration = np.zeros_like(vt_val)
    valid_indices = []
    for _, rows in vehicle_rows:
        if rows.size > 2:
            real_acceleration[rows] = improved_robust_acceleration(vt_val[rows], dt=dt)
            valid_indices.extend(rows)

    all_samples_speed_predictions = np.array(all_samples_speed_predictions)
    all_samples_spacing_predictions = np.array(all_samples_spacing_predictions)
    all_samples_acceleration_predictions = np.array(all_samples_acceleration_predictions)

    # Posterior-mean predictions, each computed once and reused below.
    mean_speed_pred = np.mean(all_samples_speed_predictions, axis=0)
    mean_acceleration_pred = np.mean(all_samples_acceleration_predictions, axis=0)
    mean_spacing_pred = np.mean(all_samples_spacing_predictions, axis=0)

    speed_metrics = calculate_metrics(label_val, mean_speed_pred)
    acceleration_metrics = calculate_metrics(real_acceleration[valid_indices],
                                             mean_acceleration_pred[valid_indices])
    spacing_metrics = calculate_metrics(s_val, mean_spacing_pred)

    vehicle_metrics = {
        veh_id: {
            'speed': calculate_metrics(label_val[rows], mean_speed_pred[rows]),
            'acceleration': calculate_metrics(real_acceleration[rows], mean_acceleration_pred[rows]),
            'spacing': calculate_metrics(s_val[rows], mean_spacing_pred[rows])
        }
        for veh_id, rows in vehicle_rows
    }

    individual_params = np.column_stack([
        v0_samples, T_samples, a_samples, b_samples, s0_samples
    ])

    validation_results = {
        'all_samples_speed_predictions': all_samples_speed_predictions,
        'all_samples_spacing_predictions': all_samples_spacing_predictions,
        'all_samples_acceleration_predictions': all_samples_acceleration_predictions,
        'real_acceleration': real_acceleration,
        'valid_indices': valid_indices,
        'n_samples': n_samples,
        'speed_metrics': speed_metrics,
        'acceleration_metrics': acceleration_metrics,
        'spacing_metrics': spacing_metrics,
        'vehicle_metrics': vehicle_metrics,
        'individual_params': individual_params,
        'mean_predictions': {
            'speed': mean_speed_pred,
            'acceleration': mean_acceleration_pred,
            'spacing': mean_spacing_pred
        },
        'parameter_samples': {
            'v0': v0_samples,
            'T': T_samples,
            'a': a_samples,
            'b': b_samples,
            's0': s0_samples
        }
    }

    return validation_results

def _plot_posterior_panel(ax, time_points, truth, samples, trace_rows, *,
                          truth_label, band_color, mean_style, median_style,
                          trace_style, title, ylabel):
    """Draw one truth-vs-posterior panel: truth, 90%/50% bands, mean, median, sample traces."""
    ax.plot(time_points, truth, 'k-', label=truth_label, linewidth=3, alpha=0.9)

    lower_5, lower_25, upper_75, upper_95 = np.percentile(samples, [5, 25, 75, 95], axis=0)
    ax.fill_between(time_points, lower_5, upper_95,
                    alpha=0.3, color=band_color, label='90% CI')
    ax.fill_between(time_points, lower_25, upper_75,
                    alpha=0.5, color=band_color, label='50% CI')

    ax.plot(time_points, np.mean(samples, axis=0), mean_style,
            label='Mean Prediction', linewidth=2, alpha=0.8)
    ax.plot(time_points, np.median(samples, axis=0), median_style,
            label='Median Prediction', linewidth=2, alpha=0.8)

    # A few individual posterior draws, faint, to show the spread behind the bands.
    for row in trace_rows:
        ax.plot(time_points, samples[row], trace_style, alpha=0.2, linewidth=0.8)

    ax.set_title(title, fontsize=12)
    ax.set_xlabel('Time Index')
    ax.set_ylabel(ylabel)
    ax.legend(loc='upper right', fontsize=8)
    ax.grid(True, alpha=0.3)


def plot_basic_idm_validation_results(val_data, validation_results):
    """Plot speed, spacing and acceleration predictions for every validation vehicle.

    One row of three panels is drawn per vehicle id found in
    ``val_data['id_idx']``. Each panel shows the reference series, the
    5-95% and 25-75% percentile bands across posterior draws, the mean and
    median prediction, and up to ten individual draws picked with
    ``np.random.choice`` (global NumPy RNG, so the picks vary between calls
    unless the caller seeds it). Afterwards the parameter-distribution and
    uncertainty figures are produced as well.

    Args:
        val_data: Dict providing 's', 'label_v' and 'id_idx' arrays.
        validation_results: Dict providing 'all_samples_speed_predictions',
            'all_samples_spacing_predictions',
            'all_samples_acceleration_predictions' (one row per draw),
            'real_acceleration' and 'n_samples'.
    """
    s_val = val_data['s']
    label_val = val_data['label_v']
    id_idx_val = val_data['id_idx']

    all_samples_speed = np.asarray(validation_results['all_samples_speed_predictions'])
    all_samples_spacing = np.asarray(validation_results['all_samples_spacing_predictions'])
    all_samples_acceleration = np.asarray(validation_results['all_samples_acceleration_predictions'])
    real_acceleration = validation_results['real_acceleration']
    n_samples = validation_results['n_samples']

    unique_vehicles = np.unique(id_idx_val)
    n_vehicles = len(unique_vehicles)

    # (reference series, per-draw predictions, quantity name, y label,
    #  band colour, mean style, median style, trace style) for each column.
    panels = (
        (label_val, all_samples_speed, 'Speed', 'Speed (m/s)',
         'red', 'b-', 'g--', 'r-'),
        (s_val, all_samples_spacing, 'Spacing', 'Spacing (m)',
         'magenta', 'g-', 'c--', 'm-'),
        (real_acceleration, all_samples_acceleration, 'Acceleration', 'Acceleration (m/s²)',
         'orange', 'c-', 'y--', 'y-'),
    )

    if n_vehicles == 0:
        # plt.subplots cannot build a grid with zero rows.
        print("No validation vehicles to plot.")
    else:
        # squeeze=False keeps `axes` two-dimensional even for a single vehicle.
        fig, axes = plt.subplots(n_vehicles, 3, figsize=(20, 5 * n_vehicles), squeeze=False)

        n_draws = len(all_samples_speed)
        for idx, veh_id in enumerate(unique_vehicles):
            mask = (id_idx_val == veh_id)
            time_points = np.arange(mask.sum())

            # The same draws are highlighted in all three panels of a vehicle.
            sample_indices_to_plot = np.random.choice(n_draws, min(10, n_draws), replace=False)

            for col, (truth, samples, quantity, ylabel, band_color,
                      mean_style, median_style, trace_style) in enumerate(panels):
                _plot_posterior_panel(
                    axes[idx, col], time_points, truth[mask], samples[:, mask],
                    sample_indices_to_plot,
                    truth_label=f'True {quantity}',
                    band_color=band_color,
                    mean_style=mean_style,
                    median_style=median_style,
                    trace_style=trace_style,
                    title=f'Vehicle {veh_id} - {quantity} Prediction\n({n_samples} Posterior Samples)',
                    ylabel=ylabel,
                )

        plt.tight_layout()
        plt.suptitle('Basic IDM Model - Comprehensive Validation Results', fontsize=16, y=1.02)
        plt.show()

    plot_basic_idm_parameter_distributions(validation_results)

    plot_basic_idm_uncertainty_analysis(val_data, validation_results)

def plot_basic_idm_parameter_distributions(validation_results):
    """Plot posterior histograms of the five IDM parameters and a v0-vs-T scatter.

    The first five panels of a 2x3 grid show a density histogram of each
    column of ``validation_results['individual_params']`` (columns taken in
    the order v0, T, a, b, s0) with mean and median markers. The sixth panel
    shows v0 against T with their correlation coefficient when more than one
    draw is available; otherwise it is hidden.

    Args:
        validation_results: Dict providing 'individual_params', a 2-D array
            with one row per posterior draw.
    """
    individual_params = validation_results['individual_params']

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    param_display_names = ['Desired Speed (v0)', 'Time Headway (T)',
                           'Max Acceleration (a)', 'Comfort Decel (b)',
                           'Min Spacing (s0)']

    for i, display_name in enumerate(param_display_names):
        ax = axes[i // 3, i % 3]
        values = individual_params[:, i]
        mean_value = np.mean(values)
        median_value = np.median(values)

        ax.hist(values, bins=30, alpha=0.7, color='skyblue',
                edgecolor='black', density=True)
        ax.axvline(mean_value, color='red', linestyle='--',
                   label=f'Mean: {mean_value:.3f}')
        ax.axvline(median_value, color='green', linestyle='--',
                   label=f'Median: {median_value:.3f}')
        ax.set_title(f'Posterior: {display_name}', fontsize=12)
        ax.set_xlabel('Parameter Value')
        ax.set_ylabel('Density')
        ax.legend(fontsize=8)
        ax.grid(True, alpha=0.3)

    corr_ax = axes[1, 2]
    if individual_params.shape[0] > 1:
        v0_params = individual_params[:, 0]
        T_params = individual_params[:, 1]

        corr_ax.scatter(v0_params, T_params, alpha=0.6, color='purple')
        corr_ax.set_xlabel('Desired Speed (v0)')
        corr_ax.set_ylabel('Time Headway (T)')
        corr_ax.set_title('Parameter Correlation: v0 vs T', fontsize=12)
        corr_ax.grid(True, alpha=0.3)

        correlation = np.corrcoef(v0_params, T_params)[0, 1]
        corr_ax.text(0.05, 0.95, f'Correlation: {correlation:.3f}',
                     transform=corr_ax.transAxes, fontsize=10,
                     bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8))
    else:
        # A correlation needs at least two draws; hide the panel instead of
        # leaving an empty frame in the figure.
        corr_ax.axis('off')

    plt.tight_layout()
    plt.suptitle('Basic IDM - Posterior Parameter Distributions', fontsize=16, y=1.02)
    plt.show()

def plot_basic_idm_uncertainty_analysis(val_data, validation_results):
    """Plot how wide the posterior prediction bands are.

    Three histograms show the distribution of the 5-95% percentile band
    width (across posterior draws) of the speed, spacing and acceleration
    predictions, gathered vehicle by vehicle over all validation records.
    The fourth panel shows the speed band, mean prediction and reference
    speed for the first vehicle id, limited to its first 100 records.

    Args:
        val_data: Dict providing 'id_idx' and 'label_v' arrays.
        validation_results: Dict providing 'all_samples_speed_predictions',
            'all_samples_spacing_predictions' and
            'all_samples_acceleration_predictions' (one row per draw).
    """
    vehicle_ids = val_data['id_idx']
    all_samples_speed = np.asarray(validation_results['all_samples_speed_predictions'])
    all_samples_spacing = np.asarray(validation_results['all_samples_spacing_predictions'])
    all_samples_acceleration = np.asarray(validation_results['all_samples_acceleration_predictions'])

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    def band_width(samples):
        # Width of the 5-95% percentile band across draws, per record.
        lower, upper = np.percentile(samples, [5, 95], axis=0)
        return upper - lower

    all_uncertainty_speed = []
    all_uncertainty_spacing = []
    all_uncertainty_acceleration = []

    unique_vehicles = np.unique(vehicle_ids)

    for veh_id in unique_vehicles:
        mask = (vehicle_ids == veh_id)
        all_uncertainty_speed.extend(band_width(all_samples_speed[:, mask]))
        all_uncertainty_spacing.extend(band_width(all_samples_spacing[:, mask]))
        all_uncertainty_acceleration.extend(band_width(all_samples_acceleration[:, mask]))

    uncertainty_data = [
        (all_uncertainty_speed, 'Speed Uncertainty (90% CI Width)', 'lightblue', 'm/s'),
        (all_uncertainty_spacing, 'Spacing Uncertainty (90% CI Width)', 'lightgreen', 'm'),
        (all_uncertainty_acceleration, 'Acceleration Uncertainty (90% CI Width)', 'lightyellow', 'm/s²')
    ]

    # axes.flat walks the grid row by row, so the three histograms fill
    # (0, 0), (0, 1) and (1, 0); (1, 1) is left for the time-series panel.
    for ax, (data, title, color, unit) in zip(axes.flat, uncertainty_data):
        ax.hist(data, bins=30, alpha=0.7, color=color, edgecolor='black')
        ax.axvline(np.mean(data), color='red', linestyle='--',
                   label=f'Mean: {np.mean(data):.3f} {unit}')
        ax.axvline(np.median(data), color='blue', linestyle='--',
                   label=f'Median: {np.median(data):.3f} {unit}')
        ax.set_title(title, fontsize=12)
        ax.set_xlabel(f'Uncertainty ({unit})')
        ax.set_ylabel('Frequency')
        ax.legend(fontsize=8)
        ax.grid(True, alpha=0.3)

    if len(unique_vehicles) > 0:
        first_vehicle_mask = (vehicle_ids == unique_vehicles[0])
        n_points = min(100, np.sum(first_vehicle_mask))
        time_points = np.arange(n_points)

        # Only the first `n_points` records of the first vehicle are shown.
        head_samples = all_samples_speed[:, first_vehicle_mask][:, :n_points]
        lower_5 = np.percentile(head_samples, 5, axis=0)
        upper_95 = np.percentile(head_samples, 95, axis=0)
        mean_speed = np.mean(head_samples, axis=0)

        ts_ax = axes[1, 1]
        ts_ax.fill_between(time_points, lower_5, upper_95, alpha=0.3, color='red', label='90% CI')
        ts_ax.plot(time_points, mean_speed, 'b-', label='Mean Prediction', linewidth=2)
        ts_ax.plot(time_points, val_data['label_v'][first_vehicle_mask][:n_points],
                   'k-', label='True Speed', linewidth=2, alpha=0.8)
        ts_ax.set_title(f'Vehicle {unique_vehicles[0]} - Speed Uncertainty Over Time', fontsize=12)
        ts_ax.set_xlabel('Time Index')
        ts_ax.set_ylabel('Speed (m/s)')
        ts_ax.legend(fontsize=8)
        ts_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.suptitle('Basic IDM - Uncertainty Analysis', fontsize=16, y=1.02)
    plt.show()

def print_basic_idm_validation_summary(validation_results):
    """Print the overall, per-vehicle and posterior-parameter validation report.

    Reads 'speed_metrics', 'acceleration_metrics' and 'spacing_metrics'
    (dicts with 'rmse', 'mae', 'nrmse'), 'vehicle_metrics' (vehicle id ->
    dict holding the same three metric dicts under 'speed', 'acceleration'
    and 'spacing') and 'individual_params' (2-D array, one column per IDM
    parameter) from ``validation_results`` and writes the report to stdout.
    """
    # (heading, metric dict, unit) for the three overall sections.
    overall_sections = (
        ("Speed Prediction:", validation_results['speed_metrics'], "m/s"),
        ("\nAcceleration Prediction:", validation_results['acceleration_metrics'], "m/s²"),
        ("\nSpacing Prediction:", validation_results['spacing_metrics'], "m"),
    )
    per_vehicle = validation_results['vehicle_metrics']

    print("\nOVERALL PERFORMANCE METRICS:")
    for heading, scores, unit in overall_sections:
        print(heading)
        print(f"  - RMSE: {scores['rmse']:.4f} {unit}")
        print(f"  - MAE: {scores['mae']:.4f} {unit}")
        # NRMSE is printed without a unit.
        print(f"  - NRMSE: {scores['nrmse']:.4f}")

    # (label, key in the per-vehicle dict, unit) for the vehicle-level lines.
    quantities = (
        ("Speed", 'speed', "m/s"),
        ("Acceleration", 'acceleration', "m/s²"),
        ("Spacing", 'spacing', "m"),
    )

    print("\nVEHICLE-LEVEL PERFORMANCE:")
    for veh_id, metrics in per_vehicle.items():
        print(f"\nVehicle {veh_id}:")
        for label, key, unit in quantities:
            scores = metrics[key]
            print(f"  {label} - RMSE: {scores['rmse']:.4f} {unit}, MAE: {scores['mae']:.4f} {unit}")

    # Posterior statistics, one line per parameter column.
    individual_params = validation_results['individual_params']
    param_display_names = ['Desired Speed', 'Time Headway', 'Max Acceleration',
                           'Comfort Deceleration', 'Min Spacing']

    print("\nPARAMETER POSTERIOR STATISTICS:")
    for column, name in enumerate(param_display_names):
        draws = individual_params[:, column]
        ci_low, ci_high = np.percentile(draws, [2.5, 97.5])
        print(f"  {name}: Mean = {np.mean(draws):.3f}, Std = {np.std(draws):.3f}, "
              f"95% CI = [{ci_low:.3f}, {ci_high:.3f}]")


def run_basic_idm_calibration_only(ar_idm_data):
    """Split the data and fit the basic IDM on the training part.

    The data is split with ``split_data_for_ar_idm`` using a training ratio
    of 0.7, and ``train_basic_idm_model`` is run on the training split.

    Returns:
        Dict with 'trace', 'model', 'train_data' and 'val_data'.
    """
    partitions = split_data_for_ar_idm(ar_idm_data, train_ratio=0.7)
    train_data, val_data = partitions

    fitted = train_basic_idm_model(train_data)
    trace, model = fitted

    return dict(trace=trace, model=model, train_data=train_data, val_data=val_data)

def run_basic_idm_validation_only(calibration_results, n_posterior_samples=100):
    """Validate a calibrated basic IDM, plot the results and print the summary.

    Args:
        calibration_results: Dict providing 'trace', 'model', 'train_data'
            and 'val_data'.
        n_posterior_samples: Number of posterior draws passed to the
            validation routine as ``n_samples``.

    Returns:
        The validation results dict produced by
        ``validate_basic_idm_model_comprehensive_improved``.
    """
    held_out = calibration_results['val_data']

    report = validate_basic_idm_model_comprehensive_improved(
        calibration_results['trace'],
        calibration_results['model'],
        calibration_results['train_data'],
        held_out,
        n_samples=n_posterior_samples,
    )

    # Figures first, then the textual summary.
    plot_basic_idm_validation_results(held_out, report)
    print_basic_idm_validation_summary(report)

    return report


In [None]:
# Notebook cell: split the data and fit the basic IDM (ar_idm_data must already be defined).
calibration_results = run_basic_idm_calibration_only(ar_idm_data)

In [None]:
# Notebook cell: validate the fitted model with 100 posterior draws, then plot and print the results.
validation_results = run_basic_idm_validation_only(calibration_results, n_posterior_samples=100)