# In [None]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

# IDM (Intelligent Driver Model) acceleration law
def idm_model(s, v, dv, v0=30.0, T=1.5, a=1.0, b=2.0, s0=2.0):
    """
    Compute the follower acceleration given by the IDM formula.

    Parameters
    ----------
    s : float
        Current gap to the leader. Values at or below zero are floored to a
        tiny positive gap so the interaction term saturates to full braking
        instead of dividing by zero (or, for a negative gap, behaving like a
        large positive one).
    v : float
        Current follower speed.
    dv : float
        Speed difference used in the ``v * dv / (2 * sqrt(a * b))`` term.
        NOTE(review): the sign convention comes from the caller's data
        (``dvReal``) - confirm it is the approach rate.
    v0, T, a, b, s0 : float
        Desired speed, time headway, maximum acceleration, comfortable
        deceleration and standstill gap.

    Returns
    -------
    float
        Acceleration clipped to the interval ``[-b, a]``.
    """
    # Floor the gap: a zero/negative gap means contact, i.e. brake as hard
    # as the clip below allows.
    min_gap = 1e-6
    gap = max(s, min_gap)

    # Desired dynamic gap; the interaction part is never allowed to shrink
    # it below the standstill gap s0.
    s_star = s0 + max(0, v * T + (v * dv) / (2 * np.sqrt(a * b)))

    # Free-road term minus interaction term.
    acceleration = a * (1 - (v / v0)**4 - (s_star / gap)**2)

    # Physical limit: keep the result within [-b, a].
    acceleration = np.clip(acceleration, -b, a)

    return acceleration

# Acceleration estimate with optional Savitzky-Golay smoothing
def calculate_smoothed_acceleration(v_data, dt, window_length=0.5, polyorder=2):
    """
    Differentiate a speed series, smoothing it first when the window is usable.

    Parameters
    ----------
    v_data : array-like
        Speed samples.
    dt : float
        Sample spacing handed to ``np.gradient``.
    window_length : int, optional
        Savitzky-Golay window length in samples. Smoothing is applied only
        when this is a whole number greater than ``polyorder`` and not longer
        than ``v_data``; otherwise the raw series is differentiated.
        NOTE(review): the default 0.5 is not a usable window, so default
        calls differentiate the unsmoothed series (same result as before,
        which got there through a swallowed exception) - confirm whether a
        sample count such as 5 was intended.
    polyorder : int, optional
        Polynomial order of the Savitzky-Golay filter.

    Returns
    -------
    numpy.ndarray
        Acceleration estimate with one value per input sample. Inputs with
        fewer than two samples yield zeros because no difference can be
        formed.
    """
    raw_speeds = np.asarray(v_data, dtype=float)
    n_samples = raw_speeds.size

    # np.gradient needs at least two points.
    if n_samples < 2:
        return np.zeros(n_samples)

    window_usable = (
        float(window_length).is_integer()
        and polyorder < window_length <= n_samples
    )

    speeds = raw_speeds
    if window_usable:
        try:
            speeds = savgol_filter(raw_speeds, int(window_length), polyorder)
        except ValueError:
            # Some SciPy versions reject otherwise plausible windows (e.g.
            # even lengths); keep the documented fallback to the raw series.
            speeds = raw_speeds

    return np.gradient(speeds, dt)

# Normalised root-mean-square error
def calculate_nrmse(real, pred):
    """Return the RMSE of ``pred`` against ``real`` divided by the spread of ``real``.

    The spread is ``max(real) - min(real)``. When it is below 1e-6 the
    standard deviation of ``real`` plus 1e-6 is used as the divisor instead,
    so a flat reference series does not divide by zero. If either input is
    empty the function returns 1.0.
    """
    if not (len(real) and len(pred)):
        return 1.0

    squared_residuals = (real - pred) ** 2
    root_mean_square = np.sqrt(np.mean(squared_residuals))
    spread = np.max(real) - np.min(real)

    # Near-constant reference: fall back to a std-based scale.
    scale = np.std(real) + 1e-6 if spread < 1e-6 else spread
    return root_mean_square / scale

# Root-mean-square error
def calculate_rmse(real, pred):
    """Return the root-mean-square difference between ``real`` and ``pred``.

    If either input is empty the function returns 1.0.
    """
    if 0 in (len(real), len(pred)):
        return 1.0
    residuals = real - pred
    mean_squared = np.mean(residuals ** 2)
    return np.sqrt(mean_squared)

# Calibration objective: equally weighted sum of speed, spacing and acceleration NRMSE
def idm_objective_enhanced(params, data):
    """
    Score one IDM parameter vector against a recorded car-following track.

    The follower is simulated step by step from the first recorded speed and
    gap, driven by the recorded speed difference series, and the simulated
    speed, gap and acceleration are compared with the recorded ones.

    Parameters
    ----------
    params : sequence of float
        ``(v0, T, a, b, s0)``.
    data : tuple
        ``(s_data, v_data, dv_data, dt, ...)``. Only the first four entries
        are read; any further entries are ignored.

    Returns
    -------
    float
        ``NRMSE_speed + NRMSE_spacing + NRMSE_acceleration``, or the penalty
        value ``1e10`` when the parameters lie outside the feasibility box or
        the track has fewer than 10 usable samples.
    """
    v0, T, a, b, s0 = params

    # Hard feasibility box; anything outside is rejected with a large penalty.
    if (v0 < 12 or v0 > 45 or T < 0.2 or T > 5.0 or
        a < 0.2 or a > 6.0 or b < 0.2 or b > 6.0 or s0 < 0.5 or s0 > 10.0):
        return 1e10

    s_data, v_data, dv_data, dt = data[:4]

    # Plain float arrays make positional indexing safe regardless of whether
    # the caller passed lists, arrays or pandas Series.
    s_data = np.asarray(s_data, dtype=float)
    v_data = np.asarray(v_data, dtype=float)
    dv_data = np.asarray(dv_data, dtype=float)

    # Reject short tracks before doing any work (np.gradient would raise on
    # fewer than two samples, and the simulation is wasted otherwise).
    n_steps = min(len(s_data), len(v_data))
    if n_steps < 10:
        return 1e10

    # Differentiate the full speed series first, then trim, so the value at
    # the last kept index is unchanged when v_data is longer than s_data.
    acc_real = np.gradient(v_data, dt)[:n_steps]

    v_sim = np.empty(n_steps)
    s_sim = np.empty(n_steps)
    acc_sim = np.empty(n_steps)
    v_sim[0] = v_data[0]
    s_sim[0] = s_data[0]
    # The first simulated acceleration is seeded from the recorded speeds.
    acc_sim[0] = (v_data[1] - v_data[0]) / dt

    for i in range(n_steps - 1):
        current_acc = idm_model(s_sim[i], v_sim[i], dv_data[i], v0, T, a, b, s0)

        v_next = v_sim[i] + current_acc * dt
        # NOTE(review): this subtracts an acceleration from a speed
        # difference inside the same bracket; kept exactly as calibrated
        # so far - confirm the intended spacing kinematics.
        s_next = s_sim[i] + (dv_data[i] - current_acc) * dt

        # Loose physical limits on the simulated state.
        v_sim[i + 1] = max(0.1, min(45.0, v_next))
        s_sim[i + 1] = max(0.5, min(250.0, s_next))
        acc_sim[i + 1] = current_acc

    nrmse_v = calculate_nrmse(v_data[:n_steps], v_sim)
    nrmse_s = calculate_nrmse(s_data[:n_steps], s_sim)
    nrmse_acc = calculate_nrmse(acc_real, acc_sim)

    # Equal weights for the three error components.
    weights = (1, 1, 1)
    total_nrmse = (weights[0] * nrmse_v +
                   weights[1] * nrmse_s +
                   weights[2] * nrmse_acc)

    return total_nrmse

# Multi-start IDM calibration
def calibrate_idm_enhanced(track_data, dt=0.5):
    """
    Calibrate ``(v0, T, a, b, s0)`` for one track with multi-start L-BFGS-B.

    Parameters
    ----------
    track_data : mapping
        Must provide ``'sReal'``, ``'vFollReal'``, ``'dvReal'`` and
        ``'vFollReal_next'``.
    dt : float, optional
        Time step forwarded to the objective function.

    Returns
    -------
    numpy.ndarray or None
        Best parameter vector found, or ``None`` when no run produced an
        objective value below the acceptance threshold.
    """
    s_data = track_data['sReal']
    v_data = track_data['vFollReal']
    dv_data = track_data['dvReal']
    v_next_data = track_data['vFollReal_next']

    # The last slot is a placeholder; the objective recomputes the reference
    # acceleration itself.
    data = (s_data, v_data, dv_data, dt, v_next_data, None)

    # Starting points covering different driving styles.
    initial_guesses = [
        # Aggressive: high desired speed, short headway, strong acceleration
        [35.0, 0.8, 2.5, 3.0, 1.5],
        # Conservative: low desired speed, long headway, weak acceleration
        [20.0, 2.5, 0.8, 1.5, 3.0],
        # Moderate: balanced parameters
        [28.0, 1.5, 1.2, 2.5, 2.5],
        # Quick-reacting: short headway, medium speed
        [30.0, 0.6, 1.8, 3.5, 2.0],
        # Safety-first: long headway, gentle acceleration/braking
        [25.0, 3.0, 0.6, 1.2, 3.0],
        # High-speed aggressive
        [35.0, 1.0, 3.0, 3.0, 1.0],
        # Low-speed conservative
        [18.0, 3.5, 0.5, 1.0, 3.0]
    ]

    # Optimizer bounds are the intersection of the previous bounds with the
    # feasibility box enforced inside idm_objective_enhanced. Wider bounds
    # let L-BFGS-B step onto the constant 1e10 penalty plateau, where the
    # gradient is zero and the search stalls; the set of acceptable
    # solutions is unchanged.
    bounds = [
        (15.0, 45.0),   # v0
        (0.2, 5.0),     # T
        (0.2, 6.0),     # a
        (0.2, 6.0),     # b
        (0.5, 9.0)      # s0
    ]

    def run_lbfgsb(start, options):
        """Run one bounded L-BFGS-B minimisation from ``start``."""
        return minimize(
            idm_objective_enhanced,
            start,
            args=(data,),
            bounds=bounds,
            method='L-BFGS-B',
            options=options
        )

    best_score = float('inf')
    best_params = None

    print(f"  尝试 {len(initial_guesses)} 个不同的初始点...")

    # Try every starting point and keep the lowest objective value.
    for i, initial_guess in enumerate(initial_guesses):
        try:
            result = run_lbfgsb(
                initial_guess,
                {'maxiter': 800, 'ftol': 1e-6, 'gtol': 1e-6}
            )
        except Exception as exc:
            # Best effort: a failing start must not abort the others, but
            # report it instead of hiding it.
            print(f"    初始点 {i+1}: 优化失败 ({exc})")
            continue

        if result.fun < best_score:
            best_score = result.fun
            best_params = result.x
            print(f"    初始点 {i+1}: NRMSE = {result.fun:.4f} - 找到更好解")

    # Every start raised: retry once from a generic starting point.
    if best_params is None:
        print("  多起点优化失败，使用单起点优化...")
        initial_guess = [25.0, 1.5, 1.0, 2.0, 2.0]
        try:
            result = run_lbfgsb(initial_guess, {'maxiter': 500, 'ftol': 1e-5})
        except Exception as exc:
            print(f"  单起点优化失败 ({exc})")
            return None
        if result.fun < 10:
            best_params = result.x
            best_score = result.fun
        else:
            return None

    # Deliberately loose acceptance threshold so more tracks calibrate.
    if best_score < 15.0:
        print(f"  最终结果: NRMSE = {best_score:.4f}")
        return best_params
    else:
        print(f"  结果不佳: NRMSE = {best_score:.4f}")
        return None

# Per-track error metrics, including signed error series over time
def calculate_errors(track_data, params, dt=0.5):
    """
    Replay a track with the given IDM parameters and report the fit errors.

    Parameters
    ----------
    track_data : mapping
        Must provide ``'sReal'``, ``'vFollReal'``, ``'dvReal'`` and
        ``'vFollReal_next'``.
    params : sequence of float
        ``(v0, T, a, b, s0)``.
    dt : float, optional
        Time step used for the simulation and for differentiating the
        recorded speed.

    Returns
    -------
    dict
        NRMSE and RMSE for speed, spacing and acceleration, their totals,
        the simulated series, the signed error series (simulated minus
        recorded) and a ``'time_index'`` array.
    """
    gaps = track_data['sReal']
    speeds = track_data['vFollReal']
    rel_speeds = track_data['dvReal']
    # Not used below; the lookup is kept so a track without this key is
    # rejected exactly as before.
    _ = track_data['vFollReal_next']

    v0, T, a, b, s0 = params

    # Reference acceleration from the recorded speed series.
    accel_ref = np.gradient(speeds, dt)

    # Simulated series start from the first recorded state.
    sim_speed = [speeds[0]]
    sim_gap = [gaps[0]]
    first_accel = (speeds[1] - speeds[0]) / dt if len(speeds) > 1 else 0.0
    sim_accel = [first_accel]

    for step in range(len(gaps) - 1):
        accel = idm_model(sim_gap[-1], sim_speed[-1], rel_speeds[step], v0, T, a, b, s0)
        next_speed = sim_speed[-1] + accel * dt
        next_gap = sim_gap[-1] + (rel_speeds[step] - accel) * dt

        # Keep the simulated state inside loose physical limits.
        sim_speed.append(max(0.1, min(45.0, next_speed)))
        sim_gap.append(max(0.5, min(250.0, next_gap)))
        sim_accel.append(accel)

    sim_speed = np.array(sim_speed)
    sim_gap = np.array(sim_gap)
    sim_accel = np.array(sim_accel)

    # Compare only the part that every series covers.
    n_common = min(map(len, (speeds, sim_speed, gaps, sim_gap, accel_ref, sim_accel)))

    speed_ref, speed_fit = speeds[:n_common], sim_speed[:n_common]
    gap_ref, gap_fit = gaps[:n_common], sim_gap[:n_common]
    acc_ref, acc_fit = accel_ref[:n_common], sim_accel[:n_common]

    pairs = ((speed_ref, speed_fit), (gap_ref, gap_fit), (acc_ref, acc_fit))
    nrmse_v, nrmse_s, nrmse_acc = (calculate_nrmse(ref, fit) for ref, fit in pairs)
    rmse_speed, rmse_spacing, rmse_acceleration = (
        calculate_rmse(ref, fit) for ref, fit in pairs
    )

    return {
        'NRMSE_speed': nrmse_v,
        'NRMSE_spacing': nrmse_s,
        'NRMSE_acceleration': nrmse_acc,
        'RMSE_speed': rmse_speed,
        'RMSE_spacing': rmse_spacing,
        'RMSE_acceleration': rmse_acceleration,
        'Total_NRMSE': nrmse_v + nrmse_s + nrmse_acc,
        'Total_RMSE': rmse_speed + rmse_spacing + rmse_acceleration,
        'v_sim': speed_fit,
        's_sim': gap_fit,
        'acc_sim': acc_fit,
        # Signed errors: positive means the simulation is above the record.
        'speed_error': speed_fit - speed_ref,
        'spacing_error': gap_fit - gap_ref,
        'acceleration_error': acc_fit - acc_ref,
        'time_index': np.arange(n_common)
    }

# Per-driver plot of the signed prediction errors over time
def plot_prediction_errors_single(driver_name, vehicle_pair, track_data, errors):
    """
    Plot one driver's speed, spacing and acceleration errors in three stacked panels.

    ``errors`` must provide ``'time_index'``, ``'speed_error'``,
    ``'spacing_error'`` and ``'acceleration_error'``. ``track_data`` is
    accepted for interface compatibility and is not read here.
    """
    fig, axes = plt.subplots(3, 1, figsize=(12, 10))
    fig.suptitle(f'{driver_name} ({vehicle_pair}) - Prediction Errors Over Time', fontsize=14)

    steps = errors['time_index']

    # (errors key, quantity name, y-axis label, line format, fill colour)
    panels = (
        ('speed_error', 'Speed', 'Speed Error (m/s)', 'b-', 'blue'),
        ('spacing_error', 'Spacing', 'Spacing Error (m)', 'g-', 'green'),
        ('acceleration_error', 'Acceleration', 'Acceleration Error (m/s²)', 'purple', 'purple'),
    )

    for ax, (key, quantity, y_label, line_fmt, fill_color) in zip(axes, panels):
        series = errors[key]
        ax.plot(steps, series, line_fmt, linewidth=1.5, alpha=0.8)
        # Zero line as the "no error" reference.
        ax.axhline(y=0, color='r', linestyle='--', alpha=0.5)
        ax.set_ylabel(y_label)
        ax.set_title(f'{quantity} Prediction Error (Mean: {np.mean(series):.3f}, Std: {np.std(series):.3f})')
        ax.grid(True, alpha=0.3)
        ax.fill_between(steps, series, 0, alpha=0.3, color=fill_color)

    # Only the bottom panel carries the shared x-axis label.
    axes[2].set_xlabel('Time Index')

    plt.tight_layout()
    plt.show()

# Comparison plot of the prediction errors of all drivers
def plot_all_drivers_prediction_errors(individual_results):
    """
    Overlay every driver's speed, spacing and acceleration error series.

    Each value of ``individual_results`` must provide ``'driver_name'`` and
    an ``'errors'`` dict with ``'time_index'`` and the three error series.
    One colour per driver is taken from the ``Set3`` colormap.
    """
    n_drivers = len(individual_results)

    fig, axes = plt.subplots(3, 1, figsize=(15, 12))
    fig.suptitle('Prediction Errors Over Time - All Drivers Comparison', fontsize=16)

    palette = plt.cm.Set3(np.linspace(0, 1, n_drivers))

    # (errors key, y-axis label, panel title), top to bottom
    panels = (
        ('speed_error', 'Speed Error (m/s)', 'Speed Prediction Errors - All Drivers'),
        ('spacing_error', 'Spacing Error (m)', 'Spacing Prediction Errors - All Drivers'),
        ('acceleration_error', 'Acceleration Error (m/s²)', 'Acceleration Prediction Errors - All Drivers'),
    )

    for driver_color, result in zip(palette, individual_results.values()):
        label = result['driver_name']
        errors = result['errors']
        steps = errors['time_index']
        for ax, (key, _, _) in zip(axes, panels):
            ax.plot(steps, errors[key],
                    color=driver_color, linewidth=1.5, alpha=0.7, label=label)

    for ax, (_, y_label, title) in zip(axes, panels):
        ax.axhline(y=0, color='black', linestyle='--', alpha=0.5)
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.grid(True, alpha=0.3)
        # Legend sits outside the plotting area, to the right.
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    axes[2].set_xlabel('Time Index')

    plt.tight_layout()
    plt.show()

# Recorded vs simulated trajectories, one row of panels per driver
def plot_error_trajectories(individual_results, dt=0.5):
    """
    Plot recorded and simulated speed, spacing and acceleration for each driver.

    Parameters
    ----------
    individual_results : mapping
        Each value must provide ``'driver_name'``, ``'track_data'`` (with
        ``'vFollReal'`` and ``'sReal'``) and ``'errors'`` (with ``'v_sim'``,
        ``'s_sim'`` and ``'acc_sim'``).
    dt : float, optional
        Time step used to differentiate the recorded speed. The default
        matches the default of ``calibrate_idm_enhanced`` and
        ``calculate_errors``, so the recorded acceleration is on the same
        scale as the simulated one (it was previously hard-coded to 0.1).
    """
    n_drivers = len(individual_results)
    if n_drivers == 0:
        # plt.subplots cannot create a grid with zero rows.
        return

    # squeeze=False always yields a 2-D axes array, including for one driver.
    fig, axes = plt.subplots(n_drivers, 3, figsize=(18, 4*n_drivers), squeeze=False)

    for idx, result in enumerate(individual_results.values()):
        driver_name = result['driver_name']
        track_data = result['track_data']
        errors = result['errors']

        time_idx = np.arange(len(track_data['vFollReal']))

        # Speed
        ax_speed = axes[idx, 0]
        ax_speed.plot(time_idx, track_data['vFollReal'], 'b-', label='Real Speed', linewidth=2, alpha=0.8)
        ax_speed.plot(time_idx[:len(errors['v_sim'])], errors['v_sim'], 'r--', label='Simulated Speed', linewidth=2)
        ax_speed.set_ylabel('Speed (m/s)')
        ax_speed.set_title(f'{driver_name} - Speed')

        # Spacing
        ax_gap = axes[idx, 1]
        ax_gap.plot(time_idx, track_data['sReal'], 'g-', label='Real Spacing', linewidth=2, alpha=0.8)
        ax_gap.plot(time_idx[:len(errors['s_sim'])], errors['s_sim'], 'r--', label='Simulated Spacing', linewidth=2)
        ax_gap.set_ylabel('Spacing (m)')
        ax_gap.set_title(f'{driver_name} - Spacing')

        # Acceleration; both series are cut to their common length.
        acc_real = calculate_smoothed_acceleration(track_data['vFollReal'], dt)
        min_len = min(len(acc_real), len(errors['acc_sim']))
        ax_acc = axes[idx, 2]
        ax_acc.plot(time_idx[:min_len], acc_real[:min_len], 'b-', label='Real Acceleration', linewidth=2, alpha=0.8)
        ax_acc.plot(time_idx[:min_len], errors['acc_sim'][:min_len], 'r--', label='Simulated Acceleration', linewidth=2)
        ax_acc.set_ylabel('Acceleration (m/s²)')
        ax_acc.set_title(f'{driver_name} - Acceleration')

        for ax in axes[idx, :]:
            ax.legend()
            ax.grid(True, alpha=0.3)

    # Only the bottom row carries x-axis labels.
    for ax in axes[-1, :]:
        ax.set_xlabel('Time Index')

    plt.tight_layout()
    plt.show()

# Distribution of the calibrated parameters across drivers
def plot_parameter_distribution(individual_results):
    """
    Show how each IDM parameter varies across drivers.

    Draws two figures: five stacked line plots (one per parameter, with the
    mean and the mean +/- one standard deviation marked), then a single box
    plot of all five parameters with the individual values scattered on top.
    Each value of ``individual_results`` must provide ``'params'`` and
    ``'driver_name'``.
    """
    records = list(individual_results.values())
    all_params = np.array([record['params'] for record in records])
    driver_names = [record['driver_name'] for record in records]

    param_names = ['v0 (m/s)', 'T (s)', 'a (m/s²)', 'b (m/s²)', 's0 (m)']
    param_short_names = ['v0', 'T', 'a', 'b', 's0']
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

    fig, axes = plt.subplots(5, 1, figsize=(12, 15))

    for col, (ax, param_name, color) in enumerate(zip(axes, param_names, colors)):
        param_values = all_params[:, col]
        x_pos = np.arange(len(driver_names))

        ax.plot(x_pos, param_values, 'o-', linewidth=2, markersize=8, color=color, alpha=0.8)

        # Print each value just above its marker.
        for x, value in zip(x_pos, param_values):
            ax.annotate(f'{value:.2f}',
                        (x, value),
                        textcoords="offset points",
                        xytext=(0, 10),
                        ha='center',
                        fontsize=9,
                        bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.7))

        ax.set_ylabel(param_name)
        ax.set_title(f'{param_name} Distribution Across Drivers')
        ax.set_xticks(x_pos)
        ax.set_xticklabels(driver_names, rotation=45)
        ax.grid(True, alpha=0.3)

        # Mean and one-standard-deviation band.
        mean_val = np.mean(param_values)
        std_val = np.std(param_values)
        ax.axhline(y=mean_val, color='red', linestyle='--', alpha=0.7, label=f'Mean: {mean_val:.2f}')
        ax.axhline(y=mean_val + std_val, color='orange', linestyle=':', alpha=0.5, label='±1 STD')
        ax.axhline(y=mean_val - std_val, color='orange', linestyle=':', alpha=0.5)
        ax.legend()

    plt.tight_layout()
    plt.show()

    # Box plot comparison of all parameters.
    fig_box, ax_box = plt.subplots(figsize=(10, 6))
    box_data = [all_params[:, col] for col, _ in enumerate(param_short_names)]
    box_plot = ax_box.boxplot(box_data, labels=param_short_names, patch_artist=True)

    for patch, color in zip(box_plot['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.7)

    ax_box.set_ylabel('Parameter Values')
    ax_box.set_title('IDM Parameter Distributions Comparison')
    ax_box.grid(True, alpha=0.3)

    # Individual values, jittered horizontally around each box position.
    for position, values in enumerate(box_data, start=1):
        jittered_x = np.random.normal(position, 0.04, size=len(values))
        ax_box.scatter(jittered_x, values, alpha=0.6, color=colors[position - 1], s=30)

    # Mean of each parameter printed just above its largest value.
    for position, values in enumerate(box_data, start=1):
        mean = np.mean(values)
        ax_box.text(position, np.max(values) + 0.1, f'mean: {mean:.2f}',
                    ha='center', va='bottom', fontsize=8)

    plt.tight_layout()
    plt.show()

# Inter-driver heterogeneity analysis based on the multi-start calibration
def analyze_inter_driver_heterogeneity_enhanced(ar_idm_data):
    """
    Calibrate every track in ``ar_idm_data['tracks']`` and analyse the spread.

    For each track the parameters are calibrated, the fit errors are
    computed, a per-driver error plot is shown and the outcome is stored.
    Returns the result of ``analyze_individual_results_enhanced`` for the
    successfully calibrated drivers, or ``None`` when none succeeded.
    """
    banner = "=" * 60
    print(banner)
    print("任务1：驾驶员间差异性分析（增强校准）")
    print(banner)

    print("\n进行个体IDM校准（使用多起点优化）...")
    individual_results = {}
    param_names = ['v0', 'T', 'a', 'b', 's0']

    for driver_id, track_data in ar_idm_data['tracks'].items():
        driver_name = track_data.get('driver_id', f'Driver_{driver_id}')
        vehicle_pair = track_data.get('vehicle_pair', 'Unknown')

        print(f"\n校准 {driver_name} ({vehicle_pair})...")
        print(f"数据点数: {len(track_data['vFollReal'])}")

        individual_params = calibrate_idm_enhanced(track_data)

        # Skip drivers whose calibration did not produce parameters.
        if individual_params is None:
            print(f"  {driver_name} 校准失败")
            continue

        errors = calculate_errors(track_data, individual_params)
        individual_results[driver_id] = {
            'params': individual_params,
            'errors': errors,
            'driver_name': driver_name,
            'vehicle_pair': vehicle_pair,
            'data_points': len(track_data['vFollReal']),
            'track_data': track_data
        }
        print(f"  {driver_name} 校准成功")
        params_str = " | ".join(
            f"{name}: {value:.3f}" for name, value in zip(param_names, individual_params)
        )
        print(f"  参数: {params_str}")
        print(f"  Total NRMSE: {errors['Total_NRMSE']:.4f}")

        # One prediction-error figure per driver.
        plot_prediction_errors_single(driver_name, vehicle_pair, track_data, errors)

    if not individual_results:
        print("没有成功的个体校准结果")
        return None

    # Summary and comparison across drivers.
    print("\n" + banner)
    print("驾驶员间差异性分析结果")
    print(banner)

    return analyze_individual_results_enhanced(individual_results)

def analyze_individual_results_enhanced(individual_results):
    """
    Summarise and visualise the per-driver calibration results.

    Prints parameter statistics (mean, standard deviation, coefficient of
    variation, min, max, range), a per-driver table and RMSE statistics,
    then draws the trajectory, parameter-distribution and error-comparison
    figures plus two bar charts of total NRMSE / RMSE per driver.

    Parameters
    ----------
    individual_results : mapping
        Each value must provide ``'params'``, ``'driver_name'``,
        ``'data_points'`` and an ``'errors'`` dict with ``'Total_NRMSE'``,
        ``'Total_RMSE'``, ``'RMSE_speed'``, ``'RMSE_spacing'`` and
        ``'RMSE_acceleration'``.

    Returns
    -------
    dict
        The input results together with the computed statistics and the
        per-driver error lists.
    """
    results = list(individual_results.values())

    all_params = np.array([r['params'] for r in results])
    driver_names = [r['driver_name'] for r in results]
    all_nrmse_errors = [r['errors']['Total_NRMSE'] for r in results]
    all_rmse_errors = [r['errors']['Total_RMSE'] for r in results]
    all_rmse_speed = [r['errors']['RMSE_speed'] for r in results]
    all_rmse_spacing = [r['errors']['RMSE_spacing'] for r in results]
    all_rmse_acceleration = [r['errors']['RMSE_acceleration'] for r in results]

    # Column-wise statistics, one entry per parameter.
    param_means = np.mean(all_params, axis=0)
    param_stds = np.std(all_params, axis=0)
    param_cvs = param_stds / param_means
    param_ranges = np.ptp(all_params, axis=0)

    param_names = ['v0', 'T', 'a', 'b', 's0']

    print("\n个体参数统计（驾驶员间差异性）:")
    print(f"{'参数':<8} {'均值':<8} {'标准差':<8} {'变异系数':<10} {'最小值':<8} {'最大值':<8} {'极差':<8}")
    for i, name in enumerate(param_names):
        min_val = np.min(all_params[:, i])
        max_val = np.max(all_params[:, i])
        range_val = param_ranges[i]
        print(f"{name:<8} {param_means[i]:<8.3f} {param_stds[i]:<8.3f} {param_cvs[i]:<10.3f} {min_val:<8.3f} {max_val:<8.3f} {range_val:<8.3f}")

    print("\n各驾驶员详细参数:")
    print(f"{'驾驶员':<15} {'v0':<8} {'T':<8} {'a':<8} {'b':<8} {'s0':<8} {'Total_NRMSE':<12} {'Total_RMSE':<12} {'数据点':<8}")
    for result in results:
        params = result['params']
        errors = result['errors']
        points = result['data_points']
        print(f"{result['driver_name']:<15} {params[0]:<8.3f} {params[1]:<8.3f} {params[2]:<8.3f} {params[3]:<8.3f} {params[4]:<8.3f} {errors['Total_NRMSE']:<12.4f} {errors['Total_RMSE']:<12.4f} {points:<8}")

    print(f"\nRMSE详细统计:")
    print(f"{'指标':<25} {'均值':<10} {'标准差':<10} {'最小值':<10} {'最大值':<10}")
    rmse_rows = (
        ('RMSE_speed (m/s)', all_rmse_speed),
        ('RMSE_spacing (m)', all_rmse_spacing),
        ('RMSE_acceleration (m/s²)', all_rmse_acceleration),
        ('Total_RMSE', all_rmse_errors),
    )
    for label, values in rmse_rows:
        print(f"{label:<25} {np.mean(values):<10.4f} {np.std(values):<10.4f} {np.min(values):<10.4f} {np.max(values):<10.4f}")

    # Overall statistics.
    avg_cv = np.mean(param_cvs)
    max_cv_param = param_names[np.argmax(param_cvs)]
    max_cv_value = np.max(param_cvs)

    print(f"\n总体统计:")
    print(f"参数平均变异系数: {avg_cv:.3f}")
    print(f"变异最大的参数: {max_cv_param} (CV = {max_cv_value:.3f})")
    print(f"平均NRMSE拟合误差: {np.mean(all_nrmse_errors):.4f} ± {np.std(all_nrmse_errors):.4f}")
    print(f"平均RMSE拟合误差: {np.mean(all_rmse_errors):.4f} ± {np.std(all_rmse_errors):.4f}")
    print(f"分析驾驶员数量: {len(driver_names)}")

    print("\n生成可视化图表...")

    # 1. Recorded vs simulated trajectories
    plot_error_trajectories(individual_results)

    # 2. Parameter distributions
    plot_parameter_distribution(individual_results)

    # 3. Prediction errors of all drivers in one figure
    plot_all_drivers_prediction_errors(individual_results)

    # 4. Total error per driver as bar charts
    fig_errors, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
    bar_specs = (
        (ax1, all_nrmse_errors, 'Total NRMSE', 'green'),
        (ax2, all_rmse_errors, 'Total RMSE', 'red'),
    )
    for ax, values, metric, bar_color in bar_specs:
        bars = ax.bar(driver_names, values, alpha=0.7, color=bar_color)
        ax.set_xlabel('Drivers')
        ax.set_ylabel(metric)
        ax.set_title(f'{metric} by Driver')
        # Rotate the existing category labels. Calling set_xticklabels here
        # would replace the formatter without fixing the tick positions and
        # makes matplotlib warn.
        ax.tick_params(axis='x', labelrotation=45)
        ax.grid(True, alpha=0.3)

        # Value label on top of each bar.
        for bar, error in zip(bars, values):
            ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.01,
                    f'{error:.3f}', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

    return {
        'individual_results': individual_results,
        'param_means': param_means,
        'param_stds': param_stds,
        'param_cvs': param_cvs,
        'param_ranges': param_ranges,
        'driver_names': driver_names,
        'all_nrmse_errors': all_nrmse_errors,
        'all_rmse_errors': all_rmse_errors,
        'all_rmse_speed': all_rmse_speed,
        'all_rmse_spacing': all_rmse_spacing,
        'all_rmse_acceleration': all_rmse_acceleration
    }


# In [None]:
def combine_and_analyze_multiple_datasets(datasets_dict, dataset_names=None):
    """
    Calibrate every driver of several datasets and analyse them together.

    Parameters
    ----------
    datasets_dict : dict
        Mapping of dataset key to dataset, e.g.
        ``{'dataset1': ar_idm_data1, 'dataset2': ar_idm_data2, ...}``.
        Each dataset must provide a ``'tracks'`` mapping.
    dataset_names : list, optional
        Display names for the datasets, in the iteration order of
        ``datasets_dict``. When ``None`` the dict keys are used. The keys
        are also used, with a printed warning, when the list does not have
        one unique name per dataset.

    Returns
    -------
    dict or None
        Result of ``analyze_combined_results``, or ``None`` when no driver
        could be calibrated.
    """
    print("=" * 60)
    print("多数据集整合分析 - 驾驶员异质性")
    print("=" * 60)

    dataset_keys = list(datasets_dict.keys())
    if dataset_names is None:
        labels = dataset_keys
    else:
        labels = list(dataset_names)
        # Names feed the unique driver ids below, so they must be one per
        # dataset and distinct; otherwise results would be overwritten.
        if len(labels) != len(dataset_keys) or len(set(labels)) != len(labels):
            print("警告: dataset_names 与数据集不匹配或存在重复，改用字典的key")
            labels = dataset_keys

    all_individual_results = {}
    dataset_mapping = {}  # unique driver id -> dataset name

    # Calibrate each driver of each dataset individually.
    for dataset_name, dataset in zip(labels, datasets_dict.values()):
        print(f"\n处理数据集: {dataset_name}")
        print("-" * 40)

        individual_results = {}

        for driver_id, track_data in dataset['tracks'].items():
            # Prefix with the dataset name so ids stay unique across datasets.
            unique_driver_id = f"{dataset_name}_{driver_id}"
            driver_name = track_data.get('driver_id', f'Driver_{driver_id}')
            vehicle_pair = track_data.get('vehicle_pair', 'Unknown')

            print(f"校准 {driver_name} ({vehicle_pair})...")

            individual_params = calibrate_idm_enhanced(track_data)

            if individual_params is None:
                print(f"  {driver_name} 校准失败")
                continue

            errors = calculate_errors(track_data, individual_params)
            individual_results[unique_driver_id] = {
                'params': individual_params,
                'errors': errors,
                'driver_name': driver_name,
                'vehicle_pair': vehicle_pair,
                'data_points': len(track_data['vFollReal']),
                'track_data': track_data,
                'dataset': dataset_name
            }
            dataset_mapping[unique_driver_id] = dataset_name
            print(f"  {driver_name} 校准成功")

        all_individual_results.update(individual_results)
        print(f"数据集 {dataset_name} 完成: {len(individual_results)} 个成功校准")

    if not all_individual_results:
        print("没有成功的个体校准结果")
        return None

    # Combined analysis over all calibrated drivers.
    print(f"\n" + "=" * 60)
    print(f"多数据集综合分析结果")
    print(f"总驾驶员数量: {len(all_individual_results)}")
    print("=" * 60)

    return analyze_combined_results(all_individual_results, dataset_mapping)

def analyze_combined_results(all_individual_results, dataset_mapping):
    """
    Analyse the calibrated parameters of all drivers across datasets.

    Prints overall and per-dataset parameter statistics, runs a one-way
    ANOVA per parameter across datasets (only when at least two datasets
    are present), prints the between/within sum-of-squares split and draws
    the combined figures.

    Parameters
    ----------
    all_individual_results : mapping
        Each value must provide ``'params'``, ``'driver_name'``,
        ``'dataset'``, ``'data_points'`` and an ``'errors'`` dict with
        ``'Total_NRMSE'`` and ``'Total_RMSE'``.
    dataset_mapping : mapping
        Unique driver id to dataset name; passed through to the result.

    Returns
    -------
    dict
        Inputs plus overall statistics, per-driver lists and
        ``'dataset_stats'``.
    """
    results = list(all_individual_results.values())

    all_params = np.array([r['params'] for r in results])
    driver_names = [r['driver_name'] for r in results]
    dataset_names = [r['dataset'] for r in results]
    all_nrmse_errors = [r['errors']['Total_NRMSE'] for r in results]
    all_rmse_errors = [r['errors']['Total_RMSE'] for r in results]

    # Overall statistics, one entry per parameter.
    param_means = np.mean(all_params, axis=0)
    param_stds = np.std(all_params, axis=0)
    param_cvs = param_stds / param_means
    param_ranges = np.ptp(all_params, axis=0)

    param_names = ['v0', 'T', 'a', 'b', 's0']

    print("\n总体参数统计（所有驾驶员）:")
    print(f"{'参数':<8} {'均值':<8} {'标准差':<8} {'变异系数':<10} {'最小值':<8} {'最大值':<8} {'极差':<8}")
    for i, name in enumerate(param_names):
        min_val = np.min(all_params[:, i])
        max_val = np.max(all_params[:, i])
        range_val = param_ranges[i]
        print(f"{name:<8} {param_means[i]:<8.3f} {param_stds[i]:<8.3f} {param_cvs[i]:<10.3f} {min_val:<8.3f} {max_val:<8.3f} {range_val:<8.3f}")

    # Row indices per dataset, computed once. Insertion order keeps the
    # report order stable between runs (a set would not).
    group_indices = {}
    for row, dataset in enumerate(dataset_names):
        group_indices.setdefault(dataset, []).append(row)
    unique_datasets = list(group_indices)

    print(f"\n按数据集统计 (共 {len(unique_datasets)} 个数据集):")

    dataset_stats = {}
    for dataset, rows in group_indices.items():
        dataset_params = all_params[rows]
        group_means = np.mean(dataset_params, axis=0)
        group_stds = np.std(dataset_params, axis=0)

        dataset_stats[dataset] = {
            'n_drivers': len(rows),
            'param_means': group_means,
            'param_stds': group_stds,
            'param_cvs': group_stds / group_means
        }

        print(f"\n数据集: {dataset} ({len(rows)} 个驾驶员)")
        for i, name in enumerate(param_names):
            mean_val = dataset_stats[dataset]['param_means'][i]
            std_val = dataset_stats[dataset]['param_stds'][i]
            cv_val = dataset_stats[dataset]['param_cvs'][i]
            print(f"  {name}: {mean_val:.3f} ± {std_val:.3f} (CV: {cv_val:.3f})")

    # ANOVA: do the datasets differ in each parameter?
    print(f"\n" + "=" * 50)
    print("方差分析 - 数据集间参数差异检验")
    print("=" * 50)

    from scipy import stats

    # f_oneway needs at least two groups and raises otherwise.
    can_run_anova = len(unique_datasets) >= 2

    for i, param_name in enumerate(param_names):
        print(f"\n参数 {param_name} 的方差分析:")

        column = all_params[:, i]

        if can_run_anova:
            groups = [column[rows] for rows in group_indices.values()]
            f_stat, p_value = stats.f_oneway(*groups)

            print(f"  F统计量: {f_stat:.4f}")
            print(f"  P值: {p_value:.4f}")

            if p_value < 0.05:
                print(f"  → 数据集间存在显著差异 (p < 0.05)")
            else:
                print(f"  → 数据集间无显著差异 (p ≥ 0.05)")
        else:
            print("  仅有一个数据集，跳过方差分析")

        # Split the total variation into between- and within-dataset parts.
        overall_mean = np.mean(column)
        ss_total = np.sum((column - overall_mean) ** 2)

        ss_between = 0
        for rows in group_indices.values():
            group_mean = np.mean(column[rows])
            ss_between += len(rows) * (group_mean - overall_mean) ** 2

        ss_within = ss_total - ss_between

        # All values identical: there is no variation to attribute.
        between_share = ss_between / ss_total * 100 if ss_total > 0 else 0.0

        print(f"  总变异 (SS_total): {ss_total:.4f}")
        print(f"  组间变异 (SS_between): {ss_between:.4f}")
        print(f"  组内变异 (SS_within): {ss_within:.4f}")
        print(f"  组间变异比例: {between_share:.2f}%")

    # Box plot of all parameters in one figure
    plot_combined_boxplot(all_params, param_names)

    # Histograms of the parameter distributions
    plot_parameter_histograms(all_params, param_names, all_individual_results)

    # Scatter plot of every driver's parameters
    plot_all_drivers_scatter(all_params, driver_names, param_names)

    return {
        'all_individual_results': all_individual_results,
        'dataset_mapping': dataset_mapping,
        'param_means': param_means,
        'param_stds': param_stds,
        'param_cvs': param_cvs,
        'param_ranges': param_ranges,
        'driver_names': driver_names,
        'dataset_names': dataset_names,
        'all_nrmse_errors': all_nrmse_errors,
        'all_rmse_errors': all_rmse_errors,
        'dataset_stats': dataset_stats
    }

def plot_combined_boxplot(all_params, param_names):
    """
    Draw one box plot per parameter on a shared axis, overlaid with the raw points.

    Parameters
    ----------
    all_params : 2-D array
        Indexed as ``all_params[:, i]``; column ``i`` is plotted under
        ``param_names[i]``. ``len(all_params)`` is reported in the title as
        the number of drivers.
    param_names : sequence of str
        Tick label for each column; its length decides how many columns are drawn.

    Notes
    -----
    The horizontal jitter of the overlaid points comes from the global NumPy
    random state, so positions change between calls unless the caller seeds it.
    """
    plt.figure(figsize=(14, 8))

    # One data series per parameter column
    data_to_plot = [all_params[:, i] for i in range(len(param_names))]

    box_plot = plt.boxplot(data_to_plot, patch_artist=True, labels=param_names,
                          widths=0.7, showfliers=True)

    # The palette is cycled so that more than five parameters neither raise an
    # IndexError in the scatter overlay nor leave the extra boxes uncoloured.
    palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']
    colors = [palette[i % len(palette)] for i in range(len(data_to_plot))]

    for patch, color in zip(box_plot['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.7)

    # Overlay every data point, jittered around the box position (boxes sit at 1..n)
    for i, param_data in enumerate(data_to_plot):
        x_jitter = np.random.normal(i + 1, 0.05, size=len(param_data))
        plt.scatter(x_jitter, param_data, alpha=0.4, color=colors[i], s=30, zorder=3)

    plt.xlabel('IDM Parameters')
    plt.ylabel('Parameter Values')
    plt.title(f'Combined IDM Parameter Distributions - All {len(all_params)} Drivers', fontsize=16)
    plt.grid(True, alpha=0.3, axis='y')
    plt.tight_layout()
    plt.show()

def plot_parameter_histograms(all_params, param_names, all_individual_results):
    """
    Plot a density histogram for each parameter plus a text summary panel.

    A 2x3 grid is created. The first panels (one per name/colour pair, at most
    five) show a histogram of ``all_params[:, i]`` with a kernel density curve,
    the mean, and mean +/- one standard deviation. The sixth panel shows the
    driver count, the number of distinct datasets and the average coefficient
    of variation.

    Parameters
    ----------
    all_params : 2-D array
        Indexed as ``all_params[:, i]``; ``len(all_params)`` is reported as
        the number of drivers.
    param_names : sequence of str
        Axis label and title for each column.
    all_individual_results : mapping
        Only its values are read, and from each value only the ``'dataset'``
        entry, to count distinct datasets.
    """
    # Imported once per call instead of once per subplot.
    from scipy.stats import gaussian_kde

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    axes = axes.flatten()

    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

    # zip() stops at the shorter of names/colours, so index 5 is never used here
    # and stays free for the summary panel.
    for i, (param_name, color) in enumerate(zip(param_names, colors)):
        ax = axes[i]
        param_data = all_params[:, i]

        ax.hist(param_data, bins=15, alpha=0.7, color=color,
                density=True, edgecolor='black', linewidth=0.5)

        # Kernel density overlay. gaussian_kde cannot be built from a degenerate
        # sample (a single value, or a column where every driver has the same
        # value); in that case the histogram alone is shown instead of aborting
        # the whole figure.
        x_range = np.linspace(np.min(param_data), np.max(param_data), 100)
        try:
            density = gaussian_kde(param_data)(x_range)
        except (np.linalg.LinAlgError, ValueError):
            density = None
        if density is not None:
            ax.plot(x_range, density, 'r-', linewidth=2)

        # Mean and one-standard-deviation markers
        mean_val = np.mean(param_data)
        std_val = np.std(param_data)
        ax.axvline(mean_val, color='red', linestyle='--', linewidth=2)
        ax.axvline(mean_val + std_val, color='orange', linestyle=':', alpha=0.7)
        ax.axvline(mean_val - std_val, color='orange', linestyle=':', alpha=0.7)

        ax.set_xlabel(param_name)
        ax.set_ylabel('Density')
        ax.set_title(f'{param_name} Distribution\n(n={len(param_data)})')
        ax.grid(True, alpha=0.3)

        # Short statistics box in the top-left corner of the panel
        ax.text(0.05, 0.95, f'Mean: {mean_val:.2f}\nStd: {std_val:.2f}\nCV: {std_val/mean_val:.3f}',
                transform=ax.transAxes, fontsize=9,
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8),
                verticalalignment='top')

    # Sixth panel: overall summary text only
    axes[5].axis('off')
    total_drivers = len(all_params)
    unique_datasets = len({res['dataset'] for res in all_individual_results.values()})

    stats_text = f"Overall Summary:\n\n"
    stats_text += f"Total Drivers: {total_drivers}\n"
    stats_text += f"Total Datasets: {unique_datasets}\n\n"

    # Average coefficient of variation across all named parameters
    avg_cv = np.mean([np.std(all_params[:, i]) / np.mean(all_params[:, i])
                     for i in range(len(param_names))])
    stats_text += f"Average CV: {avg_cv:.3f}"

    axes[5].text(0.1, 0.9, stats_text, transform=axes[5].transAxes,
                fontfamily='monospace', verticalalignment='top', fontsize=12,
                bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.8))

    plt.suptitle('IDM Parameter Histograms - Combined Analysis', fontsize=16)
    plt.tight_layout()
    plt.show()

def plot_all_drivers_scatter(all_params, driver_names, param_names):
    """
    Plot pairwise scatter panels for three parameter pairs with a covariance ellipse.

    Each panel shows the points, an ellipse whose semi-axes are one standard
    deviation along the principal directions of the sample covariance, the
    Pearson correlation coefficient and the number of points.

    Parameters
    ----------
    all_params : 2-D array
        Indexed as ``all_params[:, i]``; columns 0, 1 and 2 are plotted.
    driver_names : sequence
        Not used by this function; kept so existing callers do not break.
    param_names : sequence of str
        Axis labels; entries 0, 1 and 2 are read.
    """
    # Imported locally so the function does not depend on Ellipse having been
    # imported somewhere else in the file.
    from matplotlib.patches import Ellipse

    # Column index pairs to compare: (v0,T), (v0,a), (T,a)
    important_pairs = [(0, 1), (0, 2), (1, 2)]

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    # One colour per panel
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c']

    for idx, (i, j) in enumerate(important_pairs):
        ax = axes[idx]
        x_data = all_params[:, i]
        y_data = all_params[:, j]

        ax.scatter(x_data, y_data, alpha=0.6, s=60,
                   c=colors[idx], edgecolors='white', linewidth=0.5)

        # Covariance ellipse. A covariance needs at least two points, and the
        # ellipse is skipped when the estimate is not finite.
        if len(x_data) > 1:
            cov = np.cov(x_data, y_data)
            if np.all(np.isfinite(cov)):
                # eigh is the solver for symmetric matrices: real eigenvalues and
                # orthonormal eigenvectors are guaranteed. Rounding can still make
                # an eigenvalue slightly negative, so clip before the square root.
                eigvals, eigvecs = np.linalg.eigh(cov)
                axis_sigma = np.sqrt(np.clip(eigvals, 0.0, None))

                ell = Ellipse(xy=(np.mean(x_data), np.mean(y_data)),
                              width=axis_sigma[0] * 2, height=axis_sigma[1] * 2,
                              # Orientation of the first eigenvector, which the
                              # width axis is aligned with
                              angle=np.degrees(np.arctan2(eigvecs[1, 0], eigvecs[0, 0])),
                              alpha=0.2, color=colors[idx])
                ax.add_patch(ell)

        ax.set_xlabel(param_names[i])
        ax.set_ylabel(param_names[j])
        ax.set_title(f'{param_names[i]} vs {param_names[j]}')
        ax.grid(True, alpha=0.3)

        # Pearson correlation coefficient
        corr = np.corrcoef(x_data, y_data)[0, 1]
        ax.text(0.05, 0.95, f'ρ = {corr:.3f}', transform=ax.transAxes,
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        # Number of data points
        ax.text(0.05, 0.85, f'n = {len(x_data)}', transform=ax.transAxes,
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

    plt.suptitle(f'Parameter Relationships - All {len(all_params)} Drivers', fontsize=16)
    plt.tight_layout()
    plt.show()

# Usage example
# Assumes the nine per-dataset inputs (ar_idm_data1 .. ar_idm_data9) already exist
datasets = dict(
    dataset1=ar_idm_data1,
    dataset2=ar_idm_data2,
    dataset3=ar_idm_data3,
    dataset4=ar_idm_data4,
    dataset5=ar_idm_data5,
    dataset6=ar_idm_data6,
    dataset7=ar_idm_data7,
    dataset8=ar_idm_data8,
    dataset9=ar_idm_data9,
)

# Run the combined analysis over all datasets
print("开始多数据集综合分析...")
combined_results = combine_and_analyze_multiple_datasets(datasets)

if combined_results:
    print("\n" + "=" * 60)
    print("多数据集分析完成总结")
    print("=" * 60)
    print(f"✓ 总驾驶员数量: {len(combined_results['all_individual_results'])}")
    print(f"✓ 数据集数量: {len(combined_results['dataset_stats'])}")

    # Mean coefficient of variation across parameters, reused for the rating below
    avg_cv = np.mean(combined_results['param_cvs'])
    print(f"✓ 参数平均变异系数: {avg_cv:.3f}")

    # Rate the overall between-driver diversity: the first cutoff that avg_cv
    # exceeds wins; the fallback applies when none is exceeded.
    diversity_level = next(
        (label
         for cutoff, label in ((0.25, "高度显著"), (0.15, "显著"), (0.08, "中等"))
         if avg_cv > cutoff),
        "不显著",
    )

    print(f"✓ 总体驾驶员间差异性: {diversity_level} (平均CV: {avg_cv:.3f})")
    print("✓ 多数据集综合分析完成！")

In [None]:
import numpy as np
import pymc as pm
import pytensor.tensor as pt
import arviz as az
import matplotlib.pyplot as plt
from scipy import signal
from collections import deque

def validate_basic_idm_model_comprehensive_improved(trace, model, train_data, val_data, n_samples=100):
    """
    Validate the basic IDM model on held-out data using draws from the fitted posterior.

    For each selected posterior draw of ``(v0, T, a, b, s0)`` the IDM
    acceleration is evaluated on every validation row and turned into a
    one-step speed prediction ``vt + a_idm * dt``. Predictions are averaged
    over draws and scored with ``calculate_metrics``, overall and per vehicle.

    The parameter draws are read from ``trace.posterior``. Posterior-predictive
    sampling is not used for this: it exists to simulate observed variables,
    and the PyMC API imported in this file has no ``samples`` argument on it.

    Parameters
    ----------
    trace : arviz.InferenceData
        Must expose a ``posterior`` group with ``chain`` and ``draw``
        dimensions and the variables ``v0``, ``T``, ``a``, ``b``, ``s0``.
        NOTE(review): each variable is assumed to be a scalar per draw
        (a pooled model) - confirm against the model definition.
    model : object
        Not used; kept so existing callers do not break.
    train_data : object
        Not used; kept so existing callers do not break.
    val_data : mapping
        Read keys: ``'vt'``, ``'s'``, ``'dv'``, ``'label_v'``, ``'id_idx'``
        (arrays of equal length) and ``'n_vehicles'`` (int).
        NOTE(review): presumably speed, gap, speed difference, target
        next-step speed and vehicle index per row - confirm with the loader.
    n_samples : int, default 100
        Number of posterior draws to use; capped at the number of draws
        available in ``trace``.

    Returns
    -------
    dict
        Per-draw predictions, their means, the reference acceleration, the
        metric dictionaries and the parameter draws. ``'n_samples'`` holds the
        number of draws actually used.

    Raises
    ------
    ValueError
        If no posterior draw can be used (``n_samples`` < 1 or empty trace).
    """
    print("Starting comprehensive basic IDM model validation...")

    vt_val = val_data['vt']
    s_val = val_data['s']
    dv_val = val_data['dv']
    label_val = val_data['label_v']
    id_idx_val = val_data['id_idx']
    N_veh = val_data['n_vehicles']

    # NOTE(review): time step is hard-coded; confirm it matches the data sampling interval.
    dt = 0.5

    param_names = ['v0', 'T', 'a', 'b', 's0']

    # Never ask for more draws than the trace contains.
    posterior = trace.posterior
    n_available = int(posterior.sizes['chain']) * int(posterior.sizes['draw'])
    n_draws = min(int(n_samples), n_available)
    if n_draws < 1:
        raise ValueError(
            "No posterior draws to validate with: n_samples must be >= 1 "
            "and the trace must contain posterior samples."
        )

    # Random (seeded) subset of the fitted posterior, chains and draws combined
    print(f"Drawing {n_draws} posterior samples...")
    draws = az.extract(trace, group='posterior', var_names=param_names,
                       num_samples=n_draws, rng=42)

    # Put the stacked 'sample' dimension first so that index k selects draw k.
    v0_samples, T_samples, a_samples, b_samples, s0_samples = (
        np.asarray(draws[name].transpose('sample', ...).values) for name in param_names
    )

    # One row of predictions per posterior draw
    all_samples_speed_predictions = np.zeros((n_draws,) + np.shape(vt_val), dtype=float)
    all_samples_spacing_predictions = np.zeros((n_draws,) + np.shape(s_val), dtype=float)
    all_samples_acceleration_predictions = np.zeros((n_draws,) + np.shape(vt_val), dtype=float)

    # The row masks do not depend on the draw, so build them once.
    vehicle_masks = []
    for veh_id in range(N_veh):
        mask = (id_idx_val == veh_id)
        if np.sum(mask) > 0:
            vehicle_masks.append(mask)

    print("Generating predictions for each posterior sample...")
    for sample_idx in range(n_draws):
        v0 = v0_samples[sample_idx]
        T = T_samples[sample_idx]
        a = a_samples[sample_idx]
        b = b_samples[sample_idx]
        s0 = s0_samples[sample_idx]

        for mask in vehicle_masks:
            vt_veh = vt_val[mask]
            s_veh = s_val[mask]
            dv_veh = dv_val[mask]

            # IDM acceleration from the basic model (no AR correction)
            a_idm = idm_model(s_veh, vt_veh, dv_veh, v0, T, a, b, s0)

            all_samples_speed_predictions[sample_idx, mask] = vt_veh + a_idm * dt
            # Spacing is not simulated: the observed spacing is passed through,
            # so spacing metrics compare the input with itself.
            all_samples_spacing_predictions[sample_idx, mask] = s_veh
            all_samples_acceleration_predictions[sample_idx, mask] = a_idm

    # Reference acceleration derived from the validation speeds
    print("Calculating real acceleration from validation data...")
    real_acceleration = np.zeros_like(vt_val)
    valid_indices = []

    for veh_id in range(N_veh):
        mask = (id_idx_val == veh_id)
        # Only vehicles with more than two rows get a reference acceleration
        if np.sum(mask) > 2:
            acc_veh = improved_robust_acceleration(vt_val[mask], dt=dt)
            real_acceleration[mask] = acc_veh
            valid_indices.extend(np.where(mask)[0])

    print("Calculating performance metrics...")

    # Overall metrics on the mean over posterior draws
    mean_speed_pred = np.mean(all_samples_speed_predictions, axis=0)
    mean_acceleration_pred = np.mean(all_samples_acceleration_predictions, axis=0)
    mean_spacing_pred = np.mean(all_samples_spacing_predictions, axis=0)

    speed_metrics = calculate_metrics(label_val, mean_speed_pred)
    acceleration_metrics = calculate_metrics(real_acceleration[valid_indices],
                                             mean_acceleration_pred[valid_indices])
    spacing_metrics = calculate_metrics(s_val, mean_spacing_pred)

    # Vehicle-level metrics
    vehicle_metrics = {}
    for veh_id in np.unique(id_idx_val):
        mask = (id_idx_val == veh_id)
        if np.sum(mask) > 0:
            vehicle_metrics[veh_id] = {
                'speed': calculate_metrics(label_val[mask], mean_speed_pred[mask]),
                'acceleration': calculate_metrics(real_acceleration[mask],
                                                  mean_acceleration_pred[mask]),
                'spacing': calculate_metrics(s_val[mask], mean_spacing_pred[mask])
            }

    # One row per draw, one column per parameter
    individual_params = np.column_stack([
        v0_samples, T_samples, a_samples, b_samples, s0_samples
    ])

    validation_results = {
        'all_samples_speed_predictions': all_samples_speed_predictions,
        'all_samples_spacing_predictions': all_samples_spacing_predictions,
        'all_samples_acceleration_predictions': all_samples_acceleration_predictions,
        'real_acceleration': real_acceleration,
        'valid_indices': valid_indices,
        'n_samples': n_draws,
        'speed_metrics': speed_metrics,
        'acceleration_metrics': acceleration_metrics,
        'spacing_metrics': spacing_metrics,
        'vehicle_metrics': vehicle_metrics,
        'individual_params': individual_params,
        'mean_predictions': {
            'speed': mean_speed_pred,
            'acceleration': mean_acceleration_pred,
            'spacing': mean_spacing_pred
        },
        'parameter_samples': {
            'v0': v0_samples,
            'T': T_samples,
            'a': a_samples,
            'b': b_samples,
            's0': s0_samples
        }
    }

    print("Basic IDM model validation completed!")
    return validation_results

def plot_basic_idm_validation_results(val_data, validation_results):
    """
    Draw per-vehicle speed, spacing and acceleration panels, then the follow-up figures.

    One row of three panels is created for each distinct value in
    ``val_data['id_idx']``. Every panel shows the reference series, the 5-95
    and 25-75 percentile bands across posterior samples, the per-step mean and
    median, and up to ten randomly chosen individual sample traces (the same
    ten in all three panels of a row). Afterwards the parameter-distribution
    and uncertainty figures are produced.
    """
    vt_val = val_data['vt']  # read as in the rest of the pipeline; not plotted here
    s_val = val_data['s']
    label_val = val_data['label_v']
    id_idx_val = val_data['id_idx']

    all_samples_speed = validation_results['all_samples_speed_predictions']
    all_samples_spacing = validation_results['all_samples_spacing_predictions']
    all_samples_acceleration = validation_results['all_samples_acceleration_predictions']
    real_acceleration = validation_results['real_acceleration']
    n_samples = validation_results['n_samples']

    unique_vehicles = np.unique(id_idx_val)
    n_vehicles = len(unique_vehicles)

    fig, axes = plt.subplots(n_vehicles, 3, figsize=(20, 5*n_vehicles))
    # A single row comes back 1-D; make it addressable as axes[row, col]
    axes = np.atleast_2d(axes)

    # (quantity, reference series, posterior draws, band colour,
    #  mean line, median line, sample-trace line, y-axis label)
    panels = (
        ('Speed', label_val, all_samples_speed,
         'red', 'b-', 'g--', 'r-', 'Speed (m/s)'),
        ('Spacing', s_val, all_samples_spacing,
         'magenta', 'g-', 'c--', 'm-', 'Spacing (m)'),
        ('Acceleration', real_acceleration, all_samples_acceleration,
         'orange', 'c-', 'y--', 'y-', 'Acceleration (m/s²)'),
    )

    for row, veh_id in enumerate(unique_vehicles):
        mask = (id_idx_val == veh_id)
        if not np.sum(mask) > 0:
            continue

        # Never plot more points than a sample row holds
        n_points = min(np.sum(mask), len(all_samples_speed[0]))
        steps = np.arange(n_points)

        # Individual posterior samples to overlay, shared by the three panels
        picks = np.random.choice(len(all_samples_speed),
                                 min(10, len(all_samples_speed)), replace=False)

        for col, (quantity, truth, all_draws, band_color,
                  mean_fmt, median_fmt, trace_fmt, y_label) in enumerate(panels):
            ax = axes[row, col]

            ax.plot(steps, truth[mask][:n_points], 'k-',
                    label=f'True {quantity}', linewidth=3, alpha=0.9)

            # Rows: posterior samples; columns: this vehicle's time steps
            veh_draws = np.array([draw[mask][:n_points] for draw in all_draws])

            lo_90, hi_90, lo_50, hi_50 = (
                np.percentile(veh_draws, q, axis=0) for q in (5, 95, 25, 75)
            )
            ax.fill_between(steps, lo_90, hi_90,
                            alpha=0.3, color=band_color, label='90% CI')
            ax.fill_between(steps, lo_50, hi_50,
                            alpha=0.5, color=band_color, label='50% CI')

            ax.plot(steps, np.mean(veh_draws, axis=0), mean_fmt,
                    label='Mean Prediction', linewidth=2, alpha=0.8)
            ax.plot(steps, np.median(veh_draws, axis=0), median_fmt,
                    label='Median Prediction', linewidth=2, alpha=0.8)

            for pick in picks:
                ax.plot(steps, veh_draws[pick], trace_fmt, alpha=0.2, linewidth=0.8)

            ax.set_title(f'Vehicle {veh_id} - {quantity} Prediction\n({n_samples} Posterior Samples)',
                         fontsize=12)
            ax.set_xlabel('Time Index')
            ax.set_ylabel(y_label)
            ax.legend(loc='upper right', fontsize=8)
            ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.suptitle('Basic IDM Model - Comprehensive Validation Results', fontsize=16, y=1.02)
    plt.show()

    # Posterior parameter histograms
    plot_basic_idm_parameter_distributions(validation_results)

    # Interval-width (uncertainty) figure
    plot_basic_idm_uncertainty_analysis(val_data, validation_results)

def plot_basic_idm_parameter_distributions(validation_results):
    """
    Show a histogram of the posterior draws of each IDM parameter.

    Reads ``validation_results['individual_params']`` (rows: draws, columns:
    parameters) and fills the first five panels of a 2x3 grid with one density
    histogram each, marking mean and median. When more than one draw is
    present, the sixth panel shows a scatter of column 0 against column 1 with
    their correlation coefficient.
    """
    individual_params = validation_results['individual_params']
    # Looked up but not used below; kept so a missing key still fails here.
    param_samples = validation_results['parameter_samples']

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    panel_titles = ('Desired Speed (v0)', 'Time Headway (T)',
                    'Max Acceleration (a)', 'Comfort Decel (b)',
                    'Min Spacing (s0)')

    # axes.flat walks the grid row by row, so panel k is axes[k // 3, k % 3]
    for column, (ax, panel_title) in enumerate(zip(axes.flat, panel_titles)):
        draws = individual_params[:, column]
        draws_mean = np.mean(draws)
        draws_median = np.median(draws)

        ax.hist(draws, bins=30, alpha=0.7, color='skyblue',
                edgecolor='black', density=True)
        ax.axvline(draws_mean, color='red', linestyle='--',
                   label=f'Mean: {draws_mean:.3f}')
        ax.axvline(draws_median, color='green', linestyle='--',
                   label=f'Median: {draws_median:.3f}')
        ax.set_title(f'Posterior: {panel_title}', fontsize=12)
        ax.set_xlabel('Parameter Value')
        ax.set_ylabel('Density')
        ax.legend(fontsize=8)
        ax.grid(True, alpha=0.3)

    # Sixth panel: joint behaviour of the first two parameter columns
    if individual_params.shape[0] > 1:
        corr_ax = axes[1, 2]
        v0_draws = individual_params[:, 0]
        T_draws = individual_params[:, 1]

        corr_ax.scatter(v0_draws, T_draws, alpha=0.6, color='purple')
        corr_ax.set_xlabel('Desired Speed (v0)')
        corr_ax.set_ylabel('Time Headway (T)')
        corr_ax.set_title('Parameter Correlation: v0 vs T', fontsize=12)
        corr_ax.grid(True, alpha=0.3)

        rho = np.corrcoef(v0_draws, T_draws)[0, 1]
        corr_ax.text(0.05, 0.95, f'Correlation: {rho:.3f}',
                     transform=corr_ax.transAxes, fontsize=10,
                     bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8))

    plt.tight_layout()
    plt.suptitle('Basic IDM - Posterior Parameter Distributions', fontsize=16, y=1.02)
    plt.show()

def plot_basic_idm_uncertainty_analysis(val_data, validation_results):
    """
    Plot how wide the 5-95 percentile prediction intervals are.

    Three histograms show the per-row interval widths (95th minus 5th
    percentile across posterior samples) for speed, spacing and acceleration,
    pooled over all vehicles. The fourth panel shows, for the first vehicle
    id, the interval, the mean prediction and ``val_data['label_v']`` over at
    most the first 100 rows.
    """
    all_samples_speed = validation_results['all_samples_speed_predictions']
    all_samples_spacing = validation_results['all_samples_spacing_predictions']
    all_samples_acceleration = validation_results['all_samples_acceleration_predictions']
    # Looked up but not used below; kept so a missing key still fails here.
    n_samples = validation_results['n_samples']

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    vehicle_index = val_data['id_idx']
    unique_vehicles = np.unique(vehicle_index)

    # Interval widths pooled over vehicles, in the order speed / spacing / acceleration
    sample_sets = (all_samples_speed, all_samples_spacing, all_samples_acceleration)
    pooled_widths = ([], [], [])

    for veh_id in unique_vehicles:
        mask = (vehicle_index == veh_id)
        if np.sum(mask) > 0:
            for widths, sample_set in zip(pooled_widths, sample_sets):
                veh_draws = np.array([draw[mask] for draw in sample_set])
                widths.extend(np.percentile(veh_draws, 95, axis=0)
                              - np.percentile(veh_draws, 5, axis=0))

    histogram_specs = (
        ('Speed Uncertainty (90% CI Width)', 'lightblue', 'm/s'),
        ('Spacing Uncertainty (90% CI Width)', 'lightgreen', 'm'),
        ('Acceleration Uncertainty (90% CI Width)', 'lightyellow', 'm/s²'),
    )

    # The three histograms occupy the first three panels in row-major order
    for ax, widths, (title, color, unit) in zip(axes.flat, pooled_widths, histogram_specs):
        ax.hist(widths, bins=30, alpha=0.7, color=color, edgecolor='black')
        mean_width = np.mean(widths)
        median_width = np.median(widths)
        ax.axvline(mean_width, color='red', linestyle='--',
                   label=f'Mean: {mean_width:.3f} {unit}')
        ax.axvline(median_width, color='blue', linestyle='--',
                   label=f'Median: {median_width:.3f} {unit}')
        ax.set_title(title, fontsize=12)
        ax.set_xlabel(f'Uncertainty ({unit})')
        ax.set_ylabel('Frequency')
        ax.legend(fontsize=8)
        ax.grid(True, alpha=0.3)

    # Fourth panel: speed interval over time for the first vehicle id
    if len(unique_vehicles) > 0:
        first_id = unique_vehicles[0]
        first_mask = (vehicle_index == first_id)
        first_draws = np.array([draw[first_mask] for draw in all_samples_speed])

        steps = np.arange(min(100, np.sum(first_mask)))
        window = first_draws[:, :len(steps)]

        time_ax = axes[1, 1]
        time_ax.fill_between(steps,
                             np.percentile(window, 5, axis=0),
                             np.percentile(window, 95, axis=0),
                             alpha=0.3, color='red', label='90% CI')
        time_ax.plot(steps, np.mean(window, axis=0), 'b-',
                     label='Mean Prediction', linewidth=2)
        time_ax.plot(steps, val_data['label_v'][first_mask][:len(steps)],
                     'k-', label='True Speed', linewidth=2, alpha=0.8)
        time_ax.set_title(f'Vehicle {first_id} - Speed Uncertainty Over Time', fontsize=12)
        time_ax.set_xlabel('Time Index')
        time_ax.set_ylabel('Speed (m/s)')
        time_ax.legend(fontsize=8)
        time_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.suptitle('Basic IDM - Uncertainty Analysis', fontsize=16, y=1.02)
    plt.show()

def print_basic_idm_validation_summary(validation_results):
    """
    Print overall, per-vehicle and posterior-parameter statistics of a validation run.

    Reads ``'speed_metrics'``, ``'acceleration_metrics'``, ``'spacing_metrics'``
    (each with ``'rmse'``, ``'mae'``, ``'nrmse'``), ``'vehicle_metrics'``
    (vehicle id -> ``{'speed', 'acceleration', 'spacing'}`` metric dicts) and
    ``'individual_params'`` (rows: draws, columns: the five parameters) from
    ``validation_results``. Nothing is returned.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("BASIC IDM MODEL VALIDATION SUMMARY")
    print(rule)

    speed_metrics = validation_results['speed_metrics']
    acceleration_metrics = validation_results['acceleration_metrics']
    spacing_metrics = validation_results['spacing_metrics']
    vehicle_metrics = validation_results['vehicle_metrics']

    # (heading, metrics, unit); only the first heading has no leading blank line
    overall_sections = (
        ("Speed Prediction:", speed_metrics, "m/s"),
        ("\nAcceleration Prediction:", acceleration_metrics, "m/s²"),
        ("\nSpacing Prediction:", spacing_metrics, "m"),
    )

    print("\nOVERALL PERFORMANCE METRICS:")
    for heading, metrics, unit in overall_sections:
        print(heading)
        print(f"  - RMSE: {metrics['rmse']:.4f} {unit}")
        print(f"  - MAE: {metrics['mae']:.4f} {unit}")
        # NRMSE is printed without a unit
        print(f"  - NRMSE: {metrics['nrmse']:.4f}")

    per_vehicle_rows = (
        ("Speed", 'speed', "m/s"),
        ("Acceleration", 'acceleration', "m/s²"),
        ("Spacing", 'spacing', "m"),
    )

    print("\nVEHICLE-LEVEL PERFORMANCE:")
    for veh_id, veh_entry in vehicle_metrics.items():
        print(f"\nVehicle {veh_id}:")
        for label, key, unit in per_vehicle_rows:
            scores = veh_entry[key]
            print(f"  {label} - RMSE: {scores['rmse']:.4f} {unit}, MAE: {scores['mae']:.4f} {unit}")

    # Posterior statistics, one line per parameter column
    individual_params = validation_results['individual_params']
    display_names = ('Desired Speed', 'Time Headway', 'Max Acceleration',
                     'Comfort Deceleration', 'Min Spacing')

    print("\nPARAMETER POSTERIOR STATISTICS:")
    for column, name in enumerate(display_names):
        draws = individual_params[:, column]
        ci_low = np.percentile(draws, 2.5)
        ci_high = np.percentile(draws, 97.5)
        print(f"  {name}: Mean = {np.mean(draws):.3f}, Std = {np.std(draws):.3f}, "
              f"95% CI = [{ci_low:.3f}, {ci_high:.3f}]")


def run_basic_idm_calibration_only(ar_idm_data):
    """
    Split the data and fit the basic IDM model, without running validation.

    The input is split with ``split_data_for_ar_idm`` (70 % training), the
    training part is passed to ``train_basic_idm_model``, and the fitted
    trace and model are returned together with both data splits under the
    keys ``'trace'``, ``'model'``, ``'train_data'`` and ``'val_data'``.
    """
    rule = "=" * 60

    print(rule)
    print("BASIC IDM MODEL CALIBRATION ONLY")
    print(rule)

    # Step 1: hold out part of the data for later validation
    print("\nStep 1: Data Splitting")
    train_data, val_data = split_data_for_ar_idm(ar_idm_data, train_ratio=0.7)

    # Step 2: fit on the training split only
    print("\nStep 2: Model Training")
    trace, model = train_basic_idm_model(train_data)

    print("\n" + rule)
    print("BASIC IDM CALIBRATION COMPLETED!")
    print(rule)

    return dict(
        trace=trace,
        model=model,
        train_data=train_data,
        val_data=val_data,
    )

def run_basic_idm_validation_only(calibration_results, n_posterior_samples=100):
    """
    Validate an already calibrated basic IDM model, plot and summarise the outcome.

    ``calibration_results`` must provide ``'trace'``, ``'model'``,
    ``'train_data'`` and ``'val_data'``. The validation routine is run with
    ``n_posterior_samples`` draws, its result is plotted and printed, and the
    validation result dictionary is returned.
    """
    rule = "=" * 60

    print(rule)
    print("BASIC IDM MODEL VALIDATION ONLY")
    print(rule)

    # Unpack everything the calibration step produced
    trace, model, train_data, val_data = (
        calibration_results[key]
        for key in ('trace', 'model', 'train_data', 'val_data')
    )

    # Step 1: score the model on the held-out split
    print("\nStep 1: Model Validation with Posterior Sampling")
    validation_results = validate_basic_idm_model_comprehensive_improved(
        trace, model, train_data, val_data, n_samples=n_posterior_samples
    )

    # Step 2: figures
    print("\nStep 2: Enhanced Results Visualization")
    plot_basic_idm_validation_results(val_data, validation_results)

    # Step 3: text summary
    print("\nStep 3: Performance Summary")
    print_basic_idm_validation_summary(validation_results)

    print("\n" + rule)
    print("BASIC IDM VALIDATION COMPLETED!")
    print(rule)

    return validation_results

# IDM model definition
def idm_model(s, v, dv, v0=30.0, T=1.5, a=1.0, b=2.0, s0=2.0):
    """
    Compute the Intelligent Driver Model (IDM) acceleration.

    Works element-wise, so ``s``, ``v`` and ``dv`` may be scalars or NumPy
    arrays of matching (broadcastable) shape.

    Parameters
    ----------
    s : float or array
        Gap to the leading vehicle.
    v : float or array
        Speed of the following vehicle.
    dv : float or array
        Speed difference to the leader.
        NOTE(review): presumably follower speed minus leader speed, so that a
        positive value enlarges the desired gap - confirm with the data loader.
    v0 : float
        Desired speed.
    T : float
        Desired time headway.
    a : float
        Maximum acceleration; also the upper clipping bound.
    b : float
        Comfortable deceleration; ``-b`` is the lower clipping bound.
    s0 : float
        Minimum gap.

    Returns
    -------
    float or ndarray
        Acceleration clipped to ``[-b, a]``.
    """
    # Desired gap. np.maximum floors the dynamic term at zero element-wise;
    # the builtin max() raises on arrays with more than one element.
    s_star = s0 + np.maximum(0, v * T + (v * dv) / (2 * np.sqrt(a * b)))

    # Free-road term (v / v0)**4 and interaction term (s_star / s)**2
    acceleration = a * (1 - (v / v0)**4 - (s_star / s)**2)

    # Physical constraint: keep the acceleration within [-b, a]
    acceleration = np.clip(acceleration, -b, a)

    return acceleration

In [None]:
# Calibration cell: split the data and fit the basic IDM model. The returned dict
# (trace, model, train/validation splits) is what the validation call expects.
# NOTE(review): ar_idm_data is not defined in this cell - it must already exist.
calibration_results = run_basic_idm_calibration_only(ar_idm_data)

In [None]:
# Validation cell: score, plot and summarise the calibrated model using 100 posterior draws.
# Requires calibration_results from the calibration call to be present.
validation_results = run_basic_idm_validation_only(calibration_results, n_posterior_samples=100)