In [None]:
import numpy as np
import pymc as pm
import pytensor.tensor as pt
import arviz as az
import matplotlib.pyplot as plt
from scipy import signal
from collections import deque

In [None]:
def train_hierarchical_idm_model(train_data):
    """Build and sample a hierarchical Bayesian IDM car-following model.

    Parameters
    ----------
    train_data : dict
        Must provide the keys 'vt', 's', 'dv', 'label_v', 'id_idx' (arrays
        indexed by boolean masks below, so NumPy arrays are expected) and
        'n_vehicles' (number of vehicles; vehicle ids are assumed to be
        0..n_vehicles-1 because they index the per-vehicle parameter rows).

    Returns
    -------
    tuple
        ``(trace, hierarchical_idm_model)`` where ``trace`` is the result of
        ``pm.sample(..., return_inferencedata=True)``.

    Notes
    -----
    * Vehicles with 5 or fewer training rows get no likelihood term; their
      parameters and noise scales are informed only by the priors.
    * If the first sampling attempt raises, sampling is retried once with more
      tuning steps, more draws and a higher ``target_accept``.
    """
    vt = train_data['vt']
    s = train_data['s']
    dv = train_data['dv']
    label_v = train_data['label_v']
    id_idx = train_data['id_idx']
    N_veh = train_data['n_vehicles']

    dt = 0.5      # time step used to turn acceleration into a next-step speed
    D = 5         # number of per-vehicle parameters (vmax, dsafe, tsafe, amax, amin)
    DELTA = 4     # exponent of the free-flow term in the IDM acceleration

    coords = {
        "veh_id": np.arange(N_veh),
        "parameter": np.arange(D)
    }

    with pm.Model(coords=coords) as hierarchical_idm_model:
        # Cholesky factor of the population covariance; only its leading 4x4
        # block is used below (amin is modelled through a ratio instead).
        chol, _, _ = pm.LKJCholeskyCov('chol', n=D, eta=2.0,
                                      sd_dist=pm.Exponential.dist(2, shape=D))

        # Population-level log-means of the four correlated parameters.
        log_mu_vmax = pm.Normal('log_mu_vmax', mu=0, sigma=0.1)
        log_mu_dsafe = pm.Normal('log_mu_dsafe', mu=0, sigma=0.1)
        log_mu_tsafe = pm.Normal('log_mu_tsafe', mu=0, sigma=0.1)
        log_mu_amax = pm.Normal('log_mu_amax', mu=0, sigma=0.1)

        # amin is expressed as ratio * amax, with a population ratio centred
        # on 0.5 and a per-vehicle multiplicative deviation.
        log_ratio_mu = pm.Normal('log_ratio_mu', mu=np.log(0.5), sigma=0.1)
        ratio_mu = pm.Deterministic('ratio_mu', pt.exp(log_ratio_mu))

        log_ratio_raw = pm.Normal('log_ratio_raw', mu=0, sigma=0.1, shape=N_veh)
        ratio_individual = pm.Deterministic('ratio_individual', ratio_mu * pt.exp(log_ratio_raw))

        # Per-vehicle raw offsets, later correlated through the Cholesky factor.
        vals_raw_vmax = pm.Normal('vals_raw_vmax', mu=0, sigma=0.1, shape=N_veh)
        vals_raw_dsafe = pm.Normal('vals_raw_dsafe', mu=0, sigma=0.1, shape=N_veh)
        vals_raw_tsafe = pm.Normal('vals_raw_tsafe', mu=0, sigma=0.1, shape=N_veh)
        vals_raw_amax = pm.Normal('vals_raw_amax', mu=0, sigma=0.1, shape=N_veh)

        vals_raw_first4 = pm.Deterministic('vals_raw_first4', pt.stack([
            vals_raw_vmax, vals_raw_dsafe, vals_raw_tsafe, vals_raw_amax
        ], axis=1))

        log_mu_first4 = pt.stack([log_mu_vmax, log_mu_dsafe, log_mu_tsafe, log_mu_amax])
        log_parameters_first4 = pm.Deterministic('log_parameters_first4',
                                               log_mu_first4 + pt.dot(vals_raw_first4, chol[:4, :4].T))
        parameters_first4 = pm.Deterministic('parameters_first4', pt.exp(log_parameters_first4))

        amin_individual = ratio_individual * parameters_first4[:, 3]

        # Dimensionless per-vehicle parameters; they are multiplied by fixed
        # scales (25, 2, 1.6, 1.5, 1.5) where the likelihood is built.
        parameters = pm.Deterministic('parameters', pt.stack([
            parameters_first4[:, 0],
            parameters_first4[:, 1],
            parameters_first4[:, 2],
            parameters_first4[:, 3],
            amin_individual
        ], axis=1), dims=('veh_id', 'parameter'))

        s_a_list = []
        s_v_list = []

        # One pair of noise scales per vehicle. The variable names
        # 's_a_<i>' / 's_v_<i>' are part of the trace and are kept as is.
        for i in range(N_veh):
            s_a_i = pm.Exponential(f's_a_{i}', lam=2000)
            s_v_i = pm.Exponential(f's_v_{i}', lam=4000)
            s_a_list.append(s_a_i)
            s_v_list.append(s_v_i)

        for i in range(N_veh):
            mask = (id_idx == i)
            # Skip vehicles with too few rows to contribute a likelihood.
            if np.sum(mask) > 5:
                s_veh = s[mask]
                vt_veh = vt[mask]
                dv_veh = dv[mask]
                label_veh = label_v[mask]

                vmax = 25 * parameters[i, 0]
                dsafe = 2 * parameters[i, 1]
                tsafe = 1.6 * parameters[i, 2]
                amax = 1.5 * parameters[i, 3]
                amin = 1.5 * parameters[i, 4]

                # IDM desired gap and resulting acceleration.
                sn = dsafe + vt_veh * tsafe + \
                     vt_veh * dv_veh / (2 * pm.math.sqrt(amax * amin))
                a_idm = amax * (1 - (vt_veh / vmax) ** DELTA - (sn / s_veh) ** 2)

                # One-step-ahead speed implied by the IDM acceleration.
                mean_speed = vt_veh + a_idm * dt

                # Acceleration noise (scaled by dt) and speed noise combined
                # in quadrature.
                total_sigma = pm.math.sqrt((s_a_list[i] * dt) ** 2 + s_v_list[i] ** 2)
                pm.Normal(f'obs_{i}', mu=mean_speed,
                         sigma=total_sigma,
                         observed=label_veh)

        try:
            trace = pm.sample(
                draws=600,
                tune=600,
                random_seed=42,
                chains=2,
                target_accept=0.9,
                return_inferencedata=True
            )
            print("Hierarchical IDM model training completed!")
        except Exception as e:
            print(f"Sampling error: {e}")
            print("Trying more conservative sampling settings...")
            # A higher target_accept forces smaller NUTS steps, which is the
            # conservative direction; the previous value of 0.8 was *lower*
            # than the first attempt's 0.9.
            trace = pm.sample(
                draws=1500,
                tune=1000,
                random_seed=42,
                chains=2,
                target_accept=0.95,
                return_inferencedata=True
            )
            print("Hierarchical IDM model training completed (using conservative settings)!")

    return trace, hierarchical_idm_model

In [None]:
def calculate_metrics(y_true, y_pred):
    """Compute MSE, RMSE, MAE and range-normalised RMSE.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values of matching (or broadcastable) shape.
        Plain Python sequences are accepted and converted to float arrays.

    Returns
    -------
    dict
        Keys 'mse', 'rmse', 'mae' and 'nrmse'. 'nrmse' is the RMSE divided by
        ``max(y_true) - min(y_true)``; it is NaN when that range is zero,
        because the normalisation is undefined there. All four values are NaN
        for empty input.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    if y_true.size == 0:
        return {'mse': np.nan, 'rmse': np.nan, 'mae': np.nan, 'nrmse': np.nan}

    residual = y_true - y_pred
    mse = np.mean(residual ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(residual))

    value_range = np.max(y_true) - np.min(y_true)
    # Guard against a constant (or NaN) reference series instead of emitting
    # inf / NaN together with a divide-by-zero warning.
    nrmse = rmse / value_range if value_range > 0 else np.nan

    return {
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'nrmse': nrmse
    }

def robust_smooth_acceleration(velocity, dt=0.5, window_size=7, poly_order=2):
    """Estimate acceleration from a speed series with Savitzky-Golay smoothing.

    Parameters
    ----------
    velocity : array-like
        1-D speed samples taken every ``dt``.
    dt : float
        Sampling interval.
    window_size : int
        Savitzky-Golay window length.
    poly_order : int
        Savitzky-Golay polynomial order.

    Returns
    -------
    numpy.ndarray
        Float array with one acceleration value per input sample.

    Behaviour
    ---------
    * Fewer than 2 samples: zeros (no derivative can be formed).
    * Fewer than ``window_size`` samples: plain ``np.gradient``.
    * Otherwise the first derivative of a Savitzky-Golay fit. For series
      longer than 10 samples, the first and last 5 values are replaced by a
      straight line fitted to the 5 adjacent interior values and extrapolated
      to the edge positions.
    * If the filter rejects its arguments (e.g. ``poly_order >= window_size``),
      a finite-difference estimate smoothed by a moving average is returned.
    """
    velocity = np.asarray(velocity, dtype=float)
    n_points = len(velocity)

    if n_points < 2:
        return np.zeros_like(velocity)
    if n_points < window_size:
        return np.gradient(velocity, dt)

    try:
        # scipy's Savitzky-Golay filter is `savgol_filter`; the name used
        # before does not exist, so this branch never actually ran.
        smoothed = signal.savgol_filter(velocity, window_length=window_size,
                                        polyorder=poly_order, deriv=1, delta=dt)
    except (ValueError, np.linalg.LinAlgError):
        # np.gradient yields central differences inside and one-sided
        # differences at both ends, then a short moving average smooths it.
        raw = np.gradient(velocity, dt)
        window = min(5, n_points)
        return np.convolve(raw, np.ones(window) / window, mode='same')

    edge = 5
    if n_points <= 2 * edge:
        return smoothed

    # Fit both edge lines on the untouched filter output so the two edges do
    # not influence each other, and evaluate each line at the true sample
    # times of the values being replaced.
    t = np.arange(n_points) * dt
    acceleration = smoothed.copy()

    slope, intercept = np.polyfit(t[edge:2 * edge], smoothed[edge:2 * edge], 1)
    acceleration[:edge] = intercept + slope * t[:edge]

    slope, intercept = np.polyfit(t[-2 * edge:-edge], smoothed[-2 * edge:-edge], 1)
    acceleration[-edge:] = intercept + slope * t[-edge:]

    return acceleration

def calculate_initial_acceleration(velocity, dt=0.5, method='savitzky_golay'):
    """Estimate the acceleration at the first sample of a speed series.

    Parameters
    ----------
    velocity : array-like
        1-D speed samples taken every ``dt``.
    dt : float
        Sampling interval.
    method : str
        * 'savitzky_golay' - first value of ``robust_smooth_acceleration``.
        * 'robust_fit' - slope of a straight line through the first
          ``min(5, len(velocity))`` samples.
        * 'physical_constrained' - slope of a weighted straight-line fit to
          the first four samples (weights 0.1, 0.2, 0.3, 0.4); falls back to
          the first difference when fewer than four samples exist.
        * anything else - first difference clipped to [-3, 3].

    Returns
    -------
    float
        The estimate; 0.0 when fewer than three samples are given.
    """
    velocity = np.asarray(velocity, dtype=float)
    if len(velocity) < 3:
        return 0.0

    if method == 'savitzky_golay':
        return robust_smooth_acceleration(velocity, dt)[0]

    if method == 'robust_fit':
        n_points = min(5, len(velocity))
        t_points = np.arange(n_points) * dt
        slope, _ = np.polyfit(t_points, velocity[:n_points], 1)
        return slope

    if method == 'physical_constrained':
        if len(velocity) < 4:
            return (velocity[1] - velocity[0]) / dt

        # Only four weights are defined, so the fit must use exactly the
        # first four samples; using the whole series made the weight matrix
        # and design matrix incompatible for any input longer than four.
        weights = np.array([0.1, 0.2, 0.3, 0.4])
        weights = weights / np.sum(weights)
        n_fit = len(weights)

        t_points = np.arange(n_fit) * dt
        A = np.vstack([t_points, np.ones(n_fit)]).T
        W = np.diag(weights)
        # Weighted normal equations: (A^T W A) beta = A^T W v.
        slope, _ = np.linalg.lstsq(A.T @ W @ A, A.T @ W @ velocity[:n_fit], rcond=None)[0]
        return slope

    initial_acc = (velocity[1] - velocity[0]) / dt
    return np.clip(initial_acc, -3.0, 3.0)

def improved_robust_acceleration(velocity, dt=0.5, window_size=7, poly_order=2):
    """Smoothed acceleration, bounded to [-3, 3] with flattened edges.

    The series from ``robust_smooth_acceleration`` is clipped to [-3, 3].
    When it holds more than 10 values, the first three are set to the mean of
    the first five clipped values and the last three to the mean of the last
    five.

    Returns the resulting array (one value per input sample).
    """
    bounded = np.clip(
        robust_smooth_acceleration(velocity, dt, window_size, poly_order),
        -3.0,
        3.0,
    )

    if len(bounded) <= 10:
        return bounded

    # With more than 10 values the two five-sample windows never overlap the
    # opposite three-sample edge, so both levels can be computed up front.
    head_level = np.mean(bounded[:5])
    tail_level = np.mean(bounded[-5:])
    bounded[:3] = head_level
    bounded[-3:] = tail_level

    return bounded

def split_data_for_ar_idm(ar_idm_data, train_ratio=0.7):
    """Split every vehicle's rows chronologically into train and validation.

    Parameters
    ----------
    ar_idm_data : dict
        Must provide 'vt', 's', 'dv', 'label_v', 'id_idx' (equal-length 1-D
        array-likes, rows of one vehicle assumed to be in time order) and
        'n_vehicles'.
    train_ratio : float
        Fraction of each vehicle's rows (taken from the start) assigned to
        the training split; the remainder goes to validation.

    Returns
    -------
    tuple of dict
        ``(train_data, val_data)``. Each has float arrays 'vt', 's', 'dv',
        'label_v', an 'id_idx' array, the unchanged 'n_vehicles', and
        'tracks'. ``train_data['tracks'][veh_id]`` stores the last training
        row ('last_vt', 'last_s', 'last_dv'); ``val_data['tracks']`` is empty.

    Notes
    -----
    Vehicles with fewer than 20 rows are dropped from both splits, while
    'n_vehicles' is passed through unchanged.
    """
    fields = ('vt', 's', 'dv', 'label_v')
    # Coerce once: with plain lists `id_idx == veh_id` is a scalar False and
    # every vehicle would silently be skipped.
    series = {name: np.asarray(ar_idm_data[name]) for name in fields}
    id_idx = np.asarray(ar_idm_data['id_idx'])

    # Seed every chunk list with the empty array the result would otherwise
    # start from, so dtypes (float data, int ids) match an empty split too.
    train_chunks = {name: [np.array([])] for name in fields}
    val_chunks = {name: [np.array([])] for name in fields}
    train_ids = [np.array([], dtype=int)]
    val_ids = [np.array([], dtype=int)]
    train_tracks = {}

    for veh_id in np.unique(id_idx):
        rows = np.flatnonzero(id_idx == veh_id)
        if rows.size < 20:
            continue

        split_point = int(rows.size * train_ratio)
        train_rows = rows[:split_point]
        val_rows = rows[split_point:]

        for name in fields:
            train_chunks[name].append(series[name][train_rows])
            val_chunks[name].append(series[name][val_rows])
        train_ids.append(np.full(train_rows.size, veh_id))
        val_ids.append(np.full(val_rows.size, veh_id))

        if train_rows.size > 0:
            last_row = train_rows[-1]
            train_tracks[veh_id] = {
                'last_vt': series['vt'][last_row],
                'last_s': series['s'][last_row],
                'last_dv': series['dv'][last_row]
            }

    def _assemble(chunks, ids, tracks):
        # Concatenate each field once instead of growing arrays per vehicle.
        split = {name: np.concatenate(chunks[name]) for name in fields}
        split['id_idx'] = np.concatenate(ids)
        split['n_vehicles'] = ar_idm_data['n_vehicles']
        split['tracks'] = tracks
        return split

    train_data = _assemble(train_chunks, train_ids, train_tracks)
    val_data = _assemble(val_chunks, val_ids, {})

    print(f"Training set: {len(train_data['vt'])} data points")
    print(f"Validation set: {len(val_data['vt'])} data points")

    return train_data, val_data

In [None]:
def validate_hierarchical_idm_model_comprehensive_improved(trace, model, train_data, val_data, n_samples=100):
    """Score the fitted hierarchical IDM on the validation split.

    Parameters
    ----------
    trace : InferenceData
        Sampling result whose ``posterior`` group contains the deterministic
        'parameters' variable with dimensions (chain, draw, vehicle, parameter).
    model : pm.Model
        Kept for interface compatibility; not needed because 'parameters' is
        read straight from the posterior.
    train_data : dict
        Kept for interface compatibility; not used.
    val_data : dict
        Must provide 'vt', 's', 'dv', 'label_v', 'id_idx' and 'n_vehicles'.
    n_samples : int
        Number of posterior draws to evaluate. Capped at the number of draws
        available in ``trace``; the value actually used is reported under
        'n_samples' in the result.

    Returns
    -------
    dict
        Per-sample prediction arrays (shape ``(n_samples, n_rows)``), the
        smoothed reference acceleration, overall and per-vehicle metrics, the
        flattened parameter draws and the posterior-mean predictions.

    Notes
    -----
    * Draws are selected without replacement with a fixed seed (42).
    * NOTE(review): the "spacing prediction" is the observed spacing itself,
      so spacing errors are zero by construction. Confirm whether a rolled-out
      spacing forecast was intended.
    """
    vt_val = np.asarray(val_data['vt'], dtype=float)
    s_val = np.asarray(val_data['s'], dtype=float)
    dv_val = np.asarray(val_data['dv'], dtype=float)
    label_val = np.asarray(val_data['label_v'], dtype=float)
    id_idx_val = np.asarray(val_data['id_idx'])
    N_veh = val_data['n_vehicles']

    dt = 0.5
    D = 5
    DELTA = 4

    # 'parameters' is a Deterministic already stored in the posterior, so no
    # posterior-predictive pass is required. Pool chains and draws, then take
    # a reproducible subsample.
    posterior_parameters = np.asarray(trace.posterior['parameters'].values)
    pooled = posterior_parameters.reshape((-1,) + posterior_parameters.shape[2:])
    n_available = pooled.shape[0]
    n_samples = min(int(n_samples), n_available)

    print(f"Drawing {n_samples} posterior samples...")
    rng = np.random.default_rng(42)
    chosen = np.sort(rng.choice(n_available, size=n_samples, replace=False))
    parameters_samples = pooled[chosen]  # shape: (n_samples, N_veh, D)

    n_rows = len(vt_val)
    all_samples_speed_predictions = np.zeros((n_samples, n_rows))
    all_samples_spacing_predictions = np.zeros((n_samples, n_rows))
    all_samples_acceleration_predictions = np.zeros((n_samples, n_rows))

    real_acceleration = np.zeros_like(vt_val)
    valid_indices = []

    print("Generating predictions for each posterior sample...")
    for veh_id in range(N_veh):
        mask = (id_idx_val == veh_id)
        n_veh_rows = int(np.sum(mask))
        if n_veh_rows == 0:
            continue

        vt_veh = vt_val[mask]
        s_veh = s_val[mask]
        dv_veh = dv_val[mask]

        # Column vectors of shape (n_samples, 1) broadcast against the
        # vehicle's rows, evaluating every posterior draw at once.
        veh_params = parameters_samples[:, veh_id, :]
        vmax = 25 * veh_params[:, 0:1]
        dsafe = 2 * veh_params[:, 1:2]
        tsafe = 1.6 * veh_params[:, 2:3]
        amax = 1.5 * veh_params[:, 3:4]
        amin = 1.5 * veh_params[:, 4:5]

        sn = dsafe + vt_veh * tsafe + vt_veh * dv_veh / (2 * np.sqrt(amax * amin))
        a_idm = amax * (1 - (vt_veh / vmax) ** DELTA - (sn / s_veh) ** 2)

        all_samples_speed_predictions[:, mask] = vt_veh + a_idm * dt
        all_samples_spacing_predictions[:, mask] = s_veh
        all_samples_acceleration_predictions[:, mask] = a_idm

        # Reference acceleration needs at least three samples; shorter
        # tracks stay at zero and are excluded from the overall metric.
        if n_veh_rows > 2:
            real_acceleration[mask] = improved_robust_acceleration(vt_veh, dt=dt)
            valid_indices.extend(np.where(mask)[0])

    mean_speed_pred = np.mean(all_samples_speed_predictions, axis=0)
    mean_acceleration_pred = np.mean(all_samples_acceleration_predictions, axis=0)
    mean_spacing_pred = np.mean(all_samples_spacing_predictions, axis=0)

    valid_rows = np.asarray(valid_indices, dtype=int)
    speed_metrics = calculate_metrics(label_val, mean_speed_pred)
    acceleration_metrics = calculate_metrics(real_acceleration[valid_rows],
                                           mean_acceleration_pred[valid_rows])
    spacing_metrics = calculate_metrics(s_val, mean_spacing_pred)

    vehicle_metrics = {}
    for veh_id in np.unique(id_idx_val):
        mask = (id_idx_val == veh_id)
        if np.sum(mask) > 0:
            vehicle_metrics[veh_id] = {
                'speed': calculate_metrics(label_val[mask], mean_speed_pred[mask]),
                'acceleration': calculate_metrics(real_acceleration[mask],
                                                  mean_acceleration_pred[mask]),
                'spacing': calculate_metrics(s_val[mask], mean_spacing_pred[mask])
            }

    individual_params = parameters_samples.reshape(-1, D)

    validation_results = {
        'all_samples_speed_predictions': all_samples_speed_predictions,
        'all_samples_spacing_predictions': all_samples_spacing_predictions,
        'all_samples_acceleration_predictions': all_samples_acceleration_predictions,
        'real_acceleration': real_acceleration,
        'valid_indices': valid_indices,
        'n_samples': n_samples,
        'speed_metrics': speed_metrics,
        'acceleration_metrics': acceleration_metrics,
        'spacing_metrics': spacing_metrics,
        'vehicle_metrics': vehicle_metrics,
        'individual_params': individual_params,
        'mean_predictions': {
            'speed': mean_speed_pred,
            'acceleration': mean_acceleration_pred,
            'spacing': mean_spacing_pred
        }
    }

    return validation_results



def print_hierarchical_idm_validation_summary(validation_results):
    """Print overall, per-vehicle and parameter summaries of a validation run.

    Reads 'speed_metrics', 'acceleration_metrics', 'spacing_metrics',
    'vehicle_metrics' and 'individual_params' from ``validation_results`` and
    writes a formatted report to stdout. Parameter columns are multiplied by
    the fixed scales 25, 2, 1.6, 1.5 and 1.5 before being summarised.
    """
    # Look up every metric group first so a missing key fails before any
    # output is produced.
    overall_sections = (
        ("Speed Prediction:", validation_results['speed_metrics'], "m/s"),
        ("\nAcceleration Prediction:", validation_results['acceleration_metrics'], "m/s²"),
        ("\nSpacing Prediction:", validation_results['spacing_metrics'], "m"),
    )
    per_vehicle = validation_results['vehicle_metrics']

    print("\nOVERALL PERFORMANCE METRICS:")
    for heading, scores, unit in overall_sections:
        print(heading)
        print(f"  - RMSE: {scores['rmse']:.4f} {unit}")
        print(f"  - MAE: {scores['mae']:.4f} {unit}")
        print(f"  - NRMSE: {scores['nrmse']:.4f}")

    quantities = (
        ("Speed", 'speed', "m/s"),
        ("Acceleration", 'acceleration', "m/s²"),
        ("Spacing", 'spacing', "m"),
    )
    print("\nVEHICLE-LEVEL PERFORMANCE:")
    for vehicle, scores in per_vehicle.items():
        print(f"\nVehicle {vehicle}:")
        for label, key, unit in quantities:
            entry = scores[key]
            print(f"  {label} - RMSE: {entry['rmse']:.4f} {unit}, MAE: {entry['mae']:.4f} {unit}")

    draws = validation_results['individual_params']
    scaled_columns = zip(
        ['vmax', 'dsafe', 'tsafe', 'amax', 'amin'],
        [25, 2, 1.6, 1.5, 1.5],
    )

    print("\nPARAMETER POSTERIOR STATISTICS:")
    for column, (label, scale) in enumerate(scaled_columns):
        values = draws[:, column] * scale
        lower = np.percentile(values, 2.5)
        upper = np.percentile(values, 97.5)
        print(f"  {label}: Mean = {np.mean(values):.3f}, Std = {np.std(values):.3f}, "
              f"95% CI = [{lower:.3f}, {upper:.3f}]")

def run_hierarchical_idm_calibration_only(ar_idm_data):
    """Split the data 70/30 per vehicle and fit the hierarchical IDM model.

    Returns a dict with the sampling result ('trace'), the model ('model')
    and both data splits ('train_data', 'val_data').
    """
    train_split, val_split = split_data_for_ar_idm(ar_idm_data, train_ratio=0.7)
    idata, idm_model = train_hierarchical_idm_model(train_split)

    return dict(
        trace=idata,
        model=idm_model,
        train_data=train_split,
        val_data=val_split,
    )

def run_hierarchical_idm_validation_only(calibration_results, n_posterior_samples=100):
    """Validate a previously calibrated model and print the summary report.

    ``calibration_results`` must provide the keys 'trace', 'model',
    'train_data' and 'val_data'. Returns the validation results dict.
    """
    # Read all four entries before doing any work so a missing key fails early.
    inputs = [calibration_results[key]
              for key in ('trace', 'model', 'train_data', 'val_data')]

    report = validate_hierarchical_idm_model_comprehensive_improved(
        *inputs, n_samples=n_posterior_samples
    )
    print_hierarchical_idm_validation_summary(report)

    return report


In [None]:
# Split the data per vehicle and fit the hierarchical IDM model (runs MCMC sampling).
# NOTE(review): `ar_idm_data` is not defined in the cells shown here — confirm an
# earlier cell creates it so the notebook survives Restart & Run All.
calibration_results = run_hierarchical_idm_calibration_only(ar_idm_data)

In [None]:
# Evaluate the calibrated model on the validation split with 100 posterior
# samples and print the summary report.
validation_results = run_hierarchical_idm_validation_only(calibration_results, n_posterior_samples=100)