# Pr√©dictions Multi-√âtapes avec Calcul Vitesse/Direction

Ce notebook:
1. Charge les donn√©es et le snapshot
2. Charge le scaler et les 4 mod√®les LightGBM
3. Cr√©e les 14 features (avec acc√©l√©ration)
4. Fait des pr√©dictions √† t+1, t+2, t+3, t+4, t+5
5. Calcule vitesse et direction √† partir des positions GPS
6. Compare avec les valeurs r√©elles
7. Exporte les r√©sultats en CSV

In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
import joblib
import warnings
warnings.filterwarnings('ignore')
from datetime import timedelta
import math

print("="*80)
print("PR√âDICTIONS MULTI-√âTAPES AVEC CALCUL GPS")
print("="*80)

PR√âDICTIONS MULTI-√âTAPES AVEC CALCUL GPS


## 1. Chargement et pr√©paration des donn√©es

In [2]:
print("\n[1/5] Chargement et pr√©paration des donn√©es...")

#all_taxis_with_candidates_at_fixed_distances.csv
df = pd.read_csv('synthetic_vehicles_recalculated.csv', sep=';')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.sort_values(['vehicle_id', 'timestamp']).reset_index(drop=True)

print(f"‚úì Donn√©es charg√©es: {len(df):,} lignes")
print(f"  V√©hicules: {df['vehicle_id'].nunique()}")
print(f"  Taxis: {sorted(df['vehicle_id'].unique())}")

# Choisir un snapshot au milieu
timestamps_unique = sorted(df['timestamp'].unique())
mid_idx = len(timestamps_unique) // 2
snapshot_timestamp = timestamps_unique[mid_idx]

print(f"\n‚úì Snapshot choisi: {snapshot_timestamp}")
print(f"  Position: {mid_idx} / {len(timestamps_unique)-1} timestamps")

snapshot_df = df[df['timestamp'] == snapshot_timestamp].copy()
print(f"  V√©hicules au snapshot: {len(snapshot_df)}")


[1/5] Chargement et pr√©paration des donn√©es...
‚úì Donn√©es charg√©es: 26,905 lignes
  V√©hicules: 5
  Taxis: ['close_100m', 'close_200m', 'opposite_50', 'opposite_direction', 'witness']

‚úì Snapshot choisi: 2008-02-04 21:11:17
  Position: 2690 / 5380 timestamps
  V√©hicules au snapshot: 5


## 2. Chargement du scaler et des mod√®les

In [3]:
print("\n[2/5] Chargement du scaler et des mod√®les...")

scaler = joblib.load('scaler_ENRICHI.pkl')
print(f"‚úì Scaler charg√©: {scaler.n_features_in_} features")

models = {}
model_files = {
    'latitude': 'model_lgbm_latitude_ENRICHI.txt',
    'longitude': 'model_lgbm_longitude_ENRICHI.txt',
    'direction': 'model_lgbm_direction_ENRICHI.txt',
    'speed': 'model_lgbm_speed_ENRICHI.txt'
}

for target, filepath in model_files.items():
    try:
        models[target] = lgb.Booster(model_file=filepath)
        print(f"‚úì Mod√®le {target:12} charg√©")
    except FileNotFoundError:
        print(f"‚ùå {filepath} non trouv√©!")

print(f"\n‚úì {len(models)}/4 mod√®les charg√©s")


[2/5] Chargement du scaler et des mod√®les...
‚úì Scaler charg√©: 15 features
‚úì Mod√®le latitude     charg√©
‚úì Mod√®le longitude    charg√©
‚úì Mod√®le direction    charg√©
‚úì Mod√®le speed        charg√©

‚úì 4/4 mod√®les charg√©s


## 3. Pr√©paration des features

In [4]:
print("\n[3/5] Pr√©paration des features...")

# Ajouter les features temporelles
df['hour'] = df['timestamp'].dt.hour
df['dayofweek'] = df['timestamp'].dt.dayofweek
df['month'] = df['timestamp'].dt.month
df['minute'] = df['timestamp'].dt.minute
df['is_weekend'] = df['dayofweek'].isin([5, 6]).astype(int)
df['time_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
df['time_cos'] = np.cos(2 * np.pi * df['hour'] / 24)

# Calculer les deltas
df['delta_lat'] = df.groupby('vehicle_id')['latitude'].diff()
df['delta_lon'] = df.groupby('vehicle_id')['longitude'].diff()
df[['delta_lat', 'delta_lon']] = df[['delta_lat', 'delta_lon']].fillna(0)

# Feature taxi_id (encodage num√©rique)
taxi_id_mapping = {taxi_id: idx for idx, taxi_id in enumerate(sorted(df['vehicle_id'].unique()))}
df['taxi_id'] = df['vehicle_id'].map(taxi_id_mapping)

# Calculer l'acc√©l√©ration
df['time_diff_sec'] = df.groupby('vehicle_id')['timestamp'].diff().dt.total_seconds()
df['acceleration'] = df.groupby('vehicle_id')['vitesse'].diff() / df['time_diff_sec']
df[['acceleration', 'time_diff_sec']] = df[['acceleration', 'time_diff_sec']].fillna(0)
df.loc[df['acceleration'] > 10, 'acceleration'] = 0
df.loc[df['acceleration'] < -10, 'acceleration'] = 0

# Features finales
features = [
    'latitude', 'longitude', 'vitesse', 'direction', 'acceleration',
    'delta_lat', 'delta_lon',
    'hour', 'dayofweek', 'month', 'minute', 'is_weekend', 'time_sin', 'time_cos', 'taxi_id'
]

print(f"‚úì Features cr√©√©es: {len(features)}")
print(f"  {features}")
print(f"‚úì Match scaler ENRICHI: {len(features)} == {scaler.n_features_in_}")


[3/5] Pr√©paration des features...
‚úì Features cr√©√©es: 15
  ['latitude', 'longitude', 'vitesse', 'direction', 'acceleration', 'delta_lat', 'delta_lon', 'hour', 'dayofweek', 'month', 'minute', 'is_weekend', 'time_sin', 'time_cos', 'taxi_id']
‚úì Match scaler ENRICHI: 15 == 15


## 4. Pr√©dictions Multi-√âtapes

In [5]:
print("\n[4/5] Pr√©dictions multi-√©tapes...\n")

TIME_STEP_SECONDS = 1  # Intervalle de temps entre les timestamps
results = []

for vehicle_id in sorted(df['vehicle_id'].unique()):
    vehicle_df = df[df['vehicle_id'] == vehicle_id].copy().reset_index(drop=True)
    
    if len(vehicle_df) == 0:
        continue
    
    # R√©cup√©rer la position initiale au snapshot
    snapshot_row = vehicle_df[vehicle_df['timestamp'] == snapshot_timestamp]
    
    if len(snapshot_row) == 0:
        print(f"  ‚ö†Ô∏è  {vehicle_id} pas au snapshot")
        continue
    
    snapshot_idx = snapshot_row.index[0]
    print(f"\n‚ñ∂Ô∏è  {vehicle_id} (position {snapshot_idx} dans la s√©rie)")
    
    # √âtat courant (t) au snapshot
    current_state = {
        'latitude': snapshot_row['latitude'].iloc[0],
        'longitude': snapshot_row['longitude'].iloc[0],
        'vitesse': snapshot_row['vitesse'].iloc[0],
        'direction': snapshot_row['direction'].iloc[0],
        'acceleration': snapshot_row['acceleration'].iloc[0],
        'delta_lat': snapshot_row['delta_lat'].iloc[0],
        'delta_lon': snapshot_row['delta_lon'].iloc[0],
        'hour': snapshot_row['hour'].iloc[0],
        'dayofweek': snapshot_row['dayofweek'].iloc[0],
        'month': snapshot_row['month'].iloc[0],
        'minute': snapshot_row['minute'].iloc[0],
        'is_weekend': snapshot_row['is_weekend'].iloc[0],
        'time_sin': snapshot_row['time_sin'].iloc[0],
        'time_cos': snapshot_row['time_cos'].iloc[0],
        'taxi_id': snapshot_row['taxi_id'].iloc[0]
    }
    
    # ===== PR√âDICTIONS POUR t+1 √† t+5 =====
    for step in range(1, 6):
        next_timestamp = snapshot_timestamp + timedelta(seconds=step * TIME_STEP_SECONDS)
        
        # V√©rifier s'il existe une vraie valeur pour ce timestamp
        true_future = df[(df['vehicle_id'] == vehicle_id) & (df['timestamp'] == next_timestamp)]
        
        # ===== PR√âDICTIONS (toutes avec scaler ENRICHI) =====
        # Features pour tous les mod√®les (scaler ENRICHI)
        X = np.array([[
            current_state['latitude'],
            current_state['longitude'],
            current_state['vitesse'],
            current_state['direction'],
            current_state['acceleration'],
            current_state['delta_lat'],
            current_state['delta_lon'],
            current_state['hour'],
            current_state['dayofweek'],
            current_state['month'],
            current_state['minute'],
            current_state['is_weekend'],
            current_state['time_sin'],
            current_state['time_cos'],
            current_state['taxi_id']
        ]])
        
        X_scaled = scaler.transform(X)
        
        # Pr√©dictions
        pred_lat = models['latitude'].predict(X_scaled)[0]
        pred_lon = models['longitude'].predict(X_scaled)[0]
        pred_speed = models['speed'].predict(X_scaled)[0]
        pred_direction = models['direction'].predict(X_scaled)[0]
        
        # Pr√©parer le r√©sultat
        result_dict = {
            'vehicle_id': vehicle_id,
            'step': step,
            'timestamp': next_timestamp,
            'time_diff_from_snapshot': step * TIME_STEP_SECONDS,
            
            # Position pr√©c√©dente (pour GPS)
            'prev_lat': current_state['latitude'],
            'prev_lon': current_state['longitude'],
            
            # Pr√©dictions GPS
            'pred_lat': pred_lat,
            'pred_lon': pred_lon,
            
            # Vitesse et direction pr√©dites
            'pred_speed': pred_speed,
            'pred_direction': pred_direction,
        }
        
        # Ajouter les valeurs r√©elles si elles existent
        if len(true_future) > 0:
            true_row = true_future.iloc[0]
            result_dict['actual_lat'] = true_row['latitude']
            result_dict['actual_lon'] = true_row['longitude']
            result_dict['actual_speed'] = true_row['vitesse']
            result_dict['actual_direction'] = true_row['direction']
            
            # Calculer les erreurs
            result_dict['error_lat'] = abs(pred_lat - true_row['latitude'])
            result_dict['error_lon'] = abs(pred_lon - true_row['longitude'])
            result_dict['error_speed'] = abs(pred_speed - true_row['vitesse'])
            
            # Erreur direction (circulaire)
            error_dir = abs(pred_direction - true_row['direction'])
            if error_dir > 180:
                error_dir = 360 - error_dir
            result_dict['error_direction'] = error_dir
        else:
            # Pas de vraie valeur
            result_dict['actual_lat'] = None
            result_dict['actual_lon'] = None
            result_dict['actual_speed'] = None
            result_dict['actual_direction'] = None
            result_dict['error_lat'] = None
            result_dict['error_lon'] = None
            result_dict['error_speed'] = None
            result_dict['error_direction'] = None
        
        results.append(result_dict)
        
        # ===== MISE √Ä JOUR DE L'√âTAT COURANT =====
        # Mettre √† jour pour la pr√©diction suivante
        current_state['latitude'] = pred_lat
        current_state['longitude'] = pred_lon
        current_state['vitesse'] = pred_speed
        current_state['direction'] = pred_direction
        
        # Recalculer delta_lat et delta_lon
        current_state['delta_lat'] = pred_lat - snapshot_row['latitude'].iloc[0]
        current_state['delta_lon'] = pred_lon - snapshot_row['longitude'].iloc[0]
        
        # Recalculer acceleration (optionnel)
        current_state['acceleration'] = 0

results_df = pd.DataFrame(results)
print(f"\n‚úì {len(results_df)} pr√©dictions g√©n√©r√©es")


[4/5] Pr√©dictions multi-√©tapes...


‚ñ∂Ô∏è  close_100m (position 2690 dans la s√©rie)

‚ñ∂Ô∏è  close_200m (position 2690 dans la s√©rie)

‚ñ∂Ô∏è  opposite_50 (position 2690 dans la s√©rie)

‚ñ∂Ô∏è  opposite_direction (position 2690 dans la s√©rie)

‚ñ∂Ô∏è  witness (position 2690 dans la s√©rie)

‚úì 25 pr√©dictions g√©n√©r√©es


## 5. R√©sultats et Exportation

In [6]:
print("\n[5/5] R√©sultats et exportation...\n")
print("="*80)
print(f"SNAPSHOT: {snapshot_timestamp}")
print("="*80)

# Afficher un aper√ßu des r√©sultats
display_cols = [
    'vehicle_id', 'step', 'timestamp',
    'prev_lat', 'prev_lon',
    'pred_lat', 'pred_lon',
    'actual_lat', 'actual_lon', 'error_lat', 'error_lon',
    'pred_speed', 'actual_speed', 'error_speed',
    'pred_direction', 'actual_direction', 'error_direction'
]

print("\n‚ñ∂Ô∏è  APER√áU DES R√âSULTATS (premiers 10):")
print("-"*80)
print(results_df[display_cols].head(10).to_string())

# Statistiques par step
print("\n" + "="*80)
print("STATISTIQUES PAR STEP")
print("="*80)

for step in range(1, 6):
    step_data = results_df[results_df['step'] == step]
    print(f"\nüìä STEP {step} (T+{step*TIME_STEP_SECONDS}s):")
    
    if 'error_lat' in step_data.columns:
        print(f"  LATITUDE:")
        print(f"    Erreur moyenne: {step_data['error_lat'].mean():.6f}¬∞")
        print(f"    Erreur max: {step_data['error_lat'].max():.6f}¬∞")
        
        print(f"  LONGITUDE:")
        print(f"    Erreur moyenne: {step_data['error_lon'].mean():.6f}¬∞")
        print(f"    Erreur max: {step_data['error_lon'].max():.6f}¬∞")
        
        print(f"  VITESSE:")
        print(f"    Erreur moyenne: {step_data['error_speed'].mean():.4f} km/h")
        print(f"    Erreur max: {step_data['error_speed'].max():.4f} km/h")
        
        print(f"  DIRECTION:")
        print(f"    Erreur moyenne: {step_data['error_direction'].mean():.4f}¬∞")
        print(f"    Erreur max: {step_data['error_direction'].max():.4f}¬∞")

# Sauvegarder les r√©sultats
output_file = 'predictions_multi_step_results.csv'
results_df.to_csv(output_file, index=False)
print(f"\n‚úì R√©sultats sauvegard√©s: {output_file}")

print("\n" + "="*80)
print("‚ú® SCRIPT TERMIN√â AVEC SUCC√àS ‚ú®")
print("="*80)
print(f"\nüìà R√©sum√©:")
print(f"  Total de pr√©dictions: {len(results_df)}")
print(f"  V√©hicules: {results_df['vehicle_id'].nunique()}")
print(f"  Steps: 1 √† 5")
print(f"  Fichier de sortie: {output_file}")


[5/5] R√©sultats et exportation...

SNAPSHOT: 2008-02-04 21:11:17

‚ñ∂Ô∏è  APER√áU DES R√âSULTATS (premiers 10):
--------------------------------------------------------------------------------
   vehicle_id  step           timestamp   prev_lat    prev_lon   pred_lat    pred_lon  actual_lat  actual_lon  error_lat  error_lon  pred_speed  actual_speed  error_speed  pred_direction  actual_direction  error_direction
0  close_100m     1 2008-02-04 21:11:18  39.921149  116.393932  39.920955  116.393504   39.921146  116.394038   0.000191   0.000533   34.449783       32.5753     1.874483       92.067082             91.82         0.247082
1  close_100m     2 2008-02-04 21:11:19  39.920955  116.393504  39.920955  116.393164   39.921144  116.394148   0.000189   0.000984   35.815044       33.8955     1.919544       93.376008             91.20         2.176008
2  close_100m     3 2008-02-04 21:11:20  39.920955  116.393164  39.920955  116.392394   39.921144  116.394262   0.000188   0.001868   35.81

In [11]:
"""
Scripts utiles pour analyser les r√©sultats des pr√©dictions multi-√©tapes
Fichier: analyze_predictions.py
"""

import pandas as pd
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# ============================================================================
# 1. CHARGER LES DONN√âES
# ============================================================================

def load_predictions(filepath='predictions_multi_step_results.csv'):
    """Charge le CSV des pr√©dictions"""
    df = pd.read_csv(filepath)
    return df


# ============================================================================
# 2. CALCULER LES M√âTRIQUES
# ============================================================================

def calculate_metrics(y_actual, y_pred):
    """Calcule R¬≤, RMSE, MAE"""
    # Enlever les NaN
    mask = ~(np.isnan(y_actual) | np.isnan(y_pred))
    y_actual = y_actual[mask]
    y_pred = y_pred[mask]
    
    if len(y_actual) == 0:
        return {'R2': np.nan, 'RMSE': np.nan, 'MAE': np.nan}
    
    r2 = r2_score(y_actual, y_pred)
    rmse = np.sqrt(mean_squared_error(y_actual, y_pred))
    mae = mean_absolute_error(y_actual, y_pred)
    
    return {'R2': r2, 'RMSE': rmse, 'MAE': mae}


# ============================================================================
# 3. R√âSUM√â GLOBAL
# ============================================================================

def print_summary_stats(df):
    """Affiche un r√©sum√© des statistiques"""
    print("="*80)
    print("R√âSUM√â DES PR√âDICTIONS")
    print("="*80)
    
    print(f"\nüìä Nombre de pr√©dictions: {len(df)}")
    print(f"üöï Nombre de v√©hicules: {df['vehicle_id'].nunique()}")
    print(f"üìà Nombre de steps: {df['step'].nunique()}")
    print(f"‚è∞ Plage de temps: {df['timestamp'].min()} √† {df['timestamp'].max()}")
    
    # Avec et sans vraies valeurs
    with_actual = df.dropna(subset=['actual_lat']).shape[0]
    print(f"‚úì Lignes avec vraies valeurs: {with_actual}/{len(df)}")
    
    return df


# ============================================================================
# 4. M√âTRIQUES PAR STEP
# ============================================================================

def metrics_by_step(df):
    """Calcule R¬≤, RMSE, MAE par step"""
    print("\n" + "="*80)
    print("M√âTRIQUES PAR STEP")
    print("="*80)
    
    df_with_actual = df.dropna(subset=['actual_lat'])
    
    for step in sorted(df['step'].unique()):
        step_data = df_with_actual[df_with_actual['step'] == step]
        
        print(f"\nüìç STEP {step}:")
        
        # Latitude
        metrics_lat = calculate_metrics(
            step_data['actual_lat'].values,
            step_data['pred_lat'].values
        )
        print(f"  LATITUDE:")
        print(f"    R¬≤ = {metrics_lat['R2']:.4f}")
        print(f"    RMSE = {metrics_lat['RMSE']:.6f}¬∞")
        print(f"    MAE = {metrics_lat['MAE']:.6f}¬∞")
        
        # Longitude
        metrics_lon = calculate_metrics(
            step_data['actual_lon'].values,
            step_data['pred_lon'].values
        )
        print(f"  LONGITUDE:")
        print(f"    R¬≤ = {metrics_lon['R2']:.4f}")
        print(f"    RMSE = {metrics_lon['RMSE']:.6f}¬∞")
        print(f"    MAE = {metrics_lon['MAE']:.6f}¬∞")
        
        # Vitesse
        metrics_speed = calculate_metrics(
            step_data['actual_speed'].values,
            step_data['pred_speed'].values
        )
        print(f"  VITESSE:")
        print(f"    R¬≤ = {metrics_speed['R2']:.4f}")
        print(f"    RMSE = {metrics_speed['RMSE']:.4f} km/h")
        print(f"    MAE = {metrics_speed['MAE']:.4f} km/h")
        
        # Direction
        metrics_dir = calculate_metrics(
            step_data['actual_direction'].values,
            step_data['pred_direction'].values
        )
        print(f"  DIRECTION:")
        print(f"    R¬≤ = {metrics_dir['R2']:.4f}")
        print(f"    RMSE = {metrics_dir['RMSE']:.4f}¬∞")
        print(f"    MAE = {metrics_dir['MAE']:.4f}¬∞")


# ============================================================================
# 5. M√âTRIQUES GLOBALES (tous les steps)
# ============================================================================

def metrics_global(df):
    """Calcule R¬≤, RMSE, MAE sur toutes les pr√©dictions"""
    print("\n" + "="*80)
    print("M√âTRIQUES GLOBALES (TOUS LES STEPS)")
    print("="*80)
    
    df_with_actual = df.dropna(subset=['actual_lat'])
    
    # Latitude
    metrics_lat = calculate_metrics(
        df_with_actual['actual_lat'].values,
        df_with_actual['pred_lat'].values
    )
    print(f"\nLATITUDE:")
    print(f"  R¬≤ = {metrics_lat['R2']:.4f}")
    print(f"  RMSE = {metrics_lat['RMSE']:.6f}¬∞")
    print(f"  MAE = {metrics_lat['MAE']:.6f}¬∞")
    
    # Longitude
    metrics_lon = calculate_metrics(
        df_with_actual['actual_lon'].values,
        df_with_actual['pred_lon'].values
    )
    print(f"\nLONGITUDE:")
    print(f"  R¬≤ = {metrics_lon['R2']:.4f}")
    print(f"  RMSE = {metrics_lon['RMSE']:.6f}¬∞")
    print(f"  MAE = {metrics_lon['MAE']:.6f}¬∞")
    
    # Vitesse
    metrics_speed = calculate_metrics(
        df_with_actual['actual_speed'].values,
        df_with_actual['pred_speed'].values
    )
    print(f"\nVITESSE:")
    print(f"  R¬≤ = {metrics_speed['R2']:.4f}")
    print(f"  RMSE = {metrics_speed['RMSE']:.4f} km/h")
    print(f"  MAE = {metrics_speed['MAE']:.4f} km/h")
    
    # Direction
    metrics_dir = calculate_metrics(
        df_with_actual['actual_direction'].values,
        df_with_actual['pred_direction'].values
    )
    print(f"\nDIRECTION:")
    print(f"  R¬≤ = {metrics_dir['R2']:.4f}")
    print(f"  RMSE = {metrics_dir['RMSE']:.4f}¬∞")
    print(f"  MAE = {metrics_dir['MAE']:.4f}¬∞")

print("\n" + "="*80)
print("ANALYSE DES PR√âDICTIONS MULTI-√âTAPES")
print("="*80)

# ============================================================================
# 1. CHARGER LES DONN√âES
# ============================================================================
print("\n[1/4] Chargement des donn√©es...")
df = load_predictions('predictions_multi_step_results.csv')

# ============================================================================
# 2. R√âSUM√â GLOBAL
# ============================================================================
print("\n[2/4] Statistiques g√©n√©rales...")
print_summary_stats(df)

# ============================================================================
# 3. M√âTRIQUES GLOBALES
# ============================================================================
print("\n[3/4] M√©triques globales...")
metrics_global(df)

# ============================================================================
# 4. M√âTRIQUES PAR STEP
# ============================================================================
print("\n[4/4] M√©triques par step...")
metrics_by_step(df)

print("\n" + "="*80)
print("‚úÖ ANALYSE TERMIN√âE")
print("="*80 + "\n")


ANALYSE DES PR√âDICTIONS MULTI-√âTAPES

[1/4] Chargement des donn√©es...

[2/4] Statistiques g√©n√©rales...
R√âSUM√â DES PR√âDICTIONS

üìä Nombre de pr√©dictions: 25
üöï Nombre de v√©hicules: 5
üìà Nombre de steps: 5
‚è∞ Plage de temps: 2008-02-04 21:11:18 √† 2008-02-04 21:11:22
‚úì Lignes avec vraies valeurs: 25/25

[3/4] M√©triques globales...

M√âTRIQUES GLOBALES (TOUS LES STEPS)

LATITUDE:
  R¬≤ = 0.9224
  RMSE = 0.000221¬∞
  MAE = 0.000214¬∞

LONGITUDE:
  R¬≤ = -19.4299
  RMSE = 0.001856¬∞
  MAE = 0.001163¬∞

VITESSE:
  R¬≤ = -0.4563
  RMSE = 8.6845 km/h
  MAE = 6.2669 km/h

DIRECTION:
  R¬≤ = 0.9969
  RMSE = 4.7496¬∞
  MAE = 3.6855¬∞

[4/4] M√©triques par step...

M√âTRIQUES PAR STEP

üìç STEP 1:
  LATITUDE:
    R¬≤ = 0.9217
    RMSE = 0.000221¬∞
    MAE = 0.000214¬∞
  LONGITUDE:
    R¬≤ = -3.7209
    RMSE = 0.000454¬∞
    MAE = 0.000390¬∞
  VITESSE:
    R¬≤ = -1.2796
    RMSE = 4.9017 km/h
    MAE = 4.0046 km/h
  DIRECTION:
    R¬≤ = 0.9999
    RMSE = 0.8784¬∞
    MAE = 0.6

In [8]:
df = pd.read_csv('predictions_multi_step_results.csv')
print("\n[2/4] Application des crit√®res...\n")

def haversine_distance_meters(lat1, lon1, lat2, lon2):
    """Calcule la distance en m√®tres entre deux points GPS"""
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371000  # Rayon de la Terre en m√®tres
    return c * r

# Traiter tous les steps
for step in sorted(df['step'].unique()):
    df_step = df[df['step'] == step]
    
    # Obtenir les donn√©es du witness
    witness_data = df_step[df_step['vehicle_id'] == 'witness'].iloc[0]
    witness_pred_lat = witness_data['pred_lat']
    witness_pred_lon = witness_data['pred_lon']
    witness_pred_speed = witness_data['pred_speed']
    witness_pred_direction = witness_data['pred_direction']
    
    print("\n" + "="*80)
    print(f"STEP {step} (T+{step}s)")
    print("="*80)
    
    print("\nWITNESS (Valeurs PR√âDITES):")
    print(f"  Latitude:  {witness_pred_lat:.6f}¬∞")
    print(f"  Longitude: {witness_pred_lon:.6f}¬∞")
    print(f"  Vitesse:   {witness_pred_speed:.2f} km/h")
    print(f"  Direction: {witness_pred_direction:.2f}¬∞")
    
    print(f"\nCRIT√àRES DE MATCHING:")
    print(f"  1. Distance < 300 m√®tres (rayon autour de witness)")
    print(f"  2. Vitesse: {witness_pred_speed - 20:.2f} √† {witness_pred_speed + 20:.2f} km/h (witness ¬± 20)")
    print(f"  3. Direction: diff√©rence < 25¬∞ avec witness")
    
    candidates = []
    
    # Traiter chaque candidat pour ce step
    for vehicle_id in df_step['vehicle_id'].unique():
        if vehicle_id == 'witness':  # Skip le witness lui-m√™me
            continue
        
        # Prendre la ligne du v√©hicule pour ce step
        vehicle_data = df_step[df_step['vehicle_id'] == vehicle_id].iloc[0]
        
        # Distance
        distance = haversine_distance_meters(
            witness_pred_lat, witness_pred_lon,
            vehicle_data['pred_lat'], vehicle_data['pred_lon']
        )
        
        # Crit√®res
        dist_ok = distance < 300
        speed_ok = (witness_pred_speed - 20) <= vehicle_data['pred_speed'] <= (witness_pred_speed + 20)
        
        # Direction: diff√©rence < 25¬∞ consid√©r√©e comme "m√™me direction"
        direction_diff = abs(vehicle_data['pred_direction'] - witness_pred_direction)
        if direction_diff > 180:
            direction_diff = 360 - direction_diff
        direction_ok = direction_diff < 25
        
        candidates.append({
            'vehicle_id': vehicle_id,
            'distance_m': distance,
            'distance_ok': dist_ok,
            'speed': vehicle_data['pred_speed'],
            'speed_ok': speed_ok,
            'direction': vehicle_data['pred_direction'],
            'direction_diff_from_witness': direction_diff,
            'direction_ok': direction_ok,
            'match': dist_ok and speed_ok and direction_ok
        })
    
    candidates_df = pd.DataFrame(candidates).sort_values('distance_m')
    
    print("\nTous les v√©hicules (tri√©s par distance):")
    print("-"*80)
    print(candidates_df[['vehicle_id', 'distance_m', 'speed', 'direction', 'distance_ok', 'speed_ok', 'direction_ok', 'match']].to_string(index=False))
    
    # V√©hicules candidates
    matches = candidates_df[candidates_df['match']]
    
    if len(matches) > 0:
        print(f"\n‚úì {len(matches)} V√âHICULE(S) CANDIDATE(S) TROUV√â(S):")
        print("-"*80)
        for i, (idx, row) in enumerate(matches.iterrows(), 1):
            print(f"\n  {i}. {row['vehicle_id']}")
            print(f"      Distance:     {row['distance_m']:.1f}m (< 300m ‚úì)")
            print(f"      Vitesse:       {row['speed']:.2f} km/h ({witness_pred_speed-20:.2f} - {witness_pred_speed+20:.2f} ‚úì)")
            print(f"      Direction:     {row['direction']:.2f}¬∞ (diff {row['direction_diff_from_witness']:.2f}¬∞ de witness ‚úì)")
    else:
        print("\n‚ùå Aucun v√©hicule ne correspond aux crit√®res de matching")
        print("\nV√©hicules les plus proches:")
        for idx, row in candidates_df.head(3).iterrows():
            print(f"  {row['vehicle_id']:20}: {row['distance_m']:8.1f}m, {row['speed']:8.2f}km/h, {row['direction']:7.2f}¬∞")

print("\n" + "="*80)


[2/4] Application des crit√®res...


STEP 1 (T+1s)

WITNESS (Valeurs PR√âDITES):
  Latitude:  39.921857¬∞
  Longitude: 116.393504¬∞
  Vitesse:   35.11 km/h
  Direction: 87.35¬∞

CRIT√àRES DE MATCHING:
  1. Distance < 300 m√®tres (rayon autour de witness)
  2. Vitesse: 15.11 √† 55.11 km/h (witness ¬± 20)
  3. Direction: diff√©rence < 25¬∞ avec witness

Tous les v√©hicules (tri√©s par distance):
--------------------------------------------------------------------------------
        vehicle_id  distance_m     speed  direction  distance_ok  speed_ok  direction_ok  match
opposite_direction    0.000000 33.521057 268.195179         True      True         False  False
       opposite_50   89.886515 33.521057 264.999499         True      True         False  False
        close_100m  100.267444 34.449783  92.067082         True      True          True   True
        close_200m  214.250923 34.449783  97.372349         True      True          True   True

‚úì 2 V√âHICULE(S) CANDIDATE(S) TROUV√â(

In [9]:
"""
Visualisation de l'√©volution r√©el vs pr√©dit pour les pr√©dictions multi-√©tapes
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Charger les r√©sultats
df = pd.read_csv('predictions_multi_step_results.csv')

# Garder seulement les lignes avec vraies valeurs
df_with_actual = df.dropna(subset=['actual_lat'])

print(f"Donn√©es charg√©es: {len(df_with_actual)} pr√©dictions avec vraies valeurs")
print(f"V√©hicules: {df_with_actual['vehicle_id'].nunique()}")

# ============================================================================
# GRAPHIQUES PAR V√âHICULE
# ============================================================================

for vehicle_id in sorted(df_with_actual['vehicle_id'].unique()):
    vehicle_data = df_with_actual[df_with_actual['vehicle_id'] == vehicle_id].sort_values('step')
    
    if len(vehicle_data) == 0:
        continue
    
    # Cr√©er une figure avec 4 sous-graphiques
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle(f'√âvolution R√©el vs Pr√©dit - {vehicle_id}', fontsize=16, fontweight='bold')
    
    steps = vehicle_data['step'].values
    
    # ===== LATITUDE =====
    ax = axes[0, 0]
    ax.plot(steps, vehicle_data['actual_lat'], 'o-', label='R√©el', color='green', linewidth=2, markersize=8)
    ax.plot(steps, vehicle_data['pred_lat'], 's--', label='Pr√©dit', color='red', linewidth=2, markersize=8)
    ax.set_xlabel('Step', fontsize=11)
    ax.set_ylabel('Latitude (¬∞)', fontsize=11)
    ax.set_title('Latitude', fontsize=12, fontweight='bold')
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)
    
    # ===== LONGITUDE =====
    ax = axes[0, 1]
    ax.plot(steps, vehicle_data['actual_lon'], 'o-', label='R√©el', color='green', linewidth=2, markersize=8)
    ax.plot(steps, vehicle_data['pred_lon'], 's--', label='Pr√©dit', color='red', linewidth=2, markersize=8)
    ax.set_xlabel('Step', fontsize=11)
    ax.set_ylabel('Longitude (¬∞)', fontsize=11)
    ax.set_title('Longitude', fontsize=12, fontweight='bold')
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)
    
    # ===== VITESSE =====
    ax = axes[1, 0]
    ax.plot(steps, vehicle_data['actual_speed'], 'o-', label='R√©el', color='green', linewidth=2, markersize=8)
    ax.plot(steps, vehicle_data['pred_speed'], 's--', label='Pr√©dit', color='red', linewidth=2, markersize=8)
    ax.set_xlabel('Step', fontsize=11)
    ax.set_ylabel('Vitesse (km/h)', fontsize=11)
    ax.set_title('Vitesse', fontsize=12, fontweight='bold')
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)
    
    # ===== DIRECTION =====
    ax = axes[1, 1]
    ax.plot(steps, vehicle_data['actual_direction'], 'o-', label='R√©el', color='green', linewidth=2, markersize=8)
    ax.plot(steps, vehicle_data['pred_direction'], 's--', label='Pr√©dit', color='red', linewidth=2, markersize=8)
    ax.set_xlabel('Step', fontsize=11)
    ax.set_ylabel('Direction (¬∞)', fontsize=11)
    ax.set_title('Direction', fontsize=12, fontweight='bold')
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig(f'evolution_{vehicle_id}.png', dpi=150, bbox_inches='tight')
    print(f"‚úì Graphique sauvegard√©: evolution_{vehicle_id}.png")
    plt.close()


# ============================================================================
# GRAPHIQUES AGR√âG√âS (TOUS LES V√âHICULES)
# ============================================================================

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('√âvolution R√©el vs Pr√©dit - TOUS LES V√âHICULES', fontsize=16, fontweight='bold')

# Grouper par step
steps_unique = sorted(df_with_actual['step'].unique())

# ===== LATITUDE (moyenne par step) =====
ax = axes[0, 0]
lat_real_by_step = [df_with_actual[df_with_actual['step'] == s]['actual_lat'].mean() for s in steps_unique]
lat_pred_by_step = [df_with_actual[df_with_actual['step'] == s]['pred_lat'].mean() for s in steps_unique]
ax.plot(steps_unique, lat_real_by_step, 'o-', label='R√©el (moy)', color='green', linewidth=2, markersize=8)
ax.plot(steps_unique, lat_pred_by_step, 's--', label='Pr√©dit (moy)', color='red', linewidth=2, markersize=8)
ax.set_xlabel('Step', fontsize=11)
ax.set_ylabel('Latitude (¬∞)', fontsize=11)
ax.set_title('Latitude - Moyenne par Step', fontsize=12, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)

# ===== LONGITUDE (moyenne par step) =====
ax = axes[0, 1]
lon_real_by_step = [df_with_actual[df_with_actual['step'] == s]['actual_lon'].mean() for s in steps_unique]
lon_pred_by_step = [df_with_actual[df_with_actual['step'] == s]['pred_lon'].mean() for s in steps_unique]
ax.plot(steps_unique, lon_real_by_step, 'o-', label='R√©el (moy)', color='green', linewidth=2, markersize=8)
ax.plot(steps_unique, lon_pred_by_step, 's--', label='Pr√©dit (moy)', color='red', linewidth=2, markersize=8)
ax.set_xlabel('Step', fontsize=11)
ax.set_ylabel('Longitude (¬∞)', fontsize=11)
ax.set_title('Longitude - Moyenne par Step', fontsize=12, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)

# ===== VITESSE (moyenne par step) =====
ax = axes[1, 0]
speed_real_by_step = [df_with_actual[df_with_actual['step'] == s]['actual_speed'].mean() for s in steps_unique]
speed_pred_by_step = [df_with_actual[df_with_actual['step'] == s]['pred_speed'].mean() for s in steps_unique]
ax.plot(steps_unique, speed_real_by_step, 'o-', label='R√©el (moy)', color='green', linewidth=2, markersize=8)
ax.plot(steps_unique, speed_pred_by_step, 's--', label='Pr√©dit (moy)', color='red', linewidth=2, markersize=8)
ax.set_xlabel('Step', fontsize=11)
ax.set_ylabel('Vitesse (km/h)', fontsize=11)
ax.set_title('Vitesse - Moyenne par Step', fontsize=12, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)

# ===== DIRECTION (moyenne par step) =====
ax = axes[1, 1]
dir_real_by_step = [df_with_actual[df_with_actual['step'] == s]['actual_direction'].mean() for s in steps_unique]
dir_pred_by_step = [df_with_actual[df_with_actual['step'] == s]['pred_direction'].mean() for s in steps_unique]
ax.plot(steps_unique, dir_real_by_step, 'o-', label='R√©el (moy)', color='green', linewidth=2, markersize=8)
ax.plot(steps_unique, dir_pred_by_step, 's--', label='Pr√©dit (moy)', color='red', linewidth=2, markersize=8)
ax.set_xlabel('Step', fontsize=11)
ax.set_ylabel('Direction (¬∞)', fontsize=11)
ax.set_title('Direction - Moyenne par Step', fontsize=12, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('evolution_tous_vehicules.png', dpi=150, bbox_inches='tight')
print(f"\n‚úì Graphique global sauvegard√©: evolution_tous_vehicules.png")
plt.close()

print("\n‚úÖ Tous les graphiques ont √©t√© g√©n√©r√©s!")

Donn√©es charg√©es: 25 pr√©dictions avec vraies valeurs
V√©hicules: 5
‚úì Graphique sauvegard√©: evolution_close_100m.png
‚úì Graphique sauvegard√©: evolution_close_200m.png
‚úì Graphique sauvegard√©: evolution_opposite_50.png
‚úì Graphique sauvegard√©: evolution_opposite_direction.png
‚úì Graphique sauvegard√©: evolution_witness.png

‚úì Graphique global sauvegard√©: evolution_tous_vehicules.png

‚úÖ Tous les graphiques ont √©t√© g√©n√©r√©s!
