In [1]:
# Before running this notebook, run the following commands in a terminal (from inside the docker folder):
#   docker compose down --remove-orphans
#   docker compose up --build -d

# Make predictions using the package

## Predict via the tabular dataset solution

In [2]:

import sys
from pathlib import Path

import joblib
import numpy as np
import pandas as pd

# Raw NDW parquet file, given relative to the notebook's working directory.
raw_path = Path("../../data/NDW/ndw_three_weeks.parquet")

# Put the parent of the working directory at the front of sys.path so the
# `traffic_flow` package can be imported below.
# NOTE(review): presumably this parent is the repo root — confirm against the layout.
sys.path.insert(0, str(Path.cwd().parents[0]))

from traffic_flow.tabular.pipeline.data_pipeline_orchestrator import TrafficDataPipelineOrchestrator
from traffic_flow.post_processing.post_processing import xgb_to_lstm_like_df


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Forecast horizon for the tabular pipeline: how many minutes into the future to predict.
HORIZON = 15

In [4]:
# --- OFFLINE (training path) ---
# Build the orchestrator on the raw parquet file and prepare the base features,
# keeping the last third of the data as the test split.
tdp = TrafficDataPipelineOrchestrator(file_path=raw_path)
tdp.prepare_base_features(test_size=1/3)

# Horizon-specific step (use the same options as in training).
X_train, X_test, y_train, y_test = tdp.finalise_for_horizon(horizon=HORIZON)

Running prepare_base_features!!!!!!!!!!!!!!!!
path: ../../data/NDW/ndw_three_weeks.parquet


2025-09-02 17:11:36,890 - INFO - Loaded 6168960 rows from '../../data/NDW/ndw_three_weeks.parquet'.
2025-09-02 17:11:40,687 - INFO - Aligned sensors to 2023-03-01 00:00:00-2023-03-21 23:59:00. Dropped 0 rows.
2025-09-02 17:11:40,817 - INFO - Proportional split (test_size=0.3333333333333333) at 2023-03-15 00:00:00
2025-09-02 17:11:44,540 - INFO - Using sensor encoding type: mean


[MeanSensorEncoder] Mean encoding learned for 204 sensors. Global mean=93.85.
[AdjacentSensorFeatureAdder] Adding adjacent sensor features.
[AdjacentSensorFeatureAdder] Added features: downstream_sensor_1, upstream_sensor_1
[PreviousWeekdayWindowFeatureEngineer] horizon=15′  window=[-0,+0]′ step=1′  aggs=-  mode=local
[WeatherFeatureDropper] Will drop ['incremental_id', 'Per_cent_frozen_precipitation_surface', 'Precipitable_water_entire_atmosphere_single_layer', 'Precipitation_rate_surface_3_Hour_Average', 'Storm_relative_helicity_height_above_ground_layer', 'Total_precipitation_surface_3_Hour_Accumulation', 'Categorical_Rain_surface_3_Hour_Average', 'Categorical_Freezing_Rain_surface_3_Hour_Average', 'Categorical_Ice_Pellets_surface_3_Hour_Average', 'Categorical_Snow_surface_3_Hour_Average', 'Convective_Precipitation_Rate_surface_3_Hour_Average', 'Convective_precipitation_surface_3_Hour_Accumulation', 'U-Component_Storm_Motion_height_above_ground_layer', 'V-Component_Storm_Motion_heig

In [5]:
# Switch: True re-runs the XGBoost tuning, False reuses the model file saved earlier.
run_model = False

if not run_model:
    # Reuse the previously saved model.
    best_model_path = './models/best_model_xgb_test_with_lstm.pkl'
else:
    # --- MODEL TUNING ---
    from traffic_flow import ModelTunerXGB

    mt = ModelTunerXGB(
        X_train,
        X_test,
        y_train,
        y_test,
        XGBoost_model_name='xgb_test_with_lstm',
    )
    best_model_path, best_params_, training_time, total_time = mt.tune_xgboost(use_gpu=False)

In [6]:
from traffic_flow.tabular.evaluation.model_comparison import ModelEvaluator

# Evaluate the saved model on the test split; the returned result is this cell's output.
me = ModelEvaluator(
    X_test=X_test,
    df_for_ML=tdp.df,
    y_train=y_train,
    y_test=y_test,
)
me.evaluate_model_from_path(best_model_path)


--- Evaluation Results ---

Naive Metrics:
{'Naive_MAE': 4.46, 'Naive_Median_AE': 1.95, 'Naive_RMSE': 9.6, 'Naive_MAPE': 7.45, 'Naive_SMAPE': 6.15}

Naive Metrics Standard Deviations:
{'Naive_MAE_std': 8.5, 'Naive_Median_AE_std': 8.5, 'Naive_RMSE_std': 476.31, 'Naive_MAPE_std': 36.63, 'Naive_SMAPE_std': 15.34}

Metrics:
{'MAE': 3.98, 'Median_AE': 1.79, 'RMSE': 8.34, 'MAPE': 7.7, 'SMAPE': 5.6, 'inference_time': 1.72, 'inference_time_per_sample': 0.0}

Metrics Standard Deviations:
{'MAE_std': 7.33, 'Median_AE_std': 7.33, 'RMSE_std': 366.02, 'MAPE_std': 39.46, 'SMAPE_std': 14.08}
--------------------------



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.df_for_ML['y_pred'] = self.y_pred


{'metrics': {'MAE': 3.98,
  'Median_AE': 1.79,
  'RMSE': 8.34,
  'MAPE': 7.7,
  'SMAPE': 5.6,
  'inference_time': 1.72,
  'inference_time_per_sample': 0.0},
 'metrics_std': {'MAE_std': 7.33,
  'Median_AE_std': 7.33,
  'RMSE_std': 366.02,
  'MAPE_std': 39.46,
  'SMAPE_std': 14.08},
 'naive_metrics': {'Naive_MAE': 4.46,
  'Naive_Median_AE': 1.95,
  'Naive_RMSE': 9.6,
  'Naive_MAPE': 7.45,
  'Naive_SMAPE': 6.15},
 'naive_metrics_std': {'Naive_MAE_std': 8.5,
  'Naive_Median_AE_std': 8.5,
  'Naive_RMSE_std': 476.31,
  'Naive_MAPE_std': 36.63,
  'Naive_SMAPE_std': 15.34}}

In [7]:
# Predictions frame held by the ModelEvaluator instance `me`; displayed as the cell output.
df_pred_xgb = me.df_predictions
df_pred_xgb

Unnamed: 0,date,sensor_id,prediction_time,value,target_total_speed,y_pred
4112640,2023-03-15 00:00:00,RWS01_MONIBAS_0040vwe0633ra,2023-03-15 00:15:00,86.800003,91.080002,86.697869
4112641,2023-03-15 00:00:00,RWS01_MONIBAS_0040vwe0637ra,2023-03-15 00:15:00,97.199997,93.760002,92.257903
4112642,2023-03-15 00:00:00,RWS01_MONIBAS_0040vwe0755ra,2023-03-15 00:15:00,86.510002,92.078003,93.180369
4112643,2023-03-15 00:00:00,RWS01_MONIBAS_0040vwe0757ra,2023-03-15 00:15:00,86.000000,90.788002,87.295738
4112644,2023-03-15 00:00:00,RWS01_MONIBAS_0040vwe0758ra,2023-03-15 00:15:00,90.139999,95.225998,90.908434
...,...,...,...,...,...,...
6165895,2023-03-21 23:44:00,RWS01_MONIBAS_0201hrr0461ra,2023-03-21 23:59:00,111.851997,103.288002,106.872620
6165896,2023-03-21 23:44:00,RWS01_MONIBAS_0201hrr0465ra,2023-03-21 23:59:00,117.283997,107.071999,109.458273
6165897,2023-03-21 23:44:00,RWS01_MONIBAS_0201hrr0470ra,2023-03-21 23:59:00,110.400002,109.129997,107.224273
6165898,2023-03-21 23:44:00,RWS01_MONIBAS_0201hrr0475ra,2023-03-21 23:59:00,114.629997,101.758003,106.955503


In [8]:
# Convert the evaluator's predictions into the wide layout shared with the LSTM
# predictions (one row per timestamp, one column group per sensor).
# The input is read from `me.df_predictions` instead of `df_pred_xgb` itself, so
# re-running this cell never feeds an already-converted frame back into the converter.
df_pred_xgb = xgb_to_lstm_like_df(me.df_predictions)
df_pred_xgb

Unnamed: 0,date,prediction_time,RWS01_MONIBAS_0040vwe0633ra,RWS01_MONIBAS_0040vwe0633ra_pred,RWS01_MONIBAS_0040vwe0633ra_at_issued_time,RWS01_MONIBAS_0040vwe0637ra,RWS01_MONIBAS_0040vwe0637ra_pred,RWS01_MONIBAS_0040vwe0637ra_at_issued_time,RWS01_MONIBAS_0040vwe0755ra,RWS01_MONIBAS_0040vwe0755ra_pred,...,RWS01_MONIBAS_0201hrr0465ra_at_issued_time,RWS01_MONIBAS_0201hrr0470ra,RWS01_MONIBAS_0201hrr0470ra_pred,RWS01_MONIBAS_0201hrr0470ra_at_issued_time,RWS01_MONIBAS_0201hrr0475ra,RWS01_MONIBAS_0201hrr0475ra_pred,RWS01_MONIBAS_0201hrr0475ra_at_issued_time,RWS01_MONIBAS_0201hrr0478ra,RWS01_MONIBAS_0201hrr0478ra_pred,RWS01_MONIBAS_0201hrr0478ra_at_issued_time
0,2023-03-15 00:00:00,2023-03-15 00:15:00,91.080002,86.697868,86.800003,93.760002,92.257904,97.199997,92.078003,93.180367,...,109.379997,101.283997,100.848640,101.699997,102.592003,99.727112,98.790001,102.751999,98.523567,96.000000
1,2023-03-15 00:01:00,2023-03-15 00:16:00,86.440002,87.196335,86.800003,91.639999,95.111588,100.300003,91.678001,94.686859,...,105.190002,99.683998,98.051170,94.349998,99.991997,97.827950,90.895004,101.552002,96.020531,92.750000
2,2023-03-15 00:02:00,2023-03-15 00:17:00,86.440002,87.127258,83.533333,90.120003,90.116386,93.266663,91.557999,94.129494,...,121.126663,98.484001,100.472748,103.900002,97.792000,99.941742,98.699997,97.951996,98.754105,98.500000
3,2023-03-15 00:03:00,2023-03-15 00:18:00,83.040001,87.562668,82.900002,87.720001,89.473083,89.650002,89.370003,93.162750,...,118.345001,96.599998,99.788589,101.675003,96.599998,99.346291,97.275002,96.951996,98.340485,98.779999
4,2023-03-15 00:04:00,2023-03-15 00:19:00,82.480003,88.818413,84.200005,88.400002,89.462639,90.000000,92.129997,92.641586,...,113.715996,93.599998,100.703278,103.540001,92.400002,100.609390,100.620003,93.239998,99.982628,100.624001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10060,2023-03-21 23:40:00,2023-03-21 23:55:00,79.279999,84.049973,84.160004,79.199997,85.439056,86.760002,100.742004,100.273766,...,113.914001,100.860001,107.005180,110.667999,97.199997,106.276566,108.391998,98.239998,106.319000,109.400002
10061,2023-03-21 23:41:00,2023-03-21 23:56:00,79.879997,81.917000,79.080002,77.040001,84.018753,84.919998,96.942001,98.839172,...,122.398003,104.054001,106.698936,111.267998,94.599998,106.209312,116.391998,98.639999,106.357025,112.000000
10062,2023-03-21 23:42:00,2023-03-21 23:57:00,81.839996,82.508652,74.919998,79.000000,83.529442,82.919998,96.832001,98.493599,...,120.797997,109.529999,105.492188,117.964005,100.543999,106.210243,116.022003,100.440002,106.668457,113.400002
10063,2023-03-21 23:43:00,2023-03-21 23:58:00,80.760002,83.051231,78.879997,81.080002,83.118675,82.800003,99.274002,98.532402,...,116.998001,107.529999,107.962875,112.793999,101.157997,107.139671,114.678001,102.400002,106.772865,110.487999


## Predict via the LSTM

In [9]:
import tensorflow as tf
# Make an empty list of devices visible for the 'GPU' device type, so only the CPU is used.
tf.config.set_visible_devices([], 'GPU')  # hide Metal GPU -> CPU only
# Print the logical devices TensorFlow reports, to confirm the setting above.
print("Devices:", tf.config.list_logical_devices())

2025-09-02 17:12:55,989 - DEBUG - Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.
2025-09-02 17:12:56,301 - DEBUG - Creating converter from 7 to 5
2025-09-02 17:12:56,301 - DEBUG - Creating converter from 5 to 7
2025-09-02 17:12:56,302 - DEBUG - Creating converter from 7 to 5
2025-09-02 17:12:56,302 - DEBUG - Creating converter from 5 to 7


Devices: [LogicalDevice(name='/device:CPU:0', device_type='CPU')]


In [10]:
# ------------------ CONFIG: edit these ------------------
# Data and windowing
DATA_PATH = "../../data/NDW/ndw_three_weeks.parquet"
SEQ_LEN = 45            # lookback (minutes/rows)
HORIZON_MIN = 15        # forecast horizon (minutes)
BATCH_SIZE = 256
TARGET_MODE = "delta"   # target mode: delta

# Network size and training schedule
UNITS = 64
LAYERS = 1
DROPOUT = 0.2
DENSE_UNITS = 32        # a value > 0 turns the dense head on
EPOCHS = 300            # upper bound; early stopping is expected to find the best epoch
PATIENCE = 20
LEARNING_RATE = 1.5e-3  # = 0.0015

# LSTM / extras (the trailing tags match the run-id)
bidirectional = False       # bi0
recurrent_dropout = 0.0     # 0.0 was used in v2
conv_frontend = True        # conv1
conv_filters = 32           # cf32
conv_kernel = 3             # ck3
layer_norm_in_lstm = True
attention_pooling = False   # attn0
residual_head = False       # res0

# Demand-context (kept as in v2)
USE_DC = True
DC_WINDOWS_MIN = (60, 1440)

In [11]:
from traffic_flow.deep.experiment import TrafficDeepExperiment, DataCfg, ModelCfg
import pandas as pd
import numpy as np

In [12]:


# ------------------ build configs ------------------
# Data-side configuration: windowing, target definition, demand-context and loader options.
data_cfg = DataCfg(
    # windowing / target
    seq_len=SEQ_LEN,
    horizon_minutes=HORIZON_MIN,
    feature_mode="value_plus_time",
    batch_size=BATCH_SIZE,
    val_fraction_of_train=0.10,
    target_mode=TARGET_MODE,
    # demand-context features
    add_demand_context=USE_DC,
    dc_windows_minutes=tuple(map(int, DC_WINDOWS_MIN)),
    dc_add_deviation=True,
    dc_add_zscore=True,
    dc_add_ratio=False,
    dc_add_flags=True,
    # short-term dynamics (switched off; the std_* settings are still passed through)
    add_short_term_dynamics=False,
    std_base_unit="kph",
    std_short_windows=(5, 15, 30),    # minutes
    std_diff_windows=(1, 3, 5, 10),   # minutes
    std_ema_fast=5,
    std_ema_slow=15,
    std_z_threshold=1.5,
    # loader knobs (defaults are fine; keep as-is or adjust)
    smooth_series=True,
    filter_extreme_changes=True,
    filter_on_train_only=False,
    use_median_instead_of_mean=False,
    relative_threshold=0.7,
    test_size=1/3,
    test_start_time=None,
)

# Model-side configuration, filled from the constants of the config cell.
model_cfg = ModelCfg(
    units=UNITS,
    n_layers=LAYERS,
    dropout=DROPOUT,
    use_norm=True,
    # the dense head is enabled only for a positive DENSE_UNITS
    add_dense=bool(DENSE_UNITS > 0),
    dense_units=max(DENSE_UNITS, 0),
    dense_activation="relu",
    epochs=EPOCHS,
    patience=PATIENCE,
    learning_rate=LEARNING_RATE,
    loss="huber",
    optimizer="adam",
    # --- extras ---
    bidirectional=bidirectional,
    recurrent_dropout=recurrent_dropout,
    conv_frontend=conv_frontend,
    conv_filters=conv_filters,
    conv_kernel=conv_kernel,
    layer_norm_in_lstm=layer_norm_in_lstm,
    attention_pooling=attention_pooling,
    residual_head=residual_head,
    conv_padding="same",
)

In [13]:

# ------------------ run experiment ------------------
exp = TrafficDeepExperiment(data_path=DATA_PATH, artifacts_dir=None, datetime_col="date")

result = exp.run(
    data_cfg=data_cfg,
    model_cfg=model_cfg,
    run_name='best_model_lstm_test_with_xgb',
    results_csv=None,        # give a CSV path here to append a summary row
    save_predictions=False,  # per the original note, enabling this also writes predictions.parquet
    log_dataset_shapes=True,
)

# Pull out the pieces of the result that the following cells use.
paths = result["paths"]
summary = result["summary"]
df_pred_lstm = result["preds_df"]  # wide frame (prediction_time + each sensor & <sensor>_pred)

# Example usage (plot_sensor is defined further down in this notebook):
# if isinstance(df_pred_lstm, pd.DataFrame):
#     plot_sensor(df_pred_lstm, sensor="RWS01_MONIBAS_0040vwe0633ra")

path: ../../data/NDW/ndw_three_weeks.parquet


2025-09-02 17:13:00,270 - INFO - Loaded 6168960 rows from '../../data/NDW/ndw_three_weeks.parquet'.
2025-09-02 17:13:04,030 - INFO - Aligned sensors to 2023-03-01 00:00:00-2023-03-21 23:59:00. Dropped 0 rows.
2025-09-02 17:13:04,156 - INFO - Proportional split (test_size=0.3333333333333333) at 2023-03-15 00:00:00
2025-09-02 17:13:08,282 - INFO - [split] attached frame: 30240 rows, sensors=204; first_test_ts=2023-03-15 00:00:00
2025-09-02 17:13:08,282 - INFO - [split] target-based splits: {'train': (0, 18143), 'val': (18144, 20159), 'test': (20160, 30239)}
2025-09-02 17:13:09,874 - INFO - [windowing] features/step: total=222 (sensors=204, time=6, pred_time=6, custom=6)


[dataset] -------------------------------
 horizon: 15m | seq_len: 45
 features_per_step: 222 | n_sensors: 204
 windows -> train: 71 | val: 8 | test: 40
 batch shapes -> X: (256, 45, 222) | y: (256, 204)
-----------------------------------------




Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.000750000006519258.
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 16: ReduceLROnPlateau reducing learning rate to 0.000375000003259629.
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 21: ReduceLROnPlateau reducing learning rate to 0.0001875000016298145.
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 26: ReduceLROnPlateau reducing learning rate to 9.375000081490725e-05.


In [14]:
# Show the "summary" entry of the result returned by exp.run.
summary

{'run_id': 'best_model_lstm_test_with_xgb',
 'horizon_minutes': 15,
 'target_mode': 'delta',
 'seq_len': 45,
 'features_per_step': 222,
 'n_sensors': 204,
 'n_train_windows': 18085,
 'n_val_windows': 2016,
 'n_test_windows': 10080,
 'MAE': 4.479416666666666,
 'MedianAE': 2.1158382352941176,
 'RMSE': 8.891901960784315,
 'MAPE': 7.594897058823529,
 'SMAPE': 6.126009803921568,
 'naive_MAE': 4.457151960784313,
 'naive_MedianAE': 2.0538529411764705,
 'naive_RMSE': 9.021426470588235,
 'naive_MAPE': 7.447529411764705,
 'naive_SMAPE': 6.149779411764705}

In [15]:
# Show the wide prediction frame taken from the "preds_df" entry of the exp.run result.
df_pred_lstm

Unnamed: 0,date,prediction_time,RWS01_MONIBAS_0040vwe0633ra,RWS01_MONIBAS_0040vwe0633ra_pred,RWS01_MONIBAS_0040vwe0633ra_at_issued_time,RWS01_MONIBAS_0040vwe0637ra,RWS01_MONIBAS_0040vwe0637ra_pred,RWS01_MONIBAS_0040vwe0637ra_at_issued_time,RWS01_MONIBAS_0040vwe0755ra,RWS01_MONIBAS_0040vwe0755ra_pred,...,RWS01_MONIBAS_0201hrr0465ra_at_issued_time,RWS01_MONIBAS_0201hrr0470ra,RWS01_MONIBAS_0201hrr0470ra_pred,RWS01_MONIBAS_0201hrr0470ra_at_issued_time,RWS01_MONIBAS_0201hrr0475ra,RWS01_MONIBAS_0201hrr0475ra_pred,RWS01_MONIBAS_0201hrr0475ra_at_issued_time,RWS01_MONIBAS_0201hrr0478ra,RWS01_MONIBAS_0201hrr0478ra_pred,RWS01_MONIBAS_0201hrr0478ra_at_issued_time
0,2023-03-14 23:45:00,2023-03-15 00:00:00,86.800003,82.820091,85.520004,97.199997,89.233643,90.680000,86.510002,88.393463,...,94.599998,101.699997,91.411034,91.199997,98.790001,91.663948,91.561996,96.000000,91.483849,91.400002
1,2023-03-14 23:46:00,2023-03-15 00:01:00,86.800003,85.044388,87.160004,100.300003,89.803131,90.959999,90.675003,88.130066,...,96.000000,94.349998,89.928970,89.800003,90.895004,91.820274,91.764000,92.750000,92.095215,92.000000
2,2023-03-14 23:47:00,2023-03-15 00:02:00,83.533333,84.850449,87.760002,93.266663,87.678711,89.160004,93.116669,89.953613,...,96.599998,103.900002,97.108315,96.800003,98.699997,96.157547,96.000000,98.500000,93.620644,93.400002
3,2023-03-14 23:48:00,2023-03-15 00:03:00,82.900002,86.798195,90.680000,89.650002,86.265472,88.120003,91.714996,89.892914,...,95.800003,101.675003,101.151588,100.599998,97.275002,96.295250,96.000000,98.779999,92.223396,91.800003
4,2023-03-14 23:49:00,2023-03-15 00:04:00,84.200005,86.661858,91.000000,90.000000,85.285248,87.360001,92.772003,86.927162,...,98.902000,103.540001,103.221466,102.599998,100.620003,95.734261,95.400002,100.624001,92.226517,91.800003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10075,2023-03-21 23:40:00,2023-03-21 23:55:00,79.279999,86.017822,84.160004,79.199997,87.784836,86.760002,100.742004,100.009819,...,113.914001,100.860001,110.673859,110.667999,97.199997,108.402412,108.391998,98.239998,109.291229,109.400002
10076,2023-03-21 23:41:00,2023-03-21 23:56:00,79.879997,81.854309,79.080002,77.040001,86.457542,84.919998,96.942001,97.973274,...,122.398003,104.054001,111.204308,111.267998,94.599998,116.368149,116.391998,98.639999,111.873764,112.000000
10077,2023-03-21 23:42:00,2023-03-21 23:57:00,81.839996,77.807220,74.919998,79.000000,84.578102,82.919998,96.832001,99.236954,...,120.797997,109.529999,118.050751,117.964005,100.543999,116.087830,116.022003,100.440002,113.261246,113.400002
10078,2023-03-21 23:43:00,2023-03-21 23:58:00,80.760002,80.656143,78.879997,81.080002,83.789383,82.800003,99.274002,98.564156,...,116.998001,107.529999,112.834412,112.793999,101.157997,114.708214,114.678001,102.400002,110.378929,110.487999


In [17]:
def plot_sensor(df_wide: pd.DataFrame, sensor: str, start=None, end=None, horizon_min=None):
    """Plot the actual and predicted series of one sensor against prediction time.

    Parameters
    ----------
    df_wide : pd.DataFrame
        Wide prediction frame. It must contain a ``prediction_time`` column and,
        for the requested sensor, the columns ``<sensor>`` (actual values) and
        ``<sensor>_pred`` (predicted values).
    sensor : str
        Name of the sensor column to plot.
    start, end : optional
        Inclusive bounds on ``prediction_time``; anything ``pd.to_datetime``
        accepts. ``None`` leaves that side unbounded.
    horizon_min : optional
        Forecast horizon (minutes) shown in the title. When omitted, the
        notebook-level ``HORIZON_MIN`` is used if it is defined; otherwise the
        title contains only the sensor name.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.

    Raises
    ------
    KeyError
        If ``df_wide`` lacks one of the required columns.
    """
    # Validate up front so a typo in the sensor name gives one clear error
    # instead of failing halfway through building the figure.
    required = ["prediction_time", sensor, f"{sensor}_pred"]
    missing = [col for col in required if col not in df_wide.columns]
    if missing:
        raise KeyError(f"plot_sensor: missing column(s) {missing} in df_wide")

    # Imported here (as in the original) so plotly is only needed when plotting.
    import plotly.graph_objects as go

    d = df_wide
    if start is not None:
        d = d[d["prediction_time"] >= pd.to_datetime(start)]
    if end is not None:
        d = d[d["prediction_time"] <= pd.to_datetime(end)]

    # Fall back to the notebook-level constant without raising NameError when
    # the LSTM config cell has not been run in this kernel.
    if horizon_min is None:
        horizon_min = globals().get("HORIZON_MIN")
    title = sensor if horizon_min is None else f"{sensor} — horizon {horizon_min}m"

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=d["prediction_time"], y=d[sensor], mode="lines", name="actual"))
    fig.add_trace(go.Scatter(x=d["prediction_time"], y=d[f"{sensor}_pred"], mode="lines", name="predicted"))
    fig.update_layout(
        title=title,
        xaxis_title="prediction_time",
        yaxis_title="value",
        legend=dict(x=0.01, y=0.99, bgcolor="rgba(255,255,255,0.6)", bordercolor="rgba(0,0,0,0.15)"),
        margin=dict(l=40, r=20, t=50, b=40),
    )
    fig.show()

In [1]:
# Sensor to inspect; the same id is reused for the LSTM plot in the next cell.
s = 'RWS01_MONIBAS_0160vwx0185ra'
plot_sensor(df_wide=df_pred_xgb, sensor=s)

NameError: name 'plot_sensor' is not defined

In [19]:
# Same sensor as in the previous cell, this time from the LSTM prediction frame.
plot_sensor(df_wide=df_pred_lstm, sensor=s)