In [2]:
import pandas as pd

test2024 = pd.read_csv("../validation/testing2024manual.csv")
prediction_mapping = pd.read_csv("../data/prediction_mapping.csv")

merged = test2024.merge(prediction_mapping, on="ID")

filtered = merged[merged.groupby("rm_id")["predicted_weight"].transform("sum") > 0]

agg_df = filtered.groupby("rm_id", as_index=False).agg({
    "predicted_weight": "max",
}).sort_values("predicted_weight", ascending=False)

In [3]:
used_rm_ids = set(agg_df["rm_id"])

receivals = pd.read_csv("../data_cleaned/orders_with_receivals_detailed.csv")
receivals_filtered = receivals[receivals["rm_id"].isin(used_rm_ids)]

In [4]:
# Select columns: rm_id, date_arrival, net_weight
selected = receivals_filtered[["rm_id", "date_arrival", "net_weight"]]

print(selected)
print(len(used_rm_ids))

         rm_id               date_arrival  net_weight
34057   2130.0  2012-03-14 10:58:00+02:00      8920.0
34058   2130.0  2012-03-14 10:58:00+02:00      4120.0
34059   2130.0  2012-03-20 17:34:00+02:00      2236.0
34060   2130.0  2012-03-20 17:34:00+02:00      4188.0
34065   2142.0  2012-03-14 10:58:00+02:00       680.0
...        ...                        ...         ...
133275  2142.0  2024-12-12 12:41:00+02:00      3920.0
133276  2143.0  2024-12-12 12:41:00+02:00       260.0
133288  3381.0  2024-12-18 12:18:00+02:00      2806.0
133292  3901.0  2024-12-17 16:05:00+02:00     12540.0
133293  3901.0  2024-12-19 11:40:00+02:00     14040.0

[60444 rows x 3 columns]
46


In [5]:
# Temporal Fusion Transformer (TFT) forecasting for receival prediction
# Install dependencies if needed
# !pip install pytorch-lightning pytorch-forecasting torch --quiet

import pandas as pd
import numpy as np
import torch
from pytorch_lightning import Trainer
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer, Baseline, GroupNormalizer, QuantileLoss
from torch.utils.data import DataLoader  # Correct import for DataLoader

# Prepare data
receivals_filtered["date_arrival"] = pd.to_datetime(receivals_filtered["date_arrival"])
# Convert rm_id to string for categorical encoding
receivals_filtered["rm_id"] = receivals_filtered["rm_id"].astype(str)
receivals_filtered["time_idx"] = (receivals_filtered["date_arrival"] - receivals_filtered["date_arrival"].min()).dt.days

# Only use data after 2017 for training
df_hist = receivals_filtered[receivals_filtered["date_arrival"].dt.year > 2017].copy()

# Define max prediction length (e.g., 12 months ahead)
max_prediction_length = 365  # days
max_encoder_length = 365 * 3  # use last 3 years for context

# Prepare TimeSeriesDataSet
training = TimeSeriesDataSet(
    df_hist,
    time_idx="time_idx",
    target="net_weight",
    group_ids=["rm_id"],
    min_encoder_length=max_encoder_length // 2,  # allow shorter history
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["rm_id"],
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["net_weight"],
    target_normalizer=GroupNormalizer(groups=["rm_id"], transformation="softplus"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True, # <-- Fix for irregular time steps
)

# Create dataloaders
train_dataloader = DataLoader(training, batch_size=64, shuffle=True)

# Initialize TFT model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # Quantile regression
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# Train model (quick demo, adjust epochs for real use)
# Use 'accelerator' and 'devices' instead of deprecated 'gpus' argument
trainer = Trainer(max_epochs=10, accelerator="cpu", devices=1)  # set accelerator="gpu", devices=1 for GPU
trainer.fit(tft, train_dataloaders=train_dataloader)

# Predict future receivals for each rm_id
# Prepare prediction data for 2025
last_time_idx = df_hist["time_idx"].max()
future_df = []
for rm_id in df_hist["rm_id"].unique():
    for i in range(1, max_prediction_length + 1):
        future_df.append({
            "rm_id": rm_id,
            "time_idx": last_time_idx + i,
            "date_arrival": df_hist[df_hist["rm_id"] == rm_id]["date_arrival"].max() + pd.Timedelta(days=i),
        })
future_df = pd.DataFrame(future_df)

# Merge with last known net_weight (required for TFT input)
future_df = future_df.merge(
    df_hist[["rm_id", "net_weight"]].groupby("rm_id").last().reset_index(),
    on="rm_id",
    how="left"
)

# Create prediction TimeSeriesDataSet
prediction = TimeSeriesDataSet.from_dataset(training, future_df, predict=True, stop_randomization=True, allow_missing_timesteps=True)
pred_dataloader = DataLoader(prediction, batch_size=64, shuffle=False)

# Run prediction
raw_predictions, x = tft.predict(pred_dataloader, mode="raw", return_x=True)
# Extract mean predictions
predicted_weights = raw_predictions["prediction"].mean(axis=2).flatten()

# Build simulated receivals DataFrame
simulated_df = future_df.copy()
simulated_df["net_weight"] = predicted_weights
print("TFT-based forecasting complete.")
print(f"Total simulated receivals for 2025: {len(simulated_df)}")
print(simulated_df.head())

  __import__("pkg_resources").declare_namespace(__name__)


  __import__("pkg_resources").declare_namespace(__name__)


ModuleNotFoundError: No module named 'lightning'

In [14]:
# --- Diagnostic: Check module origins and inheritance ---
print('tft class:', type(tft))
print('tft module:', type(tft).__module__)
print('Trainer class:', type(trainer))
print('Trainer module:', type(trainer).__module__)
import inspect
print('tft base classes:', inspect.getmro(type(tft)))
print('Is tft a LightningModule?', isinstance(tft, torch.nn.Module))
print('Is tft a pl.LightningModule?', hasattr(tft, 'training_step'))

tft class: <class 'pytorch_forecasting.models.temporal_fusion_transformer._tft.TemporalFusionTransformer'>
tft module: pytorch_forecasting.models.temporal_fusion_transformer._tft
Trainer class: <class 'pytorch_lightning.trainer.trainer.Trainer'>
Trainer module: pytorch_lightning.trainer.trainer
tft base classes: (<class 'pytorch_forecasting.models.temporal_fusion_transformer._tft.TemporalFusionTransformer'>, <class 'pytorch_forecasting.models.base._base_model.BaseModelWithCovariates'>, <class 'pytorch_forecasting.models.base._base_model.BaseModel'>, <class 'pytorch_forecasting.utils._utils.InitialParameterRepresenterMixIn'>, <class 'lightning.pytorch.core.module.LightningModule'>, <class 'lightning.fabric.utilities.device_dtype_mixin._DeviceDtypeModuleMixin'>, <class 'lightning.pytorch.core.mixins.hparams_mixin.HyperparametersMixin'>, <class 'lightning.pytorch.core.hooks.ModelHooks'>, <class 'lightning.pytorch.core.hooks.DataHooks'>, <class 'lightning.pytorch.core.hooks.CheckpointHooks

In [23]:
simulated_df.to_csv("simulated_receivals_2025.csv", index=False)

In [24]:
sample_submission = pd.read_csv("../data/sample_submission.csv")
prediction_mapping = pd.read_csv("../data/prediction_mapping.csv", parse_dates=["forecast_start_date", "forecast_end_date"])
submission = sample_submission.merge(prediction_mapping, on="ID")
simulated_df = pd.read_csv("simulated_receivals_2025.csv", parse_dates=["date_arrival"])

In [25]:
for receival in simulated_df.itertuples():
    rm_id = receival.rm_id
    date_arrival = receival.date_arrival
    net_weight = receival.net_weight
    # Convert date_arrival to naive datetime for comparison
    date_arrival_naive = date_arrival.replace(tzinfo=None)
    submission.loc[
        (submission['rm_id'] == rm_id) & (submission['forecast_end_date'] >= date_arrival_naive),
        'predicted_weight'
    ] += (net_weight*0.8)

In [26]:
submission = submission[["ID", "predicted_weight"]]
submission.to_csv("testing2025.csv", index=False)

In [17]:
submission = pd.read_csv("testing2025.csv")

In [27]:
test_df = submission.merge(prediction_mapping, on="ID")
test_df = test_df.groupby("rm_id", as_index=False).agg({
    "predicted_weight": "max",
}).sort_values("predicted_weight", ascending=False)

print(test_df[0:46])

     rm_id  predicted_weight
75    2130      6.009917e+06
180   3865      2.117237e+06
151   3126      1.819248e+06
83    2140      1.670645e+06
147   3122      1.637264e+06
160   3282      1.525760e+06
150   3125      1.462576e+06
149   3124      1.024576e+06
148   3123      1.002832e+06
176   3781      9.871144e+05
79    2134      6.253800e+05
159   3265      4.949120e+05
182   3901      4.429440e+05
85    2142      3.663764e+05
87    2144      2.637948e+05
142   2981      2.551360e+05
80    2135      2.384537e+05
76    2131      2.293000e+05
77    2132      1.388832e+05
181   3883      1.175360e+05
88    2145      1.114832e+05
78    2133      1.029703e+05
152   3142      9.710400e+04
163   3421      9.254000e+04
136   2741      9.111520e+04
191   4263      8.025600e+04
172   3642      5.952000e+04
190   4222      5.915200e+04
86    2143      5.849040e+04
71    2125      3.602880e+04
161   3362      2.980800e+04
90    2147      2.336704e+04
70    2124      1.680160e+04
74    2129    

In [None]:
# SCALE SPECIFIC RM_ID DOWN
testing_2130 = pd.read_csv("testing2025.csv")
testing_2130 = testing_2130.merge(prediction_mapping, on="ID")

testing_2130.loc[testing_2130['rm_id'] == 2130, 'predicted_weight'] *= 0.6

testing = testing_2130.groupby("rm_id", as_index=False).agg({
    "predicted_weight": "max",
}).sort_values("predicted_weight", ascending=False)

print(testing)
testing_2130 = testing_2130[["ID", "predicted_weight"]]


     rm_id  predicted_weight
75    2130      3.605950e+06
180   3865      2.117237e+06
151   3126      1.819248e+06
83    2140      1.670645e+06
147   3122      1.637264e+06
..     ...               ...
64    2001      0.000000e+00
65    2061      0.000000e+00
66    2102      0.000000e+00
67    2121      0.000000e+00
202   4501      0.000000e+00

[203 rows x 2 columns]


In [40]:
testing_2130.to_csv("testing2025_2130_only.csv", index=False)

In [None]:

# Check installed versions of PyTorch Forecasting and PyTorch Lightning
import pytorch_forecasting
import pytorch_lightning
print("PyTorch Forecasting version:", pytorch_forecasting.__version__)
print("PyTorch Lightning version:", pytorch_lightning.__version__)

PyTorch Forecasting version: 1.5.0
PyTorch Lightning version: 2.5.5
