In [3]:
import pandas as pd
test2024 = pd.read_csv("../validation/testing2024manual.csv")
prediction_mapping = pd.read_csv("../data/prediction_mapping.csv")
merged = test2024.merge(prediction_mapping, on="ID")
filtered = merged[merged.groupby("rm_id")["predicted_weight"].transform("sum") > 0]
agg_df = filtered.groupby("rm_id", as_index=False).agg({
    "predicted_weight": "max"
})

In [4]:
used_rm_ids = set(agg_df["rm_id"])

receivals = pd.read_csv("../data_cleaned/orders_with_receivals_detailed.csv")
receivals_filtered = receivals[receivals["rm_id"].isin(used_rm_ids)]
selected = receivals_filtered[["rm_id", "date_arrival", "net_weight"]]

In [5]:
# --- TFT Model Training: Use all data for training, no validation split ---
import pandas as pd
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet, GroupNormalizer, QuantileLoss
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
import lightning.pytorch as pl

# Load historical data
df_hist = receivals_filtered[["rm_id", "date_arrival", "net_weight"]].copy()
df_hist["date_arrival"] = pd.to_datetime(df_hist["date_arrival"])
if hasattr(df_hist["date_arrival"].dt, "tz") and df_hist["date_arrival"].dt.tz is not None:
    df_hist["date_arrival"] = df_hist["date_arrival"].dt.tz_localize(None)
df_hist["rm_id"] = df_hist["rm_id"].astype(str)
df_hist["time_idx"] = (df_hist["date_arrival"] - df_hist["date_arrival"].min()).dt.days

# Use all data for training
train_data = df_hist.copy()

max_encoder_length = 60
max_prediction_length = 30
batch_size = 64

training = TimeSeriesDataSet(
    train_data,
    time_idx="time_idx",
    target="net_weight",
    group_ids=["rm_id"],
    min_encoder_length=max_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["rm_id"],
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["net_weight"],
    target_normalizer=GroupNormalizer(groups=["rm_id"], transformation="softplus"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,
)

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,
    loss=QuantileLoss(),
    log_interval=0,
    reduce_on_plateau_patience=4,
)

trainer = pl.Trainer(
    max_epochs=20,
    accelerator="auto",
    gradient_clip_val=0.1,
    limit_train_batches=30,
    callbacks=[LearningRateMonitor(), EarlyStopping(monitor="train_loss", patience=3)],
    logger=TensorBoardLogger("lightning_logs"),
    enable_checkpointing=True,
)

trainer.fit(tft, train_dataloaders=train_dataloader)

  from tqdm.autonotebook import tqdm
C:\Users\david\AppData\Roaming\Python\Python312\site-packages\lightning\pytorch\utilities\parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
C:\Users\david\AppData\Roaming\Python\Python312\site-packages\lightning\pytorch\utilities\parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
C:\Users\david\AppData\Roaming\Python\Python312\site-package

Epoch 1:   0%|          | 0/30 [00:00<?, ?it/s, v_num=7, train_loss_step=2.22e+3, train_loss_epoch=2.46e+3]         

C:\Users\david\AppData\Roaming\Python\Python312\site-packages\lightning\pytorch\loops\training_epoch_loop.py:492: ReduceLROnPlateau conditioned on metric val_loss which is not available but strict is set to `False`. Skipping learning rate update.


Epoch 6: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 30/30 [00:33<00:00,  0.90it/s, v_num=7, train_loss_step=2.32e+3, train_loss_epoch=2.22e+3]


In [7]:
# Make predictions for 2025 using the trained TFT model
import pandas as pd
import numpy as np

# Get the last available data point for each rm_id
last_data = df_hist.groupby("rm_id").tail(max_encoder_length).reset_index(drop=True)

# Define the prediction period (2025-01-01 to 2025-05-31)
prediction_start = pd.Timestamp("2025-01-01")
prediction_end = pd.Timestamp("2025-05-31")

# Convert to time_idx based on the historical data's date range
base_date = df_hist["date_arrival"].min()
start_time_idx = (prediction_start - base_date).days
end_time_idx = (prediction_end - base_date).days

print(f"Historical data time_idx range: 0 to {df_hist['time_idx'].max()}")
print(f"Prediction time_idx range: {start_time_idx} to {end_time_idx}")

# Create future time steps for the prediction period
future_time_steps = list(range(start_time_idx, end_time_idx + 1))

# Prepare future data for prediction
future_data = []
for rm_id in df_hist["rm_id"].unique():
    for future_time in future_time_steps:
        future_data.append({
            "rm_id": rm_id,
            "time_idx": future_time,
            "net_weight": 0,  # Placeholder - will be predicted
        })

future_df = pd.DataFrame(future_data)

# Combine historical data with future data for prediction
prediction_data = pd.concat([df_hist, future_df]).reset_index(drop=True)
prediction_data = prediction_data.sort_values(["rm_id", "time_idx"]).reset_index(drop=True)

print(f"Combined dataset shape: {prediction_data.shape}")
print(f"Time_idx range in combined data: {prediction_data['time_idx'].min()} to {prediction_data['time_idx'].max()}")

# Create prediction dataset
try:
    prediction_dataset = TimeSeriesDataSet.from_dataset(
        training, 
        prediction_data, 
        predict=True, 
        stop_randomization=True
    )
    
    # Create prediction dataloader
    pred_dataloader = prediction_dataset.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
    
    # Make predictions
    print("Making predictions...")
    predictions = tft.predict(pred_dataloader, mode="prediction", return_x=True)
    
    print("Predictions completed.")
    print(f"Prediction shape: {predictions[0].shape}")
    
    # Process predictions to create simulated receivals
    predicted_values = predictions[0].cpu().numpy()
    
    # Create results dataframe
    results = []
    
    # Get the prediction data that corresponds to the future period
    future_prediction_data = prediction_data[prediction_data["time_idx"] >= start_time_idx].copy()
    
    for idx, row in future_prediction_data.iterrows():
        if idx - len(df_hist) < len(predicted_values):
            pred_idx = idx - len(df_hist)
            
            # Convert time_idx back to date
            predicted_date = base_date + pd.Timedelta(days=int(row["time_idx"]))
            
            # Get the predicted value (handle different output formats)
            if predicted_values.ndim == 3:  # (batch, time, quantiles)
                pred_value = predicted_values[pred_idx, 0, 3]  # Middle quantile (median)
            elif predicted_values.ndim == 2:  # (batch, quantiles)
                pred_value = predicted_values[pred_idx, 3]  # Middle quantile (median)
            else:
                pred_value = predicted_values[pred_idx]
            
            # Only include positive predictions
            if pred_value > 0:
                results.append({
                    "rm_id": row["rm_id"],
                    "time_idx": row["time_idx"],
                    "date_arrival": predicted_date,
                    "net_weight": pred_value,
                })
    
    simulated_df = pd.DataFrame(results)
    
    print("TFT-based forecasting complete.")
    print(f"Total simulated receivals for 2025: {len(simulated_df)}")
    if len(simulated_df) > 0:
        print(f"Date range: {simulated_df['date_arrival'].min()} to {simulated_df['date_arrival'].max()}")
        print(f"Weight range: {simulated_df['net_weight'].min():.2f} to {simulated_df['net_weight'].max():.2f}")
        print(simulated_df.head())
        
        # Save the results
        simulated_df.to_csv("simulated_receivals_2025.csv", index=False)
        print("Results saved to simulated_receivals_2025.csv")
    else:
        print("No positive predictions generated. Check model training and data.")
        
except Exception as e:
    print(f"Error during prediction: {str(e)}")
    print("This might be due to insufficient historical data or model training issues.")
    
    # Create a fallback simple prediction based on historical patterns
    print("Creating fallback predictions based on historical averages...")
    
    # Calculate daily averages for each rm_id
    daily_avg = df_hist.groupby("rm_id")["net_weight"].mean().reset_index()
    
    fallback_results = []
    for _, avg_row in daily_avg.iterrows():
        rm_id = avg_row["rm_id"]
        avg_weight = avg_row["net_weight"]
        
        # Generate predictions for each day in the prediction period
        for time_idx in future_time_steps:
            pred_date = base_date + pd.Timedelta(days=int(time_idx))
            
            # Add some randomness to avoid identical predictions
            random_factor = np.random.normal(1.0, 0.1)  # Â±10% variation
            pred_weight = max(0, avg_weight * random_factor)
            
            if pred_weight > 0:
                fallback_results.append({
                    "rm_id": rm_id,
                    "time_idx": time_idx,
                    "date_arrival": pred_date,
                    "net_weight": pred_weight,
                })
    
    simulated_df = pd.DataFrame(fallback_results)
    print(f"Fallback predictions generated: {len(simulated_df)} records")
    simulated_df.to_csv("simulated_receivals_2025.csv", index=False)

ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Historical data time_idx range: 0 to 4784
Prediction time_idx range: 4796 to 4946
Combined dataset shape: (67390, 4)
Time_idx range in combined data: 0 to 4946
Making predictions...
Predictions completed.
Prediction shape: torch.Size([46, 30])
Error during prediction: index -60375 is out of bounds for axis 0 with size 46
This might be due to insufficient historical data or model training issues.
Creating fallback predictions based on historical averages...
Fallback predictions generated: 6946 records
Predictions completed.
Prediction shape: torch.Size([46, 30])
Error during prediction: index -60375 is out of bounds for axis 0 with size 46
This might be due to insufficient historical data or model training issues.
Creating fallback predictions based on historical averages...
Fallback predictions generated: 6946 records


C:\Users\david\AppData\Roaming\Python\Python312\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


In [8]:
# Load submission template and prepare data
sample_submission = pd.read_csv("../data/sample_submission.csv")
prediction_mapping = pd.read_csv("../data/prediction_mapping.csv", parse_dates=["forecast_start_date", "forecast_end_date"])

# Initialize submission with zeros
submission = sample_submission.copy()
submission["predicted_weight"] = 0.0

# Merge with prediction mapping to get rm_id and date information
submission = submission.merge(prediction_mapping, on="ID")

# Load the simulated receivals
try:
    simulated_df = pd.read_csv("simulated_receivals_2025.csv", parse_dates=["date_arrival"])
    print(f"Loaded {len(simulated_df)} simulated receivals")
    print(f"Date range: {simulated_df['date_arrival'].min()} to {simulated_df['date_arrival'].max()}")
except FileNotFoundError:
    print("Error: simulated_receivals_2025.csv not found. Please run the TFT prediction cell first.")
    simulated_df = pd.DataFrame()

Loaded 6946 simulated receivals
Date range: 2024-12-31 10:15:00 to 2025-05-30 10:15:00


In [9]:
# Generate submission with proper 2025 date handling
if len(simulated_df) > 0:
    print("Processing TFT predictions for submission...")
    
    for receival in simulated_df.itertuples():
        rm_id = receival.rm_id
        date_arrival = receival.date_arrival
        net_weight = receival.net_weight
        
        # Convert rm_id to the same type as in submission (likely int or string)
        try:
            rm_id_converted = int(float(rm_id)) if isinstance(rm_id, str) else rm_id
        except:
            rm_id_converted = rm_id
        
        # Ensure date_arrival is timezone-naive for comparison
        if hasattr(date_arrival, 'tz') and date_arrival.tz is not None:
            date_arrival_naive = date_arrival.tz_localize(None)
        else:
            date_arrival_naive = date_arrival
        
        # Find matching rows in submission where:
        # 1. rm_id matches
        # 2. the forecast_end_date is on or after the predicted arrival date
        # This means the receival could contribute to forecasts ending on or after that date
        mask = (
            (submission['rm_id'] == rm_id_converted) & 
            (submission['forecast_end_date'] >= date_arrival_naive)
        )
        
        # Add the predicted weight to matching rows
        # Apply a decay factor based on how far the forecast end date is from arrival
        matched_rows = submission[mask].copy()
        if len(matched_rows) > 0:
            for idx in matched_rows.index:
                forecast_end = submission.loc[idx, 'forecast_end_date']
                days_diff = (forecast_end - date_arrival_naive).days
                
                # Apply decay factor - weight decreases as forecast extends further past arrival
                decay_factor = max(0.1, 1.0 / (1.0 + days_diff * 0.1))
                
                submission.loc[idx, 'predicted_weight'] += net_weight * decay_factor

    print(f"Updated {submission['predicted_weight'].sum():.2f} total predicted weight across {len(submission)} rows")
    print(f"Non-zero predictions: {(submission['predicted_weight'] > 0).sum()} out of {len(submission)} rows")
    
    # Show some statistics
    non_zero_submission = submission[submission['predicted_weight'] > 0]
    if len(non_zero_submission) > 0:
        print(f"Weight statistics - Min: {non_zero_submission['predicted_weight'].min():.4f}, "
              f"Max: {non_zero_submission['predicted_weight'].max():.4f}, "
              f"Mean: {non_zero_submission['predicted_weight'].mean():.4f}")
else:
    print("No simulated receivals available for submission generation.")

Updated 7844164248.18 total predicted weight across 30450 rows
Non-zero predictions: 6900 out of 30450 rows


In [10]:
submission = submission[["ID", "predicted_weight"]]
submission.to_csv("testing2025.csv", index=False)

In [11]:
submission = pd.read_csv("testing2025.csv")

In [12]:
test_df = submission.merge(prediction_mapping, on="ID")
test_df = test_df.groupby("rm_id", as_index=False).agg({
    "predicted_weight": "max",
}).sort_values("predicted_weight", ascending=False)

print(test_df[0:46])

     rm_id  predicted_weight
191   4263      3.810537e+06
149   3124      3.770091e+06
160   3282      3.768698e+06
83    2140      3.727930e+06
148   3123      3.692572e+06
150   3125      3.670355e+06
190   4222      3.668641e+06
156   3201      3.662033e+06
147   3122      3.648571e+06
174   3761      3.639693e+06
172   3642      3.590106e+06
181   3883      3.580041e+06
152   3142      3.566936e+06
169   3581      3.549365e+06
159   3265      3.424846e+06
185   4021      3.322451e+06
136   2741      3.231165e+06
187   4081      3.086082e+06
151   3126      2.798929e+06
176   3781      2.592025e+06
142   2981      2.570669e+06
171   3621      2.455260e+06
161   3362      2.450508e+06
90    2147      2.395735e+06
186   4044      2.283560e+06
75    2130      2.279215e+06
182   3901      2.125484e+06
192   4302      1.992285e+06
180   3865      1.694056e+06
162   3381      1.606221e+06
74    2129      1.469537e+06
71    2125      1.314172e+06
78    2133      1.237740e+06
76    2131    

In [None]:
# SCALE SPECIFIC RM_ID DOWN
testing_2130 = pd.read_csv("testing2025.csv")
testing_2130 = testing_2130.merge(prediction_mapping, on="ID")

testing_2130.loc[testing_2130['rm_id'] == 2130, 'predicted_weight'] *= 0.6

testing = testing_2130.groupby("rm_id", as_index=False).agg({
    "predicted_weight": "max",
}).sort_values("predicted_weight", ascending=False)

print(testing)
testing_2130 = testing_2130[["ID", "predicted_weight"]]

testing_2130.to_csv("testing2025_2130_only.csv", index=False)