In [2]:
import json

import pandas as pd
import torch
from tqdm import tqdm


def invert_scaling_torch(
    scaled_series: torch.Tensor, uid: str, stats_dict: dict, device="cpu"
) -> torch.Tensor:
    """Undo the per-series scaling of ``scaled_series``.

    The scaler entry for ``uid`` is looked up in ``stats_dict`` and its
    ``"type"`` field selects the inverse transform:

    * ``"minmax"``   -> ``x * (max - min + 1e-8) + min``
    * ``"standard"`` -> ``x * (std + 1e-8) + mean``

    The 1e-8 term presumably mirrors an epsilon used when the data was
    scaled -- confirm against the forward-scaling code.

    Args:
        scaled_series: Tensor of scaled values; it is moved to ``device``.
        uid: Key used to look up the scaler entry in ``stats_dict``.
        stats_dict: Mapping from unique id to a scaler entry containing
            ``"type"`` plus ``"min"``/``"max"`` or ``"mean"``/``"std"``.
        device: Torch device on which the computation is performed.

    Returns:
        The tensor mapped back to the original scale, on ``device``.

    Raises:
        ValueError: If ``stats_dict`` has no (non-empty) entry for ``uid``,
            or if the entry's ``"type"`` is neither ``"minmax"`` nor
            ``"standard"``.
    """
    entry = stats_dict.get(uid)
    if not entry:
        raise ValueError(f"No scaler info found for unique_id '{uid}'")

    values = scaled_series.to(device)

    def _stat(key):
        # Statistics are always materialised as float32 tensors on the target device.
        return torch.tensor(entry[key], dtype=torch.float32, device=device)

    kind = entry["type"]

    if kind == "minmax":
        low = _stat("min")
        high = _stat("max")
        return values * (high - low + 1e-8) + low

    if kind == "standard":
        center = _stat("mean")
        spread = _stat("std")
        return values * (spread + 1e-8) + center

    raise ValueError(f"Unknown scaler type: {kind}")

In [7]:
# Load scaler stats
with open("artifacts/scalers.json", "r") as f:
    stats_dict = json.load(f)

# Load scaled data
scaled_df = pd.read_parquet("data/intermediate/m5_scaled_forecast.parquet")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Result buffers aligned row-for-row with scaled_df. Rows that the loop never
# visits keep their initial NaN.
y_inverted_series = pd.Series(index=scaled_df.index, dtype=float)
yhat_inverted_series = pd.Series(index=scaled_df.index, dtype=float)

# (scaled input column, buffer receiving the inverted values)
inversion_targets = (
    ("y_scaled", y_inverted_series),
    ("yhat_scaled", yhat_inverted_series),
)

# Group over a copy with a fresh 0..n-1 index so that each group's index is a
# set of row *positions*. Writing by position (.iloc) stays correct even when
# scaled_df.index contains repeated labels, whereas label-based
# `.loc[group.index] = ...` is only correct for a unique index.
positional_df = scaled_df[["unique_id", "y_scaled", "yhat_scaled"]].reset_index(drop=True)

# Loop through each group with tqdm
for uid, group in tqdm(positional_df.groupby("unique_id", sort=False), desc="Inverting"):
    row_positions = group.index.to_numpy()
    for scaled_col, inverted_series in inversion_targets:
        scaled_tensor = torch.tensor(group[scaled_col].values, dtype=torch.float32)
        inverted_tensor = invert_scaling_torch(scaled_tensor, uid, stats_dict, device=device)
        inverted_series.iloc[row_positions] = inverted_tensor.cpu().numpy()

# Assign back to DataFrame
scaled_df["y"] = y_inverted_series
scaled_df["yhat"] = yhat_inverted_series
scaled_df.to_parquet("data/output/m5_forecast.parquet")

Inverting: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1108/1108 [00:00<00:00, 3260.82it/s]

CPU times: user 356 ms, sys: 2.56 ms, total: 358 ms
Wall time: 356 ms





Inverting:  88%|████████▊ | 1262/1428 [00:00<00:00, 4781.34it/s]

Inverting: 100%|██████████| 1428/1428 [00:00<00:00, 4346.52it/s]

CPU times: user 288 ms, sys: 76.6 ms, total: 365 ms
Wall time: 365 ms



