## Preparation

In [1]:
import os
import time
from datetime import date

import numpy as np
import polars as pl
import torch
import sys


dir = "C:/Users/USER/PycharmProjects/ts_forecaster_lib/raw_data/"        # default project directory\
save_dir = os.path.join(dir, 'fit')
os.makedirs(save_dir, exist_ok = True)
save_root = os.path.join(save_dir, 'Xpatchtst', '20260119')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
lookback = 52
horizon = 27
batch_size = 256
freq = 'weekly'
split_mode = 'multi'
shuffle = True
id_col = 'unique_id'
date_col = 'date'
y_col = 'y'

# add past exogenous continuous variable columns
past_exo_cont_cols = (
    # "exo_p_y_lag_1w",
    "exo_p_y_lag_2w",
    # "exo_p_y_lag_52w",
    "exo_p_y_rollmean_4w","exo_p_y_rollmean_12w","exo_p_y_rollstd_4w",
    # "exo_p_weeks_since_holiday",
    # "exo_p_temperature",
    # "exo_p_fuel_price",
    # "exo_p_cpi",
    # "exo_p_unemployment",
    # "exo_p_markdown_sum",
    # "exo_p_markdown1",
    # "exo_p_markdown2",
    # "exo_p_markdown3",
    # "exo_p_markdown4",
    # "exo_p_markdown5",
    # "exo_markdown1_isnull",
    # "exo_markdown2_isnull",
    # "exo_markdown3_isnull",
    # "exo_markdown4_isnull",
    # "exo_markdown5_isnull",
)

# add past exogenous categorical variable columns
past_exo_cat_cols = (
    # "exo_c_woy_bucket",
)

future_exo_cb = None

# real dataframe
df = pl.read_parquet(dir + 'train_data/walmart_best_feature_train.parquet')

In [2]:
from modeling_module.training.model_trainers.total_train import run_total_train_weekly
from modeling_module.data_loader import MultiPartExoDataModule

def inspect(loader, name):
    b = next(iter(loader))
    x, y, uid, fe, pe_cont, pe_cat = b
    print(f"[{name}] x:", x.shape, x.device, x.dtype)
    print(f"[{name}] fe:", fe.shape, fe.device, fe.dtype)
    print(f"[{name}] pe:", pe_cont.shape, pe_cont.device, pe_cont.dtype)
    print(f"[{name}] future_exo_cb is None?", loader.collate_fn.future_exo_cb is None)
    if fe.shape[-1] > 0:
        print(f"[{name}] fe sample:", fe[0, :3, :])

data_module = MultiPartExoDataModule(
    df = df,
    id_col = id_col,
    date_col = date_col,
    y_col = y_col,
    lookback = lookback,
    horizon = horizon,
    batch_size = batch_size,
    past_exo_cont_cols = past_exo_cont_cols,
    past_exo_cat_cols = past_exo_cat_cols,
    future_exo_cb = future_exo_cb,
    freq = freq,
    shuffle = shuffle,
    split_mode = split_mode,
)

train_loader = data_module.get_train_loader()
val_loader = data_module.get_val_loader()

inspect(train_loader, 'train_loader')

run_total_train_weekly(
    train_loader, val_loader, device = device,
    lookback = lookback, horizon = horizon,
    warmup_epochs = 30, spike_epochs = 0,
    save_dir = save_root,
    use_exogenous_mode = True,
    models_to_run = ['patchtst'], use_ssl_pretrain = True
)

[train_loader] x: torch.Size([256, 52, 1]) cpu torch.float32
[train_loader] fe: torch.Size([256, 27, 0]) cpu torch.float32
[train_loader] pe: torch.Size([256, 52, 4]) cpu torch.float32
[train_loader] future_exo_cb is None? True

[total_train] === RUN: patchtst (weekly) ===
[DBG-pt_kwargs] use_exogenous_mode=True | d_future=2 | d_past_cont=4 | d_past_cat=0
[SSL] PatchTST Pretrain (Weekly)


  return nn.TransformerEncoder(enc_layer, num_layers=self.e_layers)


[Pretrain] stage=0 epochs=10 lr=0.0003 wd=0.001 mask_ratio=0.3 loss=mse
[Pretrain][stage=0 ep=1/10] train=1.068017 val=1.017288
[Pretrain][stage=0 ep=2/10] train=0.956214 val=0.875542
[Pretrain][stage=0 ep=3/10] train=0.911285 val=0.901295
[Pretrain][stage=0 ep=4/10] train=0.854913 val=0.896292
[Pretrain][stage=0 ep=5/10] train=0.846559 val=0.884240
[Pretrain][stage=0 ep=6/10] train=0.804724 val=0.805929
[Pretrain][stage=0 ep=7/10] train=0.804156 val=0.749205
[Pretrain][stage=0 ep=8/10] train=0.729104 val=0.733138
[Pretrain][stage=0 ep=9/10] train=0.704641 val=0.664551
[Pretrain][stage=0 ep=10/10] train=0.672732 val=0.659830
[Pretrain] done | best_val=0.659830
[DBG-backbone-init] d_past_cont=4 cont_input_dim=48 target_input_dim=12 total_input_dim=60
PatchTST Base (Weekly)
[Finetune] loaded pretrain ckpt: C:\Users\USER\PycharmProjects\ts_forecaster_lib\raw_data\fit\Xpatchtst\20260119\pretrain\patchtst_pretrain_best.pt
[Finetune] load_strict=False
[Finetune] missing_keys=69 unexpected_ke

{'PatchTST Base': {'model': PatchTSTPointModel(
    (backbone): SupervisedBackbone(
      (cat_embs): ModuleList()
      (input_proj): Linear(in_features=60, out_features=256, bias=True)
      (attn_core): FullAttention(
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (encoder): TSTEncoder(
        (layers): ModuleList(
          (0-2): 3 x TSTEncoderLayer(
            (mha): MultiHeadAttention(
              (W_Q): Linear(in_features=256, out_features=256, bias=True)
              (W_K): Linear(in_features=256, out_features=256, bias=True)
              (W_V): Linear(in_features=256, out_features=256, bias=True)
              (core): FullAttentionWithLogits(
                (dropout): Dropout(p=0.0, inplace=False)
              )
              (to_out): Sequential(
                (0): Linear(in_features=256, out_features=256, bias=True)
                (1): Dropout(p=0.0, inplace=False)
              )
            )
            (dropout_attn): Dropout(p=0.0, inplace=Fa