In [1]:
import sys

import torch

# Environment setup notes (PyTorch wheels built against CUDA 12.8):
#   pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
#   https://developer.nvidia.com/cuda-12-8-0-download-archive

# Per-machine root folders for the raw data; the active one is picked by platform below.
MAC_DIR = '/Users/igwanhyeong/PycharmProjects/data_research/raw_data/'
WINDOW_DIR = 'C:/Users/USER/PycharmProjects/research/raw_data/'


def report_cuda_environment():
    """Print the PyTorch build and the CUDA details visible to this kernel.

    Prints, in order: CUDA availability, CUDA device count, the CUDA version
    PyTorch was built with, and the PyTorch version. The name of device 0 is
    printed only when CUDA is available, because querying a device name
    without a usable CUDA device raises instead of returning a value.
    """
    cuda_ok = torch.cuda.is_available()
    print(cuda_ok)
    print(torch.cuda.device_count())
    print(torch.version.cuda)
    print(torch.__version__)
    if cuda_ok:
        print(torch.cuda.get_device_name(0))


if sys.platform == 'win32':
    DIR = WINDOW_DIR
    # Diagnostics are only printed on the Windows machine, as before.
    report_cuda_environment()
else:
    DIR = MAC_DIR

# Folder under the data root that is passed as `save_dir` to training/loading below.
save_dir = DIR + 'fit/20260106_running'


True
1
12.8
2.9.0.dev20250716+cu128
NVIDIA GeForce RTX 5080
2.9.0.dev20250716+cu128


In [None]:

import polars as pl
import numpy as np
import torch
from modeling_module import MultiPartExoDataModule

# Adjust to match your own module path if needed

# -------------------------------------------------------------------------
# 1. Data preparation and synthetic exogenous variables
# -------------------------------------------------------------------------
# Load the weekly demand table and order it by part, then by date, so the
# per-part sequence numbers assigned below follow that order.
target_dyn_demand_weekly = (
    pl.read_parquet(DIR + 'target_dyn_demand_weekly.parquet')
    .sort(['oper_part_no', 'demand_dt'])
)

# A part is kept only when its number of rows is strictly greater than this.
# NOTE(review): the original comment said "length >= 260", but the code has
# always filtered on `> 52`; the code's behaviour is preserved here — confirm
# which threshold is actually intended.
MIN_SEQ_LEN = 52

# 1-based running row number within each part. A window expression replaces
# `group_by(...).map_groups(<lambda>)`, which invoked a Python callback once
# per group; the rows keep their sorted order and `seq` is appended last.
target_dyn_demand_weekly = target_dyn_demand_weekly.with_columns(
    pl.int_range(1, pl.col('oper_part_no').len() + 1)
    .over('oper_part_no')
    .alias('seq')
)

# Parts whose largest sequence number exceeds the threshold.
filtered_target = (
    target_dyn_demand_weekly
    .group_by('oper_part_no')
    .agg(pl.col('seq').max().alias('seq_max'))
    .filter(pl.col('seq_max') > MIN_SEQ_LEN)
    .select('oper_part_no')
)
# Keep only the rows that belong to those parts.
df = target_dyn_demand_weekly.join(filtered_target, on='oper_part_no', how='inner')

# [NEW] Add synthetic exogenous variables
# 1) Past continuous: 'price'    (random float in [0, 1))
# 2) Past categorical: 'event_id' (random integer 0..4, i.e. 5 distinct values)
np.random.seed(42)
n_rows = len(df)

df = df.with_columns([
    pl.Series(name="price", values=np.random.rand(n_rows)).cast(pl.Float64),
    # Cast to string so that the DataModule indexes the category from str values
    # (author's intent; the DataModule itself is not visible here).
    pl.Series(name="event_id", values=np.random.randint(0, 5, n_rows)).cast(pl.Utf8)
])

print(f"Data Prepared. Shape: {df.shape}")
print(df.head(3))

# -------------------------------------------------------------------------
# 2. Future Exogenous Callback 정의
# -------------------------------------------------------------------------
# 미래 시점의 'promotion' (연속형) 정보를 반환한다고 가정
def dummy_future_exo_cb(start_idx: int, horizon: int, device: str):
    """Return placeholder future exogenous features for testing.

    Args:
        start_idx: Accepted for interface compatibility; it is not used here.
            A real callback would look features up from this index.
        horizon: Number of future steps to produce.
        device: Torch device on which the tensor is created.

    Returns:
        A tensor of shape ``(horizon, 1)`` drawn from a standard normal
        distribution.
    """
    d_future = 1  # a single future feature column
    out_shape = (horizon, d_future)
    return torch.randn(out_shape, device=device)
# def calendar_sin_cos(t: torch.Tensor, period: float, device: str) -> torch.Tensor:
#     """
#     단일 주기에 대한 sin/cos 쌍 반환 (..., 2)
#     """
#     return torch.stack([
#         torch.sin(2 * torch.pi * t / period),
#         torch.cos(2 * torch.pi * t / period)
#     ], dim=-1)


In [None]:


# -------------------------------------------------------------------------
# 3. Configuration and DataModule initialisation
# -------------------------------------------------------------------------
lookback, horizon = 24, 8
patch_len, stride = 8, 4
actual_cardinality = 5 + 1  # 0 (UNK) + 1,2,3,4,5

# Exogenous-variable settings, grouped so they can be read in one place.
exo_settings = dict(
    # Exogenous columns
    past_exo_cont_cols=['price'],
    # Author's note: mind the `_id` suffix that is generated automatically
    # when `build_cat_indexer_from` is used.
    past_exo_cat_cols=['event_id_id'],

    # Future exogenous callback
    future_exo_cb=dummy_future_exo_cb,

    # Categorical indexing (string -> integer conversion)
    build_cat_indexer_from=['event_id'],
    # Name of the converted column, stated explicitly
    cat_indexer_target_col='event_id_id',
)

# Build the DataModule
dm = MultiPartExoDataModule(
    df=df,
    lookback=lookback,
    horizon=horizon,
    freq='weekly',
    batch_size=512,
    part_col='oper_part_no',
    date_col='demand_dt',
    y_col='demand_qty',
    **exo_settings,
)

dm.setup()
train_loader, val_loader = dm.get_train_loader(), dm.get_val_loader()



In [None]:
# Print only the first batch produced by the training loader.
batch_iter = iter(train_loader)
try:
    train = next(batch_iter)
except StopIteration:
    pass  # empty loader: nothing is printed, same as a loop that never runs
else:
    print(train)
del batch_iter  # do not keep the iterator alive beyond this peek

In [None]:
ds = train_loader.dataset
# Unwrap one level when the dataset exposes an inner `.dataset`
# (the original note refers to a Subset-style wrapper).
ds = getattr(ds, 'dataset', ds)

print("=== Debugging Cardinalities ===")
# When the dataset has no `cat_indexers` attribute there is nothing to report.
for col, indexer in getattr(ds, 'cat_indexers', {}).items():
    max_id_in_map = max(indexer.mapping.values())
    # Smallest cardinality that can hold every id in the mapping.
    calculated_card = max_id_in_map + 1
    print(f"Column '{col}': Max ID in Data = {max_id_in_map}, Required Cardinality >= {calculated_card}")

    # Check whether the logic in total_train.py picks this value up correctly.
    print(f" -> Config will use: {calculated_card}")

In [None]:
from modeling_module import run_total_train_weekly

# Train the selected model(s) on the loaders built above; whatever
# `run_total_train_weekly` returns is kept in `model_dict`.
model_dict = run_total_train_weekly(
    train_loader,
    val_loader,
    lookback=lookback,
    horizon=horizon,
    save_dir=save_dir,
    models_to_run=['patchtst'],
)

In [None]:
from modeling_module import load_model_dict
from modeling_module import build_patchTST_base, build_patchTST_quantile

# Builder functions handed to `load_model_dict`, keyed by model name.
builders = {
    'patchtst_base': build_patchTST_base,
    'patchtst_quantile': build_patchTST_quantile
}
# Fall back to the CPU when CUDA is unavailable: the notebook also has a
# non-Windows path configuration, where a hard-coded 'cuda' device would fail.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
loaded = load_model_dict(save_dir, builders, device = device)

In [None]:
%load_ext autoreload
%autoreload 2

import importlib
import modeling_module as pu
import modeling_module as fo

# `pu` and `fo` are two aliases of the same module object, so a single reload
# refreshes both; a second reload would only re-execute the module again.
importlib.reload(pu)

def my_exo_cb(start_idx: int, Hm: int, device="cuda" if torch.cuda.is_available() else "cpu"):
    """Build calendar-based future exogenous features with a period of 52.

    Thin wrapper that forwards its arguments to ``fo.make_calendar_exo``.

    Args:
        start_idx: Forwarded unchanged as the first positional argument.
        Hm: Forwarded unchanged as the second positional argument.
        device: Torch device string; the default is chosen once, when this
            function is defined ("cuda" if available, otherwise "cpu").

    Returns:
        Whatever ``fo.make_calendar_exo`` returns. The original note states
        exo_dim = 2 (sin, cos) — TODO confirm against that helper.
    """
    calendar_exo = fo.make_calendar_exo(start_idx, Hm, period=52, device=device)
    return calendar_exo

# Use the GPU when one is available, otherwise the CPU.
plot_device = "cuda" if torch.cuda.is_available() else "cpu"

# Plot predictions of the loaded models on the validation loader.
pu.plot_27w(
    # Mapping of model name -> model, e.g. {"PatchMixer": pm_model, "Titan": ti_model, ...}
    models=loaded,
    # Author's note on batch layout: (xb, yb[, part_ids])
    loader=val_loader,
    device=plot_device,
    # Validation mode
    mode="val",
    max_plots=5,
    out_dir=None,
    show=True,
    # No future-exogenous callback is passed here.
    future_exo_cb=None,
)