In [1]:
import sys

import polars as pl
import torch


# Environment setup notes (PyTorch build against CUDA 12.8):
#   pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
#   https://developer.nvidia.com/cuda-12-8-0-download-archive

# Raw-data roots for the two development machines. Both keep their trailing
# slash because other cells build paths by plain string concatenation on DIR.
MAC_DIR = '/Users/igwanhyeong/PycharmProjects/data_research/raw_data/'
WINDOW_DIR = 'C:/Users/USER/PycharmProjects/research/raw_data/'

if sys.platform == 'win32':
    DIR = WINDOW_DIR
    # GPU / PyTorch diagnostics, printed only on the Windows machine.
    print(torch.cuda.is_available())
    print(torch.cuda.device_count())
    print(torch.version.cuda)
    print(torch.__version__)
    # Querying device 0 fails when CUDA cannot be initialised, so only ask
    # for its name once availability has been confirmed.
    if torch.cuda.is_available():
        print(torch.cuda.get_device_name(0))
else:
    DIR = MAC_DIR

# Output location for this run's fitted artifacts.
save_dir = DIR + 'fit/20260106_running'


True
1
12.8
2.9.0.dev20250716+cu128
NVIDIA GeForce RTX 5080
2.9.0.dev20250716+cu128


In [3]:
# Preview the ETTh1 CSV stored under the raw-data root (shown as the cell's output).
pl.read_csv(f"{DIR}csv/ETTh1.csv")

date,HUFL,HULL,MUFL,MULL,LUFL,LULL,OT
str,f64,f64,f64,f64,f64,f64,f64
"""2016-07-01 00:00:00""",5.827,2.009,1.599,0.462,4.203,1.34,30.531
"""2016-07-01 01:00:00""",5.693,2.076,1.492,0.426,4.142,1.371,27.787001
"""2016-07-01 02:00:00""",5.157,1.741,1.279,0.355,3.777,1.218,27.787001
"""2016-07-01 03:00:00""",5.09,1.942,1.279,0.391,3.807,1.279,25.044001
"""2016-07-01 04:00:00""",5.358,1.942,1.492,0.462,3.868,1.279,21.948
…,…,…,…,…,…,…,…
"""2018-06-26 15:00:00""",-1.674,3.55,-5.615,2.132,3.472,1.523,10.904
"""2018-06-26 16:00:00""",-5.492,4.287,-9.132,2.274,3.533,1.675,11.044
"""2018-06-26 17:00:00""",2.813,3.818,-0.817,2.097,3.716,1.523,10.271
"""2018-06-26 18:00:00""",9.243,3.818,5.472,2.097,3.655,1.432,9.778


In [2]:
from modeling_module.models.PatchTST.supervised.PatchTST import PatchTSTPointModel
import polars as pl
import numpy as np
import torch
import torch.nn as nn
from torch.optim import AdamW
from modeling_module.data_loader.MultiPartExoDataModule import MultiPartExoDataModule

# 사용자의 모듈 경로에 맞게 수정 필요
from modeling_module.models.PatchTST.common.configs import PatchTSTConfig

# -------------------------------------------------------------------------
# 1. Data preparation and synthetic exogenous variables
# -------------------------------------------------------------------------
# Weekly demand per part, sorted so each part's rows are contiguous and in
# date order.
target_dyn_demand_weekly = pl.read_parquet(DIR + 'target_dyn_demand_weekly.parquet').sort(['oper_part_no', 'demand_dt'])

# Number each part's observations 1..n in the sorted order. A window
# expression is used instead of a per-group Python callback so the work stays
# inside polars.
target_dyn_demand_weekly = target_dyn_demand_weekly.with_columns(
    pl.int_range(1, pl.len() + 1).over('oper_part_no').alias('seq')
)

# Keep only parts that have more than MIN_SERIES_LEN observations. This step
# defines the frame the rest of the cell works on; without it the cell only
# ran when a leftover `df` happened to be in the kernel.
# NOTE(review): the previous (disabled) version of this filter was labelled
# "length >= 260" while its condition was `seq_max > 52`; the coded threshold
# is kept here - confirm which one is intended.
MIN_SERIES_LEN = 52
demand_filtered = target_dyn_demand_weekly.filter(
    pl.col('seq').max().over('oper_part_no') > MIN_SERIES_LEN
)

# Synthetic exogenous variables, for pipeline testing only:
#   1) past continuous  'price'    - uniform random float in [0, 1)
#   2) past categorical 'event_id' - random integer 0..4 (5 distinct values)
np.random.seed(42)
n_rows = len(demand_filtered)

df = demand_filtered.with_columns([
    pl.Series(name="price", values=np.random.rand(n_rows)).cast(pl.Float64),
    # Stored as strings so the DataModule builds the category index itself
    # (per the original author's note; behaviour of the DataModule not visible here).
    pl.Series(name="event_id", values=np.random.randint(0, 5, n_rows)).cast(pl.Utf8),
])

print(f"Data Prepared. Shape: {df.shape}")
print(df.head(3))

# -------------------------------------------------------------------------
# 2. Future exogenous callback
# -------------------------------------------------------------------------
def dummy_future_exo_cb(start_idx: int, horizon: int, device: str):
    """Return placeholder future exogenous features shaped [horizon, 1].

    Stands in for a future continuous 'promotion' signal. A real callback
    would look values up from ``start_idx`` (a date index); this test version
    ignores it and draws standard-normal noise instead.

    Args:
        start_idx: Start position of the forecast window (unused here).
        horizon: Number of future steps to produce.
        device: Device on which the tensor is created.

    Returns:
        A float tensor of shape ``(horizon, 1)`` on ``device``.
    """
    n_future_features = 1
    output_shape = (horizon, n_future_features)
    return torch.randn(*output_shape, device=device)

# -------------------------------------------------------------------------
# 3. Config and DataModule initialisation
# -------------------------------------------------------------------------
# Window geometry shared by the model config and the data module.
lookback, horizon = 36, 8
patch_len, stride = 8, 4

# One embedding slot for UNK (index 0) plus indices 1..5 for the event ids.
actual_cardinality = 1 + 5

# Model configuration (refactored PatchTSTConfig).
cfg = PatchTSTConfig(
    c_in=1,
    target_dim=1,
    lookback=lookback,
    horizon=horizon,
    patch_len=patch_len,
    stride=stride,

    # Exogenous inputs: one past continuous and one past categorical feature.
    d_past_cont=1,
    d_past_cat=1,

    # Has to include the UNK slot as well, i.e. 6 (or more) rather than 5.
    cat_cardinalities=[actual_cardinality],

    d_cat_emb=8,
    d_future=1,

    d_model=64,
    n_layers=2,
    head_dropout=0.2,
)

# Data module serving per-part windows together with the exogenous features.
dm = MultiPartExoDataModule(
    df=df,
    lookback=lookback,
    horizon=horizon,
    freq='weekly',
    batch_size=32,
    part_col='oper_part_no',
    date_col='demand_dt',
    y_col='demand_qty',

    # Exogenous columns. The categorical one refers to the indexed column:
    # per the original note, build_cat_indexer_from produces an `_id`-suffixed
    # column, hence 'event_id_id'.
    past_exo_cont_cols=['price'],
    past_exo_cat_cols=['event_id_id'],

    # Callback supplying the future exogenous features.
    future_exo_cb=dummy_future_exo_cb,

    # Categorical indexing (string -> integer), with the output column named explicitly.
    build_cat_indexer_from=['event_id'],
    cat_indexer_target_col='event_id_id',
)

dm.setup()
train_loader = dm.get_train_loader()

# -------------------------------------------------------------------------
# 4. Model creation and training-loop verification
# -------------------------------------------------------------------------
# Training hyperparameters in one place. N_EPOCHS drives both the OneCycleLR
# schedule length and the epoch loop, so the two cannot drift apart (the
# scheduler errors out if it is stepped past its planned total). LEARNING_RATE
# is likewise shared by the optimizer and the schedule's peak.
N_EPOCHS = 2
LEARNING_RATE = 0.001
GRAD_CLIP_MAX_NORM = 4.0
LOG_EVERY_N_STEPS = 100

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = PatchTSTPointModel.from_config(cfg).to(device)
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
criterion = nn.MSELoss()

print(f"\nModel Initialized on {device}.")
# The literal 8 in this message mirrors d_cat_emb=8 passed to PatchTSTConfig.
print(f"Architecture: Backbone Input Dim per patch approx: "
      f"Target({patch_len}) + Cont({patch_len}) + Cat({patch_len}*8) = {patch_len * (1 + 1 + 8)}")

# One scheduler step is taken per batch, so the schedule spans
# len(train_loader) * N_EPOCHS steps.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=LEARNING_RATE, steps_per_epoch=len(train_loader), epochs=N_EPOCHS
)

model.train()
print("\nStarting Training Loop with Gradient Clipping...")

for epoch in range(N_EPOCHS):
    total_loss = 0
    steps = 0

    for batch in train_loader:
        x, y, part_ids, fe_cont, pe_cont, pe_cat = batch

        # Move every tensor the model or the loss consumes onto the training device.
        x = x.to(device)
        y = y.to(device)
        fe_cont = fe_cont.to(device)
        pe_cont = pe_cont.to(device)
        pe_cat = pe_cat.to(device)

        optimizer.zero_grad()

        pred = model(x, fe_cont=fe_cont, pe_cont=pe_cont, pe_cat=pe_cat)

        loss = criterion(pred, y)

        # 1. Backward pass.
        loss.backward()

        # 2. Clip the global gradient norm before stepping; added by the
        #    original author to stop the loss from exploding.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=GRAD_CLIP_MAX_NORM)

        # 3. Parameter update, then advance the per-batch LR schedule.
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()
        steps += 1

        # Periodic log to check that loss spikes settle quickly.
        if steps % LOG_EVERY_N_STEPS == 0:
            print(f"Epoch {epoch+1} | Step {steps} | Loss: {loss.item():.4f}")

    # max(1, steps) avoids division by zero when the loader yields no batches.
    avg_loss = total_loss / max(1, steps)
    print(f"Epoch {epoch+1} Finished. Avg Loss: {avg_loss:.4f}")

print("\nVerification Complete! The model successfully accepted all exogenous variables.")

Data Prepared. Shape: (1095148, 6)
shape: (3, 6)
┌──────────────┬───────────┬────────────┬─────┬──────────┬──────────┐
│ oper_part_no ┆ demand_dt ┆ demand_qty ┆ seq ┆ price    ┆ event_id │
│ ---          ┆ ---       ┆ ---        ┆ --- ┆ ---      ┆ ---      │
│ str          ┆ i64       ┆ f64        ┆ i64 ┆ f64      ┆ str      │
╞══════════════╪═══════════╪════════════╪═════╪══════════╪══════════╡
│ 01023-50612  ┆ 201802    ┆ 10.0       ┆ 1   ┆ 0.37454  ┆ 2        │
│ 01023-50612  ┆ 201817    ┆ 2.0        ┆ 2   ┆ 0.950714 ┆ 0        │
│ 01023-50612  ┆ 201821    ┆ 20.0       ┆ 3   ┆ 0.731994 ┆ 1        │
└──────────────┴───────────┴────────────┴─────┴──────────┴──────────┘

Model Initialized on cuda.
Architecture: Backbone Input Dim per patch approx: Target(8) + Cont(8) + Cat(8*8) = 80

Starting Training Loop with Gradient Clipping...
Epoch 1 | Step 100 | Loss: 7897.8779
Epoch 1 | Step 200 | Loss: 8980.6904
Epoch 1 | Step 300 | Loss: 3287.6514
Epoch 1 | Step 400 | Loss: 119.5499
Epoch 1 |

KeyboardInterrupt: 