In [1]:
# import os
# os.chdir("/Users/igwanhyeong/PycharmProjects/data_research")

# conda activate ts_forecaster
# python -m pip install ipykernel
# python -m ipykernel install --user --name ts_forecaster --display-name "Python (ts_forecaster)"

import os, sys

# Report where the kernel is running and which import roots are searched first,
# so a wrong working directory or environment is visible right away.
working_dir = os.getcwd()
search_path_head = sys.path[:8]

print("cwd:", working_dir)
print("sys.path head:", search_path_head)

cwd: /Users/igwanhyeong/PycharmProjects/ts_forecaster_lib/src/model_test
sys.path head: ['/opt/anaconda3/envs/ts_forecaster/lib/python312.zip', '/opt/anaconda3/envs/ts_forecaster/lib/python3.12', '/opt/anaconda3/envs/ts_forecaster/lib/python3.12/lib-dynload', '', '/opt/anaconda3/envs/ts_forecaster/lib/python3.12/site-packages', '/Users/igwanhyeong/PycharmProjects/ts_forecaster_lib/src']


In [2]:
import glob
import os
import sys

import torch

# CUDA build of PyTorch used on the Windows machine:
#   pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
#   https://developer.nvidia.com/cuda-12-8-0-download-archive

# Platform-specific roots of the raw-data tree. Both end with a slash because
# paths below are built by plain string concatenation.
MAC_DIR = '/Users/igwanhyeong/PycharmProjects/data_research/raw_data/'
WINDOW_DIR = 'C:/Users/USER/PycharmProjects/research/raw_data/'

if sys.platform == 'win32':
    DIR = WINDOW_DIR
    # CUDA diagnostics for the Windows workstation.
    print(torch.cuda.is_available())
    print(torch.cuda.device_count())
    print(torch.version.cuda)
    print(torch.__version__)
    # Querying the device name raises when no CUDA device is usable, so only
    # ask for it once CUDA has been confirmed to be available.
    if torch.cuda.is_available():
        print(torch.cuda.get_device_name(0))
else:
    DIR = MAC_DIR
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Directory under which model checkpoints for this notebook are written.
save_dir = DIR + 'fit/model_validation'

# Deleting old checkpoints is destructive, so it is opt-in. Set this to True to
# remove every "*.pt" file directly under `save_dir` before training.
CLEAN_OLD_CHECKPOINTS = False

if CLEAN_OLD_CHECKPOINTS:
    stale_checkpoints = glob.glob(os.path.join(save_dir, "*.pt"))
    print(f"Deleting {len(stale_checkpoints)} old checkpoint files...")
    for ckpt_file in stale_checkpoints:
        try:
            os.remove(ckpt_file)
        except OSError as exc:
            # Best effort: report the failure and continue with the remaining files.
            print(f"Error deleting {ckpt_file}: {exc}")
    # Make sure the directory exists for the checkpoints written later.
    os.makedirs(save_dir, exist_ok=True)
    print("Clean up complete.")
else:
    print("Checkpoint clean-up skipped (CLEAN_OLD_CHECKPOINTS is False).")


Clean up complete.


In [3]:
import polars as pl
import numpy as np

# Load the raw ETTh1 table from the platform-specific data directory.
ETT1 = pl.read_csv(DIR + "csv/ETTh1.csv")

# Column handles reused by the feature expressions below.
t_expr = pl.col("t_idx")
hour_expr = pl.col("hour")
y_expr = pl.col("y")
promo_expr = pl.col("promo_flag")

# Effect sizes injected into the synthetic target:
#   y = HUFL + alpha * promo_flag + beta * sin24
# alpha is deliberately large so the promo effect is clearly noticeable.
alpha = 2.0
beta  = 0.5

# (1) Known-future schedule: the promo is assumed to be active during
#     hours 8-10 and 18-20 of each 24-step cycle.
promo_morning = (hour_expr >= 8) & (hour_expr <= 10)
promo_evening = (hour_expr >= 18) & (hour_expr <= 20)

# (2) Calendar exo: phase of the 24-step cycle that feeds the sin/cos pair.
phase24 = 2*np.pi*t_expr/24.0

# (3) (Important) Inject the promo effect into the target, so forecasting is
#     harder without the exogenous inputs and easier with them.
injected_y = (
    pl.col("HUFL").cast(pl.Float32)
    + promo_expr.cast(pl.Float32) * pl.lit(alpha)
    + pl.col("exo_fut_sin24").cast(pl.Float32) * pl.lit(beta)
)

df = (
    ETT1
    .select(["date", "HUFL"])
    .with_columns(pl.lit("A").alias("unique_id"))
    # Parse the original date string into a Datetime as-is.
    .with_columns(
        pl.col("date").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S", strict=False).alias("date")
    )
    .sort(["unique_id", "date"])
    # Running time index within each series.
    .with_columns(pl.arange(0, pl.len()).over("unique_id").alias("t_idx"))
    .with_columns([(t_expr % 24).alias("hour")])
    .with_columns([(promo_morning | promo_evening).cast(pl.Int8).alias("promo_flag")])
    .with_columns([
        phase24.sin().cast(pl.Float32).alias("exo_fut_sin24"),
        phase24.cos().cast(pl.Float32).alias("exo_fut_cos24"),
    ])
    .with_columns([injected_y.alias("y")])
)

# =========================
# Past-exogenous candidates
# =========================
# They are derived from y rather than from raw HUFL, because y already carries
# the injected promo/seasonality signal.
y_lag1 = y_expr.shift(1).over("unique_id")
y_lag24 = y_expr.shift(24).over("unique_id")  # one day (24 steps) earlier
y_rm24 = y_expr.rolling_mean(window_size=24).over("unique_id")
y_rs24 = y_expr.rolling_std(window_size=24).over("unique_id")

# Feature name -> expression. Insertion order defines both the column order in
# `df` and the order of `past_cols`.
past_exo_exprs = {
    # (A) lag / diff
    "pe_lag1_y": y_lag1,
    "pe_lag24_y": y_lag24,
    "pe_diff1_y": y_expr - y_lag1,
    "pe_diff24_y": y_expr - y_lag24,
    # (B) rolling mean / std (short and medium windows)
    "pe_rm6_y": y_expr.rolling_mean(window_size=6).over("unique_id"),
    "pe_rm24_y": y_rm24,
    "pe_rs24_y": y_rs24,
    # (C) z-score over the 24-step window; the epsilon avoids division by zero
    "pe_z24_y": (y_expr - y_rm24) / (y_rs24 + 1e-6),
    # (D) exponential moving average via Polars ewm_mean
    "pe_ema_a02_y": y_expr.ewm_mean(alpha=0.2).over("unique_id"),
    # (E) past state of the promo event (lag and 24-step share)
    "pe_lag1_promo": promo_expr.shift(1).over("unique_id").cast(pl.Float32),
    "pe_rm24_promo": promo_expr.rolling_mean(24).over("unique_id").cast(pl.Float32),
}
df = df.with_columns([expr.alias(name) for name, expr in past_exo_exprs.items()])

# shift/rolling leave nulls in the warm-up rows. Nulls may turn into NaN when
# the dataset converts columns to numpy, so they are replaced with 0 here.
past_cols = list(past_exo_exprs)

df = df.with_columns([pl.col(name).fill_null(0.0).cast(pl.Float32) for name in past_cols])

df.select(["date","promo_flag", "y", 'HUFL'] + past_cols).head(5)


date,promo_flag,y,HUFL,pe_lag1_y,pe_lag24_y,pe_diff1_y,pe_diff24_y,pe_rm6_y,pe_rm24_y,pe_rs24_y,pe_z24_y,pe_ema_a02_y,pe_lag1_promo,pe_rm24_promo
datetime[μs],i8,f32,f64,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
2016-07-01 00:00:00,0,5.827,5.827,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.827,0.0,0.0
2016-07-01 01:00:00,0,5.822409,5.693,5.827,0.0,-0.004591,0.0,0.0,0.0,0.0,0.0,5.82445,0.0,0.0
2016-07-01 02:00:00,0,5.407,5.157,5.822409,0.0,-0.415409,0.0,0.0,0.0,0.0,0.0,5.653364,0.0,0.0
2016-07-01 03:00:00,0,5.443553,5.09,5.407,0.0,0.036553,0.0,0.0,0.0,0.0,0.0,5.58229,0.0,0.0
2016-07-01 04:00:00,0,5.791012,5.358,5.443553,0.0,0.347459,0.0,0.0,0.0,0.0,0.0,5.64438,0.0,0.0


In [4]:

from modeling_module.data_loader import MultiPartExoDataModule
from modeling_module.utils.exogenous_utils import compose_exo_calendar_cb

# Window geometry shared with the model configuration in later cells.
lookback = 52
horizon = 8

# Callback passed to the data module as `future_exo_cb`; built with
# date_type='H' and sin/cos encoding enabled.
future_exo_cb = compose_exo_calendar_cb(date_type='H', sincos=True)

# Keyword arguments for the data module, kept in one place so they are easy to tweak.
data_module_kwargs = dict(
    id_col='unique_id',
    date_col='date',
    y_col='y',
    lookback=lookback,
    horizon=horizon,
    batch_size=128,
    past_exo_cont_cols=past_cols,
    future_exo_cb=future_exo_cb,
    freq='hourly',
    shuffle=True,
    split_mode='multi',
)
data_module = MultiPartExoDataModule(df, **data_module_kwargs)

# Loaders consumed by the training cells below.
train_loader = data_module.get_train_loader()
val_loader = data_module.get_val_loader()

In [6]:
from modeling_module.training.config import TrainingConfig
import torch

from modeling_module.models.PatchTST import PatchTSTConfig

from modeling_module.models.PatchTST.self_supervised.PatchTST import PatchTSTPretrainModel
from modeling_module.training.model_trainers.patchtst_pretrain import train_patchtst_pretrain

# 1) Model configuration (skip this block if `cfg` has already been built).
cfg = PatchTSTConfig(
    lookback=lookback,
    horizon=horizon,
    patch_len=14,
    stride=7,
    # n_vars=1,  # must match the actual number of input channels (e.g. target_channel=1)
    use_revin=True,
)

# 2) Training configuration (TrainingConfig).
# Pick the best available accelerator: CUDA first (the Windows workstation),
# then Apple MPS, otherwise fall back to the CPU.
if torch.cuda.is_available():
    train_device = "cuda"
elif torch.backends.mps.is_available():
    train_device = "mps"
else:
    train_device = "cpu"

train_cfg = TrainingConfig(
    device=train_device,
    epochs=10,
    lr=1e-4,
    weight_decay=1e-4,
    use_amp=False,   # usually left off on macOS MPS
    log_every=100,
)

# Checkpoint location, defined once so the trainer arguments and the path
# reused by later cells cannot drift apart.
pretrain_dir = save_dir + "/pretrain"
pretrain_ckpt_name = "patchtst_pretrain_best.pt"

# 3) Build the pretraining model.
pre_model = PatchTSTPretrainModel(cfg=cfg)

# 4) Run pretraining. `train_cfg` must be a TrainingConfig instance.
pre_out = train_patchtst_pretrain(
    pre_model,
    train_loader,
    val_loader,
    train_cfg=train_cfg,
    mask_ratio=0.3,
    loss_type="mse",
    save_dir=pretrain_dir,
    ckpt_name=pretrain_ckpt_name,
)

# Path built from the same `save_dir`/`ckpt_name` given to the trainer above.
pretrain_ckpt_path = pretrain_dir + "/" + pretrain_ckpt_name



[Pretrain] stage=0 epochs=10 lr=0.0001 wd=0.0001 mask_ratio=0.3 loss=mse
[Pretrain][stage=0 ep=1/10 step=100] loss=0.890717
[Pretrain][stage=0 ep=1/10] train=0.881201 val=0.766637
[Pretrain][stage=0 ep=2/10 step=100] loss=0.644695
[Pretrain][stage=0 ep=2/10] train=0.636348 val=0.513947
[Pretrain][stage=0 ep=3/10 step=100] loss=0.405477
[Pretrain][stage=0 ep=3/10] train=0.401659 val=0.320691
[Pretrain][stage=0 ep=4/10 step=100] loss=0.283987
[Pretrain][stage=0 ep=4/10] train=0.281228 val=0.258503
[Pretrain][stage=0 ep=5/10 step=100] loss=0.238457
[Pretrain][stage=0 ep=5/10] train=0.238010 val=0.226871
[Pretrain][stage=0 ep=6/10 step=100] loss=0.220457
[Pretrain][stage=0 ep=6/10] train=0.219860 val=0.203676
[Pretrain][stage=0 ep=7/10 step=100] loss=0.197287
[Pretrain][stage=0 ep=7/10] train=0.196786 val=0.199820
[Pretrain][stage=0 ep=8/10 step=100] loss=0.185184
[Pretrain][stage=0 ep=8/10] train=0.184649 val=0.180987
[Pretrain][stage=0 ep=9/10 step=100] loss=0.174660
[Pretrain][stage=0 e