<a href="https://colab.research.google.com/github/ScientistLim/ProjectAW/blob/feature%2Ftft-model-Hyun/tft-v2-colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pytorch-forecasting


Collecting pytorch-forecasting
  Downloading pytorch_forecasting-1.1.1-py3-none-any.whl.metadata (13 kB)
Collecting lightning<3.0.0,>=2.0.0 (from pytorch-forecasting)
  Downloading lightning-2.4.0-py3-none-any.whl.metadata (38 kB)
Collecting lightning-utilities<2.0,>=0.10.0 (from lightning<3.0.0,>=2.0.0->pytorch-forecasting)
  Downloading lightning_utilities-0.11.7-py3-none-any.whl.metadata (5.2 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning<3.0.0,>=2.0.0->pytorch-forecasting)
  Downloading torchmetrics-1.4.3-py3-none-any.whl.metadata (19 kB)
Collecting pytorch-lightning (from lightning<3.0.0,>=2.0.0->pytorch-forecasting)
  Downloading pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)
Downloading pytorch_forecasting-1.1.1-py3-none-any.whl (177 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.6/177.6 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning-2.4.0-py3-none-any.whl (810 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import pandas as pd
import torch
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import RMSE
from pytorch_lightning import Trainer, LightningModule
from pytorch_lightning.callbacks import EarlyStopping

# 엑셀 파일 불러오기
file_path = 'four-cycle-16months-only-feed-pressure.xlsx'
df = pd.read_excel(file_path)

# 데이터 전처리: 필요한 컬럼 설정 (Time, feed_pressure)
df['Time'] = pd.to_datetime(df['Time'])
df['time_idx'] = ((df['Time'] - df['Time'].min()).dt.total_seconds() // 60).astype(int)  # 시간 인덱스 생성 (정수형 변환)
df['group'] = "feed_pressure"  # 그룹화 컬럼 추가

# 데이터의 길이 확인
total_length = len(df)
print(f"Total data length: {total_length} minutes")

# 학습 및 예측할 데이터 기간
minutes_per_day = 1440  # 하루에 1440분
days_in_six_months = 180  # 6개월을 180일로 가정

# max_encoder_length와 max_prediction_length를 데이터 크기에 맞게 조정
# max_encoder_length = min(365 * minutes_per_day, int(total_length * 0.7))  # 전체 데이터 길이의 70% 사용
# max_prediction_length = min(days_in_six_months * minutes_per_day, int(total_length * 0.3))  # 전체 데이터 길이의 30% 사용
# # 시퀀스 길이를 줄임 (예: 128)
max_encoder_length = 172800  # 4개월 (예: 172,800분)
max_prediction_length = 259200  # 6개월 (예: 259,200분)


print(f"Max encoder length: {max_encoder_length} minutes")
print(f"Max prediction length: {max_prediction_length} minutes")

# TimeSeriesDataSet 생성 (allow_missing_timesteps=True로 설정)
training = TimeSeriesDataSet(
    df,
    time_idx="time_idx",
    target="feed_pressure",
    group_ids=["group"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    time_varying_unknown_reals=["feed_pressure"],
    time_varying_known_reals=["time_idx"],
    target_normalizer=GroupNormalizer(groups=["group"], transformation="softplus"),
    allow_missing_timesteps=True  # 누락된 시간 간격을 허용하도록 설정
)

# DataLoader 생성
batch_size = 32
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)

# Temporal Fusion Transformer 모델 생성
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    loss=RMSE(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# LightningModule로 감싸기
class TFTLightningModule(LightningModule):
    def __init__(self, tft_model):
        super().__init__()
        self.tft_model = tft_model.cuda()  # 모델을 GPU로 이동

    def forward(self, x):
        return self.tft_model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch

        # 텐서의 차원 및 타입을 출력
        print(f"x의 차원: {x['encoder_cont'].shape}, x의 타입: {type(x['encoder_cont'])}")
        print(f"y의 차원: {y.shape if isinstance(y, torch.Tensor) else 'Not a tensor'}, y의 타입: {type(y)}")

        x = {key: val.cuda() for key, val in x.items()}  # 데이터를 GPU로 이동

        # y가 tuple인 경우 첫 번째 요소만 사용하고 None은 무시
        if isinstance(y, tuple):
            y = y[0]  # 첫 번째 텐서만 사용
        if y is not None:
            y = y.cuda().contiguous()

        y_hat = self(x)
        loss = self.tft_model.loss(y_hat["prediction"], y)
        self.log("train_loss", loss, batch_size=len(x['encoder_cont'].squeeze()))
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        x = {key: val.cuda() for key, val in x.items()}  # 데이터를 GPU로 이동

        # y가 tuple인 경우 첫 번째 요소만 사용하고 None은 무시
        if isinstance(y, tuple):
            y = y[0]  # 첫 번째 텐서만 사용
        if y is not None:
            y = y.cuda().contiguous()

        y_hat = self(x)
        loss = self.tft_model.loss(y_hat["prediction"], y)
        self.log("val_loss", loss, batch_size=len(x['encoder_cont'].squeeze()))
        return loss


    def configure_optimizers(self):
        return self.tft_model.configure_optimizers()

# TFT를 감싼 LightningModule 생성
tft_module = TFTLightningModule(tft)

# 모델 학습 설정
early_stop_callback = EarlyStopping(monitor="val_loss", patience=10, min_delta=1e-4)

# Trainer 설정
trainer = Trainer(
    max_epochs=30,
    accelerator='gpu',  # GPU 사용
    devices=1,  # 사용할 GPU 장치 개수
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback],
)

# 모델 학습
trainer.fit(tft_module, train_dataloader)

# 6개월 예측
# 예측에 필요한 데이터 준비
encoder_data = df[lambda x: x.time_idx > x.time_idx.max() - max_encoder_length]
new_prediction_data = pd.DataFrame(
    {
        "Time": pd.date_range(df["Time"].max() + pd.Timedelta(minutes=1), periods=max_prediction_length, freq="T"),
        "feed_pressure": [0] * max_prediction_length,
        "time_idx": range(df["time_idx"].max() + 1, df["time_idx"].max() + 1 + max_prediction_length),
        "group": "feed_pressure",
    }
)
new_data = pd.concat([encoder_data, new_prediction_data]).reset_index(drop=True)

# 예측 수행
new_data_cuda = training.transform(new_data)  # 데이터셋의 transform을 적용하여 모델이 예측할 수 있는 포맷으로 변환
new_data_cuda = {key: val.cuda() for key, val in new_data_cuda.items()}  # 예측 데이터를 GPU로 이동
predictions = tft_module.tft_model.predict(new_data_cuda)

# 예측 결과 시각화
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.plot(df['Time'], df['feed_pressure'], label='Historical Feed Pressure', color='blue')
plt.plot(new_prediction_data['Time'], predictions, label='Predicted Feed Pressure (6 months)', color='red')
plt.xlabel('Time')
plt.ylabel('Feed Pressure')
plt.legend()
plt.title('TFT Model Feed Pressure Prediction for Next 6 Months')
plt.show()


Total data length: 647744 minutes
Max encoder length: 172800 minutes
Max prediction length: 259200 minutes


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `

Training: |          | 0/? [00:00<?, ?it/s]

x의 차원: torch.Size([32, 172800, 2]), x의 타입: <class 'torch.Tensor'>
y의 차원: Not a tensor, y의 타입: <class 'tuple'>


OutOfMemoryError: CUDA out of memory. Tried to allocate 170.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 89.06 MiB is free. Process 19001 has 14.66 GiB memory in use. Of the allocated memory 14.49 GiB is allocated by PyTorch, and 34.27 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)