<a href="https://colab.research.google.com/github/ScientistLim/ProjectAW/blob/feature%2Ftft-model-Hyun/tft-v2-colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
​

SyntaxError: invalid non-printable character U+200B (<ipython-input-42-62f2d95838f9>, line 3)

In [2]:
!pip install pytorch-forecasting
!pip instsall



Collecting pytorch-forecasting
  Downloading pytorch_forecasting-1.1.1-py3-none-any.whl.metadata (13 kB)
Collecting lightning<3.0.0,>=2.0.0 (from pytorch-forecasting)
  Downloading lightning-2.4.0-py3-none-any.whl.metadata (38 kB)
Collecting lightning-utilities<2.0,>=0.10.0 (from lightning<3.0.0,>=2.0.0->pytorch-forecasting)
  Downloading lightning_utilities-0.11.7-py3-none-any.whl.metadata (5.2 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning<3.0.0,>=2.0.0->pytorch-forecasting)
  Downloading torchmetrics-1.4.3-py3-none-any.whl.metadata (19 kB)
Collecting pytorch-lightning (from lightning<3.0.0,>=2.0.0->pytorch-forecasting)
  Downloading pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)
Downloading pytorch_forecasting-1.1.1-py3-none-any.whl (177 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.6/177.6 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning-2.4.0-py3-none-any.whl (810 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import pandas as pd
import torch
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import RMSE
from pytorch_lightning import Trainer, LightningModule
from pytorch_lightning.callbacks import EarlyStopping
import matplotlib.pyplot as plt

# 엑셀 파일 불러오기
file_path = 'four-cycle-16months-feed-pressure-5min.xlsx'
df = pd.read_excel(file_path)

# 데이터 전처리: 필요한 컬럼 설정 (Time, feed_pressure)
df['Time'] = pd.to_datetime(df['Time'])
df['time_idx'] = ((df['Time'] - df['Time'].min()).dt.total_seconds() // 300).astype(int)  # 5분 간격의 시간 인덱스 생성
df['group'] = "feed_pressure"  # 그룹화 컬럼 추가

# 데이터 확인
print(df.head())

# 데이터 길이 확인
total_length = len(df)
print(f"Total data length: {total_length} rows")

# max_encoder_length와 max_prediction_length 설정
max_encoder_length = 10000  # 30000 - 약 104일 (5분 간격의 데이터)
max_prediction_length = 1000  # 3000 - 약 10일 예측

print(f"Max encoder length: {max_encoder_length} entries")
print(f"Max prediction length: {max_prediction_length} entries")

# TimeSeriesDataSet 생성
training = TimeSeriesDataSet(
    df,
    time_idx="time_idx",
    target="feed_pressure",
    group_ids=["group"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    time_varying_unknown_reals=["feed_pressure"],
    time_varying_known_reals=["time_idx"],
    target_normalizer=GroupNormalizer(groups=["group"], transformation="softplus"),
    allow_missing_timesteps=True
)

# DataLoader 생성
batch_size = 16
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)

# Temporal Fusion Transformer 모델 생성
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    loss=RMSE(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# LightningModule 정의
class TFTLightningModule(LightningModule):
    def __init__(self, tft_model):
        super().__init__()
        self.tft_model = tft_model.cuda()

    def forward(self, x):
        return self.tft_model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        x = {key: val.cuda().contiguous() for key, val in x.items()}
        y = y[0].cuda().contiguous() if isinstance(y, tuple) else y.cuda().contiguous()
        y_hat = self(x)
        loss = self.tft_model.loss(y_hat["prediction"], y)
        self.log("train_loss", loss, batch_size=len(x['encoder_cont'].squeeze()))
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        x = {key: val.cuda().contiguous() for key, val in x.items()}
        y = y[0].cuda().contiguous() if isinstance(y, tuple) else y.cuda().contiguous()
        y_hat = self(x)
        loss = self.tft_model.loss(y_hat["prediction"], y)
        self.log("val_loss", loss, batch_size=len(x['encoder_cont'].squeeze()))
        return loss

    def configure_optimizers(self):
        return self.tft_model.configure_optimizers()

# TFT 모델을 LightningModule로 감싸기
tft_module = TFTLightningModule(tft)

# 학습을 위한 EarlyStopping 설정
early_stop_callback = EarlyStopping(monitor="val_loss", patience=10, min_delta=1e-4)

# Trainer 설정
trainer = Trainer(
    max_epochs=30,
    accelerator='gpu',
    devices=1,
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback],
)

# 모델 학습
trainer.fit(tft_module, train_dataloader)

# 6개월 예측
encoder_data = df[df['time_idx'] > df['time_idx'].max() - max_encoder_length]
new_prediction_data = pd.DataFrame({
    "Time": pd.date_range(df["Time"].max() + pd.Timedelta(minutes=5), periods=max_prediction_length, freq="5T"),
    "feed_pressure": [0] * max_prediction_length,
    "time_idx": range(df["time_idx"].max() + 1, df["time_idx"].max() + 1 + max_prediction_length),
    "group": "feed_pressure",
})

new_data = pd.concat([encoder_data, new_prediction_data]).reset_index(drop=True)

# 예측 수행
new_data_cuda = training.transform(new_data)
new_data_cuda = {key: val.cuda() for key, val in new_data_cuda.items()}
predictions = tft_module.tft_model.predict(new_data_cuda)

# 예측 결과 시각화
plt.figure(figsize=(10, 6))
plt.plot(df['Time'], df['feed_pressure'], label='Historical Feed Pressure', color='blue')
plt.plot(new_prediction_data['Time'], predictions, label='Predicted Feed Pressure (6 months)', color='red')
plt.xlabel('Time')
plt.ylabel('Feed Pressure')
plt.legend()
plt.title('TFT Model Feed Pressure Prediction for Next 6 Months')
plt.show()


                 Time  feed_pressure  time_idx          group
0 2023-07-09 04:17:00       0.000000         0  feed_pressure
1 2023-07-09 04:22:00       0.024188         1  feed_pressure
2 2023-07-09 04:27:00       0.048376         2  feed_pressure
3 2023-07-09 04:32:00       0.072564         3  feed_pressure
4 2023-07-09 04:37:00       0.086913         4  feed_pressure
Total data length: 129549 rows
Max encoder length: 10000 entries
Max prediction length: 1000 entries


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `

Training: |          | 0/? [00:00<?, ?it/s]