<a href="https://colab.research.google.com/github/LadyAmely/TFT-notebook/blob/main/Temporal_Fusion_Transformer_energy_forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the raw energy dataset from the Colab session storage and preview it.
DATA_PATH = "/content/energy_df.csv"
df = pd.read_csv(DATA_PATH)
df

Sprawdzenie, czy istnieją brakujące dane

In [None]:
# Count the missing entries in every column of the frame.
missing_data = df.isna().sum()
missing_data

Wyświetlenie statystyk opisowych

In [None]:
# Show pandas' descriptive statistics for the frame as the cell output.
df.describe()

Wyświetlenie informacji o DataFrame

In [None]:
# Print the frame summary (columns, non-null counts, dtypes, memory usage).
df.info()

Usuwanie duplikatów

In [None]:
# Flag rows that repeat an earlier row exactly, then count them.
duplicate_mask = df.duplicated()
duplicates = duplicate_mask.sum()
duplicates

In [None]:
# Drop the exact duplicate rows counted above; `df` is rebound to the result.
df = df.drop_duplicates()

In [None]:
# Keep only the last 30% of the rows to shorten the series used for modelling.
# NOTE(review): `tail` selects by row position, so this assumes the rows are
# already in chronological order — confirm, or sort by "Time" first.
# NOTE(review): the cell rebinds `df`, so running it twice shrinks the data
# again; re-run from the loading cell when in doubt.
n = len(df)
# `.copy()` detaches the selection from the de-duplicated frame. Later cells add
# columns to `df`; without the copy those assignments target a slice and raise
# pandas' SettingWithCopyWarning.
df = df.tail(int(n * 0.3)).copy()

In [None]:
# Display the reduced frame (pandas truncates the rendering of long frames).
df

In [None]:
!pip install lightning

In [None]:
!pip install pytorch-forecasting

In [None]:
!pip install optuna-integration

In [None]:
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
import torch

In [None]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss

In [None]:
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet

In [None]:
# Parse the "Time" column into pandas datetimes; later cells rely on its
# `.dt` accessor. The parsed column is shown as the cell output.
df["Time"] = df["Time"].pipe(pd.to_datetime)
df["Time"]

In [None]:
# Integer time index: whole days elapsed since the earliest timestamp.
# NOTE(review): `.dt.days` truncates to whole days, so several rows on the same
# calendar day would share one time_idx — confirm the data is daily.
elapsed = df["Time"] - df["Time"].min()
df["time_idx"] = elapsed.dt.days
df["time_idx"]

In [None]:
# Calendar dates of the earliest and latest timestamps in the frame.
time_column = df["Time"]
first_day = time_column.min().date()
last_day = time_column.max().date()

In [None]:
# Inclusive number of calendar days between the first and the last day.
covered_span = last_day - first_day
number_of_days = covered_span.days + 1
number_of_days

In [None]:
# Encoder (history) window bounds, expressed in time_idx steps.
min_encoder_length, max_encoder_length = 250, 365
# Prediction (forecast horizon) window bounds, expressed in time_idx steps.
min_prediction_length, max_prediction_length = 7, 30

In [None]:
# Last time_idx used for training: the final `max_prediction_length` steps are
# held back from the training subset.
last_time_idx = df["time_idx"].max()
training_cutoff = last_time_idx - max_prediction_length
training_cutoff

In [None]:
# Constant group id: every row belongs to one series (used as `group_ids` below).
df["group_id"] = 0

In [None]:
# Calendar features stored as strings so they can be used as categoricals.
month_number = df["Time"].dt.month
df["Day_of_Week"] = df["Time"].dt.dayofweek.astype(str)
df["Month"] = month_number.astype(str)
# Maps Dec-Feb -> "1", Mar-May -> "2", Jun-Aug -> "3", Sep-Nov -> "4".
df["Season"] = ((month_number % 12 + 3) // 3).astype(str)

# Name of the column the model forecasts.
target_column = "Total Generation(MWh)"

# Real-valued covariates passed to the dataset as "known" inputs.
# NOTE(review): these look like per-source generation figures plus load; if they
# are only observed together with the target (not available for future dates),
# declaring them as known inputs leaks information into the forecast — confirm,
# and move them to `time_varying_unknown_reals` if so.
known_real_columns = [
    "Natural Gas",
    "Hydro Water Reservoir",
    "Lignite",
    "Hydro Run-of-river",
    "Imported Coal",
    "Wind",
    "Solar",
    "Fuel Oil",
    "Geothermal",
    "Asphaltite Coal",
    "Hard Coal",
    "Biomass",
    "Naphtha",
    "Liquefied Natural Gas",
    "International",
    "Waste",
    "Load",
]

# Rows up to and including the training cutoff form the training subset.
training_rows = df.loc[df["time_idx"] <= training_cutoff]

# Training dataset: one series (group_id), calendar categoricals, the covariates
# above, and a softplus-transformed, group-normalised target.
training = TimeSeriesDataSet(
    training_rows,
    time_idx="time_idx",
    target=target_column,
    group_ids=["group_id"],
    min_encoder_length=min_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=min_prediction_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=[],
    static_reals=[],
    time_varying_known_categoricals=["Day_of_Week", "Month", "Season"],
    time_varying_known_reals=known_real_columns,
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[target_column],
    target_normalizer=GroupNormalizer(
        groups=["group_id"],
        transformation="softplus",
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,
)

In [None]:
# Validation dataset: built from the training dataset's configuration, applied
# to the full frame in prediction mode with randomisation switched off.
validation = TimeSeriesDataSet.from_dataset(
    training,
    df,
    predict=True,
    stop_randomization=True,
)

In [None]:
# Training uses small batches; validation uses batches ten times larger.
batch_size = 16
eval_batch_size = batch_size * 10

train_dataloader = training.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=0,
)
val_dataloader = validation.to_dataloader(
    train=False,
    batch_size=eval_batch_size,
    num_workers=0,
)

In [None]:
# Reference score: MAE of the Baseline model on the validation dataloader.
# `return_y=True` makes the prediction object carry the targets as well.
baseline_predictions = Baseline().predict(val_dataloader, return_y=True)
baseline_metric = MAE()
baseline_metric(baseline_predictions.output, baseline_predictions.y)

In [None]:
# Test cutoff: step back from the end of the series by the maximum prediction
# length, then by the minimum encoder and prediction lengths of `training`.
test_cutoff = (
    df["time_idx"].max()
    - max_prediction_length
    - (training.min_encoder_length + training.min_prediction_length)
)

# Rows strictly after the cutoff feed the test dataset.
test_rows = df.loc[df["time_idx"] > test_cutoff]

# NOTE(review): with predict=True this dataset presumably forecasts the same
# final window of the series as `validation` — confirm that the test and
# validation targets are meant to coincide.
test = TimeSeriesDataSet.from_dataset(
    training,
    test_rows,
    predict=True,
    stop_randomization=True,
)

In [None]:
# Evaluation dataloader for the test dataset (batches ten times the training size).
test_dataloader = test.to_dataloader(
    train=False,
    batch_size=batch_size * 10,
    num_workers=0,
)

In [None]:
# Seed the random number generators through Lightning so runs are repeatable.
pl.seed_everything(42)

# Trainer settings: 10 epochs on CPU, gradient clipping at 0.1, logging at
# every step.
trainer_settings = dict(
    max_epochs=10,
    accelerator="cpu",
    gradient_clip_val=0.1,
    log_every_n_steps=1,
)
trainer = pl.Trainer(**trainer_settings)

In [None]:
# Stop training once "val_loss" has failed to improve for `patience` epochs.
# NOTE(review): the trainer created in the visible code runs max_epochs=10 and
# is built without callbacks, so a patience of 10 could not trigger there —
# confirm these callbacks are attached to a trainer created later.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    patience=10,
    verbose=True,
    mode="min",
)
# Record the learning rate at every optimisation step.
lr_monitor = LearningRateMonitor(logging_interval='step')