In [None]:
import copy
import warnings
import holidays
import matplotlib
import seaborn as sns
from pathlib import Path
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight') #Not a great fan of their website (I found it super-biased), but this stylesheet is the best
warnings.filterwarnings('ignore')

import torch
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
# DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

from pytorch_forecasting.data import GroupNormalizer, NaNLabelEncoder
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss, MAE
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
# from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
# from optuna.integration import PyTorchLightningPruningCallback

import gc
import random
import tensorflow as tf
# import tensorboard as tb
# tf.io.gfile = tb.compat.tensorflow_stub.io.gfile

import lightning.pytorch as pl
from lightning.pytorch.callbacks import LearningRateMonitor
from lightning.pytorch.callbacks.early_stopping import EarlyStopping


# Seed every random number generator used in this notebook with the same value.
# PyTorch (CPU generator, then the current CUDA device)
torch.manual_seed(30)
torch.cuda.manual_seed(30)
# TensorFlow
tf.random.set_seed(30)
# NumPy and the Python standard library
np.random.seed(30)
random.seed(30)

# Last expression of the cell: shows whether a CUDA device is visible to PyTorch.
torch.cuda.is_available()

In [None]:
# Read the training workbook, then turn the row index into an explicit
# `time_idx` column (the column produced by reset_index is named 'index').
df_2023 = pd.read_excel('./data_train2023-202404.xlsx')
data = (
    df_2023
    .reset_index()
    .rename({'index': 'time_idx'}, axis='columns')
)

In [None]:
# ISO week number of each timestamp, stored as a string-valued category.
data['yilin_haftasi'] = data['ds'].dt.isocalendar().week.astype(str).astype('category')
data['haftanın_günü'] = data['haftanın_günü'].astype('category')

# The 1e-8 offset keeps the logarithm finite when the price is exactly zero.
data["log_ptf_tl"] = np.log(data.ptf_tl_mwh + 1e-8)

# Mean price per day of week.  `time_idx` is unique for every row (it is the
# reset row index), so it must not be part of the group keys: with it, every
# group holds a single row and the "average" is just a copy of ptf_tl_mwh.
# NOTE(review): the mean is taken over the whole frame, including rows that are
# later forecast; restrict it to the training window before using it as a feature.
data["avg_ptf_dayofweek"] = data.groupby("haftanın_günü", observed=True).ptf_tl_mwh.transform("mean")

# Side-by-side view of the raw price and its day-of-week average.
data[['ptf_tl_mwh', 'avg_ptf_dayofweek']]

In [None]:
# Holiday calendar for 2023-2024 as a two-column frame: `tarih` (date) and
# `holiday` (holiday name), ordered by date.  The frame is built in one call
# instead of being grown one row at a time.
holiday = pd.DataFrame(
    sorted(holidays.Turkey(years=[2023, 2024]).items()),
    columns=['tarih', 'holiday'],
)

# Convert the date objects to datetime64.  No errors='ignore': that option is
# deprecated and would silently leave unparsed values in place, which would
# then never match when the holidays are looked up.
holiday['tarih'] = pd.to_datetime(holiday['tarih'])

In [None]:
# --- Holiday flag -----------------------------------------------------------
# The holiday table only has the columns `tarih` and `holiday`, so indexing it
# by `ds` raises KeyError.  Match on the calendar date of `ds` instead; this
# flags every row that falls on a holiday, not only one exactly at midnight.
holiday_dates = pd.to_datetime(holiday['tarih']).dt.normalize()
df = data.copy()
# Stored as object dtype holding Python booleans, which is the representation
# the rest of the notebook has always received for this column.
df['holiday'] = df['ds'].dt.normalize().isin(holiday_dates).astype(object)

# --- Rows per day -----------------------------------------------------------
# n / n2 = index label of the first row whose `tarih` lies one / two days after
# some other row's `tarih`.  With the 0-based row index and chronologically
# sorted rows (assumed - TODO confirm) this is the number of rows that span one
# and two days.  DateOffset(days=k) ADDS k days; the singular `day=k` would
# replace the day-of-month with k and yield the wrong shift sizes.
n = df[df['tarih'].isin(df['tarih'] + pd.DateOffset(days=1))].index[0]
n2 = df[df['tarih'].isin(df['tarih'] + pd.DateOffset(days=2))].index[0]

# --- Lead / lag flags -------------------------------------------------------
# lead_k: is the date k days ahead a holiday?  lag_k: was the date k days ago one?
# Rows shifted in from outside the data are filled with False.
df['is_holiday_lead_1'] = df['holiday'].shift(-n, fill_value=False)
df['is_holiday_lead_2'] = df['holiday'].shift(-n2, fill_value=False)
df['is_holiday_lag_1'] = df['holiday'].shift(n, fill_value=False)
df['is_holiday_lag_2'] = df['holiday'].shift(n2, fill_value=False)

# Single series identifier, used as the group id by the forecasting dataset.
df['id'] = 'ptf'

In [None]:
# Keep only the columns used from here on: keys, target, market covariates
# and the calendar / holiday features.
df = df.loc[:, [
    'tarih',
    'time_idx',
    'id',
    'ptf_tl_mwh',
    'eşleşen_satış_miktarı_mwh',
    'pozitif_dengesizlik_miktarı_mwh',
    'güneş_üretim__tl_mwh',
    'i̇şlem_hacmi_tl',
    'azami_fiyat_limiti_tl_mwh',
    'teklif_edilen_alış_miktarı_mwh',
    'ptf_usd_mwh',
    'eşleşen_alış_miktarı_mwh',
    "saat",
    "holiday",
    "is_holiday_lag_2",
    'haftanın_günü',
]]

In [None]:
def Create_Model(df_train, max_encoder_length, max_prediction_length, Variable_to_predict,
                 batch_size=32, max_epochs=20):
    """Build the datasets, dataloaders, TFT network and trainer for one training run.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training rows. Must contain ``time_idx``, ``id``, the target column and
        every feature column named in the dataset definition below.
    max_encoder_length : int
        Longest history window given to the encoder; the shortest allowed
        window is half of this value.
    max_prediction_length : int
        Longest forecast horizon.
    Variable_to_predict : str
        Name of the target column.
    batch_size : int, default 32
        Batch size of the training dataloader; validation uses ten times as much.
    max_epochs : int, default 20
        Upper bound on the number of training epochs (early stopping may end
        training sooner).

    Returns
    -------
    tuple
        ``(train_dataloader, val_dataloader, tft, trainer)``; the network is
        returned untrained.
    """
    # ---- Dataset definition ------------------------------------------------
    training = TimeSeriesDataSet(
        df_train,
        time_idx='time_idx',
        target=Variable_to_predict,
        group_ids=['id'],
        min_encoder_length=max_encoder_length // 2,
        max_encoder_length=max_encoder_length,
        min_prediction_length=1,
        max_prediction_length=max_prediction_length,
        # Day of week changes along the series, so it is declared as a known
        # time-varying categorical.  Static categoricals are meant for values
        # that stay constant within a series.
        static_categoricals=[],
        time_varying_known_categoricals=["saat", "holiday", "is_holiday_lag_2", "haftanın_günü"],
        time_varying_known_reals=["time_idx"],
        time_varying_unknown_categoricals=[],
        time_varying_unknown_reals=[
            'ptf_tl_mwh',
            'eşleşen_satış_miktarı_mwh',
            'pozitif_dengesizlik_miktarı_mwh',
            'güneş_üretim__tl_mwh',
            'i̇şlem_hacmi_tl',
            'azami_fiyat_limiti_tl_mwh',
            'teklif_edilen_alış_miktarı_mwh',
            'ptf_usd_mwh',
            'eşleşen_alış_miktarı_mwh',
        ],
        target_normalizer=GroupNormalizer(groups=['id'], transformation='softplus'),
        add_relative_time_idx=False,
        add_target_scales=True,
        add_encoder_length=True,
        allow_missing_timesteps=False,
    )

    # ---- Dataloaders -------------------------------------------------------
    pl.seed_everything(42)
    train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    # NOTE(review): the validation set is derived from the same df_train the
    # training set was built on, so val_loss (and therefore early stopping) is
    # measured on rows the model has trained on.  Consider building `training`
    # from rows up to a cutoff and validating on the rows after it.
    validation = TimeSeriesDataSet.from_dataset(training, df_train, predict=True, stop_randomization=True)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

    # ---- Trainer -----------------------------------------------------------
    early_stop_callback = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=10, verbose=False, mode='min')
    lr_logger = LearningRateMonitor()
    logger = TensorBoardLogger('lightning_logs')

    trainer = pl.Trainer(
        max_epochs=max_epochs,
        accelerator="auto",
        gradient_clip_val=0.25,
        limit_train_batches=45,
        enable_model_summary=True,
        callbacks=[lr_logger, early_stop_callback],
        logger=logger,
    )

    # ---- Network -----------------------------------------------------------
    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=0.03,
        hidden_size=8,
        attention_head_size=1,
        dropout=0.1,
        hidden_continuous_size=8,
        output_size=7,  # one output per quantile of QuantileLoss (7 by default)
        loss=QuantileLoss(),
        reduce_on_plateau_patience=4,
    )
    # No explicit tft.to('cuda') here: it raises on machines without a GPU, and
    # the trainer (accelerator="auto") already places the model on the device
    # it selects when fit() runs.
    print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

    return (train_dataloader, val_dataloader, tft, trainer)

In [None]:
# Forecast configuration.
Variable_to_predict = 'ptf_tl_mwh'   # target column
date_inicial = '2024-01-31'          # date of the first forecast origin
max_encoder_length = 8760            # history window, in rows (8760 = 365 * 24)
max_prediction_length = 744          # forecast horizon, in rows (744 = 31 * 24)
Nper = 24                            # number of consecutive forecast origins

# time_idx of the first row whose `tarih` equals `date_inicial`.
idx_first_pred = df.loc[df['tarih'] == date_inicial, 'time_idx'].iloc[0]

In [None]:
# Rolling-origin training: for each origin `i`, fit a fresh model on all rows
# before `i`, then forecast from `i` onwards.  Every origin's forecast is kept,
# keyed by its origin, instead of each iteration overwriting the previous one.
# NOTE(review): only the first origin is used below; Nper = 1 avoids the
# remaining retrainings if they are not needed.
forecasts_by_origin = {}
for i in range(idx_first_pred, idx_first_pred + Nper):
    df_train = df[df['time_idx'] < i]
    train_dataloader, val_dataloader, tft, trainer = Create_Model(
        df_train, max_encoder_length, max_prediction_length, Variable_to_predict
    )
    pl.seed_everything(42)

    trainer.fit(tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)
    best_model_path = trainer.checkpoint_callback.best_model_path
    best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

    # Encoder history plus the full prediction horizon around origin `i`.
    df_pred = df[(df['time_idx'] >= (i - max_encoder_length)) & (df['time_idx'] < (i + max_prediction_length))]

    prediction = best_tft.predict(df_pred, return_index=True)
    # Transposed so that rows are forecast steps and columns are series ids.
    forecasts_by_origin[i] = pd.DataFrame(
        data=np.transpose(prediction.output.cpu().numpy()),
        columns=prediction.index['id'].to_numpy(),
    )

# The slice [24:720] used for evaluation counts rows from `idx_first_pred`, so
# expose the forecast made at that origin.  The forecast of the last origin
# starts Nper - 1 rows later and would be misaligned with that slice.
Prediction_df = forecasts_by_origin[idx_first_pred]
Prediction_df.iloc[24:720]

In [None]:
# Observed prices to compare the forecast against.
real_subat = pd.read_excel('ptfsubat.xlsx')

# Forecast rows 24..719, re-indexed from 0 so they pair up row by row with the
# observed series (which is re-indexed the same way).
df_ratio = Prediction_df.iloc[24:720].reset_index(drop=True)
df_ratio['real'] = real_subat['PTF (TL/MWh)'].reset_index(drop=True)
# Signed forecast error.  (The frame is `df_ratio`; `df_rat` was an undefined name.)
df_ratio['err'] = df_ratio['ptf'] - df_ratio['real']

# NOTE(review): this divides the TOTAL error by each observed value and then
# averages, which is neither a mean percentage error nor a total-error ratio,
# and becomes infinite if any observed value is 0.  Confirm the intended metric,
# e.g. (df_ratio['err'].abs() / df_ratio['real']).mean() for MAPE.
(df_ratio['err'].sum() / df_ratio['real']).mean()

In [None]:
# Forecast and observed series on one set of axes (legend labels are in Turkish).
fig, ax = plt.subplots()
ax.plot(df_ratio.index, df_ratio['ptf'], label="Tahmin")
ax.plot(df_ratio.index, df_ratio['real'], label="Gercek")
ax.legend()
plt.show()