In [None]:
!pip install pytorch-lightning
!pip install git+https://github.com/jdb78/pytorch-forecasting.git
!pip install optuna==3.4.0
!pip install optuna-integration
!pip install scikit-learn==1.3.2
import copy
from pathlib import Path
import warnings
import lightning.pytorch as pl
import pytorch_forecasting as pf
from pytorch_forecasting.data import NaNLabelEncoder
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor, ModelSummary
from lightning.pytorch.loggers import TensorBoardLogger
import numpy as np
import pandas as pd
import json
import pickle
import torch
import optuna
import gc
import joblib
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
import logging

#import optuna
#from optuna.integration import PyTorchLightningPruningCallback

from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss


# Encoder (history) and decoder (forecast) window lengths shared by all datasets.
max_encoder_length, max_prediction_length = 48, 24

# Load the pre-processed frames from the 12M folder. Each test frame gets a
# fresh 0..n-1 index so it can later be sliced by position.
train_source = pd.read_pickle('Data/TFT Preprocessed data/12M/train_source.pkl')
test_source = pd.read_pickle('Data/TFT Preprocessed data/12M/test_source.pkl').reset_index(drop=True)

train_target = pd.read_pickle('Data/TFT Preprocessed data/12M/train_target.pkl')
test_target = pd.read_pickle('Data/TFT Preprocessed data/12M/test_target.pkl').reset_index(drop=True)

train_target_2 = pd.read_pickle('Data/TFT Preprocessed data/12M/train_target_2.pkl')
test_target_2 = pd.read_pickle('Data/TFT Preprocessed data/12M/test_target_2.pkl').reset_index(drop=True)

# Largest time_index kept for training: the row count minus one forecast window.
# NOTE(review): this presumes time_index counts rows of a single series — confirm.
training_cutoff_source = len(train_source) - max_prediction_length
training_cutoff_target = len(train_target) - max_prediction_length
training_cutoff_target_2 = len(train_target_2) - max_prediction_length

# Source training dataset, restricted to rows at or before the training cutoff.
training_source = TimeSeriesDataSet(
    train_source[train_source["time_index"] <= training_cutoff_source],
    # identifiers and target
    time_idx="time_index",
    group_ids=["group_id"],
    target="Electrical_Consumption/sqm",
    # window lengths
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    # static features
    static_categoricals=[],
    static_reals=["lat", "lng", "yearbuilt"],
    # features known for future time steps
    time_varying_known_categoricals=["month"],
    time_varying_known_reals=[
        "time_index",
        "airTemperature",
        "cloudCoverage",
        "dewTemperature",
        "windDirection",
        "windSpeed",
    ],
    # features only observed in the past
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["Electrical_Consumption/sqm"],
    # derived features and encoders
    add_relative_time_idx=True,
    add_target_scales=False,
    add_encoder_length=True,
    categorical_encoders={"month": NaNLabelEncoder(add_nan=True)},
    allow_missing_timesteps=False,
)

# Validation and testing datasets reuse the training dataset's configuration.
# The validation set is built from the full training frame in predict mode.
validation_source = TimeSeriesDataSet.from_dataset(
    training_source,
    train_source,
    predict=True,
    stop_randomization=True,
)
testing_source = TimeSeriesDataSet.from_dataset(training_source, test_source)


# Target training dataset, restricted to rows at or before the training cutoff.
training_target = TimeSeriesDataSet(
    train_target[train_target["time_index"] <= training_cutoff_target],
    # identifiers and target
    time_idx="time_index",
    group_ids=["group_id"],
    target="Electrical_Consumption/sqm",
    # window lengths
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    # static features
    static_categoricals=[],
    static_reals=["lat", "lng", "yearbuilt"],
    # features known for future time steps
    time_varying_known_categoricals=["month"],
    time_varying_known_reals=[
        "time_index",
        "airTemperature",
        "cloudCoverage",
        "dewTemperature",
        "windDirection",
        "windSpeed",
    ],
    # features only observed in the past
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["Electrical_Consumption/sqm"],
    # derived features and encoders
    add_relative_time_idx=True,
    add_target_scales=False,
    add_encoder_length=True,
    categorical_encoders={"month": NaNLabelEncoder(add_nan=True)},
    allow_missing_timesteps=False,
)

# Validation and testing datasets reuse the training dataset's configuration.
# The validation set is built from the full training frame in predict mode.
validation_target = TimeSeriesDataSet.from_dataset(
    training_target,
    train_target,
    predict=True,
    stop_randomization=True,
)
testing_target = TimeSeriesDataSet.from_dataset(training_target, test_target)


# Second target training dataset, restricted to rows at or before its cutoff.
training_target_2 = TimeSeriesDataSet(
    train_target_2[train_target_2["time_index"] <= training_cutoff_target_2],
    # identifiers and target
    time_idx="time_index",
    group_ids=["group_id"],
    target="Electrical_Consumption/sqm",
    # window lengths
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    # static features
    static_categoricals=[],
    static_reals=["lat", "lng", "yearbuilt"],
    # features known for future time steps
    time_varying_known_categoricals=["month"],
    time_varying_known_reals=[
        "time_index",
        "airTemperature",
        "cloudCoverage",
        "dewTemperature",
        "windDirection",
        "windSpeed",
    ],
    # features only observed in the past
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["Electrical_Consumption/sqm"],
    # derived features and encoders
    add_relative_time_idx=True,
    add_target_scales=False,
    add_encoder_length=True,
    categorical_encoders={"month": NaNLabelEncoder(add_nan=True)},
    allow_missing_timesteps=False,
)

# The validation dataset reuses the training dataset's configuration and is
# built from the full training frame in predict mode.
validation_target_2 = TimeSeriesDataSet.from_dataset(
    training_target_2,
    train_target_2,
    predict=True,
    stop_randomization=True,
)



def objective_source(trial):
    """Optuna objective for the source model: train one TFT and score it.

    Samples a hyperparameter set from ``trial``, fits a
    ``TemporalFusionTransformer`` on ``training_source`` while monitoring
    ``validation_source``, predicts on ``testing_source`` and compares the
    predictions with the ``Electrical_Consumption/sqm`` column of
    ``test_source``.

    Args:
        trial: Optuna trial supplying the hyperparameter suggestions.

    Returns:
        The R² score of the predictions multiplied by 100 (the study
        maximises this value).
    """
    # Re-seed on every trial so trials differ only by their hyperparameters.
    pl.seed_everything(42)
    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
    disabled_model_summary = ModelSummary(max_depth=0)
    logger = TensorBoardLogger("lightning_logs", name=f"trial_{trial.number}", log_graph=False)
    y_test = test_source['Electrical_Consumption/sqm']

    # Hyperparameter search space
    hidden_size = trial.suggest_categorical('hidden_size', [32, 64, 128])
    hidden_continuous_size = trial.suggest_categorical('hidden_continuous_size', [8, 16, 32])
    attention_head_size = trial.suggest_int('attention_head_size', 2, 8)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-3, log=True)
    dropout = trial.suggest_float('dropout', 0.2, 0.4, step=0.05)
    batch_size = trial.suggest_categorical('batch_size', [64, 128, 256])

    train_dataloader = training_source.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation_source.to_dataloader(train=False, batch_size=batch_size*10, num_workers=0)
    test_dataloader = testing_source.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    # Trainer: at most 40 epochs of 15 batches each, with early stopping on val_loss
    trainer = pl.Trainer(
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        devices=1,
        max_epochs=40,
        gradient_clip_val=0.1,
        callbacks=[early_stop_callback, disabled_model_summary],
        limit_train_batches=15,
        logger=logger,
        enable_checkpointing=False,
        log_every_n_steps=10,  # Log every 10 training steps
        num_sanity_val_steps=0  # To avoid running validation before training starts
    )

    # Model whose input/output configuration is derived from the source dataset
    model = TemporalFusionTransformer.from_dataset(
        training_source,
        hidden_size=hidden_size,
        hidden_continuous_size=hidden_continuous_size,
        attention_head_size=attention_head_size,
        dropout=dropout,
        learning_rate=learning_rate,
        lstm_layers=2,
        loss=QuantileLoss(),
        optimizer="Adam",
        log_interval=10,
        log_val_interval=10,
        reduce_on_plateau_patience=4
    )

    print(f"Starting trial {trial.number} with params: {trial.params}")

    trainer.fit(model, train_dataloader, val_dataloader)

    predictions_scaled_gpu = model.predict(test_dataloader)
    predictions_scaled = (predictions_scaled_gpu.cpu()).numpy()

    # Keep only the first column of the prediction array, flattened to 1-D.
    # assumes predict() returns (n_windows, prediction_length) — TODO confirm
    predictions_scaled = predictions_scaled[:, :1].ravel()

    # Drop the first value, then score against the actuals with the first 48
    # and last 24 rows removed.
    # NOTE(review): 48 / 24 presumably mirror max_encoder_length /
    # max_prediction_length, and the [1:] shift assumes the prediction windows
    # line up one-to-one with the test rows — confirm the alignment.
    predictions_scaled = predictions_scaled[1:]

    r2 = r2_score(y_test[48:len(y_test)-24], predictions_scaled)

    val_r2 = r2*100

    # Release model and loaders so GPU/CPU memory does not pile up across trials.
    del model, trainer, train_dataloader, val_dataloader, test_dataloader
    torch.cuda.empty_cache()
    gc.collect()

    return val_r2

def objective_target(trial):
    """Optuna objective for the target model: train one TFT and score it.

    Samples a hyperparameter set from ``trial``, fits a
    ``TemporalFusionTransformer`` on ``training_target`` while monitoring
    ``validation_target``, predicts on ``testing_target`` and compares the
    predictions with the ``Electrical_Consumption/sqm`` column of
    ``test_target``.

    Args:
        trial: Optuna trial supplying the hyperparameter suggestions.

    Returns:
        The R² score of the predictions multiplied by 100 (the study
        maximises this value).
    """
    # Re-seed on every trial so trials differ only by their hyperparameters.
    pl.seed_everything(42)
    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
    disabled_model_summary = ModelSummary(max_depth=0)
    logger = TensorBoardLogger("lightning_logs", name=f"trial_{trial.number}", log_graph=False)
    y_test = test_target['Electrical_Consumption/sqm']

    # Hyperparameter search space
    hidden_size = trial.suggest_categorical('hidden_size', [32, 64, 128])
    hidden_continuous_size = trial.suggest_categorical('hidden_continuous_size', [8, 16, 32])
    attention_head_size = trial.suggest_int('attention_head_size', 2, 8)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-3, log=True)
    dropout = trial.suggest_float('dropout', 0.2, 0.4, step=0.05)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])

    train_dataloader = training_target.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation_target.to_dataloader(train=False, batch_size=batch_size*10, num_workers=0)
    test_dataloader = testing_target.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    # Trainer: at most 40 epochs of 15 batches each, with early stopping on val_loss
    trainer = pl.Trainer(
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        devices=1,
        max_epochs=40,
        gradient_clip_val=0.1,
        callbacks=[early_stop_callback, disabled_model_summary],
        limit_train_batches=15,
        logger=logger,
        enable_checkpointing=False,
        log_every_n_steps=10,  # Log every 10 training steps
        num_sanity_val_steps=0  # To avoid running validation before training starts
    )

    # Derive the model configuration from the dataset it is actually trained
    # on (training_target), not from the source dataset, so the model's
    # dataset-derived settings match the target data.
    model = TemporalFusionTransformer.from_dataset(
        training_target,
        hidden_size=hidden_size,
        hidden_continuous_size=hidden_continuous_size,
        attention_head_size=attention_head_size,
        dropout=dropout,
        learning_rate=learning_rate,
        lstm_layers=2,
        loss=QuantileLoss(),
        optimizer="Adam",
        log_interval=10,
        log_val_interval=10,
        reduce_on_plateau_patience=4
    )

    print(f"Starting trial {trial.number} with params: {trial.params}")

    trainer.fit(model, train_dataloader, val_dataloader)

    predictions_scaled_gpu = model.predict(test_dataloader)
    predictions_scaled = (predictions_scaled_gpu.cpu()).numpy()

    # Keep only the first column of the prediction array, flattened to 1-D.
    # assumes predict() returns (n_windows, prediction_length) — TODO confirm
    predictions_scaled = predictions_scaled[:, :1].ravel()

    # Drop the first value, then score against the actuals with the first 48
    # and last 24 rows removed.
    # NOTE(review): 48 / 24 presumably mirror max_encoder_length /
    # max_prediction_length, and the [1:] shift assumes the prediction windows
    # line up one-to-one with the test rows — confirm the alignment.
    predictions_scaled = predictions_scaled[1:]

    r2 = r2_score(y_test[48:len(y_test)-24], predictions_scaled)

    val_r2 = r2*100

    # Release model and loaders so GPU/CPU memory does not pile up across trials.
    del model, trainer, train_dataloader, val_dataloader, test_dataloader
    torch.cuda.empty_cache()
    gc.collect()

    return val_r2

def objective_target_2(trial):
    """Optuna objective for the second target model: train one TFT and score it.

    Samples a hyperparameter set from ``trial``, fits a
    ``TemporalFusionTransformer`` on ``training_target_2`` while monitoring
    ``validation_target_2``, predicts on a testing dataset built from
    ``test_target_2`` and compares the predictions with that frame's
    ``Electrical_Consumption/sqm`` column.

    Args:
        trial: Optuna trial supplying the hyperparameter suggestions.

    Returns:
        The R² score of the predictions multiplied by 100 (the study
        maximises this value).
    """
    # Re-seed on every trial so trials differ only by their hyperparameters.
    pl.seed_everything(42)
    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
    disabled_model_summary = ModelSummary(max_depth=0)
    logger = TensorBoardLogger("lightning_logs", name=f"trial_{trial.number}", log_graph=False)
    y_test = test_target_2['Electrical_Consumption/sqm']

    # Hyperparameter search space
    hidden_size = trial.suggest_categorical('hidden_size', [32, 64, 128])
    hidden_continuous_size = trial.suggest_categorical('hidden_continuous_size', [8, 16, 32])
    attention_head_size = trial.suggest_int('attention_head_size', 2, 8)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-3, log=True)
    dropout = trial.suggest_float('dropout', 0.2, 0.4, step=0.05)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])

    # No module-level testing dataset is defined for target_2, so build it
    # here from the training dataset's configuration, the same way the source
    # and target testing datasets are derived.
    testing_dataset = TimeSeriesDataSet.from_dataset(training_target_2, test_target_2)

    train_dataloader = training_target_2.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation_target_2.to_dataloader(train=False, batch_size=batch_size*10, num_workers=0)
    test_dataloader = testing_dataset.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    # Trainer: at most 40 epochs of 15 batches each, with early stopping on val_loss
    trainer = pl.Trainer(
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        devices=1,
        max_epochs=40,
        gradient_clip_val=0.1,
        callbacks=[early_stop_callback, disabled_model_summary],
        limit_train_batches=15,
        logger=logger,
        enable_checkpointing=False,
        log_every_n_steps=10,  # Log every 10 training steps
        num_sanity_val_steps=0  # To avoid running validation before training starts
    )

    # Derive the model configuration from the dataset it is actually trained
    # on (training_target_2), not from the source dataset, so the model's
    # dataset-derived settings match the target_2 data.
    model = TemporalFusionTransformer.from_dataset(
        training_target_2,
        hidden_size=hidden_size,
        hidden_continuous_size=hidden_continuous_size,
        attention_head_size=attention_head_size,
        dropout=dropout,
        learning_rate=learning_rate,
        lstm_layers=2,
        loss=QuantileLoss(),
        optimizer="Adam",
        log_interval=10,
        log_val_interval=10,
        reduce_on_plateau_patience=4
    )

    print(f"Starting trial {trial.number} with params: {trial.params}")

    trainer.fit(model, train_dataloader, val_dataloader)

    predictions_scaled_gpu = model.predict(test_dataloader)
    predictions_scaled = (predictions_scaled_gpu.cpu()).numpy()

    # Keep only the first column of the prediction array, flattened to 1-D.
    # assumes predict() returns (n_windows, prediction_length) — TODO confirm
    predictions_scaled = predictions_scaled[:, :1].ravel()

    # Drop the first value, then score against the actuals with the first 48
    # and last 24 rows removed.
    # NOTE(review): 48 / 24 presumably mirror max_encoder_length /
    # max_prediction_length, and the [1:] shift assumes the prediction windows
    # line up one-to-one with the test rows — confirm the alignment.
    predictions_scaled = predictions_scaled[1:]

    r2 = r2_score(y_test[48:len(y_test)-24], predictions_scaled)

    val_r2 = r2*100

    # Release model and loaders so GPU/CPU memory does not pile up across trials.
    del model, trainer, train_dataloader, val_dataloader, test_dataloader
    torch.cuda.empty_cache()
    gc.collect()

    return val_r2


def best_param(data, filename):
    """Tune one base TFT model with Optuna and save its best hyperparameters.

    Runs a 60-trial study that maximises the selected objective, prints the
    best trial and writes its parameters to ``filename`` as JSON.

    Args:
        data: Which objective to optimise: ``'source'``, ``'target'`` or
            ``'target_2'``.
        filename: Path of the JSON file that receives the best trial's
            parameters. Missing parent directories are created.

    Raises:
        ValueError: If ``data`` is not one of the supported keys.
    """
    # Resolve the objective before doing any work so a mistyped key fails
    # immediately with a clear message instead of failing later when the
    # empty study is asked for its best trial.
    if data == 'source':
        objective = objective_source
    elif data == 'target':
        objective = objective_target
    elif data == 'target_2':
        objective = objective_target_2
    else:
        raise ValueError(
            f"Unknown dataset key {data!r}; expected 'source', 'target' or 'target_2'."
        )

    study = optuna.create_study(direction='maximize')
    # Start the optimization
    study.optimize(objective, n_trials=60, gc_after_trial=True)

    # Print the best parameters
    print('Best trial:')
    trial = study.best_trial

    print('  Value: {}'.format(trial.value))
    print('  Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    # Create the output folder if needed so a finished study is not lost to a
    # missing directory.
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    with open(filename, 'w') as f:
        json.dump(trial.params, f)


# Run the tuning study for each base TFT model (source, target, target_2) and
# store the winning hyperparameters as JSON under the 12M model folder.
best_param(
    data='source',
    filename='Models/TFT/12M/Tuned Hyperparameters/source_params.json',
)
best_param(
    data='target',
    filename='Models/TFT/12M/Tuned Hyperparameters/target_params.json',
)
best_param(
    data='target_2',
    filename='Models/TFT/12M/Tuned Hyperparameters/target_2_params.json',
)

In [None]:

# Reload the target frames from the 6M folder. The test frame gets a fresh
# 0..n-1 index so it can later be sliced by position.
train_target = pd.read_pickle('Data/TFT Preprocessed data/6M/train_target.pkl')
test_target = pd.read_pickle('Data/TFT Preprocessed data/6M/test_target.pkl').reset_index(drop=True)

# Encoder (history) and decoder (forecast) window lengths.
max_encoder_length, max_prediction_length = 48, 24

# Largest time_index kept for training: the row count minus one forecast window.
training_cutoff_target = len(train_target) - max_prediction_length
# NOTE(review): train_target_2 is not reloaded in this cell, so this cutoff is
# computed from whatever frame is already in memory — confirm that is intended
# for the 6M run.
training_cutoff_target_2 = len(train_target_2) - max_prediction_length


# Target training dataset for the 6M data, restricted to rows at or before the
# training cutoff.
training_target = TimeSeriesDataSet(
    train_target[train_target["time_index"] <= training_cutoff_target],
    # identifiers and target
    time_idx="time_index",
    group_ids=["group_id"],
    target="Electrical_Consumption/sqm",
    # window lengths
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    # static features
    static_categoricals=[],
    static_reals=["lat", "lng", "yearbuilt"],
    # features known for future time steps
    time_varying_known_categoricals=["month"],
    time_varying_known_reals=[
        "time_index",
        "airTemperature",
        "cloudCoverage",
        "dewTemperature",
        "windDirection",
        "windSpeed",
    ],
    # features only observed in the past
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["Electrical_Consumption/sqm"],
    # derived features and encoders
    add_relative_time_idx=True,
    add_target_scales=False,
    add_encoder_length=True,
    categorical_encoders={"month": NaNLabelEncoder(add_nan=True)},
    allow_missing_timesteps=False,
)

# Validation and testing datasets reuse the training dataset's configuration.
# The validation set is built from the full training frame in predict mode.
validation_target = TimeSeriesDataSet.from_dataset(
    training_target,
    train_target,
    predict=True,
    stop_randomization=True,
)
testing_target = TimeSeriesDataSet.from_dataset(training_target, test_target)


def objective_target(trial):
    """Optuna objective for the target model: train one TFT and score it.

    Samples a hyperparameter set from ``trial``, fits a
    ``TemporalFusionTransformer`` on ``training_target`` while monitoring
    ``validation_target``, predicts on ``testing_target`` and compares the
    predictions with the ``Electrical_Consumption/sqm`` column of
    ``test_target``.

    Args:
        trial: Optuna trial supplying the hyperparameter suggestions.

    Returns:
        The R² score of the predictions multiplied by 100.
    """
    pl.seed_everything(42)

    stopper = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
    summary = ModelSummary(max_depth=0)
    tb_logger = TensorBoardLogger("lightning_logs", name=f"trial_{trial.number}", log_graph=False)
    actuals = test_target['Electrical_Consumption/sqm']

    # Sample the search space; dict entries are evaluated top to bottom, so the
    # suggestions are requested in the order listed.
    model_params = {
        'hidden_size': trial.suggest_categorical('hidden_size', [32, 64, 128]),
        'hidden_continuous_size': trial.suggest_categorical('hidden_continuous_size', [8, 16, 32]),
        'attention_head_size': trial.suggest_int('attention_head_size', 2, 8),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-3, log=True),
        'dropout': trial.suggest_float('dropout', 0.2, 0.4, step=0.05),
    }
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])

    # Data loaders; validation uses a ten-times larger batch.
    train_loader = training_target.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_loader = validation_target.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
    test_loader = testing_target.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    # Trainer: at most 40 epochs of 15 batches each, with early stopping on val_loss.
    device_kind = "gpu" if torch.cuda.is_available() else "cpu"
    trainer = pl.Trainer(
        accelerator=device_kind,
        devices=1,
        max_epochs=40,
        limit_train_batches=15,
        gradient_clip_val=0.1,
        callbacks=[stopper, summary],
        logger=tb_logger,
        log_every_n_steps=10,
        enable_checkpointing=False,
        num_sanity_val_steps=0,  # skip the validation pass before training starts
    )

    # Model whose input/output configuration is derived from the target dataset.
    tft = TemporalFusionTransformer.from_dataset(
        training_target,
        lstm_layers=2,
        loss=QuantileLoss(),
        optimizer="Adam",
        log_interval=10,
        log_val_interval=10,
        reduce_on_plateau_patience=4,
        **model_params,
    )

    print(f"Starting trial {trial.number} with params: {trial.params}")

    trainer.fit(tft, train_loader, val_loader)

    raw_predictions = tft.predict(test_loader)
    # First column of the prediction array, flattened, with the first value dropped.
    # assumes predict() returns (n_windows, prediction_length) — TODO confirm
    first_steps = raw_predictions.cpu().numpy()[:, :1].ravel()[1:]

    # Score against the actuals with the first 48 and last 24 rows removed.
    score = r2_score(actuals[48:len(actuals) - 24], first_steps)
    scaled_score = score * 100

    # Release model and loaders so memory does not pile up across trials.
    del tft, trainer, train_loader, val_loader, test_loader
    torch.cuda.empty_cache()
    gc.collect()

    return scaled_score


def best_param(data, filename):
    """Tune one target TFT model with Optuna and save its best hyperparameters.

    Runs a 60-trial study that maximises the selected objective, prints the
    best trial and writes its parameters to ``filename`` as JSON.

    Args:
        data: Which objective to optimise: ``'target'`` or ``'target_2'``.
        filename: Path of the JSON file that receives the best trial's
            parameters. Missing parent directories are created.

    Raises:
        ValueError: If ``data`` is not one of the supported keys.
    """
    # Resolve the objective before doing any work so a mistyped key fails
    # immediately with a clear message instead of failing later when the
    # empty study is asked for its best trial.
    if data == 'target':
        objective = objective_target
    elif data == 'target_2':
        objective = objective_target_2
    else:
        raise ValueError(
            f"Unknown dataset key {data!r}; expected 'target' or 'target_2'."
        )

    study = optuna.create_study(direction='maximize')
    # Start the optimization
    study.optimize(objective, n_trials=60, gc_after_trial=True)

    # Print the best parameters
    print('Best trial:')
    trial = study.best_trial

    print('  Value: {}'.format(trial.value))
    print('  Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    # Create the output folder if needed so a finished study is not lost to a
    # missing directory.
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    with open(filename, 'w') as f:
        json.dump(trial.params, f)


# Run the tuning study for the target and target_2 models and store the
# winning hyperparameters as JSON under the 6M model folder.
# NOTE(review): objective_target_2 is not redefined in this cell, so the
# target_2 study runs whichever definition and datasets are already in
# memory — confirm that is intended for the 6M run.
best_param(
    data='target',
    filename='Models/TFT/6M/Tuned Hyperparameters/target_params.json',
)
best_param(
    data='target_2',
    filename='Models/TFT/6M/Tuned Hyperparameters/target_2_params.json',
)
