In [161]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer, Baseline
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
import matplotlib.pyplot as plt

In [163]:
file_path = "PJME_hourly.csv"
df = pd.read_csv(file_path)

In [178]:
df

Unnamed: 0_level_0,PJME_MW,hour,day,weekday,month,group,time_idx
time_idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1970-01-01,26498.0,0,1,3,1,0,0


In [181]:
# Detect or generate a datetime column
datetime_column = None
for col in df.columns:
    if "date" in col.lower() or "time" in col.lower():
        datetime_column = col
        break

if not datetime_column:
    print("No datetime column found. Generating a datetime index.")
    # Define the start date for the dataset
    start_date = '2002-12-31 00:00:00'  # Replace with your dataset's start date
    df['Datetime'] = pd.date_range(start=start_date, periods=len(df), freq='H')
    datetime_column = 'Datetime'

# Convert to datetime
df[datetime_column] = pd.to_datetime(df[datetime_column])

# Set the datetime column as index
df.set_index(datetime_column, inplace=True)

# Ensure the data is in hourly frequency
df = df.asfreq('H')

# Fill missing values (e.g., forward fill)
df = df.fillna(method="ffill")

# Step 3: Add time-related features
df['hour'] = df.index.hour
df['day'] = df.index.day
df['weekday'] = df.index.weekday
df['month'] = df.index.month

df.reset_index(inplace=True)

# Create an integer time index if it does not already exist
if 'time_idx' not in df.columns:
    df['time_idx'] = np.arange(len(df))

# Step 4: Split the data into train and validation sets
max_prediction_length = 24   # Predict the next 24 hours
max_encoder_length = 168     # Use the past 7 days for predictions

# Determine the cutoff point for training data
train_cutoff = df['time_idx'].max() - (max_prediction_length + 500)  # Reserve last 500+24 hours for validation
train_cutoff = int(train_cutoff)  # Ensure train_cutoff is an integer
train_df = df[df['time_idx'] <= train_cutoff]
val_df = df[df['time_idx'] > train_cutoff]

# Step 5: Define TimeSeriesDataSet
target_column = "PJME_MW"  # Update this based on the target column in your dataset

if target_column not in df.columns:
    raise ValueError(f"Target column '{target_column}' not found in the dataset.")

# Add a group column for single time series
train_df['group'] = 0
val_df['group'] = 0

# Create the TimeSeriesDataSet
training = TimeSeriesDataSet(
    train_df,
    time_idx='time_idx',
    target=target_column,
    group_ids=['group'],
    time_varying_known_reals=['time_idx', 'hour', 'day', 'weekday', 'month'],
    time_varying_unknown_reals=[target_column],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
)

# Create validation TimeSeriesDataSet
validation = TimeSeriesDataSet.from_dataset(training, val_df, predict=True)

# Create DataLoaders
batch_size = 64
train_loader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_loader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

# Step 6: Define the TFT model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=1e-3,
    hidden_size=16,  # Number of hidden units
    attention_head_size=4,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # Quantiles output
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# Step 7: Train the model
early_stop_callback = EarlyStopping(monitor="val_loss", patience=5, verbose=True, mode="min")
logger = TensorBoardLogger("tft_logs", name="tft_model")

trainer = Trainer(
    max_epochs=30,
    gpus=1 if torch.cuda.is_available() else 0,
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback],
    logger=logger,
)

trainer.fit(tft, train_loader, val_loader)

# Step 8: Evaluate the model
best_model = TemporalFusionTransformer.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
actuals = torch.cat([y[0] for x, y in iter(val_loader)])
predictions = best_model.predict(val_loader)

# Step 9: Visualize predictions
plt.figure(figsize=(12, 6))
plt.plot(actuals[:100], label="Actual")
plt.plot(predictions[:100], label="Predicted")
plt.legend()
plt.title("TFT Predictions vs Actuals")
plt.show()

  df = df.asfreq('H')
  df = df.fillna(method="ffill")


TypeError: Addition/subtraction of integers and integer-arrays with Timestamp is no longer supported.  Instead of adding/subtracting `n`, use `n * obj.freq`