In [1]:
#!pip install torch torchvision torchaudio pandas numpy scikit-learn

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from  sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_absolute_error
from datetime import timedelta
from torch.utils.tensorboard  import SummaryWriter
from datetime import datetime
import itertools
from sklearn.metrics import root_mean_squared_error


try:
  from google.colab import files
  from google.colab import drive
  uploaded = files.upload()
  !mkdir -p "/content/drive/My Drive/p9"
  drive.mount('/content/drive')
  IN_COLAB = True
except:
  IN_COLAB = False
import warnings
warnings.filterwarnings('once')

Initialize time series

In [3]:
df = pd.read_csv('../Dataset/ConsumptionIndustry.csv' if not IN_COLAB else 'ConsumptionIndustry.csv', sep=';')

df['HourDK'] = pd.to_datetime(df['HourDK'])
df['ConsumptionkWh'] = df['ConsumptionkWh'].str.replace(",", ".").astype(float)

# Lag features
df['ConsumptionkWh_lag1'] = df['ConsumptionkWh'].shift(1)
df['ConsumptionkWh_lag24'] = df['ConsumptionkWh'].shift(24)
df['ConsumptionkWh_lag168'] = df['ConsumptionkWh'].shift(168)


# Rolling Average
df['ConsumptionkWh_roll24'] = df['ConsumptionkWh'].rolling(window=24).mean()
df['ConsumptionkWh_roll168'] = df['ConsumptionkWh'].rolling(window=168).mean()

# Holidays in Denmark from 2021 to 2024 (source: https://publicholidays.dk/)
holidays = ['2021-01-01', '2021-04-01', '2021-04-02', '2021-04-05', '2021-05-13', '2021-05-21', '2021-06-01', '2021-06-24', '2021-12-24', '2021-12-25', '2021-12-26', '2021-12-31', '2022-01-01', '2022-04-14', '2022-04-15', '2022-04-18', '2022-05-05', '2022-05-13', '2022-05-26', '2022-06-05', '2022-06-24', '2022-12-24', '2022-12-25', '2022-12-26',
            '2022-12-31', '2023-01-01', '2023-03-24', '2023-03-25', '2023-03-26', '2023-04-07', '2023-05-05', '2023-05-13', '2023-05-26', '2023-06-05', '2023-06-24', '2023-12-24', '2023-12-25', '2023-12-26', '2023-12-31', '2024-01-01', '2024-03-28', '2024-03-29', '2024-03-30', '2024-04-05', '2024-05-05', '2024-05-13', '2024-05-26', '2024-06-05', '2024-06-24']
holidays = pd.to_datetime(holidays)
df['is_holiday'] = df['HourDK'].dt.date.isin(holidays.date)

# Weekday and weekend flag
df['day_of_week'] = df['HourDK'].dt.dayofweek
df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)


# Hour of the Day (0-23) to sine/cosine transformation
df['hour_sin'] = np.sin(2 * np.pi * df['HourDK'].dt.hour / 24)
df['hour_cos'] = np.cos(2 * np.pi * df['HourDK'].dt.hour / 24)

# Day of the Week (0-6) to sine/cosine transformation
df['day_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
df['day_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)

# Month of the Year (1-12) to sine/cosine transformation
df['month_sin'] = np.sin(2 * np.pi * df['HourDK'].dt.month / 12)
df['month_cos'] = np.cos(2 * np.pi * df['HourDK'].dt.month / 12)

df.index = df['HourDK']

# drop Nan values
df = df.dropna()

print(df.head(1))

                        HourUTC     HourDK  MunicipalityNo Branche  \
HourDK                                                               
2021-01-08  2021-01-07 23:00:00 2021-01-08             851  Privat   

            ConsumptionkWh  ConsumptionkWh_lag1  ConsumptionkWh_lag24  \
HourDK                                                                  
2021-01-08       28924.472            33787.185             29444.332   

            ConsumptionkWh_lag168  ConsumptionkWh_roll24  \
HourDK                                                     
2021-01-08              37842.849           42275.367875   

            ConsumptionkWh_roll168  is_holiday  day_of_week  is_weekend  \
HourDK                                                                    
2021-01-08            42130.498304       False            4           0   

            hour_sin  hour_cos   day_sin   day_cos  month_sin  month_cos  
HourDK                                                                    
2021-01-08  

### Transformer model

In [4]:
scaler = MinMaxScaler()
feature_cols = ['ConsumptionkWh_lag1', 'ConsumptionkWh_lag24', 'ConsumptionkWh_lag168',
                'ConsumptionkWh_roll24', 'ConsumptionkWh_roll168', 'hour_sin', 'hour_cos',
                'day_sin', 'day_cos', 'month_sin', 'month_cos']
target_col = 'ConsumptionkWh'


def normalize_dataset(train_df, val_df, test_df):

  # Make explicit copies to avoid modifying slices
  train_df = train_df.copy()
  val_df = val_df.copy()
  test_df = test_df.copy()

  # Apply scaling to features (.loc for Explicit Indexing)
  train_df.loc[:, feature_cols] = scaler.fit_transform(train_df[feature_cols])
  val_df.loc[:, feature_cols] = scaler.transform(val_df[feature_cols])
  test_df.loc[:, feature_cols] = scaler.transform(test_df[feature_cols])

  # Apply scaling to the target column
  train_df.loc[:, target_col] = scaler.fit_transform(train_df[[target_col]])
  val_df.loc[:, target_col] = scaler.transform(val_df[[target_col]])
  test_df.loc[:, target_col] = scaler.transform(test_df[[target_col]])

  return train_df, val_df, test_df


class EnergyDataset(Dataset):
  def __init__(self, data, feature_cols, target_col):
    self.features = torch.tensor(
        data[feature_cols].values, dtype=torch.float32)
    self.targets = torch.tensor(data[target_col].values, dtype=torch.float32)

  def __len__(self):
    return len(self.targets)

  def __getitem__(self, idx):
    return self.features[idx], self.targets[idx]

def create_dataset(train_df, val_df, test_df):
  train_dataset = EnergyDataset(train_df, feature_cols, target_col)
  val_dataset = EnergyDataset(val_df, feature_cols, target_col)
  test_dataset = EnergyDataset(test_df, feature_cols, target_col)

  # Create dataloaders
  batch_size = 128
  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
  val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  return train_loader, val_loader, test_loader
  

class EnergyTransformer(nn.Module):
  def __init__(self, input_size, d_model, nhead, output_size, num_encoder_layers, num_decoder_layers, dim_feedforward, dropout=0.1):
    super(EnergyTransformer, self).__init__()
    self.input_size = input_size
    self.d_model = d_model
    self.embedding = nn.Linear(input_size, d_model)  # Embedding layer
    self.positional_encoding = nn.Parameter(torch.zeros(1, 1000, d_model))  # Replace nn.Embedding
    # self.positional_encoding = nn.Parameter(torch.zeros(1, 1000, d_model)) # Positional encoding
    self.transformer = nn.Transformer(
        d_model=d_model, nhead=nhead, num_encoder_layers=num_encoder_layers,
        num_decoder_layers=num_decoder_layers, dim_feedforward=dim_feedforward,
        dropout=dropout, batch_first=True
    )
    self.fc_out = nn.Linear(d_model, output_size)

  def forward(self, x):
    x = self.embedding(x) + self.positional_encoding[:, :x.size(1), :]
    output = self.transformer(x, x)
    return self.fc_out(output[:, -1, :])
  
  

Functions

In [5]:

def plot_data(data_train, data_val, data_test, predictions, save_at=''):
  plt.figure(figsize=(7, 3))
  plt.plot(data_train.index, data_train, label=f'Train ({data_train.index[0]} - {data_train.index[-1]})')
  plt.plot(data_val.index, data_val, label=f'Val ({data_val.index[0]} - {data_val.index[-1]})')
  plt.plot(data_test.index, data_test, label=f'Test ({data_test.index[0]} - {data_test.index[-1]})')
  plt.plot(data_test.index, predictions, label='Prediction')
  plt.title('Consumption in danish private households with prediction')
  plt.xlabel('Measurements')
  plt.ylabel('Power (kW / charger)')
  plt.legend()
  if save_at:
    plt.savefig(save_at)
  plt.show()

def sample_data_with_train_window(df, start_date, train_window_size):
  start_date = datetime.strptime(start_date, '%Y-%m-%d') - timedelta(hours=train_window_size)
  end_date = df.index[-1]
  return df[(df.index >= start_date) & (df.index <= end_date)]

def get_next_window(data, train_window_size, validation_window_size, forecast_horizon):
  return data[:train_window_size], data[train_window_size:validation_window_size+train_window_size], data[train_window_size+validation_window_size:train_window_size + forecast_horizon + validation_window_size]



# def forecast_whitebox_model(model, forecast_horizon, model_name):
#   model_res = model.fit()

#   if "SARIMA" in model_name and "STL" not in model_name:
#     return model_res.get_forecast(steps=forecast_horizon).predicted_mean
#   else:
#     return model_res.forecast(steps=forecast_horizon)


def forecast_blackbox_model(model, train_loader, val_loader, test_loader ,num_epochs,):

  criterion = nn.MSELoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  model = model.to(device)

  for epochs in range(num_epochs):
    model.train() 

    train_loss = 0

    for batch in train_loader:
      features, targets = batch
      features, targets = features.to(device), targets.to(device)

      # Forward Pass
      optimizer.zero_grad()
      targets_pred = model(features.unsqueeze(1)) # Add a dummy sequence length dimension
      loss = criterion(targets_pred.squeeze(), targets) # Squeeze the output to match the target shape

      # Backward Pass
      loss.backward()

      # Update the weights
      optimizer.step()

      # Accumulate the loss for monitoring
      train_loss += loss.item()

      # Calculate the average loss over the entire training data
      train_loss /= len(train_loader)

      # Validation Loop
      model.eval()
      val_loss = 0
      with torch.no_grad():
        for batch in val_loader:
          features, targets = batch
          features, targets = features.to(device), targets.to(device)
          targets_pred = model(features.unsqueeze(1))
          loss = criterion(targets_pred.squeeze(), targets)
          val_loss += loss.item()

      val_loss /= len(val_loader)

    # Model predictions 
    model.eval()


  # Collect predictions and targets
  predictions = []

  with torch.no_grad():  # Disable gradient tracking
    for batch in test_loader:
      inputs, targets = batch  # Get the inputs and targets
      inputs, targets = inputs.to(device), targets.to(
          device)  # Move to GPU if available

      # Forward pass
      outputs = model(inputs.unsqueeze(1))

      # Store predictions 
      predictions.append(outputs.cpu().numpy())

  # Convert back from normalized values to real values
    predictions = np.array(predictions)  # Convert list to numpy array
    predictions_reshaped = predictions.reshape(-1, 1)

    # Use the inverse_transform method to unnormalize
    predictions = scaler.inverse_transform(predictions_reshaped)

  return predictions.flatten().tolist()  


def create_result_table(results, columns=[]):
  result_table = pd.DataFrame(results)
  result_table.columns = columns
  result_table = result_table.sort_values(by='rmse', ascending=True).reset_index(drop=True)
  return result_table

Optimize functions

In [6]:

# def optimize_Theta_model(data_train, data_test, forecast_horizon, model_name):
#   results = []
#   best_rmse = 0
#   p = range(1, 25)
#   d = [True, False]
#   u = [True, False]
#   m = ['additive', 'multiplicative']
#   di = [True, False]

#   for param in itertools.product(p, d, u, m, di):
#     try:
#       model = ThetaModel(data_train, period=param[0], deseasonalize=param[1], use_test=param[2], method=param[3], difference=param[4])
#     except:
#       continue

#     predictions = forecast_whitebox_model(model, forecast_horizon, model_name)
#     rmse = root_mean_squared_error(data_test, predictions)
#     results.append([param, rmse])
#     print(f"{param} - RMSE: {rmse}")
    
#     if rmse < best_rmse or best_rmse == 0:
#       best_prediction = predictions

#   result_table = create_result_table(results, columns=['params', 'rmse'])
#   return result_table, best_prediction


def optimize_Transformer_model(data_test, train_loader, val_loader, test_loader):
  results = []
  best_rmse = 0

  # Hard coded 
  input_size = len(feature_cols)
  output_size = 1

  d = [64, 128, 256]
  nhead = [4, 8, 12]
  num_encoder_layers = [3, 6, 9]
  num_decoder_layers = [3, 6, 9]
  dim_feedforward = [256, 512, 1024]
  dropout = [0.1, 0.2, 0.3]
  num_epochs = [100, 500]

  for param in itertools.product(d, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, dropout, num_epochs):
    model = EnergyTransformer(input_size=input_size, d_model=param[0], nhead=param[1], output_size=output_size, num_encoder_layers=param[2], num_decoder_layers=param[3], dim_feedforward=param[4], dropout=param[5])
    predictions = forecast_blackbox_model(model, train_loader, val_loader, test_loader, num_epochs=param[6])
    rmse = root_mean_squared_error(data_test['ConsumptionkWh'], predictions)
    results.append([param, rmse])
    print(f"{param} - RMSE: {rmse}")

    if rmse < best_rmse or best_rmse == 0:
      best_prediction = predictions

  result_table = create_result_table(results, columns=['params', 'rmse'])
  return result_table, best_prediction
  




  

Optimizing through whole dataset

In [7]:
model_name = 'Transformer'
date_start = '2023-11-01'
# window_train_size = 24*911 #hours (911 days)
# window_val_size = 24*123 #hours (123 days)
# forecast_horizon = 24*376 #hours (376 days)

window_train_size = 24*7*2  # hours (911 days)
window_val_size = 24*7 # hours (123 days)
forecast_horizon = 24  # hours (376 days)

data = sample_data_with_train_window(df, date_start, window_train_size) # start: date_start - window_train_size, end: last date in df
data_train, data_val, data_test = get_next_window(data, window_train_size, window_val_size, forecast_horizon)

# Ensure dataframes are not empty
if data_train.empty or data_val.empty or data_test.empty:
  print("One of the dataframes (data_train, data_val, data_test) is empty.")

data_trainN, data_valN, data_testN = normalize_dataset(data_train, data_val, data_test)
train_loader, val_loader, test_loader = create_dataset(data_trainN, data_valN, data_testN)

warnings.filterwarnings("ignore")
result, pred = optimize_Transformer_model(data_test, train_loader, val_loader, test_loader)
warnings.filterwarnings("default")


if IN_COLAB:
  plot_data(data_train, data_test, pred, save_at=f'/content/drive/My Drive/p9/{window_train_size}_{forecast_horizon}_{model_name}.png')
  result.to_csv(f'/content/drive/My Drive/p9/{window_train_size}_{forecast_horizon}_{model_name}.csv')
else:
  plot_data(data_train, data_test, pred,
            save_at=f'../Results/BlackBox/Tuning/{window_train_size}_{forecast_horizon}_{model_name}.png')
  result.to_csv(f'../Results/BlackBox/Tuning/{window_train_size}_{forecast_horizon}_{model_name}.csv')

(64, 4, 3, 3, 256, 0.1, 100) - RMSE: 4550.805668849192
(64, 4, 3, 3, 256, 0.1, 500) - RMSE: 2374.364644208935
(64, 4, 3, 3, 256, 0.2, 100) - RMSE: 3188.627632871474


KeyboardInterrupt: 