In [1]:
import pandas as pd
import datetime 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os 
from datetime import datetime

## Pipeline

1. Data Preparation:
   - Load and preprocess data.
   - Create lagged features for power consumption and align them with the forecasted temperature data for the next 24 hours.
   - Split data into train, validation, and test sets
2. Model Definition:
   - Use the TCN architecture, which employs causal convolutions to ensure predictions only depend on past data.
3. Training:
   - Define a loss function and optimizer.
   - Train the model using the training set, and monitor validation loss to prevent overfitting.
8. Evaluation:
   - Predict the next 24 hours on the test set.
   - Compare TCN predictions with SARIMA using evaluation metrics like RMSE, MAE, and MAPE.

In [3]:
# Save current directory
current_directory = os.getcwd()

# Set print options to suppress scientific notation and show 3 decimal places
np.set_printoptions(suppress=True, precision=5)
pd.options.display.float_format = '{:.5f}'.format

# Suppress all warnings globally
import warnings
warnings.filterwarnings("ignore")

In [4]:
file_path = os.path.join(current_directory, 'data_augmented/X_small.csv')
X_small = pd.read_csv(file_path, index_col = 0)

file_path = os.path.join(current_directory, 'data_augmented/timestamps.csv')
timestamps = pd.read_csv(file_path, index_col = 0)

file_path = os.path.join(current_directory, 'data_augmented/temperature.csv')
temperature = pd.read_csv(file_path, index_col = 0)

In [5]:
df = X_small 

In [6]:
df['timestamp'] = timestamps
df.set_index("timestamp", inplace=True)

df.index = pd.to_datetime(df.index)
df = df.asfreq('H')  # 'H' for hourly frequency

In [7]:
def prepare_tcn_features(df, target_col, window_length=168, forecast_horizon=24):
    """
    Prepares the feature and target tensors for TCN.

    Parameters:
        df (pd.DataFrame): Input DataFrame with historical data.
        power_col (str): Column name for power consumption.
        temp_col (str): Column name for temperature.
        window_length (int): Length of the temporal window (168 for 7 days).
        forecast_horizon (int): Forecast horizon (24 for next 24 hours).

    Returns:
        np.ndarray: Feature tensor of shape (num_samples, window_length, num_features).
        np.ndarray: Target tensor of shape (num_samples, forecast_horizon).
        pd.DatetimeIndex: Timestamps corresponding to each sample.
    """
    df = df.copy()

    # Prepare features and targets
    X, y = [], []
    timestamps = []

    for i in range(len(df) - window_length - forecast_horizon + 1):
        # Extract historic features
        features = df.iloc[i:i + window_length].values  # (window_length, num_features)
        X.append(features)

        # Extract target (next 24 hours of power consumption)
        y.append(df.iloc[i + window_length:i + window_length + forecast_horizon][target_col].values)

        # Timestamps for the target period
        timestamps.append(df.index[i + window_length])

    return np.array(X), np.array(y), pd.DatetimeIndex(timestamps)

In [8]:
df

Unnamed: 0_level_0,power_consumption,ghi,temp,wind,is_weekend,is_spring,is_summer,is_autumn,is_winter,is_holiday,is_daylight
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-13 10:00:00+00:00,0.21837,2.31866,0.44842,-0.83739,0,1,0,0,0,0,1
2022-04-13 11:00:00+00:00,-0.11143,2.09107,0.56725,-0.36810,0,1,0,0,0,0,1
2022-04-13 12:00:00+00:00,-0.16639,1.98543,0.69246,0.11499,0,1,0,0,0,0,1
2022-04-13 13:00:00+00:00,-0.03585,1.27027,0.76656,0.01837,0,1,0,0,0,0,1
2022-04-13 14:00:00+00:00,0.30769,1.71318,0.79978,-0.17487,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
2024-06-17 01:00:00+00:00,-1.06418,-0.65983,-0.05753,-0.23008,0,0,1,0,0,0,0
2024-06-17 02:00:00+00:00,-1.05848,-0.65983,-0.09075,-0.21628,0,0,1,0,0,0,0
2024-06-17 03:00:00+00:00,-1.02523,-0.65983,-0.07669,-0.25768,0,0,1,0,0,0,0
2024-06-17 04:00:00+00:00,-0.76414,-0.49193,0.05363,-0.54754,0,0,1,0,0,0,0


In [9]:
target_col = "power_consumption"
window_length = 168  # 7 days
forecast_horizon = 24  # Next 24 hours

# Reduce the feature space dimension
# df = df[['power_consumption', 'temp']]

# Add forecasted temperature data
# df['temp_forecast'] = temperature[window_length : len(df) + window_length]['temperature'].values

In [10]:
# Step 1: Train-Test Split
train_size = int(len(df) * 0.8)  # Determine initial train size based on 80%
initial_test_start = train_size  

while df.index[initial_test_start].hour != 11: # Adjust test start to align with the next occurrence of 11 AM
    initial_test_start += 1

final_test_end = len(df) - 1
while df.index[final_test_end].hour != 10: # Adjust test end to align with the last 10 AM in the dataset
    final_test_end -= 1

train = df.iloc[:initial_test_start]
test = df.iloc[initial_test_start:final_test_end+1]  # Include the last index

In [11]:
X, y, timestamps = prepare_tcn_features(df, target_col, window_length, forecast_horizon)
X_train, y_train, timestamps_train = prepare_tcn_features(train, target_col, window_length, forecast_horizon)
X_test, y_test, timestamps_test = prepare_tcn_features(test, target_col, window_length, forecast_horizon)

In [12]:
import torch
import torch.nn as nn

class TemporalConvNet(nn.Module):
    def __init__(self, input_size, output_size, num_channels, kernel_size=3, dropout=0.2):
        """
        Temporal Convolutional Network for Power Consumption Forecasting.

        Parameters:
            input_size (int): Number of input features.
            output_size (int): Number of output features (forecast horizon).
            num_channels (list): Number of channels in each TCN layer.
            kernel_size (int): Size of the convolution kernel.
            dropout (float): Dropout rate.
        """
        super(TemporalConvNet, self).__init__()
        layers = []
        for i in range(len(num_channels)):
            in_channels = input_size if i == 0 else num_channels[i - 1]
            out_channels = num_channels[i]
            layers += [
                nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=(kernel_size - 1)),
                nn.ReLU(),
                nn.Dropout(dropout)  # Dropout layer
            ]
        self.network = nn.Sequential(*layers)
        self.linear = nn.Linear(num_channels[-1], output_size)

    def forward(self, x):
        """
        Forward pass of the TCN.

        Input:
            x: Shape (batch_size, seq_length, input_size).
        Output:
            y: Shape (batch_size, output_size).
        """
        x = x.permute(0, 2, 1)  # Change to (batch_size, input_size, seq_length)
        x = self.network(x)
        x = x[:, :, -1]  # Take the last time step
        x = self.linear(x)
        return x

    def enable_mc_dropout(self):
        """Enable MC Dropout by setting all dropout layers to train mode."""
        for module in self.modules():
            if isinstance(module, nn.Dropout):
                module.train()



In [13]:
def monte_carlo_predictions(model, X, num_samples=100):
    """
    Perform Monte Carlo Dropout predictions.

    Parameters:
        model (nn.Module): TCN model with MC Dropout enabled.
        X (torch.Tensor): Input tensor of shape (batch_size, seq_length, num_features).
        num_samples (int): Number of MC samples.

    Returns:
        torch.Tensor: Mean predictions.
        torch.Tensor: Prediction standard deviations (uncertainty).
    """
    model.enable_mc_dropout()  # Enable dropout during testing
    predictions = []

    for _ in range(num_samples):
        with torch.no_grad():
            predictions.append(model(X))  # Append prediction for each MC sample

    predictions = torch.stack(predictions)  # Shape: (num_samples, batch_size, output_size)
    mean_prediction = predictions.mean(dim=0)  # Mean over MC samples
    uncertainty = predictions.std(dim=0)  # Std dev over MC samples

    return mean_prediction, uncertainty

In [14]:
import torch

# Check if MPS (Metal Performance Shaders) is available
if torch.backends.mps.is_available():
    device = torch.device("mps")  # Use the MPS device for Apple Silicon
elif torch.cuda.is_available():
    device = torch.device("cuda")  # Use CUDA if available
else:
    device = torch.device("cpu")  # Default to CPU if no GPU backend is available

print(f"Using device: {device}")

Using device: cpu


In [None]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Create DataLoader for batching
from torch.utils.data import DataLoader, TensorDataset

batch_size = 128
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize the TCN model
input_size = X.shape[2]  # Number of features per time step
output_size = y.shape[1]  # Forecast horizon (24 hours)
hidden_channels = [64, 128, 64]  # Number of channels in hidden layers

model = TemporalConvNet(input_size, output_size, hidden_channels, kernel_size=3, dropout=0.2).to(device)

# Training setup
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

# === Training Loop ===
num_epochs = 10
model.train()  # Enable training mode

for epoch in range(num_epochs):
    epoch_loss = 0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()
        predictions = model(batch_X)  # Forward pass
        loss = criterion(predictions, batch_y)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update model parameters
        epoch_loss += loss.item()
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(train_loader):.4f}")

# === Evaluation with MC Dropout ===
model.eval()  # Enable evaluation mode

# Use MC Dropout for predictions
num_samples = 100  # Number of Monte Carlo samples
mean_predictions, uncertainties = monte_carlo_predictions(model, X_test_tensor.to(device), num_samples)

# Compute test loss using the mean prediction
test_loss = criterion(mean_predictions, y_test_tensor.to(device)).item()
print(f"Test Loss (MC Dropout): {test_loss:.4f}")

# Convert results to numpy arrays for further analysis
mean_predictions_np = mean_predictions.cpu().numpy()  # Mean predictions
uncertainties_np = uncertainties.cpu().numpy()  # Prediction uncertainties

# Example: Print predictions and uncertainties for the first test sample
print("Mean Predictions (First Test Sample):", mean_predictions_np[0])
print("Uncertainties (First Test Sample):", uncertainties_np[0])

Epoch 1, Loss: 0.6301
Epoch 2, Loss: 0.4389
Epoch 3, Loss: 0.4142
Epoch 4, Loss: 0.4046
Epoch 5, Loss: 0.3985
Epoch 6, Loss: 0.3930


In [None]:
time_steps = np.arange(mean_predictions_np.shape[1])
plt.figure(figsize=(12, 6))
plt.plot(time_steps, mean_predictions_np[0], label="Mean Prediction", color="blue")
plt.fill_between(
    time_steps,
    mean_predictions_np[0] - 1.96 * uncertainties_np[0],
    mean_predictions_np[0] + 1.96 * uncertainties_np[0],
    color="blue",
    alpha=0.3,
    label="95% Confidence Interval"
)
plt.title("Predictions with Uncertainty")
plt.xlabel("Time Step")
plt.ylabel("Power Consumption")
plt.legend()
plt.show()

In [None]:
def compute_picp_pinaw(y_true, y_lower, y_upper):
    """
    Computes PICP (Prediction Interval Coverage Probability) and 
    PINAW (Prediction Interval Normalized Average Width).
    
    Parameters:
        y_true (array-like): True values of the target variable.
        y_lower (array-like): Lower bounds of the prediction intervals.
        y_upper (array-like): Upper bounds of the prediction intervals.
    
    Returns:
        picp (float): Prediction Interval Coverage Probability.
        pinaw (float): Prediction Interval Normalized Average Width.
    """
    # Convert inputs to numpy arrays for easier manipulation
    y_true = np.array(y_true)
    y_lower = np.array(y_lower)
    y_upper = np.array(y_upper)
    
    # PICP: Proportion of true values within the bounds
    coverage = (y_true >= y_lower) & (y_true <= y_upper)  # Boolean array
    picp = np.mean(coverage)  # Average of the boolean array
    
    # PINAW: Average width of intervals, normalized by the range of y_true
    interval_widths = y_upper - y_lower
    pinaw = np.mean(interval_widths) / (np.max(y_true) - np.min(y_true))
    
    return picp, pinaw

In [None]:
# picp, pinaw = compute_picp_pinaw(y_true, y_lower, y_upper)