1. Fetch data

In [27]:
import requests
import pandas as pd

train_start_date = '2024-01-01'
train_end_date = '2024-03-01'
test_start_date = '2024-03-01'
test_end_date = '2024-05-01'

# Fetch data from the API for training (1 year) and testing (1 month)
url_train = f"https://api.energy-charts.info/price?bzn=DE-LU&start={train_start_date}&end={train_end_date}"
url_test = f"https://api.energy-charts.info/price?bzn=DE-LU&start={test_start_date}&end={test_end_date}"

response_train = requests.get(url_train)
response_test = requests.get(url_test)

data_train = response_train.json()
data_test = response_test.json()

# Convert to DataFrame
train_df = pd.DataFrame({
    'timestamp': pd.to_datetime(data_train['unix_seconds'], unit='s'),
    'price': data_train['price']
})
test_df = pd.DataFrame({
    'timestamp': pd.to_datetime(data_test['unix_seconds'], unit='s'),
    'price': data_test['price']
})

# Clean the data (remove rows with null prices)
train_df = train_df.dropna()
test_df = test_df.dropna()

# Rename columns for NeuralForecast
train_df = train_df.rename(columns={'timestamp': 'ds', 'price': 'y'})
test_df = test_df.rename(columns={'timestamp': 'ds', 'price': 'y'})
train_df['unique_id'] = 'electricity_prices'
test_df['unique_id'] = 'electricity_prices'

# Check date ranges
print("Last date in train_df:", train_df['ds'].max())
print("First date in test_df:", test_df['ds'].min())

Last date in train_df: 2024-03-01 22:00:00
First date in test_df: 2024-02-29 23:00:00


In [28]:
train_df.head()

Unnamed: 0,ds,y,unique_id
0,2023-12-31 23:00:00,0.1,electricity_prices
1,2024-01-01 00:00:00,0.01,electricity_prices
2,2024-01-01 01:00:00,0.0,electricity_prices
3,2024-01-01 02:00:00,-0.01,electricity_prices
4,2024-01-01 03:00:00,-0.03,electricity_prices


In [29]:
test_df.head()

Unnamed: 0,ds,y,unique_id
0,2024-02-29 23:00:00,62.04,electricity_prices
1,2024-03-01 00:00:00,61.42,electricity_prices
2,2024-03-01 01:00:00,58.14,electricity_prices
3,2024-03-01 02:00:00,57.83,electricity_prices
4,2024-03-01 03:00:00,58.3,electricity_prices


In [30]:
from neuralforecast import NeuralForecast
from neuralforecast.models import VanillaTransformer
import pandas as pd

horizon = 24
historical = 24 * 7  # use last week as input

# Set up the model with horizon = 24 (for day-ahead hourly forecasts)
models = [
    VanillaTransformer(
        h=horizon,  # Forecast horizon
        input_size=historical,  # Historical window size
        hidden_size=32,
        max_steps=1000,
        val_check_steps=50,  # Validation steps
        early_stop_patience_steps=2,  # Early stopping patience
    )
]
# Set a validation size (e.g., 10% of the training data)
validation_size = int(len(train_df) * 0.1)
# Train the model on the entire training data just once
nf = NeuralForecast(models=models, freq='H')
nf.fit(df=train_df, val_size=validation_size)

Seed set to 1
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name          | Type          | Params | Mode 
--------------------------------------------------------
0 | loss          | MAE           | 0      | train
1 | padder_train  | ConstantPad1d | 0      | train
2 | scaler        | TemporalNorm  | 0      | train
3 | enc_embedding | DataEmbedding | 96     | train
4 | dec_embedding | DataEmbedding | 96     | train
5 | encoder       | TransEncoder  | 13.0 K | train
6 | decoder       | TransDecoder  | 10.8 K | train
--------------------------------------------------------
24.0 K    Trainable params
0         Non-trainable params
24.0 K    Total params
0.096     Total estimated model params size (MB)
67        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [None]:
# Define the date range based on the test dataset
daterange = pd.date_range(
start = test_df['ds'].min(),
end = test_df['ds'].max() - pd.Timedelta(hours=historical),
freq = '24H'
)

# Generate predictions for each 7-day window
predictions = [
    nf.predict(test_df[(test_df['ds'] >= d) & (test_df['ds'] < d + pd.Timedelta(hours=historical))])
    for d in daterange
]

# Flatten the predictions into a DataFrame
forecast_df = pd.concat(predictions, ignore_index=True)

# Ensure the results align with the test dataset
# Replace 'VT' with the correct model output column name
forecast_df = forecast_df[['ds', 'VanillaTransformer']].merge(test_df[['ds', 'y']], on='ds', how='inner')
forecast_df.rename(columns={'y': 'y_actual', 'VanillaTransformer': 'y_pred'}, inplace=True)

In [None]:
import matplotlib.pyplot as plt

# Plotting
fig, ax = plt.subplots(1, 1, figsize=(20, 7))

# Plot predictions for VT model
forecast_df.set_index('ds')['y_pred'].plot(ax=ax, linewidth=2, label='VT Prediction')

# Optional: Plot actual test values for comparison
test_df.set_index('ds')['y'].plot(ax=ax, linewidth=2, label='Actual Price')

# Formatting the plot
ax.set_title('Electricity Price Forecast (Day-by-Day)', fontsize=22)
ax.set_ylabel('Price', fontsize=20)
ax.set_xlabel('Date', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()
plt.show()

In [None]:
forecast_df.head(100)

In [None]:
forecast_df.info()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Calculate error metrics
mae = mean_absolute_error(forecast_df['y_actual'], forecast_df['y_pred'])
mse = mean_squared_error(forecast_df['y_actual'], forecast_df['y_pred'])
rmse = np.sqrt(mse)

# Print the results
print("Model Performance:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Merge forecasted values with actual test data, selecting only the relevant columns
comparison_df = forecast_df[['ds', 'y_pred']].merge(test_df[['ds', 'y']], on='ds', how='inner')
comparison_df.rename(columns={'y': 'y_actual', 'y_pred': 'y_pred'}, inplace=True)

# Add a column for the day of the week (0=Monday, 1=Tuesday, ..., 6=Sunday)
comparison_df['day_of_week'] = comparison_df['ds'].dt.dayofweek

# Calculate and display MAE, MSE, and RMSE for each day of the week
metrics_by_day = {}

for day in range(7):

# Filter data for each day of the week
    day_data = comparison_df[comparison_df['day_of_week'] == day]

# Calculate the metrics for this day
mae = mean_absolute_error(day_data['y_actual'], day_data['y_pred'])
mse = mean_squared_error(day_data['y_actual'], day_data['y_pred'])
rmse = np.sqrt(mse)

# Store the metrics for each day
metrics_by_day[day] = {
'MAE': mae,
'MSE': mse,
'RMSE': rmse
}

# Print the results for each day
day_name = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'][day]
print(f"MAE of VT for {day_name} is: {mae:.2f}")
print(f"MSE of VT for {day_name} is: {mse:.2f}")
print(f"RMSE of VT for {day_name} is: {rmse:.2f}")
print('-' * 50)

# Optionally, you can store these results in a DataFrame for easy viewing or further analysis
metrics_df = pd.DataFrame(metrics_by_day).T
print("\nMetrics by Day of the Week:")
print(metrics_df)