In [8]:
import requests
import pandas as pd
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS

In [9]:

# Step 1: Fetch Data from the API
def fetch_data(start_date, end_date):
    url = f"https://api.energy-charts.info/price?bzn=DE-LU&start={start_date}&end={end_date}"
    response = requests.get(url)
    data = response.json()

    # Extracting timestamps and prices, converting timestamps to datetime
    df = pd.DataFrame({
        'timestamp': pd.to_datetime(data['unix_seconds'], unit='s'),
        'price': data['price']
    })

    # Dropping rows with null prices (if any)
    df = df.dropna(subset=['price']).reset_index(drop=True)
    return df


# Define the date range for 1 year and 1 month
start_date = '2022-01-01'  # Start date for 1 year of training
end_date = '2023-02-01'  # End date, covering 1 year and 1 month

# Fetch the data
df = fetch_data(start_date, end_date)


In [10]:
df.head(100)

Unnamed: 0,timestamp,price
0,2021-12-31 23:00:00,50.05
1,2022-01-01 00:00:00,41.33
2,2022-01-01 01:00:00,43.22
3,2022-01-01 02:00:00,45.46
4,2022-01-01 03:00:00,37.67
...,...,...
95,2022-01-04 22:00:00,97.56
96,2022-01-04 23:00:00,105.31
97,2022-01-05 00:00:00,97.29
98,2022-01-05 01:00:00,84.50


In [12]:

# Step 2: Data Preprocessing
# Ensure hourly frequency and add lagged feature
df = df.set_index('timestamp').asfreq('H').reset_index()  # Fill in missing hours if any
df['Price_lag_1d'] = df['price'].shift(24)  # Use 1-day lag (24 hours ago)
df.dropna(inplace=True)  # Drop rows without lag data

# Step 3: Train-Test Split
# Use the first year for training and the next month for testing
train_df = df[df['timestamp'] < '2023-01-01']  # Training set: 1 year of data
test_df = df[df['timestamp'] >= '2023-01-01']  # Testing set: the following month

test_df.head(100)

  df = df.set_index('timestamp').asfreq('H').reset_index()  # Fill in missing hours if any


Unnamed: 0,timestamp,price,Price_lag_1d
8737,2023-01-01 00:00:00,-1.07,-0.07
8738,2023-01-01 01:00:00,-1.47,-0.03
8739,2023-01-01 02:00:00,-5.08,-0.04
8740,2023-01-01 03:00:00,-4.49,-0.03
8741,2023-01-01 04:00:00,-5.40,-0.02
...,...,...,...
8832,2023-01-04 23:00:00,0.05,80.89
8833,2023-01-05 00:00:00,0.07,70.67
8834,2023-01-05 01:00:00,0.07,40.00
8835,2023-01-05 02:00:00,0.12,8.68


In [14]:
from neuralforecast.models import LSTM, NHITS, RNN

In [16]:


# Step 4: Configure Models and Prepare Data for Training

# Define forecasting horizon and input sequence length
horizon = 24  # Forecasts the next 24 hours
input_size = 2 * horizon

# Ensure the training data has the required columns
# Prepare data for NeuralForecast model, with required columns 'unique_id', 'ds', and 'y'
train_df = train_df.rename(columns={'timestamp': 'ds', 'price': 'y'})
train_df['unique_id'] = 'price_series'  # Assign a unique ID for the time series

# Define the models with LSTM and NBEATS configurations as per your example
models = [
    LSTM(
        h=horizon,                         # Forecast horizon
        max_steps=500,                     # Number of steps to train
        scaler_type='standard',            # Scaler for normalization
        encoder_hidden_size=64,            # Hidden state size of the LSTM
        decoder_hidden_size=64             # Hidden units in each layer of MLP decoder
    ),
    NBEATS(
        h=horizon,                         # Forecast horizon
        input_size=input_size,             # Length of input sequence
        max_steps=100                      # Training steps
    )
]

# Initialize NeuralForecast with models list and frequency
nf = NeuralForecast(models=models, freq='H')  # 'H' for hourly frequency

# Fit the model on the prepared training DataFrame
nf.fit(df=train_df[['unique_id', 'ds', 'y']])



Seed set to 1
Seed set to 1
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type          | Params | Mode 
----------------------------------------------------------
0 | loss            | MAE           | 0      | train
1 | padder          | ConstantPad1d | 0      | train
2 | scaler          | TemporalNorm  | 0      | train
3 | hist_encoder    | LSTM          | 50.4 K | train
4 | context_adapter | Linear        | 15.6 K | train
5 | mlp_decoder     | MLP           | 769    | train
----------------------------------------------------------
66.8 K    Trainable params
0         Non-trainable params
66.8 K    Total params
0.267     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval mode


Epoch 99: 100%|██████████| 1/1 [00:00<00:00,  2.59it/s, v_num=23, train_loss_step=0.439, train_loss_epoch=0.439]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 628.55it/s][A
Epoch 199: 100%|██████████| 1/1 [00:00<00:00,  2.40it/s, v_num=23, train_loss_step=0.372, train_loss_epoch=0.373]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 607.61it/s][A
Epoch 299: 100%|██████████| 1/1 [00:00<00:00,  2.55it/s, v_num=23, train_loss_step=0.307, train_loss_epoch=0.305]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Valid

`Trainer.fit` stopped: `max_steps=500` reached.


Epoch 499: 100%|██████████| 1/1 [00:00<00:00,  2.47it/s, v_num=23, train_loss_step=0.265, train_loss_epoch=0.265]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
3.5 K     Non-trainable params
2.5 M     Total params
10.120    Total estimated model params size (MB)
31        Modules in train mode
0         Modules in eval mode



Epoch 99: 100%|██████████| 1/1 [00:00<00:00, 36.03it/s, v_num=24, train_loss_step=41.00, train_loss_epoch=41.40]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 987.13it/s][A
Epoch 99: 100%|██████████| 1/1 [00:00<00:00, 29.78it/s, v_num=24, train_loss_step=41.00, train_loss_epoch=41.00]

`Trainer.fit` stopped: `max_steps=100` reached.


Epoch 99: 100%|██████████| 1/1 [00:00<00:00, 28.45it/s, v_num=24, train_loss_step=41.00, train_loss_epoch=41.00]


In [17]:
import matplotlib.pyplot as plt

In [19]:
# Step 5: Prediction and Visualization

# Prepare test data in the same format as training data
test_df = test_df.rename(columns={'timestamp': 'ds', 'price': 'y'})
test_df['unique_id'] = 'price_series'  # Unique ID for time series

# Predict for the test period (the next month)
Y_hat_df = nf.predict(df=test_df[['unique_id', 'ds', 'y']])

# Assuming Y_hat_df contains predictions from LSTM and NBEATS models,
# let's concatenate actual (Y_df) and forecast data (Y_hat_df)
# and plot them

import pandas as pd
import matplotlib.pyplot as plt

# Concatenate actual and forecast data
# Ensure Y_df is structured with columns: 'ds' (dates), 'y' (actual values)
plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds')

# Plotting
fig, ax = plt.subplots(1, 1, figsize=(20, 7))
plot_df[['y', 'LSTM', 'NBEATS']].plot(ax=ax, linewidth=2)  # Adjust labels if necessary

ax.set_title('Electricity Price Forecast', fontsize=22)
ax.set_ylabel('Electricity Price', fontsize=20)
ax.set_xlabel('Date', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()
plt.show()

  freq = pd.tseries.frequencies.to_offset(freq)
  freq = pd.tseries.frequencies.to_offset(freq)
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 27.71it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 89.94it/s] 




NameError: name 'Y_df' is not defined