In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.optimizers import Adam
import numpy as np

# Function to load and preprocess the data
def load_and_preprocess(file_path):
    data = pd.read_csv(file_path)
    data = data.iloc[2:]  # Skip the first two rows of header information
    data.columns = ['Date', 'Hour', 'HOEP', 'Hour 1 Predispatch', 'Hour 2 Predispatch', 'Hour 3 Predispatch', 'OR 10 Min Sync', 'OR 10 Min non-sync', 'OR 30 Min']
    data = data[['Date', 'Hour', 'HOEP']].reset_index(drop=True)
    data = data[data['Hour'].str.isnumeric()]  # Filter out non-numeric rows
    data['Hour'] = data['Hour'].astype(int) - 1  # Adjust hour to 0-23 format
    data['HOEP'] = pd.to_numeric(data['HOEP'], errors='coerce')
    data.dropna(subset=['HOEP'], inplace=True)
    return data

# Define file paths for each dataset
file_paths = {
    '2015': './PUB_PriceHOEPPredispOR_2015_v1.csv',
    '2016': './PUB_PriceHOEPPredispOR_2016_v1.csv',
    '2017': './PUB_PriceHOEPPredispOR_2017_v1.csv',
    '2018': './PUB_PriceHOEPPredispOR_2018_v148.csv',
    '2019': './PUB_PriceHOEPPredispOR_2019_v395.csv',
    '2020': './PUB_PriceHOEPPredispOR_2020_v396.csv',
    '2021': './PUB_PriceHOEPPredispOR_2021_v395.csv',
    '2022': './PUB_PriceHOEPPredispOR_2022_v396.csv',
    '2023': './PUB_PriceHOEPPredispOR_2023_v393.csv'
}

# Load and concatenate the training datasets
df_list = [load_and_preprocess(file_paths[year]) for year in file_paths if year != '2023']
data_train = pd.concat(df_list)

# Preprocess the data: Filling missing values and scaling
data_train['HOEP'].fillna(method='ffill', inplace=True)

# Feature Engineering: Create a lagged feature dataset
def create_lagged_features(df, n_lags=24):
    """
    Creates lagged features from a time series data.
    
    Parameters:
    df (pd.DataFrame): DataFrame containing the 'HOEP' column.
    n_lags (int): Number of lagged features to create.
    
    Returns:
    pd.DataFrame: DataFrame containing lagged features and original features.
    """
    df_lagged = pd.DataFrame(index=df.index)
    # Create lagged features
    for lag in range(1, n_lags + 1):
        df_lagged[f'HOEP_lag_{lag}'] = df['HOEP'].shift(lag)
    df_lagged['HOEP'] = df['HOEP']  # Current HOEP to predict
    df_lagged.dropna(inplace=True)  # Drop rows with NaN values resulting from shifting
    return df_lagged

# Apply the function to create lagged features
data_train_lagged = create_lagged_features(data_train, n_lags=24)

# Normalize the data
scaler_features = StandardScaler()
scaler_labels = StandardScaler()

# Fit the scaler on the features and transform them
scaled_features = scaler_features.fit_transform(data_train_lagged.drop('HOEP', axis=1))

# Fit the scaler on the labels and transform them
# Reshape the 'HOEP' values to be a 2D array for the scaler
scaled_labels = scaler_labels.fit_transform(data_train_lagged[['HOEP']])

# Split the data into features and labels
X_train = scaled_features
y_train = scaled_labels.ravel()  # Flatten to 1D array

# Split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Model Design: Using LSTM for time series forecasting
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    LSTM(50),
    Dense(1)
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val), verbose=1)

# Preprocess the evaluation dataset
data_eval = load_and_preprocess(file_paths['2023'])
data_eval['HOEP'].fillna(method='ffill', inplace=True)
data_eval_lagged = create_lagged_features(data_eval, n_lags=24)

# Normalize the evaluation data using the scalers fitted on the training data
# Transform the features
scaled_features_eval = scaler_features.transform(data_eval_lagged.drop('HOEP', axis=1))
# Transform the labels and ensure they are the correct shape
scaled_labels_eval = scaler_labels.transform(data_eval_lagged[['HOEP']])

X_eval = scaled_features_eval
y_eval = scaled_labels_eval.ravel()  # Flatten to 1D array

# Evaluate the model
evaluation = model.evaluate(X_eval, y_eval, verbose=0)

print(f'Evaluation MSE: {evaluation}')

2024-01-30 13:17:47.460808: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/software/quadis/latest/quadis/build/lib:/opt/conda/lib
2024-01-30 13:17:47.460847: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-01-30 13:17:49.906380: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/software/quadis/latest/quadis/build/lib:/opt/conda/lib
2024-01-30 13:17:49.906457: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2024-01-30 13:17:49.930416: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:163] no NVIDIA GP

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Evaluation MSE: 0.25913912057876587


In [2]:
# Predict the HOEP prices for 2023 using the trained model
predicted_prices_2023_scaled = model.predict(X_eval)
predicted_prices_2023 = scaler_labels.inverse_transform(predicted_prices_2023_scaled)

# Strategy to charge or discharge the battery
battery_capacity = 4  # MWh
battery_state = battery_capacity  # Assuming the battery starts fully charged
charge_rate = 1  # MWh per hour, assuming 1 MWh/h charge and discharge rate
earnings = 0.0

for i in range(len(predicted_prices_2023) - 1):
    current_hour_price = data_eval['HOEP'].iloc[i]
    next_hour_predicted_price = predicted_prices_2023[i + 1][0]

    if battery_state < battery_capacity and current_hour_price < next_hour_predicted_price:
        # Charge the battery, since the price is expected to increase
        charge_amount = min(charge_rate, battery_capacity - battery_state)
        battery_state += charge_amount
        earnings -= charge_amount * current_hour_price

    elif battery_state > 0 and current_hour_price > next_hour_predicted_price:
        # Discharge (sell energy), since the price is expected to decrease
        discharge_amount = min(charge_rate, battery_state)
        battery_state -= discharge_amount
        earnings += discharge_amount * current_hour_price

# Consider the final discharge at the end of the period to sell any remaining energy
earnings += battery_state * data_eval['HOEP'].iloc[-1]
battery_state = 0

# Output the earnings
print(f"Earnings for 2023 based on the model's predictions: ${earnings:.2f}")

Earnings for 2023 based on the model's predictions: $23825.56
