In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error


# Load the dataset

In [None]:
# Load the raw Elspot price table, indexed by its timestamp column.
# NOTE(review): this is an absolute, machine-specific path; consider deriving
# it from a configurable data directory so the notebook runs elsewhere.
file_path = "C:/_Projects/home-energy-ai/data/raw/Elspotprices/sweden_elspot_prices.csv"
df = pd.read_csv(file_path, parse_dates=["cet_cest_timestamp"], index_col="cet_cest_timestamp")

# Keep only the SE3 price column (as a one-column DataFrame).
se3_raw = df[["SE3"]]

# Report gaps on the RAW column, before imputation. Running this check after
# ffill/bfill would only ever fire if the whole column were empty.
if se3_raw.isnull().values.any():
    print("There are missing values in the dataset")
else:
    print("There are no missing values in the dataset")

# Impute gaps: carry the last known price forward, then back-fill any
# leading gap at the very start of the series.
df_se3 = se3_raw.ffill().bfill()

# Normalise the index to UTC and drop the timezone, leaving naive UTC
# timestamps.
# NOTE(review): calendar features derived from this index will therefore be
# in UTC rather than local CET/CEST time — confirm that is intended.
df_se3.index = pd.to_datetime(df_se3.index, utc=True).tz_localize(None)

df_se3.head()

There are no missing values in the dataset


# Feature engineering

In [None]:
# Derive calendar features from the timestamp index; every non-target column
# of df_se3 is later used as a model input.
timestamps = df_se3.index
df_se3 = df_se3.assign(
    hour=timestamps.hour,            # hour of day, 0-23
    dayofweek=timestamps.dayofweek,  # Monday=0 ... Sunday=6
    month=timestamps.month,          # 1-12
    year=timestamps.year,
)

df_se3.head()

cet_cest_timestamp,SE3,hour,dayofweek,month,year
2014-12-31 23:00:00,19.33,23,2,12,2014
2015-01-01 00:00:00,19.33,0,3,1,2015
2015-01-01 01:00:00,19.33,1,3,1,2015
2015-01-01 02:00:00,17.66,2,3,1,2015
2015-01-01 03:00:00,17.53,3,3,1,2015


# Train-test split

In [None]:
# Chronological hold-out: everything before the split date is used for
# training, everything from the split date onwards for testing.
SPLIT_DATE = '2019-01-01'
# The price column in df_se3 is named "SE3"; there is no
# 'SE_3_price_day_ahead' column, so selecting that label raises KeyError.
TARGET_COL = 'SE3'

train = df_se3.loc[df_se3.index < SPLIT_DATE]
test = df_se3.loc[df_se3.index >= SPLIT_DATE]

# Features are all remaining columns; the target is the SE3 price.
X_train = train.drop(columns=[TARGET_COL])
y_train = train[TARGET_COL]
X_test = test.drop(columns=[TARGET_COL])
y_test = test[TARGET_COL]

# Train a model

In [None]:
# Fit a 100-tree random forest on the training split; the fixed random_state
# keeps the fitted model repeatable between runs.
model = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)

# Score the held-out period with mean absolute error.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Absolute Error: {mae}")


# Visualization

In [None]:
# Compare actual and predicted SE3 prices over the test period.
fig, ax = plt.subplots(figsize=(15, 4))

# The price column is "SE3" (there is no 'SE_3_price_day_ahead' column).
# Both series are drawn with the same Matplotlib call so they share one
# date axis.
ax.plot(test.index, test['SE3'], label='Actual Prices')
ax.plot(test.index, y_pred, label='Predicted Prices', linestyle='dashed')

# NOTE(review): this marker date is not the train/test boundary (2019-01-01);
# confirm what it is meant to highlight.
ax.axvline(pd.Timestamp('2025-01-01'), color='black', ls='--')

ax.set_title('SE3 Day-Ahead Price Prediction')
ax.set_xlabel('Time')
ax.set_ylabel('SE3 price')
ax.legend()  # uses the labels given to the two plotted lines
plt.show()
