In [22]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import gaussian_kde
from statsmodels.tsa.seasonal import seasonal_decompose
from plotly.subplots import make_subplots
import numpy as np
from time import sleep

In [23]:
def write_image(fig, fname, show=False):
    """Optionally display ``fig``, then export it to ``fname``.

    A throwaway scatter figure is exported to the same path first; after a
    one-second pause the real figure is exported and overwrites it.  Per the
    original author's note, the throwaway export exists to get MathJax loaded
    before the real export happens.

    Parameters
    ----------
    fig
        Figure object exposing ``show()`` and ``write_image(path)``.
    fname
        Output path, passed unchanged to ``write_image``.
    show
        When truthy, ``fig.show()`` is called before anything is written.
    """
    if show:
        fig.show()

    # Warm-up export: the dummy figure lands on `fname` and is overwritten below.
    px.scatter(x=[0, 1], y=[0, 1]).write_image(fname)
    sleep(1)

    fig.write_image(fname)

In [24]:
# Load the two CSV inputs; both paths are relative to the working directory.
# `price` is read below through its 'TIME' and 'PRICE' columns.
price = pd.read_csv('../../src/forecasting/price.csv')
# `forecasts` is read below through its 'yhat1' column (the 8h/12h columns
# 'yhat8' and 'yhat12' are only referenced in disabled code).
forecasts = pd.read_csv('../../src/forecasting/price_forecasts.csv')

In [25]:
# Work on a copy so the loaded `forecasts` frame itself is never modified.
yhats = forecasts.copy() 
# Preview rows 22..39 by position; in the displayed output 'yhat1' is empty
# up to row 23 and 'yhat8' is empty up to row 30.
yhats.iloc[22:40]

Unnamed: 0,yhat1,yhat8,yhat12
22,,,
23,,,
24,43.471123,,
25,43.109058,,
26,43.341057,,
27,43.326283,,
28,43.045635,,
29,42.723537,,
30,42.976597,,
31,43.001232,55.663715,


In [41]:
# Actual price against the 1h-ahead forecast, both drawn over price['TIME'].
fig = go.Figure()

# One entry per plotted line: (source frame, column, legend label, colour).
# The 8h- and 12h-ahead forecasts are kept as disabled entries.
line_specs = [
    (price, 'PRICE', 'Actual Price', 'blue'),
    (yhats, 'yhat1', 'Forecasted Price (1h ahead)', 'red'),
    # (yhats, 'yhat8', 'Forecasted Price (8h ahead)', 'green'),
    # (yhats, 'yhat12', 'Forecasted Price (12h ahead)', 'violet'),
]
for frame, column, label, colour in line_specs:
    fig.add_trace(go.Scatter(
        x=price['TIME'],
        y=frame[column],
        mode='lines',
        name=label,
        line=dict(color=colour),
    ))

# Hourly grids anchored at the earliest timestamp; only their last elements are
# used, giving a visible window from 24 h to 120 h after the start.
first_timestamp = price['TIME'].min()
date_range1 = pd.date_range(start=first_timestamp, periods=24+1, freq='h')
date_range2 = pd.date_range(start=first_timestamp, periods=24*5+1, freq='h')

# Ticks at 20, 40, ..., 120 -- the axis starts at 0 but 0 gets no tick.
price_ticks = list(range(20, 121, 20))

fig.update_layout(
    height=500,
    width=700,
    margin=dict(l=20, r=20, t=20, b=20),
    xaxis_title='Time',
    yaxis_title='€/MWh',
    yaxis=dict(range=[0, 130], tickvals=price_ticks, ticktext=price_ticks),
    xaxis=dict(range=[date_range1[-1], date_range2[-1]]),
    # Horizontal legend, centred, sitting slightly above the plotting area.
    legend=dict(orientation="h", yanchor="bottom", y=1.05, xanchor="center", x=0.5),
)

# Display once (with the PDF export), then export the PNG without re-displaying.
write_image(fig, 'price_forecast.pdf', show=True)
write_image(fig, 'price_forecast.png')

In [43]:
# Residuals of the 1h-ahead forecast: actual minus forecast.
# Both operands get a fresh 0..n-1 index so the subtraction pairs rows by
# position rather than by whatever index labels the frames currently carry.
actual_by_row = price['PRICE'].reset_index(drop=True)
forecast_by_row = yhats['yhat1'].reset_index(drop=True)
residuals = actual_by_row - forecast_by_row

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=price['TIME'],
    y=residuals,
    mode='lines',
    name='Residuals',
    line=dict(color='orange'),
))

# Visible x-window: from 24 h to 120 h after the earliest timestamp.
series_start = price['TIME'].min()
date_range1 = pd.date_range(start=series_start, periods=24+1, freq='h')
date_range2 = pd.date_range(start=series_start, periods=24*5+1, freq='h')
visible_window = [date_range1[-1], date_range2[-1]]

# Horizontal legend, centred, slightly above the top of the plot.
legend_above_plot = dict(
    orientation="h",
    yanchor="bottom",
    y=1.05,
    xanchor="center",
    x=0.5,
)

fig.update_layout(
    height=500,
    width=700,
    margin=dict(l=20, r=20, t=20, b=20),
    xaxis_title='Time',
    yaxis_title='€/MWh',
    yaxis=dict(range=[-25, 25]),  # default tick placement is kept on this axis
    xaxis=dict(range=visible_window),
    legend=legend_above_plot,
)

fig.show()

In [8]:
# Price observations with missing values removed.
prices = price['PRICE'].dropna()

# First quartile, median and third quartile of the observed prices.
q1, q2, q3 = (prices.quantile(level) for level in (0.25, 0.50, 0.75))
# The 1.5 * IQR fences (q1 - 1.5*(q3 - q1), q3 + 1.5*(q3 - q1)) are not computed here.

In [9]:
# Gaussian kernel density estimate of the prices, evaluated on 500 evenly
# spaced points between the smallest and the largest observation.
kde = gaussian_kde(prices)
x_vals = np.linspace(prices.min(), prices.max(), 500)
y_vals = kde(x_vals)
density_peak = max(y_vals)  # highest point of the curve; every height below derives from it

fig = go.Figure()
fig.add_trace(go.Scatter(x=x_vals, y=y_vals, mode='lines', name='KDE'))

# Shade the band between Q1 and Q3 behind the curve.
fig.add_shape(
    type="rect",
    x0=q1, x1=q3,
    y0=0, y1=density_peak*1.1,
    fillcolor="rgba(255, 0, 0, 0.4)",
    opacity=0.2,
    layer="below",
    line_width=0,
)

# One dashed vertical marker plus a text label per quartile.
for q, label in ((q1, 'Q1'), (q2, 'Median'), (q3, 'Q3')):
    fig.add_shape(
        type="line",
        x0=q, x1=q,
        y0=0, y1=density_peak,
        line=dict(color="black", width=1, dash="dash"),
    )
    fig.add_annotation(
        x=q, y=density_peak,
        text=label,
        yshift=20,
        showarrow=False,
        yanchor="top",
        font=dict(size=14),
    )

# Six ticks starting at 0.002 in steps of 0.002, so 0 itself gets no tick.
density_ticks = [0.002 + i * 0.002 for i in range(6)]

fig.update_layout(
    height=500,
    width=700,
    margin=dict(l=20, r=20, t=20, b=20),
    xaxis_title='€/MWh',
    yaxis_title='Density',
    yaxis=dict(
        range=[0, density_peak * 1.05],
        tickvals=density_ticks,
        ticktext=[f'{tick:.3f}' for tick in density_ticks],
    ),
    xaxis=dict(zeroline=True),
)

# Display once (with the PDF export), then export the PNG without re-displaying.
write_image(fig, 'price_kde.pdf', show=True)
write_image(fig, 'price_kde.png')

- **Multimodal Distribution**  
  - The KDE curve shows multiple peaks (modes), indicating the data is not unimodal.  
  - This suggests the presence of different market regimes or operating conditions (e.g., peak vs off-peak, renewable vs non-renewable, weekday vs weekend).

- **High Density at Low Prices**  
  - The highest density is observed in the 0–10 €/MWh range.  
  - This implies that prices most frequently fall in this lower range, possibly due to high renewable generation or low demand periods (e.g., nights or weekends).

- **Long Tail Toward High Prices**  
  - The KDE has a long right tail, extending beyond 150 €/MWh.  
  - This reflects occasional price spikes, typical in energy markets during high demand or supply disruptions.

- **Secondary Peaks (~40–100)**  
  - Additional smaller peaks appear around 40 €/MWh, 60–70 €/MWh, and near 100 €/MWh.  
  - These may represent typical pricing under different supply/demand conditions, such as fossil fuel-driven periods or transitional energy mixes.


In [10]:
# Violin plot of the raw PRICE column with the three quartiles overlaid.
fig = px.violin(price, y='PRICE')

quartile_marks = ((q1, 'Q1'), (q2, 'Median'), (q3, 'Q3'))
for q, label in quartile_marks:
    # Dashed red line across the full plot width (x is in paper coordinates).
    fig.add_shape(
        type="line",
        xref='paper',
        x0=0, x1=1,
        y0=q, y1=q,
        line=dict(color="red", width=1, dash="dash"),
    )
    # Label anchored just past the right edge, then nudged left and up in pixels.
    fig.add_annotation(
        xref="paper", yref="y",
        x=1.01, y=q,
        text=label,
        showarrow=False,
        font=dict(size=13),
        xshift=-10,
        yshift=10,
        align="left",
    )

fig.update_layout(
    height=500,
    width=600,
    margin=dict(l=20, r=20, t=20, b=20),
    yaxis_title='€/MWh',
)

fig.show()

In [11]:
# Weekly (7-day) additive decomposition of the daily mean price.

# Build the time-indexed series on a new object instead of assigning to
# `price.index`: the shared `price` frame keeps its original index, so cells
# that pair `price` with `yhats` row by row see the same frame whether or not
# this cell has been executed.
price_by_time = price.set_index(pd.to_datetime(price['TIME']))['PRICE']

# One value per calendar day; days without any observation come out as NaN.
daily_mean = price_by_time.resample('D').mean()

# seasonal_decompose treats consecutive rows as consecutive periods, so the
# series must stay on a regular daily grid.  Dropping NaN days would remove
# rows and shift the 7-day cycle after every gap; interpolate across gaps
# instead (limit_direction='both' also covers a gap at either end of the
# series).  With no missing days this is identical to the plain daily mean.
df_daily = daily_mean.interpolate(limit_direction='both')

decomp = seasonal_decompose(df_daily, model='additive', period=7)  # weekly cycle on daily data

# One subplot row per component, top to bottom.
component_titles = ['Observed', 'Trend', 'Seasonal', 'Residual']
component_series = [decomp.observed, decomp.trend, decomp.seasonal, decomp.resid]

fig = make_subplots(rows=4, cols=1, subplot_titles=component_titles, vertical_spacing=0.09)
for row, (title, component) in enumerate(zip(component_titles, component_series), start=1):
    fig.add_trace(go.Scatter(x=df_daily.index, y=component, name=title), row=row, col=1)

fig.update_layout(
    height=750,
    width=1000,
    margin=dict(l=20, r=20, t=20, b=20),
    showlegend=False,
)

# Display once (with the PDF export), then export the PNG without re-displaying.
write_image(fig, 'price_decomp.pdf', show=True)
write_image(fig, 'price_decomp.png')

#### 📈 Observed
This is the original time series.

It shows strong variability, with price spikes and dips, but a generally increasing pattern toward the end.

#### 🔺 Trend
A clear upward trend is visible in the latter part of the data, indicating that prices have generally increased over time.

A dip occurs early on, possibly a low-demand period or policy effect, before recovering.

#### 🔁 Seasonal
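The seasonal component is a regular, repeating 7-day pattern. Because the decomposition is run on daily means with `period=7`, it captures weekly seasonality only; monthly or longer cycles, if present, remain in the trend or residual components.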

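The pattern is perfectly periodic and stable because a classical additive decomposition estimates a single weekly profile and repeats it over the whole series. This stability is a property of the method, not evidence that the weekly effect is constant over time.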

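The sharp cyclic waves most plausibly represent weekday–weekend effects or weekly operational schedules in the grid. Intraday (hourly) price cycles are averaged out by the daily resampling and therefore cannot appear in this component.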

#### 🎲 Residual
The residuals appear random with no obvious structure, indicating that the model has captured most of the signal through trend and seasonality.

There are some large spikes, which may indicate occasional anomalies or outliers (e.g., unexpected demand, blackout events, extreme weather).