In [6]:
import pandas as pd
import plotly.graph_objects as go

In [2]:
# Relative location of the processed day-ahead price CSV for 2024
file_path = '../../data/processed/day_ahead/da_price_2024.csv'

# Read the whole table into memory; later cells derive time columns from it
da_price = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Preview the first five rows to check which columns were loaded
da_price.head(n=5)

Unnamed: 0,datetime,da_price
0,2024-01-01 00:00:00,39.91
1,2024-01-01 00:15:00,-0.04
2,2024-01-01 00:30:00,-9.01
3,2024-01-01 00:45:00,-29.91
4,2024-01-01 01:00:00,25.28


In [4]:
# Parse the timestamp strings once and reuse the parsed series for the
# derived columns below.
timestamps = pd.to_datetime(da_price['datetime'])

da_price['datetime'] = timestamps
da_price['hour'] = timestamps.dt.hour      # hour of day, used for the two-hour windows
da_price['minute'] = timestamps.dt.minute  # minute within the hour

# Make sure prices are floating point before plotting
da_price['da_price'] = da_price['da_price'].astype(float)

In [25]:
# Split the day into twelve two-hour windows (0-2, 2-4, ..., 22-24) and keep a
# separate dataframe for each window.
# Integer-dividing the hour by two gives the window index: hours 0 and 1 map
# to 0, hours 2 and 3 map to 1, and so on up to 11 for hours 22 and 23.
two_hour_bin = da_price['hour'] // 2

da_price_0_2 = da_price.loc[two_hour_bin == 0]
da_price_2_4 = da_price.loc[two_hour_bin == 1]
da_price_4_6 = da_price.loc[two_hour_bin == 2]
da_price_6_8 = da_price.loc[two_hour_bin == 3]
da_price_8_10 = da_price.loc[two_hour_bin == 4]
da_price_10_12 = da_price.loc[two_hour_bin == 5]
da_price_12_14 = da_price.loc[two_hour_bin == 6]
da_price_14_16 = da_price.loc[two_hour_bin == 7]
da_price_16_18 = da_price.loc[two_hour_bin == 8]
da_price_18_20 = da_price.loc[two_hour_bin == 9]
da_price_20_22 = da_price.loc[two_hour_bin == 10]
da_price_22_24 = da_price.loc[two_hour_bin == 11]

In [33]:
# Box plot of the day-ahead prices over all rows of `da_price`,
# one box per two-hour window of the day.
fig = go.Figure()

# One colour per two-hour window (twelve windows cover the day)
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
          '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#aec7e8', '#ffbb78']

# Pair every pre-sliced two-hour dataframe with the label used for its box
time_periods = [
    (da_price_0_2, '0-2'), (da_price_2_4, '2-4'),
    (da_price_4_6, '4-6'), (da_price_6_8, '6-8'),
    (da_price_8_10, '8-10'), (da_price_10_12, '10-12'),
    (da_price_12_14, '12-14'), (da_price_14_16, '14-16'),
    (da_price_16_18, '16-18'), (da_price_18_20, '18-20'),
    (da_price_20_22, '20-22'), (da_price_22_24, '22-24')
]

# The labels serve as trace names, legend entries and x-axis tick labels
time_periods_labels = [label for _, label in time_periods]

# Add one box trace per time period
for (df, label), color in zip(time_periods, colors):
    fig.add_trace(
        go.Box(
            y=df['da_price'],
            name=label,
            boxpoints='outliers',  # draw individual points for outliers only
            jitter=0.3,  # horizontal spread applied to the drawn points
            pointpos=-1.8,  # offset of the points from the centre of the box
            marker_color=color,
            showlegend=True,
            legendgroup=label,
            hovertemplate=(
                f"Time Period: {label}<br>" +
                "Price: %{y:.2f} EUR/MWh<br>" +
                "<extra></extra>"
            )
        )
    )

# Update layout
fig.update_layout(
    title='2024 German Day-Ahead Price Distribution by hours',
    yaxis_title='Price (EUR/MWh)',
    xaxis_title='hour',
    height=600,
    width=1000,
    xaxis=dict(
        type='category',  # treat the trace names as discrete categories
        tickmode='array',
        # `ticktext` is only honoured together with `tickvals`; without
        # `tickvals` plotly silently ignores the array tick configuration.
        tickvals=time_periods_labels,
        ticktext=time_periods_labels,
    ),
    yaxis=dict(
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor='LightGray',
        gridcolor='LightGray'
    ),
    plot_bgcolor='white',
    showlegend=True,
    legend=dict(
        title="Hour",
        yanchor="top",
        y=0.99,
        xanchor="right",
        x=1.15,  # x > 1 places the legend to the right of the plotting area
        bgcolor='rgba(255, 255, 255, 0.8)'  # semi-transparent white background
    )
)

# Show the plot
fig.show()

In [34]:
# Write the figure currently bound to `fig` to an HTML file
fig.write_html(file="../../data/analysis/visuals/2024_day_ahead_price_distribution_2h.html")

In [37]:
# Weekday-only view: drop Saturdays and Sundays, then plot the price
# distribution per two-hour window.
# NOTE: this adds/overwrites the `is_weekend` column on `da_price`.
da_price['is_weekend'] = da_price['datetime'].dt.dayofweek.isin([5, 6])  # 5=Saturday, 6=Sunday
da_price_weekday = da_price[~da_price['is_weekend']]

# Build (dataframe, label) pairs for the windows 0-2, 2-4, ..., 22-24.
# Each window holds the rows whose hour is `start` or `start + 1`.
time_periods = [
    (
        da_price_weekday[da_price_weekday['hour'].isin([start, start + 1])],
        f'{start}-{start + 2}',
    )
    for start in range(0, 24, 2)
]

# The labels serve as trace names, legend entries and x-axis tick labels
time_periods_labels = [label for _, label in time_periods]

# Create box plot
fig = go.Figure()

# One colour per two-hour window
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
          '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#aec7e8', '#ffbb78']

# Add one box trace per time period
for (df, label), color in zip(time_periods, colors):
    fig.add_trace(
        go.Box(
            y=df['da_price'],
            name=label,
            boxpoints='outliers',  # draw individual points for outliers only
            jitter=0.3,
            pointpos=-1.8,  # offset of the points from the centre of the box
            marker_color=color,
            showlegend=True,
            legendgroup=label,
            hovertemplate=(
                f"Time Period: {label}<br>" +
                "Price: %{y:.2f} EUR/MWh<br>" +
                "<extra></extra>"
            )
        )
    )

# Update layout
fig.update_layout(
    title='2024 Weekday Day-Ahead Price Distribution by Time Period',
    yaxis_title='Price (EUR/MWh)',
    xaxis_title='Time Period (hours)',
    height=600,
    width=1000,
    xaxis=dict(
        type='category',
        tickmode='array',
        # `ticktext` is only honoured together with `tickvals`; without
        # `tickvals` plotly silently ignores the array tick configuration.
        tickvals=time_periods_labels,
        ticktext=time_periods_labels,
    ),
    yaxis=dict(
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor='LightGray',
        gridcolor='LightGray'
    ),
    plot_bgcolor='white',
    showlegend=True,
    legend=dict(
        title="Time Periods",
        yanchor="top",
        y=0.99,
        xanchor="right",
        x=1.15,  # x > 1 places the legend to the right of the plotting area
        bgcolor='rgba(255, 255, 255, 0.8)'
    )
)

# Show the plot
fig.show()

# Save the weekday-only figure to an HTML file
fig.write_html("../../data/analysis/visuals/2024_day_ahead_price_distribution_2h_weekday_only.html")


In [38]:
# Weekend-only view: keep Saturdays and Sundays, then plot the price
# distribution per two-hour window.
# The flag is recomputed here so this cell does not rely on another cell
# having created the `is_weekend` column.
da_price['is_weekend'] = da_price['datetime'].dt.dayofweek.isin([5, 6])  # 5=Saturday, 6=Sunday
da_price_weekend = da_price[da_price['is_weekend']]

# Build (dataframe, label) pairs for the windows 0-2, 2-4, ..., 22-24.
# Each window holds the rows whose hour is `start` or `start + 1`.
time_periods = [
    (
        da_price_weekend[da_price_weekend['hour'].isin([start, start + 1])],
        f'{start}-{start + 2}',
    )
    for start in range(0, 24, 2)
]

# The labels serve as trace names, legend entries and x-axis tick labels
time_periods_labels = [label for _, label in time_periods]

# Create box plot
fig = go.Figure()

# One colour per two-hour window
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
          '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#aec7e8', '#ffbb78']

# Add one box trace per time period
for (df, label), color in zip(time_periods, colors):
    fig.add_trace(
        go.Box(
            y=df['da_price'],
            name=label,
            boxpoints='outliers',  # draw individual points for outliers only
            jitter=0.3,
            pointpos=-1.8,  # offset of the points from the centre of the box
            marker_color=color,
            showlegend=True,
            legendgroup=label,
            hovertemplate=(
                f"Time Period: {label}<br>" +
                "Price: %{y:.2f} EUR/MWh<br>" +
                "<extra></extra>"
            )
        )
    )

# Update layout
fig.update_layout(
    title='2024 Weekend Day-Ahead Price Distribution by Time Period',
    yaxis_title='Price (EUR/MWh)',
    xaxis_title='Time Period (hours)',
    height=600,
    width=1000,
    xaxis=dict(
        type='category',
        tickmode='array',
        # `ticktext` is only honoured together with `tickvals`; without
        # `tickvals` plotly silently ignores the array tick configuration.
        tickvals=time_periods_labels,
        ticktext=time_periods_labels,
    ),
    yaxis=dict(
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor='LightGray',
        gridcolor='LightGray'
    ),
    plot_bgcolor='white',
    showlegend=True,
    legend=dict(
        title="Time Periods",
        yanchor="top",
        y=0.99,
        xanchor="right",
        x=1.15,  # x > 1 places the legend to the right of the plotting area
        bgcolor='rgba(255, 255, 255, 0.8)'
    )
)

# Show the plot
fig.show()

# Save the weekend-only figure to an HTML file
fig.write_html("../../data/analysis/visuals/2024_day_ahead_price_distribution_2h_weekend_only.html")

In [9]:
# Q2/Q3 view: keep only rows whose timestamp falls in the second or third
# calendar quarter, then plot the price distribution per two-hour window.
# NOTE: this adds/overwrites the `is_q2q3` column on `da_price`.
da_price['is_q2q3'] = da_price['datetime'].dt.quarter.isin([2, 3])
q2q3_price = da_price[da_price['is_q2q3']]

# Build (dataframe, label) pairs for the windows 0-2, 2-4, ..., 22-24,
# restricted to Q2/Q3 rows. Each window holds the rows whose hour is
# `start` or `start + 1`.
time_periods = [
    (
        q2q3_price[q2q3_price['hour'].isin([start, start + 1])],
        f'{start}-{start + 2}',
    )
    for start in range(0, 24, 2)
]

# The labels serve as trace names, legend entries and x-axis tick labels
time_periods_labels = [label for _, label in time_periods]

# Create box plot
fig = go.Figure()

# One colour per two-hour window
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
          '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#aec7e8', '#ffbb78']

# Add one box trace per time period
for (df, label), color in zip(time_periods, colors):
    fig.add_trace(
        go.Box(
            y=df['da_price'],
            name=label,
            boxpoints='outliers',  # draw individual points for outliers only
            jitter=0.3,
            pointpos=-1.8,  # offset of the points from the centre of the box
            marker_color=color,
            showlegend=True,
            legendgroup=label,
            hovertemplate=(
                f"Time Period: {label}<br>" +
                "Price: %{y:.2f} EUR/MWh<br>" +
                "<extra></extra>"
            )
        )
    )

# Update layout
fig.update_layout(
    title='2024 Q2Q3 Day-Ahead Price Distribution by Time Period',
    yaxis_title='Price (EUR/MWh)',
    xaxis_title='Time Period (hours)',
    height=600,
    width=1000,
    xaxis=dict(
        type='category',
        tickmode='array',
        # `ticktext` is only honoured together with `tickvals`; without
        # `tickvals` plotly silently ignores the array tick configuration.
        tickvals=time_periods_labels,
        ticktext=time_periods_labels,
    ),
    yaxis=dict(
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor='LightGray',
        gridcolor='LightGray'
    ),
    plot_bgcolor='white',
    showlegend=True,
    legend=dict(
        title="Time Periods",
        yanchor="top",
        y=0.99,
        xanchor="right",
        x=1.15,  # x > 1 places the legend to the right of the plotting area
        bgcolor='rgba(255, 255, 255, 0.8)'
    )
)

# Show the plot
fig.show()

# Save the Q2/Q3 figure to an HTML file
fig.write_html("../../data/analysis/visuals/2024_day_ahead_price_distribution_2h_q2q3.html")

In [10]:
# Q1/Q4 view: keep only rows whose timestamp falls in the first or fourth
# calendar quarter, then plot the price distribution per two-hour window.
# NOTE: this adds/overwrites the `is_q1q4` column on `da_price`.
da_price['is_q1q4'] = da_price['datetime'].dt.quarter.isin([1, 4])
q1q4_price = da_price[da_price['is_q1q4']]

# Build (dataframe, label) pairs for the windows 0-2, 2-4, ..., 22-24,
# restricted to Q1/Q4 rows. Each window holds the rows whose hour is
# `start` or `start + 1`.
time_periods = [
    (
        q1q4_price[q1q4_price['hour'].isin([start, start + 1])],
        f'{start}-{start + 2}',
    )
    for start in range(0, 24, 2)
]

# The labels serve as trace names, legend entries and x-axis tick labels
time_periods_labels = [label for _, label in time_periods]

# Create box plot
fig = go.Figure()

# One colour per two-hour window
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
          '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#aec7e8', '#ffbb78']

# Add one box trace per time period
for (df, label), color in zip(time_periods, colors):
    fig.add_trace(
        go.Box(
            y=df['da_price'],
            name=label,
            boxpoints='outliers',  # draw individual points for outliers only
            jitter=0.3,
            pointpos=-1.8,  # offset of the points from the centre of the box
            marker_color=color,
            showlegend=True,
            legendgroup=label,
            hovertemplate=(
                f"Time Period: {label}<br>" +
                "Price: %{y:.2f} EUR/MWh<br>" +
                "<extra></extra>"
            )
        )
    )

# Update layout
fig.update_layout(
    title='2024 Q1Q4 Day-Ahead Price Distribution by Time Period',
    yaxis_title='Price (EUR/MWh)',
    xaxis_title='Time Period (hours)',
    height=600,
    width=1000,
    xaxis=dict(
        type='category',
        tickmode='array',
        # `ticktext` is only honoured together with `tickvals`; without
        # `tickvals` plotly silently ignores the array tick configuration.
        tickvals=time_periods_labels,
        ticktext=time_periods_labels,
    ),
    yaxis=dict(
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor='LightGray',
        gridcolor='LightGray'
    ),
    plot_bgcolor='white',
    showlegend=True,
    legend=dict(
        title="Time Periods",
        yanchor="top",
        y=0.99,
        xanchor="right",
        x=1.15,  # x > 1 places the legend to the right of the plotting area
        bgcolor='rgba(255, 255, 255, 0.8)'
    )
)

# Show the plot
fig.show()

# Save the Q1/Q4 figure to an HTML file
fig.write_html("../../data/analysis/visuals/2024_day_ahead_price_distribution_2h_q1q4.html")