In [80]:
import datetime, time, os

import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.figure_factory as ff
import pandas as pd

from functools import reduce

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import PolynomialFeatures

# Make numpy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)

import plotly.graph_objects as go
# `go` is used by the plotting cells below to build figures trace by trace.

import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including deprecation warnings from the imported libraries — consider a
# narrower filter (by category or module) so real problems stay visible.
warnings.filterwarnings('ignore')

In [81]:
# Load the merged data set that all following cells work on.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files
# from a trusted source.
# The cells below group by `df.index.month` / `.weekday` / `.hour` and merge on
# 'dt_start_utc', so the frame presumably has a DatetimeIndex named
# 'dt_start_utc' — TODO confirm.
df = pd.read_pickle('../data/pickle/df_merged_5.pickle')

## Volatility Feature Engineering

In [82]:
# Absolute change of each series relative to the preceding row (i.e. the
# previous quarter hour). The first row has no predecessor and becomes NaN.
df["diff_reb"] = df["rebap_eur_mwh"].diff()
df["diff_rz_saldo_mwh"] = df["rz_saldo_mwh"].diff()

In [83]:
# Volatility of the last quarter hour, expressed as a relative change:
#   (current - previous) / |previous|
# Dividing by the absolute value keeps the sign of the change meaningful even
# when the previous value is negative.
# NOTE(review): a previous value of exactly 0 produces +/-inf (or NaN for 0/0);
# those rows are kept unchanged here — confirm the statistics below tolerate them.
df["voality_rebap"] = df["rebap_eur_mwh"].diff().div(df["rebap_eur_mwh"].shift().abs())

# Magnitude of the relative REBAP change, ignoring its direction.
df["voality_rebap_abs"] = df["voality_rebap"].abs()

df["voality_rz_saldo_mwh"] = df["rz_saldo_mwh"].diff().div(df["rz_saldo_mwh"].shift().abs())

In [84]:
# Revenue feature: row-wise product of the balance and the price column.
# Going by the column names this is MWh * EUR/MWh = EUR — TODO confirm units.
df["volume_eur"] = df["rz_saldo_mwh"].mul(df["rebap_eur_mwh"])

In [86]:
# Persist the frame, including the diff / volatility / volume columns created
# in the preceding cells, under the next numbered pickle file name.
df.to_pickle('../data/pickle/df_merged_6.pickle')

# Volatility Visualisation

## Monthly Volatility 

In [87]:
# Distribution of the relative REBAP change per calendar month: median,
# 5% quantile and 95% quantile, each as its own two-column frame.
# The group key is derived from the index, so after reset_index() the key
# column carries the index name — the merge below joins on that column.
rebap_by_month = df["voality_rebap"].groupby(df.index.month)

# Median per month (the variable keeps its historical "_mean" name).
df_month_mean = rebap_by_month.median().rename('voality_median').reset_index()

# Lower band: 5% quantile per month.
df_month_q1 = rebap_by_month.quantile(q=0.05).rename('voality_q1').reset_index()

# Upper band: 95% quantile per month.
df_month_q2 = rebap_by_month.quantile(q=0.95).rename('voality_q2').reset_index()

# Combine the three statistics into a single frame keyed by month.
df_month_stats = (
    df_month_mean
    .merge(df_month_q1, on=['dt_start_utc'], how='inner')
    .merge(df_month_q2, on=['dt_start_utc'], how='inner')
)

In [88]:
# Line chart of the monthly statistics: one trace per statistic, drawn in the
# order upper band, median, lower band.
# NOTE(review): the y-axis title says "outliers" although the traces are the
# voality_q2 / voality_median / voality_q1 columns — confirm the wording.
month_traces = (
    ("voality_q2", 'q2 [95%]'),
    ("voality_median", 'Median'),
    ("voality_q1", 'q1 [5%]'),
)

fig = go.Figure()
for stat_column, trace_name in month_traces:
    fig.add_trace(
        go.Scatter(
            x=df_month_stats["dt_start_utc"],
            y=df_month_stats[stat_column],
            mode='lines+markers',
            name=trace_name,
        )
    )

# Only month numbers 1-6 receive a named tick.
fig.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(1, 7)),
        ticktext=["January", "February", "March", "April", "May", "June"],
    ),
    xaxis_title="Month",
    yaxis_title="Average outliers per month",
)

fig.show()

## Weekdays

In [89]:
# Distribution of the relative REBAP change per weekday (0 = Monday ... 6 = Sunday):
# median, 5% quantile and 95% quantile, each as its own two-column frame.
rebap_by_weekday = df["voality_rebap"].groupby(df.index.weekday)

# Median per weekday (the variable keeps its historical "_mean" name).
df_weekday_mean = rebap_by_weekday.median().rename('voality_median').reset_index()

# Lower band: 5% quantile per weekday.
# The quantiles of q1 and q2 used to be swapped here (q1 = 0.95, q2 = 0.05),
# unlike the month and hour statistics. The weekday plot labels voality_q1 as
# 'q1 [5%]' and voality_q2 as 'q2 [95%]', so its legend was inverted.
df_weekday_q1 = rebap_by_weekday.quantile(q=0.05).rename('voality_q1').reset_index()

# Upper band: 95% quantile per weekday.
df_weekday_q2 = rebap_by_weekday.quantile(q=0.95).rename('voality_q2').reset_index()

# Combine the three statistics into a single frame keyed by weekday.
# After reset_index() the key column carries the index name, hence the join key.
df_weekday_stats = (
    df_weekday_mean
    .merge(df_weekday_q1, on=['dt_start_utc'], how='inner')
    .merge(df_weekday_q2, on=['dt_start_utc'], how='inner')
)

In [90]:
# Line chart of the weekday statistics: one trace per statistic, drawn in the
# order upper band, median, lower band.
# NOTE(review): the y-axis title says "outliers" although the traces are the
# voality_q2 / voality_median / voality_q1 columns — confirm the wording.
weekday_traces = (
    ("voality_q2", 'q2 [95%]'),
    ("voality_median", 'Median'),
    ("voality_q1", 'q1 [5%]'),
)

fig = go.Figure()
for stat_column, trace_name in weekday_traces:
    fig.add_trace(
        go.Scatter(
            x=df_weekday_stats["dt_start_utc"],
            y=df_weekday_stats[stat_column],
            mode='lines+markers',
            name=trace_name,
        )
    )

# Weekday numbers 0-6 map to Monday-Sunday; the tick label "Tueday" was a typo.
fig.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(7)),
        ticktext=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"],
    ),
    xaxis_title="Weekday",
    yaxis_title="Average outliers per weekday",
)

fig.show()

## Hours

In [91]:
# Distribution of the relative REBAP change per hour of day: median,
# 5% quantile and 95% quantile, each as its own two-column frame.
rebap_by_hour = df["voality_rebap"].groupby(df.index.hour)

# Median per hour (the variable keeps its historical "_mean" name).
df_hour_mean = rebap_by_hour.median().rename('voality_median').reset_index()

# Lower band: 5% quantile per hour.
df_hour_q1 = rebap_by_hour.quantile(q=0.05).rename('voality_q1').reset_index()

# Upper band: 95% quantile per hour.
df_hour_q2 = rebap_by_hour.quantile(q=0.95).rename('voality_q2').reset_index()

# Combine the three statistics into a single frame keyed by hour.
# After reset_index() the key column carries the index name, hence the join key.
df_hour_stats = (
    df_hour_mean
    .merge(df_hour_q1, on=['dt_start_utc'], how='inner')
    .merge(df_hour_q2, on=['dt_start_utc'], how='inner')
)

In [92]:
# Line chart of the hourly statistics: one trace per statistic, drawn in the
# order upper band, median, lower band.
# NOTE(review): the y-axis title says "outliers" although the traces are the
# voality_q2 / voality_median / voality_q1 columns — confirm the wording.
hour_traces = (
    ("voality_q2", 'q2 [95%]'),
    ("voality_median", 'Median'),
    ("voality_q1", 'q1 [5%]'),
)

fig = go.Figure()
for stat_column, trace_name in hour_traces:
    fig.add_trace(
        go.Scatter(
            x=df_hour_stats["dt_start_utc"],
            y=df_hour_stats[stat_column],
            mode='lines+markers',
            name=trace_name,
        )
    )

# One tick per hour of day, labelled "00:00" ... "23:00".
fig.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(24)),
        ticktext=[f"{hour:02d}:00" for hour in range(24)],
    ),
    xaxis_title="Hour",
    yaxis_title="Average outliers per hour",
)
fig.show()

## Business Days Hours

In [93]:
# Business days only: dayofweek 0-4 (Monday-Friday).
# The filtered frame is built once and reused; previously `df_bd` was created
# but never used, and the same boolean filter was re-evaluated six times.
df_bd = df[df.index.dayofweek < 5]

# Relative REBAP change on business days, grouped by hour of day.
bd_rebap_by_hour = df_bd["voality_rebap"].groupby(df_bd.index.hour)

# Median per hour (the variable keeps its historical "_mean" name).
df_bhour_mean = bd_rebap_by_hour.median().rename('voality_median').reset_index()

# Lower band: 5% quantile per hour.
df_bhour_q1 = bd_rebap_by_hour.quantile(q=0.05).rename('voality_q1').reset_index()

# Upper band: 95% quantile per hour.
df_bhour_q2 = bd_rebap_by_hour.quantile(q=0.95).rename('voality_q2').reset_index()

# Combine the three statistics into a single frame keyed by hour.
# After reset_index() the key column carries the index name, hence the join key.
df_bhour_stats = (
    df_bhour_mean
    .merge(df_bhour_q1, on=['dt_start_utc'], how='inner')
    .merge(df_bhour_q2, on=['dt_start_utc'], how='inner')
)

In [94]:
# Line chart of the hourly business-day statistics: one trace per statistic,
# drawn in the order upper band, median, lower band.
# NOTE(review): the y-axis title says "outliers" although the traces are the
# voality_q2 / voality_median / voality_q1 columns — confirm the wording.
bhour_traces = (
    ("voality_q2", 'q2 [95%]'),
    ("voality_median", 'Median'),
    ("voality_q1", 'q1 [5%]'),
)

fig = go.Figure()
for stat_column, trace_name in bhour_traces:
    fig.add_trace(
        go.Scatter(
            x=df_bhour_stats["dt_start_utc"],
            y=df_bhour_stats[stat_column],
            mode='lines+markers',
            name=trace_name,
        )
    )

# One tick per hour of day, labelled "00:00" ... "23:00".
fig.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(24)),
        ticktext=[f"{hour:02d}:00" for hour in range(24)],
    ),
    xaxis_title="Hour",
    yaxis_title="Average outliers per hour on business days",
)

fig.show()

## Weekend Hours

In [95]:
# Weekends only: dayofweek 5-6 (Saturday, Sunday).
# The filtered frame is built once instead of re-evaluating the same boolean
# filter six times.
df_we = df[df.index.dayofweek > 4]

# Relative REBAP change on weekends, grouped by hour of day.
we_rebap_by_hour = df_we["voality_rebap"].groupby(df_we.index.hour)

# NOTE(review): the df_bhour_* names are the same ones the business-day cell
# assigns, so from here on they hold the WEEKEND statistics. Re-running the
# business-day plot after this cell would silently show weekend data.

# Median per hour (the variable keeps its historical "_mean" name).
df_bhour_mean = we_rebap_by_hour.median().rename('voality_median').reset_index()

# Lower band: 5% quantile per hour.
df_bhour_q1 = we_rebap_by_hour.quantile(q=0.05).rename('voality_q1').reset_index()

# Upper band: 95% quantile per hour.
df_bhour_q2 = we_rebap_by_hour.quantile(q=0.95).rename('voality_q2').reset_index()

# Combine the three statistics into a single frame keyed by hour.
# After reset_index() the key column carries the index name, hence the join key.
df_bhour_stats = (
    df_bhour_mean
    .merge(df_bhour_q1, on=['dt_start_utc'], how='inner')
    .merge(df_bhour_q2, on=['dt_start_utc'], how='inner')
)

In [96]:
# Line chart of the hourly weekend statistics: one trace per statistic, drawn
# in the order upper band, median, lower band.
# NOTE(review): the y-axis title says "outliers" although the traces are the
# voality_q2 / voality_median / voality_q1 columns — confirm the wording.
weekend_traces = (
    ("voality_q2", 'q2 [95%]'),
    ("voality_median", 'Median'),
    ("voality_q1", 'q1 [5%]'),
)

fig = go.Figure()
for stat_column, trace_name in weekend_traces:
    fig.add_trace(
        go.Scatter(
            x=df_bhour_stats["dt_start_utc"],
            y=df_bhour_stats[stat_column],
            mode='lines+markers',
            name=trace_name,
        )
    )

# One tick per hour of day, labelled "00:00" ... "23:00".
fig.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(24)),
        ticktext=[f"{hour:02d}:00" for hour in range(24)],
    ),
    xaxis_title="Hour",
    yaxis_title="Average outliers per hour on weekends",
)

fig.show()