In [1]:
import pandas as pd
import plotly.express as px
import pendulum
import re
import os
import plotly
import plotly.graph_objs as go
import ipywidgets as widgets
from ipywidgets import interact
from IPython.display import display

#### Configurations / Mappings

In [2]:
# Folder, relative to the notebook, that holds the input CSV files.
data_folder = r"../data"

# Time-zone label as written in the "MTU (...)" column header -> zone name used for parsing.
tz_mapper = {
    "CET/CEST": "CET",
    "EET/EEST": "EET",
}

# Multiplier that turns a price quoted in the given currency into EUR (see price_to_eur).
currency_mapper = {
    "BGN": 0.511292,
    "EUR": 1,
    "RON": 0.205,
}

## Parse data

#### Data operation definitions

In [3]:
def read_file(path: str) -> pd.DataFrame:
    """Load a CSV file, reading "-" cells as missing values.

    Rows with fewer than 3 non-null values are discarded; the remaining rows
    keep their original row labels.
    """
    raw = pd.read_csv(path, na_values=["-"])
    return raw.dropna(thresh=3)
    

def parse_column_names(df: pd.DataFrame) -> dict[str, str]:
    """Get metadata from column names.

    Three columns are located by name prefix: ``Day-ahead Price``, ``MTU``
    and ``BZN|``. The text inside the parentheses of the ``MTU (...)`` header
    is translated to a zone name through ``tz_mapper``; the text after
    ``BZN|`` is returned as the country.

    Returns:
        A dict with the keys ``price_column``, ``timestamp_column``,
        ``time_zone`` and ``country``.

    Raises:
        ValueError: if an expected column is missing, or the ``MTU`` header
            carries no parenthesised time-zone label.
        KeyError: if the time-zone label has no entry in ``tz_mapper``.
    """

    def first_with_prefix(prefix: str) -> str:
        # First column whose name starts with `prefix`; fail with a readable message otherwise.
        for name in df.columns:
            if name.startswith(prefix):
                return name
        raise ValueError(
            f"No column starting with {prefix!r} found; columns are {list(df.columns)}"
        )

    price_column = first_with_prefix('Day-ahead Price')
    timestamp_column = first_with_prefix('MTU')

    # The label sits between the parentheses, e.g. "MTU (CET/CEST)".
    tz_match = re.search(r"MTU \((.*?)\)", timestamp_column)
    if tz_match is None:
        raise ValueError(f"Cannot read a time-zone label from column {timestamp_column!r}")
    tz_name = tz_mapper[tz_match.group(1)]

    country = first_with_prefix('BZN|').replace("BZN|", "")
    return {"price_column": price_column, "timestamp_column": timestamp_column,
            "time_zone": tz_name, "country": country}


def parse_timestamp(value: str, source_tz: str, output_tz: str = "UTC") -> pendulum.DateTime:
    """Parse the leading timestamp of ``value`` into a timezone-aware datetime.

    Only the text before the first ' - ' separator is used. It is read with
    the format 'DD.MM.YYYY HH:mm' in ``source_tz`` and the result is
    converted to ``output_tz``.
    """
    zone = pendulum.timezone(source_tz)
    interval_start = value.split(' - ')[0]
    local_dt = pendulum.from_format(interval_start, 'DD.MM.YYYY HH:mm', tz=zone)
    return local_dt.in_tz(output_tz)


def gen_utc_index(df: pd.DataFrame, timestamp_column: str, timezone: str) -> pd.DatetimeIndex:
    """Return an hourly UTC datetime index for the data period.

    The first and last values of ``df[timestamp_column]`` are parsed in
    ``timezone`` and converted to UTC; an hourly range named
    ``utc_timestamp`` is generated between them (both ends included).

    Raises:
        ValueError: if ``df`` has no rows, or if the number of rows differs
            from the number of hours in the generated range.
    """
    timestamps = df[timestamp_column]
    if timestamps.empty:
        # Without this guard the failure would be an opaque IndexError from iloc[0].
        raise ValueError(f"No rows in column {timestamp_column!r}; cannot build a UTC index")

    start_timestamp = parse_timestamp(
        value=timestamps.iloc[0],
        source_tz=timezone,
        output_tz="UTC"
    )
    end_timestamp = parse_timestamp(
        value=timestamps.iloc[-1],
        source_tz=timezone,
        output_tz="UTC"
    )

    utc_index = pd.date_range(start=start_timestamp, end=end_timestamp, freq="h", name="utc_timestamp")
    # The index is later assigned positionally, so the row count must match exactly.
    if len(utc_index) != len(timestamps):
        raise ValueError(f"Data has time gaps! Available {len(timestamps)} out of {len(utc_index)} values")
    return utc_index
        

def price_to_eur(df: pd.DataFrame, price_column: str, currency_column: str) -> pd.Series:
    """Return the prices converted to EUR, rounded to 2 decimals.

    Each price is multiplied by the ``currency_mapper`` entry for the
    currency code found in the same row.
    """
    prices = df[price_column]
    rates = df[currency_column].map(currency_mapper)
    return (prices * rates).round(2)


def transform_data_to_cet_eur(path: str) -> pd.DataFrame:
    """Load one input file and reduce it to CET timestamps and EUR prices.

    The returned frame is indexed by the UTC index built by ``gen_utc_index``
    and has two columns: ``cet_timestamp`` (the index converted to CET) and
    ``price_eur`` (the output of ``price_to_eur``).
    """
    prices = read_file(path)
    meta = parse_column_names(prices)

    prices.index = gen_utc_index(
        prices,
        meta["timestamp_column"],
        meta["time_zone"],
    )
    prices["cet_timestamp"] = prices.index.tz_convert("CET")
    prices["price_eur"] = price_to_eur(
        df=prices,
        price_column=meta["price_column"],
        currency_column="Currency",
    )

    output_columns = ["cet_timestamp", "price_eur"]
    return prices[output_columns]

## Display data

In [4]:
def plot_fig(files_list: list[str], freq: str = "h") -> go.FigureWidget:
    """Plot the EUR price series of the selected files, averaged per period.

    Args:
        files_list: Names of files inside ``data_folder``; each one becomes
            a separate line on the chart.
        freq: Frequency string handed to ``pd.Grouper`` to bucket the CET
            timestamps before taking the mean.

    Returns:
        The ``go.FigureWidget`` holding one scatter trace per file.
    """
    time_column = "cet_timestamp"
    price_column = "price_eur"

    fig = go.FigureWidget()
    fig.update_layout(
        height = 600,
        title_text = "Energy prices explorer",
        xaxis_title = "Period, CET",
        yaxis_title = "Price, EUR/MWh"
    )

    for file_name in files_list:
        df = transform_data_to_cet_eur(f"{data_folder}/{file_name}")
        # Legend label: file name without the fixed infix and the extension.
        line_name = file_name.replace("-DAM-PRICES-", " ").replace(".csv", "")
        aggregated_data = df.groupby(pd.Grouper(key=time_column, freq=freq)).mean().round(2)
        fig.add_scatter(y=aggregated_data[price_column], x=aggregated_data.index, name=line_name)
    return fig
    

def create_files_widget(path:str, extension: str ="csv") -> widgets.widgets.widget_selection.SelectMultiple:
    """Build a multi-select widget over the files in ``path``.

    Only names ending in ``.<extension>`` are offered, in sorted order.
    """
    suffix = f".{extension}"
    matching = sorted(name for name in os.listdir(path) if name.endswith(suffix))
    return widgets.SelectMultiple(options=matching, description="Select files", rows=10)

def create_frequency_dropdown() -> widgets.widgets.widget_selection.Dropdown:
    """Build the aggregation dropdown: display label -> frequency code, "d" preselected."""
    labels = ("Hourly", "Daily", "Weekly", "Monthly", "Quarterly", "Yearly")
    codes = ("h", "d", "w", "m", "q", "y")
    return widgets.Dropdown(
        options=list(zip(labels, codes)),
        value="d",
        description="Aggregation",
    )

#### Create controls (widgets)

In [5]:
# Controls for the plot: a file picker over the data folder and an aggregation selector.
files_widget = create_files_widget(path=data_folder, extension="csv")
freq_widget = create_frequency_dropdown()

#### Display interactive plot

In [6]:
# Bind the two widgets to plot_fig's `files_list` and `freq` arguments through ipywidgets' interact.
interact(plot_fig, files_list=files_widget, freq=freq_widget)

interactive(children=(SelectMultiple(description='Select files', options=('BG-DAM-PRICES-2020.csv', 'BG-DAM-PR…

<function __main__.plot_fig(files_list: list[str], freq: str = 'h') -> plotly.graph_objs._figure.Figure>