In [None]:
import pandas as pd
import numpy as np
import sys
from sklearn.preprocessing import StandardScaler, MinMaxScaler

sys.path.append("../../")
from utils.data_processing import _drop_consecutive_nans, add_day_ahead_column
from utils.error_metrics import _calc_mae, _calc_mse, _calc_rmse, _calc_nrmse, _calc_mape, _calc_mase, _calc_msse, _seas_naive_fcst, _calc_metrics
from utils.clustering import mapping_tsfeatures, clustering, sum_until_threshold, mapping_energy_metrics, get_results

### ML AZURE
from azureml.core import Workspace, Dataset, Datastore
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
import mlflow
from config import subscription_id, resource_group, workspace_name
# Connect to the Azure ML workspace (azureml-core SDK) using the identifiers
# imported from the local `config` module, then fetch the datastore named
# "workspaceblobstore" from which the result files are read.
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)
datastore = Datastore.get(workspace, "workspaceblobstore")

# Credential object handed to the azure-ai-ml client below.
credential = DefaultAzureCredential()

# Client of the newer azure-ai-ml SDK, pointed at the same workspace.
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

import warnings
warnings.simplefilter(action='ignore', category=Warning)

### Plotting
import plotly.graph_objs as go
import plotly.express as px
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import num2date
from matplotlib import gridspec
# Global matplotlib text style: serif family at size 13.
plt.rcParams.update({
    'font.family': 'serif',
    'font.size': 13,
})

# Twelve-colour palette: the first six entries are the dark tones,
# the last six the light tones.
stanford_colors = [
    '#1f78b5', '#33a12c', '#e41a1c', '#ff7f00', '#6a3d9b', '#b25928',
    '#a7cfe4', '#b3e08b', '#fc9b9a', '#fec06f', '#cbb3d7', '#ffff9a',
]

# Make the palette the default colour cycle for every new axes.
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=stanford_colors)

In [None]:
### Load results

# Name of the run whose result files are read from the datastore.
name = 'frosty_pepper_nsmqw7'
path = f'azureml/{name}/results/'

# Test-set forecasts; 'ds' is converted to datetime for the time filters used later.
file = 'forecasts_forecast_test.csv'
dataset = Dataset.Tabular.from_delimited_files(path=(datastore, path + file))
forecast_test = dataset.to_pandas_dataframe()
forecast_test['ds'] = pd.to_datetime(forecast_test['ds'])

# Test-set metrics.
file = 'metrics_metrics_test.csv'
dataset = Dataset.Tabular.from_delimited_files(path=(datastore, path + file))
metrics_test = dataset.to_pandas_dataframe()

IDs = forecast_test['ID'].unique()

# Columns handed to `add_day_ahead_column`; later cells read the resulting
# '<column>_day_ahead' columns.
day_ahead_columns = ['yhat', '1.0%', '99.0%', 'ar', 'lagged_regressor_temp']

# Process each ID separately and concatenate once at the end. Calling
# pd.concat inside the loop would re-copy all accumulated rows per iteration.
forecast_frames = []
for ID in IDs:
    forecast_test_ID = forecast_test[forecast_test['ID'] == ID]
    for column in day_ahead_columns:
        forecast_test_ID = add_day_ahead_column(forecast_test_ID, column)
    forecast_frames.append(forecast_test_ID)

# pd.concat raises on an empty list, so fall back to an empty frame.
if forecast_frames:
    forecast = pd.concat(forecast_frames, ignore_index=True)
else:
    forecast = pd.DataFrame()

# 1. Graphical Analysis

In [None]:
### Plot the day-ahead forecast for the selected countries

## Uncomment the second assignment to plot all IDs
IDs = ['DEU']
#IDs = forecast_test['ID'].unique()

for country_id in IDs:
    # Restrict this country's forecast to the displayed window (bounds inclusive).
    forecast_country = forecast[forecast['ID'] == country_id]
    in_window = (forecast_country['ds'] >= '2014-09-08') & (forecast_country['ds'] <= '2014-09-15')
    forecast_test_ID_short = forecast_country[in_window]

    fig, ax = plt.subplots(figsize=(20, 4))
    # The band is drawn from the 1.0% and 99.0% quantile columns, so the label says so.
    ax.fill_between(
        forecast_test_ID_short['ds'],
        forecast_test_ID_short['1.0%_day_ahead'],
        forecast_test_ID_short['99.0%_day_ahead'],
        color='lightblue',
        label='1.0% - 99.0% quantile',
    )
    ax.plot(forecast_test_ID_short['ds'], forecast_test_ID_short['yhat_day_ahead'], label='yhat', color='blue')
    ax.scatter(forecast_test_ID_short['ds'], forecast_test_ID_short['y'], color='black', marker='x', label='y')

    # A formatter keeps labels in sync with the tick positions, whereas
    # set_xticklabels() pins fixed strings to ticks that can still move.
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%a, %m-%d %H:%M'))

    ax.grid()
    ax.set_title(f"Day ahead forecast for {country_id}")
    ax.set_ylabel("Load [MW]")
    ax.legend()
    plt.show()

# 2. Components

In [None]:
### Load train data

# The CSV is looked up relative to the current working directory;
# the 'ds' column is parsed as datetime while reading.
name = '00_load_country'
path = name + '.csv'
df = pd.read_csv(filepath_or_buffer=path, parse_dates=['ds'])

In [None]:
### Uncomment the second assignment to plot all IDs

IDs = ['DEU']
#IDs = forecast_test['ID'].unique()


def get_cumulated_array(data, **kwargs):
    """Return the stacking offsets for the rows of ``data``.

    ``data`` is clipped with ``kwargs`` (passed straight to ``ndarray.clip``,
    e.g. ``min=0`` or ``max=0``) and summed cumulatively along axis 0.
    Row ``i`` of the result is the sum of the clipped rows ``0 .. i-1``;
    row 0 is all zeros. The result has the same shape as ``data``.
    """
    cum = np.cumsum(data.clip(**kwargs), axis=0)
    d = np.zeros(np.shape(data))
    d[1:] = cum[:-1]
    return d


# Displayed window (both bounds inclusive); identical for every ID.
start = pd.to_datetime('2014-09-08')
end = start + pd.Timedelta(days=7)

# The daily seasonality column is chosen from the month in which the window starts.
is_winter = start.month in [10, 11, 12, 1, 2, 3]
daily_seasonality_comp = 'season_winter' if is_winter else 'season_summer'
daily_seasonality_label = 'Daily Winter\nSeasonality' if is_winter else 'Daily Summer\nSeasonality'

components = ['season_yearly', 'season_weekly', daily_seasonality_comp, 'ar_day_ahead', 'lagged_regressor_temp_day_ahead']
components_labels = ['Yearly Seasonality', 'Weekly Seasonality', daily_seasonality_label, 'AR Component', 'Temperature Component']

for ID in IDs:
    forecast_test_ID_short = forecast[forecast['ID'] == ID]
    forecast_test_ID_short = forecast_test_ID_short[(forecast_test_ID_short['ds'] >= start) & (forecast_test_ID_short['ds'] <= end)]

    df_ID_short = df[df['country'] == ID]
    df_ID_short = df_ID_short[(df_ID_short['ds'] >= start) & (df_ID_short['ds'] <= end)]

    # One row per component, one column per timestamp.
    data = forecast_test_ID_short[components].values.T.astype(float)

    # Bar bottoms: positive values stack on the positive contributions of the
    # preceding components, negative values on the negative ones.
    data_stack = get_cumulated_array(data, min=0)
    negative_stack = get_cumulated_array(data, max=0)
    is_negative = data < 0
    data_stack[is_negative] = negative_stack[is_negative]

    ## Plot
    fig = plt.figure(figsize=(16, 7), constrained_layout=True)
    gs = gridspec.GridSpec(3, 1, height_ratios=[1, 1.3, 0.7])
    ax1 = fig.add_subplot(gs[0])
    ax2 = fig.add_subplot(gs[1])
    ax3 = fig.add_subplot(gs[2])

    # Top panel: day-ahead forecast, quantile band and actuals.
    ax1.fill_between(forecast_test_ID_short['ds'], forecast_test_ID_short['1.0%_day_ahead'], forecast_test_ID_short['99.0%_day_ahead'], color='lightblue', label='1.0% - 99.0% quantile')
    ax1.plot(forecast_test_ID_short['ds'], forecast_test_ID_short['yhat_day_ahead'], label='Predicted')
    ax1.scatter(forecast_test_ID_short['ds'], forecast_test_ID_short['y'], color='black', marker='x', label='Actuals')
    ax1.set_ylabel("Load [MW]")
    ax1.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax1.xaxis.set_visible(False)

    # Middle panel: stacked bars, one colour per component.
    cols = stanford_colors[0:6]
    handles = []
    for values, bottoms, colour in zip(data, data_stack, cols):
        # Bar width is in days on a date axis (0.03 d is roughly 43 min);
        # presumably chosen for hourly data - TODO confirm.
        bar = ax2.bar(forecast_test_ID_short['ds'], values, bottom=bottoms, color=colour, width=0.03)
        handles.append(bar[0])

    # Reverse so the legend entries appear in the opposite order to the plotting order.
    ax2.legend(handles[::-1], components_labels[::-1], loc='center left', bbox_to_anchor=(1, 0.5))
    ax2.set_ylabel("Load Influence [MW]")
    ax2.xaxis.set_visible(False)

    # Bottom panel: temperature from the train data over the same window.
    ax3.plot(df_ID_short['ds'], df_ID_short['temp'], label='Temperature', color=stanford_colors[1])
    ax3.set_xlabel("Date")
    ax3.set_ylabel("Temperature [°C]")
    ax3.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    # A tick every 12 hours: weekday name at midnight, clock time otherwise.
    # A Timedelta is used because the 'H' frequency alias is deprecated in recent pandas.
    custom_ticks = pd.date_range(start, end, freq=pd.Timedelta(hours=12))
    custom_labels = [
        tick.strftime('%a') if tick.hour == 0 else tick.strftime('%H:%M')
        for tick in custom_ticks
    ]

    ax3.set_xticks(custom_ticks)
    ax3.set_xticklabels(custom_labels)

    plt.show()

# 3. Seasonality

In [None]:
### Plot

## List of IDs you want to plot
selected_IDs = ['GRC', 'ITA', 'DEU', 'POL']

# Calendar months assigned to each daily-seasonality component.
summer_months = [4, 5, 6, 7, 8, 9]
winter_months = [10, 11, 12, 1, 2, 3]


def hourly_mean_profile(frame, months, column):
    """Average ``column`` per hour of day over the rows whose month is in ``months``.

    ``frame`` must contain 'hour', 'month' and ``column``. ``column`` is cast
    to float before averaging. Returns a frame with columns 'hour' and ``column``.
    """
    return (
        frame.loc[frame['month'].isin(months), ['hour', column]]
        .astype({column: float})
        .groupby('hour')
        .mean()
        .reset_index()
    )


fig, axes = plt.subplots(2, 2, figsize=(8, 8), sharex=True)
axes = axes.flatten()

for i, id_to_plot in enumerate(selected_IDs):
    ax1 = axes[i]

    # Rows of this country from 2014 only; copied so the helper columns
    # are not written to a slice of `forecast`.
    df_filtered = forecast[(forecast['ID'] == id_to_plot) & (forecast['ds'].dt.year == 2014)].copy()
    df_filtered['hour'] = df_filtered['ds'].dt.hour
    df_filtered['month'] = df_filtered['ds'].dt.month

    summer_data = hourly_mean_profile(df_filtered, summer_months, 'season_summer')
    winter_data = hourly_mean_profile(df_filtered, winter_months, 'season_winter')

    ax1.plot(summer_data['hour'], summer_data['season_summer'], label='Daily Summer Seasonality', color='#1f78b5', linewidth=2)
    ax1.plot(winter_data['hour'], winter_data['season_winter'], label='Daily Winter Seasonality', color='#33a12c', linewidth=2)

    ax1.set_title(f'Country: {id_to_plot}')
    ax1.set_ylabel('Load [MW]')

    # Only the bottom row of the 2x2 grid gets an x label.
    if i >= 2:
        ax1.set_xlabel('Hour of the Day')

# One shared legend, attached to the last axes drawn and placed below it.
# get_legend_handles_labels() returns (handles, labels), in that order.
handles, labels = ax1.get_legend_handles_labels()
ax1.legend(handles, labels, bbox_to_anchor=(0.85, -0.3), ncols=2)
plt.tight_layout()
plt.subplots_adjust(wspace=0.4, hspace=0.3)
plt.show()