In [None]:
import os
import sys
# Append the parent directory (absolute path of '..') to sys.path, once,
# so that modules located one level above this notebook can be imported below.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from data_collector import Omie

from ml import GaussianKernel, ComputationMode
from potenciala import BucketMethod, FigureShape, SingleTimeSeries, VectorTimeSeries
from stochastic import (EulerMaruyama, ConstantDiffusionMultivariate, SpatialDifferenceDriftMultivariate, 
                        SpatialDiffusionUnivariate, SpatialDriftMultivariate)

In [None]:
# uncomment this to obtain most up-to-date day ahead market timeseries
#df = Omie.download_period_file(filename="marginalpdbc", start_year=2016, end_year=2021)
#df = Omie.include_old_file(df=df, filename="marginalpdbc_2002_2015.csv")

In [None]:
# Load the day-ahead market file and keep the "spain" column rounded to 2 decimals.
df_raw = pd.read_csv("day_ahead_market_20211005.csv").assign(
    spain=lambda prices: prices["spain"].round(decimals=2)
)

In [None]:
# keep 2004-2020 only: drop years before 2004 (missing dates in 2003) and everything from 2021 onwards
df = df_raw.query("year >= 2004 & year <= 2020")

### Univariate timeseries

In [None]:
# Two univariate views of the "spain" signal that differ only in how values are bucketed:
#   s_1   -> BucketMethod.Round with bin_size=1
#   s_025 -> BucketMethod.Cut   with bin_size=0.25
# Both use metric lag times 1, 24 and 168 (presumably hours, i.e. hour/day/week - TODO confirm).
s_1 = SingleTimeSeries(df=df, signal_name="spain", metric_lag_time=[1, 24, 168], bucket_method=BucketMethod.Round, bin_size=1)
s_025 = SingleTimeSeries(df=df, signal_name="spain", metric_lag_time=[1, 24, 168], bucket_method=BucketMethod.Cut, bin_size=0.25)

In [None]:
# Plot produced by SingleTimeSeries.plot_ts for the 1-unit resolution series
# (presumably the raw signal over time - confirm in potenciala).
s_1.plot_ts()

In [None]:
# Side-by-side view of the potential (index labels up to 120) for both bucket resolutions.
fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(20, 7))
panels = (
    (ax0, s_1, "1 €/MW resolution"),
    (ax1, s_025, "0.25 €/MW resolution"),
)
for panel_ax, series, panel_title in panels:
    series.potential.loc[:120].plot(ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.show()

### Include all history or last n years

In [None]:
# Compare potential across different years: one curve per calendar year,
# one panel per potential column (3 panels are created).
years = range(2010, 2021)
fig, axis = plt.subplots(3, 1, figsize=(10, 30))
for year in years:
    s_year = SingleTimeSeries(
        df=df.loc[df["year"].eq(year)],
        signal_name="spain",
        metric_lag_time=[1, 24, 168],
        bucket_method=BucketMethod.Cut,
        bin_size=0.25,
    )
    for col, ax in zip(s_year.potential_cols, axis):
        s_year.potential[col].loc[:100].plot(ax=ax)
# Curves were drawn in `years` order, so the positional legend labels line up with them.
for col, ax in zip(s_year.potential_cols, axis):
    ax.set_title(col)
    ax.legend(years)

In [None]:
# Compare potential across different years, each curve divided by its own minimum.
years = range(2010, 2021)
fig, axis = plt.subplots(3, 1, figsize=(10, 30))
for year in years:
    s_year = SingleTimeSeries(
        df=df.loc[df["year"].eq(year)],
        signal_name="spain",
        metric_lag_time=[1, 24, 168],
        bucket_method=BucketMethod.Cut,
        bin_size=0.25,
    )
    for col, ax in zip(s_year.potential_cols, axis):
        curve = s_year.potential[col]
        # normalise over the full curve first, then keep index labels up to 100
        curve.div(curve.min()).loc[:100].plot(ax=ax)
# Curves were drawn in `years` order, so the positional legend labels line up with them.
for col, ax in zip(s_year.potential_cols, axis):
    ax.set_title(col)
    ax.legend(years)

In [None]:
# Cumulative history: for each start year (2020 down to 2010) use all data from that year onwards.
years = range(2020, 2009, -1)
fig, axis = plt.subplots(3, 1, figsize=(10, 30))
for year in years:
    s_year = SingleTimeSeries(
        df=df.loc[df["year"].ge(year)],
        signal_name="spain",
        metric_lag_time=[1, 24, 168],
        bucket_method=BucketMethod.Cut,
        bin_size=0.25,
    )
    for col, ax in zip(s_year.potential_cols, axis):
        s_year.potential[col].loc[:100].plot(ax=ax)
# Legend entries are the start years, in plotting order.
for col, ax in zip(s_year.potential_cols, axis):
    ax.set_title(col)
    ax.legend(years)

In [None]:
# Cumulative history (start year 2020 down to 2010), each curve divided by its own minimum.
years = range(2020, 2009, -1)
fig, axis = plt.subplots(3, 1, figsize=(10, 30))
for year in years:
    s_year = SingleTimeSeries(
        df=df.loc[df["year"].ge(year)],
        signal_name="spain",
        metric_lag_time=[1, 24, 168],
        bucket_method=BucketMethod.Cut,
        bin_size=0.25,
    )
    for col, ax in zip(s_year.potential_cols, axis):
        curve = s_year.potential[col]
        # normalise over the full curve first, then keep index labels up to 100
        curve.div(curve.min()).loc[:100].plot(ax=ax)
# Legend entries are the start years, in plotting order.
for col, ax in zip(s_year.potential_cols, axis):
    ax.set_title(col)
    ax.legend(years)

In [None]:
# Check n-year period: consecutive windows [year, year + n) starting at 2002.
# `df` only holds 2004-2020, so the first and last windows are partially filled.
n = 4
years = range(2002, 2022, n)
fig, axis = plt.subplots(3, 1, figsize=(10, 30))
for year in years:
    in_window = df["year"].ge(year) & df["year"].lt(year + n)
    s_year = SingleTimeSeries(
        df=df.loc[in_window],
        signal_name="spain",
        metric_lag_time=[1, 24, 168],
        bucket_method=BucketMethod.Cut,
        bin_size=0.25,
    )
    for col, ax in zip(s_year.potential_cols, axis):
        s_year.potential[col].loc[:100].plot(ax=ax)
# Legend entries are the window start years, in plotting order.
for col, ax in zip(s_year.potential_cols, axis):
    ax.set_title(col)
    ax.legend(years)

In [None]:
# Check n-year period divided by minimum: windows [year, year + n) starting at 2002,
# each curve divided by its own minimum.
n = 4
years = range(2002, 2022, n)
fig, axis = plt.subplots(3, 1, figsize=(10, 30))
for year in years:
    in_window = df["year"].ge(year) & df["year"].lt(year + n)
    s_year = SingleTimeSeries(
        df=df.loc[in_window],
        signal_name="spain",
        metric_lag_time=[1, 24, 168],
        bucket_method=BucketMethod.Cut,
        bin_size=0.25,
    )
    for col, ax in zip(s_year.potential_cols, axis):
        curve = s_year.potential[col]
        # normalise over the full curve first, then keep index labels up to 100
        curve.div(curve.min()).loc[:100].plot(ax=ax)
# Legend entries are the window start years, in plotting order.
for col, ax in zip(s_year.potential_cols, axis):
    ax.set_title(col)
    ax.legend(years)

In [None]:
# Plot the first 120 rows of the mean "drift_1" series of s_1.
# Assigning to `_` keeps the returned Axes object out of the cell output.
_ = s_1.drift.mean["drift_1"].iloc[:120].plot()

In [None]:
# Plot the first 120 rows of the mean "diffusion_1" series of s_1 (`_ =` hides the Axes repr).
_ = s_1.diffusion.mean["diffusion_1"].iloc[:120].plot()

In [None]:
# Mean lag-1 drift, reindexed onto the integer grid 0 .. max_index + 19;
# grid points without an estimate are filled with 0.
a = s_1.drift.mean["drift_1"]
a = a.reindex(np.arange(0, a.index.max() + 20)).fillna(0)
# Same treatment for the mean lag-1 diffusion.
b = s_1.diffusion.mean["diffusion_1"]
b = b.reindex(np.arange(0, b.index.max() + 20)).fillna(0)

# Reference window: the first 24*6 rows with date >= 2020-12-10; its first price is the start value.
x_df = s_1.df[(s_1.df["date"] >= "2020-12-10")].iloc[:24*6]
x_0 = x_df["spain"].iloc[0]

In [None]:
# Wrap the reindexed drift/diffusion series (as one-column frames) in the stochastic-model objects.
drift = SpatialDriftMultivariate(df=a.to_frame())
diffusion = SpatialDiffusionUnivariate(df=b.to_frame())

# 1000 simulations over 24*6 periods with delta_t=1, matching the length of x_df.
em_1 = EulerMaruyama(num_sim=1000, period=24*6, delta_t=1, nu=0, drift=drift, diffusion=diffusion)

In [None]:
# Run the univariate simulation from the single start price, with a fixed seed for reproducibility.
sim_df = em_1.simulate(x0=np.array([x_0]), random_seed=124)

In [None]:
fig, ax = plt.subplots(1,1, figsize=(15, 10))
# faint traces: the first num_sim//2 columns of sim_df (presumably one column per simulated path)
sim_df.iloc[:,  :em_1.num_sim//2].plot(alpha=0.05, ax=ax, legend=False)
# black: row-wise mean over all columns
sim_df.mean(axis=1).plot(lw=1.5, color="black", ax=ax)
# blue: observed prices of the reference window, re-indexed from 0 to share the x-axis
x_df.reset_index()["spain"].plot(lw=1.5, color="blue", ax=ax)
# dashed: row-wise 10/25/75/90 % quantiles
sim_df.quantile(q=[0.1, 0.25, 0.75, 0.9], axis=1).T.plot(lw=1.5, ls="--", ax=ax, legend=False)
plt.show()

### Multivariate  timeseries $D_{1}^h(X_{d}^h)$ 

In [None]:
# Vector (per-hour) view of the "spain" signal, values rounded to bins of size 1.
# diff_matrix_xi_xj_computation is switched off here (its exact effect is defined in potenciala).
s = VectorTimeSeries(df=df, signal_name="spain", bucket_method=BucketMethod.Round, bin_size=1, 
                     diff_matrix_xi_xj_computation=False)

In [None]:
# Box plot produced by VectorTimeSeries.plot_hourly_boxplot (presumably price distribution per hour - confirm).
s.plot_hourly_boxplot()

In [None]:
fig, ax = plt.subplots(figsize=(20, 7))
# Heat map of the first 100 columns of s.drift_hour_x, colour scale clipped to [-50, 50].
sns.heatmap(s.drift_hour_x.iloc[:, :100], ax=ax, cmap='flare', vmin=-50, vmax=50)
ax.set_title("Mean 24-hour drift")
fig.show()

In [None]:
fig, ax = plt.subplots(figsize=(20, 7))
# Heat map of the first 100 columns of s.potential_hour_x, colour scale clipped to [-400, 100].
sns.heatmap(s.potential_hour_x.iloc[:, :100], ax=ax, cmap='flare', vmin=-400, vmax=100)
ax.set_title("Potential")
fig.show()

In [None]:
# Potential curves grouped in blocks of m consecutive hours, one panel per block.
m = 6
n = int(24/m)
fig, ax = plt.subplots(n, 1, figsize=(20, 25))
for i, panel in enumerate(ax):
    first_hour, last_hour = i*m+1, (i+1)*m
    # rows are selected by hour label; transpose so that each hour becomes one line
    s.potential_hour_x.loc[first_hour:last_hour].T.plot(ax=panel)
    panel.set_xlim(0, 180)
    panel.set_ylim(-450, 150)
    panel.legend(loc=1)
ax[0].set_title("Potential")
fig.show()

In [None]:
fig, axis = plt.subplots(4, 6, figsize=(40, 25))

# Row-major walk over the 4x6 grid: the k-th panel shows hour k (1..24).
for h, ax in enumerate(axis.flat, start=1):
    s.potential_hour_x.loc[h].plot(ax=ax)
    ax.set_xlabel("")
    ax.set_title(f"Hour = {h}", fontsize=30)
    ax.set_xlim(0, 100)
    ax.set_ylim(-450, 50)
    ax.tick_params(axis='both', labelsize=20)
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

# Shared axis captions placed in figure coordinates.
fig.text(0.5, 0.095, "spot price $X_{t}^{i}$ [€/MWh]", ha="center", va="center", fontdict={"size": 25})
fig.text(0.1, 0.5, "potential $[(€/MWh)^{2}/day]$", ha="center", va="center",
         fontdict={"size": 25}, rotation=90)

In [None]:
fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(20, 10))
# Long format of the first 100 columns: after stack().reset_index() the three columns are
# the row label, the column label and the value (labelled hour, spot price and potential below).
potential = s.potential_hour_x.iloc[:, :100].stack().reset_index()
ax.scatter(potential.iloc[:, 0], potential.iloc[:, 1], potential.iloc[:, 2])
ax.set_xlabel("hour")
ax.set_ylabel("Spot price [€/Mwh]")
ax.set_zlabel("potential")
fig.show()

In [None]:
fig, ax = plt.subplots(figsize=(15, 10))
# Mask everything strictly above the diagonal so only the lower triangle and diagonal are drawn.
mask = np.triu(np.ones_like(s.diffusion_matrix, dtype=bool), k=1)
# No ax= is passed: seaborn draws on the current axes, which is the one just created.
sns.heatmap(s.diffusion_matrix, cmap="flare", mask=mask,
            square=True, linewidths=.5, cbar_kws={"shrink": 0.85})
ax.set_title("Mean diffusion")
fig.show()

In [None]:
# Display the raw diffusion matrix values.
s.diffusion_matrix

In [None]:
fig, ax = plt.subplots(figsize=(15, 10))
# Same lower-triangle heat map as for the diffusion matrix, but for s.sqrt_diff_matrix.
mask = np.triu(np.ones_like(s.sqrt_diff_matrix, dtype=bool), k=1)
sns.heatmap(s.sqrt_diff_matrix, cmap="flare", mask=mask,
            square=True, linewidths=.5, cbar_kws={"shrink": 0.85})
# NOTE(review): the title is identical to the diffusion-matrix figure although this plots
# sqrt_diff_matrix - confirm whether it should say so.
ax.set_title("Mean diffusion")
fig.show()

#### Simulation with $D_{1}^{h}(X_{d}^{h})$ and diagonal $D_{2}$

In [None]:
# Reference window: first 24*6 rows with date >= 2020-12-10.
x_df = s.df[(s.df["date"] >= "2020-12-10")].iloc[:24*6]
# Start vector: the first 24 prices of that window (one value per row, presumably one per hour).
x_0 = x_df["spain"].iloc[:24].values

In [None]:
spatial_drift = SpatialDriftMultivariate(df=s.drift_hour_x)
# Keep only the diagonal of the diffusion matrix (off-diagonal terms set to 0) and take its
# element-wise square root.
diag_diff_matrix = pd.DataFrame(np.sqrt(np.diag(np.diag(s.diffusion_matrix))))
diag_diffusion = ConstantDiffusionMultivariate(df=diag_diff_matrix)

# 1000 simulations over 6 periods (presumably days, since x_df spans 24*6 rows - confirm).
em_2 = EulerMaruyama(num_sim=1000, period=6, delta_t=1, nu=0, drift=spatial_drift, diffusion=diag_diffusion)

In [None]:
# Simulate from the 24-value start vector with a fixed seed.
sim_df = em_2.simulate(x0=x_0, random_seed=124)

In [None]:
fig, ax = plt.subplots(1,1, figsize=(15, 10))
# faint traces: first num_sim//2 columns, rows 24 onwards (the first 24 rows are skipped,
# presumably because they hold the initial day - confirm)
sim_df.iloc[24:, : em_2.num_sim//2].plot(alpha=0.05, ax=ax, legend=False)
# black: row-wise mean over all columns, all rows
sim_df.mean(axis=1).plot(lw=1.5, color="black", ax=ax)
# blue: observed prices of the reference window, re-indexed from 0
x_df.reset_index()["spain"].plot(lw=1.5, color="blue", ax=ax)
# dashed: row-wise 10/25/75/90 % quantiles, rows 24 onwards
sim_df.iloc[24:, :].quantile(q=[0.1, 0.25, 0.75, 0.9], axis=1).T.plot(lw=1.5, ls="--", ax=ax, legend=False)
ax.set_xlabel("hour")
ax.set_ylim(0, 120)
ax.set_ylabel("€/MWh")
ax.set_title("Day ahead electricity price simulation with $D_1^{h}(X_d^h)$ and diagonal $D_2$")
plt.show()

#### Simulation with $D_{1}^{h}(X_{d}^{h})$ and non-diagonal constant $D_{2}$

In [None]:
# Same reference window and start vector as in the diagonal-D_2 simulation;
# recomputed here so this section can be run on its own.
x_df = s.df[(s.df["date"] >= "2020-12-10")].iloc[:24*6]
x_0 = x_df["spain"].iloc[:24].values

In [None]:
spatial_drift = SpatialDriftMultivariate(df=s.drift_hour_x)
# Full (non-diagonal) constant diffusion: uses s.sqrt_diff_matrix as provided by VectorTimeSeries.
cte_diffusion = ConstantDiffusionMultivariate(df=pd.DataFrame(s.sqrt_diff_matrix))

# Same simulation settings as em_2; only the diffusion term differs.
em_3 = EulerMaruyama(num_sim=1000, period=6, delta_t=1, nu=0, drift=spatial_drift, diffusion=cte_diffusion)

In [None]:
# Simulate from the 24-value start vector with a fixed seed (overwrites the previous sim_df).
sim_df = em_3.simulate(x0=x_0, random_seed=124)

In [None]:
fig, ax = plt.subplots(1,1, figsize=(15, 10))
# faint traces: first num_sim//2 columns, rows 24 onwards
sim_df.iloc[24:, : em_3.num_sim//2].plot(alpha=0.05, ax=ax, legend=False)
# black: row-wise mean over all columns, all rows
sim_df.mean(axis=1).plot(lw=1.5, color="black", ax=ax)
# blue: observed prices of the reference window, re-indexed from 0
x_df.reset_index()["spain"].plot(lw=1.5, color="blue", ax=ax)
# dashed: row-wise 10/25/75/90 % quantiles, rows 24 onwards
sim_df.iloc[24:, :].quantile(q=[0.1, 0.25, 0.75, 0.9], axis=1).T.plot(lw=1.5, ls="--", ax=ax, legend=False)
ax.set_xlabel("hour")
ax.set_ylim(0, 120)
ax.set_ylabel("€/MWh")
ax.set_title("Day ahead electricity price simulation with $D_1^h(X_d^h)$ and non-diagonal $D_2$")
plt.show()

### Multivariate timeseries  - $D_{1}^{h}(X_{d}^h - X_{d-1}^h)$ 

In [None]:
# Vector time series built with x_transformation="back_drift"
# (presumably bins by X_d - X_{d-1}, as in the section heading - confirm in potenciala).
s_back_drift = VectorTimeSeries(df=df, signal_name="spain", bucket_method=BucketMethod.Round, bin_size=1, x_transformation="back_drift")

In [None]:
fig, ax = plt.subplots(figsize=(20, 7))
# Heat map of the first 200 columns of samples_hour_x; the colour range is left automatic
# (the vmin/vmax clipping is kept commented out at the end of the line).
sns.heatmap(s_back_drift.samples_hour_x.iloc[:, :200], ax=ax, cmap='flare')#, vmin=-40, vmax=40)
ax.set_title("Samples 24-hour drift")
fig.show()

In [None]:
fig, ax = plt.subplots(figsize=(20, 7))
# Heat map of the full drift_hour_x table, automatic colour range (clipping kept commented out).
sns.heatmap(s_back_drift.drift_hour_x, ax=ax, cmap='flare')#, vmin=-40, vmax=40)
ax.set_title("Mean 24-hour drift")
fig.show()

In [None]:
fig, ax = plt.subplots(figsize=(20, 7))
# Heat map of the full potential_hour_x table of the back-drift series.
sns.heatmap(s_back_drift.potential_hour_x, ax=ax, cmap='flare')
ax.set_title("Potential")
fig.show()

In [None]:
# Back-drift potential curves grouped in blocks of m consecutive hours, one panel per block.
m = 6
n = int(24/m)
fig, ax = plt.subplots(n, 1, figsize=(20, 25))
for i, panel in enumerate(ax):
    first_hour, last_hour = i*m+1, (i+1)*m
    # rows are selected by hour label; transpose so that each hour becomes one line
    s_back_drift.potential_hour_x.loc[first_hour:last_hour].T.plot(ax=panel)
    panel.set_xlim(-80, 80)
    panel.legend(loc=1)
    #panel.set_ylim(-450, 150)
ax[0].set_title("Potential")
fig.show()

In [None]:
fig, axis = plt.subplots(4, 6, figsize=(40, 25))

# Row-major walk over the 4x6 grid: the k-th panel shows hour k (1..24).
for h, ax in enumerate(axis.flat, start=1):
    s_back_drift.potential_hour_x.loc[h].plot(ax=ax)
    ax.set_xlabel("")
    ax.set_title(f"Hour = {h}", fontsize=30)
    ax.set_xlim(-60, 60)
    ax.set_ylim(-800, 50)
    ax.tick_params(axis='both', labelsize=20)
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

# Shared axis captions placed in figure coordinates.
fig.text(0.5, 0.095, "spot price difference $X_{t}^{i} - X_{t-1}^{i}$ [€/MWh]", ha="center", va="center", fontdict={"size": 25})
fig.text(0.1, 0.5, "potential $[(€/MWh)^{2}/day]$", ha="center", va="center",
         fontdict={"size": 25}, rotation=90)

In [None]:
fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(20, 10))
# Long format of the whole table: after stack().reset_index() the three columns are
# the row label, the column label and the value (labelled hour, X and potential below).
potential = s_back_drift.potential_hour_x.stack().reset_index()
ax.scatter(potential.iloc[:, 0], potential.iloc[:, 1], potential.iloc[:, 2])
ax.set_xlabel("hour")
ax.set_ylabel("X")
ax.set_zlabel("potential")
fig.show()

In [None]:
fig, ax = plt.subplots(figsize=(15, 10))
# Mask everything strictly above the diagonal so only the lower triangle and diagonal are drawn.
mask = np.triu(np.ones_like(s_back_drift.diffusion_matrix, dtype=bool), k=1)
# No ax= is passed: seaborn draws on the current axes, which is the one just created.
sns.heatmap(s_back_drift.diffusion_matrix, cmap="flare", mask=mask,
            square=True, linewidths=.5, cbar_kws={"shrink": 0.85})
ax.set_title("Mean diffusion")
fig.show()

#### Simulation with $D_{1}^{h}(X_{d}^{h} - X_{d-1}^{h})$ and non-diagonal constant $D_{2}$

In [None]:
# Reference window starts one day earlier (2020-12-09) and spans 24*7 rows, because the
# drift here depends on the difference between two consecutive days.
x_df = s_back_drift.df[(s_back_drift.df["date"] >= "2020-12-09")].iloc[:24*7]
# First and second block of 24 prices in the window.
t_0 = x_df["spain"].iloc[:24].values
t_1 = x_df["spain"].iloc[24:48].values
# Stack both blocks and transpose: one row per position within the block, one column per block
# (24 x 2 when both blocks are complete).
x_0 = np.vstack([t_0, t_1]).T

In [None]:
# Drift looked up from the back-drift table; diffusion is the constant sqrt_diff_matrix of the same series.
spatial_diff_drift = SpatialDifferenceDriftMultivariate(df=s_back_drift.drift_hour_x)
cte_diffusion = ConstantDiffusionMultivariate(df=pd.DataFrame(s_back_drift.sqrt_diff_matrix))

# 1000 simulations over 7 periods, matching the 24*7 rows of x_df.
em_4 = EulerMaruyama(num_sim=1000, period=7, delta_t=1, nu=0, drift=spatial_diff_drift, diffusion=cte_diffusion)

In [None]:
# Simulate from the two-column start array with a fixed seed (overwrites the previous sim_df).
sim_df = em_4.simulate(x0=x_0, random_seed=124)

In [None]:
fig, ax = plt.subplots(1,1, figsize=(15, 10))
# faint traces: first num_sim//2 columns, rows 48 onwards (the first 48 rows are skipped,
# presumably the two initial days - confirm)
sim_df.iloc[48:, : em_4.num_sim//2].plot(alpha=0.05, ax=ax, legend=False)
# black: row-wise mean over all columns, rows 24 onwards
sim_df.mean(axis=1).iloc[24:].plot(lw=1.5, color="black", ax=ax)
# blue: observed prices of the reference window, rows 24 onwards
x_df.reset_index()["spain"].iloc[24:].plot(lw=1.5, color="blue", ax=ax)
# dashed: row-wise 10/25/75/90 % quantiles, rows 48 onwards
sim_df.iloc[48:, :].quantile(q=[0.1, 0.25, 0.75, 0.9], axis=1).T.plot(lw=1.5, ls="--", ax=ax, legend=False)
ax.set_xlabel("hour")
ax.set_ylim(0, 120)
ax.set_ylabel("€/MWh")
ax.set_title("Day ahead electricity price simulation for $D_1^h(X_d^h - X_{d-1}^h)$ and non-diagonal $D_2$")
plt.show()

## Gaussian KDE

In [None]:
from scipy import integrate, stats
from matplotlib.lines import Line2D

In [None]:
# Vector time series without bucketing (BucketMethod.NoBucket); used as the sample source for the KDE fits below.
s_kde = VectorTimeSeries(df=df, signal_name="spain", bucket_method=BucketMethod.NoBucket)

In [None]:
def compute_historical_mean(df: pd.DataFrame, x_label_col_name: str, value_col_name: str):
    """Average ``value_col_name`` per integer-rounded value of ``x_label_col_name``.

    The input frame is left untouched; rounding is applied on a copy.

    Returns a Series indexed by the rounded labels holding the group means.
    """
    rounded_labels = df[x_label_col_name].round(decimals=0)
    binned = df.assign(**{x_label_col_name: rounded_labels})
    return binned.groupby(x_label_col_name)[value_col_name].mean()

def plot_samples_kernel_mean_expected(ax: plt.Axes,
                                      gk: GaussianKernel,
                                      historical_mean: pd.Series,
                                      ymax_lim: float = None):
    """Draw samples, KDE density, KDE expected value and the binned empirical mean on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    gk : kernel object; its ``samples``, ``expected_value_function``, ``grid`` and ``p``
        attributes are read.
    historical_mean : series drawn in red through ``Series.plot``.
    ymax_lim : when truthy, the y-axis is limited to ``(0, ymax_lim)``; ``None`` (or ``0``)
        leaves the limits untouched.

    Returns
    -------
    None; the function only draws on ``ax``.
    """
    # raw samples as faint black dots
    ax.plot(gk.samples[0], gk.samples[1], 'k.', markersize=2, alpha=0.2)
    # blue: expected value function of the kernel (row 0 = x, row 1 = value)
    ax.plot(gk.expected_value_function[0, :], gk.expected_value_function[1, :], lw=2, color="blue")
    # red: empirical mean per rounded x label
    historical_mean.plot(ax=ax, lw=2, color="red")
    # filled contours of gk.p over the kernel grid
    ax.contourf(gk.grid[0], gk.grid[1], gk.p,
                levels=100, cmap=plt.cm.gist_earth_r, alpha=0.7, antialiased=True)
    if ymax_lim:
        ax.set_ylim(0, ymax_lim)

def plot_potential(ax: plt.Axes, gk: GaussianKernel, historical_mean: pd.Series):
    """Plot the potential, i.e. minus the cumulative sum of the drift, on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    gk : kernel object; only its ``expected_value_function`` (row 0 = x, row 1 = value) is read.
    historical_mean : empirical mean series; its index is used as x and its values are accumulated.

    Returns
    -------
    None; the function only draws on ``ax``.
    """
    # grid step of the kernel's x axis, taken from its first two points
    dx = gk.expected_value_function[0, 1] - gk.expected_value_function[0, 0]
    # blue: cumulative sum of the KDE expected value, scaled by the grid step
    ax.plot(gk.expected_value_function[0, :], (-1)*np.cumsum(gk.expected_value_function[1, :]*dx), color="blue")
    # red: cumulative sum of the empirical mean; no step factor is applied here, which
    # amounts to assuming unit spacing of the index - TODO confirm this is intended
    ax.plot(historical_mean.index, (-1)*np.cumsum(historical_mean.values), color="red")

$$D_1^{h}$$

In [None]:
%%time
# For every hour value present in s_kde.df: fit a Gaussian kernel on (x_label, drift_24),
# compute the binned empirical mean of drift_24, store both by hour and draw one figure per hour.
d_1_dict = {}
d_1_historical_mean_dict = {}
for h in s_kde.df["hour"].unique():
    # samples of this hour only; rows with a missing label or drift are dropped
    train_df = s_kde.df.query(f"hour == {h}")[["x_label", "drift_24"]].dropna()
    
    # 100 x 100 evaluation grid, Numba computation mode
    gk = GaussianKernel(samples=train_df, grid_shape=(100, 100), computation_mode=ComputationMode.Numba)
    historical_mean = compute_historical_mean(df=train_df, 
                                                x_label_col_name="x_label", 
                                                value_col_name="drift_24")
        
    d_1_dict[h] = gk
    d_1_historical_mean_dict[h] = historical_mean
    
    # left panel (3/4 of the width): samples, density and means; right panel: potential
    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(15, 7), gridspec_kw={'width_ratios': [3, 1]})
    
    plot_samples_kernel_mean_expected(ax=ax0, gk=gk, historical_mean=historical_mean)
    ax0.set_xlabel(f"spot price, $X_d^{{{h}}}$ [€/MWh]")
    ax0.set_ylabel(f"spot price difference $X_{{d+1}}^{{{h}}} - X_{{d}}^{{{h}}}$ [€/MWh]")
    ax0.set_title(f"Drift hour = {h}")
    
    plot_potential(ax=ax1, gk=gk, historical_mean=historical_mean)
    ax1.set_title(f"Potential hour = {h}")
    ax1.set_xlabel(f"spot price, $X_d^{{{h}}}$ [€/MWh]")
    ax1.set_ylabel(f"Potential")
    plt.show()

In [None]:
# Overview figure: one panel per hour (1..24) on a 6x4 grid, reusing the kernels and
# empirical means stored in d_1_dict / d_1_historical_mean_dict.
fig, axis = plt.subplots(6, 4, figsize=(25, 32))

for i, ax_list in enumerate(axis):
    for j, ax in enumerate(ax_list):
        # row-major position on the grid -> hour key
        h = 1 + i*4+j
        plot_samples_kernel_mean_expected(ax=ax, gk=d_1_dict[h], historical_mean=d_1_historical_mean_dict[h])
        ax.set_xlabel("")
        ax.set_title(f"Hour = {h}")
        ax.set_xlim(0, 100)
        ax.set_ylim(-50, 50)

# Shared axis captions placed in figure coordinates.
fig.text(0.5, 0.11, "spot price, $X_d^{h}$ [€/MWh]", ha="center", va="center", fontdict={"size": 20})
fig.text(0.1, 0.5, "spot price difference $X_{d+1}^{h} - X_{d}^{h}$ [€/MWh]", ha="center", va="center",
         fontdict={"size": 20}, rotation=90)

# Proxy artists for a single figure-level legend (red = empirical mean, blue = KDE expectation).
custom_lines = [Line2D([0], [0], color="red", lw=2, alpha=0.7),
                Line2D([0], [0], color="blue", lw=2)]
fig.legend(custom_lines, 
           ["Empirical mean spot price difference", "Expected spot price difference from Gaussian KDE"],
           ncol=2, loc = (0.2, 0.935), prop={"size": 18})
plt.show()

In [None]:
import pickle

In [None]:
# Persist the whole hour -> kernel dictionary in one pickle file.
# NOTE(review): the file name says 1000_1000, but the D_1 kernels fitted in this notebook
# use grid_shape=(100, 100) - confirm which is intended.
with open("d_1_1000_1000_coefficients.pkl", "wb") as f:
    pickle.dump(d_1_dict, f)

In [None]:
# Reload the dictionary from disk (replaces the in-memory d_1_dict).
# pickle.load can execute arbitrary code: only load files created by a trusted run of this notebook.
with open("d_1_1000_1000_coefficients.pkl", "rb") as f:
    d_1_dict = pickle.load(f)

In [None]:
# Persist every hourly D_1 kernel in its own pickle file under data/kde/.
# The target directory is created first so the cell also works on a fresh checkout.
os.makedirs("data/kde", exist_ok=True)
for k,v in d_1_dict.items():
    with open(f"data/kde/d_1_{k}_1000_1000_coefficients.pkl", "wb") as f:
        pickle.dump(v, f)

$$ D_2^{h, j} $$

#### Diagonal $D_2^{h, j} \quad h=j$

In [None]:
%%time
# Diagonal diffusion terms: for each hour 1..24 fit a Gaussian kernel on
# (x label, diffusion column of that hour), compute the binned empirical mean and plot both.
d_2_diag_dict = {}
d_2_diag_historical_mean_dict = {}
for h in range(1, 25):
    
    x_label_col_name = s_kde.x_col_name
    # diffusion_cols is indexed from 0, hours from 1
    diff_col_name = s_kde.diffusion_cols[h-1]
    
    train_df = s_kde.df.query(f"hour == {h}")[[x_label_col_name, diff_col_name]].dropna()
    # 1000 x 1000 grid with ymin=-100; no computation_mode is passed, so the class default is used
    gk = GaussianKernel(samples=train_df, grid_shape=(1000, 1000), ymin=-100)
    
    historical_mean = compute_historical_mean(df=train_df, 
                                              x_label_col_name=x_label_col_name, 
                                              value_col_name=diff_col_name)
    
    d_2_diag_dict[h] = gk
    d_2_diag_historical_mean_dict[h] = historical_mean
    
    fig, ax = plt.subplots(figsize=(10, 7))
    # y-axis is capped at the largest empirical mean so that outlying samples do not flatten the plot
    plot_samples_kernel_mean_expected(
        ax=ax, gk=d_2_diag_dict[h], 
        historical_mean=d_2_diag_historical_mean_dict[h], ymax_lim=historical_mean.max()
    )
    ax.set_xlabel(f"spot price, $X_d^{{{h}}}$ [€/MWh]")
    ax.set_ylabel(f"Diffusion coefficient $(X_{{d+1}}^{{{h}}} - X_{{d}}^{{{h}}})^2$ [$(€/MWh)^2$]")
    ax.set_title(f"Diffusion hour = {h}")
    plt.show()

In [None]:
# Persist the whole hour -> diagonal-diffusion kernel dictionary in one pickle file.
with open("d_2_diag_1000_1000_coefficients.pkl", "wb") as f:
    pickle.dump(d_2_diag_dict, f)

In [None]:
# Persist every hourly diagonal-diffusion kernel in its own pickle file under data/kde/d_2_diag/.
# The target directory is created first so the cell also works on a fresh checkout.
os.makedirs("data/kde/d_2_diag", exist_ok=True)
for k,v in d_2_diag_dict.items():
    with open(f"data/kde/d_2_diag/d_2_diag_{k}_1000_1000_coefficients.pkl", "wb") as f:
        pickle.dump(v, f)

#### Non-diagonal $D_2^{h, j} \quad h \neq j$

In [None]:
%%time
# Single off-diagonal example: rows of hour 24 with the columns "x_label", "x_label_2_h_23"
# and "diffusion_h_23" (presumably the hour-23 counterpart - confirm), fitted on a 100^3 grid.
df_d_2_23_24 = s_kde.df.query("hour == 24")[["x_label", "x_label_2_h_23", "diffusion_h_23"]].dropna()
gk_d2_23_24 = GaussianKernel(samples=df_d_2_23_24, grid_shape=(100, 100, 100))

In [None]:
# All hour pairs (i, j) with 24 >= i > j >= 1, ordered by descending i and, within i, descending j.
hour_pairs = [(i, j) for i in np.arange(24, 1, -1) for j in range(i-1, 0, -1)]
hour_i_list = [pair[0] for pair in hour_pairs]
hour_j_list = [pair[1] for pair in hour_pairs]

In [None]:
def compute_diffusion_non_diagonal(vector_ts: VectorTimeSeries, hour_i: int, hour_j: int):
    """Fit a 3-D Gaussian kernel for the off-diagonal diffusion term of hours (hour_i, hour_j).

    Parameters
    ----------
    vector_ts : series object; its ``df``, ``x_col_name``, ``x_two_col_names`` and
        ``diffusion_cols`` attributes are read.
    hour_i : hour whose rows are selected from ``vector_ts.df``.
    hour_j : hour used (1-based) to pick the second x column and the diffusion column.

    Returns
    -------
    The fitted ``GaussianKernel``.
    """
    x_label_col_name = vector_ts.x_col_name
    # both lists are indexed from 0, hours from 1
    x_label_col_name_2 = vector_ts.x_two_col_names[hour_j-1]
    diff_col_name = vector_ts.diffusion_cols[hour_j-1]

    train_df = vector_ts.df.query(f"hour == {hour_i}")[[x_label_col_name, x_label_col_name_2, diff_col_name]]
    train_df = train_df.dropna()

    gk = GaussianKernel(samples=train_df, grid_shape=(100, 100, 500),
                        computation_mode=ComputationMode.Numba,
                        mesh_chunks=8*4)
    # NOTE(review): an unfinished `with open("data/")` statement stood here and made the whole
    # cell a syntax error; it was removed. If the kernel was meant to be written to disk at
    # this point, add the dump with the intended target path.
    return gk

In [None]:
%%time
# Fit the first 8 (hour_i, hour_j) pairs through Parallel/delayed with n_jobs=-1.
# NOTE(review): `Parallel` and `delayed` are not imported in any cell visible here
# (presumably `from joblib import Parallel, delayed`) - add the import before running.
d_2_list = Parallel(n_jobs=-1, verbose=1)(delayed(compute_diffusion_non_diagonal)(
    vector_ts=s_kde, hour_i=i, hour_j=j
) for i, j in zip(hour_i_list[:8], hour_j_list[:8]))

In [None]:
%%time
# Sequential variant restricted to the first pair only; it overwrites d_2_list.
d_2_list = [compute_diffusion_non_diagonal(vector_ts=s_kde, hour_i=i, hour_j=j) 
            for i,j in zip(hour_i_list[:1], hour_j_list[:1])]

In [None]:
# The first 23 entries are the j values (23 down to 1) paired with hour_i = 24.
hour_j_list[:23]

In [None]:
# Inspect the first fitted kernel.
# NOTE(review): the plotting helpers read `expected_value_function`; confirm that
# `expected_value` is also an attribute of GaussianKernel.
d_2_list[0].expected_value

In [None]:
# j values of the 8 pairs used in the parallel run (all paired with hour_i = 24).
hour_j_list[:8]

In [None]:
# Load the four pre-computed batches of off-diagonal coefficients from the working directory.
# NOTE(review): a later cell writes d2_coefficients_batch_4.pkl, so on a fresh run that file
# must already exist. pickle.load can execute arbitrary code - only load trusted files.
d2_pickle_list = []
for i in range(1, 5):
    with open(f"d2_coefficients_batch_{i}.pkl", "rb") as f:
        d2_pickle_list.append(pickle.load(f))

In [None]:
# Flatten the per-batch containers into a single list, keeping batch order and in-batch order.
d2_coefficient_list = [entry for batch in d2_pickle_list for entry in batch]

In [None]:
# Number of coefficient entries collected from all batches.
len(d2_coefficient_list)

In [None]:
# Inspect one entry of the flattened list. The original empty subscript `[]` was a syntax
# error; the first element is shown instead.
d2_coefficient_list[0]

In [None]:
# Per entry: the hour pair and the last element of its "most_likely_diff" sequence.
for k in d2_coefficient_list:
    print(k["hour_i"], k["hour_j"], k["most_likely_diff"][-1])

In [None]:
# Re-check the number of collected entries.
len(d2_coefficient_list)

In [None]:
# Write the current d_2_list as batch 4.
# NOTE(review): d_2_list holds the objects returned by compute_diffusion_non_diagonal, while
# the cells that consume the batch files index their entries by string keys such as
# "hour_i" - confirm that both formats agree before overwriting an existing batch file.
with open("d2_coefficients_batch_4.pkl", "wb") as f:
    pickle.dump(d_2_list, f)

In [None]:
# diffusion matrix: 24 x 24, initialised to zero and filled in by the following cells
diff_matrix = np.zeros((24, 24))

In [None]:
# Off-diagonal entries: position (hour_i - 1, hour_j - 1) receives the last element of the
# entry's "expected_diff_most_likely_price" sequence.
for element in d2_coefficient_list:
    diff_matrix[element["hour_i"] - 1, element["hour_j"] - 1] = element["expected_diff_most_likely_price"][-1]

In [None]:
# First 24 "spain" prices of the filtered frame.
df.iloc[:24]["spain"].values

In [None]:
# Diagonal entries: for hour k, take "expected_value_x" at the position where
# "price_probability" is largest.
# NOTE(review): `d_2_diag_hour_stats` is not defined in any cell visible in this notebook;
# it must come from an earlier session or a removed cell - confirm its origin.
for k,v in d_2_diag_hour_stats.items():
    # alternative 1 (disabled): value at the index closest to the last observed price of hour k
    #closet_index = np.abs(v["expected_value_x"].index - df.iloc[-24:]["spain"].values[k-1]).argmin()
    #diff_matrix[k-1, k-1] = v["expected_value_x"].iloc[closet_index]
    
    # alternative 2 (disabled): the overall expected diffusion
    #diff_matrix[k-1, k-1] = v["expected_diff"]
    
    diff_matrix[k-1, k-1] = v["expected_value_x"].iloc[v["price_probability"].argmax()]

In [None]:
# Mirror the strictly-lower triangle into the upper one to obtain a symmetric matrix.
# The result is rebuilt from the lower triangle (diagonal included), so re-running this cell
# does not add the mirrored values a second time. Any value already present above the
# diagonal is replaced by its mirrored counterpart.
diff_matrix = np.tril(diff_matrix) + np.tril(diff_matrix, k=-1).T

In [None]:
fig, ax = plt.subplots(figsize=(15, 10))
# Full (unmasked) heat map of the KDE-based matrix; the triangular mask is kept disabled.
#mask = np.triu(np.ones_like(diff_matrix, dtype=bool), k=1)
# No ax= is passed: seaborn draws on the current axes, which is the one just created.
sns.heatmap(diff_matrix, cmap="flare",
            square=True, linewidths=.5, cbar_kws={"shrink": 0.85})
ax.set_title("Mean diffusion")
fig.show()

In [None]:
fig, ax = plt.subplots(figsize=(15, 10))
# Same unmasked heat map for s.diffusion_matrix, for comparison with the KDE-based matrix.
#mask = np.triu(np.ones_like(diff_matrix, dtype=bool), k=1)
sns.heatmap(s.diffusion_matrix, cmap="flare",
            square=True, linewidths=.5, cbar_kws={"shrink": 0.85})
ax.set_title("Mean diffusion")
fig.show()

In [None]:
from scipy.linalg import sqrtm

In [None]:
# Eigenvalues in ascending order: a quick check of whether diff_matrix is positive
# semi-definite, which a real-valued matrix square root requires.
pd.Series(np.linalg.eigvals(diff_matrix)).sort_values()

In [None]:
# Matrix square root of diff_matrix (scipy.linalg.sqrtm); may be complex-valued.
sqrtm(diff_matrix)

In [None]:
# Show the matrix as a DataFrame for a readable table display.
pd.DataFrame(diff_matrix)

In [None]:
# One random draw: sqrtm(diff_matrix) applied to a standard-normal 24x1 vector, added to the
# last 24 observed prices (as a column). The global NumPy seed is fixed for reproducibility.
# sqrtm may return complex values; the plotting cell below uses the real part.
np.random.seed(72)
result = np.dot(sqrtm(diff_matrix), np.random.normal(loc=0, scale=1, size=(24, 1))) + \
df.iloc[-24:]["spain"].values.reshape(-1, 1)

In [None]:
fig, ax = plt.subplots()
# real part of the perturbed prices against the last 24 observed prices
ax.plot(range(24), result.real.reshape(-1))
ax.plot(range(24), df.iloc[-24:]["spain"].values)

In [None]:
# Shape check of the price column vector used in the noise cells above and below.
df.iloc[-24:]["spain"].values.reshape(-1, 1).shape

In [None]:
# Variant with a different seed where the absolute value of the noise term is taken before
# adding the prices, so the perturbed curve never lies below the observed one.
np.random.seed(22)
result = np.abs(np.dot(sqrtm(diff_matrix), np.random.normal(loc=0, scale=1, size=(24, 1))))+ \
df.iloc[-24:]["spain"].values.reshape(-1, 1)

fig, ax = plt.subplots()
# perturbed prices against the last 24 observed prices
ax.plot(range(24), result)
ax.plot(range(24), df.iloc[-24:]["spain"].values)

Compute price inflation as mean daily prices - mean drift, and learn this through a neural ODE.