In [3]:
input_file = "mh.out"

import json
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import tensorflow_probability as tfp
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import gaussian_kde

# TODO: Change root path
root = "/Users/jay/Desktop/Bachelorarbeit"

import sys

sys.path.append(f"{root}/Implementation")
from dependencies.hbv_sask.model import HBVSASKModel as hbvmodel
from src.execute_model import run_model_single_parameter_node
from src.construct_model import get_model


ndims = 7
dims = ["TT", "C0", "beta", "ETF", "FC", "FRAC", "K2"]
colors = sns.color_palette(n_colors=ndims)

testConfigPath = f"{root}/test_config.json"
with open(testConfigPath, "r") as file:
    run_config = json.load(file)

configPath = run_config["configPath"]
basis = run_config["basis"]
model = get_model(configPath, basis)

start_date: 2004-01-01 00:00:00
start_date_predictions: 2005-01-01 00:00:00
end_date: 2006-01-01 00:00:00
simulation length: 365
full_data_range is 732 hours including spin_up_length of 366 hours
simulation_range is of length 366 hours


In [4]:
# Construct params
configurationObject = model.configurationObject
param_lower = []
param_upper = []
for param in configurationObject["parameters"]:
    if param["distribution"] == "Uniform":
        param_lower.append(param["lower"])
        param_upper.append(param["upper"])
    else:
        raise NotImplementedError(
            f"Sorry, the distribution {param['distribution']} is not supported yet"
        )
param_lower = np.array(param_lower)
param_upper = np.array(param_upper)

In [5]:
samples = pd.read_csv(f"{input_file}")
samples

Unnamed: 0,TT,C0,beta,ETF,FC,FRAC,K2
0,-3.539617,9.478780,2.123375,0.164080,160.034878,0.322583,0.034681
1,-3.700484,8.349416,2.576316,0.142683,137.310006,0.564735,0.045107
2,-3.700484,8.349416,2.576316,0.142683,137.310006,0.564735,0.045107
3,-3.700484,8.349416,2.576316,0.142683,137.310006,0.564735,0.045107
4,-3.700484,8.349416,2.576316,0.142683,137.310006,0.564735,0.045107
...,...,...,...,...,...,...,...
7996,0.757229,9.053234,2.864949,0.878193,457.959703,0.669797,0.009338
7997,0.757229,9.053234,2.864949,0.878193,457.959703,0.669797,0.009338
7998,0.757229,9.053234,2.864949,0.878193,457.959703,0.669797,0.009338
7999,0.757229,9.053234,2.864949,0.878193,457.959703,0.669797,0.009338


# Plotting

In [23]:
fig = make_subplots(rows=2, cols=4)

# Histogram and KDE
for i, col in enumerate(samples.columns):
    row = (i // 4) + 1
    col_idx = (i % 4) + 1

    # Histogram
    fig.add_trace(
        go.Histogram(x=samples[col], name=col, histnorm="probability density"),
        row=row,
        col=col_idx,
    )

    # KDE calculation
    kde = gaussian_kde(samples[col])
    x_values = np.linspace(samples[col].min(), samples[col].max(), 300)
    kde_values = kde(x_values)
    fig.add_trace(
        go.Scatter(
            x=x_values, y=kde_values, mode="lines", name=f"KDE {col}", showlegend=False
        ),
        row=row,
        col=col_idx,
    )

# Update layout
fig.update_layout(
    height=500,
    width=1200,
    title_text="Default Metropolis Hastings: Parameters Overview",
)
fig.show()

In [22]:
fig = make_subplots(rows=2, cols=4)

for i, col in enumerate(samples.columns):
    row = (i // 4) + 1
    col_idx = (i % 4) + 1

    fig.add_trace(
        go.Box(
            y=samples[col],
            name=col,
            boxpoints="all",
            jitter=0.5,
            whiskerwidth=0.2,
            marker_size=2,
            line_width=1,
        ),
        row=row,
        col=col_idx,
    )

# Update layout
fig.update_layout(
    height=500,
    width=1200,
    title_text="Default Metropolis Hastings: Boxplots of Parameters",
)
fig.show()

In [24]:
corr_matrix = samples.corr()

# Create the heatmap
fig = go.Figure(
    data=go.Heatmap(
        z=corr_matrix,
        x=corr_matrix.columns,
        y=corr_matrix.columns,
        colorscale="Viridis",
        colorbar=dict(title="Correlation Coefficient"),
    )
)

# Update layout
fig.update_layout(
    title="Default Metropolis Hastings: Heatmap of Variable Correlations",
    xaxis_title="Parameters",
    yaxis_title="Parameters",
    height=500,
    width=800,
)

fig.show()

# Sampling

In [8]:
# Sampling Max
param_vec = []
for i in range(len(samples.loc[0])):
    values, counts = np.unique(samples.iloc[:, i], return_counts=True)
    ind = np.argmax(counts)
    param_vec.append(values[ind])

_, posterior_max, _, _ = run_model_single_parameter_node(model, param_vec)

[HVBSASK INFO] [0] parameters: [[-3.885709716371331, 0.9785446649408487, 2.9437294873873054, 0.9597591940769152, 217.5650841828358, 0.3351672384407165, 0.025217950462687936]]


In [9]:
# Mean sampling from posterior
sample_param = []
for i in range(7):
    sample_param.append(np.random.choice(samples.iloc[:, i], 1000))
sample_param = np.array(sample_param).T

posterior = []
for _, vec in enumerate(sample_param):
    _, y_model, _, _ = run_model_single_parameter_node(model, np.array(vec))
    posterior.append(y_model)

print(np.array(posterior).shape)
posterior_mean = np.mean(np.array(posterior), axis=0)

[HVBSASK INFO] [0] parameters: [array([-2.75018055e+00,  6.63364041e+00,  1.95144646e+00,  7.80079287e-01,
        3.35883247e+02,  4.37044925e-01,  4.97148781e-04])]
[HVBSASK INFO] [0] parameters: [array([3.93362943e+00, 2.90757445e+00, 1.81301414e+00, 1.34272164e-01,
       3.89848023e+02, 8.06376323e-01, 4.90663792e-02])]
[HVBSASK INFO] [0] parameters: [array([1.47696036e+00, 2.42252222e+00, 1.33522535e+00, 1.58973947e-01,
       3.67626523e+02, 6.56670010e-01, 3.75986475e-02])]
[HVBSASK INFO] [0] parameters: [array([-4.74925396e-01,  4.44251104e+00,  2.79579385e+00,  8.67986738e-01,
        1.65306455e+02,  8.21590338e-01,  1.08237252e-02])]
[HVBSASK INFO] [0] parameters: [array([-2.98696798e+00,  9.40896390e+00,  2.49515941e+00,  6.09211023e-01,
        4.43202921e+02,  3.09407356e-01,  1.11974406e-03])]
[HVBSASK INFO] [0] parameters: [array([4.60504599e-01, 4.02031211e+00, 2.38182888e+00, 6.57640741e-01,
       1.44590114e+02, 3.95932806e-01, 8.16040533e-03])]
[HVBSASK INFO] [0] 

In [10]:
# Mean sampling from prior
sample_param = []
for i in range(7):
    uni = tfp.distributions.Uniform(low=param_lower[i], high=param_upper[i]).sample(1)
    sample_param.append(uni)
sample_param = np.array(sample_param).T

prior = []
for _, vec in enumerate(sample_param):
    _, y_model, _, _ = run_model_single_parameter_node(model, np.array(vec))
    prior.append(y_model)

_, _, measured_data, _ = run_model_single_parameter_node(model, np.array(vec))

prior_means = np.mean(np.array(prior), axis=0)

[HVBSASK INFO] [0] parameters: [array([4.65770073e-01, 3.58948358e+00, 1.19069549e+00, 1.84960432e-01,
       2.54637393e+02, 2.54383552e-01, 1.52275544e-02])]
[HVBSASK INFO] [0] parameters: [array([4.65770073e-01, 3.58948358e+00, 1.19069549e+00, 1.84960432e-01,
       2.54637393e+02, 2.54383552e-01, 1.52275544e-02])]


2024-06-05 23:48:45.454312: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-06-05 23:48:45.454337: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-06-05 23:48:45.454344: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-06-05 23:48:45.454361: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-06-05 23:48:45.454372: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [11]:
fig = go.Figure()
model.get_start_date
dates = model.get_simulation_range()
fig.add_trace(
    go.Scatter(
        x=dates,
        y=prior_means,
        mode="lines",
        name="Prior Mean",
        line=dict(color="lightgrey"),
    )
)
fig.add_trace(
    go.Scatter(x=dates, y=posterior_mean, mode="lines", name="Posterior Mean")
)
fig.add_trace(go.Scatter(x=dates, y=posterior_max, mode="lines", name="Posterior Max"))
fig.add_trace(go.Scatter(x=dates, y=measured_data, mode="lines", name="Measured Data"))

# Update layout
fig.update_layout(
    title="Default Metropolis Hastings: Bayesian Inference Result Comparison",
    xaxis_title="Date",
    yaxis_title="Value",
    legend_title="Time Series",
    hovermode="x unified",
    template="plotly_white",
)

fig.show()

In [12]:
def rmse(result, target):
    diff = result - target
    aggr = 0
    for i in range(len(diff)):
        aggr += diff[i] ** 2
    rmse = (aggr / (len(diff))) ** 0.5
    return rmse


def mae(result, target):
    return np.absolute(result - target).mean()


print(f"RMSE of Posterior Mean: {rmse(posterior_mean, measured_data)}")
print(f"RMSE of Posterior Max: {rmse(posterior_max, measured_data)}")
print(f"MAE of Posterior Mean: {mae(posterior_mean, measured_data)}")
print(f"MAE of Posterior Max: {mae(posterior_max, measured_data)}")

RMSE of Posterior Mean: 22.14475485613421
RMSE of Posterior Max: 26.72454579307846
MAE of Posterior Mean: 11.399487080387862
MAE of Posterior Max: 13.196081237340492
