In [1]:
input_file = 'tuned_output.out'

import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import tensorflow_probability as tfp
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import gaussian_kde

import sys
sys.path.append('/Users/jay/Desktop/Bachelorarbeit/Implementation')
from dependencies.hbv_sask.model import HBVSASKModel as hbvmodel
from src.execute_model import run_model_single_parameter_node
from src.construct_model import get_model


ndims = 7
dims = ['TT', 'C0', 'beta', 'ETF', 'FC', 'FRAC', 'K2']
colors = sns.color_palette(n_colors=ndims)

configPath = "/Users/jay/Desktop/Bachelorarbeit/Implementation/configurations/config_test_oldman.json"
basis = "Oldman_Basin"
model = get_model(configPath, basis)

start_date: 2004-01-01 00:00:00
start_date_predictions: 2005-01-01 00:00:00
end_date: 2006-01-01 00:00:00
simulation length: 365
full_data_range is 732 hours including spin_up_length of 366 hours
simulation_range is of length 366 hours


In [2]:
# Construct params
configurationObject = model.configurationObject
param_lower = []
param_upper = []
for param in configurationObject["parameters"]:
    # for now the Uniform distribution is only supported
    if param["distribution"] == "Uniform":
        param_lower.append(param["lower"])
        param_upper.append(param["upper"])
    else:
        raise NotImplementedError(f"Sorry, the distribution {param['distribution']} is not supported yet")
param_lower = np.array(param_lower)
param_upper = np.array(param_upper)


In [3]:
samples = pd.read_csv(f'{input_file}')
samples

Unnamed: 0,TT,C0,beta,ETF,FC,FRAC,K2
0,1.768094,0.613992,2.549540,0.077592,310.712986,0.371933,0.042178
1,3.371317,0.007119,2.455921,0.093480,426.881187,0.572871,0.038465
2,3.881307,1.465628,2.580539,0.052470,381.182304,0.635631,0.045813
3,3.881307,1.465628,2.580539,0.052470,381.182304,0.635631,0.045813
4,3.881307,1.465628,2.580539,0.052470,381.182304,0.635631,0.045813
...,...,...,...,...,...,...,...
2662,-1.000661,9.166985,2.298553,0.008278,434.513639,0.537944,0.049498
2663,-1.263485,8.820484,2.491536,0.095367,374.768080,0.475393,0.042473
2664,-1.263485,8.820484,2.491536,0.095367,374.768080,0.475393,0.042473
2665,-1.445394,5.055217,2.306354,0.073715,345.136632,0.405488,0.044325


# Plotting

In [14]:
# Create subplots with 1 row and 7 columns
fig = make_subplots(rows=1, cols=7)

# Adding a histogram for each parameter as a subplot
for i, col in enumerate(samples.columns):
    # Histogram
    fig.add_trace(
        go.Histogram(x=samples[col], name=col, histnorm='probability density'),
        row=1, col=i+1
    )

    # KDE calculation
    kde = gaussian_kde(samples[col])
    x_values = np.linspace(samples[col].min(), samples[col].max(), 300)
    kde_values = kde(x_values)

    # Adding KDE as a line plot
    fig.add_trace(
        go.Scatter(x=x_values, y=kde_values, mode='lines', name=f'KDE {col}', showlegend=False),
        row=1, col=i+1
    )

# Update layout if needed
fig.update_layout(height=600, width=2800, title_text="Tuned Metropolist Hastings: Parameters Overview")
fig.show()


In [5]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import gaussian_kde
import numpy as np

# Assuming 'samples' is your DataFrame with the data
# Create subplots with 4 rows and 2 columns, adjust the last row to have only one plot
fig = make_subplots(rows=4, cols=2, specs=[[{}, {}], [{}, {}], [{}, {}], [{}, None]])

# Adding a histogram and KDE for each parameter as a subplot
for i, col in enumerate(samples.columns):
    row = (i // 2) + 1  # Calculate the row by integer division
    col_idx = (i % 2) + 1  # Calculate the column index by modulo operation

    # Histogram
    fig.add_trace(
        go.Histogram(x=samples[col], name=col, histnorm='probability density'),
        row=row, col=col_idx
    )

    # KDE calculation
    kde = gaussian_kde(samples[col])
    x_values = np.linspace(samples[col].min(), samples[col].max(), 300)
    kde_values = kde(x_values)

    # Adding KDE as a line plot
    fig.add_trace(
        go.Scatter(x=x_values, y=kde_values, mode='lines', name=f'KDE {col}', showlegend=False),
        row=row, col=col_idx
    )

# Update layout to better fit the subplots
fig.update_layout(height=1800, width=1200, title_text="Tuned Metropolist Hastings: Parameters Overview")
fig.show()


In [6]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Assuming 'samples' is your DataFrame with the data
# Create subplots with 4 rows and 2 columns, adjust the last two rows to have only one plot in the last row
fig = make_subplots(rows=4, cols=2, specs=[[{}, {}], [{}, {}], [{}, {}], [{}, None]])

# Adding a boxplot for each parameter as a subplot
for i, col in enumerate(samples.columns):
    row = (i // 2) + 1  # Calculate the row by integer division
    col_idx = (i % 2) + 1  # Calculate the column index by modulo operation

    fig.add_trace(
        go.Box(y=samples[col], name=col, boxpoints='all', jitter=0.5, whiskerwidth=0.2, marker_size=2, line_width=1),
        row=row, col=col_idx
    )

# Update layout to better fit the subplots
fig.update_layout(height=1800, width=1200, title_text="Tuned Metropolis Hastings: Boxplots of Parameters")
fig.show()


In [7]:
corr_matrix = samples.corr()

# Create the heatmap
fig = go.Figure(data=go.Heatmap(
    z=corr_matrix, 
    x=corr_matrix.columns, 
    y=corr_matrix.columns,
    colorscale='Viridis',  # You can choose other color scales such as 'Cividis', 'Blues', etc.
    colorbar=dict(title='Correlation Coefficient')))

# Update layout
fig.update_layout(
    title='Tuned Metropolis Hastings: Heatmap of Variable Correlations',
    xaxis_title='Variables',
    yaxis_title='Variables')

# Show the figure
fig.show()


# Sampling

In [8]:
# Sampling Max
param_vec = []
for i in range(len(samples.loc[0])):
    values, counts = np.unique(samples.iloc[:, i], return_counts=True)
    ind = np.argmax(counts)
    param_vec.append(values[ind])

_, posterior_max, _, _ = run_model_single_parameter_node(model, param_vec)
    

[HVBSASK INFO] [0] parameters: [[-3.0753658138270303, 0.12109833438707895, 1.809553025415667, 0.0823219222579042, 50.40687745137322, 0.17606617655708579, 0.00991000565869472]]


In [9]:
# Mean Sampling from MCMC
sample_param = []
for i in range(7):
    sample_param.append(np.random.choice(samples.iloc[:, i], 1000))
sample_param = np.array(sample_param).T

posterior = []
for _, vec in enumerate(sample_param):
    _, y_model, _, _ = run_model_single_parameter_node(model, np.array(vec))
    posterior.append(y_model)

print(np.array(posterior).shape)
posterior_mean = np.mean(np.array(posterior), axis=0) 


[HVBSASK INFO] [0] parameters: [array([-1.43683860e+00,  9.01866442e+00,  1.48043663e+00,  5.19783480e-01,
        1.46288600e+02,  3.13353964e-01,  2.11262403e-02])]
[HVBSASK INFO] [0] parameters: [array([-1.63154451e+00,  7.00636871e+00,  2.50242962e+00,  9.43354944e-01,
        2.89677120e+02,  6.04211490e-01,  2.68472231e-02])]
[HVBSASK INFO] [0] parameters: [array([2.86122624e+00, 3.26135955e+00, 1.05250601e+00, 7.41917360e-01,
       4.51509213e+02, 2.95539001e-01, 1.43994491e-02])]
[HVBSASK INFO] [0] parameters: [array([2.59809385e+00, 3.52651126e+00, 2.32758388e+00, 7.00127085e-01,
       1.14095870e+02, 5.54374050e-01, 4.64962077e-02])]
[HVBSASK INFO] [0] parameters: [array([3.30219303e+00, 1.30384289e-01, 2.20269115e+00, 9.16115913e-02,
       1.83575654e+02, 8.98952093e-01, 2.89392096e-02])]
[HVBSASK INFO] [0] parameters: [array([2.77047448e+00, 6.24962998e+00, 2.36303518e+00, 8.85957186e-01,
       1.21554993e+02, 3.26596193e-01, 4.26223539e-02])]
[HVBSASK INFO] [0] paramet

In [10]:
# Mean Sampling from Start
sample_param = []
for i in range(7):
    uni = tfp.distributions.Uniform(low=param_lower[i], high=param_upper[i]).sample(1)
    sample_param.append(uni)
sample_param = np.array(sample_param).T

prior = []
for _, vec in enumerate(sample_param):
    _, y_model, _, _ = run_model_single_parameter_node(model, np.array(vec))
    prior.append(y_model)

_, _, measured_data, _ = run_model_single_parameter_node(model, np.array(vec))

prior_means = np.mean(np.array(prior), axis=0)
    

[HVBSASK INFO] [0] parameters: [array([2.29126870e-01, 8.12458182e+00, 1.92360105e+00, 2.55420237e-01,
       3.75969489e+02, 7.61788067e-01, 4.95167345e-02])]
[HVBSASK INFO] [0] parameters: [array([2.29126870e-01, 8.12458182e+00, 1.92360105e+00, 2.55420237e-01,
       3.75969489e+02, 7.61788067e-01, 4.95167345e-02])]


2024-06-05 22:51:36.747192: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-06-05 22:51:36.747220: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-06-05 22:51:36.747224: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-06-05 22:51:36.747242: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-06-05 22:51:36.747255: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [11]:
# Create a figure
fig = go.Figure()
model.get_start_date
dates = model.get_simulation_range()
fig.add_trace(go.Scatter(x=dates, y=prior_means, mode='lines', name='Prior Mean', line=dict(color='lightgrey')))
fig.add_trace(go.Scatter(x=dates, y=posterior_mean, mode='lines', name='Posterior Mean'))
fig.add_trace(go.Scatter(x=dates, y=posterior_max, mode='lines', name='Posterior Max'))
fig.add_trace(go.Scatter(x=dates, y=measured_data, mode='lines', name='Measured Data'))

# Update layout
fig.update_layout(
    title='Tuned Metropolis Hastings: Bayesian Inference Result Comparison',
    xaxis_title='Date',
    yaxis_title='Value',
    legend_title='Time Series',
    hovermode='x unified',
    template='plotly_white'
)

# Show the figure
fig.show()

In [12]:
def rmse(result, target):
    diff = result - target
    aggr = 0
    for i in range(len(diff)):
        aggr += diff[i] ** 2
    rmse = (aggr / (len(diff))) ** 0.5
    return rmse

def mae(result, target):
    return np.absolute(result - target).mean()

print(f'RMSE of Posterior Mean: {rmse(posterior_mean, measured_data)}')
print(f'RMSE of Posterior Max: {rmse(posterior_max, measured_data)}')
print(f'MAE of Posterior Mean: {mae(posterior_mean, measured_data)}')
print(f'MAE of Posterior Max: {mae(posterior_max, measured_data)}')


RMSE of Posterior Mean: 21.974609013782757
RMSE of Posterior Max: 24.458078992931487
MAE of Posterior Mean: 11.457657543376751
MAE of Posterior Max: 14.532783129590447
